File size: 2,402 Bytes
875ab20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import {
  env,
  AutoTokenizer,
  AutoModelForCausalLM,
  TextStreamer,
  InterruptableStoppingCriteria,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3";

// Turn off the library's `allowLocalModels` setting so model files are not
// looked up locally (see the Transformers.js `env` documentation for details).
env.allowLocalModels = false;

// Model repository identifier passed to both `from_pretrained` calls in load().
const MODEL_ID = "av-codes/Supra-50M-Instruct-ONNX";

// Worker-wide state. `tokenizer` and `model` stay null until load() assigns
// them; generate() refuses to run while either is still null.
let tokenizer = null;
let model = null;
// True while a generate() call is in flight; used to reject overlapping requests.
let generating = false;
// Shared stopping criterion: reset at the start of each generate() call and
// interrupted when a "stop" message arrives.
const stopping = new InterruptableStoppingCriteria();

/**
 * Wraps a user instruction in the instruction/response prompt template.
 *
 * @param {string} instruction - Text placed under the "### Instruction:" heading.
 * @returns {string} The full prompt, ending right after the "### Response:" heading line.
 */
function formatPrompt(instruction) {
  // Fixed text that precedes the instruction.
  const header =
    "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n";
  // Fixed text that follows the instruction and opens the response section.
  const footer = "\n\n### Response:\n";

  return header + instruction + footer;
}

/**
 * Loads the tokenizer and then the model for MODEL_ID, reporting each stage
 * to the host through `self.postMessage`.
 *
 * Messages posted:
 *  - `{ type: "status", message }` before each of the two loading stages
 *  - `{ type: "progress", percent, file }` for model progress events whose
 *    `status` is "progress"
 *  - `{ type: "ready" }` once both the tokenizer and the model are assigned
 *  - `{ type: "error", message }` if either stage throws
 *
 * The returned promise never rejects: this function is started from the
 * message handler without being awaited, so a rejection would otherwise go
 * unhandled and the host would never learn that loading failed.
 *
 * @returns {Promise<void>}
 */
async function load() {
  try {
    self.postMessage({ type: "status", message: "Loading tokenizer..." });

    tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);

    self.postMessage({ type: "status", message: "Loading model (50 MB)..." });

    model = await AutoModelForCausalLM.from_pretrained(MODEL_ID, {
      // Requests the "q8" dtype variant of the model files
      // (presumably 8-bit quantized weights — confirm against the model repo).
      dtype: "q8",
      progress_callback: (progress) => {
        // Only forward byte-progress events; other statuses are ignored.
        if (progress.status === "progress") {
          self.postMessage({
            type: "progress",
            percent: progress.progress,
            file: progress.file,
          });
        }
      },
    });

    self.postMessage({ type: "ready" });
  } catch (e) {
    // Same message shape generate() uses for failures. Not every throwable is
    // an Error instance, so fall back to its string form.
    self.postMessage({ type: "error", message: e?.message ?? String(e) });
  }
}

/**
 * Generates a response for `instruction`, streaming decoded text to the host.
 *
 * Does nothing (and posts nothing) when the model or tokenizer is not loaded
 * yet, or when another generation is already running.
 *
 * Messages posted:
 *  - `{ type: "token", text }` for each chunk delivered by the streamer
 *  - `{ type: "error", message }` if any step of the run throws
 *  - `{ type: "done" }` always, once the run has ended (also after an error)
 *
 * @param {string} instruction - User instruction inserted into the prompt template.
 * @param {object} [params] - Optional sampling settings; each falls back to a default.
 * @param {number} [params.max_new_tokens=256]
 * @param {number} [params.temperature=0.7] - A value that is not > 0 selects
 *   non-sampling decoding (`do_sample: false`).
 * @param {number} [params.top_k=50]
 * @param {number} [params.top_p=0.9]
 * @param {number} [params.repetition_penalty=1.15]
 * @returns {Promise<void>}
 */
async function generate(instruction, params) {
  if (!model || !tokenizer || generating) return;
  generating = true;
  stopping.reset();

  // Tolerate a request that carries no params object at all.
  const options = params ?? {};

  // Resolve the temperature default BEFORE deciding whether to sample, so an
  // omitted temperature means "sample at 0.7" rather than "default to 0.7 but
  // decode without sampling".
  const requestedTemperature = options.temperature ?? 0.7;
  const doSample = requestedTemperature > 0;

  try {
    // Prompt building, tokenization and streamer setup live inside the try so
    // that a failure here still reaches the finally block and clears `generating`.
    const prompt = formatPrompt(instruction);
    const inputs = tokenizer(prompt);

    const streamer = new TextStreamer(tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: (text) => {
        self.postMessage({ type: "token", text });
      },
    });

    await model.generate({
      ...inputs,
      max_new_tokens: options.max_new_tokens || 256,
      // When not sampling, keep handing the library a positive temperature
      // instead of forwarding 0 or a negative value.
      temperature: doSample ? requestedTemperature : 0.7,
      top_k: options.top_k || 50,
      top_p: options.top_p || 0.9,
      repetition_penalty: options.repetition_penalty || 1.15,
      do_sample: doSample,
      streamer,
      stopping_criteria: [stopping],
    });
  } catch (e) {
    // Not every throwable is an Error instance; fall back to its string form.
    self.postMessage({ type: "error", message: e?.message ?? String(e) });
  } finally {
    // Always release the busy flag and tell the host the run is over.
    generating = false;
    self.postMessage({ type: "done" });
  }
}

// Entry point for host messages. Dispatches on the message's `type`:
// "load" starts model loading, "generate" starts a generation with the
// supplied instruction and params, "stop" interrupts the shared stopping
// criterion. Any other type is ignored.
self.onmessage = (event) => {
  const message = event.data;

  switch (message.type) {
    case "load":
      load();
      break;
    case "generate":
      generate(message.instruction, message.params);
      break;
    case "stop":
      stopping.interrupt();
      break;
    default:
      break;
  }
};