yujiepan committed on
Commit
2763bd6
·
verified ·
1 Parent(s): c0e9602

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +35 -2
  2. model.safetensors +2 -2
README.md CHANGED
@@ -13,6 +13,38 @@ This tiny model is intended for debugging. It is randomly initialized using the
13
 
14
  ### Example usage:
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  ```python
17
  from transformers import AutoModelForCausalLM, AutoTokenizer
18
 
@@ -85,8 +117,8 @@ config = AutoConfig.from_pretrained(
85
  )
86
  print(config)
87
  torch.set_default_dtype(torch.bfloat16)
 
88
  model = AutoModelForCausalLM.from_config(config, trust_remote_code=True).eval().cpu()
89
- torch.set_default_dtype(torch.float32)
90
  if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
91
  model.generation_config = GenerationConfig.from_pretrained(
92
  source_model_id, trust_remote_code=True,
@@ -111,13 +143,14 @@ with torch.no_grad():
111
  print(name, p.shape, p.dtype, f'{p.numel() / n_params * 100: .2f}%')
112
 
113
  # expert bias is in float32
114
- for i in range(config.first_k_dense_replace, config.num_hidden_layers, 1):
115
  model.model.layers[i].mlp.e_score_correction_bias = nn.Parameter(torch.randn_like(
116
  model.model.layers[i].mlp.e_score_correction_bias
117
  ).float() * 0.002)
118
 
119
  model.save_pretrained(save_folder)
120
  print(model)
 
121
  ```
122
 
123
  </details>
 
13
 
14
  ### Example usage:
15
 
16
+ - vLLM
17
+
18
+ ```bash
19
+ # Multi-token prediction is supported
20
+ model_id=yujiepan/hy3-tiny-random
21
+ vllm serve $model_id \
22
+ --tensor-parallel-size 2 \
23
+ --speculative-config.method mtp \
24
+ --speculative-config.num_speculative_tokens 1 \
25
+ --tool-call-parser hy_v3 \
26
+ --reasoning-parser hy_v3 \
27
+ --enable-auto-tool-choice
28
+ ```
29
+
30
+ - SGLang
31
+
32
+ ```bash
33
+ # Multi-token prediction is supported
34
+ model_id=yujiepan/hy3-tiny-random
35
+ python3 -m sglang.launch_server \
36
+ --model $model_id \
37
+ --tp 2 \
38
+ --tool-call-parser hunyuan \
39
+ --reasoning-parser hunyuan \
40
+ --speculative-num-steps 1 \
41
+ --speculative-eagle-topk 1 \
42
+ --speculative-num-draft-tokens 2 \
43
+ --speculative-algorithm EAGLE
44
+ ```
45
+
46
+ - Transformers
47
+
48
  ```python
49
  from transformers import AutoModelForCausalLM, AutoTokenizer
50
 
 
117
  )
118
  print(config)
119
  torch.set_default_dtype(torch.bfloat16)
120
+ set_seed(42)
121
  model = AutoModelForCausalLM.from_config(config, trust_remote_code=True).eval().cpu()
 
122
  if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
123
  model.generation_config = GenerationConfig.from_pretrained(
124
  source_model_id, trust_remote_code=True,
 
143
  print(name, p.shape, p.dtype, f'{p.numel() / n_params * 100: .2f}%')
144
 
145
  # expert bias is in float32
146
+ for i in range(config.first_k_dense_replace, config.num_hidden_layers + 1, 1):
147
  model.model.layers[i].mlp.e_score_correction_bias = nn.Parameter(torch.randn_like(
148
  model.model.layers[i].mlp.e_score_correction_bias
149
  ).float() * 0.002)
150
 
151
  model.save_pretrained(save_folder)
152
  print(model)
153
+ torch.set_default_dtype(torch.float32)
154
  ```
155
 
156
  </details>
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30b39e65fb3d102afe15a892641b41b7ff8eb4a5f01f5a199dfb0bda397c5401
3
- size 5401256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d14912ca0a2c0a4487ff365b54caf64ef7885fc06059c225bc0668e7013055f8
3
+ size 5401352