Thrillcrazyer committed on
Commit
f32b51a
·
verified ·
1 Parent(s): 4a62c18

step 5000

Browse files
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}
config.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "N": 20,
3
+ "base_config_dict": {
4
+ "_name_or_path": "microsoft/phi-4",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "bos_token_id": 100257,
11
+ "chunk_size_feed_forward": 0,
12
+ "dtype": "bfloat16",
13
+ "embd_pdrop": 0.0,
14
+ "eos_token_id": 100265,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 5120,
17
+ "id2label": {
18
+ "0": "LABEL_0",
19
+ "1": "LABEL_1"
20
+ },
21
+ "ignore_keys_at_rope_validation": null,
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 17920,
24
+ "is_encoder_decoder": false,
25
+ "label2id": {
26
+ "LABEL_0": 0,
27
+ "LABEL_1": 1
28
+ },
29
+ "max_position_embeddings": 16384,
30
+ "model_type": "phi3",
31
+ "num_attention_heads": 40,
32
+ "num_hidden_layers": 40,
33
+ "num_key_value_heads": 10,
34
+ "original_max_position_embeddings": 16384,
35
+ "output_attentions": false,
36
+ "output_hidden_states": false,
37
+ "pad_token_id": 100349,
38
+ "partial_rotary_factor": 1.0,
39
+ "problem_type": null,
40
+ "resid_pdrop": 0.0,
41
+ "return_dict": true,
42
+ "rms_norm_eps": 1e-05,
43
+ "rope_parameters": {
44
+ "partial_rotary_factor": 1.0,
45
+ "rope_theta": 250000,
46
+ "rope_type": "default"
47
+ },
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "transformers_version": "5.3.0",
51
+ "use_cache": true,
52
+ "vocab_size": 100352
53
+ },
54
+ "base_model_name_or_path": "microsoft/phi-4",
55
+ "decoder_layer_indices": [
56
+ 39
57
+ ],
58
+ "encoder_layer_indices": [
59
+ 0,
60
+ 1,
61
+ 2,
62
+ 3,
63
+ 4,
64
+ 5
65
+ ],
66
+ "hidden_size": 5120,
67
+ "model_type": "lds",
68
+ "q_threshold": 0.9,
69
+ "reasoning_layer_indices": [
70
+ 6,
71
+ 7,
72
+ 8,
73
+ 9,
74
+ 10,
75
+ 11,
76
+ 12,
77
+ 13,
78
+ 14,
79
+ 15,
80
+ 16,
81
+ 17,
82
+ 18,
83
+ 19,
84
+ 20,
85
+ 21,
86
+ 22,
87
+ 23,
88
+ 24,
89
+ 25,
90
+ 26,
91
+ 27,
92
+ 28,
93
+ 29,
94
+ 30,
95
+ 31,
96
+ 32,
97
+ 33,
98
+ 34,
99
+ 35,
100
+ 36,
101
+ 37,
102
+ 38
103
+ ],
104
+ "transformers_version": "5.3.0",
105
+ "vocab_size": 100352
106
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdcd1cdc1c6fb2ec6cc441929a853affc597a38e137110896f02ea6ce754d4cb
3
+ size 29332254348
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|endoftext|>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "is_local": false,
8
+ "model_max_length": 16384,
9
+ "pad_token": "<|dummy_85|>",
10
+ "tokenizer_class": "TokenizersBackend"
11
+ }