gyung committed on
Commit
86f0085
·
verified ·
1 Parent(s): addad28

Add files using upload-large-folder tool

Browse files
README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ base_model: LiquidAI/LFM2-24B-A2B
4
+ tags:
5
+ - lfm
6
+ - terminal
7
+ - sft
8
+ - tb2-lite
9
+ ---
10
+
11
+ # LFM2-24B-A2B-Terminal-SFT-1Epoch-HF-FSDP-TemplateMasked
12
+
13
+ Terminal SFT checkpoint uploaded from the LFM retraining sweep.
14
+
15
+ ## Training
16
+
17
+ - Base model: `LiquidAI/LFM2-24B-A2B`
18
+ - Epoch: `1`
19
+ - Source checkpoint: `checkpoint-730`
20
+ - Recipe: Terminal SFT with model chat template and holdout-aware preprocessing
21
+ - Evaluation protocol: corrected TB2-lite replay, 303 steps / 50 tasks, vLLM, tokenizer chat template
22
+ - Recomputed score: `33.46` (`100 * avg_command_f1`)
23
+
24
+ ## Notes
25
+
26
+ Best checkpoint in the current LFM sweep under corrected TB2-lite avg_command_f1 scoring.
27
+
28
+ This upload intentionally excludes optimizer, scheduler, and RNG state files.
29
+ It contains model weights, tokenizer/config files, chat template, and lightweight training metadata only.
chat_template.jinja ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token -}}
2
+ {%- set keep_past_thinking = keep_past_thinking | default(false) -%}
3
+ {%- set ns = namespace(system_prompt="") -%}
4
+ {%- if messages[0]["role"] == "system" -%}
5
+ {%- set sys_content = messages[0]["content"] -%}
6
+ {%- if sys_content is not string -%}
7
+ {%- for item in sys_content -%}
8
+ {%- if item["type"] == "text" -%}
9
+ {%- set ns.system_prompt = ns.system_prompt + item["text"] -%}
10
+ {%- endif -%}
11
+ {%- endfor -%}
12
+ {%- else -%}
13
+ {%- set ns.system_prompt = sys_content -%}
14
+ {%- endif -%}
15
+ {%- set messages = messages[1:] -%}
16
+ {%- endif -%}
17
+ {%- if tools -%}
18
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
19
+ {%- for tool in tools -%}
20
+ {%- if tool is not string -%}
21
+ {%- set tool = tool | tojson -%}
22
+ {%- endif -%}
23
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
24
+ {%- if not loop.last -%}
25
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
26
+ {%- endif -%}
27
+ {%- endfor -%}
28
+ {%- set ns.system_prompt = ns.system_prompt + "]" -%}
29
+ {%- endif -%}
30
+ {%- if ns.system_prompt -%}
31
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
32
+ {%- endif -%}
33
+ {%- set ns.last_assistant_index = -1 -%}
34
+ {%- for message in messages -%}
35
+ {%- if message["role"] == "assistant" -%}
36
+ {%- set ns.last_assistant_index = loop.index0 -%}
37
+ {%- endif -%}
38
+ {%- endfor -%}
39
+ {%- for message in messages -%}
40
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
41
+ {%- set content = message["content"] -%}
42
+ {%- if content is not string -%}
43
+ {%- set ns.content = "" -%}
44
+ {%- for item in content -%}
45
+ {%- if item["type"] == "image" -%}
46
+ {%- set ns.content = ns.content + "<image>" -%}
47
+ {%- elif item["type"] == "text" -%}
48
+ {%- set ns.content = ns.content + item["text"] -%}
49
+ {%- else -%}
50
+ {%- set ns.content = ns.content + item | tojson -%}
51
+ {%- endif -%}
52
+ {%- endfor -%}
53
+ {%- set content = ns.content -%}
54
+ {%- endif -%}
55
+ {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
56
+ {%- if "</think>" in content -%}
57
+ {%- set content = content.split("</think>")[-1] | trim -%}
58
+ {%- endif -%}
59
+ {%- endif -%}
60
+ {{- content + "<|im_end|>\n" -}}
61
+ {%- endfor -%}
62
+ {%- if add_generation_prompt -%}
63
+ {{- "<|im_start|>assistant\n" -}}
64
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Lfm2MoeForCausalLM"
4
+ ],
5
+ "bos_token_id": 1,
6
+ "conv_L_cache": 3,
7
+ "conv_bias": false,
8
+ "dtype": "float32",
9
+ "eos_token_id": 7,
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 11776,
13
+ "layer_types": [
14
+ "conv",
15
+ "conv",
16
+ "full_attention",
17
+ "conv",
18
+ "conv",
19
+ "conv",
20
+ "full_attention",
21
+ "conv",
22
+ "conv",
23
+ "conv",
24
+ "full_attention",
25
+ "conv",
26
+ "conv",
27
+ "conv",
28
+ "full_attention",
29
+ "conv",
30
+ "conv",
31
+ "conv",
32
+ "full_attention",
33
+ "conv",
34
+ "conv",
35
+ "conv",
36
+ "full_attention",
37
+ "conv",
38
+ "conv",
39
+ "conv",
40
+ "full_attention",
41
+ "conv",
42
+ "conv",
43
+ "conv",
44
+ "full_attention",
45
+ "conv",
46
+ "conv",
47
+ "conv",
48
+ "full_attention",
49
+ "conv",
50
+ "conv",
51
+ "conv",
52
+ "full_attention",
53
+ "conv"
54
+ ],
55
+ "max_position_embeddings": 128000,
56
+ "model_type": "lfm2_moe",
57
+ "moe_intermediate_size": 1536,
58
+ "norm_eps": 1e-05,
59
+ "norm_topk_prob": true,
60
+ "num_attention_heads": 32,
61
+ "num_dense_layers": 2,
62
+ "num_experts": 64,
63
+ "num_experts_per_tok": 4,
64
+ "num_hidden_layers": 40,
65
+ "num_key_value_heads": 8,
66
+ "pad_token_id": 0,
67
+ "rope_parameters": {
68
+ "rope_theta": 1000000.0,
69
+ "rope_type": "default"
70
+ },
71
+ "routed_scaling_factor": 1.0,
72
+ "tie_word_embeddings": true,
73
+ "transformers_version": "5.5.0",
74
+ "use_cache": false,
75
+ "use_expert_bias": true,
76
+ "vocab_size": 65536
77
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 7,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "5.5.0"
7
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad32d26c67b66bcc3853fb39badb3d2b7258890b5033a1621b5c11675c7ab1e3
3
+ size 49722124976
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d01ae9a0cb791ffe5c03f5de93cab64275e765250dd47c5412d5d954db58f96
3
+ size 46190362888
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "bos_token": "<|startoftext|>",
4
+ "clean_up_tokenization_spaces": false,
5
+ "eos_token": "<|im_end|>",
6
+ "extra_special_tokens": [],
7
+ "is_local": true,
8
+ "legacy": false,
9
+ "model_input_names": [
10
+ "input_ids",
11
+ "attention_mask"
12
+ ],
13
+ "model_max_length": 1000000000000000019884624838656,
14
+ "model_specific_special_tokens": {},
15
+ "pad_token": "<|pad|>",
16
+ "sp_model_kwargs": {},
17
+ "spaces_between_special_tokens": false,
18
+ "tokenizer_class": "TokenizersBackend",
19
+ "use_default_system_prompt": false,
20
+ "use_fast": true
21
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84fb2e0722c7007ee860baf7af025c5a5563315330613babfc35d351279c67cc
3
+ size 5905