Yichen Feng committed on
Commit
aed26a9
·
verified ·
1 Parent(s): 4e91d85

Model save

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: Qwen/Qwen3.5-35B-A3B
5
+ tags:
6
+ - llama-factory
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: Qwen3.5-35B-A3B-SFT-artarena_sft-LR1.0e-6-EPOCHS3-LF
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # Qwen3.5-35B-A3B-SFT-artarena_sft-LR1.0e-6-EPOCHS3-LF
17
+
18
+ This model is a fine-tuned version of [Qwen/Qwen3.5-35B-A3B](https://huggingface.co/Qwen/Qwen3.5-35B-A3B) on a dataset that the Trainer did not record (the model name suggests `artarena_sft`; confirm before publishing).
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 1e-06
38
+ - train_batch_size: 4
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 8
43
+ - total_train_batch_size: 32
44
+ - total_eval_batch_size: 64
45
+ - optimizer: fused PyTorch AdamW (OptimizerNames.ADAMW_TORCH_FUSED) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
46
+ - lr_scheduler_type: cosine
47
+ - lr_scheduler_warmup_steps: 10.0
48
+ - num_epochs: 3.0
49
+
50
+ ### Training results
51
+
52
+
53
+
54
+ ### Framework versions
55
+
56
+ - Transformers 5.2.0
57
+ - Pytorch 2.10.0+cu128
58
+ - Datasets 4.0.0
59
+ - Tokenizers 0.22.2
chat_template.jinja ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- if ns.multi_step_tool %}
79
+ {{- raise_exception('No user query found in messages.') }}
80
+ {%- endif %}
81
+ {%- for message in messages %}
82
+ {%- set content = render_content(message.content, true)|trim %}
83
+ {%- if message.role == "system" %}
84
+ {%- if not loop.first %}
85
+ {{- raise_exception('System message must be at the beginning.') }}
86
+ {%- endif %}
87
+ {%- elif message.role == "user" %}
88
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
89
+ {%- elif message.role == "assistant" %}
90
+ {%- set reasoning_content = '' %}
91
+ {%- if message.reasoning_content is string %}
92
+ {%- set reasoning_content = message.reasoning_content %}
93
+ {%- else %}
94
+ {%- if '</think>' in content %}
95
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
96
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
97
+ {%- endif %}
98
+ {%- endif %}
99
+ {%- set reasoning_content = reasoning_content|trim %}
100
+ {%- if loop.index0 > ns.last_query_index %}
101
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
102
+ {%- else %}
103
+ {{- '<|im_start|>' + message.role + '\n' + content }}
104
+ {%- endif %}
105
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
106
+ {%- for tool_call in message.tool_calls %}
107
+ {%- if tool_call.function is defined %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {%- if loop.first %}
111
+ {%- if content|trim %}
112
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
113
+ {%- else %}
114
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- else %}
117
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
118
+ {%- endif %}
119
+ {%- if tool_call.arguments is defined %}
120
+ {%- for args_name, args_value in tool_call.arguments|items %}
121
+ {{- '<parameter=' + args_name + '>\n' }}
122
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
123
+ {{- args_value }}
124
+ {{- '\n</parameter>\n' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '</function>\n</tool_call>' }}
128
+ {%- endfor %}
129
+ {%- endif %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif message.role == "tool" %}
132
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
133
+ {{- '<|im_start|>user' }}
134
+ {%- endif %}
135
+ {{- '\n<tool_response>\n' }}
136
+ {{- content }}
137
+ {{- '\n</tool_response>' }}
138
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
139
+ {{- '<|im_end|>\n' }}
140
+ {%- elif loop.last %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- endif %}
143
+ {%- else %}
144
+ {{- raise_exception('Unexpected message role.') }}
145
+ {%- endif %}
146
+ {%- endfor %}
147
+ {%- if add_generation_prompt %}
148
+ {{- '<|im_start|>assistant\n' }}
149
+ {%- if enable_thinking is defined and enable_thinking is false %}
150
+ {{- '<think>\n\n</think>\n\n' }}
151
+ {%- else %}
152
+ {{- '<think>\n' }}
153
+ {%- endif %}
154
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5MoeForConditionalGeneration"
4
+ ],
5
+ "bos_token_id": null,
6
+ "dtype": "bfloat16",
7
+ "eos_token_id": 248046,
8
+ "hidden_size": 2048,
9
+ "image_token_id": 248056,
10
+ "model_type": "qwen3_5_moe",
11
+ "pad_token_id": 248044,
12
+ "text_config": {
13
+ "attention_bias": false,
14
+ "attention_dropout": 0.0,
15
+ "attn_output_gate": true,
16
+ "bos_token_id": null,
17
+ "dtype": "bfloat16",
18
+ "eos_token_id": 248044,
19
+ "full_attention_interval": 4,
20
+ "head_dim": 256,
21
+ "hidden_act": "silu",
22
+ "hidden_size": 2048,
23
+ "initializer_range": 0.02,
24
+ "layer_types": [
25
+ "linear_attention",
26
+ "linear_attention",
27
+ "linear_attention",
28
+ "full_attention",
29
+ "linear_attention",
30
+ "linear_attention",
31
+ "linear_attention",
32
+ "full_attention",
33
+ "linear_attention",
34
+ "linear_attention",
35
+ "linear_attention",
36
+ "full_attention",
37
+ "linear_attention",
38
+ "linear_attention",
39
+ "linear_attention",
40
+ "full_attention",
41
+ "linear_attention",
42
+ "linear_attention",
43
+ "linear_attention",
44
+ "full_attention",
45
+ "linear_attention",
46
+ "linear_attention",
47
+ "linear_attention",
48
+ "full_attention",
49
+ "linear_attention",
50
+ "linear_attention",
51
+ "linear_attention",
52
+ "full_attention",
53
+ "linear_attention",
54
+ "linear_attention",
55
+ "linear_attention",
56
+ "full_attention",
57
+ "linear_attention",
58
+ "linear_attention",
59
+ "linear_attention",
60
+ "full_attention",
61
+ "linear_attention",
62
+ "linear_attention",
63
+ "linear_attention",
64
+ "full_attention"
65
+ ],
66
+ "linear_conv_kernel_dim": 4,
67
+ "linear_key_head_dim": 128,
68
+ "linear_num_key_heads": 16,
69
+ "linear_num_value_heads": 32,
70
+ "linear_value_head_dim": 128,
71
+ "mamba_ssm_dtype": "float32",
72
+ "max_position_embeddings": 262144,
73
+ "mlp_only_layers": [],
74
+ "model_type": "qwen3_5_moe_text",
75
+ "moe_intermediate_size": 512,
76
+ "mtp_num_hidden_layers": 1,
77
+ "mtp_use_dedicated_embeddings": false,
78
+ "num_attention_heads": 16,
79
+ "num_experts": 256,
80
+ "num_experts_per_tok": 8,
81
+ "num_hidden_layers": 40,
82
+ "num_key_value_heads": 2,
83
+ "output_router_logits": false,
84
+ "pad_token_id": null,
85
+ "partial_rotary_factor": 0.25,
86
+ "rms_norm_eps": 1e-06,
87
+ "rope_parameters": {
88
+ "mrope_interleaved": true,
89
+ "mrope_section": [
90
+ 11,
91
+ 11,
92
+ 10
93
+ ],
94
+ "partial_rotary_factor": 0.25,
95
+ "rope_theta": 10000000,
96
+ "rope_type": "default"
97
+ },
98
+ "router_aux_loss_coef": 0.001,
99
+ "shared_expert_intermediate_size": 512,
100
+ "tie_word_embeddings": false,
101
+ "use_cache": false,
102
+ "vocab_size": 248320
103
+ },
104
+ "tie_word_embeddings": false,
105
+ "transformers_version": "5.2.0",
106
+ "use_cache": false,
107
+ "video_token_id": 248057,
108
+ "vision_config": {
109
+ "deepstack_visual_indexes": [],
110
+ "depth": 27,
111
+ "dtype": "bfloat16",
112
+ "hidden_act": "gelu_pytorch_tanh",
113
+ "hidden_size": 1152,
114
+ "in_channels": 3,
115
+ "initializer_range": 0.02,
116
+ "intermediate_size": 4304,
117
+ "model_type": "qwen3_5_moe",
118
+ "num_heads": 16,
119
+ "num_position_embeddings": 2304,
120
+ "out_hidden_size": 2048,
121
+ "patch_size": 16,
122
+ "spatial_merge_size": 2,
123
+ "temporal_patch_size": 2
124
+ },
125
+ "vision_end_token_id": 248054,
126
+ "vision_start_token_id": 248053
127
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 248046,
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 1.0,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "5.2.0"
13
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e20e63866142c1e869a650a2380a86bb9157c50c102db912ac09a53936b534b
3
+ size 49742567544
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d9d46149fcfd5b7b772bc51b1b4ff48581f9c04613784604b3f2a4789e7e7a3
3
+ size 20476251592
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessorFast",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 16,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 16777216,
25
+ "shortest_edge": 65536
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.5,
41
+ 0.5,
42
+ 0.5
43
+ ],
44
+ "image_std": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "max_frames": 768,
50
+ "merge_size": 2,
51
+ "min_frames": 4,
52
+ "patch_size": 16,
53
+ "resample": 3,
54
+ "rescale_factor": 0.00392156862745098,
55
+ "return_metadata": false,
56
+ "size": {
57
+ "longest_edge": 25165824,
58
+ "shortest_edge": 4096
59
+ },
60
+ "temporal_patch_size": 2,
61
+ "video_processor_type": "Qwen3VLVideoProcessor"
62
+ }
63
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": false,
13
+ "model_max_length": 262144,
14
+ "model_specific_special_tokens": {
15
+ "audio_bos_token": "<|audio_start|>",
16
+ "audio_eos_token": "<|audio_end|>",
17
+ "audio_token": "<|audio_pad|>",
18
+ "image_token": "<|image_pad|>",
19
+ "video_token": "<|video_pad|>",
20
+ "vision_bos_token": "<|vision_start|>",
21
+ "vision_eos_token": "<|vision_end|>"
22
+ },
23
+ "pad_token": "<|endoftext|>",
24
+ "padding_side": "right",
25
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
+ "processor_class": "Qwen3VLProcessor",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "TokenizersBackend",
29
+ "unk_token": null,
30
+ "video_token": "<|video_pad|>",
31
+ "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>"
33
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 159, "loss": 1.3308970928192139, "lr": 0.0, "epoch": 0.018867924528301886, "percentage": 0.63, "elapsed_time": "0:00:17", "remaining_time": "0:46:35"}
2
+ {"current_steps": 2, "total_steps": 159, "loss": 1.285962700843811, "lr": 1e-07, "epoch": 0.03773584905660377, "percentage": 1.26, "elapsed_time": "0:00:27", "remaining_time": "0:36:07"}
3
+ {"current_steps": 3, "total_steps": 159, "loss": 1.3014495372772217, "lr": 2e-07, "epoch": 0.05660377358490566, "percentage": 1.89, "elapsed_time": "0:00:36", "remaining_time": "0:31:36"}
4
+ {"current_steps": 4, "total_steps": 159, "loss": 1.306698203086853, "lr": 3e-07, "epoch": 0.07547169811320754, "percentage": 2.52, "elapsed_time": "0:00:46", "remaining_time": "0:30:02"}
5
+ {"current_steps": 5, "total_steps": 159, "loss": 1.316192388534546, "lr": 4e-07, "epoch": 0.09433962264150944, "percentage": 3.14, "elapsed_time": "0:00:55", "remaining_time": "0:28:20"}
6
+ {"current_steps": 6, "total_steps": 159, "loss": 1.3045374155044556, "lr": 5e-07, "epoch": 0.11320754716981132, "percentage": 3.77, "elapsed_time": "0:01:04", "remaining_time": "0:27:21"}
7
+ {"current_steps": 7, "total_steps": 159, "loss": 1.3311705589294434, "lr": 6e-07, "epoch": 0.1320754716981132, "percentage": 4.4, "elapsed_time": "0:01:14", "remaining_time": "0:26:51"}
8
+ {"current_steps": 8, "total_steps": 159, "loss": 1.2908077239990234, "lr": 7e-07, "epoch": 0.1509433962264151, "percentage": 5.03, "elapsed_time": "0:01:22", "remaining_time": "0:25:59"}
9
+ {"current_steps": 9, "total_steps": 159, "loss": 1.3058435916900635, "lr": 8e-07, "epoch": 0.16981132075471697, "percentage": 5.66, "elapsed_time": "0:01:30", "remaining_time": "0:25:14"}
10
+ {"current_steps": 10, "total_steps": 159, "loss": 1.2856130599975586, "lr": 9e-07, "epoch": 0.18867924528301888, "percentage": 6.29, "elapsed_time": "0:01:38", "remaining_time": "0:24:28"}
11
+ {"current_steps": 11, "total_steps": 159, "loss": 1.2199636697769165, "lr": 1e-06, "epoch": 0.20754716981132076, "percentage": 6.92, "elapsed_time": "0:01:46", "remaining_time": "0:23:56"}
12
+ {"current_steps": 12, "total_steps": 159, "loss": 1.1673463582992554, "lr": 9.99888864929809e-07, "epoch": 0.22641509433962265, "percentage": 7.55, "elapsed_time": "0:01:54", "remaining_time": "0:23:26"}
13
+ {"current_steps": 13, "total_steps": 159, "loss": 1.1699671745300293, "lr": 9.995555091232516e-07, "epoch": 0.24528301886792453, "percentage": 8.18, "elapsed_time": "0:02:02", "remaining_time": "0:22:59"}
14
+ {"current_steps": 14, "total_steps": 159, "loss": 1.1814613342285156, "lr": 9.990000807704114e-07, "epoch": 0.2641509433962264, "percentage": 8.81, "elapsed_time": "0:02:11", "remaining_time": "0:22:42"}
15
+ {"current_steps": 15, "total_steps": 159, "loss": 1.0652694702148438, "lr": 9.982228267815643e-07, "epoch": 0.2830188679245283, "percentage": 9.43, "elapsed_time": "0:02:19", "remaining_time": "0:22:21"}
16
+ {"current_steps": 16, "total_steps": 159, "loss": 1.0635337829589844, "lr": 9.972240926774166e-07, "epoch": 0.3018867924528302, "percentage": 10.06, "elapsed_time": "0:02:28", "remaining_time": "0:22:03"}
17
+ {"current_steps": 17, "total_steps": 159, "loss": 1.0902111530303955, "lr": 9.96004322435508e-07, "epoch": 0.32075471698113206, "percentage": 10.69, "elapsed_time": "0:02:36", "remaining_time": "0:21:47"}
18
+ {"current_steps": 18, "total_steps": 159, "loss": 1.06702721118927, "lr": 9.945640582928437e-07, "epoch": 0.33962264150943394, "percentage": 11.32, "elapsed_time": "0:02:44", "remaining_time": "0:21:28"}
19
+ {"current_steps": 19, "total_steps": 159, "loss": 1.0476477146148682, "lr": 9.9290394050485e-07, "epoch": 0.3584905660377358, "percentage": 11.95, "elapsed_time": "0:02:52", "remaining_time": "0:21:14"}
20
+ {"current_steps": 20, "total_steps": 159, "loss": 1.0617330074310303, "lr": 9.91024707060755e-07, "epoch": 0.37735849056603776, "percentage": 12.58, "elapsed_time": "0:03:01", "remaining_time": "0:20:58"}
21
+ {"current_steps": 21, "total_steps": 159, "loss": 1.07832932472229, "lr": 9.889271933555212e-07, "epoch": 0.39622641509433965, "percentage": 13.21, "elapsed_time": "0:03:09", "remaining_time": "0:20:42"}
22
+ {"current_steps": 22, "total_steps": 159, "loss": 1.0324124097824097, "lr": 9.8661233181848e-07, "epoch": 0.41509433962264153, "percentage": 13.84, "elapsed_time": "0:03:17", "remaining_time": "0:20:30"}
23
+ {"current_steps": 23, "total_steps": 159, "loss": 0.9815853834152222, "lr": 9.840811514988293e-07, "epoch": 0.4339622641509434, "percentage": 14.47, "elapsed_time": "0:03:25", "remaining_time": "0:20:17"}
24
+ {"current_steps": 24, "total_steps": 159, "loss": 1.0266845226287842, "lr": 9.813347776081788e-07, "epoch": 0.4528301886792453, "percentage": 15.09, "elapsed_time": "0:03:33", "remaining_time": "0:20:02"}
25
+ {"current_steps": 25, "total_steps": 159, "loss": 1.0085935592651367, "lr": 9.78374431020349e-07, "epoch": 0.4716981132075472, "percentage": 15.72, "elapsed_time": "0:03:41", "remaining_time": "0:19:48"}
26
+ {"current_steps": 26, "total_steps": 159, "loss": 0.9968965649604797, "lr": 9.752014277286431e-07, "epoch": 0.49056603773584906, "percentage": 16.35, "elapsed_time": "0:03:49", "remaining_time": "0:19:35"}
27
+ {"current_steps": 27, "total_steps": 159, "loss": 0.9803509712219238, "lr": 9.718171782608353e-07, "epoch": 0.5094339622641509, "percentage": 16.98, "elapsed_time": "0:03:57", "remaining_time": "0:19:23"}
28
+ {"current_steps": 28, "total_steps": 159, "loss": 0.9759021997451782, "lr": 9.682231870521345e-07, "epoch": 0.5283018867924528, "percentage": 17.61, "elapsed_time": "0:04:06", "remaining_time": "0:19:11"}
29
+ {"current_steps": 29, "total_steps": 159, "loss": 0.9812103509902954, "lr": 9.644210517764013e-07, "epoch": 0.5471698113207547, "percentage": 18.24, "elapsed_time": "0:04:14", "remaining_time": "0:18:59"}
30
+ {"current_steps": 30, "total_steps": 159, "loss": 0.9091012477874756, "lr": 9.60412462635919e-07, "epoch": 0.5660377358490566, "percentage": 18.87, "elapsed_time": "0:04:22", "remaining_time": "0:18:47"}
31
+ {"current_steps": 31, "total_steps": 159, "loss": 0.9503388404846191, "lr": 9.561992016100291e-07, "epoch": 0.5849056603773585, "percentage": 19.5, "elapsed_time": "0:04:30", "remaining_time": "0:18:35"}
32
+ {"current_steps": 32, "total_steps": 159, "loss": 0.9247981309890747, "lr": 9.517831416629716e-07, "epoch": 0.6037735849056604, "percentage": 20.13, "elapsed_time": "0:04:37", "remaining_time": "0:18:22"}
33
+ {"current_steps": 33, "total_steps": 159, "loss": 0.9473499655723572, "lr": 9.471662459112745e-07, "epoch": 0.6226415094339622, "percentage": 20.75, "elapsed_time": "0:04:45", "remaining_time": "0:18:11"}
34
+ {"current_steps": 34, "total_steps": 159, "loss": 0.9340516328811646, "lr": 9.423505667510723e-07, "epoch": 0.6415094339622641, "percentage": 21.38, "elapsed_time": "0:04:54", "remaining_time": "0:18:01"}
35
+ {"current_steps": 35, "total_steps": 159, "loss": 0.9248940348625183, "lr": 9.373382449457303e-07, "epoch": 0.660377358490566, "percentage": 22.01, "elapsed_time": "0:05:02", "remaining_time": "0:17:50"}
36
+ {"current_steps": 36, "total_steps": 159, "loss": 0.9420664310455322, "lr": 9.321315086741915e-07, "epoch": 0.6792452830188679, "percentage": 22.64, "elapsed_time": "0:05:10", "remaining_time": "0:17:39"}
37
+ {"current_steps": 37, "total_steps": 159, "loss": 0.9231287240982056, "lr": 9.267326725404598e-07, "epoch": 0.6981132075471698, "percentage": 23.27, "elapsed_time": "0:05:19", "remaining_time": "0:17:32"}
38
+ {"current_steps": 38, "total_steps": 159, "loss": 0.9293084740638733, "lr": 9.21144136544666e-07, "epoch": 0.7169811320754716, "percentage": 23.9, "elapsed_time": "0:05:27", "remaining_time": "0:17:22"}
39
+ {"current_steps": 39, "total_steps": 159, "loss": 0.9372609853744507, "lr": 9.153683850161705e-07, "epoch": 0.7358490566037735, "percentage": 24.53, "elapsed_time": "0:05:35", "remaining_time": "0:17:12"}
40
+ {"current_steps": 40, "total_steps": 159, "loss": 0.9204014539718628, "lr": 9.094079855091797e-07, "epoch": 0.7547169811320755, "percentage": 25.16, "elapsed_time": "0:05:43", "remaining_time": "0:17:03"}
41
+ {"current_steps": 41, "total_steps": 159, "loss": 0.9143469333648682, "lr": 9.032655876613635e-07, "epoch": 0.7735849056603774, "percentage": 25.79, "elapsed_time": "0:05:52", "remaining_time": "0:16:54"}
42
+ {"current_steps": 42, "total_steps": 159, "loss": 0.901626467704773, "lr": 8.96943922015986e-07, "epoch": 0.7924528301886793, "percentage": 26.42, "elapsed_time": "0:06:00", "remaining_time": "0:16:44"}
43
+ {"current_steps": 43, "total_steps": 159, "loss": 0.9193109273910522, "lr": 8.90445798808068e-07, "epoch": 0.8113207547169812, "percentage": 27.04, "elapsed_time": "0:06:08", "remaining_time": "0:16:34"}
44
+ {"current_steps": 44, "total_steps": 159, "loss": 0.9078618288040161, "lr": 8.837741067151249e-07, "epoch": 0.8301886792452831, "percentage": 27.67, "elapsed_time": "0:06:17", "remaining_time": "0:16:25"}
45
+ {"current_steps": 45, "total_steps": 159, "loss": 0.9032235145568848, "lr": 8.769318115730328e-07, "epoch": 0.8490566037735849, "percentage": 28.3, "elapsed_time": "0:06:25", "remaining_time": "0:16:16"}
46
+ {"current_steps": 46, "total_steps": 159, "loss": 0.8799638152122498, "lr": 8.699219550575952e-07, "epoch": 0.8679245283018868, "percentage": 28.93, "elapsed_time": "0:06:33", "remaining_time": "0:16:07"}
47
+ {"current_steps": 47, "total_steps": 159, "loss": 0.9072629809379578, "lr": 8.627476533323956e-07, "epoch": 0.8867924528301887, "percentage": 29.56, "elapsed_time": "0:06:42", "remaining_time": "0:15:58"}
48
+ {"current_steps": 48, "total_steps": 159, "loss": 0.879642128944397, "lr": 8.554120956635374e-07, "epoch": 0.9056603773584906, "percentage": 30.19, "elapsed_time": "0:06:50", "remaining_time": "0:15:48"}
49
+ {"current_steps": 49, "total_steps": 159, "loss": 0.9129672050476074, "lr": 8.479185430018858e-07, "epoch": 0.9245283018867925, "percentage": 30.82, "elapsed_time": "0:06:58", "remaining_time": "0:15:39"}
50
+ {"current_steps": 50, "total_steps": 159, "loss": 0.9072036147117615, "lr": 8.402703265334454e-07, "epoch": 0.9433962264150944, "percentage": 31.45, "elapsed_time": "0:07:07", "remaining_time": "0:15:31"}
51
+ {"current_steps": 51, "total_steps": 159, "loss": 0.8936312198638916, "lr": 8.324708461985124e-07, "epoch": 0.9622641509433962, "percentage": 32.08, "elapsed_time": "0:07:16", "remaining_time": "0:15:23"}
52
+ {"current_steps": 52, "total_steps": 159, "loss": 0.886029839515686, "lr": 8.245235691802643e-07, "epoch": 0.9811320754716981, "percentage": 32.7, "elapsed_time": "0:07:25", "remaining_time": "0:15:17"}
53
+ {"current_steps": 53, "total_steps": 159, "loss": 0.886949360370636, "lr": 8.164320283634585e-07, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:07:34", "remaining_time": "0:15:09"}
54
+ {"current_steps": 54, "total_steps": 159, "loss": 0.8734487891197205, "lr": 8.081998207639212e-07, "epoch": 1.0188679245283019, "percentage": 33.96, "elapsed_time": "0:07:45", "remaining_time": "0:15:04"}
55
+ {"current_steps": 55, "total_steps": 159, "loss": 0.8541756868362427, "lr": 7.998306059295302e-07, "epoch": 1.0377358490566038, "percentage": 34.59, "elapsed_time": "0:07:54", "remaining_time": "0:14:56"}
56
+ {"current_steps": 56, "total_steps": 159, "loss": 0.855162501335144, "lr": 7.913281043133977e-07, "epoch": 1.0566037735849056, "percentage": 35.22, "elapsed_time": "0:08:02", "remaining_time": "0:14:47"}
57
+ {"current_steps": 57, "total_steps": 159, "loss": 0.8469276428222656, "lr": 7.826960956199794e-07, "epoch": 1.0754716981132075, "percentage": 35.85, "elapsed_time": "0:08:10", "remaining_time": "0:14:38"}
58
+ {"current_steps": 58, "total_steps": 159, "loss": 0.8612252473831177, "lr": 7.739384171248434e-07, "epoch": 1.0943396226415094, "percentage": 36.48, "elapsed_time": "0:08:19", "remaining_time": "0:14:29"}
59
+ {"current_steps": 59, "total_steps": 159, "loss": 0.8504967093467712, "lr": 7.650589619688468e-07, "epoch": 1.1132075471698113, "percentage": 37.11, "elapsed_time": "0:08:28", "remaining_time": "0:14:21"}
60
+ {"current_steps": 60, "total_steps": 159, "loss": 0.8487892150878906, "lr": 7.560616774274774e-07, "epoch": 1.1320754716981132, "percentage": 37.74, "elapsed_time": "0:08:36", "remaining_time": "0:14:11"}
61
+ {"current_steps": 61, "total_steps": 159, "loss": 0.8430064916610718, "lr": 7.469505631561317e-07, "epoch": 1.150943396226415, "percentage": 38.36, "elapsed_time": "0:08:44", "remaining_time": "0:14:02"}
62
+ {"current_steps": 62, "total_steps": 159, "loss": 0.834577202796936, "lr": 7.377296694121058e-07, "epoch": 1.169811320754717, "percentage": 38.99, "elapsed_time": "0:08:52", "remaining_time": "0:13:53"}
63
+ {"current_steps": 63, "total_steps": 159, "loss": 0.8389214277267456, "lr": 7.284030952540936e-07, "epoch": 1.1886792452830188, "percentage": 39.62, "elapsed_time": "0:09:00", "remaining_time": "0:13:44"}
64
+ {"current_steps": 64, "total_steps": 159, "loss": 0.8442764282226562, "lr": 7.189749867199898e-07, "epoch": 1.2075471698113207, "percentage": 40.25, "elapsed_time": "0:09:09", "remaining_time": "0:13:36"}
65
+ {"current_steps": 65, "total_steps": 159, "loss": 0.802047848701477, "lr": 7.094495349838092e-07, "epoch": 1.2264150943396226, "percentage": 40.88, "elapsed_time": "0:09:17", "remaining_time": "0:13:26"}
66
+ {"current_steps": 66, "total_steps": 159, "loss": 0.8562427163124084, "lr": 6.998309744925411e-07, "epoch": 1.2452830188679245, "percentage": 41.51, "elapsed_time": "0:09:25", "remaining_time": "0:13:16"}
67
+ {"current_steps": 67, "total_steps": 159, "loss": 0.8214827179908752, "lr": 6.901235810837667e-07, "epoch": 1.2641509433962264, "percentage": 42.14, "elapsed_time": "0:09:33", "remaining_time": "0:13:07"}
68
+ {"current_steps": 68, "total_steps": 159, "loss": 0.7995479702949524, "lr": 6.803316700848778e-07, "epoch": 1.2830188679245282, "percentage": 42.77, "elapsed_time": "0:09:41", "remaining_time": "0:12:58"}
69
+ {"current_steps": 69, "total_steps": 159, "loss": 0.8077808022499084, "lr": 6.704595943947385e-07, "epoch": 1.3018867924528301, "percentage": 43.4, "elapsed_time": "0:09:49", "remaining_time": "0:12:49"}
70
+ {"current_steps": 70, "total_steps": 159, "loss": 0.8417398929595947, "lr": 6.605117425486481e-07, "epoch": 1.320754716981132, "percentage": 44.03, "elapsed_time": "0:09:57", "remaining_time": "0:12:39"}
71
+ {"current_steps": 71, "total_steps": 159, "loss": 0.8494030833244324, "lr": 6.504925367674594e-07, "epoch": 1.3396226415094339, "percentage": 44.65, "elapsed_time": "0:10:05", "remaining_time": "0:12:30"}
72
+ {"current_steps": 72, "total_steps": 159, "loss": 0.8620424866676331, "lr": 6.40406430991723e-07, "epoch": 1.3584905660377358, "percentage": 45.28, "elapsed_time": "0:10:13", "remaining_time": "0:12:20"}
73
+ {"current_steps": 73, "total_steps": 159, "loss": 0.8398749232292175, "lr": 6.302579089017327e-07, "epoch": 1.3773584905660377, "percentage": 45.91, "elapsed_time": "0:10:22", "remaining_time": "0:12:12"}
74
+ {"current_steps": 74, "total_steps": 159, "loss": 0.8420323133468628, "lr": 6.200514819243475e-07, "epoch": 1.3962264150943398, "percentage": 46.54, "elapsed_time": "0:10:30", "remaining_time": "0:12:04"}
75
+ {"current_steps": 75, "total_steps": 159, "loss": 0.8359158635139465, "lr": 6.097916872274814e-07, "epoch": 1.4150943396226414, "percentage": 47.17, "elapsed_time": "0:10:39", "remaining_time": "0:11:55"}
76
+ {"current_steps": 76, "total_steps": 159, "loss": 0.8336814641952515, "lr": 5.994830857031499e-07, "epoch": 1.4339622641509435, "percentage": 47.8, "elapsed_time": "0:10:47", "remaining_time": "0:11:47"}
77
+ {"current_steps": 77, "total_steps": 159, "loss": 0.7930982112884521, "lr": 5.891302599399684e-07, "epoch": 1.4528301886792452, "percentage": 48.43, "elapsed_time": "0:10:55", "remaining_time": "0:11:37"}
78
+ {"current_steps": 78, "total_steps": 159, "loss": 0.8192281723022461, "lr": 5.78737812186009e-07, "epoch": 1.4716981132075473, "percentage": 49.06, "elapsed_time": "0:11:03", "remaining_time": "0:11:29"}
79
+ {"current_steps": 79, "total_steps": 159, "loss": 0.8389377593994141, "lr": 5.683103623029134e-07, "epoch": 1.490566037735849, "percentage": 49.69, "elapsed_time": "0:11:12", "remaining_time": "0:11:20"}
80
+ {"current_steps": 80, "total_steps": 159, "loss": 0.8256187438964844, "lr": 5.578525457121806e-07, "epoch": 1.509433962264151, "percentage": 50.31, "elapsed_time": "0:11:20", "remaining_time": "0:11:12"}
81
+ {"current_steps": 81, "total_steps": 159, "loss": 0.8473238945007324, "lr": 5.473690113345342e-07, "epoch": 1.5283018867924527, "percentage": 50.94, "elapsed_time": "0:11:28", "remaining_time": "0:11:02"}
82
+ {"current_steps": 82, "total_steps": 159, "loss": 0.8165145516395569, "lr": 5.368644195232895e-07, "epoch": 1.5471698113207548, "percentage": 51.57, "elapsed_time": "0:11:36", "remaining_time": "0:10:53"}
83
+ {"current_steps": 83, "total_steps": 159, "loss": 0.8529609441757202, "lr": 5.263434399926398e-07, "epoch": 1.5660377358490565, "percentage": 52.2, "elapsed_time": "0:11:44", "remaining_time": "0:10:45"}
84
+ {"current_steps": 84, "total_steps": 159, "loss": 0.8249980211257935, "lr": 5.158107497417794e-07, "epoch": 1.5849056603773586, "percentage": 52.83, "elapsed_time": "0:11:52", "remaining_time": "0:10:36"}
85
+ {"current_steps": 85, "total_steps": 159, "loss": 0.7900608777999878, "lr": 5.052710309757898e-07, "epoch": 1.6037735849056602, "percentage": 53.46, "elapsed_time": "0:12:00", "remaining_time": "0:10:27"}
86
+ {"current_steps": 86, "total_steps": 159, "loss": 0.7917711734771729, "lr": 4.947289690242102e-07, "epoch": 1.6226415094339623, "percentage": 54.09, "elapsed_time": "0:12:08", "remaining_time": "0:10:18"}
87
+ {"current_steps": 87, "total_steps": 159, "loss": 0.8228881359100342, "lr": 4.841892502582205e-07, "epoch": 1.641509433962264, "percentage": 54.72, "elapsed_time": "0:12:17", "remaining_time": "0:10:09"}
88
+ {"current_steps": 88, "total_steps": 159, "loss": 0.8176588416099548, "lr": 4.736565600073602e-07, "epoch": 1.6603773584905661, "percentage": 55.35, "elapsed_time": "0:12:25", "remaining_time": "0:10:01"}
89
+ {"current_steps": 89, "total_steps": 159, "loss": 0.8315557837486267, "lr": 4.6313558047671047e-07, "epoch": 1.6792452830188678, "percentage": 55.97, "elapsed_time": "0:12:33", "remaining_time": "0:09:52"}
90
+ {"current_steps": 90, "total_steps": 159, "loss": 0.8079712390899658, "lr": 4.5263098866546586e-07, "epoch": 1.6981132075471699, "percentage": 56.6, "elapsed_time": "0:12:41", "remaining_time": "0:09:44"}
91
+ {"current_steps": 91, "total_steps": 159, "loss": 0.7854694128036499, "lr": 4.421474542878194e-07, "epoch": 1.7169811320754715, "percentage": 57.23, "elapsed_time": "0:12:49", "remaining_time": "0:09:35"}
92
+ {"current_steps": 92, "total_steps": 159, "loss": 0.8382487297058105, "lr": 4.316896376970866e-07, "epoch": 1.7358490566037736, "percentage": 57.86, "elapsed_time": "0:12:57", "remaining_time": "0:09:26"}
93
+ {"current_steps": 93, "total_steps": 159, "loss": 0.8337287902832031, "lr": 4.2126218781399114e-07, "epoch": 1.7547169811320755, "percentage": 58.49, "elapsed_time": "0:13:05", "remaining_time": "0:09:17"}
94
+ {"current_steps": 94, "total_steps": 159, "loss": 0.8450314402580261, "lr": 4.1086974006003154e-07, "epoch": 1.7735849056603774, "percentage": 59.12, "elapsed_time": "0:13:13", "remaining_time": "0:09:08"}
95
+ {"current_steps": 95, "total_steps": 159, "loss": 0.7846765518188477, "lr": 4.0051691429685023e-07, "epoch": 1.7924528301886793, "percentage": 59.75, "elapsed_time": "0:13:22", "remaining_time": "0:09:00"}
96
+ {"current_steps": 96, "total_steps": 159, "loss": 0.814504861831665, "lr": 3.902083127725186e-07, "epoch": 1.8113207547169812, "percentage": 60.38, "elapsed_time": "0:13:30", "remaining_time": "0:08:52"}
97
+ {"current_steps": 97, "total_steps": 159, "loss": 0.8011671304702759, "lr": 3.799485180756525e-07, "epoch": 1.830188679245283, "percentage": 61.01, "elapsed_time": "0:13:38", "remaining_time": "0:08:43"}
98
+ {"current_steps": 98, "total_steps": 159, "loss": 0.8165295124053955, "lr": 3.697420910982672e-07, "epoch": 1.849056603773585, "percentage": 61.64, "elapsed_time": "0:13:47", "remaining_time": "0:08:34"}
99
+ {"current_steps": 99, "total_steps": 159, "loss": 0.8199301958084106, "lr": 3.5959356900827687e-07, "epoch": 1.8679245283018868, "percentage": 62.26, "elapsed_time": "0:13:55", "remaining_time": "0:08:26"}
100
+ {"current_steps": 100, "total_steps": 159, "loss": 0.8019869327545166, "lr": 3.4950746323254063e-07, "epoch": 1.8867924528301887, "percentage": 62.89, "elapsed_time": "0:14:03", "remaining_time": "0:08:17"}
101
+ {"current_steps": 101, "total_steps": 159, "loss": 0.8060827255249023, "lr": 3.394882574513519e-07, "epoch": 1.9056603773584906, "percentage": 63.52, "elapsed_time": "0:14:11", "remaining_time": "0:08:09"}
102
+ {"current_steps": 102, "total_steps": 159, "loss": 0.8078351020812988, "lr": 3.295404056052616e-07, "epoch": 1.9245283018867925, "percentage": 64.15, "elapsed_time": "0:14:20", "remaining_time": "0:08:00"}
103
+ {"current_steps": 103, "total_steps": 159, "loss": 0.8068495988845825, "lr": 3.1966832991512225e-07, "epoch": 1.9433962264150944, "percentage": 64.78, "elapsed_time": "0:14:28", "remaining_time": "0:07:52"}
104
+ {"current_steps": 104, "total_steps": 159, "loss": 0.8184278011322021, "lr": 3.0987641891623315e-07, "epoch": 1.9622641509433962, "percentage": 65.41, "elapsed_time": "0:14:36", "remaining_time": "0:07:43"}
105
+ {"current_steps": 105, "total_steps": 159, "loss": 0.8299746513366699, "lr": 3.0016902550745895e-07, "epoch": 1.9811320754716981, "percentage": 66.04, "elapsed_time": "0:14:45", "remaining_time": "0:07:35"}
106
+ {"current_steps": 106, "total_steps": 159, "loss": 0.785747766494751, "lr": 2.9055046501619083e-07, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:14:53", "remaining_time": "0:07:26"}
107
+ {"current_steps": 107, "total_steps": 159, "loss": 0.7670397758483887, "lr": 2.810250132800103e-07, "epoch": 2.018867924528302, "percentage": 67.3, "elapsed_time": "0:15:03", "remaining_time": "0:07:18"}
108
+ {"current_steps": 108, "total_steps": 159, "loss": 0.7878092527389526, "lr": 2.715969047459066e-07, "epoch": 2.0377358490566038, "percentage": 67.92, "elapsed_time": "0:15:11", "remaining_time": "0:07:10"}
109
+ {"current_steps": 109, "total_steps": 159, "loss": 0.7904379367828369, "lr": 2.6227033058789403e-07, "epoch": 2.056603773584906, "percentage": 68.55, "elapsed_time": "0:15:19", "remaining_time": "0:07:01"}
110
+ {"current_steps": 110, "total_steps": 159, "loss": 0.8011707067489624, "lr": 2.5304943684386825e-07, "epoch": 2.0754716981132075, "percentage": 69.18, "elapsed_time": "0:15:28", "remaining_time": "0:06:53"}
111
+ {"current_steps": 111, "total_steps": 159, "loss": 0.7658779621124268, "lr": 2.439383225725225e-07, "epoch": 2.0943396226415096, "percentage": 69.81, "elapsed_time": "0:15:37", "remaining_time": "0:06:45"}
112
+ {"current_steps": 112, "total_steps": 159, "loss": 0.7720337510108948, "lr": 2.3494103803115318e-07, "epoch": 2.1132075471698113, "percentage": 70.44, "elapsed_time": "0:15:45", "remaining_time": "0:06:36"}
113
+ {"current_steps": 113, "total_steps": 159, "loss": 0.7842212915420532, "lr": 2.2606158287515658e-07, "epoch": 2.1320754716981134, "percentage": 71.07, "elapsed_time": "0:15:54", "remaining_time": "0:06:28"}
114
+ {"current_steps": 114, "total_steps": 159, "loss": 0.7690730094909668, "lr": 2.1730390438002056e-07, "epoch": 2.150943396226415, "percentage": 71.7, "elapsed_time": "0:16:03", "remaining_time": "0:06:20"}
115
+ {"current_steps": 115, "total_steps": 159, "loss": 0.7737655639648438, "lr": 2.0867189568660236e-07, "epoch": 2.169811320754717, "percentage": 72.33, "elapsed_time": "0:16:12", "remaining_time": "0:06:12"}
116
+ {"current_steps": 116, "total_steps": 159, "loss": 0.7852470278739929, "lr": 2.0016939407046986e-07, "epoch": 2.188679245283019, "percentage": 72.96, "elapsed_time": "0:16:20", "remaining_time": "0:06:03"}
117
+ {"current_steps": 117, "total_steps": 159, "loss": 0.7893455624580383, "lr": 1.9180017923607883e-07, "epoch": 2.207547169811321, "percentage": 73.58, "elapsed_time": "0:16:28", "remaining_time": "0:05:54"}
118
+ {"current_steps": 118, "total_steps": 159, "loss": 0.7634609937667847, "lr": 1.835679716365417e-07, "epoch": 2.2264150943396226, "percentage": 74.21, "elapsed_time": "0:16:36", "remaining_time": "0:05:46"}
119
+ {"current_steps": 119, "total_steps": 159, "loss": 0.7859703898429871, "lr": 1.7547643081973578e-07, "epoch": 2.2452830188679247, "percentage": 74.84, "elapsed_time": "0:16:45", "remaining_time": "0:05:37"}
120
+ {"current_steps": 120, "total_steps": 159, "loss": 0.7709099650382996, "lr": 1.6752915380148768e-07, "epoch": 2.2641509433962264, "percentage": 75.47, "elapsed_time": "0:16:53", "remaining_time": "0:05:29"}
121
+ {"current_steps": 121, "total_steps": 159, "loss": 0.7789061069488525, "lr": 1.5972967346655448e-07, "epoch": 2.2830188679245285, "percentage": 76.1, "elapsed_time": "0:17:01", "remaining_time": "0:05:20"}
122
+ {"current_steps": 122, "total_steps": 159, "loss": 0.7862054705619812, "lr": 1.5208145699811415e-07, "epoch": 2.30188679245283, "percentage": 76.73, "elapsed_time": "0:17:09", "remaining_time": "0:05:12"}
123
+ {"current_steps": 123, "total_steps": 159, "loss": 0.7816888689994812, "lr": 1.4458790433646263e-07, "epoch": 2.3207547169811322, "percentage": 77.36, "elapsed_time": "0:17:17", "remaining_time": "0:05:03"}
124
+ {"current_steps": 124, "total_steps": 159, "loss": 0.7391059398651123, "lr": 1.3725234666760427e-07, "epoch": 2.339622641509434, "percentage": 77.99, "elapsed_time": "0:17:25", "remaining_time": "0:04:55"}
125
+ {"current_steps": 125, "total_steps": 159, "loss": 0.7627633810043335, "lr": 1.3007804494240476e-07, "epoch": 2.358490566037736, "percentage": 78.62, "elapsed_time": "0:17:33", "remaining_time": "0:04:46"}
126
+ {"current_steps": 126, "total_steps": 159, "loss": 0.7769066095352173, "lr": 1.2306818842696715e-07, "epoch": 2.3773584905660377, "percentage": 79.25, "elapsed_time": "0:17:42", "remaining_time": "0:04:38"}
127
+ {"current_steps": 127, "total_steps": 159, "loss": 0.7934216856956482, "lr": 1.1622589328487503e-07, "epoch": 2.3962264150943398, "percentage": 79.87, "elapsed_time": "0:17:51", "remaining_time": "0:04:29"}
128
+ {"current_steps": 128, "total_steps": 159, "loss": 0.7673547863960266, "lr": 1.0955420119193198e-07, "epoch": 2.4150943396226414, "percentage": 80.5, "elapsed_time": "0:17:59", "remaining_time": "0:04:21"}
129
+ {"current_steps": 129, "total_steps": 159, "loss": 0.7849991917610168, "lr": 1.03056077984014e-07, "epoch": 2.4339622641509435, "percentage": 81.13, "elapsed_time": "0:18:08", "remaining_time": "0:04:13"}
130
+ {"current_steps": 130, "total_steps": 159, "loss": 0.7473263740539551, "lr": 9.673441233863661e-08, "epoch": 2.452830188679245, "percentage": 81.76, "elapsed_time": "0:18:16", "remaining_time": "0:04:04"}
131
+ {"current_steps": 131, "total_steps": 159, "loss": 0.784021258354187, "lr": 9.059201449082043e-08, "epoch": 2.4716981132075473, "percentage": 82.39, "elapsed_time": "0:18:24", "remaining_time": "0:03:56"}
132
+ {"current_steps": 132, "total_steps": 159, "loss": 0.7882828712463379, "lr": 8.463161498382949e-08, "epoch": 2.490566037735849, "percentage": 83.02, "elapsed_time": "0:18:32", "remaining_time": "0:03:47"}
133
+ {"current_steps": 133, "total_steps": 159, "loss": 0.7572199702262878, "lr": 7.885586345533396e-08, "epoch": 2.509433962264151, "percentage": 83.65, "elapsed_time": "0:18:40", "remaining_time": "0:03:39"}
134
+ {"current_steps": 134, "total_steps": 159, "loss": 0.7826784253120422, "lr": 7.326732745954e-08, "epoch": 2.5283018867924527, "percentage": 84.28, "elapsed_time": "0:18:49", "remaining_time": "0:03:30"}
135
+ {"current_steps": 135, "total_steps": 159, "loss": 0.7726486325263977, "lr": 6.786849132580841e-08, "epoch": 2.547169811320755, "percentage": 84.91, "elapsed_time": "0:18:57", "remaining_time": "0:03:22"}
136
+ {"current_steps": 136, "total_steps": 159, "loss": 0.7736940383911133, "lr": 6.266175505426957e-08, "epoch": 2.5660377358490565, "percentage": 85.53, "elapsed_time": "0:19:06", "remaining_time": "0:03:13"}
137
+ {"current_steps": 137, "total_steps": 159, "loss": 0.7888213396072388, "lr": 5.7649433248927794e-08, "epoch": 2.5849056603773586, "percentage": 86.16, "elapsed_time": "0:19:15", "remaining_time": "0:03:05"}
138
+ {"current_steps": 138, "total_steps": 159, "loss": 0.7611340284347534, "lr": 5.283375408872537e-08, "epoch": 2.6037735849056602, "percentage": 86.79, "elapsed_time": "0:19:23", "remaining_time": "0:02:57"}
139
+ {"current_steps": 139, "total_steps": 159, "loss": 0.779454231262207, "lr": 4.821685833702849e-08, "epoch": 2.6226415094339623, "percentage": 87.42, "elapsed_time": "0:19:32", "remaining_time": "0:02:48"}
140
+ {"current_steps": 140, "total_steps": 159, "loss": 0.769560694694519, "lr": 4.3800798389970863e-08, "epoch": 2.641509433962264, "percentage": 88.05, "elapsed_time": "0:19:40", "remaining_time": "0:02:40"}
141
+ {"current_steps": 141, "total_steps": 159, "loss": 0.7890896797180176, "lr": 3.958753736408105e-08, "epoch": 2.660377358490566, "percentage": 88.68, "elapsed_time": "0:19:48", "remaining_time": "0:02:31"}
142
+ {"current_steps": 142, "total_steps": 159, "loss": 0.7476776838302612, "lr": 3.557894822359864e-08, "epoch": 2.6792452830188678, "percentage": 89.31, "elapsed_time": "0:19:56", "remaining_time": "0:02:23"}
143
+ {"current_steps": 143, "total_steps": 159, "loss": 0.7551087737083435, "lr": 3.1776812947865384e-08, "epoch": 2.69811320754717, "percentage": 89.94, "elapsed_time": "0:20:04", "remaining_time": "0:02:14"}
144
+ {"current_steps": 144, "total_steps": 159, "loss": 0.7675119638442993, "lr": 2.818282173916453e-08, "epoch": 2.7169811320754715, "percentage": 90.57, "elapsed_time": "0:20:12", "remaining_time": "0:02:06"}
145
+ {"current_steps": 145, "total_steps": 159, "loss": 0.7670686841011047, "lr": 2.4798572271356843e-08, "epoch": 2.7358490566037736, "percentage": 91.19, "elapsed_time": "0:20:20", "remaining_time": "0:01:57"}
146
+ {"current_steps": 146, "total_steps": 159, "loss": 0.7993500828742981, "lr": 2.162556897965101e-08, "epoch": 2.7547169811320753, "percentage": 91.82, "elapsed_time": "0:20:28", "remaining_time": "0:01:49"}
147
+ {"current_steps": 147, "total_steps": 159, "loss": 0.7754116654396057, "lr": 1.8665222391821166e-08, "epoch": 2.7735849056603774, "percentage": 92.45, "elapsed_time": "0:20:37", "remaining_time": "0:01:40"}
148
+ {"current_steps": 148, "total_steps": 159, "loss": 0.7710179090499878, "lr": 1.5918848501170644e-08, "epoch": 2.7924528301886795, "percentage": 93.08, "elapsed_time": "0:20:45", "remaining_time": "0:01:32"}
149
+ {"current_steps": 149, "total_steps": 159, "loss": 0.7384580969810486, "lr": 1.3387668181519818e-08, "epoch": 2.811320754716981, "percentage": 93.71, "elapsed_time": "0:20:53", "remaining_time": "0:01:24"}
150
+ {"current_steps": 150, "total_steps": 159, "loss": 0.7740883827209473, "lr": 1.1072806644478738e-08, "epoch": 2.830188679245283, "percentage": 94.34, "elapsed_time": "0:21:01", "remaining_time": "0:01:15"}
151
+ {"current_steps": 151, "total_steps": 159, "loss": 0.7919697165489197, "lr": 8.975292939244927e-09, "epoch": 2.849056603773585, "percentage": 94.97, "elapsed_time": "0:21:10", "remaining_time": "0:01:07"}
152
+ {"current_steps": 152, "total_steps": 159, "loss": 0.781722903251648, "lr": 7.096059495149853e-09, "epoch": 2.867924528301887, "percentage": 95.6, "elapsed_time": "0:21:18", "remaining_time": "0:00:58"}
153
+ {"current_steps": 153, "total_steps": 159, "loss": 0.7471998929977417, "lr": 5.435941707156388e-09, "epoch": 2.8867924528301887, "percentage": 96.23, "elapsed_time": "0:21:26", "remaining_time": "0:00:50"}
154
+ {"current_steps": 154, "total_steps": 159, "loss": 0.7751771807670593, "lr": 3.995677564492039e-09, "epoch": 2.9056603773584904, "percentage": 96.86, "elapsed_time": "0:21:34", "remaining_time": "0:00:42"}
155
+ {"current_steps": 155, "total_steps": 159, "loss": 0.7668254375457764, "lr": 2.7759073225832597e-09, "epoch": 2.9245283018867925, "percentage": 97.48, "elapsed_time": "0:21:42", "remaining_time": "0:00:33"}
156
+ {"current_steps": 156, "total_steps": 159, "loss": 0.7961957454681396, "lr": 1.7771732184357901e-09, "epoch": 2.9433962264150946, "percentage": 98.11, "elapsed_time": "0:21:51", "remaining_time": "0:00:25"}
157
+ {"current_steps": 157, "total_steps": 159, "loss": 0.7848834991455078, "lr": 9.999192295886971e-10, "epoch": 2.9622641509433962, "percentage": 98.74, "elapsed_time": "0:22:00", "remaining_time": "0:00:16"}
158
+ {"current_steps": 158, "total_steps": 159, "loss": 0.777495801448822, "lr": 4.4449087674847117e-10, "epoch": 2.981132075471698, "percentage": 99.37, "elapsed_time": "0:22:08", "remaining_time": "0:00:08"}
159
+ {"current_steps": 159, "total_steps": 159, "loss": 0.7618961334228516, "lr": 1.1113507019094858e-10, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:22:16", "remaining_time": "0:00:00"}
160
+ {"current_steps": 159, "total_steps": 159, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:23:58", "remaining_time": "0:00:00"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cfe8e2eba96acad28d240c66abc8b2f184174e223eeb1e85b5997b07bbed767
3
+ size 7505