aamrinder commited on
Commit
6659ab7
·
verified ·
1 Parent(s): de41727

GRPO Run #1 (200 steps, lr=5e-06)

Browse files
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
  base_model: Qwen/Qwen2.5-3B-Instruct
3
  library_name: peft
4
- model_name: run2
5
  tags:
6
  - base_model:adapter:Qwen/Qwen2.5-3B-Instruct
7
  - grpo
@@ -12,7 +12,7 @@ licence: license
12
  pipeline_tag: text-generation
13
  ---
14
 
15
- # Model Card for run2
16
 
17
  This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct).
18
  It has been trained using [TRL](https://github.com/huggingface/trl).
 
1
  ---
2
  base_model: Qwen/Qwen2.5-3B-Instruct
3
  library_name: peft
4
+ model_name: run3
5
  tags:
6
  - base_model:adapter:Qwen/Qwen2.5-3B-Instruct
7
  - grpo
 
12
  pipeline_tag: text-generation
13
  ---
14
 
15
+ # Model Card for run3
16
 
17
  This model is a fine-tuned version of [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct).
18
  It has been trained using [TRL](https://github.com/huggingface/trl).
adapter_config.json CHANGED
@@ -31,9 +31,9 @@
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
- "k_proj",
35
  "q_proj",
36
- "o_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
 
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
+ "o_proj",
35
  "q_proj",
36
+ "k_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e57649982aa01015cf35d9f3c87afe6d1ad025774c76e0faea80a5691265c1d5
3
  size 14784224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c117e0ba6dc16bb6b2e286f2778318318918ec60c94360defe9b74ce103861cd
3
  size 14784224
checkpoint-100/adapter_config.json CHANGED
@@ -31,9 +31,9 @@
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
- "k_proj",
35
  "q_proj",
36
- "o_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
 
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
+ "o_proj",
35
  "q_proj",
36
+ "k_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ce831210c0aa34712d214f6239605162da23b64e18ede55feb5cbf3b6d7f97
3
  size 14784224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5379a34f791eff5568dc2fd45fee9ebbc91a30ca834361f301b29806ce201241
3
  size 14784224
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9763e713d8d8e98edb42d26cb9e5559c243adb62ef1a263ec6714eac22787d6
3
  size 29737291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a5622c42c1f95c24babea1941981c17b832465eb0cb8791005c769b195f8cc8
3
  size 29737291
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2af1925de5e9b910d3f8f0d916341a7f40ee7ef8b08e18e6078008971f18374d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613b5ffd4575421732dc12bf0b60e8a55a83f070bff0afc543b8bdc04e248c98
3
  size 14645
checkpoint-100/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ab9199b90ff1c78e3f0af98bbb7a6f8d8f8bcbda9987dd482a97cbf14f25e
3
  size 7185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d11428bb3243f52d290c02a99fbea6c72e434c81e27053295d8fa2c9caa21
3
  size 7185
checkpoint-150/adapter_config.json CHANGED
@@ -31,9 +31,9 @@
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
- "k_proj",
35
  "q_proj",
36
- "o_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
 
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
+ "o_proj",
35
  "q_proj",
36
+ "k_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
checkpoint-150/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46d2529791e023211f24c0b47cfc73bdfc79f9047c07d18ea239201fb28eeaac
3
  size 14784224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98373457597644d6646821e70767a089a5b7a07bdbd77835b8d4bb1b0d64d13e
3
  size 14784224
checkpoint-150/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e6fed1e2681f4d5472f2996fb28c0d586dcc4a2672e0c7fa11528f735584429
3
  size 29737291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6f6a711bbc852a0902c3d3a93f2835db117d00f6541593e0c0b07649990c0c
3
  size 29737291
checkpoint-150/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44f7127e12cd1b95c5cc31fc49fe5b335a41c76a606d4d1b96afe024c4361025
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d487d8321e4cd005fa1f4313d9a1103dbe0b413b76c978e17cd62940b61fc7d
3
  size 14645
checkpoint-150/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-150/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ab9199b90ff1c78e3f0af98bbb7a6f8d8f8bcbda9987dd482a97cbf14f25e
3
  size 7185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d11428bb3243f52d290c02a99fbea6c72e434c81e27053295d8fa2c9caa21
3
  size 7185
checkpoint-200/adapter_config.json CHANGED
@@ -31,9 +31,9 @@
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
- "k_proj",
35
  "q_proj",
36
- "o_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
 
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
+ "o_proj",
35
  "q_proj",
36
+ "k_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
checkpoint-200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e57649982aa01015cf35d9f3c87afe6d1ad025774c76e0faea80a5691265c1d5
3
  size 14784224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c117e0ba6dc16bb6b2e286f2778318318918ec60c94360defe9b74ce103861cd
3
  size 14784224
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8a76a7edac5bd76bfcb69f8c7aac5442916a91c97ceee656a52a21ed590b12
3
  size 29737291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68645218ccb4c906bc5021c524e49c36169fe6bcf9b979d846a25281d0b0f80f
3
  size 29737291
checkpoint-200/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b08ba37950368db53de7f721c55a250650f1e295b94406a01aa12dd11bdc5a5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d3b9b6cfaeaad6282c340c3066c6a458a1774ade80bfc05af7de33a3835f10
3
  size 14645
checkpoint-200/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ab9199b90ff1c78e3f0af98bbb7a6f8d8f8bcbda9987dd482a97cbf14f25e
3
  size 7185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d11428bb3243f52d290c02a99fbea6c72e434c81e27053295d8fa2c9caa21
3
  size 7185
checkpoint-50/adapter_config.json CHANGED
@@ -31,9 +31,9 @@
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
- "k_proj",
35
  "q_proj",
36
- "o_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
 
31
  "revision": null,
32
  "target_modules": [
33
  "v_proj",
34
+ "o_proj",
35
  "q_proj",
36
+ "k_proj"
37
  ],
38
  "target_parameters": null,
39
  "task_type": "CAUSAL_LM",
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7eea23e7f03f7e76bea4cdd6325cd24ac708a70eee4617074ec6420c22791f6
3
  size 14784224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec7ce7188648f1898f0ac161f81b3d34343438b570573462624aa05cfe227e85
3
  size 14784224
checkpoint-50/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51a5abe7d40777594e781995c7f41096ca04fac545caed78f42c866906bcaf59
3
  size 29737291
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd8ea3c3435a473f36a21528fbe23d2037879d5eca62c8fe69da65ade7e1fcc9
3
  size 29737291
checkpoint-50/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9dd0cb48eaf6a4fe09ab7fd6e0d90664a93055b548599c58cfe577047fa7ab1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b451b7ce825be93237fda7acd0eee55533ce9126e879d23535595652220112a
3
  size 14645
checkpoint-50/trainer_state.json CHANGED
@@ -16,25 +16,25 @@
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
- "completions/max_length": 165.0,
20
- "completions/max_terminated_length": 165.0,
21
- "completions/mean_length": 153.0,
22
- "completions/mean_terminated_length": 153.0,
23
- "completions/min_length": 138.0,
24
- "completions/min_terminated_length": 138.0,
25
- "entropy": 1.2105660438537598,
26
  "epoch": 0.0016666666666666668,
27
- "frac_reward_zero_std": 0.0,
28
- "grad_norm": 0.31640625,
29
  "learning_rate": 5e-06,
30
- "loss": -0.022634992375969887,
31
- "num_tokens": 2704.0,
32
- "reward": 0.6499999761581421,
33
- "reward_std": 0.36373066902160645,
34
- "rewards/reward_fn/mean": 0.6499999761581421,
35
- "rewards/reward_fn/std": 0.36373066902160645,
36
  "step": 1,
37
- "step_time": 22.95876925799996
38
  },
39
  {
40
  "clip_ratio/high_max": 0.0,
@@ -43,25 +43,25 @@
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
  "completions/clipped_ratio": 0.0,
46
- "completions/max_length": 204.0,
47
- "completions/max_terminated_length": 204.0,
48
- "completions/mean_length": 138.0,
49
- "completions/mean_terminated_length": 138.0,
50
- "completions/min_length": 109.0,
51
- "completions/min_terminated_length": 109.0,
52
- "entropy": 1.2356818914413452,
53
  "epoch": 0.0033333333333333335,
54
  "frac_reward_zero_std": 0.0,
55
- "grad_norm": 0.365234375,
56
  "learning_rate": 4.975000000000001e-06,
57
- "loss": 0.23890334367752075,
58
- "num_tokens": 4868.0,
59
- "reward": 0.9125000238418579,
60
- "reward_std": 0.10499998182058334,
61
- "rewards/reward_fn/mean": 0.9125000238418579,
62
- "rewards/reward_fn/std": 0.10499998927116394,
63
  "step": 2,
64
- "step_time": 26.749743518999935
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
@@ -70,25 +70,25 @@
70
  "clip_ratio/low_min": 0.0,
71
  "clip_ratio/region_mean": 0.0,
72
  "completions/clipped_ratio": 0.0,
73
- "completions/max_length": 141.0,
74
- "completions/max_terminated_length": 141.0,
75
- "completions/mean_length": 125.25,
76
- "completions/mean_terminated_length": 125.25,
77
- "completions/min_length": 97.0,
78
- "completions/min_terminated_length": 97.0,
79
- "entropy": 1.285774827003479,
80
  "epoch": 0.005,
81
- "frac_reward_zero_std": 1.0,
82
- "grad_norm": 0.0,
83
  "learning_rate": 4.95e-06,
84
- "loss": 0.0,
85
- "num_tokens": 8117.0,
86
- "reward": 0.9649999737739563,
87
- "reward_std": 0.0,
88
- "rewards/reward_fn/mean": 0.9649999737739563,
89
- "rewards/reward_fn/std": 0.0,
90
  "step": 3,
91
- "step_time": 19.971433689999913
92
  },
93
  {
94
  "clip_ratio/high_max": 0.0,
@@ -97,25 +97,25 @@
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
  "completions/clipped_ratio": 0.0,
100
- "completions/max_length": 151.0,
101
- "completions/max_terminated_length": 151.0,
102
- "completions/mean_length": 134.25,
103
- "completions/mean_terminated_length": 134.25,
104
- "completions/min_length": 121.0,
105
- "completions/min_terminated_length": 121.0,
106
- "entropy": 1.2906893491744995,
107
  "epoch": 0.006666666666666667,
108
  "frac_reward_zero_std": 0.0,
109
- "grad_norm": 0.359375,
110
  "learning_rate": 4.925e-06,
111
- "loss": -0.028853099793195724,
112
- "num_tokens": 10938.0,
113
- "reward": 0.4662500023841858,
114
- "reward_std": 0.26249998807907104,
115
- "rewards/reward_fn/mean": 0.4662500023841858,
116
- "rewards/reward_fn/std": 0.26250001788139343,
117
  "step": 4,
118
- "step_time": 20.774990057999958
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
@@ -124,25 +124,25 @@
124
  "clip_ratio/low_min": 0.0,
125
  "clip_ratio/region_mean": 0.0,
126
  "completions/clipped_ratio": 0.0,
127
- "completions/max_length": 173.0,
128
- "completions/max_terminated_length": 173.0,
129
- "completions/mean_length": 138.25,
130
- "completions/mean_terminated_length": 138.25,
131
- "completions/min_length": 90.0,
132
- "completions/min_terminated_length": 90.0,
133
- "entropy": 1.1731828451156616,
134
  "epoch": 0.008333333333333333,
135
- "frac_reward_zero_std": 1.0,
136
- "grad_norm": 0.0,
137
  "learning_rate": 4.9000000000000005e-06,
138
- "loss": 0.0,
139
- "num_tokens": 14179.0,
140
- "reward": 0.9649999737739563,
141
- "reward_std": 0.0,
142
- "rewards/reward_fn/mean": 0.9649999737739563,
143
- "rewards/reward_fn/std": 0.0,
144
  "step": 5,
145
- "step_time": 23.97440681699993
146
  },
147
  {
148
  "clip_ratio/high_max": 0.0,
@@ -151,25 +151,25 @@
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
  "completions/clipped_ratio": 0.0,
154
- "completions/max_length": 173.0,
155
- "completions/max_terminated_length": 173.0,
156
- "completions/mean_length": 160.5,
157
- "completions/mean_terminated_length": 160.5,
158
- "completions/min_length": 146.0,
159
- "completions/min_terminated_length": 146.0,
160
- "entropy": 1.1732373237609863,
161
  "epoch": 0.01,
162
  "frac_reward_zero_std": 0.0,
163
- "grad_norm": 0.23828125,
164
  "learning_rate": 4.875e-06,
165
- "loss": -0.0015572253614664078,
166
- "num_tokens": 16449.0,
167
- "reward": 0.7899999618530273,
168
- "reward_std": 0.2800000011920929,
169
- "rewards/reward_fn/mean": 0.7899999618530273,
170
- "rewards/reward_fn/std": 0.2800000011920929,
171
  "step": 6,
172
- "step_time": 22.702045793000025
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
@@ -178,25 +178,25 @@
178
  "clip_ratio/low_min": 0.0,
179
  "clip_ratio/region_mean": 0.0,
180
  "completions/clipped_ratio": 0.0,
181
- "completions/max_length": 195.0,
182
- "completions/max_terminated_length": 195.0,
183
- "completions/mean_length": 150.25,
184
- "completions/mean_terminated_length": 150.25,
185
- "completions/min_length": 113.0,
186
- "completions/min_terminated_length": 113.0,
187
- "entropy": 1.0819625854492188,
188
  "epoch": 0.011666666666666667,
189
- "frac_reward_zero_std": 1.0,
190
- "grad_norm": 0.0,
191
  "learning_rate": 4.85e-06,
192
- "loss": 0.0,
193
- "num_tokens": 19550.0,
194
- "reward": 0.33500000834465027,
195
- "reward_std": 0.0,
196
- "rewards/reward_fn/mean": 0.33500000834465027,
197
- "rewards/reward_fn/std": 0.0,
198
  "step": 7,
199
- "step_time": 26.63143956300007
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
@@ -205,25 +205,25 @@
205
  "clip_ratio/low_min": 0.0,
206
  "clip_ratio/region_mean": 0.0,
207
  "completions/clipped_ratio": 0.0,
208
- "completions/max_length": 160.0,
209
- "completions/max_terminated_length": 160.0,
210
- "completions/mean_length": 136.75,
211
- "completions/mean_terminated_length": 136.75,
212
- "completions/min_length": 123.0,
213
- "completions/min_terminated_length": 123.0,
214
- "entropy": 1.0653905868530273,
215
  "epoch": 0.013333333333333334,
216
  "frac_reward_zero_std": 0.0,
217
- "grad_norm": 0.322265625,
218
  "learning_rate": 4.825e-06,
219
- "loss": 0.0333920456469059,
220
- "num_tokens": 22149.0,
221
- "reward": 0.6587499976158142,
222
- "reward_std": 0.35391557216644287,
223
- "rewards/reward_fn/mean": 0.6587499976158142,
224
- "rewards/reward_fn/std": 0.35391557216644287,
225
  "step": 8,
226
- "step_time": 21.686032391000026
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
@@ -232,25 +232,25 @@
232
  "clip_ratio/low_min": 0.0,
233
  "clip_ratio/region_mean": 0.0,
234
  "completions/clipped_ratio": 0.0,
235
- "completions/max_length": 179.0,
236
- "completions/max_terminated_length": 179.0,
237
- "completions/mean_length": 145.0,
238
- "completions/mean_terminated_length": 145.0,
239
- "completions/min_length": 118.0,
240
- "completions/min_terminated_length": 118.0,
241
- "entropy": 1.2054963111877441,
242
  "epoch": 0.015,
243
- "frac_reward_zero_std": 0.0,
244
- "grad_norm": 0.29296875,
245
  "learning_rate": 4.800000000000001e-06,
246
- "loss": 0.05355554819107056,
247
- "num_tokens": 25225.0,
248
- "reward": 0.4846250116825104,
249
- "reward_std": 0.29815638065338135,
250
- "rewards/reward_fn/mean": 0.4846250116825104,
251
- "rewards/reward_fn/std": 0.29815641045570374,
252
  "step": 9,
253
- "step_time": 24.58158876100015
254
  },
255
  {
256
  "clip_ratio/high_max": 0.0,
@@ -259,25 +259,25 @@
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
  "completions/clipped_ratio": 0.0,
262
- "completions/max_length": 165.0,
263
- "completions/max_terminated_length": 165.0,
264
- "completions/mean_length": 145.25,
265
- "completions/mean_terminated_length": 145.25,
266
- "completions/min_length": 123.0,
267
- "completions/min_terminated_length": 123.0,
268
- "entropy": 1.1894657611846924,
269
  "epoch": 0.016666666666666666,
270
  "frac_reward_zero_std": 0.0,
271
- "grad_norm": 0.3515625,
272
  "learning_rate": 4.775e-06,
273
- "loss": -0.026669645681977272,
274
- "num_tokens": 28250.0,
275
- "reward": 0.4925000071525574,
276
- "reward_std": 0.3149999976158142,
277
- "rewards/reward_fn/mean": 0.4925000071525574,
278
- "rewards/reward_fn/std": 0.3149999678134918,
279
  "step": 10,
280
- "step_time": 22.547139097000127
281
  },
282
  {
283
  "clip_ratio/high_max": 0.0,
@@ -286,25 +286,25 @@
286
  "clip_ratio/low_min": 0.0,
287
  "clip_ratio/region_mean": 0.0,
288
  "completions/clipped_ratio": 0.0,
289
- "completions/max_length": 155.0,
290
- "completions/max_terminated_length": 155.0,
291
- "completions/mean_length": 127.25,
292
- "completions/mean_terminated_length": 127.25,
293
- "completions/min_length": 94.0,
294
- "completions/min_terminated_length": 94.0,
295
- "entropy": 1.1050550937652588,
296
  "epoch": 0.018333333333333333,
297
  "frac_reward_zero_std": 0.0,
298
- "grad_norm": 0.33203125,
299
  "learning_rate": 4.75e-06,
300
- "loss": -0.06383034586906433,
301
- "num_tokens": 31011.0,
302
  "reward": 0.8075000047683716,
303
- "reward_std": 0.3149999976158142,
304
  "rewards/reward_fn/mean": 0.8075000047683716,
305
- "rewards/reward_fn/std": 0.3149999976158142,
306
  "step": 11,
307
- "step_time": 21.200924256999997
308
  },
309
  {
310
  "clip_ratio/high_max": 0.0,
@@ -313,25 +313,25 @@
313
  "clip_ratio/low_min": 0.0,
314
  "clip_ratio/region_mean": 0.0,
315
  "completions/clipped_ratio": 0.0,
316
- "completions/max_length": 190.0,
317
- "completions/max_terminated_length": 190.0,
318
- "completions/mean_length": 141.75,
319
- "completions/mean_terminated_length": 141.75,
320
- "completions/min_length": 113.0,
321
- "completions/min_terminated_length": 113.0,
322
- "entropy": 1.3850853443145752,
323
  "epoch": 0.02,
324
  "frac_reward_zero_std": 0.0,
325
- "grad_norm": 0.341796875,
326
  "learning_rate": 4.7250000000000005e-06,
327
- "loss": -0.17013315856456757,
328
- "num_tokens": 34138.0,
329
- "reward": 0.4750000238418579,
330
- "reward_std": 0.2800000011920929,
331
- "rewards/reward_fn/mean": 0.4750000238418579,
332
- "rewards/reward_fn/std": 0.2800000011920929,
333
  "step": 12,
334
- "step_time": 26.068631433999826
335
  },
336
  {
337
  "clip_ratio/high_max": 0.0,
@@ -340,25 +340,25 @@
340
  "clip_ratio/low_min": 0.0,
341
  "clip_ratio/region_mean": 0.0,
342
  "completions/clipped_ratio": 0.0,
343
- "completions/max_length": 148.0,
344
- "completions/max_terminated_length": 148.0,
345
- "completions/mean_length": 132.75,
346
- "completions/mean_terminated_length": 132.75,
347
- "completions/min_length": 112.0,
348
- "completions/min_terminated_length": 112.0,
349
- "entropy": 1.2321714162826538,
350
  "epoch": 0.021666666666666667,
351
  "frac_reward_zero_std": 0.0,
352
- "grad_norm": 0.328125,
353
  "learning_rate": 4.7e-06,
354
- "loss": -0.04280044510960579,
355
- "num_tokens": 37225.0,
356
- "reward": 0.4881249964237213,
357
- "reward_std": 0.295390784740448,
358
- "rewards/reward_fn/mean": 0.4881249964237213,
359
- "rewards/reward_fn/std": 0.2953908145427704,
360
  "step": 13,
361
- "step_time": 20.749948574999962
362
  },
363
  {
364
  "clip_ratio/high_max": 0.0,
@@ -367,25 +367,25 @@
367
  "clip_ratio/low_min": 0.0,
368
  "clip_ratio/region_mean": 0.0,
369
  "completions/clipped_ratio": 0.0,
370
- "completions/max_length": 143.0,
371
- "completions/max_terminated_length": 143.0,
372
- "completions/mean_length": 119.5,
373
- "completions/mean_terminated_length": 119.5,
374
- "completions/min_length": 101.0,
375
- "completions/min_terminated_length": 101.0,
376
- "entropy": 1.0852543115615845,
377
  "epoch": 0.023333333333333334,
378
  "frac_reward_zero_std": 0.0,
379
- "grad_norm": 0.314453125,
380
  "learning_rate": 4.675000000000001e-06,
381
- "loss": 0.0760100781917572,
382
- "num_tokens": 40239.0,
383
- "reward": 0.5012500286102295,
384
- "reward_std": 0.3096065819263458,
385
- "rewards/reward_fn/mean": 0.5012500286102295,
386
- "rewards/reward_fn/std": 0.3096065819263458,
387
  "step": 14,
388
- "step_time": 20.193131967
389
  },
390
  {
391
  "clip_ratio/high_max": 0.0,
@@ -394,25 +394,25 @@
394
  "clip_ratio/low_min": 0.0,
395
  "clip_ratio/region_mean": 0.0,
396
  "completions/clipped_ratio": 0.0,
397
- "completions/max_length": 227.0,
398
- "completions/max_terminated_length": 227.0,
399
- "completions/mean_length": 139.5,
400
- "completions/mean_terminated_length": 139.5,
401
- "completions/min_length": 79.0,
402
- "completions/min_terminated_length": 79.0,
403
- "entropy": 1.3950763940811157,
404
  "epoch": 0.025,
405
  "frac_reward_zero_std": 0.0,
406
- "grad_norm": 0.4140625,
407
  "learning_rate": 4.65e-06,
408
- "loss": 0.30745774507522583,
409
- "num_tokens": 43097.0,
410
- "reward": 0.8073749542236328,
411
- "reward_std": 0.29558849334716797,
412
- "rewards/reward_fn/mean": 0.8073749542236328,
413
- "rewards/reward_fn/std": 0.29558849334716797,
414
  "step": 15,
415
- "step_time": 30.199206002999972
416
  },
417
  {
418
  "clip_ratio/high_max": 0.0,
@@ -421,25 +421,25 @@
421
  "clip_ratio/low_min": 0.0,
422
  "clip_ratio/region_mean": 0.0,
423
  "completions/clipped_ratio": 0.0,
424
- "completions/max_length": 167.0,
425
- "completions/max_terminated_length": 167.0,
426
- "completions/mean_length": 136.0,
427
- "completions/mean_terminated_length": 136.0,
428
- "completions/min_length": 115.0,
429
- "completions/min_terminated_length": 115.0,
430
- "entropy": 1.117733120918274,
431
  "epoch": 0.02666666666666667,
432
  "frac_reward_zero_std": 0.0,
433
- "grad_norm": 0.337890625,
434
  "learning_rate": 4.625000000000001e-06,
435
- "loss": -4.2862751570282853e-07,
436
- "num_tokens": 45921.0,
437
- "reward": 0.9649999737739563,
438
- "reward_std": 0.02857736498117447,
439
- "rewards/reward_fn/mean": 0.9649999737739563,
440
- "rewards/reward_fn/std": 0.028577378019690514,
441
  "step": 16,
442
- "step_time": 22.748836753999967
443
  },
444
  {
445
  "clip_ratio/high_max": 0.0,
@@ -448,25 +448,25 @@
448
  "clip_ratio/low_min": 0.0,
449
  "clip_ratio/region_mean": 0.0,
450
  "completions/clipped_ratio": 0.0,
451
- "completions/max_length": 149.0,
452
- "completions/max_terminated_length": 149.0,
453
- "completions/mean_length": 134.75,
454
- "completions/mean_terminated_length": 134.75,
455
  "completions/min_length": 117.0,
456
  "completions/min_terminated_length": 117.0,
457
- "entropy": 1.064285397529602,
458
  "epoch": 0.028333333333333332,
459
- "frac_reward_zero_std": 1.0,
460
- "grad_norm": 0.0,
461
  "learning_rate": 4.600000000000001e-06,
462
- "loss": 0.0,
463
- "num_tokens": 48596.0,
464
- "reward": 0.33500000834465027,
465
- "reward_std": 0.0,
466
- "rewards/reward_fn/mean": 0.33500000834465027,
467
- "rewards/reward_fn/std": 0.0,
468
  "step": 17,
469
- "step_time": 20.287042794999934
470
  },
471
  {
472
  "clip_ratio/high_max": 0.0,
@@ -475,25 +475,25 @@
475
  "clip_ratio/low_min": 0.0,
476
  "clip_ratio/region_mean": 0.0,
477
  "completions/clipped_ratio": 0.0,
478
- "completions/max_length": 141.0,
479
- "completions/max_terminated_length": 141.0,
480
- "completions/mean_length": 111.75,
481
- "completions/mean_terminated_length": 111.75,
482
- "completions/min_length": 86.0,
483
- "completions/min_terminated_length": 86.0,
484
- "entropy": 1.1270049810409546,
485
  "epoch": 0.03,
486
  "frac_reward_zero_std": 0.0,
487
- "grad_norm": 0.4453125,
488
  "learning_rate": 4.575e-06,
489
- "loss": -0.07971261441707611,
490
- "num_tokens": 51319.0,
491
- "reward": 0.6667499542236328,
492
- "reward_std": 0.3443916141986847,
493
- "rewards/reward_fn/mean": 0.6667499542236328,
494
- "rewards/reward_fn/std": 0.3443916141986847,
495
  "step": 18,
496
- "step_time": 19.57149481900001
497
  },
498
  {
499
  "clip_ratio/high_max": 0.0,
@@ -502,25 +502,25 @@
502
  "clip_ratio/low_min": 0.0,
503
  "clip_ratio/region_mean": 0.0,
504
  "completions/clipped_ratio": 0.0,
505
- "completions/max_length": 143.0,
506
- "completions/max_terminated_length": 143.0,
507
- "completions/mean_length": 128.25,
508
- "completions/mean_terminated_length": 128.25,
509
- "completions/min_length": 112.0,
510
- "completions/min_terminated_length": 112.0,
511
- "entropy": 1.1092581748962402,
512
  "epoch": 0.03166666666666667,
513
- "frac_reward_zero_std": 1.0,
514
- "grad_norm": 0.0,
515
  "learning_rate": 4.5500000000000005e-06,
516
- "loss": 0.0,
517
- "num_tokens": 53924.0,
518
- "reward": 0.33500000834465027,
519
- "reward_std": 0.0,
520
- "rewards/reward_fn/mean": 0.33500000834465027,
521
- "rewards/reward_fn/std": 0.0,
522
  "step": 19,
523
- "step_time": 19.61959041399996
524
  },
525
  {
526
  "clip_ratio/high_max": 0.0,
@@ -529,25 +529,25 @@
529
  "clip_ratio/low_min": 0.0,
530
  "clip_ratio/region_mean": 0.0,
531
  "completions/clipped_ratio": 0.0,
532
- "completions/max_length": 153.0,
533
- "completions/max_terminated_length": 153.0,
534
- "completions/mean_length": 137.75,
535
- "completions/mean_terminated_length": 137.75,
536
- "completions/min_length": 113.0,
537
- "completions/min_terminated_length": 113.0,
538
- "entropy": 1.018242359161377,
539
  "epoch": 0.03333333333333333,
540
- "frac_reward_zero_std": 1.0,
541
- "grad_norm": 0.0,
542
  "learning_rate": 4.525000000000001e-06,
543
- "loss": 0.0,
544
- "num_tokens": 56503.0,
545
- "reward": 0.33500000834465027,
546
- "reward_std": 0.0,
547
- "rewards/reward_fn/mean": 0.33500000834465027,
548
- "rewards/reward_fn/std": 0.0,
549
  "step": 20,
550
- "step_time": 20.682466742000088
551
  },
552
  {
553
  "clip_ratio/high_max": 0.0,
@@ -556,25 +556,25 @@
556
  "clip_ratio/low_min": 0.0,
557
  "clip_ratio/region_mean": 0.0,
558
  "completions/clipped_ratio": 0.0,
559
- "completions/max_length": 186.0,
560
- "completions/max_terminated_length": 186.0,
561
- "completions/mean_length": 137.75,
562
- "completions/mean_terminated_length": 137.75,
563
- "completions/min_length": 85.0,
564
- "completions/min_terminated_length": 85.0,
565
- "entropy": 1.0036064386367798,
566
  "epoch": 0.035,
567
  "frac_reward_zero_std": 0.0,
568
- "grad_norm": 0.369140625,
569
  "learning_rate": 4.5e-06,
570
- "loss": -0.1912594735622406,
571
- "num_tokens": 59302.0,
572
- "reward": 0.28962498903274536,
573
- "reward_std": 0.09075000882148743,
574
- "rewards/reward_fn/mean": 0.28962498903274536,
575
- "rewards/reward_fn/std": 0.09075000137090683,
576
  "step": 21,
577
- "step_time": 25.141592648999904
578
  },
579
  {
580
  "clip_ratio/high_max": 0.0,
@@ -583,25 +583,25 @@
583
  "clip_ratio/low_min": 0.0,
584
  "clip_ratio/region_mean": 0.0,
585
  "completions/clipped_ratio": 0.0,
586
- "completions/max_length": 164.0,
587
- "completions/max_terminated_length": 164.0,
588
- "completions/mean_length": 136.5,
589
- "completions/mean_terminated_length": 136.5,
590
- "completions/min_length": 113.0,
591
- "completions/min_terminated_length": 113.0,
592
- "entropy": 1.1484683752059937,
593
  "epoch": 0.03666666666666667,
594
  "frac_reward_zero_std": 0.0,
595
- "grad_norm": 0.30859375,
596
  "learning_rate": 4.475e-06,
597
- "loss": -0.018206359818577766,
598
- "num_tokens": 62156.0,
599
- "reward": 0.8075000047683716,
600
- "reward_std": 0.2921329438686371,
601
- "rewards/reward_fn/mean": 0.8075000047683716,
602
- "rewards/reward_fn/std": 0.2921329438686371,
603
  "step": 22,
604
- "step_time": 22.48889189900001
605
  },
606
  {
607
  "clip_ratio/high_max": 0.0,
@@ -610,25 +610,25 @@
610
  "clip_ratio/low_min": 0.0,
611
  "clip_ratio/region_mean": 0.0,
612
  "completions/clipped_ratio": 0.0,
613
- "completions/max_length": 175.0,
614
- "completions/max_terminated_length": 175.0,
615
- "completions/mean_length": 156.0,
616
- "completions/mean_terminated_length": 156.0,
617
- "completions/min_length": 102.0,
618
- "completions/min_terminated_length": 102.0,
619
- "entropy": 1.273712396621704,
620
  "epoch": 0.03833333333333333,
621
  "frac_reward_zero_std": 0.0,
622
- "grad_norm": 0.35546875,
623
  "learning_rate": 4.450000000000001e-06,
624
- "loss": -0.06051231175661087,
625
- "num_tokens": 65228.0,
626
- "reward": 0.9728749990463257,
627
- "reward_std": 0.015750011429190636,
628
- "rewards/reward_fn/mean": 0.9728749990463257,
629
- "rewards/reward_fn/std": 0.015750020742416382,
630
  "step": 23,
631
- "step_time": 24.07554538599993
632
  },
633
  {
634
  "clip_ratio/high_max": 0.0,
@@ -637,25 +637,25 @@
637
  "clip_ratio/low_min": 0.0,
638
  "clip_ratio/region_mean": 0.0,
639
  "completions/clipped_ratio": 0.0,
640
- "completions/max_length": 218.0,
641
- "completions/max_terminated_length": 218.0,
642
- "completions/mean_length": 162.75,
643
- "completions/mean_terminated_length": 162.75,
644
- "completions/min_length": 118.0,
645
- "completions/min_terminated_length": 118.0,
646
- "entropy": 1.303102970123291,
647
  "epoch": 0.04,
648
  "frac_reward_zero_std": 0.0,
649
- "grad_norm": 0.310546875,
650
  "learning_rate": 4.425e-06,
651
- "loss": -0.060656558722257614,
652
- "num_tokens": 68235.0,
653
- "reward": 0.8075000047683716,
654
- "reward_std": 0.3149999976158142,
655
- "rewards/reward_fn/mean": 0.8075000047683716,
656
- "rewards/reward_fn/std": 0.3149999976158142,
657
  "step": 24,
658
- "step_time": 29.133661716999995
659
  },
660
  {
661
  "clip_ratio/high_max": 0.0,
@@ -664,25 +664,25 @@
664
  "clip_ratio/low_min": 0.0,
665
  "clip_ratio/region_mean": 0.0,
666
  "completions/clipped_ratio": 0.0,
667
- "completions/max_length": 197.0,
668
- "completions/max_terminated_length": 197.0,
669
- "completions/mean_length": 158.75,
670
- "completions/mean_terminated_length": 158.75,
671
- "completions/min_length": 124.0,
672
- "completions/min_terminated_length": 124.0,
673
- "entropy": 1.2195461988449097,
674
  "epoch": 0.041666666666666664,
675
  "frac_reward_zero_std": 0.0,
676
- "grad_norm": 0.298828125,
677
  "learning_rate": 4.4e-06,
678
- "loss": 0.0523429811000824,
679
- "num_tokens": 71338.0,
680
- "reward": 0.6543750166893005,
681
- "reward_std": 0.3688516616821289,
682
- "rewards/reward_fn/mean": 0.6543750166893005,
683
- "rewards/reward_fn/std": 0.3688516914844513,
684
  "step": 25,
685
- "step_time": 26.921175812
686
  },
687
  {
688
  "clip_ratio/high_max": 0.0,
@@ -691,25 +691,25 @@
691
  "clip_ratio/low_min": 0.0,
692
  "clip_ratio/region_mean": 0.0,
693
  "completions/clipped_ratio": 0.0,
694
- "completions/max_length": 181.0,
695
- "completions/max_terminated_length": 181.0,
696
- "completions/mean_length": 172.25,
697
- "completions/mean_terminated_length": 172.25,
698
- "completions/min_length": 159.0,
699
- "completions/min_terminated_length": 159.0,
700
- "entropy": 1.415091633796692,
701
  "epoch": 0.043333333333333335,
702
- "frac_reward_zero_std": 0.0,
703
- "grad_norm": 0.30859375,
704
  "learning_rate": 4.3750000000000005e-06,
705
- "loss": 0.04146662354469299,
706
- "num_tokens": 74187.0,
707
- "reward": 0.6324999928474426,
708
- "reward_std": 0.34352341294288635,
709
- "rewards/reward_fn/mean": 0.6324999928474426,
710
- "rewards/reward_fn/std": 0.34352341294288635,
711
  "step": 26,
712
- "step_time": 24.336814401000083
713
  },
714
  {
715
  "clip_ratio/high_max": 0.0,
@@ -718,25 +718,25 @@
718
  "clip_ratio/low_min": 0.0,
719
  "clip_ratio/region_mean": 0.0,
720
  "completions/clipped_ratio": 0.0,
721
- "completions/max_length": 173.0,
722
- "completions/max_terminated_length": 173.0,
723
- "completions/mean_length": 128.5,
724
- "completions/mean_terminated_length": 128.5,
725
- "completions/min_length": 108.0,
726
- "completions/min_terminated_length": 108.0,
727
- "entropy": 1.1021723747253418,
728
  "epoch": 0.045,
729
  "frac_reward_zero_std": 0.0,
730
- "grad_norm": 0.359375,
731
  "learning_rate": 4.350000000000001e-06,
732
- "loss": -0.17364557087421417,
733
- "num_tokens": 76761.0,
734
- "reward": 0.4881249964237213,
735
- "reward_std": 0.3180236518383026,
736
- "rewards/reward_fn/mean": 0.4881249964237213,
737
- "rewards/reward_fn/std": 0.318023681640625,
738
  "step": 27,
739
- "step_time": 23.40645558799997
740
  },
741
  {
742
  "clip_ratio/high_max": 0.0,
@@ -745,25 +745,25 @@
745
  "clip_ratio/low_min": 0.0,
746
  "clip_ratio/region_mean": 0.0,
747
  "completions/clipped_ratio": 0.0,
748
- "completions/max_length": 168.0,
749
- "completions/max_terminated_length": 168.0,
750
- "completions/mean_length": 122.25,
751
- "completions/mean_terminated_length": 122.25,
752
- "completions/min_length": 100.0,
753
- "completions/min_terminated_length": 100.0,
754
- "entropy": 1.0565608739852905,
755
  "epoch": 0.04666666666666667,
756
  "frac_reward_zero_std": 0.0,
757
- "grad_norm": 0.458984375,
758
  "learning_rate": 4.325e-06,
759
- "loss": 0.18605169653892517,
760
- "num_tokens": 79546.0,
761
- "reward": 0.9562499523162842,
762
- "reward_std": 0.017499983310699463,
763
- "rewards/reward_fn/mean": 0.9562499523162842,
764
- "rewards/reward_fn/std": 0.017499983310699463,
765
  "step": 28,
766
- "step_time": 23.06411772399997
767
  },
768
  {
769
  "clip_ratio/high_max": 0.0,
@@ -772,25 +772,25 @@
772
  "clip_ratio/low_min": 0.0,
773
  "clip_ratio/region_mean": 0.0,
774
  "completions/clipped_ratio": 0.0,
775
- "completions/max_length": 185.0,
776
- "completions/max_terminated_length": 185.0,
777
- "completions/mean_length": 142.75,
778
- "completions/mean_terminated_length": 142.75,
779
- "completions/min_length": 118.0,
780
- "completions/min_terminated_length": 118.0,
781
- "entropy": 1.1531003713607788,
782
  "epoch": 0.04833333333333333,
783
  "frac_reward_zero_std": 0.0,
784
- "grad_norm": 0.392578125,
785
  "learning_rate": 4.3e-06,
786
- "loss": -0.14320793747901917,
787
- "num_tokens": 82589.0,
788
- "reward": 0.33500000834465027,
789
- "reward_std": 0.02857738919556141,
790
- "rewards/reward_fn/mean": 0.33500000834465027,
791
- "rewards/reward_fn/std": 0.028577378019690514,
792
  "step": 29,
793
- "step_time": 25.436451160999923
794
  },
795
  {
796
  "clip_ratio/high_max": 0.0,
@@ -799,25 +799,25 @@
799
  "clip_ratio/low_min": 0.0,
800
  "clip_ratio/region_mean": 0.0,
801
  "completions/clipped_ratio": 0.0,
802
- "completions/max_length": 158.0,
803
- "completions/max_terminated_length": 158.0,
804
- "completions/mean_length": 128.0,
805
- "completions/mean_terminated_length": 128.0,
806
- "completions/min_length": 99.0,
807
- "completions/min_terminated_length": 99.0,
808
- "entropy": 1.2591049671173096,
809
  "epoch": 0.05,
810
  "frac_reward_zero_std": 0.0,
811
- "grad_norm": 0.37109375,
812
  "learning_rate": 4.2750000000000006e-06,
813
- "loss": 0.11585954576730728,
814
- "num_tokens": 85349.0,
815
- "reward": 0.8075000047683716,
816
- "reward_std": 0.3267708122730255,
817
- "rewards/reward_fn/mean": 0.8075000047683716,
818
- "rewards/reward_fn/std": 0.3267708122730255,
819
  "step": 30,
820
- "step_time": 21.778404101999968
821
  },
822
  {
823
  "clip_ratio/high_max": 0.0,
@@ -826,25 +826,25 @@
826
  "clip_ratio/low_min": 0.0,
827
  "clip_ratio/region_mean": 0.0,
828
  "completions/clipped_ratio": 0.0,
829
- "completions/max_length": 199.0,
830
- "completions/max_terminated_length": 199.0,
831
- "completions/mean_length": 162.5,
832
- "completions/mean_terminated_length": 162.5,
833
- "completions/min_length": 108.0,
834
- "completions/min_terminated_length": 108.0,
835
- "entropy": 1.256501317024231,
836
  "epoch": 0.051666666666666666,
837
  "frac_reward_zero_std": 0.0,
838
- "grad_norm": 0.2578125,
839
  "learning_rate": 4.25e-06,
840
- "loss": -0.16665051877498627,
841
- "num_tokens": 88547.0,
842
- "reward": 0.8031250238418579,
843
- "reward_std": 0.2898086607456207,
844
- "rewards/reward_fn/mean": 0.8031250238418579,
845
- "rewards/reward_fn/std": 0.2898086607456207,
846
  "step": 31,
847
- "step_time": 27.062192574000164
848
  },
849
  {
850
  "clip_ratio/high_max": 0.0,
@@ -853,25 +853,25 @@
853
  "clip_ratio/low_min": 0.0,
854
  "clip_ratio/region_mean": 0.0,
855
  "completions/clipped_ratio": 0.0,
856
- "completions/max_length": 202.0,
857
- "completions/max_terminated_length": 202.0,
858
- "completions/mean_length": 154.0,
859
- "completions/mean_terminated_length": 154.0,
860
- "completions/min_length": 123.0,
861
- "completions/min_terminated_length": 123.0,
862
- "entropy": 1.262739896774292,
863
  "epoch": 0.05333333333333334,
864
  "frac_reward_zero_std": 0.0,
865
- "grad_norm": 0.326171875,
866
  "learning_rate": 4.225e-06,
867
- "loss": -0.15677310526371002,
868
- "num_tokens": 91527.0,
869
- "reward": 0.4793750047683716,
870
- "reward_std": 0.30052992701530457,
871
- "rewards/reward_fn/mean": 0.4793750047683716,
872
- "rewards/reward_fn/std": 0.3005298972129822,
873
  "step": 32,
874
- "step_time": 27.461082653000176
875
  },
876
  {
877
  "clip_ratio/high_max": 0.0,
@@ -880,25 +880,25 @@
880
  "clip_ratio/low_min": 0.0,
881
  "clip_ratio/region_mean": 0.0,
882
  "completions/clipped_ratio": 0.0,
883
- "completions/max_length": 234.0,
884
- "completions/max_terminated_length": 234.0,
885
- "completions/mean_length": 155.0,
886
- "completions/mean_terminated_length": 155.0,
887
- "completions/min_length": 117.0,
888
- "completions/min_terminated_length": 117.0,
889
- "entropy": 1.4284884929656982,
890
  "epoch": 0.055,
891
  "frac_reward_zero_std": 0.0,
892
- "grad_norm": 0.3125,
893
  "learning_rate": 4.2000000000000004e-06,
894
- "loss": -0.12012849748134613,
895
- "num_tokens": 94511.0,
896
- "reward": 0.6563750505447388,
897
- "reward_std": 0.35119685530662537,
898
- "rewards/reward_fn/mean": 0.6563750505447388,
899
- "rewards/reward_fn/std": 0.35119685530662537,
900
  "step": 33,
901
- "step_time": 31.345140926000113
902
  },
903
  {
904
  "clip_ratio/high_max": 0.0,
@@ -907,25 +907,25 @@
907
  "clip_ratio/low_min": 0.0,
908
  "clip_ratio/region_mean": 0.0,
909
  "completions/clipped_ratio": 0.0,
910
- "completions/max_length": 225.0,
911
- "completions/max_terminated_length": 225.0,
912
- "completions/mean_length": 178.5,
913
- "completions/mean_terminated_length": 178.5,
914
- "completions/min_length": 117.0,
915
- "completions/min_terminated_length": 117.0,
916
- "entropy": 1.2518457174301147,
917
  "epoch": 0.056666666666666664,
918
  "frac_reward_zero_std": 0.0,
919
- "grad_norm": 0.2470703125,
920
  "learning_rate": 4.175e-06,
921
- "loss": 0.15931710600852966,
922
- "num_tokens": 97581.0,
923
- "reward": 0.9474999904632568,
924
- "reward_std": 0.0202072411775589,
925
- "rewards/reward_fn/mean": 0.9474999904632568,
926
- "rewards/reward_fn/std": 0.0202072411775589,
927
  "step": 34,
928
- "step_time": 30.19271043699996
929
  },
930
  {
931
  "clip_ratio/high_max": 0.0,
@@ -934,25 +934,25 @@
934
  "clip_ratio/low_min": 0.0,
935
  "clip_ratio/region_mean": 0.0,
936
  "completions/clipped_ratio": 0.0,
937
- "completions/max_length": 230.0,
938
- "completions/max_terminated_length": 230.0,
939
- "completions/mean_length": 162.75,
940
- "completions/mean_terminated_length": 162.75,
941
- "completions/min_length": 139.0,
942
- "completions/min_terminated_length": 139.0,
943
- "entropy": 1.2630821466445923,
944
  "epoch": 0.058333333333333334,
945
- "frac_reward_zero_std": 0.0,
946
- "grad_norm": 0.2734375,
947
  "learning_rate": 4.15e-06,
948
- "loss": 0.06504115462303162,
949
- "num_tokens": 100440.0,
950
- "reward": 0.5068750381469727,
951
- "reward_std": 0.30575981736183167,
952
- "rewards/reward_fn/mean": 0.5068750381469727,
953
- "rewards/reward_fn/std": 0.3057597875595093,
954
  "step": 35,
955
- "step_time": 30.6441785290001
956
  },
957
  {
958
  "clip_ratio/high_max": 0.0,
@@ -961,25 +961,25 @@
961
  "clip_ratio/low_min": 0.0,
962
  "clip_ratio/region_mean": 0.0,
963
  "completions/clipped_ratio": 0.0,
964
- "completions/max_length": 144.0,
965
- "completions/max_terminated_length": 144.0,
966
- "completions/mean_length": 132.0,
967
- "completions/mean_terminated_length": 132.0,
968
- "completions/min_length": 115.0,
969
- "completions/min_terminated_length": 115.0,
970
- "entropy": 1.1735280752182007,
971
  "epoch": 0.06,
972
  "frac_reward_zero_std": 0.0,
973
- "grad_norm": 0.328125,
974
  "learning_rate": 4.125e-06,
975
- "loss": -0.04516683891415596,
976
- "num_tokens": 103268.0,
977
- "reward": 0.9728749990463257,
978
- "reward_std": 0.015750011429190636,
979
- "rewards/reward_fn/mean": 0.9728749990463257,
980
- "rewards/reward_fn/std": 0.015750020742416382,
981
  "step": 36,
982
- "step_time": 19.9776081240002
983
  },
984
  {
985
  "clip_ratio/high_max": 0.0,
@@ -988,25 +988,25 @@
988
  "clip_ratio/low_min": 0.0,
989
  "clip_ratio/region_mean": 0.0,
990
  "completions/clipped_ratio": 0.0,
991
- "completions/max_length": 161.0,
992
- "completions/max_terminated_length": 161.0,
993
- "completions/mean_length": 128.0,
994
- "completions/mean_terminated_length": 128.0,
995
- "completions/min_length": 87.0,
996
- "completions/min_terminated_length": 87.0,
997
- "entropy": 1.1598751544952393,
998
  "epoch": 0.06166666666666667,
999
  "frac_reward_zero_std": 0.0,
1000
- "grad_norm": 0.326171875,
1001
  "learning_rate": 4.1e-06,
1002
- "loss": 0.0017657531425356865,
1003
- "num_tokens": 106020.0,
1004
- "reward": 0.7987499833106995,
1005
- "reward_std": 0.3096065819263458,
1006
- "rewards/reward_fn/mean": 0.7987499833106995,
1007
- "rewards/reward_fn/std": 0.3096065819263458,
1008
  "step": 37,
1009
- "step_time": 21.841671942999938
1010
  },
1011
  {
1012
  "clip_ratio/high_max": 0.0,
@@ -1015,25 +1015,25 @@
1015
  "clip_ratio/low_min": 0.0,
1016
  "clip_ratio/region_mean": 0.0,
1017
  "completions/clipped_ratio": 0.0,
1018
- "completions/max_length": 158.0,
1019
- "completions/max_terminated_length": 158.0,
1020
- "completions/mean_length": 115.25,
1021
- "completions/mean_terminated_length": 115.25,
1022
- "completions/min_length": 91.0,
1023
- "completions/min_terminated_length": 91.0,
1024
- "entropy": 1.033192753791809,
1025
  "epoch": 0.06333333333333334,
1026
  "frac_reward_zero_std": 0.0,
1027
- "grad_norm": 0.34765625,
1028
  "learning_rate": 4.075e-06,
1029
- "loss": 0.016084324568510056,
1030
- "num_tokens": 108717.0,
1031
- "reward": 0.3306249976158142,
1032
- "reward_std": 0.008750001899898052,
1033
- "rewards/reward_fn/mean": 0.3306249976158142,
1034
- "rewards/reward_fn/std": 0.008750006556510925,
1035
  "step": 38,
1036
- "step_time": 21.74608845700004
1037
  },
1038
  {
1039
  "clip_ratio/high_max": 0.0,
@@ -1042,25 +1042,25 @@
1042
  "clip_ratio/low_min": 0.0,
1043
  "clip_ratio/region_mean": 0.0,
1044
  "completions/clipped_ratio": 0.0,
1045
- "completions/max_length": 275.0,
1046
- "completions/max_terminated_length": 275.0,
1047
- "completions/mean_length": 173.5,
1048
- "completions/mean_terminated_length": 173.5,
1049
- "completions/min_length": 129.0,
1050
- "completions/min_terminated_length": 129.0,
1051
- "entropy": 1.184029221534729,
1052
  "epoch": 0.065,
1053
  "frac_reward_zero_std": 0.0,
1054
- "grad_norm": 0.298828125,
1055
  "learning_rate": 4.05e-06,
1056
- "loss": 0.2905029058456421,
1057
- "num_tokens": 112235.0,
1058
- "reward": 0.9577499628067017,
1059
- "reward_std": 0.014499982818961143,
1060
- "rewards/reward_fn/mean": 0.9577499628067017,
1061
- "rewards/reward_fn/std": 0.01449999213218689,
1062
  "step": 39,
1063
- "step_time": 37.047422505999975
1064
  },
1065
  {
1066
  "clip_ratio/high_max": 0.0,
@@ -1069,25 +1069,25 @@
1069
  "clip_ratio/low_min": 0.0,
1070
  "clip_ratio/region_mean": 0.0,
1071
  "completions/clipped_ratio": 0.0,
1072
- "completions/max_length": 206.0,
1073
- "completions/max_terminated_length": 206.0,
1074
- "completions/mean_length": 151.0,
1075
- "completions/mean_terminated_length": 151.0,
1076
- "completions/min_length": 118.0,
1077
- "completions/min_terminated_length": 118.0,
1078
- "entropy": 1.1345659494400024,
1079
  "epoch": 0.06666666666666667,
1080
- "frac_reward_zero_std": 1.0,
1081
- "grad_norm": 0.0,
1082
  "learning_rate": 4.0250000000000004e-06,
1083
- "loss": 0.0,
1084
- "num_tokens": 114995.0,
1085
- "reward": 0.33500000834465027,
1086
- "reward_std": 0.0,
1087
- "rewards/reward_fn/mean": 0.33500000834465027,
1088
- "rewards/reward_fn/std": 0.0,
1089
  "step": 40,
1090
- "step_time": 27.651464559000033
1091
  },
1092
  {
1093
  "clip_ratio/high_max": 0.0,
@@ -1096,25 +1096,25 @@
1096
  "clip_ratio/low_min": 0.0,
1097
  "clip_ratio/region_mean": 0.0,
1098
  "completions/clipped_ratio": 0.0,
1099
- "completions/max_length": 165.0,
1100
- "completions/max_terminated_length": 165.0,
1101
- "completions/mean_length": 136.0,
1102
- "completions/mean_terminated_length": 136.0,
1103
- "completions/min_length": 107.0,
1104
- "completions/min_terminated_length": 107.0,
1105
- "entropy": 1.2914133071899414,
1106
  "epoch": 0.06833333333333333,
1107
  "frac_reward_zero_std": 0.0,
1108
- "grad_norm": 0.322265625,
1109
  "learning_rate": 4.000000000000001e-06,
1110
- "loss": -0.10378827899694443,
1111
- "num_tokens": 118023.0,
1112
- "reward": 0.28437501192092896,
1113
- "reward_std": 0.08996237814426422,
1114
- "rewards/reward_fn/mean": 0.28437501192092896,
1115
- "rewards/reward_fn/std": 0.08996237814426422,
1116
  "step": 41,
1117
- "step_time": 22.878999739999927
1118
  },
1119
  {
1120
  "clip_ratio/high_max": 0.0,
@@ -1123,25 +1123,25 @@
1123
  "clip_ratio/low_min": 0.0,
1124
  "clip_ratio/region_mean": 0.0,
1125
  "completions/clipped_ratio": 0.0,
1126
- "completions/max_length": 159.0,
1127
- "completions/max_terminated_length": 159.0,
1128
- "completions/mean_length": 131.25,
1129
- "completions/mean_terminated_length": 131.25,
1130
- "completions/min_length": 104.0,
1131
- "completions/min_terminated_length": 104.0,
1132
- "entropy": 1.094796061515808,
1133
  "epoch": 0.07,
1134
  "frac_reward_zero_std": 0.0,
1135
  "grad_norm": 0.375,
1136
  "learning_rate": 3.975000000000001e-06,
1137
- "loss": -0.011134525761008263,
1138
- "num_tokens": 121116.0,
1139
- "reward": 0.918749988079071,
1140
- "reward_std": 0.07110730558633804,
1141
- "rewards/reward_fn/mean": 0.918749988079071,
1142
- "rewards/reward_fn/std": 0.07110730558633804,
1143
  "step": 42,
1144
- "step_time": 22.13838918800002
1145
  },
1146
  {
1147
  "clip_ratio/high_max": 0.0,
@@ -1150,25 +1150,25 @@
1150
  "clip_ratio/low_min": 0.0,
1151
  "clip_ratio/region_mean": 0.0,
1152
  "completions/clipped_ratio": 0.0,
1153
- "completions/max_length": 222.0,
1154
- "completions/max_terminated_length": 222.0,
1155
- "completions/mean_length": 162.0,
1156
- "completions/mean_terminated_length": 162.0,
1157
- "completions/min_length": 129.0,
1158
- "completions/min_terminated_length": 129.0,
1159
- "entropy": 1.3943829536437988,
1160
  "epoch": 0.07166666666666667,
1161
  "frac_reward_zero_std": 0.0,
1162
- "grad_norm": 0.2890625,
1163
  "learning_rate": 3.95e-06,
1164
- "loss": 0.09908480942249298,
1165
- "num_tokens": 124076.0,
1166
- "reward": 0.6493750214576721,
1167
- "reward_std": 0.30967977643013,
1168
- "rewards/reward_fn/mean": 0.6493750214576721,
1169
- "rewards/reward_fn/std": 0.3096797466278076,
1170
  "step": 43,
1171
- "step_time": 29.778661905000035
1172
  },
1173
  {
1174
  "clip_ratio/high_max": 0.0,
@@ -1177,25 +1177,25 @@
1177
  "clip_ratio/low_min": 0.0,
1178
  "clip_ratio/region_mean": 0.0,
1179
  "completions/clipped_ratio": 0.0,
1180
- "completions/max_length": 221.0,
1181
- "completions/max_terminated_length": 221.0,
1182
- "completions/mean_length": 156.5,
1183
- "completions/mean_terminated_length": 156.5,
1184
- "completions/min_length": 121.0,
1185
- "completions/min_terminated_length": 121.0,
1186
- "entropy": 1.0645285844802856,
1187
  "epoch": 0.07333333333333333,
1188
  "frac_reward_zero_std": 0.0,
1189
- "grad_norm": 0.318359375,
1190
  "learning_rate": 3.9250000000000005e-06,
1191
- "loss": 0.1972985565662384,
1192
- "num_tokens": 126698.0,
1193
- "reward": 0.33387500047683716,
1194
- "reward_std": 0.00224999594502151,
1195
- "rewards/reward_fn/mean": 0.33387500047683716,
1196
- "rewards/reward_fn/std": 0.002250000834465027,
1197
  "step": 44,
1198
- "step_time": 29.35123648700005
1199
  },
1200
  {
1201
  "clip_ratio/high_max": 0.0,
@@ -1204,25 +1204,25 @@
1204
  "clip_ratio/low_min": 0.0,
1205
  "clip_ratio/region_mean": 0.0,
1206
  "completions/clipped_ratio": 0.0,
1207
- "completions/max_length": 180.0,
1208
- "completions/max_terminated_length": 180.0,
1209
- "completions/mean_length": 136.75,
1210
- "completions/mean_terminated_length": 136.75,
1211
- "completions/min_length": 93.0,
1212
- "completions/min_terminated_length": 93.0,
1213
- "entropy": 1.0503398180007935,
1214
  "epoch": 0.075,
1215
  "frac_reward_zero_std": 0.0,
1216
- "grad_norm": 0.353515625,
1217
  "learning_rate": 3.900000000000001e-06,
1218
- "loss": -0.12474276125431061,
1219
- "num_tokens": 129473.0,
1220
- "reward": 0.33937501907348633,
1221
- "reward_std": 0.022020353004336357,
1222
- "rewards/reward_fn/mean": 0.33937501907348633,
1223
- "rewards/reward_fn/std": 0.022020353004336357,
1224
  "step": 45,
1225
- "step_time": 24.39131061000012
1226
  },
1227
  {
1228
  "clip_ratio/high_max": 0.0,
@@ -1231,25 +1231,25 @@
1231
  "clip_ratio/low_min": 0.0,
1232
  "clip_ratio/region_mean": 0.0,
1233
  "completions/clipped_ratio": 0.0,
1234
- "completions/max_length": 170.0,
1235
- "completions/max_terminated_length": 170.0,
1236
- "completions/mean_length": 142.0,
1237
- "completions/mean_terminated_length": 142.0,
1238
- "completions/min_length": 111.0,
1239
- "completions/min_terminated_length": 111.0,
1240
- "entropy": 1.1859209537506104,
1241
  "epoch": 0.07666666666666666,
1242
  "frac_reward_zero_std": 0.0,
1243
- "grad_norm": 0.35546875,
1244
  "learning_rate": 3.875e-06,
1245
- "loss": -0.014004894532263279,
1246
- "num_tokens": 132353.0,
1247
- "reward": 0.34375,
1248
- "reward_std": 0.017500003799796104,
1249
- "rewards/reward_fn/mean": 0.34375,
1250
- "rewards/reward_fn/std": 0.017499998211860657,
1251
  "step": 46,
1252
- "step_time": 23.079675882999936
1253
  },
1254
  {
1255
  "clip_ratio/high_max": 0.0,
@@ -1258,25 +1258,25 @@
1258
  "clip_ratio/low_min": 0.0,
1259
  "clip_ratio/region_mean": 0.0,
1260
  "completions/clipped_ratio": 0.0,
1261
- "completions/max_length": 187.0,
1262
- "completions/max_terminated_length": 187.0,
1263
- "completions/mean_length": 159.25,
1264
- "completions/mean_terminated_length": 159.25,
1265
- "completions/min_length": 127.0,
1266
- "completions/min_terminated_length": 127.0,
1267
- "entropy": 1.2016222476959229,
1268
  "epoch": 0.07833333333333334,
1269
- "frac_reward_zero_std": 0.0,
1270
- "grad_norm": 0.283203125,
1271
  "learning_rate": 3.85e-06,
1272
- "loss": 0.13225767016410828,
1273
- "num_tokens": 135306.0,
1274
- "reward": 0.6412500143051147,
1275
- "reward_std": 0.3741072416305542,
1276
- "rewards/reward_fn/mean": 0.6412500143051147,
1277
- "rewards/reward_fn/std": 0.3741072416305542,
1278
  "step": 47,
1279
- "step_time": 25.414647504999948
1280
  },
1281
  {
1282
  "clip_ratio/high_max": 0.0,
@@ -1285,25 +1285,25 @@
1285
  "clip_ratio/low_min": 0.0,
1286
  "clip_ratio/region_mean": 0.0,
1287
  "completions/clipped_ratio": 0.0,
1288
- "completions/max_length": 213.0,
1289
- "completions/max_terminated_length": 213.0,
1290
- "completions/mean_length": 159.25,
1291
- "completions/mean_terminated_length": 159.25,
1292
- "completions/min_length": 123.0,
1293
- "completions/min_terminated_length": 123.0,
1294
- "entropy": 1.1879173517227173,
1295
  "epoch": 0.08,
1296
  "frac_reward_zero_std": 0.0,
1297
- "grad_norm": 0.310546875,
1298
  "learning_rate": 3.825000000000001e-06,
1299
- "loss": 0.1464429497718811,
1300
- "num_tokens": 138111.0,
1301
- "reward": 0.7743749618530273,
1302
- "reward_std": 0.3024095892906189,
1303
- "rewards/reward_fn/mean": 0.7743749618530273,
1304
- "rewards/reward_fn/std": 0.3024095892906189,
1305
  "step": 48,
1306
- "step_time": 28.519766161000007
1307
  },
1308
  {
1309
  "clip_ratio/high_max": 0.0,
@@ -1312,25 +1312,25 @@
1312
  "clip_ratio/low_min": 0.0,
1313
  "clip_ratio/region_mean": 0.0,
1314
  "completions/clipped_ratio": 0.0,
1315
- "completions/max_length": 211.0,
1316
- "completions/max_terminated_length": 211.0,
1317
- "completions/mean_length": 151.0,
1318
- "completions/mean_terminated_length": 151.0,
1319
- "completions/min_length": 105.0,
1320
- "completions/min_terminated_length": 105.0,
1321
- "entropy": 1.045935869216919,
1322
  "epoch": 0.08166666666666667,
1323
  "frac_reward_zero_std": 0.0,
1324
- "grad_norm": 0.275390625,
1325
  "learning_rate": 3.8000000000000005e-06,
1326
- "loss": -0.08796487003564835,
1327
- "num_tokens": 140731.0,
1328
- "reward": 0.34812501072883606,
1329
- "reward_std": 0.026249995455145836,
1330
- "rewards/reward_fn/mean": 0.34812501072883606,
1331
- "rewards/reward_fn/std": 0.026250002905726433,
1332
  "step": 49,
1333
- "step_time": 28.07286430699969
1334
  },
1335
  {
1336
  "clip_ratio/high_max": 0.0,
@@ -1339,30 +1339,30 @@
1339
  "clip_ratio/low_min": 0.0,
1340
  "clip_ratio/region_mean": 0.0,
1341
  "completions/clipped_ratio": 0.0,
1342
- "completions/max_length": 211.0,
1343
- "completions/max_terminated_length": 211.0,
1344
- "completions/mean_length": 167.75,
1345
- "completions/mean_terminated_length": 167.75,
1346
- "completions/min_length": 125.0,
1347
- "completions/min_terminated_length": 125.0,
1348
- "entropy": 1.2584284543991089,
1349
  "epoch": 0.08333333333333333,
1350
  "frac_reward_zero_std": 0.0,
1351
- "grad_norm": 0.302734375,
1352
  "learning_rate": 3.7750000000000003e-06,
1353
- "loss": 0.021364537999033928,
1354
- "num_tokens": 144058.0,
1355
- "reward": 0.3306249976158142,
1356
- "reward_std": 0.008750001899898052,
1357
- "rewards/reward_fn/mean": 0.3306249976158142,
1358
- "rewards/reward_fn/std": 0.008750006556510925,
1359
  "step": 50,
1360
- "step_time": 28.99632900000006
1361
  }
1362
  ],
1363
  "logging_steps": 1,
1364
  "max_steps": 200,
1365
- "num_input_tokens_seen": 144058,
1366
  "num_train_epochs": 1,
1367
  "save_steps": 50,
1368
  "stateful_callbacks": {
 
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
+ "completions/max_length": 182.0,
20
+ "completions/max_terminated_length": 182.0,
21
+ "completions/mean_length": 137.25,
22
+ "completions/mean_terminated_length": 137.25,
23
+ "completions/min_length": 109.0,
24
+ "completions/min_terminated_length": 109.0,
25
+ "entropy": 1.2721036672592163,
26
  "epoch": 0.0016666666666666668,
27
+ "frac_reward_zero_std": 1.0,
28
+ "grad_norm": 0.0,
29
  "learning_rate": 5e-06,
30
+ "loss": 0.0,
31
+ "num_tokens": 3013.0,
32
+ "reward": 0.33500000834465027,
33
+ "reward_std": 0.0,
34
+ "rewards/reward_fn/mean": 0.33500000834465027,
35
+ "rewards/reward_fn/std": 0.0,
36
  "step": 1,
37
+ "step_time": 26.99397590800072
38
  },
39
  {
40
  "clip_ratio/high_max": 0.0,
 
43
  "clip_ratio/low_min": 0.0,
44
  "clip_ratio/region_mean": 0.0,
45
  "completions/clipped_ratio": 0.0,
46
+ "completions/max_length": 125.0,
47
+ "completions/max_terminated_length": 125.0,
48
+ "completions/mean_length": 114.0,
49
+ "completions/mean_terminated_length": 114.0,
50
+ "completions/min_length": 98.0,
51
+ "completions/min_terminated_length": 98.0,
52
+ "entropy": 0.901360273361206,
53
  "epoch": 0.0033333333333333335,
54
  "frac_reward_zero_std": 0.0,
55
+ "grad_norm": 0.376953125,
56
  "learning_rate": 4.975000000000001e-06,
57
+ "loss": -0.04769711568951607,
58
+ "num_tokens": 5917.0,
59
+ "reward": 0.9693750143051147,
60
+ "reward_std": 0.00875002145767212,
61
+ "rewards/reward_fn/mean": 0.9693750143051147,
62
+ "rewards/reward_fn/std": 0.00875002145767212,
63
  "step": 2,
64
+ "step_time": 19.053884660999756
65
  },
66
  {
67
  "clip_ratio/high_max": 0.0,
 
70
  "clip_ratio/low_min": 0.0,
71
  "clip_ratio/region_mean": 0.0,
72
  "completions/clipped_ratio": 0.0,
73
+ "completions/max_length": 197.0,
74
+ "completions/max_terminated_length": 197.0,
75
+ "completions/mean_length": 143.75,
76
+ "completions/mean_terminated_length": 143.75,
77
+ "completions/min_length": 114.0,
78
+ "completions/min_terminated_length": 114.0,
79
+ "entropy": 1.1808185577392578,
80
  "epoch": 0.005,
81
+ "frac_reward_zero_std": 0.0,
82
+ "grad_norm": 0.28515625,
83
  "learning_rate": 4.95e-06,
84
+ "loss": -0.011300479993224144,
85
+ "num_tokens": 8552.0,
86
+ "reward": 0.4793750047683716,
87
+ "reward_std": 0.28874996304512024,
88
+ "rewards/reward_fn/mean": 0.4793750047683716,
89
+ "rewards/reward_fn/std": 0.288750022649765,
90
  "step": 3,
91
+ "step_time": 28.53829234299701
92
  },
93
  {
94
  "clip_ratio/high_max": 0.0,
 
97
  "clip_ratio/low_min": 0.0,
98
  "clip_ratio/region_mean": 0.0,
99
  "completions/clipped_ratio": 0.0,
100
+ "completions/max_length": 132.0,
101
+ "completions/max_terminated_length": 132.0,
102
+ "completions/mean_length": 115.5,
103
+ "completions/mean_terminated_length": 115.5,
104
+ "completions/min_length": 99.0,
105
+ "completions/min_terminated_length": 99.0,
106
+ "entropy": 1.0730382204055786,
107
  "epoch": 0.006666666666666667,
108
  "frac_reward_zero_std": 0.0,
109
+ "grad_norm": 0.32421875,
110
  "learning_rate": 4.925e-06,
111
+ "loss": 0.07140593230724335,
112
+ "num_tokens": 11250.0,
113
+ "reward": 0.4925000071525574,
114
+ "reward_std": 0.3149999976158142,
115
+ "rewards/reward_fn/mean": 0.4925000071525574,
116
+ "rewards/reward_fn/std": 0.3149999678134918,
117
  "step": 4,
118
+ "step_time": 20.0260389340001
119
  },
120
  {
121
  "clip_ratio/high_max": 0.0,
 
124
  "clip_ratio/low_min": 0.0,
125
  "clip_ratio/region_mean": 0.0,
126
  "completions/clipped_ratio": 0.0,
127
+ "completions/max_length": 151.0,
128
+ "completions/max_terminated_length": 151.0,
129
+ "completions/mean_length": 125.5,
130
+ "completions/mean_terminated_length": 125.5,
131
+ "completions/min_length": 106.0,
132
+ "completions/min_terminated_length": 106.0,
133
+ "entropy": 1.044655680656433,
134
  "epoch": 0.008333333333333333,
135
+ "frac_reward_zero_std": 0.0,
136
+ "grad_norm": 0.30078125,
137
  "learning_rate": 4.9000000000000005e-06,
138
+ "loss": 0.005974027793854475,
139
+ "num_tokens": 14256.0,
140
+ "reward": 0.48374998569488525,
141
+ "reward_std": 0.29750001430511475,
142
+ "rewards/reward_fn/mean": 0.48374998569488525,
143
+ "rewards/reward_fn/std": 0.29750001430511475,
144
  "step": 5,
145
+ "step_time": 22.637786576000508
146
  },
147
  {
148
  "clip_ratio/high_max": 0.0,
 
151
  "clip_ratio/low_min": 0.0,
152
  "clip_ratio/region_mean": 0.0,
153
  "completions/clipped_ratio": 0.0,
154
+ "completions/max_length": 175.0,
155
+ "completions/max_terminated_length": 175.0,
156
+ "completions/mean_length": 146.5,
157
+ "completions/mean_terminated_length": 146.5,
158
+ "completions/min_length": 120.0,
159
+ "completions/min_terminated_length": 120.0,
160
+ "entropy": 1.157387137413025,
161
  "epoch": 0.01,
162
  "frac_reward_zero_std": 0.0,
163
+ "grad_norm": 0.30859375,
164
  "learning_rate": 4.875e-06,
165
+ "loss": -0.018765343353152275,
166
+ "num_tokens": 17146.0,
167
+ "reward": 0.4925000071525574,
168
+ "reward_std": 0.3149999976158142,
169
+ "rewards/reward_fn/mean": 0.4925000071525574,
170
+ "rewards/reward_fn/std": 0.3149999678134918,
171
  "step": 6,
172
+ "step_time": 25.646922176998487
173
  },
174
  {
175
  "clip_ratio/high_max": 0.0,
 
178
  "clip_ratio/low_min": 0.0,
179
  "clip_ratio/region_mean": 0.0,
180
  "completions/clipped_ratio": 0.0,
181
+ "completions/max_length": 185.0,
182
+ "completions/max_terminated_length": 185.0,
183
+ "completions/mean_length": 167.25,
184
+ "completions/mean_terminated_length": 167.25,
185
+ "completions/min_length": 135.0,
186
+ "completions/min_terminated_length": 135.0,
187
+ "entropy": 1.1936835050582886,
188
  "epoch": 0.011666666666666667,
189
+ "frac_reward_zero_std": 0.0,
190
+ "grad_norm": 0.28515625,
191
  "learning_rate": 4.85e-06,
192
+ "loss": 0.04119409993290901,
193
+ "num_tokens": 20239.0,
194
+ "reward": 0.6412500143051147,
195
+ "reward_std": 0.35391560196876526,
196
+ "rewards/reward_fn/mean": 0.6412500143051147,
197
+ "rewards/reward_fn/std": 0.35391557216644287,
198
  "step": 7,
199
+ "step_time": 27.48416990199985
200
  },
201
  {
202
  "clip_ratio/high_max": 0.0,
 
205
  "clip_ratio/low_min": 0.0,
206
  "clip_ratio/region_mean": 0.0,
207
  "completions/clipped_ratio": 0.0,
208
+ "completions/max_length": 221.0,
209
+ "completions/max_terminated_length": 221.0,
210
+ "completions/mean_length": 168.0,
211
+ "completions/mean_terminated_length": 168.0,
212
+ "completions/min_length": 136.0,
213
+ "completions/min_terminated_length": 136.0,
214
+ "entropy": 1.0776662826538086,
215
  "epoch": 0.013333333333333334,
216
  "frac_reward_zero_std": 0.0,
217
+ "grad_norm": 0.23046875,
218
  "learning_rate": 4.825e-06,
219
+ "loss": 0.08870118111371994,
220
+ "num_tokens": 22611.0,
221
+ "reward": 0.500374972820282,
222
+ "reward_std": 0.31010571122169495,
223
+ "rewards/reward_fn/mean": 0.500374972820282,
224
+ "rewards/reward_fn/std": 0.31010571122169495,
225
  "step": 8,
226
+ "step_time": 31.74235246600074
227
  },
228
  {
229
  "clip_ratio/high_max": 0.0,
 
232
  "clip_ratio/low_min": 0.0,
233
  "clip_ratio/region_mean": 0.0,
234
  "completions/clipped_ratio": 0.0,
235
+ "completions/max_length": 212.0,
236
+ "completions/max_terminated_length": 212.0,
237
+ "completions/mean_length": 159.25,
238
+ "completions/mean_terminated_length": 159.25,
239
+ "completions/min_length": 107.0,
240
+ "completions/min_terminated_length": 107.0,
241
+ "entropy": 1.1662333011627197,
242
  "epoch": 0.015,
243
+ "frac_reward_zero_std": 1.0,
244
+ "grad_norm": 0.0,
245
  "learning_rate": 4.800000000000001e-06,
246
+ "loss": 0.0,
247
+ "num_tokens": 25760.0,
248
+ "reward": 0.33500000834465027,
249
+ "reward_std": 0.0,
250
+ "rewards/reward_fn/mean": 0.33500000834465027,
251
+ "rewards/reward_fn/std": 0.0,
252
  "step": 9,
253
+ "step_time": 31.728966909002338
254
  },
255
  {
256
  "clip_ratio/high_max": 0.0,
 
259
  "clip_ratio/low_min": 0.0,
260
  "clip_ratio/region_mean": 0.0,
261
  "completions/clipped_ratio": 0.0,
262
+ "completions/max_length": 152.0,
263
+ "completions/max_terminated_length": 152.0,
264
+ "completions/mean_length": 121.5,
265
+ "completions/mean_terminated_length": 121.5,
266
+ "completions/min_length": 100.0,
267
+ "completions/min_terminated_length": 100.0,
268
+ "entropy": 1.0164639949798584,
269
  "epoch": 0.016666666666666666,
270
  "frac_reward_zero_std": 0.0,
271
+ "grad_norm": 0.306640625,
272
  "learning_rate": 4.775e-06,
273
+ "loss": 0.1400359570980072,
274
+ "num_tokens": 28494.0,
275
+ "reward": 0.7856249809265137,
276
+ "reward_std": 0.3052483797073364,
277
+ "rewards/reward_fn/mean": 0.7856249809265137,
278
+ "rewards/reward_fn/std": 0.3052483797073364,
279
  "step": 10,
280
+ "step_time": 23.630102548999275
281
  },
282
  {
283
  "clip_ratio/high_max": 0.0,
 
286
  "clip_ratio/low_min": 0.0,
287
  "clip_ratio/region_mean": 0.0,
288
  "completions/clipped_ratio": 0.0,
289
+ "completions/max_length": 183.0,
290
+ "completions/max_terminated_length": 183.0,
291
+ "completions/mean_length": 156.0,
292
+ "completions/mean_terminated_length": 156.0,
293
+ "completions/min_length": 127.0,
294
+ "completions/min_terminated_length": 127.0,
295
+ "entropy": 1.2048428058624268,
296
  "epoch": 0.018333333333333333,
297
  "frac_reward_zero_std": 0.0,
298
+ "grad_norm": 0.306640625,
299
  "learning_rate": 4.75e-06,
300
+ "loss": 0.04951930046081543,
301
+ "num_tokens": 31562.0,
302
  "reward": 0.8075000047683716,
303
+ "reward_std": 0.2921329736709595,
304
  "rewards/reward_fn/mean": 0.8075000047683716,
305
+ "rewards/reward_fn/std": 0.2921329438686371,
306
  "step": 11,
307
+ "step_time": 28.393185649998486
308
  },
309
  {
310
  "clip_ratio/high_max": 0.0,
 
313
  "clip_ratio/low_min": 0.0,
314
  "clip_ratio/region_mean": 0.0,
315
  "completions/clipped_ratio": 0.0,
316
+ "completions/max_length": 163.0,
317
+ "completions/max_terminated_length": 163.0,
318
+ "completions/mean_length": 116.5,
319
+ "completions/mean_terminated_length": 116.5,
320
+ "completions/min_length": 82.0,
321
+ "completions/min_terminated_length": 82.0,
322
+ "entropy": 1.0292104482650757,
323
  "epoch": 0.02,
324
  "frac_reward_zero_std": 0.0,
325
+ "grad_norm": 0.27734375,
326
  "learning_rate": 4.7250000000000005e-06,
327
+ "loss": -0.14611932635307312,
328
+ "num_tokens": 34288.0,
329
+ "reward": 0.33125001192092896,
330
+ "reward_std": 0.007500012870877981,
331
+ "rewards/reward_fn/mean": 0.33125001192092896,
332
+ "rewards/reward_fn/std": 0.007500007748603821,
333
  "step": 12,
334
+ "step_time": 25.381137749998743
335
  },
336
  {
337
  "clip_ratio/high_max": 0.0,
 
340
  "clip_ratio/low_min": 0.0,
341
  "clip_ratio/region_mean": 0.0,
342
  "completions/clipped_ratio": 0.0,
343
+ "completions/max_length": 157.0,
344
+ "completions/max_terminated_length": 157.0,
345
+ "completions/mean_length": 146.0,
346
+ "completions/mean_terminated_length": 146.0,
347
+ "completions/min_length": 120.0,
348
+ "completions/min_terminated_length": 120.0,
349
+ "entropy": 1.2967694997787476,
350
  "epoch": 0.021666666666666667,
351
  "frac_reward_zero_std": 0.0,
352
+ "grad_norm": 0.306640625,
353
  "learning_rate": 4.7e-06,
354
+ "loss": 0.04130696505308151,
355
+ "num_tokens": 37036.0,
356
+ "reward": 0.6412500143051147,
357
+ "reward_std": 0.35391560196876526,
358
+ "rewards/reward_fn/mean": 0.6412500143051147,
359
+ "rewards/reward_fn/std": 0.35391557216644287,
360
  "step": 13,
361
+ "step_time": 24.01959514600094
362
  },
363
  {
364
  "clip_ratio/high_max": 0.0,
 
367
  "clip_ratio/low_min": 0.0,
368
  "clip_ratio/region_mean": 0.0,
369
  "completions/clipped_ratio": 0.0,
370
+ "completions/max_length": 181.0,
371
+ "completions/max_terminated_length": 181.0,
372
+ "completions/mean_length": 163.0,
373
+ "completions/mean_terminated_length": 163.0,
374
+ "completions/min_length": 150.0,
375
+ "completions/min_terminated_length": 150.0,
376
+ "entropy": 1.2760556936264038,
377
  "epoch": 0.023333333333333334,
378
  "frac_reward_zero_std": 0.0,
379
+ "grad_norm": 0.279296875,
380
  "learning_rate": 4.675000000000001e-06,
381
+ "loss": 0.03943007439374924,
382
+ "num_tokens": 40060.0,
383
+ "reward": 0.9693750143051147,
384
+ "reward_std": 0.00875002145767212,
385
+ "rewards/reward_fn/mean": 0.9693750143051147,
386
+ "rewards/reward_fn/std": 0.00875002145767212,
387
  "step": 14,
388
+ "step_time": 26.05132091699852
389
  },
390
  {
391
  "clip_ratio/high_max": 0.0,
 
394
  "clip_ratio/low_min": 0.0,
395
  "clip_ratio/region_mean": 0.0,
396
  "completions/clipped_ratio": 0.0,
397
+ "completions/max_length": 163.0,
398
+ "completions/max_terminated_length": 163.0,
399
+ "completions/mean_length": 154.0,
400
+ "completions/mean_terminated_length": 154.0,
401
+ "completions/min_length": 145.0,
402
+ "completions/min_terminated_length": 145.0,
403
+ "entropy": 1.0788471698760986,
404
  "epoch": 0.025,
405
  "frac_reward_zero_std": 0.0,
406
+ "grad_norm": 0.291015625,
407
  "learning_rate": 4.65e-06,
408
+ "loss": -0.024401623755693436,
409
+ "num_tokens": 42856.0,
410
+ "reward": 0.9606249928474426,
411
+ "reward_std": 0.02202034927904606,
412
+ "rewards/reward_fn/mean": 0.9606249928474426,
413
+ "rewards/reward_fn/std": 0.02202034927904606,
414
  "step": 15,
415
+ "step_time": 24.084032504997595
416
  },
417
  {
418
  "clip_ratio/high_max": 0.0,
 
421
  "clip_ratio/low_min": 0.0,
422
  "clip_ratio/region_mean": 0.0,
423
  "completions/clipped_ratio": 0.0,
424
+ "completions/max_length": 257.0,
425
+ "completions/max_terminated_length": 257.0,
426
+ "completions/mean_length": 193.5,
427
+ "completions/mean_terminated_length": 193.5,
428
+ "completions/min_length": 159.0,
429
+ "completions/min_terminated_length": 159.0,
430
+ "entropy": 1.4748042821884155,
431
  "epoch": 0.02666666666666667,
432
  "frac_reward_zero_std": 0.0,
433
+ "grad_norm": 0.275390625,
434
  "learning_rate": 4.625000000000001e-06,
435
+ "loss": 0.10702173411846161,
436
+ "num_tokens": 46438.0,
437
+ "reward": 0.6493749618530273,
438
+ "reward_std": 0.355049192905426,
439
+ "rewards/reward_fn/mean": 0.6493749618530273,
440
+ "rewards/reward_fn/std": 0.35504916310310364,
441
  "step": 16,
442
+ "step_time": 37.7986828969988
443
  },
444
  {
445
  "clip_ratio/high_max": 0.0,
 
448
  "clip_ratio/low_min": 0.0,
449
  "clip_ratio/region_mean": 0.0,
450
  "completions/clipped_ratio": 0.0,
451
+ "completions/max_length": 156.0,
452
+ "completions/max_terminated_length": 156.0,
453
+ "completions/mean_length": 143.5,
454
+ "completions/mean_terminated_length": 143.5,
455
  "completions/min_length": 117.0,
456
  "completions/min_terminated_length": 117.0,
457
+ "entropy": 1.177292823791504,
458
  "epoch": 0.028333333333333332,
459
+ "frac_reward_zero_std": 0.0,
460
+ "grad_norm": 0.330078125,
461
  "learning_rate": 4.600000000000001e-06,
462
+ "loss": 0.04349641874432564,
463
+ "num_tokens": 48760.0,
464
+ "reward": 0.9275000095367432,
465
+ "reward_std": 0.07499998807907104,
466
+ "rewards/reward_fn/mean": 0.9275000095367432,
467
+ "rewards/reward_fn/std": 0.07499998807907104,
468
  "step": 17,
469
+ "step_time": 22.96707353400052
470
  },
471
  {
472
  "clip_ratio/high_max": 0.0,
 
475
  "clip_ratio/low_min": 0.0,
476
  "clip_ratio/region_mean": 0.0,
477
  "completions/clipped_ratio": 0.0,
478
+ "completions/max_length": 122.0,
479
+ "completions/max_terminated_length": 122.0,
480
+ "completions/mean_length": 108.75,
481
+ "completions/mean_terminated_length": 108.75,
482
+ "completions/min_length": 93.0,
483
+ "completions/min_terminated_length": 93.0,
484
+ "entropy": 0.9841785430908203,
485
  "epoch": 0.03,
486
  "frac_reward_zero_std": 0.0,
487
+ "grad_norm": 0.4375,
488
  "learning_rate": 4.575e-06,
489
+ "loss": -0.0374990850687027,
490
+ "num_tokens": 51631.0,
491
+ "reward": 0.9693750143051147,
492
+ "reward_std": 0.00875002145767212,
493
+ "rewards/reward_fn/mean": 0.9693750143051147,
494
+ "rewards/reward_fn/std": 0.00875002145767212,
495
  "step": 18,
496
+ "step_time": 18.896498049998627
497
  },
498
  {
499
  "clip_ratio/high_max": 0.0,
 
502
  "clip_ratio/low_min": 0.0,
503
  "clip_ratio/region_mean": 0.0,
504
  "completions/clipped_ratio": 0.0,
505
+ "completions/max_length": 125.0,
506
+ "completions/max_terminated_length": 125.0,
507
+ "completions/mean_length": 116.0,
508
+ "completions/mean_terminated_length": 116.0,
509
+ "completions/min_length": 107.0,
510
+ "completions/min_terminated_length": 107.0,
511
+ "entropy": 1.1009763479232788,
512
  "epoch": 0.03166666666666667,
513
+ "frac_reward_zero_std": 0.0,
514
+ "grad_norm": 0.375,
515
  "learning_rate": 4.5500000000000005e-06,
516
+ "loss": -0.02571682445704937,
517
+ "num_tokens": 54431.0,
518
+ "reward": 0.9562499523162842,
519
+ "reward_std": 0.017499983310699463,
520
+ "rewards/reward_fn/mean": 0.9562499523162842,
521
+ "rewards/reward_fn/std": 0.017499983310699463,
522
  "step": 19,
523
+ "step_time": 19.452795449000405
524
  },
525
  {
526
  "clip_ratio/high_max": 0.0,
 
529
  "clip_ratio/low_min": 0.0,
530
  "clip_ratio/region_mean": 0.0,
531
  "completions/clipped_ratio": 0.0,
532
+ "completions/max_length": 187.0,
533
+ "completions/max_terminated_length": 187.0,
534
+ "completions/mean_length": 140.75,
535
+ "completions/mean_terminated_length": 140.75,
536
+ "completions/min_length": 85.0,
537
+ "completions/min_terminated_length": 85.0,
538
+ "entropy": 1.152324914932251,
539
  "epoch": 0.03333333333333333,
540
+ "frac_reward_zero_std": 0.0,
541
+ "grad_norm": 0.361328125,
542
  "learning_rate": 4.525000000000001e-06,
543
+ "loss": 0.1016233041882515,
544
+ "num_tokens": 57738.0,
545
+ "reward": 0.953249990940094,
546
+ "reward_std": 0.01649998500943184,
547
+ "rewards/reward_fn/mean": 0.953249990940094,
548
+ "rewards/reward_fn/std": 0.01649998500943184,
549
  "step": 20,
550
+ "step_time": 28.531233167999744
551
  },
552
  {
553
  "clip_ratio/high_max": 0.0,
 
556
  "clip_ratio/low_min": 0.0,
557
  "clip_ratio/region_mean": 0.0,
558
  "completions/clipped_ratio": 0.0,
559
+ "completions/max_length": 144.0,
560
+ "completions/max_terminated_length": 144.0,
561
+ "completions/mean_length": 121.0,
562
+ "completions/mean_terminated_length": 121.0,
563
+ "completions/min_length": 100.0,
564
+ "completions/min_terminated_length": 100.0,
565
+ "entropy": 1.0693097114562988,
566
  "epoch": 0.035,
567
  "frac_reward_zero_std": 0.0,
568
+ "grad_norm": 0.39453125,
569
  "learning_rate": 4.5e-06,
570
+ "loss": 0.05338107421994209,
571
+ "num_tokens": 60750.0,
572
+ "reward": 0.9728749990463257,
573
+ "reward_std": 0.015750011429190636,
574
+ "rewards/reward_fn/mean": 0.9728749990463257,
575
+ "rewards/reward_fn/std": 0.015750020742416382,
576
  "step": 21,
577
+ "step_time": 22.120251809003094
578
  },
579
  {
580
  "clip_ratio/high_max": 0.0,
 
583
  "clip_ratio/low_min": 0.0,
584
  "clip_ratio/region_mean": 0.0,
585
  "completions/clipped_ratio": 0.0,
586
+ "completions/max_length": 146.0,
587
+ "completions/max_terminated_length": 146.0,
588
+ "completions/mean_length": 122.0,
589
+ "completions/mean_terminated_length": 122.0,
590
+ "completions/min_length": 106.0,
591
+ "completions/min_terminated_length": 106.0,
592
+ "entropy": 1.0988726615905762,
593
  "epoch": 0.03666666666666667,
594
  "frac_reward_zero_std": 0.0,
595
+ "grad_norm": 0.37890625,
596
  "learning_rate": 4.475e-06,
597
+ "loss": -0.032417263835668564,
598
+ "num_tokens": 63754.0,
599
+ "reward": 0.3306249976158142,
600
+ "reward_std": 0.008750001899898052,
601
+ "rewards/reward_fn/mean": 0.3306249976158142,
602
+ "rewards/reward_fn/std": 0.008750006556510925,
603
  "step": 22,
604
+ "step_time": 22.679130085998622
605
  },
606
  {
607
  "clip_ratio/high_max": 0.0,
 
610
  "clip_ratio/low_min": 0.0,
611
  "clip_ratio/region_mean": 0.0,
612
  "completions/clipped_ratio": 0.0,
613
+ "completions/max_length": 153.0,
614
+ "completions/max_terminated_length": 153.0,
615
+ "completions/mean_length": 121.5,
616
+ "completions/mean_terminated_length": 121.5,
617
+ "completions/min_length": 91.0,
618
+ "completions/min_terminated_length": 91.0,
619
+ "entropy": 1.1439276933670044,
620
  "epoch": 0.03833333333333333,
621
  "frac_reward_zero_std": 0.0,
622
+ "grad_norm": 0.306640625,
623
  "learning_rate": 4.450000000000001e-06,
624
+ "loss": -0.07120607793331146,
625
+ "num_tokens": 66768.0,
626
+ "reward": 0.9781249761581421,
627
+ "reward_std": 0.00875002145767212,
628
+ "rewards/reward_fn/mean": 0.9781249761581421,
629
+ "rewards/reward_fn/std": 0.00875002145767212,
630
  "step": 23,
631
+ "step_time": 23.13522673600164
632
  },
633
  {
634
  "clip_ratio/high_max": 0.0,
 
637
  "clip_ratio/low_min": 0.0,
638
  "clip_ratio/region_mean": 0.0,
639
  "completions/clipped_ratio": 0.0,
640
+ "completions/max_length": 136.0,
641
+ "completions/max_terminated_length": 136.0,
642
+ "completions/mean_length": 120.25,
643
+ "completions/mean_terminated_length": 120.25,
644
+ "completions/min_length": 102.0,
645
+ "completions/min_terminated_length": 102.0,
646
+ "entropy": 1.2446463108062744,
647
  "epoch": 0.04,
648
  "frac_reward_zero_std": 0.0,
649
+ "grad_norm": 0.38671875,
650
  "learning_rate": 4.425e-06,
651
+ "loss": 0.04352416470646858,
652
+ "num_tokens": 69557.0,
653
+ "reward": 0.7987499833106995,
654
+ "reward_std": 0.3096066117286682,
655
+ "rewards/reward_fn/mean": 0.7987499833106995,
656
+ "rewards/reward_fn/std": 0.3096065819263458,
657
  "step": 24,
658
+ "step_time": 20.66519093700117
659
  },
660
  {
661
  "clip_ratio/high_max": 0.0,
 
664
  "clip_ratio/low_min": 0.0,
665
  "clip_ratio/region_mean": 0.0,
666
  "completions/clipped_ratio": 0.0,
667
+ "completions/max_length": 153.0,
668
+ "completions/max_terminated_length": 153.0,
669
+ "completions/mean_length": 139.5,
670
+ "completions/mean_terminated_length": 139.5,
671
+ "completions/min_length": 111.0,
672
+ "completions/min_terminated_length": 111.0,
673
+ "entropy": 1.232834815979004,
674
  "epoch": 0.041666666666666664,
675
  "frac_reward_zero_std": 0.0,
676
+ "grad_norm": 0.3671875,
677
  "learning_rate": 4.4e-06,
678
+ "loss": -0.06304893642663956,
679
+ "num_tokens": 72543.0,
680
+ "reward": 0.6499999761581421,
681
+ "reward_std": 0.3441172242164612,
682
+ "rewards/reward_fn/mean": 0.6499999761581421,
683
+ "rewards/reward_fn/std": 0.3441172242164612,
684
  "step": 25,
685
+ "step_time": 22.765788336000696
686
  },
687
  {
688
  "clip_ratio/high_max": 0.0,
 
691
  "clip_ratio/low_min": 0.0,
692
  "clip_ratio/region_mean": 0.0,
693
  "completions/clipped_ratio": 0.0,
694
+ "completions/max_length": 161.0,
695
+ "completions/max_terminated_length": 161.0,
696
+ "completions/mean_length": 138.5,
697
+ "completions/mean_terminated_length": 138.5,
698
+ "completions/min_length": 112.0,
699
+ "completions/min_terminated_length": 112.0,
700
+ "entropy": 1.2499821186065674,
701
  "epoch": 0.043333333333333335,
702
+ "frac_reward_zero_std": 1.0,
703
+ "grad_norm": 0.0,
704
  "learning_rate": 4.3750000000000005e-06,
705
+ "loss": 0.0,
706
+ "num_tokens": 74829.0,
707
+ "reward": 0.9649999737739563,
708
+ "reward_std": 0.0,
709
+ "rewards/reward_fn/mean": 0.9649999737739563,
710
+ "rewards/reward_fn/std": 0.0,
711
  "step": 26,
712
+ "step_time": 23.262686510002823
713
  },
714
  {
715
  "clip_ratio/high_max": 0.0,
 
718
  "clip_ratio/low_min": 0.0,
719
  "clip_ratio/region_mean": 0.0,
720
  "completions/clipped_ratio": 0.0,
721
+ "completions/max_length": 167.0,
722
+ "completions/max_terminated_length": 167.0,
723
+ "completions/mean_length": 134.75,
724
+ "completions/mean_terminated_length": 134.75,
725
+ "completions/min_length": 118.0,
726
+ "completions/min_terminated_length": 118.0,
727
+ "entropy": 1.2326076030731201,
728
  "epoch": 0.045,
729
  "frac_reward_zero_std": 0.0,
730
+ "grad_norm": 0.40234375,
731
  "learning_rate": 4.350000000000001e-06,
732
+ "loss": -0.062136210501194,
733
+ "num_tokens": 78028.0,
734
+ "reward": 0.7699999809265137,
735
+ "reward_std": 0.38999998569488525,
736
+ "rewards/reward_fn/mean": 0.7699999809265137,
737
+ "rewards/reward_fn/std": 0.38999998569488525,
738
  "step": 27,
739
+ "step_time": 25.535409896998317
740
  },
741
  {
742
  "clip_ratio/high_max": 0.0,
 
745
  "clip_ratio/low_min": 0.0,
746
  "clip_ratio/region_mean": 0.0,
747
  "completions/clipped_ratio": 0.0,
748
+ "completions/max_length": 151.0,
749
+ "completions/max_terminated_length": 151.0,
750
+ "completions/mean_length": 106.5,
751
+ "completions/mean_terminated_length": 106.5,
752
+ "completions/min_length": 83.0,
753
+ "completions/min_terminated_length": 83.0,
754
+ "entropy": 1.0672565698623657,
755
  "epoch": 0.04666666666666667,
756
  "frac_reward_zero_std": 0.0,
757
+ "grad_norm": 0.361328125,
758
  "learning_rate": 4.325e-06,
759
+ "loss": 0.10957296937704086,
760
+ "num_tokens": 80670.0,
761
+ "reward": 0.9722499847412109,
762
+ "reward_std": 0.014500022865831852,
763
+ "rewards/reward_fn/mean": 0.9722499847412109,
764
+ "rewards/reward_fn/std": 0.014500021934509277,
765
  "step": 28,
766
+ "step_time": 22.534873515000072
767
  },
768
  {
769
  "clip_ratio/high_max": 0.0,
 
772
  "clip_ratio/low_min": 0.0,
773
  "clip_ratio/region_mean": 0.0,
774
  "completions/clipped_ratio": 0.0,
775
+ "completions/max_length": 206.0,
776
+ "completions/max_terminated_length": 206.0,
777
+ "completions/mean_length": 138.0,
778
+ "completions/mean_terminated_length": 138.0,
779
+ "completions/min_length": 107.0,
780
+ "completions/min_terminated_length": 107.0,
781
+ "entropy": 1.063458800315857,
782
  "epoch": 0.04833333333333333,
783
  "frac_reward_zero_std": 0.0,
784
+ "grad_norm": 0.298828125,
785
  "learning_rate": 4.3e-06,
786
+ "loss": -0.1097787395119667,
787
+ "num_tokens": 83466.0,
788
+ "reward": 0.8206249475479126,
789
+ "reward_std": 0.3005298972129822,
790
+ "rewards/reward_fn/mean": 0.8206249475479126,
791
+ "rewards/reward_fn/std": 0.3005298972129822,
792
  "step": 29,
793
+ "step_time": 31.418785993999336
794
  },
795
  {
796
  "clip_ratio/high_max": 0.0,
 
799
  "clip_ratio/low_min": 0.0,
800
  "clip_ratio/region_mean": 0.0,
801
  "completions/clipped_ratio": 0.0,
802
+ "completions/max_length": 170.0,
803
+ "completions/max_terminated_length": 170.0,
804
+ "completions/mean_length": 146.0,
805
+ "completions/mean_terminated_length": 146.0,
806
+ "completions/min_length": 127.0,
807
+ "completions/min_terminated_length": 127.0,
808
+ "entropy": 1.1856608390808105,
809
  "epoch": 0.05,
810
  "frac_reward_zero_std": 0.0,
811
+ "grad_norm": 0.3046875,
812
  "learning_rate": 4.2750000000000006e-06,
813
+ "loss": 0.020558878779411316,
814
+ "num_tokens": 86306.0,
815
+ "reward": 0.32625001668930054,
816
+ "reward_std": 0.010103637352585793,
817
+ "rewards/reward_fn/mean": 0.32625001668930054,
818
+ "rewards/reward_fn/std": 0.010103637352585793,
819
  "step": 30,
820
+ "step_time": 25.58633564399861
821
  },
822
  {
823
  "clip_ratio/high_max": 0.0,
 
826
  "clip_ratio/low_min": 0.0,
827
  "clip_ratio/region_mean": 0.0,
828
  "completions/clipped_ratio": 0.0,
829
+ "completions/max_length": 179.0,
830
+ "completions/max_terminated_length": 179.0,
831
+ "completions/mean_length": 134.0,
832
+ "completions/mean_terminated_length": 134.0,
833
+ "completions/min_length": 86.0,
834
+ "completions/min_terminated_length": 86.0,
835
+ "entropy": 1.0142335891723633,
836
  "epoch": 0.051666666666666666,
837
  "frac_reward_zero_std": 0.0,
838
+ "grad_norm": 0.28515625,
839
  "learning_rate": 4.25e-06,
840
+ "loss": -0.1461215764284134,
841
+ "num_tokens": 89374.0,
842
+ "reward": 0.33937501907348633,
843
+ "reward_std": 0.022020353004336357,
844
+ "rewards/reward_fn/mean": 0.33937501907348633,
845
+ "rewards/reward_fn/std": 0.022020353004336357,
846
  "step": 31,
847
+ "step_time": 26.99719125000047
848
  },
849
  {
850
  "clip_ratio/high_max": 0.0,
 
853
  "clip_ratio/low_min": 0.0,
854
  "clip_ratio/region_mean": 0.0,
855
  "completions/clipped_ratio": 0.0,
856
+ "completions/max_length": 170.0,
857
+ "completions/max_terminated_length": 170.0,
858
+ "completions/mean_length": 139.0,
859
+ "completions/mean_terminated_length": 139.0,
860
+ "completions/min_length": 92.0,
861
+ "completions/min_terminated_length": 92.0,
862
+ "entropy": 1.3681882619857788,
863
  "epoch": 0.05333333333333334,
864
  "frac_reward_zero_std": 0.0,
865
+ "grad_norm": 0.2734375,
866
  "learning_rate": 4.225e-06,
867
+ "loss": -0.09904350340366364,
868
+ "num_tokens": 92234.0,
869
+ "reward": 0.6587499976158142,
870
+ "reward_std": 0.35391557216644287,
871
+ "rewards/reward_fn/mean": 0.6587499976158142,
872
+ "rewards/reward_fn/std": 0.35391557216644287,
873
  "step": 32,
874
+ "step_time": 25.45712414800073
875
  },
876
  {
877
  "clip_ratio/high_max": 0.0,
 
880
  "clip_ratio/low_min": 0.0,
881
  "clip_ratio/region_mean": 0.0,
882
  "completions/clipped_ratio": 0.0,
883
+ "completions/max_length": 169.0,
884
+ "completions/max_terminated_length": 169.0,
885
+ "completions/mean_length": 110.75,
886
+ "completions/mean_terminated_length": 110.75,
887
+ "completions/min_length": 65.0,
888
+ "completions/min_terminated_length": 65.0,
889
+ "entropy": 1.112715721130371,
890
  "epoch": 0.055,
891
  "frac_reward_zero_std": 0.0,
892
+ "grad_norm": 0.359375,
893
  "learning_rate": 4.2000000000000004e-06,
894
+ "loss": -0.13209989666938782,
895
+ "num_tokens": 94781.0,
896
+ "reward": 0.37325000762939453,
897
+ "reward_std": 0.04913502559065819,
898
+ "rewards/reward_fn/mean": 0.37325000762939453,
899
+ "rewards/reward_fn/std": 0.04913502186536789,
900
  "step": 33,
901
+ "step_time": 24.924523759000294
902
  },
903
  {
904
  "clip_ratio/high_max": 0.0,
 
907
  "clip_ratio/low_min": 0.0,
908
  "clip_ratio/region_mean": 0.0,
909
  "completions/clipped_ratio": 0.0,
910
+ "completions/max_length": 196.0,
911
+ "completions/max_terminated_length": 196.0,
912
+ "completions/mean_length": 169.0,
913
+ "completions/mean_terminated_length": 169.0,
914
+ "completions/min_length": 149.0,
915
+ "completions/min_terminated_length": 149.0,
916
+ "entropy": 1.3067529201507568,
917
  "epoch": 0.056666666666666664,
918
  "frac_reward_zero_std": 0.0,
919
+ "grad_norm": 0.298828125,
920
  "learning_rate": 4.175e-06,
921
+ "loss": -0.0015746492426842451,
922
+ "num_tokens": 97417.0,
923
+ "reward": 0.5913749933242798,
924
+ "reward_std": 0.31556180119514465,
925
+ "rewards/reward_fn/mean": 0.5913749933242798,
926
+ "rewards/reward_fn/std": 0.31556177139282227,
927
  "step": 34,
928
+ "step_time": 29.42368617000102
929
  },
930
  {
931
  "clip_ratio/high_max": 0.0,
 
934
  "clip_ratio/low_min": 0.0,
935
  "clip_ratio/region_mean": 0.0,
936
  "completions/clipped_ratio": 0.0,
937
+ "completions/max_length": 152.0,
938
+ "completions/max_terminated_length": 152.0,
939
+ "completions/mean_length": 128.0,
940
+ "completions/mean_terminated_length": 128.0,
941
+ "completions/min_length": 93.0,
942
+ "completions/min_terminated_length": 93.0,
943
+ "entropy": 1.064958930015564,
944
  "epoch": 0.058333333333333334,
945
+ "frac_reward_zero_std": 1.0,
946
+ "grad_norm": 0.0,
947
  "learning_rate": 4.15e-06,
948
+ "loss": 0.0,
949
+ "num_tokens": 100201.0,
950
+ "reward": 0.9649999737739563,
951
+ "reward_std": 0.0,
952
+ "rewards/reward_fn/mean": 0.9649999737739563,
953
+ "rewards/reward_fn/std": 0.0,
954
  "step": 35,
955
+ "step_time": 22.600620116001664
956
  },
957
  {
958
  "clip_ratio/high_max": 0.0,
 
961
  "clip_ratio/low_min": 0.0,
962
  "clip_ratio/region_mean": 0.0,
963
  "completions/clipped_ratio": 0.0,
964
+ "completions/max_length": 133.0,
965
+ "completions/max_terminated_length": 133.0,
966
+ "completions/mean_length": 115.25,
967
+ "completions/mean_terminated_length": 115.25,
968
+ "completions/min_length": 100.0,
969
+ "completions/min_terminated_length": 100.0,
970
+ "entropy": 1.1022453308105469,
971
  "epoch": 0.06,
972
  "frac_reward_zero_std": 0.0,
973
+ "grad_norm": 0.357421875,
974
  "learning_rate": 4.125e-06,
975
+ "loss": -0.03460807725787163,
976
+ "num_tokens": 102946.0,
977
+ "reward": 0.6850000023841858,
978
+ "reward_std": 0.32457661628723145,
979
+ "rewards/reward_fn/mean": 0.6850000023841858,
980
+ "rewards/reward_fn/std": 0.32457664608955383,
981
  "step": 36,
982
+ "step_time": 19.86391903600088
983
  },
984
  {
985
  "clip_ratio/high_max": 0.0,
 
988
  "clip_ratio/low_min": 0.0,
989
  "clip_ratio/region_mean": 0.0,
990
  "completions/clipped_ratio": 0.0,
991
+ "completions/max_length": 181.0,
992
+ "completions/max_terminated_length": 181.0,
993
+ "completions/mean_length": 152.5,
994
+ "completions/mean_terminated_length": 152.5,
995
+ "completions/min_length": 125.0,
996
+ "completions/min_terminated_length": 125.0,
997
+ "entropy": 1.1503243446350098,
998
  "epoch": 0.06166666666666667,
999
  "frac_reward_zero_std": 0.0,
1000
+ "grad_norm": 0.29296875,
1001
  "learning_rate": 4.1e-06,
1002
+ "loss": 0.0848444476723671,
1003
+ "num_tokens": 105380.0,
1004
+ "reward": 0.6150000095367432,
1005
+ "reward_std": 0.34411725401878357,
1006
+ "rewards/reward_fn/mean": 0.6150000095367432,
1007
+ "rewards/reward_fn/std": 0.3441172242164612,
1008
  "step": 37,
1009
+ "step_time": 25.59769222900286
1010
  },
1011
  {
1012
  "clip_ratio/high_max": 0.0,
 
1015
  "clip_ratio/low_min": 0.0,
1016
  "clip_ratio/region_mean": 0.0,
1017
  "completions/clipped_ratio": 0.0,
1018
+ "completions/max_length": 116.0,
1019
+ "completions/max_terminated_length": 116.0,
1020
+ "completions/mean_length": 113.5,
1021
+ "completions/mean_terminated_length": 113.5,
1022
+ "completions/min_length": 109.0,
1023
+ "completions/min_terminated_length": 109.0,
1024
+ "entropy": 1.0994595289230347,
1025
  "epoch": 0.06333333333333334,
1026
  "frac_reward_zero_std": 0.0,
1027
+ "grad_norm": 0.400390625,
1028
  "learning_rate": 4.075e-06,
1029
+ "loss": 0.016566012054681778,
1030
+ "num_tokens": 108438.0,
1031
+ "reward": 0.9772499799728394,
1032
+ "reward_std": 0.015256185084581375,
1033
+ "rewards/reward_fn/mean": 0.9772499799728394,
1034
+ "rewards/reward_fn/std": 0.015256169252097607,
1035
  "step": 38,
1036
+ "step_time": 17.853799561002234
1037
  },
1038
  {
1039
  "clip_ratio/high_max": 0.0,
 
1042
  "clip_ratio/low_min": 0.0,
1043
  "clip_ratio/region_mean": 0.0,
1044
  "completions/clipped_ratio": 0.0,
1045
+ "completions/max_length": 187.0,
1046
+ "completions/max_terminated_length": 187.0,
1047
+ "completions/mean_length": 139.25,
1048
+ "completions/mean_terminated_length": 139.25,
1049
+ "completions/min_length": 97.0,
1050
+ "completions/min_terminated_length": 97.0,
1051
+ "entropy": 1.2051821947097778,
1052
  "epoch": 0.065,
1053
  "frac_reward_zero_std": 0.0,
1054
+ "grad_norm": 0.302734375,
1055
  "learning_rate": 4.05e-06,
1056
+ "loss": -0.16876700520515442,
1057
+ "num_tokens": 111567.0,
1058
+ "reward": 0.48375001549720764,
1059
+ "reward_std": 0.3209393620491028,
1060
+ "rewards/reward_fn/mean": 0.48375001549720764,
1061
+ "rewards/reward_fn/std": 0.3209393620491028,
1062
  "step": 39,
1063
+ "step_time": 27.307055137000134
1064
  },
1065
  {
1066
  "clip_ratio/high_max": 0.0,
 
1069
  "clip_ratio/low_min": 0.0,
1070
  "clip_ratio/region_mean": 0.0,
1071
  "completions/clipped_ratio": 0.0,
1072
+ "completions/max_length": 174.0,
1073
+ "completions/max_terminated_length": 174.0,
1074
+ "completions/mean_length": 132.5,
1075
+ "completions/mean_terminated_length": 132.5,
1076
+ "completions/min_length": 105.0,
1077
+ "completions/min_terminated_length": 105.0,
1078
+ "entropy": 1.0574369430541992,
1079
  "epoch": 0.06666666666666667,
1080
+ "frac_reward_zero_std": 0.0,
1081
+ "grad_norm": 0.328125,
1082
  "learning_rate": 4.0250000000000004e-06,
1083
+ "loss": 0.09384524822235107,
1084
+ "num_tokens": 114485.0,
1085
+ "reward": 0.32625001668930054,
1086
+ "reward_std": 0.010103637352585793,
1087
+ "rewards/reward_fn/mean": 0.32625001668930054,
1088
+ "rewards/reward_fn/std": 0.010103637352585793,
1089
  "step": 40,
1090
+ "step_time": 25.46330049500102
1091
  },
1092
  {
1093
  "clip_ratio/high_max": 0.0,
 
1096
  "clip_ratio/low_min": 0.0,
1097
  "clip_ratio/region_mean": 0.0,
1098
  "completions/clipped_ratio": 0.0,
1099
+ "completions/max_length": 138.0,
1100
+ "completions/max_terminated_length": 138.0,
1101
+ "completions/mean_length": 127.5,
1102
+ "completions/mean_terminated_length": 127.5,
1103
+ "completions/min_length": 119.0,
1104
+ "completions/min_terminated_length": 119.0,
1105
+ "entropy": 1.045518159866333,
1106
  "epoch": 0.06833333333333333,
1107
  "frac_reward_zero_std": 0.0,
1108
+ "grad_norm": 0.3359375,
1109
  "learning_rate": 4.000000000000001e-06,
1110
+ "loss": -0.001768031739629805,
1111
+ "num_tokens": 117383.0,
1112
+ "reward": 0.29750001430511475,
1113
+ "reward_std": 0.09970790892839432,
1114
+ "rewards/reward_fn/mean": 0.29750001430511475,
1115
+ "rewards/reward_fn/std": 0.09970790892839432,
1116
  "step": 41,
1117
+ "step_time": 20.58559491000051
1118
  },
1119
  {
1120
  "clip_ratio/high_max": 0.0,
 
1123
  "clip_ratio/low_min": 0.0,
1124
  "clip_ratio/region_mean": 0.0,
1125
  "completions/clipped_ratio": 0.0,
1126
+ "completions/max_length": 156.0,
1127
+ "completions/max_terminated_length": 156.0,
1128
+ "completions/mean_length": 136.75,
1129
+ "completions/mean_terminated_length": 136.75,
1130
+ "completions/min_length": 110.0,
1131
+ "completions/min_terminated_length": 110.0,
1132
+ "entropy": 1.2528449296951294,
1133
  "epoch": 0.07,
1134
  "frac_reward_zero_std": 0.0,
1135
  "grad_norm": 0.375,
1136
  "learning_rate": 3.975000000000001e-06,
1137
+ "loss": -0.0037579911295324564,
1138
+ "num_tokens": 120630.0,
1139
+ "reward": 0.33937501907348633,
1140
+ "reward_std": 0.036077164113521576,
1141
+ "rewards/reward_fn/mean": 0.33937501907348633,
1142
+ "rewards/reward_fn/std": 0.036077164113521576,
1143
  "step": 42,
1144
+ "step_time": 23.60465663499781
1145
  },
1146
  {
1147
  "clip_ratio/high_max": 0.0,
 
1150
  "clip_ratio/low_min": 0.0,
1151
  "clip_ratio/region_mean": 0.0,
1152
  "completions/clipped_ratio": 0.0,
1153
+ "completions/max_length": 149.0,
1154
+ "completions/max_terminated_length": 149.0,
1155
+ "completions/mean_length": 123.5,
1156
+ "completions/mean_terminated_length": 123.5,
1157
+ "completions/min_length": 96.0,
1158
+ "completions/min_terminated_length": 96.0,
1159
+ "entropy": 1.0263532400131226,
1160
  "epoch": 0.07166666666666667,
1161
  "frac_reward_zero_std": 0.0,
1162
+ "grad_norm": 0.396484375,
1163
  "learning_rate": 3.95e-06,
1164
+ "loss": -0.010047557763755322,
1165
+ "num_tokens": 123572.0,
1166
+ "reward": 0.972000002861023,
1167
+ "reward_std": 0.013999998569488525,
1168
+ "rewards/reward_fn/mean": 0.972000002861023,
1169
+ "rewards/reward_fn/std": 0.013999998569488525,
1170
  "step": 43,
1171
+ "step_time": 21.8416585140003
1172
  },
1173
  {
1174
  "clip_ratio/high_max": 0.0,
 
1177
  "clip_ratio/low_min": 0.0,
1178
  "clip_ratio/region_mean": 0.0,
1179
  "completions/clipped_ratio": 0.0,
1180
+ "completions/max_length": 179.0,
1181
+ "completions/max_terminated_length": 179.0,
1182
+ "completions/mean_length": 158.25,
1183
+ "completions/mean_terminated_length": 158.25,
1184
+ "completions/min_length": 139.0,
1185
+ "completions/min_terminated_length": 139.0,
1186
+ "entropy": 1.4695942401885986,
1187
  "epoch": 0.07333333333333333,
1188
  "frac_reward_zero_std": 0.0,
1189
+ "grad_norm": 0.294921875,
1190
  "learning_rate": 3.9250000000000005e-06,
1191
+ "loss": -0.07597894221544266,
1192
+ "num_tokens": 126769.0,
1193
+ "reward": 0.6499999761581421,
1194
+ "reward_std": 0.3441172242164612,
1195
+ "rewards/reward_fn/mean": 0.6499999761581421,
1196
+ "rewards/reward_fn/std": 0.3441172242164612,
1197
  "step": 44,
1198
+ "step_time": 26.284113427002012
1199
  },
1200
  {
1201
  "clip_ratio/high_max": 0.0,
 
1204
  "clip_ratio/low_min": 0.0,
1205
  "clip_ratio/region_mean": 0.0,
1206
  "completions/clipped_ratio": 0.0,
1207
+ "completions/max_length": 176.0,
1208
+ "completions/max_terminated_length": 176.0,
1209
+ "completions/mean_length": 132.75,
1210
+ "completions/mean_terminated_length": 132.75,
1211
+ "completions/min_length": 99.0,
1212
+ "completions/min_terminated_length": 99.0,
1213
+ "entropy": 1.3445155620574951,
1214
  "epoch": 0.075,
1215
  "frac_reward_zero_std": 0.0,
1216
+ "grad_norm": 0.37890625,
1217
  "learning_rate": 3.900000000000001e-06,
1218
+ "loss": 0.04559139907360077,
1219
+ "num_tokens": 129512.0,
1220
+ "reward": 0.6675000190734863,
1221
+ "reward_std": 0.34470999240875244,
1222
+ "rewards/reward_fn/mean": 0.6675000190734863,
1223
+ "rewards/reward_fn/std": 0.34470999240875244,
1224
  "step": 45,
1225
+ "step_time": 26.141577066002355
1226
  },
1227
  {
1228
  "clip_ratio/high_max": 0.0,
 
1231
  "clip_ratio/low_min": 0.0,
1232
  "clip_ratio/region_mean": 0.0,
1233
  "completions/clipped_ratio": 0.0,
1234
+ "completions/max_length": 131.0,
1235
+ "completions/max_terminated_length": 131.0,
1236
+ "completions/mean_length": 125.25,
1237
+ "completions/mean_terminated_length": 125.25,
1238
+ "completions/min_length": 115.0,
1239
+ "completions/min_terminated_length": 115.0,
1240
+ "entropy": 1.098323941230774,
1241
  "epoch": 0.07666666666666666,
1242
  "frac_reward_zero_std": 0.0,
1243
+ "grad_norm": 0.349609375,
1244
  "learning_rate": 3.875e-06,
1245
+ "loss": -0.021484289318323135,
1246
+ "num_tokens": 132537.0,
1247
+ "reward": 0.49162501096725464,
1248
+ "reward_std": 0.3345796763896942,
1249
+ "rewards/reward_fn/mean": 0.49162501096725464,
1250
+ "rewards/reward_fn/std": 0.3345796763896942,
1251
  "step": 46,
1252
+ "step_time": 20.43934466399878
1253
  },
1254
  {
1255
  "clip_ratio/high_max": 0.0,
 
1258
  "clip_ratio/low_min": 0.0,
1259
  "clip_ratio/region_mean": 0.0,
1260
  "completions/clipped_ratio": 0.0,
1261
+ "completions/max_length": 151.0,
1262
+ "completions/max_terminated_length": 151.0,
1263
+ "completions/mean_length": 127.75,
1264
+ "completions/mean_terminated_length": 127.75,
1265
+ "completions/min_length": 116.0,
1266
+ "completions/min_terminated_length": 116.0,
1267
+ "entropy": 1.1513502597808838,
1268
  "epoch": 0.07833333333333334,
1269
+ "frac_reward_zero_std": 1.0,
1270
+ "grad_norm": 0.0,
1271
  "learning_rate": 3.85e-06,
1272
+ "loss": 0.0,
1273
+ "num_tokens": 135108.0,
1274
+ "reward": 0.9649999737739563,
1275
+ "reward_std": 0.0,
1276
+ "rewards/reward_fn/mean": 0.9649999737739563,
1277
+ "rewards/reward_fn/std": 0.0,
1278
  "step": 47,
1279
+ "step_time": 24.161369523997564
1280
  },
1281
  {
1282
  "clip_ratio/high_max": 0.0,
 
1285
  "clip_ratio/low_min": 0.0,
1286
  "clip_ratio/region_mean": 0.0,
1287
  "completions/clipped_ratio": 0.0,
1288
+ "completions/max_length": 150.0,
1289
+ "completions/max_terminated_length": 150.0,
1290
+ "completions/mean_length": 139.0,
1291
+ "completions/mean_terminated_length": 139.0,
1292
+ "completions/min_length": 130.0,
1293
+ "completions/min_terminated_length": 130.0,
1294
+ "entropy": 1.0865966081619263,
1295
  "epoch": 0.08,
1296
  "frac_reward_zero_std": 0.0,
1297
+ "grad_norm": 0.296875,
1298
  "learning_rate": 3.825000000000001e-06,
1299
+ "loss": -0.03219185769557953,
1300
+ "num_tokens": 137804.0,
1301
+ "reward": 0.9562499523162842,
1302
+ "reward_std": 0.017499983310699463,
1303
+ "rewards/reward_fn/mean": 0.9562499523162842,
1304
+ "rewards/reward_fn/std": 0.017499983310699463,
1305
  "step": 48,
1306
+ "step_time": 23.38693464699827
1307
  },
1308
  {
1309
  "clip_ratio/high_max": 0.0,
 
1312
  "clip_ratio/low_min": 0.0,
1313
  "clip_ratio/region_mean": 0.0,
1314
  "completions/clipped_ratio": 0.0,
1315
+ "completions/max_length": 144.0,
1316
+ "completions/max_terminated_length": 144.0,
1317
+ "completions/mean_length": 116.75,
1318
+ "completions/mean_terminated_length": 116.75,
1319
+ "completions/min_length": 104.0,
1320
+ "completions/min_terminated_length": 104.0,
1321
+ "entropy": 1.214436650276184,
1322
  "epoch": 0.08166666666666667,
1323
  "frac_reward_zero_std": 0.0,
1324
+ "grad_norm": 0.35546875,
1325
  "learning_rate": 3.8000000000000005e-06,
1326
+ "loss": 0.026122601702809334,
1327
+ "num_tokens": 140483.0,
1328
+ "reward": 0.9518749713897705,
1329
+ "reward_std": 0.01675495319068432,
1330
+ "rewards/reward_fn/mean": 0.9518749713897705,
1331
+ "rewards/reward_fn/std": 0.016754958778619766,
1332
  "step": 49,
1333
+ "step_time": 22.844522692001192
1334
  },
1335
  {
1336
  "clip_ratio/high_max": 0.0,
 
1339
  "clip_ratio/low_min": 0.0,
1340
  "clip_ratio/region_mean": 0.0,
1341
  "completions/clipped_ratio": 0.0,
1342
+ "completions/max_length": 162.0,
1343
+ "completions/max_terminated_length": 162.0,
1344
+ "completions/mean_length": 135.0,
1345
+ "completions/mean_terminated_length": 135.0,
1346
+ "completions/min_length": 109.0,
1347
+ "completions/min_terminated_length": 109.0,
1348
+ "entropy": 1.2262229919433594,
1349
  "epoch": 0.08333333333333333,
1350
  "frac_reward_zero_std": 0.0,
1351
+ "grad_norm": 0.333984375,
1352
  "learning_rate": 3.7750000000000003e-06,
1353
+ "loss": -0.003206648863852024,
1354
+ "num_tokens": 143371.0,
1355
+ "reward": 0.6499999761581421,
1356
+ "reward_std": 0.36373066902160645,
1357
+ "rewards/reward_fn/mean": 0.6499999761581421,
1358
+ "rewards/reward_fn/std": 0.36373066902160645,
1359
  "step": 50,
1360
+ "step_time": 24.662258178999764
1361
  }
1362
  ],
1363
  "logging_steps": 1,
1364
  "max_steps": 200,
1365
+ "num_input_tokens_seen": 143371,
1366
  "num_train_epochs": 1,
1367
  "save_steps": 50,
1368
  "stateful_callbacks": {
checkpoint-50/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ab9199b90ff1c78e3f0af98bbb7a6f8d8f8bcbda9987dd482a97cbf14f25e
3
  size 7185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d11428bb3243f52d290c02a99fbea6c72e434c81e27053295d8fa2c9caa21
3
  size 7185
held_out_eval.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "n_eval_clips": 80,
3
- "mean_reward": 0.65025,
4
- "well_formed_rate": 1.0,
5
- "accuracy": 0.5,
6
  "pivot_in_eval": 6,
7
  "pivot_correct": 5,
8
  "results": [
@@ -15,18 +15,18 @@
15
  "correct": true,
16
  "well_formed": true,
17
  "reward_total": 0.965,
18
- "completion_text": "<think>\nThe prosody analysis shows that Raj's tone is flat and lacks any significant pitch variation or emphasis. There are no internal pauses, which could indicate sincerity. The pitch contour is also relatively flat, with minimal fluctuations. Additionally, the energy level is high, which might suggest enthusiasm but not necessarily sarcasm. However, the content of the statement is quite neutral and factual. He is simply stating what he would do with the broken iPod. Given the lack of emotional inflection and the straightforward nature of the statement, it seems more likely to be sincere rather than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
19
  },
20
  {
21
  "clip_id": "1_10462",
22
  "gold": "sarcastic",
23
  "is_pivot": false,
24
  "predicted": "sincere",
25
- "confidence": 0.9,
26
  "correct": false,
27
  "well_formed": true,
28
- "reward_total": 0.33499999999999996,
29
- "completion_text": "<think>\nHoward's tone is flat and lacks any significant pitch variation or emphasis. There are no internal pauses, and his speech sounds matter-of-fact. The phrase \"exciting opportunities in home care for the old and fat\" is presented neutrally without any indication of sarcasm. Sheldon's previous statements were clearly sarcastic, but Howard's line here does not follow the pattern of sarcasm. He is simply stating facts without any implied criticism or mockery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
30
  },
31
  {
32
  "clip_id": "1_105",
@@ -37,7 +37,7 @@
37
  "correct": false,
38
  "well_formed": true,
39
  "reward_total": 0.33499999999999996,
40
- "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests sincerity rather than sarcasm. Sheldon's tone does not exhibit high pitch variability or strong emphasis through pauses. Additionally, his statement about the coffee table being a \"tiny garage sale\" implies a serious concern about organization, which aligns more with sincerity than sarcasm. The lack of any sarcastic elements such as exaggerated praise or mocking of the situation further supports a sincere delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
41
  },
42
  {
43
  "clip_id": "1_10849",
@@ -48,7 +48,7 @@
48
  "correct": false,
49
  "well_formed": true,
50
  "reward_total": 0.37,
51
- "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. However, the content of his statement is ironic and self-deprecating, implying that buying a comic book store is not very meaningful to him. This contradiction between the literal meaning and the implied sentiment is a common sign of sarcasm. Additionally, the prosody features indicate a low pitch variability and low energy level, which could support a sincere delivery but also might be consistent with a sarcastic delivery if the speaker is trying to downplay the importance of the action.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
52
  },
53
  {
54
  "clip_id": "1_10853",
@@ -70,7 +70,7 @@
70
  "correct": true,
71
  "well_formed": true,
72
  "reward_total": 0.965,
73
- "completion_text": "<think>\nThe prosody features indicate that there is no pitch information available, making it impossible to analyze the tone based on pitch alone. However, let's consider the context and the content of the line. Raj is complimenting Penny and Leonard on their attractiveness, which seems genuine rather than sarcastic. The phrase \"It's true-- you're both gorgeous\" sounds sincere and appreciative, not mocking or ironic. Additionally, the context suggests that they are trying to move past a past disagreement, and Raj is acknowledging their current situation with a compliment. This aligns more with sincerity than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
74
  },
75
  {
76
  "clip_id": "1_11055",
@@ -92,18 +92,18 @@
92
  "correct": true,
93
  "well_formed": true,
94
  "reward_total": 0.965,
95
- "completion_text": "<think>\nAmy's tone here is flat and lacks any emphasis or inflection that might indicate sarcasm. She speaks in a neutral way, as if she's simply stating a fact about her dashboard. There are no pauses or breaks in her speech that suggest she's trying to convey something other than what she says. The prosody features also don't show any signs of high pitch variability or strong emphasis, which are typically associated with sarcasm. Instead, the pitch contour shows a flat pattern with minimal variation, indicating a sincere statement rather than a sarcastic one.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
96
  },
97
  {
98
  "clip_id": "1_11243",
99
  "gold": "sarcastic",
100
  "is_pivot": false,
101
  "predicted": "sincere",
102
- "confidence": 0.9,
103
  "correct": false,
104
  "well_formed": true,
105
- "reward_total": 0.33499999999999996,
106
- "completion_text": "<think>\nHoward's statement \"I love that you're too good to pee in the kitchen sink\" has a flat pitch contour without any significant pitch variation or emphasis pauses. The prosody features indicate low pitch variability and voiced speech, which suggests sincerity rather than sarcasm. Additionally, the content of the statement doesn't seem to contradict or mock Sheldon's previous comments about Amy being a neuroscientist. Howard is simply complimenting Sheldon's self-care routine, which aligns with a sincere tone.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
107
  },
108
  {
109
  "clip_id": "1_11439",
@@ -114,7 +114,7 @@
114
  "correct": false,
115
  "well_formed": true,
116
  "reward_total": 0.33499999999999996,
117
- "completion_text": "<think>\nPenny's tone is flat and she doesn't use any emphatic pauses or high-pitched intonations that would suggest sarcasm. Instead, her speech sounds sincere and matter-of-fact, as if she's simply stating a fact. She doesn't contradict the previous statements or express any hidden meaning through her pitch or rhythm. The pitch contour shows a steady rise, which could indicate a calm and straightforward delivery rather than a sarcastic one.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
118
  },
119
  {
120
  "clip_id": "1_1144",
@@ -125,7 +125,7 @@
125
  "correct": false,
126
  "well_formed": true,
127
  "reward_total": 0.33499999999999996,
128
- "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variability, which suggests a sincere tone. Sheldon's speech lacks any notable emphasis or intonation that would indicate sarcasm. He speaks in a monotone manner, which aligns with a sincere delivery. Additionally, there are no pauses or breaks in his speech that could imply he is trying to disguise his true meaning, another sign of sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
129
  },
130
  {
131
  "clip_id": "1_11526",
@@ -136,7 +136,7 @@
136
  "correct": true,
137
  "well_formed": true,
138
  "reward_total": 0.965,
139
- "completion_text": "<think>\nThe prosody here is quite flat, with no significant pitch variation or emphasis. Sheldon is speaking in a monotone manner, which suggests sincerity rather than sarcasm. He is explaining his recent experiment with elevated anxiety levels and how he found it uncomfortable. The tone is informative and matter-of-fact, without any hint of mockery or irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
140
  },
141
  {
142
  "clip_id": "1_12275",
@@ -147,7 +147,7 @@
147
  "correct": true,
148
  "well_formed": true,
149
  "reward_total": 0.965,
150
- "completion_text": "<think>\nThe speaker uses \"Uh, no\" which is a hesitant expression that could indicate they are trying to downplay something. However, the prosody features suggest a flat affect with no significant pitch variation or emphasis on any particular word. The tone seems neutral and matter-of-fact, as if the speaker is simply stating a fact without much emotional weight. Additionally, there are no internal pauses or vocalized pauses that might indicate a shift in meaning or emotion. The prosody does not align with a sarcastic delivery, where one would expect more dramatic pauses and a rising intonation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
151
  },
152
  {
153
  "clip_id": "1_12320",
@@ -158,7 +158,7 @@
158
  "correct": false,
159
  "well_formed": true,
160
  "reward_total": 0.37,
161
- "completion_text": "<think>\nThe prosody analysis shows that Raj's speech has high pitch variability and no internal pauses, which typically indicate sarcasm. Additionally, he emphasizes certain words with a rising intonation, as seen in \"Oh, yes,\" which suggests he is not being serious about the details he's providing. The lack of a flat affect and the presence of pauses could also point towards sarcasm. However, the pitch contour is relatively flat, which might temper the certainty of the sarcasm. Given these factors, the prosody seems more aligned with sarcasm than sincerity.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
162
  },
163
  {
164
  "clip_id": "1_12331",
@@ -169,7 +169,7 @@
169
  "correct": true,
170
  "well_formed": true,
171
  "reward_total": 0.965,
172
- "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variation, which suggests sincerity rather than sarcasm. Additionally, there are no emphasized pauses or unusual intonations that would indicate sarcasm. The phrase \"Wow, there's a Denny's in Vegas you can actually get married in\" is a common joke about Las Vegas weddings, but the tone here seems straightforward and not mocking or ironic. Penny is likely being sincere in her surprise at the possibility of getting married in such a place.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
173
  },
174
  {
175
  "clip_id": "1_1478",
@@ -180,7 +180,7 @@
180
  "correct": false,
181
  "well_formed": true,
182
  "reward_total": 0.33499999999999996,
183
- "completion_text": "<think>\nHoward's tone is flat and he doesn't use any internal pauses or emphasize his words with pauses. He speaks in a monotone voice, which suggests sincerity rather than sarcasm. Additionally, Howard's statement about Mrs. Giselle being sensitive to chlorine and liking his fashion models to be \"pruny and bug-eyed\" seems to be a straightforward observation without any hint of irony or mockery. The pitch contour shows a steady rise, which aligns with a sincere, matter-of-fact delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
184
  },
185
  {
186
  "clip_id": "1_1627",
@@ -202,7 +202,7 @@
202
  "correct": false,
203
  "well_formed": true,
204
  "reward_total": 0.33499999999999996,
205
- "completion_text": "<think>\nPenny's tone is flat and lacks any significant pitch variation or emphasis. She doesn't use any positive or negative words that could indicate sarcasm. Instead, she delivers the line in a matter-of-fact way, which suggests sincerity rather than sarcasm. Additionally, her pitch contour shows a very flat pattern with minimal fluctuations, further supporting a sincere delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
206
  },
207
  {
208
  "clip_id": "1_2216",
@@ -213,7 +213,7 @@
213
  "correct": true,
214
  "well_formed": true,
215
  "reward_total": 0.965,
216
- "completion_text": "<think>\nLeonard's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive or negative words to convey emotion. The pitch contour shows a steady decline, which suggests a lack of enthusiasm or sarcasm. Additionally, Leonard is simply asking a question about his inclusion, without any underlying meaning beyond the literal words. Howard and Raj's previous comments were clearly mocking Leonard, but Leonard's response is neutral and unemotional.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
217
  },
218
  {
219
  "clip_id": "1_2423",
@@ -224,7 +224,7 @@
224
  "correct": false,
225
  "well_formed": true,
226
  "reward_total": 0.33499999999999996,
227
- "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't take the praise seriously, as evidenced by his dismissive \"Oh, great.\" This matches the prosody, which indicates a lack of enthusiasm and a flat affect. The content of the sentence, while seemingly praising Penny's choice, is clearly sarcastic because it's not meant to be taken seriously. Sheldon is mocking the idea of the multiverse reestablishment, which is a concept Penny likely finds absurd. The prosody confirms this by showing no indication of sincerity or genuine appreciation.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
228
  },
229
  {
230
  "clip_id": "1_2614",
@@ -257,7 +257,7 @@
257
  "correct": false,
258
  "well_formed": true,
259
  "reward_total": 0.33499999999999996,
260
- "completion_text": "<think>\nRaj's statement seems to be mocking Sheldon's boast about his fighting skills. The prosody features show a flat pitch contour with little variation, which suggests sincerity rather than sarcasm. Additionally, Raj uses the phrase \"big talk,\" which implies that he is not taking Sheldon's boasting seriously. The tone is more dismissive than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
261
  },
262
  {
263
  "clip_id": "1_2792",
@@ -268,7 +268,7 @@
268
  "correct": true,
269
  "well_formed": true,
270
  "reward_total": 0.965,
271
- "completion_text": "<think>\nThe prosody analysis shows that Penny's pitch is relatively flat with little variation, which suggests sincerity rather than sarcasm. Additionally, there are no internal pauses or emphatic pauses that would indicate sarcasm. The tone is matter-of-fact and doesn't convey any hidden meaning beyond what she says. She is simply stating that they can return to being friends, without any underlying criticism or mockery implied.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
272
  },
273
  {
274
  "clip_id": "1_3125",
@@ -279,18 +279,18 @@
279
  "correct": false,
280
  "well_formed": true,
281
  "reward_total": 0.33499999999999996,
282
- "completion_text": "<think>\nHoward's tone is flat and he doesn't use any emphatic pauses or high-pitched intonations that would indicate sarcasm. Instead, he speaks matter-of-factly about the consequences of Penny dumping him, which aligns with his previous statement that \"she's obviously way out of line.\" There's no indication of irony or exaggeration in his speech pattern. The prosody features also support a sincere delivery; the pitch is not varied and there are no pauses that might suggest he is being sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
283
  },
284
  {
285
  "clip_id": "1_3177",
286
  "gold": "sincere",
287
  "is_pivot": false,
288
- "predicted": "sarcastic",
289
  "confidence": 0.9,
290
- "correct": false,
291
  "well_formed": true,
292
- "reward_total": 0.33499999999999996,
293
- "completion_text": "<think>\nThe speaker is clearly expressing disapproval of the movie, saying \"Not a great movie.\" However, they follow up with \"look at that beautiful desert,\" which is a sarcastic comment. The prosody features indicate high pitch variability and a flat affect, which suggests sarcasm. Additionally, the phrase \"look at that beautiful desert\" is a clear attempt to soften the criticism, implying that even though the movie was bad, there was something positive about it. This combination of features points towards sarcasm.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
294
  },
295
  {
296
  "clip_id": "1_3256",
@@ -301,7 +301,7 @@
301
  "correct": false,
302
  "well_formed": true,
303
  "reward_total": 0.33499999999999996,
304
- "completion_text": "<think>\nThe speaker, [PERSON], uses the phrase \"I'm sorry, fiddling with yourself in the bathtub is a real class act.\" This sentence is clearly meant to be sarcastic, but let's analyze the prosody to see if it matches the intended sarcasm. The pitch contour shows a flat overall trend with minimal variation, which suggests a lack of emotional intensity. Additionally, there are no internal pauses or significant emphasis, which typically accompany sarcasm. The prosody features indicate low pitch variability and energy, further supporting a sincere tone rather than a sarcastic one. Howard's comment about it sounding \"cheap\" also implies that the speaker is trying to soften the harshness of their statement, which aligns with a sincere attempt to be polite rather than sarcastic. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
305
  },
306
  {
307
  "clip_id": "1_3660",
@@ -323,7 +323,7 @@
323
  "correct": true,
324
  "well_formed": true,
325
  "reward_total": 0.965,
326
- "completion_text": "<think>\nThe speaker is being polite and appreciative, but there's no indication of sarcasm. They are acknowledging the thoughtfulness of the offer but declining it. The tone is neutral, and there are no signs of exaggeration or irony. The pitch contour shows a flat trend with minimal variation, which suggests sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
327
  },
328
  {
329
  "clip_id": "1_4281",
@@ -334,7 +334,7 @@
334
  "correct": true,
335
  "well_formed": true,
336
  "reward_total": 0.965,
337
- "completion_text": "<think>\nHoward's statement seems to be about facing someone after doing something wrong. The prosody features indicate a relatively flat pitch contour with low energy, suggesting a lack of enthusiasm or strong emotion. There are no significant pauses or emphatic stresses that would typically accompany sarcasm. Additionally, Howard's tone doesn't seem to contradict his words; he's acknowledging a difficult situation without any apparent irony or mockery. Given these factors, the prosody does not align with typical sarcastic delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
338
  },
339
  {
340
  "clip_id": "1_4544",
@@ -344,19 +344,19 @@
344
  "confidence": 0.9,
345
  "correct": false,
346
  "well_formed": true,
347
- "reward_total": 0.33499999999999996,
348
- "completion_text": "<think>\nAmy's tone is flat and there are no internal pauses, which suggests sincerity. However, the phrase \"our menses are synchronized\" is a clear indicator of sarcasm. The use of such a medical term in a joking context implies that Amy is being sarcastic about the idea of a girls' night. Additionally, her pitch contour shows a flat affect, which aligns with the lack of emphasis or inflection that would typically accompany sincere speech. The content of what she says is clearly meant to mock the idea of a girls' night, further supporting the sarcastic interpretation.</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
349
  },
350
  {
351
  "clip_id": "1_4552",
352
  "gold": "sincere",
353
  "is_pivot": false,
354
  "predicted": "sincere",
355
- "confidence": 0.8,
356
  "correct": true,
357
  "well_formed": true,
358
- "reward_total": 0.93,
359
- "completion_text": "<think>\nAmy's tone is flat and there are no internal pauses, which suggests sincerity. However, her use of \"so anyway, to make a long story short\" with a firm cervix could be interpreted as sarcastically downplaying the importance of the information she's about to share. The pitch contour shows a flat pattern without any significant ups and downs, which aligns with a sincere delivery. But the phrase \"to make a long story short\" can sometimes be used sarcastically to dismiss something important, especially when combined with a firm cervix, which might indicate that Amy is trying to downplay the significance of her cervix being firm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.8}</final>"
360
  },
361
  {
362
  "clip_id": "1_4792",
@@ -367,7 +367,7 @@
367
  "correct": true,
368
  "well_formed": true,
369
  "reward_total": 0.965,
370
- "completion_text": "<think>\nThe prosody features indicate a very flat pitch contour with minimal variability, which suggests a lack of emotional expression. Sheldon's tone is neutral and matter-of-fact, as if he's simply stating a fact. He doesn't emphasize any particular word or use a rising intonation that would typically indicate sarcasm. The content of his statement, \"Obviously, we're no longer a Justice League. We have no choice but to switch to our Muppet Baby costumes,\" is also neutral and factual. There are no positive or negative connotations in the words themselves that would suggest sarcasm. The context of the conversation, where everyone else is joking about the absence of superheroes, doesn't align with Sheldon's neutral tone. Sheldon is likely being sincere in his statement.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
371
  },
372
  {
373
  "clip_id": "1_4995",
@@ -378,7 +378,7 @@
378
  "correct": false,
379
  "well_formed": true,
380
  "reward_total": 0.33499999999999996,
381
- "completion_text": "<think>\nThe prosody features indicate a very flat pitch contour with no significant pitch variation. Sheldon's tone is neutral and lacks any emphatic pauses or rising intonation that might suggest sarcasm. He is simply stating a fact about what he learned from Penny. The conversational context shows that they are engaged in a playful exchange about improvisation, but there's no indication of sarcasm here. Sheldon is acknowledging something he learned, which is a straightforward statement rather than a sarcastic remark.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
382
  },
383
  {
384
  "clip_id": "1_5212",
@@ -389,7 +389,7 @@
389
  "correct": true,
390
  "well_formed": true,
391
  "reward_total": 0.965,
392
- "completion_text": "<think>\nThe prosody analysis shows that the speaker's pitch is relatively flat with little variation, and there are no internal pauses or emphasized pauses. The tone is calm and matter-of-fact, which suggests sincerity rather than sarcasm. Additionally, the pitch contour does not show any peaks or valleys that would indicate sarcasm. The content of the statement is also neutral and factual, without any hint of irony or mockery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
393
  },
394
  {
395
  "clip_id": "1_536",
@@ -400,7 +400,7 @@
400
  "correct": false,
401
  "well_formed": true,
402
  "reward_total": 0.33499999999999996,
403
- "completion_text": "<think>\nThe prosody analysis shows that Sheldon's pitch is relatively high with some variability, but it doesn't rise significantly. There are also no internal pauses, which would typically indicate a sarcastic tone. The content of his statement about \"Applied Particle Physics quarterly\" seems factual and not particularly noteworthy. Additionally, the prosodic features don't show any strong emphasis or unusual pauses that would suggest sarcasm. Sheldon is likely being sincere here, as he is simply stating a fact about a book he has read.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
404
  },
405
  {
406
  "clip_id": "1_5496",
@@ -411,7 +411,7 @@
411
  "correct": false,
412
  "well_formed": true,
413
  "reward_total": 0.33499999999999996,
414
- "completion_text": "<think>\nPenny's tone is flat and lacks any significant pitch variation or emphasis. She doesn't use any positive or negative words that would suggest sarcasm. Instead, she seems to be making a neutral statement about Amy's behavior, which is not aligned with Amy's previous comments about her being \"crafty.\" Her speech pattern and lack of emotional inflection further support a sincere delivery rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
415
  },
416
  {
417
  "clip_id": "1_5580",
@@ -433,7 +433,7 @@
433
  "correct": true,
434
  "well_formed": true,
435
  "reward_total": 0.965,
436
- "completion_text": "<think>\nThe prosody analysis shows a flat affect with no internal pauses, which suggests sincerity. Sheldon is making a comparison between Jewish and Indian parental attitudes, but he is doing so in a matter-of-fact way without any dramatic emphasis or pitch variation. The pitch contour is also consistently low and falling, indicating a flat tone. This matches the sincere delivery where the speaker is calmly explaining a point without adding extra emphasis or sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
437
  },
438
  {
439
  "clip_id": "1_5953",
@@ -444,29 +444,29 @@
444
  "correct": true,
445
  "well_formed": true,
446
  "reward_total": 0.965,
447
- "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. Additionally, the pitch contour shows a steady rise, indicating a monotone delivery that aligns with a sincere statement. The prosody features also support this interpretation, as they indicate a calm and unemotional delivery without any significant pitch variation or emphasis.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
448
  },
449
  {
450
  "clip_id": "1_5964",
451
  "gold": "sarcastic",
452
  "is_pivot": false,
453
- "predicted": "sarcastic",
454
- "confidence": 0.8,
455
- "correct": true,
456
- "well_formed": true,
457
- "reward_total": 0.93,
458
- "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. However, his use of \"Smart\" with a whisper and the mention of \"deaf chick\" could imply sarcasm. The whisper and the suggestion that someone might not hear him could be a way to mock or belittle someone, especially if he's being sarcastic about someone being engaged to his friend. Given the context, it seems more likely that Howard is using this as a way to mock or make fun of the situation rather than being sincere.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
459
  },
460
  {
461
  "clip_id": "1_6020",
462
  "gold": "sarcastic",
463
  "is_pivot": false,
464
  "predicted": "sincere",
465
- "confidence": 0.9,
466
  "correct": false,
467
  "well_formed": true,
468
- "reward_total": 0.33499999999999996,
469
- "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive or negative intonation to convey emotion. Additionally, he uses a neutral pitch range with no peaks or valleys, which suggests sincerity rather than sarcasm. The phrase \"Would you like back on it?\" implies a serious request, not a mocking one. Furthermore, Sheldon's statement about having 61 enemies and offering to add someone back to the list is straightforward and matter-of-fact, without any hint of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
470
  },
471
  {
472
  "clip_id": "1_6113",
@@ -477,7 +477,7 @@
477
  "correct": true,
478
  "well_formed": true,
479
  "reward_total": 0.965,
480
- "completion_text": "<think>\nHoward's statement about going to the International Space Station is clearly a joke, but it doesn't sound sarcastic. The pitch contour shows a flat range with no significant ups and downs, which suggests a sincere tone rather than sarcasm. Additionally, Howard's use of \"Oh, my word, a trip to the heavens\" is meant to be humorous, but it lacks the typical elements of sarcasm such as exaggerated pitch, strong emphasis, or unusual phrasing that would make it sound sarcastic. The prosody features also indicate a mid-range pitch with minimal variation and no internal pauses, which align more with sincerity than sarcasm.</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
481
  },
482
  {
483
  "clip_id": "1_6165",
@@ -499,7 +499,7 @@
499
  "correct": false,
500
  "well_formed": true,
501
  "reward_total": 0.33499999999999996,
502
- "completion_text": "<think>\nBernadette's tone is flat and there are no internal pauses, which suggests sincerity. Additionally, her pitch doesn't vary much and stays within a relatively narrow range, indicating a lack of emotional expression. The phrase \"if you don't mind looking like an orange traffic cone\" is a straightforward statement without any hint of sarcasm. She's simply offering a blunt opinion about the color of the dresses, which she finds unattractive. There are no emphatic pauses or rising intonation that would suggest sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
503
  },
504
  {
505
  "clip_id": "1_6426",
@@ -543,7 +543,7 @@
543
  "correct": false,
544
  "well_formed": true,
545
  "reward_total": 0.33499999999999996,
546
- "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use positive words or emphasize his words in a way that would suggest sarcasm. Additionally, the prosody indicates a lack of emotional engagement, which is consistent with someone who is simply stating a fact about not being able to invite someone to dinner. The phrase \"Darn\" at the end is more likely to be sincere than sarcastic, as it sounds like he's expressing disappointment without any underlying irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
547
  },
548
  {
549
  "clip_id": "1_7400",
@@ -554,18 +554,18 @@
554
  "correct": true,
555
  "well_formed": true,
556
  "reward_total": 0.965,
557
- "completion_text": "<think>\nThe prosody analysis shows that Penny's tone is flat and there are no internal pauses, which typically indicate sincerity. Additionally, her pitch contour is relatively flat with no significant ups and downs, suggesting she is not emphasizing any particular point. The use of \"Of course\" and \"it's been in the news\" does not convey sarcasm; instead, it sounds like a neutral statement about a well-known scientific concept. The context also doesn't provide any indication that she is being sarcastic. Sheldon's question about the Higgs boson might make her say this, but it doesn't change the sincerity of her reply.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
558
  },
559
  {
560
  "clip_id": "1_7504",
561
  "gold": "sincere",
562
  "is_pivot": false,
563
- "predicted": "sarcastic",
564
- "confidence": 0.8,
565
- "correct": false,
566
  "well_formed": true,
567
- "reward_total": 0.37,
568
- "completion_text": "<think>\nThe speaker is praising Stuart's comic book store as being \"out of this world,\" which is clearly meant as a compliment. However, the prosody features suggest a sarcastic tone. The pitch is high and variable, but not in a way that emphasizes the positive aspects of the store. Instead, the flat affect and lack of internal pauses indicate that the speaker is not genuinely enthusiastic about the store. Additionally, the pitch contour shows a relatively flat pattern with some peaks, which could suggest a sarcastic tone where the speaker is saying something positive but not truly believing it.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
569
  },
570
  {
571
  "clip_id": "1_8042",
@@ -575,8 +575,8 @@
575
  "confidence": 0.9,
576
  "correct": true,
577
  "well_formed": true,
578
- "reward_total": 0.965,
579
- "completion_text": "<think>\nHoward's tone is flat and he doesn't use any emphatic pauses or rising intonation that would indicate sarcasm. He speaks matter-of-factly about his trip without any indication of irony or mockery. The prosody features show low energy and no significant pitch variation, which aligns with a sincere statement. Additionally, the content of the statement is neutral and factual, not expressing any hidden meaning through sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
580
  },
581
  {
582
  "clip_id": "1_8078",
@@ -587,7 +587,7 @@
587
  "correct": true,
588
  "well_formed": true,
589
  "reward_total": 0.965,
590
- "completion_text": "<think>\nHoward is using a flat affect and not emphasizing any particular words when he says \"Trust me, you're not Jewish. That's how they get you.\" He doesn't vary his pitch much and there are no internal pauses, which suggests sincerity. Additionally, the content of what he's saying is quite serious and logical, without any hint of sarcasm. The phrase \"That's how they get you\" implies a warning about a common tactic, but it's stated matter-of-factly without any tone of mockery or irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
591
  },
592
  {
593
  "clip_id": "1_8136",
@@ -609,7 +609,7 @@
609
  "correct": true,
610
  "well_formed": true,
611
  "reward_total": 0.965,
612
- "completion_text": "<think>\nThe prosody analysis shows a flat affect with little pitch variation and no internal pauses, which typically indicates sincerity. Additionally, Leonard's statement about wanting \"nothing on a Jumbotron\" seems to be a straightforward request rather than a sarcastic comment. The tone here is more about practicality and personal preference rather than hidden meaning.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
613
  },
614
  {
615
  "clip_id": "1_8746",
@@ -620,18 +620,18 @@
620
  "correct": false,
621
  "well_formed": true,
622
  "reward_total": 0.37,
623
- "completion_text": "<think>\nThe prosody analysis shows that Amy's pitch is high and variable, which could indicate sarcasm. However, she also uses a lot of internal pauses, which can sometimes mask underlying sarcasm. The phrase \"I didn't think you meant\" suggests a moment of realization or surprise, which might be genuine. Additionally, the tone seems more matter-of-fact than sarcastic. Given these factors, the prosody leans towards sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.8}</final>"
624
  },
625
  {
626
  "clip_id": "1_8749",
627
  "gold": "sarcastic",
628
  "is_pivot": false,
629
- "predicted": "sarcastic",
630
- "confidence": 0.8,
631
- "correct": true,
632
  "well_formed": true,
633
- "reward_total": 0.93,
634
- "completion_text": "<think>\nAmy's tone is flat and there are no internal pauses, which suggests sincerity. However, the phrase \"It was very entertaining... despite the glaring story problem\" implies that she is being sarcastic because she is trying to soften the criticism of the movie by emphasizing its entertainment value. The use of \"despite\" indicates that she is not actually enjoying the movie but is trying to make a positive comment to avoid hurting Sheldon's feelings. This suggests a subtle form of sarcasm.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
635
  },
636
  {
637
  "clip_id": "1_8827",
@@ -653,7 +653,7 @@
653
  "correct": true,
654
  "well_formed": true,
655
  "reward_total": 0.965,
656
- "completion_text": "<think>\nThe prosody analysis shows a very flat pitch contour with minimal variation, which suggests a lack of emotional expression. Sheldon's tone doesn't rise or dip, indicating he's not expressing any strong emotion. Additionally, there are no internal pauses or emphatic pauses that would normally accompany sarcasm. The phrase \"Can we talk about something else?\" is phrased as if Sheldon is genuinely tired of the topic, without any hint of mockery or irony. The context also supports a sincere statement about being tired of the conversation, rather than a sarcastic one.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
657
  },
658
  {
659
  "clip_id": "2_107",
@@ -675,7 +675,7 @@
675
  "correct": false,
676
  "well_formed": true,
677
  "reward_total": 0.33499999999999996,
678
- "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests a lack of emotional expression. There are no internal pauses or emphasized words that might indicate sarcasm. The target utterance \"No, no the actual cartoon character... Of course the balloon.\" sounds neutral and doesn't contain any obvious indicators of sarcasm. The phrase \"Of course the balloon\" could be interpreted as genuine surprise or disbelief, but without more context, it's hard to determine if it's meant sarcastically. However, the overall prosodic pattern leans towards sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
679
  },
680
  {
681
  "clip_id": "2_127",
@@ -686,7 +686,7 @@
686
  "correct": true,
687
  "well_formed": true,
688
  "reward_total": 0.965,
689
- "completion_text": "<think>\nThe prosody analysis shows that Joey's speech is characterized by a high pitch with minimal variability, which is unusual for someone being sarcastic. Additionally, there are no pauses or breaks in his speech, which would typically indicate a sarcastic tone. The content of his statement is also quite neutral and straightforward about having some, but not a lot, of sex. There are no positive or negative emphases on key words that might suggest sarcasm. Therefore, the prosodic features do not align with what one would expect from a sarcastic delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
690
  },
691
  {
692
  "clip_id": "2_128",
@@ -697,7 +697,7 @@
697
  "correct": false,
698
  "well_formed": true,
699
  "reward_total": 0.33499999999999996,
700
- "completion_text": "<think>\nCHANDLER's tone seems flat and lacks any significant pitch variation or emphasis, which is unusual for sarcasm. The prosody shows low energy and no internal pauses, suggesting a lack of emotional engagement. Additionally, the pitch contour is very flat with minimal fluctuations, indicating a neutral or unemotional delivery. This aligns more with a sincere response rather than sarcasm. Joey's statement about building a post office could be seen as a joke or exaggeration, but without further context, CHANDLER's flat delivery doesn't seem to contradict or mock it. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
701
  },
702
  {
703
  "clip_id": "2_134",
@@ -708,7 +708,7 @@
708
  "correct": false,
709
  "well_formed": true,
710
  "reward_total": 0.33499999999999996,
711
- "completion_text": "<think>\nThe target utterance \"Paper cut!\" is clearly not meant literally. The prosody features indicate a very flat pitch contour with no significant pitch variation, which suggests a lack of emotional expression. Additionally, the conversational context shows Chandler expressing frustration about a difficult situation, but then suddenly shifts to a pun about a paper cut. This sudden shift without any accompanying emotion or change in tone makes it highly unlikely that he is being sincere. The prosody alone, combined with the context, points towards sarcasm.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
712
  },
713
  {
714
  "clip_id": "2_135",
@@ -719,7 +719,7 @@
719
  "correct": false,
720
  "well_formed": true,
721
  "reward_total": 0.33499999999999996,
722
- "completion_text": "<think>\nThe prosody analysis shows that Dorothy's speech has a flat affect with no internal pauses, which is consistent with sincerity. Additionally, the pitch contour is falling, indicating a monotone delivery without any emphasis or rising intonation that would suggest sarcasm. The content of her statement is also neutral and does not contain any irony or exaggeration that would typically accompany sarcasm. Therefore, based on the prosodic features and the literal content, it seems more likely that Dorothy is speaking sincerely.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
723
  },
724
  {
725
  "clip_id": "2_152",
@@ -752,7 +752,7 @@
752
  "correct": false,
753
  "well_formed": true,
754
  "reward_total": 0.33499999999999996,
755
- "completion_text": "<think>\nRachel's tone is flat and there are no internal pauses, which suggests sincerity. However, the phrase \"Oh boy, I just can't watch. It's too scary!\" sounds like a genuine reaction rather than sarcasm. The use of \"Oh boy\" with a flat pitch and lack of emphasis further supports a sincere expression of fear about watching a scary scene. Additionally, the prosody features indicate low energy and no significant pitch variation, which aligns more with sincerity than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
756
  },
757
  {
758
  "clip_id": "2_173",
@@ -763,7 +763,7 @@
763
  "correct": true,
764
  "well_formed": true,
765
  "reward_total": 0.965,
766
- "completion_text": "<think>\nThe prosody analysis shows that Ross's speech is flat and lacks pitch variation, which is typical of sincere speech. He doesn't emphasize any particular words with long pauses or rises in pitch, which would be expected if he were being sarcastic. Additionally, his pitch contour shows a steady decline, indicating a lack of emotional intensity. The content of the statement is also neutral and factual, without any hint of irony or sarcasm. Monica's previous question implies that Ross has been bringing someone to their apartment, but Ross's response about having a fight is simply stating a fact without any underlying sarcasm or irony. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
767
  },
768
  {
769
  "clip_id": "2_178",
@@ -774,7 +774,7 @@
774
  "correct": false,
775
  "well_formed": true,
776
  "reward_total": 0.33499999999999996,
777
- "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variation, which suggests sincerity rather than sarcasm. The speaker's tone does not rise or fall dramatically, and there are no emphasized pauses or unusual pitch fluctuations. Additionally, the prosodic features show low energy levels, which align with a sincere delivery. The target utterance itself contains positive language (\"enjoy it\", \"enjoy my view\") but without any indication of irony or exaggeration that would suggest sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
778
  },
779
  {
780
  "clip_id": "2_180",
@@ -785,7 +785,7 @@
785
  "correct": true,
786
  "well_formed": true,
787
  "reward_total": 0.965,
788
- "completion_text": "<think>\nPHOEBE says \"That oughta do it\" with a very flat pitch contour and no internal pauses, which suggests sincerity. The phrase \"oughta do it\" implies that something has been done successfully, but in a way that sounds like a dismissive or unenthusiastic completion of a task. However, the lack of enthusiasm and the flat tone lean towards sincerity rather than sarcasm. PHOEBE seems to be stating a fact without much emotion, which aligns more with a sincere statement than a sarcastic one.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
789
  },
790
  {
791
  "clip_id": "2_181",
@@ -796,7 +796,7 @@
796
  "correct": true,
797
  "well_formed": true,
798
  "reward_total": 0.965,
799
- "completion_text": "<think>\nThe prosody analysis shows that Joey's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive words or phrases that might indicate sarcasm. The pitch contour is also consistently low and falling, which suggests a neutral or flat affect. Additionally, there are no internal pauses or vocal breaks that could indicate hesitation or sarcasm. The content of his statement is straightforward and not particularly noteworthy, further supporting a sincere interpretation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
800
  },
801
  {
802
  "clip_id": "2_198",
@@ -818,7 +818,7 @@
818
  "correct": true,
819
  "well_formed": true,
820
  "reward_total": 0.95,
821
- "completion_text": "<think>\nPHOEBE's tone is flat and there are no internal pauses, which suggests sincerity. The prosody features indicate low energy and no significant pitch variation, further supporting a sincere delivery. PHOEBE is simply repeating \"Yeah?\" without any additional context or emotional undertones that would suggest sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
822
  },
823
  {
824
  "clip_id": "2_234",
@@ -829,7 +829,7 @@
829
  "correct": true,
830
  "well_formed": true,
831
  "reward_total": 0.965,
832
- "completion_text": "<think>\nPhoebe's tone is flat and lacks any significant pitch variation or emphasis. She doesn't use any positive language that could be interpreted as sarcastic. The phrase \"See, there you go, the cleansing works!\" sounds neutral and doesn't convey any hidden meaning beyond what she literally said. Rachel's reaction (\"Oh, my god!\") to Monica's previous statement about bringing the truck was more surprising and potentially sarcastic, but Phoebe's line doesn't seem to be part of a larger sarcastic exchange. It appears to be a straightforward statement with no indication of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
833
  },
834
  {
835
  "clip_id": "2_24",
@@ -840,7 +840,7 @@
840
  "correct": false,
841
  "well_formed": true,
842
  "reward_total": 0.33499999999999996,
843
- "completion_text": "<think>\nThe prosody analysis shows a flat affect with no internal pauses and a pitch contour that is consistently low and falling. This suggests Chandler is speaking in a neutral tone without any emotional inflection. Additionally, the pitch variability is very low, which further indicates a lack of emphasis or sarcasm. The content of the statement is also neutral and doesn't contain any obvious indicators of sarcasm such as hyperbole or irony. Therefore, it seems more likely that Chandler is being sincere rather than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
844
  },
845
  {
846
  "clip_id": "2_241",
@@ -851,18 +851,18 @@
851
  "correct": false,
852
  "well_formed": true,
853
  "reward_total": 0.33499999999999996,
854
- "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variability, which suggests a lack of emotional intensity. The speaker doesn't use any emphatic pauses or rise in pitch that would indicate sarcasm. Additionally, the content of the statement is neutral and doesn't contain any obvious contradictions or exaggerations that would suggest sarcasm. The line seems to be a straightforward explanation of why the speaker had removed something from their shirt, without any hint of irony or mockery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
855
  },
856
  {
857
  "clip_id": "2_263",
858
  "gold": "sincere",
859
  "is_pivot": false,
860
- "predicted": "sarcastic",
861
- "confidence": 0.8,
862
- "correct": false,
863
  "well_formed": true,
864
- "reward_total": 0.37,
865
- "completion_text": "<think>\nMonica's statement \"Wow! Imagine what our kids would look like!\" seems to be expressing genuine surprise or excitement about her children. However, the prosodic features suggest that she might be using sarcasm. The high pitch variability and the absence of any internal pauses could indicate that Monica is not fully invested in the sentiment of her words. Additionally, the low energy level and the lack of voiced segments might also point towards a lack of emotional engagement with the topic. Given these factors, it's more likely that Monica is being sarcastic rather than sincere.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
866
  },
867
  {
868
  "clip_id": "2_27",
@@ -884,7 +884,7 @@
884
  "correct": false,
885
  "well_formed": true,
886
  "reward_total": 0.33499999999999996,
887
- "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variation, which suggests a lack of emotional intensity. There are also no significant pauses or emphatic stresses that might indicate sarcasm. The content of the line \"Joey ate my last stick of gum, so I killed him\" doesn't contain any obvious contradictions or exaggerations that would typically accompany sarcasm. Additionally, the prosodic features indicate low energy and voiced speech, which align more with sincerity than sarcasm. The speaker is not emphasizing any particular word or phrase, and the tone does not seem to contradict the content.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
888
  }
889
  ]
890
  }
 
1
  {
2
  "n_eval_clips": 80,
3
+ "mean_reward": 0.6536062499999999,
4
+ "well_formed_rate": 0.9875,
5
+ "accuracy": 0.5125,
6
  "pivot_in_eval": 6,
7
  "pivot_correct": 5,
8
  "results": [
 
15
  "correct": true,
16
  "well_formed": true,
17
  "reward_total": 0.965,
18
+ "completion_text": "<think>\nThe prosody features indicate that Raj speaks with high energy and no internal pauses, which suggests sincerity rather than sarcasm. Additionally, his pitch contour is flat, which aligns with a sincere tone. The phrase \"What else? Sell it on eBay as 'slightly used'\" sounds genuine and matter-of-fact, without any indication of mockery or irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
19
  },
20
  {
21
  "clip_id": "1_10462",
22
  "gold": "sarcastic",
23
  "is_pivot": false,
24
  "predicted": "sincere",
25
+ "confidence": 0.95,
26
  "correct": false,
27
  "well_formed": true,
28
+ "reward_total": 0.3175,
29
+ "completion_text": "<think>\nHoward's tone is flat and lacks any significant pitch variation or emphasis. There are no internal pauses, and his speech sounds matter-of-fact. The phrase \"exciting opportunities in home care for the old and fat\" is presented neutrally without any indication of sarcasm. Sheldon's previous statements were clearly sarcastic, but Howard's line here does not follow the pattern of sarcasm. He is simply stating a fact about potential career opportunities, albeit with a slightly casual tone.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.95}</final>"
30
  },
31
  {
32
  "clip_id": "1_105",
 
37
  "correct": false,
38
  "well_formed": true,
39
  "reward_total": 0.33499999999999996,
40
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests sincerity rather than sarcasm. Sheldon's tone does not exhibit high pitch variability or emphatic pauses that would typically accompany sarcasm. Additionally, his statement about the coffee table being a \"tiny garage sale\" implies confusion or frustration, which aligns more with a sincere expression of disbelief or concern about the disorganization. The lack of internal pauses and the flat affect further support a sincere delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
41
  },
42
  {
43
  "clip_id": "1_10849",
 
48
  "correct": false,
49
  "well_formed": true,
50
  "reward_total": 0.37,
51
+ "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. However, the content of his statement is ironic and self-deprecating, implying that buying a comic book store is not very meaningful to him. This contradiction between the literal meaning and the implied sentiment is a common sign of sarcasm.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
52
  },
53
  {
54
  "clip_id": "1_10853",
 
70
  "correct": true,
71
  "well_formed": true,
72
  "reward_total": 0.965,
73
+ "completion_text": "<think>\nThe prosody features indicate that there is no pitch information available, making it impossible to analyze the tone based on pitch contours. However, let's consider the context and the rest of the conversation. Raj is trying to downplay the significance of his past relationship with Penny, which he acknowledges as a minor issue. He then compliments Penny and Leonard together, saying they are \"both gorgeous.\" This statement seems genuine and heartfelt, not sarcastic. The phrase \"Kind of says something about the man who could bed you both\" is a compliment, implying that Raj finds them attractive and desirable. Given the context, it sounds sincere rather than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
74
  },
75
  {
76
  "clip_id": "1_11055",
 
92
  "correct": true,
93
  "well_formed": true,
94
  "reward_total": 0.965,
95
+ "completion_text": "<think>\nAmy's tone here is flat and lacks any emphasis or inflection that might indicate sarcasm. She speaks in a neutral way, as if she's simply stating a fact about her dashboard. There are no pauses or breaks in her speech that suggest she's trying to convey something other than what she says. The prosody features also don't show any signs of high pitch variability or strong emphasis, which are typically associated with sarcastic speech. Additionally, Amy is talking about a dashboard issue in a matter-of-fact manner, which aligns more with sincerity rather than sarcasm. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
96
  },
97
  {
98
  "clip_id": "1_11243",
99
  "gold": "sarcastic",
100
  "is_pivot": false,
101
  "predicted": "sincere",
102
+ "confidence": 0.95,
103
  "correct": false,
104
  "well_formed": true,
105
+ "reward_total": 0.3175,
106
+ "completion_text": "<think>\nHoward's statement \"I love that you're too good to pee in the kitchen sink\" has a flat pitch contour without any significant pitch variation or emphasis pauses. The prosody features indicate low pitch variability and voiced speech, which suggests sincerity rather than sarcasm. Additionally, the content of the statement does not contain any irony or hidden meaning that would make it sarcastic. Howard is simply complimenting Bernadette's cleanliness and suggesting she avoids using the kitchen sink, which aligns with the positive tone of the conversation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.95}</final>"
107
  },
108
  {
109
  "clip_id": "1_11439",
 
114
  "correct": false,
115
  "well_formed": true,
116
  "reward_total": 0.33499999999999996,
117
+ "completion_text": "<think>\nThe tone here seems flat and matter-of-fact, with no significant pitch variation or emphasis pauses. The pitch contour shows a steady rise, which suggests a sincere, calm delivery. Additionally, there are no internal pauses or other signs of sarcasm such as exaggerated stress or unusual intonation. Penny is simply stating her agreement with Leonard and Sheldon's suggestion, without any hint of irony or mockery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
118
  },
119
  {
120
  "clip_id": "1_1144",
 
125
  "correct": false,
126
  "well_formed": true,
127
  "reward_total": 0.33499999999999996,
128
+ "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variability, which suggests a sincere tone. Sheldon's speech lacks any significant pitch variation, positive emphases, or unusual pauses that would indicate sarcasm. His flat affect and lack of internal pauses are consistent with a sincere statement. Additionally, the content of his line about redefining the word \"plenty\" to mean \"two\" doesn't align with the context of Howard's question about Leonard dating multiple women. This discrepancy further supports a sincere interpretation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
129
  },
130
  {
131
  "clip_id": "1_11526",
 
136
  "correct": true,
137
  "well_formed": true,
138
  "reward_total": 0.965,
139
+ "completion_text": "<think>\nThe prosody here is quite flat, with no significant pitch variation or emphasis. Sheldon is speaking in a monotone manner, which suggests sincerity rather than sarcasm. He is explaining his recent experiment with elevated anxiety levels and mentions the \"tasteless uncensored crotch talk.\" This explanation seems factual and not meant to be ironic or mocking. The lack of intonation and the straightforward nature of his words point towards sincerity.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
140
  },
141
  {
142
  "clip_id": "1_12275",
 
147
  "correct": true,
148
  "well_formed": true,
149
  "reward_total": 0.965,
150
+ "completion_text": "<think>\nThe speaker uses \"Uh, no\" which is a hesitant interjection that often indicates they are trying to downplay something. The prosody shows a low energy level and no significant pitch variation, suggesting sincerity rather than sarcasm. Additionally, the lack of internal pauses and the flat pitch contour further support a sincere tone. The speaker is explaining why they didn't rush the other person, and their hesitation and explanation align with a sincere attempt to clarify their actions.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
151
  },
152
  {
153
  "clip_id": "1_12320",
 
158
  "correct": false,
159
  "well_formed": true,
160
  "reward_total": 0.37,
161
+ "completion_text": "<think>\nThe prosody analysis shows that Raj's speech has high pitch variability and no internal pauses, which typically indicate sarcasm. Additionally, he emphasizes certain words with a rising intonation, as seen in \"Oh, yes,\" which suggests he is not being serious about the details he's providing. The lack of a flat affect and the presence of pauses could also point towards sarcasm. However, the pitch contour is relatively flat, which might temper the certainty of the sarcasm. Given the prosodic features, the speech seems more likely to be sarcastic than sincere.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.8}</final>"
162
  },
163
  {
164
  "clip_id": "1_12331",
 
169
  "correct": true,
170
  "well_formed": true,
171
  "reward_total": 0.965,
172
+ "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variation, which suggests sincerity rather than sarcasm. Additionally, there are no emphasized pauses or unusual intonations that would indicate sarcasm. The phrase \"Wow, there's a Denny's in Vegas you can actually get married in\" is a common joke about Las Vegas weddings, but the tone here seems straightforward and not mocking or ironic. Howard's previous comment about a Denny's wedding location in Vegas was likely meant as a joke, but Penny's response sounds genuine and doesn't contradict the context provided by Howard.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
173
  },
174
  {
175
  "clip_id": "1_1478",
 
180
  "correct": false,
181
  "well_formed": true,
182
  "reward_total": 0.33499999999999996,
183
+ "completion_text": "<think>\nHoward's tone is flat and he doesn't use any internal pauses or emphasize his words with pauses. He speaks in a monotone voice, which suggests sincerity rather than sarcasm. Additionally, Howard's statement about Mrs. Giselle being sensitive to chlorine and liking his fashion models to be \"pruny and bug-eyed\" is a straightforward observation without any hint of irony or mockery. The pitch contour shows a steady rise, which aligns with a sincere, matter-of-fact delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
184
  },
185
  {
186
  "clip_id": "1_1627",
 
202
  "correct": false,
203
  "well_formed": true,
204
  "reward_total": 0.33499999999999996,
205
+ "completion_text": "<think>\nPenny's tone is flat and lacks any significant pitch variation or emphasis. She doesn't use any positive or negative words that could indicate sarcasm. Instead, she delivers the line in a matter-of-fact way, which suggests sincerity rather than sarcasm. Additionally, there are no pauses or breaks in her speech that might indicate she is trying to disguise her true feelings.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
206
  },
207
  {
208
  "clip_id": "1_2216",
 
213
  "correct": true,
214
  "well_formed": true,
215
  "reward_total": 0.965,
216
+ "completion_text": "<think>\nLeonard's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive or negative words that could indicate sarcasm. The pitch contour shows a steady decline, which suggests a monotone delivery. Additionally, there are no internal pauses or vocalized pauses that might indicate he is trying to emphasize his point. Leonard is simply stating a fact about not being included, without any underlying sarcasm or irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
217
  },
218
  {
219
  "clip_id": "1_2423",
 
224
  "correct": false,
225
  "well_formed": true,
226
  "reward_total": 0.33499999999999996,
227
+ "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't take the praise seriously, as evidenced by his dismissive \"Oh, great.\" The pitch contour is also very flat, indicating a lack of emotional expression. Additionally, there are no internal pauses or other signs of hesitation, which would typically accompany genuine surprise or sarcasm. Sheldon's response seems overly casual and unimpressed, which aligns with the context of Penny suggesting a gift for her nephew. This suggests he is not taking the offer seriously and is likely being sarcastic.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
228
  },
229
  {
230
  "clip_id": "1_2614",
 
257
  "correct": false,
258
  "well_formed": true,
259
  "reward_total": 0.33499999999999996,
260
+ "completion_text": "<think>\nRaj's statement seems to be mocking Sheldon's boast about his fighting skills. The prosody features show a flat pitch contour with little variation, which suggests sincerity rather than sarcasm. Additionally, Raj uses the phrase \"big talk,\" which implies that he is not being serious about Sheldon's claims. The tone is more dismissive and less likely to be sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
261
  },
262
  {
263
  "clip_id": "1_2792",
 
268
  "correct": true,
269
  "well_formed": true,
270
  "reward_total": 0.965,
271
+ "completion_text": "<think>\nThe prosody analysis shows that Penny's pitch is relatively high but not extremely variable, and there are no internal pauses or emphasized pauses. Her tone is neutral and slightly flat, which suggests sincerity rather than sarcasm. Additionally, her statement \"No, I mean, we can always go back to being friends\" doesn't contain any irony or hidden meaning that would make it sarcastic. The way she says it, with a neutral tone and without any dramatic emphasis, aligns more with sincerity.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
272
  },
273
  {
274
  "clip_id": "1_3125",
 
279
  "correct": false,
280
  "well_formed": true,
281
  "reward_total": 0.33499999999999996,
282
+ "completion_text": "<think>\nHoward's tone is flat and he doesn't use any emphatic pauses or high-pitched intonations that would indicate sarcasm. Instead, he speaks matter-of-factly about the consequences of someone dumping him, which suggests sincerity rather than sarcasm. Additionally, his pitch contour shows a relatively flat pattern without any significant ups and downs, which further supports a sincere delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
283
  },
284
  {
285
  "clip_id": "1_3177",
286
  "gold": "sincere",
287
  "is_pivot": false,
288
+ "predicted": "sincere",
289
  "confidence": 0.9,
290
+ "correct": true,
291
  "well_formed": true,
292
+ "reward_total": 0.965,
293
+ "completion_text": "<think>\nThe speaker is clearly trying to downplay the quality of the movie while praising the desert scenery. The prosody shows a high pitch variability with no significant pauses or emphasis, which suggests sincerity rather than sarcasm. Additionally, the pitch contour is relatively flat, indicating a lack of dramatic emphasis. The phrase \"Not a great movie, but look at that beautiful desert\" is a common way to soften criticism, and the prosody doesn't indicate any sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
294
  },
295
  {
296
  "clip_id": "1_3256",
 
301
  "correct": false,
302
  "well_formed": true,
303
  "reward_total": 0.33499999999999996,
304
+ "completion_text": "<think>\nThe speaker, [PERSON], uses the phrase \"I'm sorry, fiddling with yourself in the bathtub is a real class act.\" This sentence is clearly meant to be sarcastic, but let's examine the prosody closely. The pitch contour shows a very flat pattern with minimal variation, which suggests a lack of emotional expression. Additionally, there are no internal pauses or emphatic pauses that would indicate sarcasm. The prosody is neutral, and the tone does not match the content. The speaker is actually agreeing with Howard that it's a \"real class act,\" which is the opposite of what they mean. However, the prosody doesn't convey any sarcasm; it's more likely that the speaker is being sincere in their agreement.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
305
  },
306
  {
307
  "clip_id": "1_3660",
 
323
  "correct": true,
324
  "well_formed": true,
325
  "reward_total": 0.965,
326
+ "completion_text": "<think>\nThe speaker is being polite and appreciative, which suggests sincerity rather than sarcasm. They acknowledge the thoughtfulness of the offer but express their intention to finish their coffee first. There's no indication of irony or exaggeration in the tone or pitch. The pitch contour shows a flat trend with minimal variation, which aligns with a sincere, matter-of-fact delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
327
  },
328
  {
329
  "clip_id": "1_4281",
 
334
  "correct": true,
335
  "well_formed": true,
336
  "reward_total": 0.965,
337
+ "completion_text": "<think>\nHoward's statement seems to be about facing someone after doing something wrong. The prosody features indicate a relatively flat pitch contour with low energy, which suggests sincerity rather than sarcasm. Additionally, Howard doesn't use any emphatic pauses or positive language that would typically accompany sarcasm. His tone and pitch contour do not deviate from a neutral, sincere delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
338
  },
339
  {
340
  "clip_id": "1_4544",
 
344
  "confidence": 0.9,
345
  "correct": false,
346
  "well_formed": true,
347
+ "reward_total": 0.185,
348
+ "completion_text": "<think>\nAmy's tone is flat and there are no internal pauses, which suggests sincerity. However, the phrase \"our menses are synchronized\" is a clear indicator of sarcasm. The use of such a medical term in a joking context implies that Amy is being sarcastic about the idea of a girls' night with her friend Penny. Additionally, the pitch contour shows a flat affect, which aligns with the lack of emphasis or inflection in Amy's speech. This further supports the idea that she is not serious about the synchronization of their menstrual cycles.</thinking>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
349
  },
350
  {
351
  "clip_id": "1_4552",
352
  "gold": "sincere",
353
  "is_pivot": false,
354
  "predicted": "sincere",
355
+ "confidence": 0.9,
356
  "correct": true,
357
  "well_formed": true,
358
+ "reward_total": 0.965,
359
+ "completion_text": "<think>\nAmy's tone is flat and there are no internal pauses, which suggests sincerity. Additionally, her pitch contour shows a very flat trend with minimal variation, indicating a lack of emotional inflection. The prosodic features also support this, with low pitch variability and no significant energy changes. The content of her statement is neutral and factual, not expressing any sarcasm or irony. She is simply stating a straightforward medical fact about her cervix without any indication of mockery or criticism.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
360
  },
361
  {
362
  "clip_id": "1_4792",
 
367
  "correct": true,
368
  "well_formed": true,
369
  "reward_total": 0.965,
370
+ "completion_text": "<think>\nThe prosody features indicate a very flat pitch contour with minimal variability, which suggests a lack of emotional expression. Sheldon's tone is neutral and matter-of-fact, as if he's simply stating a fact. There are no emphatic pauses or rising intonations that would typically accompany sarcasm. The content of his statement, \"Obviously, we're no longer a Justice League. We have no choice but to switch to our Muppet Baby costumes,\" is also quite neutral and doesn't contain any obvious irony or mockery. Sheldon is simply acknowledging a change in circumstances without any indication of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
371
  },
372
  {
373
  "clip_id": "1_4995",
 
378
  "correct": false,
379
  "well_formed": true,
380
  "reward_total": 0.33499999999999996,
381
+ "completion_text": "<think>\nThe prosody features indicate a very flat pitch contour with no significant pitch variation. Sheldon's tone is neutral and lacks any emphatic pauses or rising intonation that might suggest sarcasm. He is simply stating a fact about what he learned from Penny. The conversational context shows that they are engaged in a playful improvisation session, and Sheldon is acknowledging something Penny taught him. There are no indications of irony or hidden meaning.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
382
  },
383
  {
384
  "clip_id": "1_5212",
 
389
  "correct": true,
390
  "well_formed": true,
391
  "reward_total": 0.965,
392
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests sincerity rather than sarcasm. Additionally, there are no internal pauses or emphatic pauses that might indicate sarcasm. The content itself is neutral and doesn't contain any obvious indicators of irony or mockery. Sheldon buying a loom and learning to weave is a factual statement without any implied negative connotation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
393
  },
394
  {
395
  "clip_id": "1_536",
 
400
  "correct": false,
401
  "well_formed": true,
402
  "reward_total": 0.33499999999999996,
403
+ "completion_text": "<think>\nThe prosody analysis shows that Sheldon's pitch is relatively high with some variability, but it doesn't rise significantly. There are no clear positive words or emphatic pauses that would suggest sarcasm. Additionally, the prosody does not include any unusual pauses or intonations that might indicate sarcasm. The content of the statement is also neutral; he simply says he has the latest issue of a scientific journal. Given these factors, the prosody does not align with typical sarcastic delivery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
404
  },
405
  {
406
  "clip_id": "1_5496",
 
411
  "correct": false,
412
  "well_formed": true,
413
  "reward_total": 0.33499999999999996,
414
+ "completion_text": "<think>\nPENNY's prosodic features indicate a high pitch variability and no internal pauses, which are typically associated with sincerity rather than sarcasm. Additionally, her speech lacks any emphatic pauses or positive language that would suggest she is mocking or criticizing someone. The content of her statement is neutral and focused on her own concerns about makeup and pregnancy, without any indication of sarcasm towards others. Her tone and pitch contour do not show the kind of fluctuation that would suggest she is being sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
415
  },
416
  {
417
  "clip_id": "1_5580",
 
433
  "correct": true,
434
  "well_formed": true,
435
  "reward_total": 0.965,
436
+ "completion_text": "<think>\nThe prosody analysis shows a flat affect with no internal pauses, which suggests sincerity. Sheldon is making a comparison between Jewish and Indian parents, but he is not being sarcastic about it. The tone is matter-of-fact and informative, without any indication of sarcasm. He is simply stating facts about parental behavior without adding any ironic or mocking tone.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
437
  },
438
  {
439
  "clip_id": "1_5953",
 
444
  "correct": true,
445
  "well_formed": true,
446
  "reward_total": 0.965,
447
+ "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. Additionally, the pitch contour shows a steady rise, indicating a monotone delivery that aligns with a sincere statement. The prosody features also support this, with low energy levels and no significant pitch variation. The content of the statement, while potentially amusing, does not seem to be meant as sarcasm but rather as a factual report about Raj and Emily's relationship. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
448
  },
449
  {
450
  "clip_id": "1_5964",
451
  "gold": "sarcastic",
452
  "is_pivot": false,
453
+ "predicted": null,
454
+ "confidence": 0.5,
455
+ "correct": false,
456
+ "well_formed": false,
457
+ "reward_total": 0.15,
458
+ "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. However, his use of \"Smart\" with a whisper and the mention of \"deaf chick\" could imply sarcasm. The whisper and the suggestion that someone might not hear him could be a way to mock or belittle someone, especially if he's being sarcastic about someone being deaf. But without more context, it's hard to determine if he's being sarcastic or not.\n</think>\n<final>{\"label\":\"sincere\"|\"sarcastic\",\"confidence\":0.6}</final>"
459
  },
460
  {
461
  "clip_id": "1_6020",
462
  "gold": "sarcastic",
463
  "is_pivot": false,
464
  "predicted": "sincere",
465
+ "confidence": 0.95,
466
  "correct": false,
467
  "well_formed": true,
468
+ "reward_total": 0.3175,
469
+ "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive or negative intonation to convey emotion. Additionally, he uses a neutral pitch range with no peaks or valleys, which suggests sincerity rather than sarcasm. The phrase \"Would you like back on it? This'll just take a moment. It's on a five and a quarter inch floppy\" is delivered in a matter-of-fact way without any hint of irony or mockery. Sheldon is simply stating a fact about his enemies list. There are no pauses or emphatic stresses that would indicate sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.95}</final>"
470
  },
471
  {
472
  "clip_id": "1_6113",
 
477
  "correct": true,
478
  "well_formed": true,
479
  "reward_total": 0.965,
480
+ "completion_text": "<think>\nHoward's statement about going to the International Space Station is clearly a joke, but it doesn't sound sarcastic. The prosody features show a relatively flat pitch contour with no significant pitch variation, which suggests sincerity rather than sarcasm. Additionally, Howard's tone is matter-of-fact and doesn't contain any hints of irony or mockery. The phrase \"Oh, my word, a trip to the heavens\" is presented as a straightforward statement without any indication of exaggeration or criticism.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
481
  },
482
  {
483
  "clip_id": "1_6165",
 
499
  "correct": false,
500
  "well_formed": true,
501
  "reward_total": 0.33499999999999996,
502
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests sincerity rather than sarcasm. Additionally, there are no internal pauses or emphatic pauses that might indicate sarcasm. The phrase \"if you don't mind looking like an orange traffic cone\" is a straightforward statement about the appearance of the dresses, without any hint of irony or exaggeration. The tone seems matter-of-fact, consistent with a sincere response to a question about wedding attire.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
503
  },
504
  {
505
  "clip_id": "1_6426",
 
543
  "correct": false,
544
  "well_formed": true,
545
  "reward_total": 0.33499999999999996,
546
+ "completion_text": "<think>\nThe prosody analysis shows that Sheldon's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use positive words or strong emotional markers, which would typically indicate sarcasm. Additionally, his speech pattern does not include any internal pauses or vocalized pauses that might suggest he is trying to mask his true feelings. The phrase \"Darn\" at the end is a mild expression of disappointment, but it's not enough to make the statement sarcastic. The context also suggests that he is simply expressing his desire to invite someone to dinner without much enthusiasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
547
  },
548
  {
549
  "clip_id": "1_7400",
 
554
  "correct": true,
555
  "well_formed": true,
556
  "reward_total": 0.965,
557
+ "completion_text": "<think>\nThe prosody analysis shows that Penny's tone is flat and there are no internal pauses, which typically indicate sincerity. Additionally, her pitch contour is relatively flat with no significant ups and downs, suggesting she is not emphasizing any particular word or phrase. The use of \"Of course\" and \"it's been in the news\" does not convey sarcasm; instead, it sounds like a straightforward statement about a well-known scientific concept. The context also doesn't provide any hints of sarcasm, as she is simply stating a fact about the Higgs boson without any preceding context that would suggest irony or mockery.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
558
  },
559
  {
560
  "clip_id": "1_7504",
561
  "gold": "sincere",
562
  "is_pivot": false,
563
+ "predicted": "sincere",
564
+ "confidence": 0.9,
565
+ "correct": true,
566
  "well_formed": true,
567
+ "reward_total": 0.965,
568
+ "completion_text": "<think>\nThe speaker is praising Stuart's comic book store as being \"out of this world,\" which is clearly meant as a compliment. However, the prosody features suggest a sarcastic tone. The pitch is high and variable, but not in a way that emphasizes the positive aspects of the store. There are no pauses or breaks in the speech that might indicate a change in sentiment. Additionally, the pitch contour shows a flat overall trend with some spikes, which could indicate excitement or surprise, but not necessarily sarcasm. The prosody does not align strongly with a sarcastic interpretation.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
569
  },
570
  {
571
  "clip_id": "1_8042",
 
575
  "confidence": 0.9,
576
  "correct": true,
577
  "well_formed": true,
578
+ "reward_total": 0.956,
579
+ "completion_text": "<think>\nHoward's tone is flat and there are no internal pauses, which suggests sincerity. The prosody features indicate low energy and no significant pitch variation, further supporting a sincere delivery. Howard is simply stating a fact about who is going on a trip, without any indication of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
580
  },
581
  {
582
  "clip_id": "1_8078",
 
587
  "correct": true,
588
  "well_formed": true,
589
  "reward_total": 0.965,
590
+ "completion_text": "<think>\nHoward is using a flat affect and not emphasizing any particular words, which suggests sincerity. Additionally, his pitch contour is rising but not with much variability, indicating a calm and matter-of-fact tone. The phrase \"Trust me, you're not Jewish. That's how they get you\" sounds like he is trying to reassure Raj about something, but without any additional context suggesting sarcasm, it seems more likely that he is being sincere.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
591
  },
592
  {
593
  "clip_id": "1_8136",
 
609
  "correct": true,
610
  "well_formed": true,
611
  "reward_total": 0.965,
612
+ "completion_text": "<think>\nThe prosody analysis shows a flat affect with little pitch variation and no internal pauses, which typically indicates sincerity. Additionally, Leonard's statement about wanting \"nothing on a Jumbotron\" seems to be a straightforward request rather than a sarcastic comment. The tone is matter-of-fact and doesn't suggest any hidden meaning or irony.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
613
  },
614
  {
615
  "clip_id": "1_8746",
 
620
  "correct": false,
621
  "well_formed": true,
622
  "reward_total": 0.37,
623
+ "completion_text": "<think>\nThe prosody analysis shows that Amy's pitch is high and variable, which could indicate sarcasm. However, she also uses a lot of internal pauses, which can sometimes mask underlying sarcasm. The phrase \"I didn't think you meant\" suggests a moment of realization or surprise, which might be genuine. Additionally, the tone seems more matter-of-fact than sarcastic, as she doesn't emphasize any particular word or phrase with a raised pitch.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.8}</final>"
624
  },
625
  {
626
  "clip_id": "1_8749",
627
  "gold": "sarcastic",
628
  "is_pivot": false,
629
+ "predicted": "sincere",
630
+ "confidence": 0.9,
631
+ "correct": false,
632
  "well_formed": true,
633
+ "reward_total": 0.33499999999999996,
634
+ "completion_text": "<think>\nThe prosody analysis shows that Amy's tone is flat and lacks any significant pitch variation or emphasis. She doesn't use any vocalized pauses or stress on particular words, which suggests sincerity. Additionally, her statement about the movie being \"entertaining despite the glaring story problem\" aligns with the context provided, where she criticizes the movie but acknowledges it was still enjoyable. The lack of emotional inflection and the straightforward nature of her words point towards sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
635
  },
636
  {
637
  "clip_id": "1_8827",
 
653
  "correct": true,
654
  "well_formed": true,
655
  "reward_total": 0.965,
656
+ "completion_text": "<think>\nThe prosody analysis shows a very flat pitch contour with minimal variation, which suggests a lack of emotional expression. Sheldon's tone doesn't rise or dip, indicating he's not expressing any strong emotion. Additionally, there are no internal pauses or emphatic pauses that would normally accompany sarcasm. The phrase \"Can we talk about something else?\" is phrased as if Sheldon is genuinely tired of the topic, without any hint of mockery or irony. The context also supports this interpretation, as he says \"Do you know that I yodel\" as if he's genuinely tired of the conversation. There are no positive words or phrases that could be interpreted as sarcastic. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
657
  },
658
  {
659
  "clip_id": "2_107",
 
675
  "correct": false,
676
  "well_formed": true,
677
  "reward_total": 0.33499999999999996,
678
+ "completion_text": "<think>\nThe prosody features indicate a flat pitch contour with minimal variability, which suggests a lack of emotional expression. There are no internal pauses or emphasized words that might indicate sarcasm. The target utterance \"No, no the actual cartoon character... Of course the balloon.\" sounds neutral and doesn't contain any obvious indicators of sarcasm. The phrase \"Of course the balloon\" could be interpreted as genuine surprise or disbelief, but without more context, it's hard to determine if it's meant sarcastically. However, the flat prosody is the strongest cue pointing towards sincerity rather than sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
679
  },
680
  {
681
  "clip_id": "2_127",
 
686
  "correct": true,
687
  "well_formed": true,
688
  "reward_total": 0.965,
689
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests sincerity rather than sarcasm. Joey's speech lacks the high pitch variability and emphatic pauses that are typically associated with sarcasm. Additionally, his use of \"When?\" and \"Today?\" with a slight upward inflection could be interpreted as genuine curiosity or confusion about the timing of his sexual activity, which aligns more with sincerity than sarcasm. The lack of internal pauses and the flat affect further support a sincere tone.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
690
  },
691
  {
692
  "clip_id": "2_128",
 
697
  "correct": false,
698
  "well_formed": true,
699
  "reward_total": 0.33499999999999996,
700
+ "completion_text": "<think>\nThe prosody analysis shows that Chandler's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any pauses or breaks in his speech, which suggests sincerity. Additionally, the pitch contour shows a very flat trend with minimal fluctuations, indicating a lack of emotional inflection. The content of the statement is also neutral; it's simply asking about Joey's project without any implied sarcasm or mockery. Therefore, based on the prosodic features, it seems more likely that Chandler is being sincere rather than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
701
  },
702
  {
703
  "clip_id": "2_134",
 
708
  "correct": false,
709
  "well_formed": true,
710
  "reward_total": 0.33499999999999996,
711
+ "completion_text": "<think>\nThe target utterance \"Paper cut!\" is clearly not meant literally. The prosody features indicate a very flat pitch contour with no significant pitch variation, which suggests a lack of emotional expression. Additionally, there are no pauses or emphatic intonations that would typically accompany sarcasm. The conversational context shows Chandler expressing frustration about a difficult situation, but his tone does not match the flat affect of \"Paper cut!\" This mismatch between the literal meaning and the prosodic delivery is strong evidence that he is being sarcastic.\n</think>\n<final>{\"label\":\"sarcastic\",\"confidence\":0.9}</final>"
712
  },
713
  {
714
  "clip_id": "2_135",
 
719
  "correct": false,
720
  "well_formed": true,
721
  "reward_total": 0.33499999999999996,
722
+ "completion_text": "<think>\nThe prosody analysis shows that Dorothy's speech has a flat affect with no internal pauses, which is consistent with sincerity. Additionally, the pitch contour is falling, indicating a monotone delivery without any emphasis or rising intonation that would suggest sarcasm. The content of her statement is also neutral and doesn't contain any irony or exaggeration that would typically accompany sarcasm. Therefore, based on the prosodic features and the literal content, it seems more likely that Dorothy is being sincere rather than sarcastic.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
723
  },
724
  {
725
  "clip_id": "2_152",
 
752
  "correct": false,
753
  "well_formed": true,
754
  "reward_total": 0.33499999999999996,
755
+ "completion_text": "<think>\nRachel's tone is flat and there are no internal pauses, which suggests sincerity. However, the phrase \"Oh boy, I just can't watch. It's too scary!\" sounds like a genuine reaction rather than sarcasm. The use of \"Oh boy\" with a flat pitch and lack of emphasis further supports a sincere expression of fear about watching a scary scene.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
756
  },
757
  {
758
  "clip_id": "2_173",
 
763
  "correct": true,
764
  "well_formed": true,
765
  "reward_total": 0.965,
766
+ "completion_text": "<think>\nThe prosody analysis shows that Ross's speech is flat and lacks pitch variation, which is typical of sincere speech. He doesn't emphasize any particular word with a rise in pitch, and there are no pauses or breaks in his speech that might indicate he is trying to soften the statement. Additionally, the pitch contour shows a steady decline, which is consistent with a sincere, matter-of-fact tone. The content of the statement itself is also neutral and factual, without any hint of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
767
  },
768
  {
769
  "clip_id": "2_178",
 
774
  "correct": false,
775
  "well_formed": true,
776
  "reward_total": 0.33499999999999996,
777
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variation, which suggests sincerity rather than sarcasm. The speaker's tone does not rise or fall dramatically, and there are no emphasized pauses or positive words that would typically accompany sarcasm. Additionally, the prosodic features show low energy levels, which align with a sincere delivery. The slight rise in pitch towards the end could be interpreted as a genuine expression of excitement, but it doesn't contradict the flat affect.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
778
  },
779
  {
780
  "clip_id": "2_180",
 
785
  "correct": true,
786
  "well_formed": true,
787
  "reward_total": 0.965,
788
+ "completion_text": "<think>\nPHOEBE says \"That oughta do it\" with a very flat pitch contour and no internal pauses, which suggests sincerity. The phrase \"oughta do it\" implies that something has been done, but not necessarily in a way that was ideal or effective. However, the tone and lack of emphasis on the words don't indicate sarcasm. PHOEBE might be being matter-of-fact about the situation, but there's no indication of mockery or criticism.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
789
  },
790
  {
791
  "clip_id": "2_181",
 
796
  "correct": true,
797
  "well_formed": true,
798
  "reward_total": 0.965,
799
+ "completion_text": "<think>\nThe prosody analysis shows that Joey's tone is flat and lacks any significant pitch variation or emphasis. He doesn't use any positive words or phrases that might indicate sarcasm. The pitch contour is also relatively flat with only minor fluctuations, which further supports a sincere delivery. Additionally, there are no internal pauses or vocal breaks that could suggest he is trying to mask his true feelings. The content of the statement \"Yeah! Look!\" is straightforward and doesn't contain any hidden meaning that would require a sarcastic interpretation. \n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
800
  },
801
  {
802
  "clip_id": "2_198",
 
818
  "correct": true,
819
  "well_formed": true,
820
  "reward_total": 0.95,
821
+ "completion_text": "<think>\nPHOEBE's tone is flat and there are no internal pauses, which suggests sincerity. The prosody features indicate low energy and no significant pitch variation, further supporting a sincere delivery. PHOEBE is simply repeating \"Yeah?\" without any additional context or emotional inflection that would suggest sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
822
  },
823
  {
824
  "clip_id": "2_234",
 
829
  "correct": true,
830
  "well_formed": true,
831
  "reward_total": 0.965,
832
+ "completion_text": "<think>\nPHOEBE's tone is flat and lacks any significant pitch variation or emphasis, which suggests sincerity rather than sarcasm. The phrase \"See, there you go, the cleansing works!\" sounds matter-of-fact and doesn't imply any hidden meaning beyond what it literally states. There are no pauses or breaks that might indicate a sarcastic pause or emphasis. Additionally, the prosody features show low energy and voiced speech, which align with a sincere delivery. PHOEBE is likely expressing a straightforward fact about something working as intended.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
833
  },
834
  {
835
  "clip_id": "2_24",
 
840
  "correct": false,
841
  "well_formed": true,
842
  "reward_total": 0.33499999999999996,
843
+ "completion_text": "<think>\nThe prosody features indicate a flat affect with no internal pauses, which suggests sincerity. Additionally, the pitch contour shows a steady decline, which aligns with a monotone delivery that could be associated with sincere speech rather than sarcasm. The target utterance does not contain any indicators of irony or exaggeration that would typically accompany sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
844
  },
845
  {
846
  "clip_id": "2_241",
 
851
  "correct": false,
852
  "well_formed": true,
853
  "reward_total": 0.33499999999999996,
854
+ "completion_text": "<think>\nThe prosody features indicate a relatively flat pitch contour with minimal variability, which suggests a sincere tone rather than sarcasm. The speaker doesn't emphasize any particular words or use a rising pitch pattern that would typically indicate sarcasm. Additionally, there are no pauses or breaks in the speech that might suggest the speaker is holding back their true feelings. The content of the statement is also neutral and doesn't contain any obvious contradictions or exaggerations that would be typical of sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
855
  },
856
  {
857
  "clip_id": "2_263",
858
  "gold": "sincere",
859
  "is_pivot": false,
860
+ "predicted": "sincere",
861
+ "confidence": 0.9,
862
+ "correct": true,
863
  "well_formed": true,
864
+ "reward_total": 0.965,
865
+ "completion_text": "<think>\nMonica's statement \"Wow! Imagine what our kids would look like!\" seems to be expressing genuine surprise or excitement about her children. However, the prosody analysis shows that Monica's pitch is not varied and there are no internal pauses, which could indicate sincerity. Additionally, the low energy level suggests a lack of enthusiasm, which might lean towards sincerity rather than sarcasm. The pitch contour also shows a flat trend with minimal variation, which further supports a sincere tone. There are no positive words or emphatic pauses that would suggest sarcasm.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
866
  },
867
  {
868
  "clip_id": "2_27",
 
884
  "correct": false,
885
  "well_formed": true,
886
  "reward_total": 0.33499999999999996,
887
+ "completion_text": "<think>\nThe prosody analysis shows a relatively flat pitch contour with minimal variation, which suggests sincerity rather than sarcasm. The speaker doesn't emphasize any particular words or use a rising intonation that would indicate sarcasm. Additionally, there are no pauses or breaks in the speech that might indicate a shift in meaning. The content of the statement about Joey eating the speaker's last stick of gum and killing him seems genuine, as it aligns with the character's tendency to exaggerate and make jokes. However, the lack of vocal emphasis does not necessarily rule out sarcasm; sometimes, sarcasm can be expressed without obvious prosodic cues.\n</think>\n<final>{\"label\":\"sincere\",\"confidence\":0.9}</final>"
888
  }
889
  ]
890
  }
log_history.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331ab9199b90ff1c78e3f0af98bbb7a6f8d8f8bcbda9987dd482a97cbf14f25e
3
  size 7185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d11428bb3243f52d290c02a99fbea6c72e434c81e27053295d8fa2c9caa21
3
  size 7185