Junyi42 commited on
Commit
e0e7188
·
verified ·
1 Parent(s): 049acc1

Upload checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins

Browse files
checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/wandb/offline-run-20260126_213949-checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins-run0/files/output.log CHANGED
@@ -1099,18 +1099,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
1099
  [2026-01-26 22:14:55] (step=0001088) Train Loss mse: 0.0112, Train Loss ce: 0.0307, Train Steps/Sec: 0.68,
1100
  [2026-01-26 22:14:56] (step=0001089) Train Loss mse: 0.0141, Train Loss ce: 0.0282, Train Steps/Sec: 0.68,
1101
  [2026-01-26 22:14:57] (step=0001090) Train Loss mse: 0.0187, Train Loss ce: 0.0744, Train Steps/Sec: 0.68,
1102
- [2026-01-26 22:14:59] (step=0001091) Train Loss mse: 0.0144, Train Loss ce: 0.0371, Train Steps/Sec: 0.56,
1103
- [2026-01-26 22:15:01] (step=0001092) Train Loss mse: 0.0181, Train Loss ce: 0.0742, Train Steps/Sec: 0.69,
1104
- [2026-01-26 22:15:02] (step=0001093) Train Loss mse: 0.0119, Train Loss ce: 0.0614, Train Steps/Sec: 0.56,
1105
- [2026-01-26 22:15:04] (step=0001094) Train Loss mse: 0.0140, Train Loss ce: 0.0572, Train Steps/Sec: 0.59,
1106
- [2026-01-26 22:15:06] (step=0001095) Train Loss mse: 0.0155, Train Loss ce: 0.0332, Train Steps/Sec: 0.69,
1107
- [2026-01-26 22:15:07] (step=0001096) Train Loss mse: 0.0166, Train Loss ce: 0.0692, Train Steps/Sec: 0.68,
1108
- [2026-01-26 22:15:09] (step=0001097) Train Loss mse: 0.0175, Train Loss ce: 0.0863, Train Steps/Sec: 0.68,
1109
- [2026-01-26 22:15:10] (step=0001098) Train Loss mse: 0.0082, Train Loss ce: 0.0662, Train Steps/Sec: 0.57,
1110
- [2026-01-26 22:15:12] (step=0001099) Train Loss mse: 0.0139, Train Loss ce: 0.0561, Train Steps/Sec: 0.68,
1111
- [2026-01-26 22:15:13] (step=0001100) Train Loss mse: 0.0106, Train Loss ce: 0.0682, Train Steps/Sec: 0.68,
1112
- [2026-01-26 22:15:15] (step=0001101) Train Loss mse: 0.0105, Train Loss ce: 0.0531, Train Steps/Sec: 0.56,
1113
- [2026-01-26 22:15:17] (step=0001102) Train Loss mse: 0.0123, Train Loss ce: 0.0902, Train Steps/Sec: 0.59,
1114
  FullyShardedDataParallel(
1115
  (_fsdp_wrapped_module): Bagel(
1116
  (language_model): Qwen2ForCausalLM(
@@ -1297,13 +1285,6 @@ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equat
1297
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1298
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1299
  ce_avg: 0.06269639730453491, mse_avg: 0.0149351442232728
1300
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step1000
1301
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
1302
- [eval debug] first 3 batch fingerprints:
1303
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1304
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1305
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1306
- ce_avg: 0.06731808930635452, mse_avg: 0.011200404725968838
1307
  base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step1500
1308
  Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
1309
  [eval debug] first 3 batch fingerprints:
@@ -1325,6 +1306,18 @@ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equat
1325
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1326
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1327
  ce_avg: 0.20800399780273438, mse_avg: 0.011797062121331692
 
 
 
 
 
 
 
 
 
 
 
 
1328
  [2026-01-26 22:15:18] (step=0001103) Train Loss mse: 0.0096, Train Loss ce: 0.0452, Train Steps/Sec: 0.68,
1329
  [2026-01-26 22:15:20] (step=0001104) Train Loss mse: 0.0135, Train Loss ce: 0.0637, Train Steps/Sec: 0.68,
1330
  [2026-01-26 22:15:21] (step=0001105) Train Loss mse: 0.0077, Train Loss ce: 0.0449, Train Steps/Sec: 0.68,
@@ -2815,6 +2808,20 @@ ce_avg: 0.20800399780273438, mse_avg: 0.011797062121331692
2815
  [2026-01-26 22:57:58] (step=0002587) Train Loss mse: 0.0121, Train Loss ce: 0.0432, Train Steps/Sec: 0.68,
2816
  [2026-01-26 22:57:59] (step=0002588) Train Loss mse: 0.0060, Train Loss ce: 0.0675, Train Steps/Sec: 0.68,
2817
  [2026-01-26 22:58:01] (step=0002589) Train Loss mse: 0.0137, Train Loss ce: 0.0460, Train Steps/Sec: 0.69,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2818
  [2026-01-26 22:58:02] (step=0002590) Train Loss mse: 0.0089, Train Loss ce: 0.0360, Train Steps/Sec: 0.68,
2819
  [2026-01-26 22:58:04] (step=0002591) Train Loss mse: 0.0089, Train Loss ce: 0.0466, Train Steps/Sec: 0.56,
2820
  [2026-01-26 22:58:06] (step=0002592) Train Loss mse: 0.0074, Train Loss ce: 0.0222, Train Steps/Sec: 0.59,
@@ -2902,27 +2909,6 @@ ce_avg: 0.20800399780273438, mse_avg: 0.011797062121331692
2902
  [2026-01-26 23:00:13] (step=0002674) Train Loss mse: 0.0056, Train Loss ce: 0.0613, Train Steps/Sec: 0.68,
2903
  [2026-01-26 23:00:15] (step=0002675) Train Loss mse: 0.0054, Train Loss ce: 0.0283, Train Steps/Sec: 0.49,
2904
  [2026-01-26 23:00:16] (step=0002676) Train Loss mse: 0.0099, Train Loss ce: 0.0483, Train Steps/Sec: 0.56,
2905
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step3000
2906
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
2907
- [eval debug] first 3 batch fingerprints:
2908
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2909
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2910
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2911
- ce_avg: 0.04329414293169975, mse_avg: 0.006353132426738739
2912
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step3500
2913
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
2914
- [eval debug] first 3 batch fingerprints:
2915
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2916
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2917
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2918
- ce_avg: 0.04275057464838028, mse_avg: 0.0056571937166154385
2919
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step4000
2920
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
2921
- [eval debug] first 3 batch fingerprints:
2922
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2923
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2924
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2925
- ce_avg: 0.03896063566207886, mse_avg: 0.005920059513300657
2926
  [2026-01-26 23:00:18] (step=0002677) Train Loss mse: 0.0085, Train Loss ce: 0.0506, Train Steps/Sec: 0.69,
2927
  [2026-01-26 23:00:19] (step=0002678) Train Loss mse: 0.0104, Train Loss ce: 0.0571, Train Steps/Sec: 0.68,
2928
  [2026-01-26 23:00:21] (step=0002679) Train Loss mse: 0.0179, Train Loss ce: 0.0435, Train Steps/Sec: 0.68,
@@ -3899,6 +3885,27 @@ ce_avg: 0.03896063566207886, mse_avg: 0.005920059513300657
3899
  [2026-01-26 23:26:22] (step=0003650) Train Loss mse: 0.0108, Train Loss ce: 0.0542, Train Steps/Sec: 0.49,
3900
  [2026-01-26 23:26:24] (step=0003651) Train Loss mse: 0.0060, Train Loss ce: 0.0508, Train Steps/Sec: 0.57,
3901
  [2026-01-26 23:26:26] (step=0003652) Train Loss mse: 0.0067, Train Loss ce: 0.0496, Train Steps/Sec: 0.68,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3902
  [2026-01-26 23:26:27] (step=0003653) Train Loss mse: 0.0060, Train Loss ce: 0.0390, Train Steps/Sec: 0.68,
3903
  [2026-01-26 23:26:29] (step=0003654) Train Loss mse: 0.0091, Train Loss ce: 0.0192, Train Steps/Sec: 0.68,
3904
  [2026-01-26 23:26:30] (step=0003655) Train Loss mse: 0.0045, Train Loss ce: 0.0370, Train Steps/Sec: 0.58,
@@ -4059,20 +4066,6 @@ ce_avg: 0.03896063566207886, mse_avg: 0.005920059513300657
4059
  [2026-01-26 23:30:41] (step=0003810) Train Loss mse: 0.0053, Train Loss ce: 0.0262, Train Steps/Sec: 0.67,
4060
  [2026-01-26 23:30:43] (step=0003811) Train Loss mse: 0.0043, Train Loss ce: 0.0578, Train Steps/Sec: 0.58,
4061
  [2026-01-26 23:30:44] (step=0003812) Train Loss mse: 0.0063, Train Loss ce: 0.0334, Train Steps/Sec: 0.68,
4062
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step4500
4063
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
4064
- [eval debug] first 3 batch fingerprints:
4065
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4066
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4067
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4068
- ce_avg: 0.03593315929174423, mse_avg: 0.005641990341246128
4069
- base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step5000
4070
- Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
4071
- [eval debug] first 3 batch fingerprints:
4072
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4073
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4074
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
4075
- ce_avg: 0.03604895621538162, mse_avg: 0.005358231253921986
4076
  [2026-01-26 23:30:46] (step=0003813) Train Loss mse: 0.0074, Train Loss ce: 0.0200, Train Steps/Sec: 0.58,
4077
  [2026-01-26 23:30:48] (step=0003814) Train Loss mse: 0.0077, Train Loss ce: 0.0290, Train Steps/Sec: 0.55,
4078
  [2026-01-26 23:30:49] (step=0003815) Train Loss mse: 0.0057, Train Loss ce: 0.0341, Train Steps/Sec: 0.68,
 
1099
  [2026-01-26 22:14:55] (step=0001088) Train Loss mse: 0.0112, Train Loss ce: 0.0307, Train Steps/Sec: 0.68,
1100
  [2026-01-26 22:14:56] (step=0001089) Train Loss mse: 0.0141, Train Loss ce: 0.0282, Train Steps/Sec: 0.68,
1101
  [2026-01-26 22:14:57] (step=0001090) Train Loss mse: 0.0187, Train Loss ce: 0.0744, Train Steps/Sec: 0.68,
 
 
 
 
 
 
 
 
 
 
 
 
1102
  FullyShardedDataParallel(
1103
  (_fsdp_wrapped_module): Bagel(
1104
  (language_model): Qwen2ForCausalLM(
 
1285
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1286
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1287
  ce_avg: 0.06269639730453491, mse_avg: 0.0149351442232728
 
 
 
 
 
 
 
1288
  base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step1500
1289
  Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
1290
  [eval debug] first 3 batch fingerprints:
 
1306
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1307
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
1308
  ce_avg: 0.20800399780273438, mse_avg: 0.011797062121331692
1309
+ [2026-01-26 22:14:59] (step=0001091) Train Loss mse: 0.0144, Train Loss ce: 0.0371, Train Steps/Sec: 0.56,
1310
+ [2026-01-26 22:15:01] (step=0001092) Train Loss mse: 0.0181, Train Loss ce: 0.0742, Train Steps/Sec: 0.69,
1311
+ [2026-01-26 22:15:02] (step=0001093) Train Loss mse: 0.0119, Train Loss ce: 0.0614, Train Steps/Sec: 0.56,
1312
+ [2026-01-26 22:15:04] (step=0001094) Train Loss mse: 0.0140, Train Loss ce: 0.0572, Train Steps/Sec: 0.59,
1313
+ [2026-01-26 22:15:06] (step=0001095) Train Loss mse: 0.0155, Train Loss ce: 0.0332, Train Steps/Sec: 0.69,
1314
+ [2026-01-26 22:15:07] (step=0001096) Train Loss mse: 0.0166, Train Loss ce: 0.0692, Train Steps/Sec: 0.68,
1315
+ [2026-01-26 22:15:09] (step=0001097) Train Loss mse: 0.0175, Train Loss ce: 0.0863, Train Steps/Sec: 0.68,
1316
+ [2026-01-26 22:15:10] (step=0001098) Train Loss mse: 0.0082, Train Loss ce: 0.0662, Train Steps/Sec: 0.57,
1317
+ [2026-01-26 22:15:12] (step=0001099) Train Loss mse: 0.0139, Train Loss ce: 0.0561, Train Steps/Sec: 0.68,
1318
+ [2026-01-26 22:15:13] (step=0001100) Train Loss mse: 0.0106, Train Loss ce: 0.0682, Train Steps/Sec: 0.68,
1319
+ [2026-01-26 22:15:15] (step=0001101) Train Loss mse: 0.0105, Train Loss ce: 0.0531, Train Steps/Sec: 0.56,
1320
+ [2026-01-26 22:15:17] (step=0001102) Train Loss mse: 0.0123, Train Loss ce: 0.0902, Train Steps/Sec: 0.59,
1321
  [2026-01-26 22:15:18] (step=0001103) Train Loss mse: 0.0096, Train Loss ce: 0.0452, Train Steps/Sec: 0.68,
1322
  [2026-01-26 22:15:20] (step=0001104) Train Loss mse: 0.0135, Train Loss ce: 0.0637, Train Steps/Sec: 0.68,
1323
  [2026-01-26 22:15:21] (step=0001105) Train Loss mse: 0.0077, Train Loss ce: 0.0449, Train Steps/Sec: 0.68,
 
2808
  [2026-01-26 22:57:58] (step=0002587) Train Loss mse: 0.0121, Train Loss ce: 0.0432, Train Steps/Sec: 0.68,
2809
  [2026-01-26 22:57:59] (step=0002588) Train Loss mse: 0.0060, Train Loss ce: 0.0675, Train Steps/Sec: 0.68,
2810
  [2026-01-26 22:58:01] (step=0002589) Train Loss mse: 0.0137, Train Loss ce: 0.0460, Train Steps/Sec: 0.69,
2811
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step3000
2812
+ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
2813
+ [eval debug] first 3 batch fingerprints:
2814
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2815
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2816
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2817
+ ce_avg: 0.04329414293169975, mse_avg: 0.006353132426738739
2818
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step3500
2819
+ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
2820
+ [eval debug] first 3 batch fingerprints:
2821
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2822
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2823
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
2824
+ ce_avg: 0.04275057464838028, mse_avg: 0.0056571937166154385
2825
  [2026-01-26 22:58:02] (step=0002590) Train Loss mse: 0.0089, Train Loss ce: 0.0360, Train Steps/Sec: 0.68,
2826
  [2026-01-26 22:58:04] (step=0002591) Train Loss mse: 0.0089, Train Loss ce: 0.0466, Train Steps/Sec: 0.56,
2827
  [2026-01-26 22:58:06] (step=0002592) Train Loss mse: 0.0074, Train Loss ce: 0.0222, Train Steps/Sec: 0.59,
 
2909
  [2026-01-26 23:00:13] (step=0002674) Train Loss mse: 0.0056, Train Loss ce: 0.0613, Train Steps/Sec: 0.68,
2910
  [2026-01-26 23:00:15] (step=0002675) Train Loss mse: 0.0054, Train Loss ce: 0.0283, Train Steps/Sec: 0.49,
2911
  [2026-01-26 23:00:16] (step=0002676) Train Loss mse: 0.0099, Train Loss ce: 0.0483, Train Steps/Sec: 0.56,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2912
  [2026-01-26 23:00:18] (step=0002677) Train Loss mse: 0.0085, Train Loss ce: 0.0506, Train Steps/Sec: 0.69,
2913
  [2026-01-26 23:00:19] (step=0002678) Train Loss mse: 0.0104, Train Loss ce: 0.0571, Train Steps/Sec: 0.68,
2914
  [2026-01-26 23:00:21] (step=0002679) Train Loss mse: 0.0179, Train Loss ce: 0.0435, Train Steps/Sec: 0.68,
 
3885
  [2026-01-26 23:26:22] (step=0003650) Train Loss mse: 0.0108, Train Loss ce: 0.0542, Train Steps/Sec: 0.49,
3886
  [2026-01-26 23:26:24] (step=0003651) Train Loss mse: 0.0060, Train Loss ce: 0.0508, Train Steps/Sec: 0.57,
3887
  [2026-01-26 23:26:26] (step=0003652) Train Loss mse: 0.0067, Train Loss ce: 0.0496, Train Steps/Sec: 0.68,
3888
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step4000
3889
+ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
3890
+ [eval debug] first 3 batch fingerprints:
3891
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3892
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3893
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3894
+ ce_avg: 0.03896063566207886, mse_avg: 0.005920059513300657
3895
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step4500
3896
+ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
3897
+ [eval debug] first 3 batch fingerprints:
3898
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3899
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3900
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3901
+ ce_avg: 0.03593315929174423, mse_avg: 0.005641990341246128
3902
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_ins_step5000
3903
+ Preparing Dataset vlm_gym_match_equation_sos_celoss_evalonce/vlm_gym_match_equation_sos_val
3904
+ [eval debug] first 3 batch fingerprints:
3905
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3906
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3907
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_evalonce'}]
3908
+ ce_avg: 0.03604895621538162, mse_avg: 0.005358231253921986
3909
  [2026-01-26 23:26:27] (step=0003653) Train Loss mse: 0.0060, Train Loss ce: 0.0390, Train Steps/Sec: 0.68,
3910
  [2026-01-26 23:26:29] (step=0003654) Train Loss mse: 0.0091, Train Loss ce: 0.0192, Train Steps/Sec: 0.68,
3911
  [2026-01-26 23:26:30] (step=0003655) Train Loss mse: 0.0045, Train Loss ce: 0.0370, Train Steps/Sec: 0.58,
 
4066
  [2026-01-26 23:30:41] (step=0003810) Train Loss mse: 0.0053, Train Loss ce: 0.0262, Train Steps/Sec: 0.67,
4067
  [2026-01-26 23:30:43] (step=0003811) Train Loss mse: 0.0043, Train Loss ce: 0.0578, Train Steps/Sec: 0.58,
4068
  [2026-01-26 23:30:44] (step=0003812) Train Loss mse: 0.0063, Train Loss ce: 0.0334, Train Steps/Sec: 0.68,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4069
  [2026-01-26 23:30:46] (step=0003813) Train Loss mse: 0.0074, Train Loss ce: 0.0200, Train Steps/Sec: 0.58,
4070
  [2026-01-26 23:30:48] (step=0003814) Train Loss mse: 0.0077, Train Loss ce: 0.0290, Train Steps/Sec: 0.55,
4071
  [2026-01-26 23:30:49] (step=0003815) Train Loss mse: 0.0057, Train Loss ce: 0.0341, Train Steps/Sec: 0.68,