Junyi42 commited on
Commit
865e627
·
verified ·
1 Parent(s): 46e6c55

Upload checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins

Browse files
checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/wandb/offline-run-20260129_221658-checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins-run0/files/output.log CHANGED
@@ -882,27 +882,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
882
  [2026-01-30 02:11:40] (step=0000871) Train Loss mse: 0.0635, Train Loss ce: 0.0536, Train Steps/Sec: 0.06,
883
  [2026-01-30 02:11:56] (step=0000872) Train Loss mse: 0.0721, Train Loss ce: 0.0497, Train Steps/Sec: 0.07,
884
  [2026-01-30 02:12:11] (step=0000873) Train Loss mse: 0.0725, Train Loss ce: 0.0511, Train Steps/Sec: 0.07,
885
- [2026-01-30 02:12:26] (step=0000874) Train Loss mse: 0.0770, Train Loss ce: 0.0528, Train Steps/Sec: 0.06,
886
- [2026-01-30 02:12:42] (step=0000875) Train Loss mse: 0.0661, Train Loss ce: 0.0531, Train Steps/Sec: 0.06,
887
- [2026-01-30 02:12:57] (step=0000876) Train Loss mse: 0.0701, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
888
- [2026-01-30 02:13:12] (step=0000877) Train Loss mse: 0.0701, Train Loss ce: 0.0489, Train Steps/Sec: 0.06,
889
- [2026-01-30 02:13:29] (step=0000878) Train Loss mse: 0.0672, Train Loss ce: 0.0516, Train Steps/Sec: 0.06,
890
- [2026-01-30 02:13:45] (step=0000879) Train Loss mse: 0.0585, Train Loss ce: 0.0506, Train Steps/Sec: 0.06,
891
- [2026-01-30 02:14:00] (step=0000880) Train Loss mse: 0.0618, Train Loss ce: 0.0550, Train Steps/Sec: 0.07,
892
- [2026-01-30 02:14:15] (step=0000881) Train Loss mse: 0.0649, Train Loss ce: 0.0513, Train Steps/Sec: 0.07,
893
- [2026-01-30 02:14:30] (step=0000882) Train Loss mse: 0.0751, Train Loss ce: 0.0525, Train Steps/Sec: 0.07,
894
- [2026-01-30 02:14:46] (step=0000883) Train Loss mse: 0.0483, Train Loss ce: 0.0483, Train Steps/Sec: 0.06,
895
- [2026-01-30 02:15:01] (step=0000884) Train Loss mse: 0.0715, Train Loss ce: 0.0517, Train Steps/Sec: 0.06,
896
- [2026-01-30 02:15:18] (step=0000885) Train Loss mse: 0.0637, Train Loss ce: 0.0533, Train Steps/Sec: 0.06,
897
- [2026-01-30 02:15:33] (step=0000886) Train Loss mse: 0.0739, Train Loss ce: 0.0564, Train Steps/Sec: 0.07,
898
- [2026-01-30 02:15:50] (step=0000887) Train Loss mse: 0.0569, Train Loss ce: 0.0532, Train Steps/Sec: 0.06,
899
- [2026-01-30 02:16:04] (step=0000888) Train Loss mse: 0.0588, Train Loss ce: 0.0569, Train Steps/Sec: 0.07,
900
- [2026-01-30 02:16:19] (step=0000889) Train Loss mse: 0.0650, Train Loss ce: 0.0505, Train Steps/Sec: 0.07,
901
- [2026-01-30 02:16:34] (step=0000890) Train Loss mse: 0.0644, Train Loss ce: 0.0563, Train Steps/Sec: 0.07,
902
- [2026-01-30 02:16:49] (step=0000891) Train Loss mse: 0.0654, Train Loss ce: 0.0555, Train Steps/Sec: 0.06,
903
- [2026-01-30 02:17:04] (step=0000892) Train Loss mse: 0.0543, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
904
- [2026-01-30 02:17:19] (step=0000893) Train Loss mse: 0.0614, Train Loss ce: 0.0534, Train Steps/Sec: 0.07,
905
- [2026-01-30 02:17:36] (step=0000894) Train Loss mse: 0.0646, Train Loss ce: 0.0524, Train Steps/Sec: 0.06,
906
  FullyShardedDataParallel(
907
  (_fsdp_wrapped_module): Bagel(
908
  (language_model): Qwen2ForCausalLM(
@@ -1089,20 +1068,34 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
1089
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1090
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1091
  ce_avg: 0.056111279875040054, mse_avg: 0.05582037195563316
1092
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step1500
1093
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
1094
- [eval debug] first 3 batch fingerprints:
1095
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1096
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1097
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1098
- ce_avg: 0.06184602156281471, mse_avg: 0.05278971791267395
1099
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2000
1100
  Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
1101
  [eval debug] first 3 batch fingerprints:
1102
  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1103
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1104
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1105
- ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1106
  [2026-01-30 02:17:52] (step=0000895) Train Loss mse: 0.0557, Train Loss ce: 0.0519, Train Steps/Sec: 0.06,
1107
  [2026-01-30 02:18:07] (step=0000896) Train Loss mse: 0.0742, Train Loss ce: 0.0534, Train Steps/Sec: 0.06,
1108
  [2026-01-30 02:18:23] (step=0000897) Train Loss mse: 0.0826, Train Loss ce: 0.0551, Train Steps/Sec: 0.06,
@@ -2313,6 +2306,20 @@ ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
2313
  [2026-01-30 07:34:20] (step=0002102) Train Loss mse: 0.0760, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
2314
  [2026-01-30 07:34:35] (step=0002103) Train Loss mse: 0.0668, Train Loss ce: 0.0457, Train Steps/Sec: 0.07,
2315
  [2026-01-30 07:34:50] (step=0002104) Train Loss mse: 0.0554, Train Loss ce: 0.0456, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2316
  [2026-01-30 07:35:05] (step=0002105) Train Loss mse: 0.0616, Train Loss ce: 0.0466, Train Steps/Sec: 0.07,
2317
  [2026-01-30 07:35:19] (step=0002106) Train Loss mse: 0.0599, Train Loss ce: 0.0531, Train Steps/Sec: 0.07,
2318
  [2026-01-30 07:35:35] (step=0002107) Train Loss mse: 0.0801, Train Loss ce: 0.0488, Train Steps/Sec: 0.06,
@@ -2347,20 +2354,6 @@ ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
2347
  [2026-01-30 07:43:09] (step=0002136) Train Loss mse: 0.0754, Train Loss ce: 0.0465, Train Steps/Sec: 0.06,
2348
  [2026-01-30 07:43:26] (step=0002137) Train Loss mse: 0.0662, Train Loss ce: 0.0484, Train Steps/Sec: 0.06,
2349
  [2026-01-30 07:43:41] (step=0002138) Train Loss mse: 0.0688, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
2350
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2500
2351
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
2352
- [eval debug] first 3 batch fingerprints:
2353
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2354
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2355
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2356
- ce_avg: 0.0595068633556366, mse_avg: 0.05313115939497948
2357
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3000
2358
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
2359
- [eval debug] first 3 batch fingerprints:
2360
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2361
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2362
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2363
- ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
2364
  [2026-01-30 07:43:56] (step=0002139) Train Loss mse: 0.0552, Train Loss ce: 0.0488, Train Steps/Sec: 0.07,
2365
  [2026-01-30 07:44:12] (step=0002140) Train Loss mse: 0.0666, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
2366
  [2026-01-30 07:44:27] (step=0002141) Train Loss mse: 0.0669, Train Loss ce: 0.0468, Train Steps/Sec: 0.07,
@@ -3206,6 +3199,34 @@ ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
3206
  [2026-01-30 11:26:18] (step=0002978) Train Loss mse: 0.0600, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
3207
  [2026-01-30 11:26:33] (step=0002979) Train Loss mse: 0.0572, Train Loss ce: 0.0449, Train Steps/Sec: 0.06,
3208
  [2026-01-30 11:26:48] (step=0002980) Train Loss mse: 0.0565, Train Loss ce: 0.0443, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3209
  [2026-01-30 11:27:04] (step=0002981) Train Loss mse: 0.0733, Train Loss ce: 0.0434, Train Steps/Sec: 0.07,
3210
  [2026-01-30 11:27:20] (step=0002982) Train Loss mse: 0.0724, Train Loss ce: 0.0553, Train Steps/Sec: 0.06,
3211
  [2026-01-30 11:27:36] (step=0002983) Train Loss mse: 0.0568, Train Loss ce: 0.0460, Train Steps/Sec: 0.06,
@@ -3441,20 +3462,6 @@ ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
3441
  [2026-01-30 12:28:00] (step=0003213) Train Loss mse: 0.0647, Train Loss ce: 0.0521, Train Steps/Sec: 0.06,
3442
  [2026-01-30 12:28:16] (step=0003214) Train Loss mse: 0.0573, Train Loss ce: 0.0436, Train Steps/Sec: 0.06,
3443
  [2026-01-30 12:28:31] (step=0003215) Train Loss mse: 0.0523, Train Loss ce: 0.0444, Train Steps/Sec: 0.07,
3444
- [2026-01-30 12:28:47
3445
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3500
3446
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3447
- [eval debug] first 3 batch fingerprints:
3448
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3449
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3450
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3451
- ce_avg: 0.0454382449388504, mse_avg: 0.050951480865478516
3452
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4000
3453
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3454
- [eval debug] first 3 batch fingerprints:
3455
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3456
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3457
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3458
  [2026-01-30 12:28:47] (step=0003216) Train Loss mse: 0.0666, Train Loss ce: 0.0429, Train Steps/Sec: 0.06,
3459
  [2026-01-30 12:29:02] (step=0003217) Train Loss mse: 0.0587, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
3460
  [2026-01-30 12:29:17] (step=0003218) Train Loss mse: 0.0663, Train Loss ce: 0.0479, Train Steps/Sec: 0.07,
@@ -4540,6 +4547,20 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
4540
  [2026-01-30 17:12:18] (step=0004298) Train Loss mse: 0.0616, Train Loss ce: 0.0438, Train Steps/Sec: 0.06,
4541
  [2026-01-30 17:12:34] (step=0004299) Train Loss mse: 0.0613, Train Loss ce: 0.0470, Train Steps/Sec: 0.06,
4542
  [2026-01-30 17:12:50] (step=0004300) Train Loss mse: 0.0674, Train Loss ce: 0.0424, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4543
  [2026-01-30 17:13:06] (step=0004301) Train Loss mse: 0.0708, Train Loss ce: 0.0447, Train Steps/Sec: 0.06,
4544
  [2026-01-30 17:13:23] (step=0004302) Train Loss mse: 0.0632, Train Loss ce: 0.0413, Train Steps/Sec: 0.06,
4545
  [2026-01-30 17:13:38] (step=0004303) Train Loss mse: 0.0644, Train Loss ce: 0.0458, Train Steps/Sec: 0.07,
@@ -4560,20 +4581,6 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
4560
  [2026-01-30 17:17:35] (step=0004318) Train Loss mse: 0.0564, Train Loss ce: 0.0434, Train Steps/Sec: 0.06,
4561
  [2026-01-30 17:17:51] (step=0004319) Train Loss mse: 0.0597, Train Loss ce: 0.0463, Train Steps/Sec: 0.06,
4562
  [2026-01-30 17:18:07] (step=0004320) Train Loss mse: 0.0718, Train Loss ce: 0.0441, Train Steps/Sec: 0.06,
4563
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4500
4564
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
4565
- [eval debug] first 3 batch fingerprints:
4566
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4567
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4568
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4569
- ce_avg: 0.04478031024336815, mse_avg: 0.04991578683257103
4570
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step5000
4571
- Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
4572
- [eval debug] first 3 batch fingerprints:
4573
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4574
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4575
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4576
- ce_avg: 0.04477126523852348, mse_avg: 0.05164656043052673
4577
  [2026-01-30 17:18:22] (step=0004321) Train Loss mse: 0.0604, Train Loss ce: 0.0477, Train Steps/Sec: 0.07,
4578
  [2026-01-30 17:18:39] (step=0004322) Train Loss mse: 0.0561, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
4579
  [2026-01-30 17:18:54] (step=0004323) Train Loss mse: 0.0657, Train Loss ce: 0.0452, Train Steps/Sec: 0.07,
 
882
  [2026-01-30 02:11:40] (step=0000871) Train Loss mse: 0.0635, Train Loss ce: 0.0536, Train Steps/Sec: 0.06,
883
  [2026-01-30 02:11:56] (step=0000872) Train Loss mse: 0.0721, Train Loss ce: 0.0497, Train Steps/Sec: 0.07,
884
  [2026-01-30 02:12:11] (step=0000873) Train Loss mse: 0.0725, Train Loss ce: 0.0511, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  FullyShardedDataParallel(
886
  (_fsdp_wrapped_module): Bagel(
887
  (language_model): Qwen2ForCausalLM(
 
1068
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1069
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1070
  ce_avg: 0.056111279875040054, mse_avg: 0.05582037195563316
1071
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step1000
 
 
 
 
 
 
 
1072
  Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
1073
  [eval debug] first 3 batch fingerprints:
1074
  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1075
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1076
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
1077
+ ce_avg: 0.057540930807590485, mse_avg: 0.054740797728300095
1078
+ [2026-01-30 02:12:26] (step=0000874) Train Loss mse: 0.0770, Train Loss ce: 0.0528, Train Steps/Sec: 0.06,
1079
+ [2026-01-30 02:12:42] (step=0000875) Train Loss mse: 0.0661, Train Loss ce: 0.0531, Train Steps/Sec: 0.06,
1080
+ [2026-01-30 02:12:57] (step=0000876) Train Loss mse: 0.0701, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
1081
+ [2026-01-30 02:13:12] (step=0000877) Train Loss mse: 0.0701, Train Loss ce: 0.0489, Train Steps/Sec: 0.06,
1082
+ [2026-01-30 02:13:29] (step=0000878) Train Loss mse: 0.0672, Train Loss ce: 0.0516, Train Steps/Sec: 0.06,
1083
+ [2026-01-30 02:13:45] (step=0000879) Train Loss mse: 0.0585, Train Loss ce: 0.0506, Train Steps/Sec: 0.06,
1084
+ [2026-01-30 02:14:00] (step=0000880) Train Loss mse: 0.0618, Train Loss ce: 0.0550, Train Steps/Sec: 0.07,
1085
+ [2026-01-30 02:14:15] (step=0000881) Train Loss mse: 0.0649, Train Loss ce: 0.0513, Train Steps/Sec: 0.07,
1086
+ [2026-01-30 02:14:30] (step=0000882) Train Loss mse: 0.0751, Train Loss ce: 0.0525, Train Steps/Sec: 0.07,
1087
+ [2026-01-30 02:14:46] (step=0000883) Train Loss mse: 0.0483, Train Loss ce: 0.0483, Train Steps/Sec: 0.06,
1088
+ [2026-01-30 02:15:01] (step=0000884) Train Loss mse: 0.0715, Train Loss ce: 0.0517, Train Steps/Sec: 0.06,
1089
+ [2026-01-30 02:15:18] (step=0000885) Train Loss mse: 0.0637, Train Loss ce: 0.0533, Train Steps/Sec: 0.06,
1090
+ [2026-01-30 02:15:33] (step=0000886) Train Loss mse: 0.0739, Train Loss ce: 0.0564, Train Steps/Sec: 0.07,
1091
+ [2026-01-30 02:15:50] (step=0000887) Train Loss mse: 0.0569, Train Loss ce: 0.0532, Train Steps/Sec: 0.06,
1092
+ [2026-01-30 02:16:04] (step=0000888) Train Loss mse: 0.0588, Train Loss ce: 0.0569, Train Steps/Sec: 0.07,
1093
+ [2026-01-30 02:16:19] (step=0000889) Train Loss mse: 0.0650, Train Loss ce: 0.0505, Train Steps/Sec: 0.07,
1094
+ [2026-01-30 02:16:34] (step=0000890) Train Loss mse: 0.0644, Train Loss ce: 0.0563, Train Steps/Sec: 0.07,
1095
+ [2026-01-30 02:16:49] (step=0000891) Train Loss mse: 0.0654, Train Loss ce: 0.0555, Train Steps/Sec: 0.06,
1096
+ [2026-01-30 02:17:04] (step=0000892) Train Loss mse: 0.0543, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
1097
+ [2026-01-30 02:17:19] (step=0000893) Train Loss mse: 0.0614, Train Loss ce: 0.0534, Train Steps/Sec: 0.07,
1098
+ [2026-01-30 02:17:36] (step=0000894) Train Loss mse: 0.0646, Train Loss ce: 0.0524, Train Steps/Sec: 0.06,
1099
  [2026-01-30 02:17:52] (step=0000895) Train Loss mse: 0.0557, Train Loss ce: 0.0519, Train Steps/Sec: 0.06,
1100
  [2026-01-30 02:18:07] (step=0000896) Train Loss mse: 0.0742, Train Loss ce: 0.0534, Train Steps/Sec: 0.06,
1101
  [2026-01-30 02:18:23] (step=0000897) Train Loss mse: 0.0826, Train Loss ce: 0.0551, Train Steps/Sec: 0.06,
 
2306
  [2026-01-30 07:34:20] (step=0002102) Train Loss mse: 0.0760, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
2307
  [2026-01-30 07:34:35] (step=0002103) Train Loss mse: 0.0668, Train Loss ce: 0.0457, Train Steps/Sec: 0.07,
2308
  [2026-01-30 07:34:50] (step=0002104) Train Loss mse: 0.0554, Train Loss ce: 0.0456, Train Steps/Sec: 0.07,
2309
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step1500
2310
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
2311
+ [eval debug] first 3 batch fingerprints:
2312
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2313
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2314
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2315
+ ce_avg: 0.06184602156281471, mse_avg: 0.05278971791267395
2316
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2000
2317
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
2318
+ [eval debug] first 3 batch fingerprints:
2319
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2320
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2321
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
2322
+ ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
2323
  [2026-01-30 07:35:05] (step=0002105) Train Loss mse: 0.0616, Train Loss ce: 0.0466, Train Steps/Sec: 0.07,
2324
  [2026-01-30 07:35:19] (step=0002106) Train Loss mse: 0.0599, Train Loss ce: 0.0531, Train Steps/Sec: 0.07,
2325
  [2026-01-30 07:35:35] (step=0002107) Train Loss mse: 0.0801, Train Loss ce: 0.0488, Train Steps/Sec: 0.06,
 
2354
  [2026-01-30 07:43:09] (step=0002136) Train Loss mse: 0.0754, Train Loss ce: 0.0465, Train Steps/Sec: 0.06,
2355
  [2026-01-30 07:43:26] (step=0002137) Train Loss mse: 0.0662, Train Loss ce: 0.0484, Train Steps/Sec: 0.06,
2356
  [2026-01-30 07:43:41] (step=0002138) Train Loss mse: 0.0688, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2357
  [2026-01-30 07:43:56] (step=0002139) Train Loss mse: 0.0552, Train Loss ce: 0.0488, Train Steps/Sec: 0.07,
2358
  [2026-01-30 07:44:12] (step=0002140) Train Loss mse: 0.0666, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
2359
  [2026-01-30 07:44:27] (step=0002141) Train Loss mse: 0.0669, Train Loss ce: 0.0468, Train Steps/Sec: 0.07,
 
3199
  [2026-01-30 11:26:18] (step=0002978) Train Loss mse: 0.0600, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
3200
  [2026-01-30 11:26:33] (step=0002979) Train Loss mse: 0.0572, Train Loss ce: 0.0449, Train Steps/Sec: 0.06,
3201
  [2026-01-30 11:26:48] (step=0002980) Train Loss mse: 0.0565, Train Loss ce: 0.0443, Train Steps/Sec: 0.07,
3202
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2500
3203
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3204
+ [eval debug] first 3 batch fingerprints:
3205
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3206
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3207
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3208
+ ce_avg: 0.0595068633556366, mse_avg: 0.05313115939497948
3209
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3000
3210
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3211
+ [eval debug] first 3 batch fingerprints:
3212
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3213
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3214
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3215
+ ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
3216
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3500
3217
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3218
+ [eval debug] first 3 batch fingerprints:
3219
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3220
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3221
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3222
+ ce_avg: 0.0454382449388504, mse_avg: 0.050951480865478516
3223
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4000
3224
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
3225
+ [eval debug] first 3 batch fingerprints:
3226
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3227
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3228
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
3229
+ ce_avg: 0.04481803998351097, mse_avg: 0.051003698259592056
3230
  [2026-01-30 11:27:04] (step=0002981) Train Loss mse: 0.0733, Train Loss ce: 0.0434, Train Steps/Sec: 0.07,
3231
  [2026-01-30 11:27:20] (step=0002982) Train Loss mse: 0.0724, Train Loss ce: 0.0553, Train Steps/Sec: 0.06,
3232
  [2026-01-30 11:27:36] (step=0002983) Train Loss mse: 0.0568, Train Loss ce: 0.0460, Train Steps/Sec: 0.06,
 
3462
  [2026-01-30 12:28:00] (step=0003213) Train Loss mse: 0.0647, Train Loss ce: 0.0521, Train Steps/Sec: 0.06,
3463
  [2026-01-30 12:28:16] (step=0003214) Train Loss mse: 0.0573, Train Loss ce: 0.0436, Train Steps/Sec: 0.06,
3464
  [2026-01-30 12:28:31] (step=0003215) Train Loss mse: 0.0523, Train Loss ce: 0.0444, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3465
  [2026-01-30 12:28:47] (step=0003216) Train Loss mse: 0.0666, Train Loss ce: 0.0429, Train Steps/Sec: 0.06,
3466
  [2026-01-30 12:29:02] (step=0003217) Train Loss mse: 0.0587, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
3467
  [2026-01-30 12:29:17] (step=0003218) Train Loss mse: 0.0663, Train Loss ce: 0.0479, Train Steps/Sec: 0.07,
 
4547
  [2026-01-30 17:12:18] (step=0004298) Train Loss mse: 0.0616, Train Loss ce: 0.0438, Train Steps/Sec: 0.06,
4548
  [2026-01-30 17:12:34] (step=0004299) Train Loss mse: 0.0613, Train Loss ce: 0.0470, Train Steps/Sec: 0.06,
4549
  [2026-01-30 17:12:50] (step=0004300) Train Loss mse: 0.0674, Train Loss ce: 0.0424, Train Steps/Sec: 0.06,
4550
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4500
4551
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
4552
+ [eval debug] first 3 batch fingerprints:
4553
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4554
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4555
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4556
+ ce_avg: 0.04478031024336815, mse_avg: 0.04991578683257103
4557
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step5000
4558
+ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
4559
+ [eval debug] first 3 batch fingerprints:
4560
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4561
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4562
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
4563
+ ce_avg: 0.04477126523852348, mse_avg: 0.05164656043052673
4564
  [2026-01-30 17:13:06] (step=0004301) Train Loss mse: 0.0708, Train Loss ce: 0.0447, Train Steps/Sec: 0.06,
4565
  [2026-01-30 17:13:23] (step=0004302) Train Loss mse: 0.0632, Train Loss ce: 0.0413, Train Steps/Sec: 0.06,
4566
  [2026-01-30 17:13:38] (step=0004303) Train Loss mse: 0.0644, Train Loss ce: 0.0458, Train Steps/Sec: 0.07,
 
4581
  [2026-01-30 17:17:35] (step=0004318) Train Loss mse: 0.0564, Train Loss ce: 0.0434, Train Steps/Sec: 0.06,
4582
  [2026-01-30 17:17:51] (step=0004319) Train Loss mse: 0.0597, Train Loss ce: 0.0463, Train Steps/Sec: 0.06,
4583
  [2026-01-30 17:18:07] (step=0004320) Train Loss mse: 0.0718, Train Loss ce: 0.0441, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4584
  [2026-01-30 17:18:22] (step=0004321) Train Loss mse: 0.0604, Train Loss ce: 0.0477, Train Steps/Sec: 0.07,
4585
  [2026-01-30 17:18:39] (step=0004322) Train Loss mse: 0.0561, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
4586
  [2026-01-30 17:18:54] (step=0004323) Train Loss mse: 0.0657, Train Loss ce: 0.0452, Train Steps/Sec: 0.07,