Upload checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins
Browse files
checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/wandb/offline-run-20260129_221658-checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins-run0/files/output.log
CHANGED
|
@@ -882,27 +882,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 882 |
[[34m2026-01-30 02:11:40[39m] (step=0000871) Train Loss mse: 0.0635, Train Loss ce: 0.0536, Train Steps/Sec: 0.06,
|
| 883 |
[[34m2026-01-30 02:11:56[39m] (step=0000872) Train Loss mse: 0.0721, Train Loss ce: 0.0497, Train Steps/Sec: 0.07,
|
| 884 |
[[34m2026-01-30 02:12:11[39m] (step=0000873) Train Loss mse: 0.0725, Train Loss ce: 0.0511, Train Steps/Sec: 0.07,
|
| 885 |
-
[[34m2026-01-30 02:12:26[39m] (step=0000874) Train Loss mse: 0.0770, Train Loss ce: 0.0528, Train Steps/Sec: 0.06,
|
| 886 |
-
[[34m2026-01-30 02:12:42[39m] (step=0000875) Train Loss mse: 0.0661, Train Loss ce: 0.0531, Train Steps/Sec: 0.06,
|
| 887 |
-
[[34m2026-01-30 02:12:57[39m] (step=0000876) Train Loss mse: 0.0701, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
|
| 888 |
-
[[34m2026-01-30 02:13:12[39m] (step=0000877) Train Loss mse: 0.0701, Train Loss ce: 0.0489, Train Steps/Sec: 0.06,
|
| 889 |
-
[[34m2026-01-30 02:13:29[39m] (step=0000878) Train Loss mse: 0.0672, Train Loss ce: 0.0516, Train Steps/Sec: 0.06,
|
| 890 |
-
[[34m2026-01-30 02:13:45[39m] (step=0000879) Train Loss mse: 0.0585, Train Loss ce: 0.0506, Train Steps/Sec: 0.06,
|
| 891 |
-
[[34m2026-01-30 02:14:00[39m] (step=0000880) Train Loss mse: 0.0618, Train Loss ce: 0.0550, Train Steps/Sec: 0.07,
|
| 892 |
-
[[34m2026-01-30 02:14:15[39m] (step=0000881) Train Loss mse: 0.0649, Train Loss ce: 0.0513, Train Steps/Sec: 0.07,
|
| 893 |
-
[[34m2026-01-30 02:14:30[39m] (step=0000882) Train Loss mse: 0.0751, Train Loss ce: 0.0525, Train Steps/Sec: 0.07,
|
| 894 |
-
[[34m2026-01-30 02:14:46[39m] (step=0000883) Train Loss mse: 0.0483, Train Loss ce: 0.0483, Train Steps/Sec: 0.06,
|
| 895 |
-
[[34m2026-01-30 02:15:01[39m] (step=0000884) Train Loss mse: 0.0715, Train Loss ce: 0.0517, Train Steps/Sec: 0.06,
|
| 896 |
-
[[34m2026-01-30 02:15:18[39m] (step=0000885) Train Loss mse: 0.0637, Train Loss ce: 0.0533, Train Steps/Sec: 0.06,
|
| 897 |
-
[[34m2026-01-30 02:15:33[39m] (step=0000886) Train Loss mse: 0.0739, Train Loss ce: 0.0564, Train Steps/Sec: 0.07,
|
| 898 |
-
[[34m2026-01-30 02:15:50[39m] (step=0000887) Train Loss mse: 0.0569, Train Loss ce: 0.0532, Train Steps/Sec: 0.06,
|
| 899 |
-
[[34m2026-01-30 02:16:04[39m] (step=0000888) Train Loss mse: 0.0588, Train Loss ce: 0.0569, Train Steps/Sec: 0.07,
|
| 900 |
-
[[34m2026-01-30 02:16:19[39m] (step=0000889) Train Loss mse: 0.0650, Train Loss ce: 0.0505, Train Steps/Sec: 0.07,
|
| 901 |
-
[[34m2026-01-30 02:16:34[39m] (step=0000890) Train Loss mse: 0.0644, Train Loss ce: 0.0563, Train Steps/Sec: 0.07,
|
| 902 |
-
[[34m2026-01-30 02:16:49[39m] (step=0000891) Train Loss mse: 0.0654, Train Loss ce: 0.0555, Train Steps/Sec: 0.06,
|
| 903 |
-
[[34m2026-01-30 02:17:04[39m] (step=0000892) Train Loss mse: 0.0543, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
|
| 904 |
-
[[34m2026-01-30 02:17:19[39m] (step=0000893) Train Loss mse: 0.0614, Train Loss ce: 0.0534, Train Steps/Sec: 0.07,
|
| 905 |
-
[[34m2026-01-30 02:17:36[39m] (step=0000894) Train Loss mse: 0.0646, Train Loss ce: 0.0524, Train Steps/Sec: 0.06,
|
| 906 |
FullyShardedDataParallel(
|
| 907 |
(_fsdp_wrapped_module): Bagel(
|
| 908 |
(language_model): Qwen2ForCausalLM(
|
|
@@ -1089,20 +1068,34 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
|
|
| 1089 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1090 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1091 |
ce_avg: 0.056111279875040054, mse_avg: 0.05582037195563316
|
| 1092 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is
|
| 1093 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1094 |
-
[eval debug] first 3 batch fingerprints:
|
| 1095 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1096 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1097 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1098 |
-
ce_avg: 0.06184602156281471, mse_avg: 0.05278971791267395
|
| 1099 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2000
|
| 1100 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1101 |
[eval debug] first 3 batch fingerprints:
|
| 1102 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1103 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1104 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1105 |
-
ce_avg: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1106 |
[[34m2026-01-30 02:17:52[39m] (step=0000895) Train Loss mse: 0.0557, Train Loss ce: 0.0519, Train Steps/Sec: 0.06,
|
| 1107 |
[[34m2026-01-30 02:18:07[39m] (step=0000896) Train Loss mse: 0.0742, Train Loss ce: 0.0534, Train Steps/Sec: 0.06,
|
| 1108 |
[[34m2026-01-30 02:18:23[39m] (step=0000897) Train Loss mse: 0.0826, Train Loss ce: 0.0551, Train Steps/Sec: 0.06,
|
|
@@ -2313,6 +2306,20 @@ ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
|
|
| 2313 |
[[34m2026-01-30 07:34:20[39m] (step=0002102) Train Loss mse: 0.0760, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
|
| 2314 |
[[34m2026-01-30 07:34:35[39m] (step=0002103) Train Loss mse: 0.0668, Train Loss ce: 0.0457, Train Steps/Sec: 0.07,
|
| 2315 |
[[34m2026-01-30 07:34:50[39m] (step=0002104) Train Loss mse: 0.0554, Train Loss ce: 0.0456, Train Steps/Sec: 0.07,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2316 |
[[34m2026-01-30 07:35:05[39m] (step=0002105) Train Loss mse: 0.0616, Train Loss ce: 0.0466, Train Steps/Sec: 0.07,
|
| 2317 |
[[34m2026-01-30 07:35:19[39m] (step=0002106) Train Loss mse: 0.0599, Train Loss ce: 0.0531, Train Steps/Sec: 0.07,
|
| 2318 |
[[34m2026-01-30 07:35:35[39m] (step=0002107) Train Loss mse: 0.0801, Train Loss ce: 0.0488, Train Steps/Sec: 0.06,
|
|
@@ -2347,20 +2354,6 @@ ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
|
|
| 2347 |
[[34m2026-01-30 07:43:09[39m] (step=0002136) Train Loss mse: 0.0754, Train Loss ce: 0.0465, Train Steps/Sec: 0.06,
|
| 2348 |
[[34m2026-01-30 07:43:26[39m] (step=0002137) Train Loss mse: 0.0662, Train Loss ce: 0.0484, Train Steps/Sec: 0.06,
|
| 2349 |
[[34m2026-01-30 07:43:41[39m] (step=0002138) Train Loss mse: 0.0688, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
|
| 2350 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2500
|
| 2351 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2352 |
-
[eval debug] first 3 batch fingerprints:
|
| 2353 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2354 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2355 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2356 |
-
ce_avg: 0.0595068633556366, mse_avg: 0.05313115939497948
|
| 2357 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3000
|
| 2358 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2359 |
-
[eval debug] first 3 batch fingerprints:
|
| 2360 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2361 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2362 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2363 |
-
ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
|
| 2364 |
[[34m2026-01-30 07:43:56[39m] (step=0002139) Train Loss mse: 0.0552, Train Loss ce: 0.0488, Train Steps/Sec: 0.07,
|
| 2365 |
[[34m2026-01-30 07:44:12[39m] (step=0002140) Train Loss mse: 0.0666, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
|
| 2366 |
[[34m2026-01-30 07:44:27[39m] (step=0002141) Train Loss mse: 0.0669, Train Loss ce: 0.0468, Train Steps/Sec: 0.07,
|
|
@@ -3206,6 +3199,34 @@ ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
|
|
| 3206 |
[[34m2026-01-30 11:26:18[39m] (step=0002978) Train Loss mse: 0.0600, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
|
| 3207 |
[[34m2026-01-30 11:26:33[39m] (step=0002979) Train Loss mse: 0.0572, Train Loss ce: 0.0449, Train Steps/Sec: 0.06,
|
| 3208 |
[[34m2026-01-30 11:26:48[39m] (step=0002980) Train Loss mse: 0.0565, Train Loss ce: 0.0443, Train Steps/Sec: 0.07,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3209 |
[[34m2026-01-30 11:27:04[39m] (step=0002981) Train Loss mse: 0.0733, Train Loss ce: 0.0434, Train Steps/Sec: 0.07,
|
| 3210 |
[[34m2026-01-30 11:27:20[39m] (step=0002982) Train Loss mse: 0.0724, Train Loss ce: 0.0553, Train Steps/Sec: 0.06,
|
| 3211 |
[[34m2026-01-30 11:27:36[39m] (step=0002983) Train Loss mse: 0.0568, Train Loss ce: 0.0460, Train Steps/Sec: 0.06,
|
|
@@ -3441,20 +3462,6 @@ ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
|
|
| 3441 |
[[34m2026-01-30 12:28:00[39m] (step=0003213) Train Loss mse: 0.0647, Train Loss ce: 0.0521, Train Steps/Sec: 0.06,
|
| 3442 |
[[34m2026-01-30 12:28:16[39m] (step=0003214) Train Loss mse: 0.0573, Train Loss ce: 0.0436, Train Steps/Sec: 0.06,
|
| 3443 |
[[34m2026-01-30 12:28:31[39m] (step=0003215) Train Loss mse: 0.0523, Train Loss ce: 0.0444, Train Steps/Sec: 0.07,
|
| 3444 |
-
[[34m2026-01-30 12:28:47
|
| 3445 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3500
|
| 3446 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3447 |
-
[eval debug] first 3 batch fingerprints:
|
| 3448 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3449 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3450 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3451 |
-
ce_avg: 0.0454382449388504, mse_avg: 0.050951480865478516
|
| 3452 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4000
|
| 3453 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3454 |
-
[eval debug] first 3 batch fingerprints:
|
| 3455 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3456 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3457 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3458 |
[[34m2026-01-30 12:28:47[39m] (step=0003216) Train Loss mse: 0.0666, Train Loss ce: 0.0429, Train Steps/Sec: 0.06,
|
| 3459 |
[[34m2026-01-30 12:29:02[39m] (step=0003217) Train Loss mse: 0.0587, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
|
| 3460 |
[[34m2026-01-30 12:29:17[39m] (step=0003218) Train Loss mse: 0.0663, Train Loss ce: 0.0479, Train Steps/Sec: 0.07,
|
|
@@ -4540,6 +4547,20 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
|
|
| 4540 |
[[34m2026-01-30 17:12:18[39m] (step=0004298) Train Loss mse: 0.0616, Train Loss ce: 0.0438, Train Steps/Sec: 0.06,
|
| 4541 |
[[34m2026-01-30 17:12:34[39m] (step=0004299) Train Loss mse: 0.0613, Train Loss ce: 0.0470, Train Steps/Sec: 0.06,
|
| 4542 |
[[34m2026-01-30 17:12:50[39m] (step=0004300) Train Loss mse: 0.0674, Train Loss ce: 0.0424, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4543 |
[[34m2026-01-30 17:13:06[39m] (step=0004301) Train Loss mse: 0.0708, Train Loss ce: 0.0447, Train Steps/Sec: 0.06,
|
| 4544 |
[[34m2026-01-30 17:13:23[39m] (step=0004302) Train Loss mse: 0.0632, Train Loss ce: 0.0413, Train Steps/Sec: 0.06,
|
| 4545 |
[[34m2026-01-30 17:13:38[39m] (step=0004303) Train Loss mse: 0.0644, Train Loss ce: 0.0458, Train Steps/Sec: 0.07,
|
|
@@ -4560,20 +4581,6 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalo
|
|
| 4560 |
[[34m2026-01-30 17:17:35[39m] (step=0004318) Train Loss mse: 0.0564, Train Loss ce: 0.0434, Train Steps/Sec: 0.06,
|
| 4561 |
[[34m2026-01-30 17:17:51[39m] (step=0004319) Train Loss mse: 0.0597, Train Loss ce: 0.0463, Train Steps/Sec: 0.06,
|
| 4562 |
[[34m2026-01-30 17:18:07[39m] (step=0004320) Train Loss mse: 0.0718, Train Loss ce: 0.0441, Train Steps/Sec: 0.06,
|
| 4563 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4500
|
| 4564 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4565 |
-
[eval debug] first 3 batch fingerprints:
|
| 4566 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4567 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4568 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4569 |
-
ce_avg: 0.04478031024336815, mse_avg: 0.04991578683257103
|
| 4570 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step5000
|
| 4571 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4572 |
-
[eval debug] first 3 batch fingerprints:
|
| 4573 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4574 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4575 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4576 |
-
ce_avg: 0.04477126523852348, mse_avg: 0.05164656043052673
|
| 4577 |
[[34m2026-01-30 17:18:22[39m] (step=0004321) Train Loss mse: 0.0604, Train Loss ce: 0.0477, Train Steps/Sec: 0.07,
|
| 4578 |
[[34m2026-01-30 17:18:39[39m] (step=0004322) Train Loss mse: 0.0561, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
|
| 4579 |
[[34m2026-01-30 17:18:54[39m] (step=0004323) Train Loss mse: 0.0657, Train Loss ce: 0.0452, Train Steps/Sec: 0.07,
|
|
|
|
| 882 |
[[34m2026-01-30 02:11:40[39m] (step=0000871) Train Loss mse: 0.0635, Train Loss ce: 0.0536, Train Steps/Sec: 0.06,
|
| 883 |
[[34m2026-01-30 02:11:56[39m] (step=0000872) Train Loss mse: 0.0721, Train Loss ce: 0.0497, Train Steps/Sec: 0.07,
|
| 884 |
[[34m2026-01-30 02:12:11[39m] (step=0000873) Train Loss mse: 0.0725, Train Loss ce: 0.0511, Train Steps/Sec: 0.07,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 885 |
FullyShardedDataParallel(
|
| 886 |
(_fsdp_wrapped_module): Bagel(
|
| 887 |
(language_model): Qwen2ForCausalLM(
|
|
|
|
| 1068 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1069 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1070 |
ce_avg: 0.056111279875040054, mse_avg: 0.05582037195563316
|
| 1071 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1072 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1073 |
[eval debug] first 3 batch fingerprints:
|
| 1074 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1075 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1076 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 1077 |
+
ce_avg: 0.057540930807590485, mse_avg: 0.054740797728300095
|
| 1078 |
+
[[34m2026-01-30 02:12:26[39m] (step=0000874) Train Loss mse: 0.0770, Train Loss ce: 0.0528, Train Steps/Sec: 0.06,
|
| 1079 |
+
[[34m2026-01-30 02:12:42[39m] (step=0000875) Train Loss mse: 0.0661, Train Loss ce: 0.0531, Train Steps/Sec: 0.06,
|
| 1080 |
+
[[34m2026-01-30 02:12:57[39m] (step=0000876) Train Loss mse: 0.0701, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
|
| 1081 |
+
[[34m2026-01-30 02:13:12[39m] (step=0000877) Train Loss mse: 0.0701, Train Loss ce: 0.0489, Train Steps/Sec: 0.06,
|
| 1082 |
+
[[34m2026-01-30 02:13:29[39m] (step=0000878) Train Loss mse: 0.0672, Train Loss ce: 0.0516, Train Steps/Sec: 0.06,
|
| 1083 |
+
[[34m2026-01-30 02:13:45[39m] (step=0000879) Train Loss mse: 0.0585, Train Loss ce: 0.0506, Train Steps/Sec: 0.06,
|
| 1084 |
+
[[34m2026-01-30 02:14:00[39m] (step=0000880) Train Loss mse: 0.0618, Train Loss ce: 0.0550, Train Steps/Sec: 0.07,
|
| 1085 |
+
[[34m2026-01-30 02:14:15[39m] (step=0000881) Train Loss mse: 0.0649, Train Loss ce: 0.0513, Train Steps/Sec: 0.07,
|
| 1086 |
+
[[34m2026-01-30 02:14:30[39m] (step=0000882) Train Loss mse: 0.0751, Train Loss ce: 0.0525, Train Steps/Sec: 0.07,
|
| 1087 |
+
[[34m2026-01-30 02:14:46[39m] (step=0000883) Train Loss mse: 0.0483, Train Loss ce: 0.0483, Train Steps/Sec: 0.06,
|
| 1088 |
+
[[34m2026-01-30 02:15:01[39m] (step=0000884) Train Loss mse: 0.0715, Train Loss ce: 0.0517, Train Steps/Sec: 0.06,
|
| 1089 |
+
[[34m2026-01-30 02:15:18[39m] (step=0000885) Train Loss mse: 0.0637, Train Loss ce: 0.0533, Train Steps/Sec: 0.06,
|
| 1090 |
+
[[34m2026-01-30 02:15:33[39m] (step=0000886) Train Loss mse: 0.0739, Train Loss ce: 0.0564, Train Steps/Sec: 0.07,
|
| 1091 |
+
[[34m2026-01-30 02:15:50[39m] (step=0000887) Train Loss mse: 0.0569, Train Loss ce: 0.0532, Train Steps/Sec: 0.06,
|
| 1092 |
+
[[34m2026-01-30 02:16:04[39m] (step=0000888) Train Loss mse: 0.0588, Train Loss ce: 0.0569, Train Steps/Sec: 0.07,
|
| 1093 |
+
[[34m2026-01-30 02:16:19[39m] (step=0000889) Train Loss mse: 0.0650, Train Loss ce: 0.0505, Train Steps/Sec: 0.07,
|
| 1094 |
+
[[34m2026-01-30 02:16:34[39m] (step=0000890) Train Loss mse: 0.0644, Train Loss ce: 0.0563, Train Steps/Sec: 0.07,
|
| 1095 |
+
[[34m2026-01-30 02:16:49[39m] (step=0000891) Train Loss mse: 0.0654, Train Loss ce: 0.0555, Train Steps/Sec: 0.06,
|
| 1096 |
+
[[34m2026-01-30 02:17:04[39m] (step=0000892) Train Loss mse: 0.0543, Train Loss ce: 0.0541, Train Steps/Sec: 0.07,
|
| 1097 |
+
[[34m2026-01-30 02:17:19[39m] (step=0000893) Train Loss mse: 0.0614, Train Loss ce: 0.0534, Train Steps/Sec: 0.07,
|
| 1098 |
+
[[34m2026-01-30 02:17:36[39m] (step=0000894) Train Loss mse: 0.0646, Train Loss ce: 0.0524, Train Steps/Sec: 0.06,
|
| 1099 |
[[34m2026-01-30 02:17:52[39m] (step=0000895) Train Loss mse: 0.0557, Train Loss ce: 0.0519, Train Steps/Sec: 0.06,
|
| 1100 |
[[34m2026-01-30 02:18:07[39m] (step=0000896) Train Loss mse: 0.0742, Train Loss ce: 0.0534, Train Steps/Sec: 0.06,
|
| 1101 |
[[34m2026-01-30 02:18:23[39m] (step=0000897) Train Loss mse: 0.0826, Train Loss ce: 0.0551, Train Steps/Sec: 0.06,
|
|
|
|
| 2306 |
[[34m2026-01-30 07:34:20[39m] (step=0002102) Train Loss mse: 0.0760, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
|
| 2307 |
[[34m2026-01-30 07:34:35[39m] (step=0002103) Train Loss mse: 0.0668, Train Loss ce: 0.0457, Train Steps/Sec: 0.07,
|
| 2308 |
[[34m2026-01-30 07:34:50[39m] (step=0002104) Train Loss mse: 0.0554, Train Loss ce: 0.0456, Train Steps/Sec: 0.07,
|
| 2309 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step1500
|
| 2310 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2311 |
+
[eval debug] first 3 batch fingerprints:
|
| 2312 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2313 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2314 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2315 |
+
ce_avg: 0.06184602156281471, mse_avg: 0.05278971791267395
|
| 2316 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2000
|
| 2317 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2318 |
+
[eval debug] first 3 batch fingerprints:
|
| 2319 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2320 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2321 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 2322 |
+
ce_avg: 0.05899278074502945, mse_avg: 0.052241820842027664
|
| 2323 |
[[34m2026-01-30 07:35:05[39m] (step=0002105) Train Loss mse: 0.0616, Train Loss ce: 0.0466, Train Steps/Sec: 0.07,
|
| 2324 |
[[34m2026-01-30 07:35:19[39m] (step=0002106) Train Loss mse: 0.0599, Train Loss ce: 0.0531, Train Steps/Sec: 0.07,
|
| 2325 |
[[34m2026-01-30 07:35:35[39m] (step=0002107) Train Loss mse: 0.0801, Train Loss ce: 0.0488, Train Steps/Sec: 0.06,
|
|
|
|
| 2354 |
[[34m2026-01-30 07:43:09[39m] (step=0002136) Train Loss mse: 0.0754, Train Loss ce: 0.0465, Train Steps/Sec: 0.06,
|
| 2355 |
[[34m2026-01-30 07:43:26[39m] (step=0002137) Train Loss mse: 0.0662, Train Loss ce: 0.0484, Train Steps/Sec: 0.06,
|
| 2356 |
[[34m2026-01-30 07:43:41[39m] (step=0002138) Train Loss mse: 0.0688, Train Loss ce: 0.0494, Train Steps/Sec: 0.07,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2357 |
[[34m2026-01-30 07:43:56[39m] (step=0002139) Train Loss mse: 0.0552, Train Loss ce: 0.0488, Train Steps/Sec: 0.07,
|
| 2358 |
[[34m2026-01-30 07:44:12[39m] (step=0002140) Train Loss mse: 0.0666, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
|
| 2359 |
[[34m2026-01-30 07:44:27[39m] (step=0002141) Train Loss mse: 0.0669, Train Loss ce: 0.0468, Train Steps/Sec: 0.07,
|
|
|
|
| 3199 |
[[34m2026-01-30 11:26:18[39m] (step=0002978) Train Loss mse: 0.0600, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
|
| 3200 |
[[34m2026-01-30 11:26:33[39m] (step=0002979) Train Loss mse: 0.0572, Train Loss ce: 0.0449, Train Steps/Sec: 0.06,
|
| 3201 |
[[34m2026-01-30 11:26:48[39m] (step=0002980) Train Loss mse: 0.0565, Train Loss ce: 0.0443, Train Steps/Sec: 0.07,
|
| 3202 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step2500
|
| 3203 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3204 |
+
[eval debug] first 3 batch fingerprints:
|
| 3205 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3206 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3207 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3208 |
+
ce_avg: 0.0595068633556366, mse_avg: 0.05313115939497948
|
| 3209 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3000
|
| 3210 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3211 |
+
[eval debug] first 3 batch fingerprints:
|
| 3212 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3213 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3214 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3215 |
+
ce_avg: 0.04585418477654457, mse_avg: 0.050133317708969116
|
| 3216 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step3500
|
| 3217 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3218 |
+
[eval debug] first 3 batch fingerprints:
|
| 3219 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3220 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3221 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3222 |
+
ce_avg: 0.0454382449388504, mse_avg: 0.050951480865478516
|
| 3223 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4000
|
| 3224 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3225 |
+
[eval debug] first 3 batch fingerprints:
|
| 3226 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3227 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3228 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 3229 |
+
ce_avg: 0.04481803998351097, mse_avg: 0.051003698259592056
|
| 3230 |
[[34m2026-01-30 11:27:04[39m] (step=0002981) Train Loss mse: 0.0733, Train Loss ce: 0.0434, Train Steps/Sec: 0.07,
|
| 3231 |
[[34m2026-01-30 11:27:20[39m] (step=0002982) Train Loss mse: 0.0724, Train Loss ce: 0.0553, Train Steps/Sec: 0.06,
|
| 3232 |
[[34m2026-01-30 11:27:36[39m] (step=0002983) Train Loss mse: 0.0568, Train Loss ce: 0.0460, Train Steps/Sec: 0.06,
|
|
|
|
| 3462 |
[[34m2026-01-30 12:28:00[39m] (step=0003213) Train Loss mse: 0.0647, Train Loss ce: 0.0521, Train Steps/Sec: 0.06,
|
| 3463 |
[[34m2026-01-30 12:28:16[39m] (step=0003214) Train Loss mse: 0.0573, Train Loss ce: 0.0436, Train Steps/Sec: 0.06,
|
| 3464 |
[[34m2026-01-30 12:28:31[39m] (step=0003215) Train Loss mse: 0.0523, Train Loss ce: 0.0444, Train Steps/Sec: 0.07,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3465 |
[[34m2026-01-30 12:28:47[39m] (step=0003216) Train Loss mse: 0.0666, Train Loss ce: 0.0429, Train Steps/Sec: 0.06,
|
| 3466 |
[[34m2026-01-30 12:29:02[39m] (step=0003217) Train Loss mse: 0.0587, Train Loss ce: 0.0472, Train Steps/Sec: 0.06,
|
| 3467 |
[[34m2026-01-30 12:29:17[39m] (step=0003218) Train Loss mse: 0.0663, Train Loss ce: 0.0479, Train Steps/Sec: 0.07,
|
|
|
|
| 4547 |
[[34m2026-01-30 17:12:18[39m] (step=0004298) Train Loss mse: 0.0616, Train Loss ce: 0.0438, Train Steps/Sec: 0.06,
|
| 4548 |
[[34m2026-01-30 17:12:34[39m] (step=0004299) Train Loss mse: 0.0613, Train Loss ce: 0.0470, Train Steps/Sec: 0.06,
|
| 4549 |
[[34m2026-01-30 17:12:50[39m] (step=0004300) Train Loss mse: 0.0674, Train Loss ce: 0.0424, Train Steps/Sec: 0.06,
|
| 4550 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step4500
|
| 4551 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4552 |
+
[eval debug] first 3 batch fingerprints:
|
| 4553 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4554 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4555 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4556 |
+
ce_avg: 0.04478031024336815, mse_avg: 0.04991578683257103
|
| 4557 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_ins_step5000
|
| 4558 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4559 |
+
[eval debug] first 3 batch fingerprints:
|
| 4560 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4561 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4562 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_evalonce'}]
|
| 4563 |
+
ce_avg: 0.04477126523852348, mse_avg: 0.05164656043052673
|
| 4564 |
[[34m2026-01-30 17:13:06[39m] (step=0004301) Train Loss mse: 0.0708, Train Loss ce: 0.0447, Train Steps/Sec: 0.06,
|
| 4565 |
[[34m2026-01-30 17:13:23[39m] (step=0004302) Train Loss mse: 0.0632, Train Loss ce: 0.0413, Train Steps/Sec: 0.06,
|
| 4566 |
[[34m2026-01-30 17:13:38[39m] (step=0004303) Train Loss mse: 0.0644, Train Loss ce: 0.0458, Train Steps/Sec: 0.07,
|
|
|
|
| 4581 |
[[34m2026-01-30 17:17:35[39m] (step=0004318) Train Loss mse: 0.0564, Train Loss ce: 0.0434, Train Steps/Sec: 0.06,
|
| 4582 |
[[34m2026-01-30 17:17:51[39m] (step=0004319) Train Loss mse: 0.0597, Train Loss ce: 0.0463, Train Steps/Sec: 0.06,
|
| 4583 |
[[34m2026-01-30 17:18:07[39m] (step=0004320) Train Loss mse: 0.0718, Train Loss ce: 0.0441, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4584 |
[[34m2026-01-30 17:18:22[39m] (step=0004321) Train Loss mse: 0.0604, Train Loss ce: 0.0477, Train Steps/Sec: 0.07,
|
| 4585 |
[[34m2026-01-30 17:18:39[39m] (step=0004322) Train Loss mse: 0.0561, Train Loss ce: 0.0457, Train Steps/Sec: 0.06,
|
| 4586 |
[[34m2026-01-30 17:18:54[39m] (step=0004323) Train Loss mse: 0.0657, Train Loss ce: 0.0452, Train Steps/Sec: 0.07,
|