Upload checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins
Browse files
checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260129_221235-checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -168,6 +168,13 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_ms
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.05472184717655182, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 172 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 173 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -1220,6 +1227,27 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1220 |
[[34m2026-01-29 22:55:52[39m] (step=0001039) Train Loss mse: 0.0000, Train Loss ce: 0.0495, Train Steps/Sec: 0.42,
|
| 1221 |
[[34m2026-01-29 22:55:54[39m] (step=0001040) Train Loss mse: 0.0000, Train Loss ce: 0.0516, Train Steps/Sec: 0.52,
|
| 1222 |
[[34m2026-01-29 22:55:56[39m] (step=0001041) Train Loss mse: 0.0000, Train Loss ce: 0.0560, Train Steps/Sec: 0.52,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1223 |
[[34m2026-01-29 22:55:58[39m] (step=0001042) Train Loss mse: 0.0000, Train Loss ce: 0.0513, Train Steps/Sec: 0.45,
|
| 1224 |
[[34m2026-01-29 22:56:01[39m] (step=0001043) Train Loss mse: 0.0000, Train Loss ce: 0.0505, Train Steps/Sec: 0.43,
|
| 1225 |
[[34m2026-01-29 22:56:03[39m] (step=0001044) Train Loss mse: 0.0000, Train Loss ce: 0.0511, Train Steps/Sec: 0.52,
|
|
@@ -1301,27 +1329,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1301 |
[[34m2026-01-29 22:58:44[39m] (step=0001120) Train Loss mse: 0.0000, Train Loss ce: 0.0513, Train Steps/Sec: 0.52,
|
| 1302 |
[[34m2026-01-29 22:58:46[39m] (step=0001121) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.52,
|
| 1303 |
[[34m2026-01-29 22:58:48[39m] (step=0001122) Train Loss mse: 0.0000, Train Loss ce: 0.0531, Train Steps/Sec: 0.47,
|
| 1304 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step1000
|
| 1305 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1306 |
-
[eval debug] first 3 batch fingerprints:
|
| 1307 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1308 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1309 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1310 |
-
ce_avg: 0.0573262944817543, mse_avg: 0.0
|
| 1311 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1312 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1313 |
-
[eval debug] first 3 batch fingerprints:
|
| 1314 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1315 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1316 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1317 |
-
ce_avg: 0.06291162222623825, mse_avg: 0.0
|
| 1318 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1319 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1320 |
-
[eval debug] first 3 batch fingerprints:
|
| 1321 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1322 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1323 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1324 |
-
ce_avg: 0.07033854722976685, mse_avg: 0.0
|
| 1325 |
[[34m2026-01-29 22:58:51[39m] (step=0001123) Train Loss mse: 0.0000, Train Loss ce: 0.0544, Train Steps/Sec: 0.41,
|
| 1326 |
[[34m2026-01-29 22:58:53[39m] (step=0001124) Train Loss mse: 0.0000, Train Loss ce: 0.0505, Train Steps/Sec: 0.40,
|
| 1327 |
[[34m2026-01-29 22:58:55[39m] (step=0001125) Train Loss mse: 0.0000, Train Loss ce: 0.0520, Train Steps/Sec: 0.46,
|
|
@@ -2774,6 +2781,20 @@ ce_avg: 0.07033854722976685, mse_avg: 0.0
|
|
| 2774 |
[[34m2026-01-29 23:50:38[39m] (step=0002572) Train Loss mse: 0.0000, Train Loss ce: 0.0482, Train Steps/Sec: 0.45,
|
| 2775 |
[[34m2026-01-29 23:50:40[39m] (step=0002573) Train Loss mse: 0.0000, Train Loss ce: 0.0470, Train Steps/Sec: 0.41,
|
| 2776 |
[[34m2026-01-29 23:50:42[39m] (step=0002574) Train Loss mse: 0.0000, Train Loss ce: 0.0454, Train Steps/Sec: 0.52,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2777 |
[[34m2026-01-29 23:50:44[39m] (step=0002575) Train Loss mse: 0.0000, Train Loss ce: 0.0474, Train Steps/Sec: 0.52,
|
| 2778 |
[[34m2026-01-29 23:50:46[39m] (step=0002576) Train Loss mse: 0.0000, Train Loss ce: 0.0484, Train Steps/Sec: 0.52,
|
| 2779 |
[[34m2026-01-29 23:50:48[39m] (step=0002577) Train Loss mse: 0.0000, Train Loss ce: 0.0470, Train Steps/Sec: 0.52,
|
|
@@ -2860,27 +2881,6 @@ ce_avg: 0.07033854722976685, mse_avg: 0.0
|
|
| 2860 |
[[34m2026-01-29 23:53:42[39m] (step=0002658) Train Loss mse: 0.0000, Train Loss ce: 0.0479, Train Steps/Sec: 0.44,
|
| 2861 |
[[34m2026-01-29 23:53:44[39m] (step=0002659) Train Loss mse: 0.0000, Train Loss ce: 0.0465, Train Steps/Sec: 0.52,
|
| 2862 |
[[34m2026-01-29 23:53:46[39m] (step=0002660) Train Loss mse: 0.0000, Train Loss ce: 0.0461, Train Steps/Sec: 0.51,
|
| 2863 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2864 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2865 |
-
[eval debug] first 3 batch fingerprints:
|
| 2866 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2867 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2868 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2869 |
-
ce_avg: 0.08825891464948654, mse_avg: 0.0
|
| 2870 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2871 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2872 |
-
[eval debug] first 3 batch fingerprints:
|
| 2873 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2874 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2875 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2876 |
-
ce_avg: 0.14744605123996735, mse_avg: 0.0
|
| 2877 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2878 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2879 |
-
[eval debug] first 3 batch fingerprints:
|
| 2880 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2881 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2882 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2883 |
-
ce_avg: 0.38679561018943787, mse_avg: 0.0
|
| 2884 |
[[34m2026-01-29 23:53:48[39m] (step=0002661) Train Loss mse: 0.0000, Train Loss ce: 0.0476, Train Steps/Sec: 0.41,
|
| 2885 |
[[34m2026-01-29 23:53:50[39m] (step=0002662) Train Loss mse: 0.0000, Train Loss ce: 0.0465, Train Steps/Sec: 0.51,
|
| 2886 |
[[34m2026-01-29 23:53:52[39m] (step=0002663) Train Loss mse: 0.0000, Train Loss ce: 0.0473, Train Steps/Sec: 0.47,
|
|
@@ -3801,6 +3801,27 @@ ce_avg: 0.38679561018943787, mse_avg: 0.0
|
|
| 3801 |
[[34m2026-01-30 00:26:31[39m] (step=0003578) Train Loss mse: 0.0000, Train Loss ce: 0.0419, Train Steps/Sec: 0.47,
|
| 3802 |
[[34m2026-01-30 00:26:34[39m] (step=0003579) Train Loss mse: 0.0000, Train Loss ce: 0.0445, Train Steps/Sec: 0.47,
|
| 3803 |
[[34m2026-01-30 00:26:36[39m] (step=0003580) Train Loss mse: 0.0000, Train Loss ce: 0.0464, Train Steps/Sec: 0.51,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3804 |
[[34m2026-01-30 00:26:38[39m] (step=0003581) Train Loss mse: 0.0000, Train Loss ce: 0.0462, Train Steps/Sec: 0.41,
|
| 3805 |
[[34m2026-01-30 00:26:40[39m] (step=0003582) Train Loss mse: 0.0000, Train Loss ce: 0.0450, Train Steps/Sec: 0.52,
|
| 3806 |
[[34m2026-01-30 00:26:42[39m] (step=0003583) Train Loss mse: 0.0000, Train Loss ce: 0.0436, Train Steps/Sec: 0.52,
|
|
@@ -4027,27 +4048,6 @@ ce_avg: 0.38679561018943787, mse_avg: 0.0
|
|
| 4027 |
[[34m2026-01-30 00:34:36[39m] (step=0003804) Train Loss mse: 0.0000, Train Loss ce: 0.0451, Train Steps/Sec: 0.46,
|
| 4028 |
[[34m2026-01-30 00:34:39[39m] (step=0003805) Train Loss mse: 0.0000, Train Loss ce: 0.0433, Train Steps/Sec: 0.42,
|
| 4029 |
[[34m2026-01-30 00:34:41[39m] (step=0003806) Train Loss mse: 0.0000, Train Loss ce: 0.0466, Train Steps/Sec: 0.43,
|
| 4030 |
-
[[34m2026-01-30 00:34:43
|
| 4031 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 4032 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4033 |
-
[eval debug] first 3 batch fingerprints:
|
| 4034 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4035 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4036 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4037 |
-
ce_avg: 0.6535353660583496, mse_avg: 0.0
|
| 4038 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 4039 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4040 |
-
[eval debug] first 3 batch fingerprints:
|
| 4041 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4042 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4043 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4044 |
-
ce_avg: 0.757265567779541, mse_avg: 0.0
|
| 4045 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 4046 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4047 |
-
[eval debug] first 3 batch fingerprints:
|
| 4048 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4049 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4050 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 4051 |
[[34m2026-01-30 00:34:43[39m] (step=0003807) Train Loss mse: 0.0000, Train Loss ce: 0.0443, Train Steps/Sec: 0.52,
|
| 4052 |
[[34m2026-01-30 00:34:45[39m] (step=0003808) Train Loss mse: 0.0000, Train Loss ce: 0.0425, Train Steps/Sec: 0.52,
|
| 4053 |
[[34m2026-01-30 00:34:47[39m] (step=0003809) Train Loss mse: 0.0000, Train Loss ce: 0.0486, Train Steps/Sec: 0.52,
|
|
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.05472184717655182, mse_avg: 0.0
|
| 171 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step1000
|
| 172 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 173 |
+
[eval debug] first 3 batch fingerprints:
|
| 174 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 175 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 176 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 177 |
+
ce_avg: 0.0573262944817543, mse_avg: 0.0
|
| 178 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 179 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 180 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 1227 |
[[34m2026-01-29 22:55:52[39m] (step=0001039) Train Loss mse: 0.0000, Train Loss ce: 0.0495, Train Steps/Sec: 0.42,
|
| 1228 |
[[34m2026-01-29 22:55:54[39m] (step=0001040) Train Loss mse: 0.0000, Train Loss ce: 0.0516, Train Steps/Sec: 0.52,
|
| 1229 |
[[34m2026-01-29 22:55:56[39m] (step=0001041) Train Loss mse: 0.0000, Train Loss ce: 0.0560, Train Steps/Sec: 0.52,
|
| 1230 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1231 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1232 |
+
[eval debug] first 3 batch fingerprints:
|
| 1233 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1234 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1235 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1236 |
+
ce_avg: 0.06291162222623825, mse_avg: 0.0
|
| 1237 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1238 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1239 |
+
[eval debug] first 3 batch fingerprints:
|
| 1240 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1241 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1242 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1243 |
+
ce_avg: 0.07033854722976685, mse_avg: 0.0
|
| 1244 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 1245 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1246 |
+
[eval debug] first 3 batch fingerprints:
|
| 1247 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1248 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1249 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 1250 |
+
ce_avg: 0.08825891464948654, mse_avg: 0.0
|
| 1251 |
[[34m2026-01-29 22:55:58[39m] (step=0001042) Train Loss mse: 0.0000, Train Loss ce: 0.0513, Train Steps/Sec: 0.45,
|
| 1252 |
[[34m2026-01-29 22:56:01[39m] (step=0001043) Train Loss mse: 0.0000, Train Loss ce: 0.0505, Train Steps/Sec: 0.43,
|
| 1253 |
[[34m2026-01-29 22:56:03[39m] (step=0001044) Train Loss mse: 0.0000, Train Loss ce: 0.0511, Train Steps/Sec: 0.52,
|
|
|
|
| 1329 |
[[34m2026-01-29 22:58:44[39m] (step=0001120) Train Loss mse: 0.0000, Train Loss ce: 0.0513, Train Steps/Sec: 0.52,
|
| 1330 |
[[34m2026-01-29 22:58:46[39m] (step=0001121) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.52,
|
| 1331 |
[[34m2026-01-29 22:58:48[39m] (step=0001122) Train Loss mse: 0.0000, Train Loss ce: 0.0531, Train Steps/Sec: 0.47,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1332 |
[[34m2026-01-29 22:58:51[39m] (step=0001123) Train Loss mse: 0.0000, Train Loss ce: 0.0544, Train Steps/Sec: 0.41,
|
| 1333 |
[[34m2026-01-29 22:58:53[39m] (step=0001124) Train Loss mse: 0.0000, Train Loss ce: 0.0505, Train Steps/Sec: 0.40,
|
| 1334 |
[[34m2026-01-29 22:58:55[39m] (step=0001125) Train Loss mse: 0.0000, Train Loss ce: 0.0520, Train Steps/Sec: 0.46,
|
|
|
|
| 2781 |
[[34m2026-01-29 23:50:38[39m] (step=0002572) Train Loss mse: 0.0000, Train Loss ce: 0.0482, Train Steps/Sec: 0.45,
|
| 2782 |
[[34m2026-01-29 23:50:40[39m] (step=0002573) Train Loss mse: 0.0000, Train Loss ce: 0.0470, Train Steps/Sec: 0.41,
|
| 2783 |
[[34m2026-01-29 23:50:42[39m] (step=0002574) Train Loss mse: 0.0000, Train Loss ce: 0.0454, Train Steps/Sec: 0.52,
|
| 2784 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2785 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2786 |
+
[eval debug] first 3 batch fingerprints:
|
| 2787 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2788 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2789 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2790 |
+
ce_avg: 0.14744605123996735, mse_avg: 0.0
|
| 2791 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2792 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2793 |
+
[eval debug] first 3 batch fingerprints:
|
| 2794 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2795 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2796 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 2797 |
+
ce_avg: 0.38679561018943787, mse_avg: 0.0
|
| 2798 |
[[34m2026-01-29 23:50:44[39m] (step=0002575) Train Loss mse: 0.0000, Train Loss ce: 0.0474, Train Steps/Sec: 0.52,
|
| 2799 |
[[34m2026-01-29 23:50:46[39m] (step=0002576) Train Loss mse: 0.0000, Train Loss ce: 0.0484, Train Steps/Sec: 0.52,
|
| 2800 |
[[34m2026-01-29 23:50:48[39m] (step=0002577) Train Loss mse: 0.0000, Train Loss ce: 0.0470, Train Steps/Sec: 0.52,
|
|
|
|
| 2881 |
[[34m2026-01-29 23:53:42[39m] (step=0002658) Train Loss mse: 0.0000, Train Loss ce: 0.0479, Train Steps/Sec: 0.44,
|
| 2882 |
[[34m2026-01-29 23:53:44[39m] (step=0002659) Train Loss mse: 0.0000, Train Loss ce: 0.0465, Train Steps/Sec: 0.52,
|
| 2883 |
[[34m2026-01-29 23:53:46[39m] (step=0002660) Train Loss mse: 0.0000, Train Loss ce: 0.0461, Train Steps/Sec: 0.51,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2884 |
[[34m2026-01-29 23:53:48[39m] (step=0002661) Train Loss mse: 0.0000, Train Loss ce: 0.0476, Train Steps/Sec: 0.41,
|
| 2885 |
[[34m2026-01-29 23:53:50[39m] (step=0002662) Train Loss mse: 0.0000, Train Loss ce: 0.0465, Train Steps/Sec: 0.51,
|
| 2886 |
[[34m2026-01-29 23:53:52[39m] (step=0002663) Train Loss mse: 0.0000, Train Loss ce: 0.0473, Train Steps/Sec: 0.47,
|
|
|
|
| 3801 |
[[34m2026-01-30 00:26:31[39m] (step=0003578) Train Loss mse: 0.0000, Train Loss ce: 0.0419, Train Steps/Sec: 0.47,
|
| 3802 |
[[34m2026-01-30 00:26:34[39m] (step=0003579) Train Loss mse: 0.0000, Train Loss ce: 0.0445, Train Steps/Sec: 0.47,
|
| 3803 |
[[34m2026-01-30 00:26:36[39m] (step=0003580) Train Loss mse: 0.0000, Train Loss ce: 0.0464, Train Steps/Sec: 0.51,
|
| 3804 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3805 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3806 |
+
[eval debug] first 3 batch fingerprints:
|
| 3807 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3808 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3809 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3810 |
+
ce_avg: 0.6535353660583496, mse_avg: 0.0
|
| 3811 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3812 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3813 |
+
[eval debug] first 3 batch fingerprints:
|
| 3814 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3815 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3816 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3817 |
+
ce_avg: 0.757265567779541, mse_avg: 0.0
|
| 3818 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 3819 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 3820 |
+
[eval debug] first 3 batch fingerprints:
|
| 3821 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3822 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3823 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_celoss_no_mse_evalonce'}]
|
| 3824 |
+
ce_avg: 0.7789239287376404, mse_avg: 0.0
|
| 3825 |
[[34m2026-01-30 00:26:38[39m] (step=0003581) Train Loss mse: 0.0000, Train Loss ce: 0.0462, Train Steps/Sec: 0.41,
|
| 3826 |
[[34m2026-01-30 00:26:40[39m] (step=0003582) Train Loss mse: 0.0000, Train Loss ce: 0.0450, Train Steps/Sec: 0.52,
|
| 3827 |
[[34m2026-01-30 00:26:42[39m] (step=0003583) Train Loss mse: 0.0000, Train Loss ce: 0.0436, Train Steps/Sec: 0.52,
|
|
|
|
| 4048 |
[[34m2026-01-30 00:34:36[39m] (step=0003804) Train Loss mse: 0.0000, Train Loss ce: 0.0451, Train Steps/Sec: 0.46,
|
| 4049 |
[[34m2026-01-30 00:34:39[39m] (step=0003805) Train Loss mse: 0.0000, Train Loss ce: 0.0433, Train Steps/Sec: 0.42,
|
| 4050 |
[[34m2026-01-30 00:34:41[39m] (step=0003806) Train Loss mse: 0.0000, Train Loss ce: 0.0466, Train Steps/Sec: 0.43,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4051 |
[[34m2026-01-30 00:34:43[39m] (step=0003807) Train Loss mse: 0.0000, Train Loss ce: 0.0443, Train Steps/Sec: 0.52,
|
| 4052 |
[[34m2026-01-30 00:34:45[39m] (step=0003808) Train Loss mse: 0.0000, Train Loss ce: 0.0425, Train Steps/Sec: 0.52,
|
| 4053 |
[[34m2026-01-30 00:34:47[39m] (step=0003809) Train Loss mse: 0.0000, Train Loss ce: 0.0486, Train Steps/Sec: 0.52,
|