Upload checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins
Browse files
checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260127_054730-checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -1240,6 +1240,27 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1240 |
[[34m2026-01-27 06:18:24[39m] (step=0001052) Train Loss mse: 0.0000, Train Loss ce: 0.2349, Train Steps/Sec: 0.62,
|
| 1241 |
[[34m2026-01-27 06:18:25[39m] (step=0001053) Train Loss mse: 0.0000, Train Loss ce: 0.2006, Train Steps/Sec: 0.75,
|
| 1242 |
[[34m2026-01-27 06:18:27[39m] (step=0001054) Train Loss mse: 0.0000, Train Loss ce: 0.2222, Train Steps/Sec: 0.77,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1243 |
[[34m2026-01-27 06:18:28[39m] (step=0001055) Train Loss mse: 0.0000, Train Loss ce: 0.2943, Train Steps/Sec: 0.76,
|
| 1244 |
[[34m2026-01-27 06:18:29[39m] (step=0001056) Train Loss mse: 0.0000, Train Loss ce: 0.2244, Train Steps/Sec: 0.76,
|
| 1245 |
[[34m2026-01-27 06:18:31[39m] (step=0001057) Train Loss mse: 0.0000, Train Loss ce: 0.1987, Train Steps/Sec: 0.67,
|
|
@@ -1318,20 +1339,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1318 |
[[34m2026-01-27 06:20:13[39m] (step=0001130) Train Loss mse: 0.0000, Train Loss ce: 0.2712, Train Steps/Sec: 0.76,
|
| 1319 |
[[34m2026-01-27 06:20:14[39m] (step=0001131) Train Loss mse: 0.0000, Train Loss ce: 0.2670, Train Steps/Sec: 0.74,
|
| 1320 |
[[34m2026-01-27 06:20:16[39m] (step=0001132) Train Loss mse: 0.0000, Train Loss ce: 0.2389, Train Steps/Sec: 0.63,
|
| 1321 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1322 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1323 |
-
[eval debug] first 3 batch fingerprints:
|
| 1324 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1325 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1326 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1327 |
-
ce_avg: 0.482469767332077, mse_avg: 0.0
|
| 1328 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1329 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1330 |
-
[eval debug] first 3 batch fingerprints:
|
| 1331 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1332 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1333 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1334 |
-
ce_avg: 0.5253902673721313, mse_avg: 0.0
|
| 1335 |
[[34m2026-01-27 06:20:17[39m] (step=0001133) Train Loss mse: 0.0000, Train Loss ce: 0.2480, Train Steps/Sec: 0.76,
|
| 1336 |
[[34m2026-01-27 06:20:18[39m] (step=0001134) Train Loss mse: 0.0000, Train Loss ce: 0.2425, Train Steps/Sec: 0.76,
|
| 1337 |
[[34m2026-01-27 06:20:20[39m] (step=0001135) Train Loss mse: 0.0000, Train Loss ce: 0.2217, Train Steps/Sec: 0.77,
|
|
@@ -2754,6 +2761,20 @@ ce_avg: 0.5253902673721313, mse_avg: 0.0
|
|
| 2754 |
[[34m2026-01-27 06:53:27[39m] (step=0002552) Train Loss mse: 0.0000, Train Loss ce: 0.2395, Train Steps/Sec: 0.65,
|
| 2755 |
[[34m2026-01-27 06:53:29[39m] (step=0002553) Train Loss mse: 0.0000, Train Loss ce: 0.2514, Train Steps/Sec: 0.77,
|
| 2756 |
[[34m2026-01-27 06:53:30[39m] (step=0002554) Train Loss mse: 0.0000, Train Loss ce: 0.2746, Train Steps/Sec: 0.76,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2757 |
[[34m2026-01-27 06:53:31[39m] (step=0002555) Train Loss mse: 0.0000, Train Loss ce: 0.2420, Train Steps/Sec: 0.77,
|
| 2758 |
[[34m2026-01-27 06:53:33[39m] (step=0002556) Train Loss mse: 0.0000, Train Loss ce: 0.2485, Train Steps/Sec: 0.76,
|
| 2759 |
[[34m2026-01-27 06:53:34[39m] (step=0002557) Train Loss mse: 0.0000, Train Loss ce: 0.2549, Train Steps/Sec: 0.74,
|
|
@@ -2843,27 +2864,6 @@ ce_avg: 0.5253902673721313, mse_avg: 0.0
|
|
| 2843 |
[[34m2026-01-27 06:55:28[39m] (step=0002641) Train Loss mse: 0.0000, Train Loss ce: 0.2655, Train Steps/Sec: 0.61,
|
| 2844 |
[[34m2026-01-27 06:55:30[39m] (step=0002642) Train Loss mse: 0.0000, Train Loss ce: 0.2336, Train Steps/Sec: 0.76,
|
| 2845 |
[[34m2026-01-27 06:55:31[39m] (step=0002643) Train Loss mse: 0.0000, Train Loss ce: 0.2325, Train Steps/Sec: 0.76,
|
| 2846 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2847 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2848 |
-
[eval debug] first 3 batch fingerprints:
|
| 2849 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2850 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2851 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2852 |
-
ce_avg: 0.563564658164978, mse_avg: 0.0
|
| 2853 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2854 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2855 |
-
[eval debug] first 3 batch fingerprints:
|
| 2856 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2857 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2858 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2859 |
-
ce_avg: 0.565230667591095, mse_avg: 0.0
|
| 2860 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2861 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2862 |
-
[eval debug] first 3 batch fingerprints:
|
| 2863 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2864 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2865 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2866 |
-
ce_avg: 0.5503921508789062, mse_avg: 0.0
|
| 2867 |
[[34m2026-01-27 06:55:32[39m] (step=0002644) Train Loss mse: 0.0000, Train Loss ce: 0.2420, Train Steps/Sec: 0.76,
|
| 2868 |
[[34m2026-01-27 06:55:33[39m] (step=0002645) Train Loss mse: 0.0000, Train Loss ce: 0.2868, Train Steps/Sec: 0.84,
|
| 2869 |
[[34m2026-01-27 06:55:35[39m] (step=0002646) Train Loss mse: 0.0000, Train Loss ce: 0.2387, Train Steps/Sec: 0.66,
|
|
@@ -3827,6 +3827,20 @@ ce_avg: 0.5503921508789062, mse_avg: 0.0
|
|
| 3827 |
[[34m2026-01-27 07:17:46[39m] (step=0003604) Train Loss mse: 0.0000, Train Loss ce: 0.2357, Train Steps/Sec: 0.77,
|
| 3828 |
[[34m2026-01-27 07:17:48[39m] (step=0003605) Train Loss mse: 0.0000, Train Loss ce: 0.2432, Train Steps/Sec: 0.77,
|
| 3829 |
[[34m2026-01-27 07:17:49[39m] (step=0003606) Train Loss mse: 0.0000, Train Loss ce: 0.2302, Train Steps/Sec: 0.61,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3830 |
[[34m2026-01-27 07:17:51[39m] (step=0003607) Train Loss mse: 0.0000, Train Loss ce: 0.2441, Train Steps/Sec: 0.90,
|
| 3831 |
[[34m2026-01-27 07:17:52[39m] (step=0003608) Train Loss mse: 0.0000, Train Loss ce: 0.2167, Train Steps/Sec: 0.77,
|
| 3832 |
[[34m2026-01-27 07:17:53[39m] (step=0003609) Train Loss mse: 0.0000, Train Loss ce: 0.2331, Train Steps/Sec: 0.65,
|
|
@@ -3941,27 +3955,6 @@ ce_avg: 0.5503921508789062, mse_avg: 0.0
|
|
| 3941 |
[[34m2026-01-27 07:20:23[39m] (step=0003718) Train Loss mse: 0.0000, Train Loss ce: 0.2512, Train Steps/Sec: 0.77,
|
| 3942 |
[[34m2026-01-27 07:20:24[39m] (step=0003719) Train Loss mse: 0.0000, Train Loss ce: 0.2666, Train Steps/Sec: 0.66,
|
| 3943 |
[[34m2026-01-27 07:20:26[39m] (step=0003720) Train Loss mse: 0.0000, Train Loss ce: 0.2608, Train Steps/Sec: 0.77,
|
| 3944 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3945 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3946 |
-
[eval debug] first 3 batch fingerprints:
|
| 3947 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3948 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3949 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3950 |
-
ce_avg: 0.5391465425491333, mse_avg: 0.0
|
| 3951 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3952 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3953 |
-
[eval debug] first 3 batch fingerprints:
|
| 3954 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3955 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3956 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3957 |
-
ce_avg: 0.5354103446006775, mse_avg: 0.0
|
| 3958 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 3959 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3960 |
-
[eval debug] first 3 batch fingerprints:
|
| 3961 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3962 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3963 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3964 |
-
ce_avg: 0.5331812500953674, mse_avg: 0.0
|
| 3965 |
[[34m2026-01-27 07:20:27[39m] (step=0003721) Train Loss mse: 0.0000, Train Loss ce: 0.2387, Train Steps/Sec: 0.62,
|
| 3966 |
[[34m2026-01-27 07:20:28[39m] (step=0003722) Train Loss mse: 0.0000, Train Loss ce: 0.2330, Train Steps/Sec: 0.77,
|
| 3967 |
[[34m2026-01-27 07:20:30[39m] (step=0003723) Train Loss mse: 0.0000, Train Loss ce: 0.1913, Train Steps/Sec: 0.80,
|
|
@@ -5245,4 +5238,11 @@ ce_avg: 0.5331812500953674, mse_avg: 0.0
|
|
| 5245 |
[[34m2026-01-27 07:50:40[39m] Saving checkpoint to /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/0005000.
|
| 5246 |
/opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:690: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 5247 |
warnings.warn(
|
| 5248 |
-
[[34m2026-01-27 07:53:17[39m] Done!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1240 |
[[34m2026-01-27 06:18:24[39m] (step=0001052) Train Loss mse: 0.0000, Train Loss ce: 0.2349, Train Steps/Sec: 0.62,
|
| 1241 |
[[34m2026-01-27 06:18:25[39m] (step=0001053) Train Loss mse: 0.0000, Train Loss ce: 0.2006, Train Steps/Sec: 0.75,
|
| 1242 |
[[34m2026-01-27 06:18:27[39m] (step=0001054) Train Loss mse: 0.0000, Train Loss ce: 0.2222, Train Steps/Sec: 0.77,
|
| 1243 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1244 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1245 |
+
[eval debug] first 3 batch fingerprints:
|
| 1246 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1247 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1248 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1249 |
+
ce_avg: 0.482469767332077, mse_avg: 0.0
|
| 1250 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1251 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1252 |
+
[eval debug] first 3 batch fingerprints:
|
| 1253 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1254 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1255 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1256 |
+
ce_avg: 0.5253902673721313, mse_avg: 0.0
|
| 1257 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 1258 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1259 |
+
[eval debug] first 3 batch fingerprints:
|
| 1260 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1261 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1262 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 1263 |
+
ce_avg: 0.563564658164978, mse_avg: 0.0
|
| 1264 |
[[34m2026-01-27 06:18:28[39m] (step=0001055) Train Loss mse: 0.0000, Train Loss ce: 0.2943, Train Steps/Sec: 0.76,
|
| 1265 |
[[34m2026-01-27 06:18:29[39m] (step=0001056) Train Loss mse: 0.0000, Train Loss ce: 0.2244, Train Steps/Sec: 0.76,
|
| 1266 |
[[34m2026-01-27 06:18:31[39m] (step=0001057) Train Loss mse: 0.0000, Train Loss ce: 0.1987, Train Steps/Sec: 0.67,
|
|
|
|
| 1339 |
[[34m2026-01-27 06:20:13[39m] (step=0001130) Train Loss mse: 0.0000, Train Loss ce: 0.2712, Train Steps/Sec: 0.76,
|
| 1340 |
[[34m2026-01-27 06:20:14[39m] (step=0001131) Train Loss mse: 0.0000, Train Loss ce: 0.2670, Train Steps/Sec: 0.74,
|
| 1341 |
[[34m2026-01-27 06:20:16[39m] (step=0001132) Train Loss mse: 0.0000, Train Loss ce: 0.2389, Train Steps/Sec: 0.63,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1342 |
[[34m2026-01-27 06:20:17[39m] (step=0001133) Train Loss mse: 0.0000, Train Loss ce: 0.2480, Train Steps/Sec: 0.76,
|
| 1343 |
[[34m2026-01-27 06:20:18[39m] (step=0001134) Train Loss mse: 0.0000, Train Loss ce: 0.2425, Train Steps/Sec: 0.76,
|
| 1344 |
[[34m2026-01-27 06:20:20[39m] (step=0001135) Train Loss mse: 0.0000, Train Loss ce: 0.2217, Train Steps/Sec: 0.77,
|
|
|
|
| 2761 |
[[34m2026-01-27 06:53:27[39m] (step=0002552) Train Loss mse: 0.0000, Train Loss ce: 0.2395, Train Steps/Sec: 0.65,
|
| 2762 |
[[34m2026-01-27 06:53:29[39m] (step=0002553) Train Loss mse: 0.0000, Train Loss ce: 0.2514, Train Steps/Sec: 0.77,
|
| 2763 |
[[34m2026-01-27 06:53:30[39m] (step=0002554) Train Loss mse: 0.0000, Train Loss ce: 0.2746, Train Steps/Sec: 0.76,
|
| 2764 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2765 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2766 |
+
[eval debug] first 3 batch fingerprints:
|
| 2767 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2768 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2769 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2770 |
+
ce_avg: 0.565230667591095, mse_avg: 0.0
|
| 2771 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2772 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2773 |
+
[eval debug] first 3 batch fingerprints:
|
| 2774 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2775 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2776 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 2777 |
+
ce_avg: 0.5503921508789062, mse_avg: 0.0
|
| 2778 |
[[34m2026-01-27 06:53:31[39m] (step=0002555) Train Loss mse: 0.0000, Train Loss ce: 0.2420, Train Steps/Sec: 0.77,
|
| 2779 |
[[34m2026-01-27 06:53:33[39m] (step=0002556) Train Loss mse: 0.0000, Train Loss ce: 0.2485, Train Steps/Sec: 0.76,
|
| 2780 |
[[34m2026-01-27 06:53:34[39m] (step=0002557) Train Loss mse: 0.0000, Train Loss ce: 0.2549, Train Steps/Sec: 0.74,
|
|
|
|
| 2864 |
[[34m2026-01-27 06:55:28[39m] (step=0002641) Train Loss mse: 0.0000, Train Loss ce: 0.2655, Train Steps/Sec: 0.61,
|
| 2865 |
[[34m2026-01-27 06:55:30[39m] (step=0002642) Train Loss mse: 0.0000, Train Loss ce: 0.2336, Train Steps/Sec: 0.76,
|
| 2866 |
[[34m2026-01-27 06:55:31[39m] (step=0002643) Train Loss mse: 0.0000, Train Loss ce: 0.2325, Train Steps/Sec: 0.76,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2867 |
[[34m2026-01-27 06:55:32[39m] (step=0002644) Train Loss mse: 0.0000, Train Loss ce: 0.2420, Train Steps/Sec: 0.76,
|
| 2868 |
[[34m2026-01-27 06:55:33[39m] (step=0002645) Train Loss mse: 0.0000, Train Loss ce: 0.2868, Train Steps/Sec: 0.84,
|
| 2869 |
[[34m2026-01-27 06:55:35[39m] (step=0002646) Train Loss mse: 0.0000, Train Loss ce: 0.2387, Train Steps/Sec: 0.66,
|
|
|
|
| 3827 |
[[34m2026-01-27 07:17:46[39m] (step=0003604) Train Loss mse: 0.0000, Train Loss ce: 0.2357, Train Steps/Sec: 0.77,
|
| 3828 |
[[34m2026-01-27 07:17:48[39m] (step=0003605) Train Loss mse: 0.0000, Train Loss ce: 0.2432, Train Steps/Sec: 0.77,
|
| 3829 |
[[34m2026-01-27 07:17:49[39m] (step=0003606) Train Loss mse: 0.0000, Train Loss ce: 0.2302, Train Steps/Sec: 0.61,
|
| 3830 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3831 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3832 |
+
[eval debug] first 3 batch fingerprints:
|
| 3833 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3834 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3835 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3836 |
+
ce_avg: 0.5391465425491333, mse_avg: 0.0
|
| 3837 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3838 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3839 |
+
[eval debug] first 3 batch fingerprints:
|
| 3840 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3841 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3842 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 3843 |
+
ce_avg: 0.5354103446006775, mse_avg: 0.0
|
| 3844 |
[[34m2026-01-27 07:17:51[39m] (step=0003607) Train Loss mse: 0.0000, Train Loss ce: 0.2441, Train Steps/Sec: 0.90,
|
| 3845 |
[[34m2026-01-27 07:17:52[39m] (step=0003608) Train Loss mse: 0.0000, Train Loss ce: 0.2167, Train Steps/Sec: 0.77,
|
| 3846 |
[[34m2026-01-27 07:17:53[39m] (step=0003609) Train Loss mse: 0.0000, Train Loss ce: 0.2331, Train Steps/Sec: 0.65,
|
|
|
|
| 3955 |
[[34m2026-01-27 07:20:23[39m] (step=0003718) Train Loss mse: 0.0000, Train Loss ce: 0.2512, Train Steps/Sec: 0.77,
|
| 3956 |
[[34m2026-01-27 07:20:24[39m] (step=0003719) Train Loss mse: 0.0000, Train Loss ce: 0.2666, Train Steps/Sec: 0.66,
|
| 3957 |
[[34m2026-01-27 07:20:26[39m] (step=0003720) Train Loss mse: 0.0000, Train Loss ce: 0.2608, Train Steps/Sec: 0.77,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3958 |
[[34m2026-01-27 07:20:27[39m] (step=0003721) Train Loss mse: 0.0000, Train Loss ce: 0.2387, Train Steps/Sec: 0.62,
|
| 3959 |
[[34m2026-01-27 07:20:28[39m] (step=0003722) Train Loss mse: 0.0000, Train Loss ce: 0.2330, Train Steps/Sec: 0.77,
|
| 3960 |
[[34m2026-01-27 07:20:30[39m] (step=0003723) Train Loss mse: 0.0000, Train Loss ce: 0.1913, Train Steps/Sec: 0.80,
|
|
|
|
| 5238 |
[[34m2026-01-27 07:50:40[39m] Saving checkpoint to /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/0005000.
|
| 5239 |
/opt/conda/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:690: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 5240 |
warnings.warn(
|
| 5241 |
+
[[34m2026-01-27 07:53:17[39m] Done!
|
| 5242 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 5243 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce/vlm_gym_mental_rotation_2d_val
|
| 5244 |
+
[eval debug] first 3 batch fingerprints:
|
| 5245 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 5246 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 5247 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_no_mse_evalonce'}]
|
| 5248 |
+
ce_avg: 0.5331812500953674, mse_avg: 0.0
|