Upload checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins
Browse files
checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260129_221331-vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins-run0/files/output.log
CHANGED
|
@@ -791,105 +791,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 791 |
[[34m2026-01-30 01:57:28[39m] (step=0000780) Train Loss mse: 0.0664, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 792 |
[[34m2026-01-30 01:57:45[39m] (step=0000781) Train Loss mse: 0.0600, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 793 |
[[34m2026-01-30 01:58:02[39m] (step=0000782) Train Loss mse: 0.0489, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 794 |
-
[[34m2026-01-30 01:58:19[39m] (step=0000783) Train Loss mse: 0.0493, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 795 |
-
[[34m2026-01-30 01:58:35[39m] (step=0000784) Train Loss mse: 0.0575, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 796 |
-
[[34m2026-01-30 01:58:52[39m] (step=0000785) Train Loss mse: 0.0494, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 797 |
-
[[34m2026-01-30 01:59:09[39m] (step=0000786) Train Loss mse: 0.0599, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 798 |
-
[[34m2026-01-30 01:59:25[39m] (step=0000787) Train Loss mse: 0.0569, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 799 |
-
[[34m2026-01-30 01:59:42[39m] (step=0000788) Train Loss mse: 0.0589, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 800 |
-
[[34m2026-01-30 01:59:59[39m] (step=0000789) Train Loss mse: 0.0451, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 801 |
-
[[34m2026-01-30 02:00:16[39m] (step=0000790) Train Loss mse: 0.0543, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 802 |
-
[[34m2026-01-30 02:00:33[39m] (step=0000791) Train Loss mse: 0.0641, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 803 |
-
[[34m2026-01-30 02:00:49[39m] (step=0000792) Train Loss mse: 0.0502, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 804 |
-
[[34m2026-01-30 02:01:06[39m] (step=0000793) Train Loss mse: 0.0501, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 805 |
-
[[34m2026-01-30 02:01:22[39m] (step=0000794) Train Loss mse: 0.0646, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 806 |
-
[[34m2026-01-30 02:01:38[39m] (step=0000795) Train Loss mse: 0.0553, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 807 |
-
[[34m2026-01-30 02:01:55[39m] (step=0000796) Train Loss mse: 0.0584, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 808 |
-
[[34m2026-01-30 02:02:12[39m] (step=0000797) Train Loss mse: 0.0652, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 809 |
-
[[34m2026-01-30 02:02:28[39m] (step=0000798) Train Loss mse: 0.0514, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 810 |
-
[[34m2026-01-30 02:02:45[39m] (step=0000799) Train Loss mse: 0.0468, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 811 |
-
[[34m2026-01-30 02:03:02[39m] (step=0000800) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 812 |
-
[[34m2026-01-30 02:03:18[39m] (step=0000801) Train Loss mse: 0.0577, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 813 |
-
[[34m2026-01-30 02:03:35[39m] (step=0000802) Train Loss mse: 0.0555, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 814 |
-
[[34m2026-01-30 02:03:51[39m] (step=0000803) Train Loss mse: 0.0573, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 815 |
-
[[34m2026-01-30 02:04:08[39m] (step=0000804) Train Loss mse: 0.0592, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 816 |
-
[[34m2026-01-30 02:04:25[39m] (step=0000805) Train Loss mse: 0.0615, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 817 |
-
[[34m2026-01-30 02:04:42[39m] (step=0000806) Train Loss mse: 0.0607, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 818 |
-
[[34m2026-01-30 02:04:58[39m] (step=0000807) Train Loss mse: 0.0611, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 819 |
-
[[34m2026-01-30 02:05:15[39m] (step=0000808) Train Loss mse: 0.0460, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 820 |
-
[[34m2026-01-30 02:05:32[39m] (step=0000809) Train Loss mse: 0.0611, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 821 |
-
[[34m2026-01-30 02:05:48[39m] (step=0000810) Train Loss mse: 0.0489, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 822 |
-
[[34m2026-01-30 02:06:05[39m] (step=0000811) Train Loss mse: 0.0634, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 823 |
-
[[34m2026-01-30 02:06:22[39m] (step=0000812) Train Loss mse: 0.0506, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 824 |
-
[[34m2026-01-30 02:06:38[39m] (step=0000813) Train Loss mse: 0.0574, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 825 |
-
[[34m2026-01-30 02:06:55[39m] (step=0000814) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 826 |
-
[[34m2026-01-30 02:07:12[39m] (step=0000815) Train Loss mse: 0.0613, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 827 |
-
[[34m2026-01-30 02:07:28[39m] (step=0000816) Train Loss mse: 0.0563, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 828 |
-
[[34m2026-01-30 02:07:45[39m] (step=0000817) Train Loss mse: 0.0533, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 829 |
-
[[34m2026-01-30 02:08:01[39m] (step=0000818) Train Loss mse: 0.0604, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 830 |
-
[[34m2026-01-30 02:08:18[39m] (step=0000819) Train Loss mse: 0.0552, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 831 |
-
[[34m2026-01-30 02:08:35[39m] (step=0000820) Train Loss mse: 0.0533, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 832 |
-
[[34m2026-01-30 02:08:52[39m] (step=0000821) Train Loss mse: 0.0696, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 833 |
-
[[34m2026-01-30 02:09:08[39m] (step=0000822) Train Loss mse: 0.0618, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 834 |
-
[[34m2026-01-30 02:09:25[39m] (step=0000823) Train Loss mse: 0.0537, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 835 |
-
[[34m2026-01-30 02:09:42[39m] (step=0000824) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 836 |
-
[[34m2026-01-30 02:09:58[39m] (step=0000825) Train Loss mse: 0.0703, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 837 |
-
[[34m2026-01-30 02:10:15[39m] (step=0000826) Train Loss mse: 0.0553, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 838 |
-
[[34m2026-01-30 02:10:32[39m] (step=0000827) Train Loss mse: 0.0499, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 839 |
-
[[34m2026-01-30 02:10:49[39m] (step=0000828) Train Loss mse: 0.0466, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 840 |
-
[[34m2026-01-30 02:11:05[39m] (step=0000829) Train Loss mse: 0.0493, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 841 |
-
[[34m2026-01-30 02:11:22[39m] (step=0000830) Train Loss mse: 0.0555, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 842 |
-
[[34m2026-01-30 02:11:39[39m] (step=0000831) Train Loss mse: 0.0543, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 843 |
-
[[34m2026-01-30 02:11:56[39m] (step=0000832) Train Loss mse: 0.0656, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 844 |
-
[[34m2026-01-30 02:12:12[39m] (step=0000833) Train Loss mse: 0.0556, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 845 |
-
[[34m2026-01-30 02:12:29[39m] (step=0000834) Train Loss mse: 0.0591, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 846 |
-
[[34m2026-01-30 02:12:46[39m] (step=0000835) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 847 |
-
[[34m2026-01-30 02:13:02[39m] (step=0000836) Train Loss mse: 0.0481, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 848 |
-
[[34m2026-01-30 02:13:19[39m] (step=0000837) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 849 |
-
[[34m2026-01-30 02:13:35[39m] (step=0000838) Train Loss mse: 0.0626, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 850 |
-
[[34m2026-01-30 02:13:52[39m] (step=0000839) Train Loss mse: 0.0607, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 851 |
-
[[34m2026-01-30 02:14:09[39m] (step=0000840) Train Loss mse: 0.0552, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 852 |
-
[[34m2026-01-30 02:14:26[39m] (step=0000841) Train Loss mse: 0.0525, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 853 |
-
[[34m2026-01-30 02:14:42[39m] (step=0000842) Train Loss mse: 0.0637, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 854 |
-
[[34m2026-01-30 02:14:59[39m] (step=0000843) Train Loss mse: 0.0680, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 855 |
-
[[34m2026-01-30 02:15:15[39m] (step=0000844) Train Loss mse: 0.0513, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 856 |
-
[[34m2026-01-30 02:15:32[39m] (step=0000845) Train Loss mse: 0.0573, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 857 |
-
[[34m2026-01-30 02:15:49[39m] (step=0000846) Train Loss mse: 0.0547, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 858 |
-
[[34m2026-01-30 02:16:06[39m] (step=0000847) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 859 |
-
[[34m2026-01-30 02:16:22[39m] (step=0000848) Train Loss mse: 0.0592, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 860 |
-
[[34m2026-01-30 02:16:39[39m] (step=0000849) Train Loss mse: 0.0468, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 861 |
-
[[34m2026-01-30 02:16:56[39m] (step=0000850) Train Loss mse: 0.0466, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 862 |
-
[[34m2026-01-30 02:17:12[39m] (step=0000851) Train Loss mse: 0.0648, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 863 |
-
[[34m2026-01-30 02:17:29[39m] (step=0000852) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 864 |
-
[[34m2026-01-30 02:17:46[39m] (step=0000853) Train Loss mse: 0.0633, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 865 |
-
[[34m2026-01-30 02:18:02[39m] (step=0000854) Train Loss mse: 0.0531, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 866 |
-
[[34m2026-01-30 02:18:19[39m] (step=0000855) Train Loss mse: 0.0441, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 867 |
-
[[34m2026-01-30 02:18:36[39m] (step=0000856) Train Loss mse: 0.0665, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 868 |
-
[[34m2026-01-30 02:18:53[39m] (step=0000857) Train Loss mse: 0.0502, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 869 |
-
[[34m2026-01-30 02:19:10[39m] (step=0000858) Train Loss mse: 0.0536, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 870 |
-
[[34m2026-01-30 02:19:27[39m] (step=0000859) Train Loss mse: 0.0497, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 871 |
-
[[34m2026-01-30 02:19:43[39m] (step=0000860) Train Loss mse: 0.0548, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 872 |
-
[[34m2026-01-30 02:20:00[39m] (step=0000861) Train Loss mse: 0.0600, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 873 |
-
[[34m2026-01-30 02:20:16[39m] (step=0000862) Train Loss mse: 0.0653, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 874 |
-
[[34m2026-01-30 02:20:33[39m] (step=0000863) Train Loss mse: 0.0665, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 875 |
-
[[34m2026-01-30 02:20:50[39m] (step=0000864) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 876 |
-
[[34m2026-01-30 02:21:07[39m] (step=0000865) Train Loss mse: 0.0538, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 877 |
-
[[34m2026-01-30 02:21:24[39m] (step=0000866) Train Loss mse: 0.0583, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 878 |
-
[[34m2026-01-30 02:21:40[39m] (step=0000867) Train Loss mse: 0.0626, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 879 |
-
[[34m2026-01-30 02:21:57[39m] (step=0000868) Train Loss mse: 0.0621, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 880 |
-
[[34m2026-01-30 02:22:14[39m] (step=0000869) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 881 |
-
[[34m2026-01-30 02:22:31[39m] (step=0000870) Train Loss mse: 0.0561, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 882 |
-
[[34m2026-01-30 02:22:47[39m] (step=0000871) Train Loss mse: 0.0560, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 883 |
-
[[34m2026-01-30 02:23:04[39m] (step=0000872) Train Loss mse: 0.0603, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 884 |
-
[[34m2026-01-30 02:23:21[39m] (step=0000873) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 885 |
-
[[34m2026-01-30 02:23:38[39m] (step=0000874) Train Loss mse: 0.0663, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 886 |
-
[[34m2026-01-30 02:23:54[39m] (step=0000875) Train Loss mse: 0.0530, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 887 |
-
[[34m2026-01-30 02:24:11[39m] (step=0000876) Train Loss mse: 0.0586, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 888 |
-
[[34m2026-01-30 02:24:28[39m] (step=0000877) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 889 |
-
[[34m2026-01-30 02:24:44[39m] (step=0000878) Train Loss mse: 0.0560, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 890 |
-
[[34m2026-01-30 02:25:01[39m] (step=0000879) Train Loss mse: 0.0518, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 891 |
-
[[34m2026-01-30 02:25:18[39m] (step=0000880) Train Loss mse: 0.0568, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 892 |
-
[[34m2026-01-30 02:25:34[39m] (step=0000881) Train Loss mse: 0.0554, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 893 |
FullyShardedDataParallel(
|
| 894 |
(_fsdp_wrapped_module): Bagel(
|
| 895 |
(language_model): Qwen2ForCausalLM(
|
|
@@ -1076,20 +977,119 @@ Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_onl
|
|
| 1076 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1077 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1078 |
ce_avg: 0.0, mse_avg: 0.05453861877322197
|
| 1079 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is
|
| 1080 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1081 |
[eval debug] first 3 batch fingerprints:
|
| 1082 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1083 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1084 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1085 |
-
ce_avg: 0.0, mse_avg: 0.
|
| 1086 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is
|
| 1087 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 1088 |
[eval debug] first 3 batch fingerprints:
|
| 1089 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1090 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1091 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 1092 |
-
ce_avg: 0.0, mse_avg: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1093 |
[[34m2026-01-30 02:25:51[39m] (step=0000882) Train Loss mse: 0.0666, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1094 |
[[34m2026-01-30 02:26:08[39m] (step=0000883) Train Loss mse: 0.0433, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1095 |
[[34m2026-01-30 02:26:25[39m] (step=0000884) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
@@ -2174,6 +2174,20 @@ ce_avg: 0.0, mse_avg: 0.05133244767785072
|
|
| 2174 |
[[34m2026-01-30 07:26:45[39m] (step=0001963) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2175 |
[[34m2026-01-30 07:27:02[39m] (step=0001964) Train Loss mse: 0.0519, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2176 |
[[34m2026-01-30 07:27:18[39m] (step=0001965) Train Loss mse: 0.0568, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2177 |
[[34m2026-01-30 07:27:35[39m] (step=0001966) Train Loss mse: 0.0562, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2178 |
[[34m2026-01-30 07:27:52[39m] (step=0001967) Train Loss mse: 0.0602, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2179 |
[[34m2026-01-30 07:28:08[39m] (step=0001968) Train Loss mse: 0.0593, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
@@ -2319,20 +2333,6 @@ ce_avg: 0.0, mse_avg: 0.05133244767785072
|
|
| 2319 |
[[34m2026-01-30 08:07:20[39m] (step=0002108) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2320 |
[[34m2026-01-30 08:07:37[39m] (step=0002109) Train Loss mse: 0.0565, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2321 |
[[34m2026-01-30 08:07:54[39m] (step=0002110) Train Loss mse: 0.0507, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2322 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2500
|
| 2323 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2324 |
-
[eval debug] first 3 batch fingerprints:
|
| 2325 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2326 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2327 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2328 |
-
ce_avg: 0.0, mse_avg: 0.05205056443810463
|
| 2329 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3000
|
| 2330 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2331 |
-
[eval debug] first 3 batch fingerprints:
|
| 2332 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2333 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2334 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2335 |
-
ce_avg: 0.0, mse_avg: 0.05160127952694893
|
| 2336 |
[[34m2026-01-30 08:08:10[39m] (step=0002111) Train Loss mse: 0.0557, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2337 |
[[34m2026-01-30 08:08:27[39m] (step=0002112) Train Loss mse: 0.0546, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2338 |
[[34m2026-01-30 08:08:44[39m] (step=0002113) Train Loss mse: 0.0537, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
@@ -3228,20 +3228,26 @@ ce_avg: 0.0, mse_avg: 0.05160127952694893
|
|
| 3228 |
[[34m2026-01-30 12:16:26[39m] (step=0003003) Train Loss mse: 0.0488, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3229 |
[[34m2026-01-30 12:16:42[39m] (step=0003004) Train Loss mse: 0.0548, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3230 |
[[34m2026-01-30 12:16:59[39m] (step=0003005) Train Loss mse: 0.0518, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3231 |
-
|
| 3232 |
-
|
| 3233 |
-
[
|
| 3234 |
-
|
| 3235 |
-
|
| 3236 |
-
|
| 3237 |
-
|
| 3238 |
-
|
| 3239 |
-
|
| 3240 |
-
[
|
| 3241 |
-
|
| 3242 |
-
|
| 3243 |
-
|
| 3244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3245 |
[[34m2026-01-30 12:22:49[39m] (step=0003026) Train Loss mse: 0.0576, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3246 |
[[34m2026-01-30 12:23:06[39m] (step=0003027) Train Loss mse: 0.0597, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3247 |
[[34m2026-01-30 12:23:22[39m] (step=0003028) Train Loss mse: 0.0511, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
@@ -4204,6 +4210,41 @@ ce_avg: 0.0, mse_avg: 0.05156182870268822
|
|
| 4204 |
[[34m2026-01-30 16:49:13[39m] (step=0003985) Train Loss mse: 0.0617, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4205 |
[[34m2026-01-30 16:49:30[39m] (step=0003986) Train Loss mse: 0.0581, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4206 |
[[34m2026-01-30 16:49:47[39m] (step=0003987) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4207 |
[[34m2026-01-30 16:50:03[39m] (step=0003988) Train Loss mse: 0.0652, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4208 |
[[34m2026-01-30 16:50:20[39m] (step=0003989) Train Loss mse: 0.0522, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4209 |
[[34m2026-01-30 16:50:36[39m] (step=0003990) Train Loss mse: 0.0497, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
@@ -4510,20 +4551,6 @@ ce_avg: 0.0, mse_avg: 0.05156182870268822
|
|
| 4510 |
[[34m2026-01-30 18:14:29[39m] (step=0004291) Train Loss mse: 0.0559, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4511 |
[[34m2026-01-30 18:14:46[39m] (step=0004292) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4512 |
[[34m2026-01-30 18:15:02[39m] (step=0004293) Train Loss mse: 0.0432, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4513 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4500
|
| 4514 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4515 |
-
[eval debug] first 3 batch fingerprints:
|
| 4516 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4517 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4518 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4519 |
-
ce_avg: 0.0, mse_avg: 0.05144112929701805
|
| 4520 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step5000
|
| 4521 |
-
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4522 |
-
[eval debug] first 3 batch fingerprints:
|
| 4523 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4524 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4525 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4526 |
-
ce_avg: 0.0, mse_avg: 0.05311766639351845
|
| 4527 |
[[34m2026-01-30 18:15:19[39m] (step=0004294) Train Loss mse: 0.0541, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4528 |
[[34m2026-01-30 18:15:36[39m] (step=0004295) Train Loss mse: 0.0577, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4529 |
[[34m2026-01-30 18:15:53[39m] (step=0004296) Train Loss mse: 0.0514, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 791 |
[[34m2026-01-30 01:57:28[39m] (step=0000780) Train Loss mse: 0.0664, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 792 |
[[34m2026-01-30 01:57:45[39m] (step=0000781) Train Loss mse: 0.0600, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 793 |
[[34m2026-01-30 01:58:02[39m] (step=0000782) Train Loss mse: 0.0489, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 794 |
FullyShardedDataParallel(
|
| 795 |
(_fsdp_wrapped_module): Bagel(
|
| 796 |
(language_model): Qwen2ForCausalLM(
|
|
|
|
| 977 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 978 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 979 |
ce_avg: 0.0, mse_avg: 0.05453861877322197
|
| 980 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step1000
|
| 981 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 982 |
[eval debug] first 3 batch fingerprints:
|
| 983 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 984 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 985 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 986 |
+
ce_avg: 0.0, mse_avg: 0.05385476350784302
|
| 987 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step1500
|
| 988 |
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 989 |
[eval debug] first 3 batch fingerprints:
|
| 990 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 991 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 992 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 993 |
+
ce_avg: 0.0, mse_avg: 0.05185013636946678
|
| 994 |
+
[[34m2026-01-30 01:58:19[39m] (step=0000783) Train Loss mse: 0.0493, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 995 |
+
[[34m2026-01-30 01:58:35[39m] (step=0000784) Train Loss mse: 0.0575, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 996 |
+
[[34m2026-01-30 01:58:52[39m] (step=0000785) Train Loss mse: 0.0494, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 997 |
+
[[34m2026-01-30 01:59:09[39m] (step=0000786) Train Loss mse: 0.0599, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 998 |
+
[[34m2026-01-30 01:59:25[39m] (step=0000787) Train Loss mse: 0.0569, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 999 |
+
[[34m2026-01-30 01:59:42[39m] (step=0000788) Train Loss mse: 0.0589, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1000 |
+
[[34m2026-01-30 01:59:59[39m] (step=0000789) Train Loss mse: 0.0451, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1001 |
+
[[34m2026-01-30 02:00:16[39m] (step=0000790) Train Loss mse: 0.0543, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1002 |
+
[[34m2026-01-30 02:00:33[39m] (step=0000791) Train Loss mse: 0.0641, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1003 |
+
[[34m2026-01-30 02:00:49[39m] (step=0000792) Train Loss mse: 0.0502, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1004 |
+
[[34m2026-01-30 02:01:06[39m] (step=0000793) Train Loss mse: 0.0501, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1005 |
+
[[34m2026-01-30 02:01:22[39m] (step=0000794) Train Loss mse: 0.0646, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1006 |
+
[[34m2026-01-30 02:01:38[39m] (step=0000795) Train Loss mse: 0.0553, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1007 |
+
[[34m2026-01-30 02:01:55[39m] (step=0000796) Train Loss mse: 0.0584, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1008 |
+
[[34m2026-01-30 02:02:12[39m] (step=0000797) Train Loss mse: 0.0652, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1009 |
+
[[34m2026-01-30 02:02:28[39m] (step=0000798) Train Loss mse: 0.0514, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1010 |
+
[[34m2026-01-30 02:02:45[39m] (step=0000799) Train Loss mse: 0.0468, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1011 |
+
[[34m2026-01-30 02:03:02[39m] (step=0000800) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1012 |
+
[[34m2026-01-30 02:03:18[39m] (step=0000801) Train Loss mse: 0.0577, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1013 |
+
[[34m2026-01-30 02:03:35[39m] (step=0000802) Train Loss mse: 0.0555, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1014 |
+
[[34m2026-01-30 02:03:51[39m] (step=0000803) Train Loss mse: 0.0573, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1015 |
+
[[34m2026-01-30 02:04:08[39m] (step=0000804) Train Loss mse: 0.0592, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1016 |
+
[[34m2026-01-30 02:04:25[39m] (step=0000805) Train Loss mse: 0.0615, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1017 |
+
[[34m2026-01-30 02:04:42[39m] (step=0000806) Train Loss mse: 0.0607, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1018 |
+
[[34m2026-01-30 02:04:58[39m] (step=0000807) Train Loss mse: 0.0611, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1019 |
+
[[34m2026-01-30 02:05:15[39m] (step=0000808) Train Loss mse: 0.0460, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1020 |
+
[[34m2026-01-30 02:05:32[39m] (step=0000809) Train Loss mse: 0.0611, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1021 |
+
[[34m2026-01-30 02:05:48[39m] (step=0000810) Train Loss mse: 0.0489, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1022 |
+
[[34m2026-01-30 02:06:05[39m] (step=0000811) Train Loss mse: 0.0634, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1023 |
+
[[34m2026-01-30 02:06:22[39m] (step=0000812) Train Loss mse: 0.0506, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1024 |
+
[[34m2026-01-30 02:06:38[39m] (step=0000813) Train Loss mse: 0.0574, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1025 |
+
[[34m2026-01-30 02:06:55[39m] (step=0000814) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1026 |
+
[[34m2026-01-30 02:07:12[39m] (step=0000815) Train Loss mse: 0.0613, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1027 |
+
[[34m2026-01-30 02:07:28[39m] (step=0000816) Train Loss mse: 0.0563, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1028 |
+
[[34m2026-01-30 02:07:45[39m] (step=0000817) Train Loss mse: 0.0533, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1029 |
+
[[34m2026-01-30 02:08:01[39m] (step=0000818) Train Loss mse: 0.0604, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1030 |
+
[[34m2026-01-30 02:08:18[39m] (step=0000819) Train Loss mse: 0.0552, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1031 |
+
[[34m2026-01-30 02:08:35[39m] (step=0000820) Train Loss mse: 0.0533, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1032 |
+
[[34m2026-01-30 02:08:52[39m] (step=0000821) Train Loss mse: 0.0696, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1033 |
+
[[34m2026-01-30 02:09:08[39m] (step=0000822) Train Loss mse: 0.0618, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1034 |
+
[[34m2026-01-30 02:09:25[39m] (step=0000823) Train Loss mse: 0.0537, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1035 |
+
[[34m2026-01-30 02:09:42[39m] (step=0000824) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1036 |
+
[[34m2026-01-30 02:09:58[39m] (step=0000825) Train Loss mse: 0.0703, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1037 |
+
[[34m2026-01-30 02:10:15[39m] (step=0000826) Train Loss mse: 0.0553, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1038 |
+
[[34m2026-01-30 02:10:32[39m] (step=0000827) Train Loss mse: 0.0499, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1039 |
+
[[34m2026-01-30 02:10:49[39m] (step=0000828) Train Loss mse: 0.0466, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1040 |
+
[[34m2026-01-30 02:11:05[39m] (step=0000829) Train Loss mse: 0.0493, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1041 |
+
[[34m2026-01-30 02:11:22[39m] (step=0000830) Train Loss mse: 0.0555, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1042 |
+
[[34m2026-01-30 02:11:39[39m] (step=0000831) Train Loss mse: 0.0543, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1043 |
+
[[34m2026-01-30 02:11:56[39m] (step=0000832) Train Loss mse: 0.0656, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1044 |
+
[[34m2026-01-30 02:12:12[39m] (step=0000833) Train Loss mse: 0.0556, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1045 |
+
[[34m2026-01-30 02:12:29[39m] (step=0000834) Train Loss mse: 0.0591, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1046 |
+
[[34m2026-01-30 02:12:46[39m] (step=0000835) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1047 |
+
[[34m2026-01-30 02:13:02[39m] (step=0000836) Train Loss mse: 0.0481, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1048 |
+
[[34m2026-01-30 02:13:19[39m] (step=0000837) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1049 |
+
[[34m2026-01-30 02:13:35[39m] (step=0000838) Train Loss mse: 0.0626, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1050 |
+
[[34m2026-01-30 02:13:52[39m] (step=0000839) Train Loss mse: 0.0607, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1051 |
+
[[34m2026-01-30 02:14:09[39m] (step=0000840) Train Loss mse: 0.0552, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1052 |
+
[[34m2026-01-30 02:14:26[39m] (step=0000841) Train Loss mse: 0.0525, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1053 |
+
[[34m2026-01-30 02:14:42[39m] (step=0000842) Train Loss mse: 0.0637, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1054 |
+
[[34m2026-01-30 02:14:59[39m] (step=0000843) Train Loss mse: 0.0680, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1055 |
+
[[34m2026-01-30 02:15:15[39m] (step=0000844) Train Loss mse: 0.0513, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1056 |
+
[[34m2026-01-30 02:15:32[39m] (step=0000845) Train Loss mse: 0.0573, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1057 |
+
[[34m2026-01-30 02:15:49[39m] (step=0000846) Train Loss mse: 0.0547, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1058 |
+
[[34m2026-01-30 02:16:06[39m] (step=0000847) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1059 |
+
[[34m2026-01-30 02:16:22[39m] (step=0000848) Train Loss mse: 0.0592, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1060 |
+
[[34m2026-01-30 02:16:39[39m] (step=0000849) Train Loss mse: 0.0468, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1061 |
+
[[34m2026-01-30 02:16:56[39m] (step=0000850) Train Loss mse: 0.0466, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1062 |
+
[[34m2026-01-30 02:17:12[39m] (step=0000851) Train Loss mse: 0.0648, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1063 |
+
[[34m2026-01-30 02:17:29[39m] (step=0000852) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1064 |
+
[[34m2026-01-30 02:17:46[39m] (step=0000853) Train Loss mse: 0.0633, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1065 |
+
[[34m2026-01-30 02:18:02[39m] (step=0000854) Train Loss mse: 0.0531, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1066 |
+
[[34m2026-01-30 02:18:19[39m] (step=0000855) Train Loss mse: 0.0441, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1067 |
+
[[34m2026-01-30 02:18:36[39m] (step=0000856) Train Loss mse: 0.0665, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1068 |
+
[[34m2026-01-30 02:18:53[39m] (step=0000857) Train Loss mse: 0.0502, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1069 |
+
[[34m2026-01-30 02:19:10[39m] (step=0000858) Train Loss mse: 0.0536, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1070 |
+
[[34m2026-01-30 02:19:27[39m] (step=0000859) Train Loss mse: 0.0497, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1071 |
+
[[34m2026-01-30 02:19:43[39m] (step=0000860) Train Loss mse: 0.0548, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1072 |
+
[[34m2026-01-30 02:20:00[39m] (step=0000861) Train Loss mse: 0.0600, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1073 |
+
[[34m2026-01-30 02:20:16[39m] (step=0000862) Train Loss mse: 0.0653, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1074 |
+
[[34m2026-01-30 02:20:33[39m] (step=0000863) Train Loss mse: 0.0665, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1075 |
+
[[34m2026-01-30 02:20:50[39m] (step=0000864) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1076 |
+
[[34m2026-01-30 02:21:07[39m] (step=0000865) Train Loss mse: 0.0538, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1077 |
+
[[34m2026-01-30 02:21:24[39m] (step=0000866) Train Loss mse: 0.0583, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1078 |
+
[[34m2026-01-30 02:21:40[39m] (step=0000867) Train Loss mse: 0.0626, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1079 |
+
[[34m2026-01-30 02:21:57[39m] (step=0000868) Train Loss mse: 0.0621, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1080 |
+
[[34m2026-01-30 02:22:14[39m] (step=0000869) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1081 |
+
[[34m2026-01-30 02:22:31[39m] (step=0000870) Train Loss mse: 0.0561, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1082 |
+
[[34m2026-01-30 02:22:47[39m] (step=0000871) Train Loss mse: 0.0560, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1083 |
+
[[34m2026-01-30 02:23:04[39m] (step=0000872) Train Loss mse: 0.0603, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1084 |
+
[[34m2026-01-30 02:23:21[39m] (step=0000873) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1085 |
+
[[34m2026-01-30 02:23:38[39m] (step=0000874) Train Loss mse: 0.0663, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1086 |
+
[[34m2026-01-30 02:23:54[39m] (step=0000875) Train Loss mse: 0.0530, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1087 |
+
[[34m2026-01-30 02:24:11[39m] (step=0000876) Train Loss mse: 0.0586, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1088 |
+
[[34m2026-01-30 02:24:28[39m] (step=0000877) Train Loss mse: 0.0585, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1089 |
+
[[34m2026-01-30 02:24:44[39m] (step=0000878) Train Loss mse: 0.0560, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1090 |
+
[[34m2026-01-30 02:25:01[39m] (step=0000879) Train Loss mse: 0.0518, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1091 |
+
[[34m2026-01-30 02:25:18[39m] (step=0000880) Train Loss mse: 0.0568, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1092 |
+
[[34m2026-01-30 02:25:34[39m] (step=0000881) Train Loss mse: 0.0554, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1093 |
[[34m2026-01-30 02:25:51[39m] (step=0000882) Train Loss mse: 0.0666, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1094 |
[[34m2026-01-30 02:26:08[39m] (step=0000883) Train Loss mse: 0.0433, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 1095 |
[[34m2026-01-30 02:26:25[39m] (step=0000884) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 2174 |
[[34m2026-01-30 07:26:45[39m] (step=0001963) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2175 |
[[34m2026-01-30 07:27:02[39m] (step=0001964) Train Loss mse: 0.0519, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2176 |
[[34m2026-01-30 07:27:18[39m] (step=0001965) Train Loss mse: 0.0568, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2177 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2000
|
| 2178 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2179 |
+
[eval debug] first 3 batch fingerprints:
|
| 2180 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2181 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2182 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2183 |
+
ce_avg: 0.0, mse_avg: 0.05133244767785072
|
| 2184 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2500
|
| 2185 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 2186 |
+
[eval debug] first 3 batch fingerprints:
|
| 2187 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2188 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2189 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 2190 |
+
ce_avg: 0.0, mse_avg: 0.05205056443810463
|
| 2191 |
[[34m2026-01-30 07:27:35[39m] (step=0001966) Train Loss mse: 0.0562, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2192 |
[[34m2026-01-30 07:27:52[39m] (step=0001967) Train Loss mse: 0.0602, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2193 |
[[34m2026-01-30 07:28:08[39m] (step=0001968) Train Loss mse: 0.0593, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 2333 |
[[34m2026-01-30 08:07:20[39m] (step=0002108) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2334 |
[[34m2026-01-30 08:07:37[39m] (step=0002109) Train Loss mse: 0.0565, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2335 |
[[34m2026-01-30 08:07:54[39m] (step=0002110) Train Loss mse: 0.0507, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2336 |
[[34m2026-01-30 08:08:10[39m] (step=0002111) Train Loss mse: 0.0557, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2337 |
[[34m2026-01-30 08:08:27[39m] (step=0002112) Train Loss mse: 0.0546, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 2338 |
[[34m2026-01-30 08:08:44[39m] (step=0002113) Train Loss mse: 0.0537, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 3228 |
[[34m2026-01-30 12:16:26[39m] (step=0003003) Train Loss mse: 0.0488, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3229 |
[[34m2026-01-30 12:16:42[39m] (step=0003004) Train Loss mse: 0.0548, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3230 |
[[34m2026-01-30 12:16:59[39m] (step=0003005) Train Loss mse: 0.0518, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3231 |
+
[[34m2026-01-30 12:17:16[39m] (step=0003006) Train Loss mse: 0.0587, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3232 |
+
[[34m2026-01-30 12:17:32[39m] (step=0003007) Train Loss mse: 0.0525, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3233 |
+
[[34m2026-01-30 12:17:49[39m] (step=0003008) Train Loss mse: 0.0537, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3234 |
+
[[34m2026-01-30 12:18:06[39m] (step=0003009) Train Loss mse: 0.0481, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3235 |
+
[[34m2026-01-30 12:18:23[39m] (step=0003010) Train Loss mse: 0.0527, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3236 |
+
[[34m2026-01-30 12:18:39[39m] (step=0003011) Train Loss mse: 0.0501, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3237 |
+
[[34m2026-01-30 12:18:56[39m] (step=0003012) Train Loss mse: 0.0532, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3238 |
+
[[34m2026-01-30 12:19:12[39m] (step=0003013) Train Loss mse: 0.0553, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3239 |
+
[[34m2026-01-30 12:19:29[39m] (step=0003014) Train Loss mse: 0.0542, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3240 |
+
[[34m2026-01-30 12:19:46[39m] (step=0003015) Train Loss mse: 0.0549, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3241 |
+
[[34m2026-01-30 12:20:02[39m] (step=0003016) Train Loss mse: 0.0562, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3242 |
+
[[34m2026-01-30 12:20:19[39m] (step=0003017) Train Loss mse: 0.0514, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3243 |
+
[[34m2026-01-30 12:20:35[39m] (step=0003018) Train Loss mse: 0.0414, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3244 |
+
[[34m2026-01-30 12:20:52[39m] (step=0003019) Train Loss mse: 0.0610, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3245 |
+
[[34m2026-01-30 12:21:08[39m] (step=0003020) Train Loss mse: 0.0488, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3246 |
+
[[34m2026-01-30 12:21:25[39m] (step=0003021) Train Loss mse: 0.0605, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3247 |
+
[[34m2026-01-30 12:21:42[39m] (step=0003022) Train Loss mse: 0.0536, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3248 |
+
[[34m2026-01-30 12:21:59[39m] (step=0003023) Train Loss mse: 0.0466, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3249 |
+
[[34m2026-01-30 12:22:16[39m] (step=0003024) Train Loss mse: 0.0564, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3250 |
+
[[34m2026-01-30 12:22:32[39m] (step=0003025) Train Loss mse: 0.0536, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3251 |
[[34m2026-01-30 12:22:49[39m] (step=0003026) Train Loss mse: 0.0576, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3252 |
[[34m2026-01-30 12:23:06[39m] (step=0003027) Train Loss mse: 0.0597, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 3253 |
[[34m2026-01-30 12:23:22[39m] (step=0003028) Train Loss mse: 0.0511, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 4210 |
[[34m2026-01-30 16:49:13[39m] (step=0003985) Train Loss mse: 0.0617, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4211 |
[[34m2026-01-30 16:49:30[39m] (step=0003986) Train Loss mse: 0.0581, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4212 |
[[34m2026-01-30 16:49:47[39m] (step=0003987) Train Loss mse: 0.0509, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4213 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3000
|
| 4214 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4215 |
+
[eval debug] first 3 batch fingerprints:
|
| 4216 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4217 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4218 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4219 |
+
ce_avg: 0.0, mse_avg: 0.05160127952694893
|
| 4220 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3500
|
| 4221 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4222 |
+
[eval debug] first 3 batch fingerprints:
|
| 4223 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4224 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4225 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4226 |
+
ce_avg: 0.0, mse_avg: 0.05199814960360527
|
| 4227 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4000
|
| 4228 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4229 |
+
[eval debug] first 3 batch fingerprints:
|
| 4230 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4231 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4232 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4233 |
+
ce_avg: 0.0, mse_avg: 0.05156182870268822
|
| 4234 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4500
|
| 4235 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4236 |
+
[eval debug] first 3 batch fingerprints:
|
| 4237 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4238 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4239 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4240 |
+
ce_avg: 0.0, mse_avg: 0.05144112929701805
|
| 4241 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step5000
|
| 4242 |
+
Preparing Dataset vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_val
|
| 4243 |
+
[eval debug] first 3 batch fingerprints:
|
| 4244 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4245 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4246 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_objaverse_pad3_by_axis_mse_loss_only_evalonce'}]
|
| 4247 |
+
ce_avg: 0.0, mse_avg: 0.05311766639351845
|
| 4248 |
[[34m2026-01-30 16:50:03[39m] (step=0003988) Train Loss mse: 0.0652, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4249 |
[[34m2026-01-30 16:50:20[39m] (step=0003989) Train Loss mse: 0.0522, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4250 |
[[34m2026-01-30 16:50:36[39m] (step=0003990) Train Loss mse: 0.0497, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
| 4551 |
[[34m2026-01-30 18:14:29[39m] (step=0004291) Train Loss mse: 0.0559, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4552 |
[[34m2026-01-30 18:14:46[39m] (step=0004292) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4553 |
[[34m2026-01-30 18:15:02[39m] (step=0004293) Train Loss mse: 0.0432, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4554 |
[[34m2026-01-30 18:15:19[39m] (step=0004294) Train Loss mse: 0.0541, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4555 |
[[34m2026-01-30 18:15:36[39m] (step=0004295) Train Loss mse: 0.0577, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|
| 4556 |
[[34m2026-01-30 18:15:53[39m] (step=0004296) Train Loss mse: 0.0514, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
|