diff --git "a/lm_eval_logs/driver_nohup.log" "b/lm_eval_logs/driver_nohup.log" new file mode 100644--- /dev/null +++ "b/lm_eval_logs/driver_nohup.log" @@ -0,0 +1,1213 @@ +[INFO] override: GPU_ID=1 +[INFO] override: RUN_BG=1 +[INFO] override: MUON_LR=2.6e-3 +[INFO] override: ALPHA=1200 +[INFO] override: RUN_TRAIN=1 +[INFO] override: STEERING_COEFFS=15 +[INFO] OUTPUT_DIR = models/muon_v1_test_V1_K64_batches150_bs4_alpha1200-1200_steer15-15_muonlr2.6e-3_muonmom0.95_muonwd0.1_adamlr5e-5_seed42_layer7_layers5-6-7_params6 +[INFO] PRETRAINED_PATH = /egr/research-optml/wangc168/Muon_wmdp/rmu_wmdp/models/muon_v1_test_V1_K64_batches150_bs4_alpha1200-1200_steer15-15_muonlr2.6e-3_muonmom0.95_muonwd0.1_adamlr5e-5_seed42_layer7_layers5-6-7_params6 +[INFO] LOG_DIR = /egr/research-optml/wangc168/Muon_wmdp/rmu_wmdp/models/muon_v1_test_V1_K64_batches150_bs4_alpha1200-1200_steer15-15_muonlr2.6e-3_muonmom0.95_muonwd0.1_adamlr5e-5_seed42_layer7_layers5-6-7_params6/lm_eval_logs +[INFO] GPU_ID = 1 (CUDA_VISIBLE_DEVICES=1 => device cuda:0) +[INFO] WAIT_SEC = 300 (seconds) +[INFO] MAX_RETRIES = 10 (0 means infinite) +[INFO] training... +/egr/research-optml/wangc168/anaconda3/envs/py310/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. + warnings.warn( + Downloading shards: 0%| | 0/8 [00:00