Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- 20260124_2143/config.yaml +142 -0
- 20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1 +3 -0
- 20260124_2143/reward_model/1769262210.5078583/hparams.yml +4 -0
- 20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0 +3 -0
- 20260124_2143/train.20260124_2143.log +803 -0
- 20260124_2354/config.yaml +142 -0
- 20260124_2354/reward_model/1769270104.0081618/events.out.tfevents.1769270104.MACLAB-S004.3211506.1 +3 -0
- 20260124_2354/reward_model/1769270104.0091846/hparams.yml +4 -0
- 20260124_2354/reward_model/events.out.tfevents.1769270104.MACLAB-S004.3211506.0 +3 -0
- 20260124_2354/train.20260124_2354.log +306 -0
- 20260125_0035/config.yaml +142 -0
- 20260125_0035/reward_model/1769272544.7198617/events.out.tfevents.1769272544.MACLAB-S004.3403711.1 +3 -0
- 20260125_0035/reward_model/1769272544.7213397/hparams.yml +4 -0
- 20260125_0035/reward_model/events.out.tfevents.1769272544.MACLAB-S004.3403711.0 +3 -0
- 20260125_0035/train.20260125_0035.log +421 -0
- 20260125_0037/config.yaml +142 -0
- 20260125_0037/eval_results_0125_1713.jsonl +0 -0
- 20260125_0037/reward_model/1769272678.832529/events.out.tfevents.1769272678.MACLAB-S004.3414271.1 +3 -0
- 20260125_0037/reward_model/1769272678.8337765/hparams.yml +4 -0
- 20260125_0037/reward_model/events.out.tfevents.1769272678.MACLAB-S004.3414271.0 +3 -0
- 20260125_0037/train.20260125_0037.log +421 -0
- 20260125_0038/config.yaml +142 -0
- 20260125_0038/reward_model/1769272741.4481056/events.out.tfevents.1769272741.MACLAB-S004.3419169.1 +3 -0
- 20260125_0038/reward_model/1769272741.4495451/hparams.yml +4 -0
- 20260125_0038/reward_model/events.out.tfevents.1769272741.MACLAB-S004.3419169.0 +3 -0
- 20260125_0038/train.20260125_0038.log +211 -0
- 20260125_0933/config.yaml +142 -0
- 20260125_0933/reward_model/1769304848.6545663/events.out.tfevents.1769304848.MACLAB-S004.1519845.1 +3 -0
- 20260125_0933/reward_model/1769304848.6563416/hparams.yml +4 -0
- 20260125_0933/reward_model/events.out.tfevents.1769304848.MACLAB-S004.1519845.0 +3 -0
- 20260125_0933/train.20260125_0933.log +564 -0
- 20260125_0947_CA/config.yaml +142 -0
- 20260125_0947_CA/eval_results_0125_1703.jsonl +0 -0
- 20260125_0947_CA/reward_model/1769305674.1033533/events.out.tfevents.1769305674.MACLAB-S004.1592070.1 +3 -0
- 20260125_0947_CA/reward_model/1769305674.1053352/hparams.yml +4 -0
- 20260125_0947_CA/reward_model/events.out.tfevents.1769305674.MACLAB-S004.1592070.0 +3 -0
- 20260125_0947_CA/train.20260125_0947_CA.log +438 -0
- 20260125_1117/config.yaml +142 -0
- 20260125_1117/reward_model/1769311084.1305242/events.out.tfevents.1769311084.MACLAB-S004.2009526.1 +3 -0
- 20260125_1117/reward_model/1769311084.1322424/hparams.yml +4 -0
- 20260125_1117/reward_model/events.out.tfevents.1769311084.MACLAB-S004.2009526.0 +3 -0
- 20260125_1117/train.20260125_1117.log +441 -0
- 20260125_1231/config.yaml +142 -0
- 20260125_1231/eval_results_0125_1707.jsonl +0 -0
- 20260125_1231/reward_model/1769315504.5030606/events.out.tfevents.1769315504.MACLAB-S004.2360364.1 +3 -0
- 20260125_1231/reward_model/1769315504.5045948/hparams.yml +4 -0
- 20260125_1231/reward_model/events.out.tfevents.1769315504.MACLAB-S004.2360364.0 +3 -0
- 20260125_1231/test_20260125_191012_reward_model.best_4499/test_results.json +51 -0
- 20260125_1231/test_20260125_194533_reward_model.best_4499/test_config.yaml +142 -0
- 20260125_1231/test_20260125_194533_reward_model.best_4499/test_results.json +239 -0
20260124_2143/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '3'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: false
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: false
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 1500
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 4000
|
| 125 |
+
warmup_steps: 300
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 0.0001
|
| 128 |
+
num_train_steps: 4000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: true
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260124_2143/reward_model/1769262210.5061178/events.out.tfevents.1769262210.MACLAB-S004.2626926.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82ee766b07252644d7045f50ffd3d29ed1cbc0b26a834bdb1d855c526f959108
|
| 3 |
+
size 503
|
20260124_2143/reward_model/1769262210.5078583/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 0.0001
|
| 4 |
+
num_train_steps: 4000
|
20260124_2143/reward_model/events.out.tfevents.1769262210.MACLAB-S004.2626926.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:361130a96e5393eb1f50a4f818c47547a16295e3f01976ce0e9113e0a561cf68
|
| 3 |
+
size 2219689
|
20260124_2143/train.20260124_2143.log
ADDED
|
@@ -0,0 +1,803 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-24 21:43:19 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/train.20260124_2143.log
|
| 2 |
+
2026-01-24 21:43:19 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-24 21:43:21 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-24 21:43:21 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-24 21:43:21 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 6 |
+
2026-01-24 21:43:21 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-24 21:43:29 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-24 21:43:29 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-24 21:43:29 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-24 21:43:29 | INFO | ✓ EMA enabled with decay=0.9999, update_every=1 (CPU offload)
|
| 11 |
+
2026-01-24 21:43:29 | INFO | MLP head parameters: 1,186,563 params, lr=0.0001
|
| 12 |
+
2026-01-24 21:43:29 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 13 |
+
2026-01-24 21:43:29 | INFO | Using lr_schedule=linear_cosine warmup_steps=300 total_steps=4000
|
| 14 |
+
2026-01-24 21:43:29 | INFO | Training with fixed validation set
|
| 15 |
+
2026-01-24 21:43:29 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 16 |
+
2026-01-24 21:43:29 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 17 |
+
2026-01-24 21:43:29 | INFO | ✓ EMA state loaded
|
| 18 |
+
2026-01-24 21:43:29 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=29999)
|
| 19 |
+
2026-01-24 21:43:29 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 20 |
+
2026-01-24 21:43:29 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 21 |
+
2026-01-24 21:43:29 | INFO | Text encoder (frozen): 328.389M
|
| 22 |
+
2026-01-24 21:43:29 | INFO | Audio encoder (frozen): 334.189M
|
| 23 |
+
2026-01-24 21:43:29 | INFO | Other trainable: 38.584M
|
| 24 |
+
2026-01-24 21:43:29 | INFO | ℹ No LoRA configuration detected
|
| 25 |
+
2026-01-24 21:43:30 | INFO | ============================================================
|
| 26 |
+
2026-01-24 21:43:30 | INFO | Ready to start training
|
| 27 |
+
2026-01-24 21:43:30 | INFO | ============================================================
|
| 28 |
+
2026-01-24 21:43:30 | INFO | Starting training from step 0
|
| 29 |
+
2026-01-24 21:43:30 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 30 |
+
2026-01-24 21:43:30 | INFO | accelerator.device = cuda
|
| 31 |
+
2026-01-24 21:43:30 | INFO | mixed_precision = bf16
|
| 32 |
+
2026-01-24 21:43:30 | INFO | distributed_type = NO
|
| 33 |
+
2026-01-24 21:43:30 | INFO | num_processes = 1
|
| 34 |
+
2026-01-24 21:43:30 | INFO | process_index = 0
|
| 35 |
+
2026-01-24 21:43:30 | INFO | is_main_process = True
|
| 36 |
+
2026-01-24 21:43:30 | INFO | torch.cuda.is_available() = True
|
| 37 |
+
2026-01-24 21:43:30 | INFO | torch.cuda.device_count() = 1
|
| 38 |
+
2026-01-24 21:43:30 | INFO | current_device = 0
|
| 39 |
+
2026-01-24 21:43:30 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 40 |
+
2026-01-24 21:43:30 | INFO | model parameter device = cuda:0
|
| 41 |
+
2026-01-24 21:43:30 | INFO | Training for 4000.0 steps (~63 epochs, 64 steps/epoch)
|
| 42 |
+
2026-01-24 21:43:38 | INFO | Step 0: loss=1.6133 | IF_loss=2.2461, MQ_loss=0.9805 | acc=0.740 (IF=0.708, MQ=0.771) | lr=0.000001
|
| 43 |
+
2026-01-24 21:43:38 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 44 |
+
2026-01-24 21:43:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt (575.2MB)
|
| 45 |
+
2026-01-24 21:43:39 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.0.pt
|
| 46 |
+
2026-01-24 21:45:32 | INFO |
|
| 47 |
+
============================================================
|
| 48 |
+
Validation Results (took 9.56s):
|
| 49 |
+
Samples: 346 instruction, 346 quality
|
| 50 |
+
Instruction Acc: 0.6821
|
| 51 |
+
Quality Acc: 0.6387
|
| 52 |
+
Average Acc: 0.6604
|
| 53 |
+
Total Loss: 1.8726
|
| 54 |
+
Instruction Loss: 1.6586
|
| 55 |
+
Quality Loss: 2.0866
|
| 56 |
+
============================================================
|
| 57 |
+
2026-01-24 21:45:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 58 |
+
2026-01-24 21:45:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt (575.2MB)
|
| 59 |
+
2026-01-24 21:45:33 | INFO | Best 1 checkpoints:
|
| 60 |
+
2026-01-24 21:45:33 | INFO | 1. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 61 |
+
2026-01-24 21:45:34 | INFO | Step 100: loss=1.5309 | IF_loss=1.2373, MQ_loss=1.8246 | acc=0.646 (IF=0.688, MQ=0.604) | lr=0.000034
|
| 62 |
+
2026-01-24 21:47:29 | INFO |
|
| 63 |
+
============================================================
|
| 64 |
+
Validation Results (took 8.11s):
|
| 65 |
+
Samples: 346 instruction, 346 quality
|
| 66 |
+
Instruction Acc: 0.6850
|
| 67 |
+
Quality Acc: 0.6387
|
| 68 |
+
Average Acc: 0.6618
|
| 69 |
+
Total Loss: 1.8631
|
| 70 |
+
Instruction Loss: 1.6525
|
| 71 |
+
Quality Loss: 2.0736
|
| 72 |
+
============================================================
|
| 73 |
+
2026-01-24 21:47:29 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 74 |
+
2026-01-24 21:47:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt (575.2MB)
|
| 75 |
+
2026-01-24 21:47:30 | INFO | Best 2 checkpoints:
|
| 76 |
+
2026-01-24 21:47:30 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 77 |
+
2026-01-24 21:47:30 | INFO | 2. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 78 |
+
2026-01-24 21:47:31 | INFO | Step 200: loss=0.4360 | IF_loss=0.4299, MQ_loss=0.4421 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000067
|
| 79 |
+
2026-01-24 21:49:25 | INFO |
|
| 80 |
+
============================================================
|
| 81 |
+
Validation Results (took 9.42s):
|
| 82 |
+
Samples: 346 instruction, 346 quality
|
| 83 |
+
Instruction Acc: 0.6850
|
| 84 |
+
Quality Acc: 0.6387
|
| 85 |
+
Average Acc: 0.6618
|
| 86 |
+
Total Loss: 1.8438
|
| 87 |
+
Instruction Loss: 1.6364
|
| 88 |
+
Quality Loss: 2.0512
|
| 89 |
+
============================================================
|
| 90 |
+
2026-01-24 21:49:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 91 |
+
2026-01-24 21:49:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt (575.2MB)
|
| 92 |
+
2026-01-24 21:49:25 | INFO | Best 3 checkpoints:
|
| 93 |
+
2026-01-24 21:49:25 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 94 |
+
2026-01-24 21:49:25 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 95 |
+
2026-01-24 21:49:25 | INFO | 3. Step 99: acc=0.6604 (reward_model.best_99.pt)
|
| 96 |
+
2026-01-24 21:49:26 | INFO | Step 300: loss=0.4121 | IF_loss=0.5007, MQ_loss=0.3235 | acc=0.844 (IF=0.792, MQ=0.896) | lr=0.000100
|
| 97 |
+
2026-01-24 21:51:23 | INFO |
|
| 98 |
+
============================================================
|
| 99 |
+
Validation Results (took 7.32s):
|
| 100 |
+
Samples: 346 instruction, 346 quality
|
| 101 |
+
Instruction Acc: 0.6850
|
| 102 |
+
Quality Acc: 0.6387
|
| 103 |
+
Average Acc: 0.6618
|
| 104 |
+
Total Loss: 1.8266
|
| 105 |
+
Instruction Loss: 1.6230
|
| 106 |
+
Quality Loss: 2.0303
|
| 107 |
+
============================================================
|
| 108 |
+
2026-01-24 21:51:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 109 |
+
2026-01-24 21:51:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt (575.2MB)
|
| 110 |
+
2026-01-24 21:51:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_99.pt
|
| 111 |
+
2026-01-24 21:51:24 | INFO | Best 3 checkpoints:
|
| 112 |
+
2026-01-24 21:51:24 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 113 |
+
2026-01-24 21:51:24 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 114 |
+
2026-01-24 21:51:24 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 115 |
+
2026-01-24 21:51:25 | INFO | Step 400: loss=0.4819 | IF_loss=0.4988, MQ_loss=0.4650 | acc=0.760 (IF=0.708, MQ=0.812) | lr=0.000100
|
| 116 |
+
2026-01-24 21:53:18 | INFO |
|
| 117 |
+
============================================================
|
| 118 |
+
Validation Results (took 8.30s):
|
| 119 |
+
Samples: 346 instruction, 346 quality
|
| 120 |
+
Instruction Acc: 0.6821
|
| 121 |
+
Quality Acc: 0.6416
|
| 122 |
+
Average Acc: 0.6618
|
| 123 |
+
Total Loss: 1.8103
|
| 124 |
+
Instruction Loss: 1.6100
|
| 125 |
+
Quality Loss: 2.0107
|
| 126 |
+
============================================================
|
| 127 |
+
2026-01-24 21:53:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 128 |
+
2026-01-24 21:53:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt (575.2MB)
|
| 129 |
+
2026-01-24 21:53:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_499.pt
|
| 130 |
+
2026-01-24 21:53:19 | INFO | Best 3 checkpoints:
|
| 131 |
+
2026-01-24 21:53:19 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 132 |
+
2026-01-24 21:53:19 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 133 |
+
2026-01-24 21:53:19 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 134 |
+
2026-01-24 21:53:20 | INFO | Step 500: loss=0.4074 | IF_loss=0.4939, MQ_loss=0.3209 | acc=0.854 (IF=0.792, MQ=0.917) | lr=0.000099
|
| 135 |
+
2026-01-24 21:55:17 | INFO |
|
| 136 |
+
============================================================
|
| 137 |
+
Validation Results (took 7.55s):
|
| 138 |
+
Samples: 346 instruction, 346 quality
|
| 139 |
+
Instruction Acc: 0.6821
|
| 140 |
+
Quality Acc: 0.6416
|
| 141 |
+
Average Acc: 0.6618
|
| 142 |
+
Total Loss: 1.7951
|
| 143 |
+
Instruction Loss: 1.5986
|
| 144 |
+
Quality Loss: 1.9916
|
| 145 |
+
============================================================
|
| 146 |
+
2026-01-24 21:55:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 147 |
+
2026-01-24 21:55:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt (575.2MB)
|
| 148 |
+
2026-01-24 21:55:17 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_599.pt
|
| 149 |
+
2026-01-24 21:55:17 | INFO | Best 3 checkpoints:
|
| 150 |
+
2026-01-24 21:55:17 | INFO | 1. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 151 |
+
2026-01-24 21:55:17 | INFO | 2. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 152 |
+
2026-01-24 21:55:17 | INFO | 3. Step 399: acc=0.6618 (reward_model.best_399.pt)
|
| 153 |
+
2026-01-24 21:55:18 | INFO | Step 600: loss=0.3505 | IF_loss=0.3784, MQ_loss=0.3226 | acc=0.844 (IF=0.812, MQ=0.875) | lr=0.000098
|
| 154 |
+
2026-01-24 21:57:14 | INFO |
|
| 155 |
+
============================================================
|
| 156 |
+
Validation Results (took 7.89s):
|
| 157 |
+
Samples: 346 instruction, 346 quality
|
| 158 |
+
Instruction Acc: 0.6821
|
| 159 |
+
Quality Acc: 0.6445
|
| 160 |
+
Average Acc: 0.6633
|
| 161 |
+
Total Loss: 1.7807
|
| 162 |
+
Instruction Loss: 1.5876
|
| 163 |
+
Quality Loss: 1.9739
|
| 164 |
+
============================================================
|
| 165 |
+
2026-01-24 21:57:14 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 166 |
+
2026-01-24 21:57:14 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt (575.2MB)
|
| 167 |
+
2026-01-24 21:57:14 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_399.pt
|
| 168 |
+
2026-01-24 21:57:14 | INFO | Best 3 checkpoints:
|
| 169 |
+
2026-01-24 21:57:14 | INFO | 1. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 170 |
+
2026-01-24 21:57:14 | INFO | 2. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 171 |
+
2026-01-24 21:57:14 | INFO | 3. Step 299: acc=0.6618 (reward_model.best_299.pt)
|
| 172 |
+
2026-01-24 21:57:15 | INFO | Step 700: loss=0.2439 | IF_loss=0.3054, MQ_loss=0.1823 | acc=0.875 (IF=0.854, MQ=0.896) | lr=0.000097
|
| 173 |
+
2026-01-24 21:59:13 | INFO |
|
| 174 |
+
============================================================
|
| 175 |
+
Validation Results (took 7.71s):
|
| 176 |
+
Samples: 346 instruction, 346 quality
|
| 177 |
+
Instruction Acc: 0.6821
|
| 178 |
+
Quality Acc: 0.6474
|
| 179 |
+
Average Acc: 0.6647
|
| 180 |
+
Total Loss: 1.7686
|
| 181 |
+
Instruction Loss: 1.5780
|
| 182 |
+
Quality Loss: 1.9591
|
| 183 |
+
============================================================
|
| 184 |
+
2026-01-24 21:59:13 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 185 |
+
2026-01-24 21:59:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt (575.2MB)
|
| 186 |
+
2026-01-24 21:59:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_299.pt
|
| 187 |
+
2026-01-24 21:59:13 | INFO | Best 3 checkpoints:
|
| 188 |
+
2026-01-24 21:59:13 | INFO | 1. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 189 |
+
2026-01-24 21:59:13 | INFO | 2. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 190 |
+
2026-01-24 21:59:13 | INFO | 3. Step 199: acc=0.6618 (reward_model.best_199.pt)
|
| 191 |
+
2026-01-24 21:59:14 | INFO | Step 800: loss=0.2827 | IF_loss=0.3525, MQ_loss=0.2128 | acc=0.885 (IF=0.875, MQ=0.896) | lr=0.000096
|
| 192 |
+
2026-01-24 22:01:11 | INFO |
|
| 193 |
+
============================================================
|
| 194 |
+
Validation Results (took 7.05s):
|
| 195 |
+
Samples: 346 instruction, 346 quality
|
| 196 |
+
Instruction Acc: 0.6850
|
| 197 |
+
Quality Acc: 0.6474
|
| 198 |
+
Average Acc: 0.6662
|
| 199 |
+
Total Loss: 1.7570
|
| 200 |
+
Instruction Loss: 1.5693
|
| 201 |
+
Quality Loss: 1.9446
|
| 202 |
+
============================================================
|
| 203 |
+
2026-01-24 22:01:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 204 |
+
2026-01-24 22:01:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt (575.2MB)
|
| 205 |
+
2026-01-24 22:01:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_199.pt
|
| 206 |
+
2026-01-24 22:01:12 | INFO | Best 3 checkpoints:
|
| 207 |
+
2026-01-24 22:01:12 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 208 |
+
2026-01-24 22:01:12 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 209 |
+
2026-01-24 22:01:12 | INFO | 3. Step 699: acc=0.6633 (reward_model.best_699.pt)
|
| 210 |
+
2026-01-24 22:01:13 | INFO | Step 900: loss=0.1525 | IF_loss=0.1838, MQ_loss=0.1212 | acc=0.958 (IF=0.958, MQ=0.958) | lr=0.000094
|
| 211 |
+
2026-01-24 22:03:07 | INFO |
|
| 212 |
+
============================================================
|
| 213 |
+
Validation Results (took 7.74s):
|
| 214 |
+
Samples: 346 instruction, 346 quality
|
| 215 |
+
Instruction Acc: 0.6821
|
| 216 |
+
Quality Acc: 0.6474
|
| 217 |
+
Average Acc: 0.6647
|
| 218 |
+
Total Loss: 1.7472
|
| 219 |
+
Instruction Loss: 1.5625
|
| 220 |
+
Quality Loss: 1.9319
|
| 221 |
+
============================================================
|
| 222 |
+
2026-01-24 22:03:07 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 223 |
+
2026-01-24 22:03:08 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt (575.2MB)
|
| 224 |
+
2026-01-24 22:03:08 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_699.pt
|
| 225 |
+
2026-01-24 22:03:08 | INFO | Best 3 checkpoints:
|
| 226 |
+
2026-01-24 22:03:08 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 227 |
+
2026-01-24 22:03:08 | INFO | 2. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 228 |
+
2026-01-24 22:03:08 | INFO | 3. Step 999: acc=0.6647 (reward_model.best_999.pt)
|
| 229 |
+
2026-01-24 22:03:09 | INFO | Step 1000: loss=0.1671 | IF_loss=0.1673, MQ_loss=0.1668 | acc=0.969 (IF=0.979, MQ=0.958) | lr=0.000091
|
| 230 |
+
2026-01-24 22:05:04 | INFO |
|
| 231 |
+
============================================================
|
| 232 |
+
Validation Results (took 6.94s):
|
| 233 |
+
Samples: 346 instruction, 346 quality
|
| 234 |
+
Instruction Acc: 0.6850
|
| 235 |
+
Quality Acc: 0.6474
|
| 236 |
+
Average Acc: 0.6662
|
| 237 |
+
Total Loss: 1.7380
|
| 238 |
+
Instruction Loss: 1.5555
|
| 239 |
+
Quality Loss: 1.9205
|
| 240 |
+
============================================================
|
| 241 |
+
2026-01-24 22:05:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 242 |
+
2026-01-24 22:05:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt (575.2MB)
|
| 243 |
+
2026-01-24 22:05:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_999.pt
|
| 244 |
+
2026-01-24 22:05:04 | INFO | Best 3 checkpoints:
|
| 245 |
+
2026-01-24 22:05:04 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 246 |
+
2026-01-24 22:05:04 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 247 |
+
2026-01-24 22:05:04 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 248 |
+
2026-01-24 22:05:05 | INFO | Step 1100: loss=0.1267 | IF_loss=0.1381, MQ_loss=0.1154 | acc=0.948 (IF=0.958, MQ=0.938) | lr=0.000089
|
| 249 |
+
2026-01-24 22:07:02 | INFO |
|
| 250 |
+
============================================================
|
| 251 |
+
Validation Results (took 7.34s):
|
| 252 |
+
Samples: 346 instruction, 346 quality
|
| 253 |
+
Instruction Acc: 0.6850
|
| 254 |
+
Quality Acc: 0.6416
|
| 255 |
+
Average Acc: 0.6633
|
| 256 |
+
Total Loss: 1.7320
|
| 257 |
+
Instruction Loss: 1.5520
|
| 258 |
+
Quality Loss: 1.9119
|
| 259 |
+
============================================================
|
| 260 |
+
2026-01-24 22:07:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 261 |
+
2026-01-24 22:07:03 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt (575.2MB)
|
| 262 |
+
2026-01-24 22:07:03 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1199.pt
|
| 263 |
+
2026-01-24 22:07:03 | INFO | Best 3 checkpoints:
|
| 264 |
+
2026-01-24 22:07:03 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 265 |
+
2026-01-24 22:07:03 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 266 |
+
2026-01-24 22:07:03 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 267 |
+
2026-01-24 22:07:04 | INFO | Step 1200: loss=0.1201 | IF_loss=0.1744, MQ_loss=0.0657 | acc=0.948 (IF=0.917, MQ=0.979) | lr=0.000086
|
| 268 |
+
2026-01-24 22:08:59 | INFO |
|
| 269 |
+
============================================================
|
| 270 |
+
Validation Results (took 7.61s):
|
| 271 |
+
Samples: 346 instruction, 346 quality
|
| 272 |
+
Instruction Acc: 0.6850
|
| 273 |
+
Quality Acc: 0.6416
|
| 274 |
+
Average Acc: 0.6633
|
| 275 |
+
Total Loss: 1.7259
|
| 276 |
+
Instruction Loss: 1.5481
|
| 277 |
+
Quality Loss: 1.9036
|
| 278 |
+
============================================================
|
| 279 |
+
2026-01-24 22:08:59 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 280 |
+
2026-01-24 22:09:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt (575.2MB)
|
| 281 |
+
2026-01-24 22:09:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1299.pt
|
| 282 |
+
2026-01-24 22:09:00 | INFO | Best 3 checkpoints:
|
| 283 |
+
2026-01-24 22:09:00 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 284 |
+
2026-01-24 22:09:00 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 285 |
+
2026-01-24 22:09:00 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 286 |
+
2026-01-24 22:09:01 | INFO | Step 1300: loss=0.0937 | IF_loss=0.1357, MQ_loss=0.0516 | acc=0.958 (IF=0.938, MQ=0.979) | lr=0.000083
|
| 287 |
+
2026-01-24 22:10:53 | INFO |
|
| 288 |
+
============================================================
|
| 289 |
+
Validation Results (took 7.24s):
|
| 290 |
+
Samples: 346 instruction, 346 quality
|
| 291 |
+
Instruction Acc: 0.6850
|
| 292 |
+
Quality Acc: 0.6416
|
| 293 |
+
Average Acc: 0.6633
|
| 294 |
+
Total Loss: 1.7217
|
| 295 |
+
Instruction Loss: 1.5459
|
| 296 |
+
Quality Loss: 1.8975
|
| 297 |
+
============================================================
|
| 298 |
+
2026-01-24 22:10:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 299 |
+
2026-01-24 22:10:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt (575.2MB)
|
| 300 |
+
2026-01-24 22:10:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1399.pt
|
| 301 |
+
2026-01-24 22:10:54 | INFO | Best 3 checkpoints:
|
| 302 |
+
2026-01-24 22:10:54 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 303 |
+
2026-01-24 22:10:54 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 304 |
+
2026-01-24 22:10:54 | INFO | 3. Step 799: acc=0.6647 (reward_model.best_799.pt)
|
| 305 |
+
2026-01-24 22:10:55 | INFO | Step 1400: loss=0.0782 | IF_loss=0.1080, MQ_loss=0.0484 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000080
|
| 306 |
+
2026-01-24 22:12:49 | INFO |
|
| 307 |
+
============================================================
|
| 308 |
+
Validation Results (took 7.28s):
|
| 309 |
+
Samples: 346 instruction, 346 quality
|
| 310 |
+
Instruction Acc: 0.6908
|
| 311 |
+
Quality Acc: 0.6416
|
| 312 |
+
Average Acc: 0.6662
|
| 313 |
+
Total Loss: 1.7182
|
| 314 |
+
Instruction Loss: 1.5441
|
| 315 |
+
Quality Loss: 1.8922
|
| 316 |
+
============================================================
|
| 317 |
+
2026-01-24 22:12:49 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
|
| 318 |
+
2026-01-24 22:12:49 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt (575.2MB)
|
| 319 |
+
2026-01-24 22:12:49 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_799.pt
|
| 320 |
+
2026-01-24 22:12:49 | INFO | Best 3 checkpoints:
|
| 321 |
+
2026-01-24 22:12:49 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 322 |
+
2026-01-24 22:12:49 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 323 |
+
2026-01-24 22:12:49 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
|
| 324 |
+
2026-01-24 22:12:50 | INFO | Step 1500: loss=0.0699 | IF_loss=0.0703, MQ_loss=0.0695 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000076
|
| 325 |
+
2026-01-24 22:14:41 | INFO |
|
| 326 |
+
============================================================
|
| 327 |
+
Validation Results (took 7.76s):
|
| 328 |
+
Samples: 346 instruction, 346 quality
|
| 329 |
+
Instruction Acc: 0.6908
|
| 330 |
+
Quality Acc: 0.6416
|
| 331 |
+
Average Acc: 0.6662
|
| 332 |
+
Total Loss: 1.7151
|
| 333 |
+
Instruction Loss: 1.5435
|
| 334 |
+
Quality Loss: 1.8867
|
| 335 |
+
============================================================
|
| 336 |
+
2026-01-24 22:14:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
|
| 337 |
+
2026-01-24 22:14:42 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt (575.2MB)
|
| 338 |
+
2026-01-24 22:14:42 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1599.pt
|
| 339 |
+
2026-01-24 22:14:42 | INFO | Best 3 checkpoints:
|
| 340 |
+
2026-01-24 22:14:42 | INFO | 1. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 341 |
+
2026-01-24 22:14:42 | INFO | 2. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 342 |
+
2026-01-24 22:14:42 | INFO | 3. Step 1499: acc=0.6662 (reward_model.best_1499.pt)
|
| 343 |
+
2026-01-24 22:14:46 | INFO | Step 1600: loss=0.0346 | IF_loss=0.0421, MQ_loss=0.0272 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000072
|
| 344 |
+
2026-01-24 22:16:34 | INFO |
|
| 345 |
+
============================================================
|
| 346 |
+
Validation Results (took 7.04s):
|
| 347 |
+
Samples: 346 instruction, 346 quality
|
| 348 |
+
Instruction Acc: 0.6908
|
| 349 |
+
Quality Acc: 0.6445
|
| 350 |
+
Average Acc: 0.6676
|
| 351 |
+
Total Loss: 1.7117
|
| 352 |
+
Instruction Loss: 1.5434
|
| 353 |
+
Quality Loss: 1.8800
|
| 354 |
+
============================================================
|
| 355 |
+
2026-01-24 22:16:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
|
| 356 |
+
2026-01-24 22:16:35 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt (575.2MB)
|
| 357 |
+
2026-01-24 22:16:35 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1499.pt
|
| 358 |
+
2026-01-24 22:16:35 | INFO | Best 3 checkpoints:
|
| 359 |
+
2026-01-24 22:16:35 | INFO | 1. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 360 |
+
2026-01-24 22:16:35 | INFO | 2. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 361 |
+
2026-01-24 22:16:35 | INFO | 3. Step 1099: acc=0.6662 (reward_model.best_1099.pt)
|
| 362 |
+
2026-01-24 22:16:36 | INFO | Step 1700: loss=0.0480 | IF_loss=0.0609, MQ_loss=0.0350 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000069
|
| 363 |
+
2026-01-24 22:18:30 | INFO |
|
| 364 |
+
============================================================
|
| 365 |
+
Validation Results (took 7.06s):
|
| 366 |
+
Samples: 346 instruction, 346 quality
|
| 367 |
+
Instruction Acc: 0.6936
|
| 368 |
+
Quality Acc: 0.6445
|
| 369 |
+
Average Acc: 0.6691
|
| 370 |
+
Total Loss: 1.7110
|
| 371 |
+
Instruction Loss: 1.5436
|
| 372 |
+
Quality Loss: 1.8783
|
| 373 |
+
============================================================
|
| 374 |
+
2026-01-24 22:18:30 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
|
| 375 |
+
2026-01-24 22:18:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt (575.2MB)
|
| 376 |
+
2026-01-24 22:18:30 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1099.pt
|
| 377 |
+
2026-01-24 22:18:30 | INFO | Best 3 checkpoints:
|
| 378 |
+
2026-01-24 22:18:30 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 379 |
+
2026-01-24 22:18:30 | INFO | 2. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 380 |
+
2026-01-24 22:18:30 | INFO | 3. Step 899: acc=0.6662 (reward_model.best_899.pt)
|
| 381 |
+
2026-01-24 22:18:32 | INFO | Step 1800: loss=0.0316 | IF_loss=0.0473, MQ_loss=0.0159 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000065
|
| 382 |
+
2026-01-24 22:20:24 | INFO |
|
| 383 |
+
============================================================
|
| 384 |
+
Validation Results (took 7.18s):
|
| 385 |
+
Samples: 346 instruction, 346 quality
|
| 386 |
+
Instruction Acc: 0.6908
|
| 387 |
+
Quality Acc: 0.6474
|
| 388 |
+
Average Acc: 0.6691
|
| 389 |
+
Total Loss: 1.7090
|
| 390 |
+
Instruction Loss: 1.5445
|
| 391 |
+
Quality Loss: 1.8734
|
| 392 |
+
============================================================
|
| 393 |
+
2026-01-24 22:20:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
|
| 394 |
+
2026-01-24 22:20:25 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt (575.2MB)
|
| 395 |
+
2026-01-24 22:20:25 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_899.pt
|
| 396 |
+
2026-01-24 22:20:25 | INFO | Best 3 checkpoints:
|
| 397 |
+
2026-01-24 22:20:25 | INFO | 1. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 398 |
+
2026-01-24 22:20:25 | INFO | 2. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 399 |
+
2026-01-24 22:20:25 | INFO | 3. Step 1699: acc=0.6676 (reward_model.best_1699.pt)
|
| 400 |
+
2026-01-24 22:20:26 | INFO | Step 1900: loss=0.0415 | IF_loss=0.0539, MQ_loss=0.0290 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000061
|
| 401 |
+
2026-01-24 22:22:22 | INFO |
|
| 402 |
+
============================================================
|
| 403 |
+
Validation Results (took 7.27s):
|
| 404 |
+
Samples: 346 instruction, 346 quality
|
| 405 |
+
Instruction Acc: 0.6936
|
| 406 |
+
Quality Acc: 0.6474
|
| 407 |
+
Average Acc: 0.6705
|
| 408 |
+
Total Loss: 1.7083
|
| 409 |
+
Instruction Loss: 1.5455
|
| 410 |
+
Quality Loss: 1.8711
|
| 411 |
+
============================================================
|
| 412 |
+
2026-01-24 22:22:22 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
|
| 413 |
+
2026-01-24 22:22:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt (575.2MB)
|
| 414 |
+
2026-01-24 22:22:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1699.pt
|
| 415 |
+
2026-01-24 22:22:22 | INFO | Best 3 checkpoints:
|
| 416 |
+
2026-01-24 22:22:22 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 417 |
+
2026-01-24 22:22:22 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 418 |
+
2026-01-24 22:22:22 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 419 |
+
2026-01-24 22:22:23 | INFO | Step 2000: loss=0.0589 | IF_loss=0.0511, MQ_loss=0.0667 | acc=0.979 (IF=0.979, MQ=0.979) | lr=0.000056
|
| 420 |
+
2026-01-24 22:22:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (filtered to 38.584M trainable parameters)
|
| 421 |
+
2026-01-24 22:22:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt (575.2MB)
|
| 422 |
+
2026-01-24 22:22:24 | INFO | Step 2000: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.2000.pt
|
| 423 |
+
2026-01-24 22:24:18 | INFO |
|
| 424 |
+
============================================================
|
| 425 |
+
Validation Results (took 7.25s):
|
| 426 |
+
Samples: 346 instruction, 346 quality
|
| 427 |
+
Instruction Acc: 0.6879
|
| 428 |
+
Quality Acc: 0.6474
|
| 429 |
+
Average Acc: 0.6676
|
| 430 |
+
Total Loss: 1.7086
|
| 431 |
+
Instruction Loss: 1.5472
|
| 432 |
+
Quality Loss: 1.8700
|
| 433 |
+
============================================================
|
| 434 |
+
2026-01-24 22:24:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (filtered to 38.584M trainable parameters)
|
| 435 |
+
2026-01-24 22:24:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt (575.2MB)
|
| 436 |
+
2026-01-24 22:24:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2099.pt
|
| 437 |
+
2026-01-24 22:24:19 | INFO | Best 3 checkpoints:
|
| 438 |
+
2026-01-24 22:24:19 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 439 |
+
2026-01-24 22:24:19 | INFO | 2. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 440 |
+
2026-01-24 22:24:19 | INFO | 3. Step 1899: acc=0.6691 (reward_model.best_1899.pt)
|
| 441 |
+
2026-01-24 22:24:20 | INFO | Step 2100: loss=0.0284 | IF_loss=0.0286, MQ_loss=0.0281 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000052
|
| 442 |
+
2026-01-24 22:26:12 | INFO |
|
| 443 |
+
============================================================
|
| 444 |
+
Validation Results (took 7.00s):
|
| 445 |
+
Samples: 346 instruction, 346 quality
|
| 446 |
+
Instruction Acc: 0.6879
|
| 447 |
+
Quality Acc: 0.6503
|
| 448 |
+
Average Acc: 0.6691
|
| 449 |
+
Total Loss: 1.7083
|
| 450 |
+
Instruction Loss: 1.5495
|
| 451 |
+
Quality Loss: 1.8672
|
| 452 |
+
============================================================
|
| 453 |
+
2026-01-24 22:26:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (filtered to 38.584M trainable parameters)
|
| 454 |
+
2026-01-24 22:26:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt (575.2MB)
|
| 455 |
+
2026-01-24 22:26:13 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1899.pt
|
| 456 |
+
2026-01-24 22:26:13 | INFO | Best 3 checkpoints:
|
| 457 |
+
2026-01-24 22:26:13 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 458 |
+
2026-01-24 22:26:13 | INFO | 2. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
|
| 459 |
+
2026-01-24 22:26:13 | INFO | 3. Step 1799: acc=0.6691 (reward_model.best_1799.pt)
|
| 460 |
+
2026-01-24 22:26:14 | INFO | Step 2200: loss=0.0061 | IF_loss=0.0038, MQ_loss=0.0085 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000048
|
| 461 |
+
2026-01-24 22:28:05 | INFO |
|
| 462 |
+
============================================================
|
| 463 |
+
Validation Results (took 7.64s):
|
| 464 |
+
Samples: 346 instruction, 346 quality
|
| 465 |
+
Instruction Acc: 0.6879
|
| 466 |
+
Quality Acc: 0.6532
|
| 467 |
+
Average Acc: 0.6705
|
| 468 |
+
Total Loss: 1.7088
|
| 469 |
+
Instruction Loss: 1.5525
|
| 470 |
+
Quality Loss: 1.8651
|
| 471 |
+
============================================================
|
| 472 |
+
2026-01-24 22:28:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (filtered to 38.584M trainable parameters)
|
| 473 |
+
2026-01-24 22:28:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt (575.2MB)
|
| 474 |
+
2026-01-24 22:28:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1799.pt
|
| 475 |
+
2026-01-24 22:28:05 | INFO | Best 3 checkpoints:
|
| 476 |
+
2026-01-24 22:28:05 | INFO | 1. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 477 |
+
2026-01-24 22:28:05 | INFO | 2. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 478 |
+
2026-01-24 22:28:05 | INFO | 3. Step 2199: acc=0.6691 (reward_model.best_2199.pt)
|
| 479 |
+
2026-01-24 22:28:06 | INFO | Step 2300: loss=0.0451 | IF_loss=0.0768, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000044
|
| 480 |
+
2026-01-24 22:30:00 | INFO |
|
| 481 |
+
============================================================
|
| 482 |
+
Validation Results (took 7.30s):
|
| 483 |
+
Samples: 346 instruction, 346 quality
|
| 484 |
+
Instruction Acc: 0.6908
|
| 485 |
+
Quality Acc: 0.6532
|
| 486 |
+
Average Acc: 0.6720
|
| 487 |
+
Total Loss: 1.7079
|
| 488 |
+
Instruction Loss: 1.5530
|
| 489 |
+
Quality Loss: 1.8628
|
| 490 |
+
============================================================
|
| 491 |
+
2026-01-24 22:30:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (filtered to 38.584M trainable parameters)
|
| 492 |
+
2026-01-24 22:30:01 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt (575.2MB)
|
| 493 |
+
2026-01-24 22:30:01 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2199.pt
|
| 494 |
+
2026-01-24 22:30:01 | INFO | Best 3 checkpoints:
|
| 495 |
+
2026-01-24 22:30:01 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 496 |
+
2026-01-24 22:30:01 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 497 |
+
2026-01-24 22:30:01 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 498 |
+
2026-01-24 22:30:02 | INFO | Step 2400: loss=0.0141 | IF_loss=0.0160, MQ_loss=0.0122 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000039
|
| 499 |
+
2026-01-24 22:31:52 | INFO |
|
| 500 |
+
============================================================
|
| 501 |
+
Validation Results (took 6.60s):
|
| 502 |
+
Samples: 346 instruction, 346 quality
|
| 503 |
+
Instruction Acc: 0.6879
|
| 504 |
+
Quality Acc: 0.6503
|
| 505 |
+
Average Acc: 0.6691
|
| 506 |
+
Total Loss: 1.7095
|
| 507 |
+
Instruction Loss: 1.5571
|
| 508 |
+
Quality Loss: 1.8619
|
| 509 |
+
============================================================
|
| 510 |
+
2026-01-24 22:31:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (filtered to 38.584M trainable parameters)
|
| 511 |
+
2026-01-24 22:31:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt (575.2MB)
|
| 512 |
+
2026-01-24 22:31:53 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2499.pt
|
| 513 |
+
2026-01-24 22:31:53 | INFO | Best 3 checkpoints:
|
| 514 |
+
2026-01-24 22:31:53 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 515 |
+
2026-01-24 22:31:53 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 516 |
+
2026-01-24 22:31:53 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 517 |
+
2026-01-24 22:31:54 | INFO | Step 2500: loss=0.0073 | IF_loss=0.0109, MQ_loss=0.0036 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000035
|
| 518 |
+
2026-01-24 22:33:43 | INFO |
|
| 519 |
+
============================================================
|
| 520 |
+
Validation Results (took 6.93s):
|
| 521 |
+
Samples: 346 instruction, 346 quality
|
| 522 |
+
Instruction Acc: 0.6879
|
| 523 |
+
Quality Acc: 0.6503
|
| 524 |
+
Average Acc: 0.6691
|
| 525 |
+
Total Loss: 1.7093
|
| 526 |
+
Instruction Loss: 1.5586
|
| 527 |
+
Quality Loss: 1.8601
|
| 528 |
+
============================================================
|
| 529 |
+
2026-01-24 22:33:43 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (filtered to 38.584M trainable parameters)
|
| 530 |
+
2026-01-24 22:33:43 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt (575.2MB)
|
| 531 |
+
2026-01-24 22:33:43 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2599.pt
|
| 532 |
+
2026-01-24 22:33:43 | INFO | Best 3 checkpoints:
|
| 533 |
+
2026-01-24 22:33:43 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 534 |
+
2026-01-24 22:33:43 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 535 |
+
2026-01-24 22:33:43 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 536 |
+
2026-01-24 22:33:44 | INFO | Step 2600: loss=0.0025 | IF_loss=0.0039, MQ_loss=0.0011 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000031
|
| 537 |
+
2026-01-24 22:35:39 | INFO |
|
| 538 |
+
============================================================
|
| 539 |
+
Validation Results (took 6.71s):
|
| 540 |
+
Samples: 346 instruction, 346 quality
|
| 541 |
+
Instruction Acc: 0.6879
|
| 542 |
+
Quality Acc: 0.6503
|
| 543 |
+
Average Acc: 0.6691
|
| 544 |
+
Total Loss: 1.7105
|
| 545 |
+
Instruction Loss: 1.5632
|
| 546 |
+
Quality Loss: 1.8577
|
| 547 |
+
============================================================
|
| 548 |
+
2026-01-24 22:35:39 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (filtered to 38.584M trainable parameters)
|
| 549 |
+
2026-01-24 22:35:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt (575.2MB)
|
| 550 |
+
2026-01-24 22:35:39 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2699.pt
|
| 551 |
+
2026-01-24 22:35:39 | INFO | Best 3 checkpoints:
|
| 552 |
+
2026-01-24 22:35:39 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 553 |
+
2026-01-24 22:35:39 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 554 |
+
2026-01-24 22:35:39 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 555 |
+
2026-01-24 22:35:40 | INFO | Step 2700: loss=0.0285 | IF_loss=0.0436, MQ_loss=0.0134 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000028
|
| 556 |
+
2026-01-24 22:37:31 | INFO |
|
| 557 |
+
============================================================
|
| 558 |
+
Validation Results (took 7.15s):
|
| 559 |
+
Samples: 346 instruction, 346 quality
|
| 560 |
+
Instruction Acc: 0.6850
|
| 561 |
+
Quality Acc: 0.6503
|
| 562 |
+
Average Acc: 0.6676
|
| 563 |
+
Total Loss: 1.7119
|
| 564 |
+
Instruction Loss: 1.5662
|
| 565 |
+
Quality Loss: 1.8576
|
| 566 |
+
============================================================
|
| 567 |
+
2026-01-24 22:37:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (filtered to 38.584M trainable parameters)
|
| 568 |
+
2026-01-24 22:37:32 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt (575.2MB)
|
| 569 |
+
2026-01-24 22:37:32 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2799.pt
|
| 570 |
+
2026-01-24 22:37:32 | INFO | Best 3 checkpoints:
|
| 571 |
+
2026-01-24 22:37:32 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 572 |
+
2026-01-24 22:37:32 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 573 |
+
2026-01-24 22:37:32 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 574 |
+
2026-01-24 22:37:33 | INFO | Step 2800: loss=0.0054 | IF_loss=0.0086, MQ_loss=0.0023 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000024
|
| 575 |
+
2026-01-24 22:39:25 | INFO |
|
| 576 |
+
============================================================
|
| 577 |
+
Validation Results (took 6.52s):
|
| 578 |
+
Samples: 346 instruction, 346 quality
|
| 579 |
+
Instruction Acc: 0.6879
|
| 580 |
+
Quality Acc: 0.6503
|
| 581 |
+
Average Acc: 0.6691
|
| 582 |
+
Total Loss: 1.7105
|
| 583 |
+
Instruction Loss: 1.5670
|
| 584 |
+
Quality Loss: 1.8540
|
| 585 |
+
============================================================
|
| 586 |
+
2026-01-24 22:39:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (filtered to 38.584M trainable parameters)
|
| 587 |
+
2026-01-24 22:39:26 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt (575.2MB)
|
| 588 |
+
2026-01-24 22:39:26 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2899.pt
|
| 589 |
+
2026-01-24 22:39:26 | INFO | Best 3 checkpoints:
|
| 590 |
+
2026-01-24 22:39:26 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 591 |
+
2026-01-24 22:39:26 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 592 |
+
2026-01-24 22:39:26 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 593 |
+
2026-01-24 22:39:27 | INFO | Step 2900: loss=0.0121 | IF_loss=0.0158, MQ_loss=0.0084 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000020
|
| 594 |
+
2026-01-24 22:41:17 | INFO |
|
| 595 |
+
============================================================
|
| 596 |
+
Validation Results (took 7.44s):
|
| 597 |
+
Samples: 346 instruction, 346 quality
|
| 598 |
+
Instruction Acc: 0.6879
|
| 599 |
+
Quality Acc: 0.6503
|
| 600 |
+
Average Acc: 0.6691
|
| 601 |
+
Total Loss: 1.7130
|
| 602 |
+
Instruction Loss: 1.5717
|
| 603 |
+
Quality Loss: 1.8543
|
| 604 |
+
============================================================
|
| 605 |
+
2026-01-24 22:41:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (filtered to 38.584M trainable parameters)
|
| 606 |
+
2026-01-24 22:41:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt (575.2MB)
|
| 607 |
+
2026-01-24 22:41:18 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2999.pt
|
| 608 |
+
2026-01-24 22:41:18 | INFO | Best 3 checkpoints:
|
| 609 |
+
2026-01-24 22:41:18 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 610 |
+
2026-01-24 22:41:18 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 611 |
+
2026-01-24 22:41:18 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 612 |
+
2026-01-24 22:41:19 | INFO | Step 3000: loss=0.0040 | IF_loss=0.0024, MQ_loss=0.0055 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000017
|
| 613 |
+
2026-01-24 22:43:12 | INFO |
|
| 614 |
+
============================================================
|
| 615 |
+
Validation Results (took 6.84s):
|
| 616 |
+
Samples: 346 instruction, 346 quality
|
| 617 |
+
Instruction Acc: 0.6908
|
| 618 |
+
Quality Acc: 0.6503
|
| 619 |
+
Average Acc: 0.6705
|
| 620 |
+
Total Loss: 1.7137
|
| 621 |
+
Instruction Loss: 1.5743
|
| 622 |
+
Quality Loss: 1.8532
|
| 623 |
+
============================================================
|
| 624 |
+
2026-01-24 22:43:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (filtered to 38.584M trainable parameters)
|
| 625 |
+
2026-01-24 22:43:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt (575.2MB)
|
| 626 |
+
2026-01-24 22:43:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3099.pt
|
| 627 |
+
2026-01-24 22:43:12 | INFO | Best 3 checkpoints:
|
| 628 |
+
2026-01-24 22:43:12 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 629 |
+
2026-01-24 22:43:12 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 630 |
+
2026-01-24 22:43:12 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 631 |
+
2026-01-24 22:43:13 | INFO | Step 3100: loss=0.0095 | IF_loss=0.0161, MQ_loss=0.0029 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000014
|
| 632 |
+
2026-01-24 22:45:04 | INFO |
|
| 633 |
+
============================================================
|
| 634 |
+
Validation Results (took 7.66s):
|
| 635 |
+
Samples: 346 instruction, 346 quality
|
| 636 |
+
Instruction Acc: 0.6879
|
| 637 |
+
Quality Acc: 0.6503
|
| 638 |
+
Average Acc: 0.6691
|
| 639 |
+
Total Loss: 1.7135
|
| 640 |
+
Instruction Loss: 1.5760
|
| 641 |
+
Quality Loss: 1.8510
|
| 642 |
+
============================================================
|
| 643 |
+
2026-01-24 22:45:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (filtered to 38.584M trainable parameters)
|
| 644 |
+
2026-01-24 22:45:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt (575.2MB)
|
| 645 |
+
2026-01-24 22:45:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3199.pt
|
| 646 |
+
2026-01-24 22:45:04 | INFO | Best 3 checkpoints:
|
| 647 |
+
2026-01-24 22:45:04 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 648 |
+
2026-01-24 22:45:04 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 649 |
+
2026-01-24 22:45:04 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 650 |
+
2026-01-24 22:45:08 | INFO | Step 3200: loss=0.0050 | IF_loss=0.0072, MQ_loss=0.0027 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000011
|
| 651 |
+
2026-01-24 22:46:57 | INFO |
|
| 652 |
+
============================================================
|
| 653 |
+
Validation Results (took 6.84s):
|
| 654 |
+
Samples: 346 instruction, 346 quality
|
| 655 |
+
Instruction Acc: 0.6879
|
| 656 |
+
Quality Acc: 0.6503
|
| 657 |
+
Average Acc: 0.6691
|
| 658 |
+
Total Loss: 1.7154
|
| 659 |
+
Instruction Loss: 1.5809
|
| 660 |
+
Quality Loss: 1.8499
|
| 661 |
+
============================================================
|
| 662 |
+
2026-01-24 22:46:57 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (filtered to 38.584M trainable parameters)
|
| 663 |
+
2026-01-24 22:46:57 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt (575.2MB)
|
| 664 |
+
2026-01-24 22:46:57 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3299.pt
|
| 665 |
+
2026-01-24 22:46:57 | INFO | Best 3 checkpoints:
|
| 666 |
+
2026-01-24 22:46:57 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 667 |
+
2026-01-24 22:46:57 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 668 |
+
2026-01-24 22:46:57 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 669 |
+
2026-01-24 22:46:58 | INFO | Step 3300: loss=0.0362 | IF_loss=0.0503, MQ_loss=0.0221 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000009
|
| 670 |
+
2026-01-24 22:48:50 | INFO |
|
| 671 |
+
============================================================
|
| 672 |
+
Validation Results (took 6.83s):
|
| 673 |
+
Samples: 346 instruction, 346 quality
|
| 674 |
+
Instruction Acc: 0.6879
|
| 675 |
+
Quality Acc: 0.6532
|
| 676 |
+
Average Acc: 0.6705
|
| 677 |
+
Total Loss: 1.7154
|
| 678 |
+
Instruction Loss: 1.5832
|
| 679 |
+
Quality Loss: 1.8477
|
| 680 |
+
============================================================
|
| 681 |
+
2026-01-24 22:48:50 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (filtered to 38.584M trainable parameters)
|
| 682 |
+
2026-01-24 22:48:51 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt (575.2MB)
|
| 683 |
+
2026-01-24 22:48:51 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3399.pt
|
| 684 |
+
2026-01-24 22:48:51 | INFO | Best 3 checkpoints:
|
| 685 |
+
2026-01-24 22:48:51 | INFO | 1. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 686 |
+
2026-01-24 22:48:51 | INFO | 2. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 687 |
+
2026-01-24 22:48:51 | INFO | 3. Step 2299: acc=0.6705 (reward_model.best_2299.pt)
|
| 688 |
+
2026-01-24 22:48:52 | INFO | Step 3400: loss=0.0082 | IF_loss=0.0113, MQ_loss=0.0051 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000006
|
| 689 |
+
2026-01-24 22:50:41 | INFO |
|
| 690 |
+
============================================================
|
| 691 |
+
Validation Results (took 7.19s):
|
| 692 |
+
Samples: 346 instruction, 346 quality
|
| 693 |
+
Instruction Acc: 0.6908
|
| 694 |
+
Quality Acc: 0.6590
|
| 695 |
+
Average Acc: 0.6749
|
| 696 |
+
Total Loss: 1.7151
|
| 697 |
+
Instruction Loss: 1.5847
|
| 698 |
+
Quality Loss: 1.8456
|
| 699 |
+
============================================================
|
| 700 |
+
2026-01-24 22:50:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (filtered to 38.584M trainable parameters)
|
| 701 |
+
2026-01-24 22:50:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3499.pt (575.2MB)
|
| 702 |
+
2026-01-24 22:50:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2299.pt
|
| 703 |
+
2026-01-24 22:50:41 | INFO | Best 3 checkpoints:
|
| 704 |
+
2026-01-24 22:50:41 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 705 |
+
2026-01-24 22:50:41 | INFO | 2. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 706 |
+
2026-01-24 22:50:41 | INFO | 3. Step 1999: acc=0.6705 (reward_model.best_1999.pt)
|
| 707 |
+
2026-01-24 22:50:42 | INFO | Step 3500: loss=0.0045 | IF_loss=0.0077, MQ_loss=0.0013 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000005
|
| 708 |
+
2026-01-24 22:52:33 | INFO |
|
| 709 |
+
============================================================
|
| 710 |
+
Validation Results (took 7.08s):
|
| 711 |
+
Samples: 346 instruction, 346 quality
|
| 712 |
+
Instruction Acc: 0.6879
|
| 713 |
+
Quality Acc: 0.6590
|
| 714 |
+
Average Acc: 0.6734
|
| 715 |
+
Total Loss: 1.7160
|
| 716 |
+
Instruction Loss: 1.5876
|
| 717 |
+
Quality Loss: 1.8445
|
| 718 |
+
============================================================
|
| 719 |
+
2026-01-24 22:52:33 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (filtered to 38.584M trainable parameters)
|
| 720 |
+
2026-01-24 22:52:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt (575.2MB)
|
| 721 |
+
2026-01-24 22:52:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_1999.pt
|
| 722 |
+
2026-01-24 22:52:34 | INFO | Best 3 checkpoints:
|
| 723 |
+
2026-01-24 22:52:34 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 724 |
+
2026-01-24 22:52:34 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 725 |
+
2026-01-24 22:52:34 | INFO | 3. Step 2399: acc=0.6720 (reward_model.best_2399.pt)
|
| 726 |
+
2026-01-24 22:52:35 | INFO | Step 3600: loss=0.0126 | IF_loss=0.0220, MQ_loss=0.0031 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000003
|
| 727 |
+
2026-01-24 22:54:24 | INFO |
|
| 728 |
+
============================================================
|
| 729 |
+
Validation Results (took 7.20s):
|
| 730 |
+
Samples: 346 instruction, 346 quality
|
| 731 |
+
Instruction Acc: 0.6879
|
| 732 |
+
Quality Acc: 0.6590
|
| 733 |
+
Average Acc: 0.6734
|
| 734 |
+
Total Loss: 1.7161
|
| 735 |
+
Instruction Loss: 1.5894
|
| 736 |
+
Quality Loss: 1.8428
|
| 737 |
+
============================================================
|
| 738 |
+
2026-01-24 22:54:24 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (filtered to 38.584M trainable parameters)
|
| 739 |
+
2026-01-24 22:54:24 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt (575.2MB)
|
| 740 |
+
2026-01-24 22:54:24 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_2399.pt
|
| 741 |
+
2026-01-24 22:54:24 | INFO | Best 3 checkpoints:
|
| 742 |
+
2026-01-24 22:54:24 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 743 |
+
2026-01-24 22:54:24 | INFO | 2. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 744 |
+
2026-01-24 22:54:24 | INFO | 3. Step 3699: acc=0.6734 (reward_model.best_3699.pt)
|
| 745 |
+
2026-01-24 22:54:25 | INFO | Step 3700: loss=0.0085 | IF_loss=0.0041, MQ_loss=0.0130 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000002
|
| 746 |
+
2026-01-24 22:56:18 | INFO |
|
| 747 |
+
============================================================
|
| 748 |
+
Validation Results (took 6.85s):
|
| 749 |
+
Samples: 346 instruction, 346 quality
|
| 750 |
+
Instruction Acc: 0.6879
|
| 751 |
+
Quality Acc: 0.6618
|
| 752 |
+
Average Acc: 0.6749
|
| 753 |
+
Total Loss: 1.7157
|
| 754 |
+
Instruction Loss: 1.5912
|
| 755 |
+
Quality Loss: 1.8403
|
| 756 |
+
============================================================
|
| 757 |
+
2026-01-24 22:56:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (filtered to 38.584M trainable parameters)
|
| 758 |
+
2026-01-24 22:56:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3799.pt (575.2MB)
|
| 759 |
+
2026-01-24 22:56:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3699.pt
|
| 760 |
+
2026-01-24 22:56:19 | INFO | Best 3 checkpoints:
|
| 761 |
+
2026-01-24 22:56:19 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 762 |
+
2026-01-24 22:56:19 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 763 |
+
2026-01-24 22:56:19 | INFO | 3. Step 3599: acc=0.6734 (reward_model.best_3599.pt)
|
| 764 |
+
2026-01-24 22:56:20 | INFO | Step 3800: loss=0.0120 | IF_loss=0.0037, MQ_loss=0.0202 | acc=0.990 (IF=1.000, MQ=0.979) | lr=0.000001
|
| 765 |
+
2026-01-24 22:58:09 | INFO |
|
| 766 |
+
============================================================
|
| 767 |
+
Validation Results (took 7.39s):
|
| 768 |
+
Samples: 346 instruction, 346 quality
|
| 769 |
+
Instruction Acc: 0.6908
|
| 770 |
+
Quality Acc: 0.6590
|
| 771 |
+
Average Acc: 0.6749
|
| 772 |
+
Total Loss: 1.7163
|
| 773 |
+
Instruction Loss: 1.5935
|
| 774 |
+
Quality Loss: 1.8391
|
| 775 |
+
============================================================
|
| 776 |
+
2026-01-24 22:58:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (filtered to 38.584M trainable parameters)
|
| 777 |
+
2026-01-24 22:58:10 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3899.pt (575.2MB)
|
| 778 |
+
2026-01-24 22:58:10 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3599.pt
|
| 779 |
+
2026-01-24 22:58:10 | INFO | Best 3 checkpoints:
|
| 780 |
+
2026-01-24 22:58:10 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 781 |
+
2026-01-24 22:58:10 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 782 |
+
2026-01-24 22:58:10 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
|
| 783 |
+
2026-01-24 22:58:11 | INFO | Step 3900: loss=0.0060 | IF_loss=0.0040, MQ_loss=0.0080 | acc=1.000 (IF=1.000, MQ=1.000) | lr=0.000000
|
| 784 |
+
2026-01-24 23:00:02 | INFO |
|
| 785 |
+
============================================================
|
| 786 |
+
Validation Results (took 6.60s):
|
| 787 |
+
Samples: 346 instruction, 346 quality
|
| 788 |
+
Instruction Acc: 0.6908
|
| 789 |
+
Quality Acc: 0.6590
|
| 790 |
+
Average Acc: 0.6749
|
| 791 |
+
Total Loss: 1.7165
|
| 792 |
+
Instruction Loss: 1.5967
|
| 793 |
+
Quality Loss: 1.8363
|
| 794 |
+
============================================================
|
| 795 |
+
2026-01-24 23:00:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (filtered to 38.584M trainable parameters)
|
| 796 |
+
2026-01-24 23:00:02 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt (575.2MB)
|
| 797 |
+
2026-01-24 23:00:02 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2143/ckpt/reward_model.best_3999.pt
|
| 798 |
+
2026-01-24 23:00:02 | INFO | Best 3 checkpoints:
|
| 799 |
+
2026-01-24 23:00:02 | INFO | 1. Step 3499: acc=0.6749 (reward_model.best_3499.pt)
|
| 800 |
+
2026-01-24 23:00:02 | INFO | 2. Step 3799: acc=0.6749 (reward_model.best_3799.pt)
|
| 801 |
+
2026-01-24 23:00:02 | INFO | 3. Step 3899: acc=0.6749 (reward_model.best_3899.pt)
|
| 802 |
+
2026-01-24 23:00:02 | INFO | Training complete!
|
| 803 |
+
2026-01-24 23:00:02 | INFO | Training complete!
|
20260124_2354/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '3'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: false
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 4000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 4000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260124_2354/reward_model/1769270104.0081618/events.out.tfevents.1769270104.MACLAB-S004.3211506.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1c9d6a9609c7a84c7ece1c70819976b71fcbc41491a14dc042f92c982873761
|
| 3 |
+
size 503
|
20260124_2354/reward_model/1769270104.0091846/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 4000
|
20260124_2354/reward_model/events.out.tfevents.1769270104.MACLAB-S004.3211506.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:202656c2b035463a9a8b723b88d4b0c93b2d50b91b2b2c0ebb10f09261494610
|
| 3 |
+
size 647887
|
20260124_2354/train.20260124_2354.log
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-24 23:54:55 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/train.20260124_2354.log
|
| 2 |
+
2026-01-24 23:54:55 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-24 23:54:56 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-24 23:54:56 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-24 23:54:56 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 6 |
+
2026-01-24 23:54:56 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-24 23:55:02 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-24 23:55:02 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-24 23:55:03 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-24 23:55:03 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-24 23:55:03 | INFO | Apply to eval: False, ref: True
|
| 12 |
+
2026-01-24 23:55:03 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-24 23:55:03 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-24 23:55:03 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 15 |
+
2026-01-24 23:55:03 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=4000
|
| 16 |
+
2026-01-24 23:55:03 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-24 23:55:03 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-24 23:55:03 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 19 |
+
2026-01-24 23:55:03 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=29999)
|
| 20 |
+
2026-01-24 23:55:03 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 21 |
+
2026-01-24 23:55:03 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 22 |
+
2026-01-24 23:55:03 | INFO | Text encoder (frozen): 328.389M
|
| 23 |
+
2026-01-24 23:55:03 | INFO | Audio encoder (frozen): 334.189M
|
| 24 |
+
2026-01-24 23:55:03 | INFO | Other trainable: 38.584M
|
| 25 |
+
2026-01-24 23:55:03 | INFO | ℹ No LoRA configuration detected
|
| 26 |
+
2026-01-24 23:55:04 | INFO | ============================================================
|
| 27 |
+
2026-01-24 23:55:04 | INFO | Ready to start training
|
| 28 |
+
2026-01-24 23:55:04 | INFO | ============================================================
|
| 29 |
+
2026-01-24 23:55:04 | INFO | Starting training from step 0
|
| 30 |
+
2026-01-24 23:55:04 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 31 |
+
2026-01-24 23:55:04 | INFO | accelerator.device = cuda
|
| 32 |
+
2026-01-24 23:55:04 | INFO | mixed_precision = bf16
|
| 33 |
+
2026-01-24 23:55:04 | INFO | distributed_type = NO
|
| 34 |
+
2026-01-24 23:55:04 | INFO | num_processes = 1
|
| 35 |
+
2026-01-24 23:55:04 | INFO | process_index = 0
|
| 36 |
+
2026-01-24 23:55:04 | INFO | is_main_process = True
|
| 37 |
+
2026-01-24 23:55:04 | INFO | torch.cuda.is_available() = True
|
| 38 |
+
2026-01-24 23:55:04 | INFO | torch.cuda.device_count() = 1
|
| 39 |
+
2026-01-24 23:55:04 | INFO | current_device = 0
|
| 40 |
+
2026-01-24 23:55:04 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 41 |
+
2026-01-24 23:55:04 | INFO | model parameter device = cuda:0
|
| 42 |
+
2026-01-24 23:55:04 | INFO | Training for 4000.0 steps (~63 epochs, 64 steps/epoch)
|
| 43 |
+
2026-01-24 23:55:12 | INFO | Step 0: loss=1.8546 | IF_loss=2.4068, MQ_loss=1.3024 | acc=0.760 (IF=0.708, MQ=0.812) | lr=0.000002
|
| 44 |
+
2026-01-24 23:55:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 45 |
+
2026-01-24 23:55:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.0.pt (428.0MB)
|
| 46 |
+
2026-01-24 23:55:12 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.0.pt
|
| 47 |
+
2026-01-24 23:56:57 | INFO |
|
| 48 |
+
============================================================
|
| 49 |
+
Validation Results (took 8.49s):
|
| 50 |
+
Samples: 346 instruction, 346 quality
|
| 51 |
+
Instruction Acc: 0.7052
|
| 52 |
+
Quality Acc: 0.6821
|
| 53 |
+
Average Acc: 0.6936
|
| 54 |
+
Total Loss: 1.2481
|
| 55 |
+
Instruction Loss: 1.1851
|
| 56 |
+
Quality Loss: 1.3111
|
| 57 |
+
============================================================
|
| 58 |
+
2026-01-24 23:56:57 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 59 |
+
2026-01-24 23:56:57 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_99.pt (428.0MB)
|
| 60 |
+
2026-01-24 23:56:57 | INFO | Best 1 checkpoints:
|
| 61 |
+
2026-01-24 23:56:57 | INFO | 1. Step 99: acc=0.6936 (reward_model.best_99.pt)
|
| 62 |
+
2026-01-24 23:56:58 | INFO | Step 100: loss=1.0138 | IF_loss=0.8556, MQ_loss=1.1720 | acc=0.708 (IF=0.688, MQ=0.729) | lr=0.000010
|
| 63 |
+
2026-01-24 23:58:43 | INFO |
|
| 64 |
+
============================================================
|
| 65 |
+
Validation Results (took 6.77s):
|
| 66 |
+
Samples: 346 instruction, 346 quality
|
| 67 |
+
Instruction Acc: 0.6965
|
| 68 |
+
Quality Acc: 0.7197
|
| 69 |
+
Average Acc: 0.7081
|
| 70 |
+
Total Loss: 0.7433
|
| 71 |
+
Instruction Loss: 0.7416
|
| 72 |
+
Quality Loss: 0.7450
|
| 73 |
+
============================================================
|
| 74 |
+
2026-01-24 23:58:44 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 75 |
+
2026-01-24 23:58:44 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_199.pt (428.0MB)
|
| 76 |
+
2026-01-24 23:58:44 | INFO | Best 2 checkpoints:
|
| 77 |
+
2026-01-24 23:58:44 | INFO | 1. Step 199: acc=0.7081 (reward_model.best_199.pt)
|
| 78 |
+
2026-01-24 23:58:44 | INFO | 2. Step 99: acc=0.6936 (reward_model.best_99.pt)
|
| 79 |
+
2026-01-24 23:58:45 | INFO | Step 200: loss=0.4285 | IF_loss=0.4361, MQ_loss=0.4208 | acc=0.812 (IF=0.792, MQ=0.833) | lr=0.000010
|
| 80 |
+
2026-01-25 00:00:32 | INFO |
|
| 81 |
+
============================================================
|
| 82 |
+
Validation Results (took 7.53s):
|
| 83 |
+
Samples: 346 instruction, 346 quality
|
| 84 |
+
Instruction Acc: 0.7052
|
| 85 |
+
Quality Acc: 0.7514
|
| 86 |
+
Average Acc: 0.7283
|
| 87 |
+
Total Loss: 0.6484
|
| 88 |
+
Instruction Loss: 0.6697
|
| 89 |
+
Quality Loss: 0.6271
|
| 90 |
+
============================================================
|
| 91 |
+
2026-01-25 00:00:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 92 |
+
2026-01-25 00:00:32 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_299.pt (428.0MB)
|
| 93 |
+
2026-01-25 00:00:32 | INFO | Best 3 checkpoints:
|
| 94 |
+
2026-01-25 00:00:32 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 95 |
+
2026-01-25 00:00:32 | INFO | 2. Step 199: acc=0.7081 (reward_model.best_199.pt)
|
| 96 |
+
2026-01-25 00:00:32 | INFO | 3. Step 99: acc=0.6936 (reward_model.best_99.pt)
|
| 97 |
+
2026-01-25 00:00:33 | INFO | Step 300: loss=0.3843 | IF_loss=0.4473, MQ_loss=0.3212 | acc=0.844 (IF=0.792, MQ=0.896) | lr=0.000010
|
| 98 |
+
2026-01-25 00:02:21 | INFO |
|
| 99 |
+
============================================================
|
| 100 |
+
Validation Results (took 7.10s):
|
| 101 |
+
Samples: 346 instruction, 346 quality
|
| 102 |
+
Instruction Acc: 0.6994
|
| 103 |
+
Quality Acc: 0.7399
|
| 104 |
+
Average Acc: 0.7197
|
| 105 |
+
Total Loss: 0.6475
|
| 106 |
+
Instruction Loss: 0.6784
|
| 107 |
+
Quality Loss: 0.6167
|
| 108 |
+
============================================================
|
| 109 |
+
2026-01-25 00:02:21 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 110 |
+
2026-01-25 00:02:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_399.pt (428.0MB)
|
| 111 |
+
2026-01-25 00:02:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_99.pt
|
| 112 |
+
2026-01-25 00:02:22 | INFO | Best 3 checkpoints:
|
| 113 |
+
2026-01-25 00:02:22 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 114 |
+
2026-01-25 00:02:22 | INFO | 2. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 115 |
+
2026-01-25 00:02:22 | INFO | 3. Step 199: acc=0.7081 (reward_model.best_199.pt)
|
| 116 |
+
2026-01-25 00:02:23 | INFO | Step 400: loss=0.5100 | IF_loss=0.5393, MQ_loss=0.4806 | acc=0.771 (IF=0.729, MQ=0.812) | lr=0.000010
|
| 117 |
+
2026-01-25 00:04:09 | INFO |
|
| 118 |
+
============================================================
|
| 119 |
+
Validation Results (took 7.22s):
|
| 120 |
+
Samples: 346 instruction, 346 quality
|
| 121 |
+
Instruction Acc: 0.6965
|
| 122 |
+
Quality Acc: 0.7399
|
| 123 |
+
Average Acc: 0.7182
|
| 124 |
+
Total Loss: 0.6691
|
| 125 |
+
Instruction Loss: 0.7099
|
| 126 |
+
Quality Loss: 0.6283
|
| 127 |
+
============================================================
|
| 128 |
+
2026-01-25 00:04:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 129 |
+
2026-01-25 00:04:09 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_499.pt (428.0MB)
|
| 130 |
+
2026-01-25 00:04:09 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_199.pt
|
| 131 |
+
2026-01-25 00:04:09 | INFO | Best 3 checkpoints:
|
| 132 |
+
2026-01-25 00:04:09 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 133 |
+
2026-01-25 00:04:09 | INFO | 2. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 134 |
+
2026-01-25 00:04:09 | INFO | 3. Step 499: acc=0.7182 (reward_model.best_499.pt)
|
| 135 |
+
2026-01-25 00:04:10 | INFO | Step 500: loss=0.4517 | IF_loss=0.5286, MQ_loss=0.3749 | acc=0.771 (IF=0.750, MQ=0.792) | lr=0.000010
|
| 136 |
+
2026-01-25 00:06:01 | INFO |
|
| 137 |
+
============================================================
|
| 138 |
+
Validation Results (took 7.07s):
|
| 139 |
+
Samples: 346 instruction, 346 quality
|
| 140 |
+
Instruction Acc: 0.6994
|
| 141 |
+
Quality Acc: 0.7457
|
| 142 |
+
Average Acc: 0.7225
|
| 143 |
+
Total Loss: 0.6932
|
| 144 |
+
Instruction Loss: 0.7406
|
| 145 |
+
Quality Loss: 0.6458
|
| 146 |
+
============================================================
|
| 147 |
+
2026-01-25 00:06:02 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 148 |
+
2026-01-25 00:06:02 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_599.pt (428.0MB)
|
| 149 |
+
2026-01-25 00:06:02 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_499.pt
|
| 150 |
+
2026-01-25 00:06:02 | INFO | Best 3 checkpoints:
|
| 151 |
+
2026-01-25 00:06:02 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 152 |
+
2026-01-25 00:06:02 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 153 |
+
2026-01-25 00:06:02 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 154 |
+
2026-01-25 00:06:03 | INFO | Step 600: loss=0.3738 | IF_loss=0.4063, MQ_loss=0.3414 | acc=0.854 (IF=0.854, MQ=0.854) | lr=0.000009
|
| 155 |
+
2026-01-25 00:07:51 | INFO |
|
| 156 |
+
============================================================
|
| 157 |
+
Validation Results (took 8.06s):
|
| 158 |
+
Samples: 346 instruction, 346 quality
|
| 159 |
+
Instruction Acc: 0.6965
|
| 160 |
+
Quality Acc: 0.7370
|
| 161 |
+
Average Acc: 0.7168
|
| 162 |
+
Total Loss: 0.7321
|
| 163 |
+
Instruction Loss: 0.7873
|
| 164 |
+
Quality Loss: 0.6769
|
| 165 |
+
============================================================
|
| 166 |
+
2026-01-25 00:07:51 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 167 |
+
2026-01-25 00:07:51 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_699.pt (428.0MB)
|
| 168 |
+
2026-01-25 00:07:51 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_699.pt
|
| 169 |
+
2026-01-25 00:07:51 | INFO | Best 3 checkpoints:
|
| 170 |
+
2026-01-25 00:07:51 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 171 |
+
2026-01-25 00:07:51 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 172 |
+
2026-01-25 00:07:51 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 173 |
+
2026-01-25 00:07:52 | INFO | Step 700: loss=0.2228 | IF_loss=0.2959, MQ_loss=0.1498 | acc=0.896 (IF=0.854, MQ=0.938) | lr=0.000009
|
| 174 |
+
2026-01-25 00:09:41 | INFO |
|
| 175 |
+
============================================================
|
| 176 |
+
Validation Results (took 6.89s):
|
| 177 |
+
Samples: 346 instruction, 346 quality
|
| 178 |
+
Instruction Acc: 0.6936
|
| 179 |
+
Quality Acc: 0.7341
|
| 180 |
+
Average Acc: 0.7139
|
| 181 |
+
Total Loss: 0.7643
|
| 182 |
+
Instruction Loss: 0.8224
|
| 183 |
+
Quality Loss: 0.7063
|
| 184 |
+
============================================================
|
| 185 |
+
2026-01-25 00:09:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 186 |
+
2026-01-25 00:09:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_799.pt (428.0MB)
|
| 187 |
+
2026-01-25 00:09:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_799.pt
|
| 188 |
+
2026-01-25 00:09:41 | INFO | Best 3 checkpoints:
|
| 189 |
+
2026-01-25 00:09:41 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 190 |
+
2026-01-25 00:09:41 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 191 |
+
2026-01-25 00:09:41 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 192 |
+
2026-01-25 00:09:42 | INFO | Step 800: loss=0.3570 | IF_loss=0.4116, MQ_loss=0.3024 | acc=0.792 (IF=0.771, MQ=0.812) | lr=0.000009
|
| 193 |
+
2026-01-25 00:11:33 | INFO |
|
| 194 |
+
============================================================
|
| 195 |
+
Validation Results (took 6.85s):
|
| 196 |
+
Samples: 346 instruction, 346 quality
|
| 197 |
+
Instruction Acc: 0.6850
|
| 198 |
+
Quality Acc: 0.7341
|
| 199 |
+
Average Acc: 0.7095
|
| 200 |
+
Total Loss: 0.7902
|
| 201 |
+
Instruction Loss: 0.8561
|
| 202 |
+
Quality Loss: 0.7244
|
| 203 |
+
============================================================
|
| 204 |
+
2026-01-25 00:11:33 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 205 |
+
2026-01-25 00:11:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_899.pt (428.0MB)
|
| 206 |
+
2026-01-25 00:11:33 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_899.pt
|
| 207 |
+
2026-01-25 00:11:33 | INFO | Best 3 checkpoints:
|
| 208 |
+
2026-01-25 00:11:33 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 209 |
+
2026-01-25 00:11:33 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 210 |
+
2026-01-25 00:11:33 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 211 |
+
2026-01-25 00:11:34 | INFO | Step 900: loss=0.2508 | IF_loss=0.2545, MQ_loss=0.2472 | acc=0.927 (IF=0.917, MQ=0.938) | lr=0.000009
|
| 212 |
+
2026-01-25 00:13:21 | INFO |
|
| 213 |
+
============================================================
|
| 214 |
+
Validation Results (took 7.29s):
|
| 215 |
+
Samples: 346 instruction, 346 quality
|
| 216 |
+
Instruction Acc: 0.6908
|
| 217 |
+
Quality Acc: 0.7254
|
| 218 |
+
Average Acc: 0.7081
|
| 219 |
+
Total Loss: 0.8355
|
| 220 |
+
Instruction Loss: 0.9110
|
| 221 |
+
Quality Loss: 0.7599
|
| 222 |
+
============================================================
|
| 223 |
+
2026-01-25 00:13:21 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 224 |
+
2026-01-25 00:13:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_999.pt (428.0MB)
|
| 225 |
+
2026-01-25 00:13:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_999.pt
|
| 226 |
+
2026-01-25 00:13:22 | INFO | Best 3 checkpoints:
|
| 227 |
+
2026-01-25 00:13:22 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 228 |
+
2026-01-25 00:13:22 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 229 |
+
2026-01-25 00:13:22 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 230 |
+
2026-01-25 00:13:23 | INFO | Step 1000: loss=0.2025 | IF_loss=0.1883, MQ_loss=0.2167 | acc=0.917 (IF=0.958, MQ=0.875) | lr=0.000009
|
| 231 |
+
2026-01-25 00:15:12 | INFO |
|
| 232 |
+
============================================================
|
| 233 |
+
Validation Results (took 7.33s):
|
| 234 |
+
Samples: 346 instruction, 346 quality
|
| 235 |
+
Instruction Acc: 0.6936
|
| 236 |
+
Quality Acc: 0.7312
|
| 237 |
+
Average Acc: 0.7124
|
| 238 |
+
Total Loss: 0.8744
|
| 239 |
+
Instruction Loss: 0.9563
|
| 240 |
+
Quality Loss: 0.7924
|
| 241 |
+
============================================================
|
| 242 |
+
2026-01-25 00:15:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 243 |
+
2026-01-25 00:15:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1099.pt (428.0MB)
|
| 244 |
+
2026-01-25 00:15:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1099.pt
|
| 245 |
+
2026-01-25 00:15:12 | INFO | Best 3 checkpoints:
|
| 246 |
+
2026-01-25 00:15:12 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 247 |
+
2026-01-25 00:15:12 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 248 |
+
2026-01-25 00:15:12 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 249 |
+
2026-01-25 00:15:13 | INFO | Step 1100: loss=0.2070 | IF_loss=0.2735, MQ_loss=0.1405 | acc=0.927 (IF=0.896, MQ=0.958) | lr=0.000008
|
| 250 |
+
2026-01-25 00:17:01 | INFO |
|
| 251 |
+
============================================================
|
| 252 |
+
Validation Results (took 7.23s):
|
| 253 |
+
Samples: 346 instruction, 346 quality
|
| 254 |
+
Instruction Acc: 0.6936
|
| 255 |
+
Quality Acc: 0.7341
|
| 256 |
+
Average Acc: 0.7139
|
| 257 |
+
Total Loss: 0.9238
|
| 258 |
+
Instruction Loss: 1.0105
|
| 259 |
+
Quality Loss: 0.8370
|
| 260 |
+
============================================================
|
| 261 |
+
2026-01-25 00:17:01 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 262 |
+
2026-01-25 00:17:02 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1199.pt (428.0MB)
|
| 263 |
+
2026-01-25 00:17:02 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1199.pt
|
| 264 |
+
2026-01-25 00:17:02 | INFO | Best 3 checkpoints:
|
| 265 |
+
2026-01-25 00:17:02 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 266 |
+
2026-01-25 00:17:02 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 267 |
+
2026-01-25 00:17:02 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 268 |
+
2026-01-25 00:17:03 | INFO | Step 1200: loss=0.1291 | IF_loss=0.1584, MQ_loss=0.0999 | acc=0.948 (IF=0.917, MQ=0.979) | lr=0.000008
|
| 269 |
+
2026-01-25 00:18:54 | INFO |
|
| 270 |
+
============================================================
|
| 271 |
+
Validation Results (took 7.69s):
|
| 272 |
+
Samples: 346 instruction, 346 quality
|
| 273 |
+
Instruction Acc: 0.6908
|
| 274 |
+
Quality Acc: 0.7225
|
| 275 |
+
Average Acc: 0.7066
|
| 276 |
+
Total Loss: 0.9501
|
| 277 |
+
Instruction Loss: 1.0487
|
| 278 |
+
Quality Loss: 0.8515
|
| 279 |
+
============================================================
|
| 280 |
+
2026-01-25 00:18:54 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 281 |
+
2026-01-25 00:18:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1299.pt (428.0MB)
|
| 282 |
+
2026-01-25 00:18:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1299.pt
|
| 283 |
+
2026-01-25 00:18:54 | INFO | Best 3 checkpoints:
|
| 284 |
+
2026-01-25 00:18:54 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 285 |
+
2026-01-25 00:18:54 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 286 |
+
2026-01-25 00:18:54 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 287 |
+
2026-01-25 00:18:55 | INFO | Step 1300: loss=0.2189 | IF_loss=0.2415, MQ_loss=0.1962 | acc=0.917 (IF=0.896, MQ=0.938) | lr=0.000008
|
| 288 |
+
2026-01-25 00:20:44 | INFO |
|
| 289 |
+
============================================================
|
| 290 |
+
Validation Results (took 7.70s):
|
| 291 |
+
Samples: 346 instruction, 346 quality
|
| 292 |
+
Instruction Acc: 0.6994
|
| 293 |
+
Quality Acc: 0.7312
|
| 294 |
+
Average Acc: 0.7153
|
| 295 |
+
Total Loss: 1.0001
|
| 296 |
+
Instruction Loss: 1.1038
|
| 297 |
+
Quality Loss: 0.8963
|
| 298 |
+
============================================================
|
| 299 |
+
2026-01-25 00:20:44 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 300 |
+
2026-01-25 00:20:44 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1399.pt (428.0MB)
|
| 301 |
+
2026-01-25 00:20:44 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_2354/ckpt/reward_model.best_1399.pt
|
| 302 |
+
2026-01-25 00:20:44 | INFO | Best 3 checkpoints:
|
| 303 |
+
2026-01-25 00:20:44 | INFO | 1. Step 299: acc=0.7283 (reward_model.best_299.pt)
|
| 304 |
+
2026-01-25 00:20:44 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 305 |
+
2026-01-25 00:20:44 | INFO | 3. Step 399: acc=0.7197 (reward_model.best_399.pt)
|
| 306 |
+
2026-01-25 00:20:46 | INFO | Step 1400: loss=0.1710 | IF_loss=0.1713, MQ_loss=0.1707 | acc=0.938 (IF=0.917, MQ=0.958) | lr=0.000007
|
20260125_0035/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '0'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.8000.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_0035/reward_model/1769272544.7198617/events.out.tfevents.1769272544.MACLAB-S004.3403711.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e5b60b1838c4e344cf6890b2bdce509325a24d7cb04497d228623da81ae0116
|
| 3 |
+
size 503
|
20260125_0035/reward_model/1769272544.7213397/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_0035/reward_model/events.out.tfevents.1769272544.MACLAB-S004.3403711.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d068740b9c57c21acd78084f3e19ca1c6abadd2922a09126bbb46b8e1f5f7901
|
| 3 |
+
size 873949
|
20260125_0035/train.20260125_0035.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 00:35:33 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/train.20260125_0035.log
|
| 2 |
+
2026-01-25 00:35:33 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-25 00:35:35 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-25 00:35:35 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-25 00:35:35 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.8000.pt
|
| 6 |
+
2026-01-25 00:35:35 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-25 00:35:42 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-25 00:35:42 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-25 00:35:42 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-25 00:35:42 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-25 00:35:42 | INFO | Apply to eval: True, ref: True
|
| 12 |
+
2026-01-25 00:35:42 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-25 00:35:42 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-25 00:35:42 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 15 |
+
2026-01-25 00:35:42 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 16 |
+
2026-01-25 00:35:42 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-25 00:35:42 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-25 00:35:44 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 19 |
+
2026-01-25 00:35:44 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=8000)
|
| 20 |
+
2026-01-25 00:35:44 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.8000.pt
|
| 21 |
+
2026-01-25 00:35:44 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 22 |
+
2026-01-25 00:35:44 | INFO | Text encoder (frozen): 328.389M
|
| 23 |
+
2026-01-25 00:35:44 | INFO | Audio encoder (frozen): 334.189M
|
| 24 |
+
2026-01-25 00:35:44 | INFO | Other trainable: 38.584M
|
| 25 |
+
2026-01-25 00:35:44 | INFO | ℹ No LoRA configuration detected
|
| 26 |
+
2026-01-25 00:35:44 | INFO | ============================================================
|
| 27 |
+
2026-01-25 00:35:44 | INFO | Ready to start training
|
| 28 |
+
2026-01-25 00:35:44 | INFO | ============================================================
|
| 29 |
+
2026-01-25 00:35:44 | INFO | Starting training from step 0
|
| 30 |
+
2026-01-25 00:35:44 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 31 |
+
2026-01-25 00:35:44 | INFO | accelerator.device = cuda
|
| 32 |
+
2026-01-25 00:35:44 | INFO | mixed_precision = bf16
|
| 33 |
+
2026-01-25 00:35:44 | INFO | distributed_type = NO
|
| 34 |
+
2026-01-25 00:35:44 | INFO | num_processes = 1
|
| 35 |
+
2026-01-25 00:35:44 | INFO | process_index = 0
|
| 36 |
+
2026-01-25 00:35:44 | INFO | is_main_process = True
|
| 37 |
+
2026-01-25 00:35:44 | INFO | torch.cuda.is_available() = True
|
| 38 |
+
2026-01-25 00:35:44 | INFO | torch.cuda.device_count() = 1
|
| 39 |
+
2026-01-25 00:35:44 | INFO | current_device = 0
|
| 40 |
+
2026-01-25 00:35:44 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 41 |
+
2026-01-25 00:35:44 | INFO | model parameter device = cuda:0
|
| 42 |
+
2026-01-25 00:35:44 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 43 |
+
2026-01-25 00:35:52 | INFO | Step 0: loss=0.7688 | IF_loss=0.9857, MQ_loss=0.5519 | acc=0.729 (IF=0.688, MQ=0.771) | lr=0.000002
|
| 44 |
+
2026-01-25 00:35:52 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 45 |
+
2026-01-25 00:35:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.0.pt (428.0MB)
|
| 46 |
+
2026-01-25 00:35:53 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.0.pt
|
| 47 |
+
2026-01-25 00:37:35 | INFO |
|
| 48 |
+
============================================================
|
| 49 |
+
Validation Results (took 8.15s):
|
| 50 |
+
Samples: 346 instruction, 346 quality
|
| 51 |
+
Instruction Acc: 0.7052
|
| 52 |
+
Quality Acc: 0.7052
|
| 53 |
+
Average Acc: 0.7052
|
| 54 |
+
Total Loss: 0.6842
|
| 55 |
+
Instruction Loss: 0.6988
|
| 56 |
+
Quality Loss: 0.6695
|
| 57 |
+
============================================================
|
| 58 |
+
2026-01-25 00:37:36 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 59 |
+
2026-01-25 00:37:36 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_99.pt (428.0MB)
|
| 60 |
+
2026-01-25 00:37:36 | INFO | Best 1 checkpoints:
|
| 61 |
+
2026-01-25 00:37:36 | INFO | 1. Step 99: acc=0.7052 (reward_model.best_99.pt)
|
| 62 |
+
2026-01-25 00:37:37 | INFO | Step 100: loss=0.5884 | IF_loss=0.5924, MQ_loss=0.5843 | acc=0.688 (IF=0.646, MQ=0.729) | lr=0.000010
|
| 63 |
+
2026-01-25 00:39:28 | INFO |
|
| 64 |
+
============================================================
|
| 65 |
+
Validation Results (took 9.30s):
|
| 66 |
+
Samples: 346 instruction, 346 quality
|
| 67 |
+
Instruction Acc: 0.7197
|
| 68 |
+
Quality Acc: 0.7457
|
| 69 |
+
Average Acc: 0.7327
|
| 70 |
+
Total Loss: 0.6014
|
| 71 |
+
Instruction Loss: 0.6253
|
| 72 |
+
Quality Loss: 0.5774
|
| 73 |
+
============================================================
|
| 74 |
+
2026-01-25 00:39:28 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 75 |
+
2026-01-25 00:39:28 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_199.pt (428.0MB)
|
| 76 |
+
2026-01-25 00:39:28 | INFO | Best 2 checkpoints:
|
| 77 |
+
2026-01-25 00:39:28 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 78 |
+
2026-01-25 00:39:28 | INFO | 2. Step 99: acc=0.7052 (reward_model.best_99.pt)
|
| 79 |
+
2026-01-25 00:39:29 | INFO | Step 200: loss=0.3779 | IF_loss=0.3349, MQ_loss=0.4209 | acc=0.844 (IF=0.896, MQ=0.792) | lr=0.000010
|
| 80 |
+
2026-01-25 00:41:19 | INFO |
|
| 81 |
+
============================================================
|
| 82 |
+
Validation Results (took 7.41s):
|
| 83 |
+
Samples: 346 instruction, 346 quality
|
| 84 |
+
Instruction Acc: 0.7110
|
| 85 |
+
Quality Acc: 0.7514
|
| 86 |
+
Average Acc: 0.7312
|
| 87 |
+
Total Loss: 0.5899
|
| 88 |
+
Instruction Loss: 0.6186
|
| 89 |
+
Quality Loss: 0.5612
|
| 90 |
+
============================================================
|
| 91 |
+
2026-01-25 00:41:19 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 92 |
+
2026-01-25 00:41:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_299.pt (428.0MB)
|
| 93 |
+
2026-01-25 00:41:19 | INFO | Best 3 checkpoints:
|
| 94 |
+
2026-01-25 00:41:19 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 95 |
+
2026-01-25 00:41:19 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 96 |
+
2026-01-25 00:41:19 | INFO | 3. Step 99: acc=0.7052 (reward_model.best_99.pt)
|
| 97 |
+
2026-01-25 00:41:20 | INFO | Step 300: loss=0.3940 | IF_loss=0.4496, MQ_loss=0.3384 | acc=0.802 (IF=0.792, MQ=0.812) | lr=0.000009
|
| 98 |
+
2026-01-25 00:43:18 | INFO |
|
| 99 |
+
============================================================
|
| 100 |
+
Validation Results (took 8.84s):
|
| 101 |
+
Samples: 346 instruction, 346 quality
|
| 102 |
+
Instruction Acc: 0.7168
|
| 103 |
+
Quality Acc: 0.7399
|
| 104 |
+
Average Acc: 0.7283
|
| 105 |
+
Total Loss: 0.5863
|
| 106 |
+
Instruction Loss: 0.6144
|
| 107 |
+
Quality Loss: 0.5582
|
| 108 |
+
============================================================
|
| 109 |
+
2026-01-25 00:43:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 110 |
+
2026-01-25 00:43:18 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_399.pt (428.0MB)
|
| 111 |
+
2026-01-25 00:43:18 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_99.pt
|
| 112 |
+
2026-01-25 00:43:18 | INFO | Best 3 checkpoints:
|
| 113 |
+
2026-01-25 00:43:18 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 114 |
+
2026-01-25 00:43:18 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 115 |
+
2026-01-25 00:43:18 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 116 |
+
2026-01-25 00:43:19 | INFO | Step 400: loss=0.5238 | IF_loss=0.5729, MQ_loss=0.4747 | acc=0.771 (IF=0.708, MQ=0.833) | lr=0.000009
|
| 117 |
+
2026-01-25 00:45:10 | INFO |
|
| 118 |
+
============================================================
|
| 119 |
+
Validation Results (took 7.63s):
|
| 120 |
+
Samples: 346 instruction, 346 quality
|
| 121 |
+
Instruction Acc: 0.7052
|
| 122 |
+
Quality Acc: 0.7428
|
| 123 |
+
Average Acc: 0.7240
|
| 124 |
+
Total Loss: 0.5915
|
| 125 |
+
Instruction Loss: 0.6221
|
| 126 |
+
Quality Loss: 0.5608
|
| 127 |
+
============================================================
|
| 128 |
+
2026-01-25 00:45:10 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 129 |
+
2026-01-25 00:45:10 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_499.pt (428.0MB)
|
| 130 |
+
2026-01-25 00:45:10 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_499.pt
|
| 131 |
+
2026-01-25 00:45:10 | INFO | Best 3 checkpoints:
|
| 132 |
+
2026-01-25 00:45:10 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 133 |
+
2026-01-25 00:45:10 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 134 |
+
2026-01-25 00:45:10 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 135 |
+
2026-01-25 00:45:11 | INFO | Step 500: loss=0.4478 | IF_loss=0.4706, MQ_loss=0.4250 | acc=0.760 (IF=0.792, MQ=0.729) | lr=0.000009
|
| 136 |
+
2026-01-25 00:47:06 | INFO |
|
| 137 |
+
============================================================
|
| 138 |
+
Validation Results (took 8.13s):
|
| 139 |
+
Samples: 346 instruction, 346 quality
|
| 140 |
+
Instruction Acc: 0.6994
|
| 141 |
+
Quality Acc: 0.7486
|
| 142 |
+
Average Acc: 0.7240
|
| 143 |
+
Total Loss: 0.5893
|
| 144 |
+
Instruction Loss: 0.6203
|
| 145 |
+
Quality Loss: 0.5584
|
| 146 |
+
============================================================
|
| 147 |
+
2026-01-25 00:47:06 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 148 |
+
2026-01-25 00:47:06 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_599.pt (428.0MB)
|
| 149 |
+
2026-01-25 00:47:06 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_599.pt
|
| 150 |
+
2026-01-25 00:47:06 | INFO | Best 3 checkpoints:
|
| 151 |
+
2026-01-25 00:47:06 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 152 |
+
2026-01-25 00:47:06 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 153 |
+
2026-01-25 00:47:06 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 154 |
+
2026-01-25 00:47:08 | INFO | Step 600: loss=0.4813 | IF_loss=0.4879, MQ_loss=0.4747 | acc=0.802 (IF=0.792, MQ=0.812) | lr=0.000008
|
| 155 |
+
2026-01-25 00:49:00 | INFO |
|
| 156 |
+
============================================================
|
| 157 |
+
Validation Results (took 7.74s):
|
| 158 |
+
Samples: 346 instruction, 346 quality
|
| 159 |
+
Instruction Acc: 0.7023
|
| 160 |
+
Quality Acc: 0.7486
|
| 161 |
+
Average Acc: 0.7254
|
| 162 |
+
Total Loss: 0.5964
|
| 163 |
+
Instruction Loss: 0.6307
|
| 164 |
+
Quality Loss: 0.5621
|
| 165 |
+
============================================================
|
| 166 |
+
2026-01-25 00:49:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 167 |
+
2026-01-25 00:49:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_699.pt (428.0MB)
|
| 168 |
+
2026-01-25 00:49:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_699.pt
|
| 169 |
+
2026-01-25 00:49:00 | INFO | Best 3 checkpoints:
|
| 170 |
+
2026-01-25 00:49:00 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 171 |
+
2026-01-25 00:49:00 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 172 |
+
2026-01-25 00:49:00 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 173 |
+
2026-01-25 00:49:01 | INFO | Step 700: loss=0.2444 | IF_loss=0.3514, MQ_loss=0.1374 | acc=0.927 (IF=0.896, MQ=0.958) | lr=0.000007
|
| 174 |
+
2026-01-25 00:51:03 | INFO |
|
| 175 |
+
============================================================
|
| 176 |
+
Validation Results (took 8.47s):
|
| 177 |
+
Samples: 346 instruction, 346 quality
|
| 178 |
+
Instruction Acc: 0.7023
|
| 179 |
+
Quality Acc: 0.7457
|
| 180 |
+
Average Acc: 0.7240
|
| 181 |
+
Total Loss: 0.6049
|
| 182 |
+
Instruction Loss: 0.6406
|
| 183 |
+
Quality Loss: 0.5693
|
| 184 |
+
============================================================
|
| 185 |
+
2026-01-25 00:51:03 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 186 |
+
2026-01-25 00:51:03 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_799.pt (428.0MB)
|
| 187 |
+
2026-01-25 00:51:03 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_799.pt
|
| 188 |
+
2026-01-25 00:51:03 | INFO | Best 3 checkpoints:
|
| 189 |
+
2026-01-25 00:51:03 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 190 |
+
2026-01-25 00:51:03 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 191 |
+
2026-01-25 00:51:03 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 192 |
+
2026-01-25 00:51:04 | INFO | Step 800: loss=0.4378 | IF_loss=0.5861, MQ_loss=0.2894 | acc=0.729 (IF=0.625, MQ=0.833) | lr=0.000007
|
| 193 |
+
2026-01-25 00:53:00 | INFO |
|
| 194 |
+
============================================================
|
| 195 |
+
Validation Results (took 7.86s):
|
| 196 |
+
Samples: 346 instruction, 346 quality
|
| 197 |
+
Instruction Acc: 0.7023
|
| 198 |
+
Quality Acc: 0.7543
|
| 199 |
+
Average Acc: 0.7283
|
| 200 |
+
Total Loss: 0.6092
|
| 201 |
+
Instruction Loss: 0.6455
|
| 202 |
+
Quality Loss: 0.5729
|
| 203 |
+
============================================================
|
| 204 |
+
2026-01-25 00:53:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 205 |
+
2026-01-25 00:53:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_899.pt (428.0MB)
|
| 206 |
+
2026-01-25 00:53:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_899.pt
|
| 207 |
+
2026-01-25 00:53:00 | INFO | Best 3 checkpoints:
|
| 208 |
+
2026-01-25 00:53:00 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 209 |
+
2026-01-25 00:53:00 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 210 |
+
2026-01-25 00:53:00 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 211 |
+
2026-01-25 00:53:01 | INFO | Step 900: loss=0.4075 | IF_loss=0.4561, MQ_loss=0.3589 | acc=0.771 (IF=0.750, MQ=0.792) | lr=0.000006
|
| 212 |
+
2026-01-25 00:54:54 | INFO |
|
| 213 |
+
============================================================
|
| 214 |
+
Validation Results (took 8.39s):
|
| 215 |
+
Samples: 346 instruction, 346 quality
|
| 216 |
+
Instruction Acc: 0.6936
|
| 217 |
+
Quality Acc: 0.7543
|
| 218 |
+
Average Acc: 0.7240
|
| 219 |
+
Total Loss: 0.6166
|
| 220 |
+
Instruction Loss: 0.6537
|
| 221 |
+
Quality Loss: 0.5795
|
| 222 |
+
============================================================
|
| 223 |
+
2026-01-25 00:54:54 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 224 |
+
2026-01-25 00:54:55 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_999.pt (428.0MB)
|
| 225 |
+
2026-01-25 00:54:55 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_999.pt
|
| 226 |
+
2026-01-25 00:54:55 | INFO | Best 3 checkpoints:
|
| 227 |
+
2026-01-25 00:54:55 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 228 |
+
2026-01-25 00:54:55 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 229 |
+
2026-01-25 00:54:55 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 230 |
+
2026-01-25 00:54:56 | INFO | Step 1000: loss=0.3655 | IF_loss=0.3244, MQ_loss=0.4067 | acc=0.823 (IF=0.833, MQ=0.812) | lr=0.000005
|
| 231 |
+
2026-01-25 00:56:54 | INFO |
|
| 232 |
+
============================================================
|
| 233 |
+
Validation Results (took 7.77s):
|
| 234 |
+
Samples: 346 instruction, 346 quality
|
| 235 |
+
Instruction Acc: 0.6994
|
| 236 |
+
Quality Acc: 0.7514
|
| 237 |
+
Average Acc: 0.7254
|
| 238 |
+
Total Loss: 0.6242
|
| 239 |
+
Instruction Loss: 0.6619
|
| 240 |
+
Quality Loss: 0.5864
|
| 241 |
+
============================================================
|
| 242 |
+
2026-01-25 00:56:54 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 243 |
+
2026-01-25 00:56:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1099.pt (428.0MB)
|
| 244 |
+
2026-01-25 00:56:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1099.pt
|
| 245 |
+
2026-01-25 00:56:54 | INFO | Best 3 checkpoints:
|
| 246 |
+
2026-01-25 00:56:54 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 247 |
+
2026-01-25 00:56:54 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 248 |
+
2026-01-25 00:56:54 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 249 |
+
2026-01-25 00:56:55 | INFO | Step 1100: loss=0.3254 | IF_loss=0.3815, MQ_loss=0.2692 | acc=0.865 (IF=0.854, MQ=0.875) | lr=0.000004
|
| 250 |
+
2026-01-25 00:58:51 | INFO |
|
| 251 |
+
============================================================
|
| 252 |
+
Validation Results (took 8.71s):
|
| 253 |
+
Samples: 346 instruction, 346 quality
|
| 254 |
+
Instruction Acc: 0.6965
|
| 255 |
+
Quality Acc: 0.7514
|
| 256 |
+
Average Acc: 0.7240
|
| 257 |
+
Total Loss: 0.6286
|
| 258 |
+
Instruction Loss: 0.6684
|
| 259 |
+
Quality Loss: 0.5887
|
| 260 |
+
============================================================
|
| 261 |
+
2026-01-25 00:58:51 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 262 |
+
2026-01-25 00:58:52 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1199.pt (428.0MB)
|
| 263 |
+
2026-01-25 00:58:52 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1199.pt
|
| 264 |
+
2026-01-25 00:58:52 | INFO | Best 3 checkpoints:
|
| 265 |
+
2026-01-25 00:58:52 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 266 |
+
2026-01-25 00:58:52 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 267 |
+
2026-01-25 00:58:52 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 268 |
+
2026-01-25 00:58:53 | INFO | Step 1200: loss=0.2899 | IF_loss=0.3551, MQ_loss=0.2248 | acc=0.823 (IF=0.750, MQ=0.896) | lr=0.000003
|
| 269 |
+
2026-01-25 01:00:48 | INFO |
|
| 270 |
+
============================================================
|
| 271 |
+
Validation Results (took 7.62s):
|
| 272 |
+
Samples: 346 instruction, 346 quality
|
| 273 |
+
Instruction Acc: 0.6965
|
| 274 |
+
Quality Acc: 0.7543
|
| 275 |
+
Average Acc: 0.7254
|
| 276 |
+
Total Loss: 0.6329
|
| 277 |
+
Instruction Loss: 0.6736
|
| 278 |
+
Quality Loss: 0.5922
|
| 279 |
+
============================================================
|
| 280 |
+
2026-01-25 01:00:48 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 281 |
+
2026-01-25 01:00:48 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1299.pt (428.0MB)
|
| 282 |
+
2026-01-25 01:00:48 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1299.pt
|
| 283 |
+
2026-01-25 01:00:48 | INFO | Best 3 checkpoints:
|
| 284 |
+
2026-01-25 01:00:48 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 285 |
+
2026-01-25 01:00:48 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 286 |
+
2026-01-25 01:00:48 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 287 |
+
2026-01-25 01:00:49 | INFO | Step 1300: loss=0.3270 | IF_loss=0.3120, MQ_loss=0.3420 | acc=0.875 (IF=0.917, MQ=0.833) | lr=0.000003
|
| 288 |
+
2026-01-25 01:02:37 | INFO |
|
| 289 |
+
============================================================
|
| 290 |
+
Validation Results (took 6.98s):
|
| 291 |
+
Samples: 346 instruction, 346 quality
|
| 292 |
+
Instruction Acc: 0.6994
|
| 293 |
+
Quality Acc: 0.7514
|
| 294 |
+
Average Acc: 0.7254
|
| 295 |
+
Total Loss: 0.6344
|
| 296 |
+
Instruction Loss: 0.6752
|
| 297 |
+
Quality Loss: 0.5936
|
| 298 |
+
============================================================
|
| 299 |
+
2026-01-25 01:02:37 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 300 |
+
2026-01-25 01:02:38 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1399.pt (428.0MB)
|
| 301 |
+
2026-01-25 01:02:38 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1399.pt
|
| 302 |
+
2026-01-25 01:02:38 | INFO | Best 3 checkpoints:
|
| 303 |
+
2026-01-25 01:02:38 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 304 |
+
2026-01-25 01:02:38 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 305 |
+
2026-01-25 01:02:38 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 306 |
+
2026-01-25 01:02:39 | INFO | Step 1400: loss=0.3501 | IF_loss=0.4404, MQ_loss=0.2599 | acc=0.854 (IF=0.812, MQ=0.896) | lr=0.000002
|
| 307 |
+
2026-01-25 01:04:28 | INFO |
|
| 308 |
+
============================================================
|
| 309 |
+
Validation Results (took 7.15s):
|
| 310 |
+
Samples: 346 instruction, 346 quality
|
| 311 |
+
Instruction Acc: 0.6965
|
| 312 |
+
Quality Acc: 0.7514
|
| 313 |
+
Average Acc: 0.7240
|
| 314 |
+
Total Loss: 0.6387
|
| 315 |
+
Instruction Loss: 0.6798
|
| 316 |
+
Quality Loss: 0.5976
|
| 317 |
+
============================================================
|
| 318 |
+
2026-01-25 01:04:28 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
|
| 319 |
+
2026-01-25 01:04:29 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1499.pt (428.0MB)
|
| 320 |
+
2026-01-25 01:04:29 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1499.pt
|
| 321 |
+
2026-01-25 01:04:29 | INFO | Best 3 checkpoints:
|
| 322 |
+
2026-01-25 01:04:29 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 323 |
+
2026-01-25 01:04:29 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 324 |
+
2026-01-25 01:04:29 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 325 |
+
2026-01-25 01:04:30 | INFO | Step 1500: loss=0.2991 | IF_loss=0.3190, MQ_loss=0.2793 | acc=0.833 (IF=0.833, MQ=0.833) | lr=0.000001
|
| 326 |
+
2026-01-25 01:06:20 | INFO |
|
| 327 |
+
============================================================
|
| 328 |
+
Validation Results (took 7.69s):
|
| 329 |
+
Samples: 346 instruction, 346 quality
|
| 330 |
+
Instruction Acc: 0.6994
|
| 331 |
+
Quality Acc: 0.7514
|
| 332 |
+
Average Acc: 0.7254
|
| 333 |
+
Total Loss: 0.6398
|
| 334 |
+
Instruction Loss: 0.6813
|
| 335 |
+
Quality Loss: 0.5983
|
| 336 |
+
============================================================
|
| 337 |
+
2026-01-25 01:06:21 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
|
| 338 |
+
2026-01-25 01:06:21 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1599.pt (428.0MB)
|
| 339 |
+
2026-01-25 01:06:21 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1599.pt
|
| 340 |
+
2026-01-25 01:06:21 | INFO | Best 3 checkpoints:
|
| 341 |
+
2026-01-25 01:06:21 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 342 |
+
2026-01-25 01:06:21 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 343 |
+
2026-01-25 01:06:21 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 344 |
+
2026-01-25 01:06:25 | INFO | Step 1600: loss=0.2735 | IF_loss=0.3038, MQ_loss=0.2432 | acc=0.906 (IF=0.875, MQ=0.938) | lr=0.000001
|
| 345 |
+
2026-01-25 01:08:16 | INFO |
|
| 346 |
+
============================================================
|
| 347 |
+
Validation Results (took 7.81s):
|
| 348 |
+
Samples: 346 instruction, 346 quality
|
| 349 |
+
Instruction Acc: 0.6994
|
| 350 |
+
Quality Acc: 0.7514
|
| 351 |
+
Average Acc: 0.7254
|
| 352 |
+
Total Loss: 0.6407
|
| 353 |
+
Instruction Loss: 0.6825
|
| 354 |
+
Quality Loss: 0.5989
|
| 355 |
+
============================================================
|
| 356 |
+
2026-01-25 01:08:16 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
|
| 357 |
+
2026-01-25 01:08:16 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1699.pt (428.0MB)
|
| 358 |
+
2026-01-25 01:08:16 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1699.pt
|
| 359 |
+
2026-01-25 01:08:16 | INFO | Best 3 checkpoints:
|
| 360 |
+
2026-01-25 01:08:16 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 361 |
+
2026-01-25 01:08:16 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 362 |
+
2026-01-25 01:08:16 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 363 |
+
2026-01-25 01:08:17 | INFO | Step 1700: loss=0.3877 | IF_loss=0.2611, MQ_loss=0.5142 | acc=0.771 (IF=0.896, MQ=0.646) | lr=0.000001
|
| 364 |
+
2026-01-25 01:10:15 | INFO |
|
| 365 |
+
============================================================
|
| 366 |
+
Validation Results (took 7.66s):
|
| 367 |
+
Samples: 346 instruction, 346 quality
|
| 368 |
+
Instruction Acc: 0.6994
|
| 369 |
+
Quality Acc: 0.7514
|
| 370 |
+
Average Acc: 0.7254
|
| 371 |
+
Total Loss: 0.6421
|
| 372 |
+
Instruction Loss: 0.6844
|
| 373 |
+
Quality Loss: 0.5999
|
| 374 |
+
============================================================
|
| 375 |
+
2026-01-25 01:10:15 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
|
| 376 |
+
2026-01-25 01:10:15 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1799.pt (428.0MB)
|
| 377 |
+
2026-01-25 01:10:15 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1799.pt
|
| 378 |
+
2026-01-25 01:10:15 | INFO | Best 3 checkpoints:
|
| 379 |
+
2026-01-25 01:10:15 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 380 |
+
2026-01-25 01:10:15 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 381 |
+
2026-01-25 01:10:15 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 382 |
+
2026-01-25 01:10:16 | INFO | Step 1800: loss=0.2445 | IF_loss=0.2773, MQ_loss=0.2117 | acc=0.896 (IF=0.854, MQ=0.938) | lr=0.000000
|
| 383 |
+
2026-01-25 01:12:06 | INFO |
|
| 384 |
+
============================================================
|
| 385 |
+
Validation Results (took 7.71s):
|
| 386 |
+
Samples: 346 instruction, 346 quality
|
| 387 |
+
Instruction Acc: 0.6994
|
| 388 |
+
Quality Acc: 0.7514
|
| 389 |
+
Average Acc: 0.7254
|
| 390 |
+
Total Loss: 0.6428
|
| 391 |
+
Instruction Loss: 0.6848
|
| 392 |
+
Quality Loss: 0.6007
|
| 393 |
+
============================================================
|
| 394 |
+
2026-01-25 01:12:06 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
|
| 395 |
+
2026-01-25 01:12:06 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1899.pt (428.0MB)
|
| 396 |
+
2026-01-25 01:12:06 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1899.pt
|
| 397 |
+
2026-01-25 01:12:06 | INFO | Best 3 checkpoints:
|
| 398 |
+
2026-01-25 01:12:06 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 399 |
+
2026-01-25 01:12:06 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 400 |
+
2026-01-25 01:12:06 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 401 |
+
2026-01-25 01:12:07 | INFO | Step 1900: loss=0.2576 | IF_loss=0.2896, MQ_loss=0.2257 | acc=0.833 (IF=0.771, MQ=0.896) | lr=0.000000
|
| 402 |
+
2026-01-25 01:14:00 | INFO |
|
| 403 |
+
============================================================
|
| 404 |
+
Validation Results (took 7.55s):
|
| 405 |
+
Samples: 346 instruction, 346 quality
|
| 406 |
+
Instruction Acc: 0.6994
|
| 407 |
+
Quality Acc: 0.7514
|
| 408 |
+
Average Acc: 0.7254
|
| 409 |
+
Total Loss: 0.6428
|
| 410 |
+
Instruction Loss: 0.6847
|
| 411 |
+
Quality Loss: 0.6010
|
| 412 |
+
============================================================
|
| 413 |
+
2026-01-25 01:14:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
|
| 414 |
+
2026-01-25 01:14:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1999.pt (428.0MB)
|
| 415 |
+
2026-01-25 01:14:00 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0035/ckpt/reward_model.best_1999.pt
|
| 416 |
+
2026-01-25 01:14:00 | INFO | Best 3 checkpoints:
|
| 417 |
+
2026-01-25 01:14:00 | INFO | 1. Step 199: acc=0.7327 (reward_model.best_199.pt)
|
| 418 |
+
2026-01-25 01:14:00 | INFO | 2. Step 299: acc=0.7312 (reward_model.best_299.pt)
|
| 419 |
+
2026-01-25 01:14:00 | INFO | 3. Step 399: acc=0.7283 (reward_model.best_399.pt)
|
| 420 |
+
2026-01-25 01:14:00 | INFO | Training complete!
|
| 421 |
+
2026-01-25 01:14:00 | INFO | Training complete!
|
20260125_0037/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '1'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.0.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_0037/eval_results_0125_1713.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
20260125_0037/reward_model/1769272678.832529/events.out.tfevents.1769272678.MACLAB-S004.3414271.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b803b753ca1da01eb8873ce114173c51f03c97c09a2bf8250935c19916c7993
|
| 3 |
+
size 503
|
20260125_0037/reward_model/1769272678.8337765/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_0037/reward_model/events.out.tfevents.1769272678.MACLAB-S004.3414271.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da51cc565d6934666cc50ed6ca2621a2ab35e5035a94a17869931f3b323adb3e
|
| 3 |
+
size 873949
|
20260125_0037/train.20260125_0037.log
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 00:37:47 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/train.20260125_0037.log
|
| 2 |
+
2026-01-25 00:37:47 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-25 00:37:49 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-25 00:37:49 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-25 00:37:49 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.0.pt
|
| 6 |
+
2026-01-25 00:37:49 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-25 00:37:55 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-25 00:37:55 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-25 00:37:56 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-25 00:37:56 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-25 00:37:56 | INFO | Apply to eval: True, ref: True
|
| 12 |
+
2026-01-25 00:37:56 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-25 00:37:56 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-25 00:37:56 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 15 |
+
2026-01-25 00:37:56 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 16 |
+
2026-01-25 00:37:56 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-25 00:37:56 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-25 00:37:58 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 19 |
+
2026-01-25 00:37:58 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=0)
|
| 20 |
+
2026-01-25 00:37:58 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.0.pt
|
| 21 |
+
2026-01-25 00:37:58 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 22 |
+
2026-01-25 00:37:58 | INFO | Text encoder (frozen): 328.389M
|
| 23 |
+
2026-01-25 00:37:58 | INFO | Audio encoder (frozen): 334.189M
|
| 24 |
+
2026-01-25 00:37:58 | INFO | Other trainable: 38.584M
|
| 25 |
+
2026-01-25 00:37:58 | INFO | ℹ No LoRA configuration detected
|
| 26 |
+
2026-01-25 00:37:58 | INFO | ============================================================
|
| 27 |
+
2026-01-25 00:37:58 | INFO | Ready to start training
|
| 28 |
+
2026-01-25 00:37:58 | INFO | ============================================================
|
| 29 |
+
2026-01-25 00:37:58 | INFO | Starting training from step 0
|
| 30 |
+
2026-01-25 00:37:58 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 31 |
+
2026-01-25 00:37:58 | INFO | accelerator.device = cuda
|
| 32 |
+
2026-01-25 00:37:58 | INFO | mixed_precision = bf16
|
| 33 |
+
2026-01-25 00:37:58 | INFO | distributed_type = NO
|
| 34 |
+
2026-01-25 00:37:58 | INFO | num_processes = 1
|
| 35 |
+
2026-01-25 00:37:58 | INFO | process_index = 0
|
| 36 |
+
2026-01-25 00:37:58 | INFO | is_main_process = True
|
| 37 |
+
2026-01-25 00:37:58 | INFO | torch.cuda.is_available() = True
|
| 38 |
+
2026-01-25 00:37:58 | INFO | torch.cuda.device_count() = 1
|
| 39 |
+
2026-01-25 00:37:58 | INFO | current_device = 0
|
| 40 |
+
2026-01-25 00:37:58 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 41 |
+
2026-01-25 00:37:58 | INFO | model parameter device = cuda:0
|
| 42 |
+
2026-01-25 00:37:58 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 43 |
+
2026-01-25 00:38:08 | INFO | Step 0: loss=0.6973 | IF_loss=0.6935, MQ_loss=0.7010 | acc=0.510 (IF=0.521, MQ=0.500) | lr=0.000002
|
| 44 |
+
2026-01-25 00:38:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 45 |
+
2026-01-25 00:38:09 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.0.pt (428.0MB)
|
| 46 |
+
2026-01-25 00:38:09 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.0.pt
|
| 47 |
+
2026-01-25 00:40:04 | INFO |
|
| 48 |
+
============================================================
|
| 49 |
+
Validation Results (took 11.05s):
|
| 50 |
+
Samples: 346 instruction, 346 quality
|
| 51 |
+
Instruction Acc: 0.6416
|
| 52 |
+
Quality Acc: 0.7312
|
| 53 |
+
Average Acc: 0.6864
|
| 54 |
+
Total Loss: 0.5721
|
| 55 |
+
Instruction Loss: 0.6193
|
| 56 |
+
Quality Loss: 0.5249
|
| 57 |
+
============================================================
|
| 58 |
+
2026-01-25 00:40:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 59 |
+
2026-01-25 00:40:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_99.pt (428.0MB)
|
| 60 |
+
2026-01-25 00:40:04 | INFO | Best 1 checkpoints:
|
| 61 |
+
2026-01-25 00:40:04 | INFO | 1. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 62 |
+
2026-01-25 00:40:05 | INFO | Step 100: loss=0.5208 | IF_loss=0.5649, MQ_loss=0.4766 | acc=0.740 (IF=0.708, MQ=0.771) | lr=0.000010
|
| 63 |
+
2026-01-25 00:42:11 | INFO |
|
| 64 |
+
============================================================
|
| 65 |
+
Validation Results (took 11.25s):
|
| 66 |
+
Samples: 346 instruction, 346 quality
|
| 67 |
+
Instruction Acc: 0.6705
|
| 68 |
+
Quality Acc: 0.7225
|
| 69 |
+
Average Acc: 0.6965
|
| 70 |
+
Total Loss: 0.5544
|
| 71 |
+
Instruction Loss: 0.5969
|
| 72 |
+
Quality Loss: 0.5120
|
| 73 |
+
============================================================
|
| 74 |
+
2026-01-25 00:42:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 75 |
+
2026-01-25 00:42:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_199.pt (428.0MB)
|
| 76 |
+
2026-01-25 00:42:12 | INFO | Best 2 checkpoints:
|
| 77 |
+
2026-01-25 00:42:12 | INFO | 1. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 78 |
+
2026-01-25 00:42:12 | INFO | 2. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 79 |
+
2026-01-25 00:42:13 | INFO | Step 200: loss=0.3984 | IF_loss=0.4045, MQ_loss=0.3923 | acc=0.823 (IF=0.812, MQ=0.833) | lr=0.000010
|
| 80 |
+
2026-01-25 00:44:08 | INFO |
|
| 81 |
+
============================================================
|
| 82 |
+
Validation Results (took 8.90s):
|
| 83 |
+
Samples: 346 instruction, 346 quality
|
| 84 |
+
Instruction Acc: 0.6792
|
| 85 |
+
Quality Acc: 0.7370
|
| 86 |
+
Average Acc: 0.7081
|
| 87 |
+
Total Loss: 0.5605
|
| 88 |
+
Instruction Loss: 0.6104
|
| 89 |
+
Quality Loss: 0.5105
|
| 90 |
+
============================================================
|
| 91 |
+
2026-01-25 00:44:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 92 |
+
2026-01-25 00:44:09 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_299.pt (428.0MB)
|
| 93 |
+
2026-01-25 00:44:09 | INFO | Best 3 checkpoints:
|
| 94 |
+
2026-01-25 00:44:09 | INFO | 1. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 95 |
+
2026-01-25 00:44:09 | INFO | 2. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 96 |
+
2026-01-25 00:44:09 | INFO | 3. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 97 |
+
2026-01-25 00:44:11 | INFO | Step 300: loss=0.3611 | IF_loss=0.4409, MQ_loss=0.2813 | acc=0.812 (IF=0.792, MQ=0.833) | lr=0.000009
|
| 98 |
+
2026-01-25 00:46:11 | INFO |
|
| 99 |
+
============================================================
|
| 100 |
+
Validation Results (took 8.37s):
|
| 101 |
+
Samples: 346 instruction, 346 quality
|
| 102 |
+
Instruction Acc: 0.6908
|
| 103 |
+
Quality Acc: 0.7168
|
| 104 |
+
Average Acc: 0.7038
|
| 105 |
+
Total Loss: 0.5862
|
| 106 |
+
Instruction Loss: 0.6208
|
| 107 |
+
Quality Loss: 0.5516
|
| 108 |
+
============================================================
|
| 109 |
+
2026-01-25 00:46:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 110 |
+
2026-01-25 00:46:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_399.pt (428.0MB)
|
| 111 |
+
2026-01-25 00:46:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_99.pt
|
| 112 |
+
2026-01-25 00:46:12 | INFO | Best 3 checkpoints:
|
| 113 |
+
2026-01-25 00:46:12 | INFO | 1. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 114 |
+
2026-01-25 00:46:12 | INFO | 2. Step 399: acc=0.7038 (reward_model.best_399.pt)
|
| 115 |
+
2026-01-25 00:46:12 | INFO | 3. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 116 |
+
2026-01-25 00:46:13 | INFO | Step 400: loss=0.3193 | IF_loss=0.3378, MQ_loss=0.3007 | acc=0.865 (IF=0.833, MQ=0.896) | lr=0.000009
|
| 117 |
+
2026-01-25 00:48:10 | INFO |
|
| 118 |
+
============================================================
|
| 119 |
+
Validation Results (took 8.01s):
|
| 120 |
+
Samples: 346 instruction, 346 quality
|
| 121 |
+
Instruction Acc: 0.6763
|
| 122 |
+
Quality Acc: 0.7312
|
| 123 |
+
Average Acc: 0.7038
|
| 124 |
+
Total Loss: 0.5854
|
| 125 |
+
Instruction Loss: 0.6252
|
| 126 |
+
Quality Loss: 0.5457
|
| 127 |
+
============================================================
|
| 128 |
+
2026-01-25 00:48:11 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 129 |
+
2026-01-25 00:48:11 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_499.pt (428.0MB)
|
| 130 |
+
2026-01-25 00:48:11 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_199.pt
|
| 131 |
+
2026-01-25 00:48:11 | INFO | Best 3 checkpoints:
|
| 132 |
+
2026-01-25 00:48:11 | INFO | 1. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 133 |
+
2026-01-25 00:48:11 | INFO | 2. Step 399: acc=0.7038 (reward_model.best_399.pt)
|
| 134 |
+
2026-01-25 00:48:11 | INFO | 3. Step 499: acc=0.7038 (reward_model.best_499.pt)
|
| 135 |
+
2026-01-25 00:48:12 | INFO | Step 500: loss=0.3185 | IF_loss=0.3553, MQ_loss=0.2816 | acc=0.844 (IF=0.875, MQ=0.812) | lr=0.000009
|
| 136 |
+
2026-01-25 00:50:10 | INFO |
|
| 137 |
+
============================================================
|
| 138 |
+
Validation Results (took 7.80s):
|
| 139 |
+
Samples: 346 instruction, 346 quality
|
| 140 |
+
Instruction Acc: 0.6792
|
| 141 |
+
Quality Acc: 0.7486
|
| 142 |
+
Average Acc: 0.7139
|
| 143 |
+
Total Loss: 0.5868
|
| 144 |
+
Instruction Loss: 0.6327
|
| 145 |
+
Quality Loss: 0.5409
|
| 146 |
+
============================================================
|
| 147 |
+
2026-01-25 00:50:10 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 148 |
+
2026-01-25 00:50:11 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_599.pt (428.0MB)
|
| 149 |
+
2026-01-25 00:50:11 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_499.pt
|
| 150 |
+
2026-01-25 00:50:11 | INFO | Best 3 checkpoints:
|
| 151 |
+
2026-01-25 00:50:11 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 152 |
+
2026-01-25 00:50:11 | INFO | 2. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 153 |
+
2026-01-25 00:50:11 | INFO | 3. Step 399: acc=0.7038 (reward_model.best_399.pt)
|
| 154 |
+
2026-01-25 00:50:12 | INFO | Step 600: loss=0.3412 | IF_loss=0.3309, MQ_loss=0.3515 | acc=0.844 (IF=0.875, MQ=0.812) | lr=0.000008
|
| 155 |
+
2026-01-25 00:52:10 | INFO |
|
| 156 |
+
============================================================
|
| 157 |
+
Validation Results (took 8.05s):
|
| 158 |
+
Samples: 346 instruction, 346 quality
|
| 159 |
+
Instruction Acc: 0.6850
|
| 160 |
+
Quality Acc: 0.7399
|
| 161 |
+
Average Acc: 0.7124
|
| 162 |
+
Total Loss: 0.6273
|
| 163 |
+
Instruction Loss: 0.6640
|
| 164 |
+
Quality Loss: 0.5907
|
| 165 |
+
============================================================
|
| 166 |
+
2026-01-25 00:52:10 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 167 |
+
2026-01-25 00:52:11 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_699.pt (428.0MB)
|
| 168 |
+
2026-01-25 00:52:11 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_399.pt
|
| 169 |
+
2026-01-25 00:52:11 | INFO | Best 3 checkpoints:
|
| 170 |
+
2026-01-25 00:52:11 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 171 |
+
2026-01-25 00:52:11 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 172 |
+
2026-01-25 00:52:11 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 173 |
+
2026-01-25 00:52:12 | INFO | Step 700: loss=0.1745 | IF_loss=0.2334, MQ_loss=0.1156 | acc=0.917 (IF=0.875, MQ=0.958) | lr=0.000007
|
| 174 |
+
2026-01-25 00:54:13 | INFO |
|
| 175 |
+
============================================================
|
| 176 |
+
Validation Results (took 8.54s):
|
| 177 |
+
Samples: 346 instruction, 346 quality
|
| 178 |
+
Instruction Acc: 0.6590
|
| 179 |
+
Quality Acc: 0.7341
|
| 180 |
+
Average Acc: 0.6965
|
| 181 |
+
Total Loss: 0.6533
|
| 182 |
+
Instruction Loss: 0.6973
|
| 183 |
+
Quality Loss: 0.6092
|
| 184 |
+
============================================================
|
| 185 |
+
2026-01-25 00:54:13 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 186 |
+
2026-01-25 00:54:14 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_799.pt (428.0MB)
|
| 187 |
+
2026-01-25 00:54:14 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_799.pt
|
| 188 |
+
2026-01-25 00:54:14 | INFO | Best 3 checkpoints:
|
| 189 |
+
2026-01-25 00:54:14 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 190 |
+
2026-01-25 00:54:14 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 191 |
+
2026-01-25 00:54:14 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 192 |
+
2026-01-25 00:54:15 | INFO | Step 800: loss=0.2953 | IF_loss=0.3655, MQ_loss=0.2252 | acc=0.875 (IF=0.833, MQ=0.917) | lr=0.000007
|
| 193 |
+
2026-01-25 00:56:15 | INFO |
|
| 194 |
+
============================================================
|
| 195 |
+
Validation Results (took 8.04s):
|
| 196 |
+
Samples: 346 instruction, 346 quality
|
| 197 |
+
Instruction Acc: 0.6590
|
| 198 |
+
Quality Acc: 0.7283
|
| 199 |
+
Average Acc: 0.6936
|
| 200 |
+
Total Loss: 0.6663
|
| 201 |
+
Instruction Loss: 0.7004
|
| 202 |
+
Quality Loss: 0.6321
|
| 203 |
+
============================================================
|
| 204 |
+
2026-01-25 00:56:15 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 205 |
+
2026-01-25 00:56:16 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_899.pt (428.0MB)
|
| 206 |
+
2026-01-25 00:56:16 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_899.pt
|
| 207 |
+
2026-01-25 00:56:16 | INFO | Best 3 checkpoints:
|
| 208 |
+
2026-01-25 00:56:16 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 209 |
+
2026-01-25 00:56:16 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 210 |
+
2026-01-25 00:56:16 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 211 |
+
2026-01-25 00:56:17 | INFO | Step 900: loss=0.1683 | IF_loss=0.1746, MQ_loss=0.1621 | acc=0.938 (IF=0.958, MQ=0.917) | lr=0.000006
|
| 212 |
+
2026-01-25 00:58:18 | INFO |
|
| 213 |
+
============================================================
|
| 214 |
+
Validation Results (took 9.16s):
|
| 215 |
+
Samples: 346 instruction, 346 quality
|
| 216 |
+
Instruction Acc: 0.6561
|
| 217 |
+
Quality Acc: 0.7428
|
| 218 |
+
Average Acc: 0.6994
|
| 219 |
+
Total Loss: 0.6976
|
| 220 |
+
Instruction Loss: 0.7340
|
| 221 |
+
Quality Loss: 0.6612
|
| 222 |
+
============================================================
|
| 223 |
+
2026-01-25 00:58:19 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 224 |
+
2026-01-25 00:58:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_999.pt (428.0MB)
|
| 225 |
+
2026-01-25 00:58:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_999.pt
|
| 226 |
+
2026-01-25 00:58:19 | INFO | Best 3 checkpoints:
|
| 227 |
+
2026-01-25 00:58:19 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 228 |
+
2026-01-25 00:58:19 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 229 |
+
2026-01-25 00:58:19 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 230 |
+
2026-01-25 00:58:20 | INFO | Step 1000: loss=0.1489 | IF_loss=0.1420, MQ_loss=0.1559 | acc=0.948 (IF=0.938, MQ=0.958) | lr=0.000005
|
| 231 |
+
2026-01-25 01:00:15 | INFO |
|
| 232 |
+
============================================================
|
| 233 |
+
Validation Results (took 7.24s):
|
| 234 |
+
Samples: 346 instruction, 346 quality
|
| 235 |
+
Instruction Acc: 0.6590
|
| 236 |
+
Quality Acc: 0.7312
|
| 237 |
+
Average Acc: 0.6951
|
| 238 |
+
Total Loss: 0.7224
|
| 239 |
+
Instruction Loss: 0.7648
|
| 240 |
+
Quality Loss: 0.6801
|
| 241 |
+
============================================================
|
| 242 |
+
2026-01-25 01:00:15 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 243 |
+
2026-01-25 01:00:15 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1099.pt (428.0MB)
|
| 244 |
+
2026-01-25 01:00:16 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1099.pt
|
| 245 |
+
2026-01-25 01:00:16 | INFO | Best 3 checkpoints:
|
| 246 |
+
2026-01-25 01:00:16 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 247 |
+
2026-01-25 01:00:16 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 248 |
+
2026-01-25 01:00:16 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 249 |
+
2026-01-25 01:00:16 | INFO | Step 1100: loss=0.1252 | IF_loss=0.1115, MQ_loss=0.1390 | acc=0.958 (IF=0.979, MQ=0.938) | lr=0.000004
|
| 250 |
+
2026-01-25 01:02:06 | INFO |
|
| 251 |
+
============================================================
|
| 252 |
+
Validation Results (took 7.27s):
|
| 253 |
+
Samples: 346 instruction, 346 quality
|
| 254 |
+
Instruction Acc: 0.6503
|
| 255 |
+
Quality Acc: 0.7283
|
| 256 |
+
Average Acc: 0.6893
|
| 257 |
+
Total Loss: 0.7565
|
| 258 |
+
Instruction Loss: 0.8083
|
| 259 |
+
Quality Loss: 0.7047
|
| 260 |
+
============================================================
|
| 261 |
+
2026-01-25 01:02:06 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 262 |
+
2026-01-25 01:02:06 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1199.pt (428.0MB)
|
| 263 |
+
2026-01-25 01:02:06 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1199.pt
|
| 264 |
+
2026-01-25 01:02:06 | INFO | Best 3 checkpoints:
|
| 265 |
+
2026-01-25 01:02:06 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 266 |
+
2026-01-25 01:02:06 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 267 |
+
2026-01-25 01:02:06 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 268 |
+
2026-01-25 01:02:07 | INFO | Step 1200: loss=0.1319 | IF_loss=0.1250, MQ_loss=0.1388 | acc=0.896 (IF=0.875, MQ=0.917) | lr=0.000003
|
| 269 |
+
2026-01-25 01:04:00 | INFO |
|
| 270 |
+
============================================================
|
| 271 |
+
Validation Results (took 7.46s):
|
| 272 |
+
Samples: 346 instruction, 346 quality
|
| 273 |
+
Instruction Acc: 0.6647
|
| 274 |
+
Quality Acc: 0.7283
|
| 275 |
+
Average Acc: 0.6965
|
| 276 |
+
Total Loss: 0.7569
|
| 277 |
+
Instruction Loss: 0.8060
|
| 278 |
+
Quality Loss: 0.7079
|
| 279 |
+
============================================================
|
| 280 |
+
2026-01-25 01:04:00 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 281 |
+
2026-01-25 01:04:00 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1299.pt (428.0MB)
|
| 282 |
+
2026-01-25 01:04:01 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1299.pt
|
| 283 |
+
2026-01-25 01:04:01 | INFO | Best 3 checkpoints:
|
| 284 |
+
2026-01-25 01:04:01 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 285 |
+
2026-01-25 01:04:01 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 286 |
+
2026-01-25 01:04:01 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 287 |
+
2026-01-25 01:04:02 | INFO | Step 1300: loss=0.1072 | IF_loss=0.1049, MQ_loss=0.1095 | acc=0.958 (IF=0.979, MQ=0.938) | lr=0.000003
|
| 288 |
+
2026-01-25 01:05:51 | INFO |
|
| 289 |
+
============================================================
|
| 290 |
+
Validation Results (took 7.30s):
|
| 291 |
+
Samples: 346 instruction, 346 quality
|
| 292 |
+
Instruction Acc: 0.6445
|
| 293 |
+
Quality Acc: 0.7254
|
| 294 |
+
Average Acc: 0.6850
|
| 295 |
+
Total Loss: 0.7646
|
| 296 |
+
Instruction Loss: 0.8179
|
| 297 |
+
Quality Loss: 0.7114
|
| 298 |
+
============================================================
|
| 299 |
+
2026-01-25 01:05:51 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 300 |
+
2026-01-25 01:05:52 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1399.pt (428.0MB)
|
| 301 |
+
2026-01-25 01:05:52 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1399.pt
|
| 302 |
+
2026-01-25 01:05:52 | INFO | Best 3 checkpoints:
|
| 303 |
+
2026-01-25 01:05:52 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 304 |
+
2026-01-25 01:05:52 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 305 |
+
2026-01-25 01:05:52 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 306 |
+
2026-01-25 01:05:53 | INFO | Step 1400: loss=0.1399 | IF_loss=0.1608, MQ_loss=0.1191 | acc=0.969 (IF=0.958, MQ=0.979) | lr=0.000002
|
| 307 |
+
2026-01-25 01:07:49 | INFO |
|
| 308 |
+
============================================================
|
| 309 |
+
Validation Results (took 7.28s):
|
| 310 |
+
Samples: 346 instruction, 346 quality
|
| 311 |
+
Instruction Acc: 0.6474
|
| 312 |
+
Quality Acc: 0.7370
|
| 313 |
+
Average Acc: 0.6922
|
| 314 |
+
Total Loss: 0.7650
|
| 315 |
+
Instruction Loss: 0.8168
|
| 316 |
+
Quality Loss: 0.7131
|
| 317 |
+
============================================================
|
| 318 |
+
2026-01-25 01:07:49 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
|
| 319 |
+
2026-01-25 01:07:50 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1499.pt (428.0MB)
|
| 320 |
+
2026-01-25 01:07:50 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1499.pt
|
| 321 |
+
2026-01-25 01:07:50 | INFO | Best 3 checkpoints:
|
| 322 |
+
2026-01-25 01:07:50 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 323 |
+
2026-01-25 01:07:50 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 324 |
+
2026-01-25 01:07:50 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 325 |
+
2026-01-25 01:07:51 | INFO | Step 1500: loss=0.0816 | IF_loss=0.1031, MQ_loss=0.0600 | acc=0.969 (IF=0.979, MQ=0.958) | lr=0.000001
|
| 326 |
+
2026-01-25 01:09:42 | INFO |
|
| 327 |
+
============================================================
|
| 328 |
+
Validation Results (took 7.73s):
|
| 329 |
+
Samples: 346 instruction, 346 quality
|
| 330 |
+
Instruction Acc: 0.6445
|
| 331 |
+
Quality Acc: 0.7254
|
| 332 |
+
Average Acc: 0.6850
|
| 333 |
+
Total Loss: 0.7697
|
| 334 |
+
Instruction Loss: 0.8237
|
| 335 |
+
Quality Loss: 0.7156
|
| 336 |
+
============================================================
|
| 337 |
+
2026-01-25 01:09:42 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
|
| 338 |
+
2026-01-25 01:09:42 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1599.pt (428.0MB)
|
| 339 |
+
2026-01-25 01:09:42 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1599.pt
|
| 340 |
+
2026-01-25 01:09:42 | INFO | Best 3 checkpoints:
|
| 341 |
+
2026-01-25 01:09:42 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 342 |
+
2026-01-25 01:09:42 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 343 |
+
2026-01-25 01:09:42 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 344 |
+
2026-01-25 01:09:47 | INFO | Step 1600: loss=0.0867 | IF_loss=0.0924, MQ_loss=0.0809 | acc=0.990 (IF=0.979, MQ=1.000) | lr=0.000001
|
| 345 |
+
2026-01-25 01:11:40 | INFO |
|
| 346 |
+
============================================================
|
| 347 |
+
Validation Results (took 8.52s):
|
| 348 |
+
Samples: 346 instruction, 346 quality
|
| 349 |
+
Instruction Acc: 0.6532
|
| 350 |
+
Quality Acc: 0.7283
|
| 351 |
+
Average Acc: 0.6908
|
| 352 |
+
Total Loss: 0.7751
|
| 353 |
+
Instruction Loss: 0.8262
|
| 354 |
+
Quality Loss: 0.7239
|
| 355 |
+
============================================================
|
| 356 |
+
2026-01-25 01:11:40 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
|
| 357 |
+
2026-01-25 01:11:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1699.pt (428.0MB)
|
| 358 |
+
2026-01-25 01:11:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1699.pt
|
| 359 |
+
2026-01-25 01:11:41 | INFO | Best 3 checkpoints:
|
| 360 |
+
2026-01-25 01:11:41 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 361 |
+
2026-01-25 01:11:41 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 362 |
+
2026-01-25 01:11:41 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 363 |
+
2026-01-25 01:11:42 | INFO | Step 1700: loss=0.1204 | IF_loss=0.0824, MQ_loss=0.1585 | acc=0.927 (IF=0.979, MQ=0.875) | lr=0.000001
|
| 364 |
+
2026-01-25 01:13:36 | INFO |
|
| 365 |
+
============================================================
|
| 366 |
+
Validation Results (took 7.21s):
|
| 367 |
+
Samples: 346 instruction, 346 quality
|
| 368 |
+
Instruction Acc: 0.6590
|
| 369 |
+
Quality Acc: 0.7283
|
| 370 |
+
Average Acc: 0.6936
|
| 371 |
+
Total Loss: 0.7810
|
| 372 |
+
Instruction Loss: 0.8338
|
| 373 |
+
Quality Loss: 0.7282
|
| 374 |
+
============================================================
|
| 375 |
+
2026-01-25 01:13:36 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
|
| 376 |
+
2026-01-25 01:13:36 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1799.pt (428.0MB)
|
| 377 |
+
2026-01-25 01:13:37 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1799.pt
|
| 378 |
+
2026-01-25 01:13:37 | INFO | Best 3 checkpoints:
|
| 379 |
+
2026-01-25 01:13:37 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 380 |
+
2026-01-25 01:13:37 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 381 |
+
2026-01-25 01:13:37 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 382 |
+
2026-01-25 01:13:38 | INFO | Step 1800: loss=0.0594 | IF_loss=0.0719, MQ_loss=0.0470 | acc=0.979 (IF=0.958, MQ=1.000) | lr=0.000000
|
| 383 |
+
2026-01-25 01:15:28 | INFO |
|
| 384 |
+
============================================================
|
| 385 |
+
Validation Results (took 7.61s):
|
| 386 |
+
Samples: 346 instruction, 346 quality
|
| 387 |
+
Instruction Acc: 0.6532
|
| 388 |
+
Quality Acc: 0.7283
|
| 389 |
+
Average Acc: 0.6908
|
| 390 |
+
Total Loss: 0.7827
|
| 391 |
+
Instruction Loss: 0.8356
|
| 392 |
+
Quality Loss: 0.7299
|
| 393 |
+
============================================================
|
| 394 |
+
2026-01-25 01:15:28 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
|
| 395 |
+
2026-01-25 01:15:28 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1899.pt (428.0MB)
|
| 396 |
+
2026-01-25 01:15:28 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1899.pt
|
| 397 |
+
2026-01-25 01:15:28 | INFO | Best 3 checkpoints:
|
| 398 |
+
2026-01-25 01:15:28 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 399 |
+
2026-01-25 01:15:28 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 400 |
+
2026-01-25 01:15:28 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 401 |
+
2026-01-25 01:15:29 | INFO | Step 1900: loss=0.1343 | IF_loss=0.1457, MQ_loss=0.1229 | acc=0.906 (IF=0.896, MQ=0.917) | lr=0.000000
|
| 402 |
+
2026-01-25 01:17:19 | INFO |
|
| 403 |
+
============================================================
|
| 404 |
+
Validation Results (took 6.92s):
|
| 405 |
+
Samples: 346 instruction, 346 quality
|
| 406 |
+
Instruction Acc: 0.6532
|
| 407 |
+
Quality Acc: 0.7312
|
| 408 |
+
Average Acc: 0.6922
|
| 409 |
+
Total Loss: 0.7832
|
| 410 |
+
Instruction Loss: 0.8361
|
| 411 |
+
Quality Loss: 0.7304
|
| 412 |
+
============================================================
|
| 413 |
+
2026-01-25 01:17:19 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
|
| 414 |
+
2026-01-25 01:17:20 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1999.pt (428.0MB)
|
| 415 |
+
2026-01-25 01:17:20 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0037/ckpt/reward_model.best_1999.pt
|
| 416 |
+
2026-01-25 01:17:20 | INFO | Best 3 checkpoints:
|
| 417 |
+
2026-01-25 01:17:20 | INFO | 1. Step 599: acc=0.7139 (reward_model.best_599.pt)
|
| 418 |
+
2026-01-25 01:17:20 | INFO | 2. Step 699: acc=0.7124 (reward_model.best_699.pt)
|
| 419 |
+
2026-01-25 01:17:20 | INFO | 3. Step 299: acc=0.7081 (reward_model.best_299.pt)
|
| 420 |
+
2026-01-25 01:17:20 | INFO | Training complete!
|
| 421 |
+
2026-01-25 01:17:20 | INFO | Training complete!
|
20260125_0038/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '3'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.20000.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_0038/reward_model/1769272741.4481056/events.out.tfevents.1769272741.MACLAB-S004.3419169.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b05b3493f3a74ca2aaaf8a9d4104cacb90a38935cf1f11482a5dd926ef450af
|
| 3 |
+
size 503
|
20260125_0038/reward_model/1769272741.4495451/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_0038/reward_model/events.out.tfevents.1769272741.MACLAB-S004.3419169.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87b529d6d3202ede1b59405b16990059f5aa626adc0cd7c689cc5f35b07c43d6
|
| 3 |
+
size 428856
|
20260125_0038/train.20260125_0038.log
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 00:38:50 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/train.20260125_0038.log
|
| 2 |
+
2026-01-25 00:38:50 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-25 00:38:51 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-25 00:38:51 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-25 00:38:51 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.20000.pt
|
| 6 |
+
2026-01-25 00:38:51 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-25 00:38:59 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 8 |
+
2026-01-25 00:38:59 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-25 00:38:59 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-25 00:38:59 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-25 00:38:59 | INFO | Apply to eval: True, ref: True
|
| 12 |
+
2026-01-25 00:38:59 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-25 00:38:59 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-25 00:38:59 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 15 |
+
2026-01-25 00:38:59 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 16 |
+
2026-01-25 00:38:59 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-25 00:38:59 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-25 00:39:00 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 19 |
+
2026-01-25 00:39:00 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=20000)
|
| 20 |
+
2026-01-25 00:39:00 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.20000.pt
|
| 21 |
+
2026-01-25 00:39:00 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 22 |
+
2026-01-25 00:39:00 | INFO | Text encoder (frozen): 328.389M
|
| 23 |
+
2026-01-25 00:39:00 | INFO | Audio encoder (frozen): 334.189M
|
| 24 |
+
2026-01-25 00:39:00 | INFO | Other trainable: 38.584M
|
| 25 |
+
2026-01-25 00:39:00 | INFO | ℹ No LoRA configuration detected
|
| 26 |
+
2026-01-25 00:39:01 | INFO | ============================================================
|
| 27 |
+
2026-01-25 00:39:01 | INFO | Ready to start training
|
| 28 |
+
2026-01-25 00:39:01 | INFO | ============================================================
|
| 29 |
+
2026-01-25 00:39:01 | INFO | Starting training from step 0
|
| 30 |
+
2026-01-25 00:39:01 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 31 |
+
2026-01-25 00:39:01 | INFO | accelerator.device = cuda
|
| 32 |
+
2026-01-25 00:39:01 | INFO | mixed_precision = bf16
|
| 33 |
+
2026-01-25 00:39:01 | INFO | distributed_type = NO
|
| 34 |
+
2026-01-25 00:39:01 | INFO | num_processes = 1
|
| 35 |
+
2026-01-25 00:39:01 | INFO | process_index = 0
|
| 36 |
+
2026-01-25 00:39:01 | INFO | is_main_process = True
|
| 37 |
+
2026-01-25 00:39:01 | INFO | torch.cuda.is_available() = True
|
| 38 |
+
2026-01-25 00:39:01 | INFO | torch.cuda.device_count() = 1
|
| 39 |
+
2026-01-25 00:39:01 | INFO | current_device = 0
|
| 40 |
+
2026-01-25 00:39:01 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 41 |
+
2026-01-25 00:39:01 | INFO | model parameter device = cuda:0
|
| 42 |
+
2026-01-25 00:39:01 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 43 |
+
2026-01-25 00:39:12 | INFO | Step 0: loss=1.3478 | IF_loss=1.7487, MQ_loss=0.9469 | acc=0.719 (IF=0.667, MQ=0.771) | lr=0.000002
|
| 44 |
+
2026-01-25 00:39:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 45 |
+
2026-01-25 00:39:13 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.0.pt (428.0MB)
|
| 46 |
+
2026-01-25 00:39:13 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.0.pt
|
| 47 |
+
2026-01-25 00:41:09 | INFO |
|
| 48 |
+
============================================================
|
| 49 |
+
Validation Results (took 10.74s):
|
| 50 |
+
Samples: 346 instruction, 346 quality
|
| 51 |
+
Instruction Acc: 0.7052
|
| 52 |
+
Quality Acc: 0.7139
|
| 53 |
+
Average Acc: 0.7095
|
| 54 |
+
Total Loss: 0.9740
|
| 55 |
+
Instruction Loss: 0.9900
|
| 56 |
+
Quality Loss: 0.9581
|
| 57 |
+
============================================================
|
| 58 |
+
2026-01-25 00:41:09 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 59 |
+
2026-01-25 00:41:09 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_99.pt (428.0MB)
|
| 60 |
+
2026-01-25 00:41:09 | INFO | Best 1 checkpoints:
|
| 61 |
+
2026-01-25 00:41:09 | INFO | 1. Step 99: acc=0.7095 (reward_model.best_99.pt)
|
| 62 |
+
2026-01-25 00:41:10 | INFO | Step 100: loss=0.8833 | IF_loss=0.7300, MQ_loss=1.0365 | acc=0.688 (IF=0.708, MQ=0.667) | lr=0.000010
|
| 63 |
+
2026-01-25 00:43:08 | INFO |
|
| 64 |
+
============================================================
|
| 65 |
+
Validation Results (took 7.91s):
|
| 66 |
+
Samples: 346 instruction, 346 quality
|
| 67 |
+
Instruction Acc: 0.7139
|
| 68 |
+
Quality Acc: 0.7370
|
| 69 |
+
Average Acc: 0.7254
|
| 70 |
+
Total Loss: 0.6643
|
| 71 |
+
Instruction Loss: 0.6989
|
| 72 |
+
Quality Loss: 0.6297
|
| 73 |
+
============================================================
|
| 74 |
+
2026-01-25 00:43:08 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 75 |
+
2026-01-25 00:43:08 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_199.pt (428.0MB)
|
| 76 |
+
2026-01-25 00:43:08 | INFO | Best 2 checkpoints:
|
| 77 |
+
2026-01-25 00:43:08 | INFO | 1. Step 199: acc=0.7254 (reward_model.best_199.pt)
|
| 78 |
+
2026-01-25 00:43:08 | INFO | 2. Step 99: acc=0.7095 (reward_model.best_99.pt)
|
| 79 |
+
2026-01-25 00:43:09 | INFO | Step 200: loss=0.3416 | IF_loss=0.3084, MQ_loss=0.3748 | acc=0.823 (IF=0.854, MQ=0.792) | lr=0.000010
|
| 80 |
+
2026-01-25 00:45:01 | INFO |
|
| 81 |
+
============================================================
|
| 82 |
+
Validation Results (took 8.58s):
|
| 83 |
+
Samples: 346 instruction, 346 quality
|
| 84 |
+
Instruction Acc: 0.7168
|
| 85 |
+
Quality Acc: 0.7543
|
| 86 |
+
Average Acc: 0.7355
|
| 87 |
+
Total Loss: 0.6117
|
| 88 |
+
Instruction Loss: 0.6554
|
| 89 |
+
Quality Loss: 0.5680
|
| 90 |
+
============================================================
|
| 91 |
+
2026-01-25 00:45:01 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 92 |
+
2026-01-25 00:45:01 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_299.pt (428.0MB)
|
| 93 |
+
2026-01-25 00:45:01 | INFO | Best 3 checkpoints:
|
| 94 |
+
2026-01-25 00:45:01 | INFO | 1. Step 299: acc=0.7355 (reward_model.best_299.pt)
|
| 95 |
+
2026-01-25 00:45:01 | INFO | 2. Step 199: acc=0.7254 (reward_model.best_199.pt)
|
| 96 |
+
2026-01-25 00:45:01 | INFO | 3. Step 99: acc=0.7095 (reward_model.best_99.pt)
|
| 97 |
+
2026-01-25 00:45:02 | INFO | Step 300: loss=0.3717 | IF_loss=0.4516, MQ_loss=0.2917 | acc=0.844 (IF=0.854, MQ=0.833) | lr=0.000009
|
| 98 |
+
2026-01-25 00:46:56 | INFO |
|
| 99 |
+
============================================================
|
| 100 |
+
Validation Results (took 8.46s):
|
| 101 |
+
Samples: 346 instruction, 346 quality
|
| 102 |
+
Instruction Acc: 0.7081
|
| 103 |
+
Quality Acc: 0.7630
|
| 104 |
+
Average Acc: 0.7355
|
| 105 |
+
Total Loss: 0.6097
|
| 106 |
+
Instruction Loss: 0.6561
|
| 107 |
+
Quality Loss: 0.5632
|
| 108 |
+
============================================================
|
| 109 |
+
2026-01-25 00:46:56 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 110 |
+
2026-01-25 00:46:56 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_399.pt (428.0MB)
|
| 111 |
+
2026-01-25 00:46:56 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_99.pt
|
| 112 |
+
2026-01-25 00:46:56 | INFO | Best 3 checkpoints:
|
| 113 |
+
2026-01-25 00:46:56 | INFO | 1. Step 299: acc=0.7355 (reward_model.best_299.pt)
|
| 114 |
+
2026-01-25 00:46:56 | INFO | 2. Step 399: acc=0.7355 (reward_model.best_399.pt)
|
| 115 |
+
2026-01-25 00:46:56 | INFO | 3. Step 199: acc=0.7254 (reward_model.best_199.pt)
|
| 116 |
+
2026-01-25 00:46:57 | INFO | Step 400: loss=0.5054 | IF_loss=0.5431, MQ_loss=0.4678 | acc=0.792 (IF=0.750, MQ=0.833) | lr=0.000009
|
| 117 |
+
2026-01-25 00:48:50 | INFO |
|
| 118 |
+
============================================================
|
| 119 |
+
Validation Results (took 8.33s):
|
| 120 |
+
Samples: 346 instruction, 346 quality
|
| 121 |
+
Instruction Acc: 0.7110
|
| 122 |
+
Quality Acc: 0.7630
|
| 123 |
+
Average Acc: 0.7370
|
| 124 |
+
Total Loss: 0.6197
|
| 125 |
+
Instruction Loss: 0.6728
|
| 126 |
+
Quality Loss: 0.5666
|
| 127 |
+
============================================================
|
| 128 |
+
2026-01-25 00:48:50 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 129 |
+
2026-01-25 00:48:50 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_499.pt (428.0MB)
|
| 130 |
+
2026-01-25 00:48:50 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_199.pt
|
| 131 |
+
2026-01-25 00:48:50 | INFO | Best 3 checkpoints:
|
| 132 |
+
2026-01-25 00:48:50 | INFO | 1. Step 499: acc=0.7370 (reward_model.best_499.pt)
|
| 133 |
+
2026-01-25 00:48:50 | INFO | 2. Step 299: acc=0.7355 (reward_model.best_299.pt)
|
| 134 |
+
2026-01-25 00:48:50 | INFO | 3. Step 399: acc=0.7355 (reward_model.best_399.pt)
|
| 135 |
+
2026-01-25 00:48:51 | INFO | Step 500: loss=0.4587 | IF_loss=0.5137, MQ_loss=0.4036 | acc=0.698 (IF=0.688, MQ=0.708) | lr=0.000009
|
| 136 |
+
2026-01-25 00:50:47 | INFO |
|
| 137 |
+
============================================================
|
| 138 |
+
Validation Results (took 7.48s):
|
| 139 |
+
Samples: 346 instruction, 346 quality
|
| 140 |
+
Instruction Acc: 0.7139
|
| 141 |
+
Quality Acc: 0.7601
|
| 142 |
+
Average Acc: 0.7370
|
| 143 |
+
Total Loss: 0.6275
|
| 144 |
+
Instruction Loss: 0.6826
|
| 145 |
+
Quality Loss: 0.5724
|
| 146 |
+
============================================================
|
| 147 |
+
2026-01-25 00:50:47 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 148 |
+
2026-01-25 00:50:48 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_599.pt (428.0MB)
|
| 149 |
+
2026-01-25 00:50:48 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_399.pt
|
| 150 |
+
2026-01-25 00:50:48 | INFO | Best 3 checkpoints:
|
| 151 |
+
2026-01-25 00:50:48 | INFO | 1. Step 499: acc=0.7370 (reward_model.best_499.pt)
|
| 152 |
+
2026-01-25 00:50:48 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 153 |
+
2026-01-25 00:50:48 | INFO | 3. Step 299: acc=0.7355 (reward_model.best_299.pt)
|
| 154 |
+
2026-01-25 00:50:49 | INFO | Step 600: loss=0.4492 | IF_loss=0.4779, MQ_loss=0.4205 | acc=0.833 (IF=0.792, MQ=0.875) | lr=0.000008
|
| 155 |
+
2026-01-25 00:52:39 | INFO |
|
| 156 |
+
============================================================
|
| 157 |
+
Validation Results (took 7.99s):
|
| 158 |
+
Samples: 346 instruction, 346 quality
|
| 159 |
+
Instruction Acc: 0.6994
|
| 160 |
+
Quality Acc: 0.7659
|
| 161 |
+
Average Acc: 0.7327
|
| 162 |
+
Total Loss: 0.6397
|
| 163 |
+
Instruction Loss: 0.7005
|
| 164 |
+
Quality Loss: 0.5788
|
| 165 |
+
============================================================
|
| 166 |
+
2026-01-25 00:52:39 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 167 |
+
2026-01-25 00:52:40 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_699.pt (428.0MB)
|
| 168 |
+
2026-01-25 00:52:40 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_699.pt
|
| 169 |
+
2026-01-25 00:52:40 | INFO | Best 3 checkpoints:
|
| 170 |
+
2026-01-25 00:52:40 | INFO | 1. Step 499: acc=0.7370 (reward_model.best_499.pt)
|
| 171 |
+
2026-01-25 00:52:40 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 172 |
+
2026-01-25 00:52:40 | INFO | 3. Step 299: acc=0.7355 (reward_model.best_299.pt)
|
| 173 |
+
2026-01-25 00:52:41 | INFO | Step 700: loss=0.2280 | IF_loss=0.3205, MQ_loss=0.1356 | acc=0.938 (IF=0.917, MQ=0.958) | lr=0.000007
|
| 174 |
+
2026-01-25 00:54:34 | INFO |
|
| 175 |
+
============================================================
|
| 176 |
+
Validation Results (took 7.95s):
|
| 177 |
+
Samples: 346 instruction, 346 quality
|
| 178 |
+
Instruction Acc: 0.7110
|
| 179 |
+
Quality Acc: 0.7688
|
| 180 |
+
Average Acc: 0.7399
|
| 181 |
+
Total Loss: 0.6517
|
| 182 |
+
Instruction Loss: 0.7169
|
| 183 |
+
Quality Loss: 0.5864
|
| 184 |
+
============================================================
|
| 185 |
+
2026-01-25 00:54:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 186 |
+
2026-01-25 00:54:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_799.pt (428.0MB)
|
| 187 |
+
2026-01-25 00:54:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_299.pt
|
| 188 |
+
2026-01-25 00:54:34 | INFO | Best 3 checkpoints:
|
| 189 |
+
2026-01-25 00:54:34 | INFO | 1. Step 799: acc=0.7399 (reward_model.best_799.pt)
|
| 190 |
+
2026-01-25 00:54:34 | INFO | 2. Step 499: acc=0.7370 (reward_model.best_499.pt)
|
| 191 |
+
2026-01-25 00:54:34 | INFO | 3. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 192 |
+
2026-01-25 00:54:35 | INFO | Step 800: loss=0.3855 | IF_loss=0.4637, MQ_loss=0.3072 | acc=0.792 (IF=0.750, MQ=0.833) | lr=0.000007
|
| 193 |
+
2026-01-25 00:56:29 | INFO |
|
| 194 |
+
============================================================
|
| 195 |
+
Validation Results (took 7.96s):
|
| 196 |
+
Samples: 346 instruction, 346 quality
|
| 197 |
+
Instruction Acc: 0.7110
|
| 198 |
+
Quality Acc: 0.7572
|
| 199 |
+
Average Acc: 0.7341
|
| 200 |
+
Total Loss: 0.6621
|
| 201 |
+
Instruction Loss: 0.7294
|
| 202 |
+
Quality Loss: 0.5948
|
| 203 |
+
============================================================
|
| 204 |
+
2026-01-25 00:56:29 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 205 |
+
2026-01-25 00:56:29 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_899.pt (428.0MB)
|
| 206 |
+
2026-01-25 00:56:29 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260125_0038/ckpt/reward_model.best_899.pt
|
| 207 |
+
2026-01-25 00:56:29 | INFO | Best 3 checkpoints:
|
| 208 |
+
2026-01-25 00:56:29 | INFO | 1. Step 799: acc=0.7399 (reward_model.best_799.pt)
|
| 209 |
+
2026-01-25 00:56:29 | INFO | 2. Step 499: acc=0.7370 (reward_model.best_499.pt)
|
| 210 |
+
2026-01-25 00:56:29 | INFO | 3. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 211 |
+
2026-01-25 00:56:30 | INFO | Step 900: loss=0.3468 | IF_loss=0.3446, MQ_loss=0.3489 | acc=0.812 (IF=0.812, MQ=0.812) | lr=0.000006
|
20260125_0933/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '0'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_0933/reward_model/1769304848.6545663/events.out.tfevents.1769304848.MACLAB-S004.1519845.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c582683cea0697f98b6b4b9e504078b8949e1df961163c7183bb40829fde464
|
| 3 |
+
size 503
|
20260125_0933/reward_model/1769304848.6563416/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_0933/reward_model/events.out.tfevents.1769304848.MACLAB-S004.1519845.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26a03747b7fe4dfb03f91816a786eee4cedec85474701f7636e7363c8f5ad76e
|
| 3 |
+
size 873949
|
20260125_0933/train.20260125_0933.log
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 09:33:55 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/train.20260125_0933.log
|
| 2 |
+
2026-01-25 09:33:55 | INFO | Config: DEVICES: '0'
|
| 3 |
+
accelerate:
|
| 4 |
+
mixed_precision: bf16
|
| 5 |
+
basics:
|
| 6 |
+
random_seed: 42
|
| 7 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human
|
| 8 |
+
dataset:
|
| 9 |
+
audio_dropout:
|
| 10 |
+
apply_to_eval: true
|
| 11 |
+
apply_to_ref: true
|
| 12 |
+
enabled: true
|
| 13 |
+
eval_only_on_training: true
|
| 14 |
+
max_duration: 1500
|
| 15 |
+
min_duration: 200
|
| 16 |
+
train_mode: start
|
| 17 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 18 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 19 |
+
duration: 600.0
|
| 20 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 21 |
+
mode: raw_text_frozen_audio
|
| 22 |
+
max_samples: null
|
| 23 |
+
max_val_samples: null
|
| 24 |
+
metadata_jsonl: ${project_root}/CMI-Training/all_comparisons.jsonl
|
| 25 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 26 |
+
sample_rate: 24000
|
| 27 |
+
val_preference_file: null
|
| 28 |
+
loss:
|
| 29 |
+
IF_ratio: 0.5
|
| 30 |
+
filter_ties: true
|
| 31 |
+
label_smoothing: 0.0
|
| 32 |
+
reduction: mean
|
| 33 |
+
model:
|
| 34 |
+
attention_mode: SA
|
| 35 |
+
attn_dropout: 0.0
|
| 36 |
+
category_embeddings: null
|
| 37 |
+
dim: 768
|
| 38 |
+
dim_head: 64
|
| 39 |
+
downsample:
|
| 40 |
+
configs:
|
| 41 |
+
conv2_4x:
|
| 42 |
+
factor: 4
|
| 43 |
+
kernel_size: 5
|
| 44 |
+
kind: conv*2
|
| 45 |
+
use_layernorm: true
|
| 46 |
+
conv_4x:
|
| 47 |
+
factor: 4
|
| 48 |
+
kernel_size: 5
|
| 49 |
+
kind: conv
|
| 50 |
+
stage: 1
|
| 51 |
+
use_layernorm: true
|
| 52 |
+
glu_4x:
|
| 53 |
+
factor: 4
|
| 54 |
+
kernel_size: 5
|
| 55 |
+
kind: gluconv*2+pw
|
| 56 |
+
use_layernorm: true
|
| 57 |
+
mean:
|
| 58 |
+
factor: 2
|
| 59 |
+
kind: mean
|
| 60 |
+
mean_4x:
|
| 61 |
+
dropout: 0.0
|
| 62 |
+
factor: 30
|
| 63 |
+
kind: mean+mlp
|
| 64 |
+
mlp_ratio: 2.0
|
| 65 |
+
none:
|
| 66 |
+
factor: 1
|
| 67 |
+
kind: none
|
| 68 |
+
eval: mean_4x
|
| 69 |
+
ref: null
|
| 70 |
+
text: none
|
| 71 |
+
ff_dropout: 0.0
|
| 72 |
+
ff_mult: 4
|
| 73 |
+
freeze_audio: true
|
| 74 |
+
freeze_text: true
|
| 75 |
+
gradient_checkpointing: false
|
| 76 |
+
heads: 8
|
| 77 |
+
joint_tf_depth: 1
|
| 78 |
+
load_config:
|
| 79 |
+
checkpoint_path: null
|
| 80 |
+
frozen_from_pretrained: true
|
| 81 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 82 |
+
strict: false
|
| 83 |
+
mlp_dim: 768
|
| 84 |
+
mode: concat_text_late
|
| 85 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 86 |
+
name: reward
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_layer_idx: -1
|
| 107 |
+
use_audio: true
|
| 108 |
+
no_condition: false
|
| 109 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 110 |
+
run_name: null
|
| 111 |
+
train:
|
| 112 |
+
batch_size: 48
|
| 113 |
+
betas:
|
| 114 |
+
- 0.9
|
| 115 |
+
- 0.99
|
| 116 |
+
ema_decay: 0.9999
|
| 117 |
+
ema_update_every: 1
|
| 118 |
+
enable_gradient_checkpointing: true
|
| 119 |
+
force_clear_prev_results: false
|
| 120 |
+
grad_accum_every: 1
|
| 121 |
+
log_tensorboard: true
|
| 122 |
+
lr_schedule:
|
| 123 |
+
min_lr_ratio: 0.001
|
| 124 |
+
name: linear_cosine
|
| 125 |
+
total_steps: 2000
|
| 126 |
+
warmup_steps: 10
|
| 127 |
+
max_grad_norm: 1
|
| 128 |
+
mlp_lr: 1.0e-05
|
| 129 |
+
num_train_steps: 2000
|
| 130 |
+
num_valid_batches: null
|
| 131 |
+
num_workers: 8
|
| 132 |
+
other_lr: 1.0e-05
|
| 133 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 134 |
+
resume_optimizer: false
|
| 135 |
+
save_model_every: 2000
|
| 136 |
+
use_checkpoint_config: true
|
| 137 |
+
use_ema: false
|
| 138 |
+
use_lion: false
|
| 139 |
+
valid_batch_size: 20
|
| 140 |
+
valid_every: 100
|
| 141 |
+
valid_frac: 0.1
|
| 142 |
+
verify_weights_on_load: true
|
| 143 |
+
validate_only: false
|
| 144 |
+
|
| 145 |
+
2026-01-25 09:33:55 | INFO | Random seed set to 42
|
| 146 |
+
2026-01-25 09:33:56 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 147 |
+
2026-01-25 09:33:56 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 148 |
+
2026-01-25 09:33:56 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 149 |
+
2026-01-25 09:33:56 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 150 |
+
2026-01-25 09:34:06 | INFO | Created RewardAttentionModel with attention_mode=SA
|
| 151 |
+
2026-01-25 09:34:06 | INFO | Created PreferenceLoss with filter_ties=True
|
| 152 |
+
2026-01-25 09:34:06 | INFO | ✓ Gradient checkpointing enabled
|
| 153 |
+
2026-01-25 09:34:06 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 154 |
+
2026-01-25 09:34:06 | INFO | Apply to eval: True, ref: True
|
| 155 |
+
2026-01-25 09:34:06 | INFO | Modes: train=random, val=start
|
| 156 |
+
2026-01-25 09:34:06 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 157 |
+
2026-01-25 09:34:06 | INFO | Other parameters: 37,397,634 params, lr=1e-05
|
| 158 |
+
2026-01-25 09:34:06 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 159 |
+
2026-01-25 09:34:06 | INFO | Training with fixed validation set
|
| 160 |
+
2026-01-25 09:34:06 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 161 |
+
2026-01-25 09:34:08 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 162 |
+
2026-01-25 09:34:08 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=29999)
|
| 163 |
+
2026-01-25 09:34:08 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260124_0147/ckpt/reward_model.best_29999.pt
|
| 164 |
+
2026-01-25 09:34:08 | INFO | Parameters: 701.162M total, 38.584M trainable
|
| 165 |
+
2026-01-25 09:34:08 | INFO | Text encoder (frozen): 328.389M
|
| 166 |
+
2026-01-25 09:34:08 | INFO | Audio encoder (frozen): 334.189M
|
| 167 |
+
2026-01-25 09:34:08 | INFO | Other trainable: 38.584M
|
| 168 |
+
2026-01-25 09:34:08 | INFO | ℹ No LoRA configuration detected
|
| 169 |
+
2026-01-25 09:34:08 | INFO | ============================================================
|
| 170 |
+
2026-01-25 09:34:08 | INFO | Ready to start training
|
| 171 |
+
2026-01-25 09:34:08 | INFO | ============================================================
|
| 172 |
+
2026-01-25 09:34:08 | INFO | Starting training from step 0
|
| 173 |
+
2026-01-25 09:34:08 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 174 |
+
2026-01-25 09:34:08 | INFO | accelerator.device = cuda
|
| 175 |
+
2026-01-25 09:34:08 | INFO | mixed_precision = bf16
|
| 176 |
+
2026-01-25 09:34:08 | INFO | distributed_type = NO
|
| 177 |
+
2026-01-25 09:34:08 | INFO | num_processes = 1
|
| 178 |
+
2026-01-25 09:34:08 | INFO | process_index = 0
|
| 179 |
+
2026-01-25 09:34:08 | INFO | is_main_process = True
|
| 180 |
+
2026-01-25 09:34:08 | INFO | torch.cuda.is_available() = True
|
| 181 |
+
2026-01-25 09:34:08 | INFO | torch.cuda.device_count() = 1
|
| 182 |
+
2026-01-25 09:34:08 | INFO | current_device = 0
|
| 183 |
+
2026-01-25 09:34:08 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 184 |
+
2026-01-25 09:34:08 | INFO | model parameter device = cuda:0
|
| 185 |
+
2026-01-25 09:34:08 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 186 |
+
2026-01-25 09:34:17 | INFO | Step 0: loss=1.7986 | IF_loss=2.3230, MQ_loss=1.2743 | acc=0.750 (IF=0.688, MQ=0.812) | lr=0.000002
|
| 187 |
+
2026-01-25 09:34:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.0.pt (filtered to 38.584M trainable parameters)
|
| 188 |
+
2026-01-25 09:34:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.0.pt (428.0MB)
|
| 189 |
+
2026-01-25 09:34:17 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.0.pt
|
| 190 |
+
2026-01-25 09:35:59 | INFO |
|
| 191 |
+
============================================================
|
| 192 |
+
Validation Results (took 7.82s):
|
| 193 |
+
Samples: 346 instruction, 346 quality
|
| 194 |
+
Instruction Acc: 0.7110
|
| 195 |
+
Quality Acc: 0.6879
|
| 196 |
+
Average Acc: 0.6994
|
| 197 |
+
Total Loss: 1.2359
|
| 198 |
+
Instruction Loss: 1.2306
|
| 199 |
+
Quality Loss: 1.2412
|
| 200 |
+
============================================================
|
| 201 |
+
2026-01-25 09:35:59 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_99.pt (filtered to 38.584M trainable parameters)
|
| 202 |
+
2026-01-25 09:35:59 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_99.pt (428.0MB)
|
| 203 |
+
2026-01-25 09:35:59 | INFO | Best 1 checkpoints:
|
| 204 |
+
2026-01-25 09:35:59 | INFO | 1. Step 99: acc=0.6994 (reward_model.best_99.pt)
|
| 205 |
+
2026-01-25 09:36:00 | INFO | Step 100: loss=1.0465 | IF_loss=0.8500, MQ_loss=1.2430 | acc=0.688 (IF=0.708, MQ=0.667) | lr=0.000010
|
| 206 |
+
2026-01-25 09:37:40 | INFO |
|
| 207 |
+
============================================================
|
| 208 |
+
Validation Results (took 6.16s):
|
| 209 |
+
Samples: 346 instruction, 346 quality
|
| 210 |
+
Instruction Acc: 0.6994
|
| 211 |
+
Quality Acc: 0.7370
|
| 212 |
+
Average Acc: 0.7182
|
| 213 |
+
Total Loss: 0.7219
|
| 214 |
+
Instruction Loss: 0.7455
|
| 215 |
+
Quality Loss: 0.6983
|
| 216 |
+
============================================================
|
| 217 |
+
2026-01-25 09:37:40 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_199.pt (filtered to 38.584M trainable parameters)
|
| 218 |
+
2026-01-25 09:37:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_199.pt (428.0MB)
|
| 219 |
+
2026-01-25 09:37:41 | INFO | Best 2 checkpoints:
|
| 220 |
+
2026-01-25 09:37:41 | INFO | 1. Step 199: acc=0.7182 (reward_model.best_199.pt)
|
| 221 |
+
2026-01-25 09:37:41 | INFO | 2. Step 99: acc=0.6994 (reward_model.best_99.pt)
|
| 222 |
+
2026-01-25 09:37:42 | INFO | Step 200: loss=0.3606 | IF_loss=0.3367, MQ_loss=0.3845 | acc=0.823 (IF=0.833, MQ=0.812) | lr=0.000010
|
| 223 |
+
2026-01-25 09:39:20 | INFO |
|
| 224 |
+
============================================================
|
| 225 |
+
Validation Results (took 6.75s):
|
| 226 |
+
Samples: 346 instruction, 346 quality
|
| 227 |
+
Instruction Acc: 0.7110
|
| 228 |
+
Quality Acc: 0.7572
|
| 229 |
+
Average Acc: 0.7341
|
| 230 |
+
Total Loss: 0.6270
|
| 231 |
+
Instruction Loss: 0.6676
|
| 232 |
+
Quality Loss: 0.5865
|
| 233 |
+
============================================================
|
| 234 |
+
2026-01-25 09:39:20 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_299.pt (filtered to 38.584M trainable parameters)
|
| 235 |
+
2026-01-25 09:39:20 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_299.pt (428.0MB)
|
| 236 |
+
2026-01-25 09:39:20 | INFO | Best 3 checkpoints:
|
| 237 |
+
2026-01-25 09:39:20 | INFO | 1. Step 299: acc=0.7341 (reward_model.best_299.pt)
|
| 238 |
+
2026-01-25 09:39:20 | INFO | 2. Step 199: acc=0.7182 (reward_model.best_199.pt)
|
| 239 |
+
2026-01-25 09:39:20 | INFO | 3. Step 99: acc=0.6994 (reward_model.best_99.pt)
|
| 240 |
+
2026-01-25 09:39:21 | INFO | Step 300: loss=0.3793 | IF_loss=0.4554, MQ_loss=0.3032 | acc=0.844 (IF=0.833, MQ=0.854) | lr=0.000009
|
| 241 |
+
2026-01-25 09:41:04 | INFO |
|
| 242 |
+
============================================================
|
| 243 |
+
Validation Results (took 6.57s):
|
| 244 |
+
Samples: 346 instruction, 346 quality
|
| 245 |
+
Instruction Acc: 0.7110
|
| 246 |
+
Quality Acc: 0.7486
|
| 247 |
+
Average Acc: 0.7298
|
| 248 |
+
Total Loss: 0.6179
|
| 249 |
+
Instruction Loss: 0.6620
|
| 250 |
+
Quality Loss: 0.5737
|
| 251 |
+
============================================================
|
| 252 |
+
2026-01-25 09:41:04 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_399.pt (filtered to 38.584M trainable parameters)
|
| 253 |
+
2026-01-25 09:41:04 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_399.pt (428.0MB)
|
| 254 |
+
2026-01-25 09:41:04 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_99.pt
|
| 255 |
+
2026-01-25 09:41:04 | INFO | Best 3 checkpoints:
|
| 256 |
+
2026-01-25 09:41:04 | INFO | 1. Step 299: acc=0.7341 (reward_model.best_299.pt)
|
| 257 |
+
2026-01-25 09:41:04 | INFO | 2. Step 399: acc=0.7298 (reward_model.best_399.pt)
|
| 258 |
+
2026-01-25 09:41:04 | INFO | 3. Step 199: acc=0.7182 (reward_model.best_199.pt)
|
| 259 |
+
2026-01-25 09:41:05 | INFO | Step 400: loss=0.4959 | IF_loss=0.5285, MQ_loss=0.4633 | acc=0.812 (IF=0.792, MQ=0.833) | lr=0.000009
|
| 260 |
+
2026-01-25 09:42:46 | INFO |
|
| 261 |
+
============================================================
|
| 262 |
+
Validation Results (took 7.13s):
|
| 263 |
+
Samples: 346 instruction, 346 quality
|
| 264 |
+
Instruction Acc: 0.7023
|
| 265 |
+
Quality Acc: 0.7601
|
| 266 |
+
Average Acc: 0.7312
|
| 267 |
+
Total Loss: 0.6337
|
| 268 |
+
Instruction Loss: 0.6835
|
| 269 |
+
Quality Loss: 0.5838
|
| 270 |
+
============================================================
|
| 271 |
+
2026-01-25 09:42:46 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_499.pt (filtered to 38.584M trainable parameters)
|
| 272 |
+
2026-01-25 09:42:46 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_499.pt (428.0MB)
|
| 273 |
+
2026-01-25 09:42:46 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_199.pt
|
| 274 |
+
2026-01-25 09:42:46 | INFO | Best 3 checkpoints:
|
| 275 |
+
2026-01-25 09:42:46 | INFO | 1. Step 299: acc=0.7341 (reward_model.best_299.pt)
|
| 276 |
+
2026-01-25 09:42:46 | INFO | 2. Step 499: acc=0.7312 (reward_model.best_499.pt)
|
| 277 |
+
2026-01-25 09:42:46 | INFO | 3. Step 399: acc=0.7298 (reward_model.best_399.pt)
|
| 278 |
+
2026-01-25 09:42:47 | INFO | Step 500: loss=0.4449 | IF_loss=0.5034, MQ_loss=0.3864 | acc=0.698 (IF=0.667, MQ=0.729) | lr=0.000009
|
| 279 |
+
2026-01-25 09:44:34 | INFO |
|
| 280 |
+
============================================================
|
| 281 |
+
Validation Results (took 6.93s):
|
| 282 |
+
Samples: 346 instruction, 346 quality
|
| 283 |
+
Instruction Acc: 0.7139
|
| 284 |
+
Quality Acc: 0.7601
|
| 285 |
+
Average Acc: 0.7370
|
| 286 |
+
Total Loss: 0.6450
|
| 287 |
+
Instruction Loss: 0.6969
|
| 288 |
+
Quality Loss: 0.5930
|
| 289 |
+
============================================================
|
| 290 |
+
2026-01-25 09:44:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_599.pt (filtered to 38.584M trainable parameters)
|
| 291 |
+
2026-01-25 09:44:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_599.pt (428.0MB)
|
| 292 |
+
2026-01-25 09:44:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_399.pt
|
| 293 |
+
2026-01-25 09:44:34 | INFO | Best 3 checkpoints:
|
| 294 |
+
2026-01-25 09:44:34 | INFO | 1. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 295 |
+
2026-01-25 09:44:34 | INFO | 2. Step 299: acc=0.7341 (reward_model.best_299.pt)
|
| 296 |
+
2026-01-25 09:44:34 | INFO | 3. Step 499: acc=0.7312 (reward_model.best_499.pt)
|
| 297 |
+
2026-01-25 09:44:35 | INFO | Step 600: loss=0.4510 | IF_loss=0.4687, MQ_loss=0.4333 | acc=0.812 (IF=0.792, MQ=0.833) | lr=0.000008
|
| 298 |
+
2026-01-25 09:46:19 | INFO |
|
| 299 |
+
============================================================
|
| 300 |
+
Validation Results (took 7.72s):
|
| 301 |
+
Samples: 346 instruction, 346 quality
|
| 302 |
+
Instruction Acc: 0.7139
|
| 303 |
+
Quality Acc: 0.7688
|
| 304 |
+
Average Acc: 0.7413
|
| 305 |
+
Total Loss: 0.6580
|
| 306 |
+
Instruction Loss: 0.7161
|
| 307 |
+
Quality Loss: 0.5999
|
| 308 |
+
============================================================
|
| 309 |
+
2026-01-25 09:46:19 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_699.pt (filtered to 38.584M trainable parameters)
|
| 310 |
+
2026-01-25 09:46:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_699.pt (428.0MB)
|
| 311 |
+
2026-01-25 09:46:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_499.pt
|
| 312 |
+
2026-01-25 09:46:19 | INFO | Best 3 checkpoints:
|
| 313 |
+
2026-01-25 09:46:19 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 314 |
+
2026-01-25 09:46:19 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 315 |
+
2026-01-25 09:46:19 | INFO | 3. Step 299: acc=0.7341 (reward_model.best_299.pt)
|
| 316 |
+
2026-01-25 09:46:20 | INFO | Step 700: loss=0.2300 | IF_loss=0.3156, MQ_loss=0.1444 | acc=0.906 (IF=0.896, MQ=0.917) | lr=0.000007
|
| 317 |
+
2026-01-25 09:48:06 | INFO |
|
| 318 |
+
============================================================
|
| 319 |
+
Validation Results (took 6.72s):
|
| 320 |
+
Samples: 346 instruction, 346 quality
|
| 321 |
+
Instruction Acc: 0.7081
|
| 322 |
+
Quality Acc: 0.7630
|
| 323 |
+
Average Acc: 0.7355
|
| 324 |
+
Total Loss: 0.6754
|
| 325 |
+
Instruction Loss: 0.7388
|
| 326 |
+
Quality Loss: 0.6120
|
| 327 |
+
============================================================
|
| 328 |
+
2026-01-25 09:48:07 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_799.pt (filtered to 38.584M trainable parameters)
|
| 329 |
+
2026-01-25 09:48:07 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_799.pt (428.0MB)
|
| 330 |
+
2026-01-25 09:48:07 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_299.pt
|
| 331 |
+
2026-01-25 09:48:07 | INFO | Best 3 checkpoints:
|
| 332 |
+
2026-01-25 09:48:07 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 333 |
+
2026-01-25 09:48:07 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 334 |
+
2026-01-25 09:48:07 | INFO | 3. Step 799: acc=0.7355 (reward_model.best_799.pt)
|
| 335 |
+
2026-01-25 09:48:08 | INFO | Step 800: loss=0.3552 | IF_loss=0.4192, MQ_loss=0.2911 | acc=0.844 (IF=0.833, MQ=0.854) | lr=0.000007
|
| 336 |
+
2026-01-25 09:49:55 | INFO |
|
| 337 |
+
============================================================
|
| 338 |
+
Validation Results (took 6.63s):
|
| 339 |
+
Samples: 346 instruction, 346 quality
|
| 340 |
+
Instruction Acc: 0.7081
|
| 341 |
+
Quality Acc: 0.7630
|
| 342 |
+
Average Acc: 0.7355
|
| 343 |
+
Total Loss: 0.6859
|
| 344 |
+
Instruction Loss: 0.7508
|
| 345 |
+
Quality Loss: 0.6209
|
| 346 |
+
============================================================
|
| 347 |
+
2026-01-25 09:49:55 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_899.pt (filtered to 38.584M trainable parameters)
|
| 348 |
+
2026-01-25 09:49:55 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_899.pt (428.0MB)
|
| 349 |
+
2026-01-25 09:49:55 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_899.pt
|
| 350 |
+
2026-01-25 09:49:55 | INFO | Best 3 checkpoints:
|
| 351 |
+
2026-01-25 09:49:55 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 352 |
+
2026-01-25 09:49:55 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 353 |
+
2026-01-25 09:49:55 | INFO | 3. Step 799: acc=0.7355 (reward_model.best_799.pt)
|
| 354 |
+
2026-01-25 09:49:56 | INFO | Step 900: loss=0.3278 | IF_loss=0.3222, MQ_loss=0.3335 | acc=0.865 (IF=0.875, MQ=0.854) | lr=0.000006
|
| 355 |
+
2026-01-25 09:51:41 | INFO |
|
| 356 |
+
============================================================
|
| 357 |
+
Validation Results (took 6.87s):
|
| 358 |
+
Samples: 346 instruction, 346 quality
|
| 359 |
+
Instruction Acc: 0.7081
|
| 360 |
+
Quality Acc: 0.7630
|
| 361 |
+
Average Acc: 0.7355
|
| 362 |
+
Total Loss: 0.7067
|
| 363 |
+
Instruction Loss: 0.7755
|
| 364 |
+
Quality Loss: 0.6378
|
| 365 |
+
============================================================
|
| 366 |
+
2026-01-25 09:51:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_999.pt (filtered to 38.584M trainable parameters)
|
| 367 |
+
2026-01-25 09:51:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_999.pt (428.0MB)
|
| 368 |
+
2026-01-25 09:51:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_999.pt
|
| 369 |
+
2026-01-25 09:51:41 | INFO | Best 3 checkpoints:
|
| 370 |
+
2026-01-25 09:51:41 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 371 |
+
2026-01-25 09:51:41 | INFO | 2. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 372 |
+
2026-01-25 09:51:41 | INFO | 3. Step 799: acc=0.7355 (reward_model.best_799.pt)
|
| 373 |
+
2026-01-25 09:51:42 | INFO | Step 1000: loss=0.2557 | IF_loss=0.2447, MQ_loss=0.2666 | acc=0.896 (IF=0.938, MQ=0.854) | lr=0.000005
|
| 374 |
+
2026-01-25 09:53:32 | INFO |
|
| 375 |
+
============================================================
|
| 376 |
+
Validation Results (took 7.21s):
|
| 377 |
+
Samples: 346 instruction, 346 quality
|
| 378 |
+
Instruction Acc: 0.7110
|
| 379 |
+
Quality Acc: 0.7659
|
| 380 |
+
Average Acc: 0.7384
|
| 381 |
+
Total Loss: 0.7211
|
| 382 |
+
Instruction Loss: 0.7922
|
| 383 |
+
Quality Loss: 0.6501
|
| 384 |
+
============================================================
|
| 385 |
+
2026-01-25 09:53:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1099.pt (filtered to 38.584M trainable parameters)
|
| 386 |
+
2026-01-25 09:53:32 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1099.pt (428.0MB)
|
| 387 |
+
2026-01-25 09:53:32 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_799.pt
|
| 388 |
+
2026-01-25 09:53:32 | INFO | Best 3 checkpoints:
|
| 389 |
+
2026-01-25 09:53:32 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 390 |
+
2026-01-25 09:53:32 | INFO | 2. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 391 |
+
2026-01-25 09:53:32 | INFO | 3. Step 599: acc=0.7370 (reward_model.best_599.pt)
|
| 392 |
+
2026-01-25 09:53:33 | INFO | Step 1100: loss=0.2468 | IF_loss=0.2882, MQ_loss=0.2053 | acc=0.875 (IF=0.875, MQ=0.875) | lr=0.000004
|
| 393 |
+
2026-01-25 09:55:16 | INFO |
|
| 394 |
+
============================================================
|
| 395 |
+
Validation Results (took 7.04s):
|
| 396 |
+
Samples: 346 instruction, 346 quality
|
| 397 |
+
Instruction Acc: 0.7081
|
| 398 |
+
Quality Acc: 0.7688
|
| 399 |
+
Average Acc: 0.7384
|
| 400 |
+
Total Loss: 0.7338
|
| 401 |
+
Instruction Loss: 0.8081
|
| 402 |
+
Quality Loss: 0.6596
|
| 403 |
+
============================================================
|
| 404 |
+
2026-01-25 09:55:16 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1199.pt (filtered to 38.584M trainable parameters)
|
| 405 |
+
2026-01-25 09:55:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1199.pt (428.0MB)
|
| 406 |
+
2026-01-25 09:55:17 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_599.pt
|
| 407 |
+
2026-01-25 09:55:17 | INFO | Best 3 checkpoints:
|
| 408 |
+
2026-01-25 09:55:17 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 409 |
+
2026-01-25 09:55:17 | INFO | 2. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 410 |
+
2026-01-25 09:55:17 | INFO | 3. Step 1199: acc=0.7384 (reward_model.best_1199.pt)
|
| 411 |
+
2026-01-25 09:55:18 | INFO | Step 1200: loss=0.2555 | IF_loss=0.3150, MQ_loss=0.1960 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000003
|
| 412 |
+
2026-01-25 09:57:06 | INFO |
|
| 413 |
+
============================================================
|
| 414 |
+
Validation Results (took 6.87s):
|
| 415 |
+
Samples: 346 instruction, 346 quality
|
| 416 |
+
Instruction Acc: 0.7081
|
| 417 |
+
Quality Acc: 0.7659
|
| 418 |
+
Average Acc: 0.7370
|
| 419 |
+
Total Loss: 0.7409
|
| 420 |
+
Instruction Loss: 0.8178
|
| 421 |
+
Quality Loss: 0.6641
|
| 422 |
+
============================================================
|
| 423 |
+
2026-01-25 09:57:06 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1299.pt (filtered to 38.584M trainable parameters)
|
| 424 |
+
2026-01-25 09:57:07 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1299.pt (428.0MB)
|
| 425 |
+
2026-01-25 09:57:07 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1299.pt
|
| 426 |
+
2026-01-25 09:57:07 | INFO | Best 3 checkpoints:
|
| 427 |
+
2026-01-25 09:57:07 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 428 |
+
2026-01-25 09:57:07 | INFO | 2. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 429 |
+
2026-01-25 09:57:07 | INFO | 3. Step 1199: acc=0.7384 (reward_model.best_1199.pt)
|
| 430 |
+
2026-01-25 09:57:08 | INFO | Step 1300: loss=0.3035 | IF_loss=0.2872, MQ_loss=0.3198 | acc=0.865 (IF=0.854, MQ=0.875) | lr=0.000003
|
| 431 |
+
2026-01-25 09:58:54 | INFO |
|
| 432 |
+
============================================================
|
| 433 |
+
Validation Results (took 8.06s):
|
| 434 |
+
Samples: 346 instruction, 346 quality
|
| 435 |
+
Instruction Acc: 0.7052
|
| 436 |
+
Quality Acc: 0.7659
|
| 437 |
+
Average Acc: 0.7355
|
| 438 |
+
Total Loss: 0.7497
|
| 439 |
+
Instruction Loss: 0.8259
|
| 440 |
+
Quality Loss: 0.6735
|
| 441 |
+
============================================================
|
| 442 |
+
2026-01-25 09:58:55 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1399.pt (filtered to 38.584M trainable parameters)
|
| 443 |
+
2026-01-25 09:58:55 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1399.pt (428.0MB)
|
| 444 |
+
2026-01-25 09:58:55 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1399.pt
|
| 445 |
+
2026-01-25 09:58:55 | INFO | Best 3 checkpoints:
|
| 446 |
+
2026-01-25 09:58:55 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 447 |
+
2026-01-25 09:58:55 | INFO | 2. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 448 |
+
2026-01-25 09:58:55 | INFO | 3. Step 1199: acc=0.7384 (reward_model.best_1199.pt)
|
| 449 |
+
2026-01-25 09:58:56 | INFO | Step 1400: loss=0.2354 | IF_loss=0.2780, MQ_loss=0.1928 | acc=0.917 (IF=0.896, MQ=0.938) | lr=0.000002
|
| 450 |
+
2026-01-25 10:00:46 | INFO |
|
| 451 |
+
============================================================
|
| 452 |
+
Validation Results (took 6.79s):
|
| 453 |
+
Samples: 346 instruction, 346 quality
|
| 454 |
+
Instruction Acc: 0.7052
|
| 455 |
+
Quality Acc: 0.7717
|
| 456 |
+
Average Acc: 0.7384
|
| 457 |
+
Total Loss: 0.7534
|
| 458 |
+
Instruction Loss: 0.8301
|
| 459 |
+
Quality Loss: 0.6767
|
| 460 |
+
============================================================
|
| 461 |
+
2026-01-25 10:00:46 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1499.pt (filtered to 38.584M trainable parameters)
|
| 462 |
+
2026-01-25 10:00:46 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1499.pt (428.0MB)
|
| 463 |
+
2026-01-25 10:00:46 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1499.pt
|
| 464 |
+
2026-01-25 10:00:46 | INFO | Best 3 checkpoints:
|
| 465 |
+
2026-01-25 10:00:46 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 466 |
+
2026-01-25 10:00:46 | INFO | 2. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 467 |
+
2026-01-25 10:00:46 | INFO | 3. Step 1199: acc=0.7384 (reward_model.best_1199.pt)
|
| 468 |
+
2026-01-25 10:00:47 | INFO | Step 1500: loss=0.2509 | IF_loss=0.2888, MQ_loss=0.2131 | acc=0.875 (IF=0.875, MQ=0.875) | lr=0.000001
|
| 469 |
+
2026-01-25 10:02:32 | INFO |
|
| 470 |
+
============================================================
|
| 471 |
+
Validation Results (took 7.11s):
|
| 472 |
+
Samples: 346 instruction, 346 quality
|
| 473 |
+
Instruction Acc: 0.7081
|
| 474 |
+
Quality Acc: 0.7717
|
| 475 |
+
Average Acc: 0.7399
|
| 476 |
+
Total Loss: 0.7576
|
| 477 |
+
Instruction Loss: 0.8358
|
| 478 |
+
Quality Loss: 0.6793
|
| 479 |
+
============================================================
|
| 480 |
+
2026-01-25 10:02:32 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1599.pt (filtered to 38.584M trainable parameters)
|
| 481 |
+
2026-01-25 10:02:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1599.pt (428.0MB)
|
| 482 |
+
2026-01-25 10:02:33 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1199.pt
|
| 483 |
+
2026-01-25 10:02:33 | INFO | Best 3 checkpoints:
|
| 484 |
+
2026-01-25 10:02:33 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 485 |
+
2026-01-25 10:02:33 | INFO | 2. Step 1599: acc=0.7399 (reward_model.best_1599.pt)
|
| 486 |
+
2026-01-25 10:02:33 | INFO | 3. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 487 |
+
2026-01-25 10:02:38 | INFO | Step 1600: loss=0.1956 | IF_loss=0.2453, MQ_loss=0.1458 | acc=0.938 (IF=0.896, MQ=0.979) | lr=0.000001
|
| 488 |
+
2026-01-25 10:04:22 | INFO |
|
| 489 |
+
============================================================
|
| 490 |
+
Validation Results (took 7.02s):
|
| 491 |
+
Samples: 346 instruction, 346 quality
|
| 492 |
+
Instruction Acc: 0.7023
|
| 493 |
+
Quality Acc: 0.7688
|
| 494 |
+
Average Acc: 0.7355
|
| 495 |
+
Total Loss: 0.7592
|
| 496 |
+
Instruction Loss: 0.8378
|
| 497 |
+
Quality Loss: 0.6806
|
| 498 |
+
============================================================
|
| 499 |
+
2026-01-25 10:04:22 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1699.pt (filtered to 38.584M trainable parameters)
|
| 500 |
+
2026-01-25 10:04:22 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1699.pt (428.0MB)
|
| 501 |
+
2026-01-25 10:04:22 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1699.pt
|
| 502 |
+
2026-01-25 10:04:22 | INFO | Best 3 checkpoints:
|
| 503 |
+
2026-01-25 10:04:22 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 504 |
+
2026-01-25 10:04:22 | INFO | 2. Step 1599: acc=0.7399 (reward_model.best_1599.pt)
|
| 505 |
+
2026-01-25 10:04:22 | INFO | 3. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 506 |
+
2026-01-25 10:04:23 | INFO | Step 1700: loss=0.3023 | IF_loss=0.2025, MQ_loss=0.4021 | acc=0.854 (IF=0.917, MQ=0.792) | lr=0.000001
|
| 507 |
+
2026-01-25 10:06:16 | INFO |
|
| 508 |
+
============================================================
|
| 509 |
+
Validation Results (took 7.44s):
|
| 510 |
+
Samples: 346 instruction, 346 quality
|
| 511 |
+
Instruction Acc: 0.7023
|
| 512 |
+
Quality Acc: 0.7659
|
| 513 |
+
Average Acc: 0.7341
|
| 514 |
+
Total Loss: 0.7613
|
| 515 |
+
Instruction Loss: 0.8400
|
| 516 |
+
Quality Loss: 0.6826
|
| 517 |
+
============================================================
|
| 518 |
+
2026-01-25 10:06:16 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1799.pt (filtered to 38.584M trainable parameters)
|
| 519 |
+
2026-01-25 10:06:16 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1799.pt (428.0MB)
|
| 520 |
+
2026-01-25 10:06:16 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1799.pt
|
| 521 |
+
2026-01-25 10:06:16 | INFO | Best 3 checkpoints:
|
| 522 |
+
2026-01-25 10:06:16 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 523 |
+
2026-01-25 10:06:16 | INFO | 2. Step 1599: acc=0.7399 (reward_model.best_1599.pt)
|
| 524 |
+
2026-01-25 10:06:16 | INFO | 3. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 525 |
+
2026-01-25 10:06:17 | INFO | Step 1800: loss=0.1655 | IF_loss=0.1916, MQ_loss=0.1395 | acc=0.896 (IF=0.875, MQ=0.917) | lr=0.000000
|
| 526 |
+
2026-01-25 10:08:05 | INFO |
|
| 527 |
+
============================================================
|
| 528 |
+
Validation Results (took 7.05s):
|
| 529 |
+
Samples: 346 instruction, 346 quality
|
| 530 |
+
Instruction Acc: 0.7023
|
| 531 |
+
Quality Acc: 0.7717
|
| 532 |
+
Average Acc: 0.7370
|
| 533 |
+
Total Loss: 0.7619
|
| 534 |
+
Instruction Loss: 0.8410
|
| 535 |
+
Quality Loss: 0.6828
|
| 536 |
+
============================================================
|
| 537 |
+
2026-01-25 10:08:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1899.pt (filtered to 38.584M trainable parameters)
|
| 538 |
+
2026-01-25 10:08:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1899.pt (428.0MB)
|
| 539 |
+
2026-01-25 10:08:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1899.pt
|
| 540 |
+
2026-01-25 10:08:05 | INFO | Best 3 checkpoints:
|
| 541 |
+
2026-01-25 10:08:05 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 542 |
+
2026-01-25 10:08:05 | INFO | 2. Step 1599: acc=0.7399 (reward_model.best_1599.pt)
|
| 543 |
+
2026-01-25 10:08:05 | INFO | 3. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 544 |
+
2026-01-25 10:08:06 | INFO | Step 1900: loss=0.2225 | IF_loss=0.2413, MQ_loss=0.2037 | acc=0.896 (IF=0.875, MQ=0.917) | lr=0.000000
|
| 545 |
+
2026-01-25 10:09:55 | INFO |
|
| 546 |
+
============================================================
|
| 547 |
+
Validation Results (took 7.58s):
|
| 548 |
+
Samples: 346 instruction, 346 quality
|
| 549 |
+
Instruction Acc: 0.7023
|
| 550 |
+
Quality Acc: 0.7688
|
| 551 |
+
Average Acc: 0.7355
|
| 552 |
+
Total Loss: 0.7619
|
| 553 |
+
Instruction Loss: 0.8410
|
| 554 |
+
Quality Loss: 0.6827
|
| 555 |
+
============================================================
|
| 556 |
+
2026-01-25 10:09:55 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1999.pt (filtered to 38.584M trainable parameters)
|
| 557 |
+
2026-01-25 10:09:55 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1999.pt (428.0MB)
|
| 558 |
+
2026-01-25 10:09:55 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0933/ckpt/reward_model.best_1999.pt
|
| 559 |
+
2026-01-25 10:09:55 | INFO | Best 3 checkpoints:
|
| 560 |
+
2026-01-25 10:09:55 | INFO | 1. Step 699: acc=0.7413 (reward_model.best_699.pt)
|
| 561 |
+
2026-01-25 10:09:55 | INFO | 2. Step 1599: acc=0.7399 (reward_model.best_1599.pt)
|
| 562 |
+
2026-01-25 10:09:55 | INFO | 3. Step 1099: acc=0.7384 (reward_model.best_1099.pt)
|
| 563 |
+
2026-01-25 10:09:55 | INFO | Training complete!
|
| 564 |
+
2026-01-25 10:09:55 | INFO | Training complete!
|
20260125_0947_CA/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '2'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: CA
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260123_1310/ckpt/reward_model.best_25999.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_0947_CA/eval_results_0125_1703.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
20260125_0947_CA/reward_model/1769305674.1033533/events.out.tfevents.1769305674.MACLAB-S004.1592070.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5491cc26f71b367dacdcda2398f1629e1eb90969ee662e1e00c9c0e40d9ce02c
|
| 3 |
+
size 503
|
20260125_0947_CA/reward_model/1769305674.1053352/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_0947_CA/reward_model/events.out.tfevents.1769305674.MACLAB-S004.1592070.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e55f888cb7afbd9e4d63134150e67da713c005819afe82f6f41ffd948a4993a8
|
| 3 |
+
size 874266
|
20260125_0947_CA/train.20260125_0947_CA.log
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 09:47:44 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/train.20260125_0947_CA.log
|
| 2 |
+
2026-01-25 09:47:44 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-25 09:47:45 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-25 09:47:45 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-25 09:47:45 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260123_1310/ckpt/reward_model.best_25999.pt
|
| 6 |
+
2026-01-25 09:47:45 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-25 09:47:52 | INFO | Created RewardAttentionModel with attention_mode=CA
|
| 8 |
+
2026-01-25 09:47:52 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-25 09:47:52 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-25 09:47:52 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-25 09:47:52 | INFO | Apply to eval: True, ref: True
|
| 12 |
+
2026-01-25 09:47:52 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-25 09:47:52 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-25 09:47:52 | INFO | Other parameters: 20,092,674 params, lr=1e-05
|
| 15 |
+
2026-01-25 09:47:52 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 16 |
+
2026-01-25 09:47:52 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-25 09:47:52 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-25 09:47:53 | INFO | Missing keys (782): ['text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight', 'text_module.model.embeddings.LayerNorm.weight', 'text_module.model.embeddings.LayerNorm.bias']...
|
| 19 |
+
2026-01-25 09:47:53 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=25999)
|
| 20 |
+
2026-01-25 09:47:53 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model/20260123_1310/ckpt/reward_model.best_25999.pt
|
| 21 |
+
2026-01-25 09:47:53 | INFO | Parameters: 683.857M total, 21.279M trainable
|
| 22 |
+
2026-01-25 09:47:53 | INFO | Text encoder (frozen): 328.389M
|
| 23 |
+
2026-01-25 09:47:53 | INFO | Audio encoder (frozen): 334.189M
|
| 24 |
+
2026-01-25 09:47:53 | INFO | Other trainable: 21.279M
|
| 25 |
+
2026-01-25 09:47:53 | INFO | ℹ No LoRA configuration detected
|
| 26 |
+
2026-01-25 09:47:54 | INFO | ============================================================
|
| 27 |
+
2026-01-25 09:47:54 | INFO | Ready to start training
|
| 28 |
+
2026-01-25 09:47:54 | INFO | ============================================================
|
| 29 |
+
2026-01-25 09:47:54 | INFO | Starting training from step 0
|
| 30 |
+
2026-01-25 09:47:54 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 31 |
+
2026-01-25 09:47:54 | INFO | accelerator.device = cuda
|
| 32 |
+
2026-01-25 09:47:54 | INFO | mixed_precision = bf16
|
| 33 |
+
2026-01-25 09:47:54 | INFO | distributed_type = NO
|
| 34 |
+
2026-01-25 09:47:54 | INFO | num_processes = 1
|
| 35 |
+
2026-01-25 09:47:54 | INFO | process_index = 0
|
| 36 |
+
2026-01-25 09:47:54 | INFO | is_main_process = True
|
| 37 |
+
2026-01-25 09:47:54 | INFO | torch.cuda.is_available() = True
|
| 38 |
+
2026-01-25 09:47:54 | INFO | torch.cuda.device_count() = 1
|
| 39 |
+
2026-01-25 09:47:54 | INFO | current_device = 0
|
| 40 |
+
2026-01-25 09:47:54 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 41 |
+
2026-01-25 09:47:54 | INFO | model parameter device = cuda:0
|
| 42 |
+
2026-01-25 09:47:54 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 43 |
+
2026-01-25 09:47:54 | INFO |
|
| 44 |
+
============================================================
|
| 45 |
+
2026-01-25 09:47:54 | INFO | Running initial validation after resume...
|
| 46 |
+
2026-01-25 09:47:54 | INFO | ============================================================
|
| 47 |
+
2026-01-25 09:48:06 | INFO |
|
| 48 |
+
============================================================
|
| 49 |
+
Validation Results (took 12.13s):
|
| 50 |
+
Samples: 346 instruction, 346 quality
|
| 51 |
+
Instruction Acc: 0.6503
|
| 52 |
+
Quality Acc: 0.6532
|
| 53 |
+
Average Acc: 0.6517
|
| 54 |
+
Total Loss: 1.2600
|
| 55 |
+
Instruction Loss: 1.2149
|
| 56 |
+
Quality Loss: 1.3051
|
| 57 |
+
============================================================
|
| 58 |
+
2026-01-25 09:48:06 | INFO | Initial validation complete.
|
| 59 |
+
|
| 60 |
+
2026-01-25 09:48:12 | INFO | Step 0: loss=1.5130 | IF_loss=1.3189, MQ_loss=1.7072 | acc=0.656 (IF=0.667, MQ=0.646) | lr=0.000002
|
| 61 |
+
2026-01-25 09:48:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.0.pt (filtered to 21.279M trainable parameters)
|
| 62 |
+
2026-01-25 09:48:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.0.pt (229.9MB)
|
| 63 |
+
2026-01-25 09:48:12 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.0.pt
|
| 64 |
+
2026-01-25 09:50:17 | INFO |
|
| 65 |
+
============================================================
|
| 66 |
+
Validation Results (took 8.06s):
|
| 67 |
+
Samples: 346 instruction, 346 quality
|
| 68 |
+
Instruction Acc: 0.6705
|
| 69 |
+
Quality Acc: 0.6965
|
| 70 |
+
Average Acc: 0.6835
|
| 71 |
+
Total Loss: 0.7808
|
| 72 |
+
Instruction Loss: 0.7905
|
| 73 |
+
Quality Loss: 0.7712
|
| 74 |
+
============================================================
|
| 75 |
+
2026-01-25 09:50:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_99.pt (filtered to 21.279M trainable parameters)
|
| 76 |
+
2026-01-25 09:50:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_99.pt (229.9MB)
|
| 77 |
+
2026-01-25 09:50:17 | INFO | Best 1 checkpoints:
|
| 78 |
+
2026-01-25 09:50:17 | INFO | 1. Step 99: acc=0.6835 (reward_model.best_99.pt)
|
| 79 |
+
2026-01-25 09:50:18 | INFO | Step 100: loss=0.7788 | IF_loss=0.7289, MQ_loss=0.8287 | acc=0.667 (IF=0.667, MQ=0.667) | lr=0.000010
|
| 80 |
+
2026-01-25 09:52:28 | INFO |
|
| 81 |
+
============================================================
|
| 82 |
+
Validation Results (took 9.09s):
|
| 83 |
+
Samples: 346 instruction, 346 quality
|
| 84 |
+
Instruction Acc: 0.7052
|
| 85 |
+
Quality Acc: 0.7370
|
| 86 |
+
Average Acc: 0.7211
|
| 87 |
+
Total Loss: 0.6198
|
| 88 |
+
Instruction Loss: 0.6420
|
| 89 |
+
Quality Loss: 0.5976
|
| 90 |
+
============================================================
|
| 91 |
+
2026-01-25 09:52:28 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_199.pt (filtered to 21.279M trainable parameters)
|
| 92 |
+
2026-01-25 09:52:28 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_199.pt (229.9MB)
|
| 93 |
+
2026-01-25 09:52:28 | INFO | Best 2 checkpoints:
|
| 94 |
+
2026-01-25 09:52:28 | INFO | 1. Step 199: acc=0.7211 (reward_model.best_199.pt)
|
| 95 |
+
2026-01-25 09:52:28 | INFO | 2. Step 99: acc=0.6835 (reward_model.best_99.pt)
|
| 96 |
+
2026-01-25 09:52:29 | INFO | Step 200: loss=0.6449 | IF_loss=0.5494, MQ_loss=0.7404 | acc=0.646 (IF=0.646, MQ=0.646) | lr=0.000010
|
| 97 |
+
2026-01-25 09:54:31 | INFO |
|
| 98 |
+
============================================================
|
| 99 |
+
Validation Results (took 8.43s):
|
| 100 |
+
Samples: 346 instruction, 346 quality
|
| 101 |
+
Instruction Acc: 0.7052
|
| 102 |
+
Quality Acc: 0.7457
|
| 103 |
+
Average Acc: 0.7254
|
| 104 |
+
Total Loss: 0.5725
|
| 105 |
+
Instruction Loss: 0.6007
|
| 106 |
+
Quality Loss: 0.5443
|
| 107 |
+
============================================================
|
| 108 |
+
2026-01-25 09:54:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_299.pt (filtered to 21.279M trainable parameters)
|
| 109 |
+
2026-01-25 09:54:31 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_299.pt (229.9MB)
|
| 110 |
+
2026-01-25 09:54:31 | INFO | Best 3 checkpoints:
|
| 111 |
+
2026-01-25 09:54:31 | INFO | 1. Step 299: acc=0.7254 (reward_model.best_299.pt)
|
| 112 |
+
2026-01-25 09:54:31 | INFO | 2. Step 199: acc=0.7211 (reward_model.best_199.pt)
|
| 113 |
+
2026-01-25 09:54:31 | INFO | 3. Step 99: acc=0.6835 (reward_model.best_99.pt)
|
| 114 |
+
2026-01-25 09:54:33 | INFO | Step 300: loss=0.5330 | IF_loss=0.4058, MQ_loss=0.6601 | acc=0.750 (IF=0.792, MQ=0.708) | lr=0.000009
|
| 115 |
+
2026-01-25 09:56:42 | INFO |
|
| 116 |
+
============================================================
|
| 117 |
+
Validation Results (took 8.75s):
|
| 118 |
+
Samples: 346 instruction, 346 quality
|
| 119 |
+
Instruction Acc: 0.7081
|
| 120 |
+
Quality Acc: 0.7630
|
| 121 |
+
Average Acc: 0.7355
|
| 122 |
+
Total Loss: 0.5519
|
| 123 |
+
Instruction Loss: 0.5810
|
| 124 |
+
Quality Loss: 0.5228
|
| 125 |
+
============================================================
|
| 126 |
+
2026-01-25 09:56:42 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_399.pt (filtered to 21.279M trainable parameters)
|
| 127 |
+
2026-01-25 09:56:42 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_399.pt (229.9MB)
|
| 128 |
+
2026-01-25 09:56:42 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_99.pt
|
| 129 |
+
2026-01-25 09:56:42 | INFO | Best 3 checkpoints:
|
| 130 |
+
2026-01-25 09:56:42 | INFO | 1. Step 399: acc=0.7355 (reward_model.best_399.pt)
|
| 131 |
+
2026-01-25 09:56:42 | INFO | 2. Step 299: acc=0.7254 (reward_model.best_299.pt)
|
| 132 |
+
2026-01-25 09:56:42 | INFO | 3. Step 199: acc=0.7211 (reward_model.best_199.pt)
|
| 133 |
+
2026-01-25 09:56:44 | INFO | Step 400: loss=0.5271 | IF_loss=0.4825, MQ_loss=0.5716 | acc=0.740 (IF=0.729, MQ=0.750) | lr=0.000009
|
| 134 |
+
2026-01-25 09:58:49 | INFO |
|
| 135 |
+
============================================================
|
| 136 |
+
Validation Results (took 8.57s):
|
| 137 |
+
Samples: 346 instruction, 346 quality
|
| 138 |
+
Instruction Acc: 0.7168
|
| 139 |
+
Quality Acc: 0.7746
|
| 140 |
+
Average Acc: 0.7457
|
| 141 |
+
Total Loss: 0.5440
|
| 142 |
+
Instruction Loss: 0.5747
|
| 143 |
+
Quality Loss: 0.5133
|
| 144 |
+
============================================================
|
| 145 |
+
2026-01-25 09:58:49 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_499.pt (filtered to 21.279M trainable parameters)
|
| 146 |
+
2026-01-25 09:58:49 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_499.pt (229.9MB)
|
| 147 |
+
2026-01-25 09:58:49 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_199.pt
|
| 148 |
+
2026-01-25 09:58:49 | INFO | Best 3 checkpoints:
|
| 149 |
+
2026-01-25 09:58:49 | INFO | 1. Step 499: acc=0.7457 (reward_model.best_499.pt)
|
| 150 |
+
2026-01-25 09:58:49 | INFO | 2. Step 399: acc=0.7355 (reward_model.best_399.pt)
|
| 151 |
+
2026-01-25 09:58:49 | INFO | 3. Step 299: acc=0.7254 (reward_model.best_299.pt)
|
| 152 |
+
2026-01-25 09:58:50 | INFO | Step 500: loss=0.4747 | IF_loss=0.5236, MQ_loss=0.4259 | acc=0.708 (IF=0.688, MQ=0.729) | lr=0.000009
|
| 153 |
+
2026-01-25 10:00:58 | INFO |
|
| 154 |
+
============================================================
|
| 155 |
+
Validation Results (took 8.22s):
|
| 156 |
+
Samples: 346 instruction, 346 quality
|
| 157 |
+
Instruction Acc: 0.7197
|
| 158 |
+
Quality Acc: 0.7717
|
| 159 |
+
Average Acc: 0.7457
|
| 160 |
+
Total Loss: 0.5393
|
| 161 |
+
Instruction Loss: 0.5700
|
| 162 |
+
Quality Loss: 0.5086
|
| 163 |
+
============================================================
|
| 164 |
+
2026-01-25 10:00:58 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_599.pt (filtered to 21.279M trainable parameters)
|
| 165 |
+
2026-01-25 10:00:59 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_599.pt (229.9MB)
|
| 166 |
+
2026-01-25 10:00:59 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_299.pt
|
| 167 |
+
2026-01-25 10:00:59 | INFO | Best 3 checkpoints:
|
| 168 |
+
2026-01-25 10:00:59 | INFO | 1. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 169 |
+
2026-01-25 10:00:59 | INFO | 2. Step 499: acc=0.7457 (reward_model.best_499.pt)
|
| 170 |
+
2026-01-25 10:00:59 | INFO | 3. Step 399: acc=0.7355 (reward_model.best_399.pt)
|
| 171 |
+
2026-01-25 10:01:00 | INFO | Step 600: loss=0.4197 | IF_loss=0.3952, MQ_loss=0.4441 | acc=0.729 (IF=0.750, MQ=0.708) | lr=0.000008
|
| 172 |
+
2026-01-25 10:03:05 | INFO |
|
| 173 |
+
============================================================
|
| 174 |
+
Validation Results (took 8.34s):
|
| 175 |
+
Samples: 346 instruction, 346 quality
|
| 176 |
+
Instruction Acc: 0.7168
|
| 177 |
+
Quality Acc: 0.7803
|
| 178 |
+
Average Acc: 0.7486
|
| 179 |
+
Total Loss: 0.5377
|
| 180 |
+
Instruction Loss: 0.5695
|
| 181 |
+
Quality Loss: 0.5060
|
| 182 |
+
============================================================
|
| 183 |
+
2026-01-25 10:03:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_699.pt (filtered to 21.279M trainable parameters)
|
| 184 |
+
2026-01-25 10:03:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_699.pt (229.9MB)
|
| 185 |
+
2026-01-25 10:03:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_399.pt
|
| 186 |
+
2026-01-25 10:03:05 | INFO | Best 3 checkpoints:
|
| 187 |
+
2026-01-25 10:03:05 | INFO | 1. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 188 |
+
2026-01-25 10:03:05 | INFO | 2. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 189 |
+
2026-01-25 10:03:05 | INFO | 3. Step 499: acc=0.7457 (reward_model.best_499.pt)
|
| 190 |
+
2026-01-25 10:03:07 | INFO | Step 700: loss=0.4059 | IF_loss=0.4302, MQ_loss=0.3815 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000007
|
| 191 |
+
2026-01-25 10:05:18 | INFO |
|
| 192 |
+
============================================================
|
| 193 |
+
Validation Results (took 8.63s):
|
| 194 |
+
Samples: 346 instruction, 346 quality
|
| 195 |
+
Instruction Acc: 0.7139
|
| 196 |
+
Quality Acc: 0.7861
|
| 197 |
+
Average Acc: 0.7500
|
| 198 |
+
Total Loss: 0.5391
|
| 199 |
+
Instruction Loss: 0.5710
|
| 200 |
+
Quality Loss: 0.5071
|
| 201 |
+
============================================================
|
| 202 |
+
2026-01-25 10:05:18 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_799.pt (filtered to 21.279M trainable parameters)
|
| 203 |
+
2026-01-25 10:05:18 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_799.pt (229.9MB)
|
| 204 |
+
2026-01-25 10:05:18 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_499.pt
|
| 205 |
+
2026-01-25 10:05:18 | INFO | Best 3 checkpoints:
|
| 206 |
+
2026-01-25 10:05:18 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 207 |
+
2026-01-25 10:05:18 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 208 |
+
2026-01-25 10:05:18 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 209 |
+
2026-01-25 10:05:20 | INFO | Step 800: loss=0.4310 | IF_loss=0.5054, MQ_loss=0.3567 | acc=0.812 (IF=0.750, MQ=0.875) | lr=0.000007
|
| 210 |
+
2026-01-25 10:07:31 | INFO |
|
| 211 |
+
============================================================
|
| 212 |
+
Validation Results (took 8.70s):
|
| 213 |
+
Samples: 346 instruction, 346 quality
|
| 214 |
+
Instruction Acc: 0.7139
|
| 215 |
+
Quality Acc: 0.7746
|
| 216 |
+
Average Acc: 0.7442
|
| 217 |
+
Total Loss: 0.5383
|
| 218 |
+
Instruction Loss: 0.5699
|
| 219 |
+
Quality Loss: 0.5067
|
| 220 |
+
============================================================
|
| 221 |
+
2026-01-25 10:07:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_899.pt (filtered to 21.279M trainable parameters)
|
| 222 |
+
2026-01-25 10:07:31 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_899.pt (229.9MB)
|
| 223 |
+
2026-01-25 10:07:31 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_899.pt
|
| 224 |
+
2026-01-25 10:07:31 | INFO | Best 3 checkpoints:
|
| 225 |
+
2026-01-25 10:07:31 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 226 |
+
2026-01-25 10:07:31 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 227 |
+
2026-01-25 10:07:31 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 228 |
+
2026-01-25 10:07:33 | INFO | Step 900: loss=0.5193 | IF_loss=0.5695, MQ_loss=0.4690 | acc=0.677 (IF=0.625, MQ=0.729) | lr=0.000006
|
| 229 |
+
2026-01-25 10:09:39 | INFO |
|
| 230 |
+
============================================================
|
| 231 |
+
Validation Results (took 8.45s):
|
| 232 |
+
Samples: 346 instruction, 346 quality
|
| 233 |
+
Instruction Acc: 0.7139
|
| 234 |
+
Quality Acc: 0.7746
|
| 235 |
+
Average Acc: 0.7442
|
| 236 |
+
Total Loss: 0.5387
|
| 237 |
+
Instruction Loss: 0.5706
|
| 238 |
+
Quality Loss: 0.5068
|
| 239 |
+
============================================================
|
| 240 |
+
2026-01-25 10:09:39 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_999.pt (filtered to 21.279M trainable parameters)
|
| 241 |
+
2026-01-25 10:09:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_999.pt (229.9MB)
|
| 242 |
+
2026-01-25 10:09:39 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_999.pt
|
| 243 |
+
2026-01-25 10:09:39 | INFO | Best 3 checkpoints:
|
| 244 |
+
2026-01-25 10:09:39 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 245 |
+
2026-01-25 10:09:39 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 246 |
+
2026-01-25 10:09:39 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 247 |
+
2026-01-25 10:09:40 | INFO | Step 1000: loss=0.3726 | IF_loss=0.3096, MQ_loss=0.4357 | acc=0.771 (IF=0.875, MQ=0.667) | lr=0.000005
|
| 248 |
+
2026-01-25 10:11:48 | INFO |
|
| 249 |
+
============================================================
|
| 250 |
+
Validation Results (took 7.72s):
|
| 251 |
+
Samples: 346 instruction, 346 quality
|
| 252 |
+
Instruction Acc: 0.7168
|
| 253 |
+
Quality Acc: 0.7688
|
| 254 |
+
Average Acc: 0.7428
|
| 255 |
+
Total Loss: 0.5390
|
| 256 |
+
Instruction Loss: 0.5712
|
| 257 |
+
Quality Loss: 0.5067
|
| 258 |
+
============================================================
|
| 259 |
+
2026-01-25 10:11:48 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1099.pt (filtered to 21.279M trainable parameters)
|
| 260 |
+
2026-01-25 10:11:48 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1099.pt (229.9MB)
|
| 261 |
+
2026-01-25 10:11:48 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1099.pt
|
| 262 |
+
2026-01-25 10:11:48 | INFO | Best 3 checkpoints:
|
| 263 |
+
2026-01-25 10:11:48 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 264 |
+
2026-01-25 10:11:48 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 265 |
+
2026-01-25 10:11:48 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 266 |
+
2026-01-25 10:11:50 | INFO | Step 1100: loss=0.4897 | IF_loss=0.5040, MQ_loss=0.4754 | acc=0.802 (IF=0.792, MQ=0.812) | lr=0.000004
|
| 267 |
+
2026-01-25 10:13:53 | INFO |
|
| 268 |
+
============================================================
|
| 269 |
+
Validation Results (took 8.01s):
|
| 270 |
+
Samples: 346 instruction, 346 quality
|
| 271 |
+
Instruction Acc: 0.7168
|
| 272 |
+
Quality Acc: 0.7688
|
| 273 |
+
Average Acc: 0.7428
|
| 274 |
+
Total Loss: 0.5386
|
| 275 |
+
Instruction Loss: 0.5713
|
| 276 |
+
Quality Loss: 0.5059
|
| 277 |
+
============================================================
|
| 278 |
+
2026-01-25 10:13:53 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1199.pt (filtered to 21.279M trainable parameters)
|
| 279 |
+
2026-01-25 10:13:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1199.pt (229.9MB)
|
| 280 |
+
2026-01-25 10:13:53 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1199.pt
|
| 281 |
+
2026-01-25 10:13:53 | INFO | Best 3 checkpoints:
|
| 282 |
+
2026-01-25 10:13:53 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 283 |
+
2026-01-25 10:13:53 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 284 |
+
2026-01-25 10:13:53 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 285 |
+
2026-01-25 10:13:54 | INFO | Step 1200: loss=0.4865 | IF_loss=0.5833, MQ_loss=0.3896 | acc=0.750 (IF=0.708, MQ=0.792) | lr=0.000003
|
| 286 |
+
2026-01-25 10:16:00 | INFO |
|
| 287 |
+
============================================================
|
| 288 |
+
Validation Results (took 7.76s):
|
| 289 |
+
Samples: 346 instruction, 346 quality
|
| 290 |
+
Instruction Acc: 0.7139
|
| 291 |
+
Quality Acc: 0.7688
|
| 292 |
+
Average Acc: 0.7413
|
| 293 |
+
Total Loss: 0.5391
|
| 294 |
+
Instruction Loss: 0.5722
|
| 295 |
+
Quality Loss: 0.5060
|
| 296 |
+
============================================================
|
| 297 |
+
2026-01-25 10:16:01 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1299.pt (filtered to 21.279M trainable parameters)
|
| 298 |
+
2026-01-25 10:16:01 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1299.pt (229.9MB)
|
| 299 |
+
2026-01-25 10:16:01 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1299.pt
|
| 300 |
+
2026-01-25 10:16:01 | INFO | Best 3 checkpoints:
|
| 301 |
+
2026-01-25 10:16:01 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 302 |
+
2026-01-25 10:16:01 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 303 |
+
2026-01-25 10:16:01 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 304 |
+
2026-01-25 10:16:02 | INFO | Step 1300: loss=0.4948 | IF_loss=0.5460, MQ_loss=0.4435 | acc=0.750 (IF=0.688, MQ=0.812) | lr=0.000003
|
| 305 |
+
2026-01-25 10:18:05 | INFO |
|
| 306 |
+
============================================================
|
| 307 |
+
Validation Results (took 8.29s):
|
| 308 |
+
Samples: 346 instruction, 346 quality
|
| 309 |
+
Instruction Acc: 0.7139
|
| 310 |
+
Quality Acc: 0.7717
|
| 311 |
+
Average Acc: 0.7428
|
| 312 |
+
Total Loss: 0.5395
|
| 313 |
+
Instruction Loss: 0.5728
|
| 314 |
+
Quality Loss: 0.5062
|
| 315 |
+
============================================================
|
| 316 |
+
2026-01-25 10:18:05 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1399.pt (filtered to 21.279M trainable parameters)
|
| 317 |
+
2026-01-25 10:18:05 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1399.pt (229.9MB)
|
| 318 |
+
2026-01-25 10:18:05 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1399.pt
|
| 319 |
+
2026-01-25 10:18:05 | INFO | Best 3 checkpoints:
|
| 320 |
+
2026-01-25 10:18:05 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 321 |
+
2026-01-25 10:18:05 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 322 |
+
2026-01-25 10:18:05 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 323 |
+
2026-01-25 10:18:06 | INFO | Step 1400: loss=0.4470 | IF_loss=0.5541, MQ_loss=0.3399 | acc=0.812 (IF=0.750, MQ=0.875) | lr=0.000002
|
| 324 |
+
2026-01-25 10:20:12 | INFO |
|
| 325 |
+
============================================================
|
| 326 |
+
Validation Results (took 7.85s):
|
| 327 |
+
Samples: 346 instruction, 346 quality
|
| 328 |
+
Instruction Acc: 0.7139
|
| 329 |
+
Quality Acc: 0.7746
|
| 330 |
+
Average Acc: 0.7442
|
| 331 |
+
Total Loss: 0.5399
|
| 332 |
+
Instruction Loss: 0.5734
|
| 333 |
+
Quality Loss: 0.5064
|
| 334 |
+
============================================================
|
| 335 |
+
2026-01-25 10:20:12 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1499.pt (filtered to 21.279M trainable parameters)
|
| 336 |
+
2026-01-25 10:20:12 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1499.pt (229.9MB)
|
| 337 |
+
2026-01-25 10:20:12 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1499.pt
|
| 338 |
+
2026-01-25 10:20:12 | INFO | Best 3 checkpoints:
|
| 339 |
+
2026-01-25 10:20:12 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 340 |
+
2026-01-25 10:20:12 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 341 |
+
2026-01-25 10:20:12 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 342 |
+
2026-01-25 10:20:13 | INFO | Step 1500: loss=0.3559 | IF_loss=0.4083, MQ_loss=0.3035 | acc=0.833 (IF=0.792, MQ=0.875) | lr=0.000001
|
| 343 |
+
2026-01-25 10:22:17 | INFO |
|
| 344 |
+
============================================================
|
| 345 |
+
Validation Results (took 9.35s):
|
| 346 |
+
Samples: 346 instruction, 346 quality
|
| 347 |
+
Instruction Acc: 0.7139
|
| 348 |
+
Quality Acc: 0.7688
|
| 349 |
+
Average Acc: 0.7413
|
| 350 |
+
Total Loss: 0.5398
|
| 351 |
+
Instruction Loss: 0.5737
|
| 352 |
+
Quality Loss: 0.5060
|
| 353 |
+
============================================================
|
| 354 |
+
2026-01-25 10:22:17 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1599.pt (filtered to 21.279M trainable parameters)
|
| 355 |
+
2026-01-25 10:22:17 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1599.pt (229.9MB)
|
| 356 |
+
2026-01-25 10:22:17 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1599.pt
|
| 357 |
+
2026-01-25 10:22:17 | INFO | Best 3 checkpoints:
|
| 358 |
+
2026-01-25 10:22:17 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 359 |
+
2026-01-25 10:22:17 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 360 |
+
2026-01-25 10:22:17 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 361 |
+
2026-01-25 10:22:23 | INFO | Step 1600: loss=0.3699 | IF_loss=0.4525, MQ_loss=0.2873 | acc=0.875 (IF=0.854, MQ=0.896) | lr=0.000001
|
| 362 |
+
2026-01-25 10:24:25 | INFO |
|
| 363 |
+
============================================================
|
| 364 |
+
Validation Results (took 7.95s):
|
| 365 |
+
Samples: 346 instruction, 346 quality
|
| 366 |
+
Instruction Acc: 0.7110
|
| 367 |
+
Quality Acc: 0.7717
|
| 368 |
+
Average Acc: 0.7413
|
| 369 |
+
Total Loss: 0.5399
|
| 370 |
+
Instruction Loss: 0.5738
|
| 371 |
+
Quality Loss: 0.5060
|
| 372 |
+
============================================================
|
| 373 |
+
2026-01-25 10:24:25 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1699.pt (filtered to 21.279M trainable parameters)
|
| 374 |
+
2026-01-25 10:24:26 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1699.pt (229.9MB)
|
| 375 |
+
2026-01-25 10:24:26 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1699.pt
|
| 376 |
+
2026-01-25 10:24:26 | INFO | Best 3 checkpoints:
|
| 377 |
+
2026-01-25 10:24:26 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 378 |
+
2026-01-25 10:24:26 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 379 |
+
2026-01-25 10:24:26 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 380 |
+
2026-01-25 10:24:27 | INFO | Step 1700: loss=0.3662 | IF_loss=0.3525, MQ_loss=0.3800 | acc=0.802 (IF=0.792, MQ=0.812) | lr=0.000001
|
| 381 |
+
2026-01-25 10:26:33 | INFO |
|
| 382 |
+
============================================================
|
| 383 |
+
Validation Results (took 7.78s):
|
| 384 |
+
Samples: 346 instruction, 346 quality
|
| 385 |
+
Instruction Acc: 0.7139
|
| 386 |
+
Quality Acc: 0.7717
|
| 387 |
+
Average Acc: 0.7428
|
| 388 |
+
Total Loss: 0.5401
|
| 389 |
+
Instruction Loss: 0.5740
|
| 390 |
+
Quality Loss: 0.5063
|
| 391 |
+
============================================================
|
| 392 |
+
2026-01-25 10:26:33 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1799.pt (filtered to 21.279M trainable parameters)
|
| 393 |
+
2026-01-25 10:26:33 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1799.pt (229.9MB)
|
| 394 |
+
2026-01-25 10:26:33 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1799.pt
|
| 395 |
+
2026-01-25 10:26:33 | INFO | Best 3 checkpoints:
|
| 396 |
+
2026-01-25 10:26:33 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 397 |
+
2026-01-25 10:26:33 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 398 |
+
2026-01-25 10:26:33 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 399 |
+
2026-01-25 10:26:34 | INFO | Step 1800: loss=0.4003 | IF_loss=0.4304, MQ_loss=0.3701 | acc=0.823 (IF=0.792, MQ=0.854) | lr=0.000000
|
| 400 |
+
2026-01-25 10:28:34 | INFO |
|
| 401 |
+
============================================================
|
| 402 |
+
Validation Results (took 8.07s):
|
| 403 |
+
Samples: 346 instruction, 346 quality
|
| 404 |
+
Instruction Acc: 0.7139
|
| 405 |
+
Quality Acc: 0.7717
|
| 406 |
+
Average Acc: 0.7428
|
| 407 |
+
Total Loss: 0.5401
|
| 408 |
+
Instruction Loss: 0.5739
|
| 409 |
+
Quality Loss: 0.5063
|
| 410 |
+
============================================================
|
| 411 |
+
2026-01-25 10:28:34 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1899.pt (filtered to 21.279M trainable parameters)
|
| 412 |
+
2026-01-25 10:28:34 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1899.pt (229.9MB)
|
| 413 |
+
2026-01-25 10:28:34 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1899.pt
|
| 414 |
+
2026-01-25 10:28:34 | INFO | Best 3 checkpoints:
|
| 415 |
+
2026-01-25 10:28:34 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 416 |
+
2026-01-25 10:28:34 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 417 |
+
2026-01-25 10:28:34 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 418 |
+
2026-01-25 10:28:35 | INFO | Step 1900: loss=0.4593 | IF_loss=0.4853, MQ_loss=0.4333 | acc=0.760 (IF=0.750, MQ=0.771) | lr=0.000000
|
| 419 |
+
2026-01-25 10:30:41 | INFO |
|
| 420 |
+
============================================================
|
| 421 |
+
Validation Results (took 7.71s):
|
| 422 |
+
Samples: 346 instruction, 346 quality
|
| 423 |
+
Instruction Acc: 0.7139
|
| 424 |
+
Quality Acc: 0.7717
|
| 425 |
+
Average Acc: 0.7428
|
| 426 |
+
Total Loss: 0.5400
|
| 427 |
+
Instruction Loss: 0.5738
|
| 428 |
+
Quality Loss: 0.5063
|
| 429 |
+
============================================================
|
| 430 |
+
2026-01-25 10:30:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1999.pt (filtered to 21.279M trainable parameters)
|
| 431 |
+
2026-01-25 10:30:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1999.pt (229.9MB)
|
| 432 |
+
2026-01-25 10:30:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_0947_CA/ckpt/reward_model.best_1999.pt
|
| 433 |
+
2026-01-25 10:30:41 | INFO | Best 3 checkpoints:
|
| 434 |
+
2026-01-25 10:30:41 | INFO | 1. Step 799: acc=0.7500 (reward_model.best_799.pt)
|
| 435 |
+
2026-01-25 10:30:41 | INFO | 2. Step 699: acc=0.7486 (reward_model.best_699.pt)
|
| 436 |
+
2026-01-25 10:30:41 | INFO | 3. Step 599: acc=0.7457 (reward_model.best_599.pt)
|
| 437 |
+
2026-01-25 10:30:41 | INFO | Training complete!
|
| 438 |
+
2026-01-25 10:30:41 | INFO | Training complete!
|
20260125_1117/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '5'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 2000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 2000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260123_1403_tune_mulan_transformer/ckpt/reward_model.best_27252.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 100
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_1117/reward_model/1769311084.1305242/events.out.tfevents.1769311084.MACLAB-S004.2009526.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:822b6fceabde39473c4a729e682f16fe698e1ddda674a89dfd54e6dae8b6b5bc
|
| 3 |
+
size 503
|
20260125_1117/reward_model/1769311084.1322424/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 2000
|
20260125_1117/reward_model/events.out.tfevents.1769311084.MACLAB-S004.2009526.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5324959f8755a329daa5765d181b2712b8dc36505a84bd25731dd5d0b8969191
|
| 3 |
+
size 874266
|
20260125_1117/train.20260125_1117.log
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 11:17:53 | INFO | Log file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/train.20260125_1117.log
|
| 2 |
+
2026-01-25 11:17:53 | INFO | Random seed set to 42
|
| 3 |
+
2026-01-25 11:17:54 | INFO | Created RawTextFrozenAudioDataset with 3463 samples
|
| 4 |
+
2026-01-25 11:17:54 | INFO | Split dataset into train (3117) and validation (346) sets (ratio: 10.00%)
|
| 5 |
+
2026-01-25 11:17:54 | INFO | Will resume from checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260123_1403_tune_mulan_transformer/ckpt/reward_model.best_27252.pt
|
| 6 |
+
2026-01-25 11:17:54 | INFO | Using checkpoint config for model initialization (continue training mode)
|
| 7 |
+
2026-01-25 11:18:01 | INFO | Created RewardAttentionModel with attention_mode=CA
|
| 8 |
+
2026-01-25 11:18:01 | INFO | Created PreferenceLoss with filter_ties=True
|
| 9 |
+
2026-01-25 11:18:02 | INFO | ✓ Gradient checkpointing enabled
|
| 10 |
+
2026-01-25 11:18:02 | INFO | ✓ Audio cropping enabled: min=200, max=1500
|
| 11 |
+
2026-01-25 11:18:02 | INFO | Apply to eval: True, ref: True
|
| 12 |
+
2026-01-25 11:18:02 | INFO | Modes: train=random, val=start
|
| 13 |
+
2026-01-25 11:18:02 | INFO | MLP head parameters: 1,186,563 params, lr=1e-05
|
| 14 |
+
2026-01-25 11:18:02 | INFO | Other parameters: 70,437,634 params, lr=1e-05
|
| 15 |
+
2026-01-25 11:18:02 | INFO | Using lr_schedule=linear_cosine warmup_steps=10 total_steps=2000
|
| 16 |
+
2026-01-25 11:18:02 | INFO | Training with fixed validation set
|
| 17 |
+
2026-01-25 11:18:02 | INFO | Train batch_size: 48, Valid batch_size: 20
|
| 18 |
+
2026-01-25 11:18:03 | INFO | Skipping score_projector.3.weight: shape mismatch (ckpt torch.Size([1, 768]) vs model torch.Size([2, 768])), will use randomly initialized weights
|
| 19 |
+
2026-01-25 11:18:03 | INFO | Skipping score_projector.3.bias: shape mismatch (ckpt torch.Size([1]) vs model torch.Size([2])), will use randomly initialized weights
|
| 20 |
+
2026-01-25 11:18:03 | INFO | Missing keys (712): ['score_projector.3.weight', 'score_projector.3.bias', 'text_module.model.embeddings.word_embeddings.weight', 'text_module.model.embeddings.position_embeddings.weight', 'text_module.model.embeddings.token_type_embeddings.weight']...
|
| 21 |
+
2026-01-25 11:18:03 | INFO | ✓ Starting from step 0 (transfer learning mode, ignoring checkpoint steps=27252)
|
| 22 |
+
2026-01-25 11:18:03 | INFO | Resumed from /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260123_1403_tune_mulan_transformer/ckpt/reward_model.best_27252.pt
|
| 23 |
+
2026-01-25 11:18:03 | INFO | Parameters: 683.857M total, 71.624M trainable
|
| 24 |
+
2026-01-25 11:18:03 | INFO | Text encoder (frozen): 278.044M
|
| 25 |
+
2026-01-25 11:18:03 | INFO | Text encoder (trainable): 50.345M
|
| 26 |
+
2026-01-25 11:18:03 | INFO | Audio encoder (frozen): 334.189M
|
| 27 |
+
2026-01-25 11:18:03 | INFO | Other trainable: 21.279M
|
| 28 |
+
2026-01-25 11:18:03 | INFO | ℹ No LoRA configuration detected
|
| 29 |
+
2026-01-25 11:18:04 | INFO | ============================================================
|
| 30 |
+
2026-01-25 11:18:04 | INFO | Ready to start training
|
| 31 |
+
2026-01-25 11:18:04 | INFO | ============================================================
|
| 32 |
+
2026-01-25 11:18:04 | INFO | Starting training from step 0
|
| 33 |
+
2026-01-25 11:18:04 | INFO | ===== Accelerator / CUDA Debug Info =====
|
| 34 |
+
2026-01-25 11:18:04 | INFO | accelerator.device = cuda
|
| 35 |
+
2026-01-25 11:18:04 | INFO | mixed_precision = bf16
|
| 36 |
+
2026-01-25 11:18:04 | INFO | distributed_type = NO
|
| 37 |
+
2026-01-25 11:18:04 | INFO | num_processes = 1
|
| 38 |
+
2026-01-25 11:18:04 | INFO | process_index = 0
|
| 39 |
+
2026-01-25 11:18:04 | INFO | is_main_process = True
|
| 40 |
+
2026-01-25 11:18:04 | INFO | torch.cuda.is_available() = True
|
| 41 |
+
2026-01-25 11:18:04 | INFO | torch.cuda.device_count() = 1
|
| 42 |
+
2026-01-25 11:18:04 | INFO | current_device = 0
|
| 43 |
+
2026-01-25 11:18:04 | INFO | device_name = NVIDIA GeForce RTX 4090
|
| 44 |
+
2026-01-25 11:18:04 | INFO | model parameter device = cuda:0
|
| 45 |
+
2026-01-25 11:18:04 | INFO | Training for 2000.0 steps (~32 epochs, 64 steps/epoch)
|
| 46 |
+
2026-01-25 11:18:04 | INFO |
|
| 47 |
+
============================================================
|
| 48 |
+
2026-01-25 11:18:04 | INFO | Running initial validation after resume...
|
| 49 |
+
2026-01-25 11:18:04 | INFO | ============================================================
|
| 50 |
+
2026-01-25 11:18:17 | INFO |
|
| 51 |
+
============================================================
|
| 52 |
+
Validation Results (took 13.32s):
|
| 53 |
+
Samples: 346 instruction, 346 quality
|
| 54 |
+
Instruction Acc: 0.5405
|
| 55 |
+
Quality Acc: 0.4740
|
| 56 |
+
Average Acc: 0.5072
|
| 57 |
+
Total Loss: 0.6589
|
| 58 |
+
Instruction Loss: 0.6537
|
| 59 |
+
Quality Loss: 0.6641
|
| 60 |
+
============================================================
|
| 61 |
+
2026-01-25 11:18:17 | INFO | Initial validation complete.
|
| 62 |
+
|
| 63 |
+
2026-01-25 11:18:22 | INFO | Step 0: loss=0.6765 | IF_loss=0.6586, MQ_loss=0.6944 | acc=0.552 (IF=0.604, MQ=0.500) | lr=0.000002
|
| 64 |
+
2026-01-25 11:18:23 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.0.pt (filtered to 71.624M trainable parameters)
|
| 65 |
+
2026-01-25 11:18:23 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.0.pt (422.0MB)
|
| 66 |
+
2026-01-25 11:18:23 | INFO | Step 0: Saved to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.0.pt
|
| 67 |
+
2026-01-25 11:20:31 | INFO |
|
| 68 |
+
============================================================
|
| 69 |
+
Validation Results (took 8.99s):
|
| 70 |
+
Samples: 346 instruction, 346 quality
|
| 71 |
+
Instruction Acc: 0.6763
|
| 72 |
+
Quality Acc: 0.6965
|
| 73 |
+
Average Acc: 0.6864
|
| 74 |
+
Total Loss: 0.5779
|
| 75 |
+
Instruction Loss: 0.6022
|
| 76 |
+
Quality Loss: 0.5537
|
| 77 |
+
============================================================
|
| 78 |
+
2026-01-25 11:20:31 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_99.pt (filtered to 71.624M trainable parameters)
|
| 79 |
+
2026-01-25 11:20:31 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_99.pt (422.0MB)
|
| 80 |
+
2026-01-25 11:20:31 | INFO | Best 1 checkpoints:
|
| 81 |
+
2026-01-25 11:20:31 | INFO | 1. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 82 |
+
2026-01-25 11:20:32 | INFO | Step 100: loss=0.5965 | IF_loss=0.6046, MQ_loss=0.5884 | acc=0.688 (IF=0.646, MQ=0.729) | lr=0.000010
|
| 83 |
+
2026-01-25 11:22:45 | INFO |
|
| 84 |
+
============================================================
|
| 85 |
+
Validation Results (took 9.25s):
|
| 86 |
+
Samples: 346 instruction, 346 quality
|
| 87 |
+
Instruction Acc: 0.6821
|
| 88 |
+
Quality Acc: 0.7110
|
| 89 |
+
Average Acc: 0.6965
|
| 90 |
+
Total Loss: 0.5492
|
| 91 |
+
Instruction Loss: 0.5805
|
| 92 |
+
Quality Loss: 0.5180
|
| 93 |
+
============================================================
|
| 94 |
+
2026-01-25 11:22:45 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_199.pt (filtered to 71.624M trainable parameters)
|
| 95 |
+
2026-01-25 11:22:45 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_199.pt (422.0MB)
|
| 96 |
+
2026-01-25 11:22:45 | INFO | Best 2 checkpoints:
|
| 97 |
+
2026-01-25 11:22:45 | INFO | 1. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 98 |
+
2026-01-25 11:22:45 | INFO | 2. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 99 |
+
2026-01-25 11:22:46 | INFO | Step 200: loss=0.5629 | IF_loss=0.5719, MQ_loss=0.5538 | acc=0.708 (IF=0.667, MQ=0.750) | lr=0.000010
|
| 100 |
+
2026-01-25 11:24:58 | INFO |
|
| 101 |
+
============================================================
|
| 102 |
+
Validation Results (took 8.81s):
|
| 103 |
+
Samples: 346 instruction, 346 quality
|
| 104 |
+
Instruction Acc: 0.6879
|
| 105 |
+
Quality Acc: 0.7370
|
| 106 |
+
Average Acc: 0.7124
|
| 107 |
+
Total Loss: 0.5398
|
| 108 |
+
Instruction Loss: 0.5714
|
| 109 |
+
Quality Loss: 0.5082
|
| 110 |
+
============================================================
|
| 111 |
+
2026-01-25 11:24:58 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_299.pt (filtered to 71.624M trainable parameters)
|
| 112 |
+
2026-01-25 11:24:58 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_299.pt (422.0MB)
|
| 113 |
+
2026-01-25 11:24:58 | INFO | Best 3 checkpoints:
|
| 114 |
+
2026-01-25 11:24:58 | INFO | 1. Step 299: acc=0.7124 (reward_model.best_299.pt)
|
| 115 |
+
2026-01-25 11:24:58 | INFO | 2. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 116 |
+
2026-01-25 11:24:58 | INFO | 3. Step 99: acc=0.6864 (reward_model.best_99.pt)
|
| 117 |
+
2026-01-25 11:24:59 | INFO | Step 300: loss=0.4994 | IF_loss=0.5058, MQ_loss=0.4929 | acc=0.802 (IF=0.812, MQ=0.792) | lr=0.000009
|
| 118 |
+
2026-01-25 11:27:15 | INFO |
|
| 119 |
+
============================================================
|
| 120 |
+
Validation Results (took 8.79s):
|
| 121 |
+
Samples: 346 instruction, 346 quality
|
| 122 |
+
Instruction Acc: 0.7023
|
| 123 |
+
Quality Acc: 0.7312
|
| 124 |
+
Average Acc: 0.7168
|
| 125 |
+
Total Loss: 0.5318
|
| 126 |
+
Instruction Loss: 0.5617
|
| 127 |
+
Quality Loss: 0.5019
|
| 128 |
+
============================================================
|
| 129 |
+
2026-01-25 11:27:15 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_399.pt (filtered to 71.624M trainable parameters)
|
| 130 |
+
2026-01-25 11:27:16 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_399.pt (422.0MB)
|
| 131 |
+
2026-01-25 11:27:16 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_99.pt
|
| 132 |
+
2026-01-25 11:27:16 | INFO | Best 3 checkpoints:
|
| 133 |
+
2026-01-25 11:27:16 | INFO | 1. Step 399: acc=0.7168 (reward_model.best_399.pt)
|
| 134 |
+
2026-01-25 11:27:16 | INFO | 2. Step 299: acc=0.7124 (reward_model.best_299.pt)
|
| 135 |
+
2026-01-25 11:27:16 | INFO | 3. Step 199: acc=0.6965 (reward_model.best_199.pt)
|
| 136 |
+
2026-01-25 11:27:17 | INFO | Step 400: loss=0.4955 | IF_loss=0.4808, MQ_loss=0.5101 | acc=0.698 (IF=0.667, MQ=0.729) | lr=0.000009
|
| 137 |
+
2026-01-25 11:29:28 | INFO |
|
| 138 |
+
============================================================
|
| 139 |
+
Validation Results (took 9.06s):
|
| 140 |
+
Samples: 346 instruction, 346 quality
|
| 141 |
+
Instruction Acc: 0.7110
|
| 142 |
+
Quality Acc: 0.7254
|
| 143 |
+
Average Acc: 0.7182
|
| 144 |
+
Total Loss: 0.5266
|
| 145 |
+
Instruction Loss: 0.5560
|
| 146 |
+
Quality Loss: 0.4972
|
| 147 |
+
============================================================
|
| 148 |
+
2026-01-25 11:29:28 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_499.pt (filtered to 71.624M trainable parameters)
|
| 149 |
+
2026-01-25 11:29:28 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_499.pt (422.0MB)
|
| 150 |
+
2026-01-25 11:29:28 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_199.pt
|
| 151 |
+
2026-01-25 11:29:28 | INFO | Best 3 checkpoints:
|
| 152 |
+
2026-01-25 11:29:28 | INFO | 1. Step 499: acc=0.7182 (reward_model.best_499.pt)
|
| 153 |
+
2026-01-25 11:29:28 | INFO | 2. Step 399: acc=0.7168 (reward_model.best_399.pt)
|
| 154 |
+
2026-01-25 11:29:28 | INFO | 3. Step 299: acc=0.7124 (reward_model.best_299.pt)
|
| 155 |
+
2026-01-25 11:29:29 | INFO | Step 500: loss=0.4977 | IF_loss=0.5734, MQ_loss=0.4219 | acc=0.688 (IF=0.667, MQ=0.708) | lr=0.000009
|
| 156 |
+
2026-01-25 11:31:41 | INFO |
|
| 157 |
+
============================================================
|
| 158 |
+
Validation Results (took 8.66s):
|
| 159 |
+
Samples: 346 instruction, 346 quality
|
| 160 |
+
Instruction Acc: 0.7168
|
| 161 |
+
Quality Acc: 0.7283
|
| 162 |
+
Average Acc: 0.7225
|
| 163 |
+
Total Loss: 0.5231
|
| 164 |
+
Instruction Loss: 0.5528
|
| 165 |
+
Quality Loss: 0.4934
|
| 166 |
+
============================================================
|
| 167 |
+
2026-01-25 11:31:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_599.pt (filtered to 71.624M trainable parameters)
|
| 168 |
+
2026-01-25 11:31:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_599.pt (422.0MB)
|
| 169 |
+
2026-01-25 11:31:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_299.pt
|
| 170 |
+
2026-01-25 11:31:41 | INFO | Best 3 checkpoints:
|
| 171 |
+
2026-01-25 11:31:41 | INFO | 1. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 172 |
+
2026-01-25 11:31:41 | INFO | 2. Step 499: acc=0.7182 (reward_model.best_499.pt)
|
| 173 |
+
2026-01-25 11:31:41 | INFO | 3. Step 399: acc=0.7168 (reward_model.best_399.pt)
|
| 174 |
+
2026-01-25 11:31:43 | INFO | Step 600: loss=0.5072 | IF_loss=0.4980, MQ_loss=0.5164 | acc=0.698 (IF=0.688, MQ=0.708) | lr=0.000008
|
| 175 |
+
2026-01-25 11:33:51 | INFO |
|
| 176 |
+
============================================================
|
| 177 |
+
Validation Results (took 9.03s):
|
| 178 |
+
Samples: 346 instruction, 346 quality
|
| 179 |
+
Instruction Acc: 0.7168
|
| 180 |
+
Quality Acc: 0.7341
|
| 181 |
+
Average Acc: 0.7254
|
| 182 |
+
Total Loss: 0.5201
|
| 183 |
+
Instruction Loss: 0.5493
|
| 184 |
+
Quality Loss: 0.4909
|
| 185 |
+
============================================================
|
| 186 |
+
2026-01-25 11:33:52 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_699.pt (filtered to 71.624M trainable parameters)
|
| 187 |
+
2026-01-25 11:33:52 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_699.pt (422.0MB)
|
| 188 |
+
2026-01-25 11:33:52 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_399.pt
|
| 189 |
+
2026-01-25 11:33:52 | INFO | Best 3 checkpoints:
|
| 190 |
+
2026-01-25 11:33:52 | INFO | 1. Step 699: acc=0.7254 (reward_model.best_699.pt)
|
| 191 |
+
2026-01-25 11:33:52 | INFO | 2. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 192 |
+
2026-01-25 11:33:52 | INFO | 3. Step 499: acc=0.7182 (reward_model.best_499.pt)
|
| 193 |
+
2026-01-25 11:33:53 | INFO | Step 700: loss=0.4063 | IF_loss=0.4648, MQ_loss=0.3477 | acc=0.833 (IF=0.812, MQ=0.854) | lr=0.000007
|
| 194 |
+
2026-01-25 11:36:07 | INFO |
|
| 195 |
+
============================================================
|
| 196 |
+
Validation Results (took 8.47s):
|
| 197 |
+
Samples: 346 instruction, 346 quality
|
| 198 |
+
Instruction Acc: 0.7168
|
| 199 |
+
Quality Acc: 0.7341
|
| 200 |
+
Average Acc: 0.7254
|
| 201 |
+
Total Loss: 0.5200
|
| 202 |
+
Instruction Loss: 0.5501
|
| 203 |
+
Quality Loss: 0.4900
|
| 204 |
+
============================================================
|
| 205 |
+
2026-01-25 11:36:07 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_799.pt (filtered to 71.624M trainable parameters)
|
| 206 |
+
2026-01-25 11:36:07 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_799.pt (422.0MB)
|
| 207 |
+
2026-01-25 11:36:07 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_499.pt
|
| 208 |
+
2026-01-25 11:36:07 | INFO | Best 3 checkpoints:
|
| 209 |
+
2026-01-25 11:36:07 | INFO | 1. Step 699: acc=0.7254 (reward_model.best_699.pt)
|
| 210 |
+
2026-01-25 11:36:07 | INFO | 2. Step 799: acc=0.7254 (reward_model.best_799.pt)
|
| 211 |
+
2026-01-25 11:36:07 | INFO | 3. Step 599: acc=0.7225 (reward_model.best_599.pt)
|
| 212 |
+
2026-01-25 11:36:08 | INFO | Step 800: loss=0.4288 | IF_loss=0.4825, MQ_loss=0.3751 | acc=0.740 (IF=0.688, MQ=0.792) | lr=0.000007
|
| 213 |
+
2026-01-25 11:38:19 | INFO |
|
| 214 |
+
============================================================
|
| 215 |
+
Validation Results (took 8.31s):
|
| 216 |
+
Samples: 346 instruction, 346 quality
|
| 217 |
+
Instruction Acc: 0.7225
|
| 218 |
+
Quality Acc: 0.7370
|
| 219 |
+
Average Acc: 0.7298
|
| 220 |
+
Total Loss: 0.5181
|
| 221 |
+
Instruction Loss: 0.5471
|
| 222 |
+
Quality Loss: 0.4891
|
| 223 |
+
============================================================
|
| 224 |
+
2026-01-25 11:38:19 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_899.pt (filtered to 71.624M trainable parameters)
|
| 225 |
+
2026-01-25 11:38:19 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_899.pt (422.0MB)
|
| 226 |
+
2026-01-25 11:38:19 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_599.pt
|
| 227 |
+
2026-01-25 11:38:19 | INFO | Best 3 checkpoints:
|
| 228 |
+
2026-01-25 11:38:19 | INFO | 1. Step 899: acc=0.7298 (reward_model.best_899.pt)
|
| 229 |
+
2026-01-25 11:38:19 | INFO | 2. Step 699: acc=0.7254 (reward_model.best_699.pt)
|
| 230 |
+
2026-01-25 11:38:19 | INFO | 3. Step 799: acc=0.7254 (reward_model.best_799.pt)
|
| 231 |
+
2026-01-25 11:38:21 | INFO | Step 900: loss=0.5461 | IF_loss=0.6051, MQ_loss=0.4871 | acc=0.708 (IF=0.625, MQ=0.792) | lr=0.000006
|
| 232 |
+
2026-01-25 11:40:29 | INFO |
|
| 233 |
+
============================================================
|
| 234 |
+
Validation Results (took 9.00s):
|
| 235 |
+
Samples: 346 instruction, 346 quality
|
| 236 |
+
Instruction Acc: 0.7225
|
| 237 |
+
Quality Acc: 0.7370
|
| 238 |
+
Average Acc: 0.7298
|
| 239 |
+
Total Loss: 0.5177
|
| 240 |
+
Instruction Loss: 0.5469
|
| 241 |
+
Quality Loss: 0.4885
|
| 242 |
+
============================================================
|
| 243 |
+
2026-01-25 11:40:30 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_999.pt (filtered to 71.624M trainable parameters)
|
| 244 |
+
2026-01-25 11:40:30 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_999.pt (422.0MB)
|
| 245 |
+
2026-01-25 11:40:30 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_799.pt
|
| 246 |
+
2026-01-25 11:40:30 | INFO | Best 3 checkpoints:
|
| 247 |
+
2026-01-25 11:40:30 | INFO | 1. Step 899: acc=0.7298 (reward_model.best_899.pt)
|
| 248 |
+
2026-01-25 11:40:30 | INFO | 2. Step 999: acc=0.7298 (reward_model.best_999.pt)
|
| 249 |
+
2026-01-25 11:40:30 | INFO | 3. Step 699: acc=0.7254 (reward_model.best_699.pt)
|
| 250 |
+
2026-01-25 11:40:31 | INFO | Step 1000: loss=0.4418 | IF_loss=0.4662, MQ_loss=0.4175 | acc=0.708 (IF=0.688, MQ=0.729) | lr=0.000005
|
| 251 |
+
2026-01-25 11:42:40 | INFO |
|
| 252 |
+
============================================================
|
| 253 |
+
Validation Results (took 8.11s):
|
| 254 |
+
Samples: 346 instruction, 346 quality
|
| 255 |
+
Instruction Acc: 0.7168
|
| 256 |
+
Quality Acc: 0.7370
|
| 257 |
+
Average Acc: 0.7269
|
| 258 |
+
Total Loss: 0.5173
|
| 259 |
+
Instruction Loss: 0.5461
|
| 260 |
+
Quality Loss: 0.4885
|
| 261 |
+
============================================================
|
| 262 |
+
2026-01-25 11:42:41 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1099.pt (filtered to 71.624M trainable parameters)
|
| 263 |
+
2026-01-25 11:42:41 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1099.pt (422.0MB)
|
| 264 |
+
2026-01-25 11:42:41 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_699.pt
|
| 265 |
+
2026-01-25 11:42:41 | INFO | Best 3 checkpoints:
|
| 266 |
+
2026-01-25 11:42:41 | INFO | 1. Step 899: acc=0.7298 (reward_model.best_899.pt)
|
| 267 |
+
2026-01-25 11:42:41 | INFO | 2. Step 999: acc=0.7298 (reward_model.best_999.pt)
|
| 268 |
+
2026-01-25 11:42:41 | INFO | 3. Step 1099: acc=0.7269 (reward_model.best_1099.pt)
|
| 269 |
+
2026-01-25 11:42:42 | INFO | Step 1100: loss=0.4653 | IF_loss=0.5016, MQ_loss=0.4290 | acc=0.760 (IF=0.708, MQ=0.812) | lr=0.000004
|
| 270 |
+
2026-01-25 11:44:51 | INFO |
|
| 271 |
+
============================================================
|
| 272 |
+
Validation Results (took 8.91s):
|
| 273 |
+
Samples: 346 instruction, 346 quality
|
| 274 |
+
Instruction Acc: 0.7254
|
| 275 |
+
Quality Acc: 0.7370
|
| 276 |
+
Average Acc: 0.7312
|
| 277 |
+
Total Loss: 0.5172
|
| 278 |
+
Instruction Loss: 0.5464
|
| 279 |
+
Quality Loss: 0.4879
|
| 280 |
+
============================================================
|
| 281 |
+
2026-01-25 11:44:51 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1199.pt (filtered to 71.624M trainable parameters)
|
| 282 |
+
2026-01-25 11:44:52 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1199.pt (422.0MB)
|
| 283 |
+
2026-01-25 11:44:52 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1099.pt
|
| 284 |
+
2026-01-25 11:44:52 | INFO | Best 3 checkpoints:
|
| 285 |
+
2026-01-25 11:44:52 | INFO | 1. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 286 |
+
2026-01-25 11:44:52 | INFO | 2. Step 899: acc=0.7298 (reward_model.best_899.pt)
|
| 287 |
+
2026-01-25 11:44:52 | INFO | 3. Step 999: acc=0.7298 (reward_model.best_999.pt)
|
| 288 |
+
2026-01-25 11:44:53 | INFO | Step 1200: loss=0.5002 | IF_loss=0.5816, MQ_loss=0.4188 | acc=0.760 (IF=0.688, MQ=0.833) | lr=0.000003
|
| 289 |
+
2026-01-25 11:47:08 | INFO |
|
| 290 |
+
============================================================
|
| 291 |
+
Validation Results (took 8.34s):
|
| 292 |
+
Samples: 346 instruction, 346 quality
|
| 293 |
+
Instruction Acc: 0.7254
|
| 294 |
+
Quality Acc: 0.7399
|
| 295 |
+
Average Acc: 0.7327
|
| 296 |
+
Total Loss: 0.5170
|
| 297 |
+
Instruction Loss: 0.5456
|
| 298 |
+
Quality Loss: 0.4884
|
| 299 |
+
============================================================
|
| 300 |
+
2026-01-25 11:47:08 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1299.pt (filtered to 71.624M trainable parameters)
|
| 301 |
+
2026-01-25 11:47:08 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1299.pt (422.0MB)
|
| 302 |
+
2026-01-25 11:47:08 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_999.pt
|
| 303 |
+
2026-01-25 11:47:08 | INFO | Best 3 checkpoints:
|
| 304 |
+
2026-01-25 11:47:08 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 305 |
+
2026-01-25 11:47:08 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 306 |
+
2026-01-25 11:47:08 | INFO | 3. Step 899: acc=0.7298 (reward_model.best_899.pt)
|
| 307 |
+
2026-01-25 11:47:10 | INFO | Step 1300: loss=0.5330 | IF_loss=0.6011, MQ_loss=0.4650 | acc=0.729 (IF=0.688, MQ=0.771) | lr=0.000003
|
| 308 |
+
2026-01-25 11:49:22 | INFO |
|
| 309 |
+
============================================================
|
| 310 |
+
Validation Results (took 8.80s):
|
| 311 |
+
Samples: 346 instruction, 346 quality
|
| 312 |
+
Instruction Acc: 0.7254
|
| 313 |
+
Quality Acc: 0.7370
|
| 314 |
+
Average Acc: 0.7312
|
| 315 |
+
Total Loss: 0.5172
|
| 316 |
+
Instruction Loss: 0.5459
|
| 317 |
+
Quality Loss: 0.4884
|
| 318 |
+
============================================================
|
| 319 |
+
2026-01-25 11:49:22 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1399.pt (filtered to 71.624M trainable parameters)
|
| 320 |
+
2026-01-25 11:49:23 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1399.pt (422.0MB)
|
| 321 |
+
2026-01-25 11:49:23 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_899.pt
|
| 322 |
+
2026-01-25 11:49:23 | INFO | Best 3 checkpoints:
|
| 323 |
+
2026-01-25 11:49:23 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 324 |
+
2026-01-25 11:49:23 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 325 |
+
2026-01-25 11:49:23 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 326 |
+
2026-01-25 11:49:24 | INFO | Step 1400: loss=0.4927 | IF_loss=0.5769, MQ_loss=0.4085 | acc=0.708 (IF=0.667, MQ=0.750) | lr=0.000002
|
| 327 |
+
2026-01-25 11:51:38 | INFO |
|
| 328 |
+
============================================================
|
| 329 |
+
Validation Results (took 9.00s):
|
| 330 |
+
Samples: 346 instruction, 346 quality
|
| 331 |
+
Instruction Acc: 0.7254
|
| 332 |
+
Quality Acc: 0.7370
|
| 333 |
+
Average Acc: 0.7312
|
| 334 |
+
Total Loss: 0.5166
|
| 335 |
+
Instruction Loss: 0.5454
|
| 336 |
+
Quality Loss: 0.4878
|
| 337 |
+
============================================================
|
| 338 |
+
2026-01-25 11:51:38 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1499.pt (filtered to 71.624M trainable parameters)
|
| 339 |
+
2026-01-25 11:51:38 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1499.pt (422.0MB)
|
| 340 |
+
2026-01-25 11:51:38 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1499.pt
|
| 341 |
+
2026-01-25 11:51:38 | INFO | Best 3 checkpoints:
|
| 342 |
+
2026-01-25 11:51:38 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 343 |
+
2026-01-25 11:51:38 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 344 |
+
2026-01-25 11:51:38 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 345 |
+
2026-01-25 11:51:39 | INFO | Step 1500: loss=0.4455 | IF_loss=0.4833, MQ_loss=0.4076 | acc=0.750 (IF=0.708, MQ=0.792) | lr=0.000001
|
| 346 |
+
2026-01-25 11:53:52 | INFO |
|
| 347 |
+
============================================================
|
| 348 |
+
Validation Results (took 9.35s):
|
| 349 |
+
Samples: 346 instruction, 346 quality
|
| 350 |
+
Instruction Acc: 0.7254
|
| 351 |
+
Quality Acc: 0.7370
|
| 352 |
+
Average Acc: 0.7312
|
| 353 |
+
Total Loss: 0.5173
|
| 354 |
+
Instruction Loss: 0.5462
|
| 355 |
+
Quality Loss: 0.4884
|
| 356 |
+
============================================================
|
| 357 |
+
2026-01-25 11:53:52 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1599.pt (filtered to 71.624M trainable parameters)
|
| 358 |
+
2026-01-25 11:53:53 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1599.pt (422.0MB)
|
| 359 |
+
2026-01-25 11:53:53 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1599.pt
|
| 360 |
+
2026-01-25 11:53:53 | INFO | Best 3 checkpoints:
|
| 361 |
+
2026-01-25 11:53:53 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 362 |
+
2026-01-25 11:53:53 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 363 |
+
2026-01-25 11:53:53 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 364 |
+
2026-01-25 11:53:57 | INFO | Step 1600: loss=0.4107 | IF_loss=0.4762, MQ_loss=0.3453 | acc=0.823 (IF=0.833, MQ=0.812) | lr=0.000001
|
| 365 |
+
2026-01-25 11:56:08 | INFO |
|
| 366 |
+
============================================================
|
| 367 |
+
Validation Results (took 9.61s):
|
| 368 |
+
Samples: 346 instruction, 346 quality
|
| 369 |
+
Instruction Acc: 0.7254
|
| 370 |
+
Quality Acc: 0.7341
|
| 371 |
+
Average Acc: 0.7298
|
| 372 |
+
Total Loss: 0.5172
|
| 373 |
+
Instruction Loss: 0.5463
|
| 374 |
+
Quality Loss: 0.4881
|
| 375 |
+
============================================================
|
| 376 |
+
2026-01-25 11:56:08 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1699.pt (filtered to 71.624M trainable parameters)
|
| 377 |
+
2026-01-25 11:56:09 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1699.pt (422.0MB)
|
| 378 |
+
2026-01-25 11:56:09 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1699.pt
|
| 379 |
+
2026-01-25 11:56:09 | INFO | Best 3 checkpoints:
|
| 380 |
+
2026-01-25 11:56:09 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 381 |
+
2026-01-25 11:56:09 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 382 |
+
2026-01-25 11:56:09 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 383 |
+
2026-01-25 11:56:10 | INFO | Step 1700: loss=0.4612 | IF_loss=0.4737, MQ_loss=0.4487 | acc=0.802 (IF=0.750, MQ=0.854) | lr=0.000001
|
| 384 |
+
2026-01-25 11:58:26 | INFO |
|
| 385 |
+
============================================================
|
| 386 |
+
Validation Results (took 9.02s):
|
| 387 |
+
Samples: 346 instruction, 346 quality
|
| 388 |
+
Instruction Acc: 0.7254
|
| 389 |
+
Quality Acc: 0.7341
|
| 390 |
+
Average Acc: 0.7298
|
| 391 |
+
Total Loss: 0.5173
|
| 392 |
+
Instruction Loss: 0.5463
|
| 393 |
+
Quality Loss: 0.4883
|
| 394 |
+
============================================================
|
| 395 |
+
2026-01-25 11:58:26 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1799.pt (filtered to 71.624M trainable parameters)
|
| 396 |
+
2026-01-25 11:58:26 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1799.pt (422.0MB)
|
| 397 |
+
2026-01-25 11:58:26 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1799.pt
|
| 398 |
+
2026-01-25 11:58:26 | INFO | Best 3 checkpoints:
|
| 399 |
+
2026-01-25 11:58:26 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 400 |
+
2026-01-25 11:58:26 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 401 |
+
2026-01-25 11:58:26 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 402 |
+
2026-01-25 11:58:27 | INFO | Step 1800: loss=0.4209 | IF_loss=0.4485, MQ_loss=0.3933 | acc=0.833 (IF=0.833, MQ=0.833) | lr=0.000000
|
| 403 |
+
2026-01-25 12:00:38 | INFO |
|
| 404 |
+
============================================================
|
| 405 |
+
Validation Results (took 9.32s):
|
| 406 |
+
Samples: 346 instruction, 346 quality
|
| 407 |
+
Instruction Acc: 0.7283
|
| 408 |
+
Quality Acc: 0.7341
|
| 409 |
+
Average Acc: 0.7312
|
| 410 |
+
Total Loss: 0.5172
|
| 411 |
+
Instruction Loss: 0.5464
|
| 412 |
+
Quality Loss: 0.4881
|
| 413 |
+
============================================================
|
| 414 |
+
2026-01-25 12:00:38 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1899.pt (filtered to 71.624M trainable parameters)
|
| 415 |
+
2026-01-25 12:00:39 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1899.pt (422.0MB)
|
| 416 |
+
2026-01-25 12:00:39 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1899.pt
|
| 417 |
+
2026-01-25 12:00:39 | INFO | Best 3 checkpoints:
|
| 418 |
+
2026-01-25 12:00:39 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 419 |
+
2026-01-25 12:00:39 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 420 |
+
2026-01-25 12:00:39 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 421 |
+
2026-01-25 12:00:40 | INFO | Step 1900: loss=0.5161 | IF_loss=0.5734, MQ_loss=0.4587 | acc=0.688 (IF=0.646, MQ=0.729) | lr=0.000000
|
| 422 |
+
2026-01-25 12:02:54 | INFO |
|
| 423 |
+
============================================================
|
| 424 |
+
Validation Results (took 8.56s):
|
| 425 |
+
Samples: 346 instruction, 346 quality
|
| 426 |
+
Instruction Acc: 0.7283
|
| 427 |
+
Quality Acc: 0.7341
|
| 428 |
+
Average Acc: 0.7312
|
| 429 |
+
Total Loss: 0.5172
|
| 430 |
+
Instruction Loss: 0.5463
|
| 431 |
+
Quality Loss: 0.4881
|
| 432 |
+
============================================================
|
| 433 |
+
2026-01-25 12:02:54 | INFO | Saving checkpoint to /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1999.pt (filtered to 71.624M trainable parameters)
|
| 434 |
+
2026-01-25 12:02:54 | INFO | Checkpoint saved: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1999.pt (422.0MB)
|
| 435 |
+
2026-01-25 12:02:54 | INFO | Removed old checkpoint: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1117/ckpt/reward_model.best_1999.pt
|
| 436 |
+
2026-01-25 12:02:54 | INFO | Best 3 checkpoints:
|
| 437 |
+
2026-01-25 12:02:54 | INFO | 1. Step 1299: acc=0.7327 (reward_model.best_1299.pt)
|
| 438 |
+
2026-01-25 12:02:54 | INFO | 2. Step 1199: acc=0.7312 (reward_model.best_1199.pt)
|
| 439 |
+
2026-01-25 12:02:54 | INFO | 3. Step 1399: acc=0.7312 (reward_model.best_1399.pt)
|
| 440 |
+
2026-01-25 12:02:54 | INFO | Training complete!
|
| 441 |
+
2026-01-25 12:02:54 | INFO | Training complete!
|
20260125_1231/config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '5'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: true
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: true
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 200
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
max_samples: null
|
| 21 |
+
max_val_samples: null
|
| 22 |
+
metadata_jsonl: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/all_comparisons.jsonl
|
| 23 |
+
mode: raw_text_frozen_audio
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: null
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
no_condition: false
|
| 87 |
+
null_embedding:
|
| 88 |
+
audio:
|
| 89 |
+
dropout: 0.5
|
| 90 |
+
length: 10
|
| 91 |
+
lyrics:
|
| 92 |
+
dropout: 0.3
|
| 93 |
+
length: 10
|
| 94 |
+
text:
|
| 95 |
+
dropout: 0
|
| 96 |
+
length: 10
|
| 97 |
+
output_dim: 2
|
| 98 |
+
prompt_tf_depth: 4
|
| 99 |
+
sr: 24000
|
| 100 |
+
text_encoder:
|
| 101 |
+
name: muq_mulan
|
| 102 |
+
tune: null
|
| 103 |
+
text_lora_config: null
|
| 104 |
+
train_muq_depth: 0
|
| 105 |
+
train_muqmulan: false
|
| 106 |
+
use_audio: true
|
| 107 |
+
use_layer_idx: -1
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 10000
|
| 125 |
+
warmup_steps: 10
|
| 126 |
+
max_grad_norm: 1
|
| 127 |
+
mlp_lr: 1.0e-05
|
| 128 |
+
num_train_steps: 10000
|
| 129 |
+
num_valid_batches: null
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: 1.0e-05
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/contrastive/20260123_1403_tune_mulan_transformer/ckpt/reward_model.best_27252.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: false
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 500
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: false
|
20260125_1231/eval_results_0125_1707.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
20260125_1231/reward_model/1769315504.5030606/events.out.tfevents.1769315504.MACLAB-S004.2360364.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4430ab0d26659fcc57b20fd55521428ea8d75daa98b78169e91d25ebffd673d8
|
| 3 |
+
size 503
|
20260125_1231/reward_model/1769315504.5045948/hparams.yml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
batch_size: 48
|
| 2 |
+
grad_accum_every: 1
|
| 3 |
+
learning_rate: 1.0e-05
|
| 4 |
+
num_train_steps: 10000
|
20260125_1231/reward_model/events.out.tfevents.1769315504.MACLAB-S004.2360364.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76c1b87e41adf1f3ac89056217504376dbdc359f097f179ff31512b42ce3c00f
|
| 3 |
+
size 5986202
|
20260125_1231/test_20260125_191012_reward_model.best_4499/test_results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metrics": {
|
| 3 |
+
"overall": {
|
| 4 |
+
"total_samples": 3463,
|
| 5 |
+
"mq": {
|
| 6 |
+
"num_non_tie": 3463,
|
| 7 |
+
"accuracy": 0.7678313600924054,
|
| 8 |
+
"avg_confidence": 0.7792870429358729,
|
| 9 |
+
"std_confidence": 0.14544243560525433
|
| 10 |
+
},
|
| 11 |
+
"if": {
|
| 12 |
+
"num_non_tie": 3463,
|
| 13 |
+
"accuracy": 0.6996823563384349,
|
| 14 |
+
"avg_confidence": 0.7319046033279007,
|
| 15 |
+
"std_confidence": 0.1337721067466566
|
| 16 |
+
},
|
| 17 |
+
"avg_accuracy": 0.7337568582154201
|
| 18 |
+
},
|
| 19 |
+
"by_modality": {
|
| 20 |
+
"has_audio": {
|
| 21 |
+
"count": 884,
|
| 22 |
+
"mq_acc": 0.8122171945701357,
|
| 23 |
+
"if_acc": 0.7726244343891403,
|
| 24 |
+
"mq_conf": 0.7979760396534501,
|
| 25 |
+
"if_conf": 0.7548858234785262
|
| 26 |
+
},
|
| 27 |
+
"no_audio": {
|
| 28 |
+
"count": 2579,
|
| 29 |
+
"mq_acc": 0.7526172935246219,
|
| 30 |
+
"if_acc": 0.6746801085692129,
|
| 31 |
+
"mq_conf": 0.7728810432854897,
|
| 32 |
+
"if_conf": 0.7240273646256312
|
| 33 |
+
},
|
| 34 |
+
"has_lyrics": {
|
| 35 |
+
"count": 943,
|
| 36 |
+
"mq_acc": 0.8038176033934252,
|
| 37 |
+
"if_acc": 0.7592788971367974,
|
| 38 |
+
"mq_conf": 0.7926488271573695,
|
| 39 |
+
"if_conf": 0.732424895558605
|
| 40 |
+
},
|
| 41 |
+
"no_lyrics": {
|
| 42 |
+
"count": 2520,
|
| 43 |
+
"mq_acc": 0.7543650793650793,
|
| 44 |
+
"if_acc": 0.6773809523809524,
|
| 45 |
+
"mq_conf": 0.7742869784434636,
|
| 46 |
+
"if_conf": 0.7317099066717284
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"by_score_diff": {
|
| 50 |
+
"score_range": {
|
| 51 |
+
"min":
|
20260125_1231/test_20260125_194533_reward_model.best_4499/test_config.yaml
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
DEVICES: '7'
|
| 2 |
+
accelerate:
|
| 3 |
+
mixed_precision: bf16
|
| 4 |
+
basics:
|
| 5 |
+
random_seed: 42
|
| 6 |
+
save_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/reward_model
|
| 7 |
+
dataset:
|
| 8 |
+
audio_dropout:
|
| 9 |
+
apply_to_eval: false
|
| 10 |
+
apply_to_ref: true
|
| 11 |
+
enabled: false
|
| 12 |
+
eval_only_on_training: true
|
| 13 |
+
max_duration: 1500
|
| 14 |
+
min_duration: 1500
|
| 15 |
+
train_mode: start
|
| 16 |
+
cache_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/tmp
|
| 17 |
+
db_path: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/backend/database.db
|
| 18 |
+
duration: 600.0
|
| 19 |
+
embedding_dir: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/supervised_embeddings
|
| 20 |
+
mode: raw_text_frozen_audio
|
| 21 |
+
max_samples: null
|
| 22 |
+
max_val_samples: null
|
| 23 |
+
metadata_jsonl: ${project_root}/CMI-Training/all_comparisons.jsonl
|
| 24 |
+
preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/unbiased_qwen/train.json
|
| 25 |
+
sample_rate: 24000
|
| 26 |
+
val_preference_file: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/CMI-Training/human_annotations/train.json
|
| 27 |
+
loss:
|
| 28 |
+
IF_ratio: 0.5
|
| 29 |
+
filter_ties: true
|
| 30 |
+
label_smoothing: 0.0
|
| 31 |
+
reduction: mean
|
| 32 |
+
model:
|
| 33 |
+
attention_mode: SA
|
| 34 |
+
attn_dropout: 0.0
|
| 35 |
+
category_embeddings: null
|
| 36 |
+
dim: 768
|
| 37 |
+
dim_head: 64
|
| 38 |
+
downsample:
|
| 39 |
+
configs:
|
| 40 |
+
conv2_4x:
|
| 41 |
+
factor: 4
|
| 42 |
+
kernel_size: 5
|
| 43 |
+
kind: conv*2
|
| 44 |
+
use_layernorm: true
|
| 45 |
+
conv_4x:
|
| 46 |
+
factor: 4
|
| 47 |
+
kernel_size: 5
|
| 48 |
+
kind: conv
|
| 49 |
+
stage: 1
|
| 50 |
+
use_layernorm: true
|
| 51 |
+
glu_4x:
|
| 52 |
+
factor: 4
|
| 53 |
+
kernel_size: 5
|
| 54 |
+
kind: gluconv*2+pw
|
| 55 |
+
use_layernorm: true
|
| 56 |
+
mean:
|
| 57 |
+
factor: 2
|
| 58 |
+
kind: mean
|
| 59 |
+
mean_4x:
|
| 60 |
+
dropout: 0.0
|
| 61 |
+
factor: 30
|
| 62 |
+
kind: mean+mlp
|
| 63 |
+
mlp_ratio: 2.0
|
| 64 |
+
none:
|
| 65 |
+
factor: 1
|
| 66 |
+
kind: none
|
| 67 |
+
eval: mean_4x
|
| 68 |
+
ref: null
|
| 69 |
+
text: none
|
| 70 |
+
ff_dropout: 0.0
|
| 71 |
+
ff_mult: 4
|
| 72 |
+
freeze_audio: true
|
| 73 |
+
freeze_text: true
|
| 74 |
+
gradient_checkpointing: false
|
| 75 |
+
heads: 8
|
| 76 |
+
joint_tf_depth: 1
|
| 77 |
+
load_config:
|
| 78 |
+
checkpoint_path: null
|
| 79 |
+
frozen_from_pretrained: true
|
| 80 |
+
pretrained_name: OpenMuQ/MuQ-MuLan-large
|
| 81 |
+
strict: false
|
| 82 |
+
mlp_dim: 768
|
| 83 |
+
mode: concat_text_late
|
| 84 |
+
model_name: OpenMuQ/MuQ-MuLan-large
|
| 85 |
+
name: reward
|
| 86 |
+
null_embedding:
|
| 87 |
+
audio:
|
| 88 |
+
dropout: 0.5
|
| 89 |
+
length: 10
|
| 90 |
+
lyrics:
|
| 91 |
+
dropout: 0.3
|
| 92 |
+
length: 10
|
| 93 |
+
text:
|
| 94 |
+
dropout: 0
|
| 95 |
+
length: 10
|
| 96 |
+
output_dim: 2
|
| 97 |
+
prompt_tf_depth: 4
|
| 98 |
+
sr: 24000
|
| 99 |
+
text_encoder:
|
| 100 |
+
name: muq_mulan
|
| 101 |
+
tune: null
|
| 102 |
+
text_lora_config: null
|
| 103 |
+
train_muq_depth: 0
|
| 104 |
+
train_muqmulan: false
|
| 105 |
+
use_layer_idx: -1
|
| 106 |
+
use_audio: true
|
| 107 |
+
no_condition: false
|
| 108 |
+
project_root: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena
|
| 109 |
+
run_name: null
|
| 110 |
+
train:
|
| 111 |
+
batch_size: 48
|
| 112 |
+
betas:
|
| 113 |
+
- 0.9
|
| 114 |
+
- 0.99
|
| 115 |
+
ema_decay: 0.9999
|
| 116 |
+
ema_update_every: 1
|
| 117 |
+
enable_gradient_checkpointing: true
|
| 118 |
+
force_clear_prev_results: false
|
| 119 |
+
grad_accum_every: 1
|
| 120 |
+
log_tensorboard: true
|
| 121 |
+
lr_schedule:
|
| 122 |
+
min_lr_ratio: 0.001
|
| 123 |
+
name: linear_cosine
|
| 124 |
+
total_steps: 30000
|
| 125 |
+
warmup_steps: 300
|
| 126 |
+
max_grad_norm: 100
|
| 127 |
+
mlp_lr: 0.0001
|
| 128 |
+
num_train_steps: 30000
|
| 129 |
+
num_valid_batches: 10
|
| 130 |
+
num_workers: 8
|
| 131 |
+
other_lr: null
|
| 132 |
+
resume: /data/yrb/musicarena/Haiwen/offline_data/cmi-arena/experiments/finetune_human/20260125_1231/ckpt/reward_model.best_4499.pt
|
| 133 |
+
resume_optimizer: false
|
| 134 |
+
save_model_every: 2000
|
| 135 |
+
use_checkpoint_config: true
|
| 136 |
+
use_ema: true
|
| 137 |
+
use_lion: false
|
| 138 |
+
valid_batch_size: 20
|
| 139 |
+
valid_every: 2000
|
| 140 |
+
valid_frac: 0.1
|
| 141 |
+
verify_weights_on_load: true
|
| 142 |
+
validate_only: true
|
20260125_1231/test_20260125_194533_reward_model.best_4499/test_results.json
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metrics": {
|
| 3 |
+
"overall": {
|
| 4 |
+
"total_samples": 3463,
|
| 5 |
+
"mq": {
|
| 6 |
+
"num_non_tie": 3463,
|
| 7 |
+
"accuracy": 0.7678313600924054,
|
| 8 |
+
"avg_confidence": 0.7792870429358729,
|
| 9 |
+
"std_confidence": 0.14544243560525433
|
| 10 |
+
},
|
| 11 |
+
"if": {
|
| 12 |
+
"num_non_tie": 3463,
|
| 13 |
+
"accuracy": 0.6996823563384349,
|
| 14 |
+
"avg_confidence": 0.7319046033279007,
|
| 15 |
+
"std_confidence": 0.1337721067466566
|
| 16 |
+
},
|
| 17 |
+
"avg_accuracy": 0.7337568582154201
|
| 18 |
+
},
|
| 19 |
+
"by_modality": {
|
| 20 |
+
"has_audio": {
|
| 21 |
+
"count": 884,
|
| 22 |
+
"mq_acc": 0.8122171945701357,
|
| 23 |
+
"if_acc": 0.7726244343891403,
|
| 24 |
+
"mq_conf": 0.7979760396534501,
|
| 25 |
+
"if_conf": 0.7548858234785262
|
| 26 |
+
},
|
| 27 |
+
"no_audio": {
|
| 28 |
+
"count": 2579,
|
| 29 |
+
"mq_acc": 0.7526172935246219,
|
| 30 |
+
"if_acc": 0.6746801085692129,
|
| 31 |
+
"mq_conf": 0.7728810432854897,
|
| 32 |
+
"if_conf": 0.7240273646256312
|
| 33 |
+
},
|
| 34 |
+
"has_lyrics": {
|
| 35 |
+
"count": 943,
|
| 36 |
+
"mq_acc": 0.8038176033934252,
|
| 37 |
+
"if_acc": 0.7592788971367974,
|
| 38 |
+
"mq_conf": 0.7926488271573695,
|
| 39 |
+
"if_conf": 0.732424895558605
|
| 40 |
+
},
|
| 41 |
+
"no_lyrics": {
|
| 42 |
+
"count": 2520,
|
| 43 |
+
"mq_acc": 0.7543650793650793,
|
| 44 |
+
"if_acc": 0.6773809523809524,
|
| 45 |
+
"mq_conf": 0.7742869784434636,
|
| 46 |
+
"if_conf": 0.7317099066717284
|
| 47 |
+
}
|
| 48 |
+
},
|
| 49 |
+
"by_score_diff": {
|
| 50 |
+
"score_range": {
|
| 51 |
+
"min": 0.0,
|
| 52 |
+
"max": 4.0
|
| 53 |
+
},
|
| 54 |
+
"0-1": {
|
| 55 |
+
"count": 59,
|
| 56 |
+
"mq_acc": 0.6610169491525424,
|
| 57 |
+
"if_acc": 0.6271186440677966,
|
| 58 |
+
"mq_conf": 0.6943881915787519,
|
| 59 |
+
"if_conf": 0.6363525047140607
|
| 60 |
+
},
|
| 61 |
+
"1-2": {
|
| 62 |
+
"count": 367,
|
| 63 |
+
"mq_acc": 0.6512261580381471,
|
| 64 |
+
"if_acc": 0.5858310626702997,
|
| 65 |
+
"mq_conf": 0.7223088202099709,
|
| 66 |
+
"if_conf": 0.674628816443503
|
| 67 |
+
},
|
| 68 |
+
"2-3": {
|
| 69 |
+
"count": 1192,
|
| 70 |
+
"mq_acc": 0.7374161073825504,
|
| 71 |
+
"if_acc": 0.6459731543624161,
|
| 72 |
+
"mq_conf": 0.7660567649958918,
|
| 73 |
+
"if_conf": 0.7183731070800916
|
| 74 |
+
},
|
| 75 |
+
"3+": {
|
| 76 |
+
"count": 1845,
|
| 77 |
+
"mq_acc": 0.8140921409214092,
|
| 78 |
+
"if_acc": 0.759349593495935,
|
| 79 |
+
"mq_conf": 0.8018835368518261,
|
| 80 |
+
"if_conf": 0.7550955687111955
|
| 81 |
+
}
|
| 82 |
+
},
|
| 83 |
+
"by_duration": {
|
| 84 |
+
"0-30s": {
|
| 85 |
+
"count": 1097,
|
| 86 |
+
"mq_acc": 0.7529626253418414,
|
| 87 |
+
"if_acc": 0.6435733819507748,
|
| 88 |
+
"mq_conf": 0.7710004717301757,
|
| 89 |
+
"if_conf": 0.7230207648403338
|
| 90 |
+
},
|
| 91 |
+
"30-60s": {
|
| 92 |
+
"count": 1007,
|
| 93 |
+
"mq_acc": 0.7864945382323734,
|
| 94 |
+
"if_acc": 0.7149950347567031,
|
| 95 |
+
"mq_conf": 0.8106962935453376,
|
| 96 |
+
"if_conf": 0.7519949673422517
|
| 97 |
+
},
|
| 98 |
+
"60-90s": {
|
| 99 |
+
"count": 741,
|
| 100 |
+
"mq_acc": 0.7840755735492577,
|
| 101 |
+
"if_acc": 0.7651821862348178,
|
| 102 |
+
"mq_conf": 0.7830080420542986,
|
| 103 |
+
"if_conf": 0.7454769649164558
|
| 104 |
+
},
|
| 105 |
+
"90-120s": {
|
| 106 |
+
"count": 12,
|
| 107 |
+
"mq_acc": 0.6666666666666666,
|
| 108 |
+
"if_acc": 0.5,
|
| 109 |
+
"mq_conf": 0.7591231515010198,
|
| 110 |
+
"if_conf": 0.7100299447774887
|
| 111 |
+
},
|
| 112 |
+
"120s+": {
|
| 113 |
+
"count": 606,
|
| 114 |
+
"mq_acc": 0.7458745874587459,
|
| 115 |
+
"if_acc": 0.6996699669966997,
|
| 116 |
+
"mq_conf": 0.7379437419447569,
|
| 117 |
+
"if_conf": 0.6984391746544601
|
| 118 |
+
}
|
| 119 |
+
},
|
| 120 |
+
"model_pairs": {
|
| 121 |
+
"total_pairs": 128,
|
| 122 |
+
"valid_pairs": 45,
|
| 123 |
+
"min_count_threshold": 10,
|
| 124 |
+
"top_5": [
|
| 125 |
+
{
|
| 126 |
+
"pair": "jamify vs suno-v4.5-plus",
|
| 127 |
+
"count": 13,
|
| 128 |
+
"mq_acc": 1.0,
|
| 129 |
+
"if_acc": 0.9230769230769231,
|
| 130 |
+
"avg_acc": 0.9615384615384616,
|
| 131 |
+
"mq_conf": 0.8753359088530908,
|
| 132 |
+
"if_conf": 0.8881901227510892
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"pair": "jamify vs suno-v4",
|
| 136 |
+
"count": 11,
|
| 137 |
+
"mq_acc": 1.0,
|
| 138 |
+
"if_acc": 0.9090909090909091,
|
| 139 |
+
"avg_acc": 0.9545454545454546,
|
| 140 |
+
"mq_conf": 0.8784825205802917,
|
| 141 |
+
"if_conf": 0.8930827325040643
|
| 142 |
+
},
|
| 143 |
+
{
|
| 144 |
+
"pair": "audioldm2-music vs magenta-rt-large",
|
| 145 |
+
"count": 116,
|
| 146 |
+
"mq_acc": 0.9224137931034483,
|
| 147 |
+
"if_acc": 0.9137931034482759,
|
| 148 |
+
"avg_acc": 0.9181034482758621,
|
| 149 |
+
"mq_conf": 0.8762899652637285,
|
| 150 |
+
"if_conf": 0.8524722950211887
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"pair": "jamify vs levo",
|
| 154 |
+
"count": 65,
|
| 155 |
+
"mq_acc": 0.9538461538461539,
|
| 156 |
+
"if_acc": 0.8769230769230769,
|
| 157 |
+
"avg_acc": 0.9153846153846155,
|
| 158 |
+
"mq_conf": 0.8298323347018315,
|
| 159 |
+
"if_conf": 0.7724327931037316
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"pair": "jamify vs suno-v3.5",
|
| 163 |
+
"count": 27,
|
| 164 |
+
"mq_acc": 0.9629629629629629,
|
| 165 |
+
"if_acc": 0.8518518518518519,
|
| 166 |
+
"avg_acc": 0.9074074074074074,
|
| 167 |
+
"mq_conf": 0.8570071635422883,
|
| 168 |
+
"if_conf": 0.8533604873551263
|
| 169 |
+
}
|
| 170 |
+
],
|
| 171 |
+
"bottom_5": [
|
| 172 |
+
{
|
| 173 |
+
"pair": "audioldm vs sao",
|
| 174 |
+
"count": 12,
|
| 175 |
+
"mq_acc": 0.5,
|
| 176 |
+
"if_acc": 0.5,
|
| 177 |
+
"avg_acc": 0.5,
|
| 178 |
+
"mq_conf": 0.6995708495378494,
|
| 179 |
+
"if_conf": 0.6728040178616842
|
| 180 |
+
},
|
| 181 |
+
{
|
| 182 |
+
"pair": "audioldm2-music vs sao-small",
|
| 183 |
+
"count": 20,
|
| 184 |
+
"mq_acc": 0.6,
|
| 185 |
+
"if_acc": 0.45,
|
| 186 |
+
"avg_acc": 0.525,
|
| 187 |
+
"mq_conf": 0.7222894936800003,
|
| 188 |
+
"if_conf": 0.6842079430818557
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"pair": "sao vs sao-small",
|
| 192 |
+
"count": 18,
|
| 193 |
+
"mq_acc": 0.5555555555555556,
|
| 194 |
+
"if_acc": 0.5555555555555556,
|
| 195 |
+
"avg_acc": 0.5555555555555556,
|
| 196 |
+
"mq_conf": 0.7228857609960768,
|
| 197 |
+
"if_conf": 0.680361701382531
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"pair": "suno-v3.5 vs suno-v5",
|
| 201 |
+
"count": 10,
|
| 202 |
+
"mq_acc": 0.7,
|
| 203 |
+
"if_acc": 0.5,
|
| 204 |
+
"avg_acc": 0.6,
|
| 205 |
+
"mq_conf": 0.6346197962760926,
|
| 206 |
+
"if_conf": 0.6014198660850525
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"pair": "magenta-rt-large vs sao-small",
|
| 210 |
+
"count": 16,
|
| 211 |
+
"mq_acc": 0.6875,
|
| 212 |
+
"if_acc": 0.5625,
|
| 213 |
+
"avg_acc": 0.625,
|
| 214 |
+
"mq_conf": 0.8538035452365875,
|
| 215 |
+
"if_conf": 0.8277972266077995
|
| 216 |
+
}
|
| 217 |
+
]
|
| 218 |
+
},
|
| 219 |
+
"alignment": {
|
| 220 |
+
"total_non_tie": 3463,
|
| 221 |
+
"agreement_rate": 0.9347386658966215,
|
| 222 |
+
"agree": {
|
| 223 |
+
"count": 3237,
|
| 224 |
+
"mq_acc": 0.788693234476367,
|
| 225 |
+
"if_acc": 0.7055915971578622,
|
| 226 |
+
"mq_conf": 0.7936204567454942,
|
| 227 |
+
"if_conf": 0.7445049513111816
|
| 228 |
+
},
|
| 229 |
+
"disagree": {
|
| 230 |
+
"count": 226,
|
| 231 |
+
"mq_acc": 0.4690265486725664,
|
| 232 |
+
"if_acc": 0.6150442477876106,
|
| 233 |
+
"mq_conf": 0.5739894300962971,
|
| 234 |
+
"if_conf": 0.5514297076558645
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
},
|
| 238 |
+
"summary": "======================================================================\nTEST METRICS SUMMARY\n======================================================================\n\n[Overall] Total: 3463 samples\n MQ: Acc=0.7678, Conf=0.7793 ± 0.1454 (n=3463)\n IF: Acc=0.6997, Conf=0.7319 ± 0.1338 (n=3463)\n Avg Acc: 0.7338\n\n[By Prompt Modality]\n has_audio : n= 884, MQ_acc=0.8122, IF_acc=0.7726, MQ_conf=0.7980, IF_conf=0.7549\n no_audio : n= 2579, MQ_acc=0.7526, IF_acc=0.6747, MQ_conf=0.7729, IF_conf=0.7240\n has_lyrics : n= 943, MQ_acc=0.8038, IF_acc=0.7593, MQ_conf=0.7926, IF_conf=0.7324\n no_lyrics : n= 2520, MQ_acc=0.7544, IF_acc=0.6774, MQ_conf=0.7743, IF_conf=0.7317\n\n[By Score Difference (data confidence bins)]\n Score range: [0.00, 4.00]\n 0-1 : n= 59, MQ_acc=0.6610, IF_acc=0.6271, MQ_conf=0.6944, IF_conf=0.6364\n 1-2 : n= 367, MQ_acc=0.6512, IF_acc=0.5858, MQ_conf=0.7223, IF_conf=0.6746\n 2-3 : n= 1192, MQ_acc=0.7374, IF_acc=0.6460, MQ_conf=0.7661, IF_conf=0.7184\n 3+ : n= 1845, MQ_acc=0.8141, IF_acc=0.7593, MQ_conf=0.8019, IF_conf=0.7551\n\n[By Duration (gen_a + gen_b)]\n 0-30s : n= 1097, MQ_acc=0.7530, IF_acc=0.6436, MQ_conf=0.7710, IF_conf=0.7230\n 30-60s : n= 1007, MQ_acc=0.7865, IF_acc=0.7150, MQ_conf=0.8107, IF_conf=0.7520\n 60-90s : n= 741, MQ_acc=0.7841, IF_acc=0.7652, MQ_conf=0.7830, IF_conf=0.7455\n 90-120s : n= 12, MQ_acc=0.6667, IF_acc=0.5000, MQ_conf=0.7591, IF_conf=0.7100\n 120s+ : n= 606, MQ_acc=0.7459, IF_acc=0.6997, MQ_conf=0.7379, IF_conf=0.6984\n\n[Model Pairs] Total: 128 unique pairs\n Top 5 (by avg acc):\n jamify vs suno-v4.5-plus : n= 13, MQ=1.0000, IF=0.9231, Avg=0.9615, Conf(MQ/IF)=0.875/0.888\n jamify vs suno-v4 : n= 11, MQ=1.0000, IF=0.9091, Avg=0.9545, Conf(MQ/IF)=0.878/0.893\n audioldm2-music vs magenta-rt-large : n= 116, MQ=0.9224, IF=0.9138, Avg=0.9181, Conf(MQ/IF)=0.876/0.852\n jamify vs levo : n= 65, MQ=0.9538, IF=0.8769, Avg=0.9154, Conf(MQ/IF)=0.830/0.772\n jamify vs suno-v3.5 : n= 27, MQ=0.9630, IF=0.8519, Avg=0.9074, 
Conf(MQ/IF)=0.857/0.853\n Bottom 5:\n audioldm vs sao : n= 12, MQ=0.5000, IF=0.5000, Avg=0.5000, Conf(MQ/IF)=0.700/0.673\n audioldm2-music vs sao-small : n= 20, MQ=0.6000, IF=0.4500, Avg=0.5250, Conf(MQ/IF)=0.722/0.684\n sao vs sao-small : n= 18, MQ=0.5556, IF=0.5556, Avg=0.5556, Conf(MQ/IF)=0.723/0.680\n suno-v3.5 vs suno-v5 : n= 10, MQ=0.7000, IF=0.5000, Avg=0.6000, Conf(MQ/IF)=0.635/0.601\n magenta-rt-large vs sao-small : n= 16, MQ=0.6875, IF=0.5625, Avg=0.6250, Conf(MQ/IF)=0.854/0.828\n\n[MQ vs IF Alignment]\n Agreement rate: 0.9347 (3463 non-tie samples)\n When agree (n= 3237): MQ_acc=0.7887, IF_acc=0.7056, MQ_conf=0.7936, IF_conf=0.7445\n When disagree(n= 226): MQ_acc=0.4690, IF_acc=0.6150, MQ_conf=0.5740, IF_conf=0.5514\n======================================================================"
|
| 239 |
+
}
|