Update tfm_alive_fine-tuning.yaml
Browse files- tfm_alive_fine-tuning.yaml +0 -137
tfm_alive_fine-tuning.yaml
CHANGED
|
@@ -23,145 +23,8 @@ model:
|
|
| 23 |
alive_hidden_dim: 512 # Hidden dimension for alive prediction head
|
| 24 |
alive_hidden_layers: 1 # Number of hidden layers in alive prediction head
|
| 25 |
|
| 26 |
-
# Training configuration
|
| 27 |
-
training:
|
| 28 |
-
batch_size: 16
|
| 29 |
-
grad_accum_steps: 4
|
| 30 |
-
learning_rate_prediction_head: 0.00005
|
| 31 |
-
learning_rate_embedder: 0
|
| 32 |
-
learning_rate_processor: 0.00001
|
| 33 |
-
weight_decay: 0.01
|
| 34 |
-
num_epochs: 30
|
| 35 |
-
warmup_steps: 1000
|
| 36 |
-
max_grad_norm: 1.0
|
| 37 |
-
|
| 38 |
-
scheduler: 'cosine' # 'cosine' or 'linear'
|
| 39 |
-
|
| 40 |
-
base_model_path: '/share/guwanjun-local/cs2-demo-analytics/checkpoints_pretraining_v2/final.pth'
|
| 41 |
-
checkpoint_dir: 'checkpoints_alive_fine-tuning_v2'
|
| 42 |
-
|
| 43 |
-
from_scratch: False # Whether to train from scratch or fine-tune from a pre-trained model
|
| 44 |
-
|
| 45 |
-
use_lora: False # Whether to use LoRA for fine-tuning
|
| 46 |
-
lora_r: 8
|
| 47 |
-
lora_alpha: 16
|
| 48 |
-
lora_dropout: 0.1
|
| 49 |
-
|
| 50 |
# Data configuration
|
| 51 |
data:
|
| 52 |
-
train_data_path:
|
| 53 |
-
- archive_1.pt
|
| 54 |
-
- new_archive_1.pt
|
| 55 |
-
- archive_2.pt
|
| 56 |
-
- new_archive_2.pt
|
| 57 |
-
- archive_3.pt
|
| 58 |
-
- new_archive_3.pt
|
| 59 |
-
- archive_4.pt
|
| 60 |
-
- new_archive_4.pt
|
| 61 |
-
- new_archive_5.pt
|
| 62 |
-
- archive_5.pt
|
| 63 |
-
- archive_6.pt
|
| 64 |
-
- new_archive_6.pt
|
| 65 |
-
- archive_7.pt
|
| 66 |
-
- new_archive_7.pt
|
| 67 |
-
- archive_8.pt
|
| 68 |
-
- new_archive_8.pt
|
| 69 |
-
- archive_9.pt
|
| 70 |
-
- new_archive_9.pt
|
| 71 |
-
- archive_10.pt
|
| 72 |
-
- new_archive_10.pt
|
| 73 |
-
- archive_11.pt
|
| 74 |
-
- new_archive_11.pt
|
| 75 |
-
- archive_12.pt
|
| 76 |
-
- new_archive_12.pt
|
| 77 |
-
- archive_13.pt
|
| 78 |
-
- new_archive_13.pt
|
| 79 |
-
- archive_14.pt
|
| 80 |
-
- new_archive_14.pt
|
| 81 |
-
- archive_15.pt
|
| 82 |
-
- new_archive_15.pt
|
| 83 |
-
- archive_16.pt
|
| 84 |
-
- new_archive_16.pt
|
| 85 |
-
- archive_17.pt
|
| 86 |
-
- new_archive_17.pt
|
| 87 |
-
- archive_18.pt
|
| 88 |
-
- new_archive_18.pt
|
| 89 |
-
- archive_19.pt
|
| 90 |
-
- new_archive_19.pt
|
| 91 |
-
- archive_20.pt
|
| 92 |
-
- new_archive_20.pt
|
| 93 |
-
- archive_21.pt
|
| 94 |
-
- new_archive_21.pt
|
| 95 |
-
- archive_22.pt
|
| 96 |
-
- new_archive_22.pt
|
| 97 |
-
- archive_23.pt
|
| 98 |
-
- new_archive_23.pt
|
| 99 |
-
- archive_24.pt
|
| 100 |
-
- new_archive_24.pt
|
| 101 |
-
- archive_25.pt
|
| 102 |
-
- new_archive_25.pt
|
| 103 |
-
- archive_26.pt
|
| 104 |
-
- new_archive_26.pt
|
| 105 |
-
- archive_27.pt
|
| 106 |
-
- new_archive_27.pt
|
| 107 |
-
- archive_28.pt
|
| 108 |
-
- new_archive_28.pt
|
| 109 |
-
- archive_29.pt
|
| 110 |
-
- new_archive_29.pt
|
| 111 |
-
- archive_30.pt
|
| 112 |
-
- new_archive_30.pt
|
| 113 |
-
- archive_31.pt
|
| 114 |
-
- new_archive_31.pt
|
| 115 |
-
- archive_32.pt
|
| 116 |
-
- new_archive_32.pt
|
| 117 |
-
- archive_33.pt
|
| 118 |
-
- new_archive_33.pt
|
| 119 |
-
- archive_34.pt
|
| 120 |
-
- new_archive_34.pt
|
| 121 |
-
- archive_35.pt
|
| 122 |
-
- new_archive_35.pt
|
| 123 |
-
- archive_36.pt
|
| 124 |
-
- new_archive_36.pt
|
| 125 |
-
- archive_37.pt
|
| 126 |
-
- new_archive_37.pt
|
| 127 |
-
- archive_38.pt
|
| 128 |
-
- new_archive_38.pt
|
| 129 |
-
- archive_39.pt
|
| 130 |
-
- new_archive_39.pt
|
| 131 |
-
- archive_40.pt
|
| 132 |
-
- new_archive_40.pt
|
| 133 |
-
- archive_41.pt
|
| 134 |
-
- new_archive_41.pt
|
| 135 |
-
- archive_42.pt
|
| 136 |
-
- new_archive_42.pt
|
| 137 |
-
- archive_43.pt
|
| 138 |
-
- new_archive_43.pt
|
| 139 |
-
- archive_44.pt
|
| 140 |
-
- new_archive_44.pt
|
| 141 |
-
- archive_45.pt
|
| 142 |
-
- archive_46.pt
|
| 143 |
-
- archive_47.pt
|
| 144 |
-
- archive_48.pt
|
| 145 |
-
- archive_49.pt
|
| 146 |
-
- archive_50.pt
|
| 147 |
-
- archive_51.pt
|
| 148 |
-
- archive_52.pt
|
| 149 |
-
- archive_53.pt
|
| 150 |
-
- archive_54.pt
|
| 151 |
-
- archive_55.pt
|
| 152 |
-
- archive_56.pt
|
| 153 |
-
- archive_57.pt
|
| 154 |
-
- archive_58.pt
|
| 155 |
-
- archive_59.pt
|
| 156 |
-
- archive_60.pt
|
| 157 |
-
- archive_61.pt
|
| 158 |
-
- archive_106.pt
|
| 159 |
-
- archive_107.pt
|
| 160 |
-
val_data_path:
|
| 161 |
-
- archive_108.pt
|
| 162 |
-
- archive_109.pt
|
| 163 |
-
num_workers: 4
|
| 164 |
-
|
| 165 |
# Data dimensions (must match model)
|
| 166 |
ticks_per_sample: 64 # Number of ticks in each training sample
|
| 167 |
seq_len: 512 # Must match model.seq_len
|
|
|
|
| 23 |
alive_hidden_dim: 512 # Hidden dimension for alive prediction head
|
| 24 |
alive_hidden_layers: 1 # Number of hidden layers in alive prediction head
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Data configuration
|
| 27 |
data:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Data dimensions (must match model)
|
| 29 |
ticks_per_sample: 64 # Number of ticks in each training sample
|
| 30 |
seq_len: 512 # Must match model.seq_len
|