gary2oos committed on
Commit
486b8e5
·
verified ·
1 Parent(s): 5f2351a

Update tfm_alive_fine-tuning.yaml

Browse files
Files changed (1) hide show
  1. tfm_alive_fine-tuning.yaml +0 -137
tfm_alive_fine-tuning.yaml CHANGED
@@ -23,145 +23,8 @@ model:
23
  alive_hidden_dim: 512 # Hidden dimension for alive prediction head
24
  alive_hidden_layers: 1 # Number of hidden layers in alive prediction head
25
 
26
- # Training configuration
27
- training:
28
- batch_size: 16
29
- grad_accum_steps: 4
30
- learning_rate_prediction_head: 0.00005
31
- learning_rate_embedder: 0
32
- learning_rate_processor: 0.00001
33
- weight_decay: 0.01
34
- num_epochs: 30
35
- warmup_steps: 1000
36
- max_grad_norm: 1.0
37
-
38
- scheduler: 'cosine' # 'cosine' or 'linear'
39
-
40
- base_model_path: '/share/guwanjun-local/cs2-demo-analytics/checkpoints_pretraining_v2/final.pth'
41
- checkpoint_dir: 'checkpoints_alive_fine-tuning_v2'
42
-
43
- from_scratch: False # Whether to train from scratch or fine-tune from a pre-trained model
44
-
45
- use_lora: False # Whether to use LoRA for fine-tuning
46
- lora_r: 8
47
- lora_alpha: 16
48
- lora_dropout: 0.1
49
-
50
  # Data configuration
51
  data:
52
- train_data_path:
53
- - archive_1.pt
54
- - new_archive_1.pt
55
- - archive_2.pt
56
- - new_archive_2.pt
57
- - archive_3.pt
58
- - new_archive_3.pt
59
- - archive_4.pt
60
- - new_archive_4.pt
61
- - new_archive_5.pt
62
- - archive_5.pt
63
- - archive_6.pt
64
- - new_archive_6.pt
65
- - archive_7.pt
66
- - new_archive_7.pt
67
- - archive_8.pt
68
- - new_archive_8.pt
69
- - archive_9.pt
70
- - new_archive_9.pt
71
- - archive_10.pt
72
- - new_archive_10.pt
73
- - archive_11.pt
74
- - new_archive_11.pt
75
- - archive_12.pt
76
- - new_archive_12.pt
77
- - archive_13.pt
78
- - new_archive_13.pt
79
- - archive_14.pt
80
- - new_archive_14.pt
81
- - archive_15.pt
82
- - new_archive_15.pt
83
- - archive_16.pt
84
- - new_archive_16.pt
85
- - archive_17.pt
86
- - new_archive_17.pt
87
- - archive_18.pt
88
- - new_archive_18.pt
89
- - archive_19.pt
90
- - new_archive_19.pt
91
- - archive_20.pt
92
- - new_archive_20.pt
93
- - archive_21.pt
94
- - new_archive_21.pt
95
- - archive_22.pt
96
- - new_archive_22.pt
97
- - archive_23.pt
98
- - new_archive_23.pt
99
- - archive_24.pt
100
- - new_archive_24.pt
101
- - archive_25.pt
102
- - new_archive_25.pt
103
- - archive_26.pt
104
- - new_archive_26.pt
105
- - archive_27.pt
106
- - new_archive_27.pt
107
- - archive_28.pt
108
- - new_archive_28.pt
109
- - archive_29.pt
110
- - new_archive_29.pt
111
- - archive_30.pt
112
- - new_archive_30.pt
113
- - archive_31.pt
114
- - new_archive_31.pt
115
- - archive_32.pt
116
- - new_archive_32.pt
117
- - archive_33.pt
118
- - new_archive_33.pt
119
- - archive_34.pt
120
- - new_archive_34.pt
121
- - archive_35.pt
122
- - new_archive_35.pt
123
- - archive_36.pt
124
- - new_archive_36.pt
125
- - archive_37.pt
126
- - new_archive_37.pt
127
- - archive_38.pt
128
- - new_archive_38.pt
129
- - archive_39.pt
130
- - new_archive_39.pt
131
- - archive_40.pt
132
- - new_archive_40.pt
133
- - archive_41.pt
134
- - new_archive_41.pt
135
- - archive_42.pt
136
- - new_archive_42.pt
137
- - archive_43.pt
138
- - new_archive_43.pt
139
- - archive_44.pt
140
- - new_archive_44.pt
141
- - archive_45.pt
142
- - archive_46.pt
143
- - archive_47.pt
144
- - archive_48.pt
145
- - archive_49.pt
146
- - archive_50.pt
147
- - archive_51.pt
148
- - archive_52.pt
149
- - archive_53.pt
150
- - archive_54.pt
151
- - archive_55.pt
152
- - archive_56.pt
153
- - archive_57.pt
154
- - archive_58.pt
155
- - archive_59.pt
156
- - archive_60.pt
157
- - archive_61.pt
158
- - archive_106.pt
159
- - archive_107.pt
160
- val_data_path:
161
- - archive_108.pt
162
- - archive_109.pt
163
- num_workers: 4
164
-
165
  # Data dimensions (must match model)
166
  ticks_per_sample: 64 # Number of ticks in each training sample
167
  seq_len: 512 # Must match model.seq_len
 
23
  alive_hidden_dim: 512 # Hidden dimension for alive prediction head
24
  alive_hidden_layers: 1 # Number of hidden layers in alive prediction head
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # Data configuration
27
  data:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Data dimensions (must match model)
29
  ticks_per_sample: 64 # Number of ticks in each training sample
30
  seq_len: 512 # Must match model.seq_len