gary2oos committed on
Commit
69409ce
·
verified ·
1 Parent(s): 486b8e5

Update tfm_duel_fine-tuning.yaml

Browse files
Files changed (1)
  1. tfm_duel_fine-tuning.yaml +0 -137
tfm_duel_fine-tuning.yaml CHANGED
@@ -24,145 +24,8 @@ model:
24
  duel_hidden_layers: 2 # Number of hidden layers in duel prediction head
25
  duel_player_embedding_dim: 64
26
 
27
- # Training configuration
28
- training:
29
- batch_size: 16
30
- grad_accum_steps: 4
31
- learning_rate_prediction_head: 0.00005
32
- learning_rate_embedder: 0
33
- learning_rate_processor: 0.00001
34
- weight_decay: 0.01
35
- num_epochs: 30
36
- warmup_steps: 1000
37
- max_grad_norm: 1.0
38
-
39
- scheduler: 'cosine' # 'cosine' or 'linear'
40
-
41
- base_model_path: '/share/guwanjun-local/cs2-demo-analytics/checkpoints_pretraining_v2/final.pth'
42
- checkpoint_dir: 'checkpoints_duel_fine-tuning_v2'
43
-
44
- from_scratch: False # Whether to train from scratch or fine-tune from a pre-trained model
45
-
46
- use_lora: False # Whether to use LoRA for fine-tuning
47
- lora_r: 8
48
- lora_alpha: 16
49
- lora_dropout: 0.1
50
-
51
  # Data configuration
52
  data:
53
- train_data_path:
54
- - archive_1.pt
55
- - new_archive_1.pt
56
- - archive_2.pt
57
- - new_archive_2.pt
58
- - archive_3.pt
59
- - new_archive_3.pt
60
- - archive_4.pt
61
- - new_archive_4.pt
62
- - new_archive_5.pt
63
- - archive_5.pt
64
- - archive_6.pt
65
- - new_archive_6.pt
66
- - archive_7.pt
67
- - new_archive_7.pt
68
- - archive_8.pt
69
- - new_archive_8.pt
70
- - archive_9.pt
71
- - new_archive_9.pt
72
- - archive_10.pt
73
- - new_archive_10.pt
74
- - archive_11.pt
75
- - new_archive_11.pt
76
- - archive_12.pt
77
- - new_archive_12.pt
78
- - archive_13.pt
79
- - new_archive_13.pt
80
- - archive_14.pt
81
- - new_archive_14.pt
82
- - archive_15.pt
83
- - new_archive_15.pt
84
- - archive_16.pt
85
- - new_archive_16.pt
86
- - archive_17.pt
87
- - new_archive_17.pt
88
- - archive_18.pt
89
- - new_archive_18.pt
90
- - archive_19.pt
91
- - new_archive_19.pt
92
- - archive_20.pt
93
- - new_archive_20.pt
94
- - archive_21.pt
95
- - new_archive_21.pt
96
- - archive_22.pt
97
- - new_archive_22.pt
98
- - archive_23.pt
99
- - new_archive_23.pt
100
- - archive_24.pt
101
- - new_archive_24.pt
102
- - archive_25.pt
103
- - new_archive_25.pt
104
- - archive_26.pt
105
- - new_archive_26.pt
106
- - archive_27.pt
107
- - new_archive_27.pt
108
- - archive_28.pt
109
- - new_archive_28.pt
110
- - archive_29.pt
111
- - new_archive_29.pt
112
- - archive_30.pt
113
- - new_archive_30.pt
114
- - archive_31.pt
115
- - new_archive_31.pt
116
- - archive_32.pt
117
- - new_archive_32.pt
118
- - archive_33.pt
119
- - new_archive_33.pt
120
- - archive_34.pt
121
- - new_archive_34.pt
122
- - archive_35.pt
123
- - new_archive_35.pt
124
- - archive_36.pt
125
- - new_archive_36.pt
126
- - archive_37.pt
127
- - new_archive_37.pt
128
- - archive_38.pt
129
- - new_archive_38.pt
130
- - archive_39.pt
131
- - new_archive_39.pt
132
- - archive_40.pt
133
- - new_archive_40.pt
134
- - archive_41.pt
135
- - new_archive_41.pt
136
- - archive_42.pt
137
- - new_archive_42.pt
138
- - archive_43.pt
139
- - new_archive_43.pt
140
- - archive_44.pt
141
- - new_archive_44.pt
142
- - archive_45.pt
143
- - archive_46.pt
144
- - archive_47.pt
145
- - archive_48.pt
146
- - archive_49.pt
147
- - archive_50.pt
148
- - archive_51.pt
149
- - archive_52.pt
150
- - archive_53.pt
151
- - archive_54.pt
152
- - archive_55.pt
153
- - archive_56.pt
154
- - archive_57.pt
155
- - archive_58.pt
156
- - archive_59.pt
157
- - archive_60.pt
158
- - archive_61.pt
159
- - archive_106.pt
160
- - archive_107.pt
161
- val_data_path:
162
- - archive_108.pt
163
- - archive_109.pt
164
- num_workers: 4
165
-
166
  # Data dimensions (must match model)
167
  ticks_per_sample: 64 # Number of ticks in each training sample
168
  seq_len: 512 # Must match model.seq_len
 
24
  duel_hidden_layers: 2 # Number of hidden layers in duel prediction head
25
  duel_player_embedding_dim: 64
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # Data configuration
28
  data:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Data dimensions (must match model)
30
  ticks_per_sample: 64 # Number of ticks in each training sample
31
  seq_len: 512 # Must match model.seq_len