kyLELEng committed on
Commit
8f2fed2
·
verified ·
1 Parent(s): 8249d0b

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +62 -247
README.md CHANGED
@@ -12,254 +12,69 @@ library_name: pytorch
12
 
13
  # MAG7 TCN Sequence Model
14
 
15
- Public TCN sequence model trained on a private MAG7 daily ML dataset.
16
 
17
- The dataset remains private and is not distributed in this repo.
18
 
19
- ## Metrics
20
 
21
- ```json
22
- {
23
- "app_version": "mag7-tcn-a10g-large-v1",
24
- "dataset_repo": "kyLELEng/mag7-ml-daily-dataset-5y",
25
- "dataset_file": "mag7_ml_daily_dataset_5y.csv",
26
- "model_repo": "kyLELEng/mag7-tcn-sequence-model",
27
- "target_col": "target_next_10d_outperform_qqq",
28
- "hardware_requested": "a10g-large",
29
- "device": "cuda",
30
- "split_info": {
31
- "train_end": "2024-03-06",
32
- "validation_end": "2025-02-19",
33
- "test_start": "2025-02-20"
34
- },
35
- "sample_counts": {
36
- "train": 4606,
37
- "val": 1673,
38
- "test": 1673
39
- },
40
- "feature_count": 187,
41
- "lookback": 60,
42
- "best_trial": {
43
- "trial": 5,
44
- "hidden_channels": 96,
45
- "levels": 4,
46
- "kernel_size": 3,
47
- "dropout": 0.1381333485236117,
48
- "lr": 0.00011813006897892851,
49
- "weight_decay": 6.880481204463462e-05,
50
- "best_score": 0.5440268185410438,
51
- "epochs": 27,
52
- "val_loss": 1.1796759750161852,
53
- "val_auc": 0.5440268185410438,
54
- "val_accuracy": 0.543335325762104,
55
- "val_precision": 0.5676691729323309,
56
- "val_recall": 0.5194954128440367
57
- },
58
- "test_metrics": {
59
- "loss": 1.1004198278699602,
60
- "auc": 0.5051273914485346,
61
- "accuracy": 0.5086670651524208,
62
- "precision": 0.5013192612137203,
63
- "recall": 0.46116504854368934,
64
- "top20pct_avg_future_return": 0.00976730976253748,
65
- "bottom20pct_avg_future_return": 0.02428087405860424,
66
- "top_minus_bottom_future_return": -0.014513564296066761
67
- },
68
- "all_trials": [
69
- {
70
- "trial": 0,
71
- "hidden_channels": 64,
72
- "levels": 4,
73
- "kernel_size": 3,
74
- "dropout": 0.18,
75
- "lr": 0.0008,
76
- "weight_decay": 0.0001,
77
- "best_score": 0.5125545476411366,
78
- "epochs": 15,
79
- "val_loss": 0.8187018079417092,
80
- "val_auc": 0.5125545476411366,
81
- "val_accuracy": 0.5218170950388523,
82
- "val_precision": 0.5596026490066225,
83
- "val_recall": 0.3876146788990826
84
- },
85
- {
86
- "trial": 1,
87
- "hidden_channels": 128,
88
- "levels": 4,
89
- "kernel_size": 5,
90
- "dropout": 0.2894351363662022,
91
- "lr": 0.00015235342538756977,
92
- "weight_decay": 7.952411684214868e-06,
93
- "best_score": 0.5237847759108454,
94
- "epochs": 19,
95
- "val_loss": 0.9681848202432904,
96
- "val_auc": 0.5237847759108454,
97
- "val_accuracy": 0.5343693962940825,
98
- "val_precision": 0.5566382460414129,
99
- "val_recall": 0.5240825688073395
100
- },
101
- {
102
- "trial": 2,
103
- "hidden_channels": 128,
104
- "levels": 3,
105
- "kernel_size": 5,
106
- "dropout": 0.08142163223270518,
107
- "lr": 0.001570682160922224,
108
- "weight_decay": 0.0002461547871651075,
109
- "best_score": 0.5250647126871227,
110
- "epochs": 19,
111
- "val_loss": 1.1032832307474953,
112
- "val_auc": 0.5250647126871227,
113
- "val_accuracy": 0.5331739390316796,
114
- "val_precision": 0.5486631016042781,
115
- "val_recall": 0.588302752293578
116
- },
117
- {
118
- "trial": 3,
119
- "hidden_channels": 32,
120
- "levels": 4,
121
- "kernel_size": 5,
122
- "dropout": 0.15517491526720878,
123
- "lr": 0.0001711111456311903,
124
- "weight_decay": 2.16385879826713e-05,
125
- "best_score": 0.5232178240502124,
126
- "epochs": 13,
127
- "val_loss": 0.6918037618909564,
128
- "val_auc": 0.5232178240502124,
129
- "val_accuracy": 0.5062761506276151,
130
- "val_precision": 0.53125,
131
- "val_recall": 0.44839449541284404
132
- },
133
- {
134
- "trial": 4,
135
- "hidden_channels": 48,
136
- "levels": 4,
137
- "kernel_size": 3,
138
- "dropout": 0.22944428506011294,
139
- "lr": 0.0031070660232048945,
140
- "weight_decay": 0.00023877329777338924,
141
- "best_score": 0.5051548236722445,
142
- "epochs": 18,
143
- "val_loss": 1.028452375105449,
144
- "val_auc": 0.5051548236722445,
145
- "val_accuracy": 0.5212193664076509,
146
- "val_precision": 0.5361875637104995,
147
- "val_recall": 0.6032110091743119
148
- },
149
- {
150
- "trial": 5,
151
- "hidden_channels": 96,
152
- "levels": 4,
153
- "kernel_size": 3,
154
- "dropout": 0.1381333485236117,
155
- "lr": 0.00011813006897892851,
156
- "weight_decay": 6.880481204463462e-05,
157
- "best_score": 0.5440268185410438,
158
- "epochs": 27,
159
- "val_loss": 1.1796759750161852,
160
- "val_auc": 0.5440268185410438,
161
- "val_accuracy": 0.543335325762104,
162
- "val_precision": 0.5676691729323309,
163
- "val_recall": 0.5194954128440367
164
- },
165
- {
166
- "trial": 6,
167
- "hidden_channels": 128,
168
- "levels": 3,
169
- "kernel_size": 2,
170
- "dropout": 0.08963367526887096,
171
- "lr": 0.00047349019976111593,
172
- "weight_decay": 2.503906317853087e-05,
173
- "best_score": 0.5291278676883253,
174
- "epochs": 31,
175
- "val_loss": 1.7204655834606715,
176
- "val_auc": 0.5291278676883253,
177
- "val_accuracy": 0.5242080095636581,
178
- "val_precision": 0.55,
179
- "val_recall": 0.4793577981651376
180
- },
181
- {
182
- "trial": 7,
183
- "hidden_channels": 128,
184
- "levels": 5,
185
- "kernel_size": 5,
186
- "dropout": 0.21881176458186874,
187
- "lr": 0.0004412501237233734,
188
- "weight_decay": 5.527703833149022e-06,
189
- "best_score": 0.5276761273179168,
190
- "epochs": 16,
191
- "val_loss": 0.8110667296818325,
192
- "val_auc": 0.5276761273179168,
193
- "val_accuracy": 0.5098625224148237,
194
- "val_precision": 0.521630615640599,
195
- "val_recall": 0.7190366972477065
196
- },
197
- {
198
- "trial": 8,
199
- "hidden_channels": 96,
200
- "levels": 5,
201
- "kernel_size": 2,
202
- "dropout": 0.13194857887603387,
203
- "lr": 0.0009472281705883164,
204
- "weight_decay": 3.997791791749733e-06,
205
- "best_score": 0.5203902232301367,
206
- "epochs": 17,
207
- "val_loss": 1.0085366197994776,
208
- "val_auc": 0.5203902232301367,
209
- "val_accuracy": 0.5146443514644351,
210
- "val_precision": 0.5251677852348994,
211
- "val_recall": 0.7178899082568807
212
- },
213
- {
214
- "trial": 9,
215
- "hidden_channels": 96,
216
- "levels": 4,
217
- "kernel_size": 3,
218
- "dropout": 0.30411288704647127,
219
- "lr": 0.0001155004929570295,
220
- "weight_decay": 6.350353678482887e-06,
221
- "best_score": 0.5126612090391598,
222
- "epochs": 13,
223
- "val_loss": 0.6987957826682499,
224
- "val_auc": 0.5126612090391598,
225
- "val_accuracy": 0.4955170352659892,
226
- "val_precision": 0.5180878552971576,
227
- "val_recall": 0.4598623853211009
228
- },
229
- {
230
- "trial": 10,
231
- "hidden_channels": 32,
232
- "levels": 5,
233
- "kernel_size": 5,
234
- "dropout": 0.21764351866447423,
235
- "lr": 0.0017384228382429352,
236
- "weight_decay": 8.301403060986722e-05,
237
- "best_score": 0.5219650895096725,
238
- "epochs": 17,
239
- "val_loss": 0.8541087976523808,
240
- "val_auc": 0.5219650895096725,
241
- "val_accuracy": 0.5194261805140467,
242
- "val_precision": 0.5374449339207048,
243
- "val_recall": 0.5596330275229358
244
- },
245
- {
246
- "trial": 11,
247
- "hidden_channels": 32,
248
- "levels": 5,
249
- "kernel_size": 3,
250
- "dropout": 0.22610883177165192,
251
- "lr": 0.00046048215182379454,
252
- "weight_decay": 0.00041116688533911693,
253
- "best_score": 0.5061920878718117,
254
- "epochs": 13,
255
- "val_loss": 0.692179513829095,
256
- "val_auc": 0.5061920878718117,
257
- "val_accuracy": 0.5146443514644351,
258
- "val_precision": 0.5903614457831325,
259
- "val_recall": 0.22477064220183487
260
- }
261
- ]
262
- }
263
- ```
264
 
265
- Research only. Not investment advice.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # MAG7 TCN Sequence Model
14
 
15
+ Public Temporal Convolutional Network (TCN) classifier trained on a private MAG7 daily ML dataset.
16
 
17
+ ## Data Access
18
 
19
+ The model is public. The dataset remains private and is not distributed in this repository.
20
 
21
+ - Private training dataset: `kyLELEng/mag7-ml-daily-dataset-5y`
22
+ - Dataset file used internally: `mag7_ml_daily_dataset_5y.csv`
23
+ - No raw dataset rows, target labels, or future-return columns are included here.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ ## Training Setup
26
+
27
+ - Task: binary classification
28
+ - Target: `target_next_10d_outperform_qqq`
29
+ - Architecture: TCN sequence model
30
+ - Lookback window: 60 daily bars
31
+ - Feature count: 187 selected numeric features plus ticker dummies
32
+ - Split: chronological train / validation / test
33
+ - Train end: 2024-03-06
34
+ - Validation end: 2025-02-19
35
+ - Test start: 2025-02-20
36
+ - Hardware: Hugging Face Jobs, `a10g-large`
37
+ - Device used: CUDA
38
+
39
+ ## Results
40
+
41
+ Validation was used for hyperparameter selection. Test metrics are out-of-sample on the final chronological holdout.
42
+
43
+ | Metric | Value |
44
+ |---|---:|
45
+ | Best validation AUC | 0.5440 |
46
+ | Test AUC | 0.5051 |
47
+ | Test accuracy | 0.5087 |
48
+ | Test precision | 0.5013 |
49
+ | Test recall | 0.4612 |
50
+ | Top 20 pct avg future return | 0.0098 |
51
+ | Bottom 20 pct avg future return | 0.0243 |
52
+ | Top minus bottom future return | -0.0145 |
53
+
54
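+ The holdout edge is weak: test AUC (0.5051) is close to chance, and the top-minus-bottom future return is negative (-0.0145). Treat this as a research artifact, not as a production trading signal.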
55
+
56
+ ## Best Trial
57
+
58
+ - Hidden channels: 96
59
+ - Levels: 4
60
+ - Kernel size: 3
61
+ - Dropout: 0.1381
62
+ - Learning rate: 0.000118
63
+ - Weight decay: 0.0000688
64
+ - Epochs: 27
65
+
66
+ ## Files
67
+
68
+ - `model.pt`
69
+ - `scaler.joblib`
70
+ - `feature_columns.json`
71
+ - `metrics.json`
72
+ - `training_log.csv`
73
+ - `trial_results.csv`
74
+ - `test_scores_public.csv`
75
+
76
+ `test_scores_public.csv` contains dates, tickers, and model scores only. It intentionally excludes labels and future returns.
77
+
78
+ ## Intended Use
79
+
80
+ This model is for personal research and model comparison on MAG7 daily technical features. It is not investment advice and should not be used for live trading without separate validation, walk-forward testing, slippage assumptions, and risk controls.