kyLELEng committed on
Commit
54d9eae
·
verified ·
1 Parent(s): 8f77c32

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +57 -273
README.md CHANGED
@@ -14,279 +14,63 @@ library_name: pytorch
14
 
15
  Public optimized TabNet classifier trained on a private MAG7 daily ML dataset.
16
 
17
- The dataset remains private and is not distributed in this repo.
18
 
19
- ## Metrics
20
 
21
- ```json
22
- {
23
- "app_version": "mag7-tabnet-a10g-large-v1",
24
- "dataset_repo": "kyLELEng/mag7-ml-daily-dataset-5y",
25
- "dataset_file": "mag7_ml_daily_dataset_5y.csv",
26
- "model_repo": "kyLELEng/mag7-tabnet-model",
27
- "target_col": "target_next_10d_outperform_qqq",
28
- "hardware_requested": "a10g-large",
29
- "device": "cuda",
30
- "split_info": {
31
- "train_end": "2024-03-06",
32
- "validation_end": "2025-02-19",
33
- "test_start": "2025-02-20"
34
- },
35
- "sample_counts": {
36
- "train": 5019,
37
- "val": 1673,
38
- "test": 1673
39
- },
40
- "feature_count": 247,
41
- "best_trial": {
42
- "trial": 6,
43
- "n_d": 32,
44
- "n_a": 32,
45
- "n_steps": 5,
46
- "gamma": 1.3472923985423138,
47
- "lambda_sparse": 5.9536858055136465e-05,
48
- "lr": 0.00025119563811957976,
49
- "mask_type": "entmax",
50
- "auc": 0.5403767080140649,
51
- "accuracy": 0.5092647937836222,
52
- "precision": 0.624390243902439,
53
- "recall": 0.14678899082568808,
54
- "best_epoch": 2,
55
- "best_cost": 0.5403767080140649
56
- },
57
- "test_metrics": {
58
- "auc": 0.503700813063913,
59
- "accuracy": 0.5008965929468021,
60
- "precision": 0.48014440433212996,
61
- "recall": 0.16140776699029127
62
- },
63
- "all_trials": [
64
- {
65
- "trial": 0,
66
- "n_d": 32,
67
- "n_a": 32,
68
- "n_steps": 4,
69
- "gamma": 1.4,
70
- "lambda_sparse": 0.0001,
71
- "lr": 0.002,
72
- "mask_type": "entmax",
73
- "auc": 0.5317779381277989,
74
- "accuracy": 0.5349671249252839,
75
- "precision": 0.54510556621881,
76
- "recall": 0.6513761467889908,
77
- "best_epoch": 10,
78
- "best_cost": 0.5317779381277989
79
- },
80
- {
81
- "trial": 1,
82
- "n_d": 16,
83
- "n_a": 16,
84
- "n_steps": 3,
85
- "gamma": 1.4494945037080806,
86
- "lambda_sparse": 6.375218482218559e-05,
87
- "lr": 0.00011188485779759972,
88
- "mask_type": "entmax",
89
- "auc": 0.5107234076670217,
90
- "accuracy": 0.5265989240884639,
91
- "precision": 0.5384615384615384,
92
- "recall": 0.6422018348623854,
93
- "best_epoch": 2,
94
- "best_cost": 0.5107234076670217
95
- },
96
- {
97
- "trial": 2,
98
- "n_d": 16,
99
- "n_a": 16,
100
- "n_steps": 4,
101
- "gamma": 1.149294403307938,
102
- "lambda_sparse": 2.450878873326482e-06,
103
- "lr": 0.004094099286627188,
104
- "mask_type": "sparsemax",
105
- "auc": 0.500190415650162,
106
- "accuracy": 0.49731022115959356,
107
- "precision": 0.5166130760986066,
108
- "recall": 0.5527522935779816,
109
- "best_epoch": 7,
110
- "best_cost": 0.500190415650162
111
- },
112
- {
113
- "trial": 3,
114
- "n_d": 64,
115
- "n_a": 64,
116
- "n_steps": 4,
117
- "gamma": 1.3582951866108537,
118
- "lambda_sparse": 3.421133844935391e-05,
119
- "lr": 0.0013391616977365564,
120
- "mask_type": "sparsemax",
121
- "auc": 0.5071226906733556,
122
- "accuracy": 0.5200239091452481,
123
- "precision": 0.5306122448979592,
124
- "recall": 0.6857798165137615,
125
- "best_epoch": 20,
126
- "best_cost": 0.5071226906733556
127
- },
128
- {
129
- "trial": 4,
130
- "n_d": 24,
131
- "n_a": 24,
132
- "n_steps": 5,
133
- "gamma": 1.6516277161527944,
134
- "lambda_sparse": 0.00010258450148297295,
135
- "lr": 0.0007431045712850257,
136
- "mask_type": "entmax",
137
- "auc": 0.5032370660527551,
138
- "accuracy": 0.5122534369396294,
139
- "precision": 0.5347394540942928,
140
- "recall": 0.49426605504587157,
141
- "best_epoch": 1,
142
- "best_cost": 0.5032370660527551
143
- },
144
- {
145
- "trial": 5,
146
- "n_d": 64,
147
- "n_a": 64,
148
- "n_steps": 5,
149
- "gamma": 1.4843526882090186,
150
- "lambda_sparse": 0.0008764777947850791,
151
- "lr": 0.00022267224321802473,
152
- "mask_type": "entmax",
153
- "auc": 0.530329061150626,
154
- "accuracy": 0.4931261207411835,
155
- "precision": 0.5214285714285715,
156
- "recall": 0.3348623853211009,
157
- "best_epoch": 1,
158
- "best_cost": 0.530329061150626
159
- },
160
- {
161
- "trial": 6,
162
- "n_d": 32,
163
- "n_a": 32,
164
- "n_steps": 5,
165
- "gamma": 1.3472923985423138,
166
- "lambda_sparse": 5.9536858055136465e-05,
167
- "lr": 0.00025119563811957976,
168
- "mask_type": "entmax",
169
- "auc": 0.5403767080140649,
170
- "accuracy": 0.5092647937836222,
171
- "precision": 0.624390243902439,
172
- "recall": 0.14678899082568808,
173
- "best_epoch": 2,
174
- "best_cost": 0.5403767080140649
175
- },
176
- {
177
- "trial": 7,
178
- "n_d": 32,
179
- "n_a": 32,
180
- "n_steps": 5,
181
- "gamma": 1.7071334862896903,
182
- "lambda_sparse": 2.433767087272514e-06,
183
- "lr": 0.0006223353226297901,
184
- "mask_type": "sparsemax",
185
- "auc": 0.5347272331603844,
186
- "accuracy": 0.511655708308428,
187
- "precision": 0.5429017160686428,
188
- "recall": 0.39908256880733944,
189
- "best_epoch": 3,
190
- "best_cost": 0.5347272331603844
191
- },
192
- {
193
- "trial": 8,
194
- "n_d": 24,
195
- "n_a": 24,
196
- "n_steps": 3,
197
- "gamma": 1.7271610157752573,
198
- "lambda_sparse": 1.9491536528703556e-05,
199
- "lr": 0.00017826874616773067,
200
- "mask_type": "entmax",
201
- "auc": 0.5325689505091113,
202
- "accuracy": 0.5289898386132695,
203
- "precision": 0.541095890410959,
204
- "recall": 0.6341743119266054,
205
- "best_epoch": 7,
206
- "best_cost": 0.5325689505091113
207
- },
208
- {
209
- "trial": 9,
210
- "n_d": 64,
211
- "n_a": 64,
212
- "n_steps": 5,
213
- "gamma": 1.2415512194516267,
214
- "lambda_sparse": 0.0005061663106438996,
215
- "lr": 0.00023396561610545004,
216
- "mask_type": "sparsemax",
217
- "auc": 0.5177659233297828,
218
- "accuracy": 0.5014943215780036,
219
- "precision": 0.5372549019607843,
220
- "recall": 0.31422018348623854,
221
- "best_epoch": 2,
222
- "best_cost": 0.5177659233297828
223
- },
224
- {
225
- "trial": 10,
226
- "n_d": 16,
227
- "n_a": 16,
228
- "n_steps": 3,
229
- "gamma": 1.3420235116289478,
230
- "lambda_sparse": 2.5510824218092704e-05,
231
- "lr": 0.0034707903086838303,
232
- "mask_type": "entmax",
233
- "auc": 0.5203859281402834,
234
- "accuracy": 0.4793783622235505,
235
- "precision": 0.5068493150684932,
236
- "recall": 0.04243119266055046,
237
- "best_epoch": 2,
238
- "best_cost": 0.5203859281402834
239
- },
240
- {
241
- "trial": 11,
242
- "n_d": 48,
243
- "n_a": 48,
244
- "n_steps": 5,
245
- "gamma": 1.3375244625197118,
246
- "lambda_sparse": 1.1236515243495598e-06,
247
- "lr": 0.0001869391570285696,
248
- "mask_type": "entmax",
249
- "auc": 0.5245679139607601,
250
- "accuracy": 0.5062761506276151,
251
- "precision": 0.5515695067264574,
252
- "recall": 0.28211009174311924,
253
- "best_epoch": 12,
254
- "best_cost": 0.5245679139607601
255
- },
256
- {
257
- "trial": 12,
258
- "n_d": 64,
259
- "n_a": 64,
260
- "n_steps": 3,
261
- "gamma": 1.5837279414231316,
262
- "lambda_sparse": 1.4588283152733528e-06,
263
- "lr": 0.0001142576991362432,
264
- "mask_type": "entmax",
265
- "auc": 0.5068657011304677,
266
- "accuracy": 0.5176329946204423,
267
- "precision": 0.5301764159702879,
268
- "recall": 0.6548165137614679,
269
- "best_epoch": 6,
270
- "best_cost": 0.5068657011304677
271
- },
272
- {
273
- "trial": 13,
274
- "n_d": 16,
275
- "n_a": 16,
276
- "n_steps": 5,
277
- "gamma": 1.5115173584668025,
278
- "lambda_sparse": 8.435857396344958e-06,
279
- "lr": 0.0003463700123384165,
280
- "mask_type": "entmax",
281
- "auc": 0.5253324399546438,
282
- "accuracy": 0.5182307232516438,
283
- "precision": 0.5355603448275862,
284
- "recall": 0.569954128440367,
285
- "best_epoch": 6,
286
- "best_cost": 0.5253324399546438
287
- }
288
- ]
289
- }
290
- ```
291
 
292
- Research only. Not investment advice.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  Public optimized TabNet classifier trained on a private MAG7 daily ML dataset.
16
 
17
+ ## Data Access
18
 
19
+ The model is public. The dataset remains private and is not distributed in this repository.
20
 
21
+ - Private training dataset: `kyLELEng/mag7-ml-daily-dataset-5y`
22
+ - Dataset file used internally: `mag7_ml_daily_dataset_5y.csv`
23
+ - No raw dataset rows, target labels, or future-return columns are included here.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ ## Training Setup
26
+
27
+ - Task: binary classification
28
+ - Target: `target_next_10d_outperform_qqq`
29
+ - Architecture: TabNet classifier
30
+ - Feature count: 247 selected numeric features plus ticker dummies
31
+ - Split: chronological train / validation / test
32
+ - Train end: 2024-03-06
33
+ - Validation end: 2025-02-19
34
+ - Test start: 2025-02-20
35
+ - Hardware: Hugging Face Jobs, `a10g-large`
36
+ - Device used: CUDA
37
+
38
+ ## Results
39
+
40
+ Validation was used for hyperparameter selection. Test metrics are out-of-sample on the final chronological holdout.
41
+
42
+ | Metric | Value |
43
+ |---|---:|
44
+ | Best validation AUC | 0.5404 |
45
+ | Test AUC | 0.5037 |
46
+ | Test accuracy | 0.5009 |
47
+ | Test precision | 0.4801 |
48
+ | Test recall | 0.1614 |
49
+
50
+ The holdout edge is weak: test AUC (0.5037) and test accuracy (0.5009) are close to chance for a binary classifier. Treat this as a research artifact, not as a production trading signal.
51
+
52
+ ## Best Trial
53
+
54
+ - `n_d`: 32
55
+ - `n_a`: 32
56
+ - `n_steps`: 5
57
+ - `gamma`: 1.3473
58
+ - `lambda_sparse`: 0.0000595
59
+ - Learning rate: 0.000251
60
+ - Mask type: `entmax`
61
+ - Best epoch: 2
62
+
63
+ ## Files
64
+
65
+ - `tabnet_model.zip`
66
+ - `scaler.joblib`
67
+ - `feature_columns.json`
68
+ - `metrics.json`
69
+ - `trial_results.csv`
70
+ - `test_scores_public.csv`
71
+
72
+ `test_scores_public.csv` contains dates, tickers, and model scores only. It intentionally excludes labels and future returns.
73
+
74
+ ## Intended Use
75
+
76
+ This model is for personal research and model comparison on MAG7 daily technical features. It is not investment advice and should not be used for live trading without separate validation, walk-forward testing, slippage assumptions, and risk controls.