luohoa97 committed on
Commit
34e94cf
·
verified ·
1 Parent(s): 68e57b2

Deploy BitNet-Transformer Trainer

Browse files
scripts/generate_ai_dataset.py CHANGED
@@ -2,6 +2,7 @@
2
  """
3
  Generate training dataset for AI Fusion strategy.
4
  Fetches historical OHLCV, computes technical features, and labels data.
 
5
  """
6
 
7
  import sys
@@ -10,7 +11,7 @@ import pandas as pd
10
  import numpy as np
11
  import logging
12
  import torch
13
- from datetime import datetime, timedelta
14
 
15
  # Add project root to path
16
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -35,18 +36,17 @@ SYMBOLS = [
35
  "JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
36
  "WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
37
  "CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
38
- "BTC-USD", "ETH-USD", "GC=F", "CL=F" # Crypto and Commodities for diversity
39
  ]
40
  DAYS = 3652 # 10 years
41
  LOOKAHEAD = 5 # Prediction window (days)
42
  TARGET_PCT = 0.02 # Profit target (2%)
43
  STOP_PCT = 0.015 # Stop loss (1.5%)
 
44
 
45
  def generate_features(df):
46
  """Compute technical indicators for the feature vector."""
47
  close = df["close" if "close" in df.columns else "Close"]
48
- high = df["high" if "high" in df.columns else "High"]
49
- low = df["low" if "low" in df.columns else "Low"]
50
 
51
  # 1. RSI(2) - Very short period
52
  rsi2 = calculate_rsi(close, 2) / 100.0
@@ -69,10 +69,10 @@ def generate_features(df):
69
  atr = calculate_atr(df, 14)
70
  atr_pct = atr / close
71
 
72
- # 6. Volume spike (Ratio to SMA 20)
73
  vol = df["volume" if "volume" in df.columns else "Volume"]
74
  vol_sma = vol.rolling(20).mean()
75
- vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0 # Normalized 0-1
76
 
77
  features = pd.DataFrame({
78
  "rsi2": rsi2,
@@ -85,23 +85,18 @@ def generate_features(df):
85
  "vol_ratio": vol_ratio,
86
  }, index=df.index)
87
 
88
- # Ensure all columns are 1D (should be Series already after flatten in market.py)
89
- for col in features.columns:
90
- if isinstance(features[col], pd.DataFrame):
91
- features[col] = features[col].squeeze()
92
-
93
- return features
94
 
95
  def generate_labels(df):
96
- """Label data using Triple Barrier: 1=Buy, 2=Sell, 0=Hold."""
97
  close = df["close" if "close" in df.columns else "Close"].values
98
  labels = np.zeros(len(close))
 
99
 
100
  for i in range(len(close) - LOOKAHEAD):
101
  current_price = close[i]
102
  future_prices = close[i+1 : i+LOOKAHEAD+1]
103
 
104
- # Look ahead for profit target or stop loss
105
  max_ret = (np.max(future_prices) - current_price) / current_price
106
  min_ret = (np.min(future_prices) - current_price) / current_price
107
 
@@ -112,69 +107,72 @@ def generate_labels(df):
112
  else:
113
  labels[i] = 0 # HOLD
114
 
115
- return labels
116
-
117
- SEQ_LEN = 30 # One month of trading days
118
 
119
  def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
120
- """
121
- Programmatically build the sequence dataset.
122
- Used by local scripts and the Hugging Face Cloud trainer.
123
- """
124
- all_features = []
125
- all_labels = []
126
 
127
- for symbol in symbols:
128
- logger.info("Fetching data for %s", symbol)
129
- df = fetch_ohlcv_yfinance(symbol, days=days)
130
- total_days = len(df)
131
- if df.empty or total_days < (days // 2): # Ensure we have enough data
132
- logger.warning("Skipping %s: Insufficient history (%d < %d)", symbol, total_days, days // 2)
133
- continue
 
134
 
135
- features = generate_features(df)
136
- labels = generate_labels(df)
137
-
138
- # Sentiment simulation
139
- sentiment = np.random.normal(0, 0.2, len(features))
140
- features["sentiment"] = sentiment
141
-
142
- # Combine and drop NaN
143
- features["label"] = labels
144
- features = features.dropna()
145
-
146
- if len(features) < (SEQ_LEN + 100):
147
- logger.warning("Skipping %s: Too few valid samples after dropna (%d < %d)", symbol, len(features), SEQ_LEN + 100)
148
- continue
149
 
150
- # Create sequences
151
- feat_vals = features.drop(columns=["label"]).values
152
- label_vals = features["label"].values
153
-
154
- symbol_features = []
155
- symbol_labels = []
156
-
157
- for i in range(len(feat_vals) - SEQ_LEN):
158
- # Window of features: [i : i + SEQ_LEN]
159
- # Label is for the LAST day in the window
160
- symbol_features.append(feat_vals[i : i+SEQ_LEN])
161
- symbol_labels.append(label_vals[i+SEQ_LEN-1])
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- all_features.append(np.array(symbol_features))
164
- all_labels.append(np.array(symbol_labels))
 
165
 
166
- X = np.concatenate(all_features, axis=0)
167
- y = np.concatenate(all_labels, axis=0)
 
168
 
169
- # Save as PyTorch dataset
170
  data = {
171
  "X": torch.tensor(X, dtype=torch.float32),
172
- "y": torch.tensor(y, dtype=torch.long)
 
 
173
  }
174
 
175
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
176
  torch.save(data, output_path)
177
- logger.info("Sequence dataset saved to %s. Shape: %s", output_path, X.shape)
178
  return data
179
 
180
  if __name__ == "__main__":
 
2
  """
3
  Generate training dataset for AI Fusion strategy.
4
  Fetches historical OHLCV, computes technical features, and labels data.
5
+ Includes future returns for Profit/Loss backtesting.
6
  """
7
 
8
  import sys
 
11
  import numpy as np
12
  import logging
13
  import torch
14
+ from tqdm.auto import tqdm
15
 
16
  # Add project root to path
17
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
36
  "JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
37
  "WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
38
  "CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
39
+ "BTC-USD", "ETH-USD", "GC=F", "CL=F"
40
  ]
41
  DAYS = 3652 # 10 years
42
  LOOKAHEAD = 5 # Prediction window (days)
43
  TARGET_PCT = 0.02 # Profit target (2%)
44
  STOP_PCT = 0.015 # Stop loss (1.5%)
45
+ SEQ_LEN = 30 # One month of trading days
46
 
47
  def generate_features(df):
48
  """Compute technical indicators for the feature vector."""
49
  close = df["close" if "close" in df.columns else "Close"]
 
 
50
 
51
  # 1. RSI(2) - Very short period
52
  rsi2 = calculate_rsi(close, 2) / 100.0
 
69
  atr = calculate_atr(df, 14)
70
  atr_pct = atr / close
71
 
72
+ # 6. Volume spike
73
  vol = df["volume" if "volume" in df.columns else "Volume"]
74
  vol_sma = vol.rolling(20).mean()
75
+ vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0
76
 
77
  features = pd.DataFrame({
78
  "rsi2": rsi2,
 
85
  "vol_ratio": vol_ratio,
86
  }, index=df.index)
87
 
88
+ return features.dropna()
 
 
 
 
 
89
 
90
  def generate_labels(df):
91
+ """Label data using Triple Barrier and calculate future returns."""
92
  close = df["close" if "close" in df.columns else "Close"].values
93
  labels = np.zeros(len(close))
94
+ future_rets = np.zeros(len(close))
95
 
96
  for i in range(len(close) - LOOKAHEAD):
97
  current_price = close[i]
98
  future_prices = close[i+1 : i+LOOKAHEAD+1]
99
 
 
100
  max_ret = (np.max(future_prices) - current_price) / current_price
101
  min_ret = (np.min(future_prices) - current_price) / current_price
102
 
 
107
  else:
108
  labels[i] = 0 # HOLD
109
 
110
+ future_rets[i] = (close[i + LOOKAHEAD] - current_price) / current_price
111
+
112
+ return labels, future_rets
113
 
114
  def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
115
+ """Fetch, label, and sequence data for all symbols."""
116
+ all_X, all_y, all_rets = [], [], []
 
 
 
 
117
 
118
+ for symbol in tqdm(symbols, desc="Building Global Dataset"):
119
+ try:
120
+ df = fetch_ohlcv_yfinance(symbol, days=days)
121
+ if len(df) < (SEQ_LEN + LOOKAHEAD + 50):
122
+ continue
123
+
124
+ features = generate_features(df)
125
+ labels, rets = generate_labels(df)
126
 
127
+ # Align features with labels/rets and add sentiment
128
+ df_aligned = pd.DataFrame(index=df.index)
129
+ df_aligned["label"] = labels
130
+ df_aligned["future_ret"] = rets
131
+ df_aligned["sentiment"] = np.random.normal(0, 0.2, len(df))
 
 
 
 
 
 
 
 
 
132
 
133
+ # Merge features
134
+ df_combined = features.join(df_aligned, how="inner").dropna()
135
+
136
+ if len(df_combined) < SEQ_LEN:
137
+ continue
138
+
139
+ feat_vals = df_combined.drop(columns=["label", "future_ret"]).values
140
+ label_vals = df_combined["label"].values.astype(int)
141
+ ret_vals = df_combined["future_ret"].values
142
+
143
+ symbol_X, symbol_y, symbol_rets = [], [], []
144
+ for i in range(len(feat_vals) - SEQ_LEN):
145
+ symbol_X.append(feat_vals[i : i+SEQ_LEN])
146
+ # Label/Ret is for the prediction point at the END of the sequence
147
+ symbol_y.append(label_vals[i+SEQ_LEN-1])
148
+ symbol_rets.append(ret_vals[i+SEQ_LEN-1])
149
+
150
+ if symbol_X:
151
+ all_X.append(np.array(symbol_X))
152
+ all_y.append(np.array(symbol_y))
153
+ all_rets.append(np.array(symbol_rets))
154
+
155
+ except Exception as e:
156
+ logger.error(f"Error processing {symbol}: {e}")
157
 
158
+ if not all_X:
159
+ logger.error("No valid data collected!")
160
+ return None
161
 
162
+ X = np.concatenate(all_X, axis=0)
163
+ y = np.concatenate(all_y, axis=0)
164
+ rets = np.concatenate(all_rets, axis=0)
165
 
 
166
  data = {
167
  "X": torch.tensor(X, dtype=torch.float32),
168
+ "y": torch.tensor(y, dtype=torch.long),
169
+ "rets": torch.tensor(rets, dtype=torch.float32),
170
+ "symbols": symbols
171
  }
172
 
173
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
174
  torch.save(data, output_path)
175
+ logger.info(f" Dataset saved to {output_path} | Shape: {X.shape}")
176
  return data
177
 
178
  if __name__ == "__main__":
scripts/train_ai_model.py CHANGED
@@ -2,6 +2,7 @@
2
  """
3
  Train the BitNet AI Fusion model.
4
  Uses ternary weights (-1, 0, 1) and 8-bit activations.
 
5
  """
6
 
7
  import sys
@@ -10,8 +11,7 @@ import torch
10
  import torch.nn as nn
11
  import torch.optim as optim
12
  from torch.utils.data import DataLoader, TensorDataset, random_split
13
- from tqdm import tqdm
14
- from tqdm import tqdm
15
  import logging
16
  from safetensors.torch import save_file, load_file
17
  from huggingface_hub import HfApi, create_repo, hf_hub_download
@@ -22,7 +22,7 @@ from sklearn.metrics import classification_report, confusion_matrix
22
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
23
 
24
  from trading_cli.strategy.ai.model import create_model
25
- from scripts.generate_ai_dataset import build_dataset
26
 
27
  # Configure logging
28
  logging.basicConfig(level=logging.INFO)
@@ -30,14 +30,13 @@ logger = logging.getLogger(__name__)
30
 
31
  # Hyperparameters
32
  EPOCHS = 100
33
- BATCH_SIZE = 8192 # Aggressive batch size to saturate T4 GPU (16GB)
34
  LR = 0.0003
35
  HIDDEN_DIM = 512
36
  LAYERS = 8
37
- SEQ_LEN = 30
38
 
39
- # Hugging Face Settings (Optional)
40
- HF_REPO_ID = os.getenv("HF_REPO_ID", "luohoa97/BitFin") # User's model repo
41
  HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
42
  HF_TOKEN = os.getenv("HF_TOKEN")
43
 
@@ -50,17 +49,14 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
50
  batch_size = start_batch
51
  last_success = batch_size
52
 
53
- # Progress bar for the search
54
  pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
55
  pbar.update(batch_size)
56
 
57
  try:
58
  while batch_size <= 16384: # Ceiling
59
- # Mock data for testing
60
  mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
61
  mock_y = torch.randint(0, 3, (batch_size,)).to(device)
62
 
63
- # Simulated forward/backward pass
64
  outputs = model(mock_X)
65
  loss = nn.CrossEntropyLoss()(outputs, mock_y)
66
  loss.backward()
@@ -84,42 +80,26 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
84
 
85
  def train():
86
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
87
- logger.info(f"Using device: {device}")
88
-
89
- if device.type == "cpu":
90
- logger.warning("⚠️ WARNING: CUDA is NOT available. Training on CPU will be EXTREMELY slow.")
91
- logger.warning("👉 In Google Colab, go to 'Runtime' > 'Change runtime type' and select 'T4 GPU'.")
92
-
93
- # Modern torch.amp API
94
- use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
95
- device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
96
- dtype = torch.bfloat16 if use_bf16 else torch.float16
97
-
98
- # Scaler only needed for FP16 on CUDA
99
- scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda' and not use_bf16))
100
 
101
- # 1. Load Dataset
102
  if not os.path.exists("data/trading_dataset.pt"):
103
- logger.info("Dataset not found locally. Searching on HF Hub...")
104
- if HF_DATASET_ID:
105
- try:
106
- hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", repo_type="dataset", local_dir="data")
107
- except Exception as e:
108
- logger.warning(f"Could not download dataset from HF: {e}. Falling back to generation.")
109
-
110
- # If still not found, generate it!
111
- if not os.path.exists("data/trading_dataset.pt"):
112
- logger.info("🚀 Starting on-the-fly dataset generation (10 years, 70 symbols)...")
113
  build_dataset()
114
 
115
  print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
116
  data = torch.load("data/trading_dataset.pt")
117
- X, y = data["X"], data["y"]
118
 
119
- dataset = TensorDataset(X, y)
 
120
  train_size = int(0.8 * len(dataset))
121
  val_size = len(dataset) - train_size
122
- train_ds, val_ds = random_split(dataset, [train_size, val_size])
123
 
124
  # 3. Create Model
125
  input_dim = X.shape[2]
@@ -132,17 +112,16 @@ def train():
132
  train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
133
  val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
134
 
135
- total_params = sum(p.numel() for p in model.parameters())
136
- logger.info(f"Model Architecture: BitNet-Transformer ({LAYERS} layers, {HIDDEN_DIM} hidden)")
137
- logger.info(f"Total Parameters: {total_params:,}")
138
- # Use standard CrossEntropy for classification [HOLD, BUY, SELL]
139
  criterion = nn.CrossEntropyLoss()
140
- optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
141
 
142
- logger.info("Starting training on %d samples (%d features)...", len(X), input_dim)
143
-
144
- # 5. Start Training
145
- print(f"🚀 Starting training loop (Batch Size: {batch_size})...", flush=True)
 
 
 
146
  best_val_loss = float('inf')
147
 
148
  for epoch in range(EPOCHS):
@@ -152,24 +131,20 @@ def train():
152
  total = 0
153
 
154
  pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
155
- for batch_X, batch_y in pbar:
156
  batch_X, batch_y = batch_X.to(device), batch_y.to(device)
157
  optimizer.zero_grad()
158
 
159
- # Using Mixed Precision (AMP)
160
  with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
161
  outputs = model(batch_X)
162
  loss = criterion(outputs, batch_y)
163
 
164
  if not use_bf16:
165
  scaler.scale(loss).backward()
166
- scaler.unscale_(optimizer)
167
- torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
168
  scaler.step(optimizer)
169
  scaler.update()
170
  else:
171
  loss.backward()
172
- torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
173
  optimizer.step()
174
 
175
  train_loss += loss.item()
@@ -177,112 +152,68 @@ def train():
177
  total += batch_y.size(0)
178
  correct += predicted.eq(batch_y).sum().item()
179
 
180
- # Update progress bar
181
- pbar.set_postfix({
182
- "loss": f"{loss.item():.4f}",
183
- "acc": f"{100.*correct/total:.1f}%"
184
- })
185
 
186
- # Validation
187
  model.eval()
188
  val_loss = 0
189
- val_correct = 0
190
- val_total = 0
191
  with torch.no_grad():
192
- for batch_X, batch_y in val_loader:
193
  batch_X, batch_y = batch_X.to(device), batch_y.to(device)
194
  with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
195
  outputs = model(batch_X)
196
  loss = criterion(outputs, batch_y)
 
197
  val_loss += loss.item()
198
- _, predicted = outputs.max(1)
199
- val_total += batch_y.size(0)
200
- val_correct += predicted.eq(batch_y).sum().item()
 
201
 
202
- avg_train_loss = train_loss / len(train_loader)
203
  avg_val_loss = val_loss / len(val_loader)
204
- train_acc = 100. * correct / total
205
- val_acc = 100. * val_correct / val_total
206
 
207
- if (epoch + 1) % 5 == 0 or epoch == 0:
208
- logger.info(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.1f}% | Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.1f}%")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  if avg_val_loss < best_val_loss:
211
  best_val_loss = avg_val_loss
212
  os.makedirs("models", exist_ok=True)
213
- model_path = "models/ai_fusion_bitnet.safetensors"
214
- save_file(model.state_dict(), model_path)
215
- logger.info(f"Model saved to {model_path}")
216
-
217
- logger.info("Training complete.")
218
-
219
- # 6. Final Evaluation & Report
220
- model.load_state_dict(load_file("models/ai_fusion_bitnet.safetensors"))
221
- model.eval()
222
-
223
- all_preds = []
224
- all_true = []
225
-
226
- with torch.no_grad():
227
- for xb, yb in val_loader:
228
- xb, yb = xb.to(device), yb.to(device)
229
- outputs = model(xb)
230
- preds = torch.argmax(outputs, dim=-1)
231
- all_preds.extend(preds.cpu().numpy())
232
- all_true.extend(yb.cpu().numpy())
233
-
234
- target_names = ["HOLD", "BUY", "SELL"]
235
- report = classification_report(all_true, all_preds, target_names=target_names)
236
-
237
- # Advanced Metrics (Backtest Simulation)
238
- buys = (np.array(all_preds) == 1).sum()
239
- sells = (np.array(all_preds) == 2).sum()
240
- total = len(all_preds)
241
- win_count = ((np.array(all_preds) == 1) & (np.array(all_true) == 1)).sum()
242
- win_rate = win_count / (buys + 1e-6)
243
-
244
- perf_summary = f"""
245
- === AI Fusion Model Performance Report ===
246
- {report}
247
 
248
- Trading Profile:
249
- - Total Validation Samples: {total:,}
250
- - Signal Frequency: {(buys+sells)/total:.2%}
251
- - BUY Signals: {buys}
252
- - SELL Signals: {sells}
253
- - Win Rate (Direct match): {win_rate:.2%}
254
- - Estimated Sharpe Ratio (Simulated): {(win_rate - 0.4) * 5:.2f}
255
- - Portfolio Impact: Scalable
256
- """
257
- logger.info(perf_summary)
258
-
259
- cm = confusion_matrix(all_true, all_preds)
260
- logger.info(f"Confusion Matrix:\n{cm}")
261
-
262
- # Save report to file
263
- os.makedirs("data", exist_ok=True)
264
- with open("data/performance_report.txt", "w") as f:
265
- f.write(perf_summary)
266
- f.write("\nConfusion Matrix:\n")
267
- f.write(str(cm))
268
 
269
- # Optional: Upload to Hugging Face
270
  if HF_REPO_ID and HF_TOKEN:
271
  try:
272
- logger.info(f"Uploading model to Hugging Face Hub: {HF_REPO_ID}...")
273
  api = HfApi()
274
- # Ensure repo exists
275
  create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
276
- # Upload
277
  api.upload_file(
278
  path_or_fileobj="models/ai_fusion_bitnet.safetensors",
279
  path_in_repo="ai_fusion_bitnet.safetensors",
280
  repo_id=HF_REPO_ID,
281
  token=HF_TOKEN
282
  )
283
- logger.info("Upload successful!")
284
  except Exception as e:
285
- logger.error(f"Failed to upload to Hugging Face: {e}")
286
 
287
  if __name__ == "__main__":
288
  train()
 
2
  """
3
  Train the BitNet AI Fusion model.
4
  Uses ternary weights (-1, 0, 1) and 8-bit activations.
5
+ Now includes real-time PnL backtesting and Confusion Matrix logging.
6
  """
7
 
8
  import sys
 
11
  import torch.nn as nn
12
  import torch.optim as optim
13
  from torch.utils.data import DataLoader, TensorDataset, random_split
14
+ from tqdm.auto import tqdm
 
15
  import logging
16
  from safetensors.torch import save_file, load_file
17
  from huggingface_hub import HfApi, create_repo, hf_hub_download
 
22
  sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
23
 
24
  from trading_cli.strategy.ai.model import create_model
25
+ from scripts.generate_ai_dataset import build_dataset, SEQ_LEN, LOOKAHEAD
26
 
27
  # Configure logging
28
  logging.basicConfig(level=logging.INFO)
 
30
 
31
  # Hyperparameters
32
  EPOCHS = 100
33
+ BATCH_SIZE = 4096 # Starting point for dynamic search
34
  LR = 0.0003
35
  HIDDEN_DIM = 512
36
  LAYERS = 8
 
37
 
38
+ # HF Configuration
39
+ HF_REPO_ID = os.getenv("HF_REPO_ID") # e.g. "username/BitFin"
40
  HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
41
  HF_TOKEN = os.getenv("HF_TOKEN")
42
 
 
49
  batch_size = start_batch
50
  last_success = batch_size
51
 
 
52
  pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
53
  pbar.update(batch_size)
54
 
55
  try:
56
  while batch_size <= 16384: # Ceiling
 
57
  mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
58
  mock_y = torch.randint(0, 3, (batch_size,)).to(device)
59
 
 
60
  outputs = model(mock_X)
61
  loss = nn.CrossEntropyLoss()(outputs, mock_y)
62
  loss.backward()
 
80
 
81
  def train():
82
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
83
+ print(f"Using device: {device}", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
+ # 1. Load or Generate Dataset
86
  if not os.path.exists("data/trading_dataset.pt"):
87
+ try:
88
+ print("📦 Fetching dataset from Hugging Face...", flush=True)
89
+ hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", local_dir="data", repo_type="dataset")
90
+ except Exception:
91
+ print("🚀 Starting on-the-fly dataset generation...", flush=True)
 
 
 
 
 
92
  build_dataset()
93
 
94
  print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
95
  data = torch.load("data/trading_dataset.pt")
96
+ X, y, rets = data["X"], data["y"], data["rets"]
97
 
98
+ # 2. Split Data
99
+ dataset = TensorDataset(X, y, rets)
100
  train_size = int(0.8 * len(dataset))
101
  val_size = len(dataset) - train_size
102
+ train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
103
 
104
  # 3. Create Model
105
  input_dim = X.shape[2]
 
112
  train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
113
  val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
114
 
115
+ optimizer = optim.AdamW(model.parameters(), lr=LR)
 
 
 
116
  criterion = nn.CrossEntropyLoss()
 
117
 
118
+ # Mixed Precision Setup
119
+ dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
120
+ device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
121
+ use_bf16 = (dtype == torch.bfloat16)
122
+ scaler = torch.amp.GradScaler(device_type, enabled=(not use_bf16 and device.type == 'cuda'))
123
+
124
+ print(f"🚀 Starting training (Batch Size: {batch_size}, Precision: {dtype})...", flush=True)
125
  best_val_loss = float('inf')
126
 
127
  for epoch in range(EPOCHS):
 
131
  total = 0
132
 
133
  pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
134
+ for batch_X, batch_y, _ in pbar:
135
  batch_X, batch_y = batch_X.to(device), batch_y.to(device)
136
  optimizer.zero_grad()
137
 
 
138
  with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
139
  outputs = model(batch_X)
140
  loss = criterion(outputs, batch_y)
141
 
142
  if not use_bf16:
143
  scaler.scale(loss).backward()
 
 
144
  scaler.step(optimizer)
145
  scaler.update()
146
  else:
147
  loss.backward()
 
148
  optimizer.step()
149
 
150
  train_loss += loss.item()
 
152
  total += batch_y.size(0)
153
  correct += predicted.eq(batch_y).sum().item()
154
 
155
+ pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{100.*correct/total:.1f}%"})
 
 
 
 
156
 
157
+ # Validation & Backtest
158
  model.eval()
159
  val_loss = 0
160
+ all_preds, all_true, all_rets = [], [], []
161
+
162
  with torch.no_grad():
163
+ for batch_X, batch_y, batch_r in val_loader:
164
  batch_X, batch_y = batch_X.to(device), batch_y.to(device)
165
  with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
166
  outputs = model(batch_X)
167
  loss = criterion(outputs, batch_y)
168
+
169
  val_loss += loss.item()
170
+ preds = torch.argmax(outputs, dim=-1)
171
+ all_preds.extend(preds.cpu().numpy())
172
+ all_true.extend(batch_y.cpu().numpy())
173
+ all_rets.extend(batch_r.numpy())
174
 
 
175
  avg_val_loss = val_loss / len(val_loader)
 
 
176
 
177
+ # Calculate Backtest Metrics
178
+ all_preds = np.array(all_preds)
179
+ all_true = np.array(all_true)
180
+ all_rets = np.array(all_rets)
181
+
182
+ buys = int((all_preds == 1).sum())
183
+ sells = int((all_preds == 2).sum())
184
+ pnl = float(np.sum(all_rets[all_preds == 1]) - np.sum(all_rets[all_preds == 2]))
185
+ win_rate = float(np.sum((all_preds == 1) & (all_true == 1)) / (buys + 1e-6))
186
+
187
+ print(f"\n--- Epoch {epoch+1} Statistics ---", flush=True)
188
+ print(f"Val Loss: {avg_val_loss:.4f} | Total PnL: {pnl:+.4f} | Win Rate: {win_rate:.1%}", flush=True)
189
+ print(f"Signals: {buys} BUY | {sells} SELL | Activity: {(buys+sells)/len(all_preds):.1%}", flush=True)
190
+
191
+ if buys + sells > 0:
192
+ cm = confusion_matrix(all_true, all_preds, labels=[0, 1, 2])
193
+ print(f"Confusion Matrix (HOLD/BUY/SELL):\n{cm}", flush=True)
194
 
195
  if avg_val_loss < best_val_loss:
196
  best_val_loss = avg_val_loss
197
  os.makedirs("models", exist_ok=True)
198
+ save_file(model.state_dict(), "models/ai_fusion_bitnet.safetensors")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
+ print("✅ Training complete. Final model saved.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
+ # Upload to HF
203
  if HF_REPO_ID and HF_TOKEN:
204
  try:
205
+ print(f"📤 Uploading to HF: {HF_REPO_ID}...", flush=True)
206
  api = HfApi()
 
207
  create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
 
208
  api.upload_file(
209
  path_or_fileobj="models/ai_fusion_bitnet.safetensors",
210
  path_in_repo="ai_fusion_bitnet.safetensors",
211
  repo_id=HF_REPO_ID,
212
  token=HF_TOKEN
213
  )
214
+ print("Upload successful!", flush=True)
215
  except Exception as e:
216
+ print(f"⚠️ Upload failed: {e}", flush=True)
217
 
218
  if __name__ == "__main__":
219
  train()