Spaces:
Runtime error
Runtime error
Deploy BitNet-Transformer Trainer
Browse files- scripts/generate_ai_dataset.py +61 -63
- scripts/train_ai_model.py +59 -128
scripts/generate_ai_dataset.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
"""
|
| 3 |
Generate training dataset for AI Fusion strategy.
|
| 4 |
Fetches historical OHLCV, computes technical features, and labels data.
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import sys
|
|
@@ -10,7 +11,7 @@ import pandas as pd
|
|
| 10 |
import numpy as np
|
| 11 |
import logging
|
| 12 |
import torch
|
| 13 |
-
from
|
| 14 |
|
| 15 |
# Add project root to path
|
| 16 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
@@ -35,18 +36,17 @@ SYMBOLS = [
|
|
| 35 |
"JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
|
| 36 |
"WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
|
| 37 |
"CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
|
| 38 |
-
"BTC-USD", "ETH-USD", "GC=F", "CL=F"
|
| 39 |
]
|
| 40 |
DAYS = 3652 # 10 years
|
| 41 |
LOOKAHEAD = 5 # Prediction window (days)
|
| 42 |
TARGET_PCT = 0.02 # Profit target (2%)
|
| 43 |
STOP_PCT = 0.015 # Stop loss (1.5%)
|
|
|
|
| 44 |
|
| 45 |
def generate_features(df):
|
| 46 |
"""Compute technical indicators for the feature vector."""
|
| 47 |
close = df["close" if "close" in df.columns else "Close"]
|
| 48 |
-
high = df["high" if "high" in df.columns else "High"]
|
| 49 |
-
low = df["low" if "low" in df.columns else "Low"]
|
| 50 |
|
| 51 |
# 1. RSI(2) - Very short period
|
| 52 |
rsi2 = calculate_rsi(close, 2) / 100.0
|
|
@@ -69,10 +69,10 @@ def generate_features(df):
|
|
| 69 |
atr = calculate_atr(df, 14)
|
| 70 |
atr_pct = atr / close
|
| 71 |
|
| 72 |
-
# 6. Volume spike
|
| 73 |
vol = df["volume" if "volume" in df.columns else "Volume"]
|
| 74 |
vol_sma = vol.rolling(20).mean()
|
| 75 |
-
vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0
|
| 76 |
|
| 77 |
features = pd.DataFrame({
|
| 78 |
"rsi2": rsi2,
|
|
@@ -85,23 +85,18 @@ def generate_features(df):
|
|
| 85 |
"vol_ratio": vol_ratio,
|
| 86 |
}, index=df.index)
|
| 87 |
|
| 88 |
-
|
| 89 |
-
for col in features.columns:
|
| 90 |
-
if isinstance(features[col], pd.DataFrame):
|
| 91 |
-
features[col] = features[col].squeeze()
|
| 92 |
-
|
| 93 |
-
return features
|
| 94 |
|
| 95 |
def generate_labels(df):
|
| 96 |
-
"""Label data using Triple Barrier
|
| 97 |
close = df["close" if "close" in df.columns else "Close"].values
|
| 98 |
labels = np.zeros(len(close))
|
|
|
|
| 99 |
|
| 100 |
for i in range(len(close) - LOOKAHEAD):
|
| 101 |
current_price = close[i]
|
| 102 |
future_prices = close[i+1 : i+LOOKAHEAD+1]
|
| 103 |
|
| 104 |
-
# Look ahead for profit target or stop loss
|
| 105 |
max_ret = (np.max(future_prices) - current_price) / current_price
|
| 106 |
min_ret = (np.min(future_prices) - current_price) / current_price
|
| 107 |
|
|
@@ -112,69 +107,72 @@ def generate_labels(df):
|
|
| 112 |
else:
|
| 113 |
labels[i] = 0 # HOLD
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
|
| 119 |
def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
|
| 120 |
-
"""
|
| 121 |
-
|
| 122 |
-
Used by local scripts and the Hugging Face Cloud trainer.
|
| 123 |
-
"""
|
| 124 |
-
all_features = []
|
| 125 |
-
all_labels = []
|
| 126 |
|
| 127 |
-
for symbol in symbols:
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
features["sentiment"] = sentiment
|
| 141 |
-
|
| 142 |
-
# Combine and drop NaN
|
| 143 |
-
features["label"] = labels
|
| 144 |
-
features = features.dropna()
|
| 145 |
-
|
| 146 |
-
if len(features) < (SEQ_LEN + 100):
|
| 147 |
-
logger.warning("Skipping %s: Too few valid samples after dropna (%d < %d)", symbol, len(features), SEQ_LEN + 100)
|
| 148 |
-
continue
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
|
|
|
| 165 |
|
| 166 |
-
X = np.concatenate(
|
| 167 |
-
y = np.concatenate(
|
|
|
|
| 168 |
|
| 169 |
-
# Save as PyTorch dataset
|
| 170 |
data = {
|
| 171 |
"X": torch.tensor(X, dtype=torch.float32),
|
| 172 |
-
"y": torch.tensor(y, dtype=torch.long)
|
|
|
|
|
|
|
| 173 |
}
|
| 174 |
|
| 175 |
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 176 |
torch.save(data, output_path)
|
| 177 |
-
logger.info("
|
| 178 |
return data
|
| 179 |
|
| 180 |
if __name__ == "__main__":
|
|
|
|
| 2 |
"""
|
| 3 |
Generate training dataset for AI Fusion strategy.
|
| 4 |
Fetches historical OHLCV, computes technical features, and labels data.
|
| 5 |
+
Includes future returns for Profit/Loss backtesting.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import sys
|
|
|
|
| 11 |
import numpy as np
|
| 12 |
import logging
|
| 13 |
import torch
|
| 14 |
+
from tqdm.auto import tqdm
|
| 15 |
|
| 16 |
# Add project root to path
|
| 17 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
|
|
| 36 |
"JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
|
| 37 |
"WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
|
| 38 |
"CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
|
| 39 |
+
"BTC-USD", "ETH-USD", "GC=F", "CL=F"
|
| 40 |
]
|
| 41 |
DAYS = 3652 # 10 years
|
| 42 |
LOOKAHEAD = 5 # Prediction window (days)
|
| 43 |
TARGET_PCT = 0.02 # Profit target (2%)
|
| 44 |
STOP_PCT = 0.015 # Stop loss (1.5%)
|
| 45 |
+
SEQ_LEN = 30 # One month of trading days
|
| 46 |
|
| 47 |
def generate_features(df):
|
| 48 |
"""Compute technical indicators for the feature vector."""
|
| 49 |
close = df["close" if "close" in df.columns else "Close"]
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# 1. RSI(2) - Very short period
|
| 52 |
rsi2 = calculate_rsi(close, 2) / 100.0
|
|
|
|
| 69 |
atr = calculate_atr(df, 14)
|
| 70 |
atr_pct = atr / close
|
| 71 |
|
| 72 |
+
# 6. Volume spike
|
| 73 |
vol = df["volume" if "volume" in df.columns else "Volume"]
|
| 74 |
vol_sma = vol.rolling(20).mean()
|
| 75 |
+
vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0
|
| 76 |
|
| 77 |
features = pd.DataFrame({
|
| 78 |
"rsi2": rsi2,
|
|
|
|
| 85 |
"vol_ratio": vol_ratio,
|
| 86 |
}, index=df.index)
|
| 87 |
|
| 88 |
+
return features.dropna()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
def generate_labels(df):
|
| 91 |
+
"""Label data using Triple Barrier and calculate future returns."""
|
| 92 |
close = df["close" if "close" in df.columns else "Close"].values
|
| 93 |
labels = np.zeros(len(close))
|
| 94 |
+
future_rets = np.zeros(len(close))
|
| 95 |
|
| 96 |
for i in range(len(close) - LOOKAHEAD):
|
| 97 |
current_price = close[i]
|
| 98 |
future_prices = close[i+1 : i+LOOKAHEAD+1]
|
| 99 |
|
|
|
|
| 100 |
max_ret = (np.max(future_prices) - current_price) / current_price
|
| 101 |
min_ret = (np.min(future_prices) - current_price) / current_price
|
| 102 |
|
|
|
|
| 107 |
else:
|
| 108 |
labels[i] = 0 # HOLD
|
| 109 |
|
| 110 |
+
future_rets[i] = (close[i + LOOKAHEAD] - current_price) / current_price
|
| 111 |
+
|
| 112 |
+
return labels, future_rets
|
| 113 |
|
| 114 |
def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
|
| 115 |
+
"""Fetch, label, and sequence data for all symbols."""
|
| 116 |
+
all_X, all_y, all_rets = [], [], []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
+
for symbol in tqdm(symbols, desc="Building Global Dataset"):
|
| 119 |
+
try:
|
| 120 |
+
df = fetch_ohlcv_yfinance(symbol, days=days)
|
| 121 |
+
if len(df) < (SEQ_LEN + LOOKAHEAD + 50):
|
| 122 |
+
continue
|
| 123 |
+
|
| 124 |
+
features = generate_features(df)
|
| 125 |
+
labels, rets = generate_labels(df)
|
| 126 |
|
| 127 |
+
# Align features with labels/rets and add sentiment
|
| 128 |
+
df_aligned = pd.DataFrame(index=df.index)
|
| 129 |
+
df_aligned["label"] = labels
|
| 130 |
+
df_aligned["future_ret"] = rets
|
| 131 |
+
df_aligned["sentiment"] = np.random.normal(0, 0.2, len(df))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
|
| 133 |
+
# Merge features
|
| 134 |
+
df_combined = features.join(df_aligned, how="inner").dropna()
|
| 135 |
+
|
| 136 |
+
if len(df_combined) < SEQ_LEN:
|
| 137 |
+
continue
|
| 138 |
+
|
| 139 |
+
feat_vals = df_combined.drop(columns=["label", "future_ret"]).values
|
| 140 |
+
label_vals = df_combined["label"].values.astype(int)
|
| 141 |
+
ret_vals = df_combined["future_ret"].values
|
| 142 |
+
|
| 143 |
+
symbol_X, symbol_y, symbol_rets = [], [], []
|
| 144 |
+
for i in range(len(feat_vals) - SEQ_LEN):
|
| 145 |
+
symbol_X.append(feat_vals[i : i+SEQ_LEN])
|
| 146 |
+
# Label/Ret is for the prediction point at the END of the sequence
|
| 147 |
+
symbol_y.append(label_vals[i+SEQ_LEN-1])
|
| 148 |
+
symbol_rets.append(ret_vals[i+SEQ_LEN-1])
|
| 149 |
+
|
| 150 |
+
if symbol_X:
|
| 151 |
+
all_X.append(np.array(symbol_X))
|
| 152 |
+
all_y.append(np.array(symbol_y))
|
| 153 |
+
all_rets.append(np.array(symbol_rets))
|
| 154 |
+
|
| 155 |
+
except Exception as e:
|
| 156 |
+
logger.error(f"Error processing {symbol}: {e}")
|
| 157 |
|
| 158 |
+
if not all_X:
|
| 159 |
+
logger.error("No valid data collected!")
|
| 160 |
+
return None
|
| 161 |
|
| 162 |
+
X = np.concatenate(all_X, axis=0)
|
| 163 |
+
y = np.concatenate(all_y, axis=0)
|
| 164 |
+
rets = np.concatenate(all_rets, axis=0)
|
| 165 |
|
|
|
|
| 166 |
data = {
|
| 167 |
"X": torch.tensor(X, dtype=torch.float32),
|
| 168 |
+
"y": torch.tensor(y, dtype=torch.long),
|
| 169 |
+
"rets": torch.tensor(rets, dtype=torch.float32),
|
| 170 |
+
"symbols": symbols
|
| 171 |
}
|
| 172 |
|
| 173 |
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 174 |
torch.save(data, output_path)
|
| 175 |
+
logger.info(f"✅ Dataset saved to {output_path} | Shape: {X.shape}")
|
| 176 |
return data
|
| 177 |
|
| 178 |
if __name__ == "__main__":
|
scripts/train_ai_model.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
"""
|
| 3 |
Train the BitNet AI Fusion model.
|
| 4 |
Uses ternary weights (-1, 0, 1) and 8-bit activations.
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import sys
|
|
@@ -10,8 +11,7 @@ import torch
|
|
| 10 |
import torch.nn as nn
|
| 11 |
import torch.optim as optim
|
| 12 |
from torch.utils.data import DataLoader, TensorDataset, random_split
|
| 13 |
-
from tqdm import tqdm
|
| 14 |
-
from tqdm import tqdm
|
| 15 |
import logging
|
| 16 |
from safetensors.torch import save_file, load_file
|
| 17 |
from huggingface_hub import HfApi, create_repo, hf_hub_download
|
|
@@ -22,7 +22,7 @@ from sklearn.metrics import classification_report, confusion_matrix
|
|
| 22 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 23 |
|
| 24 |
from trading_cli.strategy.ai.model import create_model
|
| 25 |
-
from scripts.generate_ai_dataset import build_dataset
|
| 26 |
|
| 27 |
# Configure logging
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -30,14 +30,13 @@ logger = logging.getLogger(__name__)
|
|
| 30 |
|
| 31 |
# Hyperparameters
|
| 32 |
EPOCHS = 100
|
| 33 |
-
BATCH_SIZE =
|
| 34 |
LR = 0.0003
|
| 35 |
HIDDEN_DIM = 512
|
| 36 |
LAYERS = 8
|
| 37 |
-
SEQ_LEN = 30
|
| 38 |
|
| 39 |
-
#
|
| 40 |
-
HF_REPO_ID = os.getenv("HF_REPO_ID"
|
| 41 |
HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
|
| 42 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 43 |
|
|
@@ -50,17 +49,14 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
|
|
| 50 |
batch_size = start_batch
|
| 51 |
last_success = batch_size
|
| 52 |
|
| 53 |
-
# Progress bar for the search
|
| 54 |
pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
|
| 55 |
pbar.update(batch_size)
|
| 56 |
|
| 57 |
try:
|
| 58 |
while batch_size <= 16384: # Ceiling
|
| 59 |
-
# Mock data for testing
|
| 60 |
mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
|
| 61 |
mock_y = torch.randint(0, 3, (batch_size,)).to(device)
|
| 62 |
|
| 63 |
-
# Simulated forward/backward pass
|
| 64 |
outputs = model(mock_X)
|
| 65 |
loss = nn.CrossEntropyLoss()(outputs, mock_y)
|
| 66 |
loss.backward()
|
|
@@ -84,42 +80,26 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
|
|
| 84 |
|
| 85 |
def train():
|
| 86 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
if device.type == "cpu":
|
| 90 |
-
logger.warning("⚠️ WARNING: CUDA is NOT available. Training on CPU will be EXTREMELY slow.")
|
| 91 |
-
logger.warning("👉 In Google Colab, go to 'Runtime' > 'Change runtime type' and select 'T4 GPU'.")
|
| 92 |
-
|
| 93 |
-
# Modern torch.amp API
|
| 94 |
-
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
|
| 95 |
-
device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 96 |
-
dtype = torch.bfloat16 if use_bf16 else torch.float16
|
| 97 |
-
|
| 98 |
-
# Scaler only needed for FP16 on CUDA
|
| 99 |
-
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda' and not use_bf16))
|
| 100 |
|
| 101 |
-
# 1. Load Dataset
|
| 102 |
if not os.path.exists("data/trading_dataset.pt"):
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
logger.warning(f"Could not download dataset from HF: {e}. Falling back to generation.")
|
| 109 |
-
|
| 110 |
-
# If still not found, generate it!
|
| 111 |
-
if not os.path.exists("data/trading_dataset.pt"):
|
| 112 |
-
logger.info("🚀 Starting on-the-fly dataset generation (10 years, 70 symbols)...")
|
| 113 |
build_dataset()
|
| 114 |
|
| 115 |
print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
|
| 116 |
data = torch.load("data/trading_dataset.pt")
|
| 117 |
-
X, y = data["X"], data["y"]
|
| 118 |
|
| 119 |
-
|
|
|
|
| 120 |
train_size = int(0.8 * len(dataset))
|
| 121 |
val_size = len(dataset) - train_size
|
| 122 |
-
train_ds, val_ds = random_split(dataset, [train_size, val_size])
|
| 123 |
|
| 124 |
# 3. Create Model
|
| 125 |
input_dim = X.shape[2]
|
|
@@ -132,17 +112,16 @@ def train():
|
|
| 132 |
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
|
| 133 |
val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
|
| 134 |
|
| 135 |
-
|
| 136 |
-
logger.info(f"Model Architecture: BitNet-Transformer ({LAYERS} layers, {HIDDEN_DIM} hidden)")
|
| 137 |
-
logger.info(f"Total Parameters: {total_params:,}")
|
| 138 |
-
# Use standard CrossEntropy for classification [HOLD, BUY, SELL]
|
| 139 |
criterion = nn.CrossEntropyLoss()
|
| 140 |
-
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
| 146 |
best_val_loss = float('inf')
|
| 147 |
|
| 148 |
for epoch in range(EPOCHS):
|
|
@@ -152,24 +131,20 @@ def train():
|
|
| 152 |
total = 0
|
| 153 |
|
| 154 |
pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
|
| 155 |
-
for batch_X, batch_y in pbar:
|
| 156 |
batch_X, batch_y = batch_X.to(device), batch_y.to(device)
|
| 157 |
optimizer.zero_grad()
|
| 158 |
|
| 159 |
-
# Using Mixed Precision (AMP)
|
| 160 |
with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
|
| 161 |
outputs = model(batch_X)
|
| 162 |
loss = criterion(outputs, batch_y)
|
| 163 |
|
| 164 |
if not use_bf16:
|
| 165 |
scaler.scale(loss).backward()
|
| 166 |
-
scaler.unscale_(optimizer)
|
| 167 |
-
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
| 168 |
scaler.step(optimizer)
|
| 169 |
scaler.update()
|
| 170 |
else:
|
| 171 |
loss.backward()
|
| 172 |
-
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
| 173 |
optimizer.step()
|
| 174 |
|
| 175 |
train_loss += loss.item()
|
|
@@ -177,112 +152,68 @@ def train():
|
|
| 177 |
total += batch_y.size(0)
|
| 178 |
correct += predicted.eq(batch_y).sum().item()
|
| 179 |
|
| 180 |
-
|
| 181 |
-
pbar.set_postfix({
|
| 182 |
-
"loss": f"{loss.item():.4f}",
|
| 183 |
-
"acc": f"{100.*correct/total:.1f}%"
|
| 184 |
-
})
|
| 185 |
|
| 186 |
-
# Validation
|
| 187 |
model.eval()
|
| 188 |
val_loss = 0
|
| 189 |
-
|
| 190 |
-
|
| 191 |
with torch.no_grad():
|
| 192 |
-
for batch_X, batch_y in val_loader:
|
| 193 |
batch_X, batch_y = batch_X.to(device), batch_y.to(device)
|
| 194 |
with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
|
| 195 |
outputs = model(batch_X)
|
| 196 |
loss = criterion(outputs, batch_y)
|
|
|
|
| 197 |
val_loss += loss.item()
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
| 201 |
|
| 202 |
-
avg_train_loss = train_loss / len(train_loader)
|
| 203 |
avg_val_loss = val_loss / len(val_loader)
|
| 204 |
-
train_acc = 100. * correct / total
|
| 205 |
-
val_acc = 100. * val_correct / val_total
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
if avg_val_loss < best_val_loss:
|
| 211 |
best_val_loss = avg_val_loss
|
| 212 |
os.makedirs("models", exist_ok=True)
|
| 213 |
-
|
| 214 |
-
save_file(model.state_dict(), model_path)
|
| 215 |
-
logger.info(f"Model saved to {model_path}")
|
| 216 |
-
|
| 217 |
-
logger.info("Training complete.")
|
| 218 |
-
|
| 219 |
-
# 6. Final Evaluation & Report
|
| 220 |
-
model.load_state_dict(load_file("models/ai_fusion_bitnet.safetensors"))
|
| 221 |
-
model.eval()
|
| 222 |
-
|
| 223 |
-
all_preds = []
|
| 224 |
-
all_true = []
|
| 225 |
-
|
| 226 |
-
with torch.no_grad():
|
| 227 |
-
for xb, yb in val_loader:
|
| 228 |
-
xb, yb = xb.to(device), yb.to(device)
|
| 229 |
-
outputs = model(xb)
|
| 230 |
-
preds = torch.argmax(outputs, dim=-1)
|
| 231 |
-
all_preds.extend(preds.cpu().numpy())
|
| 232 |
-
all_true.extend(yb.cpu().numpy())
|
| 233 |
-
|
| 234 |
-
target_names = ["HOLD", "BUY", "SELL"]
|
| 235 |
-
report = classification_report(all_true, all_preds, target_names=target_names)
|
| 236 |
-
|
| 237 |
-
# Advanced Metrics (Backtest Simulation)
|
| 238 |
-
buys = (np.array(all_preds) == 1).sum()
|
| 239 |
-
sells = (np.array(all_preds) == 2).sum()
|
| 240 |
-
total = len(all_preds)
|
| 241 |
-
win_count = ((np.array(all_preds) == 1) & (np.array(all_true) == 1)).sum()
|
| 242 |
-
win_rate = win_count / (buys + 1e-6)
|
| 243 |
-
|
| 244 |
-
perf_summary = f"""
|
| 245 |
-
=== AI Fusion Model Performance Report ===
|
| 246 |
-
{report}
|
| 247 |
|
| 248 |
-
|
| 249 |
-
- Total Validation Samples: {total:,}
|
| 250 |
-
- Signal Frequency: {(buys+sells)/total:.2%}
|
| 251 |
-
- BUY Signals: {buys}
|
| 252 |
-
- SELL Signals: {sells}
|
| 253 |
-
- Win Rate (Direct match): {win_rate:.2%}
|
| 254 |
-
- Estimated Sharpe Ratio (Simulated): {(win_rate - 0.4) * 5:.2f}
|
| 255 |
-
- Portfolio Impact: Scalable
|
| 256 |
-
"""
|
| 257 |
-
logger.info(perf_summary)
|
| 258 |
-
|
| 259 |
-
cm = confusion_matrix(all_true, all_preds)
|
| 260 |
-
logger.info(f"Confusion Matrix:\n{cm}")
|
| 261 |
-
|
| 262 |
-
# Save report to file
|
| 263 |
-
os.makedirs("data", exist_ok=True)
|
| 264 |
-
with open("data/performance_report.txt", "w") as f:
|
| 265 |
-
f.write(perf_summary)
|
| 266 |
-
f.write("\nConfusion Matrix:\n")
|
| 267 |
-
f.write(str(cm))
|
| 268 |
|
| 269 |
-
#
|
| 270 |
if HF_REPO_ID and HF_TOKEN:
|
| 271 |
try:
|
| 272 |
-
|
| 273 |
api = HfApi()
|
| 274 |
-
# Ensure repo exists
|
| 275 |
create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
|
| 276 |
-
# Upload
|
| 277 |
api.upload_file(
|
| 278 |
path_or_fileobj="models/ai_fusion_bitnet.safetensors",
|
| 279 |
path_in_repo="ai_fusion_bitnet.safetensors",
|
| 280 |
repo_id=HF_REPO_ID,
|
| 281 |
token=HF_TOKEN
|
| 282 |
)
|
| 283 |
-
|
| 284 |
except Exception as e:
|
| 285 |
-
|
| 286 |
|
| 287 |
if __name__ == "__main__":
|
| 288 |
train()
|
|
|
|
| 2 |
"""
|
| 3 |
Train the BitNet AI Fusion model.
|
| 4 |
Uses ternary weights (-1, 0, 1) and 8-bit activations.
|
| 5 |
+
Now includes real-time PnL backtesting and Confusion Matrix logging.
|
| 6 |
"""
|
| 7 |
|
| 8 |
import sys
|
|
|
|
| 11 |
import torch.nn as nn
|
| 12 |
import torch.optim as optim
|
| 13 |
from torch.utils.data import DataLoader, TensorDataset, random_split
|
| 14 |
+
from tqdm.auto import tqdm
|
|
|
|
| 15 |
import logging
|
| 16 |
from safetensors.torch import save_file, load_file
|
| 17 |
from huggingface_hub import HfApi, create_repo, hf_hub_download
|
|
|
|
| 22 |
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
| 23 |
|
| 24 |
from trading_cli.strategy.ai.model import create_model
|
| 25 |
+
from scripts.generate_ai_dataset import build_dataset, SEQ_LEN, LOOKAHEAD
|
| 26 |
|
| 27 |
# Configure logging
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 30 |
|
| 31 |
# Hyperparameters
|
| 32 |
EPOCHS = 100
|
| 33 |
+
BATCH_SIZE = 4096 # Starting point for dynamic search
|
| 34 |
LR = 0.0003
|
| 35 |
HIDDEN_DIM = 512
|
| 36 |
LAYERS = 8
|
|
|
|
| 37 |
|
| 38 |
+
# HF Configuration
|
| 39 |
+
HF_REPO_ID = os.getenv("HF_REPO_ID") # e.g. "username/BitFin"
|
| 40 |
HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
|
| 41 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 42 |
|
|
|
|
| 49 |
batch_size = start_batch
|
| 50 |
last_success = batch_size
|
| 51 |
|
|
|
|
| 52 |
pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
|
| 53 |
pbar.update(batch_size)
|
| 54 |
|
| 55 |
try:
|
| 56 |
while batch_size <= 16384: # Ceiling
|
|
|
|
| 57 |
mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
|
| 58 |
mock_y = torch.randint(0, 3, (batch_size,)).to(device)
|
| 59 |
|
|
|
|
| 60 |
outputs = model(mock_X)
|
| 61 |
loss = nn.CrossEntropyLoss()(outputs, mock_y)
|
| 62 |
loss.backward()
|
|
|
|
| 80 |
|
| 81 |
def train():
|
| 82 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 83 |
+
print(f"Using device: {device}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
# 1. Load or Generate Dataset
|
| 86 |
if not os.path.exists("data/trading_dataset.pt"):
|
| 87 |
+
try:
|
| 88 |
+
print("📦 Fetching dataset from Hugging Face...", flush=True)
|
| 89 |
+
hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", local_dir="data", repo_type="dataset")
|
| 90 |
+
except Exception:
|
| 91 |
+
print("🚀 Starting on-the-fly dataset generation...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
build_dataset()
|
| 93 |
|
| 94 |
print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
|
| 95 |
data = torch.load("data/trading_dataset.pt")
|
| 96 |
+
X, y, rets = data["X"], data["y"], data["rets"]
|
| 97 |
|
| 98 |
+
# 2. Split Data
|
| 99 |
+
dataset = TensorDataset(X, y, rets)
|
| 100 |
train_size = int(0.8 * len(dataset))
|
| 101 |
val_size = len(dataset) - train_size
|
| 102 |
+
train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
|
| 103 |
|
| 104 |
# 3. Create Model
|
| 105 |
input_dim = X.shape[2]
|
|
|
|
| 112 |
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
|
| 113 |
val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
|
| 114 |
|
| 115 |
+
optimizer = optim.AdamW(model.parameters(), lr=LR)
|
|
|
|
|
|
|
|
|
|
| 116 |
criterion = nn.CrossEntropyLoss()
|
|
|
|
| 117 |
|
| 118 |
+
# Mixed Precision Setup
|
| 119 |
+
dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
|
| 120 |
+
device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 121 |
+
use_bf16 = (dtype == torch.bfloat16)
|
| 122 |
+
scaler = torch.amp.GradScaler(device_type, enabled=(not use_bf16 and device.type == 'cuda'))
|
| 123 |
+
|
| 124 |
+
print(f"🚀 Starting training (Batch Size: {batch_size}, Precision: {dtype})...", flush=True)
|
| 125 |
best_val_loss = float('inf')
|
| 126 |
|
| 127 |
for epoch in range(EPOCHS):
|
|
|
|
| 131 |
total = 0
|
| 132 |
|
| 133 |
pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
|
| 134 |
+
for batch_X, batch_y, _ in pbar:
|
| 135 |
batch_X, batch_y = batch_X.to(device), batch_y.to(device)
|
| 136 |
optimizer.zero_grad()
|
| 137 |
|
|
|
|
| 138 |
with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
|
| 139 |
outputs = model(batch_X)
|
| 140 |
loss = criterion(outputs, batch_y)
|
| 141 |
|
| 142 |
if not use_bf16:
|
| 143 |
scaler.scale(loss).backward()
|
|
|
|
|
|
|
| 144 |
scaler.step(optimizer)
|
| 145 |
scaler.update()
|
| 146 |
else:
|
| 147 |
loss.backward()
|
|
|
|
| 148 |
optimizer.step()
|
| 149 |
|
| 150 |
train_loss += loss.item()
|
|
|
|
| 152 |
total += batch_y.size(0)
|
| 153 |
correct += predicted.eq(batch_y).sum().item()
|
| 154 |
|
| 155 |
+
pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{100.*correct/total:.1f}%"})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
+
# Validation & Backtest
|
| 158 |
model.eval()
|
| 159 |
val_loss = 0
|
| 160 |
+
all_preds, all_true, all_rets = [], [], []
|
| 161 |
+
|
| 162 |
with torch.no_grad():
|
| 163 |
+
for batch_X, batch_y, batch_r in val_loader:
|
| 164 |
batch_X, batch_y = batch_X.to(device), batch_y.to(device)
|
| 165 |
with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
|
| 166 |
outputs = model(batch_X)
|
| 167 |
loss = criterion(outputs, batch_y)
|
| 168 |
+
|
| 169 |
val_loss += loss.item()
|
| 170 |
+
preds = torch.argmax(outputs, dim=-1)
|
| 171 |
+
all_preds.extend(preds.cpu().numpy())
|
| 172 |
+
all_true.extend(batch_y.cpu().numpy())
|
| 173 |
+
all_rets.extend(batch_r.numpy())
|
| 174 |
|
|
|
|
| 175 |
avg_val_loss = val_loss / len(val_loader)
|
|
|
|
|
|
|
| 176 |
|
| 177 |
+
# Calculate Backtest Metrics
|
| 178 |
+
all_preds = np.array(all_preds)
|
| 179 |
+
all_true = np.array(all_true)
|
| 180 |
+
all_rets = np.array(all_rets)
|
| 181 |
+
|
| 182 |
+
buys = int((all_preds == 1).sum())
|
| 183 |
+
sells = int((all_preds == 2).sum())
|
| 184 |
+
pnl = float(np.sum(all_rets[all_preds == 1]) - np.sum(all_rets[all_preds == 2]))
|
| 185 |
+
win_rate = float(np.sum((all_preds == 1) & (all_true == 1)) / (buys + 1e-6))
|
| 186 |
+
|
| 187 |
+
print(f"\n--- Epoch {epoch+1} Statistics ---", flush=True)
|
| 188 |
+
print(f"Val Loss: {avg_val_loss:.4f} | Total PnL: {pnl:+.4f} | Win Rate: {win_rate:.1%}", flush=True)
|
| 189 |
+
print(f"Signals: {buys} BUY | {sells} SELL | Activity: {(buys+sells)/len(all_preds):.1%}", flush=True)
|
| 190 |
+
|
| 191 |
+
if buys + sells > 0:
|
| 192 |
+
cm = confusion_matrix(all_true, all_preds, labels=[0, 1, 2])
|
| 193 |
+
print(f"Confusion Matrix (HOLD/BUY/SELL):\n{cm}", flush=True)
|
| 194 |
|
| 195 |
if avg_val_loss < best_val_loss:
|
| 196 |
best_val_loss = avg_val_loss
|
| 197 |
os.makedirs("models", exist_ok=True)
|
| 198 |
+
save_file(model.state_dict(), "models/ai_fusion_bitnet.safetensors")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
print("✅ Training complete. Final model saved.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
# Upload to HF
|
| 203 |
if HF_REPO_ID and HF_TOKEN:
|
| 204 |
try:
|
| 205 |
+
print(f"📤 Uploading to HF: {HF_REPO_ID}...", flush=True)
|
| 206 |
api = HfApi()
|
|
|
|
| 207 |
create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
|
|
|
|
| 208 |
api.upload_file(
|
| 209 |
path_or_fileobj="models/ai_fusion_bitnet.safetensors",
|
| 210 |
path_in_repo="ai_fusion_bitnet.safetensors",
|
| 211 |
repo_id=HF_REPO_ID,
|
| 212 |
token=HF_TOKEN
|
| 213 |
)
|
| 214 |
+
print("✅ Upload successful!", flush=True)
|
| 215 |
except Exception as e:
|
| 216 |
+
print(f"⚠️ Upload failed: {e}", flush=True)
|
| 217 |
|
| 218 |
if __name__ == "__main__":
|
| 219 |
train()
|