Spaces:

luohoa97
/

BitFinTrainer

Runtime error

App Files Community

luohoa97 committed 15 days ago

Commit

34e94cf

verified ·

1 Parent(s): 68e57b2

Deploy BitNet-Transformer Trainer

Browse files

Files changed (2) hide show

scripts/generate_ai_dataset.py +61 -63
scripts/train_ai_model.py +59 -128

scripts/generate_ai_dataset.py CHANGED Viewed

@@ -2,6 +2,7 @@
 """
 Generate training dataset for AI Fusion strategy.
 Fetches historical OHLCV, computes technical features, and labels data.
 """
 import sys
@@ -10,7 +11,7 @@ import pandas as pd
 import numpy as np
 import logging
 import torch
-from datetime import datetime, timedelta
 # Add project root to path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
@@ -35,18 +36,17 @@ SYMBOLS = [
     "JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
     "WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
     "CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
-    "BTC-USD", "ETH-USD", "GC=F", "CL=F" # Crypto and Commodities for diversity
 ]
 DAYS = 3652 # 10 years
 LOOKAHEAD = 5 # Prediction window (days)
 TARGET_PCT = 0.02 # Profit target (2%)
 STOP_PCT = 0.015 # Stop loss (1.5%)
 def generate_features(df):
     """Compute technical indicators for the feature vector."""
     close = df["close" if "close" in df.columns else "Close"]
-    high = df["high" if "high" in df.columns else "High"]
-    low = df["low" if "low" in df.columns else "Low"]
     # 1. RSI(2) - Very short period
     rsi2 = calculate_rsi(close, 2) / 100.0
@@ -69,10 +69,10 @@ def generate_features(df):
     atr = calculate_atr(df, 14)
     atr_pct = atr / close
-    # 6. Volume spike (Ratio to SMA 20)
     vol = df["volume" if "volume" in df.columns else "Volume"]
     vol_sma = vol.rolling(20).mean()
-    vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0 # Normalized 0-1
     features = pd.DataFrame({
         "rsi2": rsi2,
@@ -85,23 +85,18 @@ def generate_features(df):
         "vol_ratio": vol_ratio,
     }, index=df.index)
-    # Ensure all columns are 1D (should be Series already after flatten in market.py)
-    for col in features.columns:
-        if isinstance(features[col], pd.DataFrame):
-            features[col] = features[col].squeeze()
-    return features
 def generate_labels(df):
-    """Label data using Triple Barrier: 1=Buy, 2=Sell, 0=Hold."""
     close = df["close" if "close" in df.columns else "Close"].values
     labels = np.zeros(len(close))
     for i in range(len(close) - LOOKAHEAD):
         current_price = close[i]
         future_prices = close[i+1 : i+LOOKAHEAD+1]
-        # Look ahead for profit target or stop loss
         max_ret = (np.max(future_prices) - current_price) / current_price
         min_ret = (np.min(future_prices) - current_price) / current_price
@@ -112,69 +107,72 @@ def generate_labels(df):
         else:
             labels[i] = 0 # HOLD
-    return labels
-SEQ_LEN = 30 # One month of trading days
 def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
-    """
-    Programmatically build the sequence dataset.
-    Used by local scripts and the Hugging Face Cloud trainer.
-    """
-    all_features = []
-    all_labels = []
-    for symbol in symbols:
-        logger.info("Fetching data for %s", symbol)
-        df = fetch_ohlcv_yfinance(symbol, days=days)
-        total_days = len(df)
-        if df.empty or total_days < (days // 2): # Ensure we have enough data
-            logger.warning("Skipping %s: Insufficient history (%d < %d)", symbol, total_days, days // 2)
-            continue
-        features = generate_features(df)
-        labels = generate_labels(df)
-        # Sentiment simulation
-        sentiment = np.random.normal(0, 0.2, len(features))
-        features["sentiment"] = sentiment
-        # Combine and drop NaN
-        features["label"] = labels
-        features = features.dropna()
-        if len(features) < (SEQ_LEN + 100):
-            logger.warning("Skipping %s: Too few valid samples after dropna (%d < %d)", symbol, len(features), SEQ_LEN + 100)
-            continue
-        # Create sequences
-        feat_vals = features.drop(columns=["label"]).values
-        label_vals = features["label"].values
-        symbol_features = []
-        symbol_labels = []
-        for i in range(len(feat_vals) - SEQ_LEN):
-            # Window of features: [i : i + SEQ_LEN]
-            # Label is for the LAST day in the window
-            symbol_features.append(feat_vals[i : i+SEQ_LEN])
-            symbol_labels.append(label_vals[i+SEQ_LEN-1])
-        all_features.append(np.array(symbol_features))
-        all_labels.append(np.array(symbol_labels))
-    X = np.concatenate(all_features, axis=0)
-    y = np.concatenate(all_labels, axis=0)
-    # Save as PyTorch dataset
     data = {
         "X": torch.tensor(X, dtype=torch.float32),
-        "y": torch.tensor(y, dtype=torch.long)
     }
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     torch.save(data, output_path)
-    logger.info("Sequence dataset saved to %s. Shape: %s", output_path, X.shape)
     return data
 if __name__ == "__main__":

 """
 Generate training dataset for AI Fusion strategy.
 Fetches historical OHLCV, computes technical features, and labels data.
+Includes future returns for Profit/Loss backtesting.
 """
 import sys
 import numpy as np
 import logging
 import torch
+from tqdm.auto import tqdm
 # Add project root to path
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
     "JNJ", "PFE", "UNH", "ABBV", "MRK", "LLY", "TMO", "DHR", "ISRG", "GILD",
     "WMT", "COST", "HD", "LOW", "TGT", "PG", "KO", "PEP", "PM", "MO",
     "CAT", "DE", "HON", "GE", "MMM", "UPS", "FDX", "RTX", "LMT", "GD",
+    "BTC-USD", "ETH-USD", "GC=F", "CL=F"
 ]
 DAYS = 3652 # 10 years
 LOOKAHEAD = 5 # Prediction window (days)
 TARGET_PCT = 0.02 # Profit target (2%)
 STOP_PCT = 0.015 # Stop loss (1.5%)
+SEQ_LEN = 30 # One month of trading days
 def generate_features(df):
     """Compute technical indicators for the feature vector."""
     close = df["close" if "close" in df.columns else "Close"]
     # 1. RSI(2) - Very short period
     rsi2 = calculate_rsi(close, 2) / 100.0
     atr = calculate_atr(df, 14)
     atr_pct = atr / close
+    # 6. Volume spike
     vol = df["volume" if "volume" in df.columns else "Volume"]
     vol_sma = vol.rolling(20).mean()
+    vol_ratio = (vol / vol_sma).clip(0, 5) / 5.0
     features = pd.DataFrame({
         "rsi2": rsi2,
         "vol_ratio": vol_ratio,
     }, index=df.index)
+    return features.dropna()
 def generate_labels(df):
+    """Label data using Triple Barrier and calculate future returns."""
     close = df["close" if "close" in df.columns else "Close"].values
     labels = np.zeros(len(close))
+    future_rets = np.zeros(len(close))
     for i in range(len(close) - LOOKAHEAD):
         current_price = close[i]
         future_prices = close[i+1 : i+LOOKAHEAD+1]
         max_ret = (np.max(future_prices) - current_price) / current_price
         min_ret = (np.min(future_prices) - current_price) / current_price
         else:
             labels[i] = 0 # HOLD
+        future_rets[i] = (close[i + LOOKAHEAD] - current_price) / current_price
+    return labels, future_rets
 def build_dataset(symbols=SYMBOLS, days=DAYS, output_path="data/trading_dataset.pt"):
+    """Fetch, label, and sequence data for all symbols."""
+    all_X, all_y, all_rets = [], [], []
+    for symbol in tqdm(symbols, desc="Building Global Dataset"):
+        try:
+            df = fetch_ohlcv_yfinance(symbol, days=days)
+            if len(df) < (SEQ_LEN + LOOKAHEAD + 50):
+                continue
+            features = generate_features(df)
+            labels, rets = generate_labels(df)
+            # Align features with labels/rets and add sentiment
+            df_aligned = pd.DataFrame(index=df.index)
+            df_aligned["label"] = labels
+            df_aligned["future_ret"] = rets
+            df_aligned["sentiment"] = np.random.normal(0, 0.2, len(df))
+            # Merge features
+            df_combined = features.join(df_aligned, how="inner").dropna()
+            if len(df_combined) < SEQ_LEN:
+                continue
+            feat_vals = df_combined.drop(columns=["label", "future_ret"]).values
+            label_vals = df_combined["label"].values.astype(int)
+            ret_vals = df_combined["future_ret"].values
+            symbol_X, symbol_y, symbol_rets = [], [], []
+            for i in range(len(feat_vals) - SEQ_LEN):
+                symbol_X.append(feat_vals[i : i+SEQ_LEN])
+                # Label/Ret is for the prediction point at the END of the sequence
+                symbol_y.append(label_vals[i+SEQ_LEN-1])
+                symbol_rets.append(ret_vals[i+SEQ_LEN-1])
+            if symbol_X:
+                all_X.append(np.array(symbol_X))
+                all_y.append(np.array(symbol_y))
+                all_rets.append(np.array(symbol_rets))
+        except Exception as e:
+            logger.error(f"Error processing {symbol}: {e}")
+    if not all_X:
+        logger.error("No valid data collected!")
+        return None
+    X = np.concatenate(all_X, axis=0)
+    y = np.concatenate(all_y, axis=0)
+    rets = np.concatenate(all_rets, axis=0)
     data = {
         "X": torch.tensor(X, dtype=torch.float32),
+        "y": torch.tensor(y, dtype=torch.long),
+        "rets": torch.tensor(rets, dtype=torch.float32),
+        "symbols": symbols
     }
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     torch.save(data, output_path)
+    logger.info(f"✅ Dataset saved to {output_path} | Shape: {X.shape}")
     return data
 if __name__ == "__main__":

scripts/train_ai_model.py CHANGED Viewed

@@ -2,6 +2,7 @@
 """
 Train the BitNet AI Fusion model.
 Uses ternary weights (-1, 0, 1) and 8-bit activations.
 """
 import sys
@@ -10,8 +11,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader, TensorDataset, random_split
-from tqdm import tqdm
-from tqdm import tqdm
 import logging
 from safetensors.torch import save_file, load_file
 from huggingface_hub import HfApi, create_repo, hf_hub_download
@@ -22,7 +22,7 @@ from sklearn.metrics import classification_report, confusion_matrix
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 from trading_cli.strategy.ai.model import create_model
-from scripts.generate_ai_dataset import build_dataset
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -30,14 +30,13 @@ logger = logging.getLogger(__name__)
 # Hyperparameters
 EPOCHS = 100
-BATCH_SIZE = 8192 # Aggressive batch size to saturate T4 GPU (16GB)
 LR = 0.0003
 HIDDEN_DIM = 512
 LAYERS = 8
-SEQ_LEN = 30
-# Hugging Face Settings (Optional)
-HF_REPO_ID = os.getenv("HF_REPO_ID", "luohoa97/BitFin") # User's model repo
 HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -50,17 +49,14 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
     batch_size = start_batch
     last_success = batch_size
-    # Progress bar for the search
     pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
     pbar.update(batch_size)
     try:
         while batch_size <= 16384: # Ceiling
-            # Mock data for testing
             mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
             mock_y = torch.randint(0, 3, (batch_size,)).to(device)
-            # Simulated forward/backward pass
             outputs = model(mock_X)
             loss = nn.CrossEntropyLoss()(outputs, mock_y)
             loss.backward()
@@ -84,42 +80,26 @@ def get_max_batch_size(model, input_dim, seq_len, device, start_batch=128):
 def train():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    logger.info(f"Using device: {device}")
-    if device.type == "cpu":
-        logger.warning("⚠️  WARNING: CUDA is NOT available. Training on CPU will be EXTREMELY slow.")
-        logger.warning("👉 In Google Colab, go to 'Runtime' > 'Change runtime type' and select 'T4 GPU'.")
-    # Modern torch.amp API
-    use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
-    device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
-    dtype = torch.bfloat16 if use_bf16 else torch.float16
-    # Scaler only needed for FP16 on CUDA
-    scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda' and not use_bf16))
-    # 1. Load Dataset
     if not os.path.exists("data/trading_dataset.pt"):
-        logger.info("Dataset not found locally. Searching on HF Hub...")
-        if HF_DATASET_ID:
-            try:
-                hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", repo_type="dataset", local_dir="data")
-            except Exception as e:
-                logger.warning(f"Could not download dataset from HF: {e}. Falling back to generation.")
-        # If still not found, generate it!
-        if not os.path.exists("data/trading_dataset.pt"):
-            logger.info("🚀 Starting on-the-fly dataset generation (10 years, 70 symbols)...")
             build_dataset()
     print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
     data = torch.load("data/trading_dataset.pt")
-    X, y = data["X"], data["y"]
-    dataset = TensorDataset(X, y)
     train_size = int(0.8 * len(dataset))
     val_size = len(dataset) - train_size
-    train_ds, val_ds = random_split(dataset, [train_size, val_size])
     # 3. Create Model
     input_dim = X.shape[2]
@@ -132,17 +112,16 @@ def train():
     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
     val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
-    total_params = sum(p.numel() for p in model.parameters())
-    logger.info(f"Model Architecture: BitNet-Transformer ({LAYERS} layers, {HIDDEN_DIM} hidden)")
-    logger.info(f"Total Parameters: {total_params:,}")
-    # Use standard CrossEntropy for classification [HOLD, BUY, SELL]
     criterion = nn.CrossEntropyLoss()
-    optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
-    logger.info("Starting training on %d samples (%d features)...", len(X), input_dim)
-    # 5. Start Training
-    print(f"🚀 Starting training loop (Batch Size: {batch_size})...", flush=True)
     best_val_loss = float('inf')
     for epoch in range(EPOCHS):
@@ -152,24 +131,20 @@ def train():
         total = 0
         pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
-        for batch_X, batch_y in pbar:
             batch_X, batch_y = batch_X.to(device), batch_y.to(device)
             optimizer.zero_grad()
-            # Using Mixed Precision (AMP)
             with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
                 outputs = model(batch_X)
                 loss = criterion(outputs, batch_y)
             if not use_bf16:
                 scaler.scale(loss).backward()
-                scaler.unscale_(optimizer)
-                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                 scaler.step(optimizer)
                 scaler.update()
             else:
                 loss.backward()
-                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                 optimizer.step()
             train_loss += loss.item()
@@ -177,112 +152,68 @@ def train():
             total += batch_y.size(0)
             correct += predicted.eq(batch_y).sum().item()
-            # Update progress bar
-            pbar.set_postfix({
-                "loss": f"{loss.item():.4f}",
-                "acc": f"{100.*correct/total:.1f}%"
-            })
-        # Validation
         model.eval()
         val_loss = 0
-        val_correct = 0
-        val_total = 0
         with torch.no_grad():
-            for batch_X, batch_y in val_loader:
                 batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                 with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
                     outputs = model(batch_X)
                     loss = criterion(outputs, batch_y)
                 val_loss += loss.item()
-                _, predicted = outputs.max(1)
-                val_total += batch_y.size(0)
-                val_correct += predicted.eq(batch_y).sum().item()
-        avg_train_loss = train_loss / len(train_loader)
         avg_val_loss = val_loss / len(val_loader)
-        train_acc = 100. * correct / total
-        val_acc = 100. * val_correct / val_total
-        if (epoch + 1) % 5 == 0 or epoch == 0:
-            logger.info(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {avg_train_loss:.4f} Acc: {train_acc:.1f}% | Val Loss: {avg_val_loss:.4f} Acc: {val_acc:.1f}%")
         if avg_val_loss < best_val_loss:
             best_val_loss = avg_val_loss
             os.makedirs("models", exist_ok=True)
-            model_path = "models/ai_fusion_bitnet.safetensors"
-            save_file(model.state_dict(), model_path)
-            logger.info(f"Model saved to {model_path}")
-    logger.info("Training complete.")
-    # 6. Final Evaluation & Report
-    model.load_state_dict(load_file("models/ai_fusion_bitnet.safetensors"))
-    model.eval()
-    all_preds = []
-    all_true = []
-    with torch.no_grad():
-        for xb, yb in val_loader:
-            xb, yb = xb.to(device), yb.to(device)
-            outputs = model(xb)
-            preds = torch.argmax(outputs, dim=-1)
-            all_preds.extend(preds.cpu().numpy())
-            all_true.extend(yb.cpu().numpy())
-    target_names = ["HOLD", "BUY", "SELL"]
-    report = classification_report(all_true, all_preds, target_names=target_names)
-    # Advanced Metrics (Backtest Simulation)
-    buys = (np.array(all_preds) == 1).sum()
-    sells = (np.array(all_preds) == 2).sum()
-    total = len(all_preds)
-    win_count = ((np.array(all_preds) == 1) & (np.array(all_true) == 1)).sum()
-    win_rate = win_count / (buys + 1e-6)
-    perf_summary = f"""
-=== AI Fusion Model Performance Report ===
-{report}
-Trading Profile:
-- Total Validation Samples: {total:,}
-- Signal Frequency: {(buys+sells)/total:.2%}
-- BUY Signals: {buys}
-- SELL Signals: {sells}
-- Win Rate (Direct match): {win_rate:.2%}
-- Estimated Sharpe Ratio (Simulated): {(win_rate - 0.4) * 5:.2f}
-- Portfolio Impact: Scalable
-"""
-    logger.info(perf_summary)
-    cm = confusion_matrix(all_true, all_preds)
-    logger.info(f"Confusion Matrix:\n{cm}")
-    # Save report to file
-    os.makedirs("data", exist_ok=True)
-    with open("data/performance_report.txt", "w") as f:
-        f.write(perf_summary)
-        f.write("\nConfusion Matrix:\n")
-        f.write(str(cm))
-    # Optional: Upload to Hugging Face
     if HF_REPO_ID and HF_TOKEN:
         try:
-            logger.info(f"Uploading model to Hugging Face Hub: {HF_REPO_ID}...")
             api = HfApi()
-            # Ensure repo exists
             create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
-            # Upload
             api.upload_file(
                 path_or_fileobj="models/ai_fusion_bitnet.safetensors",
                 path_in_repo="ai_fusion_bitnet.safetensors",
                 repo_id=HF_REPO_ID,
                 token=HF_TOKEN
             )
-            logger.info("Upload successful! ✓")
         except Exception as e:
-            logger.error(f"Failed to upload to Hugging Face: {e}")
 if __name__ == "__main__":
     train()

 """
 Train the BitNet AI Fusion model.
 Uses ternary weights (-1, 0, 1) and 8-bit activations.
+Now includes real-time PnL backtesting and Confusion Matrix logging.
 """
 import sys
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader, TensorDataset, random_split
+from tqdm.auto import tqdm
 import logging
 from safetensors.torch import save_file, load_file
 from huggingface_hub import HfApi, create_repo, hf_hub_download
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 from trading_cli.strategy.ai.model import create_model
+from scripts.generate_ai_dataset import build_dataset, SEQ_LEN, LOOKAHEAD
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 # Hyperparameters
 EPOCHS = 100
+BATCH_SIZE = 4096 # Starting point for dynamic search
 LR = 0.0003
 HIDDEN_DIM = 512
 LAYERS = 8
+# HF Configuration
+HF_REPO_ID = os.getenv("HF_REPO_ID") # e.g. "username/BitFin"
 HF_DATASET_ID = "luohoa97/BitFin" # User's dataset repo
 HF_TOKEN = os.getenv("HF_TOKEN")
     batch_size = start_batch
     last_success = batch_size
     pbar = tqdm(total=16384, desc="Hardware Probe", unit="batch")
     pbar.update(batch_size)
     try:
         while batch_size <= 16384: # Ceiling
             mock_X = torch.randn(batch_size, seq_len, input_dim).to(device)
             mock_y = torch.randint(0, 3, (batch_size,)).to(device)
             outputs = model(mock_X)
             loss = nn.CrossEntropyLoss()(outputs, mock_y)
             loss.backward()
 def train():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}", flush=True)
+    # 1. Load or Generate Dataset
     if not os.path.exists("data/trading_dataset.pt"):
+        try:
+            print("📦 Fetching dataset from Hugging Face...", flush=True)
+            hf_hub_download(repo_id=HF_DATASET_ID, filename="trading_dataset.pt", local_dir="data", repo_type="dataset")
+        except Exception:
+            print("🚀 Starting on-the-fly dataset generation...", flush=True)
             build_dataset()
     print("🚀 Loading dataset from data/trading_dataset.pt...", flush=True)
     data = torch.load("data/trading_dataset.pt")
+    X, y, rets = data["X"], data["y"], data["rets"]
+    # 2. Split Data
+    dataset = TensorDataset(X, y, rets)
     train_size = int(0.8 * len(dataset))
     val_size = len(dataset) - train_size
+    train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
     # 3. Create Model
     input_dim = X.shape[2]
     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=2)
     val_loader = DataLoader(val_ds, batch_size=batch_size, pin_memory=True, num_workers=2)
+    optimizer = optim.AdamW(model.parameters(), lr=LR)
     criterion = nn.CrossEntropyLoss()
+    # Mixed Precision Setup
+    dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float16
+    device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
+    use_bf16 = (dtype == torch.bfloat16)
+    scaler = torch.amp.GradScaler(device_type, enabled=(not use_bf16 and device.type == 'cuda'))
+    print(f"🚀 Starting training (Batch Size: {batch_size}, Precision: {dtype})...", flush=True)
     best_val_loss = float('inf')
     for epoch in range(EPOCHS):
         total = 0
         pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
+        for batch_X, batch_y, _ in pbar:
             batch_X, batch_y = batch_X.to(device), batch_y.to(device)
             optimizer.zero_grad()
             with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
                 outputs = model(batch_X)
                 loss = criterion(outputs, batch_y)
             if not use_bf16:
                 scaler.scale(loss).backward()
                 scaler.step(optimizer)
                 scaler.update()
             else:
                 loss.backward()
                 optimizer.step()
             train_loss += loss.item()
             total += batch_y.size(0)
             correct += predicted.eq(batch_y).sum().item()
+            pbar.set_postfix({"loss": f"{loss.item():.4f}", "acc": f"{100.*correct/total:.1f}%"})
+        # Validation & Backtest
         model.eval()
         val_loss = 0
+        all_preds, all_true, all_rets = [], [], []
         with torch.no_grad():
+            for batch_X, batch_y, batch_r in val_loader:
                 batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                 with torch.amp.autocast(device_type=device_type, dtype=dtype, enabled=(device.type == 'cuda')):
                     outputs = model(batch_X)
                     loss = criterion(outputs, batch_y)
                 val_loss += loss.item()
+                preds = torch.argmax(outputs, dim=-1)
+                all_preds.extend(preds.cpu().numpy())
+                all_true.extend(batch_y.cpu().numpy())
+                all_rets.extend(batch_r.numpy())
         avg_val_loss = val_loss / len(val_loader)
+        # Calculate Backtest Metrics
+        all_preds = np.array(all_preds)
+        all_true = np.array(all_true)
+        all_rets = np.array(all_rets)
+        buys = int((all_preds == 1).sum())
+        sells = int((all_preds == 2).sum())
+        pnl = float(np.sum(all_rets[all_preds == 1]) - np.sum(all_rets[all_preds == 2]))
+        win_rate = float(np.sum((all_preds == 1) & (all_true == 1)) / (buys + 1e-6))
+        print(f"\n--- Epoch {epoch+1} Statistics ---", flush=True)
+        print(f"Val Loss: {avg_val_loss:.4f} | Total PnL: {pnl:+.4f} | Win Rate: {win_rate:.1%}", flush=True)
+        print(f"Signals: {buys} BUY | {sells} SELL | Activity: {(buys+sells)/len(all_preds):.1%}", flush=True)
+        if buys + sells > 0:
+            cm = confusion_matrix(all_true, all_preds, labels=[0, 1, 2])
+            print(f"Confusion Matrix (HOLD/BUY/SELL):\n{cm}", flush=True)
         if avg_val_loss < best_val_loss:
             best_val_loss = avg_val_loss
             os.makedirs("models", exist_ok=True)
+            save_file(model.state_dict(), "models/ai_fusion_bitnet.safetensors")
+    print("✅ Training complete. Final model saved.")
+    # Upload to HF
     if HF_REPO_ID and HF_TOKEN:
         try:
+            print(f"📤 Uploading to HF: {HF_REPO_ID}...", flush=True)
             api = HfApi()
             create_repo(repo_id=HF_REPO_ID, token=HF_TOKEN, exist_ok=True, repo_type="model")
             api.upload_file(
                 path_or_fileobj="models/ai_fusion_bitnet.safetensors",
                 path_in_repo="ai_fusion_bitnet.safetensors",
                 repo_id=HF_REPO_ID,
                 token=HF_TOKEN
             )
+            print("✅ Upload successful!", flush=True)
         except Exception as e:
+            print(f"⚠️ Upload failed: {e}", flush=True)
 if __name__ == "__main__":
     train()