Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

README.md +93 -19
config.json +25 -8
data_processor.py +301 -0
model.pt +1 -1
model.py +311 -0

README.md CHANGED Viewed

@@ -1,31 +1,105 @@
 # LOBPatternNet - 主力下单模式识别模型
-## 模型简介
-基于Level-2十档委托单数据的主力（机构）交易模式识别深度学习模型。
-## 模型架构
-- **输入**: 100个连续的Level-2委托快照，每个快照包含40个特征（10档买/卖价格+数量）
-- **特征工程**: 订单流不平衡(OFI)、价差动态、深度不平衡、大单集中度、价格压力、OFI波动率
-- **编码器**: CNN空间编码器 + Inception多尺度时间特征 + Transformer注意力机制
-- **输出**: 3分类（主力买入 / 中性 / 主力卖出）
-## 性能指标
-- Test Accuracy: 0.8752
-- Test F1 (weighted): 0.8169
-## 使用方法
 ```python
-from model import LOBPatternNet
 import torch
-model = LOBPatternNet(seq_len=100, num_classes=3)
-model.load_state_dict(torch.load("model.pt"))
 model.eval()
-# Input: (batch, 100, 40) LOB snapshots
-# Output: (batch, 3) logits [主力买入, 中性, 主力卖出]
 ```
-## 引用
-- DeepLOB: Zhang et al., TNNLS 2019
-- TLOB: Berti & Kasneci, arxiv:2502.15757

 # LOBPatternNet - 主力下单模式识别模型
+# LOBPatternNet - Institutional Trading Pattern Detection from Level-2 Order Book
+## 模型简介 / Model Overview
+本模型基于A股Level-2十档委托单数据，利用深度学习自动识别主力（机构投资者）的下单模式。
+通过分析买卖委托的价格分布、挂单量、订单流不平衡(OFI)等微观结构特征，判断当前是否存在主力买入或卖出行为。
+This model detects institutional (主力) trading patterns from Level-2 order book data with 10 price levels.
+It analyzes bid/ask price distributions, order sizes, Order Flow Imbalance (OFI), and other microstructure
+features to classify market states into institutional buying, neutral, or institutional selling.
+## 架构 / Architecture
+```
+Input: (batch, 100, 40) - 100 consecutive LOB snapshots × 40 features
+    ↓
+BilinearNorm - 自适应归一化层
+    ↓
+Spatial CNN (Conv2d) - 提取价位间空间特征 (cross-level patterns)
+    ↓
+Inception Module × 2 - 多尺度时间特征提取 (multi-scale temporal)
+    ↓
+Transformer Attention × 2 - 时序依赖建模 (temporal dependencies)
+    ↓
+Fusion with Auxiliary Features:
+    - 订单流不平衡 (OFI)
+    - 价差动态 (Spread dynamics)
+    - 深度不平衡 (Depth imbalance)
+    - 大单集中度 (Volume concentration)
+    - 价格压力 (Price pressure)
+    - OFI波动率 (OFI volatility)
+    ↓
+3-class Classification Head
+```
+**Total Parameters**: 259,899
+## 输出类别 / Output Classes
+| Label | 中文 | English | Description |
+|-------|------|---------|-------------|
+| 0 | 主力买入 | Institutional Buying | 检测到机构大量买入信号 |
+| 1 | 中性/散户 | Neutral/Retail | 无明显主力操盘迹象 |
+| 2 | 主力卖出 | Institutional Selling | 检测到机构大量卖出信号 |
+## 性能指标 / Performance
+| Metric | Value |
+|--------|-------|
+| Test Accuracy | 0.4777 |
+| Test F1 (Macro) | 0.4127 |
+| Test F1 (Weighted) | 0.5091 |
+| 主力买入 Precision | 0.2369 |
+| 主力买入 Recall | 0.4251 |
+| 主力卖出 Precision | 0.2679 |
+| 主力卖出 Recall | 0.4983 |
+## 使用方法 / Usage
 ```python
 import torch
+from model import LOBPatternNet
+# Load model
+model = LOBPatternNet(seq_len=100, num_classes=3, d_model=128, nhead=4, num_attn_layers=2)
+model.load_state_dict(torch.load("model.pt", weights_only=True))
 model.eval()
+# Input: 100 consecutive Level-2 snapshots
+# Each snapshot: [ask_p1, ask_s1, bid_p1, bid_s1, ask_p2, ask_s2, ..., bid_p10, bid_s10]
+# Features should be z-score normalized (see data_processor.py)
+x = torch.randn(1, 100, 40)  # example input
+with torch.no_grad():
+    logits = model(x)
+    probs = torch.softmax(logits, dim=1)
+    pred = logits.argmax(dim=1)
+# pred: 0=主力买入, 1=中性, 2=主力卖出
+labels = ["主力买入", "中性/散户", "主力卖出"]
+print(f"Prediction: {labels[pred.item()]}")
+print(f"Confidence: {probs[0, pred.item()]:.2%}")
 ```
+## 数据格式 / Input Format
+每个Level-2快照包含40个特征 (10档 × 4个字段):
+| Feature | Description | 说明 |
+|---------|-------------|------|
+| ask_price_i | Ask price at level i | 第i档卖出价 |
+| ask_size_i | Ask volume at level i | 第i档卖出量 |
+| bid_price_i | Bid price at level i | 第i档买入价 |
+| bid_size_i | Bid volume at level i | 第i档买入量 |
+## 参考文献 / References
+- **DeepLOB**: Zhang et al., "Deep Convolutional Neural Networks for Limit Order Books", TNNLS 2019 (arxiv:1808.03668)
+- **TLOB**: Berti & Kasneci, "TLOB: A Novel Transformer Model with Dual Attention for Stock Price Trend Prediction", 2025 (arxiv:2502.15757)
+- **Training Dataset**: [LeonardoBerti/TRADES-LOB](https://huggingface.co/datasets/LeonardoBerti/TRADES-LOB)
+## 声明 / Disclaimer
+本模型仅供研究学习使用，不构成任何投资建议。股市有风险，入市需谨慎。
+This model is for research purposes only and does not constitute investment advice.

config.json CHANGED Viewed

@@ -1,19 +1,36 @@
 {
   "model_type": "LOBPatternNet",
   "num_levels": 10,
   "seq_len": 100,
   "num_classes": 3,
   "d_model": 128,
   "nhead": 4,
   "num_attn_layers": 2,
-  "dropout": 0.3,
   "class_names": [
-    "\u4e3b\u529b\u4e70\u5165 (Institutional Buy)",
-    "\u4e2d\u6027 (Neutral)",
-    "\u4e3b\u529b\u5356\u51fa (Institutional Sell)"
   ],
-  "description": "Institutional trading pattern detection from Level-2 order book data",
-  "architecture": "CNN + Inception + Transformer Attention",
-  "test_f1": 0.8168909018517501,
-  "test_acc": 0.8751566808723991
 }

 {
   "model_type": "LOBPatternNet",
+  "architecture": "CNN + Inception + Transformer Attention + Auxiliary Features",
   "num_levels": 10,
   "seq_len": 100,
   "num_classes": 3,
   "d_model": 128,
   "nhead": 4,
   "num_attn_layers": 2,
+  "dropout": 0.2,
   "class_names": [
+    "主力买入 (Institutional Buy)",
+    "中性 (Neutral)",
+    "主力卖出 (Institutional Sell)"
   ],
+  "class_names_zh": [
+    "主力买入",
+    "中性/散户",
+    "主力卖出"
+  ],
+  "total_parameters": 259899,
+  "training_dataset": "LeonardoBerti/TRADES-LOB",
+  "test_accuracy": 0.47769423558897245,
+  "test_f1_macro": 0.4126581408122072,
+  "test_f1_weighted": 0.5091308416210424,
+  "test_precision_per_class": [
+    0.23689320388349513,
+    0.7402173913043478,
+    0.26785714285714285
+  ],
+  "test_recall_per_class": [
+    0.4250871080139373,
+    0.4840085287846482,
+    0.4983388704318937
+  ]
 }

data_processor.py ADDED Viewed

	@@ -0,0 +1,301 @@

+"""
+Data processing pipeline for LOBPatternNet v2.
+Fixed: proper normalization, balanced labeling, oversampling.
+"""
+import numpy as np
+import pandas as pd
+from datasets import load_dataset
+from sklearn.model_selection import train_test_split
+import torch
+from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
+import os
def load_lob_data():
    """Fetch the TRADES-LOB ``train`` split from the Hugging Face Hub.

    Returns:
        The split converted to a pandas DataFrame via ``Dataset.to_pandas``.
    """
    frame = load_dataset("LeonardoBerti/TRADES-LOB", split="train").to_pandas()
    print(f"Loaded dataset: {len(frame)} rows")
    return frame
def extract_and_normalize_features(df, num_levels=10):
    """Turn raw per-level LOB columns into a z-scored feature matrix.

    Pipeline:
    1. Read ``ask_price_i``, ``ask_size_i``, ``bid_price_i``, ``bid_size_i``
       for ``i = 1..num_levels`` as float64.
    2. Treat any value with ``abs(value) >= 1e9`` (or NaN) as invalid.
    3. Prices: express as basis points relative to the mid-price of level 1;
       rows without a valid mid reuse the last valid one (forward fill).
    4. Sizes: ``log1p`` of strictly positive valid sizes.
    5. Invalid entries become 0, then every column is z-scored.

    Args:
        df: DataFrame holding the columns listed in step 1.
        num_levels: number of price levels to read (default 10).

    Returns:
        ``(features, means, stds)`` where ``features`` is a float32 array of
        shape ``(N, 4 * num_levels)`` laid out per level as
        ``[ask_p, ask_s, bid_p, bid_s]``, and ``means`` / ``stds`` are the
        per-column statistics used for the z-score (``stds`` below 1e-8 are
        replaced by 1.0).

    Raises:
        ValueError: if ``df`` has no rows.

    NOTE(review): the z-score statistics are computed over every row passed
    in; if the caller later splits these rows into train/val/test, the
    statistics include the held-out rows.
    """
    N = len(df)
    if N == 0:
        raise ValueError("df is empty; cannot compute normalization statistics")

    def _levels(prefix):
        # One float64 column per level, stacked to (N, num_levels).
        return np.column_stack([
            df[f'{prefix}_{i}'].values.astype(np.float64)
            for i in range(1, num_levels + 1)
        ])

    ask_prices = _levels('ask_price')
    ask_sizes = _levels('ask_size')
    bid_prices = _levels('bid_price')
    bid_sizes = _levels('bid_size')

    # Mark sentinel/invalid values (NaN compares False and is invalid too).
    SENTINEL = 1e9
    ask_p_valid = np.abs(ask_prices) < SENTINEL
    ask_s_valid = np.abs(ask_sizes) < SENTINEL
    bid_p_valid = np.abs(bid_prices) < SENTINEL
    bid_s_valid = np.abs(bid_sizes) < SENTINEL
    n_invalid = (~ask_p_valid).sum() + (~bid_p_valid).sum() + (~ask_s_valid).sum() + (~bid_s_valid).sum()
    print(f"Found {n_invalid} invalid/sentinel values")

    # Mid-price from the best bid/ask; 0 marks "no valid mid on this row".
    both_valid = ask_p_valid[:, 0] & bid_p_valid[:, 0]
    mid_price = np.where(both_valid, (ask_prices[:, 0] + bid_prices[:, 0]) / 2.0, 0.0)

    # Vectorized forward fill: each row points at the index of the most
    # recent non-zero mid. Leading rows with no earlier mid stay 0.
    last_seen = np.where(mid_price != 0, np.arange(N), 0)
    mid_price = mid_price[np.maximum.accumulate(last_seen)]

    has_mid = mid_price > 0
    safe_mid = np.where(has_mid, mid_price, 1.0)[:, None]  # avoids 0-division

    def _to_bps(prices, valid):
        # (price - mid) / mid * 10000 = basis points; 0 where unusable.
        usable = valid & has_mid[:, None]
        safe_prices = np.where(usable, prices, 0.0)
        return np.where(usable, (safe_prices - safe_mid) / safe_mid * 10000, 0.0)

    def _log_size(sizes, valid):
        usable = valid & (sizes > 0)
        return np.where(usable, np.log1p(np.where(usable, sizes, 0.0)), 0.0)

    norm_ask_p = _to_bps(ask_prices, ask_p_valid)
    norm_bid_p = _to_bps(bid_prices, bid_p_valid)
    norm_ask_s = _log_size(ask_sizes, ask_s_valid)
    norm_bid_s = _log_size(bid_sizes, bid_s_valid)

    # Interleave per level: [ask_p_1, ask_s_1, bid_p_1, bid_s_1, ask_p_2, ...]
    features = np.stack(
        [norm_ask_p, norm_ask_s, norm_bid_p, norm_bid_s], axis=2
    ).reshape(N, 4 * num_levels).astype(np.float32)

    # Final z-score normalization per feature (critical for model convergence)
    means = features.mean(axis=0)
    stds = features.std(axis=0)
    stds[stds < 1e-8] = 1.0  # constant columns: avoid division by 0
    features = (features - means) / stds

    # Replace any remaining NaN/inf
    features = np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)
    print(f"Feature shape: {features.shape}")
    print(f"Feature range: [{features.min():.4f}, {features.max():.4f}]")
    print(f"Feature mean: {features.mean():.6f}, std: {features.std():.4f}")
    return features, means, stds
def rolling_sum(arr, window):
    """Trailing rolling sum of a 1-D array via the cumulative-sum trick.

    ``result[i]`` is ``arr[i - window + 1] + ... + arr[i]`` for
    ``i >= window``; the first ``window`` entries are left at 0.

    Floating-point inputs are accumulated in float64: a float32 prefix sum
    over a long series grows large enough that the difference of two prefix
    sums loses the small per-window signal. The result is cast back to the
    dtype the plain ``np.cumsum(arr)`` would have produced.

    Args:
        arr: 1-D numeric array.
        window: positive number of trailing elements to sum.

    Returns:
        Array with the same length as ``arr``.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")
    arr = np.asarray(arr)
    if np.issubdtype(arr.dtype, np.floating):
        cum = np.cumsum(arr, dtype=np.float64)
        out_dtype = arr.dtype
    else:
        cum = np.cumsum(arr)
        out_dtype = cum.dtype
    result = np.zeros(cum.shape, dtype=out_dtype)
    result[window:] = cum[window:] - cum[:-window]
    return result
def construct_labels_vectorized(df, window=50, ofi_threshold=0.15, percentile=85,
                                score_percentile=80):
    """Label every event as institutional buy (0), neutral (1) or sell (2).

    A buy score and a sell score are built from three rolling statistics over
    the trailing ``window`` events: normalized signed volume (OFI), the
    difference between large buy and large sell order counts, and the
    cancellation rate. An event is labelled 0 when its buy score exceeds the
    ``score_percentile`` of all buy scores and its OFI exceeds
    ``ofi_threshold``; symmetrically 2 for sells; otherwise 1.

    Args:
        df: DataFrame with columns ``BUY_SELL_FLAG`` (1=buy, 0=sell),
            ``SIZE`` and ``TYPE`` (compared against ``'ORDER_CANCELLED'``).
        window: rolling window length in events.
        ofi_threshold: minimum absolute normalized OFI for a non-neutral label.
        percentile: size percentile above which an order counts as "large".
        score_percentile: percentile of the score distribution used as the
            buy/sell threshold (default 80, the previously hard-coded value).

    Returns:
        int64 array of length ``len(df)`` with values in {0, 1, 2}. The first
        ``window`` events have zero rolling statistics and stay neutral.

    Raises:
        ValueError: if ``window < 1`` or ``len(df) <= window`` (no event has
            a complete window, so the score percentiles are undefined).
    """
    N = len(df)
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")
    if N <= window:
        raise ValueError(
            f"need more than window={window} events to build labels, got {N}")

    # float64 keeps the rolling (cumulative-sum based) statistics accurate
    # on long event streams.
    buy_sell = df['BUY_SELL_FLAG'].values.astype(np.float64)  # 1=buy, 0=sell
    sizes = df['SIZE'].values.astype(np.float64)
    types = df['TYPE'].values
    print(f"Constructing labels from {N} events, window={window}...")

    # Signed volume: +size for buys, -size for sells.
    signed_vol = sizes * (2 * buy_sell - 1)
    roll_signed = rolling_sum(signed_vol, window)
    roll_total = rolling_sum(sizes, window)
    norm_ofi = roll_signed / (roll_total + 1e-8)

    # Large orders, counted separately per side.
    is_large = (sizes > np.percentile(sizes, percentile)).astype(np.float64)
    roll_large_buy = rolling_sum(is_large * buy_sell, window)
    roll_large_sell = rolling_sum(is_large * (1 - buy_sell), window)

    # Cancellation rate; it raises both scores equally.
    is_cancel = (types == 'ORDER_CANCELLED').astype(np.float64)
    roll_cancel = rolling_sum(is_cancel, window) / window

    # Combined scores
    large_diff = (roll_large_buy - roll_large_sell) / (window * 0.1 + 1e-8)
    buy_score = norm_ofi + 0.3 * large_diff + 0.2 * roll_cancel
    sell_score = -norm_ofi - 0.3 * large_diff + 0.2 * roll_cancel

    # Thresholds come only from events whose window is complete.
    buy_threshold = np.percentile(buy_score[window:], score_percentile)
    sell_threshold = np.percentile(sell_score[window:], score_percentile)
    print(f"Buy threshold (p{score_percentile}): {buy_threshold:.4f}, "
          f"Sell threshold (p{score_percentile}): {sell_threshold:.4f}")

    labels = np.ones(N, dtype=np.int64)
    labels[(buy_score > buy_threshold) & (norm_ofi > ofi_threshold)] = 0
    labels[(sell_score > sell_threshold) & (norm_ofi < -ofi_threshold)] = 2

    unique, counts = np.unique(labels, return_counts=True)
    label_names = {0: '主力买入', 1: '中性', 2: '主力卖出'}
    print("Label distribution:")
    for u, c in zip(unique, counts):
        print(f"  {u} ({label_names.get(u, '?')}): {c} ({c/N*100:.1f}%)")
    return labels
def create_sequences(features, labels, seq_len=100, stride=20):
    """Cut sliding windows of ``seq_len`` rows out of ``features``.

    Window ``k`` starts at row ``k * stride`` and is labelled with the label
    of its last row. Every start for which the full window fits is used,
    including the final one at ``N - seq_len``.

    Args:
        features: array of shape ``(N, F)``.
        labels: array of length ``N``.
        seq_len: rows per window (positive).
        stride: step between consecutive window starts (positive).

    Returns:
        ``(X, y)`` with ``X`` float32 of shape ``(n_sequences, seq_len, F)``
        and ``y`` int64 of shape ``(n_sequences,)``. Both are empty when
        ``N < seq_len``.

    Raises:
        ValueError: if ``seq_len`` or ``stride`` is smaller than 1.
    """
    if seq_len < 1 or stride < 1:
        raise ValueError(
            f"seq_len and stride must be positive, got {seq_len} and {stride}")
    features = np.asarray(features)
    labels = np.asarray(labels)
    N = len(features)

    # +1 so the last window that still fits (start == N - seq_len) is kept.
    starts = np.arange(0, N - seq_len + 1, stride)
    n_sequences = len(starts)
    print(f"Creating {n_sequences} sequences of length {seq_len}, stride {stride}...")

    # One gather instead of a Python loop: row indices of every window.
    window_rows = starts[:, None] + np.arange(seq_len)
    X = features[window_rows].astype(np.float32, copy=False)
    y = labels[starts + seq_len - 1].astype(np.int64, copy=False)
    print(f"Created {n_sequences} sequences, memory: {X.nbytes / 1e6:.1f} MB")
    return X, y
class LOBDataset(Dataset):
    """Map-style dataset pairing LOB windows with their class labels.

    The NumPy arrays are wrapped with ``torch.from_numpy``, so the tensors
    share memory with the arrays passed in.
    """

    def __init__(self, X, y):
        """Store ``X`` and ``y`` as tensors.

        Args:
            X: NumPy array whose first axis indexes samples.
            y: NumPy array of labels, one per sample.

        Raises:
            ValueError: if ``X`` and ``y`` have different lengths.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}")
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]
def get_weighted_sampler(y_train):
    """Build a ``WeightedRandomSampler`` that oversamples minority classes.

    Each sample is weighted by the inverse frequency of its class, and
    ``len(y_train)`` indices are drawn with replacement per epoch.

    Args:
        y_train: 1-D array of non-negative integer class labels.

    Returns:
        torch.utils.data.WeightedRandomSampler

    Raises:
        ValueError: if ``y_train`` is empty.
    """
    y_train = np.asarray(y_train)
    if y_train.size == 0:
        raise ValueError("y_train is empty; cannot build a sampler")

    class_counts = np.bincount(y_train)
    # Class ids that never occur get weight 0 instead of 1/0 = inf (and the
    # divide-by-zero warning); they are never indexed below anyway.
    class_weights = np.zeros(len(class_counts), dtype=np.float64)
    present = class_counts > 0
    class_weights[present] = 1.0 / class_counts[present]

    sample_weights = class_weights[y_train]
    return WeightedRandomSampler(
        weights=torch.from_numpy(sample_weights).double(),
        num_samples=len(y_train),
        replacement=True,
    )
def prepare_data(seq_len=100, stride=5, window=50, ofi_threshold=0.2,
                 percentile=90, test_size=0.15, val_size=0.15,
                 random_state=42, batch_size=64, cache_dir="/app"):
    """Run the full pipeline and return train/val/test DataLoaders.

    Steps: load the raw data, build normalized features and labels, cut
    sliding windows, split them (stratified) and wrap them in DataLoaders.
    The split arrays are cached as a compressed ``.npz`` in ``cache_dir``.

    Args:
        seq_len, stride: window length and step passed to ``create_sequences``.
        window, ofi_threshold, percentile: labelling parameters passed to
            ``construct_labels_vectorized``.
        test_size, val_size: fractions of all sequences held out.
        random_state: seed for both stratified splits.
        batch_size: batch size for all three loaders.
        cache_dir: directory for the cache file (created if missing).

    Returns:
        ``(train_loader, val_loader, test_loader, y_train)``; the training
        loader draws samples through a class-balancing weighted sampler.

    NOTE(review): with ``stride < seq_len`` neighbouring windows overlap, and
    the random stratified split can place overlapping windows in different
    splits, so val/test metrics may be optimistic. A chronological split
    with a gap would avoid this; it is not changed here because it would
    alter the published split.
    """
    # Every argument that changes the cached arrays is part of the file
    # name; otherwise a run with different labelling or split settings would
    # silently reuse stale data.
    cache_name = (f"data_v2_{seq_len}_{stride}_{window}_{ofi_threshold}_"
                  f"{percentile}_{test_size}_{val_size}_{random_state}.npz")
    cache_path = os.path.join(cache_dir, cache_name)

    if os.path.exists(cache_path):
        print(f"Loading cached data from {cache_path}")
        # Only plain numeric arrays are stored, so pickle support is not
        # needed; the context manager closes the archive handle.
        with np.load(cache_path, allow_pickle=False) as data:
            X_train, y_train = data['X_train'], data['y_train']
            X_val, y_val = data['X_val'], data['y_val']
            X_test, y_test = data['X_test'], data['y_test']
    else:
        # Load raw data
        df = load_lob_data()
        # Extract and normalize features
        features, means, stds = extract_and_normalize_features(df)
        # Construct labels
        labels = construct_labels_vectorized(df, window=window,
                                             ofi_threshold=ofi_threshold,
                                             percentile=percentile)
        # Create sequences
        X, y = create_sequences(features, labels, seq_len=seq_len, stride=stride)
        # Split (stratified): first carve off val+test, then divide that part.
        X_train, X_temp, y_train, y_temp = train_test_split(
            X, y, test_size=test_size + val_size, random_state=random_state, stratify=y)
        X_val, X_test, y_val, y_test = train_test_split(
            X_temp, y_temp, test_size=test_size / (test_size + val_size),
            random_state=random_state, stratify=y_temp)
        # Save cache (create the directory first so the work is not lost).
        os.makedirs(cache_dir, exist_ok=True)
        np.savez_compressed(cache_path,
                            X_train=X_train, y_train=y_train,
                            X_val=X_val, y_val=y_val,
                            X_test=X_test, y_test=y_test,
                            means=means, stds=stds)
        print(f"Cached to {cache_path}")

    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")
    # Print label distributions
    for name, ys in [("Train", y_train), ("Val", y_val), ("Test", y_test)]:
        unique, counts = np.unique(ys, return_counts=True)
        dist = {u: c for u, c in zip(unique, counts)}
        print(f"  {name}: {dist}")

    # Create datasets
    train_dataset = LOBDataset(X_train, y_train)
    val_dataset = LOBDataset(X_val, y_val)
    test_dataset = LOBDataset(X_test, y_test)

    # Weighted sampler for training (oversamples minority classes)
    train_sampler = get_weighted_sampler(y_train)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    return train_loader, val_loader, test_loader, y_train
if __name__ == "__main__":
    train_loader, val_loader, test_loader, y_train = prepare_data()
    # Smoke check: inspect the first training batch, if there is one.
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        X_batch, y_batch = first_batch
        print(f"Batch X: {X_batch.shape}, y: {y_batch.shape}")
        print(f"Batch labels: {y_batch[:20]}")
        print(f"Batch X range: [{X_batch.min():.4f}, {X_batch.max():.4f}]")

model.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b89a47feff6243a25222dcc4958860cb43dd705da131902c8fc3c4cc771bbb6
 size 1073163

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa391467f5bc207ba527cda22072d606488d8e3cb07b10e60512451e7bc8733b
 size 1073163

model.py ADDED Viewed

	@@ -0,0 +1,311 @@

+"""
+LOBPatternNet: Deep Learning Model for Institutional Trading Pattern Detection
+from Level-2 Order Book Data (10-level bid/ask)
+Architecture: CNN (spatial) + Inception (multi-scale) + Transformer Attention (temporal) + MLP Head
+Based on DeepLOB (Zhang et al. 2019) + TLOB (Berti & Kasneci 2025) design principles
+Input: (batch, seq_len, 40) - seq_len consecutive LOB snapshots, each with 40 features
+    interleaved per level: [ask_price_1, ask_size_1, bid_price_1, bid_size_1, ..., bid_price_10, bid_size_10]
+Output: 3-class classification
+    0: 主力买入 (Institutional Buying)
+    1: 中性/散户 (Neutral/Retail)
+    2: 主力卖出 (Institutional Selling)
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
class BilinearNorm(nn.Module):
    """Gated per-sequence normalization for mixed price/volume features.

    Each feature is standardized over the time axis of its own sequence,
    passed through a learnable affine map, and blended with the raw input by
    a learnable sigmoid gate. The original author describes it as the
    bilinear normalization layer from TLOB.
    """

    def __init__(self, num_features):
        """Create per-feature scale, shift and gate parameters.

        Args:
            num_features: size of the last input axis.
        """
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(1, 1, num_features))
        self.beta = nn.Parameter(torch.zeros(1, 1, num_features))
        self.gate = nn.Parameter(torch.ones(1, 1, num_features))

    def forward(self, x):
        """Normalize ``x`` of shape (B, T, F); the output has the same shape."""
        mean = x.mean(dim=1, keepdim=True)
        if x.size(1) > 1:
            std = x.std(dim=1, keepdim=True)
        else:
            # The unbiased std of a single time step is NaN; use zero spread
            # so a length-1 sequence still yields finite output.
            std = torch.zeros_like(mean)
        x_norm = (x - mean) / (std + 1e-8)
        gate = torch.sigmoid(self.gate)
        return gate * (self.gamma * x_norm + self.beta) + (1 - gate) * x
class InceptionModule(nn.Module):
    """Four parallel 1-D branches whose outputs are concatenated on channels.

    Branches: kernel-1 conv, kernel-3 conv, kernel-5 conv, and a max-pool
    followed by a kernel-1 conv. Each conv is followed by batch norm and
    LeakyReLU(0.01), and padding keeps the time length unchanged, so the
    output has ``4 * out_channels`` channels.
    """

    @staticmethod
    def _conv_bn_act(in_channels, out_channels, kernel_size):
        # "Same" padding for odd kernels: 1 -> 0, 3 -> 1, 5 -> 2.
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=kernel_size,
                      padding=kernel_size // 2),
            nn.BatchNorm1d(out_channels),
            nn.LeakyReLU(0.01),
        )

    def __init__(self, in_channels, out_channels=32):
        super().__init__()
        self.branch1 = self._conv_bn_act(in_channels, out_channels, 1)
        self.branch3 = self._conv_bn_act(in_channels, out_channels, 3)
        self.branch5 = self._conv_bn_act(in_channels, out_channels, 5)
        # Pooling first, then the same conv/bn/activation trio at indices 1-3.
        self.pool_branch = nn.Sequential(
            nn.MaxPool1d(kernel_size=3, stride=1, padding=1),
            *self._conv_bn_act(in_channels, out_channels, 1),
        )

    def forward(self, x):
        """Map (B, C, T) to (B, 4 * out_channels, T)."""
        branches = (self.branch1, self.branch3, self.branch5, self.pool_branch)
        return torch.cat([branch(x) for branch in branches], dim=1)
class TemporalAttention(nn.Module):
    """Self-attention block over the time axis with residual and LayerNorm."""

    def __init__(self, d_model, nhead=4, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim=d_model, num_heads=nhead, dropout=dropout, batch_first=True)
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply post-norm self-attention to ``x`` of shape (B, T, D)."""
        context = self.attn(x, x, x)[0]  # drop the attention-weight output
        residual = x + self.dropout(context)
        return self.norm(residual)
class LOBPatternNet(nn.Module):
    """
    Full model for institutional trading pattern detection from Level-2 LOB data.

    Architecture:
    1. BilinearNorm -> normalize the LOB features
    2. CNN spatial encoder -> extract cross-level order book patterns
    3. Inception x 2 -> multi-scale temporal features
    4. Transformer attention -> capture temporal dependencies
    5. Fusion with six hand-crafted microstructure features
    6. Classification head -> ``num_classes`` logits

    Input layout: every snapshot is interleaved per level,
    ``[ask_p_1, ask_s_1, bid_p_1, bid_s_1, ask_p_2, ...]``. This is the
    layout the first strided convolution pairs up (price with size) and the
    layout ``compute_aux_features`` reads.

    NOTE(review): ``compute_aux_features`` previously sliced the input as
    four contiguous blocks and swapped bid/ask totals. Checkpoints trained
    with that version learned different auxiliary inputs and should be
    retrained or re-evaluated.
    """

    def __init__(self,
                 num_levels=10,        # number of price levels (10 for Level-2)
                 seq_len=100,          # number of consecutive LOB snapshots
                 num_classes=3,        # institutional buy, neutral, institutional sell
                 d_model=128,          # internal feature dimension
                 nhead=4,              # attention heads
                 num_attn_layers=2,    # number of attention layers
                 dropout=0.2):
        super().__init__()
        self.num_levels = num_levels
        self.seq_len = seq_len
        self.num_features = num_levels * 4  # ask_p, ask_s, bid_p, bid_s per level

        # 1. Bilinear normalization
        self.norm = BilinearNorm(self.num_features)

        # 2. Spatial CNN encoder - processes each snapshot across price levels.
        # Input is reshaped to (B, 1, T, 40) for the 2D convolutions.
        self.spatial_cnn = nn.Sequential(
            # Conv across features (price-volume pairing per level)
            nn.Conv2d(1, 32, kernel_size=(1, 2), stride=(1, 2)),   # (B, 32, T, 20)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01),
            nn.Conv2d(32, 32, kernel_size=(1, 2), stride=(1, 2)),  # (B, 32, T, 10)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01),
            nn.Conv2d(32, 32, kernel_size=(1, 10)),                # (B, 32, T, 1)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.01),
        )

        # 3. Inception modules for multi-scale temporal features
        self.inception1 = InceptionModule(32, 32)    # Output: 128 channels
        self.inception2 = InceptionModule(128, 32)   # Output: 128 channels

        # 4. Projection to d_model
        self.proj = nn.Sequential(
            nn.Linear(128, d_model),
            nn.LayerNorm(d_model),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout)
        )

        # 5. Transformer attention layers
        self.attention_layers = nn.ModuleList([
            TemporalAttention(d_model, nhead, dropout)
            for _ in range(num_attn_layers)
        ])

        # 6. Classification head
        self.classifier = nn.Sequential(
            nn.Linear(d_model, 64),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout),
            nn.Linear(64, num_classes)
        )

        # Encoder for the derived features returned by compute_aux_features:
        # OFI mean, spread, depth imbalance, bid concentration, price
        # pressure, OFI volatility.
        self.aux_features_dim = 6
        self.aux_encoder = nn.Sequential(
            nn.Linear(self.aux_features_dim, 32),
            nn.LeakyReLU(0.01),
            nn.Linear(32, d_model),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout)
        )

        # Fusion layer: concatenated sequence + auxiliary embeddings -> d_model
        self.fusion = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.LeakyReLU(0.01),
            nn.Dropout(dropout)
        )
        self._init_weights()

    def _init_weights(self):
        """Kaiming-initialize conv/linear weights; reset norm layers to identity."""
        for m in self.modules():
            if isinstance(m, (nn.Linear, nn.Conv1d, nn.Conv2d)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.LayerNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _check_input(self, x):
        """Reject inputs that are not (B, T, num_features)."""
        if x.dim() != 3 or x.size(-1) != self.num_features:
            raise ValueError(
                f"expected input of shape (B, T, {self.num_features}), "
                f"got {tuple(x.shape)}")

    def _temporal_tokens(self, x):
        """Normalization -> spatial CNN -> inception -> projection.

        Maps (B, T, F) to (B, T, d_model).
        """
        x = self.norm(x)                                  # (B, T, F)
        x = self.spatial_cnn(x.unsqueeze(1)).squeeze(-1)  # (B, 32, T)
        x = self.inception2(self.inception1(x))           # (B, 128, T)
        return self.proj(x.permute(0, 2, 1))              # (B, T, d_model)

    def compute_aux_features(self, x):
        """
        Compute derived microstructure features from LOB snapshots.

        x: (B, T, 4 * L) snapshots interleaved per level as
           [ask_p, ask_s, bid_p, bid_s].
        Returns: (B, 6) features averaged over time:
           OFI mean, spread mean, top-3 depth imbalance mean, level-1 bid
           concentration mean, price-pressure ratio mean, OFI std.

        NOTE(review): the ratios assume non-negative sizes. If the caller
        feeds z-scored features (as the README suggests), sizes can be
        negative and the denominators can approach zero - confirm the
        intended input scale.
        """
        T = x.shape[1]
        # Every 4th column starting at offset k selects one field per level.
        ask_prices = x[:, :, 0::4]   # (B, T, L)
        ask_sizes = x[:, :, 1::4]    # (B, T, L)
        bid_prices = x[:, :, 2::4]   # (B, T, L)
        bid_sizes = x[:, :, 3::4]    # (B, T, L)

        # 1. Order-book imbalance over all levels (positive = bid-heavy)
        total_bid = bid_sizes.sum(dim=-1)  # (B, T)
        total_ask = ask_sizes.sum(dim=-1)  # (B, T)
        ofi = (total_bid - total_ask) / (total_bid + total_ask + 1e-8)
        ofi_mean = ofi.mean(dim=1, keepdim=True)  # (B, 1)

        # 2. Spread dynamics
        spread = ask_prices[:, :, 0] - bid_prices[:, :, 0]  # (B, T)
        spread_mean = spread.mean(dim=1, keepdim=True)

        # 3. Depth imbalance at top levels (1-3)
        top_bid = bid_sizes[:, :, :3].sum(dim=-1)  # (B, T)
        top_ask = ask_sizes[:, :, :3].sum(dim=-1)  # (B, T)
        depth_imb = (top_bid - top_ask) / (top_bid + top_ask + 1e-8)
        depth_imb_mean = depth_imb.mean(dim=1, keepdim=True)

        # 4. Volume concentration: share of bid volume resting at level 1
        bid_concentration = bid_sizes[:, :, 0] / (total_bid + 1e-8)
        bid_conc_mean = bid_concentration.mean(dim=1, keepdim=True)

        # 5. Price pressure (volume weighted by distance from mid)
        mid_price = ((ask_prices[:, :, 0] + bid_prices[:, :, 0]) / 2).unsqueeze(-1)
        bid_pressure = (bid_sizes * (mid_price - bid_prices).abs()).sum(dim=-1)
        ask_pressure = (ask_sizes * (ask_prices - mid_price).abs()).sum(dim=-1)
        pressure_ratio = (bid_pressure - ask_pressure) / (bid_pressure + ask_pressure + 1e-8)
        pressure_mean = pressure_ratio.mean(dim=1, keepdim=True)

        # 6. Temporal volatility of OFI. The unbiased std of one snapshot is
        # NaN, so a length-1 sequence reports zero volatility.
        if T > 1:
            ofi_vol = ofi.std(dim=1, keepdim=True)
        else:
            ofi_vol = torch.zeros_like(ofi_mean)

        return torch.cat([ofi_mean, spread_mean, depth_imb_mean,
                          bid_conc_mean, pressure_mean, ofi_vol], dim=1)  # (B, 6)

    def forward(self, x):
        """
        x: (B, T, num_features) - batch of LOB snapshot sequences
        Returns: (B, num_classes) logits

        Raises ValueError when x does not have that shape.
        """
        self._check_input(x)
        # Auxiliary features are computed on the input before BilinearNorm.
        aux_encoded = self.aux_encoder(self.compute_aux_features(x))  # (B, d_model)

        tokens = self._temporal_tokens(x)  # (B, T, d_model)
        for attn_layer in self.attention_layers:
            tokens = attn_layer(tokens)

        pooled = tokens.mean(dim=1)  # global average pooling -> (B, d_model)
        fused = self.fusion(torch.cat([pooled, aux_encoded], dim=1))  # (B, d_model)
        return self.classifier(fused)  # (B, num_classes)

    def get_attention_weights(self, x):
        """Return the attention maps of every attention layer.

        x: (B, T, num_features).
        Returns: list with one tensor per layer; with the default
        ``nn.MultiheadAttention`` settings each is (B, T, T), averaged over
        heads.

        Attention is evaluated once per layer and its output is reused for
        the residual/norm step, so the returned weights are exactly the ones
        that produced the next layer's input (also in training mode, where
        attention dropout is random).
        """
        self._check_input(x)
        tokens = self._temporal_tokens(x)
        weights = []
        for attn_layer in self.attention_layers:
            attn_out, w = attn_layer.attn(tokens, tokens, tokens, need_weights=True)
            weights.append(w)
            # Same residual + LayerNorm step as TemporalAttention.forward.
            tokens = attn_layer.norm(tokens + attn_layer.dropout(attn_out))
        return weights
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
if __name__ == "__main__":
    # Smoke test: build the default model and push one random batch through it.
    model = LOBPatternNet(seq_len=100, num_classes=3)
    n_trainable = count_parameters(model)
    print(f"Total trainable parameters: {n_trainable:,}")

    x = torch.randn(4, 100, 40)
    out = model(x)
    print(f"Input shape: {x.shape}")
    print(f"Output shape: {out.shape}")
    print(f"Output: {out}")