Humphreykowl committed on
Commit
d50a1f5
·
verified ·
1 Parent(s): 3dd04f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +764 -912
app.py CHANGED
@@ -1,991 +1,843 @@
1
  #!/usr/bin/env python3
 
2
  """
3
- 港股智能分析平台 - Hugging Face Spaces版本
4
- 集成数学原理的XPINNs训练与LLM推理系统
5
- 已扩展:噪声分离、Whitney嵌入截面学习、梯度动力学模拟、LLM伪代码/日内策略
 
 
 
 
 
 
6
  """
7
 
8
  import os
9
- import gradio as gr
 
 
 
 
 
 
 
10
  import numpy as np
11
  import pandas as pd
 
 
 
12
  import torch
13
  import torch.nn as nn
14
  import torch.nn.functional as F
15
  from torch.optim import Adam
16
- import json
17
- import logging
18
- from datetime import datetime
19
- from typing import Dict, List, Tuple, Optional, Any
20
- import warnings
21
- from pathlib import Path
22
- import pickle
23
- from sklearn.preprocessing import StandardScaler
24
- from sklearn.model_selection import train_test_split
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  from scipy import stats
26
- from scipy.linalg import expm
 
 
 
27
  import requests
28
- warnings.filterwarnings('ignore')
29
 
30
- # 配置日志
 
 
 
 
31
  logging.basicConfig(level=logging.INFO)
32
- logger = logging.getLogger(__name__)
33
-
34
- # Hugging Face Inference API配置
35
- HF_API_URL = "https://api-inference.huggingface.co/models/"
36
- # 使用免费的开源模型
37
- AVAILABLE_MODELS = {
38
- "Qwen/Qwen2.5-1.5B-Instruct": "通义千问2.5",
39
- "mistralai/Mistral-7B-Instruct-v0.1": "Mistral 7B",
40
- "microsoft/Phi-3.5-mini-instruct": "Phi-3.5",
41
- "google/flan-t5-large": "FLAN-T5"
42
- }
43
 
 
 
 
 
 
 
 
44
  class Config:
45
- """配置类"""
46
- TEMP_DIR = Path("/tmp/hk_analysis")
47
- MODELS_DIR = TEMP_DIR / "models"
48
- DATA_DIR = TEMP_DIR / "data"
49
- MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB
50
-
51
- # 数学原理配置
52
- FIBER_BUNDLE_DIM = 16
53
- CAUSAL_LAG = 5
54
- XPINNS_SUBDOMAINS = 4
55
-
56
  def __init__(self):
57
- for dir_path in [self.TEMP_DIR, self.MODELS_DIR, self.DATA_DIR]:
58
- dir_path.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
59
 
60
  config = Config()
61
 
 
 
 
62
  class FiberBundleTheory:
63
- """纤维丛理论实现"""
64
-
65
- def __init__(self, base_dim=2, fiber_dim=16):
66
- self.base_dim = base_dim # VIX^2, RV
67
- self.fiber_dim = fiber_dim # 隐藏状态维度
68
- self.whitney_factor = 2 * fiber_dim # Whitney嵌入因子(理论上)
69
-
70
- def project_to_base(self, high_dim_state):
71
- """投影到基空间"""
72
- if len(high_dim_state) < self.fiber_dim:
73
- high_dim_state = np.pad(high_dim_state, (0, self.fiber_dim - len(high_dim_state)))
74
-
75
- # 计算VIX^2代理
76
- vix_squared = np.sum(high_dim_state[:self.fiber_dim//2]**2) / (self.fiber_dim//2 + 1e-12)
77
- # 计算实现波动率
78
- rv = np.std(high_dim_state[self.fiber_dim//2:])
79
-
80
- return np.array([vix_squared, rv])
81
-
82
- def compute_vrp(self, base_point):
83
- """计算方差风险溢价"""
84
- vix_squared, rv = base_point
85
- return vix_squared - rv
86
 
87
  class NoiseExplorer:
88
- """
89
- 噪声分离与验证:基于 VIX^2 与 RV 的关系,尝试把观测上的噪声(纤维方向)与基空间信息分离。
90
- 简单实现:对 VIX^2 与 RV 做线性回归,分析残差的统计特性(自相关、能量谱),并给出 VRP 统计摘要。
91
- 以后可扩展为更严格的协整检验(Johansen / ADF)。
92
- """
93
- def __init__(self):
94
- pass
95
-
96
- def regress_vix2_vs_rv(self, vix2_series, rv_series):
97
- # 简单线性回归:vix2 = a * rv + b
98
- X = np.vstack([rv_series, np.ones_like(rv_series)]).T
99
- try:
100
- coef, _, _, _ = np.linalg.lstsq(X, vix2_series, rcond=None)
101
- a, b = coef[0], coef[1]
102
- preds = a * rv_series + b
103
- resid = vix2_series - preds
104
- return {
105
- 'a': float(a), 'b': float(b),
106
- 'preds': preds, 'resid': resid
107
- }
108
- except Exception as e:
109
- return None
110
-
111
- def resid_stats(self, resid):
112
- # 基本残差统计:均值、方差、自相关(lag1)、能量谱(简单FFT)
113
  mean = float(np.mean(resid))
114
  var = float(np.var(resid))
115
- if len(resid) > 2:
116
- ac1 = float(np.corrcoef(resid[:-1], resid[1:])[0,1])
117
- else:
118
- ac1 = 0.0
119
- # FFT能量谱主频
120
- try:
121
- fft = np.fft.rfft(resid - mean)
122
- freqs = np.fft.rfftfreq(len(resid))
123
- power = np.abs(fft)**2
124
- dominant_idx = int(np.argmax(power[1:]) + 1) if len(power) > 1 else 0
125
- dominant_freq = float(freqs[dominant_idx]) if len(freqs) > dominant_idx else 0.0
126
- except:
127
- dominant_freq = 0.0
128
- return {'mean':mean, 'var':var, 'ac1':ac1, 'dominant_freq':dominant_freq}
129
-
130
- def explore(self, df, vix2_col=None, rv_col=None):
131
- # 自动寻找列名
132
- numeric = df.select_dtypes(include=[np.number]).columns.tolist()
133
  if vix2_col is None or rv_col is None:
134
- # 尝试匹配
135
- candidates = [c.lower() for c in numeric]
136
- vix_col = None
137
- rv_col_local = None
138
- for c in numeric:
139
- if 'vix' in c.lower():
140
- vix_col = c
141
- if 'rv' in c.lower() or 'realized' in c.lower():
142
- rv_col_local = c
143
- if vix_col is None or rv_col_local is None:
144
- # 退回到前两列
145
- if len(numeric) >= 2:
146
- vix_col, rv_col_local = numeric[0], numeric[1]
147
- else:
148
- return None
149
- vix2_col, rv_col = vix_col, rv_col_local
150
-
151
  vix2 = df[vix2_col].fillna(method='ffill').values
152
  rv = df[rv_col].fillna(method='ffill').values
153
-
154
  reg = self.regress_vix2_vs_rv(vix2, rv)
155
- if reg is None:
156
- return None
157
- stats = self.resid_stats(reg['resid'])
158
- vrp_series = vix2 - reg['preds']
159
- # 返回摘要
160
  return {
161
  'vix2_col': vix2_col,
162
  'rv_col': rv_col,
163
- 'reg_coeff': {'a':reg['a'], 'b':reg['b']},
164
- 'resid_stats': stats,
165
- 'vrp_mean': float(np.mean(vrp_series)),
166
- 'vrp_std': float(np.std(vrp_series)),
167
- 'vrp_series': vrp_series,
168
- 'reg_pred': reg['preds'],
169
- 'residuals': reg['resid']
170
  }
171
 
172
- class WhitneyEmbedder(nn.Module):
173
- """
174
- 基于 Whitney 嵌入思想的简单 autoencoder 与截面学习网络:
175
- - autoencoder 用于学习从高维观测到低维(whitney_factor)的光滑嵌入
176
- - section_net 将 base (VIX^2, RV) 映射回 fiber(截面估计),用于自演化/再构造
177
- """
178
- def __init__(self, input_dim=64, fiber_dim=16, device='cpu'):
179
- super().__init__()
180
- self.device = device
181
- self.fiber_dim = fiber_dim
182
- self.whitney_dim = 2 * fiber_dim # 推荐维度
183
- # encoder / decoder
184
- self.encoder = nn.Sequential(
185
- nn.Linear(input_dim, 128),
186
- nn.ReLU(),
187
- nn.Linear(128, self.whitney_dim)
188
- )
189
- self.decoder = nn.Sequential(
190
- nn.Linear(self.whitney_dim, 128),
191
- nn.ReLU(),
192
- nn.Linear(128, input_dim)
193
- )
194
- # 截面学习网络:从 base (2维) -> fiber_dim
195
- self.section_net = nn.Sequential(
196
- nn.Linear(2, 32),
197
- nn.ReLU(),
198
- nn.Linear(32, fiber_dim)
199
- )
200
- self.to(self.device)
201
-
202
- def forward(self, x):
203
- z = self.encoder(x)
204
- recon = self.decoder(z)
205
- return z, recon
206
-
207
- def learn_section(self, base_points):
208
  """
209
- 给定 base_points (N x 2),输出 fiber 估计(N x fiber_dim)
210
- 这是一个直接前向调用(训练通过 train_embedding 来执行)
211
  """
212
- with torch.no_grad():
213
- x = torch.tensor(base_points, dtype=torch.float32, device=self.device)
214
- return self.section_net(x).cpu().numpy()
215
-
216
- class GradientDynamics:
217
- """
218
- 把势函数 U(b) 的梯度流当成市场的动力路径:db = -eta * grad U(b) dt + sigma dW
219
- - 能把离散梯度下降(epochs 步)映射为连续路径的模拟
220
- - 使用 torch 自动求导对 U 做梯度(U可以是任意torch可微函数)
221
- """
222
- def __init__(self, eta=0.1, sigma=0.01, device='cpu'):
223
- self.eta = eta
224
- self.sigma = sigma
225
- self.device = device
226
-
227
- def U_vrp(self, b):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  """
229
- 默认势函数:U(b) = (VRP)^2 / 2 ,其中 b = [vix2, rv]
230
- b torch.tensor shape (..., 2)
231
  """
232
- vix2 = b[...,0]
233
- rv = b[...,1]
234
- vrp = vix2 - rv
235
- return 0.5 * vrp**2
236
-
237
- def grad_U(self, b):
238
- b_t = torch.tensor(b, dtype=torch.float32, requires_grad=True, device=self.device)
239
- U = self.U_vrp(b_t).sum()
240
- U.backward()
241
- grad = b_t.grad.detach().cpu().numpy()
242
- return grad
243
-
244
- def simulate_flow(self, b0, T=1.0, dt=0.01, seed=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  """
246
- Euler-Maruyama 模拟:
247
- b0: 初始点 (2,)
248
- 返回路径 (Nsteps+1, 2)
249
  """
250
  if seed is not None:
251
  np.random.seed(seed)
252
- n_steps = int(T / dt)
253
- path = np.zeros((n_steps+1, 2))
254
- path[0] = np.array(b0, dtype=float)
255
- for i in range(n_steps):
256
- b_cur = path[i]
257
- grad = self.grad_U(b_cur)
258
- db_det = - self.eta * grad
259
- db_stoch = self.sigma * np.sqrt(dt) * np.random.randn(2)
260
- path[i+1] = b_cur + db_det * dt + db_stoch
261
- return path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- class EquivariantLayer(nn.Module):
264
- """SO(3)等变层"""
265
-
266
- def __init__(self, in_features, out_features):
267
- super().__init__()
268
- self.linear = nn.Linear(in_features, out_features)
269
- self.norm = nn.LayerNorm(out_features)
270
-
271
- # 初始化为正交矩阵保持等变性
272
- nn.init.orthogonal_(self.linear.weight)
273
-
274
- def forward(self, x, rotation_matrix=None):
275
- """保持群等变性的前向传播"""
276
- if rotation_matrix is not None:
277
- # 应用旋转变换
278
- x = torch.matmul(x, rotation_matrix.T)
279
-
280
- x = self.linear(x)
281
- x = self.norm(x)
282
- return F.relu(x)
283
-
284
- class XPINNsGenerator(nn.Module):
285
- """扩展物理信息神经网络生成器"""
286
-
287
- def __init__(self, input_dim=64, hidden_dim=128, output_dim=64, num_subdomains=4):
288
- super().__init__()
289
- self.num_subdomains = num_subdomains
290
-
291
- # 子域网络
292
- self.subdomain_nets = nn.ModuleList([
293
- nn.Sequential(
294
- EquivariantLayer(input_dim, hidden_dim),
295
- EquivariantLayer(hidden_dim, hidden_dim),
296
- EquivariantLayer(hidden_dim, output_dim)
297
- ) for _ in range(num_subdomains)
298
- ])
299
-
300
- # 路由网络
301
- self.router = nn.Sequential(
302
- nn.Linear(input_dim, num_subdomains),
303
- nn.Softmax(dim=-1)
304
- )
305
-
306
- # 融合网络
307
- self.fusion = nn.Sequential(
308
- nn.Linear(output_dim * num_subdomains, hidden_dim),
309
- nn.ReLU(),
310
- nn.Linear(hidden_dim, output_dim)
311
- )
312
-
313
- # 李雅普诺夫稳定性网络
314
- self.stability_net = nn.Sequential(
315
- nn.Linear(output_dim, 32),
316
- nn.ReLU(),
317
- nn.Linear(32, 1),
318
- nn.Sigmoid()
319
- )
320
-
321
- def forward(self, x):
322
- batch_size = x.shape[0]
323
-
324
- # 路由到子域
325
- routing_weights = self.router(x)
326
-
327
- # 各子域处理
328
- subdomain_outputs = []
329
- for i, subnet in enumerate(self.subdomain_nets):
330
- weight = routing_weights[:, i:i+1]
331
- output = subnet(x * weight)
332
- subdomain_outputs.append(output)
333
-
334
- # 融合输出
335
- concat_output = torch.cat(subdomain_outputs, dim=-1)
336
- fused = self.fusion(concat_output)
337
-
338
- # 计算稳定性分数
339
- stability = self.stability_net(fused)
340
-
341
- return {
342
- 'output': fused,
343
- 'routing_weights': routing_weights,
344
- 'stability_score': stability
345
- }
346
 
347
- class CausalVAR:
348
- """因果VAR模型"""
349
-
350
- def __init__(self, max_lag=5):
351
- self.max_lag = max_lag
352
- self.coefficients = None
353
- self.stability_eigenvalues = None
354
-
355
- def fit(self, data):
356
- """拟合VAR模型"""
357
- n_samples, n_vars = data.shape
358
-
359
- if n_samples < self.max_lag + 10:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  return None
361
-
362
- # 构建滞后矩阵
363
- X = []
364
- y = []
365
-
366
- for t in range(self.max_lag, n_samples):
367
- lag_features = []
368
- for lag in range(1, self.max_lag + 1):
369
- lag_features.extend(data[t - lag])
370
- X.append(lag_features)
371
- y.append(data[t])
372
-
373
- X = np.array(X)
374
- y = np.array(y)
375
-
376
- # 最小二乘估计
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  try:
378
- self.coefficients = np.linalg.lstsq(X, y, rcond=None)[0]
379
-
380
- # 计算稳定性
381
- companion_matrix = self._build_companion_matrix(n_vars)
382
- eigenvalues = np.linalg.eigvals(companion_matrix)
383
- self.stability_eigenvalues = eigenvalues
384
-
385
- return {
386
- 'coefficients': self.coefficients,
387
- 'eigenvalues': eigenvalues,
388
- 'is_stable': np.max(np.abs(eigenvalues)) < 1.0
389
- }
390
- except:
391
- return None
392
-
393
- def _build_companion_matrix(self, n_vars):
394
- """构建伴随矩阵"""
395
- dim = n_vars * self.max_lag
396
- companion = np.zeros((dim, dim))
397
-
398
- if self.coefficients is not None:
399
- # 填充系数
400
- companion[:n_vars, :] = self.coefficients.T
401
- # 填充单位矩阵
402
- if self.max_lag > 1:
403
- companion[n_vars:, :-n_vars] = np.eye(dim - n_vars)
404
-
405
- return companion
406
-
407
- class MathematicalTrainer:
408
- """数学原理驱动的训练器"""
409
-
410
- def __init__(self, device='cpu'):
411
- self.device = device
412
- self.fiber_bundle = FiberBundleTheory(fiber_dim=config.FIBER_BUNDLE_DIM)
413
- self.causal_var = CausalVAR(max_lag=config.CAUSAL_LAG)
414
- self.model = XPINNsGenerator()
415
- self.optimizer = Adam(self.model.parameters(), lr=0.001)
416
- self.scaler = StandardScaler()
417
- # 新增:
418
- self.noise_explorer = NoiseExplorer()
419
- self.embedder = None # lazy init
420
- self.gradient_dynamics = GradientDynamics(eta=0.5, sigma=0.02, device=device)
421
-
422
- def prepare_data(self, df):
423
- """准备训练数据"""
424
- # 提取数值列
425
- numeric_cols = df.select_dtypes(include=[np.number]).columns
426
- if len(numeric_cols) == 0:
427
- return None, None, None
428
-
429
- data = df[numeric_cols].fillna(0).values
430
-
431
- # 因果分析
432
- causal_result = self.causal_var.fit(data)
433
-
434
- # 特征工程
435
- features = []
436
- targets = []
437
- window_size = 10
438
-
439
- for i in range(len(data) - window_size):
440
- window = data[i:i+window_size]
441
-
442
- # 投影到纤维丛基空间
443
- base_features = []
444
- for row in window:
445
- base_point = self.fiber_bundle.project_to_base(row)
446
- vrp = self.fiber_bundle.compute_vrp(base_point)
447
- base_features.extend([base_point[0], base_point[1], vrp])
448
-
449
- # 展平特征
450
- feature_vector = np.array(base_features).flatten()
451
-
452
- # 填充到固定维度
453
- if len(feature_vector) < 64:
454
- feature_vector = np.pad(feature_vector, (0, 64 - len(feature_vector)))
455
- elif len(feature_vector) > 64:
456
- feature_vector = feature_vector[:64]
457
-
458
- features.append(feature_vector)
459
- targets.append(data[i+window_size, 0]) # 预测第一列
460
-
461
- if len(features) == 0:
462
- return None, None, None
463
-
464
- X = np.array(features)
465
- y = np.array(targets).reshape(-1, 1)
466
-
467
- # 标准化
468
- X = self.scaler.fit_transform(X)
469
-
470
- return torch.FloatTensor(X), torch.FloatTensor(y), causal_result
471
-
472
- def train(self, X, y, epochs=100):
473
- """训练模型"""
474
- if X is None or y is None:
475
  return None
476
-
477
- dataset = torch.utils.data.TensorDataset(X, y)
478
- loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
479
-
480
- losses = []
481
-
482
- for epoch in range(epochs):
483
- epoch_loss = 0
484
- for batch_x, batch_y in loader:
485
- self.optimizer.zero_grad()
486
-
487
- # 前向传播
488
- outputs = self.model(batch_x)
489
-
490
- # 计算损失
491
- pred_loss = F.mse_loss(outputs['output'][:, 0:1], batch_y)
492
- stability_loss = torch.mean((1 - outputs['stability_score'])**2)
493
-
494
- total_loss = pred_loss + 0.1 * stability_loss
495
-
496
- # 反向传播
497
- total_loss.backward()
498
- self.optimizer.step()
499
-
500
- epoch_loss += total_loss.item()
501
-
502
- avg_loss = epoch_loss / len(loader)
503
- losses.append(avg_loss)
504
-
505
- if epoch % 20 == 0:
506
- logger.info(f"Epoch {epoch}: Loss = {avg_loss:.6f}")
507
-
508
- return losses
509
-
510
- # ---------- 新增:嵌入器训练接口 ----------
511
- def init_embedder(self, input_dim=64):
512
- if self.embedder is None:
513
- self.embedder = WhitneyEmbedder(input_dim=input_dim, fiber_dim=self.fiber_bundle.fiber_dim, device=self.device)
514
-
515
- def train_embedding(self, X_np, epochs=100, lr=1e-3):
516
- """
517
- 训练 autoencoder,X_np: numpy array (N, input_dim)
518
- """
519
- self.init_embedder(input_dim=X_np.shape[1])
520
- model = self.embedder
521
- opt = Adam(model.parameters(), lr=lr)
522
- X = torch.tensor(X_np, dtype=torch.float32, device=self.device)
523
- for epoch in range(epochs):
524
- opt.zero_grad()
525
- z, recon = model(X)
526
- loss = F.mse_loss(recon, X) # 重构损失
527
- loss.backward()
528
- opt.step()
529
- if epoch % 20 == 0:
530
- logger.info(f"[Embedder] Epoch {epoch}, recon loss {loss.item():.6f}")
531
- return loss.item()
532
-
533
- def explore_noise(self, df, vix2_col=None, rv_col=None):
534
- return self.noise_explorer.explore(df, vix2_col=vix2_col, rv_col=rv_col)
535
-
536
- def simulate_gradient_flow(self, initial_base_point, T=1.0, dt=0.01):
537
- return self.gradient_dynamics.simulate_flow(initial_base_point, T=T, dt=dt)
538
 
 
 
 
539
  class LLMInterface:
540
- """LLM接口"""
541
-
542
- def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct"):
543
- self.model_name = model_name
544
- self.api_url = HF_API_URL + model_name
545
-
546
- def query(self, prompt, max_length=500):
547
- """查询LLM"""
548
- headers = {"Content-Type": "application/json"}
549
-
550
- payload = {
551
- "inputs": prompt,
552
- "parameters": {
553
- "max_new_tokens": max_length,
554
- "temperature": 0.3,
555
- "top_p": 0.9,
556
- "do_sample": True
557
- }
558
- }
559
-
560
  try:
561
- response = requests.post(self.api_url, headers=headers, json=payload, timeout=30)
562
- if response.status_code == 200:
563
- result = response.json()
564
- if isinstance(result, list) and len(result) > 0:
565
- return result[0].get('generated_text', '分析失败')
566
- return str(result)
567
  else:
568
- return f"API调用失败: {response.status_code}"
569
  except Exception as e:
570
- return f"错误: {str(e)}"
571
-
572
- def _pseudocode_template(self, strategy_spec: Dict[str,Any]) -> str:
 
 
 
573
  """
574
- 当LLM不可用时,返回一个确定性的伪代码结构
575
- strategy_spec 包含:entry_rule, exit_rule, position_sizing, risk_params
 
 
 
 
576
  """
577
- template = f"""# PSEUDOCODE for intraday quant strategy
578
- # Entry: {strategy_spec.get('entry_rule','待定')}
579
- # Exit: {strategy_spec.get('exit_rule','待定')}
580
- # Position sizing: {strategy_spec.get('position_sizing','固定仓位/比例')}
581
- # Risk: {strategy_spec.get('risk_params','默认')}
582
- def on_bar(bar):
583
- features = compute_features(bar) # e.g. VRP, momentum, spread
584
- signal = 0
585
- if {strategy_spec.get('entry_condition_code','False')}:
586
- signal = 1
587
- entry_price = bar.close
588
- size = determine_size(entry_price)
589
- if {strategy_spec.get('exit_condition_code','False')}:
590
- signal = -1
591
- manage_risk()
592
- execute(signal, size)
593
- """
594
- return template
595
-
596
- def analyze_trading(self, analysis_results, market_data, intraday=False, generate_pseudocode=False):
597
- """分析交易策略,扩展:日内与伪代码生成"""
598
- prompt = f"""你是一个量化交易与日内交易专家。请基于以下数学分析结果和市场数据生成可操作的交易策略。
599
-
600
- 数学分析摘要:
601
- {json.dumps(analysis_results, indent=2)}
602
-
603
- 市场数据摘要:
604
- {market_data}
605
-
606
- 请给出:
607
- 1) 简洁的策略描述(入场、止损、止盈、仓位管理)
608
- 2) 若为日内(intraday=True),请给出明确定义的入场/退出信号(基于短周期,例如1-5分钟或tick),并说明延迟/滑点考虑
609
- 3) 风险控制与回测建议(数据频率、回测窗口)
610
- 4) 若要求generate_pseudocode=True,请以伪代码形式输出策略实现模板(明确函数名、输入特征、信号判断、止损/止盈逻辑)
611
-
612
- 输出格式:
613
- - 段落 1: 策略概览
614
- - 段落 2: 规则要点(枚举)
615
- - 段落 3: 伪代码(如果要求)
616
-
617
- 请尽量简洁、直接给出可执行的建议。
618
- """
619
- if generate_pseudocode:
620
- prompt += "\n请在伪代码中包含:compute_features(), determine_size(), execute() 等函数签名。\n"
621
- # 调用LLM
622
- llm_resp = self.query(prompt, max_length=800)
623
- # 如果返回是错误或API失败,fallback到确定性伪代码
624
- if isinstance(llm_resp, str) and llm_resp.startswith("错误") or "API调用失败" in str(llm_resp):
625
- # 构造简单策略说明
626
- strategy_spec = {
627
- 'entry_rule': '当 VRP 从负值上穿其短期均值且短期动量为正时买入',
628
- 'exit_rule': '亏损超过止损点或达到止盈点或VRP反转',
629
- 'position_sizing': '账户风险百分比方式(例如每单最大亏损 0.5%)',
630
- 'risk_params': '止损与仓位受限,滑点假设 0.02%',
631
- 'entry_condition_code': 'features["vrp"] > features["vrp_sma_short"] and features["mom"] > 0',
632
- 'exit_condition_code': 'price <= entry_price*(1 - stop_loss_pct) or price >= entry_price*(1 + take_profit_pct)'
633
- }
634
- fallback = "LLM不可用,返回内置伪代码模板。\n\n" + self._pseudocode_template(strategy_spec)
635
- return fallback
636
- return llm_resp
637
-
638
- class TradingPlatform:
639
- """主交易平台"""
640
-
641
- def __init__(self, device='cpu'):
642
- self.trainer = MathematicalTrainer(device=device)
643
  self.llm = LLMInterface()
644
  self.current_data = None
645
  self.analysis_results = {}
646
-
647
- def process_upload(self, file):
648
- """处理上传文件"""
649
  if file is None:
650
- return "请上传数据文件", None, None
651
-
652
  try:
653
- # 读取数据
654
- if file.name.endswith('.csv'):
655
- df = pd.read_csv(file.name)
656
- elif file.name.endswith(('.xlsx', '.xls')):
657
- df = pd.read_excel(file.name)
658
  else:
659
- return "不支持的文件格式", None, None
660
-
661
- self.current_data = df
662
-
663
- # 数据摘要
664
- summary = f"""数据集信息:
665
- - 行数: {len(df)}
666
- - 列数: {len(df.columns)}
667
- - 数值列: {list(df.select_dtypes(include=[np.number]).columns)}
668
- - 缺失值比例: {(df.isnull().sum().sum() / (len(df) * len(df.columns)) * 100):.2f}%"""
669
-
670
- # 准备训练数据
671
- X, y, causal_result = self.trainer.prepare_data(df)
672
-
673
- if X is not None:
674
- # 分析结果
675
- self.analysis_results = {
676
- 'data_shape': X.shape,
677
- 'causal_stable': causal_result['is_stable'] if causal_result else False,
678
- 'vrp': self.trainer.fiber_bundle.compute_vrp([1.0, 0.8]),
679
- 'lyapunov_stable': True if causal_result and causal_result['is_stable'] else False,
680
- 'fiber_projection': 'Complete'
681
- }
682
-
683
- analysis = f"""数学分析完成:
684
- - 因果VAR稳定性: {'稳定' if self.analysis_results['causal_stable'] else '不稳定'}
685
- - VRP计算: {self.analysis_results['vrp']:.4f}
686
- - 数据维度: {self.analysis_results['data_shape']}
687
- - Whitney嵌入因子: {self.trainer.fiber_bundle.whitney_factor}"""
688
- else:
689
- analysis = "数据不足或格式错误,无法进行数学分析"
690
-
691
- return summary, analysis, "数据处理成功"
692
-
693
  except Exception as e:
694
- return f"处理失败: {str(e)}", None, None
695
-
696
- def train_model(self, epochs=100):
697
- """训练模型"""
698
- if self.current_data is None:
699
- return "请先上传数据", None
700
-
701
  try:
702
- X, y, causal_result = self.trainer.prepare_data(self.current_data)
703
-
704
- if X is None:
705
- return "数据准备失败", None
706
-
707
- # 训练
708
- losses = self.trainer.train(X, y, epochs)
709
-
710
- # 绘制损失曲线
711
- import matplotlib.pyplot as plt
712
- fig, ax = plt.subplots(figsize=(10, 6))
713
- ax.plot(losses)
714
- ax.set_xlabel('Epoch')
715
- ax.set_ylabel('Loss')
716
- ax.set_title('XPINNs Training Loss')
717
- ax.grid(True)
718
-
719
- result = f"""训练完成:
720
- - 最终损失: {losses[-1]:.6f}
721
- - 训练轮数: {epochs}
722
- - 子域数量: {config.XPINNS_SUBDOMAINS}
723
- - 因果滞后阶数: {config.CAUSAL_LAG}"""
724
-
725
- return result, fig
726
-
727
  except Exception as e:
728
- return f"训练失败: {str(e)}", None
729
-
730
- def run_noise_exploration(self):
731
- """运行噪声探索(基于 VIX^2 vs RV 回归)"""
732
- if self.current_data is None:
733
- return "请先上传数据", None
 
 
 
 
 
 
 
 
 
 
 
734
  try:
735
- res = self.trainer.explore_noise(self.current_data)
736
- if res is None:
737
- return "未找到合适的数值列进行噪声探索", None
738
- summary = f"噪声探索结果: VRP 均值 {res['vrp_mean']:.6f}, VRP std {res['vrp_std']:.6f}, 残差自相关(1) {res['resid_stats']['ac1']:.4f}"
739
- return summary, res
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
740
  except Exception as e:
741
- return f"噪声探索失败: {str(e)}", None
742
-
743
- def train_embedding(self, epochs=100):
744
- """训练 Whitney 嵌入(autoencoder)"""
745
  if self.current_data is None:
746
- return "请先上传数据", None
 
 
 
 
 
 
747
  try:
748
- # 复用之前prepare_data的X构造方式,得到特征矩阵
749
- X, y, causal = self.trainer.prepare_data(self.current_data)
750
- if X is None:
751
- return "数据不足以训练嵌入", None
752
- X_np = X.numpy()
753
- final_loss = self.trainer.train_embedding(X_np, epochs=epochs)
754
- return f"嵌入训练完成, 最终重构损失 {final_loss:.6f}", None
755
  except Exception as e:
756
- return f"嵌入训练失败: {str(e)}", None
757
-
758
- def simulate_dynamics(self, start_vix2=1.0, start_rv=0.8, T=1.0, dt=0.01):
759
- """模拟梯度动力学路径"""
 
 
 
 
760
  try:
761
- path = self.trainer.simulate_gradient_flow([start_vix2, start_rv], T=T, dt=dt)
762
- return f"模拟完成,路径长度 {len(path)}", path
 
 
763
  except Exception as e:
764
- return f"模拟失败: {str(e)}", None
765
-
766
- def get_trading_strategy(self, user_question, intraday=False, generate_pseudocode=False, model_name=None):
767
- """获取交易策略"""
768
- if not self.analysis_results:
769
- return "请先上传并分析数据"
770
-
 
 
 
 
 
 
 
 
 
 
 
771
  try:
772
- # 准备市场数据摘要
773
- market_summary = ""
774
- if self.current_data is not None:
775
- numeric_cols = self.current_data.select_dtypes(include=[np.number]).columns
776
- if len(numeric_cols) > 0:
777
- latest_data = self.current_data[numeric_cols].tail(20).describe()
778
- market_summary = latest_data.to_string()
779
-
780
- # 合并用户问题
781
- full_prompt = f"{user_question}\n\n当前分析结果:{json.dumps(self.analysis_results, indent=2)}\n\n市场数据:{market_summary}"
782
-
783
- # 更新llm模型(可选)
784
- if model_name:
785
- self.llm = LLMInterface(model_name)
786
-
787
- # 获取LLM回复
788
- response = self.llm.analyze_trading(self.analysis_results, full_prompt, intraday=intraday, generate_pseudocode=generate_pseudocode)
789
-
790
- return response
791
-
792
  except Exception as e:
793
- return f"策略生成失败: {str(e)}"
794
-
795
- # 创建Gradio界面
796
- def create_interface():
797
- platform = TradingPlatform()
798
-
799
- with gr.Blocks(title="港股智能分析平台", theme=gr.themes.Soft()) as interface:
800
- gr.Markdown("""
801
- # 🚀 港股智能分析平台 - 数学原理驱动(扩展版)
802
-
803
- 新增功能:
804
- - 噪声分离 / VRP 残差分析
805
- - 基于 Whitney 嵌入的 autoencoder 与截面学习
806
- - 梯度动力学(将梯度下降视作连续演化)模拟
807
- - LLM 支持日内策略与伪代码输出(集成/回退模板)
808
- """)
809
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
810
  with gr.Tabs():
811
- # 数据上传标签
812
- with gr.TabItem("📁 数据上传与分析"):
813
- with gr.Row():
814
- with gr.Column(scale=1):
815
- file_input = gr.File(
816
- label="上传数据文件 (CSV/Excel)",
817
- file_types=[".csv", ".xlsx", ".xls"]
818
- )
819
- upload_btn = gr.Button("分析数据", variant="primary")
820
-
821
- with gr.Column(scale=2):
822
- data_summary = gr.Textbox(
823
- label="数据摘要",
824
- lines=6,
825
- interactive=False
826
- )
827
- analysis_result = gr.Textbox(
828
- label="数学分析结果",
829
- lines=6,
830
- interactive=False
831
- )
832
- status_text = gr.Textbox(
833
- label="状态",
834
- interactive=False
835
- )
836
-
837
- upload_btn.click(
838
- platform.process_upload,
839
- inputs=[file_input],
840
- outputs=[data_summary, analysis_result, status_text]
841
- )
842
-
843
- # 噪声探索与嵌入训练
844
- with gr.TabItem("🔍 噪声探索 & 嵌入"):
845
  with gr.Row():
846
- with gr.Column():
847
- noise_btn = gr.Button("运行噪声探索 (VIX^2 vs RV)", variant="primary")
848
- noise_summary = gr.Textbox(label="噪声探索摘要", lines=4, interactive=False)
849
- noise_details = gr.JSON(label="噪声探索详细结果", visible=False)
850
- embed_epochs = gr.Slider(minimum=20, maximum=500, value=100, step=20, label="嵌入训练轮数")
851
- embed_btn = gr.Button("训练 Whitney 嵌入", variant="primary")
852
- embed_status = gr.Textbox(label="嵌入训练状态", lines=2, interactive=False)
853
-
854
- with gr.Column():
855
- embed_plot = gr.Plot(label="嵌入(可视化)", visible=False)
856
-
857
- noise_btn.click(platform.run_noise_exploration, inputs=[], outputs=[noise_summary, noise_details])
858
- embed_btn.click(platform.train_embedding, inputs=[embed_epochs], outputs=[embed_status, embed_plot])
859
-
860
- # 模型训练标签
861
- with gr.TabItem("🧮 XPINNs模型训练"):
862
  with gr.Row():
863
- with gr.Column():
864
- epochs_slider = gr.Slider(
865
- minimum=50,
866
- maximum=500,
867
- value=100,
868
- step=50,
869
- label="训练轮数"
870
- )
871
- train_btn = gr.Button("开始训练", variant="primary")
872
- training_result = gr.Textbox(
873
- label="训练结果",
874
- lines=8,
875
- interactive=False
876
- )
877
-
878
- with gr.Column():
879
- loss_plot = gr.Plot(label="训练损失曲线")
880
-
881
- train_btn.click(
882
- platform.train_model,
883
- inputs=[epochs_slider],
884
- outputs=[training_result, loss_plot]
885
- )
886
-
887
- # 交易策略标签
888
- with gr.TabItem("💹 智能交易策略"):
889
- gr.Markdown("""
890
- ### 基于数学原理的交易策略生成(扩展)
891
-
892
- 系统将结合:
893
- - 纤维丛投影的市场状态
894
- - 因果VAR的动态关系
895
- - 梯度动力学的演化模拟
896
- - LLM智能推理(可输出伪代码)
897
- """)
898
-
899
  with gr.Row():
900
- with gr.Column():
901
- # LLM模型选择
902
- model_dropdown = gr.Dropdown(
903
- choices=list(AVAILABLE_MODELS.keys()),
904
- value="Qwen/Qwen2.5-1.5B-Instruct",
905
- label="选择LLM模型"
906
- )
907
-
908
- user_input = gr.Textbox(
909
- label="交易问题",
910
- placeholder="例如:基于当前分析,今天的日内策略如何构造?请给出伪代码。",
911
- lines=3
912
- )
913
-
914
- intraday_check = gr.Checkbox(label="日内策略 (intraday)", value=True)
915
- pseudocode_check = gr.Checkbox(label="生成伪代码", value=True)
916
- strategy_btn = gr.Button("生成策略", variant="primary")
917
-
918
- with gr.Column():
919
- strategy_output = gr.Textbox(
920
- label="AI交易策略建议",
921
- lines=20,
922
- interactive=False
923
- )
924
-
925
- def update_llm_model(model_name):
926
- platform.llm = LLMInterface(model_name)
927
- return f"已切换到 {AVAILABLE_MODELS.get(model_name, model_name)}"
928
-
929
- model_dropdown.change(
930
- update_llm_model,
931
- inputs=[model_dropdown],
932
- outputs=[gr.Textbox(visible=False)]
933
- )
934
-
935
- strategy_btn.click(
936
- platform.get_trading_strategy,
937
- inputs=[user_input, intraday_check, pseudocode_check, model_dropdown],
938
- outputs=[strategy_output]
939
- )
940
-
941
- # 梯度动力学模拟
942
- with gr.TabItem("⚙️ 梯度动力学模拟"):
943
  with gr.Row():
944
- with gr.Column():
945
- start_vix2 = gr.Number(value=1.0, label="初始 VIX^2")
946
- start_rv = gr.Number(value=0.8, label="初始 RV")
947
- T = gr.Number(value=1.0, label="模拟总时间 T")
948
- dt = gr.Number(value=0.01, label="时间步长 dt")
949
- sim_btn = gr.Button("运行模拟", variant="primary")
950
- sim_result = gr.Textbox(label="模拟结果", lines=3, interactive=False)
951
- with gr.Column():
952
- sim_plot = gr.Plot(label="模拟路径 (VIX^2, RV)")
953
-
954
- sim_btn.click(
955
- platform.simulate_dynamics,
956
- inputs=[start_vix2, start_rv, T, dt],
957
- outputs=[sim_result, sim_plot]
958
- )
959
-
960
- # 数学原理说明
961
- with gr.TabItem("📚 数学原理"):
962
- gr.Markdown(f"""
963
- ## 核心数学原理(参考:你的笔记与论文)
964
- - 纤维丛理论与VRP截面:参见上传的改进文档(BKKK改进)。
965
- - Whitney嵌入定理与光滑嵌入:作为嵌入维度上限的理论依据。
966
- - 等变网络与XPINNs:关于等变与几何保证的更系统论文。
967
- - 梯度动力学视角:梯度下降路径可看作系统的演化路径、可扩展到SDE与Fokker-Planck 表述。
968
- """)
969
-
970
- gr.Markdown("""
971
- ---
972
- ### 使用说明:
973
- 1. 上传您的金融数据(CSV或Excel格式)
974
- 2. 运行噪声探索以获得 VRP/残差统计
975
- 3. 训练嵌入 / XPINNs 并模拟梯度动力学
976
- 4. 使用 LLM 生成日内/量化策略并选择是否输出伪代码
977
-
978
- **注意**: 本系统仅供研究参考,不构成投资建议。
979
- """)
980
-
981
- return interface
982
-
983
- # 主函数
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
  if __name__ == "__main__":
985
- logger.info("启动港股智能分析平台(扩展版)...")
986
- interface = create_interface()
987
- interface.launch(
988
- server_name="0.0.0.0",
989
- server_port=7860,
990
- share=False
991
- )
 
1
  #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
  """
4
+ 升级版 app.py — 高精度数值 / 高性统计 + 精细化 LLM 策略输出
5
+ 功能亮点:
6
+ - Crank–Nicolson PDE(Black–Scholes)
7
+ - Monte Carlo:Antithetic + Control variates(使用 BS 解析作为控制变量)
8
+ - GARCH(1,1) 使用 arch (若可用)或 MLE minimize 回退
9
+ - Johansen 协整检验(statsmodels 若可用)
10
+ - 组合优化使用 cvxpy(若可用)或 SciPy 回退
11
+ - LLM 生成结构化 JSON 策略(策略说明、信号、伪代码、回测/风险提示)
12
+ - 保持之前的几何/Whitney/Noise/Gradient 模块兼容
13
  """
14
 
15
  import os
16
+ import json
17
+ import warnings
18
+ warnings.filterwarnings("ignore")
19
+
20
+ from pathlib import Path
21
+ from datetime import datetime
22
+ from typing import Any, Dict, Optional, Tuple, List
23
+
24
  import numpy as np
25
  import pandas as pd
26
+ import matplotlib.pyplot as plt
27
+
28
+ # torch used for embedding / potential LSTM
29
  import torch
30
  import torch.nn as nn
31
  import torch.nn.functional as F
32
  from torch.optim import Adam
33
+
34
+ # statsmodels optional
35
+ try:
36
+ import statsmodels.api as sm
37
+ from statsmodels.tsa.vector_ar.vecm import coint_johansen
38
+ from statsmodels.tsa.api import VAR
39
+ STATS_MODELS_AVAILABLE = True
40
+ except Exception:
41
+ STATS_MODELS_AVAILABLE = False
42
+
43
+ # arch package (GARCH) optional
44
+ try:
45
+ from arch import arch_model
46
+ ARCH_AVAILABLE = True
47
+ except Exception:
48
+ ARCH_AVAILABLE = False
49
+
50
+ # cvxpy for portfolio optimization optional
51
+ try:
52
+ import cvxpy as cp
53
+ CVXPY_AVAILABLE = True
54
+ except Exception:
55
+ CVXPY_AVAILABLE = False
56
+
57
+ # scipy fallback utilities
58
  from scipy import stats
59
+ from scipy.optimize import minimize
60
+ from scipy.linalg import toeplitz
61
+
62
+ # HTTP for LLM
63
  import requests
 
64
 
65
+ # Gradio UI
66
+ import gradio as gr
67
+
68
+ # Logging
69
+ import logging
70
  logging.basicConfig(level=logging.INFO)
71
+ logger = logging.getLogger("quant_upgraded")
 
 
 
 
 
 
 
 
 
 
72
 
73
+ # base dir
74
+ BASE_DIR = Path("/tmp/quant_upgraded")
75
+ BASE_DIR.mkdir(parents=True, exist_ok=True)
76
+
77
+ # ---------------------
78
+ # Configuration
79
+ # ---------------------
80
class Config:
    """Runtime settings resolved once when the module is imported."""

    def __init__(self):
        # Use the GPU whenever torch can see one, otherwise stay on CPU.
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Hugging Face Inference API credentials / default model.
        self.hf_token = os.getenv("HF_API_TOKEN", "")
        self.hf_default_model = "Qwen/Qwen2.5-1.5B-Instruct"

        # Default number of Monte Carlo paths offered in the UI.
        self.mc_default_paths = 20000

        # Availability flags for the optional third-party back ends.
        self.cv_solver = "cvxpy" if CVXPY_AVAILABLE else "scipy"
        self.statsmodels = STATS_MODELS_AVAILABLE
        self.arch = ARCH_AVAILABLE
92
  config = Config()
93
 
94
+ # ---------------------
95
+ # Geometry / existing modules (compact)
96
+ # ---------------------
97
class FiberBundleTheory:
    """Project a fiber vector onto a 2-D base point ``(vix2, rv)``."""

    def __init__(self, fiber_dim=16):
        self.fiber_dim = fiber_dim
        # Twice the fiber dimension (the Whitney embedding bound).
        self.whitney_factor = 2 * fiber_dim

    def project_to_base(self, x: np.ndarray) -> np.ndarray:
        """Return ``[vix2, rv]`` for the flattened input.

        The input is zero-padded up to ``fiber_dim``. ``vix2`` is the mean
        square of the first half, ``rv`` the standard deviation of the rest.
        """
        flat = np.asarray(x).ravel()
        missing = self.fiber_dim - len(flat)
        if missing > 0:
            flat = np.pad(flat, (0, missing))
        split = self.fiber_dim // 2
        head, tail = flat[:split], flat[split:]
        mean_square = float(np.sum(head**2) / (split + 1e-12))
        spread = float(np.std(tail))
        return np.array([mean_square, spread])

    def compute_vrp(self, base_point: np.ndarray) -> float:
        """Variance risk premium: ``vix2 - rv`` of a base point."""
        implied, realized = base_point
        return implied - realized
 
 
 
 
 
 
 
114
 
115
class NoiseExplorer:
    """Split VIX^2 into a part explained by realized variance and a residual."""

    def regress_vix2_vs_rv(self, vix2: np.ndarray, rv: np.ndarray):
        """Fit ``vix2 ~ a * rv + b`` by ordinary least squares.

        Returns a dict with the slope ``a``, intercept ``b``, fitted values
        ``preds`` and residuals ``resid`` (both numpy arrays).
        """
        vix2 = np.asarray(vix2, dtype=float).ravel()
        rv = np.asarray(rv, dtype=float).ravel()
        design = np.column_stack([rv, np.ones_like(rv)])
        coef, *_ = np.linalg.lstsq(design, vix2, rcond=None)
        a, b = float(coef[0]), float(coef[1])
        preds = a * rv + b
        return {'a': a, 'b': b, 'preds': preds, 'resid': vix2 - preds}

    def resid_stats(self, resid: np.ndarray):
        """Summarize residuals: mean, variance, lag-1 autocorrelation and the
        non-zero frequency carrying the most spectral power.

        Empty input yields all zeros. A constant series has undefined
        autocorrelation, which is reported as 0.0 rather than NaN.
        """
        resid = np.asarray(resid, dtype=float).ravel()
        if resid.size == 0:
            return {'mean': 0.0, 'var': 0.0, 'ac1': 0.0, 'dominant_freq': 0.0}
        mean = float(np.mean(resid))
        var = float(np.var(resid))

        ac1 = 0.0
        if resid.size > 2:
            lead, lag = resid[:-1], resid[1:]
            # corrcoef divides by both standard deviations; skip when either is 0.
            if np.std(lead) > 0 and np.std(lag) > 0:
                ac1 = float(np.corrcoef(lead, lag)[0, 1])

        power = np.abs(np.fft.rfft(resid - mean)) ** 2
        freqs = np.fft.rfftfreq(resid.size)
        # Index 0 is the DC component, so search from index 1 onwards.
        dominant_freq = float(freqs[np.argmax(power[1:]) + 1]) if power.size > 1 else 0.0
        return {'mean': mean, 'var': var, 'ac1': ac1, 'dominant_freq': dominant_freq}

    def explore(self, df: pd.DataFrame, vix2_col: Optional[str] = None, rv_col: Optional[str] = None):
        """Regress one column on another and report residual / VRP statistics.

        Args:
            df: Input data.
            vix2_col: Dependent column; defaults to the first numeric column.
            rv_col: Regressor column; defaults to the first numeric column
                other than ``vix2_col`` (or ``vix2_col`` itself if it is the
                only one). An explicitly supplied column is never overridden.

        Returns:
            A JSON-friendly dict, or ``None`` when ``df`` has no numeric
            columns or no row with finite values in both series.
        """
        numcols = df.select_dtypes(include=[np.number]).columns.tolist()
        if not numcols:
            return None
        if vix2_col is None:
            vix2_col = numcols[0]
        if rv_col is None:
            rv_col = next((c for c in numcols if c != vix2_col), vix2_col)

        # Forward-fill gaps, then back-fill so leading NaNs do not survive.
        vix2 = df[vix2_col].astype(float).ffill().bfill().to_numpy()
        rv = df[rv_col].astype(float).ffill().bfill().to_numpy()
        usable = np.isfinite(vix2) & np.isfinite(rv)
        vix2, rv = vix2[usable], rv[usable]
        if vix2.size == 0:
            return None

        reg = self.regress_vix2_vs_rv(vix2, rv)
        st = self.resid_stats(reg['resid'])
        # The VRP proxy is VIX^2 minus its RV-implied level.
        vrp = vix2 - reg['preds']
        return {
            'vix2_col': vix2_col,
            'rv_col': rv_col,
            'reg': {'a': reg['a'], 'b': reg['b']},
            'resid_stats': st,
            'vrp_mean': float(np.mean(vrp)),
            'vrp_std': float(np.std(vrp)),
            'vrp_series': vrp.tolist(),
            'residuals': reg['resid'].tolist()
        }
157
 
158
+ # ---------------------
159
+ # Quant modules (upgraded)
160
+ # ---------------------
161
+
162
class StochasticModels:
    """Closed-form Black-Scholes pricing and path simulators (Heston, Merton)."""

    @staticmethod
    def bs_price(S: float, K: float, r: float, q: float, sigma: float, T: float, option_type: str = 'call') -> float:
        """Black-Scholes price with continuous dividend yield ``q``.

        Any ``option_type`` other than ``'call'`` is priced as a put.
        With ``T <= 0`` the intrinsic value is returned. With ``sigma <= 0``
        and ``T > 0`` the payoff is deterministic, so the discounted forward
        intrinsic value is returned.
        """
        S, K, r, q, sigma, T = map(float, (S, K, r, q, sigma, T))
        is_call = option_type == 'call'
        if T <= 0:
            return float(max(S - K, 0.0) if is_call else max(K - S, 0.0))
        disc_S = S * np.exp(-q * T)
        disc_K = K * np.exp(-r * T)
        if sigma <= 0:
            return float(max(disc_S - disc_K, 0.0) if is_call else max(disc_K - disc_S, 0.0))
        vol_sqrt_t = sigma * np.sqrt(T)
        d1 = (np.log(S / K) + (r - q + 0.5 * sigma**2) * T) / vol_sqrt_t
        d2 = d1 - vol_sqrt_t
        if is_call:
            price = disc_S * stats.norm.cdf(d1) - disc_K * stats.norm.cdf(d2)
        else:
            price = disc_K * stats.norm.cdf(-d2) - disc_S * stats.norm.cdf(-d1)
        return float(price)

    @staticmethod
    def heston_simulate(S0: float, v0: float, r: float, kappa: float, theta: float, xi: float, rho: float, T: float,
                        n_steps: int = 252, n_paths: int = 2000, seed: Optional[int] = None):
        """Simulate Heston paths with an Euler scheme.

        The variance used in drift and diffusion is truncated at zero and the
        stored variance is floored at 1e-8. The price is advanced with a plain
        Euler step, so a very coarse grid can in principle push it negative.

        Returns:
            ``(S, v)``, each of shape ``(n_paths, n_steps + 1)``.

        Raises:
            ValueError: if ``n_steps`` or ``n_paths`` is not positive.
        """
        n_steps, n_paths = int(n_steps), int(n_paths)
        if n_steps <= 0 or n_paths <= 0:
            raise ValueError("n_steps and n_paths must be positive")
        if seed is not None:
            np.random.seed(seed)
        dt = T / n_steps
        S = np.zeros((n_paths, n_steps + 1))
        v = np.zeros((n_paths, n_steps + 1))
        S[:, 0] = S0
        v[:, 0] = v0
        # Loop-invariant weight of the independent shock in the correlated pair.
        ortho = np.sqrt(max(0.0, 1 - rho**2))
        for t in range(n_steps):
            z1 = np.random.randn(n_paths)
            z2 = np.random.randn(n_paths)
            w1 = z1
            w2 = rho * z1 + ortho * z2
            v_prev = np.maximum(v[:, t], 0.0)
            diffusion = np.sqrt(v_prev * dt)
            dv = kappa * (theta - v_prev) * dt + xi * diffusion * w2
            v[:, t + 1] = np.maximum(v_prev + dv, 1e-8)
            S[:, t + 1] = S[:, t] + r * S[:, t] * dt + diffusion * S[:, t] * w1
        return S, v

    @staticmethod
    def merton_jump_diffusion(S0: float, mu: float, sigma: float, lamb: float, mu_j: float, sigma_j: float,
                              T: float, n_steps: int = 252, n_paths: int = 2000, seed: Optional[int] = None):
        """Simulate Merton jump-diffusion paths (Euler diffusion + lognormal jumps).

        Within a step the number of jumps ``N`` is Poisson(``lamb * dt``).
        The compounded jump factor is ``exp(N * mu_j + sqrt(N) * sigma_j * Z)``,
        the product of ``N`` independent lognormal jumps. This matches a
        single jump when ``N == 1`` and is exactly 1 when ``N == 0``.

        Returns:
            Array of shape ``(n_paths, n_steps + 1)``, floored at 1e-8.

        Raises:
            ValueError: if ``n_steps`` or ``n_paths`` is not positive.
        """
        n_steps, n_paths = int(n_steps), int(n_paths)
        if n_steps <= 0 or n_paths <= 0:
            raise ValueError("n_steps and n_paths must be positive")
        if seed is not None:
            np.random.seed(seed)
        dt = T / n_steps
        sqrt_dt = np.sqrt(dt)
        S = np.full((n_paths, n_steps + 1), S0, dtype=float)
        for t in range(n_steps):
            z = np.random.randn(n_paths)
            pois = np.random.poisson(lamb * dt, size=n_paths)
            eps = np.random.randn(n_paths)
            # Sum of N iid N(mu_j, sigma_j^2) log-jumps is N(N*mu_j, N*sigma_j^2).
            jump_ret = np.exp(mu_j * pois + sigma_j * np.sqrt(pois) * eps) - 1.0
            step = S[:, t] * (1 + mu * dt + sigma * sqrt_dt * z) + S[:, t] * jump_ret
            S[:, t + 1] = np.maximum(step, 1e-8)
        return S
222
+
223
class NumericalMethods:
    """Crank-Nicolson PDE pricing and variance-reduced Monte Carlo pricing."""

    @staticmethod
    def bs_crank_nicolson(S0: float, K: float, r: float, q: float, sigma: float, T: float,
                          Smax_mult: float = 3.0, M: int = 400, N: int = 400, option_type: str = 'call') -> float:
        """Price a European option by Crank-Nicolson on the Black-Scholes PDE.

        The asset grid is ``S_j = j * dS`` for ``j = 0..M`` with
        ``Smax = S0 * Smax_mult``; time is split into ``N`` steps. Each step
        solves ``(I - L/2) V^n = (I + L/2) V^{n+1}``, where ``L`` is the
        discretized operator with sub/main/super diagonals ``a_j, b_j, c_j``.

        Args:
            S0, K, r, q, sigma, T: Spot, strike, rate, dividend yield,
                volatility and maturity in years.
            Smax_mult: Upper grid bound as a multiple of ``S0``.
            M: Number of asset steps (at least 3).
            N: Number of time steps (at least 1).
            option_type: ``'call'``; anything else is treated as a put.

        Returns:
            The price at ``S0``, linearly interpolated on the grid.

        Raises:
            ValueError: on a non-positive ``S0`` / ``Smax_mult`` or a grid
                that is too small.
        """
        # Local import: the tridiagonal solver is only needed here.
        from scipy.linalg import solve_banded

        M, N = int(M), int(N)
        if S0 <= 0 or Smax_mult <= 0:
            raise ValueError("S0 and Smax_mult must be positive")
        if M < 3 or N < 1:
            raise ValueError("need M >= 3 asset steps and N >= 1 time steps")

        is_call = option_type == 'call'
        Smax = S0 * Smax_mult
        Svals = np.linspace(0, Smax, M + 1)
        payoff = np.maximum(Svals - K, 0.0) if is_call else np.maximum(K - Svals, 0.0)
        if T <= 0:
            return float(np.interp(S0, Svals, payoff))

        dt = T / N
        tau = T - np.linspace(0, T, N + 1)  # time to maturity at each time node
        if is_call:
            lower = np.zeros(N + 1)
            # Deep in the money the call behaves like a forward (dividends included).
            upper = Smax * np.exp(-q * tau) - K * np.exp(-r * tau)
        else:
            lower = K * np.exp(-r * tau)
            upper = np.zeros(N + 1)

        # Coefficients for the interior nodes j = 1..M-1; array index is j - 1.
        j = np.arange(1, M, dtype=float)
        a = 0.25 * dt * (sigma**2 * j**2 - (r - q) * j)
        b = -0.5 * dt * (sigma**2 * j**2 + r)
        c = 0.25 * dt * (sigma**2 * j**2 + (r - q) * j)

        # Left-hand matrix (I - L/2) in the banded layout solve_banded expects.
        ab = np.zeros((3, M - 1))
        ab[0, 1:] = -c[:-1]
        ab[1, :] = 1.0 - b
        ab[2, :-1] = -a[1:]

        V = payoff.astype(float)
        V[0], V[-1] = lower[N], upper[N]
        for n in range(N - 1, -1, -1):
            # (I + L/2) applied to the known layer, old boundary values included.
            rhs = a * V[:-2] + (1.0 + b) * V[1:-1] + c * V[2:]
            # The new layer's boundary values move to the right-hand side.
            rhs[0] += a[0] * lower[n]
            rhs[-1] += c[-1] * upper[n]
            V[1:-1] = solve_banded((1, 1), ab, rhs)
            V[0], V[-1] = lower[n], upper[n]
        return float(np.interp(S0, Svals, V))

    @staticmethod
    def mc_price_bs_cv(S0: float, K: float, r: float, q: float, sigma: float, T: float,
                       option_type: str = 'call', n_paths: int = 20000, antithetic: bool = True,
                       seed: Optional[int] = None):
        """Monte Carlo Black-Scholes price with variance reduction.

        Terminal prices are drawn exactly from geometric Brownian motion.
        With ``antithetic`` each draw is paired with its negation, so an odd
        ``n_paths`` uses one path fewer. The terminal price is the control
        variate, with known risk-neutral mean ``S0 * exp((r - q) * T)``.

        Raises:
            ValueError: if ``n_paths < 2`` (the sample covariance is undefined).
        """
        n_paths = int(n_paths)
        if n_paths < 2:
            raise ValueError("n_paths must be at least 2")
        if seed is not None:
            np.random.seed(seed)
        draws = n_paths // 2 if antithetic else n_paths
        Z = np.random.randn(draws)
        if antithetic:
            Z = np.concatenate([Z, -Z])
        ST = S0 * np.exp((r - q - 0.5 * sigma**2) * T + sigma * np.sqrt(T) * Z)
        payoff = np.maximum(ST - K, 0) if option_type == 'call' else np.maximum(K - ST, 0)

        control_mean = S0 * np.exp((r - q) * T)
        var_c = np.var(ST, ddof=1)
        # Variance-minimizing coefficient; 0 when the control is degenerate.
        beta = np.cov(payoff, ST, ddof=1)[0, 1] / var_c if var_c > 0 else 0.0
        adj_payoff = payoff - beta * (ST - control_mean)
        return float(np.exp(-r * T) * np.mean(adj_payoff))
315
 
316
class Econometrics:
    """GARCH(1,1) fitting (arch package or SciPy MLE) and the Johansen test."""

    @staticmethod
    def _garch_variance(r: np.ndarray, omega: float, alpha: float, beta: float) -> np.ndarray:
        """Run the GARCH(1,1) recursion ``h_t = omega + alpha*r_{t-1}^2 + beta*h_{t-1}``."""
        h = np.empty(len(r))
        # Seed with the sample variance, floored so log(h) stays finite.
        h[0] = max(float(np.var(r)), 1e-12)
        for t in range(1, len(r)):
            h[t] = omega + alpha * r[t - 1]**2 + beta * h[t - 1]
        return h

    @staticmethod
    def garch_11_fit(returns: np.ndarray):
        """Fit a zero-mean GARCH(1,1) to demeaned ``returns``.

        Uses the ``arch`` package when available, otherwise a Gaussian MLE
        via ``scipy.optimize.minimize``. If the MLE does not converge, the
        starting values are returned.

        Returns:
            ``{'method', 'params', 'cond_var'}``. ``cond_var`` is the
            conditional *variance* series in the units of the input returns.

        Raises:
            ValueError: if fewer than two observations are supplied.
        """
        r = np.asarray(returns, dtype=float).ravel()
        if r.size < 2:
            raise ValueError("need at least two returns to fit GARCH(1,1)")
        r = r - np.mean(r)
        if config.arch:
            try:
                # Percent scaling helps arch's optimizer converge.
                am = arch_model(r * 100.0, vol='Garch', p=1, q=1, dist='normal')
                res = am.fit(disp='off')
                params = res.params.to_dict()
                # arch reports volatility in percent; undo the scaling and square.
                cond_var = (np.asarray(res.conditional_volatility) / 100.0) ** 2
                return {'method': 'arch', 'params': params, 'cond_var': cond_var.tolist()}
            except Exception as e:
                logger.warning(f"arch fit failed: {e}; falling back to MLE.")

        def neglog(params):
            omega, alpha, beta = params
            if omega <= 0 or alpha < 0 or beta < 0 or alpha + beta >= 0.9999:
                return 1e12
            h = Econometrics._garch_variance(r, omega, alpha, beta)
            ll = np.sum(0.5 * (np.log(2 * np.pi) + np.log(h) + (r**2) / h))
            return float(ll) if np.isfinite(ll) else 1e12

        # Start omega at the level implied by the sample variance, so the
        # search is not tied to one return scale.
        sample_var = max(float(np.var(r)), 1e-12)
        init = np.array([max(sample_var * 0.05, 1e-12), 0.05, 0.9])
        bnds = [(1e-12, None), (0, 0.9999), (0, 0.9999)]
        res = minimize(neglog, x0=init, bounds=bnds)
        if not res.success:
            logger.warning("GARCH MLE did not converge; returning fallback params")
            omega, alpha, beta = init
        else:
            omega, alpha, beta = res.x
        h = Econometrics._garch_variance(r, omega, alpha, beta)
        return {'method': 'mle',
                'params': {'omega': float(omega), 'alpha': float(alpha), 'beta': float(beta)},
                'cond_var': h.tolist()}

    @staticmethod
    def johansen_test(data: np.ndarray, det_order: int = 0, k_ar_diff: int = 1):
        """Run the Johansen cointegration test via statsmodels.

        Returns:
            A dict with eigenvalues ``eig``, trace statistics ``lr1`` with
            critical values ``cvt``, and max-eigenvalue statistics ``lr2``
            with critical values ``cvm``. ``None`` when statsmodels is
            unavailable or the test fails.
        """
        if not config.statsmodels:
            return None
        try:
            res = coint_johansen(data, det_order, k_ar_diff)
            return {
                'eig': res.eig.tolist(),
                'lr1': res.lr1.tolist(),
                'cvt': res.cvt.tolist(),
                'lr2': res.lr2.tolist(),
                'cvm': res.cvm.tolist(),
            }
        except Exception as e:
            logger.warning(f"Johansen failed: {e}")
            return None
371
+
372
class PortfolioOptimization:
    """Global-minimum-variance and mean-variance weights (cvxpy or closed form)."""

    @staticmethod
    def _covariance(returns: np.ndarray) -> np.ndarray:
        """Sample covariance as a symmetric 2-D array, even for a single asset."""
        R = np.asarray(returns, dtype=float)
        if R.ndim == 1:
            R = R.reshape(-1, 1)
        cov = np.atleast_2d(np.cov(R, rowvar=False))
        # Remove rounding asymmetry so convex solvers accept the matrix.
        return 0.5 * (cov + cov.T)

    @staticmethod
    def _analytic_gmv(cov: np.ndarray) -> np.ndarray:
        """Closed-form GMV weights: ``inv(cov) 1 / (1' inv(cov) 1)``."""
        invcov = np.linalg.pinv(cov)
        ones = np.ones(cov.shape[0])
        w = invcov.dot(ones)
        return w / ones.dot(w)

    @staticmethod
    def _solve_cvx(cov: np.ndarray, mu: Optional[np.ndarray] = None, target_return: Optional[float] = None):
        """Solve the variance minimization with cvxpy.

        Returns ``None`` when the solver raises or yields no solution.
        """
        try:
            w = cp.Variable(cov.shape[0])
            constraints = [cp.sum(w) == 1]
            if mu is not None and target_return is not None:
                constraints.append(mu @ w >= target_return)
            prob = cp.Problem(cp.Minimize(cp.quad_form(w, cov)), constraints)
            prob.solve(solver=cp.SCS, verbose=False)
        except Exception as e:
            logger.warning(f"cvxpy solve failed: {e}; using closed-form weights")
            return None
        if w.value is None:
            logger.warning(f"cvxpy returned no solution (status {prob.status}); using closed-form weights")
            return None
        return np.array(w.value).ravel()

    @staticmethod
    def gmv_weights(returns: np.ndarray):
        """Fully invested minimum-variance weights (shorting allowed)."""
        cov = PortfolioOptimization._covariance(returns)
        if CVXPY_AVAILABLE:
            w = PortfolioOptimization._solve_cvx(cov)
            if w is not None:
                return w
        return PortfolioOptimization._analytic_gmv(cov)

    @staticmethod
    def mean_variance_opt(returns: np.ndarray, target_return: Optional[float] = None):
        """Minimum-variance weights with expected return at least ``target_return``.

        Without a target this is the GMV portfolio. In the closed-form path
        the GMV portfolio is returned when it already meets the target, or
        when the asset means give no extra degree of freedom.
        """
        R = np.asarray(returns, dtype=float)
        if R.ndim == 1:
            R = R.reshape(-1, 1)
        mu = np.mean(R, axis=0)
        cov = PortfolioOptimization._covariance(R)
        if target_return is None:
            return PortfolioOptimization.gmv_weights(R)
        if CVXPY_AVAILABLE:
            w = PortfolioOptimization._solve_cvx(cov, mu, target_return)
            if w is not None:
                return w

        invcov = np.linalg.pinv(cov)
        ones = np.ones(len(mu))
        A = ones.dot(invcov).dot(ones)
        B = ones.dot(invcov).dot(mu)
        C = mu.dot(invcov).dot(mu)
        denom = A * C - B**2
        gmv = PortfolioOptimization._analytic_gmv(cov)
        # The return constraint is an inequality: it is inactive when the
        # GMV portfolio already earns the target.
        if float(mu.dot(gmv)) >= target_return or abs(denom) < 1e-14:
            return gmv
        lam = (C - target_return * B) / denom
        gamma = (target_return * A - B) / denom
        return invcov.dot(lam * ones + gamma * mu)
423
+
424
+ # ---------------------
425
+ # ML for Finance helpers
426
+ # ---------------------
427
class MLForFinance:
    """Feature engineering and sparse feature selection helpers."""

    @staticmethod
    def compute_basic_features(price: np.ndarray, mom_window: int = 20, vol_window: int = 20):
        """Build a ``(len(price), 4)`` matrix ``[log return, momentum, rolling vol, SMA]``.

        The first log return is 0. Momentum is the ``mom_window``-period
        percentage change with its warm-up filled by 0. Rolling volatility
        and the moving average are back-filled over their warm-up. When the
        series is shorter than a window those columns stay NaN.
        """
        p = np.asarray(price, dtype=float).ravel()
        if p.size == 0:
            return np.empty((0, 4))
        prices = pd.Series(p)
        # The small offset guards log(0) for zero prices.
        ret = np.concatenate([[0.0], np.diff(np.log(p + 1e-12))])
        mom = prices.pct_change(mom_window).fillna(0).values
        rv = pd.Series(ret).rolling(vol_window).std().bfill().values
        sma = prices.rolling(mom_window).mean().bfill().values
        return np.column_stack([ret, mom, rv, sma])

    @staticmethod
    def lasso_select(X: np.ndarray, y: np.ndarray):
        """Select features with 5-fold cross-validated LASSO.

        Returns:
            ``{'coef', 'selected', 'alpha'}``, where ``selected`` lists the
            column indices with ``|coef| > 1e-6``. ``None`` when scikit-learn
            is missing or the fit fails (best effort; the failure is logged).
        """
        try:
            # Imported lazily so scikit-learn stays an optional dependency.
            from sklearn.linear_model import LassoCV
            model = LassoCV(cv=5, n_jobs=1).fit(X, np.asarray(y).ravel())
            coef = model.coef_
            selected = [int(i) for i in np.where(np.abs(coef) > 1e-6)[0]]
            return {'coef': coef.tolist(), 'selected': selected, 'alpha': float(model.alpha_)}
        except Exception as e:
            logger.warning(f"LASSO selection failed: {e}")
            return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
+ # ---------------------
452
+ # LLM interface (detailed prompt + structured JSON output)
453
+ # ---------------------
454
class LLMInterface:
    """Thin client for the Hugging Face Inference API that returns structured strategies."""

    def __init__(self, model_name: str = None, hf_token: Optional[str] = None):
        self.model_name = model_name or config.hf_default_model
        self.api_url = f"https://api-inference.huggingface.co/models/{self.model_name}"
        self.hf_token = hf_token or config.hf_token

    def _call_api(self, prompt: str, max_length: int = 700) -> str:
        """POST ``prompt`` to the model and return the generated text.

        Never raises. Failures come back as strings prefixed with
        ``API_ERROR_<status>`` or ``API_EXCEPTION`` so callers can fall back.
        """
        headers = {"Content-Type": "application/json"}
        if self.hf_token:
            headers["Authorization"] = f"Bearer {self.hf_token}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_length,
                "temperature": 0.2,
                # Without this the endpoint echoes the prompt, and the prompt's
                # own embedded JSON defeats the extraction done by the caller.
                "return_full_text": False,
            },
        }
        try:
            r = requests.post(self.api_url, headers=headers, json=payload, timeout=40)
            if r.status_code != 200:
                return f"API_ERROR_{r.status_code}: {r.text[:200]}"
            res = r.json()
            if isinstance(res, list) and res and isinstance(res[0], dict):
                return res[0].get("generated_text", str(res[0]))
            if isinstance(res, dict) and "generated_text" in res:
                return str(res["generated_text"])
            return str(res)
        except Exception as e:
            # Best-effort boundary: any transport/decoding problem becomes a marker string.
            return f"API_EXCEPTION: {e}"

    @staticmethod
    def _extract_json_object(raw: str) -> Optional[Dict[str, Any]]:
        """Return the first JSON object embedded in ``raw``.

        Returns ``None`` if there are no braces.

        Raises:
            ValueError: if braces are present but no JSON object can be decoded.
        """
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end == -1 or end < start:
            return None
        try:
            data = json.loads(raw[start:end + 1])
        except ValueError:
            # Trailing text may contain stray braces; decode just the first object.
            data, _ = json.JSONDecoder().raw_decode(raw[start:])
        if not isinstance(data, dict):
            raise ValueError("model output is JSON but not an object")
        return data

    def generate_structured_strategy(self,
                                     analysis: Dict[str, Any],
                                     market_snapshot: str,
                                     requirements: Dict[str, Any]) -> Dict[str, Any]:
        """Ask the model for a strategy as a JSON object.

        The expected keys are ``strategy_summary``, ``signals``,
        ``risk_management``, ``pseudocode``, ``backtest_guidance`` and
        ``notes``. A deterministic template is returned when the call fails
        or the reply holds no JSON object.
        """
        instr = (
            "You are a quantitative researcher writing a concise Quant Research Note. "
            "Produce structured JSON only, with keys: strategy_summary, signals, risk_management, pseudocode, backtest_guidance, notes.\n\n"
            "Requirements: "
            # default=str keeps prompt building from failing on numpy scalars etc.
            f"{json.dumps(requirements, ensure_ascii=False, default=str)}\n\n"
            "Analysis (numerical results):\n"
            f"{json.dumps(analysis, indent=2, ensure_ascii=False, default=str)[:4000]}\n\n"
            "Market snapshot:\n"
            f"{market_snapshot[:2000]}\n\n"
            "Be specific: signals should include exact mathematical conditions (e.g. vrp > vrp_sma_short AND rsi < 30). "
            "Pseudocode should include function signatures: compute_features(data), generate_signal(features), risk_manage(position), execute(signal). "
            "Backtest guidance should specify data frequency, in-sample/out-of-sample split, sample length, and slippage/commission assumptions. "
            "Keep outputs compact but precise."
        )
        raw = self._call_api(instr, max_length=800)
        try:
            data = self._extract_json_object(raw)
            if data is not None:
                return data
        except Exception as e:
            logger.warning(f"LLM did not return pure JSON: {e}")
        # Deterministic template used whenever no JSON object could be recovered.
        fallback = {
            "strategy_summary": "Fallback strategy: VRP mean-reversion with momentum filter.",
            "signals": [
                "entry: vrp < vrp_sma_short and momentum > 0.5",
                "exit: vrp > vrp_sma_long or price crosses stop loss"
            ],
            "risk_management": "max position risk 0.5% NAV; use stop-loss and time-based exit",
            "pseudocode": (
                "def compute_features(data):\n"
                "    features = {...} # vrp, sma, momentum\n"
                "def generate_signal(features):\n"
                "    if features['vrp'] < features['vrp_sma_short'] and features['mom'] > 0:\n"
                "        return 1\n"
                "    return 0\n"
                "def risk_manage(pos):\n"
                "    # apply stop loss / position sizing\n"
            ),
            "backtest_guidance": "Use 1-minute bars, in-sample 2 years, OOS 6 months, slippage 0.02%, commission 0.0005 per trade",
            "notes": "LLM API failed or returned non-JSON; this is a deterministic fallback."
        }
        return fallback
535
+
536
+ # ---------------------
537
+ # Integrative Trainer / Platform
538
+ # ---------------------
539
class QuantPlatform:
    """Facade wiring data ingestion, pricing, econometrics, portfolio tools and the LLM."""

    # Bulky per-observation arrays that must not be pasted into an LLM prompt.
    _PROMPT_EXCLUDED_KEYS = ('vrp_series', 'residuals')

    def __init__(self):
        self.fiber = FiberBundleTheory()
        self.noise = NoiseExplorer()
        self.trainer_ml = None
        self.llm = LLMInterface()
        self.current_data = None
        self.analysis_results = {}

    # ---- internal helpers -------------------------------------------------
    @staticmethod
    def _resolve_path(file) -> str:
        """Accept either a path string or an object exposing ``.name``."""
        return file if isinstance(file, str) else getattr(file, 'name', str(file))

    def _numeric_frame(self) -> pd.DataFrame:
        """Numeric columns of the loaded data with incomplete rows dropped."""
        return self.current_data.select_dtypes(include=[np.number]).dropna()

    def _returns_frame(self) -> pd.DataFrame:
        """Simple returns of the numeric columns.

        Rows with NaN or infinite values (e.g. division by a zero price) are
        removed.
        """
        rets = self._numeric_frame().pct_change()
        return rets.replace([np.inf, -np.inf], np.nan).dropna()

    def _first_column_returns(self) -> Optional[np.ndarray]:
        """Finite simple returns of the first numeric column, or ``None`` if there is none."""
        numeric = self.current_data.select_dtypes(include=[np.number]).columns.tolist()
        if not numeric:
            return None
        series = self.current_data[numeric[0]].pct_change().dropna().values
        return series[np.isfinite(series)]

    # ---- data ingestion & basic analysis ----------------------------------
    def upload_and_analyze(self, file):
        """Load a CSV/Excel file and run the quick noise and GARCH diagnostics.

        Returns:
            ``(data summary, noise summary, GARCH summary)`` as strings. The
            last two are ``None`` when the file is missing or unreadable.
        """
        if file is None:
            return "请上传 CSV / Excel 文件", None, None
        fname = self._resolve_path(file)
        try:
            if fname.lower().endswith('.csv'):
                df = pd.read_csv(fname)
            else:
                df = pd.read_excel(fname)
        except Exception as e:
            return f"读取失败: {e}", None, None
        self.current_data = df
        numeric = df.select_dtypes(include=[np.number]).columns.tolist()
        summary = f"Rows: {len(df)}, Cols: {len(df.columns)}, Numeric: {numeric}"

        # Noise exploration on the first two numeric columns.
        noise_res = None
        try:
            noise_res = self.noise.explore(df)
            if noise_res is None:
                noise_summary = "噪声分析失败: 数据无数值列"
            else:
                noise_summary = f"VRP mean {noise_res['vrp_mean']:.6f}, vrp std {noise_res['vrp_std']:.6f}, resid ac1 {noise_res['resid_stats']['ac1']:.4f}"
        except Exception as e:
            noise_summary = f"噪声分析失败: {e}"
            noise_res = None

        # Quick GARCH fit on the first numeric column's returns.
        garch_summary = "GARCH not run"
        series = self._first_column_returns()
        if series is not None and len(series) > 30:
            try:
                garch_res = Econometrics.garch_11_fit(series)
                garch_summary = f"GARCH method: {garch_res.get('method','?')}, params keys: {list(garch_res.get('params',{}).keys()) if 'params' in garch_res else 'n/a'}"
            except Exception as e:
                garch_summary = f"GARCH失败: {e}"
        self.analysis_results = {'noise': noise_res, 'garch': garch_summary}
        return summary, noise_summary, garch_summary

    # ---- pricing / PDE / MC wrappers ---------------------------------------
    def price_bs_cn(self, S, K, r, q, sigma, T, Smax_mult=3.0, M=400, N=400, option_type='call'):
        """Crank-Nicolson price formatted for display; errors become a message."""
        try:
            p = NumericalMethods.bs_crank_nicolson(float(S), float(K), float(r), float(q), float(sigma), float(T),
                                                   Smax_mult=float(Smax_mult), M=int(M), N=int(N), option_type=option_type)
            return f"Crank–Nicolson price: {p:.6f}"
        except Exception as e:
            return f"PDE pricing failed: {e}"

    def price_bs_mc(self, S, K, r, q, sigma, T, option_type='call', n_paths=20000, antithetic=True):
        """Variance-reduced Monte Carlo price formatted for display."""
        try:
            p = NumericalMethods.mc_price_bs_cv(float(S), float(K), float(r), float(q), float(sigma), float(T),
                                                option_type=option_type, n_paths=int(n_paths), antithetic=bool(antithetic))
            return f"MC price (CV): {p:.6f}"
        except Exception as e:
            return f"MC pricing failed: {e}"

    def simulate_heston(self, S0, v0, r, kappa, theta, xi, rho, T, n_steps=252, n_paths=2000):
        """Simulate Heston paths and plot the first three.

        Returns ``(status message, matplotlib figure or None)``.
        """
        try:
            S, v = StochasticModels.heston_simulate(float(S0), float(v0), float(r), float(kappa), float(theta),
                                                    float(xi), float(rho), float(T), int(n_steps), int(n_paths))
            fig, ax = plt.subplots()
            for i in range(min(3, S.shape[0])):
                ax.plot(S[i, :], label=f'path{i}')
            ax.set_title("Heston sample paths (first few)")
            ax.legend()
            # Detach from pyplot's registry so figures do not pile up in a
            # long-running server; the Figure object itself stays usable.
            plt.close(fig)
            return "Heston simulation success", fig
        except Exception as e:
            return f"Heston simulation failed: {e}", None

    # ---- econometrics wrappers ---------------------------------------------
    def garch_fit(self):
        """Fit GARCH(1,1) to the first numeric column; return a JSON string or a message."""
        if self.current_data is None:
            return "请先上传数据"
        series = self._first_column_returns()
        if series is None:
            return "数据无数值列"
        if len(series) < 30:
            return "样本过短,至少需要30个观测用于GARCH拟合"
        try:
            res = Econometrics.garch_11_fit(series)
            cond_var = res.get('cond_var')
            payload = {
                'method': res.get('method', 'mle'),
                'params': res.get('params', 'omega/alpha/beta'),
                'cond_var_mean': float(np.mean(cond_var)) if cond_var else float('nan'),
            }
            return json.dumps(payload, indent=2)
        except Exception as e:
            return f"GARCH拟合失败: {e}"

    def johansen(self):
        """Run the Johansen test on all numeric columns; return a JSON string or a message."""
        if self.current_data is None:
            return "请先上传数据"
        data = self._numeric_frame().values
        if data.shape[0] < 50 or data.shape[1] < 2:
            return "数据不足以做 Johansen 协整检验(至少 50 行,2 列)"
        try:
            res = Econometrics.johansen_test(data)
            if res is None:
                return "Johansen 不可用(statsmodels 未安装或出错)"
            return json.dumps({'eig_top5': res['eig'][:5], 'lr1_top5': res['lr1'][:5]}, indent=2)
        except Exception as e:
            return f"Johansen 失败: {e}"

    # ---- portfolio & risk --------------------------------------------------
    def compute_gmv(self):
        """Global-minimum-variance weights over the numeric columns."""
        if self.current_data is None:
            return "请先上传数据"
        df = self._numeric_frame()
        if df.shape[0] < 10 or df.shape[1] < 1:
            return "数据不足"
        returns = self._returns_frame().values
        if returns.shape[0] < 2:
            return "数据不足"
        try:
            w = PortfolioOptimization.gmv_weights(returns)
        except Exception as e:
            return f"GMV failed: {e}"
        return f"GMV weights (len {len(w)}): {np.round(w,4).tolist()}"

    def mean_var_opt(self, target_return: Optional[float] = None):
        """Mean-variance weights, optionally subject to a minimum expected return."""
        if self.current_data is None:
            return "请先上传数据"
        returns = self._returns_frame().values
        try:
            target = float(target_return) if target_return is not None else None
            w = PortfolioOptimization.mean_variance_opt(returns, target_return=target)
            return f"Optimized weights (len {len(w)}): {np.round(w,4).tolist()}"
        except Exception as e:
            return f"Mean-Variance optimization failed: {e}"

    # ---- ML ----------------------------------------------------------------
    def lasso_select(self):
        """LASSO-select which other columns' returns explain the first column's returns."""
        if self.current_data is None:
            return "请先上传数据"
        df = self._numeric_frame()
        if df.shape[1] < 2 or df.shape[0] < 30:
            return "数据不足以做 LASSO"
        # Target and regressors come from one frame, so rows stay aligned.
        rets = self._returns_frame()
        if len(rets) <= 10:
            return "数据对齐后样本太短"
        y = rets.iloc[:, 0].values
        X = rets.iloc[:, 1:].values
        res = MLForFinance.lasso_select(X, y)
        if res is None:
            return "LASSO 失败"
        return f"Selected indices: {res['selected']}, alpha: {res['alpha']:.6g}"

    # ---- LLM strategy (structured) ------------------------------------------
    def generate_strategy(self, user_prompt: str, intraday: bool = True, model_name: Optional[str] = None) -> str:
        """Build an analysis digest and ask the LLM for a structured strategy.

        Returns a pretty-printed JSON string (a JSON error object when no data
        has been uploaded).
        """
        if self.current_data is None:
            return json.dumps({'error': '请先上传数据'}, ensure_ascii=False)
        analysis = {}
        noise = self.analysis_results.get('noise')
        if noise:
            # Keep summary statistics only; the raw series would consume the
            # whole prompt budget before the useful numbers are reached.
            analysis['noise'] = {k: v for k, v in noise.items() if k not in self._PROMPT_EXCLUDED_KEYS}
        try:
            g = self.garch_fit()
            analysis['garch_summary'] = json.loads(g) if g and g.startswith("{") else g
        except Exception:
            analysis['garch_summary'] = "GARCH无法解析"
        # Market snapshot: descriptive statistics of the last 50 rows.
        snapshot = self.current_data.select_dtypes(include=[np.number]).tail(50).describe().to_string()
        requirements = {'intraday': intraday, 'pseudocode': True, 'user_prompt': user_prompt}
        if model_name:
            self.llm = LLMInterface(model_name=model_name)
        result = self.llm.generate_structured_strategy(analysis, snapshot, requirements)
        return json.dumps(result, ensure_ascii=False, indent=2)
707
+
708
+ # ---------------------
709
+ # Gradio UI
710
+ # ---------------------
711
def create_ui():
    """Build the Gradio Blocks app and wire each tab to a ``QuantPlatform`` instance.

    Returns:
        The (not yet launched) ``gr.Blocks`` application.
    """
    platform = QuantPlatform()
    with gr.Blocks(title="Quant Upgraded Platform") as demo:
        gr.Markdown("# Quant Upgraded Platform — 高精度/高性能 + 精细化 LLM 策略")
        with gr.Tabs():
            with gr.TabItem("📁 数据上传 & 基础分析"):
                with gr.Row():
                    file_input = gr.File(label="上传 CSV / Excel")
                    upload_btn = gr.Button("上传并分析")
                summary = gr.Textbox(label="数据摘要", lines=2)
                noise = gr.Textbox(label="噪声探索摘要", lines=2)
                garch = gr.Textbox(label="GARCH 摘要", lines=2)
                upload_btn.click(platform.upload_and_analyze, inputs=[file_input], outputs=[summary, noise, garch])

            with gr.TabItem("📊 Pricing / PDE / MC"):
                with gr.Row():
                    S = gr.Number(value=100.0, label="Spot S")
                    K = gr.Number(value=100.0, label="Strike K")
                    r = gr.Number(value=0.01, label="r")
                    q = gr.Number(value=0.0, label="q")
                    sigma = gr.Number(value=0.2, label="sigma")
                    T = gr.Number(value=0.5, label="T (yrs)")
                # Grid controls get explicit labels so users can tell them apart.
                with gr.Row():
                    smax_mult = gr.Number(value=3.0, label="Smax multiple")
                    m_steps = gr.Slider(100, 800, value=400, label="Asset steps M")
                    n_steps = gr.Slider(100, 800, value=400, label="Time steps N")
                    cn_type = gr.Dropdown(['call', 'put'], value='call', label="Option type (PDE)")
                with gr.Row():
                    bs_cn_btn = gr.Button("Crank–Nicolson BS PDE 价格")
                    bs_cn_out = gr.Textbox(label="PDE Price", lines=1)
                bs_cn_btn.click(platform.price_bs_cn,
                                inputs=[S, K, r, q, sigma, T, smax_mult, m_steps, n_steps, cn_type],
                                outputs=[bs_cn_out])
                with gr.Row():
                    mc_type = gr.Dropdown(['call', 'put'], value='call', label="Option type (MC)")
                    mc_paths = gr.Number(value=config.mc_default_paths, label="MC paths")
                    mc_anti = gr.Checkbox(value=True, label="Antithetic")
                with gr.Row():
                    mc_btn = gr.Button("Monte Carlo (Antithetic + Control Var)")
                    mc_out = gr.Textbox(label="MC Price (CV)", lines=1)
                mc_btn.click(platform.price_bs_mc,
                             inputs=[S, K, r, q, sigma, T, mc_type, mc_paths, mc_anti],
                             outputs=[mc_out])

            with gr.TabItem("🔢 Econometrics"):
                garch_btn = gr.Button("GARCH(1,1) 拟合")
                garch_out = gr.Textbox(label="GARCH 结果", lines=8)
                garch_btn.click(platform.garch_fit, inputs=None, outputs=[garch_out])

                joh_btn = gr.Button("Johansen 协整检验")
                joh_out = gr.Textbox(label="Johansen 结果", lines=6)
                joh_btn.click(platform.johansen, inputs=None, outputs=[joh_out])

            with gr.TabItem("📈 Portfolio & Risk"):
                gmv_btn = gr.Button("计算 GMV 权重")
                gmv_out = gr.Textbox(label="GMV 权重", lines=3)
                gmv_btn.click(platform.compute_gmv, inputs=None, outputs=[gmv_out])

                mv_btn = gr.Button("均值-方差 优化 (可选目标收益)")
                target = gr.Number(label="目标收益 (可空)", value=None)
                mv_out = gr.Textbox(label="MV 结果", lines=3)
                mv_btn.click(platform.mean_var_opt, inputs=[target], outputs=[mv_out])

            with gr.TabItem("🤖 LLM 策略生成 (结构化)"):
                user_q = gr.Textbox(label="你的问题(策略 / 日内 / 回测)", lines=3, value="基于当前数据,给出日内量化策略并生成伪代码")
                intraday = gr.Checkbox(label="日内策略", value=True)
                model_sel = gr.Dropdown(label="LLM 模型 (若无Token或模型不可用会回退)", choices=[config.hf_default_model], value=config.hf_default_model)
                strat_out = gr.Textbox(label="结构化策略输出 (JSON)", lines=20)
                strat_btn = gr.Button("生成策略")
                strat_btn.click(platform.generate_strategy, inputs=[user_q, intraday, model_sel], outputs=[strat_out])

            with gr.TabItem("🔬 Dynamics & Geometry (原有)"):
                noise_btn = gr.Button("运行噪声探索")
                noise_text = gr.Textbox(label="Noise summary", lines=3)

                def run_noise():
                    """Re-run the noise exploration on the uploaded data."""
                    if platform.current_data is None:
                        return "请先上传数据"
                    try:
                        res = platform.noise.explore(platform.current_data)
                    except Exception as e:
                        return f"噪声分析失败: {e}"
                    # explore() returns None when there is nothing numeric to analyze.
                    if res is None:
                        return "数据无数值列"
                    return f"VRP mean {res['vrp_mean']:.6f}, resid ac1 {res['resid_stats']['ac1']:.4f}"

                noise_btn.click(run_noise, inputs=None, outputs=[noise_text])

                sim_vix2 = gr.Number(value=1.0, label="start VIX^2")
                sim_rv = gr.Number(value=0.8, label="start RV")
                T_sim = gr.Number(value=1.0, label="T")
                dt_sim = gr.Number(value=0.01, label="dt")
                sim_btn = gr.Button("模拟梯度动力学")
                sim_out = gr.Plot(label="Dynamics path")

                def run_sim(vix2, rv, T, dt):
                    """Simulate the noisy gradient flow and plot both coordinates."""
                    fig, ax = plt.subplots()
                    try:
                        if float(dt) <= 0 or float(T) < 0:
                            raise ValueError("dt must be > 0 and T >= 0")
                        gradient = GradientDynamicsLite()
                        path = gradient.simulate_flow([vix2, rv], T=float(T), dt=float(dt))
                        ax.plot(path[:, 0], label='VIX^2')
                        ax.plot(path[:, 1], label='RV')
                        ax.legend()
                        ax.set_title("Gradient dynamics (VIX^2 & RV)")
                    except Exception as e:
                        # Show the problem in the plot area instead of failing the callback.
                        ax.set_title(f"Simulation failed: {e}")
                    finally:
                        # Detach from pyplot's registry so figures do not accumulate.
                        plt.close(fig)
                    return fig

                sim_btn.click(run_sim, inputs=[sim_vix2, sim_rv, T_sim, dt_sim], outputs=[sim_out])

        gr.Markdown("注:本系统为研究用途,不构成投资建议。部分功能依赖外部库(statsmodels, arch, cvxpy)。")

    return demo
800
+
801
+ # ---------------------
802
+ # Small helper: GradientDynamicsLite (used only in UI simulation)
803
+ # ---------------------
804
class GradientDynamicsLite:
    """Noisy gradient flow on the base point ``(vix2, rv)`` for ``U = 0.5*(vix2 - rv)^2``."""

    def __init__(self, eta=0.5, sigma=0.02):
        self.eta = eta      # step scale applied to the negative gradient
        self.sigma = sigma  # scale of the Gaussian perturbation

    def U_vrp(self, b):
        """Potential ``0.5 * (vix2 - rv)^2``; accepts arrays whose last axis is ``(vix2, rv)``."""
        b = np.asarray(b, dtype=float)
        vrp = b[..., 0] - b[..., 1]
        return 0.5 * vrp**2

    def grad_U(self, b):
        """Analytic gradient of ``U_vrp``: ``[vrp, -vrp]`` along the last axis."""
        b = np.asarray(b, dtype=float)
        vrp = b[..., 0] - b[..., 1]
        return np.stack([vrp, -vrp], axis=-1)

    def simulate_flow(self, b0, T=1.0, dt=0.01, seed=None):
        """Euler-Maruyama path of ``db = -eta * grad U dt + sigma * sqrt(dt) * dW``.

        Args:
            b0: Starting point ``(vix2, rv)``.
            T: Horizon; must be non-negative.
            dt: Step size; must be positive.
            seed: Optional seed for numpy's global random state.

        Returns:
            Array of shape ``(n_steps + 1, 2)`` with ``n_steps = floor(T / dt)``.

        Raises:
            ValueError: on a non-positive ``dt``, a negative ``T`` or a
                ``b0`` that does not have exactly two components.
        """
        if dt <= 0 or T < 0:
            raise ValueError("dt must be > 0 and T >= 0")
        start = np.asarray(b0, dtype=float).ravel()
        if start.size != 2:
            raise ValueError("b0 must have exactly two components (vix2, rv)")
        if seed is not None:
            np.random.seed(seed)
        # The tolerance absorbs float error: 0.3 / 0.1 is 2.999..., which
        # plain int() would truncate to 2 steps.
        n_steps = int(np.floor(T / dt + 1e-9))
        noise_scale = self.sigma * np.sqrt(dt)
        path = np.zeros((n_steps + 1, 2))
        path[0] = start
        for i in range(n_steps):
            drift = -self.eta * self.grad_U(path[i])
            shock = noise_scale * np.random.randn(2)
            path[i + 1] = path[i] + drift * dt + shock
        return path
836
+
837
+ # ---------------------
838
+ # Entrypoint
839
+ # ---------------------
840
if __name__ == "__main__":
    # Build the UI, then serve it on all interfaces at port 7860 without a
    # public share link.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860, "share": False}
    create_ui().launch(**launch_options)