gaurv007 committed on
Commit
ddf5d46
·
verified ·
1 Parent(s): a2396a0

Upload alpha_factory/ui.py

Browse files
Files changed (1) hide show
  1. alpha_factory/ui.py +312 -55
alpha_factory/ui.py CHANGED
@@ -1,15 +1,19 @@
1
  """
2
- Alpha Factory — Gradio UI
3
- View generated alphas, copy expressions, run new batches.
 
4
 
5
  Run: uv run python -m alpha_factory.ui
6
  """
7
  import os
8
  import sys
9
  import subprocess
 
 
10
  import duckdb
11
  import gradio as gr
12
  from pathlib import Path
 
13
 
14
  try:
15
  from dotenv import load_dotenv
@@ -17,8 +21,49 @@ try:
17
  except ImportError:
18
  pass
19
 
 
 
 
20
  DB_PATH = Path("factor_store/alphas.duckdb")
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def get_alphas_from_db(limit=50):
24
  if not DB_PATH.exists():
@@ -72,96 +117,308 @@ def get_full_expression(evt: gr.SelectData):
72
  return ""
73
 
74
 
75
- def run_batch(batch_size):
76
- """Run pipeline as subprocess with forced UTF-8 to avoid Windows encoding crash."""
 
 
 
 
 
 
 
 
 
 
 
77
  env = os.environ.copy()
78
- # Force UTF-8 output β€” prevents Rich/Windows cp1252 crash
79
  env["PYTHONIOENCODING"] = "utf-8"
80
  env["PYTHONLEGACYWINDOWSSTDIO"] = "utf-8"
81
- # Disable Rich color/formatting when piped (cleaner output)
82
  env["NO_COLOR"] = "1"
83
  env["TERM"] = "dumb"
84
- # Ensure HF_TOKEN passes through
85
- if "HF_TOKEN" not in env:
86
- token = os.getenv("HF_TOKEN", "")
87
- if token:
88
- env["HF_TOKEN"] = token
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  try:
91
  result = subprocess.run(
92
- [sys.executable, "-m", "alpha_factory.run", "--dry-run", "--batch-size", str(int(batch_size))],
93
  capture_output=True,
94
  env=env,
95
- timeout=180,
96
  cwd=str(Path.cwd()),
97
  )
98
- # Decode with utf-8, replace errors
99
  stdout = result.stdout.decode("utf-8", errors="replace") if result.stdout else ""
100
  stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else ""
101
-
102
  log = ""
103
  if stdout:
104
- log = stdout[-3000:]
105
  if result.returncode != 0 and stderr:
106
  log += "\n\n--- ERRORS ---\n" + stderr[-2000:]
107
  if not log.strip():
108
  log = f"Process exited with code {result.returncode}"
109
  return log
110
  except subprocess.TimeoutExpired:
111
- return "ERROR: Pipeline timed out after 180 seconds. Try smaller batch size."
112
  except Exception as e:
113
  return f"ERROR: {str(e)}"
114
 
115
 
116
- def generate_and_refresh(batch_size):
117
- log = run_batch(batch_size)
 
 
 
 
 
 
 
 
 
 
 
 
118
  table = get_alpha_cards()
119
  return table, log
120
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def build_ui():
123
- with gr.Blocks(title="Alpha Factory") as app:
124
  gr.Markdown("""
125
- # Alpha Factory β€” Generated Alphas
126
- View, copy, and manage alphas generated by the pipeline.
127
  """)
128
 
129
- with gr.Row():
130
- with gr.Column(scale=1):
131
- batch_size_input = gr.Number(value=3, label="Batch Size", minimum=1, maximum=20)
132
- generate_btn = gr.Button("Generate New Batch", variant="primary")
133
- refresh_btn = gr.Button("Refresh Table")
134
- gr.Markdown("*Dry run mode β€” no BRAIN submissions*")
135
- with gr.Column(scale=3):
136
- stats_md = gr.Markdown(f"**Alphas in store:** {len(get_alphas_from_db())}")
137
-
138
- gr.Markdown("### Click any row to see full expression")
139
-
140
- alpha_table = gr.Dataframe(
141
- value=get_alpha_cards(),
142
- headers=["Time", "ID", "Theme", "Archetype", "Tag", "Decay", "Status", "Expression"],
143
- interactive=False,
144
- wrap=True,
145
- )
 
 
 
146
 
147
- gr.Markdown("### Full Expression β€” Ctrl+A then Ctrl+C to copy")
148
- full_expr = gr.Textbox(
149
- label="Full Expression",
150
- lines=6,
151
- interactive=True,
152
- )
 
 
 
 
153
 
154
- gr.Markdown("### Pipeline Log")
155
- pipeline_log = gr.Textbox(label="Output", lines=15, interactive=False)
156
 
157
- # Events
158
- alpha_table.select(get_full_expression, outputs=[full_expr])
159
- refresh_btn.click(get_alpha_cards, outputs=[alpha_table])
160
- generate_btn.click(
161
- generate_and_refresh,
162
- inputs=[batch_size_input],
163
- outputs=[alpha_table, pipeline_log],
164
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  return app
167
 
 
1
  """
2
+ Alpha Factory — Gradio UI v2
3
+ View generated alphas, copy expressions, run new batches,
4
+ and SELECT per-tier models from discovered Ollama + HuggingFace models.
5
 
6
  Run: uv run python -m alpha_factory.ui
7
  """
8
  import os
9
  import sys
10
  import subprocess
11
+ import json
12
+ import asyncio
13
  import duckdb
14
  import gradio as gr
15
  from pathlib import Path
16
+ from typing import Optional
17
 
18
  try:
19
  from dotenv import load_dotenv
 
21
  except ImportError:
22
  pass
23
 
24
+ from .config import load_config
25
+ from .infra.model_manager import ModelManager, ModelInfo, ModelProvider
26
+
27
  DB_PATH = Path("factor_store/alphas.duckdb")
28
 
29
+ # ── Globals (shared across Gradio sessions) ──────────────────────────────────
30
+ _LAST_DISCOVERED_MODELS: list[ModelInfo] = []
31
+
32
def _model_choice_name(m: ModelInfo) -> str:
    """Format a discovered model as the label shown in the tier dropdowns.

    The label is ``[PROVIDER] name``, followed by `` (X.XGB)`` when
    ``m.size_gb`` is truthy and `` [quant]`` when ``m.quantization`` is truthy.
    """
    label = f"[{m.provider.value.upper()}] {m.name}"
    if m.size_gb:
        label += f" ({m.size_gb:.1f}GB)"
    if m.quantization:
        label += f" [{m.quantization}]"
    return label
37
+
38
+
39
def _discover_models_sync(
    ollama_url: str = "http://localhost:11434",
    hf_token: Optional[str] = None,
) -> list[ModelInfo]:
    """Run the async model discovery to completion and return its results.

    Args:
        ollama_url: Ollama base URL handed to ``ModelManager``.
        hf_token: Explicit HuggingFace token. When falsy, the ``HF_TOKEN``
            and then ``HUGGINGFACE_TOKEN`` environment variables are tried,
            ending with an empty string.

    Returns:
        Whatever ``manager.get_all_models()`` reports after discovery. The
        same list is also stored in the module-level
        ``_LAST_DISCOVERED_MODELS``.
    """
    global _LAST_DISCOVERED_MODELS

    # First non-empty source wins: argument, HF_TOKEN, HUGGINGFACE_TOKEN.
    resolved_token = hf_token
    if not resolved_token:
        resolved_token = os.getenv("HF_TOKEN")
    if not resolved_token:
        resolved_token = os.getenv("HUGGINGFACE_TOKEN", "")

    manager = ModelManager(ollama_url=ollama_url, hf_token=resolved_token)
    try:
        asyncio.run(manager.discover_all())
    except Exception as e:
        # Best effort: report the failure and fall through to whatever the
        # manager still has to offer.
        print(f"Model discovery error: {e}")

    discovered = manager.get_all_models()
    _LAST_DISCOVERED_MODELS = discovered
    return discovered
57
+
58
+
59
def _get_dropdown_choices(models: list[ModelInfo]) -> list[str]:
    """Return the dropdown options for a set of discovered models.

    The first entry is always the "Use Default (auto-assign)" sentinel; it is
    followed by one ``_model_choice_name`` label per model, in input order.
    """
    return ["Use Default (auto-assign)", *map(_model_choice_name, models)]
64
+
65
+
66
+ # ── DB helpers (unchanged) ──────────────────────────────────────────────────
67
 
68
  def get_alphas_from_db(limit=50):
69
  if not DB_PATH.exists():
 
117
  return ""
118
 
119
 
120
+ # ── Pipeline runner (now with per-tier model overrides) ───────────────────────
121
+
122
+ def _run_pipeline_subprocess(
123
+ batch_size: int,
124
+ proven_mode: bool,
125
+ enable_brain: bool,
126
+ ollama_url: str,
127
+ microfish: str,
128
+ tinyfish: str,
129
+ mediumfish: str,
130
+ bigfish: str,
131
+ ) -> str:
132
+ """Run the pipeline as a subprocess with the selected configuration."""
133
  env = os.environ.copy()
 
134
  env["PYTHONIOENCODING"] = "utf-8"
135
  env["PYTHONLEGACYWINDOWSSTDIO"] = "utf-8"
 
136
  env["NO_COLOR"] = "1"
137
  env["TERM"] = "dumb"
138
+
139
+ # Build CLI args
140
+ cmd = [
141
+ sys.executable, "-m", "alpha_factory.run",
142
+ "--batch-size", str(int(batch_size)),
143
+ "--ollama-url", ollama_url,
144
+ ]
145
+
146
+ if proven_mode:
147
+ cmd.append("--proven")
148
+ if enable_brain:
149
+ cmd.append("--enable-brain")
150
+
151
+ # Only pass per-tier overrides if user selected something other than default
152
+ def _extract_model_name(choice: str) -> Optional[str]:
153
+ if not choice or choice == "Use Default (auto-assign)":
154
+ return None
155
+ # Strip the [PROVIDER] prefix and size/quant suffix
156
+ if "]" in choice:
157
+ return choice.split("]", 1)[1].strip().split(" (")[0].split(" [")[0].strip()
158
+ return choice.strip()
159
+
160
+ mf = _extract_model_name(microfish)
161
+ tf = _extract_model_name(tinyfish)
162
+ mmf = _extract_model_name(mediumfish)
163
+ bf = _extract_model_name(bigfish)
164
+
165
+ if mf:
166
+ cmd.extend(["--microfish", mf])
167
+ if tf:
168
+ cmd.extend(["--tinyfish", tf])
169
+ if mmf:
170
+ cmd.extend(["--mediumfish", mmf])
171
+ if bf:
172
+ cmd.extend(["--bigfish", bf])
173
+
174
+ # Log the command for debugging
175
+ print(f"Running: {' '.join(cmd)}")
176
 
177
  try:
178
  result = subprocess.run(
179
+ cmd,
180
  capture_output=True,
181
  env=env,
182
+ timeout=300,
183
  cwd=str(Path.cwd()),
184
  )
 
185
  stdout = result.stdout.decode("utf-8", errors="replace") if result.stdout else ""
186
  stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else ""
187
+
188
  log = ""
189
  if stdout:
190
+ log = stdout[-4000:]
191
  if result.returncode != 0 and stderr:
192
  log += "\n\n--- ERRORS ---\n" + stderr[-2000:]
193
  if not log.strip():
194
  log = f"Process exited with code {result.returncode}"
195
  return log
196
  except subprocess.TimeoutExpired:
197
+ return "ERROR: Pipeline timed out after 300 seconds. Try smaller batch size."
198
  except Exception as e:
199
  return f"ERROR: {str(e)}"
200
 
201
 
202
def generate_and_refresh(
    batch_size,
    proven_mode,
    enable_brain,
    ollama_url,
    microfish,
    tinyfish,
    mediumfish,
    bigfish,
):
    """Run one pipeline batch, then reload the alpha table.

    All arguments are forwarded unchanged to ``_run_pipeline_subprocess``.

    Returns:
        ``(table, log)``: the result of ``get_alpha_cards()`` taken after the
        pipeline has finished, and the pipeline's log text.
    """
    pipeline_output = _run_pipeline_subprocess(
        batch_size=batch_size,
        proven_mode=proven_mode,
        enable_brain=enable_brain,
        ollama_url=ollama_url,
        microfish=microfish,
        tinyfish=tinyfish,
        mediumfish=mediumfish,
        bigfish=bigfish,
    )
    # Read the table only after the subprocess has completed.
    return get_alpha_cards(), pipeline_output
218
 
219
 
220
+ # ── Model discovery refresh ──────────────────────────────────────────────────
221
+
222
def refresh_model_list(ollama_url: str, hf_token: str) -> tuple[str, list[str]]:
    """Discover models and summarise the result for the UI.

    Returns:
        ``(status_message, dropdown_choices)``. When nothing is discovered
        the choices contain only the "Use Default (auto-assign)" entry.
    """
    discovered = _discover_models_sync(ollama_url=ollama_url, hf_token=hf_token)

    if discovered:
        n_ollama = len([m for m in discovered if m.provider == ModelProvider.OLLAMA])
        n_hf = len([m for m in discovered if m.provider == ModelProvider.HUGGINGFACE])
        status = f"Found {n_ollama} Ollama + {n_hf} HF models"
        return status, _get_dropdown_choices(discovered)

    return "No models found. Is Ollama running? Is HF_TOKEN set?", ["Use Default (auto-assign)"]
235
+
236
+
237
+ # ── UI Builder ──────────────────────────────────────────────────────────────
238
+
239
def build_ui():
    """Build the Alpha Factory Gradio app and return the ``gr.Blocks`` object.

    The app has three tabs:

    * Settings: Ollama URL, optional HF token, a model-discovery button and
      one model dropdown per tier (microfish, tinyfish, mediumfish, bigfish).
    * Generate Alphas: batch controls, the alpha table, a box showing the
      full expression of the clicked row, and the pipeline log.
    * About: static help text.
    """
    default_choice = "Use Default (auto-assign)"

    def _stats_text():
        """Markdown for the 'Alphas in store' counter."""
        # NOTE(review): get_alphas_from_db() is called with its default
        # limit, so the count shown is capped at that limit - confirm whether
        # a true total is wanted.
        return f"**Alphas in store:** {len(get_alphas_from_db())}"

    def _refresh_dropdowns(url, token, *selected):
        """Discover models once and push the new choices into every tier.

        A raw list returned to a Dropdown output would set its *value*; the
        choices are updated by returning a ``gr.Dropdown(...)`` instead. A
        tier keeps its current selection when that selection is still
        offered, otherwise it falls back to the default entry.
        """
        status, choices = refresh_model_list(url, token)
        updates = [
            gr.Dropdown(
                choices=choices,
                value=current if current in choices else default_choice,
            )
            for current in selected
        ]
        return (status, *updates)

    with gr.Blocks(title="Alpha Factory v0.2.0") as app:
        gr.Markdown("""
        # Alpha Factory — LLM-Driven Alpha Generation
        Generate and manage equity alpha expressions for WorldQuant BRAIN.
        """)

        # ─── SETTINGS TAB ────────────────────────────────────────────────
        with gr.Tab("⚙️ Settings"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Connection")
                    ollama_url_input = gr.Textbox(
                        value="http://localhost:11434",
                        label="Ollama URL",
                    )
                    # Deliberately not pre-filled from the environment: a
                    # component's initial value is sent to every browser that
                    # opens the page. Discovery falls back to HF_TOKEN when
                    # this field is left blank.
                    hf_token_input = gr.Textbox(
                        value="",
                        label="HF Token (optional)",
                        placeholder="Leave blank to use HF_TOKEN from the environment",
                        type="password",
                    )
                    refresh_models_btn = gr.Button("🔍 Refresh Model List", variant="secondary")
                    discovery_status = gr.Textbox(
                        label="Discovery Status",
                        value="Click 'Refresh Model List' to discover Ollama + HF models",
                        interactive=False,
                    )

                with gr.Column(scale=2):
                    gr.Markdown("### Model Selection — One Per Tier")
                    gr.Markdown("""
                    | Tier | Role | Typical Size |
                    |------|------|-------------|
                    | **Microfish** | Hypothesis generation (bulk) | 1.5B-3B |
                    | **Tinyfish** | Expression compilation | 3B-7B |
                    | **Mediumfish** | Crowd scout + Performance surgeon | 7B-14B |
                    | **Bigfish** | Gatekeeper (final memo) | 14B-72B |
                    """)

                    # Initial choices: just the default until discovery runs.
                    default_choices = [default_choice]

                    microfish_dropdown = gr.Dropdown(
                        choices=default_choices,
                        value=default_choice,
                        label="Microfish — Hypothesis Generation",
                    )
                    tinyfish_dropdown = gr.Dropdown(
                        choices=default_choices,
                        value=default_choice,
                        label="Tinyfish — Expression Compilation",
                    )
                    mediumfish_dropdown = gr.Dropdown(
                        choices=default_choices,
                        value=default_choice,
                        label="Mediumfish — Critique & Diagnosis",
                    )
                    bigfish_dropdown = gr.Dropdown(
                        choices=default_choices,
                        value=default_choice,
                        label="Bigfish — Final Gatekeeper",
                    )

            tier_dropdowns = [
                microfish_dropdown,
                tinyfish_dropdown,
                mediumfish_dropdown,
                bigfish_dropdown,
            ]

            # Exactly one handler on the button: discovery is slow, so it
            # must run once per click and update all four tiers together.
            refresh_models_btn.click(
                fn=_refresh_dropdowns,
                inputs=[ollama_url_input, hf_token_input, *tier_dropdowns],
                outputs=[discovery_status, *tier_dropdowns],
            )

        # ─── GENERATION TAB ──────────────────────────────────────────────
        with gr.Tab("🚀 Generate Alphas"):
            with gr.Row():
                with gr.Column(scale=1):
                    batch_size_input = gr.Number(
                        value=3, label="Batch Size", minimum=1, maximum=20,
                    )
                    proven_mode_cb = gr.Checkbox(
                        value=False, label="Proven Templates (no LLM)",
                    )
                    enable_brain_cb = gr.Checkbox(
                        value=False, label="Enable BRAIN Submission (needs token)",
                    )
                    gr.Markdown("---")
                    gr.Markdown("*Selected models carry over from the Settings tab*")
                    generate_btn = gr.Button("Generate New Batch", variant="primary")
                    refresh_table_btn = gr.Button("Refresh Table Only")

                with gr.Column(scale=3):
                    stats_md = gr.Markdown(_stats_text())

            gr.Markdown("### Click any row to see the full expression")
            alpha_table = gr.Dataframe(
                value=get_alpha_cards(),
                headers=["Time", "ID", "Theme", "Archetype", "Tag", "Decay", "Status", "Expression"],
                interactive=False,
                wrap=True,
            )

            gr.Markdown("### Full Expression — Ctrl+A then Ctrl+C to copy")
            full_expr = gr.Textbox(
                label="Full Expression",
                lines=6,
                interactive=True,
            )

            gr.Markdown("### Pipeline Log")
            pipeline_log = gr.Textbox(label="Output", lines=20, interactive=False)

            # Events. The counter is re-read after each table update so it
            # does not keep showing the value from when the UI was built.
            alpha_table.select(get_full_expression, outputs=[full_expr])
            refresh_table_btn.click(
                get_alpha_cards,
                outputs=[alpha_table],
            ).then(_stats_text, outputs=[stats_md])
            generate_btn.click(
                fn=generate_and_refresh,
                inputs=[
                    batch_size_input,
                    proven_mode_cb,
                    enable_brain_cb,
                    ollama_url_input,
                    *tier_dropdowns,
                ],
                outputs=[alpha_table, pipeline_log],
            ).then(_stats_text, outputs=[stats_md])

        # ─── ABOUT TAB ───────────────────────────────────────────────────
        with gr.Tab("📖 About"):
            gr.Markdown("""
            **Alpha Factory v0.2.0** — Open-source LLM-driven pipeline for WorldQuant BRAIN.

            ### How it works
            1. **Microfish** generates alpha hypotheses (ideas)
            2. **Tinyfish** compiles the idea into a BRAIN expression
            3. **Mediumfish** critiques and diagnoses performance
            4. **Bigfish** makes the final go/no-go decision

            ### Modes
            - **Proven Templates**: Deterministic, no LLM needed, guaranteed valid expressions
            - **LLM Mode**: Uses local (Ollama) or cloud (HuggingFace) models

            ### Model Discovery
            - Set your **Ollama URL** and click **Refresh Model List** to find local models
            - Set your **HF Token** to see HuggingFace Inference API models
            - Select which model to use for each tier, or leave as "Use Default"

            ### BRAIN Integration
            - Requires `BRAIN_SESSION_TOKEN` from browser devtools
            - Enable "BRAIN Submission" checkbox (disabled by default for safety)

            [GitHub / HuggingFace](https://huggingface.co/gaurv007/alpha-factory)
            """)

    return app
424