gaurv007 committed on
Commit
5090758
·
verified ·
1 Parent(s): d41fd24

Upload alpha_factory/personas/crowd_scout.py with huggingface_hub

Browse files
alpha_factory/personas/crowd_scout.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Crowd Scout — Persona 4 (Mediumfish)
3
+ Checks novelty: correlation to library + thematic dedup.
4
+ """
5
+ from ..infra.llm_client import LLMClient
6
+ from ..schemas import CrowdScoutResult, Verdict
7
+
8
+
9
# System prompt for the Crowd Scout persona. scout_novelty passes it as
# `system_prompt` to `llm.generate_json` when it asks the model for a thematic
# novelty assessment. The text is sent to the model verbatim, so edits here
# change model behavior.
# NOTE(review): the RULES below kill on saturation alone, whereas scout_novelty's
# deterministic pre-check only kills a saturated tag when corr > 0.5 — confirm
# which policy is intended.
SYSTEM_PROMPT = """You are a portfolio manager evaluating whether a new alpha is sufficiently
different from the existing library to justify inclusion.

You must assess TWO dimensions:
1. NUMERICAL: Is the alpha's correlation to existing alphas below 0.65?
2. THEMATIC: Even if correlation is low, does this alpha exploit the SAME anomaly as existing ones?

An alpha is a THEMATIC DUPLICATE if it:
- Exploits the same underlying behavioral bias (e.g., two momentum alphas = duplicate even if corr=0.3)
- Uses the same field family with minor operator changes
- Is a linear combination of existing alphas

You MUST label the alpha's underlying anomaly from this list:
PEAD, value, momentum, reversal, low_vol, quality, liquidity, sentiment, analyst,
option_surface, social, fundamental, technical, event, other

RULES:
- If the library already has 3+ alphas with the same anomaly_tag → this alpha is SATURATED → kill
- If correlation > 0.85 → DUPLICATE → kill
- If correlation 0.65-0.85 AND same anomaly_tag → DEMOTE (allow with reduced weight)
- If correlation < 0.65 AND different anomaly_tag → PROMOTE
"""
31
+
32
+
33
async def scout_novelty(
    llm: LLMClient,
    expression: str,
    theme: str,
    anomaly_tag: str,
    existing_anomaly_tags: list[str],
    max_corr_to_library: float,
    model: str | None = None,
) -> CrowdScoutResult:
    """
    Assess whether an alpha is novel enough to include in the library.

    Cheap deterministic checks run first; the LLM is consulted only when
    they are inconclusive.

    Decision order:
        1. corr > 0.85                              -> KILL (duplicate)
        2. tag has >= 3 library alphas and corr > 0.5 -> KILL (saturated)
        3. corr > 0.4                               -> LLM thematic assessment
        4. otherwise                                -> PROMOTE

    Args:
        llm: Client used for the thematic assessment; must provide
            ``generate_json`` and ``config.mediumfish_model``.
        expression: The alpha expression. Only the first 200 characters are
            shown to the LLM.
        theme: Theme label for the alpha, forwarded to the LLM prompt.
        anomaly_tag: Anomaly label of the candidate alpha.
        existing_anomaly_tags: Anomaly labels of the alphas already in the
            library, one entry per alpha (duplicates are counted).
        max_corr_to_library: Maximum correlation of the candidate to the
            existing library.
        model: Optional model override; defaults to
            ``llm.config.mediumfish_model``.

    Returns:
        A ``CrowdScoutResult``. For paths 1, 2 and 4 it is built here; for
        path 3 it is whatever ``llm.generate_json`` returns.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    # Count each tag once; Counter yields 0 for tags not yet in the library.
    tag_distribution = Counter(existing_anomaly_tags)
    tag_count = tag_distribution[anomaly_tag]
    is_saturated = tag_count >= 3

    # --- Deterministic kill: near-identical signal -------------------------
    if max_corr_to_library > 0.85:
        return CrowdScoutResult(
            max_corr_to_library=max_corr_to_library,
            is_thematic_duplicate=True,
            anomaly_already_saturated=is_saturated,
            verdict=Verdict.KILL,
            reason=f"Correlation {max_corr_to_library:.2f} > 0.85 threshold — duplicate",
        )

    # --- Deterministic kill: crowded anomaly AND meaningfully correlated ---
    if is_saturated and max_corr_to_library > 0.5:
        return CrowdScoutResult(
            max_corr_to_library=max_corr_to_library,
            is_thematic_duplicate=True,
            anomaly_already_saturated=True,
            verdict=Verdict.KILL,
            reason=f"Anomaly '{anomaly_tag}' already has {tag_count} alphas AND corr={max_corr_to_library:.2f}",
        )

    # --- Inconclusive band: 0.4 < corr <= 0.85 -> ask the LLM --------------
    if max_corr_to_library > 0.4:
        # Only add an ellipsis when the expression was actually truncated, so
        # the model is not told a complete expression is partial.
        if len(expression) > 200:
            expression_preview = f"{expression[:200]}..."
        else:
            expression_preview = expression

        # dict(Counter) preserves first-seen order, so the prompt text is
        # stable across processes (set iteration order of strings is not).
        distribution = dict(tag_distribution)

        user_prompt = "\n".join(
            [
                "Assess this alpha's thematic novelty:",
                "",
                f"Expression: {expression_preview}",
                f"Theme: {theme}",
                f"Anomaly tag: {anomaly_tag}",
                f"Max correlation to existing library: {max_corr_to_library:.3f}",
                f"Existing anomaly distribution: {distribution}",
                "",
                "Is this a THEMATIC DUPLICATE of something already in the library?",
                "Consider: does it exploit the same behavioral bias, even if numerically different?",
            ]
        )

        # NOTE(review): the result is returned as produced by the model,
        # including its own max_corr_to_library / anomaly_already_saturated
        # values — consider overwriting those with the values computed here.
        return await llm.generate_json(
            prompt=user_prompt,
            schema=CrowdScoutResult,
            model=model or llm.config.mediumfish_model,
            temperature=0.3,
            system_prompt=SYSTEM_PROMPT,
        )

    # --- Low correlation (<= 0.4) -> promote --------------------------------
    # NOTE(review): a saturated tag still reaches this point when corr <= 0.4;
    # the saturation flag is reported but does not block promotion. A NaN
    # correlation also lands here because every comparison above is False.
    return CrowdScoutResult(
        max_corr_to_library=max_corr_to_library,
        is_thematic_duplicate=False,
        anomaly_already_saturated=is_saturated,
        verdict=Verdict.PROMOTE,
        reason=f"Novel: corr={max_corr_to_library:.2f} < 0.65, anomaly '{anomaly_tag}' has {tag_count} existing",
    )