gaurv007 committed on
Commit
23dc829
·
verified ·
1 Parent(s): cf782be

feat: add alternative neutralization groups (85+ under-explored keys with AC≤100)

Browse files
Files changed (1) hide show
  1. alpha_factory/data/brain_groups.py +58 -0
alpha_factory/data/brain_groups.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BRAIN Alternative Neutralization Groups — Under-explored grouping keys.
3
+ Switching from subindustry (AC=272,958) to these (AC≤100) lowers correlation to the global pool.
4
+ """
5
+ from dataclasses import dataclass
6
+ import random
7
+
8
+
9
@dataclass
class BrainGroup:
    """Record describing one grouping key and its usage statistics.

    The module-level group tables in this file are lists of these records,
    and the selection helpers filter on ``coverage`` and ``alpha_count``.
    """

    # Name of the grouping key (e.g. "subindustry"); this is the string
    # that get_group_for_expression() hands back to its caller.
    id: str
    # Stored as a fraction (values in this file range from 0.69 to 1.00).
    # NOTE(review): presumably the share of instruments that have this key
    # populated -- confirm against the data source.
    coverage: float
    # The "AC" figure quoted in the module docstring.
    # NOTE(review): presumably the number of existing alphas already using
    # this key -- confirm.
    alpha_count: int
    # Short human-readable label for the key.
    description: str
15
+
16
+
17
# Conventional grouping keys. All three have full coverage (1.00) and
# six-figure alpha counts, which is the crowding the alternative keys in
# this module are meant to avoid.
STANDARD_GROUPS = [
    BrainGroup("subindustry", 1.00, 272958, "Subindustry grouping"),
    BrainGroup("industry", 1.00, 229917, "Industry grouping"),
    BrainGroup("sector", 1.00, 333086, "Sector grouping"),
]
22
+
23
# Alternative grouping keys, listed in ascending alpha_count order (3..20).
# Coverage is either roughly 0.70 or at least 0.99, so a coverage threshold
# of 0.90 or 0.95 keeps only the near-complete keys.
ALT_GROUPS = [
    BrainGroup("pv13_rcsed_6l", 1.00, 3, "Supply chain 6-level hierarchy"),
    BrainGroup("pv13_di_6l", 1.00, 6, "Direct industry 6-level"),
    BrainGroup("pv13_hierarchy_min22_513_sector", 0.69, 7, "Hierarchy min22 sector"),
    BrainGroup("pv13_di_5l", 1.00, 10, "Direct industry 5-level"),
    BrainGroup("pv13_rha2_min2_513_sector", 0.70, 11, "Revenue hierarchy min2"),
    BrainGroup("pv13_hierarchy_min51_f1_513_sector", 0.99, 12, "Hierarchy min51 f1"),
    BrainGroup("pv13_hierarchy_min20_513_sector", 0.69, 12, "Hierarchy min20"),
    BrainGroup("pv13_rha2_min10_513_sector", 0.70, 13, "Revenue hierarchy min10"),
    BrainGroup("pv13_hierarchy_min100_2000_513_sector", 0.69, 14, "Hierarchy min100 2k"),
    BrainGroup("pv13_hierarchy_f3_513_sector", 0.99, 15, "Hierarchy f3"),
    BrainGroup("pv13_hierarchy_min51_f2_513_sector", 0.99, 16, "Hierarchy min51 f2"),
    BrainGroup("pv13_hierarchy_min30_3000_mapped_513_sector", 0.99, 17, "Hierarchy min30 3k mapped"),
    BrainGroup("pv13_rha2_min5_3000_513_sector", 0.99, 18, "Revenue hierarchy min5 3k"),
    BrainGroup("pv13_hierarchy_min51_f3_513_sector", 0.99, 18, "Hierarchy min51 f3"),
    BrainGroup("pv13_hierarchys32_513_sector", 0.99, 20, "Hierarchys32"),
]
40
+
41
# Subset of ALT_GROUPS with coverage >= 0.90 and alpha_count <= 30.
# Built once at import time, so later changes to ALT_GROUPS are not reflected.
PRODUCTION_GROUPS = [
    g for g in ALT_GROUPS
    if g.coverage >= 0.90 and g.alpha_count <= 30
]
42
+
43
+
44
def pick_group(min_coverage: float = 0.90, max_ac: int = 50) -> "BrainGroup":
    """Randomly pick an alternative group, favouring low alpha counts.

    Args:
        min_coverage: Smallest ``coverage`` a group may have to be eligible.
        max_ac: Largest ``alpha_count`` a group may have to be eligible.

    Returns:
        One entry of ``ALT_GROUPS`` that passes both thresholds, drawn with
        probability proportional to ``1 / (alpha_count + 1)``. If no entry
        passes, a freshly built "industry" ``BrainGroup`` is returned
        instead.
    """
    candidates = [
        g for g in ALT_GROUPS
        if g.coverage >= min_coverage and g.alpha_count <= max_ac
    ]
    if not candidates:
        # Nothing meets the thresholds: fall back to the standard industry key.
        return BrainGroup("industry", 1.00, 229917, "Industry grouping")

    # The +1 keeps the weight finite for a group with alpha_count == 0.
    # random.choices treats weights as relative, so they do not need to be
    # normalised to sum to 1 first.
    weights = [1.0 / (g.alpha_count + 1) for g in candidates]
    return random.choices(candidates, weights=weights, k=1)[0]
52
+
53
+
54
def get_group_for_expression(prefer_novel: bool = True) -> str:
    """Return the id of the grouping key to use for an expression.

    Args:
        prefer_novel: When true, draw a key via
            ``pick_group(min_coverage=0.95, max_ac=30)``; when false, always
            use the standard "subindustry" key.

    Returns:
        The group id string. Note that on the novel path, if ``pick_group``
        finds no eligible key, its fallback yields "industry" rather than
        "subindustry".
    """
    if not prefer_novel:
        return "subindustry"
    return pick_group(min_coverage=0.95, max_ac=30).id