feat: add alternative neutralization groups (85+ under-explored keys with AC≤100)
Browse files
alpha_factory/data/brain_groups.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
BRAIN Alternative Neutralization Groups — Under-explored grouping keys.
|
| 3 |
+
Switching neutralization from subindustry (alpha count, AC = 272,958) to these keys (AC ≤ 100) lowers correlation to the global alpha pool.
|
| 4 |
+
"""
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
import random
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@dataclass
class BrainGroup:
    """One grouping key that can be used for neutralization.

    Instances are plain records; the module-level lists in this file hold
    one instance per known key.
    """

    # Identifier of the grouping key, e.g. "subindustry" or "pv13_di_6l".
    id: str
    # Coverage of the key as a value between 0 and 1 in the tables below
    # (presumably the fraction of the universe the key covers -- confirm).
    coverage: float
    # The "AC" figure quoted in the module docstring (presumably how many
    # existing alphas already use this key -- confirm).
    alpha_count: int
    # Short human-readable label for the key.
    description: str
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# The three standard grouping keys together with their recorded coverage and
# alpha counts.
STANDARD_GROUPS = [
    BrainGroup("subindustry", 1.00, 272958, "Subindustry grouping"),
    BrainGroup("industry", 1.00, 229917, "Industry grouping"),
    BrainGroup("sector", 1.00, 333086, "Sector grouping"),
]
|
| 22 |
+
|
| 23 |
+
# Alternative grouping keys, listed in ascending order of alpha count.
# Columns: id, coverage, alpha_count, description.
ALT_GROUPS = [
    BrainGroup("pv13_rcsed_6l", 1.00, 3, "Supply chain 6-level hierarchy"),
    BrainGroup("pv13_di_6l", 1.00, 6, "Direct industry 6-level"),
    BrainGroup("pv13_hierarchy_min22_513_sector", 0.69, 7, "Hierarchy min22 sector"),
    BrainGroup("pv13_di_5l", 1.00, 10, "Direct industry 5-level"),
    BrainGroup("pv13_rha2_min2_513_sector", 0.70, 11, "Revenue hierarchy min2"),
    BrainGroup("pv13_hierarchy_min51_f1_513_sector", 0.99, 12, "Hierarchy min51 f1"),
    BrainGroup("pv13_hierarchy_min20_513_sector", 0.69, 12, "Hierarchy min20"),
    BrainGroup("pv13_rha2_min10_513_sector", 0.70, 13, "Revenue hierarchy min10"),
    BrainGroup("pv13_hierarchy_min100_2000_513_sector", 0.69, 14, "Hierarchy min100 2k"),
    BrainGroup("pv13_hierarchy_f3_513_sector", 0.99, 15, "Hierarchy f3"),
    BrainGroup("pv13_hierarchy_min51_f2_513_sector", 0.99, 16, "Hierarchy min51 f2"),
    BrainGroup("pv13_hierarchy_min30_3000_mapped_513_sector", 0.99, 17, "Hierarchy min30 3k mapped"),
    BrainGroup("pv13_rha2_min5_3000_513_sector", 0.99, 18, "Revenue hierarchy min5 3k"),
    BrainGroup("pv13_hierarchy_min51_f3_513_sector", 0.99, 18, "Hierarchy min51 f3"),
    BrainGroup("pv13_hierarchys32_513_sector", 0.99, 20, "Hierarchys32"),
]
|
| 40 |
+
|
| 41 |
+
# Subset of ALT_GROUPS with coverage of at least 0.90 and an alpha count of
# at most 30, in the same order as ALT_GROUPS.
PRODUCTION_GROUPS = [
    group
    for group in ALT_GROUPS
    if group.alpha_count <= 30 and group.coverage >= 0.90
]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def pick_group(
    min_coverage: float = 0.90,
    max_ac: int = 50,
    rng: "random.Random | None" = None,
) -> "BrainGroup":
    """Randomly pick one alternative group, favouring low alpha counts.

    Candidates are the entries of ``ALT_GROUPS`` whose coverage is at least
    ``min_coverage`` and whose alpha count is at most ``max_ac``. Each
    candidate is drawn with probability proportional to
    ``1 / (alpha_count + 1)``.

    Args:
        min_coverage: Minimum coverage a group must have to be eligible.
        max_ac: Maximum alpha count a group may have to be eligible.
        rng: Optional ``random.Random`` instance to draw from, for
            reproducible sampling. Defaults to the module-level ``random``
            generator.

    Returns:
        The chosen ``BrainGroup``. If no group passes both filters, a new
        ``BrainGroup`` describing the standard "industry" key is returned.
    """
    candidates = [
        g
        for g in ALT_GROUPS
        if g.coverage >= min_coverage and g.alpha_count <= max_ac
    ]
    if not candidates:
        return BrainGroup("industry", 1.00, 229917, "Industry grouping")

    # The +1 keeps the weight finite for a group with an alpha count of 0.
    # random.choices() takes relative weights, so no normalization is needed.
    weights = [1.0 / (g.alpha_count + 1) for g in candidates]
    chooser = random if rng is None else rng
    return chooser.choices(candidates, weights=weights, k=1)[0]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_group_for_expression(prefer_novel: bool = True) -> str:
    """Return the id of the grouping key to use for an expression.

    Args:
        prefer_novel: When true, draw an alternative key via ``pick_group``
            with ``min_coverage=0.95`` and ``max_ac=30``. When false, use
            the standard "subindustry" key.

    Returns:
        The group id string. With ``prefer_novel`` set, this is whatever
        ``pick_group`` returns, which is its "industry" fallback when no
        alternative group passes the filters.
    """
    if prefer_novel:
        return pick_group(min_coverage=0.95, max_ac=30).id
    return "subindustry"
|