| """ |
| BRAIN Alternative Neutralization Groups — Under-explored grouping keys. |
| Switching from subindustry (AC=272,958) to these (AC≤100) lowers correlation to global pool. |
| """ |
| from dataclasses import dataclass |
| import random |
|
|
|
|
@dataclass
class BrainGroup:
    """One grouping key together with the usage figures recorded for it.

    Attributes:
        id: Identifier string of the grouping key (e.g. ``"subindustry"``).
        coverage: Coverage figure for the key; the records in this module
            use values between 0.69 and 1.00.
        alpha_count: Count of alphas recorded for the key (presumably the
            "AC" figure mentioned in the module docstring).
        description: Short human-readable label.
    """

    id: str
    coverage: float
    alpha_count: int
    description: str
|
|
|
|
# The three standard grouping keys (subindustry, industry, sector).
# Each has full coverage (1.00) and a six-figure alpha count.
STANDARD_GROUPS = [
    BrainGroup(
        id="subindustry",
        coverage=1.00,
        alpha_count=272958,
        description="Subindustry grouping",
    ),
    BrainGroup(
        id="industry",
        coverage=1.00,
        alpha_count=229917,
        description="Industry grouping",
    ),
    BrainGroup(
        id="sector",
        coverage=1.00,
        alpha_count=333086,
        description="Sector grouping",
    ),
]
|
|
# Alternative grouping keys, written as (id, coverage, alpha_count,
# description) rows and listed in ascending alpha_count order.
ALT_GROUPS = [
    BrainGroup(*row)
    for row in (
        ("pv13_rcsed_6l", 1.00, 3, "Supply chain 6-level hierarchy"),
        ("pv13_di_6l", 1.00, 6, "Direct industry 6-level"),
        ("pv13_hierarchy_min22_513_sector", 0.69, 7, "Hierarchy min22 sector"),
        ("pv13_di_5l", 1.00, 10, "Direct industry 5-level"),
        ("pv13_rha2_min2_513_sector", 0.70, 11, "Revenue hierarchy min2"),
        ("pv13_hierarchy_min51_f1_513_sector", 0.99, 12, "Hierarchy min51 f1"),
        ("pv13_hierarchy_min20_513_sector", 0.69, 12, "Hierarchy min20"),
        ("pv13_rha2_min10_513_sector", 0.70, 13, "Revenue hierarchy min10"),
        ("pv13_hierarchy_min100_2000_513_sector", 0.69, 14, "Hierarchy min100 2k"),
        ("pv13_hierarchy_f3_513_sector", 0.99, 15, "Hierarchy f3"),
        ("pv13_hierarchy_min51_f2_513_sector", 0.99, 16, "Hierarchy min51 f2"),
        ("pv13_hierarchy_min30_3000_mapped_513_sector", 0.99, 17, "Hierarchy min30 3k mapped"),
        ("pv13_rha2_min5_3000_513_sector", 0.99, 18, "Revenue hierarchy min5 3k"),
        ("pv13_hierarchy_min51_f3_513_sector", 0.99, 18, "Hierarchy min51 f3"),
        ("pv13_hierarchys32_513_sector", 0.99, 20, "Hierarchys32"),
    )
]
|
|
# Subset of ALT_GROUPS (original order kept) with coverage of at least 0.90
# and an alpha count of at most 30.
PRODUCTION_GROUPS = [
    group
    for group in ALT_GROUPS
    if group.alpha_count <= 30 and group.coverage >= 0.90
]
|
|
|
|
def pick_group(min_coverage: float = 0.90, max_ac: int = 50) -> "BrainGroup":
    """Randomly pick an alternative group, favouring low alpha counts.

    Candidates are the entries of ``ALT_GROUPS`` whose ``coverage`` is at
    least ``min_coverage`` and whose ``alpha_count`` is at most ``max_ac``.
    One candidate is drawn at random with probability proportional to
    ``1 / (alpha_count + 1)``.

    Args:
        min_coverage: Lowest acceptable ``coverage`` value (inclusive).
        max_ac: Highest acceptable ``alpha_count`` value (inclusive).

    Returns:
        The selected ``BrainGroup``. When no entry passes both filters, a
        newly constructed ``"industry"`` group is returned instead.
    """
    candidates = [
        g
        for g in ALT_GROUPS
        if g.coverage >= min_coverage and g.alpha_count <= max_ac
    ]
    if not candidates:
        return BrainGroup("industry", 1.00, 229917, "Industry grouping")
    # The +1 keeps the weight finite for a group whose alpha_count is 0.
    # random.choices treats weights as relative, so no normalisation is needed.
    weights = [1.0 / (g.alpha_count + 1) for g in candidates]
    return random.choices(candidates, weights=weights, k=1)[0]
|
|
|
|
def get_group_for_expression(prefer_novel=True):
    """Return the id of the grouping key to use for an expression.

    When ``prefer_novel`` is falsy the result is always ``"subindustry"``.
    Otherwise ``pick_group`` is asked for a group with coverage of at least
    0.95 and an alpha count of at most 30, and that group's ``id`` is
    returned.
    """
    if not prefer_novel:
        return "subindustry"
    chosen = pick_group(min_coverage=0.95, max_ac=30)
    return chosen.id
|
|