"""
BRAIN Alternative Neutralization Groups — Under-explored grouping keys.
Switching neutralization from subindustry (alpha count, AC = 272,958) to one of these keys (AC ≤ 100) lowers correlation with the global pool.
"""
from dataclasses import dataclass
import random
@dataclass
class BrainGroup:
    """Record describing one grouping key that can be used for neutralization.

    Instances are plain mutable dataclass records; equality and ``repr`` are
    the dataclass-generated ones and take all four fields into account.
    """

    # Identifier string of the grouping key (e.g. "subindustry").
    id: str
    # Coverage ratio for the key; presumably the fraction of the universe
    # that has a value for it (1.00 == full coverage) - TODO confirm.
    coverage: float
    # Alpha count ("AC" in the module docstring) recorded for this key.
    alpha_count: int
    # Short human-readable label.
    description: str
# Conventional grouping keys. Each has full coverage and a six-figure
# alpha count, in contrast to the entries of ALT_GROUPS.
STANDARD_GROUPS = [
    BrainGroup(
        id="subindustry",
        coverage=1.00,
        alpha_count=272958,
        description="Subindustry grouping",
    ),
    BrainGroup(
        id="industry",
        coverage=1.00,
        alpha_count=229917,
        description="Industry grouping",
    ),
    BrainGroup(
        id="sector",
        coverage=1.00,
        alpha_count=333086,
        description="Sector grouping",
    ),
]
# Alternative grouping keys, listed in non-decreasing alpha_count order.
# Row layout: (id, coverage, alpha_count, description).
ALT_GROUPS = [
    BrainGroup(*row)
    for row in (
        ("pv13_rcsed_6l", 1.00, 3, "Supply chain 6-level hierarchy"),
        ("pv13_di_6l", 1.00, 6, "Direct industry 6-level"),
        ("pv13_hierarchy_min22_513_sector", 0.69, 7, "Hierarchy min22 sector"),
        ("pv13_di_5l", 1.00, 10, "Direct industry 5-level"),
        ("pv13_rha2_min2_513_sector", 0.70, 11, "Revenue hierarchy min2"),
        ("pv13_hierarchy_min51_f1_513_sector", 0.99, 12, "Hierarchy min51 f1"),
        ("pv13_hierarchy_min20_513_sector", 0.69, 12, "Hierarchy min20"),
        ("pv13_rha2_min10_513_sector", 0.70, 13, "Revenue hierarchy min10"),
        ("pv13_hierarchy_min100_2000_513_sector", 0.69, 14, "Hierarchy min100 2k"),
        ("pv13_hierarchy_f3_513_sector", 0.99, 15, "Hierarchy f3"),
        ("pv13_hierarchy_min51_f2_513_sector", 0.99, 16, "Hierarchy min51 f2"),
        ("pv13_hierarchy_min30_3000_mapped_513_sector", 0.99, 17, "Hierarchy min30 3k mapped"),
        ("pv13_rha2_min5_3000_513_sector", 0.99, 18, "Revenue hierarchy min5 3k"),
        ("pv13_hierarchy_min51_f3_513_sector", 0.99, 18, "Hierarchy min51 f3"),
        ("pv13_hierarchys32_513_sector", 0.99, 20, "Hierarchys32"),
    )
]
# Subset of ALT_GROUPS with coverage of at least 0.90 and an alpha count of
# at most 30, computed once at import time (original order is preserved).
PRODUCTION_GROUPS = [
    group
    for group in ALT_GROUPS
    if group.coverage >= 0.90 and group.alpha_count <= 30
]
def pick_group(
    min_coverage: float = 0.90,
    max_ac: int = 50,
    *,
    groups: "Iterable[BrainGroup] | None" = None,
    rng: "random.Random | None" = None,
) -> "BrainGroup":
    """Pick one grouping at random, favouring those with a low alpha count.

    A group is eligible when ``coverage >= min_coverage`` and
    ``alpha_count <= max_ac``. Each eligible group is weighted by
    ``1 / (alpha_count + 1)``, so groups with a smaller alpha count are
    drawn more often.

    Args:
        min_coverage: Lowest acceptable ``coverage`` value (inclusive).
        max_ac: Highest acceptable ``alpha_count`` value (inclusive).
        groups: Optional iterable of group records to choose from. When
            omitted (``None``) the module-level ``ALT_GROUPS`` is used.
        rng: Optional object exposing a ``choices`` method compatible with
            ``random.Random.choices``. When omitted the ``random`` module's
            shared generator is used; pass a seeded ``random.Random`` for
            reproducible picks.

    Returns:
        The selected group, or a newly built "industry" ``BrainGroup`` when
        no group satisfies both filters.
    """
    pool = ALT_GROUPS if groups is None else groups
    candidates = [
        g for g in pool
        if g.coverage >= min_coverage and g.alpha_count <= max_ac
    ]
    if not candidates:
        # Nothing qualifies: fall back to the standard industry grouping.
        return BrainGroup("industry", 1.00, 229917, "Industry grouping")

    # random.choices takes *relative* weights, so no normalisation is needed.
    weights = [1.0 / (g.alpha_count + 1) for g in candidates]
    chooser = random if rng is None else rng
    return chooser.choices(candidates, weights=weights, k=1)[0]
def get_group_for_expression(prefer_novel=True):
    """Return the id of the grouping key to use for an expression.

    A falsy ``prefer_novel`` yields the literal ``"subindustry"``. Otherwise
    the id is taken from the group returned by ``pick_group`` called with
    ``min_coverage=0.95`` and ``max_ac=30``.
    """
    if not prefer_novel:
        return "subindustry"
    return pick_group(min_coverage=0.95, max_ac=30).id
|