File size: 2,494 Bytes
23dc829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
BRAIN alternative neutralization groups: under-explored grouping keys.

Switching from subindustry (AC, i.e. alpha count, = 272,958) to these keys
(AC ≤ 100) lowers correlation to the global pool.
"""
from dataclasses import dataclass
import random


@dataclass
class BrainGroup:
    """A grouping key together with the statistics used to choose between keys."""

    # Grouping key identifier, e.g. "subindustry" or "pv13_di_6l".
    id: str
    # Coverage as a fraction; values in this module range from 0.69 to 1.00.
    # NOTE(review): presumably the share of the universe the key covers - confirm.
    coverage: float
    # Alpha count (the "AC" figure quoted in the module docstring).
    alpha_count: int
    # Short human-readable label for the key.
    description: str


# Standard grouping keys (subindustry / industry / sector). Each has full
# coverage and a six-figure alpha count.
STANDARD_GROUPS = [
    BrainGroup(
        id="subindustry",
        coverage=1.00,
        alpha_count=272958,
        description="Subindustry grouping",
    ),
    BrainGroup(
        id="industry",
        coverage=1.00,
        alpha_count=229917,
        description="Industry grouping",
    ),
    BrainGroup(
        id="sector",
        coverage=1.00,
        alpha_count=333086,
        description="Sector grouping",
    ),
]

# Alternative grouping keys, listed in ascending alpha-count order.
# Row layout: (id, coverage, alpha_count, description).
ALT_GROUPS = [
    BrainGroup(*row)
    for row in (
        ("pv13_rcsed_6l", 1.00, 3, "Supply chain 6-level hierarchy"),
        ("pv13_di_6l", 1.00, 6, "Direct industry 6-level"),
        ("pv13_hierarchy_min22_513_sector", 0.69, 7, "Hierarchy min22 sector"),
        ("pv13_di_5l", 1.00, 10, "Direct industry 5-level"),
        ("pv13_rha2_min2_513_sector", 0.70, 11, "Revenue hierarchy min2"),
        ("pv13_hierarchy_min51_f1_513_sector", 0.99, 12, "Hierarchy min51 f1"),
        ("pv13_hierarchy_min20_513_sector", 0.69, 12, "Hierarchy min20"),
        ("pv13_rha2_min10_513_sector", 0.70, 13, "Revenue hierarchy min10"),
        ("pv13_hierarchy_min100_2000_513_sector", 0.69, 14, "Hierarchy min100 2k"),
        ("pv13_hierarchy_f3_513_sector", 0.99, 15, "Hierarchy f3"),
        ("pv13_hierarchy_min51_f2_513_sector", 0.99, 16, "Hierarchy min51 f2"),
        ("pv13_hierarchy_min30_3000_mapped_513_sector", 0.99, 17, "Hierarchy min30 3k mapped"),
        ("pv13_rha2_min5_3000_513_sector", 0.99, 18, "Revenue hierarchy min5 3k"),
        ("pv13_hierarchy_min51_f3_513_sector", 0.99, 18, "Hierarchy min51 f3"),
        ("pv13_hierarchys32_513_sector", 0.99, 20, "Hierarchys32"),
    )
]

# Subset of ALT_GROUPS with coverage of at least 0.90 and an alpha count of
# at most 30, in the same order as ALT_GROUPS.
PRODUCTION_GROUPS = [
    group
    for group in ALT_GROUPS
    if group.alpha_count <= 30 and group.coverage >= 0.90
]


def pick_group(
    min_coverage: float = 0.90,
    max_ac: int = 50,
    rng: "random.Random | None" = None,
) -> "BrainGroup":
    """Randomly pick an alternative group, favouring low alpha counts.

    Entries of ``ALT_GROUPS`` are kept when ``coverage >= min_coverage`` and
    ``alpha_count <= max_ac``. One of the survivors is then drawn with
    probability proportional to ``1 / (alpha_count + 1)``, so groups with a
    smaller alpha count are selected more often.

    Args:
        min_coverage: Lowest acceptable ``coverage`` value (inclusive).
        max_ac: Highest acceptable ``alpha_count`` value (inclusive).
        rng: Optional ``random.Random`` instance to draw from, e.g. a seeded
            one for reproducible selection. When omitted, the shared
            module-level ``random`` generator is used.

    Returns:
        The selected ``BrainGroup`` from ``ALT_GROUPS``, or a newly built
        "industry" ``BrainGroup`` when no entry passes both filters.
    """
    candidates = [
        g
        for g in ALT_GROUPS
        if g.coverage >= min_coverage and g.alpha_count <= max_ac
    ]
    if not candidates:
        # Nothing qualifies: fall back to the standard industry grouping.
        return BrainGroup("industry", 1.00, 229917, "Industry grouping")

    # The +1 keeps the weight finite for an alpha count of zero.
    # random.choices treats weights as relative, so they need not sum to 1.
    weights = [1.0 / (g.alpha_count + 1) for g in candidates]
    chooser = random if rng is None else rng
    return chooser.choices(candidates, weights=weights, k=1)[0]


def get_group_for_expression(prefer_novel=True):
    """Return the grouping key id to use for an expression.

    When ``prefer_novel`` is falsy the result is always ``"subindustry"``.
    Otherwise the id of a group chosen by
    ``pick_group(min_coverage=0.95, max_ac=30)`` is returned.
    """
    if not prefer_novel:
        return "subindustry"
    return pick_group(min_coverage=0.95, max_ac=30).id