File size: 1,423 Bytes
21c7db9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""Placeholder hetero graph encoder."""

from __future__ import annotations

import zlib

import numpy as np

from app.knowledge.ddi_knowledge import top_risky_pairs
from app.knowledge.drug_catalog import DRUG_CLASSES
from app.knowledge.side_effect_ontology import SIDE_EFFECT_TAGS


def encode_regimen(drugs: list[str], dim: int = 24) -> np.ndarray:
    """Encode a drug regimen as a fixed-layout feature vector.

    The drug names are sorted first, so the result does not depend on the
    order in which ``drugs`` is given. Slot layout:

    * 0-11: one pseudo-identity value in ``[0, 1)`` per drug, for the first
      12 names in sorted order (a stable CRC32 of the name, modulo 1000).
    * 12-16: the five largest per-class drug counts (classes looked up in
      ``DRUG_CLASSES``, missing drugs counted as ``"unknown"``), each
      divided by 4 and capped at 1.
    * 17: total number of ``SIDE_EFFECT_TAGS`` entries over all drugs,
      divided by 20 and capped at 1.
    * 18: number of drugs divided by 12, capped at 1.
    * 19: ``len(top_risky_pairs(...))`` divided by 4, capped at 1.
    * 20-22: 1.0 if any drug has class ``"sedative"``, ``"anticoagulant"``
      or ``"glucose_lowering"`` respectively, else 0.0.
    * 23: sum of the code points of all drug names, modulo 1000, scaled
      into ``[0, 1)``.

    Args:
        drugs: Drug names making up the regimen.
        dim: Length of the returned vector. Slots up to index 23 are always
            written, so sizes from 0 to 23 fail with ``IndexError``; slots
            beyond 23 stay zero.

    Returns:
        A 1-D float array of length ``dim``.
    """
    vec = np.zeros(dim, dtype=float)
    ordered = sorted(drugs)

    # The built-in hash() of a str is salted per interpreter process
    # (PYTHONHASHSEED), which would make the encoding differ between runs.
    # CRC32 over the UTF-8 bytes gives the same value in every process.
    for idx, drug in enumerate(ordered[:12]):
        vec[idx] = (zlib.crc32(drug.encode("utf-8")) % 1000) / 1000.0

    # Resolve each drug's class once; reused for the counts and the flags.
    classes = [DRUG_CLASSES.get(drug, "unknown") for drug in ordered]
    class_counts: dict[str, int] = {}
    for cls in classes:
        class_counts[cls] = class_counts.get(cls, 0) + 1
    largest_counts = sorted(class_counts.values(), reverse=True)[:5]
    for slot, count in enumerate(largest_counts, start=12):
        vec[slot] = min(1.0, count / 4.0)

    side_effect_count = sum(len(SIDE_EFFECT_TAGS.get(drug, [])) for drug in ordered)
    vec[17] = min(1.0, side_effect_count / 20.0)
    vec[18] = min(1.0, len(ordered) / 12.0)
    vec[19] = min(1.0, len(top_risky_pairs(ordered)) / 4.0)

    # The "unknown" fallback never equals a flagged class, so testing the
    # resolved classes matches testing the raw catalog lookups.
    vec[20] = float("sedative" in classes)
    vec[21] = float("anticoagulant" in classes)
    vec[22] = float("glucose_lowering" in classes)

    # Cheap order-independent checksum of the names; always below 1.0.
    name_checksum = sum(ord(ch) for ch in "".join(ordered))
    vec[23] = (name_checksum % 1000) / 1000.0
    return vec