"""
LangGraph Node implementations for OncoAgent.
This module retains the data ingestion node (PHI cleaning + entity extraction)
and re-exports all other nodes from their dedicated modules for backward
compatibility.
Module organisation (SOTA redesign):
- agents/router.py — Router Node (complexity classification)
- agents/corrective_rag.py — Corrective RAG Node (graded retrieval)
- agents/specialist.py — Specialist Node (tier-adaptive reasoning)
- agents/critic.py — Critic Node (reflexion validation)
- agents/formatter.py — Formatter + Fallback Nodes
- agents/tools.py — Shared vLLM client + tier calling
- agents/memory.py — Per-patient session memory
"""
from typing import Dict, Any
import re
import logging
from .state import AgentState
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# PHI Patterns (Zero-PHI Policy — Rule #39)
# ---------------------------------------------------------------------------
_PHI_PATTERNS = [
    re.compile(r"\b\d{3}-\d{2}-\d{4}\b"),  # SSN-shaped number (NNN-NN-NNNN)
    re.compile(r"\b\d{2}/\d{2}/\d{4}\b"),  # NN/NN/NNNN date, e.g. a date of birth
    # Email address. The TLD class is [A-Za-z]; writing it as [A-Z|a-z] would
    # also accept a literal "|" and let a match swallow text after the address.
    re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"),
]

# Placeholder substituted for every PHI match.
_REDACTION_TOKEN = "[REDACTED]"

# ---------------------------------------------------------------------------
# Entity-extraction tables (built once at import time, not per call)
# ---------------------------------------------------------------------------
# (substring, label) pairs tested in order against the lower-cased text; the
# first hit wins. Order therefore encodes priority:
#   * the lung subtypes come BEFORE the generic "lung" entry, otherwise they
#     could never be selected ("small cell lung" always contains "lung");
#   * "non-small cell" comes before "small cell lung" because the phrase
#     "non-small cell lung" contains "small cell lung";
#   * symptom-based triage keywords come last so an explicit tumour name wins.
# NOTE(review): matching is by plain substring, so e.g. "renal" also hits
# "adrenal" and "hcc" hits any word containing those letters — confirm whether
# word-boundary matching is wanted.
_CANCER_KEYWORDS = (
    ("breast", "Breast Cancer"),
    ("non-small cell", "Non-Small Cell Lung Cancer"),
    ("small cell lung", "Small Cell Lung Cancer"),
    ("lung", "Lung Cancer"),
    ("colon", "Colon Cancer"),
    ("colorectal", "Colorectal Cancer"),
    ("prostate", "Prostate Cancer"),
    ("pancreatic", "Pancreatic Cancer"),
    ("hepatocellular", "Hepatocellular Carcinoma"),
    ("hcc", "Hepatocellular Carcinoma"),
    ("melanoma", "Melanoma"),
    ("renal", "Renal Cell Carcinoma"),
    ("bladder", "Bladder Cancer"),
    ("ovarian", "Ovarian Cancer"),
    ("cervical", "Cervical Cancer"),
    ("thyroid", "Thyroid Cancer"),
    ("leukemia", "Leukemia"),
    ("lymphoma", "Lymphoma"),
    ("myeloma", "Multiple Myeloma"),
    ("sarcoma", "Sarcoma"),
    ("glioma", "Glioma"),
    ("glioblastoma", "Glioblastoma"),
    ("esophageal", "Esophageal Cancer"),
    ("gastric", "Gastric Cancer"),
    ("cholangiocarcinoma", "Cholangiocarcinoma"),
    ("mesothelioma", "Mesothelioma"),
    ("uterine", "Uterine Cancer"),
    ("endometrial", "Uterine Cancer"),
    # Symptom-based risk mapping (triage mode), English + Spanish keywords.
    # NOTE(review): generic words such as "bleeding" map to Uterine Cancer
    # whenever no explicit tumour name is present — confirm this is intended.
    ("menstru", "Uterine Cancer"),
    ("vaginal", "Uterine Cancer"),
    ("bleeding", "Uterine Cancer"),
    ("sangrado", "Uterine Cancer"),
    ("periods", "Uterine Cancer"),
    ("periodo", "Uterine Cancer"),
    ("postmenopausal", "Uterine Cancer"),
    ("postmenopau", "Uterine Cancer"),
    ("hemorragia", "Uterine Cancer"),
)

# "stage" followed by a Roman (I-IV) or Arabic (1-4) numeral and an optional
# substage letter A-C, so "stage IIIA" / "stage 3b" are recognised as well.
_STAGE_RE = re.compile(r"stage\s+((?:I{1,3}V?|[1-4])[A-C]?)\b", re.IGNORECASE)

# Compact TNM triple such as "T2N1M0" ("x" allowed in each slot).
_TNM_RE = re.compile(r"\b(T[0-4x]N[0-3x]M[01x])\b", re.IGNORECASE)

# Biomarker / mutation names.
# NOTE(review): matching is case-insensitive, so ordinary words such as "met"
# are reported as the MET marker — confirm whether that is acceptable.
_MUTATION_RE = re.compile(
    r"\b(EGFR|ALK|KRAS|BRAF|HER2|TP53|BRCA[12]|PD-?L1|ROS1|MET|RET|"
    r"NTRK|PIK3CA|MSI-?H|dMMR|FGFR[1-4]?|IDH[12]?|ERBB2|CDK[46]|"
    r"PTEN|APC|VEGF|mTOR)\b",
    re.IGNORECASE,
)

# "ECOG 1", "performance status of 2", ... — captures a single digit.
_ECOG_RE = re.compile(
    r"(?:ECOG|performance\s+status)\s*(?:of\s*)?(\d)",
    re.IGNORECASE,
)

# Substrings that flip urgency from "routine" to "urgent".
_URGENCY_KEYWORDS = (
    "urgent", "emergency", "critical", "immediate",
    "rapidly progressing", "acute", "life-threatening",
)


def _redact_phi(text: str):
    """Replace every PHI pattern match in *text* with the redaction token.

    Returns:
        A ``(cleaned_text, phi_found)`` pair; ``phi_found`` is ``True`` when
        at least one pattern in ``_PHI_PATTERNS`` matched.
    """
    phi_found = False
    for pattern in _PHI_PATTERNS:
        # subn reports the number of substitutions, so one pass per pattern
        # both redacts and tells us whether anything was found.
        text, hits = pattern.subn(_REDACTION_TOKEN, text)
        if hits:
            phi_found = True
    return text, phi_found


def _extract_entities(text: str) -> Dict[str, Any]:
    """Run the rule-based heuristics over already-redacted *text*.

    Returns:
        Dict with ``cancer_type``, ``stage``, ``mutations``, ``ecog_status``
        and ``urgency`` always present, plus ``tnm`` when a TNM triple is
        found.
    """
    text_lower = text.lower()
    extracted: Dict[str, Any] = {
        "cancer_type": "Unknown",
        "stage": "Unknown",
        "mutations": [],
        "ecog_status": "Unknown",
        "urgency": "routine",
    }

    # Cancer type: first keyword (in priority order) found in the text.
    for keyword, label in _CANCER_KEYWORDS:
        if keyword in text_lower:
            extracted["cancer_type"] = label
            break

    # Overall stage, normalised to upper case ("Stage IV", "Stage IIIA").
    stage_match = _STAGE_RE.search(text)
    if stage_match:
        extracted["stage"] = f"Stage {stage_match.group(1).upper()}"

    # TNM triple is an optional extra key.
    tnm_match = _TNM_RE.search(text)
    if tnm_match:
        extracted["tnm"] = tnm_match.group(1).upper()

    # Upper-case and de-duplicate while keeping first-mention order.
    # dict.fromkeys is used instead of set() because set iteration order for
    # strings changes between interpreter runs (hash randomisation).
    mutations_found = _MUTATION_RE.findall(text)
    if mutations_found:
        extracted["mutations"] = list(
            dict.fromkeys(m.upper() for m in mutations_found)
        )

    ecog_match = _ECOG_RE.search(text)
    if ecog_match:
        extracted["ecog_status"] = f"ECOG {ecog_match.group(1)}"

    if any(kw in text_lower for kw in _URGENCY_KEYWORDS):
        extracted["urgency"] = "urgent"

    return extracted


# ---------------------------------------------------------------------------
# Node 1: Data Ingestion — PHI cleaning & entity extraction
# ---------------------------------------------------------------------------
def data_ingestion_node(state: AgentState) -> Dict[str, Any]:
    """Clean the input clinical text (Zero-PHI policy) and extract
    key medical entities via rule-based heuristics.

    Extraction covers:
        - Cancer type identification (20+ types, plus symptom-based triage)
        - Overall stage (I-IV / 1-4, optional substage letter) and TNM triple
        - Biomarker/mutation detection (15+ markers)
        - Performance status detection (ECOG)
        - Urgency signals

    Args:
        state: Current LangGraph state with ``clinical_text``. A missing or
            ``None`` value is treated as empty text.

    Returns:
        State update with the redacted ``clinical_text``,
        ``extracted_entities`` and ``phi_detected``.
    """
    # `or ""` also covers a key that is present but explicitly set to None.
    raw_text: str = state.get("clinical_text") or ""

    # --- Zero-PHI check and redaction ---
    cleaned_text, phi_found = _redact_phi(raw_text)
    if phi_found:
        logger.warning("PHI detected and redacted from clinical input.")

    # Entities are extracted from the redacted text only.
    return {
        "clinical_text": cleaned_text,
        "extracted_entities": _extract_entities(cleaned_text),
        "phi_detected": phi_found,
    }
# ---------------------------------------------------------------------------
# Re-exports for backward compatibility
# ---------------------------------------------------------------------------
from .corrective_rag import corrective_rag_node as rag_retrieval_node # noqa: E402, F401
from .specialist import specialist_node as clinical_specialist_node # noqa: E402, F401
from .critic import critic_node as safety_validator_node # noqa: E402, F401
|