File size: 5,205 Bytes
a33aae2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
SynthAudit.Env — Pydantic Models (Competition Grade)
=====================================================
Type-safe Action, Observation, and State models for the
Multi-Agent Clinical AI Oversight Environment.

8 tool actions for the Oversight Agent:
  review_proposal, investigate_patient, request_shap,
  cohort_analysis, temporal_audit, flag_error, approve,
  submit_audit_report
"""

from __future__ import annotations

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field


# ═══════════════════════════════════════════════════════════════
# Action Types — 8 Oversight Tools
# ═══════════════════════════════════════════════════════════════

class ActionType(str, Enum):
    # String-valued enumeration of the tool names an action may carry; it is
    # the declared type of SynthAuditAction.action_type. Because of the `str`
    # mix-in, each member is also a `str` and compares equal to its value,
    # and every value is spelled exactly like the member name.
    #
    # The per-member notes list the SynthAuditAction fields that the field
    # comments on that model associate with each tool; nothing in this file
    # enforces those pairings.
    review_proposal = "review_proposal"          # proposal_id
    investigate_patient = "investigate_patient"  # patient_id
    request_shap = "request_shap"                # patient_id, feature
    cohort_analysis = "cohort_analysis"          # feature
    temporal_audit = "temporal_audit"            # patient_id
    flag_error = "flag_error"                    # proposal_id, error_type, reason
    approve = "approve"                          # proposal_id
    submit_audit_report = "submit_audit_report"  # report


class ErrorType(str, Enum):
    # String-valued enumeration of error category labels. As with any
    # `str`-mixin enum, members compare equal to their string values, and each
    # value is spelled exactly like the member name.
    #
    # NOTE(review): within this file the enum is not referenced by any model;
    # SynthAuditAction.error_type is declared as Optional[str], so these
    # labels are presumably matched by string elsewhere -- confirm against the
    # environment code before relying on them being validated.
    hallucination = "hallucination"
    age_boundary_error = "age_boundary_error"
    temporal_inconsistency = "temporal_inconsistency"
    protocol_window_violation = "protocol_window_violation"
    bias_blind_spot = "bias_blind_spot"
    comorbidity_override_miss = "comorbidity_override_miss"
    statistical_hallucination = "statistical_hallucination"
    citation_fabrication = "citation_fabrication"


class SynthAuditAction(BaseModel):
    """Action the oversight agent can take. Supports 8 tool types."""
    # Only `action_type` is required. Every other field has a default and,
    # according to the trailing notes, is meaningful only for particular
    # action types. No validator in this class ties a field to an action
    # type, so callers must check for missing arguments themselves.
    action_type: ActionType
    proposal_id: Optional[str] = None       # For review/flag/approve
    patient_id: Optional[str] = None        # For investigate/shap/temporal
    feature: Optional[str] = None           # For shap/cohort
    # NOTE(review): free-form string here, not ErrorType; presumably expected
    # to hold one of the ErrorType values -- confirm where it is consumed.
    error_type: Optional[str] = None        # For flag_error
    reason: Optional[str] = None            # For flag_error (Theory-of-Mind)
    # Constrained to the closed interval [0.0, 1.0]; defaults to 0.5.
    confidence: float = Field(default=0.5, ge=0.0, le=1.0)
    report: Optional[str] = None            # For submit_audit_report


# ═══════════════════════════════════════════════════════════════
# Actor Proposal (what the Actor agent produces)
# ═══════════════════════════════════════════════════════════════

class ActorProposal(BaseModel):
    """A clinical proposal made by the Actor agent."""
    # All fields except `status` are required (they declare no default).
    proposal_id: str
    patient_id: str
    diagnosis: str
    reasoning: str
    # No range constraint is declared here, unlike
    # SynthAuditAction.confidence, which is bounded to [0.0, 1.0].
    confidence: float
    recommended_action: str
    # Plain string defaulting to "pending"; the values listed in the trailing
    # note are documentation only and are not enforced by this model.
    status: str = "pending"  # pending, flagged, approved


# ═══════════════════════════════════════════════════════════════
# Observation — what the Oversight Agent sees
# ═══════════════════════════════════════════════════════════════

class SynthAuditObservation(BaseModel):
    """Rich observation returned after each step."""
    # Every field has a default, so an empty observation can be constructed
    # with no arguments.

    # --- Step outcome and task context ---
    done: bool = False
    reward: float = 0.0
    task_id: str = ""
    difficulty: str = "medium"
    protocol_excerpt: str = ""

    # --- Payloads ---
    # A fresh empty list is created per instance via default_factory.
    actor_proposals: list[ActorProposal] = Field(default_factory=list)
    # The three dict payloads are untyped and default to None; their key
    # schema is not defined in this file.
    current_proposal_detail: Optional[dict] = None
    patient_data: Optional[dict] = None
    shap_result: Optional[dict] = None
    feedback: str = ""

    # --- Running score and counters ---
    # NOTE(review): the score defaults to 0.01 rather than 0.0; the reason is
    # not visible in this file -- confirm with the scoring code.
    score_so_far: float = 0.01
    proposals_reviewed: int = 0
    errors_flagged: int = 0
    correct_flags: int = 0
    false_positives: int = 0
    approvals: int = 0
    correct_approvals: int = 0
    steps_taken: int = 0
    steps_remaining: int = 0
    # Plain string; the values in the trailing note are not enforced here.
    phase: str = "review"  # review, investigation, reporting, complete


# ═══════════════════════════════════════════════════════════════
# State — episode-level tracking
# ═══════════════════════════════════════════════════════════════

class SynthAuditState(BaseModel):
    """Episode state for monitoring and curriculum tracking."""
    # Every field has a default, so a blank state can be constructed with no
    # arguments.
    episode_id: str = ""
    step_count: int = 0
    # NOTE(review): defaults to 0.01, matching
    # SynthAuditObservation.score_so_far; the reason for a non-zero floor is
    # not visible in this file.
    current_score: float = 0.01

    # --- Counters (all start at zero) ---
    proposals_total: int = 0
    proposals_reviewed: int = 0
    errors_flagged: int = 0
    correct_flags: int = 0
    false_positives: int = 0
    approvals: int = 0
    correct_approvals: int = 0
    missed_errors: int = 0
    shap_requests: int = 0
    investigations: int = 0

    # Plain string defaulting to "review"; allowed values are not enforced
    # by this model.
    phase: str = "review"
    # Untyped dict, created fresh per instance via default_factory; its key
    # schema is not defined in this file.
    score_breakdown: dict = Field(default_factory=dict)