from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


# --- ACTION SPACE ---

# Closed set of incident-response action kinds. Members are also ``str``
# instances, and each member's value is identical to its name.
class ActionType(str, Enum):
    INVESTIGATE = "INVESTIGATE"
    BLOCK_IP = "BLOCK_IP"
    ISOLATE_HOST = "ISOLATE_HOST"
    DISMISS_ALERT = "DISMISS_ALERT"
    ESCALATE_TO_HUMAN = "ESCALATE_TO_HUMAN"


# One action request: which action to run and, optionally, the IP it targets.
class Action(BaseModel):
    # Required field; there is no default.
    action_type: ActionType = Field(
        default=...,
        description="The incident response action to execute.",
    )
    # Defaults to None. The description calls it "required" for some action
    # types, but nothing in this model validates that pairing.
    target_ip: Optional[str] = Field(
        default=None,
        description="The specific IP address to target (Required for BLOCK_IP, ISOLATE_HOST, and INVESTIGATE).",
    )


# --- OBSERVATION SPACE ---

# A single log/alert record. Every field is a plain string; only
# ``host_info`` may be omitted (it then defaults to None).
class LogEntry(BaseModel):
    timestamp: str
    log_type: str
    src_ip: str
    dest_ip: str
    severity: str
    context: str
    host_info: Optional[str] = None


# What is observed at one step: the current time, the open alerts and a
# free-form system status mapping.
class Observation(BaseModel):
    # Required field; there is no default.
    current_time: str = Field(
        default=...,
        description="The simulated current time in the SOC.",
    )
    # default_factory gives each instance its own fresh empty list.
    active_alerts: List[LogEntry] = Field(
        default_factory=list,
        description="List of current SIEM logs and alerts requiring triage.",
    )
    # default_factory gives each instance its own fresh empty dict.
    system_status: Dict[str, Any] = Field(
        default_factory=dict,
        description="Health and uptime status of critical internal systems.",
    )


# --- REWARD SPACE ---

# Score change attributed to the most recent action.
class Reward(BaseModel):
    # Required float. The -1.0..1.0 range is stated in the description only;
    # no bound is declared on the field itself.
    score_delta: float = Field(
        default=...,
        description="Change in score based on the effectiveness and safety of the last action. Ranges from -1.0 to 1.0.",
    )