"""Pydantic data models for a confusion-prediction environment.

Defines the observation, action, reward, state, step-result and grader-result
schemas, plus the enums they reference.
"""

from enum import Enum
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, confloat

# A float restricted to the closed interval [0.0, 1.0].  Used for values
# nested inside containers, where bounds passed to ``Field`` would otherwise
# be applied to the container instead of its elements.
_UnitIntervalFloat = confloat(ge=0.0, le=1.0)


class TaskDifficulty(str, Enum):
    """Difficulty levels a task can have."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"


class ActionType(str, Enum):
    """Kinds of action that can be submitted in an ``Action``."""

    PREDICT_CONFUSION = "predict_confusion"
    ANALYZE_BEHAVIOR = "analyze_behavior"
    TRIGGER_INTERVENTION = "trigger_intervention"
    CLASSIFY_DIFFICULTY = "classify_difficulty"
    FUSE_MODALITIES = "fuse_modalities"


class Observation(BaseModel):
    """Snapshot of one episode step: context, per-modality features, history.

    Only ``step`` and ``episode_id`` are required; every other field falls
    back to an empty container or the default shown below.
    """

    step: int = Field(..., description="Current step in the episode")
    episode_id: str = Field(..., description="Unique episode identifier")
    learning_context: Dict[str, Any] = Field(
        default_factory=dict,
        description="Current learning context (topic, difficulty, time spent)",
    )
    learner_state: Dict[str, Any] = Field(
        default_factory=dict,
        description="Learner state signals from all modalities",
    )
    gaze_features: List[float] = Field(
        default_factory=list,
        description="Gaze tracking features (16 dimensions)",
    )
    gesture_features: List[float] = Field(
        default_factory=list,
        description="Hand gesture features (21 landmarks x 3 coords)",
    )
    biometric_features: List[float] = Field(
        default_factory=list,
        description="Biometric features (heart rate, GSR, etc.)",
    )
    audio_features: List[float] = Field(
        default_factory=list,
        description="Audio features (pitch, tone, pauses)",
    )
    behavioral_features: List[float] = Field(
        default_factory=list,
        description="Behavioral features (scroll speed, clicks, typing)",
    )
    confusion_history: List[float] = Field(
        default_factory=list,
        description="Historical confusion probabilities",
    )
    prediction_window: int = Field(
        default=5,
        description="Steps ahead to predict confusion",
    )
    available_interventions: List[str] = Field(
        default_factory=list,
        description="Available intervention types",
    )
    multimodal_fused: bool = Field(
        default=False,
        description="Whether multi-modal fusion is enabled",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata",
    )


class Action(BaseModel):
    """An action of a given ``ActionType`` plus its optional payload fields.

    Only ``action_type`` is required.  The remaining fields default to
    ``None``; which of them is meaningful depends on ``action_type`` (see the
    individual field descriptions).  This model does not itself enforce that
    pairing.
    """

    action_type: ActionType = Field(..., description="Type of action to take")
    predicted_confusion: Optional[float] = Field(
        None,
        description="Predicted confusion probability (0.0-1.0)",
        ge=0.0,
        le=1.0,
    )
    intervention_type: Optional[str] = Field(
        None,
        description="Intervention to trigger (if action_type is trigger_intervention)",
    )
    intervention_intensity: Optional[float] = Field(
        None,
        description="Intervention intensity (0.0-1.0)",
        ge=0.0,
        le=1.0,
    )
    difficulty_prediction: Optional[TaskDifficulty] = Field(
        None,
        description="Predicted task difficulty (if action_type is classify_difficulty)",
    )
    # The 0.0-1.0 bounds are declared on the dict's *values*.  Passing ge/le
    # to Field() here would attach them to the dict object itself, which is
    # not a numeric comparison and is not applied per-weight.
    modality_weights: Optional[Dict[str, _UnitIntervalFloat]] = Field(
        None,
        description="Weights for multi-modal fusion",
    )
    reasoning: Optional[str] = Field(
        None,
        description="Agent's reasoning for the action",
    )


class Reward(BaseModel):
    """Reward for a single step: a required total plus named components.

    The components all default to ``0.0``.  Nothing in this model checks that
    they add up to ``total``.
    """

    total: float = Field(..., description="Total reward for this step")
    confusion_prediction_reward: float = Field(
        default=0.0,
        description="Reward for confusion prediction accuracy",
    )
    early_detection_reward: float = Field(
        default=0.0,
        description="Reward for early confusion detection",
    )
    intervention_reward: float = Field(
        default=0.0,
        description="Reward for effective intervention",
    )
    partial_progress_reward: float = Field(
        default=0.0,
        description="Reward for partial progress toward goals",
    )
    penalty: float = Field(
        default=0.0,
        description="Penalty for negative behaviors",
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional reward metadata",
    )


class State(BaseModel):
    """Per-episode bookkeeping: counters, histories and completion flags."""

    episode_id: str = Field(..., description="Unique episode identifier")
    step_count: int = Field(default=0, description="Number of steps taken")
    max_steps: int = Field(default=100, description="Maximum steps per episode")
    task_difficulty: TaskDifficulty = Field(default=TaskDifficulty.MEDIUM)
    ground_truth_confusion: Optional[float] = Field(
        None,
        description="Actual confusion level",
    )
    predictions_history: List[Dict[str, Any]] = Field(default_factory=list)
    interventions_history: List[Dict[str, Any]] = Field(default_factory=list)
    episode_reward: float = Field(default=0.0)
    task_complete: bool = Field(default=False)
    task_success: bool = Field(default=False)


class StepResult(BaseModel):
    """Bundle of an observation, a reward, a ``done`` flag and an info dict."""

    observation: Observation
    reward: Reward
    done: bool
    info: Dict[str, Any] = Field(default_factory=dict)


class GraderResult(BaseModel):
    """Grader output: bounded score, feedback text, metrics and pass flag."""

    score: float = Field(..., ge=0.0, le=1.0, description="Grader score (0.0-1.0)")
    feedback: str = Field(..., description="Feedback on performance")
    metrics: Dict[str, float] = Field(default_factory=dict)
    passed: bool = Field(..., description="Whether task passed")


__all__ = [
    "Observation",
    "Action",
    "Reward",
    "State",
    "StepResult",
    "GraderResult",
    "TaskDifficulty",
    "ActionType",
]