File size: 5,355 Bytes
fdd45f1
 
 
 
 
 
2be28d6
fdd45f1
2be28d6
 
 
fdd45f1
2be28d6
fdd45f1
 
 
 
 
 
 
2be28d6
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
 
2be28d6
 
fdd45f1
 
 
2be28d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
 
 
 
 
 
2be28d6
 
fdd45f1
2be28d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
 
 
 
 
2be28d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
 
2be28d6
fdd45f1
 
2be28d6
 
 
 
 
 
 
 
 
 
 
 
 
 
fdd45f1
2be28d6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
FocusFlow RL Environment — models.py
OpenEnv hackathon submission: Meta x Scaler 2026
Pydantic models for Action, Observation, State
"""

from __future__ import annotations
from enum import Enum
from typing import List, Optional, Dict, Any, Literal
from pydantic import BaseModel, Field


# ─── Enums ────────────────────────────────────────────────────────────────────

class AppCategory(str, Enum):
    """Closed set of categories a distracting app can be filed under.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value (e.g. ``AppCategory.video == "video"``).
    """

    social_media = "social_media"
    video = "video"
    messaging = "messaging"
    gaming = "gaming"
    news = "news"
    shopping = "shopping"

class SessionPhase(str, Enum):
    """Phases a session can be in; members double as plain strings.

    ``break_`` has a trailing underscore only because ``break`` is a Python
    keyword -- its value is still the string ``"break"``.
    """

    focus = "focus"
    break_ = "break"
    planning = "planning"
    overload = "overload"

class DistractionType(str, Enum):
    """Kinds of distraction an event can be tagged with.

    Like the other enums in this module, members are ``str`` subclasses and
    compare equal to their string values.
    """

    app_notification = "app_notification"
    social_message = "social_message"
    urgent_task = "urgent_task"
    environment_noise = "environment_noise"
    internal_urge = "internal_urge"


# ─── Core data objects ────────────────────────────────────────────────────────

class DistractingApp(BaseModel):
    """An app identified by name, with a category and a temptation rating."""

    # App name; any string is accepted.
    name: str
    # Must be (or coerce to) one of the AppCategory members.
    category: AppCategory
    # Required; validated to lie in the closed interval [0.0, 1.0].
    temptation_level: float = Field(
        ...,
        ge=0.0,
        le=1.0,
    )

class DistractionEvent(BaseModel):
    """Rich natural-language distraction that needs LLM reasoning to handle."""

    # Identifier string for this event.
    # NOTE(review): presumably what FocusAction.event_id refers to -- confirm.
    id: str
    # Which DistractionType this event belongs to.
    type: DistractionType
    # Free-text description of the distraction.
    description: str
    # Required; validated to lie in [0.0, 1.0].
    urgency: float = Field(
        ...,
        ge=0.0,
        le=1.0,
    )
    # Events are deferrable unless explicitly marked otherwise.
    can_defer: bool = Field(default=True)
    # Optional step count; None (the default) when not set.
    # NOTE(review): looks like "steps left before the event expires" -- confirm.
    deadline_steps: Optional[int] = Field(default=None)
    # Defaults to the empty string.
    # NOTE(review): presumably the expected action used for grading -- confirm.
    correct_action: str = Field(default="")


class DayContext(BaseModel):
    """Context that persists across days, forcing long-horizon planning.

    Every field has a default, so ``DayContext()`` is a valid instance.
    """

    # Day counters; both are validated to be >= 1.
    day_number: int = Field(default=1, ge=1)
    total_days: int = Field(default=7, ge=1)

    # Loosely-typed deadline records (arbitrary string-keyed dicts).
    pending_deadlines: List[Dict[str, Any]] = Field(default_factory=list)

    # Validated to lie in [0.0, 1.0]; starts at the maximum.
    energy_level: float = Field(default=1.0, ge=0.0, le=1.0)

    # Each instance gets its own fresh list via default_factory.
    completed_tasks: List[str] = Field(default_factory=list)
    deferred_events: List[DistractionEvent] = Field(default_factory=list)

    # Non-negative counter, starting at zero.
    streak_days: int = Field(default=0, ge=0)


# ─── Action ───────────────────────────────────────────────────────────────────

class FocusAction(BaseModel):
    """
    Action submitted by the agent.

    `action_type` and `reasoning` are the only required fields. `reasoning`
    must be at least 10 characters long; the intent is to force the agent to
    explain itself (chain-of-thought), which a rule-based policy cannot do.

    The "Required if/for ..." notes in the field descriptions are hints only:
    this model does not cross-validate them against `action_type`.
    """

    # Closed vocabulary of actions; any other string fails validation.
    action_type: Literal[
        "focus",
        "block_app",
        "take_break",
        "defer_event",
        "respond_to_event",
        "plan_day",
        "adjust_energy",
        "quit_session",
        "check_app",
    ] = Field(
        ...,
        description="The exact action the agent intends to take.",
    )

    # Optional, action-specific payload. All default to None.
    app_name: Optional[str] = Field(
        default=None,
        description="Required if action is block_app.",
    )
    event_id: Optional[str] = Field(
        default=None,
        description="Required for defer/respond actions.",
    )
    response_text: Optional[str] = Field(
        default=None,
        description="Agent's NL reply to a social message.",
    )
    # When provided, must be at least 1.
    timer_minutes: Optional[int] = Field(default=None, ge=1)
    day_plan: Optional[List[str]] = Field(
        default=None,
        description="List of tasks for plan_day action.",
    )

    # Required free-text justification, minimum 10 characters.
    reasoning: str = Field(
        ...,
        min_length=10,
        description="MANDATORY: Agent MUST explain its reasoning. Empty or short reasoning = heavy penalty.",
    )


# ─── Observation ──────────────────────────────────────────────────────────────

class FocusObservation(BaseModel):
    """Observation payload: session status, distractions, context and feedback."""

    # --- Session status (all required) ---
    time_remaining_seconds: int = Field(
        ...,
        ge=0,
    )
    current_phase: SessionPhase
    sessions_completed: int = Field(
        ...,
        ge=0,
    )
    # Validated to lie in [0.0, 1.0].
    focus_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
    )

    # --- Distractions ---
    # Lists default to fresh empty lists; pending_event is None when absent.
    active_distractions: List[str] = Field(default_factory=list)
    blocked_apps: List[str] = Field(default_factory=list)
    pending_event: Optional[DistractionEvent] = Field(default=None)

    # --- Context and pressure signals ---
    day_context: DayContext
    # Both validated to lie in [0.0, 1.0] and default to 0.0.
    cognitive_load: float = Field(default=0.0, ge=0.0, le=1.0)
    deadline_pressure: float = Field(default=0.0, ge=0.0, le=1.0)

    # --- Feedback on the previous action ---
    # Feedback text and reward are required; the reward is unbounded here.
    last_action_feedback: str
    last_action_reward: float
    reasoning_quality_score: float = Field(default=0.0, ge=0.0, le=1.0)


# ─── Full internal state (for /state endpoint) ────────────────────────────────

class FocusState(BaseModel):
    """Full internal state snapshot: counters, history lists and episode status."""

    # --- Episode progress and counters (all required) ---
    episode_step: int = Field(
        ...,
        ge=0,
    )
    # An episode must allow at least one step.
    max_steps: int = Field(
        ...,
        ge=1,
    )
    total_focus_seconds: int = Field(
        ...,
        ge=0,
    )
    total_distraction_seconds: int = Field(
        ...,
        ge=0,
    )
    sessions_completed: int = Field(
        ...,
        ge=0,
    )
    breaks_taken: int = Field(
        ...,
        ge=0,
    )

    # --- History lists; each defaults to a fresh empty list ---
    apps_blocked: List[str] = Field(default_factory=list)
    apps_checked: List[str] = Field(default_factory=list)
    events_deferred: List[str] = Field(default_factory=list)
    events_responded: List[str] = Field(default_factory=list)

    # --- Current status (all required) ---
    current_phase: SessionPhase
    time_remaining_seconds: int = Field(
        ...,
        ge=0,
    )
    # Unbounded running total of reward.
    cumulative_reward: float
    day_context: DayContext
    # Unlike FocusObservation.cognitive_load, this one has no default.
    cognitive_load: float = Field(
        ...,
        ge=0.0,
        le=1.0,
    )
    done: bool