Imaginephoenix committed on
Commit
29b06ad
·
verified ·
1 Parent(s): 3f73ea4

Upload models.py

Browse files
Files changed (1) hide show
  1. models.py +89 -0
models.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data models for the OpenEnv email triage environment."""
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, field_validator
6
+
7
# Margin kept between clamped values and the interval endpoints 0 and 1.
OPEN_INTERVAL_EPSILON = 1e-2


def _strict_open_unit_interval(raw_value: float) -> float:
    """Clamp a numeric value into the strict open interval (0, 1).

    Args:
        raw_value: Any value accepted by ``float()``.

    Returns:
        ``1.0 - OPEN_INTERVAL_EPSILON`` when the value is at or above 1,
        the value itself when it already lies strictly between 0 and 1,
        and ``OPEN_INTERVAL_EPSILON`` otherwise (values at or below 0,
        and NaN).

    Raises:
        TypeError, ValueError: Propagated from ``float()`` when
            ``raw_value`` cannot be converted.
    """
    numeric_value = float(raw_value)
    if numeric_value >= 1.0:
        return 1.0 - OPEN_INTERVAL_EPSILON
    if numeric_value > 0.0:
        return numeric_value
    # Reached for values <= 0 and for NaN. Every ordered comparison with NaN
    # is False, so checking the "inside the interval" case positively keeps
    # NaN from being returned unchanged.
    return OPEN_INTERVAL_EPSILON
18
+
19
+
20
class EmailObservation(BaseModel):
    """Email context that the agent can see on every step."""

    # Fields describing the email itself.
    email_id: str
    subject: str
    body: str
    sender: str
    timestamp: str
    thread_history: list[str]

    # Fields describing the task and the position within it.
    task_id: str
    step_number: int
    total_emails: int
32
+
33
+
34
class TriageAction(BaseModel):
    """Action the agent selects for a single email."""

    # Restricted to exactly these four label strings.
    label: Literal[
        "urgent",
        "normal",
        "spam",
        "archive",
    ]
    summary: str
    route_to: str
40
+
41
+
42
class RewardResult(BaseModel):
    """Deterministic grading output, produced before reward shaping.

    ``score`` is passed through ``_strict_open_unit_interval`` during
    validation.
    """

    score: float
    breakdown: dict[str, float]
    feedback: str

    @field_validator("score")
    @classmethod
    def _validate_score(cls, candidate: float) -> float:
        """Return ``candidate`` clamped by ``_strict_open_unit_interval``."""
        clamped = _strict_open_unit_interval(candidate)
        return clamped
53
+
54
+
55
class EnvironmentState(BaseModel):
    """Full internal environment state, for debugging and evaluation.

    Every entry of ``reward_history`` is passed through
    ``_strict_open_unit_interval`` during validation.
    """

    task_id: str
    current_step: int
    total_steps: int
    done: bool
    action_history: list[TriageAction]
    reward_history: list[float]

    @field_validator("reward_history")
    @classmethod
    def _validate_reward_history(cls, rewards: list[float]) -> list[float]:
        """Return a new list with each reward clamped individually."""
        return list(map(_strict_open_unit_interval, rewards))
69
+
70
+
71
class StepResult(BaseModel):
    """Standardized output returned by environment step calls.

    ``reward`` is passed through ``_strict_open_unit_interval`` during
    validation.
    """

    observation: EmailObservation
    reward: float
    done: bool
    info: dict[str, str | int | float | bool]

    @field_validator("reward")
    @classmethod
    def _validate_reward(cls, candidate: float) -> float:
        """Return ``candidate`` clamped by ``_strict_open_unit_interval``."""
        clamped = _strict_open_unit_interval(candidate)
        return clamped
83
+
84
+
85
class ResetResult(BaseModel):
    """Standardized output returned by environment reset calls."""

    # The observation returned by the reset call.
    observation: EmailObservation
    # Free-form metadata; values are limited to str, int, float or bool.
    info: dict[str, str | int | float | bool]