File size: 4,014 Bytes
2312199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""Task and evaluation schemas for Harbor RL environment (credit card optimization)."""

from __future__ import annotations

from typing import Any, Literal

from pydantic import Field

from lexenvs.schemas.base import InputSchema, ResponseSchema

# ---------------------------------------------------------------------------
# Internal models — loaded from JSON, never exposed via API
# ---------------------------------------------------------------------------

# Allowed values for ``ScoringDimension.type``.
DimensionType = Literal["automated", "human"]
# Allowed values for ``DimensionResult.status``.
DimensionStatus = Literal["scored", "pending_reference", "requires_human"]


class ComplexityHint(ResponseSchema):
    """Complexity hint attached to a task via ``TaskMetadata.complexity_hint``.

    Both fields are required (no defaults).
    """

    # NOTE(review): presumably the token budget later surfaced as
    # ``TaskDetailResponse.max_tokens`` — confirm against the loader/service.
    max_tokens: int
    # Free-form text; its exact meaning is not defined in this module.
    expected_output: str


class TaskMetadata(ResponseSchema):
    """Descriptive metadata for a task, stored as ``TaskDefinition.metadata``."""

    domain: str
    # Restricted to exactly these three levels.
    difficulty: Literal["easy", "medium", "hard"]
    task_number: int
    complexity_hint: ComplexityHint
    # Optional flag; False when omitted from the input.
    requires_human_review: bool = False


class TaskPrompt(ResponseSchema):
    """Prompt delivered to the agent. ``context`` is resolved at load time.

    ``user`` is the only required field; every other field has a default.
    """

    # Inline system prompt text; empty string when omitted.
    system: str = ""
    # NOTE(review): presumably points at an externally stored system prompt
    # that the loader resolves — confirm; nothing in this module reads it.
    system_prompt_ref: str | None = None
    # Empty until populated (see class docstring: resolved at load time).
    context: str = ""
    user: str
    # NOTE(review): presumably identifies the knowledge base from which
    # ``context`` is built, with ``kb_filter`` narrowing it — confirm.
    knowledge_base_ref: str | None = None
    kb_filter: list[str] | None = None


class ScoringDimension(ResponseSchema):
    """One scoring dimension; stored by name in ``Scoring.dimensions``.

    ``weight``, ``type`` and ``description`` are required. The remaining
    fields are optional and default to ``None`` (or ``False`` for
    ``hard_constraint``).
    """

    # No bounds are enforced here; NOTE(review): confirm whether weights are
    # expected to sum to 1.0 across a task's dimensions.
    weight: float
    # Either "automated" or "human" (see ``DimensionType``).
    type: DimensionType
    description: str
    # Free-form dictionaries; their schemas are not defined in this module.
    checks: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None
    rubric: dict[str, str] | None = None
    score: float | None = None
    # NOTE(review): presumably consumed together with
    # ``Scoring.hard_constraint_failure_zeroes_dimension`` — confirm in the scorer.
    hard_constraint: bool = False


class Scoring(ResponseSchema):
    """Scoring configuration for a task, stored as ``TaskDefinition.scoring``."""

    # Maps a dimension name to its definition.
    dimensions: dict[str, ScoringDimension]
    # No range is enforced here; NOTE(review): presumably compared against the
    # 0.0–1.0 ``TaskResultResponse.reward`` — confirm.
    passing_threshold: float
    # Enabled by default; the zeroing logic itself lives outside this module.
    hard_constraint_failure_zeroes_dimension: bool = True


class EVBreakdown(ResponseSchema):
    """Itemised breakdown stored as ``ReferenceSolution.ev_breakdown``.

    Every component is optional and defaults to ``None``.
    NOTE(review): "EV" presumably means expected value and the ``_usd``
    suffix US dollars — confirm, including the sign convention for fees.
    """

    signup_bonuses_usd: float | None = None
    ongoing_rewards_usd: float | None = None
    credits_usd: float | None = None
    annual_fees_usd: float | None = None
    other_usd: float | None = None


class ReferenceSolution(ResponseSchema):
    """Reference answer for a task, stored as ``TaskDefinition.reference_solution``.

    Only ``status`` is required; all other fields default to ``None``.
    """

    # Populated from the ``_status`` key via the field alias; no default is given.
    # NOTE(review): the set of valid status strings is not constrained here.
    status: str = Field(alias="_status")
    recommended_cards: list[str] | None = None
    total_ev_usd: float | None = None
    ev_breakdown: EVBreakdown | None = None
    housing_option: str | None = None
    key_constraints_flags: list[str] | None = None
    expert_notes: str | None = None


class TaskDefinition(ResponseSchema):
    """Full internal task definition — never exposed via API.

    Aggregates the task's metadata, prompt, scoring configuration and
    reference solution.
    """

    task_id: str
    # Defaults to "1.0.0" when the input omits a version.
    version: str = "1.0.0"
    # Kept as a plain string; NOTE(review): the timestamp format is not
    # validated here — confirm what the task files contain.
    created_at: str | None = None
    metadata: TaskMetadata
    prompt: TaskPrompt
    scoring: Scoring
    reference_solution: ReferenceSolution


# ---------------------------------------------------------------------------
# API response models — hide reference solution & scoring internals
# ---------------------------------------------------------------------------


class TaskResponse(ResponseSchema):
    """List view — enough to identify and select a task.

    All four fields are required. Also serves as the base class of
    ``TaskDetailResponse``.
    """

    task_id: str
    domain: str
    # NOTE(review): plain ``str`` here, whereas ``TaskMetadata.difficulty`` is
    # ``Literal["easy", "medium", "hard"]`` — consider sharing one type.
    difficulty: str
    task_number: int


class TaskDetailResponse(TaskResponse):
    """Detail view — full prompt for the agent to work with.

    Extends ``TaskResponse`` with the prompt text fields.
    NOTE(review): presumably filled from ``TaskPrompt.system`` / ``context`` /
    ``user`` and ``ComplexityHint.max_tokens`` — confirm in the service layer.
    """

    system_prompt: str
    context: str
    user_prompt: str
    # Optional; ``None`` when omitted.
    max_tokens: int | None = None


class TaskListResponse(ResponseSchema):
    """List of available tasks."""

    tasks: list[TaskResponse]
    # NOTE(review): presumably the number of available tasks; nothing here
    # enforces that it equals ``len(tasks)`` — confirm with the endpoint.
    total: int


# ---------------------------------------------------------------------------
# Evaluation request / response
# ---------------------------------------------------------------------------


class EvaluateRequest(InputSchema):
    """Agent submits answer as raw text (should contain a JSON block).

    The only model in this module derived from ``InputSchema``.
    """

    # Required; no length or format validation is applied at the schema level.
    answer: str


class DimensionResult(ResponseSchema):
    """Per-dimension scoring result."""

    # Name of the dimension this result belongs to.
    dimension: str
    # NOTE(review): presumably ``None`` whenever ``status`` is not "scored" —
    # confirm in the scorer.
    score: float | None = None
    weight: float
    # One of "scored", "pending_reference" or "requires_human".
    status: DimensionStatus


class TaskResultResponse(ResponseSchema):
    """Evaluation result returned to the agent."""

    task_id: str
    # Required; validated to lie within the closed interval [0.0, 1.0].
    reward: float = Field(ge=0.0, le=1.0)
    # ``default_factory`` gives each instance its own empty list / dict.
    dimensions: list[DimensionResult] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)