Spaces:
Sleeping
Sleeping
File size: 4,014 Bytes
"""Task and evaluation schemas for Harbor RL environment (credit card optimization)."""
from __future__ import annotations
from typing import Any, Literal
from pydantic import Field
from lexenvs.schemas.base import InputSchema, ResponseSchema
# ---------------------------------------------------------------------------
# Internal models — loaded from JSON, never exposed via API
# ---------------------------------------------------------------------------
# Values accepted by ``ScoringDimension.type``.
DimensionType = Literal[
    "automated",
    "human",
]

# Values accepted by ``DimensionResult.status``.
DimensionStatus = Literal[
    "scored",
    "pending_reference",
    "requires_human",
]
class ComplexityHint(ResponseSchema):
    """Complexity hint nested inside ``TaskMetadata``.

    Both fields are required.
    """

    # Integer token figure; presumably a budget for the agent's
    # answer — TODO confirm against the task loader.
    max_tokens: int

    # Free-text description of the expected output.
    expected_output: str
class TaskMetadata(ResponseSchema):
    """Descriptive metadata attached to a ``TaskDefinition``."""

    domain: str

    # Only the three listed levels validate.
    difficulty: Literal["easy", "medium", "hard"]

    task_number: int
    complexity_hint: ComplexityHint

    # Defaults to False when the task data omits it.
    requires_human_review: bool = False
class TaskPrompt(ResponseSchema):
    """Prompt material handed to the agent.

    ``context`` is filled in when the task is loaded rather than being
    authored inline.
    """

    # System prompt text; empty unless provided.
    system: str = ""

    # Optional reference to a system prompt stored elsewhere
    # (NOTE(review): resolution logic is not in this module).
    system_prompt_ref: str | None = None

    # Resolved at load time; empty until then.
    context: str = ""

    # The only required field of the prompt.
    user: str

    # Optional knowledge-base reference and filter; how they are applied
    # is not visible here — verify against the loader.
    knowledge_base_ref: str | None = None
    kb_filter: list[str] | None = None
class ScoringDimension(ResponseSchema):
    """One entry of ``Scoring.dimensions``.

    ``weight``, ``type`` and ``description`` are required; every other
    field is optional.
    """

    weight: float

    # Either "automated" or "human" (see ``DimensionType``).
    type: DimensionType

    description: str

    # Free-form optional payloads; their schema is not defined in this
    # module.
    checks: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None

    # Optional string-to-string rubric mapping.
    rubric: dict[str, str] | None = None

    # None until a score is available.
    score: float | None = None

    # Presumably consulted together with
    # ``Scoring.hard_constraint_failure_zeroes_dimension`` — confirm in
    # the scorer.
    hard_constraint: bool = False
class Scoring(ResponseSchema):
    """Scoring configuration of a task: named dimensions plus a threshold."""

    # Maps a dimension name to its definition.
    dimensions: dict[str, ScoringDimension]

    passing_threshold: float

    # Enabled by default. NOTE(review): the name suggests a failed hard
    # constraint zeroes that dimension's score; the logic lives outside
    # this module.
    hard_constraint_failure_zeroes_dimension: bool = True
class EVBreakdown(ResponseSchema):
    """Optional per-category components used by ``ReferenceSolution.ev_breakdown``.

    Every field is an optional float defaulting to ``None``. The ``_usd``
    suffixes suggest US-dollar amounts — TODO confirm units and sign
    conventions with the task data.
    """

    signup_bonuses_usd: float | None = None
    ongoing_rewards_usd: float | None = None
    credits_usd: float | None = None
    annual_fees_usd: float | None = None
    other_usd: float | None = None
class ReferenceSolution(ResponseSchema):
    """Reference answer stored with a task definition.

    Only ``status`` is required; all remaining fields default to ``None``.
    """

    # Declared with the alias ``_status``, i.e. the underscore-prefixed
    # key is the external name of this field.
    status: str = Field(
        alias="_status",
    )

    recommended_cards: list[str] | None = None
    total_ev_usd: float | None = None
    ev_breakdown: EVBreakdown | None = None
    housing_option: str | None = None
    key_constraints_flags: list[str] | None = None
    expert_notes: str | None = None
class TaskDefinition(ResponseSchema):
    """Complete internal description of a task.

    Internal only: this model must never be returned through the API.
    """

    task_id: str

    # Defaults to "1.0.0" when not provided.
    version: str = "1.0.0"

    # Kept as an optional plain string; no date parsing happens here.
    created_at: str | None = None

    metadata: TaskMetadata
    prompt: TaskPrompt
    scoring: Scoring
    reference_solution: ReferenceSolution
# ---------------------------------------------------------------------------
# API response models — hide reference solution & scoring internals
# ---------------------------------------------------------------------------
class TaskResponse(ResponseSchema):
    """List-view entry: just enough to identify and pick a task."""

    task_id: str
    domain: str

    # NOTE(review): plain ``str`` here, whereas ``TaskMetadata.difficulty``
    # is limited to "easy"/"medium"/"hard".
    difficulty: str

    task_number: int
class TaskDetailResponse(TaskResponse):
    """Detail view: the list-view fields plus the full prompt for the agent."""

    system_prompt: str
    context: str
    user_prompt: str

    # Optional; presumably copied from ``ComplexityHint.max_tokens`` —
    # verify against the code that builds this response.
    max_tokens: int | None = None
class TaskListResponse(ResponseSchema):
    """Collection of the available tasks."""

    tasks: list[TaskResponse]

    # Supplied by the caller; this model does not derive it from ``tasks``.
    total: int
# ---------------------------------------------------------------------------
# Evaluation request / response
# ---------------------------------------------------------------------------
class EvaluateRequest(InputSchema):
    """Answer submitted by the agent as raw text.

    The text is expected to contain a JSON block; this schema itself only
    requires a string.
    """

    answer: str
class DimensionResult(ResponseSchema):
    """Scoring outcome for a single dimension."""

    dimension: str

    # Optional; defaults to None when no score is supplied.
    score: float | None = None

    weight: float

    # One of "scored", "pending_reference" or "requires_human"
    # (see ``DimensionStatus``).
    status: DimensionStatus
class TaskResultResponse(ResponseSchema):
    """Result of an evaluation, as sent back to the agent."""

    task_id: str

    # Required; validated to lie within the closed interval [0.0, 1.0].
    reward: float = Field(
        ge=0.0,
        le=1.0,
    )

    # Each instance gets its own fresh empty list / dict by default.
    dimensions: list[DimensionResult] = Field(
        default_factory=list,
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
    )
|