# Spaces: Sleeping
"""Task and evaluation schemas for Harbor RL environment (credit card optimization)."""

from __future__ import annotations

from typing import Any, Literal

from pydantic import Field

from lexenvs.schemas.base import InputSchema, ResponseSchema
# ---------------------------------------------------------------------------
# Internal models — loaded from JSON, never exposed via API
# ---------------------------------------------------------------------------
# Closed set of values allowed for ``ScoringDimension.type``.
DimensionType = Literal["automated", "human"]
# Closed set of values allowed for ``DimensionResult.status``.
DimensionStatus = Literal["scored", "pending_reference", "requires_human"]
class ComplexityHint(ResponseSchema):
    """Complexity hint nested under ``TaskMetadata.complexity_hint``."""

    # Presumably the token budget for the agent's answer — TODO confirm
    # against the code that consumes this value.
    max_tokens: int
    # Free-text description of the expected output; format not visible here.
    expected_output: str
class TaskMetadata(ResponseSchema):
    """Descriptive metadata stored on a ``TaskDefinition``."""

    domain: str
    # Closed set of allowed difficulty levels.
    difficulty: Literal["easy", "medium", "hard"]
    task_number: int
    complexity_hint: ComplexityHint
    # Optional flag; False unless the task data sets it.
    requires_human_review: bool = False
class TaskPrompt(ResponseSchema):
    """Prompt delivered to the agent. ``context`` is resolved at load time."""

    # System prompt text; empty string when not supplied.
    system: str = ""
    # Optional reference to a system prompt stored elsewhere
    # (resolution logic is not visible in this module).
    system_prompt_ref: str | None = None
    # Empty until resolved at load time (see class docstring).
    context: str = ""
    # The only required field of the prompt.
    user: str
    # Optional knowledge-base reference and filter; presumably used to
    # build ``context`` — TODO confirm against the loader.
    knowledge_base_ref: str | None = None
    kb_filter: list[str] | None = None
class ScoringDimension(ResponseSchema):
    """One scoring dimension of a task, keyed by name in ``Scoring.dimensions``."""

    # Weight of this dimension; no bounds are enforced by this schema.
    weight: float
    # Either "automated" or "human" (see ``DimensionType``).
    type: DimensionType
    description: str
    # Optional free-form payloads; their schemas are not defined here.
    checks: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None
    # Optional string-to-string rubric mapping.
    rubric: dict[str, str] | None = None
    # Optional score; None when not set.
    score: float | None = None
    # Marks the dimension as a hard constraint; see
    # ``Scoring.hard_constraint_failure_zeroes_dimension`` for the related flag.
    hard_constraint: bool = False
class Scoring(ResponseSchema):
    """Scoring configuration of a task: named dimensions plus a pass mark."""

    # Dimension name -> dimension definition.
    dimensions: dict[str, ScoringDimension]
    # Required threshold; no range is enforced by this schema.
    passing_threshold: float
    # Enabled by default. Presumably a failed hard constraint sets that
    # dimension's score to zero — TODO confirm in the scorer.
    hard_constraint_failure_zeroes_dimension: bool = True
class EVBreakdown(ResponseSchema):
    """Optional component breakdown nested under ``ReferenceSolution.ev_breakdown``.

    Every component is optional and defaults to ``None``. The ``_usd``
    suffix suggests US-dollar amounts — TODO confirm units and sign
    conventions against the task data.
    """

    signup_bonuses_usd: float | None = None
    ongoing_rewards_usd: float | None = None
    credits_usd: float | None = None
    annual_fees_usd: float | None = None
    other_usd: float | None = None
class ReferenceSolution(ResponseSchema):
    """Reference solution stored on a ``TaskDefinition``.

    Only ``status`` is required; every other field defaults to ``None``.
    """

    # Required. Populated from the ``_status`` key of the input via alias.
    status: str = Field(alias="_status")
    recommended_cards: list[str] | None = None
    total_ev_usd: float | None = None
    ev_breakdown: EVBreakdown | None = None
    housing_option: str | None = None
    key_constraints_flags: list[str] | None = None
    expert_notes: str | None = None
class TaskDefinition(ResponseSchema):
    """Full internal task definition — never exposed via API."""

    task_id: str
    # Defaults to "1.0.0" when the task data carries no version.
    version: str = "1.0.0"
    # Optional creation timestamp kept as a plain string (not parsed here).
    created_at: str | None = None
    metadata: TaskMetadata
    prompt: TaskPrompt
    scoring: Scoring
    reference_solution: ReferenceSolution
# ---------------------------------------------------------------------------
# API response models — hide reference solution & scoring internals
# ---------------------------------------------------------------------------
class TaskResponse(ResponseSchema):
    """List view — enough to identify and select a task."""

    task_id: str
    domain: str
    # Plain string here, whereas ``TaskMetadata.difficulty`` is a Literal
    # of "easy" / "medium" / "hard".
    difficulty: str
    task_number: int
class TaskDetailResponse(TaskResponse):
    """Detail view — full prompt for the agent to work with."""

    # Adds the prompt fields on top of the inherited ``TaskResponse`` fields.
    system_prompt: str
    context: str
    user_prompt: str
    # Optional; None when no value is supplied.
    max_tokens: int | None = None
class TaskListResponse(ResponseSchema):
    """List of available tasks."""

    tasks: list[TaskResponse]
    # Supplied by the caller; this schema does not tie it to ``len(tasks)``.
    total: int
# ---------------------------------------------------------------------------
# Evaluation request / response
# ---------------------------------------------------------------------------
class EvaluateRequest(InputSchema):
    """Agent submits answer as raw text (should contain a JSON block)."""

    # Raw answer text; any JSON extraction happens outside this schema.
    answer: str
class DimensionResult(ResponseSchema):
    """Per-dimension scoring result."""

    # Name of the dimension this result belongs to.
    dimension: str
    # Optional; presumably None while the status is not "scored" —
    # TODO confirm in the evaluator.
    score: float | None = None
    weight: float
    # One of "scored", "pending_reference", "requires_human".
    status: DimensionStatus
class TaskResultResponse(ResponseSchema):
    """Evaluation result returned to the agent."""

    task_id: str
    # Constrained to the closed interval [0.0, 1.0].
    reward: float = Field(ge=0.0, le=1.0)
    # Per-dimension results; a fresh empty list per instance by default.
    dimensions: list[DimensionResult] = Field(default_factory=list)
    # Free-form extra data; a fresh empty dict per instance by default.
    metadata: dict[str, Any] = Field(default_factory=dict)