# NOTE(review): the three lines below are Hugging Face file-page residue that was
# captured into the source file; commented out so the module parses as Python.
# endishai's picture
# Upload folder using huggingface_hub
# 2312199 verified
"""Task and evaluation schemas for Harbor RL environment (credit card optimization)."""
from __future__ import annotations
from typing import Any, Literal
from pydantic import Field
from lexenvs.schemas.base import InputSchema, ResponseSchema
# ---------------------------------------------------------------------------
# Internal models — loaded from JSON, never exposed via API
# ---------------------------------------------------------------------------
# Closed string enums shared by the scoring models below.
DimensionType = Literal["automated", "human"]  # how a scoring dimension is evaluated
DimensionStatus = Literal["scored", "pending_reference", "requires_human"]  # per-dimension result state
class ComplexityHint(ResponseSchema):
    """Size/shape hint attached to a task's metadata (see ``TaskMetadata``)."""
    # presumably a generation token budget for the agent's answer — confirm against task JSON
    max_tokens: int
    # free-text description of what the output should look like
    expected_output: str
class TaskMetadata(ResponseSchema):
    """Descriptive metadata for an internal task definition."""
    domain: str
    difficulty: Literal["easy", "medium", "hard"]
    task_number: int
    complexity_hint: ComplexityHint
    # when True, at least one dimension needs a human grader (see DimensionType)
    requires_human_review: bool = False
class TaskPrompt(ResponseSchema):
    """Prompt delivered to the agent. ``context`` is resolved at load time."""
    system: str = ""
    # optional pointer to a shared system prompt; presumably used to fill
    # ``system`` at load time when set — confirm with the loader
    system_prompt_ref: str | None = None
    context: str = ""
    user: str
    # optional knowledge-base reference plus filter keys — resolution happens
    # outside this module (not visible here)
    knowledge_base_ref: str | None = None
    kb_filter: list[str] | None = None
class ScoringDimension(ResponseSchema):
    """One weighted dimension of a task's scoring rubric."""
    weight: float
    type: DimensionType  # "automated" or "human"
    description: str
    # exactly how checks/reference/rubric are consumed is decided by the
    # evaluator, not this schema — all three are optional free-form mappings
    checks: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None
    rubric: dict[str, str] | None = None
    score: float | None = None  # filled in after evaluation; None until scored
    # see Scoring.hard_constraint_failure_zeroes_dimension for the zeroing rule
    hard_constraint: bool = False
class Scoring(ResponseSchema):
    """Complete scoring specification for a task: named dimensions + threshold."""
    dimensions: dict[str, ScoringDimension]  # keyed by dimension name
    passing_threshold: float
    # when True, failing a hard-constraint check zeroes that dimension's score
    hard_constraint_failure_zeroes_dimension: bool = True
class EVBreakdown(ResponseSchema):
    """Itemized expected-value components (USD) of a reference solution."""
    signup_bonuses_usd: float | None = None
    ongoing_rewards_usd: float | None = None
    credits_usd: float | None = None
    # annual fees are presumably negative contributions — confirm sign convention
    annual_fees_usd: float | None = None
    other_usd: float | None = None
class ReferenceSolution(ResponseSchema):
    """Expert reference answer for a task — internal only, never sent to agents."""
    # populated from the JSON key "_status"; NOTE(review): whether this field can
    # also be populated by the name "status" depends on ResponseSchema's
    # populate_by_name config, which isn't visible here — confirm
    status: str = Field(alias="_status")
    recommended_cards: list[str] | None = None
    total_ev_usd: float | None = None
    ev_breakdown: EVBreakdown | None = None
    housing_option: str | None = None
    key_constraints_flags: list[str] | None = None
    expert_notes: str | None = None
class TaskDefinition(ResponseSchema):
    """Full internal task definition — never exposed via API.

    Aggregates metadata, the agent-facing prompt, the scoring rubric, and the
    hidden reference solution, as loaded from a task JSON file.
    """
    task_id: str
    version: str = "1.0.0"
    created_at: str | None = None  # presumably an ISO-8601 timestamp string — confirm
    metadata: TaskMetadata
    prompt: TaskPrompt
    scoring: Scoring
    reference_solution: ReferenceSolution
# ---------------------------------------------------------------------------
# API response models — hide reference solution & scoring internals
# ---------------------------------------------------------------------------
class TaskResponse(ResponseSchema):
    """List view — enough to identify and select a task."""
    task_id: str
    domain: str
    # NOTE(review): plain str here vs Literal["easy","medium","hard"] in
    # TaskMetadata — possibly deliberate loosening for the API surface; confirm
    difficulty: str
    task_number: int
class TaskDetailResponse(TaskResponse):
    """Detail view — full prompt for the agent to work with.

    Extends ``TaskResponse`` with the resolved prompt text; scoring internals
    and the reference solution are intentionally excluded.
    """
    system_prompt: str
    context: str
    user_prompt: str
    # mirrors ComplexityHint.max_tokens, but optional at the API surface
    max_tokens: int | None = None
class TaskListResponse(ResponseSchema):
    """List of available tasks."""
    tasks: list[TaskResponse]
    total: int  # total task count (presumably len(tasks) unless paginated — confirm)
# ---------------------------------------------------------------------------
# Evaluation request / response
# ---------------------------------------------------------------------------
class EvaluateRequest(InputSchema):
    """Agent submits answer as raw text (should contain a JSON block)."""
    answer: str
class DimensionResult(ResponseSchema):
    """Per-dimension scoring result."""
    dimension: str  # dimension name (key from Scoring.dimensions)
    score: float | None = None  # None when not yet scored (see ``status``)
    weight: float
    status: DimensionStatus  # "scored" | "pending_reference" | "requires_human"
class TaskResultResponse(ResponseSchema):
    """Evaluation result returned to the agent."""
    task_id: str
    # overall reward, validated to the [0.0, 1.0] interval by pydantic
    reward: float = Field(ge=0.0, le=1.0)
    dimensions: list[DimensionResult] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)