# NOTE(review): the three lines below are Hugging Face file-page residue that was
# captured into the source file; commented out so the module parses as Python.
# endishai's picture
# Upload folder using huggingface_hub
# 2312199 verified
"""Task and evaluation schemas for Harbor RL environment (credit card optimization)."""
from __future__ import annotations
from typing import Any, Literal
from pydantic import Field
from lexenvs.schemas.base import InputSchema, ResponseSchema
# ---------------------------------------------------------------------------
# Internal models — loaded from JSON, never exposed via API
# ---------------------------------------------------------------------------
# Closed string enums shared by the scoring models below.
DimensionType = Literal["automated", "human"]  # how a scoring dimension is evaluated
DimensionStatus = Literal["scored", "pending_reference", "requires_human"]  # per-dimension result state
class ComplexityHint(ResponseSchema):
    """Size/shape hint attached to a task's metadata (see ``TaskMetadata``)."""
    # presumably a generation token budget for the agent's answer — confirm against task JSON
    max_tokens: int
    # free-text description of what the output should look like
    expected_output: str
class TaskMetadata(ResponseSchema):
    """Descriptive metadata for an internal task definition."""
    domain: str
    difficulty: Literal["easy", "medium", "hard"]
    task_number: int
    complexity_hint: ComplexityHint
    # when True, at least one dimension needs a human grader (see DimensionType)
    requires_human_review: bool = False
class TaskPrompt(ResponseSchema):
    """Prompt delivered to the agent. ``context`` is resolved at load time."""
    system: str = ""
    # optional pointer to a shared system prompt; presumably used to fill
    # ``system`` at load time when set — confirm with the loader
    system_prompt_ref: str | None = None
    context: str = ""
    user: str
    # optional knowledge-base reference plus filter keys — resolution happens
    # outside this module (not visible here)
    knowledge_base_ref: str | None = None
    kb_filter: list[str] | None = None
class ScoringDimension(ResponseSchema):
    """One weighted dimension of a task's scoring rubric."""
    weight: float
    type: DimensionType  # "automated" or "human"
    description: str
    # exactly how checks/reference/rubric are consumed is decided by the
    # evaluator, not this schema — all three are optional free-form mappings
    checks: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None
    rubric: dict[str, str] | None = None
    score: float | None = None  # filled in after evaluation; None until scored
    # see Scoring.hard_constraint_failure_zeroes_dimension for the zeroing rule
    hard_constraint: bool = False
class Scoring(ResponseSchema):
    """Complete scoring specification for a task: named dimensions + threshold."""
    dimensions: dict[str, ScoringDimension]  # keyed by dimension name
    passing_threshold: float
    # when True, failing a hard-constraint check zeroes that dimension's score
    hard_constraint_failure_zeroes_dimension: bool = True
class EVBreakdown(ResponseSchema):
    """Itemized expected-value components (USD) of a reference solution."""
    signup_bonuses_usd: float | None = None
    ongoing_rewards_usd: float | None = None
    credits_usd: float | None = None
    # annual fees are presumably negative contributions — confirm sign convention
    annual_fees_usd: float | None = None
    other_usd: float | None = None
class ReferenceSolution(ResponseSchema):
    """Expert reference answer for a task — internal only, never sent to agents."""
    # populated from the JSON key "_status"; NOTE(review): whether this field can
    # also be populated by the name "status" depends on ResponseSchema's
    # populate_by_name config, which isn't visible here — confirm
    status: str = Field(alias="_status")
    recommended_cards: list[str] | None = None
    total_ev_usd: float | None = None
    ev_breakdown: EVBreakdown | None = None
    housing_option: str | None = None
    key_constraints_flags: list[str] | None = None
    expert_notes: str | None = None
class TaskDefinition(ResponseSchema):
    """Full internal task definition — never exposed via API.

    Aggregates metadata, the agent-facing prompt, the scoring rubric, and the
    hidden reference solution, as loaded from a task JSON file.
    """
    task_id: str
    version: str = "1.0.0"
    created_at: str | None = None  # presumably an ISO-8601 timestamp string — confirm
    metadata: TaskMetadata
    prompt: TaskPrompt
    scoring: Scoring
    reference_solution: ReferenceSolution
# ---------------------------------------------------------------------------
# API response models — hide reference solution & scoring internals
# ---------------------------------------------------------------------------
class TaskResponse(ResponseSchema):
    """List view — enough to identify and select a task."""
    task_id: str
    domain: str
    # NOTE(review): plain str here vs Literal["easy","medium","hard"] in
    # TaskMetadata — possibly deliberate loosening for the API surface; confirm
    difficulty: str
    task_number: int
class TaskDetailResponse(TaskResponse):
    """Detail view — full prompt for the agent to work with.

    Extends ``TaskResponse`` with the resolved prompt text; scoring internals
    and the reference solution are intentionally excluded.
    """
    system_prompt: str
    context: str
    user_prompt: str
    # mirrors ComplexityHint.max_tokens, but optional at the API surface
    max_tokens: int | None = None
class TaskListResponse(ResponseSchema):
    """List of available tasks."""
    tasks: list[TaskResponse]
    total: int  # total task count (presumably len(tasks) unless paginated — confirm)
# ---------------------------------------------------------------------------
# Evaluation request / response
# ---------------------------------------------------------------------------
class EvaluateRequest(InputSchema):
    """Agent submits answer as raw text (should contain a JSON block)."""
    answer: str
class DimensionResult(ResponseSchema):
    """Per-dimension scoring result."""
    dimension: str  # dimension name (key from Scoring.dimensions)
    score: float | None = None  # None when not yet scored (see ``status``)
    weight: float
    status: DimensionStatus  # "scored" | "pending_reference" | "requires_human"
class TaskResultResponse(ResponseSchema):
    """Evaluation result returned to the agent."""
    task_id: str
    # overall reward, validated to the [0.0, 1.0] interval by pydantic
    reward: float = Field(ge=0.0, le=1.0)
    dimensions: list[DimensionResult] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)