File size: 1,950 Bytes
9f43137
225e725
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f43137
225e725
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""Action and observation models for Subtext Arena."""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


class SubtextArenaAction(Action):
    """Single agent action: pick a tool, optionally with arguments."""

    # NOTE: the class docstring above is kept verbatim on purpose; pydantic
    # exposes a model's docstring as its schema description.

    # Required field: the name of the tool the agent wants to invoke.
    # It is typed as a plain ``str``, so the names listed in the description
    # are documentation only and are not validated by this class.
    tool: str = Field(
        default=...,
        description="One of: get_transcript, get_prosody_features, get_pitch_contour, submit_belief",
    )

    # Optional per-tool arguments. ``default_factory`` gives every action its
    # own fresh empty dict when the caller supplies none.
    tool_args: Dict[str, Any] = Field(
        description=(
            "Tool-specific arguments. get_prosody_features / get_pitch_contour: "
            "optional {start: float, end: float} in seconds. "
            "submit_belief: {label: 'sarcastic'|'sincere', confidence: float}."
        ),
        default_factory=dict,
    )


class SubtextArenaObservation(Observation):
    """Observation returned after each action."""

    # NOTE: the class docstring above is kept verbatim on purpose; pydantic
    # exposes a model's docstring as its schema description.

    # --- Clip metadata ----------------------------------------------------
    clip_id: str = Field(
        description="MUStARD clip identifier (e.g. '1_60')",
        default="",
    )
    speaker: str = Field(
        description="Speaker name when available (Friends/BBT)",
        default="",
    )
    duration_s: float = Field(
        description="Total clip duration in seconds",
        default=0.0,
    )
    is_pivot: bool = Field(
        description="True if clip is in the Prosody-Pivot Set",
        default=False,
    )

    # --- Result of the most recent tool call ------------------------------
    tool_used: str = Field(
        description="Which tool the agent just invoked",
        default="",
    )
    tool_output: str = Field(
        description="Text output of the tool call",
        default="",
    )

    # --- Episode progress -------------------------------------------------
    step: int = Field(
        description="Current step in this episode (0-indexed)",
        default=0,
    )
    max_steps: int = Field(
        description="Hard cap on tool calls before forced submission",
        default=6,
    )
    audio_calls_so_far: int = Field(
        description="How many of the prior calls were audio-tool calls (used by reward)",
        default=0,
    )

    # --- Error reporting --------------------------------------------------
    # None unless the agent's action was malformed, per the description.
    error: Optional[str] = Field(
        description="Error message when the agent's action was malformed",
        default=None,
    )