# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Prompt Golf Environment Client."""
from typing import Dict
from openenv.core import EnvClient
from openenv.core.client_types import StepResult
from openenv.core.env_server.types import State
from .models import (
DEFAULT_PROMPT_BUDGET,
MAX_TARGET_OUTPUT_TOKENS,
TEST_EXAMPLES_PER_EPISODE,
GolfAction,
GolfObservation,
)
class PromptGolfEnv(EnvClient[GolfAction, GolfObservation, State]):
    """
    HTTP client for the Prompt Golf Environment.

    Call reset(task="task_name") to pick a task (see TASK_NAMES), or pass
    task="random" to have one sampled. The returned observation carries a
    natural-language task description plus a handful of visible train
    examples. Submitting a prompt with step(GolfAction(prompt=...)) ends the
    episode on that step; the final reward is reported on the observation's
    `reward` field.
    Example:
    >>> async with PromptGolfEnv(base_url="http://localhost:8000") as env:
    ...     result = await env.reset(task="sentiment_basic")
    ...     obs = result.observation
    ...     my_prompt = f"{obs.task_description}\\nAnswer with one word."
    ...     result = await env.step(GolfAction(prompt=my_prompt))
    ...     print(f"Reward: {result.reward:.3f} | "
    ...           f"Score: {result.observation.raw_task_score:.2f} | "
    ...           f"Tokens: {result.observation.submitted_prompt_tokens}")
    """

    def _step_payload(self, action: GolfAction) -> Dict:
        """Serialize *action* into the JSON body of a step request."""
        return {"prompt": action.prompt}

    def _parse_result(self, payload: Dict) -> StepResult[GolfObservation]:
        """Deserialize a step/reset response *payload* into a StepResult.

        Missing observation fields fall back to client-side defaults;
        grading fields are simply None until the episode terminates.
        """
        obs_data = payload.get("observation", {})
        reward = payload.get("reward")
        done = payload.get("done", False)

        # Fields with explicit fallbacks when the server omits them.
        defaulted = {
            "task_id": "",
            "task_category": "",
            "task_description": "",
            "target_model_id": "",
            "prompt_budget_tokens": DEFAULT_PROMPT_BUDGET,
            "max_target_output_tokens": MAX_TARGET_OUTPUT_TOKENS,
            "num_test_examples": TEST_EXAMPLES_PER_EPISODE,
            "train_examples": [],
            "scorer_name": "",
            "baseline_zero_shot_score": 0.0,
            "metadata": {},
        }
        # Grading/outcome fields: absent (None) before the episode ends.
        nullable = (
            "submitted_prompt_tokens",
            "raw_task_score",
            "length_factor",
            "leakage_penalty",
            "gain_over_baseline",
            "grade_details",
            "sample_generations",
        )

        fields = {key: obs_data.get(key, fallback) for key, fallback in defaulted.items()}
        fields.update({key: obs_data.get(key) for key in nullable})
        observation = GolfObservation(done=done, reward=reward, **fields)

        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> State:
        """Deserialize a state response *payload* into a State object."""
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )