# ajaxwin
# refactor: Improved grading logic for task 2
# f78cba2
"""Agents for Task2: Property Discovery"""
import random as _random
from typing import Any, Dict, List
from server import Task2Environment
from env.schemas import Action, ActionType
from data.data_loader import load_contracts, get_function_by_name
# ─────────────────────────────────────────────────────────────────────────────
# Task 2 agents
# ─────────────────────────────────────────────────────────────────────────────
def oracle_t2(env: Task2Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
    """Submit the stored ground-truth property text for the target function.

    Resets ``env`` with ``seed``, looks up the ``"property"`` entry of the
    episode's target function in the loaded contract data, requests the
    function code once, and then submits that text. If no property text is
    found, an empty string is submitted. The original author's note states the
    expected score is >= 0.70.

    Args:
        env: Task 2 environment to drive.
        seed: Seed forwarded to ``env.reset``.
        verbose: When true, print ``<contract>.<function>()`` before acting.

    Returns:
        Dict with ``seed``, ``contract``, ``function`` and ``grader_score``
        (the ``reward.value`` of the submit step).
    """
    observation = env.reset(seed=seed).observation
    fn_name = observation.extra["target_function"]
    contract = observation.contract_name

    # Only the first contract whose name matches is consulted.
    matching = next(
        (entry for entry in load_contracts() if entry["contract_name"] == contract),
        None,
    )
    gt_text = ""
    if matching is not None:
        target = get_function_by_name(matching, fn_name)
        if target and target.get("property"):
            gt_text = target["property"]

    if verbose:
        print(f" {contract}.{fn_name}()")

    # One browse action is issued before submitting.
    env.step(Action(action_type=ActionType.GET_FUNCTION_CODE))
    submission = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": gt_text},
    )
    outcome = env.step(submission)
    return {
        "seed": seed,
        "contract": contract,
        "function": fn_name,
        "grader_score": outcome.reward.value,
    }
def partial_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Submit only the target function's stored ``"comment"`` text.

    The original author describes this field as the function's short NatSpec
    comment and expects the submission to earn partial credit. If the contract
    or function is not found, or the function has no ``"comment"`` key, an
    empty string is submitted.

    Args:
        env: Task 2 environment to drive.
        seed: Seed forwarded to ``env.reset``.

    Returns:
        Dict with ``seed`` and ``grader_score`` (the ``reward.value`` of the
        submit step).
    """
    observation = env.reset(seed=seed).observation

    comment = ""
    for entry in load_contracts():
        if entry["contract_name"] != observation.contract_name:
            continue
        target = get_function_by_name(entry, observation.extra["target_function"])
        if target:
            comment = target.get("comment", "")
        # Stop at the first contract whose name matches.
        break

    submission = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": comment},
    )
    outcome = env.step(submission)
    return {"seed": seed, "grader_score": outcome.reward.value}
def random_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Browse at random, then submit one generic property sentence.

    One or two browse actions are issued in a shuffled order, after which one
    of five fixed sentence templates (filled in with the target function name)
    is submitted. Per the original author's note, the wording is generic enough
    that it should only match task-specific key phrases by coincidence, so the
    expected score is near 0.

    All randomness comes from a private generator derived from ``seed``, so a
    given seed always produces the same browse sequence and template.

    Args:
        env: Task 2 environment to drive.
        seed: Seed forwarded to ``env.reset`` and used to derive the RNG.

    Returns:
        Dict with ``seed``, ``grader_score`` (the ``reward.value`` of the
        submit step) and ``submitted`` (first 60 characters of the text sent).
    """
    # The seed is XOR-ed with a fixed constant before seeding the generator.
    rng = _random.Random(seed ^ 0xBEEF1)

    observation = env.reset(seed=seed).observation
    fn_name = observation.extra.get("target_function", "this function")

    # Random browse phase: shuffle the candidates, then take the first 1-2.
    browse_actions = [
        ActionType.GET_FILE_NATSPEC,
        ActionType.GET_RELATED_FUNCTIONS,
        ActionType.GET_SIGNATURE,
    ]
    rng.shuffle(browse_actions)
    browse_count = rng.randint(1, 2)
    for action_type in browse_actions[:browse_count]:
        env.step(Action(action_type=action_type))

    # Generic sentences; only the function name varies between episodes.
    candidates = [
        f"The {fn_name} operation completes the intended computation on the input data.",
        f"When {fn_name} executes, it processes the provided arguments and updates the contract.",
        f"The {fn_name} function validates inputs and performs the expected operation.",
        f"Calling {fn_name} causes the contract to execute its designated logic.",
        f"{fn_name} runs when invoked and modifies internal state as designed.",
    ]
    prop = rng.choice(candidates)

    submission = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": prop},
    )
    outcome = env.step(submission)
    return {
        "seed": seed,
        "grader_score": outcome.reward.value,
        "submitted": prop[:60],
    }
def floor_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Submit an empty property string and report the fixed floor score.

    The environment is reset and an empty ``"property"`` is submitted, but the
    reported ``grader_score`` is the constant ``0.001``; it is not read from
    the step result.

    Args:
        env: Task 2 environment to drive.
        seed: Seed forwarded to ``env.reset``.

    Returns:
        Dict with ``seed`` and ``grader_score`` (always ``0.001``).
    """
    env.reset(seed=seed)
    # The step result is deliberately not bound: nothing below reads it.
    env.step(Action(action_type=ActionType.SUBMIT_PROPERTY,
                    params={"property": ""}))
    # NOTE(review): the other agents in this file report result.reward.value,
    # and the previous docstring claimed 0.0. Presumably 0.001 is the grader's
    # minimum score -- confirm, or report the step's actual reward instead.
    return {"seed": seed, "grader_score": 0.001}