Spaces:
Running
Running
| """Agents for Task2: Property Discovery""" | |
| import random as _random | |
| from typing import Any, Dict, List | |
| from server import Task2Environment | |
| from env.schemas import Action, ActionType | |
| from data.data_loader import load_contracts, get_function_by_name | |
# ─────────────────────────────────────────────────────────────────────────────
# Task 2 agents
# ─────────────────────────────────────────────────────────────────────────────
def oracle_t2(env: Task2Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
    """Submit the ground-truth property in natural language (English).

    Resets ``env`` with ``seed``, looks up the ``property`` text recorded for
    the episode's target function in the loaded contracts, requests the
    function code once, and then submits that text. An empty string is
    submitted when no matching contract/function/property is found.

    Per the original author's note, the expected grader score is >= 0.70.

    Args:
        env: Task 2 environment to drive.
        seed: Seed passed to ``env.reset``.
        verbose: When true, print the ``contract.function()`` being solved.

    Returns:
        Dict with ``seed``, ``contract``, ``function`` and ``grader_score``
        (the reward value of the submit step).
    """
    observation = env.reset(seed=seed).observation
    target_fn = observation.extra["target_function"]
    contract_name = observation.contract_name

    # Only the first contract whose name matches is consulted.
    matched = next(
        (entry for entry in load_contracts() if entry["contract_name"] == contract_name),
        None,
    )

    ground_truth = ""
    if matched is not None:
        fn_record = get_function_by_name(matched, target_fn)
        if fn_record and fn_record.get("property"):
            ground_truth = fn_record["property"]

    if verbose:
        print(f" {contract_name}.{target_fn}()")

    # Browse once before submitting, then hand in the ground-truth text.
    env.step(Action(action_type=ActionType.GET_FUNCTION_CODE))
    submit_action = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": ground_truth},
    )
    outcome = env.step(submit_action)

    return {
        "seed": seed,
        "contract": contract_name,
        "function": target_fn,
        "grader_score": outcome.reward.value,
    }
def partial_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Submit only the function's short NatSpec comment (partial credit).

    Resets ``env`` with ``seed``, finds the target function in the first
    loaded contract whose name matches the observation, and submits that
    function's ``comment`` field. An empty string is submitted when the
    contract or function is not found, or the function has no comment.

    Args:
        env: Task 2 environment to drive.
        seed: Seed passed to ``env.reset``.

    Returns:
        Dict with ``seed`` and ``grader_score`` (the reward value of the
        submit step).
    """
    observation = env.reset(seed=seed).observation
    all_contracts = load_contracts()

    # Only the first contract whose name matches is consulted.
    matched = next(
        (
            entry
            for entry in all_contracts
            if entry["contract_name"] == observation.contract_name
        ),
        None,
    )

    natspec = ""
    if matched is not None:
        fn_record = get_function_by_name(matched, observation.extra["target_function"])
        if fn_record:
            natspec = fn_record.get("comment", "")

    submit_action = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": natspec},
    )
    outcome = env.step(submit_action)
    return {"seed": seed, "grader_score": outcome.reward.value}
def random_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Genuine random agent: browse at random, then submit a generic template.

    The submitted sentence is built from high-frequency words that are
    unlikely to match task-specific key phrases, so the expected score is
    near 0 (coincidental matches only). All randomness comes from a private
    RNG derived from ``seed``, which makes runs reproducible.

    Args:
        env: Task 2 environment to drive.
        seed: Seed passed to ``env.reset`` and used to derive the agent's RNG.

    Returns:
        Dict with ``seed``, ``grader_score`` (reward value of the submit step)
        and ``submitted`` (the first 60 characters of the submitted text).
    """
    # Private generator so the agent never touches the global random state.
    agent_rng = _random.Random(seed ^ 0xBEEF1)

    observation = env.reset(seed=seed).observation
    target = observation.extra.get("target_function", "this function")

    # Random browse: shuffle the candidate actions, then take the first 1–2.
    candidates = [
        ActionType.GET_FILE_NATSPEC,
        ActionType.GET_RELATED_FUNCTIONS,
        ActionType.GET_SIGNATURE,
    ]
    agent_rng.shuffle(candidates)
    browse_count = agent_rng.randint(1, 2)
    for action_type in candidates[:browse_count]:
        env.step(Action(action_type=action_type))

    # Generic property sentences (won't match specific key phrases).
    generic_properties = [
        f"The {target} operation completes the intended computation on the input data.",
        f"When {target} executes, it processes the provided arguments and updates the contract.",
        f"The {target} function validates inputs and performs the expected operation.",
        f"Calling {target} causes the contract to execute its designated logic.",
        f"{target} runs when invoked and modifies internal state as designed.",
    ]
    chosen = agent_rng.choice(generic_properties)

    submit_action = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": chosen},
    )
    outcome = env.step(submit_action)
    return {
        "seed": seed,
        "grader_score": outcome.reward.value,
        "submitted": chosen[:60],
    }
def floor_t2(env: Task2Environment, seed: int) -> Dict[str, Any]:
    """Submit an empty property string and report a fixed floor score.

    Resets ``env`` with ``seed`` and submits ``""`` as the property. The
    returned ``grader_score`` is the constant ``0.001``; it is not read from
    the environment's step result.

    Args:
        env: Task 2 environment to drive.
        seed: Seed passed to ``env.reset``.

    Returns:
        Dict with ``seed`` and ``grader_score`` (always ``0.001``).
    """
    env.reset(seed=seed)
    empty_submission = Action(
        action_type=ActionType.SUBMIT_PROPERTY,
        params={"property": ""},
    )
    env.step(empty_submission)
    # NOTE(review): the score is hard-coded instead of taken from the step's
    # reward, unlike the other Task 2 agents. Presumably 0.001 is the grader's
    # minimum score; confirm this is intentional.
    return {"seed": seed, "grader_score": 0.001}