"""
Task Definitions & Graders for the Negotiation Environment.
3 tasks: Easy → Medium → Hard, each with a programmatic grader (0.0–1.0).
"""
from dataclasses import dataclass
from typing import List
@dataclass
class TaskConfig:
    """Configuration for a single evaluation task."""

    name: str
    description: str
    difficulty: str
    opp_type: str
    agent_value: int
    opponent_value: int
    agent_role: str
    max_rounds: int
    success_threshold: float  # score >= this means success
# ─────────────────────────────────────────────
# Task Definitions
# ─────────────────────────────────────────────
TASK_A = TaskConfig(
    name="task_a_easy",
    description="Easy negotiation: fair opponent, wide ZOPA, plenty of rounds",
    difficulty="easy",
    opp_type="fair",
    agent_value=800,
    opponent_value=400,
    agent_role="buyer",
    max_rounds=20,
    success_threshold=0.2,
)

TASK_B = TaskConfig(
    name="task_b_medium",
    description="Medium negotiation: greedy opponent, narrow ZOPA, fewer rounds",
    difficulty="medium",
    opp_type="greedy",
    agent_value=700,
    opponent_value=500,
    agent_role="buyer",
    max_rounds=15,
    success_threshold=0.3,
)

TASK_C = TaskConfig(
    name="task_c_hard",
    description="Hard negotiation: impatient opponent, tight margins, very few rounds",
    difficulty="hard",
    opp_type="impatient",
    agent_value=600,
    opponent_value=480,
    agent_role="buyer",
    max_rounds=6,
    success_threshold=0.4,
)
ALL_TASKS: List[TaskConfig] = [TASK_A, TASK_B, TASK_C]
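# Worked example (illustrative): the Grader below normalizes total reward by
# the ZOPA width, |agent_value - opponent_value|. For TASK_A that width is
# 800 - 400 = 400, so a total reward of 80 grades to 80 / 400 = 0.2, exactly
# its success threshold. TASK_C's width is only 120, so the same 80 of reward
# would grade to about 0.67 there.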
# ─────────────────────────────────────────────
# Grader
# ─────────────────────────────────────────────
class Grader:
    """
    Programmatic grader for a negotiation task.
    Scores agent performance on a 0.0–1.0 scale.
    """

    def __init__(self, task: TaskConfig):
        self.task = task
        # Widest achievable surplus: the gap between the two valuations.
        self.max_possible = float(abs(task.agent_value - task.opponent_value))

    def grade(self, rewards: List[float], steps: int, deal_made: bool) -> dict:
        """
        Grade an episode.

        Args:
            rewards: list of per-step rewards
            steps: number of steps taken
            deal_made: whether a deal was successfully completed

        Returns:
            dict with score, success, and breakdown
        """
        total_reward = sum(rewards)

        # Score normalization: total_reward / max_possible, clamped to [0, 1].
        if self.max_possible > 0:
            raw_score = total_reward / self.max_possible
        else:
            raw_score = 0.0
        score = max(0.0, min(1.0, raw_score))

        success = score >= self.task.success_threshold

        # ── Detailed breakdown ──
        efficiency = 0.0
        if deal_made and steps > 0:
            # Bonus for closing the deal in fewer rounds: approaches 1.0 for a
            # 1-step deal and falls to 0.0 as steps reach max_rounds.
            efficiency = max(0.0, 1.0 - (steps / self.task.max_rounds))

        return {
            "task": self.task.name,
            "difficulty": self.task.difficulty,
            "score": round(score, 4),
            "success": success,
            "threshold": self.task.success_threshold,
            "total_reward": round(total_reward, 4),
            "max_possible": self.max_possible,
            "steps": steps,
            "deal_made": deal_made,
            "efficiency": round(efficiency, 4),
        }
def get_grader(task: TaskConfig) -> Grader:
    """Create a grader for the given task."""
    return Grader(task)
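
if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the module's API):
    # grade one synthetic TASK_A episode. The per-step rewards below are
    # made up; a real harness would collect them from the environment.
    grader = get_grader(TASK_A)
    result = grader.grade(rewards=[0.0, 0.0, 120.0], steps=3, deal_made=True)
    print(result)
    # With TASK_A, max_possible = 400, so score = 120 / 400 = 0.3 >= 0.2
    # (success), and efficiency = 1 - 3/20 = 0.85.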