fix: fix 3 graders
Browse files
- env_server.py +4 -0
- inference.py +22 -22
env_server.py
CHANGED
|
@@ -7,6 +7,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
|
|
| 7 |
"vector_add_easy": {
|
| 8 |
"name": "Vector Addition Kernel Optimization",
|
| 9 |
"difficulty": "easy",
|
|
|
|
| 10 |
"max_steps": 5,
|
| 11 |
"target_speedup": 1.8,
|
| 12 |
"baseline_code": """extern "C" __global__ void vector_add(const float* a, const float* b, float* c, int n)
|
|
@@ -24,6 +25,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
|
|
| 24 |
"matmul_medium": {
|
| 25 |
"name": "Matrix Multiplication Kernel Optimization",
|
| 26 |
"difficulty": "medium",
|
|
|
|
| 27 |
"max_steps": 6,
|
| 28 |
"target_speedup": 3.0,
|
| 29 |
"baseline_code": """extern "C" __global__ void matmul(const float* A, const float* B, float* C, int N)
|
|
@@ -45,6 +47,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
|
|
| 45 |
"reduction_hard": {
|
| 46 |
"name": "Reduction Kernel Optimization",
|
| 47 |
"difficulty": "hard",
|
|
|
|
| 48 |
"max_steps":7,
|
| 49 |
"target_speedup": 3.5,
|
| 50 |
"baseline_code": """extern "C" __global__ void reduce_sum(const float* input, float* output, int n)
|
|
@@ -118,6 +121,7 @@ class KernelOptimization_env:
|
|
| 118 |
"task_id": self.current_task_id,
|
| 119 |
"task_name": task["name"],
|
| 120 |
"difficulty": task["difficulty"],
|
|
|
|
| 121 |
"max_steps": task["max_steps"],
|
| 122 |
"target_speedup": task["target_speedup"],
|
| 123 |
"checks": task["checks"],
|
|
|
|
| 7 |
"vector_add_easy": {
|
| 8 |
"name": "Vector Addition Kernel Optimization",
|
| 9 |
"difficulty": "easy",
|
| 10 |
+
"grader": "deterministic_rule_based",
|
| 11 |
"max_steps": 5,
|
| 12 |
"target_speedup": 1.8,
|
| 13 |
"baseline_code": """extern "C" __global__ void vector_add(const float* a, const float* b, float* c, int n)
|
|
|
|
| 25 |
"matmul_medium": {
|
| 26 |
"name": "Matrix Multiplication Kernel Optimization",
|
| 27 |
"difficulty": "medium",
|
| 28 |
+
"grader": "deterministic_rule_based",
|
| 29 |
"max_steps": 6,
|
| 30 |
"target_speedup": 3.0,
|
| 31 |
"baseline_code": """extern "C" __global__ void matmul(const float* A, const float* B, float* C, int N)
|
|
|
|
| 47 |
"reduction_hard": {
|
| 48 |
"name": "Reduction Kernel Optimization",
|
| 49 |
"difficulty": "hard",
|
| 50 |
+
"grader": "deterministic_rule_based",
|
| 51 |
"max_steps":7,
|
| 52 |
"target_speedup": 3.5,
|
| 53 |
"baseline_code": """extern "C" __global__ void reduce_sum(const float* input, float* output, int n)
|
|
|
|
| 121 |
"task_id": self.current_task_id,
|
| 122 |
"task_name": task["name"],
|
| 123 |
"difficulty": task["difficulty"],
|
| 124 |
+
"grader": task["grader"],
|
| 125 |
"max_steps": task["max_steps"],
|
| 126 |
"target_speedup": task["target_speedup"],
|
| 127 |
"checks": task["checks"],
|
inference.py
CHANGED
|
@@ -13,7 +13,7 @@ load_dotenv()
|
|
| 13 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
|
| 14 |
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
|
| 15 |
API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("API_KEY")
|
| 16 |
-
TASK_NAME = os.getenv("TASK_ID"
|
| 17 |
BENCHMARK = "kernel_optimization"
|
| 18 |
|
| 19 |
|
|
@@ -85,8 +85,7 @@ def choose_action(client: Optional[OpenAI], observation: dict) -> Action:
|
|
| 85 |
return fallback_action(observation)
|
| 86 |
|
| 87 |
|
| 88 |
-
def main() -> int:
|
| 89 |
-
task_id = TASK_NAME if TASK_NAME in TASKS else "vector_add_easy"
|
| 90 |
env = KernelOptimization_env()
|
| 91 |
rewards: List[float] = []
|
| 92 |
steps_taken = 0
|
|
@@ -94,27 +93,18 @@ def main() -> int:
|
|
| 94 |
success = False
|
| 95 |
|
| 96 |
log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
|
| 97 |
-
|
| 98 |
try:
|
| 99 |
-
client: Optional[OpenAI] = None
|
| 100 |
-
if API_KEY:
|
| 101 |
-
try:
|
| 102 |
-
client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)
|
| 103 |
-
except Exception:
|
| 104 |
-
client = None
|
| 105 |
obs = env.reset(task_id=task_id)["observation"]
|
| 106 |
done = False
|
| 107 |
-
|
| 108 |
while not done:
|
| 109 |
action = choose_action(client, obs)
|
| 110 |
-
action_str = action.optimized_code
|
| 111 |
step_result = env.step(action)
|
| 112 |
done = step_result.done
|
| 113 |
obs = step_result.observation.model_dump()
|
| 114 |
reward = step_result.reward.value
|
| 115 |
rewards.append(reward)
|
| 116 |
steps_taken = obs["step_count"]
|
| 117 |
-
log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
|
| 118 |
|
| 119 |
score = grade_episode(
|
| 120 |
task_id,
|
|
@@ -125,19 +115,29 @@ def main() -> int:
|
|
| 125 |
)
|
| 126 |
score = min(max(score, 0.0), 1.0)
|
| 127 |
success = score >= 0.1
|
| 128 |
-
return 0
|
| 129 |
except Exception as exc:
|
| 130 |
-
log_step(
|
| 131 |
-
step=max(1, steps_taken + 1),
|
| 132 |
-
action="error",
|
| 133 |
-
reward=0.0,
|
| 134 |
-
done=True,
|
| 135 |
-
error=str(exc),
|
| 136 |
-
)
|
| 137 |
-
return 0
|
| 138 |
finally:
|
| 139 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 140 |
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
if __name__ == "__main__":
|
| 143 |
sys.exit(main())
|
|
|
|
| 13 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
|
| 14 |
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
|
| 15 |
API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("API_KEY")
|
| 16 |
+
TASK_NAME = os.getenv("TASK_ID")
|
| 17 |
BENCHMARK = "kernel_optimization"
|
| 18 |
|
| 19 |
|
|
|
|
| 85 |
return fallback_action(observation)
|
| 86 |
|
| 87 |
|
| 88 |
+
def run_episode(client: Optional[OpenAI], task_id: str) -> None:
|
|
|
|
| 89 |
env = KernelOptimization_env()
|
| 90 |
rewards: List[float] = []
|
| 91 |
steps_taken = 0
|
|
|
|
| 93 |
success = False
|
| 94 |
|
| 95 |
log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
|
|
|
|
| 96 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
obs = env.reset(task_id=task_id)["observation"]
|
| 98 |
done = False
|
|
|
|
| 99 |
while not done:
|
| 100 |
action = choose_action(client, obs)
|
|
|
|
| 101 |
step_result = env.step(action)
|
| 102 |
done = step_result.done
|
| 103 |
obs = step_result.observation.model_dump()
|
| 104 |
reward = step_result.reward.value
|
| 105 |
rewards.append(reward)
|
| 106 |
steps_taken = obs["step_count"]
|
| 107 |
+
log_step(step=steps_taken, action=action.optimized_code, reward=reward, done=done, error=None)
|
| 108 |
|
| 109 |
score = grade_episode(
|
| 110 |
task_id,
|
|
|
|
| 115 |
)
|
| 116 |
score = min(max(score, 0.0), 1.0)
|
| 117 |
success = score >= 0.1
|
|
|
|
| 118 |
except Exception as exc:
|
| 119 |
+
log_step(step=max(1, steps_taken + 1), action="error", reward=0.0, done=True, error=str(exc))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
finally:
|
| 121 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 122 |
|
| 123 |
|
| 124 |
+
def main() -> int:
|
| 125 |
+
client: Optional[OpenAI] = None
|
| 126 |
+
if API_KEY:
|
| 127 |
+
try:
|
| 128 |
+
client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)
|
| 129 |
+
except Exception:
|
| 130 |
+
client = None
|
| 131 |
+
|
| 132 |
+
if TASK_NAME and TASK_NAME in TASKS:
|
| 133 |
+
task_ids = [TASK_NAME]
|
| 134 |
+
else:
|
| 135 |
+
task_ids = list(TASKS.keys())
|
| 136 |
+
|
| 137 |
+
for task_id in task_ids:
|
| 138 |
+
run_episode(client, task_id)
|
| 139 |
+
return 0
|
| 140 |
+
|
| 141 |
+
|
| 142 |
if __name__ == "__main__":
|
| 143 |
sys.exit(main())
|