aaloksan committed on
Commit
aebc8f0
·
1 Parent(s): 0bf3992

fix: fix 3 graders

Browse files
Files changed (2) hide show
  1. env_server.py +4 -0
  2. inference.py +22 -22
env_server.py CHANGED
@@ -7,6 +7,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
7
  "vector_add_easy": {
8
  "name": "Vector Addition Kernel Optimization",
9
  "difficulty": "easy",
 
10
  "max_steps": 5,
11
  "target_speedup": 1.8,
12
  "baseline_code": """extern "C" __global__ void vector_add(const float* a, const float* b, float* c, int n)
@@ -24,6 +25,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
24
  "matmul_medium": {
25
  "name": "Matrix Multiplication Kernel Optimization",
26
  "difficulty": "medium",
 
27
  "max_steps": 6,
28
  "target_speedup": 3.0,
29
  "baseline_code": """extern "C" __global__ void matmul(const float* A, const float* B, float* C, int N)
@@ -45,6 +47,7 @@ TASKS: Dict[str, Dict[str, Any]] ={
45
  "reduction_hard": {
46
  "name": "Reduction Kernel Optimization",
47
  "difficulty": "hard",
 
48
  "max_steps":7,
49
  "target_speedup": 3.5,
50
  "baseline_code": """extern "C" __global__ void reduce_sum(const float* input, float* output, int n)
@@ -118,6 +121,7 @@ class KernelOptimization_env:
118
  "task_id": self.current_task_id,
119
  "task_name": task["name"],
120
  "difficulty": task["difficulty"],
 
121
  "max_steps": task["max_steps"],
122
  "target_speedup": task["target_speedup"],
123
  "checks": task["checks"],
 
7
  "vector_add_easy": {
8
  "name": "Vector Addition Kernel Optimization",
9
  "difficulty": "easy",
10
+ "grader": "deterministic_rule_based",
11
  "max_steps": 5,
12
  "target_speedup": 1.8,
13
  "baseline_code": """extern "C" __global__ void vector_add(const float* a, const float* b, float* c, int n)
 
25
  "matmul_medium": {
26
  "name": "Matrix Multiplication Kernel Optimization",
27
  "difficulty": "medium",
28
+ "grader": "deterministic_rule_based",
29
  "max_steps": 6,
30
  "target_speedup": 3.0,
31
  "baseline_code": """extern "C" __global__ void matmul(const float* A, const float* B, float* C, int N)
 
47
  "reduction_hard": {
48
  "name": "Reduction Kernel Optimization",
49
  "difficulty": "hard",
50
+ "grader": "deterministic_rule_based",
51
  "max_steps":7,
52
  "target_speedup": 3.5,
53
  "baseline_code": """extern "C" __global__ void reduce_sum(const float* input, float* output, int n)
 
121
  "task_id": self.current_task_id,
122
  "task_name": task["name"],
123
  "difficulty": task["difficulty"],
124
+ "grader": task["grader"],
125
  "max_steps": task["max_steps"],
126
  "target_speedup": task["target_speedup"],
127
  "checks": task["checks"],
inference.py CHANGED
@@ -13,7 +13,7 @@ load_dotenv()
13
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
14
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
15
  API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("API_KEY")
16
- TASK_NAME = os.getenv("TASK_ID", "vector_add_easy")
17
  BENCHMARK = "kernel_optimization"
18
 
19
 
@@ -85,8 +85,7 @@ def choose_action(client: Optional[OpenAI], observation: dict) -> Action:
85
  return fallback_action(observation)
86
 
87
 
88
- def main() -> int:
89
- task_id = TASK_NAME if TASK_NAME in TASKS else "vector_add_easy"
90
  env = KernelOptimization_env()
91
  rewards: List[float] = []
92
  steps_taken = 0
@@ -94,27 +93,18 @@ def main() -> int:
94
  success = False
95
 
96
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
97
-
98
  try:
99
- client: Optional[OpenAI] = None
100
- if API_KEY:
101
- try:
102
- client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)
103
- except Exception:
104
- client = None
105
  obs = env.reset(task_id=task_id)["observation"]
106
  done = False
107
-
108
  while not done:
109
  action = choose_action(client, obs)
110
- action_str = action.optimized_code
111
  step_result = env.step(action)
112
  done = step_result.done
113
  obs = step_result.observation.model_dump()
114
  reward = step_result.reward.value
115
  rewards.append(reward)
116
  steps_taken = obs["step_count"]
117
- log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
118
 
119
  score = grade_episode(
120
  task_id,
@@ -125,19 +115,29 @@ def main() -> int:
125
  )
126
  score = min(max(score, 0.0), 1.0)
127
  success = score >= 0.1
128
- return 0
129
  except Exception as exc:
130
- log_step(
131
- step=max(1, steps_taken + 1),
132
- action="error",
133
- reward=0.0,
134
- done=True,
135
- error=str(exc),
136
- )
137
- return 0
138
  finally:
139
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
140
 
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  if __name__ == "__main__":
143
  sys.exit(main())
 
13
  API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
14
  MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
15
  API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("HF_TOKEN") or os.getenv("API_KEY")
16
+ TASK_NAME = os.getenv("TASK_ID")
17
  BENCHMARK = "kernel_optimization"
18
 
19
 
 
85
  return fallback_action(observation)
86
 
87
 
88
+ def run_episode(client: Optional[OpenAI], task_id: str) -> None:
 
89
  env = KernelOptimization_env()
90
  rewards: List[float] = []
91
  steps_taken = 0
 
93
  success = False
94
 
95
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
 
96
  try:
 
 
 
 
 
 
97
  obs = env.reset(task_id=task_id)["observation"]
98
  done = False
 
99
  while not done:
100
  action = choose_action(client, obs)
 
101
  step_result = env.step(action)
102
  done = step_result.done
103
  obs = step_result.observation.model_dump()
104
  reward = step_result.reward.value
105
  rewards.append(reward)
106
  steps_taken = obs["step_count"]
107
+ log_step(step=steps_taken, action=action.optimized_code, reward=reward, done=done, error=None)
108
 
109
  score = grade_episode(
110
  task_id,
 
115
  )
116
  score = min(max(score, 0.0), 1.0)
117
  success = score >= 0.1
 
118
  except Exception as exc:
119
+ log_step(step=max(1, steps_taken + 1), action="error", reward=0.0, done=True, error=str(exc))
 
 
 
 
 
 
 
120
  finally:
121
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
122
 
123
 
124
+ def main() -> int:
125
+ client: Optional[OpenAI] = None
126
+ if API_KEY:
127
+ try:
128
+ client = OpenAI(api_key=API_KEY, base_url=API_BASE_URL)
129
+ except Exception:
130
+ client = None
131
+
132
+ if TASK_NAME and TASK_NAME in TASKS:
133
+ task_ids = [TASK_NAME]
134
+ else:
135
+ task_ids = list(TASKS.keys())
136
+
137
+ for task_id in task_ids:
138
+ run_episode(client, task_id)
139
+ return 0
140
+
141
+
142
  if __name__ == "__main__":
143
  sys.exit(main())