aaloksan commited on
Commit
d5c6f39
·
1 Parent(s): d0a6ad8

fix: derives speedup now

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__
2
+ venv/
3
+ .env
app.py CHANGED
@@ -9,7 +9,20 @@ import traceback
9
 
10
  load_dotenv()
11
 
12
- def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  log= []
14
  env=KernelOptimization_env()
15
  api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
@@ -17,9 +30,13 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
17
  yield "ERROR: Missing OPENAI_API_KEY", ""
18
  return
19
 
20
- model = os.getenv("MODEL_NAME", "gpt-4")
21
- client = OpenAI(api_key=api_key, base_url=os.getenv("API_BASE_URL", "https://api.openai.com/v1"))
22
  obs = env.reset(task_id=task_id)["observation"]
 
 
 
 
23
  best_code = obs["current_best_code"]
24
  log.append(f"Task: {obs['task_name']}")
25
 
@@ -34,7 +51,8 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
34
  {"role": "user", "content": prompt},
35
  ],
36
  )
37
- code = (res.choices[0].message.content or "").strip() or obs["current_best_code"]
 
38
  step = env.step(Action(optimized_code=code, strategy="ui_proposed"))
39
  obs = step.observation.model_dump()
40
  best_code = obs["current_best_code"]
@@ -49,13 +67,20 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
49
  with gr.Blocks(title="CUDA Kernel Optimizer") as demo:
50
  gr.Markdown("CUDA Kernel Optimizer - OpenEnv-aligned workflow")
51
  task = gr.Dropdown(choices=list(TASKS.keys()), value="vector_add_easy", label="Task")
 
 
 
 
 
 
52
  steps = gr.Slider(minimum=1, maximum=12, value=6, step=1, label="Max Steps")
53
  key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
54
  run = gr.Button("Run Optimization", variant="primary")
55
  logs = gr.Textbox(label="Logs", lines=14)
56
  code = gr.Code(label="Best Code", language="cpp", lines=16)
57
- run.click(ui, inputs=[task, steps, key], outputs=[logs, code])
 
58
 
59
 
60
  if __name__ == "__main__":
61
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
9
 
10
  load_dotenv()
11
 
12
+ def task_baseline_code(task_id: str) -> str:
13
+ return TASKS[task_id]["baseline_code"]
14
+
15
+ def extract_code(text: str) -> str:
16
+ if "```" not in text:
17
+ return text
18
+ start = text.find("```")
19
+ end = text.rfind("```")
20
+ chunk = text[start + 3 : end]
21
+ if chunk.startswith("cuda") or chunk.startswith("cpp"):
22
+ return chunk.split("\n", 1)[1]
23
+ return chunk
24
+
25
+ def ui(task_id:str, kernel_code: str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]]:
26
  log= []
27
  env=KernelOptimization_env()
28
  api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
 
30
  yield "ERROR: Missing OPENAI_API_KEY", ""
31
  return
32
 
33
+ model = os.getenv("MODEL_NAME", "llama-3.2-3b")
34
+ client = OpenAI(api_key=api_key, base_url=os.getenv("API_BASE_URL", "https://api.oxlo.ai/v1"))
35
  obs = env.reset(task_id=task_id)["observation"]
36
+ if kernel_code and kernel_code.strip():
37
+ custom_code = kernel_code.strip()
38
+ env.state.best_code = custom_code
39
+ obs["current_best_code"] = custom_code
40
  best_code = obs["current_best_code"]
41
  log.append(f"Task: {obs['task_name']}")
42
 
 
51
  {"role": "user", "content": prompt},
52
  ],
53
  )
54
+ raw = (res.choices[0].message.content or "").strip()
55
+ code = extract_code(raw).strip() or obs["current_best_code"]
56
  step = env.step(Action(optimized_code=code, strategy="ui_proposed"))
57
  obs = step.observation.model_dump()
58
  best_code = obs["current_best_code"]
 
67
  with gr.Blocks(title="CUDA Kernel Optimizer") as demo:
68
  gr.Markdown("CUDA Kernel Optimizer - OpenEnv-aligned workflow")
69
  task = gr.Dropdown(choices=list(TASKS.keys()), value="vector_add_easy", label="Task")
70
+ kernel_input = gr.Code(
71
+ label="Kernel Code (editable, used as optimization input)",
72
+ language="cpp",
73
+ lines=16,
74
+ value=TASKS["vector_add_easy"]["baseline_code"],
75
+ )
76
  steps = gr.Slider(minimum=1, maximum=12, value=6, step=1, label="Max Steps")
77
  key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
78
  run = gr.Button("Run Optimization", variant="primary")
79
  logs = gr.Textbox(label="Logs", lines=14)
80
  code = gr.Code(label="Best Code", language="cpp", lines=16)
81
+ task.change(task_baseline_code, inputs=[task], outputs=[kernel_input])
82
+ run.click(ui, inputs=[task, kernel_input, steps, key], outputs=[logs, code])
83
 
84
 
85
  if __name__ == "__main__":
86
+ demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
env_server.py CHANGED
@@ -112,8 +112,17 @@ class KernelOptimization_env:
112
  self.current_task_id =task_id or random.choice(list(TASKS.keys()))
113
  task= TASKS[self.current_task_id]
114
  self.state =EnvState(initialized=True, task_id=self.current_task_id, step_count=0, max_steps=task["max_steps"], total_reward=0.0, best_code=task["baseline_code"], best_speedup=1.0, completed_checks=[], action_history=[])
115
-
116
- return {"observation": to_observation(self.current_task_id, self.state).model_dump()}
 
 
 
 
 
 
 
 
 
117
 
118
  def step(self, action:Action) ->StepResult:
119
  if not self.state.initialized or not self.current_task_id:
@@ -129,7 +138,12 @@ class KernelOptimization_env:
129
  completed.update(newly_completed)
130
  self.state.completed_checks = sorted(completed)
131
 
132
- est_speedup = self.current_task_id, completed, compile_ok
 
 
 
 
 
133
  if est_speedup > self.state.best_speedup:
134
  self.state.best_speedup = est_speedup
135
  self.state.best_code = code
@@ -199,4 +213,4 @@ def step(request: StepRequest):
199
 
200
  @app.get("/state")
201
  def state():
202
- return env.state_dict()
 
112
  self.current_task_id =task_id or random.choice(list(TASKS.keys()))
113
  task= TASKS[self.current_task_id]
114
  self.state =EnvState(initialized=True, task_id=self.current_task_id, step_count=0, max_steps=task["max_steps"], total_reward=0.0, best_code=task["baseline_code"], best_speedup=1.0, completed_checks=[], action_history=[])
115
+ return {
116
+ "observation": to_observation(self.current_task_id, self.state).model_dump(),
117
+ "info": {
118
+ "task_id": self.current_task_id,
119
+ "task_name": task["name"],
120
+ "difficulty": task["difficulty"],
121
+ "max_steps": task["max_steps"],
122
+ "target_speedup": task["target_speedup"],
123
+ "checks": task["checks"],
124
+ },
125
+ }
126
 
127
  def step(self, action:Action) ->StepResult:
128
  if not self.state.initialized or not self.current_task_id:
 
138
  completed.update(newly_completed)
139
  self.state.completed_checks = sorted(completed)
140
 
141
+ completion_ratio = len(completed) / max(len(TASKS[self.current_task_id]["checks"]), 1)
142
+ max_reasonable_speedup = 1.0 + completion_ratio * 3.0
143
+ if action.expected_speedup is None:
144
+ est_speedup = round(max_reasonable_speedup, 3)
145
+ else:
146
+ est_speedup = round(max(1.0, min(action.expected_speedup, max_reasonable_speedup)), 3)
147
  if est_speedup > self.state.best_speedup:
148
  self.state.best_speedup = est_speedup
149
  self.state.best_code = code
 
213
 
214
  @app.get("/state")
215
  def state():
216
+ return env.state_dict()
openenv.yaml CHANGED
@@ -17,7 +17,7 @@ metadata:
17
  author: aaloksan
18
 
19
  tasks:
20
- - id: vector_addition_easy
21
  name: "Vector Addition Kernel Optimization"
22
  difficulty: easy
23
  objective: "Improve memory throughput while preserving correctness."
@@ -39,7 +39,7 @@ interfaces:
39
  reset:
40
  method: POST
41
  path: /reset
42
- returns: initial observation and info
43
  step:
44
  method: POST
45
  path: /step
@@ -52,4 +52,4 @@ interfaces:
52
  baseline:
53
  script: inference.py
54
  model_env_var: MODEL_NAME
55
- api_key_env_var: OPENAI_API_KEY
 
17
  author: aaloksan
18
 
19
  tasks:
20
+ - id: vector_add_easy
21
  name: "Vector Addition Kernel Optimization"
22
  difficulty: easy
23
  objective: "Improve memory throughput while preserving correctness."
 
39
  reset:
40
  method: POST
41
  path: /reset
42
+ returns: initial observation, metadata info
43
  step:
44
  method: POST
45
  path: /step
 
52
  baseline:
53
  script: inference.py
54
  model_env_var: MODEL_NAME
55
+ api_key_env_var: OPENAI_API_KEY
openenv_train.py CHANGED
@@ -11,8 +11,10 @@ class KernelOptTool:
11
  self.reward = 0.0
12
  self.done = False
13
 
14
- def reset(self, **kwargs) ->str|None:
15
- task_id =kwargs.get("task_id")
 
 
16
  result = self.env.reset(task_id=task_id)
17
  obs = result["observation"]
18
  self.reward = 0.0
@@ -24,10 +26,12 @@ class KernelOptTool:
24
  "Use tools to submit improved code."
25
  )
26
 
27
- def submit_optiization(self, optimized_code:str, strategy:str ="")->str:
28
  if self.done:
29
  raise ValueError("Episode is already done.")
30
- result = self.env.step(Action(optimized_code=optimized_code, strategy=strategy))
 
 
31
  self.reward = result.reward.value
32
  self.done = result.done
33
  obs = result.observation
@@ -37,8 +41,12 @@ class KernelOptTool:
37
  f"pending_checks={obs.pending_checks}, done={result.done}"
38
  )
39
 
40
- def reward_func(environmnets, **kwargs)-> List[float]:
41
- return [env.reward for env in environmnets]
 
 
 
 
42
 
43
  def build_dataset(repeats_per_task:int=32)-> Dataset:
44
  prompts, task_ids = [], []
@@ -67,4 +75,4 @@ def main():
67
  # trainer = GRPOTrainer(model =model_name, train_dataset=dataset, reward_funcs =reward_func, env_factory=KernelOptTool)
68
 
69
  if __name__ == "__main__":
70
- main()
 
11
  self.reward = 0.0
12
  self.done = False
13
 
14
+ def reset(self, **kwargs) -> str:
15
+ task_id = kwargs.get("task_id")
16
+ if task_id is None and isinstance(kwargs.get("sample"), dict):
17
+ task_id = kwargs["sample"].get("task_id")
18
  result = self.env.reset(task_id=task_id)
19
  obs = result["observation"]
20
  self.reward = 0.0
 
26
  "Use tools to submit improved code."
27
  )
28
 
29
+ def submit_optimization(self, optimized_code: str, strategy: str = "", expected_speedup: float | None = None) -> str:
30
  if self.done:
31
  raise ValueError("Episode is already done.")
32
+ result = self.env.step(
33
+ Action(optimized_code=optimized_code, strategy=strategy, expected_speedup=expected_speedup)
34
+ )
35
  self.reward = result.reward.value
36
  self.done = result.done
37
  obs = result.observation
 
41
  f"pending_checks={obs.pending_checks}, done={result.done}"
42
  )
43
 
44
+ # Backward-compatible alias
45
+ def submit_optiization(self, optimized_code: str, strategy: str = "") -> str:
46
+ return self.submit_optimization(optimized_code=optimized_code, strategy=strategy)
47
+
48
+ def reward_func(environments, **kwargs) -> List[float]:
49
+ return [env.reward for env in environments]
50
 
51
  def build_dataset(repeats_per_task:int=32)-> Dataset:
52
  prompts, task_ids = [], []
 
75
  # trainer = GRPOTrainer(model =model_name, train_dataset=dataset, reward_funcs =reward_func, env_factory=KernelOptTool)
76
 
77
  if __name__ == "__main__":
78
+ main()
pyproject.toml CHANGED
@@ -16,6 +16,7 @@ dependencies = [
16
  "openenv-core==0.1.1",
17
  "python-dotenv>=1.0.0",
18
  "pillow>=10.4.0,<11",
 
19
  "gradio>=4.44.0",
20
  "datasets>=2.20.0",
21
  "trl>=0.12.0"
 
16
  "openenv-core==0.1.1",
17
  "python-dotenv>=1.0.0",
18
  "pillow>=10.4.0,<11",
19
+ "audioop-lts>=0.2.2; python_version >= '3.13'",
20
  "gradio>=4.44.0",
21
  "datasets>=2.20.0",
22
  "trl>=0.12.0"
requirements.txt CHANGED
@@ -10,3 +10,4 @@ pytest>=7.4.0
10
  pyyaml>=6.0.0
11
  python-dotenv
12
  pillow>=10.4.0,<11
 
 
10
  pyyaml>=6.0.0
11
  python-dotenv
12
  pillow>=10.4.0,<11
13
+ audioop-lts>=0.2.2; python_version >= "3.13"
server/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/server/__pycache__/__init__.cpython-312.pyc and b/server/__pycache__/__init__.cpython-312.pyc differ
 
server/__pycache__/app.cpython-312.pyc CHANGED
Binary files a/server/__pycache__/app.cpython-312.pyc and b/server/__pycache__/app.cpython-312.pyc differ