fix: derives speedup now
Browse files- .gitignore +3 -0
- app.py +31 -6
- env_server.py +18 -4
- openenv.yaml +3 -3
- openenv_train.py +15 -7
- pyproject.toml +1 -0
- requirements.txt +1 -0
- server/__pycache__/__init__.cpython-312.pyc +0 -0
- server/__pycache__/app.cpython-312.pyc +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__
|
| 2 |
+
venv/
|
| 3 |
+
.env
|
app.py
CHANGED
|
@@ -9,7 +9,20 @@ import traceback
|
|
| 9 |
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
log= []
|
| 14 |
env=KernelOptimization_env()
|
| 15 |
api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
|
|
@@ -17,9 +30,13 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
|
|
| 17 |
yield "ERROR: Missing OPENAI_API_KEY", ""
|
| 18 |
return
|
| 19 |
|
| 20 |
-
model = os.getenv("MODEL_NAME", "
|
| 21 |
-
client = OpenAI(api_key=api_key, base_url=os.getenv("API_BASE_URL", "https://api.
|
| 22 |
obs = env.reset(task_id=task_id)["observation"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
best_code = obs["current_best_code"]
|
| 24 |
log.append(f"Task: {obs['task_name']}")
|
| 25 |
|
|
@@ -34,7 +51,8 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
|
|
| 34 |
{"role": "user", "content": prompt},
|
| 35 |
],
|
| 36 |
)
|
| 37 |
-
|
|
|
|
| 38 |
step = env.step(Action(optimized_code=code, strategy="ui_proposed"))
|
| 39 |
obs = step.observation.model_dump()
|
| 40 |
best_code = obs["current_best_code"]
|
|
@@ -49,13 +67,20 @@ def ui(task_id:str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]
|
|
| 49 |
with gr.Blocks(title="CUDA Kernel Optimizer") as demo:
|
| 50 |
gr.Markdown("CUDA Kernel Optimizer - OpenEnv-aligned workflow")
|
| 51 |
task = gr.Dropdown(choices=list(TASKS.keys()), value="vector_add_easy", label="Task")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
steps = gr.Slider(minimum=1, maximum=12, value=6, step=1, label="Max Steps")
|
| 53 |
key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
|
| 54 |
run = gr.Button("Run Optimization", variant="primary")
|
| 55 |
logs = gr.Textbox(label="Logs", lines=14)
|
| 56 |
code = gr.Code(label="Best Code", language="cpp", lines=16)
|
| 57 |
-
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
if __name__ == "__main__":
|
| 61 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
|
|
|
| 9 |
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
+
def task_baseline_code(task_id: str) -> str:
|
| 13 |
+
return TASKS[task_id]["baseline_code"]
|
| 14 |
+
|
| 15 |
+
def extract_code(text: str) -> str:
|
| 16 |
+
if "```" not in text:
|
| 17 |
+
return text
|
| 18 |
+
start = text.find("```")
|
| 19 |
+
end = text.rfind("```")
|
| 20 |
+
chunk = text[start + 3 : end]
|
| 21 |
+
if chunk.startswith("cuda") or chunk.startswith("cpp"):
|
| 22 |
+
return chunk.split("\n", 1)[1]
|
| 23 |
+
return chunk
|
| 24 |
+
|
| 25 |
+
def ui(task_id:str, kernel_code: str, max_steps:int, openai_api_key:str)-> Iterator[Tuple[str,str]]:
|
| 26 |
log= []
|
| 27 |
env=KernelOptimization_env()
|
| 28 |
api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
|
|
|
|
| 30 |
yield "ERROR: Missing OPENAI_API_KEY", ""
|
| 31 |
return
|
| 32 |
|
| 33 |
+
model = os.getenv("MODEL_NAME", "llama-3.2-3b")
|
| 34 |
+
client = OpenAI(api_key=api_key, base_url=os.getenv("API_BASE_URL", "https://api.oxlo.ai/v1"))
|
| 35 |
obs = env.reset(task_id=task_id)["observation"]
|
| 36 |
+
if kernel_code and kernel_code.strip():
|
| 37 |
+
custom_code = kernel_code.strip()
|
| 38 |
+
env.state.best_code = custom_code
|
| 39 |
+
obs["current_best_code"] = custom_code
|
| 40 |
best_code = obs["current_best_code"]
|
| 41 |
log.append(f"Task: {obs['task_name']}")
|
| 42 |
|
|
|
|
| 51 |
{"role": "user", "content": prompt},
|
| 52 |
],
|
| 53 |
)
|
| 54 |
+
raw = (res.choices[0].message.content or "").strip()
|
| 55 |
+
code = extract_code(raw).strip() or obs["current_best_code"]
|
| 56 |
step = env.step(Action(optimized_code=code, strategy="ui_proposed"))
|
| 57 |
obs = step.observation.model_dump()
|
| 58 |
best_code = obs["current_best_code"]
|
|
|
|
| 67 |
with gr.Blocks(title="CUDA Kernel Optimizer") as demo:
|
| 68 |
gr.Markdown("CUDA Kernel Optimizer - OpenEnv-aligned workflow")
|
| 69 |
task = gr.Dropdown(choices=list(TASKS.keys()), value="vector_add_easy", label="Task")
|
| 70 |
+
kernel_input = gr.Code(
|
| 71 |
+
label="Kernel Code (editable, used as optimization input)",
|
| 72 |
+
language="cpp",
|
| 73 |
+
lines=16,
|
| 74 |
+
value=TASKS["vector_add_easy"]["baseline_code"],
|
| 75 |
+
)
|
| 76 |
steps = gr.Slider(minimum=1, maximum=12, value=6, step=1, label="Max Steps")
|
| 77 |
key = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
|
| 78 |
run = gr.Button("Run Optimization", variant="primary")
|
| 79 |
logs = gr.Textbox(label="Logs", lines=14)
|
| 80 |
code = gr.Code(label="Best Code", language="cpp", lines=16)
|
| 81 |
+
task.change(task_baseline_code, inputs=[task], outputs=[kernel_input])
|
| 82 |
+
run.click(ui, inputs=[task, kernel_input, steps, key], outputs=[logs, code])
|
| 83 |
|
| 84 |
|
| 85 |
if __name__ == "__main__":
|
| 86 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
env_server.py
CHANGED
|
@@ -112,8 +112,17 @@ class KernelOptimization_env:
|
|
| 112 |
self.current_task_id =task_id or random.choice(list(TASKS.keys()))
|
| 113 |
task= TASKS[self.current_task_id]
|
| 114 |
self.state =EnvState(initialized=True, task_id=self.current_task_id, step_count=0, max_steps=task["max_steps"], total_reward=0.0, best_code=task["baseline_code"], best_speedup=1.0, completed_checks=[], action_history=[])
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def step(self, action:Action) ->StepResult:
|
| 119 |
if not self.state.initialized or not self.current_task_id:
|
|
@@ -129,7 +138,12 @@ class KernelOptimization_env:
|
|
| 129 |
completed.update(newly_completed)
|
| 130 |
self.state.completed_checks = sorted(completed)
|
| 131 |
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
if est_speedup > self.state.best_speedup:
|
| 134 |
self.state.best_speedup = est_speedup
|
| 135 |
self.state.best_code = code
|
|
@@ -199,4 +213,4 @@ def step(request: StepRequest):
|
|
| 199 |
|
| 200 |
@app.get("/state")
|
| 201 |
def state():
|
| 202 |
-
return env.state_dict()
|
|
|
|
| 112 |
self.current_task_id =task_id or random.choice(list(TASKS.keys()))
|
| 113 |
task= TASKS[self.current_task_id]
|
| 114 |
self.state =EnvState(initialized=True, task_id=self.current_task_id, step_count=0, max_steps=task["max_steps"], total_reward=0.0, best_code=task["baseline_code"], best_speedup=1.0, completed_checks=[], action_history=[])
|
| 115 |
+
return {
|
| 116 |
+
"observation": to_observation(self.current_task_id, self.state).model_dump(),
|
| 117 |
+
"info": {
|
| 118 |
+
"task_id": self.current_task_id,
|
| 119 |
+
"task_name": task["name"],
|
| 120 |
+
"difficulty": task["difficulty"],
|
| 121 |
+
"max_steps": task["max_steps"],
|
| 122 |
+
"target_speedup": task["target_speedup"],
|
| 123 |
+
"checks": task["checks"],
|
| 124 |
+
},
|
| 125 |
+
}
|
| 126 |
|
| 127 |
def step(self, action:Action) ->StepResult:
|
| 128 |
if not self.state.initialized or not self.current_task_id:
|
|
|
|
| 138 |
completed.update(newly_completed)
|
| 139 |
self.state.completed_checks = sorted(completed)
|
| 140 |
|
| 141 |
+
completion_ratio = len(completed) / max(len(TASKS[self.current_task_id]["checks"]), 1)
|
| 142 |
+
max_reasonable_speedup = 1.0 + completion_ratio * 3.0
|
| 143 |
+
if action.expected_speedup is None:
|
| 144 |
+
est_speedup = round(max_reasonable_speedup, 3)
|
| 145 |
+
else:
|
| 146 |
+
est_speedup = round(max(1.0, min(action.expected_speedup, max_reasonable_speedup)), 3)
|
| 147 |
if est_speedup > self.state.best_speedup:
|
| 148 |
self.state.best_speedup = est_speedup
|
| 149 |
self.state.best_code = code
|
|
|
|
| 213 |
|
| 214 |
@app.get("/state")
|
| 215 |
def state():
|
| 216 |
+
return env.state_dict()
|
openenv.yaml
CHANGED
|
@@ -17,7 +17,7 @@ metadata:
|
|
| 17 |
author: aaloksan
|
| 18 |
|
| 19 |
tasks:
|
| 20 |
-
- id:
|
| 21 |
name: "Vector Addition Kernel Optimization"
|
| 22 |
difficulty: easy
|
| 23 |
objective: "Improve memory throughput while preserving correctness."
|
|
@@ -39,7 +39,7 @@ interfaces:
|
|
| 39 |
reset:
|
| 40 |
method: POST
|
| 41 |
path: /reset
|
| 42 |
-
returns: initial observation
|
| 43 |
step:
|
| 44 |
method: POST
|
| 45 |
path: /step
|
|
@@ -52,4 +52,4 @@ interfaces:
|
|
| 52 |
baseline:
|
| 53 |
script: inference.py
|
| 54 |
model_env_var: MODEL_NAME
|
| 55 |
-
api_key_env_var: OPENAI_API_KEY
|
|
|
|
| 17 |
author: aaloksan
|
| 18 |
|
| 19 |
tasks:
|
| 20 |
+
- id: vector_add_easy
|
| 21 |
name: "Vector Addition Kernel Optimization"
|
| 22 |
difficulty: easy
|
| 23 |
objective: "Improve memory throughput while preserving correctness."
|
|
|
|
| 39 |
reset:
|
| 40 |
method: POST
|
| 41 |
path: /reset
|
| 42 |
+
returns: initial observation, metadata info
|
| 43 |
step:
|
| 44 |
method: POST
|
| 45 |
path: /step
|
|
|
|
| 52 |
baseline:
|
| 53 |
script: inference.py
|
| 54 |
model_env_var: MODEL_NAME
|
| 55 |
+
api_key_env_var: OPENAI_API_KEY
|
openenv_train.py
CHANGED
|
@@ -11,8 +11,10 @@ class KernelOptTool:
|
|
| 11 |
self.reward = 0.0
|
| 12 |
self.done = False
|
| 13 |
|
| 14 |
-
def reset(self, **kwargs) ->str
|
| 15 |
-
task_id =kwargs.get("task_id")
|
|
|
|
|
|
|
| 16 |
result = self.env.reset(task_id=task_id)
|
| 17 |
obs = result["observation"]
|
| 18 |
self.reward = 0.0
|
|
@@ -24,10 +26,12 @@ class KernelOptTool:
|
|
| 24 |
"Use tools to submit improved code."
|
| 25 |
)
|
| 26 |
|
| 27 |
-
def
|
| 28 |
if self.done:
|
| 29 |
raise ValueError("Episode is already done.")
|
| 30 |
-
result = self.env.step(
|
|
|
|
|
|
|
| 31 |
self.reward = result.reward.value
|
| 32 |
self.done = result.done
|
| 33 |
obs = result.observation
|
|
@@ -37,8 +41,12 @@ class KernelOptTool:
|
|
| 37 |
f"pending_checks={obs.pending_checks}, done={result.done}"
|
| 38 |
)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def build_dataset(repeats_per_task:int=32)-> Dataset:
|
| 44 |
prompts, task_ids = [], []
|
|
@@ -67,4 +75,4 @@ def main():
|
|
| 67 |
# trainer = GRPOTrainer(model =model_name, train_dataset=dataset, reward_funcs =reward_func, env_factory=KernelOptTool)
|
| 68 |
|
| 69 |
if __name__ == "__main__":
|
| 70 |
-
main()
|
|
|
|
| 11 |
self.reward = 0.0
|
| 12 |
self.done = False
|
| 13 |
|
| 14 |
+
def reset(self, **kwargs) -> str:
|
| 15 |
+
task_id = kwargs.get("task_id")
|
| 16 |
+
if task_id is None and isinstance(kwargs.get("sample"), dict):
|
| 17 |
+
task_id = kwargs["sample"].get("task_id")
|
| 18 |
result = self.env.reset(task_id=task_id)
|
| 19 |
obs = result["observation"]
|
| 20 |
self.reward = 0.0
|
|
|
|
| 26 |
"Use tools to submit improved code."
|
| 27 |
)
|
| 28 |
|
| 29 |
+
def submit_optimization(self, optimized_code: str, strategy: str = "", expected_speedup: float | None = None) -> str:
|
| 30 |
if self.done:
|
| 31 |
raise ValueError("Episode is already done.")
|
| 32 |
+
result = self.env.step(
|
| 33 |
+
Action(optimized_code=optimized_code, strategy=strategy, expected_speedup=expected_speedup)
|
| 34 |
+
)
|
| 35 |
self.reward = result.reward.value
|
| 36 |
self.done = result.done
|
| 37 |
obs = result.observation
|
|
|
|
| 41 |
f"pending_checks={obs.pending_checks}, done={result.done}"
|
| 42 |
)
|
| 43 |
|
| 44 |
+
# Backward-compatible alias
|
| 45 |
+
def submit_optiization(self, optimized_code: str, strategy: str = "") -> str:
|
| 46 |
+
return self.submit_optimization(optimized_code=optimized_code, strategy=strategy)
|
| 47 |
+
|
| 48 |
+
def reward_func(environments, **kwargs) -> List[float]:
|
| 49 |
+
return [env.reward for env in environments]
|
| 50 |
|
| 51 |
def build_dataset(repeats_per_task:int=32)-> Dataset:
|
| 52 |
prompts, task_ids = [], []
|
|
|
|
| 75 |
# trainer = GRPOTrainer(model =model_name, train_dataset=dataset, reward_funcs =reward_func, env_factory=KernelOptTool)
|
| 76 |
|
| 77 |
if __name__ == "__main__":
|
| 78 |
+
main()
|
pyproject.toml
CHANGED
|
@@ -16,6 +16,7 @@ dependencies = [
|
|
| 16 |
"openenv-core==0.1.1",
|
| 17 |
"python-dotenv>=1.0.0",
|
| 18 |
"pillow>=10.4.0,<11",
|
|
|
|
| 19 |
"gradio>=4.44.0",
|
| 20 |
"datasets>=2.20.0",
|
| 21 |
"trl>=0.12.0"
|
|
|
|
| 16 |
"openenv-core==0.1.1",
|
| 17 |
"python-dotenv>=1.0.0",
|
| 18 |
"pillow>=10.4.0,<11",
|
| 19 |
+
"audioop-lts>=0.2.2; python_version >= '3.13'",
|
| 20 |
"gradio>=4.44.0",
|
| 21 |
"datasets>=2.20.0",
|
| 22 |
"trl>=0.12.0"
|
requirements.txt
CHANGED
|
@@ -10,3 +10,4 @@ pytest>=7.4.0
|
|
| 10 |
pyyaml>=6.0.0
|
| 11 |
python-dotenv
|
| 12 |
pillow>=10.4.0,<11
|
|
|
|
|
|
| 10 |
pyyaml>=6.0.0
|
| 11 |
python-dotenv
|
| 12 |
pillow>=10.4.0,<11
|
| 13 |
+
audioop-lts>=0.2.2; python_version >= "3.13"
|
server/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/server/__pycache__/__init__.cpython-312.pyc and b/server/__pycache__/__init__.cpython-312.pyc differ
|
|
|
server/__pycache__/app.cpython-312.pyc
CHANGED
|
Binary files a/server/__pycache__/app.cpython-312.pyc and b/server/__pycache__/app.cpython-312.pyc differ
|
|
|