# Spaces: Sleeping
# Smoke check for the graders: for each task, print the score obtained with no
# output files present, then the score after a reference solution has been
# written into the environment's in-memory filesystem.
import json

from graders import grade, grade_task1, grade_task2, grade_task3
from environment import FrontierLabsEnv

# Reference file contents handed to the graders.
# NOTE(review): the fixture text is reproduced exactly as it appears in the
# source; any indentation inside the kernel body may have been lost upstream
# -- confirm against the original script.
_FSDP_TRAIN_SCRIPT = """
import torch
import torch.distributed as dist
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp import ShardingStrategy
dist.init_process_group("nccl")
"""

_SILU_KERNEL_SCRIPT = """
import triton
import triton.language as tl
import torch
@triton.jit
def kernel(x_ptr, gate_ptr):
x = tl.load(x_ptr)
gate = tl.load(gate_ptr)
y = x * sigmoid(x) * gate
tl.store(gate_ptr, y)
"""


def _install_task1_solution(files):
    """Write the rows labelled "clean" and an f1_score=1.0 report into *files*."""
    records = (
        json.loads(raw_line)
        for raw_line in files["dataset.jsonl"].strip().split("\n")
    )
    clean_records = [record for record in records if record["label"] == "clean"]
    files["cleaned_dataset.jsonl"] = "\n".join(
        json.dumps(record) for record in clean_records
    )
    files["metrics_report.json"] = json.dumps({"f1_score": 1.0})


def _install_task2_solution(files):
    """Store the reference FSDP training script in *files*."""
    files["train_fsdp.py"] = _FSDP_TRAIN_SCRIPT


def _install_task3_solution(files):
    """Store the reference Triton kernel source in *files*."""
    files["fast_silu_kernel.py"] = _SILU_KERNEL_SCRIPT


# Task name -> function that writes that task's reference solution.
# Insertion order is the order in which the tasks are graded.
_SOLUTION_INSTALLERS = {
    "task1_security_audit": _install_task1_solution,
    "task2_fsdp_cluster": _install_task2_solution,
    "task3_triton_kernel": _install_task3_solution,
}

env = FrontierLabsEnv()


def _print_score(task, label):
    """Grade *task* against the current environment and print its score."""
    outcome = grade(task, env.state(), env.get_filesystem_file)
    print(f"{task} {label}: score = {outcome['score']}")


for task, install_solution in _SOLUTION_INSTALLERS.items():
    env.reset(task)

    # Baseline: nothing has been written to the filesystem yet.
    _print_score(task, "empty")

    # Write the reference solution and grade again.
    install_solution(env._filesystem)
    _print_score(task, "perfect")