FrontierLabs-Env / test_grader.py
aryxn323's picture
Upload 29 files
3dc0b03 verified
import json
from graders import grade, grade_task1, grade_task2, grade_task3
from environment import FrontierLabsEnv
# Smoke-test driver for the graders.
#
# For every task the environment is reset and graded twice:
#   1. immediately after the reset, before this script writes any file;
#   2. after a hand-written "golden" submission has been placed in the
#      environment's in-memory filesystem.
# Both scores are printed so they can be compared by eye; nothing is asserted.

# Golden submission for task2_fsdp_cluster, stored verbatim as train_fsdp.py.
_GOLDEN_TRAIN_FSDP = """
import torch
import torch.distributed as dist
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp import ShardingStrategy
dist.init_process_group("nccl")
"""

# Golden submission for task3_triton_kernel, stored verbatim as
# fast_silu_kernel.py.
# NOTE(review): the text is kept exactly as it was. As written, the kernel
# body is not indented, calls a bare `sigmoid`, and stores the result back
# through `gate_ptr`. Presumably the grader only inspects the text; confirm
# before treating this as runnable Triton code.
_GOLDEN_SILU_KERNEL = """
import triton
import triton.language as tl
import torch
@triton.jit
def kernel(x_ptr, gate_ptr):
x = tl.load(x_ptr)
gate = tl.load(gate_ptr)
y = x * sigmoid(x) * gate
tl.store(gate_ptr, y)
"""

env = FrontierLabsEnv()

for task in ["task1_security_audit", "task2_fsdp_cluster", "task3_triton_kernel"]:
    env.reset(task)

    # Baseline: grade before any submission file has been written.
    res = grade(task, env.state(), env.get_filesystem_file)
    print(f"{task} empty: score = {res['score']}")

    # Write the golden submission for this task straight into the
    # environment's filesystem mapping.
    if task == "task1_security_audit":
        # Keep only the records labelled "clean". Split strictly on "\n"
        # (JSON Lines) and skip blank lines so that a stray empty line or an
        # empty dataset does not reach json.loads and raise.
        records = [
            json.loads(line)
            for line in env._filesystem["dataset.jsonl"].split("\n")
            if line.strip()
        ]
        cleaned = [record for record in records if record["label"] == "clean"]
        env._filesystem["cleaned_dataset.jsonl"] = "\n".join(
            json.dumps(record) for record in cleaned
        )
        report = {"f1_score": 1.0}
        env._filesystem["metrics_report.json"] = json.dumps(report)
    elif task == "task2_fsdp_cluster":
        env._filesystem["train_fsdp.py"] = _GOLDEN_TRAIN_FSDP
    elif task == "task3_triton_kernel":
        env._filesystem["fast_silu_kernel.py"] = _GOLDEN_SILU_KERNEL

    # Every task listed above matches one branch, so a single re-grade here
    # replaces the identical grade/print pair each branch used to repeat.
    res = grade(task, env.state(), env.get_filesystem_file)
    print(f"{task} perfect: score = {res['score']}")