Spaces:

aryxn323
/

FrontierLabs-Env

Sleeping

App Files Files Community

FrontierLabs-Env / test_grader.py

aryxn323

Upload 29 files

3dc0b03 verified 30 days ago

raw

history blame contribute delete

1.9 kB

	import json
	import textwrap

	from graders import grade, grade_task1, grade_task2, grade_task3
	from environment import FrontierLabsEnv

	# Smoke test for the graders: every task is graded twice, first on a freshly
	# reset environment (no solution files) and then after a hand-written
	# "golden" solution has been placed in the environment's filesystem.

	# Golden solution for task2. dedent() keeps the embedded source flush-left no
	# matter how this script itself is indented.
	FSDP_SOLUTION = textwrap.dedent("""
	    import torch
	    import torch.distributed as dist
	    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
	    from torch.distributed.fsdp import ShardingStrategy
	    dist.init_process_group("nccl")
	""")

	# Golden solution for task3. The kernel body is indented under its `def`, and
	# the activation is spelled `tl.sigmoid`: a bare `sigmoid` is not defined by
	# any of the snippet's imports.
	TRITON_SOLUTION = textwrap.dedent("""
	    import triton
	    import triton.language as tl
	    import torch
	    @triton.jit
	    def kernel(x_ptr, gate_ptr):
	        x = tl.load(x_ptr)
	        gate = tl.load(gate_ptr)
	        y = x * tl.sigmoid(x) * gate
	        tl.store(gate_ptr, y)
	""")


	def _clean_dataset(raw_jsonl: str) -> str:
	    """Return the JSONL text holding only the records labelled "clean".

	    Args:
	        raw_jsonl: Dataset text with one JSON object per line.

	    Returns:
	        The kept records re-serialised with ``json.dumps`` and joined by
	        newlines; an empty string when nothing is kept.

	    Raises:
	        json.JSONDecodeError: If a non-blank line is not valid JSON.
	    """
	    kept = []
	    for line in raw_jsonl.strip().split("\n"):
	        # An empty dataset still yields one empty line here; skip blanks
	        # instead of handing them to json.loads.
	        if not line.strip():
	            continue
	        record = json.loads(line)
	        # .get(): a record without a "label" key is treated as not clean.
	        if record.get("label") == "clean":
	            kept.append(record)
	    return "\n".join(json.dumps(record) for record in kept)


	# NOTE(review): the writers below assign into env._filesystem, a private
	# attribute, exactly as the original script did -- confirm whether the
	# environment offers a public way to write files.
	def _write_task1_solution(env):
	    """Store the filtered dataset and a perfect metrics report for task1."""
	    files = env._filesystem
	    files["cleaned_dataset.jsonl"] = _clean_dataset(files["dataset.jsonl"])
	    files["metrics_report.json"] = json.dumps({"f1_score": 1.0})


	def _write_task2_solution(env):
	    """Store the golden FSDP training script for task2."""
	    env._filesystem["train_fsdp.py"] = FSDP_SOLUTION


	def _write_task3_solution(env):
	    """Store the golden Triton kernel for task3."""
	    env._filesystem["fast_silu_kernel.py"] = TRITON_SOLUTION


	# Task name -> function that writes that task's golden solution. Insertion
	# order is the order in which the tasks are graded.
	GOLDEN_WRITERS = {
	    "task1_security_audit": _write_task1_solution,
	    "task2_fsdp_cluster": _write_task2_solution,
	    "task3_triton_kernel": _write_task3_solution,
	}


	def _report(env, task, label):
	    """Grade ``task`` against the current env contents and print the score."""
	    res = grade(task, env.state(), env.get_filesystem_file)
	    print(f"{task} {label}: score = {res['score']}")


	env = FrontierLabsEnv()

	for task, write_solution in GOLDEN_WRITERS.items():
	    env.reset(task)
	    # Baseline: grade before any solution file exists.
	    _report(env, task, "empty")
	    # Then drop in the golden solution and grade again.
	    write_solution(env)
	    _report(env, task, "perfect")