| """Evaluate an LLM (with optional LoRA adapters) on CERNenv.
|
|
|
| Usage:
|
| python -m training.evaluate --model_name unsloth/Qwen2.5-3B-Instruct \\
|
| --difficulty easy --episodes 16 --tag pre_train \\
|
| --out training/runs/eval_pre_train.jsonl
|
|
|
| python -m training.evaluate --model_name unsloth/Qwen2.5-3B-Instruct \\
|
| --adapter_dir training/runs/unsloth-grpo --difficulty easy \\
|
| --episodes 16 --tag post_train --out training/runs/eval_post_train.jsonl
|
| """

from __future__ import annotations

import argparse
import logging
from pathlib import Path
from typing import Dict, List, Optional


logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)


def _build_generate_fn(
    *,
    model_name: str,
    adapter_dir: Optional[str],
    use_unsloth: bool,
    max_seq_length: int,
):
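    """Load the model and tokenizer (Unsloth or plain transformers) and return (prompt_fn, generate_fn)."""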
    if use_unsloth:
        from unsloth import FastLanguageModel

        # 4-bit quantized load; fast_inference turns on Unsloth's accelerated generation path.
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=max_seq_length,
            load_in_4bit=True,
            fast_inference=True,
        )
        if adapter_dir:
            model.load_adapter(adapter_dir)
        FastLanguageModel.for_inference(model)
    else:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
        )
        if adapter_dir:
            from peft import PeftModel

            model = PeftModel.from_pretrained(model, adapter_dir)

    # model.generate needs a pad token; reuse EOS when the tokenizer lacks one.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    def prompt_fn(chat: List[Dict[str, str]]) -> str:
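        """Render a chat history into a prompt string via the tokenizer's chat template."""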
        return tokenizer.apply_chat_template(
            chat, add_generation_prompt=True, tokenize=False
        )

    def generate_fn(prompt: str, config) -> str:
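        """Sample one completion for `prompt` using the agent's decoding settings."""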
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=config.max_new_tokens,
            do_sample=True,
            temperature=config.temperature,
            top_p=config.top_p,
            pad_token_id=tokenizer.pad_token_id,
        )
        # Decode only the newly generated tokens, not the echoed prompt.
        gen = outputs[0][inputs["input_ids"].shape[1]:]
        return tokenizer.decode(gen, skip_special_tokens=True)

    return prompt_fn, generate_fn
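
# A minimal smoke test of the returned pair (a sketch, not part of the eval flow;
# it assumes LLMAgentConfig exposes the max_new_tokens/temperature/top_p fields
# that generate_fn reads):
#
#     from training.llm_agent import LLMAgentConfig
#     prompt_fn, generate_fn = _build_generate_fn(
#         model_name="unsloth/Qwen2.5-3B-Instruct",
#         adapter_dir=None,
#         use_unsloth=False,
#         max_seq_length=2048,
#     )
#     prompt = prompt_fn([{"role": "user", "content": "Hello"}])
#     print(generate_fn(prompt, LLMAgentConfig()))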


def main() -> None:
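    """Roll out evaluation episodes on CERNenv and write the records to JSONL."""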
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", required=True)
    parser.add_argument("--adapter_dir", default=None)
    parser.add_argument("--scenario", default=None)
    parser.add_argument("--difficulty", choices=["easy", "medium", "hard"], default="easy")
    parser.add_argument("--episodes", type=int, default=16)
    parser.add_argument("--seed", type=int, default=1000)
    parser.add_argument("--max_steps", type=int, default=18)
    parser.add_argument("--max_seq_length", type=int, default=2048)
    parser.add_argument("--no_unsloth", action="store_true")
    parser.add_argument("--tag", default="eval")
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    # Imported lazily so `--help` works without the heavy ML dependencies installed.
    from server.environment import CERNCollisionEnvironment
    from training.llm_agent import LLMAgentConfig
    from training.rollouts import collect_episode, save_episodes_jsonl

    use_unsloth = not args.no_unsloth
    try:
        prompt_fn, generate_fn = _build_generate_fn(
            model_name=args.model_name,
            adapter_dir=args.adapter_dir,
            use_unsloth=use_unsloth,
            max_seq_length=args.max_seq_length,
        )
    except ImportError as exc:
        if not use_unsloth:
            raise  # transformers itself is missing; there is no further fallback
        logger.warning("Unsloth not available (%s); falling back to transformers.", exc)
        prompt_fn, generate_fn = _build_generate_fn(
            model_name=args.model_name,
            adapter_dir=args.adapter_dir,
            use_unsloth=False,
            max_seq_length=args.max_seq_length,
        )

    env = CERNCollisionEnvironment(max_steps=args.max_steps)
    cfg = LLMAgentConfig()

    episodes = []
    for ep in range(args.episodes):
        # Distinct seed per episode so each rollout draws a fresh scenario.
        seed = args.seed + ep
        rec = collect_episode(
            env=env,
            seed=seed,
            scenario=args.scenario,
            difficulty=args.difficulty,
            prompt_fn=prompt_fn,
            generate_fn=generate_fn,
            config=cfg,
        )
        episodes.append(rec)
        logger.info(
            "[%s][%d/%d] reward=%+.3f discovered=%s mass=%s channel=%s",
            args.tag, ep + 1, args.episodes,
            rec.cumulative_reward, rec.discovered, rec.correct_mass, rec.correct_channel,
        )

    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    save_episodes_jsonl(episodes, args.out)

    rewards = [e.cumulative_reward for e in episodes]
    success = sum(1 for e in episodes if e.discovered) / len(episodes)
    logger.info("[%s] mean_reward=%.3f success_rate=%.2f", args.tag, sum(rewards) / len(rewards), success)


if __name__ == "__main__":
    main()