"""Evaluate an LLM (with optional LoRA adapters) on CERNenv.

Usage:
    python -m training.evaluate --model_name unsloth/Qwen2.5-3B-Instruct \\
        --difficulty easy --episodes 16 --tag pre_train \\
        --out training/runs/eval_pre_train.jsonl
    python -m training.evaluate --model_name unsloth/Qwen2.5-3B-Instruct \\
        --adapter_dir training/runs/unsloth-grpo --difficulty easy \\
        --episodes 16 --tag post_train --out training/runs/eval_post_train.jsonl
"""
from __future__ import annotations
import argparse
import json
import logging
import os
from dataclasses import asdict
from pathlib import Path
from typing import Any, Dict, List, Optional
# Configure root logging when this module is imported: INFO level, with a
# timestamp and the level name prefixed to every message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
# Module-level logger used by main() for per-episode and summary messages.
logger = logging.getLogger(__name__)
def _build_generate_fn(
    *,
    model_name: str,
    adapter_dir: Optional[str],
    use_unsloth: bool,
    max_seq_length: int,
):
    """Load a causal LM and return ``(prompt_fn, generate_fn)`` closures over it.

    Args:
        model_name: Model identifier or path handed to ``from_pretrained``.
        adapter_dir: Optional directory with adapter weights to load on top of
            the base model; skipped when falsy.
        use_unsloth: When true, load through ``unsloth.FastLanguageModel``
            (4-bit, ``fast_inference=True``); otherwise load through plain
            ``transformers`` (plus ``peft`` if an adapter is requested).
        max_seq_length: Forwarded to the Unsloth loader. It is not used on the
            ``transformers`` path.

    Returns:
        A ``(prompt_fn, generate_fn)`` pair. ``prompt_fn(chat)`` renders a list
        of chat messages to a prompt string with the tokenizer's chat template
        (generation prompt appended). ``generate_fn(prompt, config)`` samples a
        completion and returns only the newly generated text; it reads
        ``max_new_tokens``, ``temperature`` and ``top_p`` from ``config``.

    Raises:
        ImportError: If the backend selected by ``use_unsloth`` (or ``peft``,
            when an adapter is requested on the ``transformers`` path) cannot
            be imported.
    """
    if use_unsloth:
        from unsloth import FastLanguageModel  # type: ignore

        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=max_seq_length,
            load_in_4bit=True,
            fast_inference=True,
        )
        if adapter_dir:
            model.load_adapter(adapter_dir)
        FastLanguageModel.for_inference(model)
    else:
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Half precision and automatic device placement only when CUDA is
        # available; otherwise full precision with default placement.
        has_cuda = torch.cuda.is_available()
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if has_cuda else torch.float32,
            device_map="auto" if has_cuda else None,
        )
        if adapter_dir:
            from peft import PeftModel  # type: ignore

            model = PeftModel.from_pretrained(model, adapter_dir)

    # generate() is given an explicit pad_token_id below, so make sure one
    # exists by reusing EOS when the tokenizer defines no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    def prompt_fn(chat: List[Dict[str, str]]) -> str:
        """Render chat messages to a single prompt string via the chat template."""
        return tokenizer.apply_chat_template(
            chat, add_generation_prompt=True, tokenize=False
        )

    def generate_fn(prompt: str, config) -> str:
        """Sample a completion for ``prompt`` and return only the new text."""
        # NOTE(review): assumes ``prompt`` was produced by ``prompt_fn`` above,
        # i.e. the chat template has already inserted the special tokens it
        # needs. Tokenizing with the default add_special_tokens=True would
        # prepend them a second time (e.g. a duplicate BOS) on tokenizers that
        # add one, so it is disabled here.
        inputs = tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=config.max_new_tokens,
            do_sample=True,
            temperature=config.temperature,
            top_p=config.top_p,
            pad_token_id=tokenizer.pad_token_id,
        )
        # The output sequence starts with the prompt tokens; slice them off so
        # only the continuation is decoded.
        prompt_len = inputs["input_ids"].shape[1]
        return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return prompt_fn, generate_fn
def main() -> None:  # pragma: no cover
    """Command-line entry point: run evaluation episodes and log a summary.

    Parses the CLI arguments, builds the prompt/generation callables for the
    requested model, collects ``--episodes`` episodes with consecutive seeds
    starting at ``--seed``, writes them to ``--out`` as JSONL, and logs the
    mean cumulative reward and the fraction of episodes marked ``discovered``.

    Raises:
        ImportError: If the model backend cannot be imported and there is no
            fallback left (Unsloth was already disabled, or the transformers
            fallback itself fails to import).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", required=True)
    parser.add_argument("--adapter_dir", default=None)
    parser.add_argument("--scenario", default=None)
    parser.add_argument("--difficulty", choices=["easy", "medium", "hard"], default="easy")
    parser.add_argument("--episodes", type=int, default=16)
    parser.add_argument("--seed", type=int, default=1000)
    parser.add_argument("--max_steps", type=int, default=18)
    parser.add_argument("--max_seq_length", type=int, default=2048)
    parser.add_argument("--no_unsloth", action="store_true")
    parser.add_argument("--tag", default="eval")
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    # Imported here rather than at module top, so these project modules are
    # only loaded once argument parsing has succeeded.
    from server.environment import CERNCollisionEnvironment
    from training.llm_agent import LLMAgentConfig
    from training.rollouts import collect_episode, save_episodes_jsonl

    use_unsloth = not args.no_unsloth
    loader_kwargs = {
        "model_name": args.model_name,
        "adapter_dir": args.adapter_dir,
        "max_seq_length": args.max_seq_length,
    }
    try:
        prompt_fn, generate_fn = _build_generate_fn(
            use_unsloth=use_unsloth, **loader_kwargs
        )
    except ImportError as exc:
        if not use_unsloth:
            # Already on the transformers path: retrying the same load cannot
            # succeed and the "Unsloth not available" message would be wrong.
            raise
        logger.warning("Unsloth not available (%s); falling back to transformers.", exc)
        prompt_fn, generate_fn = _build_generate_fn(
            use_unsloth=False, **loader_kwargs
        )

    env = CERNCollisionEnvironment(max_steps=args.max_steps)
    cfg = LLMAgentConfig()
    episodes = []
    for ep in range(args.episodes):
        # One distinct seed per episode, offset from the base seed.
        seed = args.seed + ep
        rec = collect_episode(
            env=env,
            seed=seed,
            scenario=args.scenario,
            difficulty=args.difficulty,
            prompt_fn=prompt_fn,
            generate_fn=generate_fn,
            config=cfg,
        )
        episodes.append(rec)
        logger.info(
            "[%s][%d/%d] reward=%+.3f discovered=%s mass=%s channel=%s",
            args.tag, ep + 1, args.episodes,
            rec.cumulative_reward, rec.discovered, rec.correct_mass, rec.correct_channel,
        )

    Path(args.out).parent.mkdir(parents=True, exist_ok=True)
    save_episodes_jsonl(episodes, args.out)

    # With --episodes <= 0 nothing was collected; averaging would divide by
    # zero, so report that instead of crashing after the file was written.
    if not episodes:
        logger.warning("[%s] no episodes were run; skipping summary.", args.tag)
        return

    total = len(episodes)
    mean_reward = sum(e.cumulative_reward for e in episodes) / total
    success = sum(1 for e in episodes if e.discovered) / total
    logger.info("[%s] mean_reward=%.3f success_rate=%.2f", args.tag, mean_reward, success)
# Run the evaluation CLI only when this file is executed as a script (for
# example via ``python -m training.evaluate``), not when it is imported.
if __name__ == "__main__":  # pragma: no cover
    main()
|