| |
| |
|
|
import argparse
import math
import os
import warnings
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import wandb
from vllm import LLM
|
|
def build_pair_prompts(chosen_prompt: Optional[str],
                       chosen: Optional[str],
                       reject: Optional[str],
                       joiner: str = "\n") -> Tuple[str, str]:
    """Build the two model inputs for one preference sample.

    Each input is the shared prompt followed by ``joiner`` and one of the
    two candidate responses. Trailing whitespace is stripped from the
    prompt and from both responses before they are joined.

    Args:
        chosen_prompt: Prompt text shared by both candidates. A missing
            value (``None`` or float NaN) is treated as an empty string.
        chosen: The preferred response; missing values become ``""``.
        reject: The rejected response; missing values become ``""``.
        joiner: Separator inserted between the prompt and the response.

    Returns:
        A ``(prompt_chosen, prompt_reject)`` tuple.
    """

    def _normalize(text: Optional[str]) -> str:
        # A missing cell may arrive as None or as a float NaN; neither has
        # ``rstrip``, so both are mapped to the empty string.
        if text is None or (isinstance(text, float) and math.isnan(text)):
            return ""
        return text.rstrip()

    prefix = _normalize(chosen_prompt)
    prompt_chosen = f"{prefix}{joiner}{_normalize(chosen)}"
    prompt_reject = f"{prefix}{joiner}{_normalize(reject)}"
    return prompt_chosen, prompt_reject
|
|
# Parquet file holding the preference pairs to evaluate.
data_path = "/home/data/raw/test/1159-L6_format_full_label_v5.0safe.parquet"
if not os.path.exists(data_path):
    raise FileNotFoundError(f"数据文件不存在:{data_path}")

df = pd.read_parquet(data_path)

# Fail fast on the first required column that the file does not provide.
required_cols = ["chosen_prompt", "chosen", "reject"]
for c in required_cols:
    if c in df.columns:
        continue
    raise ValueError(
        f"数据缺少必要列 `{c}`,实际列为:{list(df.columns)}"
    )

num_samples = len(df)
print(f"Loaded {num_samples} samples from {data_path}")

# Load the model from a local path, configured through vLLM's
# runner="pooling" / convert="reward" options.
llm = LLM(model="/home/rm5.0_9e-6", runner="pooling", convert="reward")
|
|
# Pairwise evaluation: a sample counts as correct when the model scores the
# chosen response strictly higher than the rejected one.
results = []
correct_cnt = 0
total_cnt = 0
skipped_cnt = 0  # samples whose batch failed and is excluded from accuracy
batch_size = 16

num_batches = math.ceil(num_samples / batch_size)

print("\nStart pairwise reward evaluation...\n" + "-" * 70)
for b in range(num_batches):
    start = b * batch_size
    end = min((b + 1) * batch_size, num_samples)
    batch = df.iloc[start:end]

    # Inputs are interleaved as [chosen_0, reject_0, chosen_1, reject_1, ...]
    # so that sample i owns positions 2*i and 2*i + 1 of the output list.
    pair_prompts = []
    indices = []
    for idx, row in batch.iterrows():
        prompt_chosen, prompt_reject = build_pair_prompts(
            row["chosen_prompt"], row["chosen"], row["reject"], joiner=""
        )
        pair_prompts.append(prompt_chosen)
        pair_prompts.append(prompt_reject)
        indices.append(idx)

    try:
        outputs = llm.reward(pair_prompts)
    except Exception as e:
        # Best effort: a failing batch is skipped, but counted so the final
        # report shows how many samples the accuracy does not cover.
        warnings.warn(f"llm.reward 执行失败(batch={b}):{e}")
        skipped_cnt += len(indices)
        continue

    scalar_scores = []
    for out in outputs:
        # The last element of the pooled data is used as the reward.
        # NOTE(review): assumes that element holds exactly one value --
        # confirm against the model's pooling output shape.
        # float() yields a plain Python number, so the comparison, the
        # ``:.6f`` formatting and the printed ``Correct=`` flag below do not
        # depend on the array/tensor type returned by the backend.
        score = float(out.outputs.data[-1])
        if math.isnan(score):
            # A NaN score is replaced by a very low sentinel so that it can
            # never win the comparison.
            score = -1e30
        scalar_scores.append(score)

    for i, idx in enumerate(indices):
        chosen_score = scalar_scores[2 * i]
        reject_score = scalar_scores[2 * i + 1]
        total_cnt += 1
        is_correct = chosen_score > reject_score
        correct_cnt += int(is_correct)
        running_acc = correct_cnt / total_cnt

        print(
            f"[{total_cnt:6d}] "
            f"Chosen={chosen_score:.6f} | Reject={reject_score:.6f} | "
            f"Correct={is_correct} | RunningAcc={running_acc*100:.2f}%"
        )

final_acc = (correct_cnt / total_cnt) if total_cnt > 0 else 0.0
print("\n" + "-" * 70)
print(f"Finished. Total={total_cnt}, Correct={correct_cnt}, "
      f"FinalAcc={final_acc*100:.2f}%")
if skipped_cnt:
    # Make it explicit that the accuracy above excludes the failed batches.
    warnings.warn(
        f"{skipped_cnt} of {num_samples} samples were skipped because "
        f"llm.reward failed; FinalAcc covers only the scored samples."
    )
|
|
|
|
|
|
|
|
|
|