mr233 committed on
Commit
57c60a1
·
verified ·
1 Parent(s): 0c0761d

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +60 -0
README.md CHANGED
@@ -68,3 +68,63 @@ TokenHD models are evaluated with two metrics:
68
 
69
  - **S_incor**: Token-level F1 on hallucinated (incorrect) responses — measures how precisely the detector localizes errors.
70
  - **S_cor**: Recall on hallucination-free (correct) responses — measures how rarely the detector raises false alarms.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  - **S_incor**: Token-level F1 on hallucinated (incorrect) responses — measures how precisely the detector localizes errors.
70
  - **S_cor**: Recall on hallucination-free (correct) responses — measures how rarely the detector raises false alarms.
71
+
72
+ ---
73
+
74
+ ## Evaluation
75
+
76
+ Evaluate using the [TokenHD eval dataset](https://huggingface.co/datasets/mr233/TokenHD-eval-data):
77
+
78
+ ```python
79
+ from datasets import load_dataset
80
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
81
+ import torch
82
+ import numpy as np
83
+
84
def hard_f1(y_true, y_pred):
    """Compute precision, recall and F1 between two binary label sequences.

    The positive class is label ``1``. When ``y_true`` contains no positive
    label (its maximum is 0, or it is empty), both sequences are flipped
    (``1 - x``) so that the metrics are computed on the ``0`` class instead
    of degenerating to zero.

    Args:
        y_true: Array-like of ground-truth labels in {0, 1}.
        y_pred: Array-like of predicted labels in {0, 1}, same shape as
            ``y_true``.

    Returns:
        A ``(precision, recall, f1)`` tuple. Each denominator is padded with
        ``1e-7`` so that empty classes yield ``0.0`` instead of dividing by
        zero; values are therefore marginally below the exact ratios.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Accept lists/tuples as well as arrays; ``1 - x`` below needs an ndarray.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Fail loudly rather than letting NumPy broadcast misaligned labels.
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )

    # ``size == 0`` is checked first because ``max`` of an empty array raises.
    if y_true.size == 0 or y_true.max() == 0:
        y_true, y_pred = 1 - y_true, 1 - y_pred

    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))

    precision = tp / (tp + fp + 1e-7)
    recall = tp / (tp + fn + 1e-7)
    f1 = 2 * precision * recall / (precision + recall + 1e-7)
    return precision, recall, f1
94
+
95
# Load the detector as a single-logit token classifier: one score per token.
model_id = "mr233/TokenHD-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(
    model_id, num_labels=1, torch_dtype=torch.bfloat16, device_map="auto"
)
model.eval()

dataset = load_dataset(
    "mr233/TokenHD-eval-data",
    data_files="tokenhd_eval_math_500.jsonl",
    split="train",
)

# Per-example scores, split by whether the response is marked hallucinated.
f1_incor, recall_cor = [], []
for item in dataset:
    problem, raw_answer = item["problem"], item["raw_answer"]
    token_weights_gt = np.array(item["token_weights"], dtype=np.float32)
    if token_weights_gt.size == 0:
        # With zero labels, ``scores[-0:]`` below would select the whole
        # sequence and the metric would be computed on misaligned arrays.
        continue
    gt_hard = (token_weights_gt > 0.5).astype(np.float32)

    messages = [
        {"role": "user", "content": problem},
        {"role": "assistant", "content": raw_answer},
    ]
    # NOTE(review): ``[:-2]`` drops the last two template tokens, presumably
    # the end-of-turn marker and trailing newline - confirm against the
    # tokenizer's chat template.
    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=False
    )[:-2]
    input_tensor = torch.tensor(input_ids, device=model.device).unsqueeze(0)

    with torch.no_grad():
        logits = model(input_ids=input_tensor).logits

    # Keep only the trailing positions that line up with the ground-truth
    # labels, i.e. the last ``len(token_weights_gt)`` tokens of the input.
    scores = torch.sigmoid(logits.squeeze(-1).squeeze(0))[-len(token_weights_gt):]
    pred_hard = (scores.float().cpu().numpy() > 0.5).astype(np.float32)

    _, recall, f1 = hard_f1(gt_hard, pred_hard)
    if item["correctness"] == -1:
        f1_incor.append(f1)
    else:
        # S_cor is defined and labelled as recall, so collect recall here
        # rather than F1 (which would overstate it whenever precision is 1).
        recall_cor.append(recall)

# Guard the means so an empty split reports NaN without a RuntimeWarning.
s_incor = np.mean(f1_incor) * 100 if f1_incor else float("nan")
s_cor = np.mean(recall_cor) * 100 if recall_cor else float("nan")
print(f"S_incor (F1 on hallucinated): {s_incor:.2f}")
print(f"S_cor (recall on correct): {s_cor:.2f}")
130
+ ```