Jayant-Kernel committed on
Commit
0bdaeb6
·
1 Parent(s): 20bb6de

update: compare 0.5B vs 1.5B trained models

Browse files
Files changed (1) hide show
  1. evaluate.py +43 -56
evaluate.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, json, re, torch, pathlib, gc
2
  import threading
3
  from http.server import HTTPServer, BaseHTTPRequestHandler
4
 
@@ -10,24 +10,22 @@ class HealthHandler(BaseHTTPRequestHandler):
10
  def log_message(self, format, *args):
11
  pass
12
 
13
- def start_health_server():
14
- server = HTTPServer(("0.0.0.0", 7860), HealthHandler)
15
- server.serve_forever()
16
-
17
- health_thread = threading.Thread(target=start_health_server, daemon=True)
18
  health_thread.start()
19
  print("Health server started on port 7860")
20
 
21
- from unsloth import FastLanguageModel
22
  from deceit_env.server.environment import DeceitEnvironment
23
  from deceit_env.server.grader import Grader
24
  from deceit_env.models import DeceitAction
25
  import matplotlib.pyplot as plt
26
  import matplotlib
27
  matplotlib.use('Agg')
 
28
 
29
- # Auth
30
- from huggingface_hub import login
31
  login(token=os.environ["HF_TOKEN"])
32
  os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "")
33
  os.environ["DECEIT_GRADER_CACHE"] = "/tmp/eval_cache.json"
@@ -56,15 +54,20 @@ def parse_action(text):
56
 
57
  def evaluate_model(model_name, label, n_episodes=30):
58
  print(f"\nEvaluating: {label}")
59
- model, tokenizer = FastLanguageModel.from_pretrained(
60
- model_name=model_name,
61
- max_seq_length=1024,
62
- dtype=None,
63
  load_in_4bit=True,
 
 
 
 
 
 
 
 
64
  )
65
- FastLanguageModel.for_inference(model)
 
66
 
67
- # Download dataset from GitHub
68
  import urllib.request as _ur
69
  _RAW = "https://raw.githubusercontent.com/Jayant-kernel/DECEIT-the-ai-truth-environment-/main/src/deceit_env/data"
70
  for _fname in ["level1.jsonl", "level2.jsonl", "level3.jsonl"]:
@@ -119,7 +122,6 @@ def evaluate_model(model_name, label, n_episodes=30):
119
  if (i+1) % 10 == 0:
120
  print(f" {i+1}/{n_episodes} done, mean reward so far: {sum(rewards)/len(rewards):.3f}")
121
 
122
- # Free model from GPU
123
  del model
124
  torch.cuda.empty_cache()
125
  gc.collect()
@@ -133,85 +135,70 @@ def evaluate_model(model_name, label, n_episodes=30):
133
  "rewards": rewards,
134
  }
135
 
136
- # Evaluate both models — 200 episodes each (env samples randomly with replacement)
137
- base_results = evaluate_model("unsloth/Qwen2.5-0.5B-Instruct", "Base Model (untrained)", n_episodes=200)
138
- trained_results = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT Trained", n_episodes=30)
139
 
140
- # Print comparison
141
  print("\n" + "="*60)
142
  print("RESULTS COMPARISON")
143
  print("="*60)
144
- for r in [base_results, trained_results]:
145
  print(f"\n{r['label']}:")
146
  print(f" Mean Reward: {r['mean_reward']:+.3f}")
147
  print(f" Accuracy: {r['accuracy']*100:.1f}%")
148
  print(f" Confident Wrong Rate: {r['confident_wrong_rate']*100:.1f}% <- sycophancy proxy")
149
  print(f" Abstain Rate: {r['abstain_rate']*100:.1f}%")
150
 
151
- # Plot 1 — Reward comparison bar chart
152
  fig, axes = plt.subplots(1, 3, figsize=(14, 5))
153
-
154
- models = [base_results["label"], trained_results["label"]]
155
  colors = ["#e74c3c", "#2ecc71"]
156
 
157
- # Bar 1 — Mean reward
158
- axes[0].bar(models, [base_results["mean_reward"], trained_results["mean_reward"]], color=colors)
159
  axes[0].axhline(y=0, color="gray", linestyle="--", alpha=0.5)
160
  axes[0].set_title("Mean Episode Reward")
161
  axes[0].set_ylabel("Reward")
162
 
163
- # Bar 2 — Accuracy
164
- axes[1].bar(models, [base_results["accuracy"]*100, trained_results["accuracy"]*100], color=colors)
165
  axes[1].set_title("Answer Accuracy (%)")
166
  axes[1].set_ylabel("Accuracy %")
167
  axes[1].set_ylim(0, 100)
168
 
169
- # Bar 3 — Confident wrong rate (sycophancy proxy)
170
- axes[2].bar(models, [base_results["confident_wrong_rate"]*100, trained_results["confident_wrong_rate"]*100], color=colors)
171
  axes[2].set_title("Confident Wrong Rate %\n(Sycophancy Proxy - lower is better)")
172
  axes[2].set_ylabel("%")
173
  axes[2].set_ylim(0, 100)
174
 
175
- plt.suptitle("DECEIT: Base Model vs Trained Model\n(Qwen 2.5 0.5B, 200 episodes each)", fontsize=13)
176
  plt.tight_layout()
177
- plt.savefig("comparison_chart.png", dpi=150, bbox_inches="tight")
178
  print("\nSaved comparison_chart.png")
179
 
180
  # Plot 2 — Reward distribution
181
  fig2, ax = plt.subplots(figsize=(10, 5))
182
- ax.hist(base_results["rewards"], bins=15, alpha=0.6, color="#e74c3c", label="Base Model")
183
- ax.hist(trained_results["rewards"], bins=15, alpha=0.6, color="#2ecc71", label="DECEIT Trained")
184
  ax.axvline(x=0, color="gray", linestyle="--", alpha=0.5)
185
  ax.set_xlabel("Episode Reward")
186
  ax.set_ylabel("Count")
187
- ax.set_title("Reward Distribution: Base vs Trained")
188
  ax.legend()
189
  plt.tight_layout()
190
- plt.savefig("reward_distribution.png", dpi=150, bbox_inches="tight")
191
  print("Saved reward_distribution.png")
192
 
193
- print("\nDone! Download comparison_chart.png and reward_distribution.png")
194
-
195
- from huggingface_hub import upload_file
196
- import time
197
-
198
  try:
199
- upload_file(
200
- path_or_fileobj="comparison_chart.png",
201
- path_in_repo="comparison_chart.png",
202
- repo_id="Ajsaxena/deceit-qwen-0.5b-full",
203
- repo_type="model"
204
- )
205
- upload_file(
206
- path_or_fileobj="reward_distribution.png",
207
- path_in_repo="reward_distribution.png",
208
- repo_id="Ajsaxena/deceit-qwen-0.5b-full",
209
- repo_type="model"
210
- )
211
- print("Charts uploaded to HF Hub successfully!")
212
  except Exception as e:
213
  print(f"Upload error: {e}")
214
 
215
- print("Keeping alive for 120 seconds...")
216
- time.sleep(120)
217
  print("Done.")
 
1
+ import os, json, re, torch, pathlib, gc, time
2
  import threading
3
  from http.server import HTTPServer, BaseHTTPRequestHandler
4
 
 
10
  def log_message(self, format, *args):
11
  pass
12
 
13
+ health_thread = threading.Thread(
14
+ target=lambda: HTTPServer(("0.0.0.0", 7860), HealthHandler).serve_forever(),
15
+ daemon=True
16
+ )
 
17
  health_thread.start()
18
  print("Health server started on port 7860")
19
 
20
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
21
  from deceit_env.server.environment import DeceitEnvironment
22
  from deceit_env.server.grader import Grader
23
  from deceit_env.models import DeceitAction
24
  import matplotlib.pyplot as plt
25
  import matplotlib
26
  matplotlib.use('Agg')
27
+ from huggingface_hub import login, upload_file
28
 
 
 
29
  login(token=os.environ["HF_TOKEN"])
30
  os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY", "")
31
  os.environ["DECEIT_GRADER_CACHE"] = "/tmp/eval_cache.json"
 
54
 
55
  def evaluate_model(model_name, label, n_episodes=30):
56
  print(f"\nEvaluating: {label}")
57
+ bnb_config = BitsAndBytesConfig(
 
 
 
58
  load_in_4bit=True,
59
+ bnb_4bit_quant_type="nf4",
60
+ bnb_4bit_compute_dtype=torch.bfloat16,
61
+ )
62
+ model = AutoModelForCausalLM.from_pretrained(
63
+ model_name,
64
+ quantization_config=bnb_config,
65
+ device_map="auto",
66
+ trust_remote_code=True,
67
  )
68
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
69
+ tokenizer.pad_token = tokenizer.eos_token
70
 
 
71
  import urllib.request as _ur
72
  _RAW = "https://raw.githubusercontent.com/Jayant-kernel/DECEIT-the-ai-truth-environment-/main/src/deceit_env/data"
73
  for _fname in ["level1.jsonl", "level2.jsonl", "level3.jsonl"]:
 
122
  if (i+1) % 10 == 0:
123
  print(f" {i+1}/{n_episodes} done, mean reward so far: {sum(rewards)/len(rewards):.3f}")
124
 
 
125
  del model
126
  torch.cuda.empty_cache()
127
  gc.collect()
 
135
  "rewards": rewards,
136
  }
137
 
138
+ results_05b = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT 0.5B Trained", n_episodes=30)
139
+ results_15b = evaluate_model("Ajsaxena/deceit-qwen-1.5b-full", "DECEIT 1.5B Trained", n_episodes=30)
 
140
 
 
141
  print("\n" + "="*60)
142
  print("RESULTS COMPARISON")
143
  print("="*60)
144
+ for r in [results_05b, results_15b]:
145
  print(f"\n{r['label']}:")
146
  print(f" Mean Reward: {r['mean_reward']:+.3f}")
147
  print(f" Accuracy: {r['accuracy']*100:.1f}%")
148
  print(f" Confident Wrong Rate: {r['confident_wrong_rate']*100:.1f}% <- sycophancy proxy")
149
  print(f" Abstain Rate: {r['abstain_rate']*100:.1f}%")
150
 
151
+ # Plot 1 — Comparison bar chart
152
  fig, axes = plt.subplots(1, 3, figsize=(14, 5))
153
+ models = [results_05b["label"], results_15b["label"]]
 
154
  colors = ["#e74c3c", "#2ecc71"]
155
 
156
+ axes[0].bar(models, [results_05b["mean_reward"], results_15b["mean_reward"]], color=colors)
 
157
  axes[0].axhline(y=0, color="gray", linestyle="--", alpha=0.5)
158
  axes[0].set_title("Mean Episode Reward")
159
  axes[0].set_ylabel("Reward")
160
 
161
+ axes[1].bar(models, [results_05b["accuracy"]*100, results_15b["accuracy"]*100], color=colors)
 
162
  axes[1].set_title("Answer Accuracy (%)")
163
  axes[1].set_ylabel("Accuracy %")
164
  axes[1].set_ylim(0, 100)
165
 
166
+ axes[2].bar(models, [results_05b["confident_wrong_rate"]*100, results_15b["confident_wrong_rate"]*100], color=colors)
 
167
  axes[2].set_title("Confident Wrong Rate %\n(Sycophancy Proxy - lower is better)")
168
  axes[2].set_ylabel("%")
169
  axes[2].set_ylim(0, 100)
170
 
171
+ plt.suptitle("DECEIT: 0.5B vs 1.5B Trained Model Comparison", fontsize=13)
172
  plt.tight_layout()
173
+ plt.savefig("/tmp/comparison_chart.png", dpi=150, bbox_inches="tight")
174
  print("\nSaved comparison_chart.png")
175
 
176
  # Plot 2 — Reward distribution
177
  fig2, ax = plt.subplots(figsize=(10, 5))
178
+ ax.hist(results_05b["rewards"], bins=15, alpha=0.6, color="#e74c3c", label="DECEIT 0.5B Trained")
179
+ ax.hist(results_15b["rewards"], bins=15, alpha=0.6, color="#2ecc71", label="DECEIT 1.5B Trained")
180
  ax.axvline(x=0, color="gray", linestyle="--", alpha=0.5)
181
  ax.set_xlabel("Episode Reward")
182
  ax.set_ylabel("Count")
183
+ ax.set_title("Reward Distribution: 0.5B vs 1.5B Trained")
184
  ax.legend()
185
  plt.tight_layout()
186
+ plt.savefig("/tmp/reward_distribution.png", dpi=150, bbox_inches="tight")
187
  print("Saved reward_distribution.png")
188
 
 
 
 
 
 
189
  try:
190
+ for fname in ["comparison_chart.png", "reward_distribution.png"]:
191
+ upload_file(
192
+ path_or_fileobj=f"/tmp/{fname}",
193
+ path_in_repo=fname,
194
+ repo_id="Ajsaxena/deceit-qwen-1.5b-full",
195
+ repo_type="model"
196
+ )
197
+ print(f"Uploaded {fname} to HF Hub")
198
+ print("All charts uploaded!")
 
 
 
 
199
  except Exception as e:
200
  print(f"Upload error: {e}")
201
 
202
+ print("Keeping alive...")
203
+ time.sleep(3600)
204
  print("Done.")