Jayant-Kernel committed on
Commit
b84ec51
·
unverified ·
1 Parent(s): 32b9179

update: evaluate retrained model, upload charts to HF Hub

Browse files
Files changed (1) hide show
  1. evaluate.py +44 -1
evaluate.py CHANGED
@@ -1,4 +1,23 @@
1
  import os, json, re, torch, pathlib, gc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from unsloth import FastLanguageModel
3
  from deceit_env.server.environment import DeceitEnvironment
4
  from deceit_env.server.grader import Grader
@@ -116,7 +135,7 @@ def evaluate_model(model_name, label, n_episodes=30):
116
 
117
  # Evaluate both models — 200 episodes each (env samples randomly with replacement)
118
  base_results = evaluate_model("unsloth/Qwen2.5-0.5B-Instruct", "Base Model (untrained)", n_episodes=200)
119
- trained_results = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT Trained", n_episodes=200)
120
 
121
  # Print comparison
122
  print("\n" + "="*60)
@@ -172,3 +191,27 @@ plt.savefig("reward_distribution.png", dpi=150, bbox_inches="tight")
172
  print("Saved reward_distribution.png")
173
 
174
  print("\nDone! Download comparison_chart.png and reward_distribution.png")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os, json, re, torch, pathlib, gc
2
+ import threading
3
+ from http.server import HTTPServer, BaseHTTPRequestHandler
4
+
5
+ class HealthHandler(BaseHTTPRequestHandler):
6
+ def do_GET(self):
7
+ self.send_response(200)
8
+ self.end_headers()
9
+ self.wfile.write(b"Evaluation in progress...")
10
+ def log_message(self, format, *args):
11
+ pass
12
+
13
+ def start_health_server():
14
+ server = HTTPServer(("0.0.0.0", 7860), HealthHandler)
15
+ server.serve_forever()
16
+
17
+ health_thread = threading.Thread(target=start_health_server, daemon=True)
18
+ health_thread.start()
19
+ print("Health server started on port 7860")
20
+
21
  from unsloth import FastLanguageModel
22
  from deceit_env.server.environment import DeceitEnvironment
23
  from deceit_env.server.grader import Grader
 
135
 
136
  # Evaluate both models — 200 episodes for the base model, 30 for the trained model (env samples randomly with replacement)
137
  base_results = evaluate_model("unsloth/Qwen2.5-0.5B-Instruct", "Base Model (untrained)", n_episodes=200)
138
+ trained_results = evaluate_model("Ajsaxena/deceit-qwen-0.5b-full", "DECEIT Trained", n_episodes=30)
139
 
140
  # Print comparison
141
  print("\n" + "="*60)
 
191
  print("Saved reward_distribution.png")
192
 
193
  print("\nDone! Download comparison_chart.png and reward_distribution.png")
194
+
195
+ from huggingface_hub import upload_file
196
+ import time
197
+
198
+ try:
199
+ upload_file(
200
+ path_or_fileobj="comparison_chart.png",
201
+ path_in_repo="comparison_chart.png",
202
+ repo_id="Ajsaxena/deceit-qwen-0.5b-full",
203
+ repo_type="model"
204
+ )
205
+ upload_file(
206
+ path_or_fileobj="reward_distribution.png",
207
+ path_in_repo="reward_distribution.png",
208
+ repo_id="Ajsaxena/deceit-qwen-0.5b-full",
209
+ repo_type="model"
210
+ )
211
+ print("Charts uploaded to HF Hub successfully!")
212
+ except Exception as e:
213
+ print(f"Upload error: {e}")
214
+
215
+ print("Keeping alive for 120 seconds...")
216
+ time.sleep(120)
217
+ print("Done.")