| import matplotlib.pyplot as plt |
| import matplotlib |
| matplotlib.use('Agg') |
| import io, os, threading |
| from PIL import Image |
| from http.server import HTTPServer, BaseHTTPRequestHandler |
| from huggingface_hub import login, upload_file |
|
|
| |
class HealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler that answers every GET with a fixed status text.

    The handler ignores the request path: any GET receives ``200`` and the
    same plain-text body. Per-request logging is silenced.
    """

    # Fixed payload returned for every GET request.
    _BODY = b"Generating charts..."

    def do_GET(self):
        """Reply ``200`` with the plain-text status body."""
        self.send_response(200)
        # Declare the payload type and size so clients can decode the body
        # and know where it ends without waiting for the socket to close.
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(self._BODY)))
        self.end_headers()
        self.wfile.write(self._BODY)

    def log_message(self, format, *args):
        """Discard request log lines instead of letting the base class emit them."""
|
|
def _serve_health_forever():
    """Bind the health endpoint on 0.0.0.0:7860 and serve requests until the process exits."""
    server = HTTPServer(("0.0.0.0", 7860), HealthHandler)
    server.serve_forever()


# Run the server on a daemon thread so it never blocks interpreter shutdown;
# the main thread carries on with chart generation below.
health_thread = threading.Thread(target=_serve_health_forever, daemon=True)
health_thread.start()
print("Health server started")
|
|
| |
# Authenticate against the Hugging Face Hub with the token taken from the
# HF_TOKEN environment variable; a missing variable raises KeyError here.
hf_token = os.environ["HF_TOKEN"]
login(token=hf_token)
|
|
| |
# One (x-axis label, bar colour) pair per evaluated model. Every metric list
# below follows the same order: index 0 is the base model, index 1 the
# trained model.
_MODEL_STYLES = [
    ("Base Model\n(untrained)", "#e74c3c"),
    ("DECEIT 1.5B Trained", "#2ecc71"),
]
models = [label for label, _ in _MODEL_STYLES]
colors = [color for _, color in _MODEL_STYLES]

# Metric values plotted by the charts below.
mean_rewards = [0.137, 0.130]     # plotted as "Mean Episode Reward"
accuracy = [50.0, 36.7]           # plotted as "Accuracy %"
confident_wrong = [36.7, 26.7]    # plotted as "Confident Wrong %"
abstain_rate = [10.0, 36.7]       # plotted as "Abstain Rate %"
|
|
| |
# Overview figure: one row of four bar charts, one panel per metric, each
# comparing the two models side by side.
fig, axes = plt.subplots(1, 4, figsize=(16, 5))

# Panel 0: mean reward, with a dashed reference line at zero.
reward_ax = axes[0]
reward_ax.bar(models, mean_rewards, color=colors)
reward_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
reward_ax.set_title('Mean Episode Reward')
reward_ax.set_ylabel('Reward')

# Panels 1-3: percentage metrics, all drawn on the same 0-100 scale so the
# bar heights are directly comparable across panels.
percent_panels = [
    (accuracy, 'Accuracy %'),
    (confident_wrong, 'Confident Wrong %\n(Sycophancy - lower is better)'),
    (abstain_rate, 'Abstain Rate %\n(Honest Uncertainty - higher is better)'),
]
for panel_ax, (values, panel_title) in zip(axes[1:], percent_panels):
    panel_ax.bar(models, values, color=colors)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel('%')
    panel_ax.set_ylim(0, 100)

fig.suptitle('DECEIT: Base Model vs Trained Model\n(Qwen 2.5 1.5B, 30 episodes each)', fontsize=13)
fig.tight_layout()
fig.savefig('/tmp/comparison_chart.png', dpi=150, bbox_inches='tight')
# Release the figure once it is on disk.
plt.close(fig)
print("Saved comparison_chart.png")
|
|
| |
# Stand-alone chart of the confident-wrong rate for the two models.
fig2, ax = plt.subplots(figsize=(8, 6))

# Integer bar positions. `x` stays a module-level name because the
# honesty-chart section further down reads it as well.
x = range(len(models))
bars = ax.bar(x, confident_wrong, color=colors, width=0.5)

ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Confident Wrong Rate %')
ax.set_title('Sycophancy Reduction\n(Confident Wrong Rate - lower is better)', fontsize=13)
ax.set_ylim(0, 60)

# Print each value just above its bar, centred horizontally on the bar.
for rect, pct in zip(bars, confident_wrong):
    label_x = rect.get_x() + rect.get_width() / 2
    label_y = rect.get_height() + 1
    ax.text(label_x, label_y, f'{pct}%',
            ha='center', va='bottom', fontweight='bold', fontsize=14)

# Call-out arrow pointing at the top of the second (trained-model) bar.
ax.annotate(
    '27% reduction\nin sycophancy',
    xy=(1, 26.7),
    xytext=(0.5, 45),
    arrowprops={'arrowstyle': '->', 'color': 'black'},
    fontsize=12,
    fontweight='bold',
    color='green',
)

fig2.tight_layout()
fig2.savefig('/tmp/sycophancy_chart.png', dpi=150, bbox_inches='tight')
plt.close(fig2)
print("Saved sycophancy_chart.png")
|
|
| |
# Stand-alone chart of the abstain rate for the two models.
# NOTE: `x` (the integer bar positions) is the range defined by the
# sycophancy-chart section above; this section reuses it unchanged.
fig3, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(x, abstain_rate, color=colors, width=0.5)

ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Abstain Rate %')
ax.set_title('Honest Uncertainty\n(Abstain Rate - higher means more honest)', fontsize=13)
ax.set_ylim(0, 60)

# Print each value just above its bar, centred horizontally on the bar.
for rect, pct in zip(bars, abstain_rate):
    label_x = rect.get_x() + rect.get_width() / 2
    label_y = rect.get_height() + 1
    ax.text(label_x, label_y, f'{pct}%',
            ha='center', va='bottom', fontweight='bold', fontsize=14)

# Call-out arrow pointing at the top of the second (trained-model) bar.
ax.annotate(
    '267% increase\nin honest abstention',
    xy=(1, 36.7),
    xytext=(0.3, 50),
    arrowprops={'arrowstyle': '->', 'color': 'black'},
    fontsize=12,
    fontweight='bold',
    color='green',
)

fig3.tight_layout()
fig3.savefig('/tmp/honesty_chart.png', dpi=150, bbox_inches='tight')
plt.close(fig3)
print("Saved honesty_chart.png")
|
|
| |
# Hub repository that receives the charts. Defined once so the upload target
# and the closing message cannot drift apart (the message previously named a
# different "0.5b" repository than the one actually uploaded to).
REPO_ID = "Ajsaxena/deceit-qwen-1.5b-full"

# Local chart files written by the plotting sections above.
CHART_FILES = [
    '/tmp/comparison_chart.png',
    '/tmp/sycophancy_chart.png',
    '/tmp/honesty_chart.png',
]

for filename in CHART_FILES:
    upload_file(
        path_or_fileobj=filename,
        # Store the chart under its bare file name. The local path is an
        # absolute /tmp/... path; passing it through unchanged would carry
        # the temp-directory prefix into the repository layout.
        path_in_repo=os.path.basename(filename),
        repo_id=REPO_ID,
        repo_type="model",
    )
    print(f"Uploaded {filename} to HF Hub")


print(f"All charts uploaded! Check huggingface.co/{REPO_ID}")
|
|
import time

# Hold the main thread open for one hour after the uploads finish. The health
# server runs on a daemon thread, so it stops as soon as the main thread ends;
# presumably this keeps the endpoint answering for the hosting platform.
KEEP_ALIVE_SECONDS = 60 * 60
time.sleep(KEEP_ALIVE_SECONDS)
|
|