"""Render the DECEIT base-vs-trained comparison charts and upload them to the HF Hub.

Running this script:

1. starts a background HTTP server on port 7860 that answers every GET with
   200 (presumably so a hosting platform's health probe succeeds -- confirm
   against the deployment config);
2. logs in to the Hugging Face Hub with the token in ``HF_TOKEN``;
3. renders three PNG charts into ``/tmp``;
4. uploads the charts to the model repo named by ``REPO_ID``;
5. sleeps for an hour so the process (and the health server) stays up.
"""
import io  # not used by the code below; kept in case other tooling relies on it
import os
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer

import matplotlib

# Select the non-interactive backend *before* pyplot is imported so no GUI
# toolkit is ever required in a headless container.
matplotlib.use('Agg')

import matplotlib.pyplot as plt
from huggingface_hub import login, upload_file
from PIL import Image  # not used by the code below; kept for compatibility

# Single source of truth for the destination repo, so the upload target and
# the final "check here" message cannot drift apart.
REPO_ID = "Ajsaxena/deceit-qwen-1.5b-full"


# Health server
class HealthHandler(BaseHTTPRequestHandler):
    """Minimal handler that reports the process as alive on every GET."""

    def do_GET(self):
        """Reply 200 with a short fixed status body, whatever the path."""
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"Generating charts...")

    def log_message(self, format, *args):
        """Suppress the default per-request logging to stderr."""
        pass


# Daemon thread: it must not keep the interpreter alive once the main
# thread has finished.
health_thread = threading.Thread(
    target=lambda: HTTPServer(("0.0.0.0", 7860), HealthHandler).serve_forever(),
    daemon=True,
)
health_thread.start()
print("Health server started")

# Auth -- raises KeyError if HF_TOKEN is not set, which fails fast before any
# chart is rendered.
login(token=os.environ["HF_TOKEN"])

# Data (index 0 = base model, index 1 = trained model)
models = ['Base Model\n(untrained)', 'DECEIT 1.5B Trained']
colors = ['#e74c3c', '#2ecc71']
mean_rewards = [0.137, 0.130]
accuracy = [50.0, 36.7]
confident_wrong = [36.7, 26.7]
abstain_rate = [10.0, 36.7]

# Chart 1 - Comparison bar chart (four metrics side by side)
fig, axes = plt.subplots(1, 4, figsize=(16, 5))

axes[0].bar(models, mean_rewards, color=colors)
axes[0].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
axes[0].set_title('Mean Episode Reward')
axes[0].set_ylabel('Reward')

axes[1].bar(models, accuracy, color=colors)
axes[1].set_title('Accuracy %')
axes[1].set_ylabel('%')
axes[1].set_ylim(0, 100)

axes[2].bar(models, confident_wrong, color=colors)
axes[2].set_title('Confident Wrong %\n(Sycophancy - lower is better)')
axes[2].set_ylabel('%')
axes[2].set_ylim(0, 100)

axes[3].bar(models, abstain_rate, color=colors)
axes[3].set_title('Abstain Rate %\n(Honest Uncertainty - higher is better)')
axes[3].set_ylabel('%')
axes[3].set_ylim(0, 100)

plt.suptitle(
    'DECEIT: Base Model vs Trained Model\n(Qwen 2.5 1.5B, 30 episodes each)',
    fontsize=13,
)
plt.tight_layout()
plt.savefig('/tmp/comparison_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved comparison_chart.png")

# Chart 2 - Sycophancy focus
fig2, ax = plt.subplots(figsize=(8, 6))
x = range(len(models))  # numeric bar positions; reused by chart 3
bars = ax.bar(x, confident_wrong, color=colors, width=0.5)
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Confident Wrong Rate %')
ax.set_title(
    'Sycophancy Reduction\n(Confident Wrong Rate - lower is better)',
    fontsize=13,
)
ax.set_ylim(0, 60)
# Print each value just above its bar.
for bar, val in zip(bars, confident_wrong):
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height() + 1,
        f'{val}%',
        ha='center',
        va='bottom',
        fontweight='bold',
        fontsize=14,
    )
# (36.7 - 26.7) / 36.7 ~= 27% relative reduction; the arrow points at the
# trained-model bar.
ax.annotate(
    '27% reduction\nin sycophancy',
    xy=(1, 26.7),
    xytext=(0.5, 45),
    arrowprops=dict(arrowstyle='->', color='black'),
    fontsize=12,
    fontweight='bold',
    color='green',
)
plt.tight_layout()
plt.savefig('/tmp/sycophancy_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved sycophancy_chart.png")

# Chart 3 - Abstain rate (honesty)
fig3, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(x, abstain_rate, color=colors, width=0.5)
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Abstain Rate %')
ax.set_title(
    'Honest Uncertainty\n(Abstain Rate - higher means more honest)',
    fontsize=13,
)
ax.set_ylim(0, 60)
# Print each value just above its bar.
for bar, val in zip(bars, abstain_rate):
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height() + 1,
        f'{val}%',
        ha='center',
        va='bottom',
        fontweight='bold',
        fontsize=14,
    )
# (36.7 - 10.0) / 10.0 = 267% relative increase; the arrow points at the
# trained-model bar.
ax.annotate(
    '267% increase\nin honest abstention',
    xy=(1, 36.7),
    xytext=(0.3, 50),
    arrowprops=dict(arrowstyle='->', color='black'),
    fontsize=12,
    fontweight='bold',
    color='green',
)
plt.tight_layout()
plt.savefig('/tmp/honesty_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved honesty_chart.png")

# Upload all charts to HF Hub
for filename in [
    '/tmp/comparison_chart.png',
    '/tmp/sycophancy_chart.png',
    '/tmp/honesty_chart.png',
]:
    upload_file(
        path_or_fileobj=filename,
        # path_in_repo is relative to the repo root: use the bare file name
        # rather than the absolute local /tmp path.
        path_in_repo=os.path.basename(filename),
        repo_id=REPO_ID,
        repo_type="model",
    )
    print(f"Uploaded {filename} to HF Hub")

print(f"All charts uploaded! Check huggingface.co/{REPO_ID}")

# Keep the process (and with it the daemon health server) alive for an hour
# after the uploads finish.
time.sleep(3600)