deceit1 / generate_charts.py
Jayant-Kernel
fix: sleep 3600 to keep container alive after chart upload
20bb6de
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
import io, os, threading
from PIL import Image
from http.server import HTTPServer, BaseHTTPRequestHandler
from huggingface_hub import login, upload_file
# Health server
class HealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler that answers every GET with a fixed status text."""

    def do_GET(self):
        """Reply 200 with a short plain body; no extra headers are added."""
        body = b"Generating charts..."
        self.send_response(200)
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format, *args):
        """Override the base-class request logger with a no-op."""
        return None
def _serve_health():
    """Bind an HTTP server on 0.0.0.0:7860 and serve HealthHandler requests forever."""
    HTTPServer(("0.0.0.0", 7860), HealthHandler).serve_forever()


# Daemon thread: it does not keep the process alive once the main script ends.
health_thread = threading.Thread(target=_serve_health, daemon=True)
health_thread.start()
print("Health server started")
# Auth
# Read the Hub token from the environment (KeyError if HF_TOKEN is unset),
# then log in with it.
hf_token = os.environ["HF_TOKEN"]
login(token=hf_token)
# Data
# One row per model, in plotting order.
_RESULTS_BY_MODEL = {
    # label: (bar colour, mean reward, accuracy %, confident-wrong %, abstain %)
    'Base Model\n(untrained)': ('#e74c3c', 0.137, 50.0, 36.7, 10.0),
    'DECEIT 1.5B Trained': ('#2ecc71', 0.130, 36.7, 26.7, 36.7),
}

# Transpose the table into the parallel per-metric lists the charts consume.
models = list(_RESULTS_BY_MODEL)
colors, mean_rewards, accuracy, confident_wrong, abstain_rate = map(
    list, zip(*_RESULTS_BY_MODEL.values())
)
# Chart 1 - Comparison bar chart
# One panel per metric: (values, title, y-axis label, fixed y-limits or None).
panels = [
    (mean_rewards, 'Mean Episode Reward', 'Reward', None),
    (accuracy, 'Accuracy %', '%', (0, 100)),
    (confident_wrong, 'Confident Wrong %\n(Sycophancy - lower is better)', '%', (0, 100)),
    (abstain_rate, 'Abstain Rate %\n(Honest Uncertainty - higher is better)', '%', (0, 100)),
]

fig, axes = plt.subplots(1, 4, figsize=(16, 5))
for panel_ax, (values, title, ylabel, ylim) in zip(axes, panels):
    panel_ax.bar(models, values, color=colors)
    if ylim is None:
        # The reward panel is auto-scaled and gets a dashed zero baseline instead.
        panel_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    panel_ax.set_title(title)
    panel_ax.set_ylabel(ylabel)
    if ylim is not None:
        # Percentage panels share a fixed 0-100 scale.
        panel_ax.set_ylim(*ylim)

plt.suptitle(
    'DECEIT: Base Model vs Trained Model\n(Qwen 2.5 1.5B, 30 episodes each)',
    fontsize=13,
)
plt.tight_layout()
plt.savefig('/tmp/comparison_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved comparison_chart.png")
# Chart 2 - Sycophancy focus
fig2, ax = plt.subplots(figsize=(8, 6))
x = range(len(models))  # integer bar positions, one per model
bars = ax.bar(x, confident_wrong, color=colors, width=0.5)

# Axis decoration.
ax.set_title('Sycophancy Reduction\n(Confident Wrong Rate - lower is better)', fontsize=13)
ax.set_ylabel('Confident Wrong Rate %')
ax.set_ylim(0, 60)
ax.set_xticks(x)
ax.set_xticklabels(models)

# Print each value just above its bar, centred horizontally.
label_style = dict(ha='center', va='bottom', fontweight='bold', fontsize=14)
for bar, val in zip(bars, confident_wrong):
    center_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 1
    ax.text(center_x, label_y, f'{val}%', **label_style)

# Callout with an arrow pointing at the top of the second bar (x=1, y=26.7).
ax.annotate(
    '27% reduction\nin sycophancy',
    xy=(1, 26.7),
    xytext=(0.5, 45),
    arrowprops={'arrowstyle': '->', 'color': 'black'},
    fontsize=12,
    fontweight='bold',
    color='green',
)

plt.tight_layout()
plt.savefig('/tmp/sycophancy_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved sycophancy_chart.png")
# Chart 3 - Abstain rate (honesty)
# `x` holds the bar positions defined earlier in the script.
fig3, ax = plt.subplots(figsize=(8, 6))
bars = ax.bar(x, abstain_rate, color=colors, width=0.5)

ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_title('Honest Uncertainty\n(Abstain Rate - higher means more honest)', fontsize=13)
ax.set_ylabel('Abstain Rate %')
ax.set_ylim(0, 60)

# Value label one unit above each bar, centred on the bar.
for bar, val in zip(bars, abstain_rate):
    ax.text(
        bar.get_x() + bar.get_width()/2,
        bar.get_height() + 1,
        f'{val}%',
        ha='center',
        va='bottom',
        fontweight='bold',
        fontsize=14,
    )

# Callout with an arrow pointing at the top of the second bar (x=1, y=36.7).
arrow = dict(arrowstyle='->', color='black')
ax.annotate(
    '267% increase\nin honest abstention',
    xy=(1, 36.7),
    xytext=(0.3, 50),
    arrowprops=arrow,
    fontsize=12,
    fontweight='bold',
    color='green',
)

plt.tight_layout()
plt.savefig('/tmp/honesty_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved honesty_chart.png")
# Upload all charts to HF Hub
import time

# Single source of truth for the target repo, so the upload calls and the
# final status message cannot drift apart.
HUB_REPO_ID = "Ajsaxena/deceit-qwen-1.5b-full"

for filename in ['/tmp/comparison_chart.png', '/tmp/sycophancy_chart.png', '/tmp/honesty_chart.png']:
    upload_file(
        path_or_fileobj=filename,
        # NOTE(review): this passes the absolute local path as the repo path.
        # Confirm where the Hub places it (presumably under tmp/) and whether
        # the bare file name was intended; left unchanged so existing links
        # to the uploaded files keep working.
        path_in_repo=filename,
        repo_id=HUB_REPO_ID,
        repo_type="model"
    )
    print(f"Uploaded {filename} to HF Hub")

# The message previously named the 0.5b repo although every upload above
# targets the 1.5b repo; it is now built from the same constant.
print(f"All charts uploaded! Check huggingface.co/{HUB_REPO_ID}")

# Block for an hour after the uploads so the process (and with it the
# daemon health-server thread) stays up instead of exiting immediately.
time.sleep(3600)