File size: 4,061 Bytes
76117fc 088adaf 76117fc 088adaf 76117fc d407740 76117fc d407740 76117fc d407740 76117fc d407740 76117fc d407740 76117fc 20bb6de | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')
import io, os, threading
from PIL import Image
from http.server import HTTPServer, BaseHTTPRequestHandler
from huggingface_hub import login, upload_file
# Health server
class HealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler that answers every GET with a fixed status body.

    Only GET is implemented; the request path is ignored, so any URL gets
    the same 200 response.
    """

    # Fixed payload returned for every GET request.
    _BODY = b"Generating charts..."

    def do_GET(self):
        """Send a 200 response carrying the fixed plain-text status body."""
        self.send_response(200)
        # Declare the body's type and length so clients know how to decode
        # it and where it ends instead of relying on connection close.
        self.send_header("Content-Type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(self._BODY)))
        self.end_headers()
        self.wfile.write(self._BODY)

    def log_message(self, format, *args):
        """Discard request log lines instead of writing them out.

        The parameter names mirror the base-class signature, which is why
        ``format`` shadows the builtin here.
        """
        return None
def _serve_health():
    """Bind an HTTP server on 0.0.0.0:7860 and serve HealthHandler forever."""
    HTTPServer(("0.0.0.0", 7860), HealthHandler).serve_forever()


# Run the server on a daemon thread so it never keeps the process alive on
# its own; the server object is created inside the thread, not here.
health_thread = threading.Thread(target=_serve_health, daemon=True)
health_thread.start()
print("Health server started")
# Auth: read the token from the environment (raises KeyError when HF_TOKEN is
# not set) and hand it to huggingface_hub.
hf_token = os.environ["HF_TOKEN"]
login(token=hf_token)
# Data: every list below is index-aligned with `models`
# (index 0 = base model, index 1 = trained model).
models = [
    'Base Model\n(untrained)',
    'DECEIT 1.5B Trained',
]
# Bar colour per model, in the same order.
colors = [
    '#e74c3c',
    '#2ecc71',
]
# Plotted on the 'Reward' axis of the comparison chart.
mean_rewards = [
    0.137,
    0.130,
]
# The three lists below are plotted on '%' axes.
accuracy = [
    50.0,
    36.7,
]
confident_wrong = [
    36.7,
    26.7,
]
abstain_rate = [
    10.0,
    36.7,
]
# Chart 1 - one row of four bar panels comparing both models on every metric
fig, axes = plt.subplots(1, 4, figsize=(16, 5))
reward_ax, *percent_axes = axes

# Panel 0: mean reward, with a dashed reference line at zero.
reward_ax.bar(models, mean_rewards, color=colors)
reward_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
reward_ax.set_title('Mean Episode Reward')
reward_ax.set_ylabel('Reward')

# Panels 1-3: percentage metrics sharing the same 0-100 scale.
percent_panels = [
    (accuracy, 'Accuracy %'),
    (confident_wrong, 'Confident Wrong %\n(Sycophancy - lower is better)'),
    (abstain_rate, 'Abstain Rate %\n(Honest Uncertainty - higher is better)'),
]
for panel, (values, title) in zip(percent_axes, percent_panels):
    panel.bar(models, values, color=colors)
    panel.set_title(title)
    panel.set_ylabel('%')
    panel.set_ylim(0, 100)

plt.suptitle(
    'DECEIT: Base Model vs Trained Model\n(Qwen 2.5 1.5B, 30 episodes each)',
    fontsize=13,
)
plt.tight_layout()
plt.savefig('/tmp/comparison_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved comparison_chart.png")
# Chart 2 - Sycophancy focus: confident-wrong rate per model, with value
# labels on the bars and an arrow calling out the relative reduction.
fig2, ax = plt.subplots(figsize=(8, 6))
x = range(len(models))
bars = ax.bar(x, confident_wrong, color=colors, width=0.5)
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Confident Wrong Rate %')
ax.set_title('Sycophancy Reduction\n(Confident Wrong Rate - lower is better)', fontsize=13)
ax.set_ylim(0, 60)

# Print each bar's value just above its top edge.
for bar, val in zip(bars, confident_wrong):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
            f'{val}%', ha='center', va='bottom', fontweight='bold', fontsize=14)

# Derive the headline figure and the arrow target from the data instead of
# repeating them as literals, so the annotation cannot drift from the bars.
base_wrong = confident_wrong[0]
trained_wrong = confident_wrong[1]
reduction_pct = (base_wrong - trained_wrong) / base_wrong * 100
ax.annotate(f'{reduction_pct:.0f}% reduction\nin sycophancy',
            xy=(1, trained_wrong), xytext=(0.5, 45),
            arrowprops=dict(arrowstyle='->', color='black'),
            fontsize=12, fontweight='bold', color='green')

plt.tight_layout()
plt.savefig('/tmp/sycophancy_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved sycophancy_chart.png")
# Chart 3 - Abstain rate (honesty): abstain rate per model, with value labels
# on the bars and an arrow calling out the relative increase.
fig3, ax = plt.subplots(figsize=(8, 6))
# Bar positions are computed here so this chart does not depend on a
# variable left over from the previous chart.
x = range(len(models))
bars = ax.bar(x, abstain_rate, color=colors, width=0.5)
ax.set_xticks(x)
ax.set_xticklabels(models)
ax.set_ylabel('Abstain Rate %')
ax.set_title('Honest Uncertainty\n(Abstain Rate - higher means more honest)', fontsize=13)
ax.set_ylim(0, 60)

# Print each bar's value just above its top edge.
for bar, val in zip(bars, abstain_rate):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
            f'{val}%', ha='center', va='bottom', fontweight='bold', fontsize=14)

# Derive the headline figure and the arrow target from the data instead of
# repeating them as literals, so the annotation cannot drift from the bars.
base_abstain = abstain_rate[0]
trained_abstain = abstain_rate[1]
increase_pct = (trained_abstain - base_abstain) / base_abstain * 100
ax.annotate(f'{increase_pct:.0f}% increase\nin honest abstention',
            xy=(1, trained_abstain), xytext=(0.3, 50),
            arrowprops=dict(arrowstyle='->', color='black'),
            fontsize=12, fontweight='bold', color='green')

plt.tight_layout()
plt.savefig('/tmp/honesty_chart.png', dpi=150, bbox_inches='tight')
plt.close()
print("Saved honesty_chart.png")
# Upload all charts to HF Hub.
# The repo id is defined once so the upload target and the closing message
# cannot disagree (the message previously named a different repository).
repo_id = "Ajsaxena/deceit-qwen-1.5b-full"
for filename in ['/tmp/comparison_chart.png', '/tmp/sycophancy_chart.png', '/tmp/honesty_chart.png']:
    upload_file(
        path_or_fileobj=filename,
        # NOTE(review): the local absolute path is reused as the in-repo
        # path, so files presumably land under a tmp/ folder in the repo.
        # Confirm that is intended before switching to the bare file name.
        path_in_repo=filename,
        repo_id=repo_id,
        repo_type="model",
    )
    print(f"Uploaded {filename} to HF Hub")
print(f"All charts uploaded! Check huggingface.co/{repo_id}")

# Keep the main thread alive for an hour: the health server runs on a daemon
# thread and would stop as soon as the script exits.
import time
time.sleep(3600)
|