Spaces:

Ajsaxena
/

deceit-results

Sleeping

App Files Files Community

Jayant-Kernel committed on 16 days ago

Commit

53e9ac9

unverified ·

0 Parent(s):

initial: DECEIT results Gradio Space

Browse files

Files changed (3) hide show

README.md +12 -0
app.py +78 -0
requirements.txt +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: DECEIT Results
+emoji: 📊
+colorFrom: red
+colorTo: green
+sdk: gradio
+sdk_version: 4.0.0
+app_file: app.py
+pinned: false
+---
+# DECEIT Evaluation Results
+Interactive charts showing base vs trained model comparison.

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+import matplotlib.pyplot as plt
+import matplotlib
+matplotlib.use('Agg')
+import io
+from PIL import Image
+def show_results():
+    models = ['Base Model\n(untrained)', 'DECEIT Trained']
+    colors = ['#e74c3c', '#2ecc71']
+    fig, axes = plt.subplots(1, 4, figsize=(16, 5))
+    axes[0].bar(models, [0.137, 0.130], color=colors)
+    axes[0].axhline(y=0, color='gray', linestyle='--', alpha=0.5)
+    axes[0].set_title('Mean Episode Reward')
+    axes[0].set_ylabel('Reward')
+    axes[1].bar(models, [50.0, 36.7], color=colors)
+    axes[1].set_title('Accuracy %')
+    axes[1].set_ylabel('%')
+    axes[1].set_ylim(0, 100)
+    axes[2].bar(models, [36.7, 26.7], color=colors)
+    axes[2].set_title('Confident Wrong %\n(Sycophancy - lower is better)')
+    axes[2].set_ylabel('%')
+    axes[2].set_ylim(0, 100)
+    axes[3].bar(models, [10.0, 36.7], color=colors)
+    axes[3].set_title('Abstain Rate %\n(Honest Uncertainty - higher is better)')
+    axes[3].set_ylabel('%')
+    axes[3].set_ylim(0, 100)
+    plt.suptitle('DECEIT: Base Model vs Trained Model\n(Qwen 2.5 0.5B, 30 episodes each)', fontsize=13)
+    plt.tight_layout()
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
+    buf.seek(0)
+    plt.close()
+    return Image.open(buf)
+with gr.Blocks(title="DECEIT Results") as demo:
+    gr.Markdown("# DECEIT — Evaluation Results")
+    gr.Markdown("## Comparing Base Qwen 2.5 0.5B vs DECEIT-Trained Model")
+    gr.Markdown("""
+    ### Key Finding: Sycophancy reduced by 27%
+    The trained model learns to say 'I don't know' instead of confidently hallucinating.
+    """)
+    with gr.Row():
+        gr.Markdown("""
+        | Metric | Base Model | DECEIT Trained | Change |
+        |--------|-----------|----------------|--------|
+        | Mean Reward | +0.137 | +0.130 | similar |
+        | Accuracy | 50.0% | 36.7% | ↓ abstains more |
+        | **Confident Wrong (Sycophancy)** | **36.7%** | **26.7%** | **↓ 27% reduction** |
+        | **Abstain Rate (Honest Uncertainty)** | **10.0%** | **36.7%** | **↑ 267% increase** |
+        """)
+    chart = gr.Image(label="Comparison Chart")
+    btn = gr.Button("Regenerate Chart", variant="primary")
+    btn.click(show_results, outputs=chart)
+    demo.load(show_results, outputs=chart)
+    gr.Markdown("""
+    ### What the results mean
+    - **Confident Wrong Rate dropped 27%** — the model is less sycophantic
+    - **Abstain Rate increased 267%** — the model learned honest uncertainty
+    - **Accuracy appears lower** because abstaining on hard questions is correct behavior
+    ### Links
+    - [HF Space (live env)](https://huggingface.co/spaces/Ajsaxena/DECEIT)
+    - [Trained Model](https://huggingface.co/Ajsaxena/deceit-qwen-0.5b-full)
+    - [GitHub](https://github.com/Jayant-kernel/DECEIT-the-ai-truth-environment-)
+    """)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+matplotlib
+Pillow