Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import matplotlib | |
| matplotlib.use('Agg') | |
| import io | |
| from PIL import Image | |
def show_results():
    """Render the base-vs-trained comparison chart and return it as an image.

    Draws four side-by-side bar charts (mean episode reward, accuracy,
    confident-wrong rate, abstain rate) comparing the untrained base model
    with the DECEIT-trained model, using the hard-coded evaluation numbers
    below.

    Returns:
        PIL.Image.Image: the chart, decoded from an in-memory PNG.
    """
    models = ['Base Model\n(untrained)', 'DECEIT Trained']
    colors = ['#e74c3c', '#2ecc71']

    # Percentage panels share the same layout: (values, title).
    percent_panels = [
        ([50.0, 36.7], 'Accuracy %'),
        ([36.7, 26.7], 'Confident Wrong %\n(Sycophancy - lower is better)'),
        ([10.0, 36.7], 'Abstain Rate %\n(Honest Uncertainty - higher is better)'),
    ]

    fig, axes = plt.subplots(1, 4, figsize=(16, 5))
    try:
        reward_ax = axes[0]
        reward_ax.bar(models, [0.137, 0.130], color=colors)
        # Zero line as a visual reference for the reward panel.
        reward_ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
        reward_ax.set_title('Mean Episode Reward')
        reward_ax.set_ylabel('Reward')

        for ax, (values, title) in zip(axes[1:], percent_panels):
            ax.bar(models, values, color=colors)
            ax.set_title(title)
            ax.set_ylabel('%')
            ax.set_ylim(0, 100)

        # Operate on `fig` explicitly rather than through pyplot's implicit
        # "current figure": this function is a Gradio callback and may run
        # for several requests at once, so the current figure is not
        # guaranteed to be the one created above.
        fig.suptitle(
            'DECEIT: Base Model vs Trained Model\n(Qwen 2.5 0.5B, 30 episodes each)',
            fontsize=13,
        )
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format='png', dpi=150, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
# Markdown bodies are kept flush-left so they render as intended regardless of
# how the component treats leading indentation.
_KEY_FINDING_MD = """
### Key Finding: Sycophancy reduced by 27%
The trained model learns to say 'I don't know' instead of confidently hallucinating.
"""

_RESULTS_TABLE_MD = """
| Metric | Base Model | DECEIT Trained | Change |
|--------|-----------|----------------|--------|
| Mean Reward | +0.137 | +0.130 | similar |
| Accuracy | 50.0% | 36.7% | ↓ abstains more |
| **Confident Wrong (Sycophancy)** | **36.7%** | **26.7%** | **↓ 27% reduction** |
| **Abstain Rate (Honest Uncertainty)** | **10.0%** | **36.7%** | **↑ 267% increase** |
"""

_EXPLANATION_MD = """
### What the results mean
- **Confident Wrong Rate dropped 27%** — the model is less sycophantic
- **Abstain Rate increased 267%** — the model learned honest uncertainty
- **Accuracy appears lower** because abstaining on hard questions is correct behavior
### Links
- [HF Space (live env)](https://huggingface.co/spaces/Ajsaxena/DECEIT)
- [Trained Model](https://huggingface.co/Ajsaxena/deceit-qwen-0.5b-full)
- [GitHub](https://github.com/Jayant-kernel/DECEIT-the-ai-truth-environment-)
"""

# Results page: headline, metrics table, the comparison chart (drawn on page
# load and again on button click), then interpretation notes and links.
with gr.Blocks(title="DECEIT Results") as demo:
    gr.Markdown("# DECEIT — Evaluation Results")
    gr.Markdown("## Comparing Base Qwen 2.5 0.5B vs DECEIT-Trained Model")
    gr.Markdown(_KEY_FINDING_MD)

    # NOTE(review): the original indentation was lost; the row is assumed to
    # contain only the metrics table — confirm against the deployed layout.
    with gr.Row():
        gr.Markdown(_RESULTS_TABLE_MD)

    chart = gr.Image(label="Comparison Chart")
    btn = gr.Button("Regenerate Chart", variant="primary")

    # Both events call show_results and write its return value to `chart`.
    btn.click(show_results, outputs=chart)
    demo.load(show_results, outputs=chart)

    gr.Markdown(_EXPLANATION_MD)

demo.launch()