# O96a's picture
# Upload app.py with huggingface_hub
# 6c99384 verified
"""
LoPE Demo: Prompt Space Perturbation for Reasoning Exploration
Based on: "Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration"
Paper: arXiv:2605.05566
"""
import gradio as gr
import random
import os
from huggingface_hub import InferenceClient
# Lazy loading - the inference client is created on first use and then reused.
_inference_client = None


def get_client():
    """Return the shared InferenceClient, creating it on the first call.

    The client is built with the token read from the ``HF_TOKEN``
    environment variable (``None`` when the variable is unset) and cached
    in the module-level ``_inference_client`` for later calls.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client

    hf_token = os.environ.get("HF_TOKEN")
    _inference_client = InferenceClient(token=hf_token)
    return _inference_client
# Lorem Ipsum vocabulary for perturbations.
# Kept as a whitespace-separated block and split once at import time; the
# resulting list has the same words in the same order as the classic passage.
LOREM_WORDS = """
    lorem ipsum dolor sit amet consectetur adipiscing elit
    sed do eiusmod tempor incididunt ut labore et dolore
    magna aliqua enim ad minim veniam quis nostrud
    exercitation ullamco laboris nisi aliquip ex ea commodo
    consequat duis aute irure in reprehenderit voluptate
    velit esse cillum fugiat nulla pariatur excepteur sint
    occaecat cupidatat non proident sunt culpa qui officia
    deserunt mollit anim id est laborum
""".split()
# Sample reasoning tasks.
# Each entry is a dict with a display "name" and the "prompt" text; the
# position of an entry in this list is the index the UI passes around.
REASONING_TASKS = [
    {"name": title, "prompt": question}
    for title, question in (
        (
            "Math Word Problem",
            "If a train travels 60 miles per hour for 2.5 hours, how far does it travel?",
        ),
        (
            "Logic Puzzle",
            "Three people (Alice, Bob, Carol) are sitting in a row. Alice is not in the first seat. Bob is to the right of Alice. Who is in the middle seat?",
        ),
        (
            "Pattern Recognition",
            "What comes next in the sequence: 2, 6, 12, 20, 30, ?",
        ),
        (
            "Sudanese Context",
            "Khartoum has 3 main cities: Khartoum, Omdurman, and Bahri. If you start in Khartoum, cross the White Nile to Omdurman, then cross the Blue Nile back east, which city are you in?",
        ),
    )
]
def generate_lorem_perturbation(length=20):
    """Build a random Lorem Ipsum filler phrase.

    Args:
        length: Number of words to draw from ``LOREM_WORDS``. Words are
            sampled with replacement, so repeats are possible.

    Returns:
        The sampled words joined by single spaces.
    """
    sampled_words = random.choices(LOREM_WORDS, k=length)
    return " ".join(sampled_words)
def add_perturbation(prompt, perturbation):
    """Prefix a prompt with a perturbation string.

    Args:
        prompt: The task prompt.
        perturbation: The filler text to place before the prompt.

    Returns:
        The perturbation, a blank line, then the prompt.
    """
    return "{}\n\n{}".format(perturbation, prompt)
def simulate_reasoning(task_name, prompt, use_perturbation=False):
    """Return a canned reasoning transcript, with or without a LoPE prefix.

    No model is queried. The transcript is picked at random from two
    hard-coded templates per mode; a real implementation would send the
    (optionally perturbed) prompt to an LLM instead.

    Args:
        task_name: Name of the task. Accepted for interface compatibility;
            the simulated transcript does not use it.
        prompt: The task prompt. Not embedded in the simulated transcript.
        use_perturbation: When true, a 15-word Lorem Ipsum prefix is
            generated and shown in the transcript.

    Returns:
        One of the two template strings for the selected mode.
    """
    import logging

    try:
        # The client is not needed for the simulation; it is only initialised
        # here so a production version could call, e.g.,
        # client.text_generation(prompt, model="...").
        get_client()
    except Exception:
        # Best-effort: the simulation works without a client, so record the
        # failure instead of silently discarding it, then carry on.
        logging.getLogger(__name__).warning(
            "Inference client unavailable; continuing with simulated output.",
            exc_info=True,
        )

    if use_perturbation:
        # In production the model would receive
        # add_perturbation(prompt, perturbation); the demo only displays
        # the generated prefix.
        perturbation = generate_lorem_perturbation(15)
        # Simulated "orthogonal" reasoning paths (LoPE effect).
        reasoning_paths = [
            (
                "[With LoPE Perturbation]\n\n"
                f"Perturbation prefix:\n'{perturbation}'\n\n"
                "Reasoning:\n"
                "1. The perturbation shifts the attention distribution\n"
                "2. This encourages exploration of alternative solution paths\n"
                "3. Result: Different reasoning strategy activated\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
            (
                "[With LoPE Perturbation]\n\n"
                f"Perturbation prefix:\n'{perturbation}'\n\n"
                "Reasoning:\n"
                "1. Breaking pattern recognition with noise\n"
                "2. Forcing model to re-encode the problem\n"
                "3. Result: Novel approach discovered\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
        ]
    else:
        # Standard reasoning paths.
        reasoning_paths = [
            (
                "[Baseline - No Perturbation]\n\n"
                "Reasoning:\n"
                "1. Standard approach to problem\n"
                "2. Following typical reasoning pattern\n"
                "3. Result: Conventional solution\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
            (
                "[Baseline - No Perturbation]\n\n"
                "Reasoning:\n"
                "1. Direct analysis of problem\n"
                "2. Standard step-by-step approach\n"
                "3. Result: Expected solution path\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
        ]
    return random.choice(reasoning_paths)
def compare_reasoning(task_idx):
    """Run the baseline and LoPE simulations for one task and summarise them.

    Args:
        task_idx: Index of the task in ``REASONING_TASKS``.

    Returns:
        A 5-tuple ``(task_name, prompt, baseline, lope, metrics)`` where
        ``baseline`` and ``lope`` are the simulated transcripts and
        ``metrics`` is a markdown string. All figures in ``metrics`` are
        simulated: the "Success Rate" cells show 75/80 or 0 depending on a
        single random draw per column.
    """
    selected = REASONING_TASKS[task_idx]
    task_name, prompt = selected["name"], selected["prompt"]

    # Baseline is simulated before LoPE; both draw from the shared RNG.
    baseline = simulate_reasoning(task_name, prompt, use_perturbation=False)
    lope = simulate_reasoning(task_name, prompt, use_perturbation=True)

    # Simulated outcomes: 3-in-4 chance for baseline, 4-in-5 for LoPE.
    baseline_success = random.choice([True, True, True, False])
    lope_success = random.choice([True, True, True, True, False])

    if baseline_success:
        baseline_rate = 75
    else:
        baseline_rate = 0
    if lope_success:
        lope_rate = 80
    else:
        lope_rate = 0

    metric_lines = [
        "## Comparison Metrics",
        "| Metric | Baseline | LoPE |",
        "|--------|----------|------|",
        f"| Success Rate | {baseline_rate}% | {lope_rate}% |",
        "| Reasoning Diversity | Low | High |",
        "| Exploration | Limited | Enhanced |",
        "| Novel Solutions | Rare | More Frequent |",
        "### Key Insight",
        'LoPE perturbations help overcome the "zero-advantage problem" in RL training by:',
        "1. Shifting the output distribution to explore orthogonal reasoning pathways",
        "2. Preventing mode collapse in reasoning strategies",
        "3. Enabling discovery of alternative solution approaches",
    ]
    # The trailing newline matches the closing line of the original block.
    metrics = "\n".join(metric_lines) + "\n"

    return task_name, prompt, baseline, lope, metrics
# Gradio Interface
# Builds the page top to bottom: title, task picker + task description,
# side-by-side baseline/LoPE outputs, metrics, then the paper reference.
# Components appear in the order they are created, so statement order here
# is the layout order.
with gr.Blocks(title="LoPE Demo: Prompt Space Perturbation") as demo:
    gr.Markdown("# 🧠 LoPE Demo: Prompt Space Perturbation for Reasoning")
    gr.Markdown("Explore how Lorem Ipsum perturbations can unlock orthogonal reasoning pathways in LLMs")

    # Top row: task selection on the left, read-only task details on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Select a Reasoning Task")
            # Each choice pairs a task name with its index in REASONING_TASKS;
            # the initial value 0 selects the first task.
            task_dropdown = gr.Dropdown(
                choices=[(t["name"], i) for i, t in enumerate(REASONING_TASKS)],
                value=0,
                label="Task"
            )
            run_btn = gr.Button("Run Comparison", variant="primary")
        with gr.Column():
            gr.Markdown("### Task Description")
            task_display = gr.Textbox(label="Task", interactive=False)
            prompt_display = gr.Textbox(label="Prompt", lines=3, interactive=False)

    gr.Markdown("---")

    # Middle row: the two simulated transcripts side by side.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Baseline (No Perturbation)")
            baseline_output = gr.Textbox(label="Baseline Reasoning", lines=10)
        with gr.Column():
            gr.Markdown("### LoPE (With Perturbation)")
            lope_output = gr.Textbox(label="LoPE Reasoning", lines=10)

    gr.Markdown("---")
    # Filled with the markdown metrics string returned by compare_reasoning.
    metrics_output = gr.Markdown()

    def update_display(task_idx):
        """Return the name and prompt of the task at ``task_idx``."""
        task = REASONING_TASKS[task_idx]
        return task["name"], task["prompt"]

    # Refresh the task name/prompt boxes whenever the selection changes.
    task_dropdown.change(
        update_display,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display]
    )
    # Run the comparison; its five return values map onto these five outputs
    # in order.
    run_btn.click(
        compare_reasoning,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display, baseline_output, lope_output, metrics_output]
    )
    # Initialize display
    # Populates the task name/prompt boxes for the default selection on load.
    demo.load(
        update_display,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display]
    )

    gr.Markdown("---")
    gr.Markdown("### 📄 Paper Reference")
    gr.Markdown("**Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration** (arXiv:2605.05566)")
    gr.Markdown("Huang et al., 2026 - Demonstrating how perturbations help overcome the zero-advantage problem in RL training")

# Start the app; runs at import time, as in the original script.
demo.launch()