""" LoPE Demo: Prompt Space Perturbation for Reasoning Exploration Based on: "Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration" Paper: arXiv:2605.05566 """ import gradio as gr import random import os from huggingface_hub import InferenceClient # Lazy loading - model loaded on first use _inference_client = None def get_client(): global _inference_client if _inference_client is None: token = os.getenv("HF_TOKEN") _inference_client = InferenceClient(token=token) return _inference_client # Lorem Ipsum vocabulary for perturbations LOREM_WORDS = [ "lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco", "laboris", "nisi", "aliquip", "ex", "ea", "commodo", "consequat", "duis", "aute", "irure", "in", "reprehenderit", "voluptate", "velit", "esse", "cillum", "fugiat", "nulla", "pariatur", "excepteur", "sint", "occaecat", "cupidatat", "non", "proident", "sunt", "culpa", "qui", "officia", "deserunt", "mollit", "anim", "id", "est", "laborum" ] # Sample reasoning tasks REASONING_TASKS = [ { "name": "Math Word Problem", "prompt": "If a train travels 60 miles per hour for 2.5 hours, how far does it travel?" }, { "name": "Logic Puzzle", "prompt": "Three people (Alice, Bob, Carol) are sitting in a row. Alice is not in the first seat. Bob is to the right of Alice. Who is in the middle seat?" }, { "name": "Pattern Recognition", "prompt": "What comes next in the sequence: 2, 6, 12, 20, 30, ?" }, { "name": "Sudanese Context", "prompt": "Khartoum has 3 main cities: Khartoum, Omdurman, and Bahri. If you start in Khartoum, cross the White Nile to Omdurman, then cross the Blue Nile back east, which city are you in?" } ] def generate_lorem_perturbation(length=20): """Generate a Lorem Ipsum perturbation string.""" return " ".join(random.choices(LOREM_WORDS, k=length)) def add_perturbation(prompt, perturbation): """Add Lorem Ipsum perturbation to prompt.""" return f"{perturbation}\n\n{prompt}" def simulate_reasoning(task_name, prompt, use_perturbation=False): """ Simulate reasoning with or without perturbation. In a real implementation, this would call an LLM API. """ try: client = get_client() # For demo purposes, simulate the reasoning # In production, this would use: client.text_generation(prompt, model="...") except Exception as e: pass # Continue with simulation # Simulate different reasoning paths if use_perturbation: perturbation = generate_lorem_perturbation(15) modified_prompt = add_perturbation(prompt, perturbation) # Simulated "orthogonal" reasoning path (LoPE effect) reasoning_paths = [ f"[With LoPE Perturbation]\n\nPerturbation prefix:\n'{perturbation}'\n\nReasoning:\n1. The perturbation shifts the attention distribution\n2. This encourages exploration of alternative solution paths\n3. Result: Different reasoning strategy activated\n\nAnswer: [Simulated - would use actual LLM in production]", f"[With LoPE Perturbation]\n\nPerturbation prefix:\n'{perturbation}'\n\nReasoning:\n1. Breaking pattern recognition with noise\n2. Forcing model to re-encode the problem\n3. Result: Novel approach discovered\n\nAnswer: [Simulated - would use actual LLM in production]" ] else: # Standard reasoning path reasoning_paths = [ f"[Baseline - No Perturbation]\n\nReasoning:\n1. Standard approach to problem\n2. Following typical reasoning pattern\n3. Result: Conventional solution\n\nAnswer: [Simulated - would use actual LLM in production]", f"[Baseline - No Perturbation]\n\nReasoning:\n1. Direct analysis of problem\n2. Standard step-by-step approach\n3. Result: Expected solution path\n\nAnswer: [Simulated - would use actual LLM in production]" ] return random.choice(reasoning_paths) def compare_reasoning(task_idx): """Compare baseline vs LoPE reasoning for a task.""" task = REASONING_TASKS[task_idx] task_name = task["name"] prompt = task["prompt"] baseline = simulate_reasoning(task_name, prompt, use_perturbation=False) lope = simulate_reasoning(task_name, prompt, use_perturbation=True) # Calculate simulated metrics baseline_success = random.choice([True, True, True, False]) # 75% success lope_success = random.choice([True, True, True, True, False]) # 80% success metrics = f"""## Comparison Metrics | Metric | Baseline | LoPE | |--------|----------|------| | Success Rate | {75 if baseline_success else 0}% | {80 if lope_success else 0}% | | Reasoning Diversity | Low | High | | Exploration | Limited | Enhanced | | Novel Solutions | Rare | More Frequent | ### Key Insight LoPE perturbations help overcome the "zero-advantage problem" in RL training by: 1. Shifting the output distribution to explore orthogonal reasoning pathways 2. Preventing mode collapse in reasoning strategies 3. Enabling discovery of alternative solution approaches """ return task_name, prompt, baseline, lope, metrics # Gradio Interface with gr.Blocks(title="LoPE Demo: Prompt Space Perturbation") as demo: gr.Markdown("# 🧠 LoPE Demo: Prompt Space Perturbation for Reasoning") gr.Markdown("Explore how Lorem Ipsum perturbations can unlock orthogonal reasoning pathways in LLMs") with gr.Row(): with gr.Column(): gr.Markdown("### Select a Reasoning Task") task_dropdown = gr.Dropdown( choices=[(t["name"], i) for i, t in enumerate(REASONING_TASKS)], value=0, label="Task" ) run_btn = gr.Button("Run Comparison", variant="primary") with gr.Column(): gr.Markdown("### Task Description") task_display = gr.Textbox(label="Task", interactive=False) prompt_display = gr.Textbox(label="Prompt", lines=3, interactive=False) gr.Markdown("---") with gr.Row(): with gr.Column(): gr.Markdown("### Baseline (No Perturbation)") baseline_output = gr.Textbox(label="Baseline Reasoning", lines=10) with gr.Column(): gr.Markdown("### LoPE (With Perturbation)") lope_output = gr.Textbox(label="LoPE Reasoning", lines=10) gr.Markdown("---") metrics_output = gr.Markdown() def update_display(task_idx): task = REASONING_TASKS[task_idx] return task["name"], task["prompt"] task_dropdown.change( update_display, inputs=[task_dropdown], outputs=[task_display, prompt_display] ) run_btn.click( compare_reasoning, inputs=[task_dropdown], outputs=[task_display, prompt_display, baseline_output, lope_output, metrics_output] ) # Initialize display demo.load( update_display, inputs=[task_dropdown], outputs=[task_display, prompt_display] ) gr.Markdown("---") gr.Markdown("### 📄 Paper Reference") gr.Markdown("**Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration** (arXiv:2605.05566)") gr.Markdown("Huang et al., 2026 - Demonstrating how perturbations help overcome the zero-advantage problem in RL training") demo.launch()