Spaces:
Running
Running
| """ | |
| LoPE Demo: Prompt Space Perturbation for Reasoning Exploration | |
| Based on: "Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration" | |
| Paper: arXiv:2605.05566 | |
| """ | |
| import gradio as gr | |
| import random | |
| import os | |
| from huggingface_hub import InferenceClient | |
# Shared client slot. It stays None until get_client() is first called, so
# importing this module never constructs a client.
_inference_client = None


def get_client():
    """Return the module-wide InferenceClient, building it on first use.

    The client is constructed with the value of the ``HF_TOKEN`` environment
    variable (``None`` when the variable is unset) and then cached in
    ``_inference_client`` so later calls return the same object.
    """
    global _inference_client
    if _inference_client is not None:
        return _inference_client
    hf_token = os.getenv("HF_TOKEN")
    _inference_client = InferenceClient(token=hf_token)
    return _inference_client
# Lorem Ipsum word list that perturbation strings are sampled from.
# Written as one whitespace-separated string and split into a list of words.
LOREM_WORDS = (
    "lorem ipsum dolor sit amet consectetur adipiscing elit "
    "sed do eiusmod tempor incididunt ut labore et dolore "
    "magna aliqua enim ad minim veniam quis nostrud "
    "exercitation ullamco laboris nisi aliquip ex ea commodo "
    "consequat duis aute irure in reprehenderit voluptate "
    "velit esse cillum fugiat nulla pariatur excepteur sint "
    "occaecat cupidatat non proident sunt culpa qui officia "
    "deserunt mollit anim id est laborum"
).split()
# Demo reasoning tasks. Each entry is a dict with a display "name" and the
# "prompt" text; the list position of an entry is what the UI uses to pick it.
REASONING_TASKS = [
    {"name": title, "prompt": question}
    for title, question in (
        (
            "Math Word Problem",
            "If a train travels 60 miles per hour for 2.5 hours, how far does it travel?",
        ),
        (
            "Logic Puzzle",
            "Three people (Alice, Bob, Carol) are sitting in a row. Alice is not in the first seat. Bob is to the right of Alice. Who is in the middle seat?",
        ),
        (
            "Pattern Recognition",
            "What comes next in the sequence: 2, 6, 12, 20, 30, ?",
        ),
        (
            "Sudanese Context",
            "Khartoum has 3 main cities: Khartoum, Omdurman, and Bahri. If you start in Khartoum, cross the White Nile to Omdurman, then cross the Blue Nile back east, which city are you in?",
        ),
    )
]
def generate_lorem_perturbation(length=20):
    """Build a random filler phrase out of the Lorem Ipsum vocabulary.

    Args:
        length: How many words to draw from ``LOREM_WORDS``. Words are drawn
            with replacement, so repeats are possible. Defaults to 20.

    Returns:
        The drawn words joined by single spaces.
    """
    sampled_words = random.choices(LOREM_WORDS, k=length)
    separator = " "
    return separator.join(sampled_words)
def add_perturbation(prompt, perturbation):
    """Prefix a prompt with a perturbation string.

    Args:
        prompt: The task prompt to keep intact.
        perturbation: Text placed in front of the prompt.

    Returns:
        The perturbation, a blank line, then the prompt.
    """
    template = "{}\n\n{}"
    return template.format(perturbation, prompt)
def simulate_reasoning(task_name, prompt, use_perturbation=False):
    """Return a canned reasoning trace, with or without a LoPE prefix.

    No model is queried. The function picks one of two fixed texts for the
    requested mode at random. A real implementation would send the prompt
    (prefixed via ``add_perturbation`` when perturbing) to an LLM, e.g. with
    ``client.text_generation(prompt, model="...")``.

    Args:
        task_name: Display name of the task. Not used by the simulation; kept
            so callers can pass it.
        prompt: The task prompt. Not used by the simulation; kept so callers
            can pass it.
        use_perturbation: When true, generate a 15-word Lorem Ipsum prefix and
            return one of the "With LoPE Perturbation" texts quoting it.
            Otherwise return one of the "Baseline" texts.

    Returns:
        The selected reasoning text as a single string.
    """
    import logging

    try:
        # The client is not used yet; it is only created here so that the
        # lazy initialisation still happens on the first simulated run.
        get_client()
    except Exception:
        # Best effort: the simulation does not need the client, so report the
        # failure instead of hiding it and carry on.
        logging.getLogger(__name__).warning(
            "Inference client unavailable; continuing with simulated output",
            exc_info=True,
        )

    if use_perturbation:
        perturbation = generate_lorem_perturbation(15)
        # Simulated "orthogonal" reasoning paths (LoPE effect).
        reasoning_paths = [
            (
                "[With LoPE Perturbation]\n\n"
                f"Perturbation prefix:\n'{perturbation}'\n\n"
                "Reasoning:\n"
                "1. The perturbation shifts the attention distribution\n"
                "2. This encourages exploration of alternative solution paths\n"
                "3. Result: Different reasoning strategy activated\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
            (
                "[With LoPE Perturbation]\n\n"
                f"Perturbation prefix:\n'{perturbation}'\n\n"
                "Reasoning:\n"
                "1. Breaking pattern recognition with noise\n"
                "2. Forcing model to re-encode the problem\n"
                "3. Result: Novel approach discovered\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
        ]
    else:
        # Standard reasoning paths.
        reasoning_paths = [
            (
                "[Baseline - No Perturbation]\n\n"
                "Reasoning:\n"
                "1. Standard approach to problem\n"
                "2. Following typical reasoning pattern\n"
                "3. Result: Conventional solution\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
            (
                "[Baseline - No Perturbation]\n\n"
                "Reasoning:\n"
                "1. Direct analysis of problem\n"
                "2. Standard step-by-step approach\n"
                "3. Result: Expected solution path\n\n"
                "Answer: [Simulated - would use actual LLM in production]"
            ),
        ]
    return random.choice(reasoning_paths)
def compare_reasoning(task_idx):
    """Run one baseline and one LoPE simulation for a task and summarise them.

    Args:
        task_idx: Position of the task in ``REASONING_TASKS``.

    Returns:
        A 5-tuple ``(task name, prompt, baseline text, LoPE text, metrics)``
        where ``metrics`` is a Markdown string containing a comparison table
        and a short explanatory list.
    """
    selected = REASONING_TASKS[task_idx]
    name, question = selected["name"], selected["prompt"]

    # Baseline first, then LoPE, so random draws happen in a fixed order.
    plain_trace = simulate_reasoning(name, question, use_perturbation=False)
    perturbed_trace = simulate_reasoning(name, question, use_perturbation=True)

    # Simulated metrics: one random trial per arm (3-in-4 odds for baseline,
    # 4-in-5 for LoPE); a failed trial is displayed as 0.
    baseline_rate = 75 if random.choice([True, True, True, False]) else 0
    lope_rate = 80 if random.choice([True, True, True, True, False]) else 0

    report = f"""## Comparison Metrics
| Metric | Baseline | LoPE |
|--------|----------|------|
| Success Rate | {baseline_rate}% | {lope_rate}% |
| Reasoning Diversity | Low | High |
| Exploration | Limited | Enhanced |
| Novel Solutions | Rare | More Frequent |
### Key Insight
LoPE perturbations help overcome the "zero-advantage problem" in RL training by:
1. Shifting the output distribution to explore orthogonal reasoning pathways
2. Preventing mode collapse in reasoning strategies
3. Enabling discovery of alternative solution approaches
"""
    return name, question, plain_trace, perturbed_trace, report
# Gradio Interface
# Page layout, top to bottom: title and tagline, a row with the task picker
# next to the task description, a row with the baseline and LoPE outputs side
# by side, the metrics panel, and the paper reference.
with gr.Blocks(title="LoPE Demo: Prompt Space Perturbation") as demo:
    gr.Markdown("# 🧠 LoPE Demo: Prompt Space Perturbation for Reasoning")
    gr.Markdown("Explore how Lorem Ipsum perturbations can unlock orthogonal reasoning pathways in LLMs")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Select a Reasoning Task")
            # Each choice pairs a task name with its index in REASONING_TASKS;
            # the callbacks below index REASONING_TASKS with the value they
            # receive, so the dropdown value is presumably that index
            # (confirm against the Gradio Dropdown docs).
            task_dropdown = gr.Dropdown(
                choices=[(t["name"], i) for i, t in enumerate(REASONING_TASKS)],
                value=0,
                label="Task"
            )
            run_btn = gr.Button("Run Comparison", variant="primary")
        with gr.Column():
            gr.Markdown("### Task Description")
            # Read-only fields showing the selected task's name and prompt.
            task_display = gr.Textbox(label="Task", interactive=False)
            prompt_display = gr.Textbox(label="Prompt", lines=3, interactive=False)
    gr.Markdown("---")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Baseline (No Perturbation)")
            baseline_output = gr.Textbox(label="Baseline Reasoning", lines=10)
        with gr.Column():
            gr.Markdown("### LoPE (With Perturbation)")
            lope_output = gr.Textbox(label="LoPE Reasoning", lines=10)
    gr.Markdown("---")
    # Filled with the Markdown report returned by compare_reasoning.
    metrics_output = gr.Markdown()

    def update_display(task_idx):
        """Return the (name, prompt) pair of the task at ``task_idx``."""
        task = REASONING_TASKS[task_idx]
        return task["name"], task["prompt"]

    # Refresh the description fields whenever a different task is picked.
    task_dropdown.change(
        update_display,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display]
    )
    # The five outputs line up with compare_reasoning's 5-tuple:
    # name, prompt, baseline text, LoPE text, metrics.
    run_btn.click(
        compare_reasoning,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display, baseline_output, lope_output, metrics_output]
    )
    # Initialize display
    demo.load(
        update_display,
        inputs=[task_dropdown],
        outputs=[task_display, prompt_display]
    )
    gr.Markdown("---")
    gr.Markdown("### 📄 Paper Reference")
    gr.Markdown("**Nonsense Helps: Prompt Space Perturbation Broadens Reasoning Exploration** (arXiv:2605.05566)")
    gr.Markdown("Huang et al., 2026 - Demonstrating how perturbations help overcome the zero-advantage problem in RL training")
# Start the app when this file is executed.
demo.launch()