# --- Hugging Face Spaces front-matter ---------------------------------------
# NOTE(review): this YAML front-matter belongs in the Space's README.md, not
# app.py — kept here as comments so the module stays syntactically valid.
# title: "LenVM Token-Level Length Control Demo"
# emoji: "📏"
# colorFrom: "blue"
# colorTo: "green"
# sdk: "gradio"
# sdk_version: "4.36.0"
# app_file: "app.py"
# ----------------------------------------------------------------------------
"""Gradio demo of concepts from the LenVM (Length Value Model) paper.

Simulates token-level length estimation — treating remaining generation
length as a value-estimation problem — and compares token-budget strategies.
No real model is loaded; all numbers are heuristic simulations.
"""

import random
import re  # NOTE(review): unused in this file; kept in case callers rely on it
import zlib

import gradio as gr

# Lazy-loading placeholder; populated on first call to get_lenvm_model().
lenvm_model = None


def get_lenvm_model():
    """Lazily initialise the model handle to avoid work at import time.

    In a real implementation this would load trained LenVM weights; here it
    simply marks the global handle as "loaded" and returns it.
    """
    global lenvm_model
    if lenvm_model is None:
        # Simulated LenVM value estimation — no actual weights are loaded.
        lenvm_model = "loaded"
    return lenvm_model


def estimate_remaining_length(text, current_tokens, target_length):
    """Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as a value
    estimation problem with a constant negative reward per token; this
    function mimics that with a simple character-count heuristic.

    Args:
        text: Text generated so far (may be empty or None).
        current_tokens: Unused; retained for interface compatibility.
        target_length: Target generation length, in tokens.

    Returns:
        dict with keys ``estimated_tokens``, ``remaining_tokens``,
        ``value_score``, ``status`` and ``token_efficiency`` (percent of
        the target budget already used).
    """
    chars = len(text) if text else 0
    # Rough approximation: ~4 characters per token.
    estimated_tokens = chars // 4 + 1
    remaining = max(0, target_length - estimated_tokens)

    # Simulated value estimation (higher value = more tokens expected).
    # This mirrors LenVM's approach of predicting a discounted return.
    if remaining <= 0:
        value_score = 0.1  # near completion
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"

    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        # max(..., 1) guards against division by zero for a zero target.
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }


def analyze_generation(text, target_length):
    """Analyze text and return LenVM-style length predictions as Markdown.

    Args:
        text: Text to analyze; an empty value yields a prompt message.
        target_length: Target token budget used for the estimate.

    Returns:
        A Markdown-formatted report string.
    """
    if not text:
        return "Please enter some text to analyze."

    result = estimate_remaining_length(text, 0, target_length)

    # Qualitative label for the remaining generation horizon.
    if result["value_score"] > 0.7:
        horizon = "substantial"
    elif result["value_score"] > 0.4:
        horizon = "moderate"
    else:
        horizon = "minimal"

    output = f"""## LenVM Analysis Results

**Input Statistics:**
- Characters: {len(text)}
- Words: {len(text.split())}
- Estimated Tokens: ~{result['estimated_tokens']}

**Length Value Model Predictions:**
- Target Length: {target_length} tokens
- Remaining Tokens: {result['remaining_tokens']}
- Value Score: {result['value_score']} (higher = more generation expected)
- Status: {result['status']}
- Current Efficiency: {result['token_efficiency']}% of target used

**Interpretation:**
Based on the LenVM paper methodology, this text shows a value score of {result['value_score']}, indicating {horizon} remaining generation horizon.
"""
    return output


def simulate_token_budget_strategy(prompt, max_tokens, strategy=None):
    """Compare simulated token-budget strategies inspired by LenVM findings.

    Args:
        prompt: Reasoning prompt; an empty value yields a prompt message.
        max_tokens: Token budget to compare strategies against.
        strategy: Unused; all strategies are always compared. Given a default
            because the Gradio click handler wires only two inputs — without
            the default every button press raised ``TypeError``.

    Returns:
        A Markdown-formatted comparison string.
    """
    if not prompt:
        return "Please enter a prompt."

    # Seed deterministically per prompt. zlib.crc32 is stable across runs,
    # unlike the built-in hash() of a str, which is salted per process
    # (PYTHONHASHSEED) and made the "seeded" simulation non-reproducible.
    random.seed(zlib.crc32(prompt.encode("utf-8")) % 10000)
    baseline_tokens = random.randint(max_tokens - 50, max_tokens + 100)

    results = {}
    results["baseline"] = baseline_tokens
    results["hard_budget"] = min(baseline_tokens, max_tokens)
    results["lenvm"] = int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens
    results["early_term"] = int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens

    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"
    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"

    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."
    return output


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    gr.Markdown("""
    # 📏 LenVM: Token-Level Length Modeling Demo

    This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.

    LenVM treats length modeling as a value estimation problem, predicting remaining generation length through token-level value signals rather than sequence-level heuristics.
    """)

    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5,
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length",
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")
            with gr.Column():
                analysis_output = gr.Markdown(label="Results")
        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output,
        )

    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3,
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget",
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")
            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")
        # NOTE: only two inputs are wired; simulate_token_budget_strategy's
        # third parameter therefore has a default value.
        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output,
        )

    gr.Markdown("""
    ### About This Demo

    This Space demonstrates key concepts from the LenVM paper:
    - **Token-level value estimation**: Predicting remaining generation length per token
    - **Discounted return formulation**: Using constant negative reward per token
    - **Length control**: Enabling continuous trade-off between performance and efficiency

    [Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
    """)

if __name__ == "__main__":
    demo.launch()