# --- Hugging Face Spaces front-matter ---------------------------------------
# NOTE(review): this YAML front-matter belongs in the Space's README.md, not
# app.py — kept here as comments so the module stays syntactically valid.
# title: "LenVM Token-Level Length Control Demo"
# emoji: "📏"
# colorFrom: "blue"
# colorTo: "green"
# sdk: "gradio"
# sdk_version: "4.36.0"
# app_file: "app.py"
# ----------------------------------------------------------------------------
"""Gradio demo of concepts from the LenVM (Length Value Model) paper.

Simulates token-level length estimation — treating remaining generation
length as a value-estimation problem — and compares token-budget strategies.
No real model is loaded; all numbers are heuristic simulations.
"""

import random
import re  # NOTE(review): unused in this file; kept in case callers rely on it
import zlib

import gradio as gr

# Lazy-loading placeholder; populated on first call to get_lenvm_model().
lenvm_model = None


def get_lenvm_model():
    """Lazily initialise the model handle to avoid work at import time.

    In a real implementation this would load trained LenVM weights; here it
    simply marks the global handle as "loaded" and returns it.
    """
    global lenvm_model
    if lenvm_model is None:
        # Simulated LenVM value estimation — no actual weights are loaded.
        lenvm_model = "loaded"
    return lenvm_model


def estimate_remaining_length(text, current_tokens, target_length):
    """Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as a value
    estimation problem with a constant negative reward per token; this
    function mimics that with a simple character-count heuristic.

    Args:
        text: Text generated so far (may be empty or None).
        current_tokens: Unused; retained for interface compatibility.
        target_length: Target generation length, in tokens.

    Returns:
        dict with keys ``estimated_tokens``, ``remaining_tokens``,
        ``value_score``, ``status`` and ``token_efficiency`` (percent of
        the target budget already used).
    """
    chars = len(text) if text else 0
    # Rough approximation: ~4 characters per token.
    estimated_tokens = chars // 4 + 1
    remaining = max(0, target_length - estimated_tokens)

    # Simulated value estimation (higher value = more tokens expected).
    # This mirrors LenVM's approach of predicting a discounted return.
    if remaining <= 0:
        value_score = 0.1  # near completion
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"

    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        # max(..., 1) guards against division by zero for a zero target.
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }


def analyze_generation(text, target_length):
    """Analyze text and return LenVM-style length predictions as Markdown.

    Args:
        text: Text to analyze; an empty value yields a prompt message.
        target_length: Target token budget used for the estimate.

    Returns:
        A Markdown-formatted report string.
    """
    if not text:
        return "Please enter some text to analyze."

    result = estimate_remaining_length(text, 0, target_length)

    # Qualitative label for the remaining generation horizon.
    if result["value_score"] > 0.7:
        horizon = "substantial"
    elif result["value_score"] > 0.4:
        horizon = "moderate"
    else:
        horizon = "minimal"

    output = f"""## LenVM Analysis Results

**Input Statistics:**
- Characters: {len(text)}
- Words: {len(text.split())}
- Estimated Tokens: ~{result['estimated_tokens']}

**Length Value Model Predictions:**
- Target Length: {target_length} tokens
- Remaining Tokens: {result['remaining_tokens']}
- Value Score: {result['value_score']} (higher = more generation expected)
- Status: {result['status']}
- Current Efficiency: {result['token_efficiency']}% of target used

**Interpretation:**
Based on the LenVM paper methodology, this text shows a value score of {result['value_score']}, indicating {horizon} remaining generation horizon.
"""
    return output


def simulate_token_budget_strategy(prompt, max_tokens, strategy=None):
    """Compare simulated token-budget strategies inspired by LenVM findings.

    Args:
        prompt: Reasoning prompt; an empty value yields a prompt message.
        max_tokens: Token budget to compare strategies against.
        strategy: Unused; all strategies are always compared. Given a default
            because the Gradio click handler wires only two inputs — without
            the default every button press raised ``TypeError``.

    Returns:
        A Markdown-formatted comparison string.
    """
    if not prompt:
        return "Please enter a prompt."

    # Seed deterministically per prompt. zlib.crc32 is stable across runs,
    # unlike the built-in hash() of a str, which is salted per process
    # (PYTHONHASHSEED) and made the "seeded" simulation non-reproducible.
    random.seed(zlib.crc32(prompt.encode("utf-8")) % 10000)
    baseline_tokens = random.randint(max_tokens - 50, max_tokens + 100)

    results = {}
    results["baseline"] = baseline_tokens
    results["hard_budget"] = min(baseline_tokens, max_tokens)
    results["lenvm"] = int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens
    results["early_term"] = int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens

    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"
    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"

    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."
    return output


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    gr.Markdown("""
    # 📏 LenVM: Token-Level Length Modeling Demo

    This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.

    LenVM treats length modeling as a value estimation problem, predicting remaining generation length through token-level value signals rather than sequence-level heuristics.
    """)

    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5,
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length",
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")
            with gr.Column():
                analysis_output = gr.Markdown(label="Results")
        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output,
        )

    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3,
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget",
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")
            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")
        # NOTE: only two inputs are wired; simulate_token_budget_strategy's
        # third parameter therefore has a default value.
        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output,
        )

    gr.Markdown("""
    ### About This Demo

    This Space demonstrates key concepts from the LenVM paper:
    - **Token-level value estimation**: Predicting remaining generation length per token
    - **Discounted return formulation**: Using constant negative reward per token
    - **Length control**: Enabling continuous trade-off between performance and efficiency

    [Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
    """)

if __name__ == "__main__":
    demo.launch()