---
title: "LenVM Token-Level Length Control Demo"
emoji: "📏"
colorFrom: "blue"
colorTo: "green"
sdk: "gradio"
sdk_version: "4.36.0"
app_file: "app.py"
---
|
|
| import gradio as gr |
| import re |
|
|
| |
# Module-level cache for the lazily loaded LenVM model handle.
lenvm_model = None


def get_lenvm_model():
    """Return the LenVM model, loading it on first use.

    The handle is cached in the module-level ``lenvm_model`` variable so
    repeated calls are cheap and nothing heavy happens at import time.
    """
    global lenvm_model
    if lenvm_model is not None:
        return lenvm_model
    # Placeholder for real model initialisation in the demo.
    lenvm_model = "loaded"
    return lenvm_model
|
|
def estimate_remaining_length(text, current_tokens, target_length):
    """
    Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as
    a value estimation problem with constant negative reward per token.

    Parameters
    ----------
    text : str
        Text generated so far (may be empty).
    current_tokens : int
        Accepted for interface compatibility; the token count is
        re-estimated from ``text`` rather than taken from this value.
    target_length : int
        Desired total generation length in tokens.

    Returns
    -------
    dict
        Keys: ``estimated_tokens``, ``remaining_tokens``, ``value_score``,
        ``status`` (human-readable), ``token_efficiency`` (percent of the
        target budget already used).
    """
    chars = len(text) if text else 0

    # Rough heuristic: ~4 characters per token; the +1 keeps the estimate
    # strictly positive even for empty input.
    estimated_tokens = chars // 4 + 1

    # Remaining budget, clamped at zero once the target is reached.
    remaining = max(0, target_length - estimated_tokens)

    # Coarse value score: higher means more generation is still expected.
    # Status markers were mojibake-garbled in the source; repaired to emoji.
    if remaining <= 0:
        value_score = 0.1
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"

    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }
|
|
def analyze_generation(text, target_length):
    """Analyze text generation and estimate length properties."""
    if not text:
        return "Please enter some text to analyze."

    stats = estimate_remaining_length(text, 0, target_length)
    score = stats["value_score"]

    # Pick the horizon qualifier with explicit branches instead of a
    # nested conditional expression embedded in the template.
    if score > 0.7:
        horizon = "substantial"
    elif score > 0.4:
        horizon = "moderate"
    else:
        horizon = "minimal"

    # Assemble the Markdown report line by line; output text is identical
    # to the original template.
    report_lines = [
        "## LenVM Analysis Results",
        "",
        "**Input Statistics:**",
        f"- Characters: {len(text)}",
        f"- Words: {len(text.split())}",
        f"- Estimated Tokens: ~{stats['estimated_tokens']}",
        "",
        "**Length Value Model Predictions:**",
        f"- Target Length: {target_length} tokens",
        f"- Remaining Tokens: {stats['remaining_tokens']}",
        f"- Value Score: {score} (higher = more generation expected)",
        f"- Status: {stats['status']}",
        f"- Current Efficiency: {stats['token_efficiency']}% of target used",
        "",
        "**Interpretation:**",
        f"Based on the LenVM paper methodology, this text shows a value score of {score},",
        f"indicating {horizon}",
        "remaining generation horizon.",
    ]
    return "\n".join(report_lines) + "\n"
|
|
def simulate_token_budget_strategy(prompt, max_tokens, strategy):
    """
    Demonstrate different token budget strategies inspired by LenVM findings.

    Parameters
    ----------
    prompt : str
        Reasoning prompt; used only to seed the simulated token counts.
    max_tokens : int
        Token budget the strategies are compared against.
    strategy : str
        Accepted for interface compatibility; the report always compares
        all strategies, so this argument is currently unused.

    Returns
    -------
    str
        Markdown report comparing simulated token usage per strategy.
    """
    if not prompt:
        return "Please enter a prompt."

    import random
    import zlib

    # Seed from a *stable* hash of the prompt so the simulation is
    # reproducible across processes. The original used built-in hash(),
    # which is salted per interpreter run (PYTHONHASHSEED) and therefore
    # produced different "reproducible" numbers on every launch.
    random.seed(zlib.crc32(prompt.encode("utf-8")) % 10000)

    # Simulated natural completion length around the requested budget.
    baseline_tokens = random.randint(max_tokens - 50, max_tokens + 100)

    results = {}
    results["baseline"] = baseline_tokens
    results["hard_budget"] = min(baseline_tokens, max_tokens)
    # LenVM adaptively stops short of the budget only when the natural
    # completion would overshoot it.
    results["lenvm"] = int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens
    results["early_term"] = int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens

    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"

    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"

    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."

    return output
|
|
| |
# --- Gradio UI ------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    # Intro header. The heading emoji was mojibake-garbled in the source
    # ("π"); repaired to a ruler emoji to match the length-control theme.
    gr.Markdown("""
    # 📏 LenVM: Token-Level Length Modeling Demo

    This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.

    LenVM treats length modeling as a value estimation problem, predicting remaining generation length
    through token-level value signals rather than sequence-level heuristics.
    """)

    # Tab 1: analyze a piece of generated text against a target token length.
    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length"
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")

            with gr.Column():
                analysis_output = gr.Markdown(label="Results")

        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output
        )

    # Tab 2: compare simulated token-budget strategies for a prompt.
    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget"
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")

            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")

        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output
        )

    gr.Markdown("""
    ### About This Demo

    This Space demonstrates key concepts from the LenVM paper:
    - **Token-level value estimation**: Predicting remaining generation length per token
    - **Discounted return formulation**: Using constant negative reward per token
    - **Length control**: Enabling continuous trade-off between performance and efficiency

    [Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
    """)


# Launch only when run as a script (Spaces imports app.py and launches itself).
if __name__ == "__main__":
    demo.launch()
|
|