# Hugging Face web-UI upload artifact — not application code:
# O96a's picture
# Upload app.py with huggingface_hub
# 654c363 verified
# NOTE(review): Hugging Face Spaces config frontmatter — this belongs in
# README.md, not app.py; preserved as a string literal so the module parses.
"""
---
title: "LenVM Token-Level Length Control Demo"
emoji: "πŸ“"
colorFrom: "blue"
colorTo: "green"
sdk: "gradio"
sdk_version: "4.36.0"
app_file: "app.py"
---
"""
import gradio as gr
import re
# Placeholder for the lazily-initialized model handle.
lenvm_model = None


def get_lenvm_model():
    """Return the (simulated) LenVM model, loading it on first use.

    Loading is deferred so that importing this module stays cheap; a real
    implementation would construct the trained value model here instead of
    assigning the sentinel string.
    """
    global lenvm_model
    if lenvm_model is not None:
        return lenvm_model
    # Simulated LenVM value estimation stub standing in for a real load.
    lenvm_model = "loaded"
    return lenvm_model
def estimate_remaining_length(text, current_tokens, target_length):
    """
    Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as
    a value estimation problem with constant negative reward per token.

    Args:
        text: Text generated so far (may be empty or None).
        current_tokens: Unused; retained for interface compatibility.
        target_length: Desired total generation length in tokens.

    Returns:
        dict with keys ``estimated_tokens``, ``remaining_tokens``,
        ``value_score``, ``status``, and ``token_efficiency`` (percent
        of the target already used; can exceed 100).
    """
    # Rough token estimate: ~4 characters per token, minimum of 1.
    chars = len(text) if text else 0
    estimated_tokens = chars // 4 + 1
    # Tokens still expected before reaching the target (never negative).
    remaining = max(0, target_length - estimated_tokens)
    # Simulated value head: higher score = more generation expected,
    # mirroring LenVM's discounted-return prediction.
    if remaining <= 0:
        value_score = 0.1  # Near completion
        status = "βœ… At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟑 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "πŸ”΅ Mid-generation"
    else:
        value_score = 0.9
        status = "🟒 Early generation"
    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        # max(..., 1) guards against division by zero for target_length == 0.
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }
def analyze_generation(text, target_length):
    """Analyze text generation and estimate length properties"""
    if not text:
        return "Please enter some text to analyze."
    metrics = estimate_remaining_length(text, 0, target_length)
    score = metrics['value_score']
    # Map the simulated value score onto a qualitative horizon label.
    if score > 0.7:
        horizon = 'substantial'
    elif score > 0.4:
        horizon = 'moderate'
    else:
        horizon = 'minimal'
    report_lines = [
        "## LenVM Analysis Results",
        "**Input Statistics:**",
        f"- Characters: {len(text)}",
        f"- Words: {len(text.split())}",
        f"- Estimated Tokens: ~{metrics['estimated_tokens']}",
        "**Length Value Model Predictions:**",
        f"- Target Length: {target_length} tokens",
        f"- Remaining Tokens: {metrics['remaining_tokens']}",
        f"- Value Score: {score} (higher = more generation expected)",
        f"- Status: {metrics['status']}",
        f"- Current Efficiency: {metrics['token_efficiency']}% of target used",
        "**Interpretation:**",
        f"Based on the LenVM paper methodology, this text shows a value score of {score},",
        f"indicating {horizon}",
        "remaining generation horizon.",
    ]
    # Trailing newline matches the original triple-quoted template.
    return "\n".join(report_lines) + "\n"
def simulate_token_budget_strategy(prompt, max_tokens, strategy="greedy"):
    """
    Demonstrate different token budget strategies inspired by LenVM findings.

    Args:
        prompt: Reasoning prompt; seeds the simulated generation length.
        max_tokens: Token budget the strategies are compared against.
        strategy: Unused by the simulation; defaulted so the Gradio
            handler (which wires only two inputs) can call this directly.

    Returns:
        Markdown string comparing simulated generation lengths, or a
        short message when the prompt is empty.
    """
    if not prompt:
        return "Please enter a prompt."
    import random
    import zlib
    # Seed from a *stable* digest of the prompt. The previous
    # ``random.seed(hash(prompt) % 10000)`` was non-deterministic across
    # processes because str hashing is randomized (PYTHONHASHSEED), so the
    # "same prompt, same result" behavior broke on every app restart.
    # A private Random instance also avoids reseeding the global RNG.
    rng = random.Random(zlib.crc32(prompt.encode("utf-8")) % 10000)
    # Simulate an unconstrained ("natural completion") generation length.
    baseline_tokens = rng.randint(max_tokens - 50, max_tokens + 100)
    results = {
        "baseline": baseline_tokens,
        "hard_budget": min(baseline_tokens, max_tokens),
        # LenVM adaptively stops short of the budget only when the
        # unconstrained generation would have overrun it.
        "lenvm": int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens,
        "early_term": int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens,
    }
    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"
    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"
    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."
    return output
# ---------------------------------------------------------------------------
# Gradio interface: two tabs (length analysis, budget strategies) plus an
# "about" footer. Built at import time; launched only when run as a script.
# ---------------------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    # Page header describing the paper this demo illustrates.
    gr.Markdown("""
# πŸ“ LenVM: Token-Level Length Modeling Demo
This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.
LenVM treats length modeling as a value estimation problem, predicting remaining generation length
through token-level value signals rather than sequence-level heuristics.
""")

    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                # Inputs: free text plus the token target to compare against.
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5,
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length",
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")
            with gr.Column():
                analysis_output = gr.Markdown(label="Results")
        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output,
        )

    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3,
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget",
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")
            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")
        # NOTE(review): only two inputs are wired here while the handler
        # declares a third ``strategy`` parameter — confirm the handler
        # signature tolerates this before deploying.
        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output,
        )

    # Static footer summarizing the paper's contributions.
    gr.Markdown("""
### About This Demo
This Space demonstrates key concepts from the LenVM paper:
- **Token-level value estimation**: Predicting remaining generation length per token
- **Discounted return formulation**: Using constant negative reward per token
- **Length control**: Enabling continuous trade-off between performance and efficiency
[Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
""")

if __name__ == "__main__":
    demo.launch()