File size: 7,526 Bytes
654c363 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | ---
title: "LenVM Token-Level Length Control Demo"
emoji: "📏"
colorFrom: "blue"
colorTo: "green"
sdk: "gradio"
sdk_version: "4.36.0"
app_file: "app.py"
---
import gradio as gr
import re
# Module-level cache for the (simulated) LenVM model; filled on first use.
lenvm_model = None


def get_lenvm_model():
    """Return the LenVM model handle, loading it lazily on first call.

    Keeping the load out of module import time keeps startup cheap; a real
    deployment would load trained model weights here instead of the
    "loaded" sentinel string.
    """
    global lenvm_model
    if lenvm_model is not None:
        return lenvm_model
    # Simulated LenVM value estimation — a real implementation would load
    # a trained model at this point.
    lenvm_model = "loaded"
    return lenvm_model
def estimate_remaining_length(text, current_tokens, target_length):
    """Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as a value
    estimation problem with a constant negative reward per token.

    Args:
        text: Text generated so far (may be empty or None).
        current_tokens: Unused in this simulation; kept for API
            compatibility with a real token-level estimator.
        target_length: Target generation length in tokens.

    Returns:
        dict with keys ``estimated_tokens``, ``remaining_tokens``,
        ``value_score``, ``status`` and ``token_efficiency`` (percent of
        the target budget already consumed).
    """
    chars = len(text) if text else 0
    # Rough approximation: ~4 characters per token.
    estimated_tokens = chars // 4 + 1
    remaining = max(0, target_length - estimated_tokens)
    # Simulate value estimation (higher value = more tokens expected).
    # This mirrors LenVM's approach of predicting a discounted return.
    # NOTE: the original status emojis were mojibake'd (one even split the
    # string literal across two lines, a syntax error); restored here.
    if remaining <= 0:
        value_score = 0.1  # Near completion
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"
    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        # Guard against target_length == 0 via max(..., 1).
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }
def analyze_generation(text, target_length):
    """Render a markdown report for generated text vs. a target token length.

    Delegates the numeric work to estimate_remaining_length() and formats
    its result dict for the Gradio UI.
    """
    if not text:
        return "Please enter some text to analyze."
    metrics = estimate_remaining_length(text, 0, target_length)
    score = metrics['value_score']
    # Map the value score onto the wording used in the interpretation line.
    if score > 0.7:
        horizon = 'substantial'
    elif score > 0.4:
        horizon = 'moderate'
    else:
        horizon = 'minimal'
    return f"""## LenVM Analysis Results
**Input Statistics:**
- Characters: {len(text)}
- Words: {len(text.split())}
- Estimated Tokens: ~{metrics['estimated_tokens']}
**Length Value Model Predictions:**
- Target Length: {target_length} tokens
- Remaining Tokens: {metrics['remaining_tokens']}
- Value Score: {score} (higher = more generation expected)
- Status: {metrics['status']}
- Current Efficiency: {metrics['token_efficiency']}% of target used
**Interpretation:**
Based on the LenVM paper methodology, this text shows a value score of {score},
indicating {horizon}
remaining generation horizon.
"""
def simulate_token_budget_strategy(prompt, max_tokens, strategy):
    """Demonstrate token budget strategies inspired by LenVM findings.

    Args:
        prompt: Reasoning prompt; also seeds the simulated generation so
            the same prompt always yields the same comparison.
        max_tokens: Token budget the strategies are compared against.
        strategy: Unused in this simulation; kept for API compatibility
            with the UI wiring.

    Returns:
        A markdown report comparing simulated generation lengths, or a
        usage hint when the prompt is empty.
    """
    if not prompt:
        return "Please enter a prompt."
    import random
    import zlib

    # Deterministic per-prompt seed. The original used hash(prompt), but
    # str hashes are salted per interpreter run (PYTHONHASHSEED), so the
    # "reproducible" demo changed on every restart. zlib.crc32 is stable.
    # A private Random instance also avoids reseeding the global RNG.
    rng = random.Random(zlib.crc32(prompt.encode("utf-8")) % 10000)
    baseline_tokens = rng.randint(max_tokens - 50, max_tokens + 100)
    results = {
        "baseline": baseline_tokens,
        # Hard budget simply truncates at the cap.
        "hard_budget": min(baseline_tokens, max_tokens),
        # LenVM-style adaptive stop: back off to ~85% of budget on overrun.
        "lenvm": int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens,
        # Early termination triggers once 80% of the budget is consumed.
        "early_term": int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens,
    }
    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"
    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"
    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."
    return output
# ---------------------------------------------------------------------------
# Gradio interface.
# The indentation of this section was destroyed in the source as received
# (all lines flattened to column 0 — not valid Python); reconstructed from
# the unambiguous `with` nesting. The heading emoji was mojibake'd and is
# restored as a ruler (📏) to match the length-control theme — TODO confirm.
# ---------------------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    gr.Markdown("""
# 📏 LenVM: Token-Level Length Modeling Demo
This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.
LenVM treats length modeling as a value estimation problem, predicting remaining generation length
through token-level value signals rather than sequence-level heuristics.
""")
    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length"
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")
            with gr.Column():
                analysis_output = gr.Markdown(label="Results")
        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output
        )
    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget"
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")
            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")
        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output
        )
    gr.Markdown("""
### About This Demo
This Space demonstrates key concepts from the LenVM paper:
- **Token-level value estimation**: Predicting remaining generation length per token
- **Discounted return formulation**: Using constant negative reward per token
- **Length control**: Enabling continuous trade-off between performance and efficiency
[Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
""")

if __name__ == "__main__":
    demo.launch()
|