---
title: "LenVM Token-Level Length Control Demo"
emoji: "📏"
colorFrom: "blue"
colorTo: "green"
sdk: "gradio"
sdk_version: "4.36.0"
app_file: "app.py"
---
|
|
| import gradio as gr |
| import re |
|
|
| |
# Module-level cache for the lazily loaded LenVM model handle.
lenvm_model = None


def get_lenvm_model():
    """Return the LenVM model, loading it on first use.

    The handle is cached in the module-level ``lenvm_model`` variable so
    repeated calls are cheap and nothing heavy happens at import time.
    """
    global lenvm_model
    if lenvm_model is not None:
        return lenvm_model
    # Placeholder for real model initialisation in the demo.
    lenvm_model = "loaded"
    return lenvm_model
|
|
def estimate_remaining_length(text, current_tokens, target_length):
    """
    Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as
    a value estimation problem with constant negative reward per token.

    Parameters
    ----------
    text : str
        Text generated so far (may be empty).
    current_tokens : int
        Accepted for interface compatibility; the token count is
        re-estimated from ``text`` rather than taken from this value.
    target_length : int
        Desired total generation length in tokens.

    Returns
    -------
    dict
        Keys: ``estimated_tokens``, ``remaining_tokens``, ``value_score``,
        ``status`` (human-readable), ``token_efficiency`` (percent of the
        target budget already used).
    """
    chars = len(text) if text else 0

    # Rough heuristic: ~4 characters per token; the +1 keeps the estimate
    # strictly positive even for empty input.
    estimated_tokens = chars // 4 + 1

    # Remaining budget, clamped at zero once the target is reached.
    remaining = max(0, target_length - estimated_tokens)

    # Coarse value score: higher means more generation is still expected.
    # Status markers were mojibake-garbled in the source; repaired to emoji.
    if remaining <= 0:
        value_score = 0.1
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"

    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }
|
|
def analyze_generation(text, target_length):
    """Analyze text generation and estimate length properties."""
    if not text:
        return "Please enter some text to analyze."

    stats = estimate_remaining_length(text, 0, target_length)
    score = stats["value_score"]

    # Pick the horizon qualifier with explicit branches instead of a
    # nested conditional expression embedded in the template.
    if score > 0.7:
        horizon = "substantial"
    elif score > 0.4:
        horizon = "moderate"
    else:
        horizon = "minimal"

    # Assemble the Markdown report line by line; output text is identical
    # to the original template.
    report_lines = [
        "## LenVM Analysis Results",
        "",
        "**Input Statistics:**",
        f"- Characters: {len(text)}",
        f"- Words: {len(text.split())}",
        f"- Estimated Tokens: ~{stats['estimated_tokens']}",
        "",
        "**Length Value Model Predictions:**",
        f"- Target Length: {target_length} tokens",
        f"- Remaining Tokens: {stats['remaining_tokens']}",
        f"- Value Score: {score} (higher = more generation expected)",
        f"- Status: {stats['status']}",
        f"- Current Efficiency: {stats['token_efficiency']}% of target used",
        "",
        "**Interpretation:**",
        f"Based on the LenVM paper methodology, this text shows a value score of {score},",
        f"indicating {horizon}",
        "remaining generation horizon.",
    ]
    return "\n".join(report_lines) + "\n"
|
|
def simulate_token_budget_strategy(prompt, max_tokens, strategy):
    """
    Demonstrate different token budget strategies inspired by LenVM findings.

    Parameters
    ----------
    prompt : str
        Reasoning prompt; used only to seed the simulated token counts.
    max_tokens : int
        Token budget the strategies are compared against.
    strategy : str
        Accepted for interface compatibility; the report always compares
        all strategies, so this argument is currently unused.

    Returns
    -------
    str
        Markdown report comparing simulated token usage per strategy.
    """
    if not prompt:
        return "Please enter a prompt."

    import random
    import zlib

    # Seed from a *stable* hash of the prompt so the simulation is
    # reproducible across processes. The original used built-in hash(),
    # which is salted per interpreter run (PYTHONHASHSEED) and therefore
    # produced different "reproducible" numbers on every launch.
    random.seed(zlib.crc32(prompt.encode("utf-8")) % 10000)

    # Simulated natural completion length around the requested budget.
    baseline_tokens = random.randint(max_tokens - 50, max_tokens + 100)

    results = {}
    results["baseline"] = baseline_tokens
    results["hard_budget"] = min(baseline_tokens, max_tokens)
    # LenVM adaptively stops short of the budget only when the natural
    # completion would overshoot it.
    results["lenvm"] = int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens
    results["early_term"] = int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens

    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {prompt[:50]}...\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"

    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"

    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."

    return output
|
|
| |
# --- Gradio UI ------------------------------------------------------------
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    # Intro header. The heading emoji was mojibake-garbled in the source
    # ("π"); repaired to a ruler emoji to match the length-control theme.
    gr.Markdown("""
    # 📏 LenVM: Token-Level Length Modeling Demo

    This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.

    LenVM treats length modeling as a value estimation problem, predicting remaining generation length
    through token-level value signals rather than sequence-level heuristics.
    """)

    # Tab 1: analyze a piece of generated text against a target token length.
    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5
                )
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length"
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")

            with gr.Column():
                analysis_output = gr.Markdown(label="Results")

        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output
        )

    # Tab 2: compare simulated token-budget strategies for a prompt.
    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3
                )
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget"
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")

            with gr.Column():
                strategy_output = gr.Markdown(label="Strategy Comparison")

        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output
        )

    gr.Markdown("""
    ### About This Demo

    This Space demonstrates key concepts from the LenVM paper:
    - **Token-level value estimation**: Predicting remaining generation length per token
    - **Discounted return formulation**: Using constant negative reward per token
    - **Length control**: Enabling continuous trade-off between performance and efficiency

    [Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
    """)


# Launch only when run as a script (Spaces imports app.py and launches itself).
if __name__ == "__main__":
    demo.launch()
|
|