File size: 7,526 Bytes
654c363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
---
title: "LenVM Token-Level Length Control Demo"
emoji: "📝"
colorFrom: "blue"
colorTo: "green"
sdk: "gradio"
sdk_version: "4.36.0"
app_file: "app.py"
---

import gradio as gr
import re

# Module-level cache for the lazily-initialized (simulated) model handle.
lenvm_model = None

def get_lenvm_model():
    """Return the simulated LenVM model, initializing it on first use.

    Lazy-loads so nothing heavy runs at import time; a real
    implementation would load a trained model checkpoint here.
    """
    global lenvm_model
    if lenvm_model is not None:
        return lenvm_model
    lenvm_model = "loaded"
    return lenvm_model

def estimate_remaining_length(text, current_tokens, target_length):
    """
    Simulate LenVM token-level length estimation.

    LenVM predicts remaining generation length by treating it as
    a value estimation problem with constant negative reward per token.

    Args:
        text: Generated text so far (may be empty or None).
        current_tokens: Unused; kept for interface compatibility with callers.
        target_length: Target generation length in tokens.

    Returns:
        dict with keys ``estimated_tokens``, ``remaining_tokens``,
        ``value_score`` (0.1–0.9, higher = more generation expected),
        ``status`` (human-readable phase label), and ``token_efficiency``
        (percent of the target budget already consumed).
    """
    chars = len(text) if text else 0

    # Rough token estimate: ~4 characters per token (common heuristic
    # for English text with BPE-style tokenizers); +1 so empty input
    # still counts as one token.
    estimated_tokens = chars // 4 + 1

    # Tokens left before reaching the target, clamped at zero.
    remaining = max(0, target_length - estimated_tokens)

    # Simulated value estimation mirroring LenVM's discounted-return view:
    # a higher value means a longer remaining generation horizon.
    if remaining == 0:
        value_score = 0.1  # Near completion
        status = "✅ At or exceeding target"
    elif remaining < target_length * 0.3:
        value_score = 0.3
        status = "🟡 Approaching target"
    elif remaining < target_length * 0.6:
        value_score = 0.6
        status = "🔵 Mid-generation"
    else:
        value_score = 0.9
        status = "🟢 Early generation"

    return {
        "estimated_tokens": estimated_tokens,
        "remaining_tokens": remaining,
        "value_score": round(value_score, 2),
        "status": status,
        # max(target_length, 1) guards against division by zero.
        "token_efficiency": round((estimated_tokens / max(target_length, 1)) * 100, 1),
    }

def analyze_generation(text, target_length):
    """Render a markdown report of LenVM length estimates for *text*."""
    if not text:
        return "Please enter some text to analyze."

    metrics = estimate_remaining_length(text, 0, target_length)
    score = metrics['value_score']

    # Map the numeric value score onto a qualitative horizon label.
    if score > 0.7:
        horizon = 'substantial'
    elif score > 0.4:
        horizon = 'moderate'
    else:
        horizon = 'minimal'

    return f"""## LenVM Analysis Results

**Input Statistics:**
- Characters: {len(text)}
- Words: {len(text.split())}
- Estimated Tokens: ~{metrics['estimated_tokens']}

**Length Value Model Predictions:**
- Target Length: {target_length} tokens
- Remaining Tokens: {metrics['remaining_tokens']}
- Value Score: {score} (higher = more generation expected)
- Status: {metrics['status']}
- Current Efficiency: {metrics['token_efficiency']}% of target used

**Interpretation:**
Based on the LenVM paper methodology, this text shows a value score of {score}, 
indicating {horizon} 
remaining generation horizon.
"""

def simulate_token_budget_strategy(prompt, max_tokens, strategy="lenvm_adaptive"):
    """
    Demonstrate different token budget strategies inspired by LenVM findings.

    Args:
        prompt: Reasoning prompt; seeds the simulated generation lengths.
        max_tokens: Token budget used for the comparison.
        strategy: Name of the strategy the user selected; its description is
            echoed in the report. Defaults to "lenvm_adaptive" so callers
            that wire only two inputs (e.g. the Gradio click handler) still
            work.

    Returns:
        Markdown report comparing simulated generation lengths, or a usage
        hint when *prompt* is empty.
    """
    # Stdlib-only deps, imported locally to match the original's style of
    # keeping module import light.
    import random
    import zlib

    if not prompt:
        return "Please enter a prompt."

    strategies = {
        "greedy": "Generate until natural completion (baseline)",
        "budget_hard": f"Hard stop at {max_tokens} tokens",
        "lenvm_adaptive": "LenVM adaptive: predict optimal stopping point",
        "early_termination": "Stop early if value score < 0.2",
    }

    # Seed from a stable checksum of the prompt. Built-in hash() on str is
    # salted per process (PYTHONHASHSEED), which made the demo
    # non-deterministic across runs; crc32 is stable. A private Random
    # instance also avoids perturbing the global random state.
    rng = random.Random(zlib.crc32(prompt.encode("utf-8")) % 10000)

    # Simulate generation lengths for the different strategies.
    baseline_tokens = rng.randint(max_tokens - 50, max_tokens + 100)

    results = {
        "baseline": baseline_tokens,
        "hard_budget": min(baseline_tokens, max_tokens),
        "lenvm": int(max_tokens * 0.85) if baseline_tokens > max_tokens else baseline_tokens,
        "early_term": int(baseline_tokens * 0.7) if baseline_tokens > max_tokens * 0.8 else baseline_tokens,
    }

    # Only ellipsize the prompt when it was actually truncated.
    shown_prompt = prompt if len(prompt) <= 50 else prompt[:50] + "..."
    output = f"## Token Budget Strategy Comparison\n\n**Prompt:** {shown_prompt}\n\n"
    output += f"**Target Budget:** {max_tokens} tokens\n\n"
    # Echo the selected strategy (previously the parameter was ignored);
    # fall back to the raw name for unknown strategies.
    output += f"**Selected Strategy:** {strategies.get(strategy, strategy)}\n\n"

    for name, tokens in results.items():
        efficiency = min(100, (tokens / max_tokens) * 100) if max_tokens > 0 else 0
        output += f"**{name.replace('_', ' ').title()}:** {tokens} tokens ({efficiency:.1f}% of budget)\n"

    output += "\n**Key Insight from LenVM Paper:**\n"
    output += "LenVM maintains 63% accuracy on GSM8K at 200 token budget vs 6% for baseline.\n"
    output += "This demonstrates that token-level value estimation enables better length control."

    return output

# Gradio interface
# Top-level Blocks container; components and event wiring are added inside
# the `with demo:` context below.
demo = gr.Blocks(title="LenVM: Length Value Model Demo")

with demo:
    # Intro banner shown above both tabs.
    gr.Markdown("""
    # πŸ“ LenVM: Token-Level Length Modeling Demo
    
    This demo illustrates concepts from the paper **"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"**.
    
    LenVM treats length modeling as a value estimation problem, predicting remaining generation length 
    through token-level value signals rather than sequence-level heuristics.
    """)
    
    # Tab 1: analyze an existing text against a target token length.
    with gr.Tab("Length Analysis"):
        with gr.Row():
            with gr.Column():
                # Free-text input to be analyzed by analyze_generation.
                text_input = gr.Textbox(
                    label="Generated Text",
                    placeholder="Enter text to analyze...",
                    lines=5
                )
                # Target budget in tokens (50-500, step 10, default 200).
                target_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Target Token Length"
                )
                analyze_btn = gr.Button("Analyze Length", variant="primary")
            
            with gr.Column():
                # Markdown pane that receives the analysis report.
                analysis_output = gr.Markdown(label="Results")
        
        # Button click runs analyze_generation(text, target_length) and
        # writes its markdown result to the output pane.
        analyze_btn.click(
            fn=analyze_generation,
            inputs=[text_input, target_length],
            outputs=analysis_output
        )
    
    # Tab 2: compare simulated token-budget strategies for a prompt.
    with gr.Tab("Token Budget Strategies"):
        with gr.Row():
            with gr.Column():
                # Prompt whose (simulated) generation is being budgeted.
                prompt_input = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter a reasoning prompt...",
                    lines=3
                )
                # Token budget for the comparison (50-400, step 10).
                budget_slider = gr.Slider(
                    minimum=50,
                    maximum=400,
                    value=200,
                    step=10,
                    label="Token Budget"
                )
                strategy_btn = gr.Button("Compare Strategies", variant="primary")
            
            with gr.Column():
                # Markdown pane that receives the strategy comparison.
                strategy_output = gr.Markdown(label="Strategy Comparison")
        
        # NOTE(review): only two inputs are wired here, but
        # simulate_token_budget_strategy declares three parameters
        # (prompt, max_tokens, strategy) — confirm `strategy` has a
        # default value, otherwise this click raises TypeError.
        strategy_btn.click(
            fn=simulate_token_budget_strategy,
            inputs=[prompt_input, budget_slider],
            outputs=strategy_output
        )
    
    # Footer: concept summary plus paper/code links.
    gr.Markdown("""
    ### About This Demo
    
    This Space demonstrates key concepts from the LenVM paper:
    - **Token-level value estimation**: Predicting remaining generation length per token
    - **Discounted return formulation**: Using constant negative reward per token
    - **Length control**: Enabling continuous trade-off between performance and efficiency
    
    [Paper: arXiv:2604.27039](https://arxiv.org/abs/2604.27039) | [Code](https://github.com/eric-ai-lab/Length-Value-Model)
    """)

# Launch the app only when run as a script (Spaces entry point).
if __name__ == "__main__":
    demo.launch()