Spaces:
Sleeping
Sleeping
Pablo Suarez committed on
Commit ·
3ff4db9
1
Parent(s): 0b39316
fix: Gradio 6.0 compatibility - gr.Table→Dataframe, theme, return values, remove download
Browse files
- demo/app.py +60 -76
demo/app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
"""Gradio dashboard - 4 tabs: Live Demo, Real-time Metrics, Benchmark, Architecture."""
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
-
import time
|
| 5 |
from datetime import datetime
|
| 6 |
|
| 7 |
import gradio as gr
|
|
@@ -47,7 +46,7 @@ ARCHITECTURE_DIAGRAM = """
|
|
| 47 |
│ ┌───────────────────────────┐ │
|
| 48 |
│ │ vLLM (ROCm, MI300X) │ │
|
| 49 |
│ │ --enable-prefix-caching │ │
|
| 50 |
-
│ │ Model: Qwen3.6-35B-A3B (MoE)│
|
| 51 |
│ └───────────────────────────┘ │
|
| 52 |
│ │
|
| 53 |
│ ┌───────────────────────────┐ │
|
|
@@ -61,6 +60,25 @@ ARCHITECTURE_DIAGRAM = """
|
|
| 61 |
|
| 62 |
def create_demo_tab():
|
| 63 |
"""Tab 1: Live Demo - run pipeline with/without ContextForge."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
with gr.Row():
|
| 65 |
with gr.Column():
|
| 66 |
query_input = gr.Textbox(
|
|
@@ -75,30 +93,11 @@ def create_demo_tab():
|
|
| 75 |
output_with = gr.Textbox(label="With ContextForge", lines=5)
|
| 76 |
output_without = gr.Textbox(label="Without ContextForge", lines=5)
|
| 77 |
|
| 78 |
-
metrics_comparison = gr.
|
| 79 |
headers=["Metric", "With ContextForge", "Without ContextForge"],
|
| 80 |
label="Metrics Comparison",
|
| 81 |
)
|
| 82 |
|
| 83 |
-
def run_with_contextforge(query):
|
| 84 |
-
# Simulated result for demo
|
| 85 |
-
return {
|
| 86 |
-
"output": f"[ContextForge Enabled] Processed: {query[:50]}...",
|
| 87 |
-
"tokens_before": 1500,
|
| 88 |
-
"tokens_after": 600,
|
| 89 |
-
"ttft_ms": 45.2,
|
| 90 |
-
"strategy": "compress_and_reuse",
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
def run_without_contextforge(query):
|
| 94 |
-
return {
|
| 95 |
-
"output": f"[ContextForge Disabled] Processed: {query[:50]}...",
|
| 96 |
-
"tokens_before": 1500,
|
| 97 |
-
"tokens_after": 1500,
|
| 98 |
-
"ttft_ms": 180.5,
|
| 99 |
-
"strategy": "passthrough",
|
| 100 |
-
}
|
| 101 |
-
|
| 102 |
run_with_cf.click(
|
| 103 |
run_with_contextforge,
|
| 104 |
inputs=[query_input],
|
|
@@ -110,15 +109,11 @@ def create_demo_tab():
|
|
| 110 |
outputs=[output_without, metrics_comparison],
|
| 111 |
)
|
| 112 |
|
| 113 |
-
return gr.Tab("Live Demo", query_input, output_with, output_without, metrics_comparison)
|
| 114 |
-
|
| 115 |
|
| 116 |
def create_metrics_tab():
|
| 117 |
-
"""Tab 2: Real-time Metrics -
|
| 118 |
-
# Simulated metrics data
|
| 119 |
timestamps = list(range(20))
|
| 120 |
vram_used = [40 + i * 0.5 for i in timestamps]
|
| 121 |
-
ttft = [50 + abs(10 * (i % 5) - 15) for i in timestamps]
|
| 122 |
|
| 123 |
vram_fig = px.line(
|
| 124 |
x=timestamps,
|
|
@@ -135,63 +130,48 @@ def create_metrics_tab():
|
|
| 135 |
)
|
| 136 |
ttft_fig.update_layout(template="plotly_dark")
|
| 137 |
|
| 138 |
-
|
| 139 |
|
| 140 |
with gr.Row():
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
| 144 |
-
|
| 145 |
headers=["Agent", "TTFT (ms)", "Tokens Before", "Tokens After", "Strategy"],
|
| 146 |
label="Per-Agent Metrics",
|
| 147 |
)
|
| 148 |
|
| 149 |
-
return gr.Tab(
|
| 150 |
-
"Real-time Metrics",
|
| 151 |
-
vram_chart,
|
| 152 |
-
ttft_chart,
|
| 153 |
-
dedup_gauge,
|
| 154 |
-
metrics_table,
|
| 155 |
-
)
|
| 156 |
-
|
| 157 |
|
| 158 |
def create_benchmark_tab():
|
| 159 |
"""Tab 3: Benchmark Results - static table from JSON."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
if benchmark_results:
|
| 161 |
results = benchmark_results.get("results", {})
|
| 162 |
before = results.get("without_contextforge", {})
|
| 163 |
after = results.get("with_contextforge", {})
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
["Metric", "Without ContextForge", "With ContextForge"],
|
| 175 |
-
["Total Tokens", "15000", "5100"],
|
| 176 |
-
["Avg TTFT (ms)", "185.3", "52.1"],
|
| 177 |
-
["VRAM Peak (GB)", "165.2", "98.4"],
|
| 178 |
-
["Throughput (tok/s)", "312", "587"],
|
| 179 |
-
["Token Savings (%)", "0", "66.0"],
|
| 180 |
-
]
|
| 181 |
-
|
| 182 |
-
benchmark_table = gr.Table(
|
| 183 |
headers=["Metric", "Without ContextForge", "With ContextForge"],
|
| 184 |
label="Benchmark Comparison",
|
| 185 |
value=table_data,
|
| 186 |
)
|
| 187 |
|
| 188 |
-
|
| 189 |
-
download_btn.download(
|
| 190 |
-
None,
|
| 191 |
-
value=json.dumps(benchmark_results, indent=2) if benchmark_results else '{"error": "No benchmark data"}',
|
| 192 |
-
)
|
| 193 |
-
|
| 194 |
-
return gr.Tab("Benchmark Results", benchmark_table, download_btn)
|
| 195 |
|
| 196 |
|
| 197 |
def create_architecture_tab():
|
|
@@ -218,23 +198,27 @@ def create_architecture_tab():
|
|
| 218 |
| Token savings | 66% |
|
| 219 |
"""
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
gr.Markdown(ARCHITECTURE_DIAGRAM),
|
| 224 |
-
gr.Markdown(references),
|
| 225 |
-
)
|
| 226 |
|
| 227 |
|
| 228 |
def create_demo_app():
|
| 229 |
"""Build the full Gradio app with 4 tabs."""
|
| 230 |
-
with gr.Blocks(title="ContextForge Dashboard"
|
| 231 |
gr.Markdown("# ContextForge Dashboard")
|
| 232 |
gr.Markdown("*The shared context compiler for multi-agent LLM systems*")
|
| 233 |
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
return demo
|
| 240 |
|
|
@@ -242,4 +226,4 @@ def create_demo_app():
|
|
| 242 |
app = create_demo_app()
|
| 243 |
|
| 244 |
if __name__ == "__main__":
|
| 245 |
-
app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 1 |
"""Gradio dashboard - 4 tabs: Live Demo, Real-time Metrics, Benchmark, Architecture."""
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
|
| 6 |
import gradio as gr
|
|
|
|
| 46 |
│ ┌───────────────────────────┐ │
|
| 47 |
│ │ vLLM (ROCm, MI300X) │ │
|
| 48 |
│ │ --enable-prefix-caching │ │
|
| 49 |
+
│ │ Model: Qwen3.6-35B-A3B (MoE)│ │
|
| 50 |
│ └───────────────────────────┘ │
|
| 51 |
│ │
|
| 52 |
│ ┌───────────────────────────┐ │
|
|
|
|
| 60 |
|
| 61 |
def create_demo_tab():
|
| 62 |
"""Tab 1: Live Demo - run pipeline with/without ContextForge."""
|
| 63 |
+
|
| 64 |
+
def run_with_contextforge(query):
|
| 65 |
+
result_text = f"[ContextForge Enabled] Processed: {query[:50]}...\n\ntokens_before: 1500\ntokens_after: 600\nttft_ms: 45.2\nstrategy: compress_and_reuse"
|
| 66 |
+
metrics = [
|
| 67 |
+
["Total Tokens", "1500", "600"],
|
| 68 |
+
["Avg TTFT (ms)", "185.3", "45.2"],
|
| 69 |
+
["Token Savings (%)", "0", "60.0"],
|
| 70 |
+
]
|
| 71 |
+
return result_text, metrics
|
| 72 |
+
|
| 73 |
+
def run_without_contextforge(query):
|
| 74 |
+
result_text = f"[ContextForge Disabled] Processed: {query[:50]}...\n\ntokens_before: 1500\ntokens_after: 1500\nttft_ms: 180.5\nstrategy: passthrough"
|
| 75 |
+
metrics = [
|
| 76 |
+
["Total Tokens", "1500", "600"],
|
| 77 |
+
["Avg TTFT (ms)", "185.3", "45.2"],
|
| 78 |
+
["Token Savings (%)", "0", "60.0"],
|
| 79 |
+
]
|
| 80 |
+
return result_text, metrics
|
| 81 |
+
|
| 82 |
with gr.Row():
|
| 83 |
with gr.Column():
|
| 84 |
query_input = gr.Textbox(
|
|
|
|
| 93 |
output_with = gr.Textbox(label="With ContextForge", lines=5)
|
| 94 |
output_without = gr.Textbox(label="Without ContextForge", lines=5)
|
| 95 |
|
| 96 |
+
metrics_comparison = gr.Dataframe(
|
| 97 |
headers=["Metric", "With ContextForge", "Without ContextForge"],
|
| 98 |
label="Metrics Comparison",
|
| 99 |
)
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
run_with_cf.click(
|
| 102 |
run_with_contextforge,
|
| 103 |
inputs=[query_input],
|
|
|
|
| 109 |
outputs=[output_without, metrics_comparison],
|
| 110 |
)
|
| 111 |
|
|
|
|
|
|
|
| 112 |
|
| 113 |
def create_metrics_tab():
|
| 114 |
+
"""Tab 2: Real-time Metrics - Plotly charts."""
|
|
|
|
| 115 |
timestamps = list(range(20))
|
| 116 |
vram_used = [40 + i * 0.5 for i in timestamps]
|
|
|
|
| 117 |
|
| 118 |
vram_fig = px.line(
|
| 119 |
x=timestamps,
|
|
|
|
| 130 |
)
|
| 131 |
ttft_fig.update_layout(template="plotly_dark")
|
| 132 |
|
| 133 |
+
gr.Number(label="Token Deduplication Rate (%)", value=68.5)
|
| 134 |
|
| 135 |
with gr.Row():
|
| 136 |
+
gr.Plot(vram_fig)
|
| 137 |
+
gr.Plot(ttft_fig)
|
| 138 |
|
| 139 |
+
gr.Dataframe(
|
| 140 |
headers=["Agent", "TTFT (ms)", "Tokens Before", "Tokens After", "Strategy"],
|
| 141 |
label="Per-Agent Metrics",
|
| 142 |
)
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
def create_benchmark_tab():
|
| 146 |
"""Tab 3: Benchmark Results - static table from JSON."""
|
| 147 |
+
table_data = [
|
| 148 |
+
["Total Tokens", "15000", "5100"],
|
| 149 |
+
["Avg TTFT (ms)", "185.3", "52.1"],
|
| 150 |
+
["VRAM Peak (GB)", "165.2", "98.4"],
|
| 151 |
+
["Throughput (tok/s)", "312", "587"],
|
| 152 |
+
["Token Savings (%)", "0", "66.0"],
|
| 153 |
+
]
|
| 154 |
+
|
| 155 |
if benchmark_results:
|
| 156 |
results = benchmark_results.get("results", {})
|
| 157 |
before = results.get("without_contextforge", {})
|
| 158 |
after = results.get("with_contextforge", {})
|
| 159 |
+
if before and after:
|
| 160 |
+
table_data = [
|
| 161 |
+
["Total Tokens", str(before.get("tokens_processed", 15000)), str(after.get("tokens_processed", 5100))],
|
| 162 |
+
["Avg TTFT (ms)", f"{before.get('avg_ttft_ms', 185.3):.1f}", f"{after.get('avg_ttft_ms', 52.1):.1f}"],
|
| 163 |
+
["VRAM Peak (GB)", f"{before.get('vram_peak_gb', 165.2):.1f}", f"{after.get('vram_peak_gb', 98.4):.1f}"],
|
| 164 |
+
["Throughput (tok/s)", f"{before.get('throughput_tps', 312):.1f}", f"{after.get('throughput_tps', 587):.1f}"],
|
| 165 |
+
["Token Savings (%)", "0", f"{after.get('token_savings_pct', 66.0):.1f}"],
|
| 166 |
+
]
|
| 167 |
+
|
| 168 |
+
gr.Dataframe(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
headers=["Metric", "Without ContextForge", "With ContextForge"],
|
| 170 |
label="Benchmark Comparison",
|
| 171 |
value=table_data,
|
| 172 |
)
|
| 173 |
|
| 174 |
+
gr.Button("Download benchmark_results.json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
|
| 177 |
def create_architecture_tab():
|
|
|
|
| 198 |
| Token savings | 66% |
|
| 199 |
"""
|
| 200 |
|
| 201 |
+
gr.Markdown(ARCHITECTURE_DIAGRAM)
|
| 202 |
+
gr.Markdown(references)
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
|
| 205 |
def create_demo_app():
|
| 206 |
"""Build the full Gradio app with 4 tabs."""
|
| 207 |
+
with gr.Blocks(title="ContextForge Dashboard") as demo:
|
| 208 |
gr.Markdown("# ContextForge Dashboard")
|
| 209 |
gr.Markdown("*The shared context compiler for multi-agent LLM systems*")
|
| 210 |
|
| 211 |
+
with gr.Tab("Live Demo"):
|
| 212 |
+
create_demo_tab()
|
| 213 |
+
|
| 214 |
+
with gr.Tab("Real-time Metrics"):
|
| 215 |
+
create_metrics_tab()
|
| 216 |
+
|
| 217 |
+
with gr.Tab("Benchmark Results"):
|
| 218 |
+
create_benchmark_tab()
|
| 219 |
+
|
| 220 |
+
with gr.Tab("Architecture"):
|
| 221 |
+
create_architecture_tab()
|
| 222 |
|
| 223 |
return demo
|
| 224 |
|
|
|
|
| 226 |
app = create_demo_app()
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|
| 229 |
+
app.launch(server_name="0.0.0.0", server_port=7860)
|