Spaces:
Sleeping
Sleeping
| """Live demo of the @mukundakatta agent reliability stack: fit, guard, snap, vet, cast. | |
| Each tab runs the corresponding library against user input so you can see exactly | |
| what it does without installing anything. All five libraries are pure Python, | |
| zero runtime deps. | |
| """ | |
| import json | |
| import gradio as gr | |
| from agentfit import count, fit | |
| from agentguard import policy, check | |
| from agentsnap import diff | |
| from agentvet import validate, adapters as vet_adapters | |
| from agentcast import extract_json, adapters as cast_adapters | |
# ---------- agentfit ----------------------------------------------------------

# Sample chat history pre-filled into the "fit" tab, stored as pretty-printed
# JSON text. The long turns are built by repeating a short phrase.
DEFAULT_MESSAGES = json.dumps([
    {"role": "system", "content": "You are precise and concise."},
    {"role": "user", "content": "Tell me everything you know about the Roman Empire " * 30},
    {"role": "assistant", "content": "The Roman Empire was a vast civilization " * 30},
    {"role": "user", "content": "Now summarize that in 3 bullets " * 5},
    {"role": "assistant", "content": "Here is a summary " * 30},
    {"role": "user", "content": "What is 2+2?"},
], indent=2)
def fit_demo(messages_json: str, max_tokens: int, model: str, strategy: str, preserve_last_n: int) -> str:
    """Fit a JSON chat history into a token budget and report the outcome as Markdown.

    Args:
        messages_json: JSON text containing the chat messages.
        max_tokens: Token budget passed to ``fit``. Any numeric value is
            accepted and truncated to ``int``.
        model: Model name forwarded to ``count`` and ``fit``.
        strategy: Strategy name forwarded to ``fit``.
        preserve_last_n: Passed to ``fit`` as ``preserve_last_n`` after
            truncation to ``int``.

    Returns:
        A Markdown string with the before/after token counts, the number of
        dropped messages and the surviving messages as JSON, or a one-line
        error message when an input cannot be parsed.
    """
    # NOTE(review): the leading "β" in the error strings looks like a
    # mis-decoded status emoji - confirm against the upstream file.
    try:
        messages = json.loads(messages_json)
    except json.JSONDecodeError as e:
        return f"β Invalid JSON: {e}"

    # Both numeric inputs come from gr.Number widgets, which can deliver a
    # float (or None when the box is cleared). Coerce them up front so the
    # library always receives plain ints.
    try:
        max_tokens = int(max_tokens)
        preserve_last_n = int(preserve_last_n)
    except (TypeError, ValueError, OverflowError) as e:
        return f"β Invalid number: {e}"

    # Token count of the untouched history, shown next to the post-fit count.
    before_tokens = count(messages, model=model)
    result = fit(
        messages,
        max_tokens=max_tokens,
        model=model,
        strategy=strategy,
        preserve_system=True,
        preserve_last_n=preserve_last_n,
        on_over_budget="return-partial",
    )
    return (
        f"**Before:** {before_tokens} tokens Β· **After:** {result.tokens.after} tokens "
        f"Β· **Budget:** {result.tokens.budget} Β· **Fit:** {result.fit}\n\n"
        f"**Dropped:** {len(result.dropped)} message(s)\n\n"
        f"**Surviving messages:**\n```json\n{json.dumps([dict(m) for m in result.messages], indent=2)}\n```"
    )
# ---------- agentguard --------------------------------------------------------

# Sample network policy pre-filled into the "guard" tab (JSON text).
DEFAULT_POLICY = json.dumps({
    "network": {
        "allow": ["api.openai.com", "*.anthropic.com"],
        "deny": ["evil.example.com"],
    },
}, indent=2)

# Sample URLs to check against the policy, one per line.
DEFAULT_URLS = "\n".join([
    "https://api.openai.com/v1/chat/completions",
    "https://api.anthropic.com/v1/messages",
    "https://evil.example.com/leak",
    "https://random.example.org/data",
])
def guard_demo(policy_json: str, urls_text: str) -> str:
    """Check each URL against a JSON network policy and list the decisions.

    Args:
        policy_json: JSON text describing the policy; the parsed value is
            handed to ``policy``.
        urls_text: URLs to check, one per line. Blank lines are skipped and
            surrounding whitespace is stripped.

    Returns:
        One line per URL saying whether it was allowed or denied (with the
        decision's ``reason``), joined by newlines, or a one-line error
        message when the policy cannot be parsed or built.
    """
    # NOTE(review): the "β" markers in the strings below look like
    # mis-decoded emoji/dashes - confirm against the upstream file.
    try:
        spec = json.loads(policy_json)
    except json.JSONDecodeError as e:
        return f"β Invalid JSON: {e}"

    # Any failure while building the policy is reported to the user rather
    # than raised.
    try:
        p = policy(spec)
    except Exception as e:
        return f"β Invalid policy: {e}"

    rows = []
    for url in (u.strip() for u in urls_text.splitlines() if u.strip()):
        decision = check(p, url)
        if decision["action"] == "allow":
            rows.append(f"β `{url}` β allowed")
        else:
            rows.append(f"β `{url}` β denied (`{decision['reason']}`)")
    return "\n".join(rows)
# ---------- agentsnap ---------------------------------------------------------

# Sample baseline trace pre-filled into the "snap" tab (JSON text).
DEFAULT_BASELINE = json.dumps({
    "version": 1,
    "model": "claude-sonnet-4-6",
    "input": "search for python tutorials",
    "output": "Here are 3 results.",
    "tools": [
        {"name": "web_search", "args": {"q": "python tutorials"}, "result_hash": "abc123"},
        {"name": "fetch_page", "args": {"url": "https://example.com"}, "result_hash": "def456"},
    ],
    "error": None,
    "fingerprint": {"node": "20.0", "agentsnap": "0.1.0"},
}, indent=2)

# Sample current trace. Compared with the baseline it has a different
# "output", a different result_hash for fetch_page, and an extra "summarize"
# tool call.
DEFAULT_CURRENT = json.dumps({
    "version": 1,
    "model": "claude-sonnet-4-6",
    "input": "search for python tutorials",
    "output": "Here are 5 results.",
    "tools": [
        {"name": "web_search", "args": {"q": "python tutorials"}, "result_hash": "abc123"},
        {"name": "fetch_page", "args": {"url": "https://example.com"}, "result_hash": "DIFFERENT"},
        {"name": "summarize", "args": {"text": "..."}, "result_hash": "new789"},
    ],
    "error": None,
    "fingerprint": {"node": "20.0", "agentsnap": "0.1.0"},
}, indent=2)
def snap_demo(baseline_json: str, current_json: str) -> str:
    """Diff two JSON traces and render the result as Markdown.

    Args:
        baseline_json: JSON text of the baseline trace.
        current_json: JSON text of the trace to compare against the baseline.

    Returns:
        A Markdown string with the diff status followed by one bullet per
        change (its path plus the ``repr`` of the old and new values), or a
        one-line error message when either input is not valid JSON.
    """
    # NOTE(review): the "β" characters in the strings below look like
    # mis-decoded emoji/dashes - confirm against the upstream file.
    try:
        baseline = json.loads(baseline_json)
        current = json.loads(current_json)
    except json.JSONDecodeError as e:
        return f"β Invalid JSON: {e}"

    result = diff(baseline, current)
    out = [f"**Status:** `{result.status}`", "", "**Changes:**"]
    if not result.changes:
        out.append("(none β traces match)")
    else:
        for change in result.changes:
            out.append(f"- `{change.path}`")
            out.append(f" - from: `{change.from_!r}`")
            out.append(f" - to: `{change.to!r}`")
    return "\n".join(out)
# ---------- agentvet ----------------------------------------------------------

# Sample tool name pre-filled into the "vet" tab.
DEFAULT_TOOL_NAME = "send_email"

# Sample shape for the tool's arguments (JSON text). The UI labels a trailing
# "?" as marking an optional field.
DEFAULT_SHAPE = json.dumps({
    "to": "str",
    "subject": "str",
    "body": "str",
    "cc": "list?",
}, indent=2)

# Sample arguments (JSON text); note that "subject" is absent.
DEFAULT_ARGS = json.dumps({
    "to": "alice@example.com",
    "body": "hello",
}, indent=2)
def vet_demo(tool_name: str, shape_json: str, args_json: str) -> str:
    """Validate tool-call arguments against a shape and report the verdict.

    Args:
        tool_name: Name of the tool, forwarded to ``validate``.
        shape_json: JSON text of the shape; the parsed value is turned into a
            validator with ``vet_adapters.shape``.
        args_json: JSON text of the arguments to validate.

    Returns:
        A Markdown string: a success line when the result is valid, otherwise
        the validation error plus a retry hint, or a one-line error message
        when either JSON input cannot be parsed.
    """
    # NOTE(review): the "β" characters in the strings below look like
    # mis-decoded emoji/dashes - confirm against the upstream file.
    try:
        shape_spec = json.loads(shape_json)
        args = json.loads(args_json)
    except json.JSONDecodeError as e:
        return f"β Invalid JSON: {e}"

    validator = vet_adapters.shape(shape_spec)
    result = validate(tool_name, validator, args)
    if result["valid"]:
        return "β **Valid** β args match the schema."

    err = result["error"]
    # Use the error's to_llm_feedback() when it has one; otherwise fall back
    # to its plain message.
    feedback = err.to_llm_feedback() if hasattr(err, "to_llm_feedback") else err.message
    return (
        f"β **Invalid** β {err.validation_error}\n\n"
        f"**LLM-friendly retry hint:**\n```\n{feedback}\n```"
    )
# ---------- agentcast ---------------------------------------------------------

# Sample chatty LLM reply with a fenced JSON object embedded in it.
# NOTE(review): blank lines and inner indentation of this literal may have
# been lost in the source - confirm against the upstream file.
DEFAULT_MESSY = """Sure! Here's the product info you asked for:
```json
{
"name": "Widget Pro",
"price": 29.99,
"in_stock": true,
"tags": ["best-seller", "new"]
}
```
Let me know if you need anything else!"""

# Shape the extracted object is validated against (JSON text).
DEFAULT_VALIDATE_SHAPE = json.dumps({
    "name": "str",
    "price": "float",
    "in_stock": "bool",
    "tags": "list",
}, indent=2)
def cast_demo(messy_text: str, shape_json: str) -> str:
    """Extract JSON from free-form text and validate it against a shape.

    Args:
        messy_text: Text that may contain a JSON value; passed to
            ``extract_json``.
        shape_json: JSON text of the expected shape; the parsed value is
            turned into a validator with ``cast_adapters.shape``.

    Returns:
        A Markdown string showing the extracted JSON, with the validation
        error appended when validation fails, or a one-line error message
        when nothing could be extracted or the shape is not valid JSON.
    """
    # NOTE(review): the "β" characters in the strings below look like
    # mis-decoded emoji - confirm against the upstream file.
    # Extraction runs first, so a missing payload is reported before any
    # problem with the shape.
    extracted = extract_json(messy_text)
    if extracted is None:
        return "β Could not find any JSON in the text."

    try:
        shape_spec = json.loads(shape_json)
    except json.JSONDecodeError as e:
        return f"β Invalid shape JSON: {e}"

    validator = cast_adapters.shape(shape_spec)
    val_result = validator(extracted)
    if val_result["valid"]:
        return (
            f"β **Extracted + validated:**\n```json\n{json.dumps(extracted, indent=2)}\n```"
        )
    return (
        f"β οΈ **Extracted but failed validation:**\n```json\n{json.dumps(extracted, indent=2)}\n```\n\n"
        f"**Validation error:** `{val_result['error']}`"
    )
# ---------- UI ----------------------------------------------------------------

# One tab per library. Each tab wires a button to the matching *_demo function
# and renders the returned Markdown.
# NOTE(review): the source had lost its indentation, so the nesting of the
# Row/Column context managers below (in particular where each *_output and
# button sits) is a reconstruction - confirm against the upstream layout.
# NOTE(review): non-ASCII characters in the labels and Markdown text look
# mis-decoded, and blank lines inside the Markdown literals may have been
# lost - confirm against the upstream file.
with gr.Blocks(title="The Agent Reliability Stack β Live Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # The Agent Reliability Stack β Live Demo
        Five small libraries that fix the boring problems every long-running AI agent eventually hits.
        Pick a tab below to see what each one does. Pure Python, zero runtime deps.
        π **Landing page:** https://mukundakatta.github.io/agent-stack/
        π¦ **PyPI:** [`agentfit-py`](https://pypi.org/project/agentfit-py/) Β· [`agentguard-firewall`](https://pypi.org/project/agentguard-firewall/) Β· [`agentsnap-py`](https://pypi.org/project/agentsnap-py/) Β· [`agentvet-py`](https://pypi.org/project/agentvet-py/) Β· [`agentcast-py`](https://pypi.org/project/agentcast-py/)
        π¦ **npm:** [`@mukundakatta/agentkit`](https://www.npmjs.com/package/@mukundakatta/agentkit) (one install for the whole stack)
        """
    )

    with gr.Tab("πͺ fit β message truncation"):
        gr.Markdown("**`agentfit`** β fit a chat history into a token budget.")
        with gr.Row():
            with gr.Column():
                fit_messages = gr.Code(value=DEFAULT_MESSAGES, language="json", label="Messages (JSON array)", lines=14)
                fit_max = gr.Number(value=200, label="max_tokens")
                fit_model = gr.Dropdown(["claude-sonnet-4-6", "gpt-5", "claude-haiku-4-5", "default"], value="claude-sonnet-4-6", label="Model")
                fit_strategy = gr.Radio(["drop-oldest", "drop-middle", "priority"], value="drop-oldest", label="Strategy")
                fit_preserve = gr.Number(value=2, label="preserve_last_n")
                fit_btn = gr.Button("Fit", variant="primary")
            fit_output = gr.Markdown()
        fit_btn.click(fit_demo, inputs=[fit_messages, fit_max, fit_model, fit_strategy, fit_preserve], outputs=fit_output)

    with gr.Tab("π‘οΈ guard β egress firewall"):
        gr.Markdown("**`agentguard`** β check URLs against a declarative network policy before any fetch.")
        with gr.Row():
            with gr.Column():
                guard_policy = gr.Code(value=DEFAULT_POLICY, language="json", label="Policy", lines=10)
                guard_urls = gr.Textbox(value=DEFAULT_URLS, label="URLs to check (one per line)", lines=6)
                guard_btn = gr.Button("Check", variant="primary")
            guard_output = gr.Markdown()
        guard_btn.click(guard_demo, inputs=[guard_policy, guard_urls], outputs=guard_output)

    with gr.Tab("πΈ snap β trace diffing"):
        gr.Markdown("**`agentsnap`** β diff two tool-call traces, catch silent regressions.")
        with gr.Row():
            with gr.Column():
                snap_baseline = gr.Code(value=DEFAULT_BASELINE, language="json", label="Baseline trace", lines=14)
            with gr.Column():
                snap_current = gr.Code(value=DEFAULT_CURRENT, language="json", label="Current trace", lines=14)
        snap_btn = gr.Button("Diff", variant="primary")
        snap_output = gr.Markdown()
        snap_btn.click(snap_demo, inputs=[snap_baseline, snap_current], outputs=snap_output)

    with gr.Tab("β vet β tool-arg validation"):
        gr.Markdown("**`agentvet`** β validate tool-call args before execution; produce LLM-friendly retry hints when wrong.")
        with gr.Row():
            with gr.Column():
                vet_tool = gr.Textbox(value=DEFAULT_TOOL_NAME, label="Tool name")
                vet_shape = gr.Code(value=DEFAULT_SHAPE, language="json", label="Shape (suffix '?' for optional)", lines=8)
                vet_args = gr.Code(value=DEFAULT_ARGS, language="json", label="Args from LLM", lines=8)
                vet_btn = gr.Button("Validate", variant="primary")
            vet_output = gr.Markdown()
        vet_btn.click(vet_demo, inputs=[vet_tool, vet_shape, vet_args], outputs=vet_output)

    with gr.Tab("π― cast β structured output"):
        gr.Markdown("**`agentcast`** β extract JSON from messy LLM text, validate against a shape.")
        with gr.Row():
            with gr.Column():
                cast_text = gr.Textbox(value=DEFAULT_MESSY, label="Messy LLM output", lines=12)
                cast_shape = gr.Code(value=DEFAULT_VALIDATE_SHAPE, language="json", label="Expected shape", lines=8)
                cast_btn = gr.Button("Extract + validate", variant="primary")
            cast_output = gr.Markdown()
        cast_btn.click(cast_demo, inputs=[cast_text, cast_shape], outputs=cast_output)

    gr.Markdown(
        """
        ---
        Built by [Mukunda Katta](https://github.com/MukundaKatta) Β· MIT licensed across the board Β· [GitHub](https://github.com/MukundaKatta)
        """
    )
if __name__ == "__main__":
    # Start the Gradio server on all interfaces, port 7860, only when this
    # file is run as a script.
    demo.launch(server_name="0.0.0.0", server_port=7860)