Spaces:
Running
Running
| """FastAPI entrypoint for Chakravyuh — OpenEnv API + Gradio demo on one app. | |
| Routes: | |
| / — landing page (HTML) ← was 404 before | |
| /demo — interactive Gradio demo (mounted) | |
| /reset, /step, | |
| /state, /metadata, | |
| /schema, /health — OpenEnv contract | |
| /leaderboard, /submit — public leaderboard (E.10) | |
| /docs, /openapi.json — FastAPI swagger / schema | |
| /mcp/* — MCP server (registered by openenv-core) | |
| Run locally: | |
| uvicorn server.app:app --host 0.0.0.0 --port 8000 | |
| Hugging Face Space / Docker: | |
| Referenced by ``openenv.yaml`` as ``app: server.app:app``. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from fastapi.responses import HTMLResponse | |
| from openenv.core.env_server import create_app | |
| from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv | |
| from chakravyuh_env.openenv_models import ChakravyuhAction, ChakravyuhObservation | |
| from server.diagnose_endpoint import attach_to_app as attach_diagnose | |
| from server.eval_endpoint import attach_to_app as attach_eval | |
| from server.leaderboard import attach_to_app | |
# Session capacity is read from MAX_CONCURRENT_ENVS (default 8). The
# environment class itself is handed to the factory, so there is one factory
# call per concurrent session → fully isolated episodes.
max_concurrent = int(os.environ.get("MAX_CONCURRENT_ENVS", "8"))

app = create_app(
    ChakravyuhOpenEnv,
    ChakravyuhAction,
    ChakravyuhObservation,
    env_name="chakravyuh_env",
    max_concurrent_envs=max_concurrent,
)

# Public leaderboard (E.10): GET /leaderboard and POST /submit.
# Entries persist to logs/leaderboard.jsonl unless CHAKRAVYUH_LEADERBOARD_PATH
# points somewhere else.
attach_to_app(app)

# Research endpoints, attached in this order:
#   GET  /eval      (plus /eval/{bootstrap,known-novel,redteam,…})
#   POST /diagnose  (single-message rubric breakdown using AnalyzerRubricV2)
attach_eval(app)
attach_diagnose(app)
| # --------------------------------------------------------------------------- | |
| # Landing page — replaces FastAPI's default 404 at `/` so the HF Space root | |
| # shows something useful at-a-glance. | |
| # --------------------------------------------------------------------------- | |
| _LANDING_HTML = """<!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <meta name="color-scheme" content="light only"> | |
| <title>Chakravyuh — Multi-Agent Fraud Arena</title> | |
| <meta name="description" content="A self-improving benchmark for Indian UPI fraud detection. Five agents compete under structural information asymmetry."> | |
| <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32'><circle cx='16' cy='16' r='15' fill='%23381932'/><circle cx='16' cy='16' r='6' fill='none' stroke='%23e8c97a' stroke-width='2'/><line x1='16' y1='2' x2='16' y2='10' stroke='%23e8c97a' stroke-width='2'/><line x1='16' y1='22' x2='16' y2='30' stroke='%23e8c97a' stroke-width='2'/><line x1='2' y1='16' x2='10' y2='16' stroke='%23e8c97a' stroke-width='2'/><line x1='22' y1='16' x2='30' y2='16' stroke='%23e8c97a' stroke-width='2'/><line x1='6.1' y1='6.1' x2='11.8' y2='11.8' stroke='%23e8c97a' stroke-width='2'/><line x1='20.2' y1='20.2' x2='25.9' y2='25.9' stroke='%23e8c97a' stroke-width='2'/><line x1='25.9' y1='6.1' x2='20.2' y2='11.8' stroke='%23e8c97a' stroke-width='2'/><line x1='11.8' y1='20.2' x2='6.1' y2='25.9' stroke='%23e8c97a' stroke-width='2'/></svg>"> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:ital,wght@0,400;0,500;0,600;0,700;0,800;1,700&family=JetBrains+Mono:wght@600&display=swap" rel="stylesheet"> | |
| <style> | |
| :root { | |
| --plum: #381932; | |
| --plum-dark: #2A0F25; | |
| --plum-light: rgba(56,25,50,0.08); | |
| --plum-border: rgba(56,25,50,0.18); | |
| --cream: #FFF3E6; | |
| --cream-2: #FFFBF5; | |
| --gold: #e8c97a; | |
| --text: #000000; | |
| --text-muted: rgba(0,0,0,0.62); | |
| --radius: 12px; | |
| --nav-h: 64px; | |
| color-scheme: light only; | |
| } | |
| *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } | |
| html { scroll-behavior: smooth; } | |
| body { | |
| background: var(--cream); | |
| color: var(--text); | |
| font-family: 'Inter', 'Segoe UI', system-ui, sans-serif; | |
| -webkit-font-smoothing: antialiased; | |
| line-height: 1.6; | |
| min-height: 100vh; | |
| } | |
| /* ── Navbar ── */ | |
| .nav { | |
| position: sticky; top: 0; z-index: 100; | |
| background: rgba(255,243,230,0.85); | |
| backdrop-filter: blur(14px); | |
| -webkit-backdrop-filter: blur(14px); | |
| border-bottom: 1px solid var(--plum-border); | |
| height: var(--nav-h); | |
| } | |
| .nav-inner { | |
| max-width: 1600px; margin: 0 auto; | |
| padding: 0 32px; | |
| height: 100%; | |
| display: flex; align-items: center; gap: 24px; | |
| } | |
| .nav-logo { | |
| display: flex; align-items: center; gap: 10px; | |
| text-decoration: none; color: var(--text); | |
| font-weight: 800; font-size: 16px; letter-spacing: -0.3px; | |
| flex-shrink: 0; | |
| } | |
| .nav-logo-badge { | |
| width: 32px; height: 32px; border-radius: 8px; | |
| background: var(--plum); color: var(--gold); | |
| display: flex; align-items: center; justify-content: center; | |
| font-size: 16px; font-weight: 800; | |
| } | |
| .nav-links { | |
| display: flex; align-items: center; gap: 4px; | |
| margin-left: auto; | |
| } | |
| .nav-link { | |
| padding: 6px 14px; border-radius: 8px; | |
| font-size: 14px; font-weight: 600; | |
| text-decoration: none; color: var(--text-muted); | |
| transition: color .15s, background .15s; | |
| } | |
| .nav-link:hover { color: var(--text); background: var(--plum-light); } | |
| .nav-cta { | |
| margin-left: 8px; | |
| padding: 8px 18px; border-radius: 8px; | |
| background: var(--plum); color: #fff; | |
| font-size: 14px; font-weight: 700; | |
| text-decoration: none; | |
| transition: background .15s, transform .08s; | |
| white-space: nowrap; | |
| } | |
| .nav-cta:hover { background: var(--plum-dark); transform: translateY(-1px); } | |
| .nav-ham { display: none; } | |
| /* ── Page shell ── */ | |
| .page { max-width: 1600px; margin: 0 auto; padding: 0 32px; } | |
| /* ── Hero ── */ | |
| .hero { | |
| display: grid; | |
| grid-template-columns: 1fr 420px; | |
| gap: 48px; | |
| align-items: center; | |
| padding: 72px 0 80px; | |
| } | |
| .hero-eyebrow { | |
| display: inline-block; | |
| font-size: 11px; font-weight: 700; letter-spacing: 2px; | |
| text-transform: uppercase; | |
| color: #fff; background: var(--plum); | |
| padding: 5px 13px; border-radius: 999px; | |
| margin-bottom: 20px; | |
| } | |
| .hero h1 { | |
| font-size: clamp(32px, 3.8vw, 56px); | |
| font-weight: 800; line-height: 1.08; letter-spacing: -1px; | |
| margin-bottom: 20px; | |
| } | |
| .hero h1 em { | |
| font-style: normal; color: var(--plum); | |
| } | |
| .hero-lede { | |
| font-size: clamp(15px, 1.3vw, 17px); | |
| line-height: 1.7; | |
| color: var(--text-muted); | |
| max-width: 580px; | |
| margin-bottom: 36px; | |
| } | |
| .cta-row { | |
| display: flex; flex-wrap: wrap; gap: 12px; | |
| margin-bottom: 36px; | |
| } | |
| .cta { | |
| display: inline-flex; align-items: center; gap: 8px; | |
| padding: 13px 24px; border-radius: var(--radius); | |
| font-weight: 700; font-size: 14px; | |
| text-decoration: none; | |
| border: 1.5px solid transparent; | |
| transition: transform .08s ease, background .15s ease, border-color .15s ease; | |
| } | |
| .cta:hover { transform: translateY(-2px); } | |
| .cta.primary { background: var(--plum); color: #fff; border-color: var(--plum); } | |
| .cta.primary:hover { background: var(--plum-dark); } | |
| .cta.secondary { background: #fff; color: var(--text); border-color: var(--plum-border); } | |
| .cta.secondary:hover { background: var(--plum-light); border-color: var(--plum); } | |
| .badge-row { | |
| display: flex; flex-wrap: wrap; gap: 8px; | |
| } | |
| .badge { | |
| display: inline-block; | |
| padding: 4px 10px; border-radius: 999px; | |
| font-size: 11px; font-weight: 600; | |
| background: #fff; border: 1px solid var(--plum-border); | |
| color: var(--text-muted); | |
| } | |
| /* ── Stat cards (hero right) ── */ | |
| .stat-cards { | |
| display: flex; flex-direction: column; gap: 12px; | |
| } | |
| .stat-card { | |
| background: #fff; | |
| border: 1.5px solid var(--plum-border); | |
| border-radius: var(--radius); | |
| padding: 20px 24px; | |
| } | |
| .stat-card.accent { | |
| background: var(--plum); color: #fff; | |
| border-color: var(--plum); | |
| } | |
| .stat-card-label { | |
| font-size: 11px; font-weight: 700; letter-spacing: 1.4px; | |
| text-transform: uppercase; | |
| color: var(--text-muted); | |
| margin-bottom: 6px; | |
| } | |
| .stat-card.accent .stat-card-label { color: rgba(255,255,255,0.65); } | |
| .stat-card-value { | |
| font-family: 'JetBrains Mono', ui-monospace, monospace; | |
| font-size: 36px; font-weight: 700; | |
| font-variant-numeric: tabular-nums; | |
| line-height: 1; | |
| color: var(--text); | |
| } | |
| .stat-card.accent .stat-card-value { color: var(--gold); } | |
| .stat-card-sub { | |
| font-size: 12px; color: var(--text-muted); margin-top: 4px; | |
| } | |
| .stat-card.accent .stat-card-sub { color: rgba(255,255,255,0.55); } | |
| .stat-pair { | |
| display: grid; grid-template-columns: 1fr 1fr; gap: 12px; | |
| } | |
| .stat-pair .stat-card-value { font-size: 26px; } | |
| /* ── Section heading ── */ | |
| .section { padding: 64px 0; } | |
| .section-head { | |
| display: flex; align-items: center; gap: 14px; | |
| margin-bottom: 28px; | |
| } | |
| .section-head::before { | |
| content: ""; flex-shrink: 0; | |
| width: 20px; height: 3px; | |
| background: var(--plum); border-radius: 999px; | |
| } | |
| .section-title { | |
| font-size: 11px; font-weight: 800; letter-spacing: 1.8px; | |
| text-transform: uppercase; color: var(--text); | |
| } | |
| /* ── Features grid ── */ | |
| .features-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); | |
| gap: 16px; | |
| } | |
| .feature-card { | |
| background: var(--cream-2); | |
| border: 1px solid var(--plum-border); | |
| border-radius: var(--radius); | |
| padding: 22px 20px; | |
| transition: border-color .15s, transform .08s; | |
| } | |
| .feature-card:hover { border-color: var(--plum); transform: translateY(-2px); } | |
| .feature-icon { | |
| font-size: 22px; margin-bottom: 12px; display: block; | |
| } | |
| .feature-name { | |
| font-size: 14px; font-weight: 700; margin-bottom: 6px; | |
| } | |
| .feature-desc { | |
| font-size: 13px; color: var(--text-muted); line-height: 1.55; | |
| } | |
| /* ── Endpoints grid ── */ | |
| .endpoints-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fill, minmax(260px, 1fr)); | |
| gap: 12px; | |
| } | |
| .endpoint { | |
| display: block; | |
| padding: 16px 18px; | |
| background: var(--cream-2); | |
| border: 1px solid var(--plum-border); | |
| border-radius: var(--radius); | |
| text-decoration: none; color: var(--text); | |
| transition: border-color .15s, transform .08s; | |
| } | |
| .endpoint:hover { border-color: var(--plum); transform: translateY(-2px); } | |
| .endpoint code { | |
| display: block; | |
| font-family: 'JetBrains Mono', ui-monospace, monospace; | |
| font-weight: 700; font-size: 13px; | |
| color: var(--plum); margin-bottom: 5px; | |
| } | |
| .endpoint span { | |
| font-size: 12px; color: var(--text-muted); | |
| line-height: 1.5; display: block; | |
| } | |
| /* ── Divider ── */ | |
| .divider { | |
| border: none; border-top: 1px solid var(--plum-border); | |
| margin: 0; | |
| } | |
| /* ── Footer ── */ | |
| footer { | |
| background: var(--plum); | |
| padding: 40px 0; | |
| margin-top: 0; | |
| } | |
| .footer-inner { | |
| max-width: 1600px; margin: 0 auto; | |
| padding: 0 32px; | |
| display: flex; align-items: center; | |
| justify-content: space-between; | |
| gap: 24px; flex-wrap: wrap; | |
| } | |
| .footer-brand { | |
| font-size: 15px; font-weight: 700; color: #fff; | |
| margin-bottom: 4px; | |
| } | |
| .footer-copy { | |
| font-size: 12px; color: rgba(255,255,255,0.55); | |
| line-height: 1.6; | |
| } | |
| .footer-links { | |
| display: flex; flex-wrap: wrap; gap: 8px; | |
| } | |
| .footer-link { | |
| padding: 6px 14px; border-radius: 999px; | |
| font-size: 12px; font-weight: 600; | |
| text-decoration: none; | |
| color: rgba(255,255,255,0.75); | |
| border: 1px solid rgba(255,255,255,0.20); | |
| transition: background .15s, color .15s; | |
| } | |
| .footer-link:hover { background: rgba(255,255,255,0.12); color: #fff; } | |
| /* ── Responsive ── */ | |
| @media (max-width: 1100px) { | |
| .hero { grid-template-columns: 1fr; gap: 40px; } | |
| .stat-cards { flex-direction: row; flex-wrap: wrap; } | |
| .stat-card { flex: 1 1 180px; } | |
| } | |
| @media (max-width: 900px) { | |
| .page { padding: 0 20px; } | |
| .nav-inner { padding: 0 20px; } | |
| .hero { padding: 48px 0 56px; } | |
| .nav-links .nav-link { display: none; } | |
| .footer-inner { padding: 0 20px; } | |
| } | |
| @media (max-width: 600px) { | |
| .hero { padding: 36px 0 44px; } | |
| .stat-pair { grid-template-columns: 1fr; } | |
| .cta { padding: 11px 18px; font-size: 13px; } | |
| .footer-inner { flex-direction: column; align-items: flex-start; } | |
| } | |
| @media (min-width: 1400px) { | |
| .features-grid { grid-template-columns: repeat(4, 1fr); } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <!-- ── Navbar ── --> | |
| <nav class="nav"> | |
| <div class="nav-inner"> | |
| <a class="nav-logo" href="/"> | |
| <span class="nav-logo-badge">C</span> | |
| Chakravyuh | |
| </a> | |
| <div class="nav-links"> | |
| <a class="nav-link" href="/demo/">Demo</a> | |
| <a class="nav-link" href="/leaderboard">Leaderboard</a> | |
| <a class="nav-link" href="/eval">Eval</a> | |
| <a class="nav-link" href="/docs">API</a> | |
| <a class="nav-cta" href="/demo/">Open Demo →</a> | |
| </div> | |
| </div> | |
| </nav> | |
| <!-- ── Hero ── --> | |
| <div class="page"> | |
| <section class="hero"> | |
| <div class="hero-left"> | |
| <span class="hero-eyebrow">Multi-Agent UPI Fraud Arena</span> | |
| <h1>The benchmark where <em>scammers train</em> against defenders.</h1> | |
| <p class="hero-lede"> | |
| Five agents — Scammer, Victim, on-device Analyzer LLM, Bank Monitor, Regulator — | |
| run adversarial fraud episodes under structural information asymmetry. | |
| <strong>Two trained adapters:</strong> the Analyzer (Qwen2.5-7B + LoRA, 8-rubric GRPO) | |
| hits <strong>99.3 % detection / 6.7 % FPR</strong>; the Scammer | |
| (Qwen2.5-0.5B + LoRA, adversarial GRPO) bypasses rules at | |
| <strong>93.75 %</strong> — a 0.5B model beating 70B+ frontier LLMs | |
| at detector evasion. | |
| </p> | |
| <div class="cta-row"> | |
| <a class="cta primary" href="/demo/">Open interactive demo →</a> | |
| <a class="cta secondary" href="/docs">API docs (Swagger)</a> | |
| <a class="cta secondary" href="/leaderboard">Leaderboard</a> | |
| </div> | |
| <div class="badge-row"> | |
| <span class="badge">OpenEnv Hackathon 2026</span> | |
| <span class="badge">MIT License</span> | |
| <span class="badge">CC-BY-4.0 Dataset</span> | |
| <span class="badge">n = 175 bench scenarios</span> | |
| </div> | |
| </div> | |
| <div class="stat-cards"> | |
| <div class="stat-card accent"> | |
| <div class="stat-card-label">v2 Detection rate</div> | |
| <div class="stat-card-value">99.3%</div> | |
| <div class="stat-card-sub">vs 100% v1 (reward-hacked)</div> | |
| </div> | |
| <div class="stat-pair"> | |
| <div class="stat-card"> | |
| <div class="stat-card-label">v2 FPR</div> | |
| <div class="stat-card-value">6.7%</div> | |
| <div class="stat-card-sub">v1 was 36%</div> | |
| </div> | |
| <div class="stat-card"> | |
| <div class="stat-card-label">F1 Score</div> | |
| <div class="stat-card-value">0.99</div> | |
| <div class="stat-card-sub">+0.03 vs v1</div> | |
| </div> | |
| </div> | |
| <div class="stat-pair"> | |
| <div class="stat-card"> | |
| <div class="stat-card-label">Novel det.</div> | |
| <div class="stat-card-value">97.1%</div> | |
| <div class="stat-card-sub">post-2024 scams</div> | |
| </div> | |
| <div class="stat-card"> | |
| <div class="stat-card-label">Bench size</div> | |
| <div class="stat-card-value">175</div> | |
| <div class="stat-card-sub">scenarios</div> | |
| </div> | |
| </div> | |
| <div class="stat-card accent"> | |
| <div class="stat-card-label">Scammer LoRA bypass (0.5B)</div> | |
| <div class="stat-card-value">93.75%</div> | |
| <div class="stat-card-sub">best-of-8 vs rules · beats 70B+ frontier LLMs</div> | |
| </div> | |
| </div> | |
| </section> | |
| <hr class="divider"> | |
| <!-- ── Features ── --> | |
| <section class="section"> | |
| <div class="section-head"> | |
| <span class="section-title">Five-agent arena</span> | |
| </div> | |
| <div class="features-grid"> | |
| <div class="feature-card"> | |
| <span class="feature-icon">🎭</span> | |
| <div class="feature-name">Scammer</div> | |
| <div class="feature-desc">Qwen2.5-0.5B + LoRA trained via GRPO to craft convincing UPI fraud scripts across banking, KYC, OTP and CEO-deepfake categories.</div> | |
| </div> | |
| <div class="feature-card"> | |
| <span class="feature-icon">🛡</span> | |
| <div class="feature-name">Analyzer LLM</div> | |
| <div class="feature-desc">Qwen2.5-7B LoRA post-trained on 8-rubric GRPO reward. v2 retrain fixed reward hacking: FPR dropped 5× while detection held at 99.3%.</div> | |
| </div> | |
| <div class="feature-card"> | |
| <span class="feature-icon">🏦</span> | |
| <div class="feature-name">Bank Monitor</div> | |
| <div class="feature-desc">Rule-based transaction watchdog that applies velocity limits, amount thresholds, and beneficiary trust scores in real-time per episode.</div> | |
| </div> | |
| <div class="feature-card"> | |
| <span class="feature-icon">⚖️</span> | |
| <div class="feature-name">Composable Reward</div> | |
| <div class="feature-desc">8-leaf rubric with independently tuneable weights. Reward hacking is made visible: toggle v1 vs v2 profiles on the same analyzer output.</div> | |
| </div> | |
| </div> | |
| </section> | |
| <hr class="divider"> | |
| <!-- ── Endpoints ── --> | |
| <section class="section"> | |
| <div class="section-head"> | |
| <span class="section-title">API endpoints</span> | |
| </div> | |
| <div class="endpoints-grid"> | |
| <a class="endpoint" href="/demo/"> | |
| <code>/demo/</code> | |
| <span>Interactive Gradio UI — replay curated episodes or score your own message.</span> | |
| </a> | |
| <a class="endpoint" href="/health"> | |
| <code>GET /health</code> | |
| <span>OpenEnv liveness probe. Returns {"status": "healthy"}.</span> | |
| </a> | |
| <a class="endpoint" href="/metadata"> | |
| <code>GET /metadata</code> | |
| <span>Environment metadata (action / observation schema, version).</span> | |
| </a> | |
| <a class="endpoint" href="/schema"> | |
| <code>GET /schema</code> | |
| <span>Pydantic model JSON schemas for action and observation.</span> | |
| </a> | |
| <a class="endpoint" href="/leaderboard"> | |
| <code>GET /leaderboard</code> | |
| <span>Ranked submissions on chakravyuh-bench-v0.</span> | |
| </a> | |
| <a class="endpoint" href="/eval"> | |
| <code>GET /eval</code> | |
| <span>v2 eval artifact — detection / FPR / F1 / per-difficulty breakdown.</span> | |
| </a> | |
| <a class="endpoint" href="/eval/bootstrap"> | |
| <code>GET /eval/bootstrap</code> | |
| <span>10k-iteration percentile bootstrap 95% confidence intervals.</span> | |
| </a> | |
| <a class="endpoint" href="/docs#/diagnose/post_diagnose_diagnose_post"> | |
| <code>POST /diagnose</code> | |
| <span>Score one message; get full 8-rubric AnalyzerRubricV2 decomposition.</span> | |
| </a> | |
| <a class="endpoint" href="/docs"> | |
| <code>/docs · /openapi.json</code> | |
| <span>Interactive API explorer + OpenAPI 3.1 schema.</span> | |
| </a> | |
| </div> | |
| </section> | |
| </div> | |
| <!-- ── Footer ── --> | |
| <footer> | |
| <div class="footer-inner"> | |
| <div> | |
| <div class="footer-brand">Chakravyuh</div> | |
| <div class="footer-copy"> | |
| Open-source benchmark for Indian UPI fraud detection · | |
| Entry to the Meta PyTorch OpenEnv Hackathon 2026, Bangalore.<br> | |
| Built by <strong>Ujjwal Pardeshi</strong> & <strong>Omkar Kadam</strong> · | |
| MIT (code) · CC-BY-4.0 (dataset) | |
| </div> | |
| </div> | |
| <div class="footer-links"> | |
| <a class="footer-link" href="https://huggingface.co/datasets/ujjwalpardeshi/chakravyuh-bench-v0">Dataset</a> | |
| <a class="footer-link" href="https://huggingface.co/ujjwalpardeshi/chakravyuh-analyzer-lora-v2">Analyzer LoRA</a> | |
| <a class="footer-link" href="https://huggingface.co/ujjwalpardeshi/chakravyuh-scammer-lora-phase1">Scammer LoRA</a> | |
| <a class="footer-link" href="https://github.com/UjjwalPardeshi/Chakravyuh">GitHub</a> | |
| <a class="footer-link" href="/docs">API</a> | |
| </div> | |
| </div> | |
| </footer> | |
| </body> | |
| </html>""" | |
# Registered explicitly at the root path: without a route binding the handler
# is never reachable and `/` keeps answering 404. Hidden from the OpenAPI
# schema because it serves a human-facing page, not an API payload.
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
def landing() -> HTMLResponse:
    """Serve the static landing page at the Space root.

    Returns:
        An ``HTMLResponse`` wrapping the prebuilt ``_LANDING_HTML`` document.
        The markup is a module-level constant, so no per-request rendering
        takes place.
    """
    return HTMLResponse(_LANDING_HTML)
def manifest() -> dict:
    """Build the minimal web-app manifest payload.

    Exists so the manifest lookup stops surfacing as a 404 in browser
    DevTools. A new dict (with a new, empty ``icons`` list) is created on
    every call, so callers may mutate the result freely.

    Returns:
        The manifest fields, in the order they should be serialised.
    """
    # NOTE(review): no route registration for this handler is visible in this
    # part of the file — confirm where (and at which path) it is attached.
    return dict(
        name="Chakravyuh",
        short_name="Chakravyuh",
        description="Multi-Agent UPI Fraud Detection Arena",
        start_url="/",
        display="browser",
        background_color="#FFF3E6",
        theme_color="#381932",
        icons=[],
    )
| _DEMO_PREVIEW_HTML = """<!DOCTYPE html> | |
| <html lang="en"><head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>Chakravyuh — warming up</title> | |
| <style> | |
| body { font-family: -apple-system, system-ui, Segoe UI, sans-serif; | |
| margin: 0; padding: 48px 24px; background: #FFF3E6; color: #000; } | |
| main { max-width: 720px; margin: 0 auto; } | |
| h1 { font-size: 26px; margin: 0 0 6px; } | |
| .sub { color: rgba(0,0,0,0.62); margin: 0 0 24px; font-size: 14px; } | |
| figure { margin: 0 0 24px; } | |
| img { max-width: 100%; border-radius: 8px; box-shadow: 0 1px 4px rgba(0,0,0,0.08); } | |
| figcaption { font-size: 12px; color: rgba(0,0,0,0.55); margin-top: 6px; } | |
| .pill { display: inline-block; padding: 4px 10px; border-radius: 999px; | |
| background: rgba(46,125,50,0.10); color: #1b5e20; font-size: 12px; | |
| letter-spacing: 0.04em; text-transform: uppercase; } | |
| a { color: #381932; font-weight: 600; } | |
| .grid { display: grid; grid-template-columns: 1fr 1fr; gap: 18px; margin: 16px 0 24px; } | |
| .grid-3 { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 18px; margin: 16px 0 24px; } | |
| .card { padding: 14px; background: #fff; border-radius: 10px; | |
| box-shadow: 0 1px 4px rgba(0,0,0,0.06); font-size: 13px; line-height: 1.5; } | |
| .card.v1 { border-left: 3px solid #9C1B1B; } | |
| .card.v2 { border-left: 3px solid #381932; } | |
| .card.scammer { border-left: 3px solid #e8c97a; background: #381932; color: #fff; } | |
| .card .label { font-weight: 700; font-size: 12px; letter-spacing: 0.6px; text-transform: uppercase; } | |
| .card.v1 .label { color: #9C1B1B; } | |
| .card.v2 .label { color: #381932; } | |
| .card.scammer .label { color: #e8c97a; } | |
| .card .stat { font-weight: 700; } | |
| .card.v1 .stat { color: #9C1B1B; } | |
| .card.v2 .stat { color: #381932; } | |
| .card.scammer .stat { color: #e8c97a; } | |
| @media (max-width: 900px) { .grid-3 { grid-template-columns: 1fr; } } | |
| @media (max-width: 600px) { .grid { grid-template-columns: 1fr; } } | |
| </style> | |
| </head><body><main> | |
| <span class="pill">Warming up · ~10–30s</span> | |
| <h1>Chakravyuh — multi-agent UPI fraud detection</h1> | |
| <p class="sub">The interactive demo is booting. While it warms up, here's the headline result.</p> | |
| <figure> | |
| <img src="https://raw.githubusercontent.com/UjjwalPardeshi/Chakravyuh/a9e723bf495182724845dbf1f69f8968434a9e02/docs/assets/plots/v2_per_difficulty_check.png" | |
| alt="Per-difficulty detection: scripted analyzer vs Chakravyuh v2 LoRA — scripted catches 50% on novel post-2024 scams; v2 catches 97%."> | |
| <figcaption>Per-difficulty detection — scripted vs Chakravyuh v2 (n = 175 bench scenarios).</figcaption> | |
| </figure> | |
| <div class="grid-3"> | |
| <div class="card v1"> | |
| <div class="label">v1 · Analyzer (reward-hacked)</div> | |
| detection 100 % · FPR <span class="stat">36 %</span> · F1 0.96<br> | |
| the model learned to flag everything. | |
| </div> | |
| <div class="card v2"> | |
| <div class="label">v2 · Analyzer (principled retrain)</div> | |
| detection 99.3 % · FPR <span class="stat">6.7 %</span> · F1 0.99<br> | |
| same detection, FPR collapsed 5×. | |
| </div> | |
| <div class="card scammer"> | |
| <div class="label">Scammer LoRA (0.5B + GRPO)</div> | |
| best-of-8 bypass <span class="stat">93.75 %</span> vs rules<br> | |
| beats 70B+ frontier LLMs at evasion. | |
| </div> | |
| </div> | |
| <p style="font-size:13px;color:rgba(0,0,0,0.72)">Once the demo is live, you'll see eight tabs: Replay, Live Q&A, You vs Analyzer, 🎭 Trained Scammer, Adversary Lab, v1↔v2 toggle, <strong>🔴 Red-team it yourself</strong>, and Leaderboard.</p> | |
| <p><a href="/">← back to landing</a> · <a href="/demo/" id="live-link">try the live demo</a></p> | |
| <script> | |
| // Poll /demo/ every 2s; redirect when 200. | |
| (function poll() { | |
| fetch('/demo/', { method: 'HEAD', cache: 'no-store' }).then(function(r){ | |
| if (r.ok) { window.location.href = '/demo/'; } | |
| else { setTimeout(poll, 2000); } | |
| }).catch(function(){ setTimeout(poll, 2000); }); | |
| })(); | |
| </script> | |
| </main></body></html>""" | |
def demo_preview() -> HTMLResponse:
    """Return the static "warming up" placeholder for the Gradio demo.

    The page is a fixed HTML document, so it renders immediately while the
    real demo boots. Its embedded script polls ``/demo/`` every two seconds
    and navigates there as soon as the request succeeds.
    """
    # NOTE(review): no route registration for this handler is visible in this
    # part of the file — confirm where (and at which path) it is attached.
    placeholder_page = HTMLResponse(_DEMO_PREVIEW_HTML)
    return placeholder_page
| # --------------------------------------------------------------------------- | |
| # Mount the Gradio demo at /demo. Lazy-import so importing server.app stays | |
| # cheap for tools that only want the FastAPI app (the existing test suite). | |
| # --------------------------------------------------------------------------- | |
def _mount_demo() -> None:
    """Attach the Gradio demo UI to ``app`` under ``/demo``.

    Both imports are deferred to call time: importing this module must not
    require ``gradio``, so the OpenEnv API can still come up in a slim
    runtime image. Any import or build failure propagates to the caller.
    """
    import gradio as gr  # type: ignore[import-not-found]
    from server.demo_ui import (
        build_app as make_demo,
        _build_theme as make_theme,
        CUSTOM_CSS as demo_css,
    )

    # Build the Blocks UI first, then the theme — same order as the call
    # arguments were originally evaluated in.
    blocks = make_demo()
    mount_options = {
        "path": "/demo",
        "theme": make_theme(),
        "css": demo_css,
    }
    gr.mount_gradio_app(app, blocks, **mount_options)
# Mount on import so uvicorn picks it up without a separate startup hook.
# Failures here must not crash the OpenEnv API surface — log and continue so
# /reset, /step, /state, /eval, /diagnose still serve.
try:
    _mount_demo()
except ImportError as _demo_err:
    # ModuleNotFoundError is a subclass of ImportError, so one clause covers
    # both. Only a missing `gradio` package is the expected slim-image case;
    # any other import failure (e.g. a broken `server.demo_ui`) is a real bug
    # and gets its full traceback logged instead of being misreported.
    import logging

    if (_demo_err.name or "").split(".")[0] == "gradio":
        logging.getLogger("chakravyuh.app").error(
            "Gradio not installed; /demo route disabled. Error: %s", _demo_err
        )
    else:
        logging.getLogger("chakravyuh.app").exception(
            "Import failure (module %r) while mounting Gradio demo at /demo — "
            "/demo will 404 but other OpenEnv routes remain healthy. "
            "See traceback below.",
            _demo_err.name,
        )
except Exception:
    # Top-level boundary: deliberately broad so a demo-only failure can never
    # take the API down. `logger.exception` appends the traceback after the
    # message.
    import logging

    logging.getLogger("chakravyuh.app").exception(
        "Unexpected failure mounting Gradio demo at /demo — /demo will 404 "
        "but other OpenEnv routes remain healthy. See traceback below."
    )
def main() -> None:
    """Run the app under uvicorn.

    The bind address comes from the ``HOST`` and ``PORT`` environment
    variables, falling back to ``0.0.0.0`` and ``8000``. ``uvicorn`` is
    imported here rather than at module level, so merely importing this
    module does not require it.
    """
    import uvicorn

    bind_host = os.environ.get("HOST", "0.0.0.0")
    bind_port = int(os.environ.get("PORT", "8000"))
    uvicorn.run(app, host=bind_host, port=bind_port)


if __name__ == "__main__":
    main()