"""FastAPI entrypoint for Chakravyuh — OpenEnv API + Gradio demo on one app. Routes: / — landing page (HTML) ← was 404 before /demo — interactive Gradio demo (mounted) /reset, /step, /state, /metadata, /schema, /health — OpenEnv contract /leaderboard, /submit — public leaderboard (E.10) /docs, /openapi.json — FastAPI swagger / schema /mcp/* — MCP server (registered by openenv-core) Run locally: uvicorn server.app:app --host 0.0.0.0 --port 8000 Hugging Face Space / Docker: Referenced by ``openenv.yaml`` as ``app: server.app:app``. """ from __future__ import annotations import os from fastapi.responses import HTMLResponse from openenv.core.env_server import create_app from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv from chakravyuh_env.openenv_models import ChakravyuhAction, ChakravyuhObservation from server.diagnose_endpoint import attach_to_app as attach_diagnose from server.eval_endpoint import attach_to_app as attach_eval from server.leaderboard import attach_to_app # One factory call per concurrent session → fully isolated episodes. max_concurrent = int(os.getenv("MAX_CONCURRENT_ENVS", "8")) app = create_app( ChakravyuhOpenEnv, ChakravyuhAction, ChakravyuhObservation, env_name="chakravyuh_env", max_concurrent_envs=max_concurrent, ) # Public leaderboard endpoints (E.10): GET /leaderboard, POST /submit. # Persistence at logs/leaderboard.jsonl (override via CHAKRAVYUH_LEADERBOARD_PATH). attach_to_app(app) # Research endpoints: GET /eval (and /eval/{bootstrap,known-novel,redteam,…}), # POST /diagnose (single-message rubric breakdown using AnalyzerRubricV2). attach_eval(app) attach_diagnose(app) # --------------------------------------------------------------------------- # Landing page — replaces FastAPI's default 404 at `/` so the HF Space root # shows something useful at-a-glance. # --------------------------------------------------------------------------- _LANDING_HTML = """ Chakravyuh — Multi-Agent Fraud Arena
Multi-Agent UPI Fraud Arena

The benchmark where scammers train against defenders.

Five agents — Scammer, Victim, on-device Analyzer LLM, Bank Monitor, Regulator — run adversarial fraud episodes under structural information asymmetry. Two trained adapters: the Analyzer (Qwen2.5-7B + LoRA, 8-rubric GRPO) hits 99.3 % detection / 6.7 % FPR; the Scammer (Qwen2.5-0.5B + LoRA, adversarial GRPO) bypasses rules at 93.75 % — a 0.5B model beating 70B+ frontier LLMs at detector evasion.

OpenEnv Hackathon 2026 MIT License CC-BY-4.0 Dataset n = 175 bench scenarios
v2 Detection rate
99.3%
vs 100% v1 (reward-hacked)
v2 FPR
6.7%
v1 was 36%
F1 Score
0.99
+0.03 vs v1
Novel det.
97.1%
post-2024 scams
Bench size
175
scenarios
Scammer LoRA bypass (0.5B)
93.75%
best-of-8 vs rules · beats 70B+ frontier LLMs

Five-agent arena
🎭
Scammer
Qwen2.5-0.5B + LoRA trained via GRPO to craft convincing UPI fraud scripts across banking, KYC, OTP and CEO-deepfake categories.
🛡
Analyzer LLM
Qwen2.5-7B LoRA post-trained on 8-rubric GRPO reward. v2 retrain fixed reward hacking: FPR dropped 5× while detection held at 99.3%.
🏦
Bank Monitor
Rule-based transaction watchdog that applies velocity limits, amount thresholds, and beneficiary trust scores in real-time per episode.
⚖️
Composable Reward
8-leaf rubric with independently tuneable weights. Reward hacking is made visible: toggle v1 vs v2 profiles on the same analyzer output.

API endpoints
""" @app.get("/", response_class=HTMLResponse, include_in_schema=False) def landing() -> HTMLResponse: """Friendly landing page at the Space root (was 404 before).""" return HTMLResponse(_LANDING_HTML) @app.get("/manifest.json", include_in_schema=False) def manifest() -> dict: """Minimal web-app manifest — eliminates the 404 that browser DevTools reports.""" return { "name": "Chakravyuh", "short_name": "Chakravyuh", "description": "Multi-Agent UPI Fraud Detection Arena", "start_url": "/", "display": "browser", "background_color": "#FFF3E6", "theme_color": "#381932", "icons": [], } _DEMO_PREVIEW_HTML = """ Chakravyuh — warming up
Warming up · ~10–30s

Chakravyuh — multi-agent UPI fraud detection

The interactive demo is booting. While it warms up, here's the headline result.

Per-difficulty detection: scripted analyzer vs Chakravyuh v2 LoRA — scripted catches 50% on novel post-2024 scams; v2 catches 97%.
Per-difficulty detection — scripted vs Chakravyuh v2 (n = 175 bench scenarios).
v1 · Analyzer (reward-hacked)
detection 100 % · FPR 36 % · F1 0.96
the model learned to flag everything.
v2 · Analyzer (principled retrain)
detection 99.3 % · FPR 6.7 % · F1 0.99
same detection, FPR collapsed 5×.
Scammer LoRA (0.5B + GRPO)
best-of-8 bypass 93.75 % vs rules
beats 70B+ frontier LLMs at evasion.

Once the demo is live, you'll see eight tabs: Replay, Live Q&A, You vs Analyzer, 🎭 Trained Scammer, Adversary Lab, v1↔v2 toggle, 🔴 Red-team it yourself, and Leaderboard.

← back to landing · try the live demo

""" @app.get("/demo/preview", response_class=HTMLResponse, include_in_schema=False) def demo_preview() -> HTMLResponse: """Static fallback that renders instantly while Gradio /demo/ warms up. Self-redirects to /demo/ once that route returns 200.""" return HTMLResponse(_DEMO_PREVIEW_HTML) # --------------------------------------------------------------------------- # Mount the Gradio demo at /demo. Lazy-import so importing server.app stays # cheap for tools that only want the FastAPI app (the existing test suite). # --------------------------------------------------------------------------- def _mount_demo() -> None: """Mount the Gradio demo at /demo. Lazy imports keep the OpenEnv API alive even if `gradio` is not installed (e.g. in a slim runtime image).""" import gradio as gr # type: ignore[import-not-found] from server.demo_ui import build_app as _build_demo, _build_theme, CUSTOM_CSS demo_blocks = _build_demo() gr.mount_gradio_app( app, demo_blocks, path="/demo", theme=_build_theme(), css=CUSTOM_CSS, ) # Mount on import so uvicorn picks it up without a separate startup hook. # Failures here must not crash the OpenEnv API surface — log full traceback # and continue so /reset, /step, /state, /eval, /diagnose still serve. try: _mount_demo() except (ImportError, ModuleNotFoundError) as _demo_err: import logging logging.getLogger("chakravyuh.app").error( "Gradio not installed; /demo route disabled. Error: %s", _demo_err ) except Exception: import logging logging.getLogger("chakravyuh.app").exception( "Unexpected failure mounting Gradio demo at /demo — /demo will 404 " "but other OpenEnv routes remain healthy. See traceback above." ) def main() -> None: import uvicorn uvicorn.run( app, host=os.getenv("HOST", "0.0.0.0"), port=int(os.getenv("PORT", "8000")), ) if __name__ == "__main__": main()