"""Gradio demo UI for Chakravyuh — visual v2.

Upgrades in this iteration:
  A. Animated suspicion bars (CSS transitions)
  B. 5-agent status card grid (hero multi-agent visual)
  C. Fixed cream/plum palette driven by the ``--ck-*`` CSS custom properties
     declared in ``CUSTOM_CSS``. The light color-scheme is pinned, and a
     ``prefers-color-scheme: dark`` override block keeps the same palette
     when the OS is in dark mode.
  D. Attack timeline — one icon per turn, color-coded by actor
  E. Larger typography + breathing room

Launch:
    pip install -e '.[demo]'
    python -m server.demo_ui
    # Opens at http://127.0.0.1:7860
"""

from __future__ import annotations

import logging

import gradio as gr  # type: ignore[import-not-found]

from chakravyuh_env.agents.analyzer import ScriptedAnalyzer
from chakravyuh_env.schemas import AnalyzerScore, ChatMessage, Observation
from server.episode_curator import (
    CURATED_EPISODES,
    ReplayedEpisode,
    compute_agent_states,
    format_agent_cards_html,
    format_attack_timeline_html,
    format_bank_panel,
    format_chat_html,
    format_suspicion_timeline,
    max_turn,
    outcome_badge,
    replay,
    suspicion_score_for_turn,
)

logger = logging.getLogger("chakravyuh.demo")

TITLE = "Chakravyuh — 5-Agent Fraud Arena"
SUBTITLE = (
    "A self-improving benchmark for Indian UPI fraud detection. "
    "Every episode is deterministic (seed-reproducible) and grounded in real RBI/NPCI case studies."
) # --------------------------------------------------------------------------- # Design system — two-color palette with strict white/black text rule # --------------------------------------------------------------------------- # # #FFF3E6 warm peach-cream → page surface, cards, light states # #381932 deep aubergine plum → accents, fills, selected states, dark states # # Text contract (no greys): # - On any light surface (#FFF3E6 / white) → BLACK # - On any plum fill (#381932) → WHITE # # Severity / state encoding uses *fill density* — there are only two colors, # so we vary how much plum is on the surface: # - HIGH / decisive (FROZEN / MONEY EXTRACTED / HIGH suspicion / direct-PII keyword) # → solid plum fill + white text # - MEDIUM (FLAGGED / MEDIUM suspicion / urgency keyword) # → white fill + plum border + plum-tinted accent + black text # - LOW / safe (APPROVED / LOW suspicion / regular text) # → cream / white fill + black text + hairline plum border # --------------------------------------------------------------------------- PALETTE = { "cream": "#FFF3E6", "plum": "#381932", } CUSTOM_CSS = """ /* === Force light color-scheme — Gradio's default theme injects a * prefers-color-scheme: dark block that flips body bg to #0E0A07 and * swaps component tokens (textbox/example values) to white text on * dark, which collides with our cream surfaces. We pin light mode. 
=== */ :root, html, body, gradio-app, .gradio-container { color-scheme: light !important; } /* === Design tokens === */ :root { --ck-cream: #FFF3E6; /* page surface */ --ck-cream-2: #FFFBF5; /* lifted surfaces */ --ck-cream-3: #FFE8D2; /* subtle dividers / chip surface */ --ck-plum: #381932; /* accent / dark fills */ --ck-plum-hover: #2A0F25; /* button hover */ --ck-plum-tint-08: rgba(56, 25, 50, 0.08); /* hover wash */ --ck-plum-tint-12: rgba(56, 25, 50, 0.12); --ck-plum-tint-18: rgba(56, 25, 50, 0.18); /* hairline border */ --ck-plum-tint-30: rgba(56, 25, 50, 0.30); /* stronger border */ --ck-black: #000000; --ck-black-72: rgba(0, 0, 0, 0.72); /* subtitle / secondary copy */ --ck-white: #FFFFFF; --ck-radius-sm: 8px; --ck-radius-md: 12px; --ck-radius-lg: 16px; --ck-shadow-1: 0 1px 2px rgba(56, 25, 50, 0.06), 0 1px 1px rgba(56, 25, 50, 0.04); --ck-shadow-2: 0 6px 18px rgba(56, 25, 50, 0.10), 0 2px 4px rgba(56, 25, 50, 0.05); --ck-font-stack: 'Inter', 'Segoe UI', -apple-system, BlinkMacSystemFont, system-ui, 'Helvetica Neue', sans-serif; } /* === Page surface === */ html, body, gradio-app, .gradio-container { background: var(--ck-cream) !important; color: var(--ck-black) !important; font-family: var(--ck-font-stack); -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale; } /* Override Gradio's default container chrome */ .gradio-container .main, .gradio-container .wrap, .gr-block, .gr-form, .gr-panel, .gr-padded { background: transparent !important; } /* === Layout === */ .chakravyuh-container { max-width: 1200px; margin: 0 auto; padding: 24px 20px 48px; } /* === Hero === */ .chakravyuh-hero { display: flex; flex-direction: column; gap: 8px; padding: 24px 0 12px; border-bottom: 1px solid var(--ck-plum-tint-18); margin-bottom: 22px; } .chakravyuh-eyebrow { display: inline-block; font-size: 11px; font-weight: 700; letter-spacing: 2px; text-transform: uppercase; color: var(--ck-white); background: var(--ck-plum); border: 1px solid var(--ck-plum); 
padding: 5px 12px; border-radius: 999px; align-self: flex-start; margin-bottom: 6px; } .chakravyuh-title { font-size: clamp(26px, 4.5vw, 38px) !important; font-weight: 800 !important; letter-spacing: -0.6px; line-height: 1.1; color: var(--ck-black) !important; margin: 0 !important; } .chakravyuh-subtitle { font-size: clamp(14px, 1.4vw, 16px) !important; line-height: 1.6; color: var(--ck-black-72) !important; max-width: 760px; margin: 0 !important; } /* === Tabs === */ .tab-nav, div[role="tablist"] { border-bottom: 1px solid var(--ck-plum-tint-18) !important; background: transparent !important; margin-bottom: 22px !important; gap: 4px; } .tab-nav button, div[role="tablist"] button, button[role="tab"] { font-family: var(--ck-font-stack) !important; font-size: 14px !important; font-weight: 600 !important; color: var(--ck-black) !important; opacity: 0.75 !important; padding: 12px 18px !important; border: 0 !important; background: transparent !important; border-bottom: 2px solid transparent !important; border-radius: 0 !important; transition: opacity .15s ease, border-color .15s ease, background .15s ease; } .tab-nav button:hover, button[role="tab"]:hover { opacity: 1 !important; background: var(--ck-plum-tint-08) !important; } .tab-nav button.selected, .tab-nav button[aria-selected="true"], button[role="tab"][aria-selected="true"], button[role="tab"].selected { opacity: 1 !important; color: var(--ck-black) !important; border-bottom: 3px solid var(--ck-plum) !important; background: transparent !important; } /* === Cards / panels === */ .gr-form, .gr-panel, .form, .block, .gr-group, .gr-box { background: var(--ck-cream-2) !important; border: 1px solid var(--ck-plum-tint-18) !important; border-radius: var(--ck-radius-md) !important; box-shadow: var(--ck-shadow-1); } /* === Form controls — strict white bg + BLACK text === */ .gr-input, .gr-textarea, .gr-textbox textarea, .gr-textbox input, .gr-dropdown, input[type="text"], textarea, input { background: var(--ck-white) 
!important; color: var(--ck-black) !important; -webkit-text-fill-color: var(--ck-black) !important; /* Safari override */ caret-color: var(--ck-plum) !important; border: 1px solid var(--ck-plum-tint-18) !important; border-radius: var(--ck-radius-sm) !important; font-family: var(--ck-font-stack) !important; font-size: 14px !important; transition: border-color .15s ease, box-shadow .15s ease; } .gr-input:focus, .gr-textarea:focus, textarea:focus, input:focus { outline: none !important; border-color: var(--ck-plum) !important; box-shadow: 0 0 0 3px var(--ck-plum-tint-12) !important; } ::placeholder, ::-webkit-input-placeholder, ::-moz-placeholder, :-ms-input-placeholder { color: rgba(0,0,0,0.40) !important; -webkit-text-fill-color: rgba(0,0,0,0.40) !important; } /* === Radios as pills (selected = solid plum + white text) === */ input[type="radio"], input[type="checkbox"] { accent-color: var(--ck-plum) !important; } .gr-radio, fieldset[data-testid="radio"], [data-testid="radio"] { gap: 8px !important; display: flex !important; flex-wrap: wrap !important; } .gr-radio label, .gr-radio .wrap label, [data-testid="radio"] label, fieldset[data-testid="radio"] label { background: var(--ck-white) !important; border: 1px solid var(--ck-plum-tint-18) !important; border-radius: 999px !important; padding: 9px 16px !important; font-size: 13px !important; font-weight: 600 !important; color: var(--ck-black) !important; cursor: pointer !important; display: inline-flex !important; align-items: center !important; gap: 8px !important; transition: background .15s ease, border-color .15s ease, color .15s ease, box-shadow .15s ease; } .gr-radio label:hover, [data-testid="radio"] label:hover { background: var(--ck-plum-tint-08) !important; border-color: var(--ck-plum-tint-30) !important; } /* SELECTED — solid plum bg, white text. Every common Gradio selector covered. 
*/ .gr-radio label:has(input[type="radio"]:checked), .gr-radio label.selected, .gr-radio label[aria-checked="true"], [data-testid="radio"] label:has(input[type="radio"]:checked), [data-testid="radio"] label.selected, [data-testid="radio"] label[aria-checked="true"], fieldset[data-testid="radio"] label:has(input[type="radio"]:checked) { background: var(--ck-plum) !important; border-color: var(--ck-plum) !important; color: var(--ck-white) !important; box-shadow: var(--ck-shadow-1); } .gr-radio label:has(input[type="radio"]:checked) *, [data-testid="radio"] label:has(input[type="radio"]:checked) *, fieldset[data-testid="radio"] label:has(input[type="radio"]:checked) * { color: var(--ck-white) !important; /* force every nested span/strong to white */ } /* === Buttons === */ .gr-button, button.lg, button.sm, .primary { font-family: var(--ck-font-stack) !important; font-weight: 600 !important; border-radius: var(--ck-radius-sm) !important; padding: 10px 18px !important; font-size: 14px !important; letter-spacing: 0.1px; transition: background .15s ease, transform .08s ease, box-shadow .15s ease, border-color .15s ease; } .gr-button.primary, button.primary { background: var(--ck-plum) !important; color: var(--ck-white) !important; border: 1px solid var(--ck-plum) !important; box-shadow: var(--ck-shadow-1); } .gr-button.primary:hover, button.primary:hover { background: var(--ck-plum-hover) !important; transform: translateY(-1px); box-shadow: var(--ck-shadow-2); } .gr-button.secondary, button.secondary, .gr-button:not(.primary) { background: var(--ck-white) !important; color: var(--ck-black) !important; border: 1px solid var(--ck-plum-tint-30) !important; } .gr-button.secondary:hover, button.secondary:hover, .gr-button:not(.primary):hover { background: var(--ck-plum-tint-08) !important; border-color: var(--ck-plum) !important; } .gr-button:disabled, button:disabled { opacity: 0.4; cursor: not-allowed; transform: none !important; } /* === Section heading === */ 
.panel-heading { font-size: 11px !important; font-weight: 800 !important; text-transform: uppercase; letter-spacing: 1.6px; color: var(--ck-black) !important; margin: 22px 0 12px !important; padding: 0 !important; display: flex; align-items: center; gap: 10px; } .panel-heading::before { content: ""; width: 16px; height: 2px; background: var(--ck-plum); border-radius: 999px; } /* === Metadata chip strip === */ .metadata-strip { display: flex; flex-wrap: wrap; gap: 8px; padding: 12px 14px; background: var(--ck-cream-2); border: 1px solid var(--ck-plum-tint-18); border-radius: var(--ck-radius-md); margin: 14px 0 10px; box-shadow: var(--ck-shadow-1); } .meta-chip { display: inline-flex; align-items: baseline; gap: 8px; padding: 6px 12px; background: var(--ck-white); border: 1px solid var(--ck-plum-tint-18); border-radius: 999px; font-size: 12px; line-height: 1; color: var(--ck-black); } .meta-chip-label { font-size: 10px; font-weight: 700; letter-spacing: 1.4px; text-transform: uppercase; color: var(--ck-black-72); } .meta-chip-value { font-family: 'JetBrains Mono', ui-monospace, 'SF Mono', Menlo, Consolas, monospace; font-weight: 700; color: var(--ck-black); font-variant-numeric: tabular-nums; } /* === Form / block labels === */ .gr-block-label, .block-label, span[data-testid="block-label"], .gr-form > label, .gr-form > div > label, fieldset > legend, .gr-input-label, .gr-radio > label:first-child, .gr-radio > .wrap > label:first-child { color: var(--ck-black) !important; opacity: 1 !important; font-weight: 700 !important; font-size: 12px !important; text-transform: uppercase !important; letter-spacing: 1.2px !important; margin-bottom: 8px !important; } /* === Suspicion score panel === */ #suspicion-panel { border-radius: var(--ck-radius-md); padding: 22px 20px; text-align: center; transition: background .35s ease, border-color .35s ease; box-shadow: var(--ck-shadow-1); } #suspicion-score { font-size: clamp(40px, 6vw, 56px); font-weight: 800; margin: 6px 0 4px; 
line-height: 1; font-variant-numeric: tabular-nums; transition: color .35s ease; } #suspicion-label { font-size: 10px; font-weight: 700; text-transform: uppercase; letter-spacing: 2px; margin: 0; } #suspicion-explanation { font-size: 13px; line-height: 1.5; margin-top: 10px; } /* === Suspicion bar === */ .suspicion-bar-fill { transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1), background-color 0.35s ease; border-radius: 999px; } /* === Outcome badge === */ .outcome-badge { font-size: 16px; font-weight: 700; padding: 16px 20px; border-radius: var(--ck-radius-md); text-align: center; margin: 18px 0 6px; letter-spacing: 0.3px; transition: background .35s ease, color .35s ease; box-shadow: var(--ck-shadow-1); } /* === Attack timeline === */ .attack-timeline { border: 1px solid var(--ck-plum-tint-18); border-radius: var(--ck-radius-md); background: var(--ck-cream-2); } .timeline-step { transition: opacity 0.3s ease, transform 0.2s ease; } /* === Agent cards === */ .agent-grid { transition: opacity 0.3s ease; } .agent-card { background: var(--ck-white) !important; border: 1px solid var(--ck-plum-tint-18) !important; border-radius: var(--ck-radius-md) !important; transition: transform 0.18s ease, box-shadow 0.18s ease, border-color 0.18s ease; } .agent-card:hover { transform: translateY(-2px); box-shadow: var(--ck-shadow-2); border-color: var(--ck-plum) !important; } /* === Pulse === */ @keyframes pulse-dot { 0%, 100% { opacity: 1; transform: scale(1); } 50% { opacity: 0.55; transform: scale(0.92); } } .pulse { animation: pulse-dot 1.6s ease-in-out infinite; } /* === 5-agent hero cascade — entrance animation === */ @keyframes ck-agent-cascade { 0%, 18% { opacity: 0; transform: translateY(8px); } 22%, 100% { opacity: 1; transform: translateY(0); } } @keyframes ck-rubric-grow { 0% { width: 0%; } 100% { width: var(--ck-rubric-w, 60%); } } .ck-hero-strip { display: flex; gap: 10px; flex-wrap: wrap; margin: 4px 0 18px; padding: 14px 16px; background: var(--ck-cream-2); 
border: 1px solid var(--ck-plum-tint-18); border-radius: var(--ck-radius-md); } .ck-hero-agent { flex: 1 1 130px; min-width: 130px; padding: 10px 12px; text-align: center; background: var(--ck-cream); border: 1px solid var(--ck-plum-tint-18); border-radius: var(--ck-radius-sm); font-size: 12px; font-weight: 700; color: var(--ck-black); animation: ck-agent-cascade 5s ease-in-out infinite; } .ck-hero-agent:nth-child(1) { animation-delay: 0s; } .ck-hero-agent:nth-child(2) { animation-delay: 1s; } .ck-hero-agent:nth-child(3) { animation-delay: 2s; } .ck-hero-agent:nth-child(4) { animation-delay: 3s; } .ck-hero-agent:nth-child(5) { animation-delay: 4s; } .ck-hero-agent .ck-hero-emoji { display: block; font-size: 24px; margin-bottom: 4px; } .ck-hero-agent .ck-hero-letter { display: inline-block; padding: 1px 6px; font-family: 'JetBrains Mono', monospace; font-size: 10px; background: var(--ck-plum); color: var(--ck-white); border-radius: 4px; margin-left: 4px; } .ck-hero-trained { border: 1.5px solid var(--ck-plum); background: var(--ck-cream-2); } .ck-hero-badge { display: inline-block; margin-top: 4px; font-size: 9px; font-weight: 800; letter-spacing: 1.2px; text-transform: uppercase; color: var(--ck-white); background: var(--ck-plum); padding: 2px 8px; border-radius: 999px; } @media (prefers-reduced-motion: reduce) { .ck-hero-agent { animation-duration: 0.01s !important; } } /* === Hot-key overlay modal === */ .ck-hotkey-modal { display: none; position: fixed; inset: 0; background: rgba(0,0,0,0.45); z-index: 9000; align-items: center; justify-content: center; font-family: var(--ck-font-stack); } .ck-hotkey-modal.open { display: flex; } .ck-hotkey-modal-card { max-width: 460px; width: calc(100% - 32px); padding: 24px 28px; border-radius: var(--ck-radius-md); background: var(--ck-cream); color: var(--ck-black); border: 1px solid var(--ck-plum); box-shadow: var(--ck-shadow-2); } .ck-hotkey-modal h3 { margin: 0 0 12px; font-size: 16px; color: var(--ck-plum); } 
.ck-hotkey-row { display: flex; justify-content: space-between; align-items: center; padding: 6px 0; border-bottom: 1px solid var(--ck-plum-tint-12); font-size: 13px; } .ck-hotkey-row:last-child { border-bottom: 0; } .ck-hotkey-key { font-family: 'JetBrains Mono', monospace; padding: 2px 8px; border-radius: 4px; background: var(--ck-plum); color: var(--ck-white); font-size: 11px; font-weight: 700; } .ck-hotkey-hint { margin-top: 10px; font-size: 11px; color: var(--ck-black-72); text-align: center; } /* === Playback controls === */ #playback-controls { gap: 10px !important; margin: 16px 0 8px !important; flex-wrap: wrap; } #playback-controls button { min-width: 130px; } /* === Examples === */ .examples-table, .gr-examples { background: transparent !important; } .examples-table button, .gr-examples button { background: var(--ck-white) !important; border: 1px solid var(--ck-plum-tint-18) !important; color: var(--ck-black) !important; font-family: var(--ck-font-stack) !important; border-radius: var(--ck-radius-sm) !important; text-align: left !important; padding: 12px 14px !important; transition: background .15s, border-color .15s; } .examples-table button:hover, .gr-examples button:hover { background: var(--ck-plum-tint-08) !important; border-color: var(--ck-plum) !important; } /* === Footer === */ .chakravyuh-footer { margin-top: 32px !important; padding-top: 18px !important; border-top: 1px solid var(--ck-plum-tint-18) !important; font-size: 12px !important; color: var(--ck-black-72) !important; line-height: 1.6; } .chakravyuh-footer a { color: var(--ck-plum) !important; text-decoration: underline; text-decoration-color: var(--ck-plum); text-decoration-thickness: 1.5px; text-underline-offset: 3px; font-weight: 600; } /* === Markdown copy === */ .gr-markdown, .markdown, .prose, .gradio-container .prose, .gradio-container p, .gradio-container li { color: var(--ck-black) !important; font-family: var(--ck-font-stack) !important; } /* Inline `code` — strong override 
against Gradio's dark default */ code, kbd, samp, .gradio-container code, .gr-markdown code, .markdown code, .prose code, p code, span code, li code, td code, th code, .gr-html code, .gr-html-content code { background: var(--ck-cream-3) !important; color: var(--ck-black) !important; border: 1px solid var(--ck-plum-tint-18) !important; border-radius: 4px !important; padding: 1px 7px !important; font-size: 0.92em !important; font-family: 'JetBrains Mono', ui-monospace, 'SF Mono', Menlo, Consolas, monospace !important; font-weight: 600 !important; white-space: nowrap; } code::before, code::after { content: none !important; } /* === Scrollbars === */ *::-webkit-scrollbar { width: 8px; height: 8px; } *::-webkit-scrollbar-track { background: transparent; } *::-webkit-scrollbar-thumb { background: var(--ck-plum-tint-18); border-radius: 999px; } *::-webkit-scrollbar-thumb:hover { background: var(--ck-plum-tint-30); } /* === Responsive: tablet === */ @media (max-width: 900px) { .chakravyuh-container { padding: 16px 14px 36px; } .chakravyuh-hero { padding: 18px 0 8px; margin-bottom: 16px; } .agent-grid { grid-template-columns: repeat(2, 1fr) !important; } .gr-row { flex-direction: column !important; gap: 0 !important; } .gr-row > .gr-column { width: 100% !important; max-width: 100% !important; } } /* === Responsive: mobile === */ @media (max-width: 600px) { .chakravyuh-container { padding: 12px 10px 28px; } .agent-grid { grid-template-columns: 1fr !important; } #suspicion-score { font-size: 40px; } .outcome-badge { font-size: 14px; padding: 12px; } .gr-button, button { width: 100% !important; min-width: 0 !important; } #playback-controls { flex-direction: column !important; } .tab-nav button { padding: 9px 12px !important; font-size: 13px !important; } } /* === Reduced motion === */ @media (prefers-reduced-motion: reduce) { *, *::before, *::after { animation-duration: 0.001ms !important; transition-duration: 0.001ms !important; } } /* === Focus visible (a11y) === */ 
:focus-visible { outline: 2px solid var(--ck-plum) !important; outline-offset: 2px; } /* === Dark-mode safety net === * * If the user's OS is in dark mode, Gradio applies dark-theme tokens to * many components (textbox, examples, dropdown, dataframe, …). We override * those tokens to keep the cream / plum contract consistent and the strict * white-on-plum / black-on-light text rule intact. * * Selectors chosen to win against Gradio's `!important` declarations. */ @media (prefers-color-scheme: dark) { html, body, gradio-app, .gradio-container, .gradio-container .main, .gradio-container .wrap { background: var(--ck-cream) !important; color: var(--ck-black) !important; } /* Cards, blocks, forms */ .gradio-container .gr-block, .gradio-container .gr-form, .gradio-container .gr-panel, .gradio-container .gr-padded, .gradio-container .gr-box, .gradio-container .gr-group, .gradio-container .block, .gradio-container .form, .gradio-container fieldset { background: var(--ck-cream-2) !important; color: var(--ck-black) !important; border-color: var(--ck-plum-tint-18) !important; } /* Inputs / textareas — must always be white bg with BLACK value text */ .gradio-container .gr-input, .gradio-container .gr-textarea, .gradio-container .gr-textbox, .gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input, .gradio-container .gr-dropdown, .gradio-container input[type="text"], .gradio-container textarea, .gradio-container input { background: var(--ck-white) !important; color: var(--ck-black) !important; border-color: var(--ck-plum-tint-18) !important; -webkit-text-fill-color: var(--ck-black) !important; } .gradio-container ::placeholder { color: rgba(0, 0, 0, 0.40) !important; -webkit-text-fill-color: rgba(0, 0, 0, 0.40) !important; } /* Buttons — primary stays plum+white; secondary stays white+black */ .gradio-container .gr-button.primary, .gradio-container button.primary { background: var(--ck-plum) !important; color: var(--ck-white) !important; border-color: 
var(--ck-plum) !important; } .gradio-container .gr-button:not(.primary), .gradio-container button:not(.primary):not([role="tab"]) { background: var(--ck-white) !important; color: var(--ck-black) !important; border-color: var(--ck-plum-tint-30) !important; } /* Examples table — Gradio defaults to dark text on dark in dark mode */ .gradio-container .examples-table, .gradio-container .gr-examples, .gradio-container .examples-table table, .gradio-container .gr-examples table { background: transparent !important; color: var(--ck-black) !important; } .gradio-container .examples-table button, .gradio-container .gr-examples button, .gradio-container .examples-table td, .gradio-container .gr-examples td { background: var(--ck-white) !important; color: var(--ck-black) !important; border-color: var(--ck-plum-tint-18) !important; } /* Tabs */ .gradio-container .tab-nav button, .gradio-container button[role="tab"] { background: transparent !important; color: var(--ck-black) !important; } .gradio-container button[role="tab"][aria-selected="true"], .gradio-container .tab-nav button.selected { color: var(--ck-black) !important; border-bottom-color: var(--ck-plum) !important; } /* Markdown / prose */ .gradio-container .gr-markdown, .gradio-container .markdown, .gradio-container .prose, .gradio-container p, .gradio-container li, .gradio-container span:not(.meta-chip-label):not(.meta-chip-value) { color: var(--ck-black) !important; } /* Inline `code` again (Gradio's dark-mode variant) */ .gradio-container code, .gradio-container .gr-markdown code, .gradio-container .markdown code, .gradio-container .prose code { background: var(--ck-cream-3) !important; color: var(--ck-black) !important; border: 1px solid var(--ck-plum-tint-18) !important; } /* Form / block labels */ .gradio-container .gr-block-label, .gradio-container .block-label, .gradio-container span[data-testid="block-label"], .gradio-container fieldset > legend { color: var(--ck-black) !important; } /* Radio pills — re-assert 
selected = plum bg + white text */ .gradio-container .gr-radio label, .gradio-container [data-testid="radio"] label { background: var(--ck-white) !important; color: var(--ck-black) !important; border-color: var(--ck-plum-tint-18) !important; } .gradio-container .gr-radio label:has(input[type="radio"]:checked), .gradio-container [data-testid="radio"] label:has(input[type="radio"]:checked), .gradio-container .gr-radio label[aria-checked="true"], .gradio-container [data-testid="radio"] label[aria-checked="true"] { background: var(--ck-plum) !important; color: var(--ck-white) !important; border-color: var(--ck-plum) !important; } .gradio-container .gr-radio label:has(input[type="radio"]:checked) *, .gradio-container [data-testid="radio"] label:has(input[type="radio"]:checked) * { color: var(--ck-white) !important; -webkit-text-fill-color: var(--ck-white) !important; } } /* ---------- How-it-works accordion ---------- */ .ck-howto > .label-wrap, .ck-howto > button, .ck-howto label { background: var(--ck-cream-2) !important; color: var(--ck-black) !important; font-weight: 700 !important; border: 1px solid var(--ck-plum-tint-30) !important; border-radius: 12px !important; } .ck-howto-body { font-size: 14px; line-height: 1.65; color: var(--ck-black); padding: 14px 18px 6px; } .ck-howto-body p { margin: 0 0 8px; } .ck-howto-body p strong { color: var(--ck-plum); } .ck-howto-list { margin: 0 0 14px 22px; padding: 0; } .ck-howto-list li { margin: 4px 0; } .ck-howto-list li strong { color: var(--ck-plum); } .ck-howto-body code { background: var(--ck-cream-3) !important; color: var(--ck-black) !important; padding: 1px 6px; border-radius: 4px; font-size: 12px; font-weight: 700; font-family: 'JetBrains Mono', ui-monospace, Menlo, Consolas, monospace; } /* ---------- Decisive-moment micro-animations ---------- */ @keyframes ck-pulse-plum { 0% { transform: scale(1); box-shadow: 0 0 0 0 rgba(56, 25, 50, 0.0); } 35% { transform: scale(1.025); box-shadow: 0 0 0 12px rgba(56, 25, 50, 0.18);
} 100% { transform: scale(1); box-shadow: 0 0 0 0 rgba(56, 25, 50, 0.0); } } @keyframes ck-shake { 0%, 100% { transform: translateX(0); } 25% { transform: translateX(-3px); } 50% { transform: translateX(3px); } 75% { transform: translateX(-2px); } } @keyframes ck-slide-in-success { 0% { opacity: 0; transform: translateY(-6px); } 100% { opacity: 1; transform: translateY(0); } } .ck-bank-freeze, .ck-bank-flag { animation: ck-pulse-plum 1.2s ease-out 1; } .agent-card-analyzer.agent-card-tone-critical, .agent-card-bank.agent-card-tone-critical { animation: ck-pulse-plum 1.2s ease-out 1; } .agent-card-analyzer.agent-card-tone-critical .agent-emoji { animation: ck-shake 0.6s ease-in-out 1; display: inline-block; } .agent-card-victim.agent-card-tone-safe { animation: ck-slide-in-success 0.45s ease-out 1; } @media (prefers-reduced-motion: reduce) { .ck-bank-freeze, .ck-bank-flag, .agent-card-analyzer.agent-card-tone-critical, .agent-card-bank.agent-card-tone-critical, .agent-card-analyzer.agent-card-tone-critical .agent-emoji, .agent-card-victim.agent-card-tone-safe { animation: none !important; } } /* =============== Live red-team tab — reward-profile asymmetry =============== */ .redteam-empty { padding: 18px; background: var(--ck-cream-2); border: 1px dashed var(--ck-plum-tint-30); border-radius: 10px; color: rgba(0, 0, 0, 0.62); font-size: 14px; line-height: 1.6; } .redteam-card { padding: 16px; border-radius: 12px; border-left: 4px solid; background: rgba(255, 255, 255, 0.55); margin-bottom: 12px; } .redteam-v1 { border-left-color: #d32f2f; background: linear-gradient(180deg, rgba(211, 47, 47, 0.06), rgba(255, 255, 255, 0.6)); } .redteam-v2 { border-left-color: #2e7d32; background: linear-gradient(180deg, rgba(46, 125, 50, 0.06), rgba(255, 255, 255, 0.6)); } .redteam-card-head { display: flex; flex-direction: column; gap: 2px; margin-bottom: 10px; } .redteam-card-title { font-size: 15px; letter-spacing: 0.02em; color: #000; } .redteam-card-subtitle { font-size: 12px; 
opacity: 0.7; color: #000; } .redteam-card-score-row { display: flex; align-items: baseline; gap: 12px; margin-bottom: 8px; } .redteam-score { font-size: 28px; font-weight: 700; font-variant-numeric: tabular-nums; color: #000; } .redteam-flag { display: inline-block; padding: 2px 10px; border-radius: 999px; font-size: 11px; letter-spacing: 0.06em; text-transform: uppercase; } .redteam-flag.flagged { background: rgba(211, 47, 47, 0.15); color: #b71c1c; border: 1px solid rgba(211, 47, 47, 0.5); } .redteam-flag.clean { background: rgba(46, 125, 50, 0.12); color: #1b5e20; border: 1px solid rgba(46, 125, 50, 0.45); } .redteam-signals { display: flex; flex-wrap: wrap; gap: 6px; margin: 6px 0 8px; } .redteam-sig { display: inline-block; padding: 2px 8px; background: rgba(0, 0, 0, 0.06); border-radius: 6px; font-size: 11px; color: #000; } .redteam-sig-empty { opacity: 0.5; } .redteam-explanation { font-size: 13px; color: #333; margin: 6px 0 12px; line-height: 1.5; } .redteam-breakdown { width: 100%; border-collapse: collapse; font-size: 12px; } .redteam-breakdown th, .redteam-breakdown td { padding: 4px 8px; text-align: right; border-top: 1px solid rgba(0, 0, 0, 0.06); } .redteam-breakdown thead th { font-weight: 600; font-size: 11px; text-transform: uppercase; letter-spacing: 0.04em; color: rgba(0, 0, 0, 0.55); border-bottom: 1px solid var(--ck-plum-tint-18); } .redteam-leaf-name { text-align: left !important; font-family: ui-monospace, SFMono-Regular, monospace; font-size: 11px; } .redteam-leaf-val, .redteam-leaf-weight, .redteam-leaf-contrib { font-variant-numeric: tabular-nums; } .redteam-leaf-na { opacity: 0.3; } .redteam-total-label { text-align: right !important; font-size: 11px; text-transform: uppercase; color: #000; } .redteam-total-val { font-weight: 700; font-variant-numeric: tabular-nums; color: #000; } .redteam-asym { margin-top: 14px; padding: 12px 16px; border-radius: 10px; font-size: 14px; line-height: 1.5; } .redteam-asym-warning { background: rgba(211, 
47, 47, 0.10); border: 1px solid rgba(211, 47, 47, 0.35); color: #b71c1c; } .redteam-asym-mild { background: rgba(255, 152, 0, 0.10); border: 1px solid rgba(255, 152, 0, 0.35); color: #6d4c41; } .redteam-asym-agree { background: var(--ck-cream-2); border: 1px solid var(--ck-plum-tint-18); color: rgba(0, 0, 0, 0.72); } @media (max-width: 768px) { #redteam-row > * { width: 100% !important; } .redteam-breakdown { font-size: 11px; } .ck-hero-strip { gap: 6px !important; flex-wrap: wrap !important; } .agent-card { font-size: 13px !important; padding: 8px !important; } .panel-heading { font-size: 14px !important; } } .live-empty { padding: 14px 16px; background: var(--ck-cream-2); border-radius: 10px; border: 1px dashed var(--ck-plum-tint-30); font-size: 13px; line-height: 1.5; color: rgba(0, 0, 0, 0.62); } .live-error { padding: 14px 16px; background: rgba(211, 47, 47, 0.08); border-radius: 10px; border: 1px solid rgba(211, 47, 47, 0.35); font-size: 13px; color: #b71c1c; } .live-followup { margin-top: 10px; padding: 10px 12px; background: rgba(46, 125, 50, 0.08); border-radius: 8px; font-size: 13px; color: #1b5e20; } .ck-redteam-pointer { margin: 0 0 14px; padding: 10px 14px; border-left: 3px solid #d32f2f; background: rgba(211, 47, 47, 0.05); border-radius: 6px; font-size: 13px; line-height: 1.55; color: #000; } .ck-redteam-pointer em { font-style: normal; font-weight: 600; color: #b71c1c; } """

MODE_AUTO = "Auto-play (full episode)"
MODE_STEP = "Step-through (turn by turn)"


def _suspicion_style(score: float) -> tuple[str, str, str, str]:
    """Return (background, text_color, border, bar_color) for the suspicion panel.

    Two-color palette + strict white/black text rule:
      HIGH (>= 0.70) → solid plum bg + WHITE text + plum border + white bar
      MED  (>= 0.40) → white bg + BLACK text + plum border + plum bar
      LOW  (<  0.40) → cream-2 bg + BLACK text + 30%-alpha plum border + plum bar

    Args:
        score: Suspicion score, compared against the 0.70 and 0.40
            thresholds (presumably in [0, 1] — confirm against the analyzer).
            Any value that satisfies neither comparison gets the LOW styling.

    Returns:
        Four CSS color strings, in the order
        (background, text_color, border, bar_color).

    Note:
        ``_render_suspicion_score`` compares the returned background against
        the literal ``"#381932"`` to choose its bar-track color, so the HIGH
        background value must stay in sync with that check.
    """
    if score >= 0.70:
        return ("#381932", "#FFFFFF", "#381932", "#FFFFFF")
    if score >= 0.40:
        return ("#FFFFFF", "#000000", "#381932", "#381932")
    return ("#FFFBF5", "#000000", "rgba(56,25,50,0.30)", "#381932")


def _render_suspicion_score(score: float, explanation: str) -> str:
    bg, fg, border, bar_color = _suspicion_style(score)
    pct = int(round(score * 100))
    # Bar track tinted relative to the panel: 18%-alpha plum for light panels,
    # 30%-alpha white for the dark high-suspicion panel. The dark panel is
    # recognized by its background being the plum fill "#381932".
    bar_track = "rgba(255,255,255,0.30)" if bg == "#381932" else "rgba(56,25,50,0.18)"
    return (
        f'
Suspicion Score
' f'{score:.2f}
' f'" f'' f'{explanation or "No signals detected."}
' "{SUBTITLE}
' "POST /submit.' 'Two trained adapters: ' 'Analyzer v2 LoRA (Qwen2.5-7B) — F1 = 0.99, FPR = 6.7%, ECE = 0.039. ' 'Scammer LoRA (Qwen2.5-0.5B) — 93.75% bypass vs rules, beats 70B+ frontier LLMs. ' 'All numbers reproducible in <2 min on CPU.' '
' 'The 9-turn schedule:
' 'The 5 agents:
' 'What to watch: The Suspicion timeline climbing across analyzer turns; the Bank Monitor panel changing from review → flag → freeze; the Outcome badge at T9 (✅ saved, ❌ scammed, or 🤝 verified).
'
"Paste any SMS or chat snippet — the rule-based "
"Analyzer returns a suspicion score, declared signals, and a "
"natural-language explanation in one round-trip. The trained "
"Qwen2.5-7B-LoRA-v2 adapter is hot-swappable "
"with get_trained_analyzer() when a GPU is available."
"
'
"Note: this tab runs the rule-based scripted "
"Analyzer. Score ≥ 0.50 means caught; below means bypassed. The trained "
"Qwen2.5-7B-LoRA-v2 adapter scores tighter — see the "
"Leaderboard tab for v1 vs v2 deltas. LoRA serving in this "
"Space requires a GPU; routing through HF Inference API is on the v3 roadmap."
"
'
"A static benchmark goes stale as the defender improves. The "
"Scammer LoRA (Qwen2.5-0.5B + LoRA r=16) is "
"trained via TRL GRPO with the reward 1 − "
"ScriptedAnalyzer.score to craft scams that evade "
"keyword rules. This creates an arms-race dynamic: "
"the Analyzer must learn semantics, not keyword patterns, "
"because the adversary is co-adapting. The result? A 0.5B "
"model that outperforms 70B frontier LLMs at detector evasion."
"
logs/b2_phase1_scammer_training.json · '
'Eval log: '
'logs/b2_phase1_scammer_eval_n64_bestof8.json'
''
"We trained a Qwen2.5-0.5B Scammer LoRA via TRL GRPO "
"with the adversarial reward 1 − ScriptedAnalyzer.score. "
"Below are the n = 64 best-of-8 generated scams, "
"scored by both defenders side-by-side. The pairs where "
"scripted misses but v2 LoRA catches are the +60 pp "
"co-evolution gap made visible — Theme #1 in one click."
"
logs/b2_phase1_scammer_vs_v2_lora.json missing."
"' "Pick a scenario. The v1 panel shows the reward-hacked adapter; " "the v2 panel shows the principled retrain. The asymmetric " "improvement — detection ≈ unchanged, FPR collapsed 5× — is the " "signal that v2 learned the task instead of the proxy." "
" ) vs_banner = gr.HTML(value=render_summary_banner()) vs_choices = list_scenario_choices() vs_default_id = vs_choices[1][1] if len(vs_choices) >= 2 else vs_choices[0][1] vs_picker = gr.Radio( choices=[label for label, _ in vs_choices], value=next(label for label, sid in vs_choices if sid == vs_default_id), label="Scenario (try the benigns to see v1 over-flag)", ) vs_prompt = gr.HTML() with gr.Row(): with gr.Column(): vs_v1 = gr.HTML() with gr.Column(): vs_v2 = gr.HTML() vs_asymmetry = gr.HTML() def _vs_handler(label_value: str) -> tuple[str, str, str, str]: sid = next( (sid for label, sid in vs_choices if label == label_value), vs_default_id, ) return render_toggle_view(sid) # Prime the initial render. _initial_vs = _vs_handler( next(label for label, sid in vs_choices if sid == vs_default_id) ) vs_prompt.value = _initial_vs[0] vs_v1.value = _initial_vs[1] vs_v2.value = _initial_vs[2] vs_asymmetry.value = _initial_vs[3] vs_picker.change( _vs_handler, inputs=[vs_picker], outputs=[vs_prompt, vs_v1, vs_v2, vs_asymmetry], ) # ================================================= # LIVE RED-TEAM TAB — same analyzer, two reward profiles # ================================================= with gr.Tab("🔴 Red-team it yourself"): from server.redteam_handler import render_redteam_view gr.HTML( '' "Type any scam attempt or borderline benign. The rule-based scripted " "analyzer scores it once. Then we evaluate the same prediction " "against the v1 reward profile (5 leaves, the reward-hacked one) " "and the v2 reward profile (8 leaves, the principled retrain). " "The difference between the two reward totals is the " "reward-hacking signature — that asymmetry is exactly what " "shaped v1's 36 % FPR and what v2 fixed. Optionally tag your " "input as benign / scam to surface the diagnostic explicitly." "
" ) rt_input = gr.Textbox( placeholder="e.g. 'Your KYC expires today, click bit.ly/verify-kyc'", label="Your message", lines=3, elem_id="redteam-input", ) rt_truth = gr.Radio( choices=[ ("(unspecified)", "none"), ("ground truth: scam", "scam"), ("ground truth: benign", "benign"), ], value="none", label="Optional ground-truth tag (sharpens the diagnostic)", ) rt_btn = gr.Button( "Score with both reward profiles", variant="primary", elem_id="redteam-score-btn", ) with gr.Row(elem_id="redteam-row"): with gr.Column(): rt_v1 = gr.HTML() with gr.Column(): rt_v2 = gr.HTML() rt_badge = gr.HTML() def _rt_handler(message: str, truth: str) -> tuple[str, str, str]: is_benign = ( True if truth == "benign" else False if truth == "scam" else None ) try: return render_redteam_view(message, is_benign_truth=is_benign) except Exception as exc: # noqa: BLE001 err = ( ''
"Methods ranked by F1 on the 175-scenario bench. "
"v1 (reward-hacked) is kept on the board to motivate v2's principled retrain. "
"Submit your model: POST /submit with the schema in "
"server/leaderboard.py."
"
' "Why F1? F1 balances detection (recall) and false-positive " "avoidance — a model that flags everything has high recall but low F1. The " "asymmetric v1→v2 lift (recall ≈ unchanged, FPR ↓5×) is exactly the kind " "of move F1 surfaces and detection-only ranking would hide." "
def _build_theme() -> gr.themes.Base:
    """Assemble the two-colour Gradio theme: deep plum on warm cream.

    Text follows a strict white/black rule. The primary and secondary hues
    share one plum scale centred on #381932, and the neutral scale is built
    around the cream page surface. The theme starts from ``gr.themes.Base``
    so exact colours can be painted rather than inheriting Gradio's default
    blue/grey.

    Every token is declared once for light mode and then repeated under the
    matching ``*_dark`` name with the same value, so the UI keeps the cream +
    plum look when the OS is in dark mode.
    """
    plum = "#381932"
    plum_deep = "#2A0F25"
    cream = "#FFF3E6"
    cream_lifted = "#FFFBF5"
    white = "#FFFFFF"
    black = "#000000"
    plum_hairline = "rgba(56,25,50,0.18)"
    plum_wash = "rgba(56,25,50,0.08)"

    plum_scale = gr.themes.Color(
        c50="#FBEAF6",
        c100="#F5D2EA",
        c200="#E0A4C9",
        c300="#C476A8",
        c400="#9C4F87",
        c500="#7A3565",
        c600="#5A234C",
        c700=plum,  # primary plum
        c800=plum_deep,
        c900="#1A0717",
        c950="#0D030B",
    )
    neutral_scale = gr.themes.Color(
        c50=cream_lifted,  # cream-2 (lifted)
        c100=cream,  # cream (page surface)
        c200="#FFE8D2",  # cream-3
        c300="#F5D8BB",
        c400="#E2BEA0",
        c500="#B89880",
        c600="#8E7461",
        c700="#5C4B3F",
        c800="#3A2F27",
        c900="#1F1813",
        c950="#0E0A07",
    )

    # Light-mode tokens (the only look we ship).
    light_tokens = {
        "body_background_fill": cream,
        "body_text_color": black,
        "body_text_color_subdued": black,
        "background_fill_primary": cream,
        "background_fill_secondary": cream_lifted,
        "block_background_fill": cream_lifted,
        "border_color_primary": plum_hairline,
        "button_primary_background_fill": plum,
        "button_primary_background_fill_hover": plum_deep,
        "button_primary_text_color": white,
        "button_secondary_background_fill": white,
        "button_secondary_background_fill_hover": plum_wash,
        "button_secondary_text_color": black,
        "input_background_fill": white,
        "input_border_color": plum_hairline,
        "input_border_color_focus": plum,
        "link_text_color": plum,
        "block_label_text_color": black,
        "block_title_text_color": black,
    }
    # Dark-mode overrides intentionally mirror the light values one-for-one.
    dark_tokens = {f"{token}_dark": value for token, value in light_tokens.items()}

    theme = gr.themes.Base(
        primary_hue=plum_scale,
        secondary_hue=plum_scale,
        neutral_hue=neutral_scale,
        font=[gr.themes.GoogleFont("Inter"), "Segoe UI", "system-ui", "sans-serif"],
        font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "Consolas", "monospace"],
    )
    return theme.set(**light_tokens, **dark_tokens)


def main() -> None:
    """Set up INFO-level logging, build the demo app, and launch it.

    The app is served on ``0.0.0.0:7860`` with the module's custom CSS and
    the theme from ``_build_theme``.
    """
    logging.basicConfig(level=logging.INFO)
    app = build_app()
    # The app is built before the theme, matching the original call order.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "css": CUSTOM_CSS,
        "theme": _build_theme(),
    }
    app.launch(**launch_options)


if __name__ == "__main__":
    main()