import React, { useState, useEffect, useRef } from 'react'; import { Play, CheckCircle, XCircle, AlertCircle, Shield, AlertTriangle, FileText, Gavel, Scale } from 'lucide-react'; import { TASK_STRATEGIES, TASK_DESCRIPTIONS } from './tasks'; import './index.css'; const CALIB_MATRIX = { HIGH_correct: { val: 1.0 }, HIGH_wrong: { val: -0.8 }, MED_correct: { val: 0.6 }, MED_wrong: { val: -0.2 }, LOW_correct: { val: 0.1 }, LOW_wrong: { val: 0.0 } }; const TASK_STEPS_HINT = { clean_claim: 'approve_claim + HIGH confidence', contradictory_claim: 'deny_claim + MED confidence + Court Panel', distribution_shift_claim:'escalate_to_human + LOW confidence', }; function App() { const [task, setTask] = useState('contradictory_claim'); const [isRunning, setIsRunning] = useState(false); const [isDone, setIsDone] = useState(false); const [claimText, setClaimText] = useState(null); const [history, setHistory] = useState([]); const [debate, setDebate] = useState(null); const [matrixConf, setMatrixConf] = useState(null); const [matrixOutcome, setMatrixOutcome] = useState(null); const [reward, setReward] = useState('—'); const [calib, setCalib] = useState('—'); const [finalOutcome, setFinalOutcome] = useState(null); // 'correct' | 'wrong' const [cursorPos, setCursorPos] = useState({ x: -100, y: -100 }); const [isHovering, setIsHovering] = useState(false); const terminalRef = useRef(null); useEffect(() => { const h = (e) => setCursorPos({ x: e.clientX, y: e.clientY }); window.addEventListener('mousemove', h); return () => window.removeEventListener('mousemove', h); }, []); // Auto-scroll terminal useEffect(() => { if (terminalRef.current) { terminalRef.current.scrollTop = terminalRef.current.scrollHeight; } }, [history]); const handleRun = async () => { setIsRunning(true); setIsDone(false); setHistory([]); setDebate(null); setMatrixConf(null); setMatrixOutcome(null); setReward('—'); setCalib('—'); setFinalOutcome(null); setClaimText('resetting'); try { const resetRes = await 
fetch('/reset', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ task_id: task, seed: 42 }) }); if (!resetRes.ok) throw new Error('Reset failed'); const resetData = await resetRes.json(); const sessionId = resetData.session_id; setClaimText(resetData.observation); const actions = TASK_STRATEGIES[task]; let currentHistory = []; for (let i = 0; i < actions.length; i++) { const action = actions[i]; const payload = { ...action }; if (payload.confidence === undefined || payload.confidence === null) delete payload.confidence; const stepRes = await fetch('/step', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ session_id: sessionId, action: payload }) }); if (!stepRes.ok) throw new Error('Step failed'); const stepData = await stepRes.json(); const r = stepData.reward || 0; const rb = stepData.observation?.reward_breakdown || {}; const c = rb.calibration_score; const d = stepData.observation?.debate_transcript; currentHistory = [...currentHistory, { ...action, reward: r, calibration: c }]; setHistory([...currentHistory]); setReward(r.toFixed(3)); if (d) setDebate(d); if (action.confidence && c !== undefined && c !== null) { setCalib(c); setMatrixConf(action.confidence); const outcome = c >= 0 ? 'correct' : 'wrong'; setMatrixOutcome(outcome); setFinalOutcome(outcome); } await new Promise(res => setTimeout(res, action.action_type === 'convene_debate_panel' ? 1000 : 550)); } setIsDone(true); } catch (err) { console.error(err); setClaimText('error'); } finally { setIsRunning(false); } }; const getMatrixCellClass = (conf, outcome) => { const isActive = matrixConf === conf && matrixOutcome === outcome; return `matrix-cell cell-${conf.toLowerCase()}-${outcome}${isActive ? ' active' : ''}`; }; const outcomeLabel = finalOutcome === 'correct' ? '✅ CORRECT' : finalOutcome === 'wrong' ? '❌ WRONG' : null; return ( <>
{/* NOTE(review): the JSX element tags in this entire return body appear to
    have been stripped by an extraction/sanitization pass — only text nodes,
    comment nodes, expression braces, and attribute fragments remain (e.g. the
    dangling `onMouseLeave` fragment and the truncated style ternary below).
    Do not hand-edit this region; restore the original markup from version
    control before making changes. */}
{/* ── TOP NAV BAR ─────────────────────────────── */} {/* ── HERO BANNER ─────────────────────────────── */}

The AI That Knows When It Doesn't Know

ClaimCourt trains LLM agents to declare calibrated confidence before every insurance decision. Overconfident? Penalised −0.8.  Wrong but humble? Rewarded.

The Court Panel (adversarial debate) below is unique — no other OpenEnv environment has it. Watch it unfold.

{/* ── MAIN APP GRID ───────────────────────────── */}
{/* SIDEBAR */}
{/* Control Panel */}

Run an Episode

Pick a task, click Run, watch the agent investigate.

setIsHovering(true)} onMouseLeave={() => setIsHovering(false)}>
{TASK_DESCRIPTIONS[task]}
Expected: {TASK_STEPS_HINT[task]}
{isDone && outcomeLabel && (
{outcomeLabel}
)}
{/* Live Metrics */}

Live Metrics

Reward = 0 ? 'var(--success)' : 'var(--error)' }}>{reward}
Calibration Score {calib}
Declared Confidence {matrixConf || '—'}
Steps taken {history.length}
{/* Calibration Matrix */}

3×2 Calibration Matrix

The highlighted cell = agent's confidence × outcome.
HIGH + wrong = −0.8 is the worst possible outcome.

Confidence
Correct
Wrong
{['HIGH', 'MED', 'LOW'].map(conf => (
{conf}
+{CALIB_MATRIX[`${conf}_correct`].val}
{CALIB_MATRIX[`${conf}_wrong`].val}
))}
{/* MAIN CONTENT */}
{/* Claim + Terminal side by side */}
{/* Claim Details */}

Claim Under Investigation

{!claimText && (

Select a task and click Run Episode.

)} {claimText === 'resetting' && (

Contacting environment server...

)} {claimText === 'error' && (

⚠ Could not reach environment server.

)} {claimText && typeof claimText === 'object' && (
#{claimText.claim_id} · {claimText.task_id}

Claimant: {claimText.claimant?.name}

Incident: {claimText.incident?.type} — {claimText.incident?.description?.slice(0, 90)}...

Amount: ₹{claimText.payout_amount_inr?.toLocaleString('en-IN') || '—'}

Documents ({claimText.documents?.length || 0}):

    {claimText.documents?.slice(0, 3).map(d => (
  • {d.doc_id} — {d.content?.slice(0, 60)}...
  • ))}
{claimText.linked_claims?.length > 0 && (

{claimText.linked_claims.length} linked claims flagged!

)}
)}
{/* Terminal */}
agent-trace.log {isRunning && ● LIVE}
{history.length === 0 ? (
Waiting for episode to start...
) : ( history.map((h, i) => (
[{String(i + 1).padStart(2, '0')}]{' '} {h.action_type === 'convene_debate_panel' ? ⚖ {h.action_type} : {h.action_type} } {h.confidence && [CONF:{h.confidence}]}
↳ {h.reasoning}
reward: {h.reward?.toFixed(3)} {h.calibration !== undefined && h.calibration !== null && | calib: {h.calibration} }
)) )}
{/* ── DEBATE PANEL — hero section ─────────── */}

{debate ? `⚖ Court Panel Convened — Step ${debate.step_convened}` : 'Multi-Agent Court Panel'}

{!debate && ( (appears when agent calls convene_debate_panel) )}
{!debate ? (

Run contradictory_claim to see the Prosecutor vs Defender debate unfold live.

Prosecutor

Builds case from discovered fraud signals. Argues for denial.

Defender

Argues from document consistency. Assumes innocence.

) : ( <>
⚔ Prosecutor {debate.prosecutor_strength}

{debate.prosecutor_argument}

🛡 Defender {debate.defender_strength}

{debate.defender_argument}

VERDICT: {debate.panel_verdict}
)}
{/* ── FOOTER ──────────────────────────────────── */} ); } export default App;