import React, { useState, useEffect, useRef } from 'react';
import { Play, CheckCircle, XCircle, AlertCircle, Shield, AlertTriangle, FileText, Gavel, Scale } from 'lucide-react';
import { TASK_STRATEGIES, TASK_DESCRIPTIONS } from './tasks';
import './index.css';

// Calibration reward matrix: the value earned for each confidence × outcome pair.
// Overconfidence is punished hardest (HIGH + wrong = −0.8); a humble mistake costs
// almost nothing (LOW + wrong = 0.0).
const CALIB_MATRIX = {
  HIGH_correct: { val: 1.0 },
  HIGH_wrong: { val: -0.8 },
  MED_correct: { val: 0.6 },
  MED_wrong: { val: -0.2 },
  LOW_correct: { val: 0.1 },
  LOW_wrong: { val: 0.0 },
};

// One-line hint per task: the decisive action + confidence the agent should reach.
const TASK_STEPS_HINT = {
  clean_claim: 'approve_claim + HIGH confidence',
  contradictory_claim: 'deny_claim + MED confidence + Court Panel',
  distribution_shift_claim: 'escalate_to_human + LOW confidence',
};

function App() {
  const [task, setTask] = useState('contradictory_claim');
  const [isRunning, setIsRunning] = useState(false);
  const [isDone, setIsDone] = useState(false);
  // null | 'resetting' | 'error' | claim observation object
  const [claimText, setClaimText] = useState(null);
  const [history, setHistory] = useState([]);
  const [debate, setDebate] = useState(null);
  const [matrixConf, setMatrixConf] = useState(null);
  const [matrixOutcome, setMatrixOutcome] = useState(null);
  const [reward, setReward] = useState('—');
  const [calib, setCalib] = useState('—');
  const [finalOutcome, setFinalOutcome] = useState(null); // 'correct' | 'wrong'
  const [cursorPos, setCursorPos] = useState({ x: -100, y: -100 });
  const [isHovering, setIsHovering] = useState(false);
  const terminalRef = useRef(null);

  // Track the mouse for the custom cursor dot.
  useEffect(() => {
    const h = (e) => setCursorPos({ x: e.clientX, y: e.clientY });
    window.addEventListener('mousemove', h);
    return () => window.removeEventListener('mousemove', h);
  }, []);

  // Auto-scroll the action terminal as new steps arrive.
  useEffect(() => {
    if (terminalRef.current) {
      terminalRef.current.scrollTop = terminalRef.current.scrollHeight;
    }
  }, [history]);

  // Run one scripted episode: reset the environment, then replay the task's
  // action list step by step, updating the UI after every server response.
  const handleRun = async () => {
    setIsRunning(true);
    setIsDone(false);
    setHistory([]);
    setDebate(null);
    setMatrixConf(null);
    setMatrixOutcome(null);
    setReward('—');
    setCalib('—');
    setFinalOutcome(null);
    setClaimText('resetting');
    try {
      const resetRes = await fetch('/reset', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ task_id: task, seed: 42 }),
      });
      if (!resetRes.ok) throw new Error('Reset failed');
      const resetData = await resetRes.json();
      const sessionId = resetData.session_id;
      setClaimText(resetData.observation);

      const actions = TASK_STRATEGIES[task];
      let currentHistory = [];
      for (let i = 0; i < actions.length; i++) {
        const action = actions[i];
        // Strip an absent confidence field so the server only sees real declarations.
        const payload = { ...action };
        if (payload.confidence === undefined || payload.confidence === null) delete payload.confidence;
        const stepRes = await fetch('/step', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ session_id: sessionId, action: payload }),
        });
        if (!stepRes.ok) throw new Error('Step failed');
        const stepData = await stepRes.json();
        const r = stepData.reward || 0;
        const rb = stepData.observation?.reward_breakdown || {};
        const c = rb.calibration_score;
        const d = stepData.observation?.debate_transcript;
        currentHistory = [...currentHistory, { ...action, reward: r, calibration: c }];
        setHistory([...currentHistory]);
        setReward(r.toFixed(3));
        if (d) setDebate(d);
        // A confidence-bearing action that returned a calibration score lights up
        // the matrix; the UI treats a non-negative score as a correct decision.
        if (action.confidence && c !== undefined && c !== null) {
          setCalib(c);
          setMatrixConf(action.confidence);
          const outcome = c >= 0 ? 'correct' : 'wrong';
          setMatrixOutcome(outcome);
          setFinalOutcome(outcome);
        }
        // Pause between steps so the episode reads as a live run; linger on the debate.
        await new Promise((res) => setTimeout(res, action.action_type === 'convene_debate_panel' ? 1000 : 550));
      }
      setIsDone(true);
    } catch (err) {
      console.error(err);
      setClaimText('error');
    } finally {
      setIsRunning(false);
    }
  };

  // CSS class for a matrix cell; the active cell gets highlighted.
  const getMatrixCellClass = (conf, outcome) => {
    const isActive = matrixConf === conf && matrixOutcome === outcome;
    return `matrix-cell cell-${conf.toLowerCase()}-${outcome}${isActive ? ' active' : ''}`;
  };

  const outcomeLabel = finalOutcome === 'correct' ? '✅ CORRECT' : finalOutcome === 'wrong' ? '❌ WRONG' : null;
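  // Server contract this component assumes, inferred from the fetch calls above
  // (field names come from usage here, not from a published spec):
  //
  //   POST /reset  { task_id, seed }       -> { session_id, observation }
  //   POST /step   { session_id, action }  -> { reward, observation }
  //     observation.reward_breakdown.calibration_score  (number; present after a
  //       confidence-bearing action)
  //     observation.debate_transcript                   (present after the Court
  //       Panel convenes)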
  return (
    <>
      {/* NOTE: element structure and class names below are minimal placeholders;
          the visual layout lives in index.css. */}
      {/* Custom cursor dot, driven by the mousemove listener above */}
      <div
        className="cursor-dot"
        style={{ left: cursorPos.x, top: cursorPos.y, transform: isHovering ? 'scale(2)' : 'scale(1)' }}
      />

      {/* ── TOP NAV BAR ─────────────────────────────── */}
      <nav className="nav-bar">
        <Scale size={18} />
        <span>ClaimCourt</span>
      </nav>

      {/* ── HERO BANNER ─────────────────────────────── */}
      <header className="hero">
        <p>
          ClaimCourt trains LLM agents to declare calibrated confidence before every
          insurance decision. Overconfident? Penalised −0.8. Wrong but humble? Rewarded.
        </p>
        <p>
          The Court Panel (adversarial debate) below is unique — no other OpenEnv
          environment has it. Watch it unfold.
        </p>
      </header>

      {/* ── CONTROLS: task picker + run button ──────── */}
      <section className="controls">
        <p>Pick a task, click Run, and watch the agent investigate.</p>
        <select value={task} onChange={(e) => setTask(e.target.value)} disabled={isRunning}>
          {Object.keys(TASK_STRATEGIES).map((t) => (
            <option key={t} value={t}>{t}</option>
          ))}
        </select>
        <p className="task-desc">{TASK_DESCRIPTIONS[task]}</p>
        <p className="task-hint">{TASK_STEPS_HINT[task]}</p>
        <button
          onClick={handleRun}
          disabled={isRunning}
          onMouseEnter={() => setIsHovering(true)}
          onMouseLeave={() => setIsHovering(false)}
        >
          <Play size={16} /> {isRunning ? 'Running…' : 'Run Episode'}
        </button>
      </section>

      {/* ── CALIBRATION MATRIX ──────────────────────── */}
      <section className="matrix">
        <p>The highlighted cell = the agent's confidence × outcome.</p>
        <p>HIGH + wrong = −0.8 is the worst possible outcome.</p>
        <div className="matrix-grid">
          {['HIGH', 'MED', 'LOW'].map((conf) =>
            ['correct', 'wrong'].map((outcome) => (
              <div key={`${conf}_${outcome}`} className={getMatrixCellClass(conf, outcome)}>
                {conf} / {outcome}: {CALIB_MATRIX[`${conf}_${outcome}`].val.toFixed(1)}
              </div>
            ))
          )}
        </div>
        <div className="scoreboard">
          <span>Reward: {reward}</span>
          <span>Calibration: {calib}</span>
          {outcomeLabel && (
            <span>
              {finalOutcome === 'correct' ? <CheckCircle size={14} /> : <XCircle size={14} />} {outcomeLabel}
            </span>
          )}
        </div>
      </section>
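      {/* Worked example for the matrix above: the contradictory_claim script denies
          at MED confidence, so a correct denial lands in MED_correct (+0.6). The
          same denial declared at HIGH would pay +1.0 if right but −0.8 if wrong. */}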
      {/* ── CLAIM VIEWER ────────────────────────────── */}
      <section className="claim-viewer">
        {!claimText && (
          <p className="placeholder">Select a task and click Run Episode.</p>
        )}
        {claimText === 'resetting' && (
          <p className="placeholder">Contacting environment server...</p>
        )}
        {claimText === 'error' && (
          <p className="error"><AlertCircle size={14} /> Could not reach environment server.</p>
        )}
        {claimText && typeof claimText === 'object' && (
          <div className="claim-card">
            <p><FileText size={14} /> Claimant: {claimText.claimant?.name}</p>
            <p>Incident: {claimText.incident?.type} — {claimText.incident?.description?.slice(0, 90)}...</p>
            <p>Amount: ₹{claimText.payout_amount_inr?.toLocaleString('en-IN') || '—'}</p>
            <p>Documents ({claimText.documents?.length || 0}):</p>
            <ul>
              {(claimText.documents || []).map((d) => (
                <li key={d.doc_id}>{d.doc_id} — {d.content?.slice(0, 60)}...</li>
              ))}
            </ul>
            <p className="task-hint">(Court Panel convenes via convene_debate_panel)</p>
          </div>
        )}
      </section>

      {/* ── ACTION TERMINAL: one line per executed step ── */}
      <section className="terminal" ref={terminalRef}>
        {history.map((h, i) => (
          <div key={i} className="terminal-line">
            {h.action_type}
            {h.confidence ? ` @ ${h.confidence}` : ''}
            {` → reward ${h.reward.toFixed(3)}`}
            {h.calibration !== undefined && h.calibration !== null ? ` (calibration ${h.calibration})` : ''}
          </div>
        ))}
        {isDone && <div className="terminal-line">Episode complete.</div>}
      </section>
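      {/* debate, when set, holds observation.debate_transcript from the last step;
          prosecutor_argument and defender_argument are the only fields this UI
          reads (shape inferred from usage below). */}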
      {/* ── COURT PANEL: adversarial debate ─────────── */}
      <section className="court-panel">
        <p>
          <Gavel size={16} /> Run contradictory_claim to see the Prosecutor vs Defender
          debate unfold live.
        </p>
        <div className="debate-columns">
          <div className="prosecutor">
            <p><AlertTriangle size={14} /> Prosecutor: builds a case from discovered fraud signals and argues for denial.</p>
            {debate && <blockquote>{debate.prosecutor_argument}</blockquote>}
          </div>
          <div className="defender">
            <p><Shield size={14} /> Defender: argues from document consistency and assumes innocence.</p>
            {debate && <blockquote>{debate.defender_argument}</blockquote>}
          </div>
        </div>
      </section>
    </>
  );
}

export default App;
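// For reference, handleRun and the hints above assume ./tasks exports shapes
// roughly like the sketch below. Action names are taken from TASK_STEPS_HINT
// where possible; 'review_documents' and the description text are hypothetical
// placeholders, not confirmed by this file.
//
//   export const TASK_STRATEGIES = {
//     contradictory_claim: [
//       { action_type: 'review_documents' },           // hypothetical first step
//       { action_type: 'convene_debate_panel' },       // triggers the Court Panel
//       { action_type: 'deny_claim', confidence: 'MED' },
//     ],
//     // clean_claim and distribution_shift_claim hold analogous action lists.
//   };
//
//   export const TASK_DESCRIPTIONS = {
//     contradictory_claim: '...one-line summary shown in the controls panel...',
//     // ...
//   };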