import React, { useEffect, useMemo, useRef, useState } from "react"; import { api, fmt } from "../../api/client"; function backendBaseUrl() { if (typeof window === "undefined") return "http://127.0.0.1:7860"; const host = window.location.hostname; const port = window.location.port; if ((host === "127.0.0.1" || host === "localhost") && port === "5173") { return `http://${host}:7860`; } return window.location.origin; } function normalizePath(path) { return String(path || "").replace(/\\/g, "/").toLowerCase(); } function toNumberOrNull(value) { const n = Number(value); return Number.isFinite(n) ? n : null; } function timestampToDate(value) { const n = Number(value); if (!Number.isFinite(n) || n <= 0) return null; return new Date(n * 1000); } function metricRowKV(line) { const m = String(line || "").match(/\|\s*([a-zA-Z0-9_ ]+?)\s*\|\s*([-]?\d+(?:\.\d+)?)\s*\|/); if (!m) return null; return { key: String(m[1]).trim().toLowerCase().replace(/\s+/g, "_"), value: parseFloat(m[2]), }; } function parseLogMetrics(lines) { const rewards = []; const scores = []; let latestTableReward = null; let latestTableScore = null; let latestProgressRatio = null; let latestLoggedTimesteps = null; for (const line of lines || []) { if (!line) continue; const ratioMatch = line.match(/(\d[\d,]*)\/(\d[\d,]*)/); if (ratioMatch) { const done = parseInt(String(ratioMatch[1]).replace(/,/g, ""), 10); const total = parseInt(String(ratioMatch[2]).replace(/,/g, ""), 10); if (Number.isFinite(done) && Number.isFinite(total) && total > 0) { latestProgressRatio = done / total; } } const metric = metricRowKV(line); if (metric) { if (metric.key === "ep_rew_mean" || metric.key === "mean_reward") { latestTableReward = metric.value; } if (metric.key === "grader_score" || metric.key === "avg_grader_score") { latestTableScore = metric.value; } if (metric.key === "total_timesteps") { const ts = parseInt(String(metric.value), 10); if (Number.isFinite(ts)) { latestLoggedTimesteps = ts; if (Number.isFinite(latestTableReward)) { rewards.push({ t: ts, value: Number(latestTableReward) }); latestTableReward = null; } if (Number.isFinite(latestTableScore)) { scores.push({ t: ts, value: Number(latestTableScore) }); latestTableScore = null; } } } } const evalReward = line.match(/Eval\s+num_timesteps=(\d[\d,]*),\s*episode_reward=([-]?\d+(?:\.\d+)?)/i); if (evalReward) { const ts = parseInt(String(evalReward[1]).replace(/,/g, ""), 10); const rew = parseFloat(evalReward[2]); if (Number.isFinite(ts) && Number.isFinite(rew)) { latestLoggedTimesteps = ts; rewards.push({ t: ts, value: rew }); } } const evalScore = line.match(/\[Eval\]\s+Average grader score:\s+([0-9.]+)/i); if (evalScore) { const score = parseFloat(evalScore[1]); if (Number.isFinite(score)) { const ts = latestLoggedTimesteps || (scores.length > 0 ? scores[scores.length - 1].t + 1 : 1); scores.push({ t: ts, value: score }); } } const bestScore = line.match(/\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)/i); if (bestScore) { const score = parseFloat(bestScore[1]); if (Number.isFinite(score)) { const ts = latestLoggedTimesteps || (scores.length > 0 ? scores[scores.length - 1].t + 1 : 1); scores.push({ t: ts, value: score }); } } } const dedupe = (rows) => { const map = new Map(); for (const row of rows) { if (!Number.isFinite(row.t) || !Number.isFinite(row.value)) continue; map.set(row.t, row); } return Array.from(map.values()).sort((a, b) => a.t - b.t); }; return { rewardPoints: dedupe(rewards), scorePoints: dedupe(scores), logProgressRatio: Number.isFinite(latestProgressRatio) ? latestProgressRatio : null, lastLoggedTimesteps: Number.isFinite(latestLoggedTimesteps) ? latestLoggedTimesteps : null, }; } function seriesSpread(rows) { if (!Array.isArray(rows) || rows.length === 0) return 0; const vals = rows.map((r) => Number(r?.value)).filter(Number.isFinite); if (vals.length === 0) return 0; return Math.max(...vals) - Math.min(...vals); } function payloadHighlights(payload) { const src = payload && typeof payload === "object" ? payload : {}; const keys = [ "task_id", "step", "reward", "score", "done", "backlog", "completed", "total_backlog", "total_completed", "total_sla_breaches", "total_valid", "total_actions", "passed", "action_history_len", ]; const out = []; for (const key of keys) { if (!(key in src)) continue; const value = src[key]; if (value == null) continue; if (typeof value === "number") { out.push([key, Number.isFinite(value) ? Number(value).toFixed(Math.abs(value) >= 10 ? 1 : 3) : String(value)]); } else { out.push([key, String(value)]); } } return out; } function toPolyline(points, { minY, maxY, width, height }) { if (!points || points.length === 0) return ""; return points .map((p, idx) => { const x = (idx / Math.max(points.length - 1, 1)) * width; const y = height - ((p.value - minY) / (maxY - minY || 1)) * height; return `${x},${y}`; }) .join(" "); } function normalizeSeries(points) { const map = new Map(); for (const row of points || []) { const t = Number(row?.t); const value = Number(row?.value); if (!Number.isFinite(t) || !Number.isFinite(value)) continue; map.set(t, { t, value }); } return Array.from(map.values()).sort((a, b) => a.t - b.t); } function toPolylineByT(points, { minX, maxX, minY, maxY, width, height }) { if (!points || points.length === 0) return ""; const xDen = maxX - minX || 1; const yDen = maxY - minY || 1; return points .map((p) => { const x = ((p.t - minX) / xDen) * width; const y = height - ((p.value - minY) / yDen) * height; return `${x},${y}`; }) .join(" "); } function toStairPolylineByT(points, { minX, maxX, minY, maxY, width, height }) { if (!points || points.length === 0) return ""; const xDen = maxX - minX || 1; const yDen = maxY - minY || 1; const xOf = (t) => ((t - minX) / xDen) * width; const yOf = (v) => height - ((v - minY) / yDen) * height; const sorted = normalizeSeries(points); if (sorted.length === 0) return ""; const out = []; const first = sorted[0]; out.push(`${xOf(minX)},${yOf(first.value)}`); out.push(`${xOf(first.t)},${yOf(first.value)}`); for (let i = 1; i < sorted.length; i += 1) { const prev = sorted[i - 1]; const curr = sorted[i]; const x = xOf(curr.t); out.push(`${x},${yOf(prev.value)}`); out.push(`${x},${yOf(curr.value)}`); } const last = sorted[sorted.length - 1]; out.push(`${xOf(maxX)},${yOf(last.value)}`); return out.join(" "); } function summarizeLogLine(line) { const raw = String(line || "").trim(); if (!raw) return { title: "Info", text: "Empty line", tone: "slate" }; const lower = raw.toLowerCase(); const evalReward = raw.match(/Eval\s+num_timesteps=(\d[\d,]*),\s*episode_reward=([-]?\d+(?:\.\d+)?)/i); if (evalReward) { const ts = Number(String(evalReward[1]).replace(/,/g, "")); const rew = Number(evalReward[2]); return { title: "Eval Checkpoint", text: `Timesteps ${Number.isFinite(ts) ? ts.toLocaleString() : "-"} | Reward ${Number.isFinite(rew) ? rew.toFixed(2) : "-"}`, tone: "emerald", }; } const bestScore = raw.match(/\[Eval\]\s+New best(?: recurrent)? grader score:\s+([0-9.]+)/i); if (bestScore) { const score = Number(bestScore[1]); return { title: "Best Score Improved", text: `Grader score improved to ${Number.isFinite(score) ? score.toFixed(4) : "-"}.`, tone: "emerald", }; } const avgScore = raw.match(/\[Eval\]\s+Average grader score:\s+([0-9.]+)/i); if (avgScore) { const score = Number(avgScore[1]); return { title: "Evaluation Summary", text: `Average grader score ${Number.isFinite(score) ? score.toFixed(4) : "-"}.`, tone: "emerald", }; } const metric = metricRowKV(raw); if (metric) { const key = String(metric.key || "").replace(/_/g, " "); return { title: "Metric Update", text: `${key}: ${Number.isFinite(metric.value) ? metric.value : "-"}`, tone: "indigo", }; } if (lower.includes("traceback") || lower.includes("exception") || lower.includes("error")) { return { title: "Error", text: "A runtime error was reported by the training process. Review backend logs for the exact stack trace.", tone: "rose" }; } if (lower.includes("[eval]")) { return { title: "Evaluation", text: "Evaluation cycle completed and scores were updated.", tone: "emerald" }; } if (lower.includes("[training_jobs]")) { if (lower.includes("started pid=")) { return { title: "Job Started", text: "Training worker started successfully and began consuming timesteps.", tone: "cyan" }; } if (lower.includes("command:")) { return { title: "Runtime Config", text: "Training command was prepared with current phase and environment settings.", tone: "cyan" }; } return { title: "System", text: "Background training service published a runtime status update.", tone: "cyan" }; } if (lower.includes("[phase 1]")) { return { title: "Phase 1 Update", text: "Phase 1 PPO training is actively optimizing policy behavior.", tone: "indigo" }; } if (lower.includes("[phase 2]")) { return { title: "Phase 2 Update", text: "Phase 2 curriculum training is active for harder scenario generalization.", tone: "indigo" }; } if (lower.includes("[costmonitor]")) { return { title: "Constraint Monitor", text: "SLA/fairness penalty monitor updated policy constraint feedback.", tone: "amber" }; } return { title: "Runtime Update", text: "The trainer reported a new runtime event and internal state progressed.", tone: "amber" }; } function summarizeEnvEvent(event) { const stage = String(event?.stage || ""); const payload = event?.payload || {}; const task = payload?.task_id ? ` [${payload.task_id}]` : ""; if (stage === "reset") { return `Task${task}: session created. Day ${payload?.day ?? "-"}, starting backlog ${payload?.backlog ?? "-"}.`; } if (stage === "state:initial") { return `Task${task}: initial snapshot captured. Completed ${payload?.total_completed ?? "-"}, backlog ${payload?.total_backlog ?? "-"}.`; } if (stage === "action-masks") { return `Task${task}: step ${payload?.step ?? "-"} validated actions (${payload?.total_valid ?? "-"} valid of ${payload?.total_actions ?? "-"}).`; } if (stage === "auto_step") { return `Task${task}: step ${payload?.step ?? "-"} executed. Reward ${fmt(payload?.reward, 3)}, backlog ${payload?.backlog ?? "-"}, completed ${payload?.completed ?? "-"}.`; } if (stage === "state:post_step") { return `Task${task}: post-step state updated. Completed ${payload?.total_completed ?? "-"}, backlog ${payload?.total_backlog ?? "-"}, SLA breaches ${payload?.total_sla_breaches ?? "-"}.`; } if (stage === "grade") { return `Task${task}: grading finished. Score ${fmt(payload?.score, 3)}, pass ${String(payload?.passed)}.`; } if (stage === "session:closed") { return `Task${task}: session closed successfully.`; } if (stage === "task:error") { return `Task${task}: run failed - ${payload?.error || "unknown error"}.`; } return `Task${task}: ${stage}.`; } function workflowStageLabel(stage) { const key = String(stage || "").toLowerCase(); if (key === "reset") return "Reset"; if (key === "state:initial") return "Initial State"; if (key === "action-masks") return "Action Validation"; if (key === "auto_step") return "Auto Step"; if (key === "state:post_step") return "Post-Step State"; if (key === "grade") return "Grade"; if (key === "session:closed") return "Session Closed"; if (key === "task:error") return "Task Error"; return stage; } function jsonPretty(value) { try { return JSON.stringify(value, null, 2); } catch (_err) { return String(value); } } function toneClasses(tone) { if (tone === "rose") return "bg-rose-500/5 border-rose-500/20"; if (tone === "emerald") return "bg-emerald-500/5 border-emerald-500/20"; if (tone === "indigo") return "bg-indigo-500/5 border-indigo-500/20"; if (tone === "cyan") return "bg-cyan-500/5 border-cyan-500/20"; if (tone === "amber") return "bg-amber-500/5 border-amber-500/20"; return "bg-slate-700/10 border-slate-500/20"; } function statusClasses(status) { const s = String(status || "").toLowerCase(); if (s === "running") return "text-emerald-300 bg-emerald-500/10 border-emerald-500/30"; if (s === "queued") return "text-amber-300 bg-amber-500/10 border-amber-500/30"; if (s === "completed") return "text-indigo-300 bg-indigo-500/10 border-indigo-500/30"; if (s === "failed") return "text-rose-300 bg-rose-500/10 border-rose-500/30"; if (s === "stopped") return "text-slate-300 bg-slate-600/20 border-slate-500/30"; return "text-slate-300 bg-slate-700/20 border-slate-500/30"; } function normalizeJob(raw, index) { const jobId = String(raw?.job_id || raw?.id || `job-${index}`); const status = String(raw?.status || "unknown"); const timesteps = Number(raw?.timesteps || 0); const latestMetrics = raw?.latest_metrics && typeof raw.latest_metrics === "object" ? raw.latest_metrics : {}; const progressRaw = toNumberOrNull(raw?.progress); const ts = toNumberOrNull(latestMetrics.total_timesteps); const progressFromMetrics = Number.isFinite(ts) && Number.isFinite(timesteps) && timesteps > 0 ? Math.max(0, Math.min(1, Number(ts) / Number(timesteps))) : null; const progress = Number.isFinite(progressRaw) ? Math.max(0, Math.min(1, Number(progressRaw))) : Number.isFinite(progressFromMetrics) ? Number(progressFromMetrics) : 0; return { ...raw, job_id: jobId, status, timesteps: Number.isFinite(timesteps) ? timesteps : 0, phase: Number(raw?.phase || 0), n_envs: Number(raw?.n_envs || 0), progress, latest_metrics: latestMetrics, logs_tail: Array.isArray(raw?.logs_tail) ? raw.logs_tail : [], created_at: toNumberOrNull(raw?.created_at), updated_at: toNumberOrNull(raw?.updated_at), }; } export function TrainingTabV2({ tasks = [] }) { const [endpointRows, setEndpointRows] = useState([]); const [endpointError, setEndpointError] = useState(""); const [agents, setAgents] = useState([]); const [modelRows, setModelRows] = useState([]); const [modelError, setModelError] = useState(""); const [jobs, setJobs] = useState([]); const [jobsLoading, setJobsLoading] = useState(false); const [jobsError, setJobsError] = useState(""); const [activeJobId, setActiveJobId] = useState(""); const [activeJob, setActiveJob] = useState(null); const [deletingJobId, setDeletingJobId] = useState(""); const [jobError, setJobError] = useState(""); const [pollIntervalMs, setPollIntervalMs] = useState(1500); const pollFailuresRef = useRef(0); const [rewardPoints, setRewardPoints] = useState([]); const [scorePoints, setScorePoints] = useState([]); const [scoreSignalMeta, setScoreSignalMeta] = useState({ key: "grader_score", label: "Grader Score", fallback: false, }); const [logLines, setLogLines] = useState([]); const [logProgressRatio, setLogProgressRatio] = useState(null); const [lastLoggedTimesteps, setLastLoggedTimesteps] = useState(null); const [jobForm, setJobForm] = useState({ phase: 1, timesteps: 80000, n_envs: 4, seed: "", }); const [envTaskId, setEnvTaskId] = useState(tasks[0] || "district_backlog_easy"); const [envSeed, setEnvSeed] = useState(""); const [envPolicyName, setEnvPolicyName] = useState("backlog_clearance"); const [envMaxSteps, setEnvMaxSteps] = useState(6); const [envBusy, setEnvBusy] = useState(false); const [envError, setEnvError] = useState(""); const [envFlowEvents, setEnvFlowEvents] = useState([]); const [envFlowSummary, setEnvFlowSummary] = useState(null); const [envFlowRuns, setEnvFlowRuns] = useState([]); const envEventSeqRef = useRef(0); useEffect(() => { if (tasks.length > 0 && !tasks.includes(envTaskId)) { setEnvTaskId(tasks[0]); } }, [tasks, envTaskId]); useEffect(() => { if (agents.length > 0 && !agents.includes(envPolicyName)) { setEnvPolicyName(agents[0]); } }, [agents, envPolicyName]); const refreshEndpointHealth = async () => { setEndpointError(""); const directGet = async (path) => { const res = await fetch(`${backendBaseUrl()}${path}`, { method: "GET" }); if (!res.ok) { throw new Error(`${path} -> ${res.status}`); } try { return await res.json(); } catch (_err) { return { ok: true }; } }; const checks = [ { key: "health", label: "Health", fn: () => api("/health") }, { key: "tasks", label: "Tasks", fn: () => api("/tasks") }, { key: "agents", label: "Agents", fn: () => api("/agents") }, { key: "training_jobs", label: "Training Jobs", fn: () => api("/training_jobs") }, { key: "actions_schema", label: "Action Schema", fn: () => api("/actions/schema") }, { key: "rl_models", label: "RL Models", fn: () => api("/rl_models") }, { key: "rl_models_v2", label: "RL Models V2", fn: () => api("/rl/models") }, { key: "v1_agents", label: "V1 Agents", fn: () => directGet("/api/v1/agents") }, { key: "v1_rl_models", label: "V1 RL Models", fn: () => directGet("/api/v1/rl_models") }, ]; const settled = await Promise.allSettled( checks.map(async (chk) => { const start = Date.now(); await chk.fn(); return { key: chk.key, label: chk.label, ok: true, ms: Date.now() - start }; }) ); const rows = settled.map((res, idx) => { const meta = checks[idx]; if (res.status === "fulfilled") return res.value; return { key: meta.key, label: meta.label, ok: false, ms: null, error: res.reason?.message || String(res.reason), }; }); setEndpointRows(rows); if (rows.some((r) => !r.ok)) { setEndpointError("Some endpoints are down. Retries remain active."); } }; const refreshCatalog = async () => { setModelError(""); try { const [agentRes, rlV1Res, rlV2Res] = await Promise.allSettled([ api("/agents"), api("/rl_models"), api("/rl/models"), ]); if (agentRes.status === "fulfilled") { setAgents(Array.isArray(agentRes.value) ? agentRes.value : []); } const unified = []; if (rlV1Res.status === "fulfilled") { const rows = Array.isArray(rlV1Res.value?.models) ? rlV1Res.value.models : []; for (const row of rows) { unified.push({ source: "api/rl_models", label: row.label || row.path || "unnamed", path: row.path || "", exists: Boolean(row.exists), phase: normalizePath(row.path).includes("/phase2/") ? 2 : normalizePath(row.path).includes("/phase1/") ? 1 : 0, }); } } if (rlV2Res.status === "fulfilled") { const rows = Array.isArray(rlV2Res.value) ? rlV2Res.value : []; for (const row of rows) { const path = row.model_path ? (String(row.model_path).toLowerCase().endsWith(".zip") ? row.model_path : `${row.model_path}.zip`) : ""; unified.push({ source: "api/rl/models", label: path.split(/[\\/]/).pop() || row.model_path || "unnamed", path, exists: Boolean(row.exists), phase: Number(row.phase || 0), }); } } const dedupe = new Map(); for (const row of unified) { const key = normalizePath(row.path); if (!key) continue; if (!dedupe.has(key)) dedupe.set(key, row); } const rows = Array.from(dedupe.values()).sort((a, b) => { if (a.phase !== b.phase) return b.phase - a.phase; return String(a.label).localeCompare(String(b.label)); }); setModelRows(rows); if (rows.length === 0) { setModelError("No models discovered from dynamic model endpoints."); } } catch (err) { setModelError(err?.message || "Failed to load model registry."); } }; const refreshJobs = async () => { setJobsLoading(true); try { const data = await api("/training_jobs"); const rowsRaw = Array.isArray(data?.jobs) ? data.jobs : []; const rows = rowsRaw.map(normalizeJob).sort((a, b) => Number(b.created_at || 0) - Number(a.created_at || 0)); setJobs(rows); setJobsError(""); const running = rows.find((j) => j.status === "running" || j.status === "queued"); const current = rows.find((j) => j.job_id === activeJobId); if (running?.job_id) { if (!current || (current.status !== "running" && current.status !== "queued")) { setActiveJobId(running.job_id); } } else if (!activeJobId && rows[0]?.job_id) { setActiveJobId(rows[0].job_id); } } catch (err) { setJobsError(err?.message || "Failed to load training jobs."); } finally { setJobsLoading(false); } }; const parseAndSetPoints = (jobSnapshot) => { const lines = Array.isArray(jobSnapshot?.logs_tail) ? jobSnapshot.logs_tail : []; setLogLines(lines); const parsed = parseLogMetrics(lines); setLogProgressRatio(parsed.logProgressRatio); setLastLoggedTimesteps(parsed.lastLoggedTimesteps); const nextRewards = []; const nextScores = []; const nextSignals = { explained_variance: [], ep_len_mean: [], approx_kl: [], }; const history = Array.isArray(jobSnapshot?.metric_history) ? jobSnapshot.metric_history : []; for (const row of history) { const t = Number(row?.t ?? row?.total_timesteps ?? NaN); if (!Number.isFinite(t)) continue; const rew = Number(row?.ep_rew_mean ?? row?.mean_reward ?? NaN); const score = Number(row?.grader_score ?? row?.avg_grader_score ?? NaN); if (Number.isFinite(rew)) nextRewards.push({ t, value: rew }); if (Number.isFinite(score)) nextScores.push({ t, value: score }); for (const key of Object.keys(nextSignals)) { const vv = Number(row?.[key] ?? NaN); if (Number.isFinite(vv)) nextSignals[key].push({ t, value: vv }); } } nextRewards.push(...parsed.rewardPoints); nextScores.push(...parsed.scorePoints); const lm = jobSnapshot?.latest_metrics || {}; const metricTs = Number(lm.total_timesteps ?? NaN); const metricReward = Number(lm.ep_rew_mean ?? lm.mean_reward ?? NaN); const metricScore = Number(lm.grader_score ?? lm.avg_grader_score ?? NaN); if (Number.isFinite(metricTs) && Number.isFinite(metricReward)) { nextRewards.push({ t: metricTs, value: metricReward }); } if (Number.isFinite(metricTs) && Number.isFinite(metricScore)) { nextScores.push({ t: metricTs, value: metricScore }); } for (const key of Object.keys(nextSignals)) { const vv = Number(lm[key] ?? NaN); if (Number.isFinite(metricTs) && Number.isFinite(vv)) { nextSignals[key].push({ t: metricTs, value: vv }); } } const dedupe = (rows) => { const map = new Map(); for (const row of rows) { if (!Number.isFinite(row.t) || !Number.isFinite(row.value)) continue; map.set(row.t, row); } return Array.from(map.values()).sort((a, b) => a.t - b.t); }; const dedupedRewards = dedupe(nextRewards); const dedupedScores = dedupe(nextScores); const dedupedSignals = Object.fromEntries( Object.entries(nextSignals).map(([key, rows]) => [key, dedupe(rows)]) ); let chosenScores = dedupedScores; let chosenMeta = { key: "grader_score", label: "Grader Score", fallback: false }; if (dedupedScores.length < 2 || seriesSpread(dedupedScores) < 1e-6) { const fallbackCandidates = [ { key: "explained_variance", label: "Explained Variance" }, { key: "ep_len_mean", label: "Episode Length Mean" }, { key: "approx_kl", label: "Approx KL" }, ]; for (const candidate of fallbackCandidates) { const rows = dedupedSignals[candidate.key] || []; if (rows.length >= 2 && seriesSpread(rows) >= 1e-6) { chosenScores = rows; chosenMeta = { key: candidate.key, label: candidate.label, fallback: true }; break; } } } setRewardPoints(dedupedRewards); setScorePoints(chosenScores); setScoreSignalMeta(chosenMeta); }; const startTrainingJob = async () => { setJobError(""); try { const payload = { phase: Number(jobForm.phase) || 1, timesteps: Number(jobForm.timesteps) || 80000, n_envs: Number(jobForm.n_envs) || 4, }; const seedNum = Number(jobForm.seed); if (jobForm.seed !== "" && Number.isFinite(seedNum)) payload.seed = seedNum; const res = await api("/training_jobs", { method: "POST", body: JSON.stringify(payload), }); if (res?.job_id) { setActiveJobId(res.job_id); const norm = normalizeJob(res, 0); setActiveJob(norm); parseAndSetPoints(norm); } await refreshJobs(); } catch (err) { setJobError(err?.message || "Failed to start training job."); } }; const stopTrainingJob = async () => { if (!activeJobId) return; setJobError(""); try { await api(`/training_jobs/${activeJobId}/stop`, { method: "POST" }); await refreshJobs(); const stopped = await api(`/training_jobs/${activeJobId}`); const norm = normalizeJob(stopped, 0); setActiveJob(norm); parseAndSetPoints(norm); } catch (err) { setJobError(err?.message || "Failed to stop training job."); } }; const clearTrainingHistory = async () => { setJobError(""); try { await api("/training_jobs?clear_artifacts=false", { method: "DELETE" }); setJobs([]); setActiveJob(null); setActiveJobId(""); setRewardPoints([]); setScorePoints([]); setScoreSignalMeta({ key: "grader_score", label: "Grader Score", fallback: false }); setLogLines([]); setLogProgressRatio(null); setLastLoggedTimesteps(null); } catch (err) { setJobError(err?.message || "Failed to clear training history."); } }; const deleteTrainingJob = async (jobId) => { if (!jobId) return; setJobError(""); setDeletingJobId(jobId); try { await api(`/training_jobs/${jobId}?clear_artifacts=false`, { method: "DELETE" }); if (activeJobId === jobId) { setActiveJobId(""); setActiveJob(null); setRewardPoints([]); setScorePoints([]); setScoreSignalMeta({ key: "grader_score", label: "Grader Score", fallback: false }); setLogLines([]); } await refreshJobs(); } catch (err) { setJobError(err?.message || "Failed to delete training job."); } finally { setDeletingJobId(""); } }; const pushEnvEvent = (stage, payload, tone = "indigo") => { const seq = envEventSeqRef.current + 1; envEventSeqRef.current = seq; setEnvFlowEvents((prev) => [ ...prev, { id: `${Date.now()}-${Math.random()}`, seq, ts: Date.now(), stage, payload, tone }, ].slice(-400)); }; const runAutomatedOpenEnvFlow = async () => { setEnvBusy(true); setEnvError(""); setEnvFlowSummary(null); setEnvFlowEvents([]); setEnvFlowRuns([]); envEventSeqRef.current = 0; try { const seedNum = Number(envSeed); const taskScope = Array.isArray(tasks) && tasks.length > 0 ? tasks : [envTaskId]; const runTaskIds = Array.from(new Set(taskScope.filter(Boolean))); const maxSteps = Math.max(1, Number(envMaxSteps) || 6); const taskResults = []; for (const taskId of runTaskIds) { let sessionId = ""; let stepsExecuted = 0; let finalState = null; try { const resetPayload = { task_id: taskId }; if (envSeed !== "" && Number.isFinite(seedNum)) { resetPayload.seed = seedNum; } const resetRes = await api("/reset", { method: "POST", body: JSON.stringify(resetPayload), }); sessionId = String(resetRes?.session_id || ""); if (!sessionId) throw new Error(`reset() did not return session_id for task ${taskId}`); pushEnvEvent( "reset", { task_id: taskId, day: resetRes?.observation?.day, backlog: resetRes?.observation?.total_backlog, completed: resetRes?.observation?.total_completed, }, "emerald" ); const initialState = await api("/state", { method: "POST", body: JSON.stringify({ session_id: sessionId, include_action_history: false }), }); pushEnvEvent( "state:initial", { task_id: taskId, total_completed: initialState?.state?.total_completed, total_backlog: initialState?.state?.total_backlog, fairness_gap: initialState?.state?.fairness_gap, }, "cyan" ); let done = false; for (let idx = 0; idx < maxSteps; idx += 1) { if (done) break; const masks = await api("/action-masks", { method: "POST", body: JSON.stringify({ session_id: sessionId }), }); pushEnvEvent( "action-masks", { task_id: taskId, step: idx + 1, total_valid: masks?.total_valid, total_actions: masks?.total_actions, }, "amber" ); const stepRes = await api("/auto_step", { method: "POST", body: JSON.stringify({ session_id: sessionId, agent_policy: envPolicyName || "backlog_clearance", }), }); done = Boolean(stepRes?.done); stepsExecuted += 1; pushEnvEvent( "auto_step", { task_id: taskId, step: idx + 1, reward: stepRes?.reward, done: stepRes?.done, day: stepRes?.observation?.day, backlog: stepRes?.observation?.total_backlog, completed: stepRes?.observation?.total_completed, }, "indigo" ); const stateRes = await api("/state", { method: "POST", body: JSON.stringify({ session_id: sessionId, include_action_history: true }), }); finalState = stateRes; pushEnvEvent( "state:post_step", { task_id: taskId, step: idx + 1, total_completed: stateRes?.state?.total_completed, total_backlog: stateRes?.state?.total_backlog, total_sla_breaches: stateRes?.state?.total_sla_breaches, action_history_len: Array.isArray(stateRes?.state?.action_history) ? stateRes.state.action_history.length : 0, }, "cyan" ); } const gradeRes = await api("/grade", { method: "POST", body: JSON.stringify({ session_id: sessionId }), }); const scoreValue = Number(gradeRes?.score); const dynamicPassed = typeof gradeRes?.passed === "boolean" ? gradeRes.passed : (Number.isFinite(scoreValue) ? scoreValue >= 0.5 : null); pushEnvEvent( "grade", { task_id: taskId, score: gradeRes?.score, passed: dynamicPassed, }, "emerald" ); taskResults.push({ task_id: taskId, steps_executed: stepsExecuted, score: gradeRes?.score ?? null, passed: dynamicPassed, final_completed: finalState?.state?.total_completed ?? null, final_backlog: finalState?.state?.total_backlog ?? null, final_sla_breaches: finalState?.state?.total_sla_breaches ?? null, }); } catch (taskErr) { const msg = taskErr?.message || String(taskErr); pushEnvEvent("task:error", { task_id: taskId, error: msg }, "rose"); taskResults.push({ task_id: taskId, steps_executed: stepsExecuted, score: null, passed: null, error: msg, }); } finally { if (sessionId) { try { await api(`/sessions/${sessionId}`, { method: "DELETE" }); pushEnvEvent("session:closed", { task_id: taskId }, "slate"); } catch (_err) { // no-op } } } } setEnvFlowRuns(taskResults); const validScores = taskResults .map((row) => Number(row.score)) .filter((v) => Number.isFinite(v)); const passedCount = taskResults.filter((row) => row.passed === true).length; setEnvFlowSummary({ tasks_executed: taskResults.length, total_steps_executed: taskResults.reduce((acc, row) => acc + Number(row.steps_executed || 0), 0), avg_score: validScores.length > 0 ? validScores.reduce((acc, score) => acc + Number(score), 0) / validScores.length : null, passed_tasks: passedCount, }); } catch (err) { setEnvError(err?.message || "Automated OpenEnv workflow failed."); } finally { setEnvBusy(false); } }; useEffect(() => { refreshEndpointHealth(); refreshCatalog(); refreshJobs(); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); useEffect(() => { const t = setInterval(() => { refreshJobs(); }, 5000); return () => clearInterval(t); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); useEffect(() => { const t = setInterval(() => { refreshEndpointHealth(); }, 15000); return () => clearInterval(t); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); useEffect(() => { if (!activeJobId) return undefined; let cancelled = false; const t = setInterval(async () => { if (cancelled) return; try { const snapshotRaw = await api(`/training_jobs/${activeJobId}`); if (cancelled) return; const snapshot = normalizeJob(snapshotRaw, 0); setActiveJob(snapshot); parseAndSetPoints(snapshot); setJobError(""); pollFailuresRef.current = 0; if (pollIntervalMs !== 1500) setPollIntervalMs(1500); } catch (err) { pollFailuresRef.current += 1; if (pollFailuresRef.current >= 3) { setPollIntervalMs(4000); setJobError(err?.message || "Polling failed repeatedly, switched to fallback polling."); } } }, pollIntervalMs); return () => { cancelled = true; clearInterval(t); }; }, [activeJobId, pollIntervalMs]); useEffect(() => { if (!activeJobId) return; const row = jobs.find((j) => j.job_id === activeJobId); if (!row) return; setActiveJob(row); parseAndSetPoints(row); // eslint-disable-next-line react-hooks/exhaustive-deps }, [activeJobId, jobs]); const progressA = useMemo(() => { if (!activeJob) return null; const p = toNumberOrNull(activeJob.progress); return Number.isFinite(p) ? Math.max(0, Math.min(1, Number(p))) : null; }, [activeJob]); const progressB = useMemo(() => { if (!activeJob) return null; const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : []; const historyTs = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.t ?? history[history.length - 1]?.total_timesteps) : null; const ts = toNumberOrNull(activeJob?.latest_metrics?.total_timesteps) ?? historyTs; const total = toNumberOrNull(activeJob?.timesteps); if (!Number.isFinite(ts) || !Number.isFinite(total) || total <= 0) return null; return Math.max(0, Math.min(1, Number(ts) / Number(total))); }, [activeJob]); const progressC = useMemo(() => { if (!activeJob) return null; const total = toNumberOrNull(activeJob?.timesteps); if (!Number.isFinite(total) || total <= 0) { return Number.isFinite(logProgressRatio) ? Number(logProgressRatio) : null; } const fromLogTs = Number.isFinite(lastLoggedTimesteps) && Number(lastLoggedTimesteps) > 0 ? Math.max(0, Math.min(1, Number(lastLoggedTimesteps) / Number(total))) : null; if (Number.isFinite(fromLogTs) && Number.isFinite(logProgressRatio)) { return Math.max(Number(fromLogTs), Number(logProgressRatio)); } if (Number.isFinite(fromLogTs)) return Number(fromLogTs); if (Number.isFinite(logProgressRatio)) return Number(logProgressRatio); return null; }, [activeJob, lastLoggedTimesteps, logProgressRatio]); const effectiveProgress = useMemo(() => { const values = [progressA, progressB, progressC].filter((v) => Number.isFinite(v)); return values.length > 0 ? Math.max(...values) : null; }, [progressA, progressB, progressC]); const rewardLatest = rewardPoints.length ? rewardPoints[rewardPoints.length - 1].value : null; const rewardBest = rewardPoints.length ? Math.max(...rewardPoints.map((p) => p.value)) : null; const scoreLatest = scorePoints.length ? scorePoints[scorePoints.length - 1].value : null; const scoreBest = scorePoints.length ? Math.max(...scorePoints.map((p) => p.value)) : null; const rewardSeries = useMemo(() => normalizeSeries(rewardPoints), [rewardPoints]); const scoreSeries = useMemo(() => normalizeSeries(scorePoints), [scorePoints]); const graphXMin = useMemo(() => { const allTs = [...rewardSeries, ...scoreSeries].map((p) => Number(p.t)).filter(Number.isFinite); if (allTs.length === 0) return 0; return Math.min(...allTs); }, [rewardSeries, scoreSeries]); const graphXMax = useMemo(() => { const allTs = [...rewardSeries, ...scoreSeries].map((p) => Number(p.t)).filter(Number.isFinite); if (allTs.length === 0) return 1; const mx = Math.max(...allTs); return mx > graphXMin ? mx : graphXMin + 1; }, [rewardSeries, scoreSeries, graphXMin]); const rewardMin = rewardPoints.length ? Math.min(...rewardPoints.map((p) => p.value), -10) : -10; const rewardMax = rewardPoints.length ? Math.max(...rewardPoints.map((p) => p.value), 10) : 10; const scoreMin = scorePoints.length ? Math.min(...scorePoints.map((p) => p.value), 0) : 0; const scoreMax = scorePoints.length ? Math.max(...scorePoints.map((p) => p.value), 1) : 1; const rewardPolyline = useMemo( () => toPolylineByT(rewardSeries, { minX: graphXMin, maxX: graphXMax, minY: rewardMin, maxY: rewardMax, width: 700, height: 260, }), [rewardSeries, graphXMin, graphXMax, rewardMin, rewardMax] ); const scoreStairPolyline = useMemo( () => toStairPolylineByT(scoreSeries, { minX: graphXMin, maxX: graphXMax, minY: scoreMin, maxY: scoreMax, width: 700, height: 260, }), [scoreSeries, graphXMin, graphXMax, scoreMin, scoreMax] ); const llmStoryCards = useMemo(() => { const cards = []; let seq = 1; if (activeJob) { cards.push({ id: `story-${seq}`, seq: seq++, title: "Training Context", text: `Phase ${activeJob?.phase || "-"} job ${String(activeJob?.job_id || "").slice(0, 8)} is ${activeJob?.status || "unknown"} at ${fmt((Number(activeJob?.progress || 0) * 100), 1)}%.`, tone: "cyan", }); if (rewardSeries.length >= 2 || scoreSeries.length >= 2) { const rewardStart = rewardSeries.length > 0 ? rewardSeries[0].value : null; const rewardEnd = rewardSeries.length > 0 ? rewardSeries[rewardSeries.length - 1].value : null; const scoreStart = scoreSeries.length > 0 ? scoreSeries[0].value : null; const scoreEnd = scoreSeries.length > 0 ? scoreSeries[scoreSeries.length - 1].value : null; cards.push({ id: `story-${seq}`, seq: seq++, title: "Learning Trend", text: `Reward ${rewardStart != null ? fmt(rewardStart, 2) : "-"} -> ${rewardEnd != null ? fmt(rewardEnd, 2) : "-"}; ${scoreSignalMeta.label.toLowerCase()} ${scoreStart != null ? fmt(scoreStart, 3) : "-"} -> ${scoreEnd != null ? fmt(scoreEnd, 3) : "-"}.`, tone: "indigo", }); } } for (const line of (logLines || []).slice(-14)) { const row = summarizeLogLine(line); cards.push({ id: `log-${seq}-${line.slice(0, 8)}`, seq: seq++, title: row.title, text: row.text, tone: row.tone, }); } const evalRows = Array.isArray(activeJob?.evaluation_rows) ? activeJob.evaluation_rows : []; for (const row of evalRows) { cards.push({ id: `eval-${seq}-${row.task_id}`, seq: seq++, title: "Evaluation Replay", text: `${row.task_id}: score ${fmt(row.grader_score, 3)}, reward ${fmt(row.total_reward, 2)}, completed ${row.total_completed}, breaches ${row.total_sla_breaches}.`, tone: "emerald", }); } if (toNumberOrNull(activeJob?.evaluation_avg_score) != null) { cards.push({ id: `eval-avg-${seq}`, seq: seq++, title: "Evaluation Summary", text: `Average grader score ${fmt(activeJob.evaluation_avg_score, 3)} across evaluated tasks.`, tone: "emerald", }); } for (const event of (envFlowEvents || []).slice(-10)) { cards.push({ id: `replay-${seq}-${event.id}`, seq: seq++, title: "OpenEnv Replay", text: summarizeEnvEvent(event), tone: event?.tone || "cyan", }); } return cards.slice(-32); }, [activeJob, rewardSeries, scoreSeries, logLines, envFlowEvents, scoreSignalMeta.label]); const progressText = (v) => (Number.isFinite(v) ? `${fmt(Number(v) * 100, 1)}%` : "-"); const currentTs = useMemo(() => { const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : []; const histTs = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.t ?? history[history.length - 1]?.total_timesteps) : null; return toNumberOrNull(activeJob?.latest_metrics?.total_timesteps) ?? histTs ?? lastLoggedTimesteps; }, [activeJob, lastLoggedTimesteps]); const currentReward = useMemo(() => { const history = Array.isArray(activeJob?.metric_history) ? activeJob.metric_history : []; const histReward = history.length > 0 ? toNumberOrNull(history[history.length - 1]?.ep_rew_mean ?? history[history.length - 1]?.mean_reward) : null; return toNumberOrNull(activeJob?.latest_metrics?.ep_rew_mean) ?? toNumberOrNull(activeJob?.latest_metrics?.mean_reward) ?? histReward; }, [activeJob]); const currentScore = scoreLatest; return (

hub Endpoint Connectivity Matrix

{endpointError && (
{endpointError}
)}
{endpointRows.map((row) => (
{row.label}
{row.ok ? "UP" : "DOWN"}
{row.ok ? `${row.ms} ms` : row.error || "unreachable"}
))}

tune Live Training Control

{jobError && (
{jobError}
)}

monitoring Live Metrics and Storytelling Timeline

Active Job Status
{activeJob?.status || "idle"}
Current Timesteps
{currentTs != null ? Number(currentTs).toLocaleString() : "-"}
Current Reward
{currentReward != null ? fmt(currentReward, 3) : "-"}
Current {scoreSignalMeta.label}
{currentScore != null ? fmt(currentScore, 3) : "-"}
Reward line (left axis) + {scoreSignalMeta.label} stair-step line (right axis), updated from live backend metrics.
Combined Reward and Score (Dual Axis)
timesteps {Number.isFinite(graphXMin) ? Number(graphXMin).toLocaleString() : "-"} - {Number.isFinite(graphXMax) ? Number(graphXMax).toLocaleString() : "-"}
{rewardSeries.length === 0 && scoreSeries.length === 0 ? (
Waiting for live metric history from training logs...
) : (
{[0, 1, 2, 3, 4].map((i) => ( ))} {rewardPolyline ? ( ) : null} {scoreStairPolyline ? ( ) : null}
Reward min {rewardMin.toFixed(2)} | max {rewardMax.toFixed(2)}
{scoreSignalMeta.label} min {scoreMin.toFixed(3)} | max {scoreMax.toFixed(3)}
)}
reward current: {rewardLatest != null ? rewardLatest.toFixed(3) : "-"} | reward best: {rewardBest != null ? rewardBest.toFixed(3) : "-"} | {scoreSignalMeta.label.toLowerCase()} current: {scoreLatest != null ? scoreLatest.toFixed(3) : "-"} | {scoreSignalMeta.label.toLowerCase()} best: {scoreBest != null ? scoreBest.toFixed(3) : "-"}
Legend: Reward (line) - {scoreSignalMeta.label} (stair-step hold-last-value){scoreSignalMeta.fallback ? " - fallback metric used because grader score has no live movement yet." : ""}
LLM Story Feed (logs + replay + evaluation)
Sequential order - {llmStoryCards.length} cards
{llmStoryCards.length === 0 ? (
No storyline events yet.
) : (
{llmStoryCards.map((card) => (
{card.title}
#{card.seq}
{card.text}
))}
)}

history Training Job History

{jobsError &&
{jobsError}
} {jobsLoading ? (
Loading jobs...
) : (
{jobs.map((job) => { const updated = timestampToDate(job.updated_at); return ( setActiveJobId(job.job_id)} > ); })} {jobs.length === 0 && ( )}
Job Status Phase Progress Updated Action
{String(job.job_id || "").slice(0, 8)} {job.status} {job.phase || "-"} {fmt((Number(job.progress || 0) * 100), 1)}% {updated ? updated.toLocaleTimeString() : "-"}
No training jobs found.
)}

database Model Registry (Dynamic)

{modelError &&
{modelError}
}
{modelRows.map((m) => ( ))} {modelRows.length === 0 && ( )}
Label Phase Source Exists
{m.label}
{m.path || "-"}
{m.phase || "-"} {m.source || "-"} {m.exists ? "yes" : "no"}
No models discovered.

api Automated OpenEnv Workflow (`reset`, `step`, `state`, `grade`)

Runs sequentially across all available tasks and records each stage in chronological order.
{envError && (
{envError}
)}
{envFlowSummary && (
Tasks Executed: {envFlowSummary.tasks_executed}
Total Steps Executed: {envFlowSummary.total_steps_executed}
Average Score: {envFlowSummary.avg_score != null ? fmt(envFlowSummary.avg_score, 3) : "-"}
Passed Tasks: {envFlowSummary.passed_tasks}
)} {envFlowRuns.length > 0 && (
{envFlowRuns.map((row) => ( ))}
Task Steps Score Completed Backlog SLA Breaches Passed
{row.task_id} {row.steps_executed} {row.score != null ? fmt(row.score, 3) : "-"} {row.final_completed ?? "-"} {row.final_backlog ?? "-"} {row.final_sla_breaches ?? "-"} {row.passed === true ? "true" : row.passed === false ? "false" : "-"}
)}
{envFlowEvents.length === 0 ? (
No automated workflow events yet.
) : ( envFlowEvents.map((event) => (
{workflowStageLabel(event.stage)}
#{event.seq} | {new Date(event.ts).toLocaleTimeString()}
{summarizeEnvEvent(event)}
{payloadHighlights(event.payload).length > 0 && (
{payloadHighlights(event.payload).map(([k, v]) => ( {k}: {v} ))}
)}
)) )}
); }