Spaces:
Paused
Paused
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
| from osint_env.data.generator import PlatformViews | |
| from osint_env.domain.models import CanonicalGraph, Edge, TaskInstance | |
| from osint_env.env.environment import OSINTEnvironment | |
def _safe_label(value: str, fallback: str) -> str:
    """Return ``value`` as whitespace-stripped text, or ``fallback`` if nothing remains.

    ``value`` is passed through ``str()`` first, so non-string inputs are
    rendered with their string form rather than rejected.
    """
    return str(value).strip() or fallback
def _canonical_graph_payload(graph: CanonicalGraph) -> dict[str, Any]:
    """Convert the canonical graph into ``nodes``/``edges`` lists for the dashboard.

    Each node becomes a dict with ``id``, ``label``, ``group``, ``title`` and
    ``attrs``; each edge becomes a dict with a positional ``c_<index>`` id,
    ``from``/``to`` endpoints, the relation as ``label`` and fixed styling.

    Args:
        graph: Object exposing ``nodes`` (mapping of node id to node) and
            ``edges`` (iterable of edges with ``src``, ``dst``, ``rel`` and
            ``confidence``).

    Returns:
        ``{"nodes": [...], "edges": [...]}``.
    """
    nodes = []
    for node in graph.nodes.values():
        attrs = node.attrs or {}
        # Prefer a human-readable name, then a handle, then the raw node id.
        label = _safe_label(str(attrs.get("name") or attrs.get("handle") or node.node_id), node.node_id)
        nodes.append(
            {
                "id": node.node_id,
                "label": label,
                "group": str(node.node_type.value),
                # One "key: value" pair per line. This must be a real newline:
                # the previous "\\n" literal put a visible backslash-n between
                # the pairs in the hover tooltip.
                "title": "\n".join(f"{k}: {v}" for k, v in attrs.items()),
                "attrs": attrs,
            }
        )

    edges = [
        {
            "id": f"c_{idx}",
            "from": edge.src,
            "to": edge.dst,
            "label": edge.rel,
            "arrows": "to",
            "color": "#1f2937",
            "width": 1,
            "confidence": float(edge.confidence),
            "status": "canonical",
        }
        for idx, edge in enumerate(graph.edges)
    ]
    return {"nodes": nodes, "edges": edges}
def _edge_key(edge: Edge) -> tuple[str, str, str]:
    """Return the ``(src, rel, dst)`` triple that identifies ``edge``.

    Confidence is deliberately not part of the key, so two edges that differ
    only in confidence compare as the same edge.
    """
    source, relation, target = edge.src, edge.rel, edge.dst
    return source, relation, target
def _episode_graph_payload(pred_edges: list[Edge], truth_edges: list[Edge], graph: CanonicalGraph) -> dict[str, Any]:
    """Build the predicted-vs-truth comparison graph for one episode.

    Edges are identified by their ``(src, rel, dst)`` triple. Every triple that
    occurs in either list yields one output edge whose ``status`` is
    ``"matched"`` (in both), ``"pred_only"`` or ``"truth_only"``, with a colour
    and dash style to match. Nodes are the endpoints of those edges; endpoints
    missing from ``graph.nodes`` are emitted with the placeholder group
    ``"episode"`` and empty attributes.

    Args:
        pred_edges: Edges produced during the episode.
        truth_edges: Reference edges for the episode's task.
        graph: Canonical graph used to look up node labels, types and attrs.

    Returns:
        ``{"nodes": [...], "edges": [...]}``, both sorted for stable output.
    """
    # When a triple occurs several times in one list, the last occurrence wins.
    pred = {_edge_key(e): e for e in pred_edges}
    truth = {_edge_key(e): e for e in truth_edges}
    all_keys = set(pred) | set(truth)
    node_ids = {endpoint for src, _, dst in all_keys for endpoint in (src, dst)}

    nodes = []
    for node_id in sorted(node_ids):
        node = graph.nodes.get(node_id)
        if node is None:
            nodes.append({"id": node_id, "label": node_id, "group": "episode", "attrs": {}})
            continue
        attrs = node.attrs or {}
        label = _safe_label(str(attrs.get("name") or attrs.get("handle") or node_id), node_id)
        nodes.append({"id": node_id, "label": label, "group": str(node.node_type.value), "attrs": attrs})

    # status -> (colour, dashed)
    styles = {
        "matched": ("#16a34a", False),
        "pred_only": ("#2563eb", False),
        "truth_only": ("#f59e0b", True),
    }

    edges = []
    for idx, key in enumerate(sorted(all_keys)):
        src, rel, dst = key
        in_pred = key in pred
        if in_pred and key in truth:
            status = "matched"
        elif in_pred:
            status = "pred_only"
        else:
            status = "truth_only"
        color, dashes = styles[status]
        # Every key comes from pred or truth, so one of the two lookups always
        # succeeds; the predicted edge's confidence is preferred when both exist.
        edge = pred[key] if in_pred else truth[key]
        edges.append(
            {
                "id": f"e_{idx}",
                "from": src,
                "to": dst,
                "label": rel,
                "arrows": "to",
                "color": color,
                "dashes": dashes,
                "width": 2,
                "status": status,
                "confidence": float(edge.confidence),
            }
        )
    return {"nodes": nodes, "edges": edges}
def _views_payload(views: PlatformViews) -> dict[str, Any]:
    """Collect the three platform record collections of ``views`` into one dict.

    The returned keys (``microblog_posts``, ``forum_threads``, ``profiles``)
    mirror the attribute names they are read from; values are passed through
    unchanged.
    """
    exported = ("microblog_posts", "forum_threads", "profiles")
    return {name: getattr(views, name) for name in exported}
def _leaderboard_payload(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return at most the 200 best records, ordered by ``leaderboard_score`` descending.

    The score is read from ``record["metrics"]["leaderboard_score"]``. A record
    whose ``metrics`` entry or score is missing or ``None`` ranks as ``0.0``
    instead of aborting the export. Records with equal scores keep their input
    order (the sort is stable).

    Args:
        records: Leaderboard entries; the list itself is not modified.

    Returns:
        A new list holding the top-ranked records.
    """

    def _score(record: dict[str, Any]) -> float:
        # ``or`` covers both an absent key and an explicit ``None`` value.
        metrics = record.get("metrics") or {}
        return float(metrics.get("leaderboard_score") or 0.0)

    return sorted(records, key=_score, reverse=True)[:200]
def _edges_from_records(records: Any) -> list[Edge]:
    """Build ``Edge`` objects from the dict entries of an episode's edge list.

    Non-dict entries are skipped. A missing or ``None`` list yields no edges,
    and a missing or ``None`` confidence defaults to ``1.0``.
    """
    edges = []
    for record in records or []:
        if not isinstance(record, dict):
            continue
        confidence = record.get("confidence", 1.0)
        edges.append(
            Edge(
                str(record.get("src", "")),
                str(record.get("rel", "")),
                str(record.get("dst", "")),
                float(1.0 if confidence is None else confidence),
            )
        )
    return edges


def _script_safe_json(data: Any) -> str:
    """Serialize ``data`` as JSON that is safe inside an inline ``<script>`` element.

    ``<``, ``>`` and ``&`` only ever occur inside JSON strings, so replacing
    them with ``\\uXXXX`` escapes keeps the parsed value identical while making
    it impossible for a value such as ``"</script>"`` to end the script early.
    """
    return json.dumps(data).replace("<", "\\u003c").replace(">", "\\u003e").replace("&", "\\u0026")


def _render_dashboard_html(payload_json: str) -> str:
    """Return the complete dashboard page with ``payload_json`` embedded as ``payload``.

    ``payload_json`` must already be safe for an inline script. Literal braces
    of the CSS/JavaScript are doubled because the template is an f-string.
    """
    return f"""<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>OSINT Environment Dashboard</title>
  <link rel="preconnect" href="https://fonts.googleapis.com" />
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
  <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&family=IBM+Plex+Mono:wght@400;600&display=swap" rel="stylesheet" />
  <link href="https://unpkg.com/vis-network@9.1.9/styles/vis-network.min.css" rel="stylesheet" />
  <script src="https://unpkg.com/vis-network@9.1.9/standalone/umd/vis-network.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.3/dist/chart.umd.min.js"></script>
  <style>
    :root {{
      --ink: #1d232f;
      --muted: #5f6d7a;
      --line: #d5dfe8;
      --bg: #f5f8fb;
      --card: #ffffff;
      --brand: #0f766e;
      --brand-soft: #d4f4ef;
      --accent: #d97706;
      --accent-soft: #ffe7c2;
      --ok: #15803d;
      --danger: #b91c1c;
    }}
    * {{ box-sizing: border-box; }}
    body {{
      margin: 0;
      color: var(--ink);
      font-family: "Space Grotesk", "Segoe UI", sans-serif;
      background:
        radial-gradient(1200px 500px at -5% -20%, #d8efe9, transparent 70%),
        radial-gradient(900px 500px at 110% -10%, #ffe9cf, transparent 65%),
        var(--bg);
    }}
    .wrap {{ max-width: 1500px; margin: 0 auto; padding: 20px; }}
    .card {{
      background: var(--card);
      border: 1px solid var(--line);
      border-radius: 18px;
      padding: 16px;
      box-shadow: 0 10px 24px rgba(24, 39, 59, 0.06);
    }}
    .hero {{
      display: grid;
      grid-template-columns: 2.1fr 1fr;
      gap: 14px;
      margin-bottom: 14px;
    }}
    .hero-main {{
      background: linear-gradient(145deg, #f7fffd, #fff8ef);
      border: 1px solid #e6efe8;
    }}
    h1 {{ margin: 0 0 8px; font-size: 30px; letter-spacing: -0.02em; }}
    h2 {{ margin: 0 0 10px; font-size: 18px; letter-spacing: -0.01em; }}
    .muted {{ color: var(--muted); }}
    .pill-row {{ display: flex; gap: 8px; flex-wrap: wrap; margin-top: 8px; }}
    .pill {{
      border: 1px solid #dce8e6;
      background: #fbfffe;
      border-radius: 999px;
      padding: 4px 10px;
      font-size: 12px;
      color: #214742;
    }}
    .stats {{ display: grid; grid-template-columns: repeat(3, minmax(120px, 1fr)); gap: 10px; margin-top: 10px; }}
    .stat {{
      border: 1px dashed #cde2df;
      background: linear-gradient(180deg, #fcfffe, #f6fffc);
      border-radius: 12px;
      padding: 10px;
    }}
    .stat .k {{ font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.06em; }}
    .stat .v {{ font-size: 22px; font-weight: 700; }}
    .layout {{ display: grid; grid-template-columns: 1.2fr 3fr 1.2fr; gap: 14px; margin-bottom: 14px; }}
    .control-col {{ display: flex; flex-direction: column; gap: 14px; }}
    .control-grid {{ display: grid; gap: 8px; }}
    .graph-wrap {{ position: relative; overflow: hidden; }}
    .graph {{ height: 540px; border: 1px solid var(--line); border-radius: 14px; background: #fbfdff; }}
    .graph-banner {{
      position: absolute;
      top: 10px;
      left: 10px;
      background: rgba(255,255,255,0.93);
      border: 1px solid var(--line);
      border-radius: 12px;
      padding: 6px 10px;
      font-size: 12px;
      z-index: 2;
      backdrop-filter: blur(4px);
    }}
    .legend {{ display: flex; gap: 8px; flex-wrap: wrap; margin-top: 8px; font-size: 12px; }}
    .dot {{ width: 9px; height: 9px; border-radius: 999px; display: inline-block; margin-right: 4px; }}
    .mono {{ font-family: "IBM Plex Mono", monospace; font-size: 12px; }}
    .mono-box {{ font-family: "IBM Plex Mono", monospace; font-size: 12px; line-height: 1.4; }}
    .answer-ok {{ color: var(--ok); font-weight: 600; }}
    .answer-bad {{ color: var(--danger); font-weight: 600; }}
    .inline {{ display: flex; gap: 8px; align-items: center; }}
    .split {{ display: grid; grid-template-columns: 2fr 1.3fr; gap: 14px; margin-bottom: 14px; }}
    .db-tabs {{ display: flex; gap: 6px; flex-wrap: wrap; margin-bottom: 8px; }}
    .tab {{
      border: 1px solid var(--line);
      border-radius: 9px;
      padding: 5px 10px;
      background: #fff;
      cursor: pointer;
      font-size: 12px;
    }}
    .tab.active {{ background: var(--brand-soft); border-color: #b5e7de; color: #08554e; }}
    .table-wrap {{ max-height: 320px; overflow: auto; border: 1px solid var(--line); border-radius: 12px; }}
    table {{ width: 100%; border-collapse: collapse; font-size: 12.5px; }}
    th, td {{ padding: 8px; border-bottom: 1px solid #edf2f7; text-align: left; vertical-align: top; }}
    th {{ position: sticky; top: 0; background: #f7fbff; z-index: 1; }}
    tr:hover td {{ background: #f9fcff; }}
    .json-view {{
      height: 320px;
      overflow: auto;
      border: 1px solid var(--line);
      border-radius: 12px;
      background: #0f172a;
      color: #d2f8ee;
      padding: 10px;
      margin: 0;
    }}
    .charts {{ display: grid; grid-template-columns: 1fr 1fr; gap: 14px; margin-bottom: 14px; }}
    .chart-box {{ height: 300px; }}
    select, input[type="search"], button {{
      border: 1px solid var(--line);
      border-radius: 9px;
      padding: 8px;
      font: inherit;
      background: #fff;
      color: var(--ink);
    }}
    button {{ cursor: pointer; background: #fff; }}
    button.primary {{ background: var(--brand); border-color: #0e6f68; color: #fff; }}
    .subtle {{ background: #f7fafc; }}
    @media (max-width: 1100px) {{
      .hero, .layout, .split, .charts {{ grid-template-columns: 1fr; }}
      .graph {{ height: 440px; }}
    }}
  </style>
</head>
<body>
  <div class="wrap">
    <div class="hero">
      <section class="card hero-main">
        <h1>OSINT Benchmark Dashboard</h1>
        <p class="muted">Interactive explorer for canonical knowledge graph, episode traces, source platform records, and benchmark ranking.</p>
        <div class="pill-row" id="hero-pills"></div>
        <div class="stats" id="stats"></div>
      </section>
      <section class="card">
        <h2>Episode Explorer</h2>
        <div class="inline" style="margin-bottom:8px">
          <label class="mono" for="episode-select">Episode</label>
          <select id="episode-select" style="flex:1"></select>
        </div>
        <div class="inline" style="gap:6px; margin-bottom:8px">
          <button id="episode-prev">Prev</button>
          <button id="episode-next">Next</button>
        </div>
        <div><strong>Task ID:</strong> <span id="task-id"></span></div>
        <div><strong>Task Type:</strong> <span id="task-type"></span></div>
        <div style="margin-top:8px"><strong>Question</strong></div>
        <div id="task-question" class="muted mono-box"></div>
        <div style="margin-top:8px"><strong>Ground Truth Answer</strong>: <span id="task-answer"></span></div>
        <div style="margin-top:8px"><strong>Agent Answer</strong>: <span id="agent-answer"></span></div>
        <div style="margin-top:8px"><strong>Correct</strong>: <span id="answer-correct"></span></div>
      </section>
    </div>
    <div class="layout">
      <section class="card control-col">
        <div>
          <h2>Graph Controls</h2>
          <div class="control-grid">
            <label class="mono" for="graph-mode">Graph Layer</label>
            <select id="graph-mode">
              <option value="canonical">Canonical Graph</option>
              <option value="episode">Episode Graph</option>
            </select>
            <label class="mono" for="graph-search">Node Search</label>
            <input id="graph-search" type="search" placeholder="Type node id or label..." />
            <label class="mono" for="relation-filter">Relation Filter</label>
            <input id="relation-filter" type="search" placeholder="Filter edge labels..." />
            <button id="fit-graph" class="primary">Fit Graph</button>
          </div>
        </div>
        <div>
          <h2>Node Types</h2>
          <div id="type-filters" class="control-grid mono"></div>
        </div>
      </section>
      <section class="card">
        <h2>Graph Explorer</h2>
        <div class="graph-wrap">
          <div class="graph-banner" id="graph-banner">Layer: Canonical Graph</div>
          <div id="graph-canvas" class="graph"></div>
        </div>
        <div class="legend">
          <span><span class="dot" style="background:#16a34a"></span>matched edge</span>
          <span><span class="dot" style="background:#2563eb"></span>predicted only</span>
          <span><span class="dot" style="background:#f59e0b"></span>truth only</span>
        </div>
      </section>
      <section class="card control-col">
        <div>
          <h2>Node Inspector</h2>
          <pre id="node-detail" class="json-view">Click a node to inspect attributes and neighbors.</pre>
        </div>
        <div>
          <h2>Edge Inspector</h2>
          <pre id="edge-detail" class="json-view">Click an edge to inspect relation details.</pre>
        </div>
      </section>
    </div>
    <div class="split">
      <section class="card">
        <h2>Original Database Explorer</h2>
        <div class="db-tabs" id="db-tabs"></div>
        <div class="inline" style="margin-bottom:8px">
          <input id="db-search" type="search" placeholder="Search records..." style="flex:1" />
          <select id="db-limit">
            <option value="200">200</option>
            <option value="500">500</option>
            <option value="1000">1000</option>
          </select>
        </div>
        <div class="table-wrap"><table id="db-table"></table></div>
      </section>
      <section class="card">
        <h2>Selected Source Record</h2>
        <pre id="db-detail" class="json-view">Click a row in the database table to inspect full JSON.</pre>
      </section>
    </div>
    <div class="charts">
      <section class="card">
        <h2>Benchmark Summary Radar</h2>
        <div class="chart-box"><canvas id="summary-chart"></canvas></div>
      </section>
      <section class="card">
        <h2>Episode Reward and Graph F1</h2>
        <div class="chart-box"><canvas id="trace-chart"></canvas></div>
      </section>
    </div>
    <section class="card">
      <h2>Benchmark Leaderboard</h2>
      <div class="inline" style="margin-bottom:8px">
        <label class="mono" for="leader-sort">Sort by</label>
        <select id="leader-sort" class="subtle">
          <option value="leaderboard_score">leaderboard_score</option>
          <option value="task_success_rate">task_success_rate</option>
          <option value="avg_graph_f1">avg_graph_f1</option>
          <option value="retrieval_signal">retrieval_signal</option>
          <option value="structural_signal">structural_signal</option>
          <option value="spawn_signal">spawn_signal</option>
          <option value="avg_reward">avg_reward</option>
        </select>
      </div>
      <div class="table-wrap"><table id="leaderboard-table"></table></div>
    </section>
  </div>
  <script>
    const payload = {payload_json};
    // Escape data-derived text before it is interpolated into innerHTML.
    function escapeHtml(value) {{
      const entities = {{ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" }};
      return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
    }}
    function metricCards(summary) {{
      const selected = [
        ["leaderboard_score", summary.leaderboard_score || 0],
        ["task_success_rate", summary.task_success_rate || 0],
        ["avg_graph_f1", summary.avg_graph_f1 || 0],
        ["retrieval_signal", summary.retrieval_signal || 0],
        ["structural_signal", summary.structural_signal || 0],
        ["tool_efficiency", summary.tool_efficiency || 0],
        ["avg_reward", summary.avg_reward || 0]
      ];
      const root = document.getElementById("stats");
      root.innerHTML = "";
      selected.forEach(([k, v]) => {{
        const div = document.createElement("div");
        div.className = "stat";
        div.innerHTML = `<div class="k">${{k}}</div><div class="v">${{Number(v).toFixed(3)}}</div>`;
        root.appendChild(div);
      }});
      const pillRow = document.getElementById("hero-pills");
      pillRow.innerHTML = "";
      [
        `deanonymization: ${{Number(summary.deanonymization_accuracy || 0).toFixed(3)}}`,
        `avg steps: ${{Number(summary.avg_steps_to_solution || 0).toFixed(2)}}`,
        `episodes: ${{(payload.episodes || []).length}}`
      ].forEach((text) => {{
        const span = document.createElement("span");
        span.className = "pill";
        span.textContent = text;
        pillRow.appendChild(span);
      }});
    }}
    function buildTypeFilters(allGroups) {{
      const root = document.getElementById("type-filters");
      root.innerHTML = "";
      allGroups.forEach((group) => {{
        const safeGroup = escapeHtml(group);
        const id = `type_${{safeGroup}}`;
        const row = document.createElement("label");
        row.className = "inline";
        row.innerHTML = `<input type="checkbox" id="${{id}}" value="${{safeGroup}}" checked /> <span>${{safeGroup}}</span>`;
        root.appendChild(row);
      }});
    }}
    function createNetworkController() {{
      const container = document.getElementById("graph-canvas");
      const banner = document.getElementById("graph-banner");
      const modeSelect = document.getElementById("graph-mode");
      const nodeSearch = document.getElementById("graph-search");
      const relFilter = document.getElementById("relation-filter");
      const fitBtn = document.getElementById("fit-graph");
      const rawLayers = {{
        canonical: payload.canonical_graph || {{ nodes: [], edges: [] }},
        episode: payload.episode_graph || {{ nodes: [], edges: [] }}
      }};
      const episodeLayers = payload.episode_graphs || [];
      const groupSet = new Set();
      (rawLayers.canonical.nodes || []).forEach((n) => groupSet.add(n.group || "unknown"));
      (episodeLayers || []).forEach((layer) => {{
        (layer.nodes || []).forEach((n) => groupSet.add(n.group || "unknown"));
      }});
      const allGroups = Array.from(groupSet).sort();
      buildTypeFilters(allGroups);
      const state = {{
        mode: "canonical",
        relationQuery: "",
        nodeQuery: "",
        selectedEpisode: Math.max(0, (payload.episodes || []).length - 1),
      }};
      function currentEpisodeLayer() {{
        if (!episodeLayers.length) {{
          return rawLayers.episode;
        }}
        const idx = Math.max(0, Math.min(episodeLayers.length - 1, Number(state.selectedEpisode || 0)));
        return episodeLayers[idx] || rawLayers.episode;
      }}
      const nodesDS = new vis.DataSet([]);
      const edgesDS = new vis.DataSet([]);
      const network = new vis.Network(container, {{ nodes: nodesDS, edges: edgesDS }}, {{
        interaction: {{ hover: true, navigationButtons: true, keyboard: true }},
        physics: {{ stabilization: false, barnesHut: {{ springLength: 130 }} }},
        edges: {{ smooth: true, font: {{ size: 10 }} }},
        nodes: {{ shape: "dot", size: 11, font: {{ size: 10 }} }}
      }});
      function activeGroups() {{
        const checked = Array.from(document.querySelectorAll('#type-filters input[type="checkbox"]:checked'));
        return new Set(checked.map(x => x.value));
      }}
      function styleNode(node, query) {{
        const text = `${{node.id}} ${{node.label || ""}}`.toLowerCase();
        const hit = query && text.includes(query);
        return {{
          ...node,
          color: hit ? "#f59e0b" : undefined,
          size: hit ? 18 : 11,
        }};
      }}
      function refresh() {{
        const raw = state.mode === "episode" ? currentEpisodeLayer() : rawLayers.canonical;
        const groups = activeGroups();
        const relQ = state.relationQuery.toLowerCase();
        const nodeQ = state.nodeQuery.toLowerCase();
        const nodes = (raw.nodes || []).filter(n => groups.has(n.group || "unknown")).map(n => styleNode(n, nodeQ));
        const nodeIds = new Set(nodes.map(n => n.id));
        const edges = (raw.edges || []).filter(e => nodeIds.has(e.from) && nodeIds.has(e.to)).filter(e => !relQ || String(e.label || "").toLowerCase().includes(relQ));
        nodesDS.clear();
        edgesDS.clear();
        nodesDS.add(nodes);
        edgesDS.add(edges);
        banner.textContent = state.mode === "canonical" ? "Layer: Canonical Graph" : "Layer: Episode Graph";
      }}
      modeSelect.addEventListener("change", () => {{
        state.mode = modeSelect.value;
        refresh();
      }});
      document.addEventListener("osint-episode-change", (event) => {{
        state.selectedEpisode = Number(event.detail?.index || 0);
        if (state.mode === "episode") {{
          refresh();
        }}
      }});
      relFilter.addEventListener("input", () => {{
        state.relationQuery = relFilter.value || "";
        refresh();
      }});
      nodeSearch.addEventListener("input", () => {{
        state.nodeQuery = nodeSearch.value || "";
        refresh();
      }});
      fitBtn.addEventListener("click", () => network.fit({{ animation: true }}));
      document.getElementById("type-filters").addEventListener("change", refresh);
      network.on("click", (params) => {{
        if (params.nodes && params.nodes.length) {{
          const node = nodesDS.get(params.nodes[0]);
          const connected = network.getConnectedNodes(node.id) || [];
          document.getElementById("node-detail").textContent = JSON.stringify({{
            node,
            connected_nodes: connected
          }}, null, 2);
        }}
        if (params.edges && params.edges.length) {{
          const edge = edgesDS.get(params.edges[0]);
          document.getElementById("edge-detail").textContent = JSON.stringify(edge, null, 2);
        }}
      }});
      refresh();
    }}
    function buildRows(views) {{
      const rows = [];
      // Ids are coerced to strings so the search filter can call toLowerCase() on them.
      (views.microblog_posts || []).forEach((x) => rows.push({{ source: "microblog", id: String(x.post_id || "post"), text: JSON.stringify(x), raw: x }}));
      (views.forum_threads || []).forEach((x) => rows.push({{ source: "forum", id: String(x.thread_id || "thread"), text: JSON.stringify(x), raw: x }}));
      (views.profiles || []).forEach((x) => rows.push({{ source: "profile", id: String(x.user_id || "profile"), text: JSON.stringify(x), raw: x }}));
      return rows;
    }}
    function initDatabaseExplorer() {{
      const rows = buildRows(payload.views || {{}});
      const tabs = document.getElementById("db-tabs");
      const search = document.getElementById("db-search");
      const limit = document.getElementById("db-limit");
      const table = document.getElementById("db-table");
      const detail = document.getElementById("db-detail");
      const sources = ["all", "microblog", "forum", "profile"];
      const state = {{ source: "all", query: "", limit: 200 }};
      tabs.innerHTML = "";
      sources.forEach((src) => {{
        const btn = document.createElement("button");
        btn.className = `tab ${{src === state.source ? "active" : ""}}`;
        btn.textContent = src;
        btn.addEventListener("click", () => {{
          state.source = src;
          Array.from(tabs.children).forEach((child) => child.classList.remove("active"));
          btn.classList.add("active");
          render();
        }});
        tabs.appendChild(btn);
      }});
      function filtered() {{
        const q = state.query.toLowerCase();
        return rows
          .filter((row) => state.source === "all" || row.source === state.source)
          .filter((row) => !q || row.text.toLowerCase().includes(q) || row.id.toLowerCase().includes(q));
      }}
      function render() {{
        const show = filtered().slice(0, state.limit);
        table.innerHTML = "<thead><tr><th>source</th><th>id</th><th>preview</th></tr></thead>";
        const body = document.createElement("tbody");
        show.forEach((row) => {{
          const tr = document.createElement("tr");
          const preview = row.text.length > 120 ? `${{row.text.slice(0, 120)}}...` : row.text;
          tr.innerHTML = `<td>${{escapeHtml(row.source)}}</td><td class="mono">${{escapeHtml(row.id)}}</td><td>${{escapeHtml(preview)}}</td>`;
          tr.addEventListener("click", () => {{
            detail.textContent = JSON.stringify(row.raw, null, 2);
          }});
          body.appendChild(tr);
        }});
        table.appendChild(body);
      }}
      search.addEventListener("input", () => {{ state.query = search.value || ""; render(); }});
      limit.addEventListener("change", () => {{ state.limit = Number(limit.value || 200); render(); }});
      render();
    }}
    function renderLeaderboard(records, sortBy = "leaderboard_score") {{
      const sorted = [...records].sort((a, b) => (b.metrics?.[sortBy] || 0) - (a.metrics?.[sortBy] || 0));
      const table = document.getElementById("leaderboard-table");
      table.innerHTML = "<thead><tr><th>rank</th><th>run</th><th>score</th><th>success</th><th>graph_f1</th><th>retrieval</th><th>structural</th><th>spawn</th><th>reward</th></tr></thead>";
      const body = document.createElement("tbody");
      const num = (value, digits) => Number(value || 0).toFixed(digits);
      sorted.forEach((rec, i) => {{
        const m = rec.metrics || {{}};
        const tr = document.createElement("tr");
        tr.innerHTML = `<td>${{i + 1}}</td><td>${{escapeHtml(rec.run_name || rec.run_id || "run")}}</td><td>${{num(m.leaderboard_score, 4)}}</td><td>${{num(m.task_success_rate, 3)}}</td><td>${{num(m.avg_graph_f1, 3)}}</td><td>${{num(m.retrieval_signal, 3)}}</td><td>${{num(m.structural_signal, 3)}}</td><td>${{num(m.spawn_signal, 3)}}</td><td>${{num(m.avg_reward, 3)}}</td>`;
        body.appendChild(tr);
      }});
      table.appendChild(body);
    }}
    function drawSummaryChart(summary) {{
      const labels = ["success", "graph_f1", "tool_eff", "deanon", "retrieval", "structural", "score"];
      const values = [
        summary.task_success_rate || 0,
        summary.avg_graph_f1 || 0,
        summary.tool_efficiency || 0,
        summary.deanonymization_accuracy || 0,
        summary.retrieval_signal || 0,
        summary.structural_signal || 0,
        summary.leaderboard_score || 0,
      ];
      new Chart(document.getElementById("summary-chart"), {{
        type: "radar",
        data: {{
          labels,
          datasets: [{{
            label: "normalized metrics",
            data: values,
            backgroundColor: "rgba(15,118,110,0.2)",
            borderColor: "#0f766e",
            pointBackgroundColor: "#d97706",
            pointRadius: 3
          }}]
        }},
        options: {{ responsive: true, maintainAspectRatio: false, scales: {{ r: {{ min: 0, max: 1 }} }} }}
      }});
    }}
    function drawTraceChart(episodes) {{
      const labels = episodes.map((_, i) => `ep_${{i + 1}}`);
      const rewards = episodes.map(e => e.reward || 0);
      const f1 = episodes.map(e => e.graph_f1 || 0);
      new Chart(document.getElementById("trace-chart"), {{
        type: "line",
        data: {{
          labels,
          datasets: [
            {{ label: "reward", data: rewards, borderColor: "#0f766e", yAxisID: "y", tension: 0.2 }},
            {{ label: "graph_f1", data: f1, borderColor: "#d97706", yAxisID: "y1", tension: 0.2 }}
          ]
        }},
        options: {{
          responsive: true,
          maintainAspectRatio: false,
          scales: {{
            y: {{ position: "left" }},
            y1: {{ position: "right", min: 0, max: 1, grid: {{ drawOnChartArea: false }} }}
          }}
        }}
      }});
    }}
    function initEpisodeExplorer() {{
      const episodes = payload.episodes || [];
      const select = document.getElementById("episode-select");
      const prevBtn = document.getElementById("episode-prev");
      const nextBtn = document.getElementById("episode-next");
      function fillFromEpisode(ep) {{
        const fallback = payload.task || {{}};
        const taskId = ep?.task_id || fallback.task_id || "n/a";
        const taskType = ep?.task_type || fallback.task_type || "n/a";
        const question = ep?.question || fallback.question || "n/a";
        const truth = ep?.task_answer ?? fallback.answer ?? "n/a";
        const agent = ep?.agent_answer ?? "";
        const isCorrect = String(agent) === String(truth);
        document.getElementById("task-id").textContent = taskId;
        document.getElementById("task-type").textContent = taskType;
        document.getElementById("task-question").textContent = question;
        document.getElementById("task-answer").textContent = truth;
        document.getElementById("agent-answer").textContent = agent || "(no answer)";
        const correctEl = document.getElementById("answer-correct");
        correctEl.textContent = isCorrect ? "yes" : "no";
        correctEl.className = isCorrect ? "answer-ok" : "answer-bad";
      }}
      if (!episodes.length) {{
        select.innerHTML = "<option value='-1'>latest</option>";
        fillFromEpisode(null);
        prevBtn.disabled = true;
        nextBtn.disabled = true;
        return;
      }}
      select.innerHTML = episodes
        .map((ep, idx) => `<option value="${{idx}}">ep_${{idx + 1}} | ${{escapeHtml(ep.task_type || "task")}} | reward=${{Number(ep.reward || 0).toFixed(3)}}</option>`)
        .join("");
      select.value = String(Math.max(0, episodes.length - 1));
      function sync(delta = 0) {{
        const current = Math.max(0, Math.min(episodes.length - 1, Number(select.value || 0) + delta));
        select.value = String(current);
        fillFromEpisode(episodes[current]);
        document.dispatchEvent(new CustomEvent("osint-episode-change", {{ detail: {{ index: current }} }}));
      }}
      select.addEventListener("change", () => sync(0));
      prevBtn.addEventListener("click", () => sync(-1));
      nextBtn.addEventListener("click", () => sync(1));
      sync(0);
    }}
    const summary = payload.summary || {{}};
    metricCards(summary);
    initEpisodeExplorer();
    createNetworkController();
    initDatabaseExplorer();
    const leaderboard = payload.leaderboard || [];
    const leaderSort = document.getElementById("leader-sort");
    renderLeaderboard(leaderboard, leaderSort.value);
    leaderSort.addEventListener("change", () => renderLeaderboard(leaderboard, leaderSort.value));
    drawSummaryChart(summary);
    drawTraceChart(payload.episodes || []);
  </script>
</body>
</html>
"""


def export_dashboard(
    env: OSINTEnvironment,
    evaluation: dict[str, Any],
    leaderboard_records: list[dict[str, Any]],
    output_path: str,
) -> str:
    """Write a self-contained HTML dashboard for an evaluation run.

    The page embeds one JSON payload holding the evaluation summary, the
    per-episode records, the ranked leaderboard, the canonical graph, one
    comparison graph per episode, the platform views and the current task.

    Args:
        env: Environment whose ``graph``, ``views``, ``state`` and
            ``memory_graph`` are read.
        evaluation: Evaluation result. ``evaluation["summary"]`` is used when
            present, otherwise the whole dict is treated as the summary;
            ``evaluation["episodes"]`` defaults to an empty list.
        leaderboard_records: Records passed to ``_leaderboard_payload``.
        output_path: Destination file; missing parent directories are created.

    Returns:
        The path that was written, as a string.
    """
    summary = evaluation.get("summary", evaluation)
    episodes = evaluation.get("episodes", [])
    task: TaskInstance | None = env.state.task if env.state else None
    truth_edges = task.supporting_edges if task else []
    pred_edges = env.memory_graph.edges if env.state else []

    # The page looks up episode_graphs[i] with the index of the selected
    # episode, so the list has to stay aligned with ``episodes``. Episodes
    # without edges therefore get an empty graph instead of being skipped.
    episode_graphs: list[dict[str, Any]] = [
        _episode_graph_payload(
            _edges_from_records(episode.get("pred_edges")),
            _edges_from_records(episode.get("truth_edges")),
            env.graph,
        )
        for episode in episodes
    ]
    graphs_with_edges = [graph for graph in episode_graphs if graph["edges"]]
    if graphs_with_edges:
        latest_graph = graphs_with_edges[-1]
    else:
        # No episode carried any edges: fall back to the environment's live
        # state (memory graph versus the task's supporting edges).
        latest_graph = _episode_graph_payload(pred_edges, truth_edges, env.graph)
        episode_graphs = [latest_graph]

    payload = {
        "summary": summary,
        "episodes": episodes,
        "leaderboard": _leaderboard_payload(leaderboard_records),
        "canonical_graph": _canonical_graph_payload(env.graph),
        "episode_graphs": episode_graphs,
        "episode_graph": latest_graph,
        "views": _views_payload(env.views),
        "task": {
            "task_id": task.task_id if task else "n/a",
            "task_type": task.task_type if task else "n/a",
            "question": task.question if task else "n/a",
            "answer": task.answer if task else "n/a",
        },
    }

    html = _render_dashboard_html(_script_safe_json(payload))
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(html, encoding="utf-8")
    return str(out)