from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from osint_env.data.generator import PlatformViews
from osint_env.domain.models import CanonicalGraph, Edge, TaskInstance
from osint_env.env.environment import OSINTEnvironment

# Upper bound on the number of leaderboard rows placed in the payload.
_LEADERBOARD_LIMIT = 200


def _safe_label(value: str, fallback: str) -> str:
    """Return ``value`` stripped of surrounding whitespace, or ``fallback`` if nothing is left."""
    text = str(value).strip()
    return text if text else fallback


def _node_label(attrs: dict[str, Any], node_id: str) -> str:
    """Pick a display label: the ``name`` attr, else ``handle``, else the node id."""
    return _safe_label(str(attrs.get("name") or attrs.get("handle") or node_id), node_id)


def _canonical_graph_payload(graph: CanonicalGraph) -> dict[str, Any]:
    """Convert the canonical graph into a ``{"nodes": [...], "edges": [...]}`` dict.

    Each node carries its id, a display label, its node-type value as ``group``,
    a ``title`` built from its attributes, and the raw attributes. Each edge
    gets a positional id (``c_<index>``) and fixed styling with status
    ``"canonical"``.
    """
    nodes = []
    for node in graph.nodes.values():
        attrs = node.attrs or {}
        # The separator is the two characters backslash + "n", not a newline.
        # NOTE(review): presumably the front end expands it - confirm.
        title = "\\n".join(f"{k}: {v}" for k, v in attrs.items())
        nodes.append(
            {
                "id": node.node_id,
                "label": _node_label(attrs, node.node_id),
                "group": str(node.node_type.value),
                "title": title,
                "attrs": attrs,
            }
        )

    edges = [
        {
            "id": f"c_{idx}",
            "from": edge.src,
            "to": edge.dst,
            "label": edge.rel,
            "arrows": "to",
            "color": "#1f2937",
            "width": 1,
            "confidence": float(edge.confidence),
            "status": "canonical",
        }
        for idx, edge in enumerate(graph.edges)
    ]
    return {"nodes": nodes, "edges": edges}


def _edge_key(edge: Edge) -> tuple[str, str, str]:
    """Return the ``(src, rel, dst)`` triple used to compare edges."""
    return (edge.src, edge.rel, edge.dst)


def _episode_graph_payload(
    pred_edges: list[Edge],
    truth_edges: list[Edge],
    graph: CanonicalGraph,
) -> dict[str, Any]:
    """Build a graph payload that contrasts predicted edges with truth edges.

    Edges are matched on their ``(src, rel, dst)`` triple and tagged with a
    ``status`` of ``"matched"``, ``"pred_only"`` or ``"truth_only"`` (the last
    one is drawn dashed). Nodes are the endpoints of those edges; endpoints
    missing from ``graph`` are emitted with group ``"episode"`` and no attrs.
    """
    # Later duplicates of the same triple overwrite earlier ones.
    pred = {_edge_key(e): e for e in pred_edges}
    truth = {_edge_key(e): e for e in truth_edges}
    all_keys = set(pred) | set(truth)

    all_nodes = set()
    for src, _, dst in all_keys:
        all_nodes.add(src)
        all_nodes.add(dst)

    nodes = []
    for node_id in sorted(all_nodes):
        node = graph.nodes.get(node_id)
        if node is None:
            nodes.append({"id": node_id, "label": node_id, "group": "episode", "attrs": {}})
            continue
        attrs = node.attrs or {}
        nodes.append(
            {
                "id": node_id,
                "label": _node_label(attrs, node_id),
                "group": str(node.node_type.value),
                "attrs": attrs,
            }
        )

    edges = []
    # Sorting the keys keeps the generated edge ids stable across runs.
    for idx, key in enumerate(sorted(all_keys)):
        src, rel, dst = key
        in_pred = key in pred
        in_truth = key in truth
        if in_pred and in_truth:
            color, dashes, status = "#16a34a", False, "matched"
        elif in_pred:
            color, dashes, status = "#2563eb", False, "pred_only"
        else:
            color, dashes, status = "#f59e0b", True, "truth_only"

        # Every key comes from pred or truth, so one of them always has it;
        # the predicted edge's confidence wins when both do.
        source_edge = pred[key] if in_pred else truth[key]
        edges.append(
            {
                "id": f"e_{idx}",
                "from": src,
                "to": dst,
                "label": rel,
                "arrows": "to",
                "color": color,
                "dashes": dashes,
                "width": 2,
                "status": status,
                "confidence": float(source_edge.confidence),
            }
        )
    return {"nodes": nodes, "edges": edges}


def _views_payload(views: PlatformViews) -> dict[str, Any]:
    """Expose the three platform record collections under fixed keys, unchanged."""
    return {
        "microblog_posts": views.microblog_posts,
        "forum_threads": views.forum_threads,
        "profiles": views.profiles,
    }


def _leaderboard_score(record: dict[str, Any]) -> float:
    """Read ``record["metrics"]["leaderboard_score"]`` as a float, defaulting to 0.0.

    A missing, ``None`` or non-dict ``metrics`` entry and a missing, ``None``
    or non-numeric score all count as 0.0, so one malformed record cannot
    abort the whole export.
    """
    metrics = record.get("metrics")
    if not isinstance(metrics, dict):
        return 0.0
    try:
        return float(metrics.get("leaderboard_score", 0.0))
    except (TypeError, ValueError):
        return 0.0


def _leaderboard_payload(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the top records ordered by descending leaderboard score (at most 200)."""
    ranked = sorted(records, key=_leaderboard_score, reverse=True)
    return ranked[:_LEADERBOARD_LIMIT]


def _edges_from_records(records: Any) -> list[Edge]:
    """Turn a list of edge dicts into ``Edge`` objects.

    Non-dict entries are skipped. Missing ``src``/``rel``/``dst`` become empty
    strings; a missing or ``None`` ``confidence`` becomes 1.0. A ``None``
    record list is treated as empty.
    """
    edges = []
    for record in records or []:
        if not isinstance(record, dict):
            continue
        confidence = record.get("confidence")
        edges.append(
            Edge(
                str(record.get("src", "")),
                str(record.get("rel", "")),
                str(record.get("dst", "")),
                float(1.0 if confidence is None else confidence),
            )
        )
    return edges


def export_dashboard(
    env: OSINTEnvironment,
    evaluation: dict[str, Any],
    leaderboard_records: list[dict[str, Any]],
    output_path: str,
) -> str:
    """Write the dashboard HTML file and return its path.

    Args:
        env: Environment supplying the canonical graph, the platform views
            and, when ``env.state`` is set, the current task and memory graph.
        evaluation: Evaluation result. Its ``"summary"`` entry is used when
            present, otherwise the whole dict serves as the summary;
            ``"episodes"`` defaults to an empty list.
        leaderboard_records: Records ranked by ``metrics.leaderboard_score``.
        output_path: Destination file; missing parent directories are created.

    Returns:
        The output path as a string.
    """
    summary = evaluation.get("summary", evaluation)
    episodes = evaluation.get("episodes", [])

    task: TaskInstance | None = env.state.task if env.state else None
    truth_edges = task.supporting_edges if task else []
    pred_edges = env.memory_graph.edges if env.state else []

    episode_graphs: list[dict[str, Any]] = []
    for episode in episodes:
        pred_from_eval = _edges_from_records(episode.get("pred_edges", []))
        truth_from_eval = _edges_from_records(episode.get("truth_edges", []))
        # Episodes that recorded no edges at all get no graph entry.
        if pred_from_eval or truth_from_eval:
            episode_graphs.append(_episode_graph_payload(pred_from_eval, truth_from_eval, env.graph))

    # Fall back to the live environment state so there is always one graph
    # and the ``episode_graphs[-1]`` lookup below cannot fail.
    if not episode_graphs:
        episode_graphs.append(_episode_graph_payload(pred_edges, truth_edges, env.graph))

    # NOTE(review): the template below, as it appears here, never interpolates
    # ``payload`` (and ``json`` is otherwise unused). Presumably the markup or
    # script that embedded ``json.dumps(payload)`` is missing - confirm
    # against the intended template.
    payload = {
        "summary": summary,
        "episodes": episodes,
        "leaderboard": _leaderboard_payload(leaderboard_records),
        "canonical_graph": _canonical_graph_payload(env.graph),
        "episode_graphs": episode_graphs,
        "episode_graph": episode_graphs[-1],
        "views": _views_payload(env.views),
        "task": {
            "task_id": task.task_id if task else "n/a",
            "task_type": task.task_type if task else "n/a",
            "question": task.question if task else "n/a",
            "answer": task.answer if task else "n/a",
        },
    }

    html = f""" OSINT Environment Dashboard

OSINT Benchmark Dashboard

Interactive explorer for canonical knowledge graph, episode traces, source platform records, and benchmark ranking.

Episode Explorer

Task ID:
Task Type:
Question
Ground Truth Answer:
Agent Answer:
Correct:

Graph Controls

Node Types

Graph Explorer

Layer: Canonical Graph
matched edge predicted only truth only

Node Inspector

Click a node to inspect attributes and neighbors.

Edge Inspector

Click an edge to inspect relation details.

Original Database Explorer

Selected Source Record

Click a row in the database table to inspect full JSON.

Benchmark Summary Radar

Episode Reward and Graph F1

Benchmark Leaderboard

"""
    out = Path(output_path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(html, encoding="utf-8")
    return str(out)