siddeshwar-kagatikar commited on
Commit
9af411f
·
1 Parent(s): 281dcb4

Fix seeded context retrieval and latest Space dashboard

Browse files
server.py CHANGED
@@ -37,6 +37,13 @@ def _load_json(path: Path) -> dict[str, Any] | None:
37
  return payload if isinstance(payload, dict) else None
38
 
39
 
 
 
 
 
 
 
 
40
  def _build_environment() -> OSINTEnvironment:
41
  shared = load_shared_config(SPACE_CONFIG_PATH)
42
  env_cfg = clone_environment_config(shared.environment)
@@ -97,31 +104,37 @@ def _space_snapshot() -> dict[str, Any]:
97
  snapshot = dict(_base_environment_snapshot())
98
 
99
  baseline_payload = _load_json(LATEST_BASELINE_OUTPUT)
 
 
 
100
  if baseline_payload is not None and isinstance(baseline_payload.get("summary"), dict):
101
- dashboard_path = Path(
102
- str(
103
- ((baseline_payload.get("run") or {}).get("dashboard_path"))
104
- or "artifacts/baselines/openai_fixed_levels_dashboard.html"
 
 
 
 
 
 
 
 
 
 
105
  )
106
- )
107
- if dashboard_path.exists():
108
- snapshot["dashboard_path"] = str(dashboard_path)
109
- snapshot["summary"] = dict(baseline_payload["summary"])
110
- snapshot["source"] = "baseline_output"
111
- return snapshot
112
 
113
- evaluation_payload = _load_json(LATEST_EVALUATION_OUTPUT)
114
- if evaluation_payload is not None and isinstance(evaluation_payload.get("summary"), dict):
115
  env = _build_environment()
116
  dashboard_path = export_dashboard(
117
  env=env,
118
- evaluation=evaluation_payload,
119
  leaderboard_records=[],
120
  output_path=str(SPACE_DASHBOARD),
121
  )
122
- snapshot["summary"] = dict(evaluation_payload["summary"])
123
  snapshot["dashboard_path"] = dashboard_path
124
- snapshot["source"] = "latest_evaluation"
125
  return snapshot
126
 
127
  preview = _preview_snapshot()
 
37
  return payload if isinstance(payload, dict) else None
38
 
39
 
40
+ def _path_mtime(path: Path) -> float:
41
+ try:
42
+ return path.stat().st_mtime
43
+ except OSError:
44
+ return 0.0
45
+
46
+
47
  def _build_environment() -> OSINTEnvironment:
48
  shared = load_shared_config(SPACE_CONFIG_PATH)
49
  env_cfg = clone_environment_config(shared.environment)
 
104
  snapshot = dict(_base_environment_snapshot())
105
 
106
  baseline_payload = _load_json(LATEST_BASELINE_OUTPUT)
107
+ evaluation_payload = _load_json(LATEST_EVALUATION_OUTPUT)
108
+
109
+ candidates: list[tuple[float, str, dict[str, Any]]] = []
110
  if baseline_payload is not None and isinstance(baseline_payload.get("summary"), dict):
111
+ candidates.append((_path_mtime(LATEST_BASELINE_OUTPUT), "baseline_output", baseline_payload))
112
+ if evaluation_payload is not None and isinstance(evaluation_payload.get("summary"), dict):
113
+ candidates.append((_path_mtime(LATEST_EVALUATION_OUTPUT), "latest_evaluation", evaluation_payload))
114
+
115
+ if candidates:
116
+ _, source, payload = max(candidates, key=lambda item: item[0])
117
+ snapshot["summary"] = dict(payload["summary"])
118
+ snapshot["source"] = source
119
+ if source == "baseline_output":
120
+ dashboard_path = Path(
121
+ str(
122
+ ((payload.get("run") or {}).get("dashboard_path"))
123
+ or "artifacts/baselines/openai_fixed_levels_dashboard.html"
124
+ )
125
  )
126
+ if dashboard_path.exists():
127
+ snapshot["dashboard_path"] = str(dashboard_path)
128
+ return snapshot
 
 
 
129
 
 
 
130
  env = _build_environment()
131
  dashboard_path = export_dashboard(
132
  env=env,
133
+ evaluation=payload,
134
  leaderboard_records=[],
135
  output_path=str(SPACE_DASHBOARD),
136
  )
 
137
  snapshot["dashboard_path"] = dashboard_path
 
138
  return snapshot
139
 
140
  preview = _preview_snapshot()
src/osint_env/baselines/openai_runner.py CHANGED
@@ -22,6 +22,9 @@ Available actions are provided as function tools. On every turn, call exactly on
22
  Rules:
23
  - Solve the question using only tool outputs and the current graph snapshot.
24
  - When you have enough evidence, call submit_answer with the exact node id string.
 
 
 
25
  - Use add_edge only for relationships strongly supported by the evidence you have already collected.
26
  - Prefer concise, high-signal tool queries.
27
  - Never guess free-form prose when a node id answer is required.
@@ -74,13 +77,19 @@ def build_action_tools() -> list[dict[str, Any]]:
74
  return [
75
  _tool_schema(
76
  "search_posts",
77
- "Search microblog posts by substring query.",
78
  {"query": {"type": "string", "description": "Substring to search for in post text."}},
79
  ["query"],
80
  ),
 
 
 
 
 
 
81
  _tool_schema(
82
  "get_user_posts",
83
- "Fetch posts authored by a user or alias id.",
84
  {"user_id": {"type": "string", "description": "User or alias node id."}},
85
  ["user_id"],
86
  ),
@@ -110,13 +119,13 @@ def build_action_tools() -> list[dict[str, Any]]:
110
  ),
111
  _tool_schema(
112
  "get_profile",
113
- "Fetch a profile record by canonical user id.",
114
- {"user_id": {"type": "string", "description": "Canonical user node id."}},
115
  ["user_id"],
116
  ),
117
  _tool_schema(
118
  "search_people",
119
- "Search profiles by name and or organization.",
120
  {
121
  "name": {"type": "string", "description": "Optional name substring.", "default": ""},
122
  "org": {"type": "string", "description": "Optional organization substring.", "default": ""},
@@ -125,8 +134,8 @@ def build_action_tools() -> list[dict[str, Any]]:
125
  ),
126
  _tool_schema(
127
  "get_connections",
128
- "Fetch explicit profile connections for a user.",
129
- {"user_id": {"type": "string", "description": "Canonical user node id."}},
130
  ["user_id"],
131
  ),
132
  _tool_schema(
@@ -284,11 +293,12 @@ class OpenAIBaselineRunner:
284
 
285
  def _episode(self, env: OSINTEnvironment, episode_index: int) -> tuple[dict[str, Any], dict[str, Any]]:
286
  obs = env.reset()
 
287
  messages: list[dict[str, Any]] = [
288
  {"role": "system", "content": SYSTEM_PROMPT},
289
  {
290
  "role": "user",
291
- "content": json.dumps(_observation_payload(env, obs, env.config.max_steps), indent=2, sort_keys=True),
292
  },
293
  ]
294
 
@@ -322,7 +332,14 @@ class OpenAIBaselineRunner:
322
  }
323
  messages.append({"role": "assistant", "content": content})
324
  messages.append({"role": "tool", "tool_call_id": "fallback_submit", "content": json.dumps(tool_result)})
325
- turn_trace.append({"assistant_content": content, "tool_name": "submit_answer", "args": {"answer": fallback_answer}})
 
 
 
 
 
 
 
326
  break
327
 
328
  tool_call = tool_calls[0]
@@ -360,16 +377,41 @@ class OpenAIBaselineRunner:
360
  }
361
  messages.append(assistant_message)
362
  messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(tool_payload, sort_keys=True)})
363
- turn_trace.append({"assistant_content": content, "tool_name": tool_name, "args": args, "reward": reward, "done": done})
 
 
 
 
 
 
 
 
 
364
 
365
  if not done:
366
  obs, _, done, info = env.step(Action(ActionType.ANSWER, {"answer": "unknown"}))
367
- turn_trace.append({"assistant_content": "", "tool_name": "submit_answer", "args": {"answer": "unknown"}, "reward": 0.0, "done": done})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  info = dict(info)
370
  info["openai_system_fingerprints"] = raw_fingerprints
371
  info["usage"] = usage_totals
372
- return info, {"turns": turn_trace}
373
 
374
  def run(self) -> dict[str, Any]:
375
  env = self._build_environment()
 
22
  Rules:
23
  - Solve the question using only tool outputs and the current graph snapshot.
24
  - When you have enough evidence, call submit_answer with the exact node id string.
25
+ - Questions may contain exact node ids such as alias_*, user_*, post_*, thr_*, org_*, loc_*, and event_*.
26
+ - Prefer direct id lookups when an exact id is present in the question.
27
+ - get_post and get_thread retrieve exact seeded records by id.
28
  - Use add_edge only for relationships strongly supported by the evidence you have already collected.
29
  - Prefer concise, high-signal tool queries.
30
  - Never guess free-form prose when a node id answer is required.
 
77
  return [
78
  _tool_schema(
79
  "search_posts",
80
+ "Search microblog posts by substring over post text, post id, author id, canonical user id, or referenced entity ids/names.",
81
  {"query": {"type": "string", "description": "Substring to search for in post text."}},
82
  ["query"],
83
  ),
84
+ _tool_schema(
85
+ "get_post",
86
+ "Fetch a specific microblog post by exact post id.",
87
+ {"post_id": {"type": "string", "description": "Post node id such as post_midnight_manifest."}},
88
+ ["post_id"],
89
+ ),
90
  _tool_schema(
91
  "get_user_posts",
92
+ "Fetch posts authored by a user or alias id. Alias ids are resolved to the canonical user and vice versa.",
93
  {"user_id": {"type": "string", "description": "User or alias node id."}},
94
  ["user_id"],
95
  ),
 
119
  ),
120
  _tool_schema(
121
  "get_profile",
122
+ "Fetch a profile record by canonical user id or alias id.",
123
+ {"user_id": {"type": "string", "description": "Canonical user node id or alias id."}},
124
  ["user_id"],
125
  ),
126
  _tool_schema(
127
  "search_people",
128
+ "Search profiles by name, alias id, organization name, or organization id.",
129
  {
130
  "name": {"type": "string", "description": "Optional name substring.", "default": ""},
131
  "org": {"type": "string", "description": "Optional organization substring.", "default": ""},
 
134
  ),
135
  _tool_schema(
136
  "get_connections",
137
+ "Fetch explicit profile connections for a user or alias id.",
138
+ {"user_id": {"type": "string", "description": "Canonical user node id or alias id."}},
139
  ["user_id"],
140
  ),
141
  _tool_schema(
 
293
 
294
  def _episode(self, env: OSINTEnvironment, episode_index: int) -> tuple[dict[str, Any], dict[str, Any]]:
295
  obs = env.reset()
296
+ initial_observation = _observation_payload(env, obs, env.config.max_steps)
297
  messages: list[dict[str, Any]] = [
298
  {"role": "system", "content": SYSTEM_PROMPT},
299
  {
300
  "role": "user",
301
+ "content": json.dumps(initial_observation, indent=2, sort_keys=True),
302
  },
303
  ]
304
 
 
332
  }
333
  messages.append({"role": "assistant", "content": content})
334
  messages.append({"role": "tool", "tool_call_id": "fallback_submit", "content": json.dumps(tool_result)})
335
+ turn_trace.append(
336
+ {
337
+ "assistant_content": content,
338
+ "tool_name": "submit_answer",
339
+ "args": {"answer": fallback_answer},
340
+ "tool_payload": tool_result,
341
+ }
342
+ )
343
  break
344
 
345
  tool_call = tool_calls[0]
 
377
  }
378
  messages.append(assistant_message)
379
  messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(tool_payload, sort_keys=True)})
380
+ turn_trace.append(
381
+ {
382
+ "assistant_content": content,
383
+ "tool_name": tool_name,
384
+ "args": args,
385
+ "reward": reward,
386
+ "done": done,
387
+ "tool_payload": tool_payload,
388
+ }
389
+ )
390
 
391
  if not done:
392
  obs, _, done, info = env.step(Action(ActionType.ANSWER, {"answer": "unknown"}))
393
+ final_payload = {
394
+ "submitted_answer": "unknown",
395
+ "reward": 0.0,
396
+ "done": done,
397
+ "observation": _observation_payload(env, obs, env.config.max_steps),
398
+ "info": _safe_info(info),
399
+ }
400
+ turn_trace.append(
401
+ {
402
+ "assistant_content": "",
403
+ "tool_name": "submit_answer",
404
+ "args": {"answer": "unknown"},
405
+ "reward": 0.0,
406
+ "done": done,
407
+ "tool_payload": final_payload,
408
+ }
409
+ )
410
 
411
  info = dict(info)
412
  info["openai_system_fingerprints"] = raw_fingerprints
413
  info["usage"] = usage_totals
414
+ return info, {"initial_observation": initial_observation, "turns": turn_trace}
415
 
416
  def run(self) -> dict[str, Any]:
417
  env = self._build_environment()
src/osint_env/data/generator.py CHANGED
@@ -27,6 +27,7 @@ class PlatformViews:
27
  microblog_posts: list[dict]
28
  forum_threads: list[dict]
29
  profiles: list[dict]
 
30
 
31
 
32
  class DatasetGenerator:
@@ -589,6 +590,13 @@ class DatasetGenerator:
589
  users = [n for n in graph.nodes.values() if n.node_type == NodeType.USER]
590
  aliases = [n for n in graph.nodes.values() if n.node_type == NodeType.ALIAS]
591
  alias_owner = {e.src: e.dst for e in graph.edges if e.rel == "alias_of"}
 
 
 
 
 
 
 
592
 
593
  microblog_posts: list[dict] = []
594
  for i, user in enumerate(users):
@@ -605,11 +613,46 @@ class DatasetGenerator:
605
  "user_id": poster,
606
  "canonical_user": alias_owner.get(poster, user.node_id),
607
  "text": text,
 
 
608
  "mentions": [f"user_{self.rng.randint(0, self.config.n_users - 1)}"],
609
  "timestamp": 1000 + i,
610
  }
611
  )
612
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  forum_threads: list[dict] = []
614
  for i in range(max(8, self.config.n_users // 3)):
615
  author = self.rng.choice(users).node_id
@@ -622,18 +665,60 @@ class DatasetGenerator:
622
  {"user_id": self.rng.choice(users).node_id, "text": "Following this."},
623
  {"user_id": self.rng.choice(users).node_id, "text": "Interesting link."},
624
  ],
 
 
625
  }
626
  )
627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  profiles: list[dict] = []
629
  for user in users:
630
  conns = [e.dst for e in graph.edges if e.src == user.node_id and e.rel == "connected_to"][:5]
 
 
631
  profiles.append(
632
  {
633
  "user_id": user.node_id,
634
  "name": user.attrs["name"],
635
  "org": user.attrs["org"],
 
636
  "location": user.attrs["location"],
 
 
637
  "connections": conns,
638
  "work_history": [user.attrs["org"]],
639
  }
@@ -645,12 +730,15 @@ class DatasetGenerator:
645
  "user_id": f"noise_{i}",
646
  "name": f"P{self.rng.randint(100,999)}",
647
  "org": self.rng.choice(["Stealth Co", "Unknown Ventures"]),
 
648
  "location": self.rng.choice(["Remote", "Unknown"]),
 
 
649
  "connections": [],
650
  "work_history": [],
651
  }
652
  )
653
- return PlatformViews(microblog_posts, forum_threads, profiles)
654
 
655
  def generate_tasks(self, graph: CanonicalGraph, views: PlatformViews, count: int = 12) -> list[TaskInstance]:
656
  tasks = self._seeded_tasks(graph)
 
27
  microblog_posts: list[dict]
28
  forum_threads: list[dict]
29
  profiles: list[dict]
30
+ alias_lookup: dict[str, str]
31
 
32
 
33
  class DatasetGenerator:
 
590
  users = [n for n in graph.nodes.values() if n.node_type == NodeType.USER]
591
  aliases = [n for n in graph.nodes.values() if n.node_type == NodeType.ALIAS]
592
  alias_owner = {e.src: e.dst for e in graph.edges if e.rel == "alias_of"}
593
+ user_aliases: dict[str, list[str]] = {}
594
+ for alias_id, user_id in alias_owner.items():
595
+ user_aliases.setdefault(user_id, []).append(alias_id)
596
+ node_names = {
597
+ node_id: str((node.attrs or {}).get("name") or (node.attrs or {}).get("handle") or node_id)
598
+ for node_id, node in graph.nodes.items()
599
+ }
600
 
601
  microblog_posts: list[dict] = []
602
  for i, user in enumerate(users):
 
613
  "user_id": poster,
614
  "canonical_user": alias_owner.get(poster, user.node_id),
615
  "text": text,
616
+ "references": [],
617
+ "reference_names": [],
618
  "mentions": [f"user_{self.rng.randint(0, self.config.n_users - 1)}"],
619
  "timestamp": 1000 + i,
620
  }
621
  )
622
 
623
+ authored_posts: dict[str, str] = {}
624
+ post_references: dict[str, list[str]] = {}
625
+ for edge in graph.edges:
626
+ if edge.rel == "authored_post":
627
+ authored_posts[edge.dst] = edge.src
628
+ elif edge.rel == "references" and edge.src.startswith("post_"):
629
+ post_references.setdefault(edge.src, []).append(edge.dst)
630
+
631
+ for post_id, author_id in authored_posts.items():
632
+ refs = post_references.get(post_id, [])
633
+ ref_names = [node_names.get(ref, ref) for ref in refs]
634
+ author_label = node_names.get(author_id, author_id)
635
+ text_parts = [f"{post_id} update from {author_label}"]
636
+ if ref_names:
637
+ text_parts.append("references " + ", ".join(ref_names))
638
+ if refs:
639
+ text_parts.append("ids " + ", ".join(refs))
640
+ post_payload = {
641
+ "post_id": post_id,
642
+ "user_id": author_id,
643
+ "canonical_user": alias_owner.get(author_id, author_id),
644
+ "text": ". ".join(text_parts),
645
+ "references": refs,
646
+ "reference_names": ref_names,
647
+ "mentions": [],
648
+ "timestamp": 5000 + len(microblog_posts),
649
+ }
650
+ existing_idx = next((idx for idx, row in enumerate(microblog_posts) if row["post_id"] == post_id), None)
651
+ if existing_idx is None:
652
+ microblog_posts.append(post_payload)
653
+ else:
654
+ microblog_posts[existing_idx] = post_payload
655
+
656
  forum_threads: list[dict] = []
657
  for i in range(max(8, self.config.n_users // 3)):
658
  author = self.rng.choice(users).node_id
 
665
  {"user_id": self.rng.choice(users).node_id, "text": "Following this."},
666
  {"user_id": self.rng.choice(users).node_id, "text": "Interesting link."},
667
  ],
668
+ "references": [],
669
+ "discusses": [],
670
  }
671
  )
672
 
673
+ authored_threads: dict[str, str] = {}
674
+ thread_refs: dict[str, list[str]] = {}
675
+ thread_discusses: dict[str, list[str]] = {}
676
+ for edge in graph.edges:
677
+ if edge.rel == "authored_thread":
678
+ authored_threads[edge.dst] = edge.src
679
+ elif edge.rel == "references" and edge.src.startswith(("thr_", "thread_")):
680
+ thread_refs.setdefault(edge.src, []).append(edge.dst)
681
+ elif edge.rel == "discusses" and edge.src.startswith(("thr_", "thread_")):
682
+ thread_discusses.setdefault(edge.src, []).append(edge.dst)
683
+
684
+ for thread_id, author_id in authored_threads.items():
685
+ node = graph.nodes.get(thread_id)
686
+ refs = thread_refs.get(thread_id, [])
687
+ discussed = thread_discusses.get(thread_id, [])
688
+ comments = []
689
+ for ref in refs:
690
+ comments.append({"user_id": author_id, "text": f"Reference: {node_names.get(ref, ref)} ({ref})"})
691
+ for item in discussed:
692
+ comments.append({"user_id": author_id, "text": f"Discusses: {node_names.get(item, item)} ({item})"})
693
+ thread_payload = {
694
+ "thread_id": thread_id,
695
+ "topic": str((node.attrs or {}).get("topic", "seeded")) if node else "seeded",
696
+ "author_id": author_id,
697
+ "title": node_names.get(thread_id, thread_id),
698
+ "comments": comments,
699
+ "references": refs,
700
+ "discusses": discussed,
701
+ }
702
+ existing_idx = next((idx for idx, row in enumerate(forum_threads) if row["thread_id"] == thread_id), None)
703
+ if existing_idx is None:
704
+ forum_threads.append(thread_payload)
705
+ else:
706
+ forum_threads[existing_idx] = thread_payload
707
+
708
  profiles: list[dict] = []
709
  for user in users:
710
  conns = [e.dst for e in graph.edges if e.src == user.node_id and e.rel == "connected_to"][:5]
711
+ org_id = next((e.dst for e in graph.edges if e.src == user.node_id and e.rel == "works_at"), "")
712
+ location_id = next((e.dst for e in graph.edges if e.src == user.node_id and e.rel == "located_in"), "")
713
  profiles.append(
714
  {
715
  "user_id": user.node_id,
716
  "name": user.attrs["name"],
717
  "org": user.attrs["org"],
718
+ "org_id": org_id,
719
  "location": user.attrs["location"],
720
+ "location_id": location_id,
721
+ "alias_ids": sorted(user_aliases.get(user.node_id, [])),
722
  "connections": conns,
723
  "work_history": [user.attrs["org"]],
724
  }
 
730
  "user_id": f"noise_{i}",
731
  "name": f"P{self.rng.randint(100,999)}",
732
  "org": self.rng.choice(["Stealth Co", "Unknown Ventures"]),
733
+ "org_id": "",
734
  "location": self.rng.choice(["Remote", "Unknown"]),
735
+ "location_id": "",
736
+ "alias_ids": [],
737
  "connections": [],
738
  "work_history": [],
739
  }
740
  )
741
+ return PlatformViews(microblog_posts, forum_threads, profiles, alias_lookup=alias_owner)
742
 
743
  def generate_tasks(self, graph: CanonicalGraph, views: PlatformViews, count: int = 12) -> list[TaskInstance]:
744
  tasks = self._seeded_tasks(graph)
src/osint_env/platforms/tools.py CHANGED
@@ -9,13 +9,40 @@ from osint_env.data.generator import PlatformViews
9
  class ToolRegistry:
10
  def __init__(self, views: PlatformViews):
11
  self.views = views
 
12
  self._index()
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def _index(self) -> None:
15
  self.posts_by_user: dict[str, list[dict[str, Any]]] = defaultdict(list)
16
  self.mentions_by_user: dict[str, list[dict[str, Any]]] = defaultdict(list)
 
17
  for post in self.views.microblog_posts:
18
  self.posts_by_user[post["user_id"]].append(post)
 
 
 
19
  for m in post.get("mentions", []):
20
  self.mentions_by_user[m].append(post)
21
 
@@ -36,19 +63,59 @@ class ToolRegistry:
36
 
37
  def search_posts(self, query: str, time_range: tuple[int, int] | None = None) -> dict[str, Any]:
38
  start, end = time_range or (0, 10**9)
 
39
  results = [
40
- p for p in self.views.microblog_posts if query.lower() in p["text"].lower() and start <= p["timestamp"] <= end
 
 
 
 
 
 
 
 
 
 
41
  ]
42
  return {"results": results[:20], "count": len(results)}
43
 
 
 
 
 
44
  def get_user_posts(self, user_id: str) -> dict[str, Any]:
45
- return {"results": self.posts_by_user.get(user_id, []), "count": len(self.posts_by_user.get(user_id, []))}
 
 
 
 
 
 
 
 
 
46
 
47
  def get_mentions(self, user_id: str) -> dict[str, Any]:
48
- return {"results": self.mentions_by_user.get(user_id, []), "count": len(self.mentions_by_user.get(user_id, []))}
 
 
 
 
 
 
 
 
 
49
 
50
  def search_threads(self, topic: str) -> dict[str, Any]:
51
- results = [t for t in self.views.forum_threads if t["topic"] == topic]
 
 
 
 
 
 
 
52
  return {"results": results[:20], "count": len(results)}
53
 
54
  def get_thread(self, thread_id: str) -> dict[str, Any]:
@@ -56,21 +123,46 @@ class ToolRegistry:
56
  return {"result": thread, "found": thread is not None}
57
 
58
  def get_user_activity(self, user_id: str) -> dict[str, Any]:
59
- acts = self.activity_by_user.get(user_id, [])
 
 
 
 
 
 
 
 
60
  return {"results": acts, "count": len(acts)}
61
 
62
  def get_profile(self, user_id: str) -> dict[str, Any]:
63
- profile = self.profiles_by_user.get(user_id)
 
64
  return {"result": profile, "found": profile is not None}
65
 
66
  def search_people(self, name: str | None = None, org: str | None = None) -> dict[str, Any]:
67
  results = self.views.profiles
68
  if name:
69
- results = [p for p in results if name.lower() in p["name"].lower()]
 
 
 
 
 
 
 
70
  if org:
71
- results = [p for p in results if org.lower() in p["org"].lower()]
 
 
 
 
 
 
 
 
72
  return {"results": results[:20], "count": len(results)}
73
 
74
  def get_connections(self, user_id: str) -> dict[str, Any]:
75
- profile = self.profiles_by_user.get(user_id)
 
76
  return {"results": profile["connections"] if profile else [], "count": len(profile["connections"]) if profile else 0}
 
9
  class ToolRegistry:
10
  def __init__(self, views: PlatformViews):
11
  self.views = views
12
+ self.alias_lookup = dict(getattr(views, "alias_lookup", {}))
13
  self._index()
14
 
15
+ @staticmethod
16
+ def _normalize_lookup_token(value: str) -> str:
17
+ token = str(value or "").strip().lower()
18
+ for prefix in ("org_", "loc_", "event_", "post_", "thr_", "thread_", "alias_", "user_"):
19
+ if token.startswith(prefix):
20
+ token = token[len(prefix) :]
21
+ break
22
+ return token.replace("_", " ")
23
+
24
+ def _resolve_user_ids(self, user_id: str) -> list[str]:
25
+ user_id = str(user_id or "").strip()
26
+ if not user_id:
27
+ return []
28
+ resolved = [user_id]
29
+ canonical = self.alias_lookup.get(user_id)
30
+ if canonical and canonical not in resolved:
31
+ resolved.append(canonical)
32
+ for alias_id, owner in self.alias_lookup.items():
33
+ if owner == user_id and alias_id not in resolved:
34
+ resolved.append(alias_id)
35
+ return resolved
36
+
37
  def _index(self) -> None:
38
  self.posts_by_user: dict[str, list[dict[str, Any]]] = defaultdict(list)
39
  self.mentions_by_user: dict[str, list[dict[str, Any]]] = defaultdict(list)
40
+ self.posts_by_id = {post["post_id"]: post for post in self.views.microblog_posts}
41
  for post in self.views.microblog_posts:
42
  self.posts_by_user[post["user_id"]].append(post)
43
+ canonical_user = post.get("canonical_user")
44
+ if canonical_user:
45
+ self.posts_by_user[canonical_user].append(post)
46
  for m in post.get("mentions", []):
47
  self.mentions_by_user[m].append(post)
48
 
 
63
 
64
  def search_posts(self, query: str, time_range: tuple[int, int] | None = None) -> dict[str, Any]:
65
  start, end = time_range or (0, 10**9)
66
+ needle = str(query or "").lower()
67
  results = [
68
+ p
69
+ for p in self.views.microblog_posts
70
+ if start <= p["timestamp"] <= end
71
+ and (
72
+ needle in p["text"].lower()
73
+ or needle in str(p.get("post_id", "")).lower()
74
+ or needle in str(p.get("user_id", "")).lower()
75
+ or needle in str(p.get("canonical_user", "")).lower()
76
+ or any(needle in str(ref).lower() for ref in p.get("references", []))
77
+ or any(needle in str(ref).lower() for ref in p.get("reference_names", []))
78
+ )
79
  ]
80
  return {"results": results[:20], "count": len(results)}
81
 
82
+ def get_post(self, post_id: str) -> dict[str, Any]:
83
+ post = self.posts_by_id.get(post_id)
84
+ return {"result": post, "found": post is not None}
85
+
86
  def get_user_posts(self, user_id: str) -> dict[str, Any]:
87
+ results: list[dict[str, Any]] = []
88
+ seen_post_ids: set[str] = set()
89
+ for resolved_id in self._resolve_user_ids(user_id):
90
+ for post in self.posts_by_user.get(resolved_id, []):
91
+ post_id = str(post.get("post_id", ""))
92
+ if post_id in seen_post_ids:
93
+ continue
94
+ seen_post_ids.add(post_id)
95
+ results.append(post)
96
+ return {"results": results, "count": len(results)}
97
 
98
  def get_mentions(self, user_id: str) -> dict[str, Any]:
99
+ results: list[dict[str, Any]] = []
100
+ seen_post_ids: set[str] = set()
101
+ for resolved_id in self._resolve_user_ids(user_id):
102
+ for post in self.mentions_by_user.get(resolved_id, []):
103
+ post_id = str(post.get("post_id", ""))
104
+ if post_id in seen_post_ids:
105
+ continue
106
+ seen_post_ids.add(post_id)
107
+ results.append(post)
108
+ return {"results": results, "count": len(results)}
109
 
110
  def search_threads(self, topic: str) -> dict[str, Any]:
111
+ needle = str(topic or "").strip().lower()
112
+ results = [
113
+ t
114
+ for t in self.views.forum_threads
115
+ if t["topic"] == topic
116
+ or needle in str(t.get("thread_id", "")).lower()
117
+ or needle in str(t.get("title", "")).lower()
118
+ ]
119
  return {"results": results[:20], "count": len(results)}
120
 
121
  def get_thread(self, thread_id: str) -> dict[str, Any]:
 
123
  return {"result": thread, "found": thread is not None}
124
 
125
  def get_user_activity(self, user_id: str) -> dict[str, Any]:
126
+ acts: list[dict[str, Any]] = []
127
+ seen = set()
128
+ for resolved_id in self._resolve_user_ids(user_id):
129
+ for activity in self.activity_by_user.get(resolved_id, []):
130
+ key = (activity.get("kind"), activity.get("thread_id"))
131
+ if key in seen:
132
+ continue
133
+ seen.add(key)
134
+ acts.append(activity)
135
  return {"results": acts, "count": len(acts)}
136
 
137
  def get_profile(self, user_id: str) -> dict[str, Any]:
138
+ resolved_ids = self._resolve_user_ids(user_id)
139
+ profile = next((self.profiles_by_user.get(candidate) for candidate in resolved_ids if self.profiles_by_user.get(candidate)), None)
140
  return {"result": profile, "found": profile is not None}
141
 
142
  def search_people(self, name: str | None = None, org: str | None = None) -> dict[str, Any]:
143
  results = self.views.profiles
144
  if name:
145
+ name_query = str(name).lower()
146
+ results = [
147
+ p
148
+ for p in results
149
+ if name_query in p["name"].lower()
150
+ or name_query in p["user_id"].lower()
151
+ or any(name_query in alias.lower() for alias in p.get("alias_ids", []))
152
+ ]
153
  if org:
154
+ org_query = str(org).lower()
155
+ normalized_org = self._normalize_lookup_token(org_query)
156
+ results = [
157
+ p
158
+ for p in results
159
+ if org_query in p["org"].lower()
160
+ or org_query in str(p.get("org_id", "")).lower()
161
+ or (normalized_org and normalized_org in p["org"].lower())
162
+ ]
163
  return {"results": results[:20], "count": len(results)}
164
 
165
  def get_connections(self, user_id: str) -> dict[str, Any]:
166
+ resolved_ids = self._resolve_user_ids(user_id)
167
+ profile = next((self.profiles_by_user.get(candidate) for candidate in resolved_ids if self.profiles_by_user.get(candidate)), None)
168
  return {"results": profile["connections"] if profile else [], "count": len(profile["connections"]) if profile else 0}
tests/test_generator.py CHANGED
@@ -66,10 +66,31 @@ def test_generator_outputs():
66
  views = gen.build_platform_views(graph)
67
  tasks = gen.generate_tasks(graph, views, count=5)
68
  assert len(graph.nodes) >= 20
69
- assert len(views.microblog_posts) == 20
70
  assert len(tasks) == 5
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def test_graph_generation_uses_parallel_shared_context_workers():
74
  cfg = EnvironmentConfig(n_users=12, seed=9)
75
  cfg.seeding.llm_generate_remaining_graph = True
 
66
  views = gen.build_platform_views(graph)
67
  tasks = gen.generate_tasks(graph, views, count=5)
68
  assert len(graph.nodes) >= 20
69
+ assert len(views.microblog_posts) >= 20
70
  assert len(tasks) == 5
71
 
72
 
73
+ def test_seeded_views_include_seeded_posts_and_threads():
74
+ from osint_env.config import clone_environment_config, load_seeding_config, load_shared_config
75
+
76
+ shared = load_shared_config("datasets/fixed_levels/shared_config_fixed_levels.json")
77
+ cfg = clone_environment_config(shared.environment)
78
+ cfg.seeding = load_seeding_config("datasets/fixed_levels/seed_fixed_levels.json")
79
+ cfg.llm.provider = "mock"
80
+
81
+ gen = DatasetGenerator(cfg)
82
+ graph = gen.build_canonical_graph()
83
+ views = gen.build_platform_views(graph)
84
+
85
+ seeded_post = next((post for post in views.microblog_posts if post["post_id"] == "post_midnight_manifest"), None)
86
+ seeded_thread = next((thread for thread in views.forum_threads if thread["thread_id"] == "thr_supply_leak"), None)
87
+
88
+ assert seeded_post is not None
89
+ assert "loc_dockyard17" in seeded_post["references"]
90
+ assert seeded_thread is not None
91
+ assert "org_northbridge_logistics" in seeded_thread["references"]
92
+
93
+
94
  def test_graph_generation_uses_parallel_shared_context_workers():
95
  cfg = EnvironmentConfig(n_users=12, seed=9)
96
  cfg.seeding.llm_generate_remaining_graph = True
tests/test_openai_baseline.py CHANGED
@@ -7,6 +7,7 @@ def test_openai_baseline_toolset_contains_answer_and_graph_actions():
7
  assert "submit_answer" in names
8
  assert "add_edge" in names
9
  assert "search_memory" in names
 
10
 
11
 
12
  def test_gpt5_request_kwargs_avoid_temperature_and_use_max_completion_tokens():
 
7
  assert "submit_answer" in names
8
  assert "add_edge" in names
9
  assert "search_memory" in names
10
+ assert "get_post" in names
11
 
12
 
13
  def test_gpt5_request_kwargs_avoid_temperature_and_use_max_completion_tokens():
tests/test_server.py CHANGED
@@ -1,5 +1,8 @@
 
 
1
  from fastapi.testclient import TestClient
2
 
 
3
  from server import app
4
 
5
 
@@ -20,3 +23,48 @@ def test_server_environment_metadata():
20
  assert "observation_space" in body
21
  assert "summary" in body
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
  from fastapi.testclient import TestClient
4
 
5
+ import server
6
  from server import app
7
 
8
 
 
23
  assert "observation_space" in body
24
  assert "summary" in body
25
 
26
+
27
+ def test_space_snapshot_prefers_newer_evaluation_payload(tmp_path, monkeypatch):
28
+ baseline_path = tmp_path / "baseline.json"
29
+ evaluation_path = tmp_path / "evaluation.json"
30
+ baseline_dashboard = tmp_path / "baseline_dashboard.html"
31
+ space_dashboard = tmp_path / "space_dashboard.html"
32
+
33
+ baseline_path.write_text(
34
+ json.dumps(
35
+ {
36
+ "run": {"dashboard_path": str(baseline_dashboard)},
37
+ "summary": {"leaderboard_score": 0.1, "task_success_rate": 0.1},
38
+ }
39
+ ),
40
+ encoding="utf-8",
41
+ )
42
+ baseline_dashboard.write_text("<html>baseline</html>", encoding="utf-8")
43
+ evaluation_path.write_text(
44
+ json.dumps({"summary": {"leaderboard_score": 0.9, "task_success_rate": 0.9}, "episodes": []}),
45
+ encoding="utf-8",
46
+ )
47
+ space_dashboard.write_text("<html>space</html>", encoding="utf-8")
48
+
49
+ monkeypatch.setattr(server, "LATEST_BASELINE_OUTPUT", baseline_path)
50
+ monkeypatch.setattr(server, "LATEST_EVALUATION_OUTPUT", evaluation_path)
51
+ monkeypatch.setattr(server, "SPACE_DASHBOARD", space_dashboard)
52
+ monkeypatch.setattr(
53
+ server,
54
+ "_base_environment_snapshot",
55
+ lambda: {
56
+ "task_count": 30,
57
+ "difficulty_counts": {},
58
+ "action_space": ["CALL_TOOL", "ADD_EDGE", "ANSWER"],
59
+ "observation_space": {},
60
+ "task_types": [],
61
+ "config": {},
62
+ },
63
+ )
64
+ monkeypatch.setattr(server, "_build_environment", lambda: object())
65
+ monkeypatch.setattr(server, "export_dashboard", lambda env, evaluation, leaderboard_records, output_path: str(space_dashboard))
66
+
67
+ snapshot = server._space_snapshot()
68
+ assert snapshot["source"] == "latest_evaluation"
69
+ assert snapshot["summary"]["leaderboard_score"] == 0.9
70
+ assert snapshot["dashboard_path"] == str(space_dashboard)
tests/test_tools.py CHANGED
@@ -1,5 +1,7 @@
 
1
  from osint_env.data.generator import DatasetGenerator
2
  from osint_env.domain.models import EnvironmentConfig
 
3
  from osint_env.platforms.tools import ToolRegistry
4
 
5
 
@@ -13,3 +15,25 @@ def test_tools_basics():
13
  profile_any = next(iter([p["user_id"] for p in views.profiles if p["user_id"].startswith("user_")]))
14
  profile = tools.get_profile(profile_any)
15
  assert profile["found"] is True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from osint_env.config import clone_environment_config, load_seeding_config, load_shared_config
2
  from osint_env.data.generator import DatasetGenerator
3
  from osint_env.domain.models import EnvironmentConfig
4
+ from osint_env.env.environment import OSINTEnvironment
5
  from osint_env.platforms.tools import ToolRegistry
6
 
7
 
 
15
  profile_any = next(iter([p["user_id"] for p in views.profiles if p["user_id"].startswith("user_")]))
16
  profile = tools.get_profile(profile_any)
17
  assert profile["found"] is True
18
+
19
+
20
+ def test_seeded_tools_expose_seed_question_entities():
21
+ shared = load_shared_config("datasets/fixed_levels/shared_config_fixed_levels.json")
22
+ env_cfg = clone_environment_config(shared.environment)
23
+ env_cfg.seeding = load_seeding_config("datasets/fixed_levels/seed_fixed_levels.json")
24
+ env_cfg.llm.provider = "mock"
25
+ env = OSINTEnvironment(env_cfg)
26
+ tools = env.tools
27
+
28
+ post = tools.get_post("post_midnight_manifest")
29
+ assert post["found"] is True
30
+ assert "loc_dockyard17" in post["result"]["references"]
31
+
32
+ people = tools.search_people(org="org_northbridge_logistics")
33
+ user_ids = {row["user_id"] for row in people["results"]}
34
+ assert "user_bharat" in user_ids
35
+ assert "user_hiro" in user_ids
36
+
37
+ alias_profile = tools.get_profile("alias_docksparrow")
38
+ assert alias_profile["found"] is True
39
+ assert alias_profile["result"]["user_id"] == "user_hiro"