Spaces:
Sleeping
Sleeping
siddeshwar-kagatikar commited on
Commit ·
dbcbf00
1
Parent(s): 9e6be29
tested using openai
Browse files- .dockerignore +9 -0
- Dockerfile +28 -0
- README.md +138 -269
- pyproject.toml +12 -2
- scripts/run_openai_baseline.py +60 -0
- server.py +221 -0
- src/osint_env/baselines/__init__.py +4 -0
- src/osint_env/baselines/openai_runner.py +480 -0
- src/osint_env/cli.py +1 -1
- src/osint_env/env/environment.py +1 -2
- src/osint_env/env/openenv_compat.py +20 -0
- tests/conftest.py +12 -0
- tests/test_openai_baseline.py +19 -0
- tests/test_server.py +22 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
.pytest_cache
|
| 3 |
+
__pycache__
|
| 4 |
+
*.pyc
|
| 5 |
+
*.pyo
|
| 6 |
+
*.pyd
|
| 7 |
+
.Python
|
| 8 |
+
artifacts
|
| 9 |
+
tests
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
RUN useradd -m -u 1000 user
|
| 4 |
+
|
| 5 |
+
USER user
|
| 6 |
+
ENV HOME=/home/user \
|
| 7 |
+
PATH=/home/user/.local/bin:$PATH \
|
| 8 |
+
PYTHONDONTWRITEBYTECODE=1 \
|
| 9 |
+
PYTHONUNBUFFERED=1 \
|
| 10 |
+
PORT=7860
|
| 11 |
+
|
| 12 |
+
WORKDIR $HOME/app
|
| 13 |
+
|
| 14 |
+
COPY --chown=user pyproject.toml README.md $HOME/app/
|
| 15 |
+
COPY --chown=user src $HOME/app/src
|
| 16 |
+
COPY --chown=user config $HOME/app/config
|
| 17 |
+
COPY --chown=user datasets $HOME/app/datasets
|
| 18 |
+
COPY --chown=user docs $HOME/app/docs
|
| 19 |
+
COPY --chown=user scripts $HOME/app/scripts
|
| 20 |
+
COPY --chown=user server.py $HOME/app/server.py
|
| 21 |
+
|
| 22 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
| 23 |
+
pip install --no-cache-dir -e .
|
| 24 |
+
|
| 25 |
+
EXPOSE 7860
|
| 26 |
+
|
| 27 |
+
CMD ["sh", "-c", "uvicorn server:app --host 0.0.0.0 --port ${PORT:-7860}"]
|
| 28 |
+
|
README.md
CHANGED
|
@@ -1,331 +1,200 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
|
| 5 |
-
|
| 6 |
|
| 7 |
-
|
| 8 |
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
2. Project noisy partial views into mock platforms (microblog, forum, profile).
|
| 13 |
-
3. Ask identity-resolution, network-discovery, and event-tracing questions.
|
| 14 |
-
4. Let agents call tools, add graph edges, and submit answers.
|
| 15 |
-
5. Score episodes using a composite reward that combines correctness, retrieval utility, graph quality, and efficiency.
|
| 16 |
|
| 17 |
-
The
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
-
|
| 24 |
-
-
|
| 25 |
-
-
|
| 26 |
-
- Seeded questions from user-provided JSON.
|
| 27 |
-
- LLM-assisted generation hooks for remaining graph/task expansion with deterministic fallback.
|
| 28 |
-
- Persistent benchmark leaderboard with composite utility score.
|
| 29 |
-
- Interactive dashboard showing:
|
| 30 |
-
- canonical graph,
|
| 31 |
-
- episode graph diff (predicted vs truth),
|
| 32 |
-
- source database explorer,
|
| 33 |
-
- benchmark charts and leaderboard.
|
| 34 |
|
| 35 |
-
##
|
| 36 |
|
| 37 |
-
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
|
| 44 |
-
|
| 45 |
-
uv pip install -e .
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
|
| 54 |
-
- ollama: local model inference (recommended for offline development).
|
| 55 |
-
- openai: remote API provider using an API key.
|
| 56 |
|
| 57 |
-
|
| 58 |
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
|
|
|
|
|
|
| 68 |
|
| 69 |
-
|
| 70 |
-
ollama cp qwen3:1.7b qwen3:2b
|
| 71 |
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
| 75 |
|
| 76 |
-
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
|
| 83 |
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
|
|
|
| 88 |
|
| 89 |
-
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
data/ canonical graph, views, and task generation
|
| 95 |
-
domain/ data models and configuration dataclasses
|
| 96 |
-
env/ OpenEnv environment and reward logic
|
| 97 |
-
eval/ metrics, runner, leaderboard
|
| 98 |
-
llm/ LLM client interface and local mock
|
| 99 |
-
memory/ in-memory KG and semantic memory
|
| 100 |
-
platforms/ platform tool APIs
|
| 101 |
-
viz/ dashboard export
|
| 102 |
-
cli.py command-line entrypoint
|
| 103 |
|
| 104 |
-
|
| 105 |
-
shared_config.json shared runtime/environment/swarm/reward config
|
| 106 |
-
seed_example.json example seeded graph and question file
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
| 115 |
-
- swarm limits,
|
| 116 |
-
- spawn reward shaping hyperparameters,
|
| 117 |
-
- seeding defaults,
|
| 118 |
-
- llm backend defaults,
|
| 119 |
-
- runtime output paths.
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
|
| 124 |
-
- max_breadth: 2
|
| 125 |
-
- max_width: 2
|
| 126 |
-
- max_depth: 2
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
|
| 131 |
|
| 132 |
-
|
| 133 |
|
| 134 |
-
-
|
| 135 |
-
- graph edges,
|
| 136 |
-
- task questions (optionally with answers and supporting edges).
|
| 137 |
|
| 138 |
-
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
| 149 |
|
| 150 |
-
-
|
| 151 |
-
-
|
| 152 |
-
-
|
| 153 |
-
-
|
| 154 |
-
- --llm-model to override configured model
|
| 155 |
-
- --ollama-base-url to override local Ollama endpoint
|
| 156 |
-
- --openai-api-key or --openai-api-key-env for OpenAI authentication
|
| 157 |
|
| 158 |
-
|
| 159 |
|
| 160 |
-
|
| 161 |
|
| 162 |
-
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
|
| 165 |
|
| 166 |
-
|
| 167 |
|
| 168 |
-
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
|
| 172 |
-
|
| 173 |
|
| 174 |
-
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
6. Export explorer without full benchmark:
|
| 181 |
-
|
| 182 |
-
osint-env viz --with-demo --output artifacts/osint_explorer.html
|
| 183 |
-
|
| 184 |
-
7. Benchmark with local Qwen model:
|
| 185 |
-
|
| 186 |
-
osint-env benchmark --episodes 20 --agent-mode swarm --llm-provider ollama --llm-model qwen3:2b --name qwen3_swarm
|
| 187 |
-
|
| 188 |
-
8. Fast local smoke benchmark:
|
| 189 |
-
|
| 190 |
-
osint-env benchmark --episodes 1 --agent-mode swarm --llm-provider ollama --llm-model qwen3:2b --seed-file config/seed_ollama_smoke.json --name ollama_qwen_smoke
|
| 191 |
-
|
| 192 |
-
## 8. Multi-Agent Swarm Design
|
| 193 |
-
|
| 194 |
-
Swarm orchestration is implemented in src/osint_env/agents/swarm_agent.py.
|
| 195 |
-
|
| 196 |
-
Design choices:
|
| 197 |
-
|
| 198 |
-
- Shared environment state (single episode state machine).
|
| 199 |
-
- Planner rounds bounded by max_depth and planner_rounds.
|
| 200 |
-
- Parallel workers bounded by min(max_agents, max_breadth, max_width).
|
| 201 |
-
- Each worker performs limited tool calls, then attempts edge addition.
|
| 202 |
-
- Final answer is submitted once planning rounds complete or episode ends.
|
| 203 |
-
|
| 204 |
-
Reward compatibility:
|
| 205 |
-
|
| 206 |
-
- Existing edge and answer reward components are unchanged.
|
| 207 |
-
- Spawn utility is added as an auxiliary term using the PARL-style helper in src/osint_env/env/spawn_reward_hooks.py.
|
| 208 |
-
- Spawn telemetry (count, critical steps, completion) is tracked in episode info and evaluation summaries.
|
| 209 |
-
|
| 210 |
-
## 9. Reward Design (Integrated Notes)
|
| 211 |
-
|
| 212 |
-
The reward function is a composite of graph-construction and answer-time utility terms. It combines ideas from DeepPath, EMNLP 2018 reward shaping, UniRel, and AutoGraph-R1.
|
| 213 |
-
|
| 214 |
-
### 9.1 Edge Reward During Graph Construction
|
| 215 |
-
|
| 216 |
-
For each ADD_EDGE action, the environment combines:
|
| 217 |
-
|
| 218 |
-
1. Global accuracy signal (DeepPath-style positive/negative credit).
|
| 219 |
-
2. Soft shaping term inspired by EMNLP 2018 reward shaping:
|
| 220 |
-
|
| 221 |
-
R = Rb + (1 - Rb) f(s, r, o)
|
| 222 |
-
|
| 223 |
-
where f is approximated in code with relation and type priors plus small domain priors.
|
| 224 |
-
|
| 225 |
-
3. Efficiency bonus inversely proportional to step count.
|
| 226 |
-
4. Diversity bonus using signature novelty against previous edges.
|
| 227 |
-
5. Relation informativeness using normalized relation IDF.
|
| 228 |
-
6. Entity informativeness using inverse hubness penalty.
|
| 229 |
-
7. Connectivity gain bonus for bridge-style edges.
|
| 230 |
-
|
| 231 |
-
### 9.2 Final Answer Reward
|
| 232 |
-
|
| 233 |
-
For ANSWER, reward includes:
|
| 234 |
-
|
| 235 |
-
1. format validity,
|
| 236 |
-
2. correctness,
|
| 237 |
-
3. knowledge-carrying utility (AutoGraph-style deducibility),
|
| 238 |
-
4. knowledge-indexing utility (AutoGraph-style evidence coverage proxy over tool outputs),
|
| 239 |
-
5. UniRel-style connectivity score over seed entities,
|
| 240 |
-
6. graph F1 against supporting edges,
|
| 241 |
-
7. compactness and repetition controls,
|
| 242 |
-
8. efficiency and informativeness terms.
|
| 243 |
-
|
| 244 |
-
### 9.3 Swarm Auxiliary Reward
|
| 245 |
-
|
| 246 |
-
The swarm runner adds a PARL-style auxiliary term based on:
|
| 247 |
-
|
| 248 |
-
- spawn parallelism,
|
| 249 |
-
- finished subtask ratio,
|
| 250 |
-
- critical-step latency proxy,
|
| 251 |
-
- optional breadth and depth shaping.
|
| 252 |
-
|
| 253 |
-
This auxiliary term is configurable in shared_config.json via spawn_reward.
|
| 254 |
-
|
| 255 |
-
### 9.4 Benchmark Metrics
|
| 256 |
-
|
| 257 |
-
Evaluation tracks:
|
| 258 |
-
|
| 259 |
-
- task success,
|
| 260 |
-
- graph F1,
|
| 261 |
-
- deanonymization accuracy,
|
| 262 |
-
- tool efficiency,
|
| 263 |
-
- retrieval and structural utility signals,
|
| 264 |
-
- spawn signals (for swarm runs),
|
| 265 |
-
- composite leaderboard score.
|
| 266 |
-
|
| 267 |
-
## 10. Interactive Dashboard
|
| 268 |
-
|
| 269 |
-
Dashboard export includes:
|
| 270 |
-
|
| 271 |
-
- canonical graph explorer,
|
| 272 |
-
- episode graph comparison,
|
| 273 |
-
- node and edge inspectors,
|
| 274 |
-
- source database table with record detail pane,
|
| 275 |
-
- reward and graph traces,
|
| 276 |
-
- sortable leaderboard snapshot.
|
| 277 |
-
|
| 278 |
-
Primary outputs:
|
| 279 |
-
|
| 280 |
-
- artifacts/osint_dashboard.html
|
| 281 |
-
- artifacts/osint_explorer.html
|
| 282 |
-
- artifacts/sweep_dashboards/*.html
|
| 283 |
-
|
| 284 |
-
## 11. Notes On LLM Generation
|
| 285 |
-
|
| 286 |
-
Dataset generation supports an LLM-assisted expansion path for remaining tasks and graph edges.
|
| 287 |
-
|
| 288 |
-
If no model is connected or structured output is unavailable, deterministic template fallback is used. This preserves reproducibility while keeping the interface compatible with stronger local or remote LLMs.
|
| 289 |
-
|
| 290 |
-
## 12. Citation And Source Papers
|
| 291 |
-
|
| 292 |
-
Reward components and swarm hooks are informed by the following papers:
|
| 293 |
-
|
| 294 |
-
1. AutoGraph-R1: Enhancing Agentic RAG with Graph-R1 for Complex QA.
|
| 295 |
-
arXiv: https://arxiv.org/abs/2510.15339
|
| 296 |
-
|
| 297 |
-
2. UniRel: Graph-based Relational Retrieval for LLM Reasoning.
|
| 298 |
-
arXiv: https://arxiv.org/abs/2512.17043
|
| 299 |
-
|
| 300 |
-
3. DeepPath: A Reinforcement Learning Method for Knowledge Graph Reasoning.
|
| 301 |
-
EMNLP 2017: https://aclanthology.org/D17-1060/
|
| 302 |
-
|
| 303 |
-
4. Multi-Hop Knowledge Graph Reasoning with Reward Shaping.
|
| 304 |
-
EMNLP 2018: https://aclanthology.org/D18-1362/
|
| 305 |
-
|
| 306 |
-
5. Kimi K2.5 (PARL-style multi-agent shaping motivation).
|
| 307 |
-
arXiv: https://arxiv.org/abs/2602.02276
|
| 308 |
-
|
| 309 |
-
Additional context:
|
| 310 |
-
|
| 311 |
-
6. MINERVA: Reinforcement Learning for Query Answering over Knowledge Graphs.
|
| 312 |
-
arXiv: https://arxiv.org/abs/1711.05851
|
| 313 |
-
|
| 314 |
-
## 13. Development And Testing
|
| 315 |
-
|
| 316 |
-
Run tests from project root:
|
| 317 |
-
|
| 318 |
-
pytest -q
|
| 319 |
-
|
| 320 |
-
Recommended validation after config changes:
|
| 321 |
-
|
| 322 |
-
1. osint-env demo --agent-mode swarm
|
| 323 |
-
2. osint-env eval --episodes 5
|
| 324 |
-
3. osint-env benchmark --episodes 5 --name quick_check
|
| 325 |
-
4. osint-env leaderboard --top 5
|
| 326 |
-
|
| 327 |
-
## 14. Scope Boundaries
|
| 328 |
-
|
| 329 |
-
- This repository supports a low-width swarm baseline and reward-compatible orchestration.
|
| 330 |
-
- It does not include a full distributed training stack or asynchronous external worker runtime.
|
| 331 |
-
- The architecture keeps those extensions possible without breaking current interfaces.
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: OSINT OpenEnv
|
| 3 |
+
emoji: 🕵️
|
| 4 |
+
colorFrom: teal
|
| 5 |
+
colorTo: amber
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
tags:
|
| 9 |
+
- openenv
|
| 10 |
+
- osint
|
| 11 |
+
- benchmark
|
| 12 |
+
- docker
|
| 13 |
+
- fastapi
|
| 14 |
+
short_description: Containerized OpenEnv-compatible OSINT benchmark with fixed-level tasks, dashboard export, and an OpenAI baseline runner.
|
| 15 |
+
---
|
| 16 |
|
| 17 |
+
# OSINT OpenEnv
|
| 18 |
|
| 19 |
+
OSINT OpenEnv is a synthetic benchmark environment for tool-using agents that must recover identities, trace events, and link entities across noisy multi-platform records. The project is designed to feel like a compact OSINT workflow rather than a raw QA dataset: agents query mock profiles, posts, forum threads, and semantic memory, build a working graph, and then submit an answer.
|
| 20 |
|
| 21 |
+
The motivation is to provide a reproducible OpenEnv-compatible environment for evaluating graph-building and tool-using reasoning without depending on live web data, unstable APIs, or private corpora. That makes it useful for local development, regression testing, and hosted demos such as a Docker-based Hugging Face Space.
|
| 22 |
|
| 23 |
+
## Environment Summary
|
| 24 |
|
| 25 |
+
The environment generates or loads a hidden canonical graph of users, aliases, organizations, locations, posts, threads, and events. It then exposes partial platform views and a task list drawn from that graph.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
The default hosted Space uses the fixed-level benchmark in [`datasets/fixed_levels/seed_fixed_levels.json`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/datasets/fixed_levels/seed_fixed_levels.json), which contains 15 stable tasks over one shared seeded graph.
|
| 28 |
|
| 29 |
+
## Action Space
|
| 30 |
|
| 31 |
+
The environment exposes three actions:
|
| 32 |
|
| 33 |
+
- `CALL_TOOL`: query platform views or semantic memory such as `search_posts`, `get_profile`, `search_threads`, `get_connections`, or `search_memory`.
|
| 34 |
+
- `ADD_EDGE`: add a candidate relation to the working memory graph.
|
| 35 |
+
- `ANSWER`: submit the final answer as an exact node id string.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
+
## Observation Space
|
| 38 |
|
| 39 |
+
Each step returns a JSON observation with four parts:
|
| 40 |
|
| 41 |
+
- `tool_outputs`: the most recent tool results.
|
| 42 |
+
- `graph_snapshot`: the current working-memory graph edges.
|
| 43 |
+
- `action_history`: recent actions and rewards.
|
| 44 |
+
- `task`: the active task id, task type, and question.
|
| 45 |
|
| 46 |
+
## Task Types And Difficulty
|
| 47 |
|
| 48 |
+
The benchmark mixes direct lookups with multi-hop traces:
|
|
|
|
| 49 |
|
| 50 |
+
- Easy: single-hop identity resolution, organization lookup, event lookup, or location lookup.
|
| 51 |
+
- Mid: two-hop alias-to-user-to-organization or thread-to-event-to-user traces.
|
| 52 |
+
- High: cross-platform multi-hop traces combining aliases, authored content, event references, organization links, and direct connections.
|
| 53 |
|
| 54 |
+
Common task families include:
|
| 55 |
|
| 56 |
+
- `identity_resolution`
|
| 57 |
+
- `network_discovery`
|
| 58 |
+
- `event_tracing`
|
| 59 |
+
- `cross_platform_linking`
|
| 60 |
+
- `deanonymization`
|
| 61 |
+
- `convoluted_trace`
|
| 62 |
|
| 63 |
+
Expected difficulty increases with the number of relations the agent must chain together and whether the evidence is split across posts, threads, aliases, and profile edges.
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
## Repository Layout
|
| 66 |
|
| 67 |
+
```text
|
| 68 |
+
src/osint_env/
|
| 69 |
+
agents/ single-agent and swarm runners
|
| 70 |
+
baselines/ reusable OpenAI baseline runner
|
| 71 |
+
config/ shared config and seed loading
|
| 72 |
+
data/ graph/view/task generation
|
| 73 |
+
domain/ dataclasses and environment models
|
| 74 |
+
env/ environment, reward logic, OpenEnv compatibility shim
|
| 75 |
+
eval/ evaluation metrics and leaderboard helpers
|
| 76 |
+
llm/ mock, Ollama, and OpenAI client wrappers
|
| 77 |
+
memory/ working graph and semantic memory
|
| 78 |
+
platforms/ tool APIs over synthetic platform views
|
| 79 |
+
viz/ dashboard export
|
| 80 |
|
| 81 |
+
scripts/
|
| 82 |
+
build_fixed_levels_dataset.py
|
| 83 |
+
run_openai_baseline.py
|
| 84 |
|
| 85 |
+
datasets/fixed_levels/
|
| 86 |
+
seed_fixed_levels.json
|
| 87 |
+
shared_config_fixed_levels.json
|
| 88 |
+
qwen_swarm_benchmark_fixed_levels.json
|
| 89 |
|
| 90 |
+
server.py FastAPI app for local use and Docker/HF Spaces
|
| 91 |
+
Dockerfile Container entrypoint for Hugging Face Docker Spaces
|
| 92 |
+
```
|
| 93 |
|
| 94 |
+
## Setup
|
|
|
|
| 95 |
|
| 96 |
+
Python 3.10+ is required.
|
| 97 |
|
| 98 |
+
Local install:
|
| 99 |
|
| 100 |
+
```bash
|
| 101 |
+
python -m pip install -e .
|
| 102 |
+
```
|
| 103 |
|
| 104 |
+
Run tests:
|
| 105 |
|
| 106 |
+
```bash
|
| 107 |
+
python -m pytest -q
|
| 108 |
+
```
|
| 109 |
|
| 110 |
+
## Usage
|
| 111 |
|
| 112 |
+
Run one demo episode:
|
| 113 |
|
| 114 |
+
```bash
|
| 115 |
+
osint-env demo --agent-mode swarm --llm-provider mock
|
| 116 |
+
```
|
| 117 |
|
| 118 |
+
Run a quick evaluation:
|
| 119 |
|
| 120 |
+
```bash
|
| 121 |
+
osint-env eval --episodes 5 --agent-mode swarm --llm-provider mock
|
| 122 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
Export a dashboard:
|
|
|
|
|
|
|
| 125 |
|
| 126 |
+
```bash
|
| 127 |
+
osint-env benchmark --episodes 5 --agent-mode swarm --llm-provider mock --name quick_check
|
| 128 |
+
```
|
| 129 |
|
| 130 |
+
## OpenAI Baseline
|
| 131 |
|
| 132 |
+
The reproducible OpenAI baseline is implemented in [`scripts/run_openai_baseline.py`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/scripts/run_openai_baseline.py). It runs on the fixed-level benchmark, uses a stable seeded graph/task set, writes a JSON artifact, appends a leaderboard record, and exports a dashboard.
|
| 133 |
|
| 134 |
+
Default behavior:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
- dataset: fixed-level benchmark
|
| 137 |
+
- episodes: 15
|
| 138 |
+
- max steps per episode: 8
|
| 139 |
+
- temperature: 0.0
|
| 140 |
+
- output artifact: `artifacts/baselines/openai_fixed_levels_latest.json`
|
| 141 |
|
| 142 |
+
Run it with an API key:
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
+
```bash
|
| 145 |
+
export OPENAI_API_KEY="your_key_here"
|
| 146 |
+
python scripts/run_openai_baseline.py --model gpt-4o-mini
|
| 147 |
+
```
|
| 148 |
|
| 149 |
+
The script is designed to stay bounded enough for a normal benchmark pass to finish comfortably under 20 minutes on a lightweight chat model, while still using the full fixed task set. For repeatability it fixes the benchmark graph/tasks and uses deterministic decoding settings. Because remote model backends can still change over time, the output artifact also records model metadata and system fingerprints when available.
|
| 150 |
|
| 151 |
+
## Docker And Hugging Face Space
|
| 152 |
|
| 153 |
+
The repository is ready for a Docker-based Hugging Face Space:
|
|
|
|
|
|
|
| 154 |
|
| 155 |
+
- `README.md` includes `sdk: docker`
|
| 156 |
+
- `README.md` includes the `openenv` Space tag
|
| 157 |
+
- `Dockerfile` serves [`server.py`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/server.py) on port `7860`
|
| 158 |
|
| 159 |
+
Local Docker smoke test:
|
| 160 |
|
| 161 |
+
```bash
|
| 162 |
+
docker build -t osint-openenv .
|
| 163 |
+
docker run --rm -p 7860:7860 osint-openenv
|
| 164 |
+
```
|
| 165 |
|
| 166 |
+
Then open `http://localhost:7860`.
|
| 167 |
|
| 168 |
+
The FastAPI app serves:
|
| 169 |
|
| 170 |
+
- `/`: overview page
|
| 171 |
+
- `/dashboard`: generated benchmark dashboard
|
| 172 |
+
- `/api/environment`: environment metadata
|
| 173 |
+
- `/healthz`: health check
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
+
## Baseline Scores
|
| 176 |
|
| 177 |
+
Bundled fixed-level baseline artifact:
|
| 178 |
|
| 179 |
+
| baseline | provider | model | episodes | task success | avg graph f1 | leaderboard score |
|
| 180 |
+
|---|---|---|---:|---:|---:|---:|
|
| 181 |
+
| `fixed_levels_qwen_swarm` | Ollama | `qwen3:2b` | 15 | 1.000 | 0.849 | 0.854 |
|
| 182 |
|
| 183 |
+
Source: [`datasets/fixed_levels/qwen_swarm_benchmark_fixed_levels.json`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/datasets/fixed_levels/qwen_swarm_benchmark_fixed_levels.json)
|
| 184 |
|
| 185 |
+
After you supply an OpenAI API key, the matching baseline scores will be written to:
|
| 186 |
|
| 187 |
+
- [`artifacts/baselines/openai_fixed_levels_latest.json`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/artifacts/baselines/openai_fixed_levels_latest.json)
|
| 188 |
+
- [`artifacts/baselines/openai_fixed_levels_dashboard.html`](/c:/Users/SIDDESHWAR/Desktop/meta/OSINT_env/artifacts/baselines/openai_fixed_levels_dashboard.html)
|
| 189 |
|
| 190 |
+
## Notes On `pyproject.toml`
|
| 191 |
|
| 192 |
+
The packaging file is structurally correct for a `src/` layout and editable installs. The main gaps were deployment/runtime related rather than build-breaking:
|
| 193 |
|
| 194 |
+
- `openenv` is now version-bounded explicitly.
|
| 195 |
+
- `fastapi` and `uvicorn` are included because the repo now ships a real web server.
|
| 196 |
+
- pytest is pointed at the `tests/` directory, and the test suite also adds `src/` to `sys.path` so source-layout imports work reliably during local runs.
|
| 197 |
|
| 198 |
+
## Development Notes
|
| 199 |
|
| 200 |
+
The project keeps a lightweight local compatibility shim for `openenv` so the source tree remains importable even before dependencies are installed. In a normal install or Docker build, the real `openenv` package from PyPI is still used.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pyproject.toml
CHANGED
|
@@ -5,9 +5,16 @@ description = "OSINT-style multi-platform information ecosystem environment for
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
| 8 |
-
"openenv",
|
| 9 |
"openai>=1.40.0",
|
| 10 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
]
|
| 12 |
|
| 13 |
[project.scripts]
|
|
@@ -22,3 +29,6 @@ package-dir = {"" = "src"}
|
|
| 22 |
|
| 23 |
[tool.setuptools.packages.find]
|
| 24 |
where = ["src"]
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
| 8 |
+
"openenv>=0.1.13",
|
| 9 |
"openai>=1.40.0",
|
| 10 |
+
"fastapi>=0.115.0",
|
| 11 |
+
"requests>=2.32.3",
|
| 12 |
+
"uvicorn>=0.30.0",
|
| 13 |
+
]
|
| 14 |
+
|
| 15 |
+
[project.optional-dependencies]
|
| 16 |
+
dev = [
|
| 17 |
+
"pytest>=8.0.0",
|
| 18 |
]
|
| 19 |
|
| 20 |
[project.scripts]
|
|
|
|
| 29 |
|
| 30 |
[tool.setuptools.packages.find]
|
| 31 |
where = ["src"]
|
| 32 |
+
|
| 33 |
+
[tool.pytest.ini_options]
|
| 34 |
+
testpaths = ["tests"]
|
scripts/run_openai_baseline.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
from osint_env.baselines import OpenAIBaselineConfig, OpenAIBaselineRunner
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def build_parser() -> argparse.ArgumentParser:
|
| 11 |
+
parser = argparse.ArgumentParser(description="Run the reproducible OpenAI baseline on the fixed-level OSINT benchmark.")
|
| 12 |
+
parser.add_argument("--config", default="datasets/fixed_levels/shared_config_fixed_levels.json", help="Shared config JSON.")
|
| 13 |
+
parser.add_argument("--seed-file", default="datasets/fixed_levels/seed_fixed_levels.json", help="Fixed seed file JSON.")
|
| 14 |
+
parser.add_argument("--output", default="artifacts/baselines/openai_fixed_levels_latest.json", help="Baseline result JSON output path.")
|
| 15 |
+
parser.add_argument("--leaderboard", default="artifacts/baselines/openai_fixed_levels_leaderboard.json", help="Leaderboard JSON path.")
|
| 16 |
+
parser.add_argument("--dashboard", default="artifacts/baselines/openai_fixed_levels_dashboard.html", help="Dashboard HTML path.")
|
| 17 |
+
parser.add_argument("--run-name", default="openai_fixed_levels_baseline", help="Leaderboard run name.")
|
| 18 |
+
parser.add_argument("--model", default="gpt-4o-mini", help="OpenAI chat model name.")
|
| 19 |
+
parser.add_argument("--openai-base-url", default="https://api.openai.com/v1", help="OpenAI-compatible base URL.")
|
| 20 |
+
parser.add_argument("--openai-api-key", default="", help="OpenAI API key override.")
|
| 21 |
+
parser.add_argument("--openai-api-key-env", default="OPENAI_API_KEY", help="Environment variable name for the API key.")
|
| 22 |
+
parser.add_argument("--episodes", type=int, default=15, help="Number of episodes to evaluate.")
|
| 23 |
+
parser.add_argument("--max-steps", type=int, default=8, help="Episode step budget to keep runs bounded.")
|
| 24 |
+
parser.add_argument("--temperature", type=float, default=0.0, help="Sampling temperature.")
|
| 25 |
+
parser.add_argument("--max-tokens", type=int, default=256, help="Maximum completion tokens per step.")
|
| 26 |
+
parser.add_argument("--timeout-seconds", type=int, default=60, help="Per-request timeout.")
|
| 27 |
+
parser.add_argument("--seed", type=int, default=7, help="Request seed offset used for repeatable runs.")
|
| 28 |
+
parser.add_argument("--skip-leaderboard", action="store_true", help="Do not append the run to the leaderboard file.")
|
| 29 |
+
return parser
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main() -> None:
|
| 33 |
+
args = build_parser().parse_args()
|
| 34 |
+
api_key = args.openai_api_key or os.getenv(args.openai_api_key_env, "")
|
| 35 |
+
config = OpenAIBaselineConfig(
|
| 36 |
+
shared_config_path=args.config,
|
| 37 |
+
seed_file=args.seed_file,
|
| 38 |
+
output_path=args.output,
|
| 39 |
+
leaderboard_path=args.leaderboard,
|
| 40 |
+
dashboard_path=args.dashboard,
|
| 41 |
+
run_name=args.run_name,
|
| 42 |
+
model=args.model,
|
| 43 |
+
base_url=args.openai_base_url,
|
| 44 |
+
api_key=api_key,
|
| 45 |
+
api_key_env=args.openai_api_key_env,
|
| 46 |
+
temperature=args.temperature,
|
| 47 |
+
max_tokens=args.max_tokens,
|
| 48 |
+
timeout_seconds=args.timeout_seconds,
|
| 49 |
+
episodes=args.episodes,
|
| 50 |
+
max_steps=args.max_steps,
|
| 51 |
+
seed=args.seed,
|
| 52 |
+
append_leaderboard=not args.skip_leaderboard,
|
| 53 |
+
)
|
| 54 |
+
result = OpenAIBaselineRunner(config).run()
|
| 55 |
+
print(json.dumps({"summary": result["summary"], "output": args.output, "dashboard": args.dashboard}, indent=2, sort_keys=True))
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
|
| 59 |
+
main()
|
| 60 |
+
|
server.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from collections import Counter
|
| 6 |
+
from functools import lru_cache
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI
|
| 11 |
+
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
|
| 12 |
+
|
| 13 |
+
from osint_env.config import clone_environment_config, load_seeding_config, load_shared_config
|
| 14 |
+
from osint_env.env.environment import OSINTEnvironment
|
| 15 |
+
from osint_env.eval.runner import run_evaluation
|
| 16 |
+
from osint_env.llm import build_llm_client
|
| 17 |
+
from osint_env.viz import export_dashboard
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
SPACE_CONFIG_PATH = Path(os.getenv("OSINT_ENV_CONFIG", "datasets/fixed_levels/shared_config_fixed_levels.json"))
|
| 21 |
+
SPACE_SEED_PATH = Path(os.getenv("OSINT_ENV_SEED_FILE", "datasets/fixed_levels/seed_fixed_levels.json"))
|
| 22 |
+
SPACE_PROVIDER = os.getenv("OSINT_SPACE_LLM_PROVIDER", "mock")
|
| 23 |
+
SPACE_MODEL = os.getenv("OSINT_SPACE_LLM_MODEL", "gpt-4o-mini")
|
| 24 |
+
SPACE_PORT = int(os.getenv("PORT", "7860"))
|
| 25 |
+
SPACE_DASHBOARD = Path("artifacts/space_dashboard.html")
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _build_environment() -> OSINTEnvironment:
|
| 29 |
+
shared = load_shared_config(SPACE_CONFIG_PATH)
|
| 30 |
+
env_cfg = clone_environment_config(shared.environment)
|
| 31 |
+
if SPACE_SEED_PATH.exists():
|
| 32 |
+
env_cfg.seeding = load_seeding_config(SPACE_SEED_PATH)
|
| 33 |
+
env_cfg.llm.provider = SPACE_PROVIDER
|
| 34 |
+
env_cfg.llm.model = SPACE_MODEL
|
| 35 |
+
try:
|
| 36 |
+
llm = build_llm_client(env_cfg.llm)
|
| 37 |
+
except Exception:
|
| 38 |
+
env_cfg.llm.provider = "mock"
|
| 39 |
+
llm = build_llm_client(env_cfg.llm)
|
| 40 |
+
return OSINTEnvironment(env_cfg, llm=llm)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@lru_cache(maxsize=1)
def _space_snapshot() -> dict[str, Any]:
    """Build (once per process) the snapshot dict backing every Space endpoint.

    Runs a short 3-episode evaluation, exports the HTML dashboard, and returns
    summary metadata (task counts, difficulty mix, action/observation space
    descriptions, and config). lru_cache(maxsize=1) means this is computed on
    the first request and reused for the lifetime of the process.
    """
    env = _build_environment()
    evaluation = run_evaluation(env, episodes=3, return_details=True, llm=build_llm_client(env.config.llm))
    dashboard_path = export_dashboard(
        env=env,
        evaluation=evaluation,
        leaderboard_records=[],
        output_path=str(SPACE_DASHBOARD),
    )
    # Count tasks per difficulty label; unlabeled tasks are bucketed as "unknown".
    difficulty_counts = Counter(str(task.metadata.get("difficulty", "unknown")) for task in env.tasks)
    return {
        "dashboard_path": dashboard_path,
        "summary": evaluation["summary"],
        "task_count": len(env.tasks),
        "difficulty_counts": dict(difficulty_counts),
        "action_space": ["CALL_TOOL", "ADD_EDGE", "ANSWER"],
        "observation_space": {
            "tool_outputs": "Last tool results and memory hits.",
            "graph_snapshot": "Current working graph edge snapshot.",
            "action_history": "Recent action/reward trace.",
            "task": "Task id, task type, and question.",
        },
        "task_types": sorted({task.task_type for task in env.tasks}),
        "config": {
            "seed": env.config.seed,
            "max_steps": env.config.max_steps,
            "swarm_enabled": env.config.swarm.enabled,
            "llm_provider": env.config.llm.provider,
            "llm_model": env.config.llm.model,
        },
    }
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# FastAPI application serving the landing page, health check, metadata API, and dashboard.
app = FastAPI(title="OSINT OpenEnv Space", version="0.1.0")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
@app.get("/", response_class=HTMLResponse)
|
| 81 |
+
def home() -> str:
|
| 82 |
+
snapshot = _space_snapshot()
|
| 83 |
+
summary = snapshot["summary"]
|
| 84 |
+
difficulty_html = "".join(
|
| 85 |
+
f"<li><strong>{level}</strong>: {count}</li>"
|
| 86 |
+
for level, count in sorted(snapshot["difficulty_counts"].items())
|
| 87 |
+
)
|
| 88 |
+
task_type_html = "".join(f"<li>{task_type}</li>" for task_type in snapshot["task_types"])
|
| 89 |
+
return f"""<!doctype html>
|
| 90 |
+
<html lang="en">
|
| 91 |
+
<head>
|
| 92 |
+
<meta charset="utf-8" />
|
| 93 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 94 |
+
<title>OSINT OpenEnv Space</title>
|
| 95 |
+
<style>
|
| 96 |
+
:root {{
|
| 97 |
+
--ink: #13212d;
|
| 98 |
+
--muted: #4d5b69;
|
| 99 |
+
--line: #d8e2eb;
|
| 100 |
+
--card: #ffffff;
|
| 101 |
+
--bg: #f6fafc;
|
| 102 |
+
--brand: #0f766e;
|
| 103 |
+
--accent: #b45309;
|
| 104 |
+
}}
|
| 105 |
+
* {{ box-sizing: border-box; }}
|
| 106 |
+
body {{
|
| 107 |
+
margin: 0;
|
| 108 |
+
font-family: "Segoe UI", sans-serif;
|
| 109 |
+
color: var(--ink);
|
| 110 |
+
background:
|
| 111 |
+
radial-gradient(circle at top left, rgba(15,118,110,0.12), transparent 30%),
|
| 112 |
+
radial-gradient(circle at top right, rgba(180,83,9,0.10), transparent 28%),
|
| 113 |
+
var(--bg);
|
| 114 |
+
}}
|
| 115 |
+
.wrap {{ max-width: 1120px; margin: 0 auto; padding: 24px; }}
|
| 116 |
+
.hero, .grid {{ display: grid; gap: 16px; }}
|
| 117 |
+
.hero {{ grid-template-columns: 1.5fr 1fr; }}
|
| 118 |
+
.grid {{ grid-template-columns: repeat(3, minmax(0, 1fr)); margin-top: 16px; }}
|
| 119 |
+
.card {{
|
| 120 |
+
background: var(--card);
|
| 121 |
+
border: 1px solid var(--line);
|
| 122 |
+
border-radius: 18px;
|
| 123 |
+
padding: 18px;
|
| 124 |
+
box-shadow: 0 12px 24px rgba(19, 33, 45, 0.06);
|
| 125 |
+
}}
|
| 126 |
+
h1, h2 {{ margin-top: 0; }}
|
| 127 |
+
.muted {{ color: var(--muted); }}
|
| 128 |
+
.stats {{ display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 10px; }}
|
| 129 |
+
.stat {{ border: 1px dashed var(--line); border-radius: 12px; padding: 10px; }}
|
| 130 |
+
.stat .k {{ font-size: 12px; color: var(--muted); text-transform: uppercase; }}
|
| 131 |
+
.stat .v {{ font-size: 22px; font-weight: 700; }}
|
| 132 |
+
a.button {{
|
| 133 |
+
display: inline-block;
|
| 134 |
+
padding: 10px 14px;
|
| 135 |
+
border-radius: 12px;
|
| 136 |
+
text-decoration: none;
|
| 137 |
+
color: white;
|
| 138 |
+
background: var(--brand);
|
| 139 |
+
margin-right: 10px;
|
| 140 |
+
}}
|
| 141 |
+
a.link {{
|
| 142 |
+
color: var(--accent);
|
| 143 |
+
text-decoration: none;
|
| 144 |
+
font-weight: 600;
|
| 145 |
+
}}
|
| 146 |
+
ul {{ padding-left: 18px; }}
|
| 147 |
+
code {{
|
| 148 |
+
background: #f1f5f9;
|
| 149 |
+
border-radius: 6px;
|
| 150 |
+
padding: 2px 6px;
|
| 151 |
+
}}
|
| 152 |
+
@media (max-width: 900px) {{
|
| 153 |
+
.hero, .grid {{ grid-template-columns: 1fr; }}
|
| 154 |
+
}}
|
| 155 |
+
</style>
|
| 156 |
+
</head>
|
| 157 |
+
<body>
|
| 158 |
+
<div class="wrap">
|
| 159 |
+
<div class="hero">
|
| 160 |
+
<section class="card">
|
| 161 |
+
<h1>OSINT OpenEnv Space</h1>
|
| 162 |
+
<p class="muted">A containerized OpenEnv-compatible benchmark for synthetic OSINT reasoning over profiles, forum threads, posts, aliases, organizations, locations, and event links.</p>
|
| 163 |
+
<p>The Space boots with the fixed-level benchmark so visitors get a stable environment snapshot instead of a different graph every restart.</p>
|
| 164 |
+
<a class="button" href="/dashboard">Open Dashboard</a>
|
| 165 |
+
<a class="link" href="/api/environment">Environment JSON</a>
|
| 166 |
+
</section>
|
| 167 |
+
<section class="card">
|
| 168 |
+
<h2>Included Snapshot</h2>
|
| 169 |
+
<div class="stats">
|
| 170 |
+
<div class="stat"><div class="k">Tasks</div><div class="v">{snapshot["task_count"]}</div></div>
|
| 171 |
+
<div class="stat"><div class="k">Provider</div><div class="v">{snapshot["config"]["llm_provider"]}</div></div>
|
| 172 |
+
<div class="stat"><div class="k">Score</div><div class="v">{summary["leaderboard_score"]:.3f}</div></div>
|
| 173 |
+
<div class="stat"><div class="k">Success</div><div class="v">{summary["task_success_rate"]:.3f}</div></div>
|
| 174 |
+
</div>
|
| 175 |
+
</section>
|
| 176 |
+
</div>
|
| 177 |
+
|
| 178 |
+
<div class="grid">
|
| 179 |
+
<section class="card">
|
| 180 |
+
<h2>Action Space</h2>
|
| 181 |
+
<ul>
|
| 182 |
+
<li><code>CALL_TOOL</code>: query platform views or semantic memory.</li>
|
| 183 |
+
<li><code>ADD_EDGE</code>: add a hypothesized relation to the working graph.</li>
|
| 184 |
+
<li><code>ANSWER</code>: submit the final node id answer.</li>
|
| 185 |
+
</ul>
|
| 186 |
+
</section>
|
| 187 |
+
<section class="card">
|
| 188 |
+
<h2>Difficulty Mix</h2>
|
| 189 |
+
<ul>{difficulty_html}</ul>
|
| 190 |
+
</section>
|
| 191 |
+
<section class="card">
|
| 192 |
+
<h2>Task Families</h2>
|
| 193 |
+
<ul>{task_type_html}</ul>
|
| 194 |
+
</section>
|
| 195 |
+
</div>
|
| 196 |
+
</div>
|
| 197 |
+
</body>
|
| 198 |
+
</html>"""
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
@app.get("/healthz")
|
| 202 |
+
def healthz() -> JSONResponse:
|
| 203 |
+
return JSONResponse({"status": "ok"})
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
@app.get("/api/environment")
|
| 207 |
+
def environment_metadata() -> JSONResponse:
|
| 208 |
+
return JSONResponse(_space_snapshot())
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
@app.get("/dashboard")
|
| 212 |
+
def dashboard() -> FileResponse:
|
| 213 |
+
snapshot = _space_snapshot()
|
| 214 |
+
return FileResponse(snapshot["dashboard_path"], media_type="text/html")
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
if __name__ == "__main__":
    # Local development entry point; in the Docker image uvicorn is started
    # directly by CMD, so this branch only runs for `python server.py`.
    import uvicorn

    uvicorn.run("server:app", host="0.0.0.0", port=SPACE_PORT)
|
| 221 |
+
|
src/osint_env/baselines/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Baseline agent runners for the OSINT environment."""

from osint_env.baselines.openai_runner import OpenAIBaselineConfig, OpenAIBaselineRunner

__all__ = ["OpenAIBaselineConfig", "OpenAIBaselineRunner"]
|
| 4 |
+
|
src/osint_env/baselines/openai_runner.py
ADDED
|
@@ -0,0 +1,480 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from dataclasses import asdict, dataclass
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from time import perf_counter
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
from osint_env.config import clone_environment_config, load_seeding_config, load_shared_config
|
| 10 |
+
from osint_env.domain.models import Action, ActionType, Edge
|
| 11 |
+
from osint_env.env.environment import OSINTEnvironment
|
| 12 |
+
from osint_env.env.reward import compute_graph_f1
|
| 13 |
+
from osint_env.eval.leaderboard import append_leaderboard_record, load_leaderboard
|
| 14 |
+
from osint_env.eval.metrics import EvalMetrics
|
| 15 |
+
from osint_env.viz import export_dashboard
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
SYSTEM_PROMPT = """You are an OSINT benchmark agent operating in a synthetic OpenEnv task.
|
| 19 |
+
|
| 20 |
+
Available actions are provided as function tools. On every turn, call exactly one tool.
|
| 21 |
+
|
| 22 |
+
Rules:
|
| 23 |
+
- Solve the question using only tool outputs and the current graph snapshot.
|
| 24 |
+
- When you have enough evidence, call submit_answer with the exact node id string.
|
| 25 |
+
- Use add_edge only for relationships strongly supported by the evidence you have already collected.
|
| 26 |
+
- Prefer concise, high-signal tool queries.
|
| 27 |
+
- Never guess free-form prose when a node id answer is required.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@dataclass(slots=True)
class OpenAIBaselineConfig:
    """Settings for the OpenAI tool-calling baseline over the fixed-level benchmark."""

    # Environment definition and deterministic seed data.
    shared_config_path: str = "datasets/fixed_levels/shared_config_fixed_levels.json"
    seed_file: str = "datasets/fixed_levels/seed_fixed_levels.json"
    # Artifact destinations for the run payload, leaderboard, and dashboard.
    output_path: str = "artifacts/baselines/openai_fixed_levels_latest.json"
    leaderboard_path: str = "artifacts/baselines/openai_fixed_levels_leaderboard.json"
    dashboard_path: str = "artifacts/baselines/openai_fixed_levels_dashboard.html"
    run_name: str = "openai_fixed_levels_baseline"
    # OpenAI API settings; api_key_env names the env var referenced in the
    # missing-key error message raised by the runner.
    model: str = "gpt-4o-mini"
    base_url: str = "https://api.openai.com/v1"
    api_key: str = ""
    api_key_env: str = "OPENAI_API_KEY"
    # Sampling and request limits.
    temperature: float = 0.0
    max_tokens: int = 256
    timeout_seconds: int = 60
    # Evaluation budget: number of episodes and per-episode step cap.
    episodes: int = 15
    max_steps: int = 8
    # Base request seed; the episode index is added per episode for variety.
    seed: int | None = 7
    append_leaderboard: bool = True
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _tool_schema(
|
| 53 |
+
name: str,
|
| 54 |
+
description: str,
|
| 55 |
+
properties: dict[str, Any],
|
| 56 |
+
required: list[str],
|
| 57 |
+
) -> dict[str, Any]:
|
| 58 |
+
return {
|
| 59 |
+
"type": "function",
|
| 60 |
+
"function": {
|
| 61 |
+
"name": name,
|
| 62 |
+
"description": description,
|
| 63 |
+
"parameters": {
|
| 64 |
+
"type": "object",
|
| 65 |
+
"properties": properties,
|
| 66 |
+
"required": required,
|
| 67 |
+
"additionalProperties": False,
|
| 68 |
+
},
|
| 69 |
+
},
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def build_action_tools() -> list[dict[str, Any]]:
    """Return the OpenAI function-tool schemas for every environment action.

    The table below lists (name, description, parameter properties, required
    parameter names) for each tool; each row is wrapped into the standard
    envelope by _tool_schema.
    """
    specs: list[tuple[str, str, dict[str, Any], list[str]]] = [
        (
            "search_posts",
            "Search microblog posts by substring query.",
            {"query": {"type": "string", "description": "Substring to search for in post text."}},
            ["query"],
        ),
        (
            "get_user_posts",
            "Fetch posts authored by a user or alias id.",
            {"user_id": {"type": "string", "description": "User or alias node id."}},
            ["user_id"],
        ),
        (
            "get_mentions",
            "Fetch posts that mention a given canonical user id.",
            {"user_id": {"type": "string", "description": "Canonical user node id."}},
            ["user_id"],
        ),
        (
            "search_threads",
            "Search forum threads by exact topic name.",
            {"topic": {"type": "string", "description": "Thread topic such as security or ai."}},
            ["topic"],
        ),
        (
            "get_thread",
            "Fetch a specific forum thread by id.",
            {"thread_id": {"type": "string", "description": "Thread node id."}},
            ["thread_id"],
        ),
        (
            "get_user_activity",
            "Fetch a user's known forum activity.",
            {"user_id": {"type": "string", "description": "Canonical user node id."}},
            ["user_id"],
        ),
        (
            "get_profile",
            "Fetch a profile record by canonical user id.",
            {"user_id": {"type": "string", "description": "Canonical user node id."}},
            ["user_id"],
        ),
        (
            "search_people",
            "Search profiles by name and or organization.",
            {
                "name": {"type": "string", "description": "Optional name substring.", "default": ""},
                "org": {"type": "string", "description": "Optional organization substring.", "default": ""},
            },
            [],
        ),
        (
            "get_connections",
            "Fetch explicit profile connections for a user.",
            {"user_id": {"type": "string", "description": "Canonical user node id."}},
            ["user_id"],
        ),
        (
            "search_memory",
            "Search semantic memory over prior observations and tool outputs.",
            {
                "query": {"type": "string", "description": "Memory retrieval query."},
                "k": {"type": "integer", "description": "Top-k matches.", "default": 5},
            },
            ["query"],
        ),
        (
            "add_edge",
            "Add a supported graph edge to the working memory graph.",
            {
                "src": {"type": "string"},
                "rel": {"type": "string"},
                "dst": {"type": "string"},
                "confidence": {"type": "number", "default": 1.0},
            },
            ["src", "rel", "dst"],
        ),
        (
            "submit_answer",
            "Finish the episode by submitting the exact node id answer.",
            {"answer": {"type": "string", "description": "Exact node id answer for the task."}},
            ["answer"],
        ),
    ]
    return [_tool_schema(name, desc, props, req) for name, desc, props, req in specs]
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _message_text(message: Any) -> str:
|
| 162 |
+
content = getattr(message, "content", "")
|
| 163 |
+
if isinstance(content, str):
|
| 164 |
+
return content
|
| 165 |
+
if isinstance(content, list):
|
| 166 |
+
parts: list[str] = []
|
| 167 |
+
for item in content:
|
| 168 |
+
if isinstance(item, dict) and item.get("type") == "text":
|
| 169 |
+
parts.append(str(item.get("text", "")))
|
| 170 |
+
else:
|
| 171 |
+
text = getattr(item, "text", None)
|
| 172 |
+
if text:
|
| 173 |
+
parts.append(str(text))
|
| 174 |
+
return "\n".join(part for part in parts if part)
|
| 175 |
+
return str(content or "")
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def _safe_info(info: dict[str, Any]) -> dict[str, Any]:
|
| 179 |
+
return {
|
| 180 |
+
"step_count": int(info.get("step_count", 0)),
|
| 181 |
+
"total_reward": float(info.get("total_reward", 0.0)),
|
| 182 |
+
"tool_calls": int(info.get("tool_calls", 0)),
|
| 183 |
+
"redundant_tool_calls": int(info.get("redundant_tool_calls", 0)),
|
| 184 |
+
"reward_components": dict(info.get("reward_components", {})),
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def _observation_payload(env: OSINTEnvironment, observation: Any, step_limit: int) -> dict[str, Any]:
|
| 189 |
+
task = dict(observation.task)
|
| 190 |
+
return {
|
| 191 |
+
"task": {
|
| 192 |
+
"task_id": task.get("task_id", ""),
|
| 193 |
+
"task_type": task.get("task_type", ""),
|
| 194 |
+
"question": task.get("question", ""),
|
| 195 |
+
},
|
| 196 |
+
"remaining_steps": max(0, step_limit - int(env.state.step_count if env.state else 0)),
|
| 197 |
+
"recent_tool_outputs": list(observation.tool_outputs),
|
| 198 |
+
"graph_snapshot": dict(observation.graph_snapshot),
|
| 199 |
+
"recent_action_history": list(observation.action_history),
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
class OpenAIBaselineRunner:
    """Runs the OpenAI tool-calling baseline against the OSINT environment.

    Each episode is a chat-completions loop: the model receives the task
    observation as JSON, must call exactly one tool per turn, and the tool
    result (plus the updated observation) is fed back until it submits an
    answer or the step budget runs out. Results are written to disk, appended
    to a leaderboard, and rendered into an HTML dashboard.
    """

    def __init__(self, config: OpenAIBaselineConfig):
        """Validate the API key and construct the OpenAI client and tool schemas.

        Raises:
            ValueError: if no API key was supplied via config.
        """
        self.config = config

        # Imported lazily so the package is usable without the openai dependency.
        from openai import OpenAI

        if not config.api_key:
            raise ValueError(
                "OpenAI baseline requires an API key. "
                f"Set {config.api_key_env} or pass --openai-api-key."
            )

        self.client = OpenAI(
            api_key=config.api_key,
            base_url=config.base_url,
            timeout=config.timeout_seconds,
        )
        self.tools = build_action_tools()

    @staticmethod
    def _is_gpt5_family(model: str) -> bool:
        """Return True for model names in the GPT-5 family (prefix match)."""
        return str(model).strip().lower().startswith("gpt-5")

    def _request_kwargs(self, messages: list[dict[str, Any]], episode_index: int) -> dict[str, Any]:
        """Build chat.completions.create kwargs for one turn.

        Forces exactly one tool call per turn (tool_choice="required",
        parallel_tool_calls=False) and derives a per-episode request seed from
        the configured base seed.
        """
        kwargs: dict[str, Any] = {
            "model": self.config.model,
            "messages": messages,
            "tools": self.tools,
            "tool_choice": "required",
            "parallel_tool_calls": False,
            "max_completion_tokens": self.config.max_tokens,
        }
        if self.config.seed is not None:
            kwargs["seed"] = int(self.config.seed) + episode_index

        if self._is_gpt5_family(self.config.model):
            # GPT-5 family chat-completions compatibility:
            # use max_completion_tokens and avoid temperature for older GPT-5 models.
            # NOTE(review): reasoning_effort "none" assumes a GPT-5 variant that
            # accepts that value — confirm against the models actually in use.
            kwargs["reasoning_effort"] = "none"
        else:
            kwargs["temperature"] = self.config.temperature

        return kwargs

    def _build_environment(self) -> OSINTEnvironment:
        """Construct the environment with a mock LLM (the agent is OpenAI, not the env)."""
        shared = load_shared_config(self.config.shared_config_path)
        env_cfg = clone_environment_config(shared.environment)
        env_cfg.seeding = load_seeding_config(self.config.seed_file)
        env_cfg.llm.provider = "mock"
        env_cfg.llm.model = self.config.model
        env_cfg.llm.temperature = self.config.temperature
        env_cfg.llm.max_tokens = self.config.max_tokens
        # The baseline may only shrink the environment's step budget, not grow it.
        env_cfg.max_steps = min(int(env_cfg.max_steps), int(self.config.max_steps))
        return OSINTEnvironment(env_cfg)

    def _execute_action(
        self,
        env: OSINTEnvironment,
        tool_name: str,
        args: dict[str, Any],
    ) -> tuple[Any, float, bool, dict[str, Any], dict[str, Any]]:
        """Translate a model tool call into an environment step.

        Returns (observation, reward, done, info, result) where result is the
        tool-specific payload echoed back to the model.
        """
        if tool_name == "submit_answer":
            answer = str(args.get("answer", "")).strip()
            obs, reward, done, info = env.step(Action(ActionType.ANSWER, {"answer": answer}))
            result = {"submitted_answer": answer}
            return obs, reward, done, info, result

        if tool_name == "add_edge":
            payload = {
                "src": str(args.get("src", "")).strip(),
                "rel": str(args.get("rel", "")).strip(),
                "dst": str(args.get("dst", "")).strip(),
                "confidence": float(args.get("confidence", 1.0)),
            }
            obs, reward, done, info = env.step(Action(ActionType.ADD_EDGE, payload))
            return obs, reward, done, info, payload

        # Any other tool name is forwarded as a generic CALL_TOOL action; the
        # result is the latest tool output recorded in the observation.
        payload = {"tool_name": tool_name, "args": dict(args)}
        obs, reward, done, info = env.step(Action(ActionType.CALL_TOOL, payload))
        result = obs.tool_outputs[-1]["output"] if obs.tool_outputs else {}
        return obs, reward, done, info, result

    def _episode(self, env: OSINTEnvironment, episode_index: int) -> tuple[dict[str, Any], dict[str, Any]]:
        """Run one episode and return (final info dict, turn trace).

        The info dict is augmented with OpenAI system fingerprints and summed
        token usage for the episode.
        """
        obs = env.reset()
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": json.dumps(_observation_payload(env, obs, env.config.max_steps), indent=2, sort_keys=True),
            },
        ]

        turn_trace: list[dict[str, Any]] = []
        raw_fingerprints: list[str] = []
        info: dict[str, Any] = {}
        done = False
        usage_totals = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}

        while not done and env.state is not None and env.state.step_count < env.config.max_steps:
            completion = self.client.chat.completions.create(**self._request_kwargs(messages, episode_index))
            # Record reproducibility metadata and accumulate token usage.
            if getattr(completion, "system_fingerprint", None):
                raw_fingerprints.append(str(completion.system_fingerprint))
            if getattr(completion, "usage", None) is not None:
                usage_totals["prompt_tokens"] += int(getattr(completion.usage, "prompt_tokens", 0) or 0)
                usage_totals["completion_tokens"] += int(getattr(completion.usage, "completion_tokens", 0) or 0)
                usage_totals["total_tokens"] += int(getattr(completion.usage, "total_tokens", 0) or 0)

            message = completion.choices[0].message
            content = _message_text(message)
            tool_calls = list(message.tool_calls or [])
            if not tool_calls:
                # The model replied with prose despite tool_choice="required":
                # treat the text (or "unknown") as a final answer and stop.
                fallback_answer = content.strip() or "unknown"
                obs, reward, done, info = env.step(Action(ActionType.ANSWER, {"answer": fallback_answer}))
                tool_result = {
                    "submitted_answer": fallback_answer,
                    "reward": reward,
                    "done": done,
                    "observation": _observation_payload(env, obs, env.config.max_steps),
                    "info": _safe_info(info),
                }
                messages.append({"role": "assistant", "content": content})
                # This synthetic tool message is never sent (the loop breaks);
                # it is kept only so the transcript stays complete.
                messages.append({"role": "tool", "tool_call_id": "fallback_submit", "content": json.dumps(tool_result)})
                turn_trace.append({"assistant_content": content, "tool_name": "submit_answer", "args": {"answer": fallback_answer}})
                break

            # parallel_tool_calls=False means at most one call; take the first.
            tool_call = tool_calls[0]
            tool_name = str(tool_call.function.name)
            try:
                args = json.loads(tool_call.function.arguments or "{}")
            except json.JSONDecodeError:
                args = {}
            if not isinstance(args, dict):
                args = {}

            obs, reward, done, info, result = self._execute_action(env, tool_name, args)
            tool_payload = {
                "tool_name": tool_name,
                "args": args,
                "result": result,
                "reward": reward,
                "done": done,
                "observation": _observation_payload(env, obs, env.config.max_steps),
                "info": _safe_info(info),
            }
            # Echo the tool call back in OpenAI's expected assistant/tool format.
            assistant_message = {
                "role": "assistant",
                "content": content,
                "tool_calls": [
                    {
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": tool_name,
                            "arguments": json.dumps(args, sort_keys=True),
                        },
                    }
                ],
            }
            messages.append(assistant_message)
            messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": json.dumps(tool_payload, sort_keys=True)})
            turn_trace.append({"assistant_content": content, "tool_name": tool_name, "args": args, "reward": reward, "done": done})

        if not done:
            # Step budget exhausted without an answer: force a terminal "unknown".
            obs, _, done, info = env.step(Action(ActionType.ANSWER, {"answer": "unknown"}))
            turn_trace.append({"assistant_content": "", "tool_name": "submit_answer", "args": {"answer": "unknown"}, "reward": 0.0, "done": done})

        info = dict(info)
        info["openai_system_fingerprints"] = raw_fingerprints
        info["usage"] = usage_totals
        return info, {"turns": turn_trace}

    def run(self) -> dict[str, Any]:
        """Run all configured episodes, persist artifacts, and return the payload.

        Side effects: writes the run JSON to config.output_path, exports the
        HTML dashboard, and (when enabled) appends a leaderboard record and
        rewrites the output with it included.
        """
        env = self._build_environment()
        metrics = EvalMetrics()
        episode_rows: list[dict[str, Any]] = []

        started = perf_counter()
        run_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        for episode_index in range(int(self.config.episodes)):
            info, trace = self._episode(env, episode_index)
            episode_usage = dict(info.get("usage", {}))
            for key in run_usage:
                run_usage[key] += int(episode_usage.get(key, 0) or 0)
            # env.state still holds the just-finished episode's task/graph here.
            task_type = env.state.task.task_type if env.state else "unknown"
            task_id = env.state.task.task_id if env.state else f"episode_{episode_index}"
            truth = env.state.task.supporting_edges if env.state else []
            pred = env.memory_graph.edges if env.state else []
            graph_f1 = compute_graph_f1(pred, truth)
            metrics.add(info, task_type=task_type, graph_f1=graph_f1)
            episode_rows.append(
                {
                    "task_id": task_id,
                    "task_type": task_type,
                    "question": env.state.task.question if env.state else "",
                    "task_answer": str(info.get("task_answer", "")),
                    "agent_answer": str(info.get("agent_answer", "")) if info.get("agent_answer") is not None else "",
                    "graph_f1": graph_f1,
                    "reward": float(info.get("total_reward", 0.0)),
                    "steps": int(info.get("step_count", 0)),
                    "tool_calls": int(info.get("tool_calls", 0)),
                    # NOTE(review): when both keys are absent this compares
                    # None == None and counts as success — confirm the env
                    # always populates agent_answer/task_answer.
                    "success": int(info.get("agent_answer") == info.get("task_answer")),
                    "reward_components": dict(info.get("reward_components", {})),
                    "pred_edges": [
                        {
                            "src": edge.src,
                            "rel": edge.rel,
                            "dst": edge.dst,
                            "confidence": float(edge.confidence),
                        }
                        for edge in pred
                    ],
                    "truth_edges": [
                        {
                            "src": edge.src,
                            "rel": edge.rel,
                            "dst": edge.dst,
                            "confidence": float(edge.confidence),
                        }
                        for edge in truth
                    ],
                    "trace": trace,
                    "openai_system_fingerprints": list(info.get("openai_system_fingerprints", [])),
                    "usage": episode_usage,
                }
            )

        summary = metrics.summary()
        duration_seconds = perf_counter() - started
        dashboard_path = export_dashboard(
            env=env,
            evaluation={"summary": summary, "episodes": episode_rows},
            leaderboard_records=load_leaderboard(self.config.leaderboard_path),
            output_path=self.config.dashboard_path,
        )

        payload: dict[str, Any] = {
            "run": {
                "name": self.config.run_name,
                "model": self.config.model,
                "episodes": int(self.config.episodes),
                "temperature": float(self.config.temperature),
                "max_tokens": int(self.config.max_tokens),
                "timeout_seconds": int(self.config.timeout_seconds),
                "max_steps": int(self.config.max_steps),
                "seed": self.config.seed,
                "shared_config_path": self.config.shared_config_path,
                "seed_file": self.config.seed_file,
                "duration_seconds": duration_seconds,
                "dashboard_path": dashboard_path,
            },
            "summary": summary,
            "usage": run_usage,
            "episodes": episode_rows,
        }

        output = Path(self.config.output_path)
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")

        if self.config.append_leaderboard:
            record = append_leaderboard_record(
                path=self.config.leaderboard_path,
                summary=summary,
                episodes=int(self.config.episodes),
                run_name=self.config.run_name,
                config={
                    "provider": "openai",
                    "model": self.config.model,
                    "seed": self.config.seed,
                    "max_steps": self.config.max_steps,
                    "shared_config_path": self.config.shared_config_path,
                    "seed_file": self.config.seed_file,
                },
            )
            # Re-write the output so the persisted payload includes the record.
            payload["record"] = record
            output.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")

        return payload
|
src/osint_env/cli.py
CHANGED
|
@@ -282,7 +282,7 @@ def main() -> None:
|
|
| 282 |
if args.with_demo:
|
| 283 |
_runner_for(env).run_episode()
|
| 284 |
info = {
|
| 285 |
-
"agent_answer": env.state.
|
| 286 |
"task_answer": env.state.task.answer if env.state else "",
|
| 287 |
"total_reward": env.state.total_reward if env.state else 0.0,
|
| 288 |
"step_count": env.state.step_count if env.state else 0,
|
|
|
|
| 282 |
if args.with_demo:
|
| 283 |
_runner_for(env).run_episode()
|
| 284 |
info = {
|
| 285 |
+
"agent_answer": env.state.answer if env.state else "",
|
| 286 |
"task_answer": env.state.task.answer if env.state else "",
|
| 287 |
"total_reward": env.state.total_reward if env.state else 0.0,
|
| 288 |
"step_count": env.state.step_count if env.state else 0,
|
src/osint_env/env/environment.py
CHANGED
|
@@ -3,10 +3,9 @@ from __future__ import annotations
|
|
| 3 |
from dataclasses import dataclass, field
|
| 4 |
from typing import TYPE_CHECKING, Any
|
| 5 |
|
| 6 |
-
from openenv.env import Env
|
| 7 |
-
|
| 8 |
from osint_env.data.generator import DatasetGenerator
|
| 9 |
from osint_env.domain.models import Action, ActionType, Edge, EnvironmentConfig, Observation, TaskInstance
|
|
|
|
| 10 |
from osint_env.env.reward import (
|
| 11 |
build_reward_model,
|
| 12 |
compute_answer_reward,
|
|
|
|
| 3 |
from dataclasses import dataclass, field
|
| 4 |
from typing import TYPE_CHECKING, Any
|
| 5 |
|
|
|
|
|
|
|
| 6 |
from osint_env.data.generator import DatasetGenerator
|
| 7 |
from osint_env.domain.models import Action, ActionType, Edge, EnvironmentConfig, Observation, TaskInstance
|
| 8 |
+
from osint_env.env.openenv_compat import Env
|
| 9 |
from osint_env.env.reward import (
|
| 10 |
build_reward_model,
|
| 11 |
compute_answer_reward,
|
src/osint_env/env/openenv_compat.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
try:
|
| 4 |
+
from openenv.env import Env
|
| 5 |
+
except ImportError:
|
| 6 |
+
class Env:
|
| 7 |
+
"""Minimal fallback used when openenv is not installed locally."""
|
| 8 |
+
|
| 9 |
+
def __init__(
|
| 10 |
+
self,
|
| 11 |
+
name: str,
|
| 12 |
+
state_space: str,
|
| 13 |
+
action_space: list[str],
|
| 14 |
+
episode_max_length: int,
|
| 15 |
+
) -> None:
|
| 16 |
+
self.name = name
|
| 17 |
+
self.state_space = state_space
|
| 18 |
+
self.action_space = action_space
|
| 19 |
+
self.episode_max_length = episode_max_length
|
| 20 |
+
|
tests/conftest.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
ROOT = Path(__file__).resolve().parents[1]
|
| 8 |
+
SRC = ROOT / "src"
|
| 9 |
+
|
| 10 |
+
if str(SRC) not in sys.path:
|
| 11 |
+
sys.path.insert(0, str(SRC))
|
| 12 |
+
|
tests/test_openai_baseline.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from osint_env.baselines.openai_runner import OpenAIBaselineConfig, OpenAIBaselineRunner, build_action_tools
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def test_openai_baseline_toolset_contains_answer_and_graph_actions():
|
| 5 |
+
tools = build_action_tools()
|
| 6 |
+
names = {tool["function"]["name"] for tool in tools}
|
| 7 |
+
assert "submit_answer" in names
|
| 8 |
+
assert "add_edge" in names
|
| 9 |
+
assert "search_memory" in names
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_gpt5_request_kwargs_avoid_temperature_and_use_max_completion_tokens():
|
| 13 |
+
runner = OpenAIBaselineRunner.__new__(OpenAIBaselineRunner)
|
| 14 |
+
runner.config = OpenAIBaselineConfig(model="gpt-5-nano", max_tokens=321, temperature=0.0, seed=7)
|
| 15 |
+
runner.tools = build_action_tools()
|
| 16 |
+
kwargs = runner._request_kwargs(messages=[{"role": "user", "content": "hi"}], episode_index=0)
|
| 17 |
+
assert kwargs["max_completion_tokens"] == 321
|
| 18 |
+
assert kwargs["reasoning_effort"] == "none"
|
| 19 |
+
assert "temperature" not in kwargs
|
tests/test_server.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi.testclient import TestClient
|
| 2 |
+
|
| 3 |
+
from server import app
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
client = TestClient(app)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_server_health():
|
| 10 |
+
response = client.get("/healthz")
|
| 11 |
+
assert response.status_code == 200
|
| 12 |
+
assert response.json()["status"] == "ok"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def test_server_environment_metadata():
|
| 16 |
+
response = client.get("/api/environment")
|
| 17 |
+
assert response.status_code == 200
|
| 18 |
+
body = response.json()
|
| 19 |
+
assert "action_space" in body
|
| 20 |
+
assert "observation_space" in body
|
| 21 |
+
assert "summary" in body
|
| 22 |
+
|