Siddeshwar1625 committed on
Commit
87f8562
·
1 Parent(s): 4de4725

Removed unintended embedded repo and added to gitignore

Browse files
.gitignore CHANGED
@@ -4,3 +4,4 @@ blueprint.txt
4
  artifacts/*
5
  *.html
6
  .venv/
 
 
4
  artifacts/*
5
  *.html
6
  .venv/
7
+ .tmp_compare/
.tmp_compare/Meta-s-LedgerShield ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit fd5c9b60ddfbd2eba9d09001938b63169ac98f7b
README.md CHANGED
@@ -209,12 +209,13 @@ The FastAPI app serves:
209
  - `/`: overview page
210
  - `/dashboard`: generated benchmark dashboard
211
  - `/api/environment`: environment metadata
212
- - `/healthz`: health check
 
213
  - `/openenv.yaml`: OpenEnv HTTP spec stub
214
  - `/openenv/tasks`: task enumeration
215
- - `/openenv/reset`: episode reset endpoint
216
- - `/openenv/step`: episode step endpoint
217
- - `/openenv/state/{session_id}`: current session state endpoint
218
 
219
  ## Automated Validation
220
 
 
209
  - `/`: overview page
210
  - `/dashboard`: generated benchmark dashboard
211
  - `/api/environment`: environment metadata
212
+ - `/health`: health check (validator-friendly alias)
213
+ - `/healthz`: health check (legacy alias)
214
  - `/openenv.yaml`: OpenEnv HTTP spec stub
215
  - `/openenv/tasks`: task enumeration
216
+ - `/reset` and `/openenv/reset`: episode reset endpoints
217
+ - `/step` and `/openenv/step`: episode step endpoints
218
+ - `/state` and `/openenv/state/{session_id}`: session state endpoints (`/state` returns the latest session)
219
 
220
  ## Automated Validation
221
 
datasets/fixed_levels/leaderboard_fixed_levels.json CHANGED
@@ -881,5 +881,48 @@
881
  },
882
  "run_id": "run_0021",
883
  "run_name": "fixed_levels_qwen_swarm"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
884
  }
885
  ]
 
881
  },
882
  "run_id": "run_0021",
883
  "run_name": "fixed_levels_qwen_swarm"
884
+ },
885
+ {
886
+ "config": {
887
+ "llm_model": "gpt-5.4-mini",
888
+ "llm_provider": "openai",
889
+ "max_agents": 3,
890
+ "max_breadth": 2,
891
+ "max_depth": 2,
892
+ "max_steps": 24,
893
+ "max_width": 2,
894
+ "seed": 2026,
895
+ "seeded_questions": 30,
896
+ "swarm_enabled": true
897
+ },
898
+ "created_at": "2026-04-07T15:59:20+00:00",
899
+ "episodes": 1,
900
+ "metrics": {
901
+ "avg_compactness_reward": 0.0,
902
+ "avg_connectivity_gain_reward": 0.0,
903
+ "avg_connectivity_reward": 0.0,
904
+ "avg_diversity_reward": 0.0,
905
+ "avg_entity_informativeness_reward": 0.0,
906
+ "avg_format_reward": 0.15,
907
+ "avg_graph_f1": 0.0,
908
+ "avg_knowledge_carrier_reward": 0.0,
909
+ "avg_knowledge_indexing_reward": 0.0,
910
+ "avg_relation_informativeness_reward": 0.0,
911
+ "avg_reward": 0.5519400198339021,
912
+ "avg_soft_shaping_reward": 0.0,
913
+ "avg_spawn_count": 0.0,
914
+ "avg_spawn_critical_steps": 0.0,
915
+ "avg_steps_to_solution": 1.0,
916
+ "deanonymization_accuracy": 0.0,
917
+ "leaderboard_score": 0.2785970009916951,
918
+ "retrieval_signal": 0.5,
919
+ "spawn_completion_rate": 0.0,
920
+ "spawn_signal": 0.4,
921
+ "structural_signal": 0.5,
922
+ "task_success_rate": 0.0,
923
+ "tool_efficiency": 1.0
924
+ },
925
+ "run_id": "run_0022",
926
+ "run_name": "fixed_levels_qwen_swarm"
927
  }
928
  ]
inference.py CHANGED
@@ -97,7 +97,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: str | Non
97
  def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
98
  rewards_text = ",".join(f"{value:.2f}" for value in rewards)
99
  print(
100
- f"[END] success={str(bool(success)).lower()} steps={steps} score={score:.3f} rewards={rewards_text}",
101
  flush=True,
102
  )
103
 
 
97
  def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
98
  rewards_text = ",".join(f"{value:.2f}" for value in rewards)
99
  print(
100
+ f"[END] success={str(bool(success)).lower()} steps={steps} score={score:.2f} rewards={rewards_text}",
101
  flush=True,
102
  )
103
 
openenv.yaml CHANGED
@@ -7,7 +7,7 @@ transport:
7
  endpoints:
8
  health:
9
  method: GET
10
- path: /healthz
11
  metadata:
12
  method: GET
13
  path: /api/environment
@@ -16,13 +16,13 @@ endpoints:
16
  path: /openenv/tasks
17
  reset:
18
  method: POST
19
- path: /openenv/reset
20
  step:
21
  method: POST
22
- path: /openenv/step
23
  state:
24
  method: GET
25
- path: /openenv/state/{session_id}
26
  models:
27
  action_space:
28
  - CALL_TOOL
 
7
  endpoints:
8
  health:
9
  method: GET
10
+ path: /health
11
  metadata:
12
  method: GET
13
  path: /api/environment
 
16
  path: /openenv/tasks
17
  reset:
18
  method: POST
19
+ path: /reset
20
  step:
21
  method: POST
22
+ path: /step
23
  state:
24
  method: GET
25
+ path: /state
26
  models:
27
  action_space:
28
  - CALL_TOOL
server.py CHANGED
@@ -43,6 +43,7 @@ OPENENV_SPEC_PATH = Path("openenv.yaml")
43
  _SESSION_LOCK = Lock()
44
  _SESSIONS: dict[str, OSINTEnvironment] = {}
45
  _RESET_COUNTER = 0
 
46
 
47
 
48
  def _load_json(path: Path) -> dict[str, Any] | None:
@@ -147,8 +148,26 @@ def _get_session_env(session_id: str) -> OSINTEnvironment:
147
 
148
 
149
  def _store_session(session_id: str, env: OSINTEnvironment) -> None:
 
150
  with _SESSION_LOCK:
151
  _SESSIONS[session_id] = env
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
 
154
  def _task_lookup(env: OSINTEnvironment) -> dict[str, Any]:
@@ -400,6 +419,11 @@ def healthz() -> JSONResponse:
400
  return JSONResponse({"status": "ok"})
401
 
402
 
 
 
 
 
 
403
  @app.get("/openenv.yaml")
404
  def openenv_spec() -> FileResponse:
405
  return FileResponse(OPENENV_SPEC_PATH, media_type="text/yaml")
@@ -456,8 +480,12 @@ async def openenv_reset(request: Request) -> OpenEnvResponseEnvelope:
456
 
457
 
458
  @app.post("/openenv/step", response_model=OpenEnvResponseEnvelope)
 
 
 
459
  def openenv_step(request: OpenEnvActionRequest) -> OpenEnvResponseEnvelope:
460
- env = _get_session_env(request.session_id)
 
461
  action_type_raw = request.resolved_action_type().strip()
462
  if not action_type_raw:
463
  raise HTTPException(status_code=400, detail="Missing action_type")
@@ -467,7 +495,7 @@ def openenv_step(request: OpenEnvActionRequest) -> OpenEnvResponseEnvelope:
467
  raise HTTPException(status_code=400, detail=f"Unsupported action_type {action_type_raw}") from exc
468
  observation, reward, done, info = env.step(Action(action_type=action_type, payload=request.resolved_payload()))
469
  return OpenEnvResponseEnvelope(
470
- session_id=request.session_id,
471
  observation=_serialize_observation(observation),
472
  reward=float(reward),
473
  done=bool(done),
@@ -475,8 +503,7 @@ def openenv_step(request: OpenEnvActionRequest) -> OpenEnvResponseEnvelope:
475
  )
476
 
477
 
478
- @app.get("/openenv/state/{session_id}", response_model=OpenEnvResponseEnvelope)
479
- def openenv_state(session_id: str) -> OpenEnvResponseEnvelope:
480
  env = _get_session_env(session_id)
481
  if env.state is None:
482
  raise HTTPException(status_code=400, detail="Session has not been reset yet")
@@ -489,6 +516,18 @@ def openenv_state(session_id: str) -> OpenEnvResponseEnvelope:
489
  )
490
 
491
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  @app.post("/openenv/report_inference", response_model=OpenEnvInferenceReportResponse)
493
  def openenv_report_inference(request: OpenEnvInferenceReportRequest) -> OpenEnvInferenceReportResponse:
494
  env = _build_environment()
 
43
  _SESSION_LOCK = Lock()
44
  _SESSIONS: dict[str, OSINTEnvironment] = {}
45
  _RESET_COUNTER = 0
46
+ _LATEST_SESSION_ID: str | None = None
47
 
48
 
49
  def _load_json(path: Path) -> dict[str, Any] | None:
 
148
 
149
 
150
  def _store_session(session_id: str, env: OSINTEnvironment) -> None:
151
+ global _LATEST_SESSION_ID
152
  with _SESSION_LOCK:
153
  _SESSIONS[session_id] = env
154
+ _LATEST_SESSION_ID = session_id
155
+
156
+
157
+ def _latest_session_id() -> str:
158
+ with _SESSION_LOCK:
159
+ if _LATEST_SESSION_ID and _LATEST_SESSION_ID in _SESSIONS:
160
+ return _LATEST_SESSION_ID
161
+ if _SESSIONS:
162
+ return next(reversed(_SESSIONS))
163
+ raise HTTPException(status_code=404, detail="No active session. Call /reset first.")
164
+
165
+
166
+ def _resolve_session_id(session_id: str | None) -> str:
167
+ token = str(session_id or "").strip()
168
+ if token:
169
+ return token
170
+ return _latest_session_id()
171
 
172
 
173
  def _task_lookup(env: OSINTEnvironment) -> dict[str, Any]:
 
419
  return JSONResponse({"status": "ok"})
420
 
421
 
422
+ @app.get("/health")
423
+ def health() -> JSONResponse:
424
+ return healthz()
425
+
426
+
427
  @app.get("/openenv.yaml")
428
  def openenv_spec() -> FileResponse:
429
  return FileResponse(OPENENV_SPEC_PATH, media_type="text/yaml")
 
480
 
481
 
482
  @app.post("/openenv/step", response_model=OpenEnvResponseEnvelope)
483
+ @app.post("/openenv/step/", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
484
+ @app.post("/step", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
485
+ @app.post("/step/", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
486
  def openenv_step(request: OpenEnvActionRequest) -> OpenEnvResponseEnvelope:
487
+ session_id = _resolve_session_id(request.session_id)
488
+ env = _get_session_env(session_id)
489
  action_type_raw = request.resolved_action_type().strip()
490
  if not action_type_raw:
491
  raise HTTPException(status_code=400, detail="Missing action_type")
 
495
  raise HTTPException(status_code=400, detail=f"Unsupported action_type {action_type_raw}") from exc
496
  observation, reward, done, info = env.step(Action(action_type=action_type, payload=request.resolved_payload()))
497
  return OpenEnvResponseEnvelope(
498
+ session_id=session_id,
499
  observation=_serialize_observation(observation),
500
  reward=float(reward),
501
  done=bool(done),
 
503
  )
504
 
505
 
506
+ def _state_response(session_id: str) -> OpenEnvResponseEnvelope:
 
507
  env = _get_session_env(session_id)
508
  if env.state is None:
509
  raise HTTPException(status_code=400, detail="Session has not been reset yet")
 
516
  )
517
 
518
 
519
+ @app.get("/openenv/state/{session_id}", response_model=OpenEnvResponseEnvelope)
520
+ def openenv_state(session_id: str) -> OpenEnvResponseEnvelope:
521
+ return _state_response(session_id)
522
+
523
+
524
+ @app.get("/openenv/state", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
525
+ @app.get("/state", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
526
+ @app.get("/state/", response_model=OpenEnvResponseEnvelope, include_in_schema=False)
527
+ def openenv_state_latest() -> OpenEnvResponseEnvelope:
528
+ return _state_response(_latest_session_id())
529
+
530
+
531
  @app.post("/openenv/report_inference", response_model=OpenEnvInferenceReportResponse)
532
  def openenv_report_inference(request: OpenEnvInferenceReportRequest) -> OpenEnvInferenceReportResponse:
533
  env = _build_environment()
src/osint_env/api/models.py CHANGED
@@ -26,7 +26,10 @@ class OpenEnvResetRequest(BaseModel):
26
 
27
 
28
  class OpenEnvActionRequest(BaseModel):
29
- session_id: str
 
 
 
30
  action_type: str | None = Field(default=None, description="One of CALL_TOOL, ADD_EDGE, ANSWER.")
31
  payload: dict[str, Any] = Field(default_factory=dict)
32
  action: dict[str, Any] | None = None
 
26
 
27
 
28
  class OpenEnvActionRequest(BaseModel):
29
+ session_id: str | None = Field(
30
+ default=None,
31
+ description="Session identifier. Optional for /step compatibility alias, which uses the latest session.",
32
+ )
33
  action_type: str | None = Field(default=None, description="One of CALL_TOOL, ADD_EDGE, ANSWER.")
34
  payload: dict[str, Any] = Field(default_factory=dict)
35
  action: dict[str, Any] | None = None
tests/test_server.py CHANGED
@@ -16,6 +16,12 @@ def test_server_health():
16
  assert response.json()["status"] == "ok"
17
 
18
 
 
 
 
 
 
 
19
  def test_server_environment_metadata():
20
  response = client.get("/api/environment")
21
  assert response.status_code == 200
@@ -115,6 +121,36 @@ def test_openenv_step_accepts_nested_action_payload():
115
  assert step.json()["done"] is True
116
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def test_report_inference_updates_latest_evaluation_and_dashboard(tmp_path, monkeypatch):
119
  latest_evaluation = tmp_path / "latest_evaluation.json"
120
  space_dashboard = tmp_path / "space_dashboard.html"
 
16
  assert response.json()["status"] == "ok"
17
 
18
 
19
+ def test_server_health_alias():
20
+ response = client.get("/health")
21
+ assert response.status_code == 200
22
+ assert response.json()["status"] == "ok"
23
+
24
+
25
  def test_server_environment_metadata():
26
  response = client.get("/api/environment")
27
  assert response.status_code == 200
 
121
  assert step.json()["done"] is True
122
 
123
 
124
+ def test_step_alias_uses_latest_session_when_session_id_missing():
125
+ reset = client.post("/reset", json={"task_index": 0})
126
+ assert reset.status_code == 200
127
+ session_id = reset.json()["session_id"]
128
+
129
+ step = client.post(
130
+ "/step",
131
+ json={
132
+ "action_type": "ANSWER",
133
+ "payload": {"answer": "unknown"},
134
+ },
135
+ )
136
+ assert step.status_code == 200
137
+ body = step.json()
138
+ assert body["session_id"] == session_id
139
+ assert body["done"] is True
140
+
141
+
142
+ def test_state_alias_returns_latest_session():
143
+ reset = client.post("/reset", json={"task_index": 0})
144
+ assert reset.status_code == 200
145
+ session_id = reset.json()["session_id"]
146
+
147
+ state = client.get("/state")
148
+ assert state.status_code == 200
149
+ body = state.json()
150
+ assert body["session_id"] == session_id
151
+ assert "task" in body["observation"]
152
+
153
+
154
  def test_report_inference_updates_latest_evaluation_and_dashboard(tmp_path, monkeypatch):
155
  latest_evaluation = tmp_path / "latest_evaluation.json"
156
  space_dashboard = tmp_path / "space_dashboard.html"