feat(agents): orchestrator loop (function-calling + tool trace + max-steps gate)
- src/agents/orchestrator.py +108 -0
- src/agents/prompts.py +49 -0
- tests/agents/test_orchestrator.py +161 -0
src/agents/orchestrator.py
ADDED
@@ -0,0 +1,108 @@
"""Orchestrator agent: function-calling loop over a list of Tools.

No agent framework — uses the openai SDK's chat-completions function-calling
interface directly. This is the same SDK already used by src/llm/explainer.py,
keeping the dependency surface minimal.

Public entry: `Orchestrator(llm_client, tools, system_prompt, model).run(user_input)`.
Returns an `AgentResult` with synthesized text + full tool-call trace.
"""
from __future__ import annotations

import json
from typing import Any

from src.agents.schemas import AgentResult, ToolTraceItem
from src.agents.tools import Tool
from src.core.logger import get_logger

logger = get_logger(__name__)


class Orchestrator:
    """Single-agent function-calling loop. Stops on (a) text response, (b) max steps."""

    def __init__(
        self,
        llm_client: Any,
        tools: list[Tool],
        system_prompt: str,
        model: str,
        max_steps: int = 5,
        temperature: float = 0.0,
    ) -> None:
        self._client = llm_client
        self._tools_by_name = {t.name: t for t in tools}
        self._tool_schemas = [t.openai_schema() for t in tools]
        self._system_prompt = system_prompt
        self._model = model
        self._max_steps = max_steps
        self._temperature = temperature

    def run(self, user_input: str) -> AgentResult:
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": user_input},
        ]
        trace: list[ToolTraceItem] = []

        for _step in range(self._max_steps):
            response = self._client.chat.completions.create(
                model=self._model,
                messages=messages,
                tools=self._tool_schemas,
                tool_choice="auto",
                temperature=self._temperature,
            )
            msg = response.choices[0].message

            # No tool calls → the model has produced its final text answer.
            if not getattr(msg, "tool_calls", None):
                return AgentResult(
                    text=(msg.content or "").strip(),
                    trace=trace,
                    model=self._model,
                    finish_reason="complete",
                )

            # Echo the assistant turn (including its tool calls) into history.
            messages.append({
                "role": "assistant",
                "content": msg.content,
                "tool_calls": [tc.model_dump() for tc in msg.tool_calls],
            })

            # Execute each requested tool; feed results back as tool messages.
            for tc in msg.tool_calls:
                name = tc.function.name
                tool = self._tools_by_name.get(name)
                if tool is None:
                    err = f"unknown tool: {name}"
                    trace.append(ToolTraceItem(name=name, args={}, error=err))
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": json.dumps({"error": err}),
                    })
                    continue
                try:
                    args = json.loads(tc.function.arguments or "{}")
                    result = tool.invoke(args)
                    trace.append(ToolTraceItem(name=name, args=args, result=result))
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": json.dumps({"result": result}, default=str),
                    })
                except Exception as e:
                    err = str(e)
                    trace.append(ToolTraceItem(name=name, args={}, error=err))
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": json.dumps({"error": err}),
                    })

        return AgentResult(
            text="Max steps reached without a final answer.",
            trace=trace,
            model=self._model,
            finish_reason="max_steps",
        )
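Usage sketch (not part of the diff): the orchestrator only needs an object exposing chat.completions.create(...), so any OpenAI-compatible client works. The endpoint, key, model id, and echo tool below are illustrative stand-ins — the real pipeline tools are built elsewhere in the repo.

from openai import OpenAI
from pydantic import BaseModel

from src.agents.orchestrator import Orchestrator
from src.agents.prompts import ORCHESTRATOR_SYSTEM_PROMPT
from src.agents.tools import Tool


class EchoIn(BaseModel):
    msg: str


class EchoOut(BaseModel):
    echo: str


# Placeholder endpoint/key — any OpenAI-compatible server will do.
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key="sk-...")

orch = Orchestrator(
    llm_client=client,
    tools=[Tool(
        name="echo",
        description="Echo a string back.",
        input_model=EchoIn,
        output_model=EchoOut,
        execute=lambda inp: EchoOut(echo=inp.msg),
    )],
    system_prompt=ORCHESTRATOR_SYSTEM_PROMPT,
    model="stub-model",  # substitute a function-calling-capable model id
)

result = orch.run("Echo back the word hello.")
print(result.finish_reason, result.text)
for item in result.trace:  # full audit trail of tool calls
    print(item.name, item.args, item.result or item.error)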
src/agents/prompts.py
ADDED
@@ -0,0 +1,49 @@
"""System prompts for the orchestrator agent.

Kept in a dedicated module so prompt edits are diff-readable and reviewable
in isolation from the orchestrator loop.
"""
from __future__ import annotations


ORCHESTRATOR_SYSTEM_PROMPT = """\
You are the NeuroBridge clinical-ML orchestrator. You have four tools:

- run_bbb_pipeline(smiles, top_k=5) → for a SMILES molecular string
- run_eeg_pipeline(input_path) → for a .fif or .edf EEG file path
- run_mri_pipeline(input_dir, sites_csv) → for a directory of NIfTI MRI files
- retrieve_context(query, k=4) → for grounding chunks from the knowledge base

Workflow — follow exactly:

1. Look at the user input. Decide which ONE pipeline tool fits:
   - SMILES (short, all-letters/digits, no slashes, no .ext) → run_bbb_pipeline
   - Path ending in .fif or .edf → run_eeg_pipeline
   - Path that is a directory (no file extension at the tail) → run_mri_pipeline
   If ambiguous, prefer SMILES if it parses; otherwise return:
   "Cannot identify modality. Provide a SMILES, .fif/.edf path, or NIfTI directory."

2. Call the chosen pipeline tool exactly once with the user input.

3. After the pipeline returns, formulate ONE focused retrieval query that
   captures the scientific concept behind the prediction (NOT the raw input).
   Examples of good queries:
   - "BBB permeability of small lipophilic molecules" (after BBB predict)
   - "ICA artifact removal in multi-channel EEG" (after EEG run)
   - "ComBat scanner site harmonization in multi-center MRI" (after MRI run)
   Then call retrieve_context with that query.

4. Synthesize a final response in 3-5 sentences:
   - State the concrete pipeline result (label, confidence, key numbers).
   - Cite at least one specific fact from the retrieved chunks (mention the
     source file in parentheses, e.g. "(lipinski_rule_of_five.md)").
   - Match the user's question language: Turkish in → Turkish out, etc.
   - If retrieve_context returned 0 chunks, say so explicitly and answer
     using only the pipeline result.

Hard constraints:
- Call exactly ONE pipeline tool, then exactly ONE retrieve_context, then stop.
- Do NOT invent facts. Only use numbers from the pipeline tool output and
  text from the retrieved chunks.
- No preamble, no apologies, no meta-commentary about being an AI.
"""
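Reviewer note: the two-call budget ("exactly ONE pipeline tool, then exactly ONE retrieve_context") is enforced only by prompt text, not by the loop itself. A hypothetical post-run check on the returned trace, using the tool names above and the trace fields exercised in the tests below:

# Hypothetical helper — not part of this diff.
PIPELINE_TOOLS = {"run_bbb_pipeline", "run_eeg_pipeline", "run_mri_pipeline"}


def trace_follows_prompt(result) -> bool:
    # Expect exactly: one pipeline call, then one retrieval, in that order.
    names = [item.name for item in result.trace]
    return (
        len(names) == 2
        and names[0] in PIPELINE_TOOLS
        and names[1] == "retrieve_context"
    )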
tests/agents/test_orchestrator.py
ADDED
@@ -0,0 +1,161 @@
"""Tests for src.agents.orchestrator — agent loop with stubbed LLM client.

We do NOT hit OpenRouter here. We construct a fake client that returns
scripted tool-call responses, then verify the orchestrator dispatches
tools and assembles the trace correctly.
"""
from __future__ import annotations

import json
from typing import Any
from unittest.mock import MagicMock

import pytest
from pydantic import BaseModel

from src.agents.orchestrator import Orchestrator
from src.agents.tools import Tool


# --- Helpers ----------------------------------------------------------------


def _fake_choice_with_tool_call(name: str, args: dict[str, Any], call_id: str = "c1") -> Any:
    msg = MagicMock()
    msg.content = None
    tc = MagicMock()
    tc.id = call_id
    tc.function.name = name
    tc.function.arguments = json.dumps(args)
    tc.model_dump = MagicMock(return_value={"id": call_id, "type": "function",
                                            "function": {"name": name,
                                                         "arguments": json.dumps(args)}})
    msg.tool_calls = [tc]
    choice = MagicMock()
    choice.message = msg
    response = MagicMock()
    response.choices = [choice]
    return response


def _fake_choice_with_text(text: str) -> Any:
    msg = MagicMock()
    msg.content = text
    msg.tool_calls = None
    choice = MagicMock()
    choice.message = msg
    response = MagicMock()
    response.choices = [choice]
    return response


class _PingInput(BaseModel):
    msg: str


class _PingOutput(BaseModel):
    echo: str


def _make_ping_tool() -> Tool:
    return Tool(
        name="ping",
        description="Echo a string back.",
        input_model=_PingInput,
        output_model=_PingOutput,
        execute=lambda inp: _PingOutput(echo=f"pong:{inp.msg}"),
    )


# --- Tests ------------------------------------------------------------------


class TestOrchestrator:
    def test_single_tool_then_text_response(self) -> None:
        client = MagicMock()
        client.chat.completions.create.side_effect = [
            _fake_choice_with_tool_call("ping", {"msg": "hello"}),
            _fake_choice_with_text("All done."),
        ]
        orch = Orchestrator(
            llm_client=client,
            tools=[_make_ping_tool()],
            system_prompt="sys",
            model="stub-model",
            max_steps=4,
        )
        result = orch.run("test input")
        assert result.text == "All done."
        assert result.finish_reason == "complete"
        assert len(result.trace) == 1
        assert result.trace[0].name == "ping"
        assert result.trace[0].args == {"msg": "hello"}
        assert result.trace[0].result == {"echo": "pong:hello"}

    def test_unknown_tool_recorded_as_error(self) -> None:
        client = MagicMock()
        client.chat.completions.create.side_effect = [
            _fake_choice_with_tool_call("nonexistent_tool", {"x": 1}),
            _fake_choice_with_text("Done."),
        ]
        orch = Orchestrator(
            llm_client=client,
            tools=[_make_ping_tool()],
            system_prompt="sys",
            model="stub-model",
            max_steps=4,
        )
        result = orch.run("test")
        assert result.trace[0].error is not None
        assert "unknown tool" in result.trace[0].error
        assert result.text == "Done."

    def test_invalid_tool_args_recorded_as_error(self) -> None:
        client = MagicMock()
        client.chat.completions.create.side_effect = [
            _fake_choice_with_tool_call("ping", {"wrong_field": "x"}),
            _fake_choice_with_text("Recovered."),
        ]
        orch = Orchestrator(
            llm_client=client,
            tools=[_make_ping_tool()],
            system_prompt="sys",
            model="stub-model",
            max_steps=4,
        )
        result = orch.run("test")
        assert result.trace[0].error is not None
        assert result.text == "Recovered."

    def test_max_steps_exhausted_returns_finish_reason(self) -> None:
        client = MagicMock()
        # Always return another tool call — never terminates with text
        client.chat.completions.create.side_effect = [
            _fake_choice_with_tool_call("ping", {"msg": f"{i}"}, call_id=f"c{i}")
            for i in range(10)
        ]
        orch = Orchestrator(
            llm_client=client,
            tools=[_make_ping_tool()],
            system_prompt="sys",
            model="stub-model",
            max_steps=3,
        )
        result = orch.run("test")
        assert result.finish_reason == "max_steps"
        assert len(result.trace) == 3

    def test_first_response_is_text_no_tools(self) -> None:
        client = MagicMock()
        client.chat.completions.create.side_effect = [
            _fake_choice_with_text("Direct answer."),
        ]
        orch = Orchestrator(
            llm_client=client,
            tools=[_make_ping_tool()],
            system_prompt="sys",
            model="stub-model",
        )
        result = orch.run("trivial input")
        assert result.text == "Direct answer."
        assert result.trace == []
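The suite is fully offline (the LLM client is a MagicMock), so it can be driven straight from Python as well as from the CLI; a minimal runner sketch, assuming the repo root as the working directory:

import pytest

# Runs only this module's tests; exit code 0 means all passed.
raise SystemExit(pytest.main(["tests/agents/test_orchestrator.py", "-q"]))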