"""
Agent Q3 [HQ] — TandemCore
Gemma4-E4B (Reasoner) → Qwen3.5-4B (Coder) chained pipeline.
Step 1: Reasoner produces a structured plan.
Step 2: Coder implements the plan.
"""
import httpx, os

# Endpoint and model tags. Each one falls back to the default shown here
# when the corresponding environment variable is not set.
OLLAMA_URL = os.environ.get(
    "OLLAMA_URL", "http://localhost:11434/v1/chat/completions"
)
REASONER = os.environ.get("REASONER_MODEL", "gemma4:e4b-instruct-q4_K_M")
CODER = os.environ.get("CODER_MODEL", "qwen3.5:4b-instruct-q4_K_M")

# System prompt for step 1: the Reasoner writes a plan and no code.
REASON_SYSTEM = (
    "You are a senior architect and researcher. "
    "Given the user request, produce a concise, structured plan "
    "(steps, constraints, edge cases) that a coder can implement directly. "
    "Be precise. "
    "No code yet."
)

# System prompt for step 2: the Coder turns the plan into code.
CODE_SYSTEM = (
    "You are an expert software engineer. "
    "Implement the plan provided exactly. "
    "Output production-quality code with inline comments. "
    "No explanation outside code blocks."
)

class TandemCore:
    """Two-stage Reasoner -> Coder pipeline over a chat-completions endpoint.

    ``run`` first asks the REASONER model for a plan, then sends the original
    conversation plus that plan to the CODER model and returns both outputs.

    Attributes:
        request_timeout: Timeout in seconds passed to ``httpx.AsyncClient``
            for every request. Override it on a subclass or an instance
            when the models need longer to answer.
    """

    # Previously a literal inside _call; the default value is unchanged.
    request_timeout: float = 120

    @staticmethod
    def _extract_content(body: dict) -> str:
        """Return the reply text from a chat-completion response body.

        Args:
            body: Decoded JSON response; must contain
                ``choices[0].message.content``.

        Returns:
            The message content, or ``""`` when the content is null.

        Raises:
            KeyError, IndexError: If the expected path is missing.
        """
        content = body["choices"][0]["message"]["content"]
        # A null content would otherwise leak ``None`` through the ``-> str``
        # contract and end up as the literal text "None" inside the plan.
        return "" if content is None else content

    async def _call(self, model: str, system: str, messages: list) -> str:
        """POST one chat request and return the model's reply text.

        Args:
            model: Model tag placed in the request payload.
            system: System prompt, sent as the first message.
            messages: Conversation messages appended after the system
                prompt. The list is not mutated.

        Returns:
            The content of the first choice, or ``""`` if it is null.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
            KeyError, IndexError: If the response body lacks
                ``choices[0].message.content``.
        """
        payload = {
            "model": model,
            "messages": [{"role": "system", "content": system}, *messages],
        }
        async with httpx.AsyncClient(timeout=self.request_timeout) as client:
            response = await client.post(OLLAMA_URL, json=payload)
            response.raise_for_status()
            return self._extract_content(response.json())

    async def run(self, messages: list) -> dict:
        """Run the plan-then-implement pipeline for one conversation.

        Args:
            messages: Chat messages for the request (presumably OpenAI-style
                ``{"role": ..., "content": ...}`` dicts; verify against the
                caller). The list is not mutated.

        Returns:
            A dict with ``tandem`` (always ``True``), ``plan`` (Reasoner
            output), ``implementation`` (Coder output) and a ``choices`` list
            whose single assistant message carries the implementation.
        """
        plan = await self._call(REASONER, REASON_SYSTEM, messages)
        # NOTE(review): the plan is appended as a trailing *assistant* turn.
        # Some chat templates may treat a final assistant message as text to
        # continue rather than as context -- confirm the coder model handles
        # this as intended.
        plan_msg = {"role": "assistant", "content": f"[PLAN]\n{plan}"}
        impl = await self._call(CODER, CODE_SYSTEM, messages + [plan_msg])
        return {
            "tandem": True,
            "plan": plan,
            "implementation": impl,
            "choices": [{"message": {"role": "assistant", "content": impl}}],
        }