"""
Agent Q3 [HQ] — TandemCore

Gemma4-E4B (Reasoner) → Qwen3.5-4B (Coder) chained pipeline.

Step 1: Reasoner produces a structured plan.
Step 2: Coder implements the plan.
"""
import os

import httpx

# Chat-completions endpoint and model tags; each can be overridden through
# the environment variable of the same purpose.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/v1/chat/completions")
REASONER = os.getenv("REASONER_MODEL", "gemma4:e4b-instruct-q4_K_M")
CODER = os.getenv("CODER_MODEL", "qwen3.5:4b-instruct-q4_K_M")

# System prompt for the planning stage.
REASON_SYSTEM = (
    "You are a senior architect and researcher. Given the user request, "
    "produce a concise, structured plan (steps, constraints, edge cases) "
    "that a coder can implement directly. Be precise. No code yet."
)

# System prompt for the implementation stage.
CODE_SYSTEM = (
    "You are an expert software engineer. Implement the plan provided exactly. "
    "Output production-quality code with inline comments. No explanation outside code blocks."
)


class TandemCore:
    """Two-stage pipeline: the reasoner model plans, the coder model implements."""

    async def _call(self, model: str, system: str, messages: list) -> str:
        """Send one chat request to ``OLLAMA_URL`` and return the reply text.

        Args:
            model: Model tag placed in the request's ``model`` field.
            system: System prompt, sent as the first message.
            messages: Conversation turns appended after the system prompt.

        Returns:
            The ``content`` of the first choice's message in the JSON reply.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        system_turn = {"role": "system", "content": system}
        body = {
            "model": model,
            "messages": [system_turn] + messages,
        }
        # A fresh client is opened per request and closed when the block exits.
        async with httpx.AsyncClient(timeout=120) as http:
            response = await http.post(OLLAMA_URL, json=body)
            response.raise_for_status()
            reply = response.json()
            return reply["choices"][0]["message"]["content"]

    async def run(self, messages: list) -> dict:
        """Run the plan-then-implement chain for a conversation.

        Args:
            messages: Conversation turns forwarded to both models.

        Returns:
            A dict holding the ``tandem`` flag, the reasoner's ``plan``, the
            coder's ``implementation``, and a ``choices`` list wrapping the
            implementation as an assistant message.
        """
        # Stage 1: ask the reasoner for a plan.
        plan = await self._call(REASONER, REASON_SYSTEM, messages)

        # Stage 2: give the coder the original conversation plus the plan,
        # tagged so it is distinguishable from the user's own turns.
        plan_turn = {"role": "assistant", "content": f"[PLAN]\n{plan}"}
        code = await self._call(CODER, CODE_SYSTEM, messages + [plan_turn])

        final_message = {"role": "assistant", "content": code}
        return {
            "tandem": True,
            "plan": plan,
            "implementation": code,
            "choices": [{"message": final_message}],
        }