"""
Agent Q3 [HQ] — TandemCore
Gemma4-E4B (Reasoner) → Qwen3.5-4B (Coder) chained pipeline.
Step 1: Reasoner produces a structured plan.
Step 2: Coder implements the plan.
"""
import os
from typing import Any

import httpx


# Endpoint and model tags are read from the environment so a deployment can
# retarget the pipeline without code changes; the fallbacks are the literal
# defaults below (a localhost:11434 chat-completions URL and two model tags).
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/v1/chat/completions")
REASONER = os.getenv("REASONER_MODEL", "gemma4:e4b-instruct-q4_K_M")
CODER = os.getenv("CODER_MODEL", "qwen3.5:4b-instruct-q4_K_M")


# System prompt for the first (planning) stage: asks for a plan only, no code.
REASON_SYSTEM = (
    "You are a senior architect and researcher. Given the user request, "
    "produce a concise, structured plan (steps, constraints, edge cases) "
    "that a coder can implement directly. Be precise. No code yet."
)
# System prompt for the second (implementation) stage: asks for code that
# follows the supplied plan.
CODE_SYSTEM = (
    "You are an expert software engineer. Implement the plan provided exactly. "
    "Output production-quality code with inline comments. No explanation outside code blocks."
)


class TandemCore:
    """Two-stage chat pipeline: a reasoner model plans, a coder model implements.

    Each stage POSTs a chat request to ``OLLAMA_URL`` and reads the reply
    text from ``choices[0].message.content`` of the JSON response.
    """

    # Per-request timeout in seconds, passed to ``httpx.AsyncClient``. Kept as
    # a class attribute so it can be overridden without touching ``_call``.
    REQUEST_TIMEOUT = 120

    @staticmethod
    def _extract_content(data: Any) -> str:
        """Return the assistant text from a decoded chat-completions response.

        Args:
            data: The decoded JSON body of the response.

        Returns:
            The value at ``choices[0].message.content``, or ``""`` when that
            value is ``None`` so callers always receive a ``str``.

        Raises:
            KeyError, IndexError, TypeError: If the body does not have the
                expected ``choices[0].message.content`` structure.
        """
        content = data["choices"][0]["message"]["content"]
        # A JSON null here would otherwise be formatted as the text "None"
        # when the plan is embedded into the follow-up message.
        return "" if content is None else content

    async def _call(self, model: str, system: str, messages: list) -> str:
        """Send one chat request and return the reply text.

        Args:
            model: Model identifier placed in the request payload.
            system: System prompt, sent as the first message.
            messages: Conversation messages appended after the system prompt.

        Returns:
            The reply text extracted from the response.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status
                (via ``raise_for_status``).
        """
        payload = {
            "model": model,
            # Unpacking accepts any iterable of messages, not only a list.
            "messages": [{"role": "system", "content": system}, *messages],
        }
        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            response = await client.post(OLLAMA_URL, json=payload)
            response.raise_for_status()
            return self._extract_content(response.json())

    async def run(self, messages: list) -> dict:
        """Run the reasoner, then the coder, and return both outputs.

        Args:
            messages: The conversation to plan for and implement.

        Returns:
            A dict with ``tandem`` (always ``True``), ``plan`` (reasoner
            output), ``implementation`` (coder output), and ``choices`` (the
            coder output wrapped as a single assistant message).
        """
        # Materialise once so a one-shot iterable is not exhausted by the
        # first call before the second call needs it.
        history = list(messages)
        plan = await self._call(REASONER, REASON_SYSTEM, history)
        # NOTE(review): the plan is injected as an assistant turn, so the
        # coder's conversation ends on an assistant message — confirm the
        # serving backend treats that as context to implement rather than
        # text to continue.
        plan_msg = {"role": "assistant", "content": f"[PLAN]\n{plan}"}
        impl = await self._call(CODER, CODE_SYSTEM, [*history, plan_msg])
        return {
            "tandem": True,
            "plan": plan,
            "implementation": impl,
            "choices": [{"message": {"role": "assistant", "content": impl}}],
        }

