Agent-Q3 / hq /tandem_core.py
madDegen's picture
consolidate: HQ TandemCore — Gemma4 reasons, Qwen3.5 implements
69fc3dd verified
"""
Agent Q3 [HQ] — TandemCore
Gemma4-E4B (Reasoner) → Qwen3.5-4B (Coder) chained pipeline.
Step 1: Reasoner produces a structured plan.
Step 2: Coder implements the plan.
"""
import httpx, os
# --- Runtime configuration -------------------------------------------------
# Each setting can be overridden through an environment variable of the same
# purpose; the literal on the right is the fallback used when it is unset.

# Chat-completions endpoint that every request is POSTed to.
OLLAMA_URL = os.environ.get(
    "OLLAMA_URL", "http://localhost:11434/v1/chat/completions"
)
# Model tag used for the planning (first) stage.
REASONER = os.environ.get("REASONER_MODEL", "gemma4:e4b-instruct-q4_K_M")
# Model tag used for the implementation (second) stage.
CODER = os.environ.get("CODER_MODEL", "qwen3.5:4b-instruct-q4_K_M")

# --- System prompts --------------------------------------------------------
# Adjacent string literals are concatenated at compile time; every fragment
# except the last ends with a single space so sentences stay separated.

# Planning stage: ask for a structured plan and explicitly forbid code.
REASON_SYSTEM = (
    "You are a senior architect and researcher. "
    "Given the user request, produce a concise, structured plan "
    "(steps, constraints, edge cases) that a coder can implement directly. "
    "Be precise. "
    "No code yet."
)
# Implementation stage: ask for code only, following the supplied plan.
CODE_SYSTEM = (
    "You are an expert software engineer. "
    "Implement the plan provided exactly. "
    "Output production-quality code with inline comments. "
    "No explanation outside code blocks."
)
class TandemCore:
    """Two-stage chat pipeline: one model plans, a second model implements.

    Both stages POST to the chat-completions endpoint held in ``OLLAMA_URL``.
    The planning stage uses ``REASONER`` with ``REASON_SYSTEM``; the
    implementation stage uses ``CODER`` with ``CODE_SYSTEM``.
    """

    async def _call(self, model: str, system: str, messages: list) -> str:
        """Send one chat-completion request and return the reply text.

        Args:
            model: Model identifier placed in the request's ``model`` field.
            system: System prompt; sent as the first message of the request.
            messages: Conversation turns (role/content dicts) that follow the
                system prompt. The caller's list is not modified.

        Returns:
            The ``content`` field of the first choice's message in the JSON
            response.

        Raises:
            httpx.HTTPStatusError: If the server responds with an error
                status (raised by ``raise_for_status``).
        """
        system_turn = {"role": "system", "content": system}
        request_body = {
            "model": model,
            # Concatenation builds a new list, leaving ``messages`` untouched.
            "messages": [system_turn] + messages,
        }
        # A fresh client is opened for each request and closed on exit;
        # the 120 s timeout applies to this single POST.
        async with httpx.AsyncClient(timeout=120) as http:
            response = await http.post(OLLAMA_URL, json=request_body)
            response.raise_for_status()
            first_choice = response.json()["choices"][0]
            return first_choice["message"]["content"]

    async def run(self, messages: list) -> dict:
        """Run the reasoner, then the coder, and return both outputs.

        Args:
            messages: Conversation turns (role/content dicts) describing the
                user request. The caller's list is not modified.

        Returns:
            A dict with keys ``tandem`` (always ``True``), ``plan`` (the
            reasoner's reply), ``implementation`` (the coder's reply) and
            ``choices`` (a one-element list wrapping the coder's reply as an
            assistant message).
        """
        # Stage 1: the reasoner sees only the original conversation.
        plan = await self._call(REASONER, REASON_SYSTEM, messages)

        # Stage 2: the coder sees the conversation plus the plan, which is
        # appended as an assistant turn tagged with a "[PLAN]" header.
        plan_turn = {"role": "assistant", "content": f"[PLAN]\n{plan}"}
        coder_context = messages + [plan_turn]
        implementation = await self._call(CODER, CODE_SYSTEM, coder_context)

        final_message = {"role": "assistant", "content": implementation}
        return {
            "tandem": True,
            "plan": plan,
            "implementation": implementation,
            "choices": [{"message": final_message}],
        }