File size: 4,276 Bytes
ea561c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
"""Mixture-of-Agents (MoA) consensus — 3 LLMs propose, 1 LLM judges + synthesizes.

Used by orchestrate's `--consensus` mode (ENABLE_MOA=1) for critical stages
(DEV implementation, REVIEWER verdict). Trades 4× cost for higher quality.

Usage from bash:
    python3 ~/.surrogate/bin/moa-consensus.py <prompt_file> [stage]
Reads prompt from file, returns synthesized response on stdout.
"""
from __future__ import annotations
import sys, os, json, urllib.request, urllib.error
from pathlib import Path

# Every endpoint below is described by a 4-tuple:
#   (label used in log lines, chat-completions URL, model id,
#    name of the environment variable that holds the API key)

# The Hugging Face router endpoint serves both a proposer and the judge.
_HF_ROUTER_URL = "https://router.huggingface.co/v1/chat/completions"

# Models queried in parallel to produce the candidate answers.
PROPOSERS = [
    (
        "cerebras-llama-70b",
        "https://api.cerebras.ai/v1/chat/completions",
        "llama-3.3-70b",
        "CEREBRAS_API_KEY",
    ),
    (
        "groq-llama-70b",
        "https://api.groq.com/openai/v1/chat/completions",
        "llama-3.3-70b-versatile",
        "GROQ_API_KEY",
    ),
    (
        "hf-router-deepseek",
        _HF_ROUTER_URL,
        "deepseek-ai/DeepSeek-V3.1-Terminus",
        "HF_TOKEN",
    ),
]

# Model that merges the candidate answers into the final response.
JUDGE = (
    "hf-router-qwen3-coder-480b",
    _HF_ROUTER_URL,
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "HF_TOKEN",
)


def call_oai(url: str, model: str, key: str, prompt: str, temperature: float = 0.4, max_tokens: int = 6000) -> str:
    """Send one single-turn chat-completions request and return the reply text.

    Args:
        url: Chat-completions endpoint to send the request to.
        model: Model identifier placed in the request body.
        key: API key sent as a ``Bearer`` token.
        prompt: Text sent as the only ``user`` message.
        temperature: Sampling temperature placed in the request body.
        max_tokens: ``max_tokens`` value placed in the request body.

    Returns:
        The ``content`` field of the first choice's message in the JSON reply.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.load`` raise on failure,
        and ``KeyError``/``IndexError`` when the reply lacks the expected shape.
    """
    payload = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }).encode()

    request = urllib.request.Request(url, data=payload)
    request.add_header("Content-Type", "application/json")
    request.add_header("Authorization", f"Bearer {key}")
    # Router-style endpoints additionally get a referer header.
    if any(marker in url for marker in ("openrouter", "router.huggingface")):
        request.add_header("HTTP-Referer", "https://axentx.ai")

    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.load(response)
    return reply["choices"][0]["message"]["content"]


# Character caps applied to the task text and to each proposal when the
# judge prompt is assembled.
_TASK_CHAR_LIMIT = 6000
_PROPOSAL_CHAR_LIMIT = 6000
# Overall wall-clock budget, in seconds, for the parallel proposer round.
_PROPOSER_ROUND_TIMEOUT = 180


def _gather_proposals(prompt: str) -> dict[str, str]:
    """Query every proposer that has an API key, in parallel; return usable replies by name."""
    import concurrent.futures as cf

    # Proposers whose key environment variable is unset or empty are skipped.
    jobs = []
    for name, url, model, key_env in PROPOSERS:
        key = os.environ.get(key_env)
        if key:
            jobs.append((name, url, model, key))
    if not jobs:
        return {}

    proposals: dict[str, str] = {}
    with cf.ThreadPoolExecutor(max_workers=len(jobs)) as ex:
        futures = {
            ex.submit(call_oai, url, model, key, prompt, 0.5): name
            for name, url, model, key in jobs
        }
        try:
            for fut in cf.as_completed(futures, timeout=_PROPOSER_ROUND_TIMEOUT):
                name = futures[fut]
                try:
                    text = fut.result()
                except Exception as e:
                    print(f"# {name}: FAIL {type(e).__name__}: {e}", file=sys.stderr)
                    continue
                # A reply whose content is null or blank cannot be sliced or
                # written later, so it is treated as a failed proposer.
                if not isinstance(text, str) or not text.strip():
                    print(f"# {name}: FAIL empty response", file=sys.stderr)
                    continue
                proposals[name] = text
                print(f"# {name}: {len(text)} chars", file=sys.stderr)
        except cf.TimeoutError:
            # as_completed raises once the round budget is exhausted; keep the
            # answers that already arrived instead of crashing the whole run.
            pending = sorted(n for f, n in futures.items() if not f.done())
            print(
                f"# proposers timed out after {_PROPOSER_ROUND_TIMEOUT}s: {', '.join(pending)}",
                file=sys.stderr,
            )
    return proposals


def _build_judge_prompt(prompt: str, proposals: dict[str, str]) -> str:
    """Assemble the judge prompt from the (truncated) task and the (truncated) proposals."""
    parts = [f"""You are the SYNTHESIS JUDGE. {len(proposals)} expert agents proposed answers to this task.
Evaluate each, then output a SINGLE final answer that combines the best ideas.
Do NOT just pick one — synthesize across them. Output the answer directly, no preamble.

=== TASK ===
{prompt[:_TASK_CHAR_LIMIT]}

"""]
    for i, (name, text) in enumerate(proposals.items(), 1):
        parts.append(f"\n=== PROPOSAL {i} (from {name}) ===\n{text[:_PROPOSAL_CHAR_LIMIT]}\n")
    parts.append("\n=== YOUR SYNTHESIZED ANSWER ===\n")
    return "".join(parts)


def _longest(proposals: dict[str, str]) -> str:
    """Return the longest proposal text; used as the best-effort fallback answer."""
    return max(proposals.values(), key=len)


def main() -> int:
    """Run the propose-then-judge pipeline for the prompt file named in ``sys.argv[1]``.

    The synthesized (or fallback) answer is written to stdout; progress and
    failure lines go to stderr. A second command-line argument (``[stage]``)
    is accepted but not used by this function.

    Returns:
        0 when an answer was written, 1 when the prompt file cannot be read,
        2 on a usage error, 3 when no proposer produced a usable answer.
    """
    if len(sys.argv) < 2:
        print("usage: moa-consensus.py <prompt_file> [stage]", file=sys.stderr)
        return 2

    try:
        prompt = Path(sys.argv[1]).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # Report the problem in one line; 1 matches the exit status an
        # uncaught exception would have produced.
        print(f"ERR: cannot read prompt file {sys.argv[1]}: {e}", file=sys.stderr)
        return 1

    # Round 1: proposers run in parallel threads.
    proposals = _gather_proposals(prompt)
    if not proposals:
        print("ERR: all proposers failed", file=sys.stderr)
        return 3
    if len(proposals) == 1:
        # Only one succeeded → just return it
        sys.stdout.write(next(iter(proposals.values())))
        return 0

    # Round 2: judge synthesizes best answer from all proposals
    judge_key = os.environ.get(JUDGE[3])
    if not judge_key:
        # No judge key → return best-effort: longest proposal
        sys.stdout.write(_longest(proposals))
        return 0

    judge_prompt = _build_judge_prompt(prompt, proposals)
    try:
        synthesized = call_oai(JUDGE[1], JUDGE[2], judge_key, judge_prompt, 0.3, 8000)
    except Exception as e:
        print(f"# judge FAIL {type(e).__name__}: {e}", file=sys.stderr)
        # Fallback: longest
        sys.stdout.write(_longest(proposals))
        return 0

    if not isinstance(synthesized, str) or not synthesized.strip():
        # A null or blank judge reply is no better than a failure.
        print("# judge FAIL empty response", file=sys.stderr)
        sys.stdout.write(_longest(proposals))
        return 0

    sys.stdout.write(synthesized)
    print(f"# judge ({JUDGE[0]}): synthesized {len(synthesized)} chars from {len(proposals)} proposals", file=sys.stderr)
    return 0


# Script entry point: run main() and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())