#!/usr/bin/env python3
"""Surrogate-1 V10 — feature-request extractor.

Read all 748 past Claude conversation .jsonl files in ~/.claude/projects/
and extract every moment where the user asked Claude to BUILD a feature
in Surrogate. These become FEATURE-DEMONSTRATION training pairs that
get distilled into Surrogate's weights — so the model itself knows how
to build the feature when asked again.

Format of output JSONL (consumed by trainer via merge_external):
  {
    "prompt": "<user feature request, made self-contained>",
    "response": "<concrete implementation: code/diff/spec/etc>",
    "kind": "feature-build",
    "source": "<session-id>",
    "feature_class": "<heuristic-tag>"
  }

Heuristic feature-class tags pulled from the user's actual asks:
  - "agent-coding-loop"   — autonomous coding agent / dev chain
  - "monitoring"          — 24/7 monitor, watchdog, observability
  - "training-technique"  — new technique to add to trainer (RL, DPO, etc.)
  - "orchestration"       — multi-agent, spawn/aggregate, parallel
  - "knowledge-ingest"    — distill X into model
  - "self-improvement"    — closed-loop improvement, flywheel
  - "release-pipeline"    — autonomous release, CI/CD, draft PR
  - "incident-response"   — auto-heal, diagnose, fix
  - "evaluation"          — bench, scoring, verdict
  - "cost-efficiency"     — frontier-style smarter-with-less
  - "role-persona"        — role / persona asks (SRE, DevSecOps, PM, ...)
  - "long-context"        — long context windows (32K-1M, YaRN, RoPE)
  - "anti-hallucination"  — factuality, truthfulness, calibration
  - "general"             — fallback when no keyword matches
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

# Root directory that is searched recursively for session .jsonl logs.
PROJECTS = Path.home() / ".claude/projects"

# Heuristic patterns — match Thai + English request-style language.
# Each entry is a regex source string; looks_like_feature_request() searches
# them case-insensitively and a single hit anywhere in the message is enough.
REQUEST_PATTERNS = [
    # Direct imperative "build/make/add/create X"
    r"\b(build|make|add|create|implement|wire|train|bake|ingest|distill)\s+\w+",
    # Thai imperatives
    r"(ทำ|สร้าง|เพิ่ม|implement|train|เทรน|fine-?tune|fine_tune)\s+",
    # Feature-shaped asks
    r"feature\s+",
    # Polite / suggestion openers. The leading \b stops these from firing
    # inside unrelated words such as "scan " or "outlet ".
    r"\b(can|please|let|let's|how about)\s+",
    # explicit "I want X" / "ผมอยาก"
    r"(I want|I need|I'd like|ผมอยาก|ผมต้องการ|อยาก|ต้องการ)",
    # research-asks that lead to features
    r"(go research|research|find out|หามา|ลองหา|วิจัย)",
]

# Maps each feature-class tag to the keywords that vote for it.
# Keywords are literal text (NOT regular expressions) and are compared
# case-insensitively, so spell out every variant explicitly: the former
# "sub-?agent" entry could never match and is now two literal entries.
# Several entries are deliberate stems ("orchestrat", "halluc", "eval").
FEATURE_CLASS_KEYWORDS = {
    "agent-coding-loop":  ["agent chain", "dev agent", "autonomous coding", "code 24", "spawn agent", "sub-agent", "subagent", "team agent"],
    "monitoring":         ["monitor", "watchdog", "observability", "smoke", "health check", "anomaly", "incident detect"],
    "training-technique": ["GRPO", "DPO", "DAPO", "ORPO", "KTO", "RLVR", "SimPO", "Constitutional AI", "TruthRL", "Mask-DPO", "PiSSA", "LoftQ", "CorDA", "DoRA", "RSLoRA", "NEFTune", "Spectrum", "Quiet-STaR", "Reflexion", "Voyager", "Magpie", "self-rewarding", "knowledge distillation", "DistillKit", "active learning", "CoT", "PRM", "verify"],
    "orchestration":      ["multi-agent", "spawn", "aggregate", "parallel", "orchestrat", "team", "shared context", "subagent", "dispatch"],
    "knowledge-ingest":   ["ingest", "distill", "corpus", "training data", "training pair", "Q&A", "vault", "obsidian", "memory"],
    "self-improvement":   ["self-improve", "flywheel", "improve over time", "เก่งขึ้น", "online RL", "continual learn"],
    "release-pipeline":   ["release", "draft PR", "CI/CD", "deploy", "MVP", "v1 v2", "v10000", "ship feature", "auto-release"],
    "incident-response":  ["incident", "auto-heal", "rollback", "diagnose", "patch", "remediate", "fix"],
    "evaluation":         ["bench", "eval", "score", "verdict", "rubric", "HumanEval", "MBPP", "BFCL", "SWE-Bench", "test", "วัดผล"],
    "cost-efficiency":    ["smarter with less", "frontier", "efficiency", "speculative", "MoE", "sliding window", "test-time compute", "ใช้ resource น้อย"],
    "role-persona":       ["SRE", "DevSecOps", "Full Stack", "PM", "PO", "SA", "BD", "QE", "AI Engineer", "Marketing", "role", "persona", "engineer"],
    "long-context":       ["long context", "32K", "64K", "128K", "1M", "YaRN", "RoPE", "context window"],
    "anti-hallucination": ["halluc", "หลอน", "factual", "truth", "correctness", "F-DPO", "TruthRL", "calibration"],
}


def _keyword_regex(kw: str) -> str:
    """Return the regex source used to look for one literal keyword.

    The keyword itself is escaped, so it is always matched literally.
    Keywords that start with an ASCII letter or digit must begin at a word
    boundary, which keeps stems working ("eval" -> "evaluate") while ignoring
    mid-word hits ("eval" in "retrieval"). Keywords of one or two characters
    must also end at a word boundary, because acronyms such as "PM" or "SA"
    otherwise match the start of ordinary words. Keywords starting with a
    non-ASCII character (the Thai entries) stay plain substrings, since
    ``\\b`` is not meaningful in text written without spaces between words.
    """
    pattern = re.escape(kw)
    first, last = kw[:1], kw[-1:]
    if first.isascii() and first.isalnum():
        pattern = r"\b" + pattern
    if len(kw) <= 2 and last.isascii() and last.isalnum():
        pattern += r"\b"
    return pattern


def classify(text: str, keywords: dict[str, list[str]] | None = None) -> str:
    """Return the feature-class tag whose keywords best match *text*.

    Each keyword scores at most one point for its class, however many times
    it occurs; matching is case-insensitive (see ``_keyword_regex`` for the
    word-boundary rules).

    Args:
        text: Text to classify.
        keywords: Mapping of class tag -> keyword list. Defaults to the
            module-level ``FEATURE_CLASS_KEYWORDS``.

    Returns:
        The tag with the highest score; on a tie, the tag that comes first in
        the mapping. ``"general"`` when no keyword matches.
    """
    if keywords is None:
        keywords = FEATURE_CLASS_KEYWORDS
    scores: dict[str, int] = {}
    for cls, kws in keywords.items():
        # Empty keywords are skipped: they would match every text.
        hits = sum(
            1 for kw in kws
            if kw and re.search(_keyword_regex(kw), text, re.I)
        )
        if hits:
            scores[cls] = hits
    if not scores:
        return "general"
    # max() keeps the first maximal key, i.e. mapping order breaks ties.
    return max(scores, key=scores.get)


def looks_like_feature_request(user_msg: str) -> bool:
    """Decide whether *user_msg* reads like a request to build something.

    Messages shorter than 30 characters are rejected outright. There is
    intentionally no upper length limit: very long messages are usually
    critique rather than requests, but they often contain a request somewhere
    inside, so they are scanned like any other message.

    Returns:
        True as soon as any entry of ``REQUEST_PATTERNS`` is found in the
        message (case-insensitive search), otherwise False.
    """
    too_short = len(user_msg) < 30
    if too_short:
        return False
    return any(
        re.search(pattern, user_msg, re.I) is not None
        for pattern in REQUEST_PATTERNS
    )


def _message_text(content: object) -> str:
    """Flatten a message's ``content`` field into plain text.

    A string is returned unchanged. A list is treated as a list of blocks and
    only the ``text`` of dict blocks whose ``type`` is ``"text"`` is kept,
    joined with newlines. ``None`` becomes an empty string; anything else is
    passed through ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "\n".join(
            block["text"] for block in content
            if isinstance(block, dict)
            and block.get("type") == "text"
            and isinstance(block.get("text"), str)
        )
    return str(content)


def extract_pairs_from_session(jsonl_path: Path) -> list[dict]:
    """Collect feature-build training pairs from one session log.

    The file is read as UTF-8 JSON Lines (undecodable bytes are replaced).
    Lines that are not valid JSON, are not JSON objects, or have no
    object-valued ``message`` field are skipped. A user message is paired with
    the next assistant message that carries text; the pair is kept when the
    user message passes ``looks_like_feature_request`` and the reply is at
    least 100 characters long and does not start with ``"?"``.

    Args:
        jsonl_path: Path to the session ``.jsonl`` file.

    Returns:
        A list of dicts with keys ``prompt`` (first 3000 chars of the user
        message), ``response`` (first 6000 chars of the reply), ``kind``,
        ``source`` (the file stem) and ``feature_class``.
    """
    pairs: list[dict] = []
    pending_prompt: str | None = None
    # Iterate the file object instead of str.splitlines(): file iteration only
    # breaks on \n, \r and \r\n, whereas splitlines() also breaks on characters
    # such as U+2028 that may appear unescaped inside a JSON string and would
    # cut a record in half.
    with jsonl_path.open(encoding="utf-8", errors="replace") as fh:
        for raw_line in fh:
            try:
                event = json.loads(raw_line)
            except (ValueError, RecursionError):
                # Best effort: ignore blank or corrupt lines.
                continue
            if not isinstance(event, dict):
                continue
            message = event.get("message")
            if not isinstance(message, dict):
                continue
            role = message.get("role")
            text = _message_text(message.get("content"))
            if not text.strip():
                # Event without any text block (presumably tool-call or
                # tool-result traffic — the log schema is not defined in this
                # file). It must neither replace nor consume the pending user
                # prompt, or the real reply that follows would be lost.
                continue
            if role == "user":
                pending_prompt = text
            elif role == "assistant" and pending_prompt:
                # Skip replies that are too short or look like a clarifying
                # question; either way the prompt is consumed by this reply.
                if (looks_like_feature_request(pending_prompt)
                        and len(text) >= 100
                        and not text.startswith("?")):
                    pairs.append({
                        "prompt": pending_prompt[:3000],
                        "response": text[:6000],
                        "kind": "feature-build",
                        "source": jsonl_path.stem,
                        "feature_class": classify(pending_prompt + " " + text),
                    })
                pending_prompt = None
    return pairs


def main() -> int:
    """CLI entry point: scan session logs and write feature-build pairs.

    Options:
        --out    Destination JSONL file; parent directories are created.
        --limit  If non-zero, only the first N session files (sorted by
                 path) are scanned.

    Every ``*.jsonl`` file under ``PROJECTS`` is passed to
    ``extract_pairs_from_session``; a file that raises is reported on stderr
    and skipped. A progress line and a final per-class summary are printed.

    Returns:
        0, used as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", default=str(Path.home() / ".surrogate/state/v10-ingest/conversations/feature-builds.jsonl"))
    parser.add_argument("--limit", type=int, default=0)
    args = parser.parse_args()

    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    n_total = 0
    n_files = 0  # counts only sessions that produced at least one pair
    by_class: dict[str, int] = {}
    files = sorted(PROJECTS.rglob("*.jsonl"))
    if args.limit:
        files = files[:args.limit]
    print(f"scanning {len(files)} session files for feature-build pairs...")
    # Explicit UTF-8: records are written with ensure_ascii=False, so a
    # non-UTF-8 locale default could fail on, or mis-encode, non-ASCII text.
    # The with-block also closes the file if an exception escapes the loop.
    with out_path.open("w", encoding="utf-8") as f_out:
        for fp in files:
            try:
                pairs = extract_pairs_from_session(fp)
            except Exception as e:
                # Best effort per file: report and move on to the next one.
                sys.stderr.write(f"  skip {fp.name}: {e}\n")
                continue
            if not pairs:
                continue
            n_files += 1
            for j in pairs:
                f_out.write(json.dumps(j, ensure_ascii=False) + "\n")
                by_class[j["feature_class"]] = by_class.get(j["feature_class"], 0) + 1
                n_total += 1
            if n_files % 50 == 0:
                print(f"  scanned {n_files} sessions, extracted {n_total} feature-build pairs")
    print(f"\nDONE: {n_total} feature-build pairs from {n_files} sessions → {out_path}")
    print("By feature class:")
    for cls, n in sorted(by_class.items(), key=lambda x: -x[1]):
        print(f"  {n:5d}  {cls}")
    return 0


if __name__ == "__main__":
    sys.exit(main())