from __future__ import annotations

import argparse
import json
import os
import random
from dataclasses import dataclass
from typing import Any

import requests
from openai import OpenAI


# Task identifiers run, in this order, by main(). Each one is also a key of the
# plan table inside choose_heuristic_action.
TASKS = ["easy_docker", "medium_k8s", "hard_ml_config"]


@dataclass
class EpisodeResult:
    """Summary of a single episode, as assembled at the end of ``run_episode``."""

    # Identifier of the task the episode was run against.
    task_id: str
    # ``overall_score`` of the last observation held when the episode ended.
    final_score: float
    # Last ``done`` flag reported by the environment (False if none was reported).
    done: bool
    # Number of steps recorded for the episode.
    steps: int
    # Reward value recorded for each step, in step order.
    rewards: list[float]


def build_openai_client() -> OpenAI:
    """Create an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Returns:
        An ``OpenAI`` client constructed with the key read from the environment.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise RuntimeError("OPENAI_API_KEY is required for OpenAI baseline mode")


def extract_json_object(text: str) -> dict[str, Any]:
    """Extract a JSON object from free-form model output.

    Two passes are made over the stripped text:

    1. If the text contains triple-backtick fences, it is split on them and
       each segment that (after an optional leading ``json`` tag, matched
       case-insensitively) consists entirely of ``{...}`` is parsed. Segments
       that fail to parse are skipped instead of aborting the search.
    2. Otherwise, or if no segment parsed, every ``{`` in the text is tried as
       the start of an object, left to right, and the first one that decodes
       is returned. Text after the object (prose, stray braces, further
       objects) is ignored.

    Args:
        text: Raw text that may embed a JSON object.

    Returns:
        The first JSON object found, as a dict.

    Raises:
        json.JSONDecodeError: If at least one candidate was found but none
            could be parsed (a subclass of ``ValueError``).
        ValueError: If the text contains no candidate object at all.
    """
    text = text.strip()
    # Remember the first parse failure so callers still see a JSONDecodeError
    # when the text held something that looked like JSON but was malformed.
    first_error: json.JSONDecodeError | None = None

    if "```" in text:
        for segment in text.split("```"):
            candidate = segment.strip()
            if candidate[:4].lower() == "json":
                candidate = candidate[4:].strip()
            if not (candidate.startswith("{") and candidate.endswith("}")):
                continue
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError as exc:
                if first_error is None:
                    first_error = exc
                continue
            return parsed

    # raw_decode stops at the end of the first complete value, so trailing
    # text after the object cannot break the parse.
    decoder = json.JSONDecoder()
    position = text.find("{")
    while position != -1:
        try:
            parsed, _ = decoder.raw_decode(text, position)
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc
        else:
            if isinstance(parsed, dict):
                return parsed
        position = text.find("{", position + 1)

    if first_error is not None:
        raise first_error
    raise ValueError("No JSON object found in model output")


def choose_heuristic_action(task_id: str, step: int) -> dict[str, Any]:
    """Return the scripted action for ``task_id`` at position ``step``.

    Each task has a fixed, ordered list of actions, which keeps the baseline
    deterministic and reproducible. Once ``step`` reaches the end of the list
    the final action is returned for every later step.

    Args:
        task_id: One of ``"easy_docker"``, ``"medium_k8s"``, ``"hard_ml_config"``.
        step: Index into the task's action list.

    Returns:
        A dict with ``operation`` and ``path`` keys, plus ``value`` for
        ``edit`` and ``add`` operations.

    Raises:
        KeyError: If ``task_id`` has no scripted plan.
    """

    def edit(path: str, value: Any) -> dict[str, Any]:
        return {"operation": "edit", "path": path, "value": value}

    def add(path: str, value: Any) -> dict[str, Any]:
        return {"operation": "add", "path": path, "value": value}

    def delete(path: str) -> dict[str, Any]:
        return {"operation": "delete", "path": path}

    container = "spec.template.spec.containers.0"

    scripts = {
        "easy_docker": [
            edit("services.web.image", "nginx:latest"),
            delete("services.web.ports.1"),
            edit("services.web.environment", {"DEBUG": "true", "API_KEY": "placeholder"}),
            edit("services.db.ports.0", "5432:5432"),
        ],
        "medium_k8s": [
            edit("metadata.namespace", "default"),
            edit("spec.replicas", 3),
            edit(f"{container}.image", "nginx:latest"),
            edit(f"{container}.resources.limits.memory", "512Mi"),
            edit(f"{container}.resources.requests.memory", "256Mi"),
            edit(f"{container}.resources.requests.cpu", "500m"),
            add(f"{container}.ports", [{"containerPort": 80}]),
        ],
        "hard_ml_config": [
            delete("training.fp16"),
            edit("training.batch_size", 16),
            edit("training.gradient_accumulation_steps", 2),
            edit("training.max_steps", 1000),
            edit("training.warmup_steps", 100),
            edit("training.optimizer.type", "adamw"),
            edit("hardware.gpu_count", 1),
            edit("data.train_batch_size", 32),
            edit("logging.log_interval", 10),
        ],
    }

    script = scripts[task_id]
    final_index = len(script) - 1
    # Past the end of the script, keep replaying its final action.
    return script[step if step < final_index else final_index]


def choose_openai_action(client: OpenAI, model: str, observation: dict[str, Any]) -> dict[str, Any]:
    """Ask the chat model for the next action and parse its reply as JSON.

    The prompt is built from the observation's ``task_description``,
    ``overall_score``, ``validation_errors`` and ``current_config`` entries.
    The request uses ``temperature=0``, ``top_p=1`` and ``seed=42``.

    Args:
        client: OpenAI client used to issue the chat completion request.
        model: Name of the chat model to query.
        observation: Latest environment observation.

    Returns:
        The JSON object extracted from the model's reply.

    Raises:
        KeyError: If ``observation`` lacks one of the entries listed above.
        ValueError: Propagated from ``extract_json_object`` when the reply
            contains no parseable JSON object.
    """
    system_message = {
        "role": "system",
        "content": (
            "You are an environment-control agent for configuration debugging. "
            "Return exactly one JSON object action."
        ),
    }
    prompt_parts = [
        "Task:\n",
        f"{observation['task_description']}\n\n",
        "Allowed schema:\n",
        '{"operation": "edit|add|delete", "path": "dot.path", "value": any|null}\n\n',
        f"Current score: {observation['overall_score']}\n",
        f"Validation errors: {observation['validation_errors']}\n",
        f"Current YAML:\n{observation['current_config']}\n",
    ]
    user_message = {"role": "user", "content": "".join(prompt_parts)}

    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=0,
        top_p=1,
        seed=42,
    )
    reply = completion.choices[0].message.content
    if not reply:
        # A missing or empty reply is parsed as an empty string.
        reply = ""
    return extract_json_object(reply)


def run_episode(
    api_base_url: str,
    task_id: str,
    max_steps: int,
    policy: str,
    model: str,
    openai_client: OpenAI | None,
) -> EpisodeResult:
    """Run one episode of ``task_id`` against the environment HTTP API.

    The environment is reset via ``POST {api_base_url}/reset``. Up to
    ``max_steps`` actions are then sent to ``POST {api_base_url}/step``,
    stopping early once the environment reports ``done``. A step is recorded
    as invalid (reward ``0.0``, observation unchanged) when either the server
    answers with a non-200 status or the model reply cannot be parsed into a
    JSON action. Progress is printed as ``[START]``, ``[STEP]`` and ``[END]``
    lines.

    Args:
        api_base_url: Base URL of the environment server.
        task_id: Task to reset the environment to.
        max_steps: Maximum number of actions to attempt.
        policy: ``"heuristic"`` for the scripted plan; any other value uses
            the OpenAI policy.
        model: Chat model name passed to the OpenAI policy.
        openai_client: Client for the OpenAI policy; may be ``None`` only
            when ``policy`` is ``"heuristic"``.

    Returns:
        An ``EpisodeResult`` with the final score, done flag, step count and
        per-step rewards.

    Raises:
        ValueError: If a non-heuristic policy is requested without a client.
        requests.HTTPError: If the reset request returns an error status.
    """
    # Explicit check instead of ``assert``: assertions are removed under -O,
    # and failing here avoids resetting the environment for a run that
    # cannot proceed.
    if policy != "heuristic" and openai_client is None:
        raise ValueError("openai_client is required when policy is not 'heuristic'")

    reset_resp = requests.post(f"{api_base_url}/reset", json={"task_id": task_id}, timeout=30)
    reset_resp.raise_for_status()
    observation = reset_resp.json()["observation"]

    rewards: list[float] = []
    done = False

    def record_invalid_step(step_index: int) -> None:
        # Invalid steps score zero and leave the observation untouched.
        rewards.append(0.0)
        print(f"[STEP] task={task_id} step={step_index} action=invalid reward=0.00 done=false")

    print(f"[START] task={task_id} policy={policy}")

    for step in range(max_steps):
        if done:
            break

        if policy == "heuristic":
            action = choose_heuristic_action(task_id, step)
        else:
            try:
                action = choose_openai_action(openai_client, model, observation)
            except ValueError:
                # The model reply held no parseable JSON object; treat it like
                # an action the server rejected rather than aborting the run.
                record_invalid_step(step)
                continue

        step_resp = requests.post(f"{api_base_url}/step", json=action, timeout=30)
        if step_resp.status_code != 200:
            record_invalid_step(step)
            continue

        payload = step_resp.json()
        observation = payload["observation"]
        reward = payload["reward"]
        done = payload["done"]
        reward_value = float(reward["value"])
        rewards.append(reward_value)

        print(
            f"[STEP] task={task_id} step={step} action={action.get('operation')}:{action.get('path')} "
            f"reward={reward_value:.3f} score={observation['overall_score']:.3f} done={str(done).lower()}"
        )

    result = EpisodeResult(
        task_id=task_id,
        final_score=float(observation["overall_score"]),
        done=done,
        # Exactly one reward is recorded per attempted step, so this never
        # exceeds max_steps.
        steps=len(rewards),
        rewards=rewards,
    )

    reward_text = ",".join(f"{v:.3f}" for v in rewards)
    print(
        f"[END] task={task_id} score={result.final_score:.3f} "
        f"steps={result.steps} done={str(result.done).lower()} rewards={reward_text}"
    )
    return result


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the baseline script.

    Defaults for ``--api-base-url`` and ``--model`` are read from the
    ``API_BASE_URL`` and ``OPENAI_MODEL`` environment variables, falling back
    to ``http://localhost:8000`` and ``gpt-4o-mini``.

    Returns:
        Namespace with ``api_base_url``, ``max_steps``, ``policy``, ``model``
        and ``seed`` attributes.
    """
    default_base_url = os.getenv("API_BASE_URL", "http://localhost:8000")
    default_model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

    cli = argparse.ArgumentParser(description="Baseline inference for ConfigDebuggerEnv")
    cli.add_argument("--api-base-url", default=default_base_url)
    cli.add_argument("--max-steps", default=12, type=int)
    cli.add_argument("--policy", default="heuristic", choices=["heuristic", "openai"])
    cli.add_argument("--model", default=default_model)
    cli.add_argument("--seed", default=42, type=int)
    return cli.parse_args()


def main() -> None:
    """Run every task in ``TASKS`` with the selected policy and print a summary.

    Seeds ``random`` from ``--seed``, builds an OpenAI client only when the
    ``openai`` policy is selected, runs one episode per task in order, then
    prints each task's final score, step count and done flag followed by the
    average final score.
    """
    args = parse_args()
    random.seed(args.seed)

    openai_client: OpenAI | None = build_openai_client() if args.policy == "openai" else None

    results: list[EpisodeResult] = [
        run_episode(
            api_base_url=args.api_base_url,
            task_id=task_id,
            max_steps=args.max_steps,
            policy=args.policy,
            model=args.model,
            openai_client=openai_client,
        )
        for task_id in TASKS
    ]

    total_score = sum(episode.final_score for episode in results)
    avg = total_score / len(results)

    print("\n=== BASELINE SUMMARY ===")
    for episode in results:
        done_text = str(episode.done).lower()
        print(
            f"{episode.task_id}: final_score={episode.final_score:.3f} steps={episode.steps} done={done_text}"
        )
    print(f"average_score={avg:.3f}")


# Run the baseline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()