import json
import os
import sys

import requests
from openai import OpenAI

# Root URL of the environment server that exposes the /reset and /step
# endpoints; the BASE_URL environment variable overrides the default.
BASE_URL = os.getenv(
    "BASE_URL",
    "https://kevanthonyp-it-support-triage.hf.space",
)

# Chat-completions client used by the agent. API_BASE_URL and API_KEY are both
# mandatory: a missing variable raises KeyError as soon as the module loads.
client = OpenAI(base_url=os.environ["API_BASE_URL"], api_key=os.environ["API_KEY"])

# Task identifiers, processed in this order.
TASKS = [
    "task_easy",
    "task_medium",
    "task_hard",
]

# Action used whenever the model's reply cannot be turned into a JSON object.
_FALLBACK_ACTION = {
    "category": "software",
    "priority": "low",
    "response": "Reinstall app.",
}


def _parse_action(content):
    """Return the JSON object found in the model reply, or ``None``.

    The reply is first parsed as-is. If that fails, the span between the
    first ``{`` and the last ``}`` is tried, which recovers replies wrapped
    in markdown code fences or surrounded by explanatory prose. Anything
    that does not decode to a ``dict`` is rejected.
    """
    if not isinstance(content, str):
        # The API may return no text at all (content is None).
        return None

    text = content.strip()
    candidates = [text]
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError subclass.
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def agent_policy(observation):
    """Ask the language model to triage a ticket and return its action.

    Args:
        observation: The current environment observation; it is interpolated
            into the prompt with its default string formatting.

    Returns:
        The JSON object produced by the model as a ``dict``. When the reply is
        missing, is not valid JSON, or is not a JSON object, a fresh copy of
        the default action (``software`` / ``low`` / ``Reinstall app.``) is
        returned instead.

    Raises:
        Any exception raised by the chat-completions request itself; only
        reply parsing falls back to the default action.
    """
    prompt = f"""You are an IT support triage agent. Given this support ticket observation, respond with a JSON object containing:
- category: one of "security", "hardware", "software", "network"
- priority: one of "high", "medium", "low"
- response: a brief action to resolve the issue

Observation: {observation}

Respond with only valid JSON, no markdown."""

    completion = client.chat.completions.create(
        model="openai/gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
    )

    try:
        content = completion.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        # Malformed or empty completion: treat it like an unparsable reply.
        content = None

    action = _parse_action(content)
    if action is None:
        # Copy so callers can never mutate the shared default.
        return dict(_FALLBACK_ACTION)
    return action

def _run_task(task):
    """Run one episode of ``task`` and print its progress to stdout.

    Output consists of one ``[START]`` line, one ``[STEP]`` line per completed
    environment step, and one ``[END]`` line carrying the summed reward and
    the number of ``[STEP]`` lines printed. Any failure is reported on stderr
    and never propagates, so the remaining tasks still run.
    """
    print(f"[START] task={task}", flush=True)
    steps_logged = 0
    total_reward = 0
    try:
        res = requests.post(f"{BASE_URL}/reset", json={"task_id": task}, timeout=30)
        res.raise_for_status()
        observation = res.json().get("observation", {})
        done = False

        # NOTE: the loop ends only when the server reports done (a missing
        # "done" field counts as done) or a request fails.
        while not done:
            action = agent_policy(observation)
            res = requests.post(f"{BASE_URL}/step", json={"action": action}, timeout=30)
            res.raise_for_status()
            data = res.json()
            # A null/absent reward counts as 0 instead of aborting the task.
            reward = data.get("reward") or 0
            done = data.get("done", True)
            observation = data.get("observation", {})
            total_reward += reward
            steps_logged += 1
            print(f"[STEP] step={steps_logged} reward={reward}", flush=True)
    except Exception as exc:
        # Top-level boundary: keep stdout in the expected format and put the
        # diagnostic on stderr rather than discarding it.
        print(f"[ERROR] task={task} failed: {exc!r}", file=sys.stderr, flush=True)
        if steps_logged == 0:
            # Emit the placeholder step only when no real step was logged, so
            # step numbers are never duplicated after a mid-episode failure.
            print("[STEP] step=1 reward=0", flush=True)
            steps_logged = 1

    # Score and step count always agree with the [STEP] lines printed above.
    print(f"[END] task={task} score={total_reward} steps={steps_logged}", flush=True)


for task in TASKS:
    _run_task(task)