"""LLM-based claim extractor for factual fidelity scoring.

Extracts structured factual claims from an agent's answer so they can be
verified against the card database. Uses an OpenAI-compatible API (supports
OpenRouter, OpenAI, or any compatible endpoint).
"""

from __future__ import annotations

import json
import logging
from typing import Any

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Instruction text sent to the LLM. It ends with an "Agent answer:" line and
# ClaimExtractor.extract_claims concatenates the agent's answer directly after
# it, so the trailing newline of this literal is significant.
# The JSON skeleton below is the structure the model is asked to return;
# extract_claims checks the reply for the top-level "per_card_claims" key.
EXTRACTION_PROMPT = """\
Extract all factual claims about credit cards from the following agent answer.
Return a JSON object with this exact structure:

{
  "per_card_claims": [
    {
      "card_name": "exact card name as mentioned",
      "claimed_annual_fee": <number or null if not mentioned>,
      "claimed_earning_rates": {"category": rate_as_number, ...},
      "claimed_credits": [{"name": "credit_name", "claimed_value": <annual dollar value>}],
      "claimed_perks": [{"name": "perk_name", "claimed_value": <annual dollar value>}],
      "claimed_signup_bonus_points": <number or null if not mentioned>,
      "claimed_signup_bonus_value_usd": <number or null if not mentioned>
    }
  ]
}

Rules:
- Only extract explicit numerical claims (earning rates, fees, dollar values, point amounts)
- For earning rates, use the multiplier (e.g., "4x on dining" → {"dining": 4})
- For credits, use annual value
  (e.g., "$10/month Uber credit" → {"name": "uber_cash", "claimed_value": 120})
- Normalize credit/perk names to snake_case
  (e.g., "Uber Cash" → "uber_cash", "airline fee credit" → "airline_fee_credit")
- If a card is mentioned but no numerical claims are made about it, skip it
- Do not infer or calculate — only extract what is explicitly stated
- Return ONLY the JSON object, no other text

Agent answer:
"""


class ClaimExtractor:
    """Extracts structured claims from agent answers using an LLM.

    Uses the OpenAI-compatible API format, which works with OpenRouter,
    OpenAI, and other compatible providers.

    The extractor owns an ``httpx.Client``. Call :meth:`close` when done, or
    use the instance as a context manager, so pooled connections are released::

        with ClaimExtractor(api_key) as extractor:
            claims = extractor.extract_claims(answer)
    """

    def __init__(
        self,
        api_key: str,
        model: str = "anthropic/claude-haiku-4.5",
        base_url: str = "https://openrouter.ai/api/v1",
        timeout: float = 30.0,
    ) -> None:
        """Create the HTTP client used for all extraction requests.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            model: Model identifier placed in each request payload.
            base_url: Root URL of the OpenAI-compatible API.
            timeout: Timeout in seconds applied to the HTTP client.
        """
        # Imported here rather than at module top, so importing this module
        # does not itself require httpx.
        import httpx

        self.model = model
        self.client = httpx.Client(
            base_url=base_url,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            timeout=timeout,
        )

    def close(self) -> None:
        """Close the underlying HTTP client and release its connections."""
        self.client.close()

    def __enter__(self) -> ClaimExtractor:
        """Return the extractor itself for use in a ``with`` block."""
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Close the HTTP client on leaving the ``with`` block."""
        self.close()

    @staticmethod
    def _load_json(text: str) -> Any:
        """Parse raw model output into a Python object.

        Markdown code-fence lines are dropped when the output starts with a
        fence. If that text still is not valid JSON (e.g. the model wrapped
        the object in prose), the span from the first ``{`` to the last ``}``
        is parsed instead.

        Raises:
            json.JSONDecodeError: If no parseable JSON can be recovered.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            cleaned = "\n".join(
                ln for ln in cleaned.split("\n") if not ln.strip().startswith("```")
            )
        try:
            return json.loads(cleaned)
        except json.JSONDecodeError:
            # Fallback only: anything the strict parse accepts is returned
            # above unchanged, so previously-working replies behave the same.
            start = text.find("{")
            end = text.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(text[start : end + 1])

    @staticmethod
    def _is_valid_claims(claims: Any) -> bool:
        """Return True if *claims* is a dict whose ``per_card_claims`` is a list."""
        return isinstance(claims, dict) and isinstance(
            claims.get("per_card_claims"), list
        )

    def extract_claims(self, agent_answer: str) -> dict[str, Any] | None:
        """Extract factual claims from an agent's answer.

        Sends the extraction prompt followed by *agent_answer* to the
        ``/chat/completions`` endpoint and parses the reply as JSON.

        Args:
            agent_answer: The agent's answer text to extract claims from.

        Returns:
            The structured claims dict (with a ``per_card_claims`` list), or
            None if extraction fails. Failures (HTTP errors, unexpected
            response shape, unparseable or malformed JSON) are logged as
            warnings and never raised.
        """
        try:
            resp = self.client.post(
                "/chat/completions",
                json={
                    "model": self.model,
                    "max_tokens": 2000,
                    "temperature": 0,
                    "messages": [
                        {
                            "role": "user",
                            "content": EXTRACTION_PROMPT + agent_answer,
                        }
                    ],
                },
            )
            resp.raise_for_status()
            content = resp.json()["choices"][0]["message"]["content"]
            claims = self._load_json(content)
        except json.JSONDecodeError as e:
            logger.warning("Failed to parse LLM claims output as JSON: %s", e)
            return None
        except Exception as e:
            # Best-effort boundary: any transport or response-shape problem
            # is reported as "no claims" rather than propagated.
            logger.warning("Claim extraction failed: %s", e)
            return None

        if not self._is_valid_claims(claims):
            logger.warning("LLM returned invalid claims structure")
            return None
        return claims