"""
Bloom's Level: Analyze
Compare or contrast information from buried text.
"""
import logging
import os
import random
import time
from typing import List, Dict, Any

from tqdm import tqdm

from src.generator import generate_text
from src.metrics import exact_match_score, compute_accuracy
from src.utils import ensure_dir, save_jsonl, save_json

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Distractor sentences.  run_analyze samples from this pool (with replacement)
# to build the surrounding text in which the two statements are buried.
FILLERS = [
    "The museum houses artifacts from the ancient world.",
    "Coral reefs support diverse marine ecosystems.",
    "Railway gauges vary between countries.",
]

# Comparison tasks as (statement A, statement B, expected comparison) tuples.
# The third element supplies the keywords that run_analyze looks for in the
# model's answer when scoring it.
COMPARE_PAIRS = [
    ("Solar power is renewable but intermittent.", "Coal is reliable but polluting.", "solar is cleaner"),
    ("Online learning is flexible.", "Classroom learning is interactive.", "online is more flexible"),
    ("Electric cars have zero emissions.", "Gas cars have longer range.", "electric has zero emissions"),
]


# Function words that show up in almost any English answer.  Scoring on them
# (e.g. "is", which is also a substring of "this") would mark nearly every
# response correct regardless of its content.
_STOPWORDS = frozenset({
    "a", "an", "and", "are", "but", "has", "have", "is", "more", "of",
    "than", "the", "to",
})


def _mentions_expected(answer: str, expected: str) -> bool:
    """Return True if *answer* contains a content keyword of *expected*.

    *expected* is split on whitespace and stop words are dropped; the answer
    counts as a hit when any remaining keyword occurs in it as a
    case-insensitive substring.  If every word of *expected* is a stop word,
    all of its words are used so the check never becomes vacuously false.
    """
    words = expected.lower().split()
    keywords = [w for w in words if w not in _STOPWORDS] or words
    lowered = answer.lower()
    return any(kw in lowered for kw in keywords)


def _bury_statements(
    fillers: List[str],
    depth: float,
    stmt_a: str,
    stmt_b: str,
) -> List[str]:
    """Return a copy of *fillers* with both labelled statements inserted.

    Statement A goes at index ``int(depth * len(fillers))``.  Statement B goes
    ten positions further on, wrapping around the filler count, so for short
    documents or deep positions B can end up ahead of A.  With no fillers at
    all, B is simply placed right after A.
    """
    sents = list(fillers)
    n_fillers = len(sents)
    idx_a = int(depth * n_fillers)
    # Guard the modulo: an empty filler list would otherwise divide by zero.
    idx_b = (idx_a + 10) % n_fillers if n_fillers else 1
    sents.insert(idx_a, f"Statement A: {stmt_a}")
    # list.insert appends when the index is past the end, so no separate
    # "append" branch is needed.
    sents.insert(idx_b, f"Statement B: {stmt_b}")
    return sents


def run_analyze(
    model_name: str,
    num_sentences: int,
    num_examples: int,
    out_dir: str,
    depths: List[float] = None,
) -> Dict[str, Any]:
    """Run the "analyze" experiment: compare two statements buried in filler.

    For every depth, ``num_examples`` documents are generated.  Each document
    consists of ``num_sentences`` sentences sampled from ``FILLERS`` plus one
    randomly chosen pair from ``COMPARE_PAIRS``; the model is asked which
    statement is the better option and its answer is scored by keyword match
    against the pair's expected comparison.

    Args:
        model_name: Forwarded to ``generate_text`` as ``model_name``.
        num_sentences: Number of filler sentences per document.
        num_examples: Number of documents generated per depth.
        out_dir: Directory handed to ``ensure_dir`` and used for all output
            files.
        depths: Relative positions used to place Statement A.  ``None``
            selects nine evenly spaced depths from 0.0 to 1.0.

    Returns:
        The summary dict that is also written to ``analyze_summary.json``:
        experiment name, cognitive level, the two size parameters, accuracy
        per depth (keyed by ``str(depth)``) and elapsed time in minutes.
        Per-example predictions are only written to the per-depth JSONL files.
    """
    ensure_dir(out_dir)
    if depths is None:
        depths = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]

    results = {}
    start = time.time()

    for depth in depths:
        logger.info(f"[ANALYZE] Depth {depth:.1%}")
        preds = []
        for _ in tqdm(range(num_examples), desc=f"Analyze {depth:.1%}", leave=False):
            # Sample fillers first, then the pair, so the sequence of random
            # draws is the same as before for a given RNG state.
            fillers = [random.choice(FILLERS) for _ in range(num_sentences)]
            stmt_a, stmt_b, expected_comparison = random.choice(COMPARE_PAIRS)

            doc = " ".join(_bury_statements(fillers, depth, stmt_a, stmt_b))
            prompt = (
                f"Read the text and compare Statement A and Statement B.\n\n"
                f"{doc}\n\n"
                f"Which statement represents the better option? Explain in one sentence."
            )
            ans = generate_text(
                [{"role": "user", "content": prompt}],
                model_name=model_name,
                max_new_tokens=40,
            )
            # Score on content keywords only; see _mentions_expected.
            correct = 1.0 if _mentions_expected(ans, expected_comparison) else 0.0
            preds.append({
                "model_answer": ans,
                "correct": correct,
                "expected": expected_comparison,
                "depth": depth,
            })

        save_jsonl(os.path.join(out_dir, f"analyze_depth_{depth}.jsonl"), preds)
        acc = compute_accuracy(preds)
        results[depth] = {"accuracy": acc, "predictions": preds}
        logger.info(f"[ANALYZE] Depth {depth:.1%}: acc={acc:.3f}")

    summary = {
        "experiment": "analyze",
        "cognitive_level": "analyze",
        "num_sentences": num_sentences,
        "num_examples": num_examples,
        "depths": {str(d): results[d]["accuracy"] for d in depths},
        "time_minutes": (time.time() - start) / 60,
    }
    save_json(os.path.join(out_dir, "analyze_summary.json"), summary)
    logger.info(f"[ANALYZE] Time={(time.time()-start)/60:.1f} min")
    return summary