""" Inference script for testing the fine-tuned telecom intent model. Loads LoRA adapters and generates network configurations from natural language intents. Usage on Kaggle: python inference.py --intent "Deploy a low latency slice for autonomous drones in the harbor zone" Or run with a file of intents: python inference.py --input_file intents.txt --output_file configs.json """ import argparse import json import os import re import sys import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel # ============================================================================ # CONFIGURATION # ============================================================================ BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # must match train.py ADAPTER_PATH = "./qwen2.5-7b-telecom-intent-lora" # output from train.py MAX_NEW_TOKENS = 1024 TEMPERATURE = 0.1 # low for deterministic config generation TOP_P = 0.95 def load_model(adapter_path: str, base_model: str): """Load base model + LoRA adapters.""" adapter_path = os.path.abspath(adapter_path) if not os.path.isdir(adapter_path): print(f"ERROR: Adapter path not found: {adapter_path}") print("Run train.py first to generate adapters.") sys.exit(1) print(f"Loading base model: {base_model}") model = AutoModelForCausalLM.from_pretrained( base_model, dtype=torch.float16, device_map="auto", trust_remote_code=True, ) print(f"Loading LoRA adapters: {adapter_path}") model = PeftModel.from_pretrained(model, adapter_path) model.eval() tokenizer = AutoTokenizer.from_pretrained( base_model, trust_remote_code=True, padding_side="left", ) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token print("Model ready!") return model, tokenizer def generate_config(model, tokenizer, intent_text: str) -> str: """Generate a network configuration from a natural language intent.""" messages = [ { "role": "system", "content": ( "You are a 5G/6G network orchestrator. " "Given a natural language network intent, output a valid, " "spec-compliant JSON network configuration. " "Do not include any explanation — only the JSON configuration." ), }, {"role": "user", "content": intent_text}, ] prompt = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True, ) inputs = tokenizer(prompt, return_tensors="pt").to(model.device) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=MAX_NEW_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, do_sample=True, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id, ) generated = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract only the assistant's response (after the prompt) response = generated[len(prompt):].strip() # Try to extract JSON if wrapped in markdown json_match = re.search(r"```(?:json)?\s*(.*?)\s*```", response, re.DOTALL) if json_match: response = json_match.group(1) return response def validate_json(text: str) -> tuple[bool, dict | None]: """Try to parse response as JSON. 
    Returns (success, parsed).
    """
    try:
        text = text.strip()
        # Trim any surrounding prose down to the outermost JSON object
        start = text.find("{")
        end = text.rfind("}")
        if start != -1 and end != -1 and end > start:
            text = text[start:end + 1]
        parsed = json.loads(text)
        return True, parsed
    except json.JSONDecodeError:
        return False, None


def main():
    parser = argparse.ArgumentParser(description="Telecom Intent Inference")
    parser.add_argument(
        "--intent",
        type=str,
        default=None,
        help="Single natural language intent string",
    )
    parser.add_argument(
        "--input_file",
        type=str,
        default=None,
        help="File with one intent per line",
    )
    parser.add_argument(
        "--output_file",
        type=str,
        default="generated_configs.json",
        help="Output JSON file for batch results",
    )
    parser.add_argument(
        "--adapter_path",
        type=str,
        default=ADAPTER_PATH,
        help="Path to LoRA adapters",
    )
    parser.add_argument(
        "--base_model",
        type=str,
        default=BASE_MODEL,
        help="Base model name",
    )
    args = parser.parse_args()

    model, tokenizer = load_model(args.adapter_path, args.base_model)

    intents = []
    if args.intent:
        intents = [args.intent]
    elif args.input_file:
        with open(args.input_file, "r") as f:
            intents = [line.strip() for line in f if line.strip()]
    else:
        # Interactive mode
        print("\nInteractive mode. Type 'quit' to exit.")
        while True:
            user_input = input("\nIntent> ")
            if user_input.lower() in ("quit", "exit", "q"):
                break
            config = generate_config(model, tokenizer, user_input)
            is_valid, parsed = validate_json(config)
            print(f"\n{'=' * 60}")
            print(f"Generated Config (valid={is_valid}):")
            print(f"{'=' * 60}")
            if is_valid:
                print(json.dumps(parsed, indent=2))
            else:
                print(config)
        return

    # Batch processing
    results = []
    valid_count = 0
    for i, intent in enumerate(intents):
        print(f"\n[{i + 1}/{len(intents)}] Processing: {intent[:80]}...")
        config = generate_config(model, tokenizer, intent)
        is_valid, parsed = validate_json(config)
        if is_valid:
            valid_count += 1
        results.append({
            "intent": intent,
            "generated_config": parsed if is_valid else config,
            "json_valid": is_valid,
        })

    # Save results
    with open(args.output_file, "w") as f:
        json.dump(results, f, indent=2)

    print(f"\n{'=' * 60}")
    print(f"Batch complete: {valid_count}/{len(intents)} valid JSON configs")
    print(f"Results saved to: {args.output_file}")


if __name__ == "__main__":
    main()
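
# Programmatic usage (a sketch, assuming the adapter directory produced by
# train.py exists and this file is importable as `inference`):
#
#     from inference import load_model, generate_config, validate_json
#
#     model, tokenizer = load_model(ADAPTER_PATH, BASE_MODEL)
#     raw = generate_config(
#         model, tokenizer,
#         "Deploy a low latency slice for autonomous drones in the harbor zone",
#     )
#     ok, cfg = validate_json(raw)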