"""
Cost calculation module for MLPerf configurations.
"""

import logging

import pandas as pd

logger = logging.getLogger(__name__)

# Fallback hourly rate used for any device that has no entry in the cost
# tables below (see initialize_device_costs and estimate_hourly_cost).
DEFAULT_HOURLY_COST = 1.0

# Default hourly rate per device family. The keys are the canonical family
# names produced by normalize_gpu_name(), so lookups must go through that
# function first.
# NOTE(review): estimate_hourly_cost multiplies these values by the accelerator
# count, so they appear to be per-accelerator rates; the currency is not stated
# anywhere in this module -- confirm.
DEFAULT_DEVICE_COSTS = {
    "NVIDIA H100": 3.00,
    "NVIDIA H200": 4.00,
    "NVIDIA GH200": 5.00,
    "NVIDIA B200/GB200": 7.00,
    "AMD MI300X": 3.50,
    "AMD MI325X": 4.50,
    "NVIDIA RTX 4090": 1.20,
    "NVIDIA L40S": 1.80,
    "NVIDIA Jetson AGX": 0.30,
}

# Mutable module-level cost table: rebuilt by initialize_device_costs(),
# patched in place by update_device_costs(), and read by
# estimate_hourly_cost(). Empty until initialize_device_costs() is called.
device_costs = {}


# Ordered (keyword, family) pairs used by normalize_gpu_name().
# Matching is by substring, so ORDER MATTERS: "GH200" contains "H200", which
# means the superchip keywords must be tested before the plain H100/H200 ones,
# otherwise every GH200 system is reported (and priced) as an H200.
_GPU_FAMILY_KEYWORDS = (
    ("GH200", "NVIDIA GH200"),
    ("GRACE HOPPER", "NVIDIA GH200"),
    ("GB200", "NVIDIA B200/GB200"),
    ("B200", "NVIDIA B200/GB200"),
    ("H100", "NVIDIA H100"),
    ("H200", "NVIDIA H200"),
    ("MI300X", "AMD MI300X"),
    ("MI325X", "AMD MI325X"),
    ("RTX 4090", "NVIDIA RTX 4090"),
    ("L40S", "NVIDIA L40S"),
)


def normalize_gpu_name(name: str) -> str:
    """Map a raw accelerator name onto its canonical device-family name.

    The comparison is case-insensitive and substring based, so vendor
    prefixes, form factors and memory sizes in the raw name are ignored.

    Args:
        name: Raw accelerator name as found in the dataset.

    Returns:
        The canonical family name when a known keyword is found; otherwise
        ``name`` unchanged. Falsy input (``None``, ``""``) is returned as is.
    """
    if not name:
        return name

    name_upper = name.upper()

    # Jetson boards are identified by two tokens rather than a single keyword.
    if "JETSON" in name_upper and ("ORIN" in name_upper or "THOR" in name_upper):
        return "NVIDIA Jetson AGX"

    # First hit wins; the table is ordered most-specific first.
    for keyword, family in _GPU_FAMILY_KEYWORDS:
        if keyword in name_upper:
            return family

    return name


def initialize_device_costs(df: pd.DataFrame) -> None:
    """Rebuild the module-level cost table from the accelerators found in *df*.

    Every distinct, non-null value of the ``system.accelerator.name`` column
    is normalized to its device family. Each family is then assigned its rate
    from ``DEFAULT_DEVICE_COSTS``, or ``DEFAULT_HOURLY_COST`` when the family
    has no default. Any previously stored costs are discarded.

    Args:
        df: Dataset to scan. ``None``, an empty frame, or a frame without the
            accelerator-name column results in an empty cost table.
    """
    global device_costs

    name_column = "system.accelerator.name"
    has_names = df is not None and not df.empty and name_column in df.columns

    families = (
        {normalize_gpu_name(raw_name) for raw_name in df[name_column].dropna().unique()}
        if has_names
        else set()
    )

    # Replace (not merge into) the existing table.
    device_costs = {
        family: DEFAULT_DEVICE_COSTS.get(family, DEFAULT_HOURLY_COST)
        for family in families
    }

    logger.info(f"Initialized costs for {len(device_costs)} unique device families")


def get_device_costs() -> dict[str, float]:
    """Return a shallow copy of the current device-family cost table.

    Mutating the returned dict does not affect the module-level table.
    """
    snapshot = dict(device_costs)
    return snapshot


def update_device_costs(new_costs: dict[str, float]) -> None:
    """Merge *new_costs* into the module-level cost table.

    Existing entries with the same key are overwritten; all other entries
    are left untouched.

    Args:
        new_costs: Mapping of device name to hourly cost.
    """
    # The table is mutated in place rather than rebound, so no ``global``
    # declaration is required here.
    device_costs.update(new_costs)
    logger.info(f"Updated costs for {len(new_costs)} devices")


def calculate_costs(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with cost metrics added.

    Two columns are added (or overwritten):

    * ``hourly_cost`` -- value returned by ``estimate_hourly_cost`` for the
      row, or ``None`` when it cannot be estimated.
    * ``cost_per_million_tokens`` -- ``hourly_cost`` divided by the hourly
      throughput (``metrics.result * 3600``) and scaled to one million; ``None``
      when the hourly cost or a positive throughput is unavailable.

    Both columns keep object dtype with ``None`` placeholders. Values are
    matched to rows by position, so the index of *df* does not need to be
    unique.

    Args:
        df: Input frame. ``None`` or an empty frame is returned unchanged.

    Returns:
        A new DataFrame; the input is not modified.
    """
    if df is None or df.empty:
        return df

    result_df = df.copy()

    # Seed both columns before iterating so each row passed to
    # estimate_hourly_cost has the same layout (and object dtype) as before.
    result_df["hourly_cost"] = None
    result_df["cost_per_million_tokens"] = None

    has_throughput = "metrics.result" in result_df.columns

    hourly_costs = []
    costs_per_million = []

    # Values are collected positionally instead of being written back with
    # ``.at[idx, ...]``: with duplicate index labels a label-based write hits
    # every row sharing that label and silently corrupts the result.
    for _, row in result_df.iterrows():
        hourly_cost = estimate_hourly_cost(row)
        cost_per_million = None

        if hourly_cost and has_throughput and row["metrics.result"]:
            tokens_per_hour = row["metrics.result"] * 3600
            if tokens_per_hour > 0:
                cost_per_million = (hourly_cost / tokens_per_hour) * 1000000

        hourly_costs.append(hourly_cost)
        costs_per_million.append(cost_per_million)

    # Assign as object arrays so ``None`` is preserved rather than being
    # coerced to NaN, and so no index alignment takes place.
    result_df["hourly_cost"] = pd.Series(hourly_costs, dtype=object).to_numpy()
    result_df["cost_per_million_tokens"] = pd.Series(
        costs_per_million, dtype=object
    ).to_numpy()

    return result_df


def estimate_hourly_cost(row: pd.Series) -> "float | None":
    """Estimate the hourly cost of a single configuration.

    The per-device rate is looked up in the module-level ``device_costs``
    table, first by normalized accelerator name and then by vendor, falling
    back to ``DEFAULT_HOURLY_COST``. The vendor is only consulted when an
    accelerator name is present. The rate is multiplied by the accelerator
    count.

    Args:
        row: Row providing ``system.accelerator.name``,
            ``system.accelerator.vendor`` and
            ``system.accelerator.total_count``; missing keys are tolerated.

    Returns:
        The estimated hourly cost, or ``None`` when the accelerator count is
        missing (``None``/NaN), zero, or the calculation fails.
    """
    try:
        acc_name = row.get("system.accelerator.name")
        acc_vendor = row.get("system.accelerator.vendor")
        acc_count = row.get("system.accelerator.total_count")

        # pandas represents a missing count as NaN, which is truthy; without
        # the explicit isna check the result would be a NaN cost, not None.
        if pd.isna(acc_count) or not acc_count:
            return None

        base_cost = DEFAULT_HOURLY_COST

        # A missing name may arrive as NaN (a float). Treat anything that is
        # not a non-empty string like an absent name so the default rate is
        # used instead of failing inside normalize_gpu_name.
        if isinstance(acc_name, str) and acc_name:
            normalized_name = normalize_gpu_name(acc_name)
            if normalized_name in device_costs:
                base_cost = device_costs[normalized_name]
            elif acc_vendor and acc_vendor in device_costs:
                base_cost = device_costs[acc_vendor]

        return base_cost * acc_count

    except Exception as e:
        # Deliberate best-effort: one malformed row (e.g. a non-numeric
        # count) must not abort cost calculation for the whole dataset.
        logger.warning("Error calculating cost: %s", e)
        return None