from pathlib import Path
from typing import Dict, Sequence
import json

import numpy as np

from fairchem.data.omol.modules.evaluator import (
    ligand_pocket,
    ligand_strain,
    geom_conformers,
    protonation_energies,
    unoptimized_ie_ea,
    distance_scaling,
    unoptimized_spin_gap,
)
from evaluator_utils import get_order


class SubmissionLoadError(Exception):
    """Raised if unable to load the submission file."""


# Dispatch table: human-readable eval name -> evaluator callable taking
# (annotations_data, submission_data) dicts loaded from JSON.
OMOL_EVAL_FUNCTIONS = {
    "Ligand pocket": ligand_pocket,
    "Ligand strain": ligand_strain,
    "Conformers": geom_conformers,
    "Protonation": protonation_energies,
    "IE_EA": unoptimized_ie_ea,
    "Distance scaling": distance_scaling,
    "Spin gap": unoptimized_spin_gap,
}

# Maps a user-facing OMol subset name to the `data_ids` values it covers
# in the annotations file.
OMOL_DATA_ID_MAPPING = {
    "metal_complexes": ["metal_complexes"],
    "electrolytes": ["elytes"],
    "biomolecules": ["biomolecules"],
    "neutral_organics": ["ani2x", "orbnet_denali", "geom_orca6", "trans1x", "rgd"],
}

# OC20 subsplit mappings
OC20_DATA_ID_MAPPING = {
    "id": ["id"],
    "ood_ads": ["ood_ads"],
    "ood_cat": ["ood_cat"],
    "ood_both": ["ood_both"],
}


def _raise_if_inf(energy: np.ndarray) -> None:
    """Raise ValueError if `energy` contains any +/-inf entries.

    Lists up to three offending indices in the message so the submitter can
    locate the bad predictions.
    """
    inf_energy_ids = list(set(np.where(np.isinf(energy))[0]))
    if inf_energy_ids:
        # ValueError (a subclass of Exception, which callers previously had
        # to catch) instead of a bare Exception.
        raise ValueError(
            f"Inf values found in `energy` for IDs: ({inf_energy_ids[:3]}, ...)"
        )


def omol_s2ef_metrics(
    annotations_path: Path,
    submission_filename: Path,
    subsets: Sequence[str] = ("all",),
) -> Dict[str, float]:
    """Compute OMol S2EF energy/force MAE metrics per subset.

    Args:
        annotations_path: .npz file with `ids`, `energy`, `forces` (object
            array, one (natoms, 3) array per system) and `data_ids`.
        submission_filename: .npz file with `ids`, `energy`, concatenated
            `forces` and per-system `natoms`.
        subsets: subset names to report; "all" or keys of
            OMOL_DATA_ID_MAPPING. Default is a tuple (not a list) to avoid
            the shared-mutable-default pitfall.

    Returns:
        Dict mapping "{subset}_energy_mae" / "{subset}_forces_mae" to floats.

    Raises:
        SubmissionLoadError: if the submission file cannot be read.
        ValueError: if the submission energies contain inf values.
    """
    try:
        with np.load(submission_filename) as data:
            submission_ids = data["ids"]
    except Exception as e:
        raise SubmissionLoadError(
            "Error loading submission file. 'ids' must not be object types."
        ) from e

    with np.load(annotations_path, allow_pickle=True) as data:
        annotations_ids = data["ids"]

    # Permutation that reorders submission entries into annotation order.
    order = get_order(annotations_ids, submission_ids)

    try:
        with np.load(submission_filename) as pred_data:
            forces = pred_data["forces"]
            energy = pred_data["energy"][order]
            # Split the flat forces array back into one (natoms, 3) chunk per
            # system, then reorder to match the annotations.
            forces = np.array(
                np.split(forces, np.cumsum(pred_data["natoms"])[:-1]), dtype=object
            )[order]
    except Exception as e:
        raise SubmissionLoadError(
            "Error loading submission data. Make sure you concatenated your forces and there are no object types."
        ) from e

    _raise_if_inf(energy)

    with np.load(annotations_path, allow_pickle=True) as target_data:
        target_forces = target_data["forces"]
        target_energy = target_data["energy"]
        target_data_ids = target_data["data_ids"]

    metrics: Dict[str, float] = {}
    for subset in subsets:
        if subset == "all":
            subset_mask = np.ones(len(target_data_ids), dtype=bool)
        else:
            # Build the membership set once; O(1) lookups in the comprehension.
            allowed_ids = set(OMOL_DATA_ID_MAPPING.get(subset, []))
            subset_mask = np.array(
                [data_id in allowed_ids for data_id in target_data_ids]
            )

        sub_energy = energy[subset_mask]
        sub_target_energy = target_energy[subset_mask]
        metrics[f"{subset}_energy_mae"] = np.mean(
            np.abs(sub_target_energy - sub_energy)
        )

        forces_mae = 0.0
        natoms = 0
        for sub_forces, sub_target_forces in zip(
            forces[subset_mask], target_forces[subset_mask]
        ):
            forces_mae += np.sum(np.abs(sub_target_forces - sub_forces))
            natoms += sub_forces.shape[0]
        # Per-component force MAE (each atom contributes 3 components).
        # Guard against an empty subset so we report nan instead of raising
        # ZeroDivisionError (the energy MAE above is nan in that case too).
        metrics[f"{subset}_forces_mae"] = (
            forces_mae / (3 * natoms) if natoms else float("nan")
        )

    return metrics


def omol_evaluations(
    annotations_path: Path,
    submission_filename: Path,
    eval_type: str,
) -> Dict[str, float]:
    """Run one of the JSON-based OMol evaluations (see OMOL_EVAL_FUNCTIONS).

    Args:
        annotations_path: JSON annotations file, keyed by entry id.
        submission_filename: JSON submission file, keyed by entry id; its key
            set must exactly match the annotations.
        eval_type: name of the evaluation, a key of OMOL_EVAL_FUNCTIONS.

    Returns:
        The metrics dict produced by the selected evaluator.

    Raises:
        SubmissionLoadError: if the submission JSON cannot be parsed.
        ValueError: if the entry sets mismatch or `eval_type` is unknown.
    """
    try:
        with open(submission_filename) as f:
            submission_data = json.load(f)
    except Exception as e:
        raise SubmissionLoadError("Error loading submission file") from e

    with open(annotations_path) as f:
        annotations_data = json.load(f)

    submission_entries = set(submission_data.keys())
    annotation_entries = set(annotations_data.keys())
    if submission_entries != annotation_entries:
        missing = annotation_entries - submission_entries
        unexpected = submission_entries - annotation_entries
        raise ValueError(
            f"Submission and annotations entries do not match.\n"
            f"Missing entries in submission: {missing}\n"
            f"Unexpected entries in submission: {unexpected}"
        )
    # NOTE: no duplicate check is needed here — json.load already collapses
    # duplicate keys, so len(submission_entries) == len(submission_data)
    # always holds.

    eval_fn = OMOL_EVAL_FUNCTIONS.get(eval_type)
    if eval_fn is None:
        # Explicit error instead of the opaque TypeError from calling None.
        raise ValueError(f"Unknown eval_type: {eval_type}")
    metrics = eval_fn(annotations_data, submission_data)
    return metrics


def oc_s2ef_metrics(
    annotations_path: Path,
    submission_filename: Path,
    subsets: Sequence[str] = ("id", "ood_ads", "ood_cat", "ood_both"),
) -> Dict[str, float]:
    """Calculate S2EF metrics for OC datasets.

    Args:
        annotations_path: .npz annotations with per-split
            `{split}_ids`, `{split}_energy`, `{split}_forces` arrays.
        submission_filename: .npz submission with per-split `{split}_ids`,
            `{split}_energy`, concatenated `{split}_forces` and
            `{split}_chunk_idx` split points.
        subsets: OC20 subsplits to evaluate. Default is a tuple to avoid the
            shared-mutable-default pitfall.

    Returns:
        "{split}_energy_mae" / "{split}_forces_mae" per split, plus
        "avg_energy_mae" / "avg_forces_mae" averaged over the splits.

    Raises:
        SubmissionLoadError: if the submission file cannot be read.
        ValueError: if the submission energies contain inf values.
    """
    metrics: Dict[str, float] = {}
    for split in subsets:
        try:
            with np.load(submission_filename) as data:
                submission_ids = data[f"{split}_ids"]
        except Exception as e:
            raise SubmissionLoadError(
                f"Error loading submission file. '{split}_ids' must not be object types."
            ) from e

        with np.load(annotations_path, allow_pickle=True) as data:
            annotations_ids = data[f"{split}_ids"]

        # Permutation reordering submission entries into annotation order.
        order = get_order(annotations_ids, submission_ids)

        try:
            with np.load(submission_filename) as pred_data:
                forces = pred_data[f"{split}_forces"]
                energy = pred_data[f"{split}_energy"][order]
                forces = np.array(
                    np.split(forces, pred_data[f"{split}_chunk_idx"]), dtype=object
                )[order]
        except Exception as e:
            raise SubmissionLoadError(
                "Error loading submission data. Make sure you concatenated your forces and there are no object types."
            ) from e

        _raise_if_inf(energy)

        with np.load(annotations_path, allow_pickle=True) as target_data:
            target_forces = target_data[f"{split}_forces"]
            target_energy = target_data[f"{split}_energy"]

        metrics[f"{split}_energy_mae"] = np.mean(
            np.abs(target_energy - energy.flatten())
        )

        forces_mae = 0.0
        natoms = 0
        for sub_forces, sub_target_forces in zip(forces, target_forces):
            forces_mae += np.sum(np.abs(sub_target_forces - sub_forces))
            natoms += sub_forces.shape[0]
        # Per-component force MAE; guard the empty-split edge case.
        metrics[f"{split}_forces_mae"] = (
            forces_mae / (3 * natoms) if natoms else float("nan")
        )

    # Compute average across all subsplits
    energy_maes = [metrics[f"{s}_energy_mae"] for s in subsets]
    forces_maes = [metrics[f"{s}_forces_mae"] for s in subsets]
    metrics["avg_energy_mae"] = np.mean(energy_maes)
    metrics["avg_forces_mae"] = np.mean(forces_maes)

    return metrics


def oc_is2re_metrics(
    annotations_path: Path,
    submission_filename: Path,
    subsets: Sequence[str] = ("id", "ood_ads", "ood_cat", "ood_both"),
) -> Dict[str, float]:
    """Calculate IS2RE metrics for OC dataset.

    Args:
        annotations_path: .npz annotations with `{split}_ids` and
            `{split}_energy` per split.
        submission_filename: .npz submission with the same per-split keys.
        subsets: OC20 subsplits to evaluate. Default is a tuple to avoid the
            shared-mutable-default pitfall.

    Returns:
        "{split}_energy_mae" per split plus "avg_energy_mae".

    Raises:
        SubmissionLoadError: if the submission file cannot be read.
        ValueError: if the submission energies contain inf values.
    """
    metrics: Dict[str, float] = {}
    for split in subsets:
        try:
            with np.load(submission_filename) as data:
                submission_ids = data[f"{split}_ids"]
        except Exception as e:
            raise SubmissionLoadError(
                f"Error loading submission file. '{split}_ids' must not be object types."
            ) from e

        with np.load(annotations_path, allow_pickle=True) as data:
            annotations_ids = data[f"{split}_ids"]

        order = get_order(annotations_ids, submission_ids)

        try:
            with np.load(submission_filename) as pred_data:
                energy = pred_data[f"{split}_energy"][order]
        except Exception as e:
            raise SubmissionLoadError(
                "Error loading submission data."
            ) from e

        _raise_if_inf(energy)

        with np.load(annotations_path, allow_pickle=True) as target_data:
            target_energy = target_data[f"{split}_energy"]

        metrics[f"{split}_energy_mae"] = np.mean(
            np.abs(target_energy - energy.flatten())
        )

    # Compute average across all subsplits
    energy_maes = [metrics[f"{s}_energy_mae"] for s in subsets]
    metrics["avg_energy_mae"] = np.mean(energy_maes)

    return metrics


def evaluate(
    annotations_path: Path,
    submission_filename: Path,
    eval_type: str,
) -> Dict[str, float]:
    """Dispatch a submission to the appropriate evaluation routine.

    Args:
        annotations_path: ground-truth annotations file (.npz or .json,
            depending on `eval_type`).
        submission_filename: the user's submission file.
        eval_type: one of "Validation", "Test", "OC20 S2EF Test",
            "OC20 IS2RE Test", or a key of OMOL_EVAL_FUNCTIONS.

    Returns:
        The metrics dict from the selected evaluator.

    Raises:
        ValueError: if `eval_type` is not recognized.
    """
    if eval_type in ["Validation", "Test"]:
        metrics = omol_s2ef_metrics(
            annotations_path,
            submission_filename,
            subsets=[
                "all",
                "metal_complexes",
                "electrolytes",
                "biomolecules",
                "neutral_organics",
            ],
        )
    elif eval_type == "OC20 S2EF Test":
        metrics = oc_s2ef_metrics(
            annotations_path,
            submission_filename,
            subsets=["id", "ood_ads", "ood_cat", "ood_both"],
        )
    elif eval_type == "OC20 IS2RE Test":
        metrics = oc_is2re_metrics(
            annotations_path,
            submission_filename,
            subsets=["id", "ood_ads", "ood_cat", "ood_both"],
        )
    elif eval_type in OMOL_EVAL_FUNCTIONS:
        metrics = omol_evaluations(
            annotations_path,
            submission_filename,
            eval_type,
        )
    else:
        raise ValueError(f"Unknown eval_type: {eval_type}")
    return metrics