import torch
import numpy as np
import torch.nn
from utils import cosineSimilartydis
from sklearn import metrics
import sklearn.metrics as metrics
from sklearn.cluster import KMeans
from munkres import Munkres
import sys
import logging
from sample_kernal import *
def tiny_infer(model, device, all_data, all_label_X, all_label_Y):
    model.eval()
    align_out0 = []
    align_out1 = []
    sort_value=[]
    class_labels_cluster = []
    len_alldata0 = all_data[0].shape[1]
    len_alldata1 = all_data[1].shape[1]
    # print(len_alldata0)
    # print(len_alldata1)
    len_map=max(len_alldata0, len_alldata1)
    align_labels = torch.zeros(len_map)
    if len_alldata0 > len_alldata1:
        labels = all_label_Y
        long_labels=all_label_X
        test_num = len_alldata1
        long_num= len_alldata0
    else:
        labels = all_label_X
        long_labels = all_label_Y
        test_num = len_alldata0
        long_num = len_alldata1
    labels = torch.from_numpy(labels)
    with torch.no_grad():
        x0, x1, labels = all_data[0].to(device), all_data[1].to(device), labels.to(device)
        x0 = x0.view(x0.size()[0], -1).T
        x1 = x1.view(x1.size()[0], -1).T
        h0, h1 = model(x0, x1)
        if len_alldata0 > len_alldata1:
            C = cosineSimilartydis(h0, h1).T
            C_temp=C.clone()
            for i in range(test_num):
                idx = torch.argsort(C[i, :])
                sort_value.append(C_temp[i, idx[0]])
                C[:, idx[0]] = float("inf")
                align_out0.append((h1[i, :].cpu()).numpy())
                align_out1.append((h0[idx[0], :].cpu()).numpy())#它和align0维度一样变小了
                # if all_label_Y[i] == all_label_X[idx[0]]:
                #     align_labels[i] = 1
        else:
            C = cosineSimilartydis(h0, h1)
            C_temp = C.clone()
            for i in range(test_num):
                idx = torch.argsort(C[i, :])
                sort_value.append(C_temp[i, idx[0]])
                C[:, idx[0]] = float("inf")
                align_out0.append((h0[i, :].cpu()).numpy())
                align_out1.append((h1[idx[0], :].cpu()).numpy())
                # if all_label_X[i] == all_label_Y[idx[0]]:
                #     align_labels[i] = 1

        sort_value,align_out0=torch.tensor(sort_value),torch.tensor(align_out0)
        sorted_list, sorted_indice0 = torch.sort(sort_value)
        sorted_indice0 = sorted_indice0.to(torch.long)

        # 使用排序后的索引重新排列 align_out0
        sorted_align0 = align_out0[sorted_indice0]
        # 计算相邻元素之间的差值
        differences = sorted_list[1:] - sorted_list[:-1]
        x_known = np.arange(len(sorted_list))
        # 生成索引
        index_pairs = [(i, i + 1) for i in range(len(sorted_list) - 1)]
        Xn=long_num-test_num
        top_values, top_indices = torch.topk(differences, Xn)
        # 使用 top_indices 获取对应的索引对
        top_index_pairs = [index_pairs[i.item()] for i in top_indices]
        average_indices = [(i + j) / 2 for i, j in top_index_pairs]
        average_indices=np.array(average_indices)
        bandwidth=1.0
        index_pairs=np.array(index_pairs)
        sorted_align0=np.array(sorted_align0)

        A3_initial = kernel_regression_multi_dim(x_known, sorted_align0, average_indices, bandwidth)
        x_known_sorted, y_sorted_align0 = insert_and_sort(x_known, sorted_align0, average_indices, A3_initial)
        alignre0,alignre1=[],[]
        y_sorted_align0=torch.tensor(y_sorted_align0).to('cuda')
        y_sorted_align0=y_sorted_align0.float()
        if len_alldata0 > len_alldata1:
            Cre = cosineSimilartydis(h0,y_sorted_align0)
            for i in range(long_num):
                idx0 = torch.argsort(Cre[i, :])
                Cre[:, idx0[0]] = float("inf")
                alignre0.append((h0[i, :].cpu()).numpy())
                alignre1.append((y_sorted_align0[idx[0], :].cpu()).numpy())
                if all_label_X[i] == all_label_Y[idx[0]]:
                    align_labels[i] = 1
        else:
            Cre = cosineSimilartydis(h1,y_sorted_align0)
            for i in range(long_num):
                idx1 = torch.argsort(Cre[i, :])
                Cre[:, idx1[0]] = float("inf")
                alignre0.append((h1[i, :].cpu()).numpy())
                alignre1.append((y_sorted_align0[idx[0], :].cpu()).numpy())
                if all_label_Y[i] == all_label_X[idx[0]]:
                    align_labels[i] = 1

        class_labels_cluster.extend(labels.cpu().numpy())
#
    count = torch.sum(align_labels)
    # print(test_num,'testnum')
    inference_acc = count.item() / test_num
    print(inference_acc)
    print(np.shape(align_out1))
    return np.array(alignre0), np.array(alignre1), np.array(class_labels_cluster), inference_acc
    # return np.array(align_out0), np.array(align_out1), np.array(class_labels_cluster), inference_acc
def Clustering(x_list, y):
    # logging.info('******** Clustering ********')
    n_clusters = np.size(np.unique(y))

    # np.random.seed(1)

    x_final_concat = np.concatenate(x_list[:], axis=1)
    kmeans_assignments, km = get_cluster_sols(x_final_concat, ClusterClass=KMeans, n_clusters=n_clusters,
                                              init_args={'n_init': 10})
    y_preds = get_y_preds(y, kmeans_assignments, n_clusters)
    if np.min(y) == 1:
        y = y - 1
    scores, _ ,accuracy,nmi,ari,f_score,f_score2,precision,precision2,recall,purity= clustering_metric(y, kmeans_assignments, n_clusters)

    ret = {}
    ret['kmeans'] = scores
    return y_preds, ret,accuracy,nmi,ari,f_score,f_score2,precision,precision2,recall,purity

def get_y_preds(y_true, cluster_assignments, n_clusters):
    '''
    Computes the predicted labels, where label assignments now
    correspond to the actual labels in y_true (as estimated by Munkres)

    cluster_assignments:    array of labels, outputted by kmeans
    y_true:                 true labels
    n_clusters:             number of clusters in the dataset

    returns:    a tuple containing the accuracy and confusion matrix,
                in that order
    '''
    confusion_matrix = metrics.confusion_matrix(y_true, cluster_assignments, labels=None)
    # compute accuracy based on optimal 1:1 assignment of clusters to labels
    cost_matrix = calculate_cost_matrix(confusion_matrix, n_clusters)
    indices = Munkres().compute(cost_matrix)
    kmeans_to_true_cluster_labels = get_cluster_labels_from_indices(indices)

    if np.min(cluster_assignments) != 0:
        cluster_assignments = cluster_assignments - np.min(cluster_assignments)
    y_pred = kmeans_to_true_cluster_labels[cluster_assignments]
    return y_pred

def get_cluster_sols(x, cluster_obj=None, ClusterClass=None, n_clusters=None, init_args={}):
    '''
    Using either a newly instantiated ClusterClass or a provided
    cluster_obj, generates cluster assignments based on input data

    x:              the points with which to perform clustering
    cluster_obj:    a pre-fitted instance of a clustering class
    ClusterClass:   a reference to the sklearn clustering class, necessary
                    if instantiating a new clustering class
    n_clusters:     number of clusters in the dataset, necessary
                    if instantiating new clustering class
    init_args:      any initialization arguments passed to ClusterClass

    returns:    a tuple containing the label assignments and the clustering object
    '''
    # if provided_cluster_obj is None, we must have both ClusterClass and n_clusters
    assert not (cluster_obj is None and (ClusterClass is None or n_clusters is None))
    cluster_assignments = None
    if cluster_obj is None:
        cluster_obj = ClusterClass(n_clusters, **init_args)
        for _ in range(10):
            try:
                cluster_obj.fit(x)
                break
            except:
                print("Unexpected error:", sys.exc_info())
        else:
            return np.zeros((len(x),)), cluster_obj

    cluster_assignments = cluster_obj.predict(x)
    return cluster_assignments, cluster_obj

def calculate_cost_matrix(C, n_clusters):
    cost_matrix = np.zeros((n_clusters, n_clusters))

    # cost_matrix[i,j] will be the cost of assigning cluster i to label j
    for j in range(n_clusters):
        s = np.sum(C[:, j])  # number of examples in cluster i
        for i in range(n_clusters):
            t = C[i, j]
            cost_matrix[j, i] = s - t
    return cost_matrix


def get_cluster_labels_from_indices(indices):
    n_clusters = len(indices)
    clusterLabels = np.zeros(n_clusters)
    for i in range(n_clusters):
        clusterLabels[i] = indices[i][1]
    return clusterLabels

def clustering_metric(y_true, y_pred, n_clusters, verbose=False, decimals=4):
    y_pred_ajusted = get_y_preds(y_true, y_pred, n_clusters)

    classification_metrics, confusion_matrix = classification_metric(y_true, y_pred_ajusted)
    accuracy = metrics.accuracy_score(y_true, y_pred_ajusted)
    accuracy = np.round(accuracy, decimals)
    # AMI
    ami = metrics.adjusted_mutual_info_score(y_true, y_pred_ajusted)
    ami = np.round(ami, decimals)
    # NMI
    nmi = metrics.normalized_mutual_info_score(y_true, y_pred_ajusted)
    nmi = np.round(nmi, decimals)
    # ARI
    ari = metrics.adjusted_rand_score(y_true, y_pred_ajusted)
    ari = np.round(ari, decimals)
    #fscore
    f_score = metrics.f1_score(y_true, y_pred_ajusted, average='macro')
    f_score = np.round(f_score, decimals)
    f_score2 = metrics.f1_score(y_true, y_pred_ajusted, average='weighted')
    f_score2 = np.round(f_score2, decimals)
    # precision
    precision = metrics.precision_score(y_true, y_pred_ajusted, average='macro')
    precision = np.round(precision, decimals)
    precision2 = metrics.precision_score(y_true, y_pred_ajusted, average='weighted')
    precision2 = np.round(precision2, decimals)
    # recall
    recall = metrics.recall_score(y_true, y_pred_ajusted, average='macro')
    recall = np.round(recall, decimals)
    # Purity
    purity = Purity(y_true, y_pred_ajusted)
    purity = np.round(purity, decimals)
    # print(accuracy,nmi,ari,f_score,f_score2,precision,precision2,recall,purity,"zb")
    # if verbose:
    #     logging.info('AMI: {}, NMI: {}, ARI: {}'.format(ami, nmi, ari))
    # return dict({'AMI': ami, 'NMI': nmi, 'ARI': ari}, **classification_metrics), confusion_matrix,accuracy,nmi,ari,f_score,f_score2,precision,precision2,recall,purity
    return dict({'ACC': accuracy,'AMI': ami, 'NMI': nmi, 'ARI': ari, 'F1': f_score, 'F2': f_score2, 'PRE': precision, 'PRE2': precision2, 'REC': recall, 'PUR': purity}), confusion_matrix, accuracy, nmi, ari, f_score, f_score2, precision, precision2, recall, purity
def Purity(y_true, y_pred):
    y_voted_labels = np.zeros(y_true.shape)
    labels = np.unique(y_true)
    ordered_labels = np.arange(labels.shape[0])
    for k in range(labels.shape[0]):
        y_true[y_true == labels[k]] = ordered_labels[k]
    labels = np.unique(y_true)
    bins = np.concatenate((labels, [np.max(labels) + 1]), axis=0)

    for cluster in np.unique(y_pred):
        hist, _ = np.histogram(y_true[y_pred == cluster], bins=bins)
        winner = np.argmax(hist)
        y_voted_labels[y_pred == cluster] = winner

    return metrics.accuracy_score(y_true, y_voted_labels)

def classification_metric(y_true, y_pred, average='macro', verbose=False, decimals=4):
    # confusion matrix
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    # ACC
    accuracy = metrics.accuracy_score(y_true, y_pred)
    accuracy = np.round(accuracy, decimals)

    # precision
    precision = metrics.precision_score(y_true, y_pred, average=average)
    precision = np.round(precision, decimals)

    # recall
    recall = metrics.recall_score(y_true, y_pred, average=average)
    recall = np.round(recall, decimals)

    # F-score
    f_score = metrics.f1_score(y_true, y_pred, average=average)
    f_score = np.round(f_score, decimals)

    if verbose:
        # print('Confusion Matrix')
        # print(confusion_matrix)
        logging.info('accuracy: {}, precision: {}, recall: {}, f_measure: {}'.format(accuracy, precision, recall, f_score))
    return {'accuracy': accuracy, 'precision': precision, 'recall': recall, 'f_measure': f_score}, confusion_matrix