| import numpy as np
|
| import random
|
| from sklearn.preprocessing import OneHotEncoder
|
| from numpy.random import randint
|
| import math
|
| import torch
|
def TT_split(n_all, test_prop, seed):
    """Split the sample indices ``0 .. n_all - 1`` into train and test sets.

    The indices are shuffled with the standard-library ``random`` module
    after seeding its module-level generator with ``seed``, then cut into a
    training prefix and a test remainder.

    :param n_all: total number of samples
    :param test_prop: fraction of the samples assigned to the test set
    :param seed: value passed to ``random.seed``
    :return: ``(train_idx, test_idx)`` -- two lists of indices that are
        disjoint and together contain every index exactly once
    """
    random.seed(seed)
    shuffled = random.sample(range(n_all), n_all)

    train_num = int(np.ceil((1 - test_prop) * n_all))
    # Keep the cut point inside the list so both slices stay well defined
    # even for a test_prop outside [0, 1].
    train_num = min(max(train_num, 0), n_all)

    # The test set is the exact complement of the training prefix.  Slicing
    # from the end with ``[-test_num:]`` would return the *whole* list when
    # test_num is 0, and an independently rounded test size can overlap the
    # training set (or skip an index) when float rounding makes ceil/floor
    # disagree.
    train_idx = shuffled[:train_num]
    test_idx = shuffled[train_num:]
    return train_idx, test_idx
|
|
|
def _random_one_hot(view_num, alldata_len):
    """Return an (alldata_len, view_num) float one-hot matrix, one random view per row."""
    chosen = randint(0, view_num, size=(alldata_len, 1))
    # Indexing an identity matrix always yields ``view_num`` columns, even
    # when some view index was never drawn (an encoder fitted on the draws
    # would silently drop those columns).
    return np.eye(view_num)[chosen[:, 0]]


def get_sn(view_num, alldata_len, missing_rate):
    """Randomly generate incomplete data information, simulate partial view data with complete view data

    The given ``missing_rate`` is halved, and ``one_rate = 1 - missing_rate / 2``
    is used as the target fraction of ones in the returned matrix.  Every
    row always keeps at least one view.

    * If ``one_rate <= 1 / view_num`` each row keeps exactly one randomly
      chosen view (float one-hot matrix).
    * If ``one_rate == 1`` the matrix is all ones.
    * Otherwise random matrices are drawn (at most 200 attempts) until the
      fraction of ones is within 0.005 of ``one_rate``; the last attempt is
      returned even if the tolerance was not reached.

    :param view_num:view number
    :param alldata_len:number of samples
    :param missing_rate:Defined in section 4.3 of the paper
    :return:Sn, an array of shape ``(alldata_len, view_num)`` holding 0/1
        entries
    """
    missing_rate = missing_rate / 2
    one_rate = 1.0 - missing_rate

    if one_rate <= (1 / view_num):
        return _random_one_hot(view_num, alldata_len)

    if one_rate == 1:
        # randint(1, 2) can only produce 1, so this is an all-ones int matrix.
        return randint(1, 2, size=(alldata_len, view_num))

    total_cells = view_num * alldata_len
    # Ones still needed on top of the one guaranteed view per sample.
    one_num = total_cells * one_rate - alldata_len

    error = 1
    iterations = 0
    max_iterations = 200

    while error >= 0.005 and iterations < max_iterations:
        view_preserve = _random_one_hot(view_num, alldata_len)

        # First draw: measure how many random ones land on a preserved cell.
        ratio = one_num / total_cells
        matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(ratio * 100)).astype(int)
        a = np.sum(((matrix_iter + view_preserve) > 1).astype(int))

        # Second draw: inflate the sampling ratio to compensate for that overlap.
        shrink = 1 - a / one_num
        if shrink == 0:
            # The correction factor diverges; use the limiting "fill every
            # cell" ratio instead of dividing by zero (int(inf) would raise).
            ratio = 1.0
        else:
            one_num_iter = one_num / shrink
            ratio = one_num_iter / total_cells
        matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(ratio * 100)).astype(int)

        matrix = ((matrix_iter + view_preserve) > 0).astype(int)
        ratio = np.sum(matrix) / total_cells
        error = abs(one_rate - ratio)
        iterations += 1

    return matrix
|
|
|
def cosineSimilartydis(A, B):
    """Return a column-wise min-max normalised cosine distance matrix.

    Rows of ``A`` and ``B`` are scaled by their L2 norm (plus 1e-6 to avoid
    division by zero) and multiplied to give a similarity matrix with one
    row per row of ``A`` and one column per row of ``B``.  Each column is
    then rescaled with its own minimum and range (range clamped to at least
    1e-6), and the result is subtracted from 1.

    :param A: 2-D tensor, one vector per row
    :param B: 2-D tensor, one vector per row, same row length as ``A``
    :return: tensor of shape ``(A.shape[0], B.shape[0])``
    """
    def _unit_rows(mat):
        row_norms = torch.norm(mat, p=2, dim=1, keepdim=True)
        return mat / (row_norms + 0.000001)

    similarity = torch.mm(_unit_rows(A), _unit_rows(B).t())

    col_min = similarity.min(dim=0).values
    col_max = similarity.max(dim=0).values
    # Guard against constant columns, whose range would be zero.
    col_range = (col_max - col_min).clamp(min=1e-6)

    return 1 - (similarity - col_min) / col_range
|
|
|
def find_nanchor(A, B, anchor_divisor=19):
    """Zero the smallest distances in every column of the A-to-B distance matrix.

    The distance matrix is ``cosineSimilartydis(A, B)``.  In each column the
    ``n = ceil(rows / anchor_divisor)`` smallest entries are overwritten
    with 0; all other entries are left unchanged.  When several entries tie
    for the n-th smallest value, which of them is zeroed is unspecified.

    The selection is done with ``torch.topk`` so it runs on whatever device
    the inputs live on and also works when the matrix has a single row.

    :param A: 2-D tensor, one vector per row
    :param B: 2-D tensor, one vector per row
    :param anchor_divisor: positive number; one entry per column is zeroed
        for every ``anchor_divisor`` rows (rounded up).  Defaults to 19.
    :return: a new tensor with the same shape as the distance matrix
    :raises ValueError: if ``anchor_divisor`` is not positive
    """
    if anchor_divisor <= 0:
        raise ValueError("anchor_divisor must be positive")

    W = cosineSimilartydis(A, B)
    num_rows = W.shape[0]
    # Never ask for more entries than a column has.
    n = min(math.ceil(num_rows / anchor_divisor), num_rows)

    modified_matrix_A = W.clone()
    if n > 0:
        # Row indices of the n smallest values of each column, shape (n, cols).
        _, nearest_rows = torch.topk(W, n, dim=0, largest=False)
        modified_matrix_A.scatter_(0, nearest_rows, 0)

    return modified_matrix_A