CAPIMAC / utils.py
bestow136's picture
Upload 13 files
8ffcfd0 verified
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from numpy.random import randint
import math
import torch
def TT_split(n_all, test_prop, seed):
    """Split the indices ``0 .. n_all - 1`` into shuffled train and test lists.

    The global ``random`` module is seeded with ``seed`` and the indices are
    shuffled with ``random.sample``.  The first ``ceil((1 - test_prop) * n_all)``
    shuffled indices form the training set and the last
    ``floor(test_prop * n_all)`` form the test set.

    :param n_all: total number of samples
    :param test_prop: proportion of samples assigned to the test set
    :param seed: value passed to ``random.seed`` (this reseeds the global
        ``random`` generator as a side effect)
    :return: ``(train_idx, test_idx)``, two lists of sample indices
    """
    random.seed(seed)
    random_idx = random.sample(range(n_all), n_all)

    train_num = int(np.ceil((1 - test_prop) * n_all))
    test_num = int(np.floor(test_prop * n_all))

    train_idx = random_idx[:train_num]
    # ``random_idx[-0:]`` would be the *whole* list, so an empty test set has
    # to be handled explicitly instead of relying on negative slicing.
    test_idx = random_idx[-test_num:] if test_num > 0 else []
    return train_idx, test_idx
def _random_one_hot(view_num, alldata_len):
    """Return an ``(alldata_len, view_num)`` float array with exactly one 1 per row."""
    chosen_view = randint(0, view_num, size=(alldata_len, 1)).ravel()
    # Indexing the identity matrix always yields ``view_num`` columns, even
    # when some view index was never drawn.
    return np.eye(view_num)[chosen_view]


def get_sn(view_num, alldata_len, missing_rate):
    """Randomly generate incomplete data information, simulate partial view data with complete view data

    The returned matrix has one row per sample and one column per view; an
    entry of 1 marks a view that is kept and 0 a view that is missing.  Every
    row keeps at least one view.  ``missing_rate`` is halved internally, and
    the target fraction of ones is ``1 - missing_rate / 2``.

    When that target is not above ``1 / view_num`` a float one-hot matrix is
    returned; when it is exactly 1 an integer all-ones matrix is returned.
    Otherwise random masks are drawn (at most 200 attempts) until the
    fraction of ones is within 0.005 of the target, and the last integer
    mask drawn is returned.

    :param view_num:view number
    :param alldata_len:number of samples
    :param missing_rate:Defined in section 4.3 of the paper
    :return:Sn
    """
    one_rate = 1.0 - missing_rate / 2

    if one_rate <= (1 / view_num):
        return _random_one_hot(view_num, alldata_len)
    if one_rate == 1:
        return np.ones((alldata_len, view_num), dtype=int)

    total = view_num * alldata_len
    # Ones still needed after each sample has received its guaranteed view.
    one_num = total * one_rate - alldata_len
    base_ratio = one_num / total

    error = 1
    iterations = 0
    max_iterations = 200  # upper bound on the number of attempts
    while error >= 0.005 and iterations < max_iterations:
        # One guaranteed view per sample (a single 1 in every row).
        view_preserve = _random_one_hot(view_num, alldata_len)

        # First draw: estimate how many random ones collide with the
        # guaranteed ones.
        matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(base_ratio * 100)).astype(int)
        a = np.sum(((matrix_iter + view_preserve) > 1).astype(int))

        # Inflate the draw probability to compensate for those collisions.
        # A shrink factor of exactly 0 would give an infinite ratio and make
        # ``int()`` raise, so fall back to the uncorrected ratio in that case.
        shrink = 1 - a / one_num
        ratio = base_ratio
        if shrink != 0:
            one_num_iter = one_num / shrink
            ratio = one_num_iter / total

        # Second draw with the corrected probability, merged with the
        # guaranteed views.
        matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(ratio * 100)).astype(int)
        matrix = ((matrix_iter + view_preserve) > 0).astype(int)

        error = abs(one_rate - np.sum(matrix) / total)
        iterations += 1
    return matrix
def cosineSimilartydis(A,B):
    """Return a min-max normalised cosine distance between rows of A and B.

    Rows of both inputs are scaled to (approximately) unit L2 norm, their
    pairwise cosine similarity is computed with a matrix product, and each
    column of the similarity matrix is min-max normalised.  The result is
    ``1 - normalised similarity``, with entry ``[i, j]`` relating row ``i``
    of ``A`` to row ``j`` of ``B``.

    :param A: 2-D tensor, one vector per row
    :param B: 2-D tensor, one vector per row
    :return: tensor with ``A.shape[0]`` rows and ``B.shape[0]`` columns
    """
    def _unit_rows(mat):
        # The small constant keeps all-zero rows from dividing by zero.
        row_norms = torch.norm(mat, dim=1, p=2, keepdim=True)
        return mat / (row_norms + 0.000001)

    similarity = torch.mm(_unit_rows(A), _unit_rows(B).t())

    col_max = torch.max(similarity, dim=0).values
    col_min = torch.min(similarity, dim=0).values
    # Clamp so a constant column does not produce a zero denominator.
    span = torch.clamp(col_max - col_min, min=1e-6)

    return 1 - (similarity - col_min) / span
def find_nanchor(A,B):
    """Zero the smallest distances in every column of the A-to-B distance matrix.

    The distance matrix comes from ``cosineSimilartydis(A, B)``.  For each
    column, the ``ceil(rows / 19)`` entries with the smallest distance are set
    to 0 in a copy of that matrix; all other entries are left unchanged.

    The selection is done with ``torch.topk`` so it runs on whatever device
    the inputs live on and also works when the matrix has a single row.

    :param A: 2-D tensor, one vector per row
    :param B: 2-D tensor, one vector per row
    :return: a new tensor shaped like the distance matrix, with the selected
        entries zeroed
    """
    W = cosineSimilartydis(A, B)  # pairwise distances
    # NOTE(review): the divisor 19 is hard-coded in the original; its origin
    # is not visible here -- confirm before changing it.
    n = math.ceil(W.shape[0] / 19)

    # Work on a copy so the distance matrix itself is not modified.
    modified_matrix_A = W.clone()
    # Row indices of the n smallest entries in each column.
    _, min_indices = torch.topk(modified_matrix_A, n, dim=0, largest=False)
    modified_matrix_A.scatter_(0, min_indices, 0.0)
    return modified_matrix_A