| import numpy as np
|
| import torch
|
| import umap
|
| import matplotlib.pyplot as plt
|
| from run import *
|
|
|
|
|
def para(data, num_nodes, num_class):
    """Derive hyper-parameters for the anchor-selection pipeline.

    Returns a 5-tuple ``(num_walks, walk_length, similarity_threshold,
    num_anchors, coverage_radius)``.  The walk budget shrinks and the walk
    length grows as the graph gets larger; the coverage radius is a fixed
    fraction of the mean off-diagonal pairwise distance.
    """
    threshold = 0.4
    anchors = num_class * 2

    # Mean pairwise cosine distance over off-diagonal entries only.
    dist = cosineSimilartydis(data, data)
    off_diag = ~torch.eye(dist.size(0), dtype=torch.bool)
    radius = dist[off_diag].mean() * 0.3

    # Walk budget per graph-size bucket: (upper bound, walks, length).
    for bound, walks, length in ((100, 20, 3), (1000, 10, 5), (10000, 5, 10)):
        if num_nodes < bound:
            break
    else:
        walks, length = 3, 20

    return walks, length, threshold, anchors, radius
|
|
|
|
|
|
|
|
|
def cosineSimilarty(A, B):
    """Cosine similarity between the rows of A and B, rescaled to [0, 1].

    Rows are L2-normalised (with a small epsilon to avoid division by zero),
    then the dot-product matrix is min-max normalised per column.  Columns
    with zero range produce NaN, which is replaced by 1e-4.
    """
    eps = 0.000001
    a_unit = A / (A.norm(p=2, dim=1, keepdim=True) + eps)
    b_unit = B / (B.norm(p=2, dim=1, keepdim=True) + eps)

    sim = a_unit @ b_unit.t()

    # Column-wise min-max rescaling.
    col_max = sim.max(dim=0).values
    col_min = sim.min(dim=0).values
    scaled = (sim - col_min) / (col_max - col_min)

    return torch.nan_to_num(scaled, nan=0.0001)
|
|
|
def cosineSimilartydis(A, B):
    """Cosine distance: 1 minus the column-wise min-max rescaled cosine
    similarity between the rows of A and B (NaN columns become 1e-4 before
    the subtraction)."""
    eps = 0.000001
    a_unit = A / (A.norm(p=2, dim=1, keepdim=True) + eps)
    b_unit = B / (B.norm(p=2, dim=1, keepdim=True) + eps)

    sim = a_unit @ b_unit.t()

    col_max = sim.max(dim=0).values
    col_min = sim.min(dim=0).values
    scaled = torch.nan_to_num((sim - col_min) / (col_max - col_min), nan=0.0001)

    return 1 - scaled
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def visit(data, alpha=0.5):
    """Build a random-walk transition matrix from node features.

    Cosine similarity between rows of ``data`` is used as the affinity
    measure.  Each row-normalised affinity x is reweighted as
    ``r_mu(x_i) = (x_i / mu_i) ** -alpha`` (inverse-power weighting favours
    low-probability edges), then rows are normalised again so each row is a
    probability distribution.
    """
    n = data.size(0)

    affinity = cosineSimilarty(data, data)
    affinity.fill_diagonal_(0)  # no self-transitions
    affinity = torch.nan_to_num(affinity, nan=0.0001)

    # First row normalisation; epsilon guards all-zero rows.
    affinity = affinity / (affinity.sum(dim=1, keepdim=True) + 0.000001)
    affinity = torch.nan_to_num(affinity, nan=0.0001)
    affinity = affinity.clamp(min=0.0001)

    # Inverse-power reweighting, then renormalise rows.
    trans = affinity.pow(-alpha)
    trans = trans / (trans.sum(dim=1, keepdim=True) + 0.000001)

    # Fallback: a degenerate (all-zero) row becomes a uniform distribution.
    degenerate = trans.sum(dim=1) == 0
    if degenerate.any():
        trans[degenerate] = 1.0 / n

    return trans
|
|
|
|
|
|
|
def random_walk_batch_paths(transition_matrix, num_walks, walk_length):
    """Run ``num_walks`` random walks of ``walk_length`` steps from every
    node and return a (num_nodes, num_nodes) visit-count matrix.

    Every node on every path is counted (the start node is counted once per
    walk at step 0); the diagonal is then reduced by ``num_walks`` so the
    mandatory start-node occurrences are excluded, while genuine revisits of
    the start node still count.

    :param transition_matrix: row-stochastic (num_nodes, num_nodes) tensor.
    :param num_walks: number of walks launched from each start node.
    :param walk_length: steps per walk (each path has walk_length + 1 nodes).
    :return: float visit-count matrix on the same device as the input.
    """
    # Fix: follow the input's device instead of hard-coding 'cuda', so the
    # function also works on CPU tensors / CPU-only machines.
    device = transition_matrix.device
    num_nodes = transition_matrix.size(0)
    visit_matrix = torch.zeros_like(transition_matrix)

    for start_node in range(num_nodes):
        paths = torch.full((num_walks, walk_length + 1), start_node,
                           dtype=torch.long, device=device)
        for step in range(walk_length):
            # Sample the next node of every walk in one batched draw.
            probs = transition_matrix[paths[:, step]]
            paths[:, step + 1] = torch.multinomial(probs, 1).squeeze(1)

        # Accumulate all visits for this start node in a single index_add_
        # over the flattened paths, instead of one call per path.
        flat = paths.reshape(-1)
        visit_matrix[start_node].index_add_(
            0, flat, torch.ones_like(flat, dtype=visit_matrix.dtype))

    # Remove the num_walks start-node occurrences from each diagonal entry.
    visit_matrix -= torch.diag(torch.full((num_nodes,), num_walks,
                                          dtype=visit_matrix.dtype,
                                          device=device))
    return visit_matrix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def thresholded_knn(visit_matrix, similarity_threshold):
    """Binarise a visit-count matrix: divide by its global maximum, then keep
    entries strictly above ``similarity_threshold`` as 1.0 and the rest as 0.0."""
    scaled = visit_matrix / visit_matrix.max()
    return (scaled > similarity_threshold).to(torch.float32)
|
|
|
def greedy_cover_with_importance(data, importance_scores, r, num_anchors):
    """
    Greedy set-cover anchor selection.

    Repeatedly picks the highest-importance node that is neither covered nor
    already an anchor, and marks every node within cosine distance ``r`` of
    it as covered.  When all nodes become covered before enough anchors were
    chosen, coverage is reset (existing anchors stay covered) and selection
    continues.

    :param data: node features, shape (n_samples, n_features)
    :param importance_scores: per-node importance (random-walk visit frequency)
    :param r: coverage radius in cosine-distance space
    :param num_anchors: number of anchors to select
    :return: 1-D tensor of selected anchor indices
    """
    # Fix: inherit the device from the input instead of hard-coding 'cuda',
    # so the function also runs on CPU-only machines.
    device = data.device
    distances = cosineSimilartydis(data, data)
    selected = []
    covered = torch.zeros(data.size(0), dtype=torch.bool, device=device)
    sorted_indices = torch.argsort(importance_scores, descending=True)
    cluster_selected = torch.zeros(data.size(0), dtype=torch.bool, device=device)

    while len(selected) < num_anchors:
        selected_before_pass = len(selected)

        for idx in sorted_indices:
            if len(selected) >= num_anchors:
                break
            if not covered[idx] and not cluster_selected[idx]:
                selected.append(idx)
                cluster_selected[idx] = 1
                # Everything within radius r of the new anchor is covered.
                covered |= distances[idx] <= r
                covered[idx] = 1

        selected_anchors = set(selected)
        selected_anchors_tensor = torch.tensor(list(selected_anchors), device=device)

        if covered.sum().item() == data.size(0):
            print("所有点已被覆盖,重置覆盖状态")
            # Reset coverage but keep the chosen anchors marked as covered.
            covered[:] = 0
            covered[selected_anchors_tensor] = 1
        print(len(selected))

        # Fix: if a full pass selected nothing (e.g. num_anchors exceeds the
        # number of selectable nodes), stop instead of looping forever.
        if len(selected) == selected_before_pass:
            break

    return torch.tensor(selected, device=device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|