# NOTE: the original paste began with web-page scrape artifacts ("Spaces:",
# "Runtime error" x2) that are not part of this module; replaced with this comment.
| ############################################################################### | |
| # | |
| # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation | |
| # | |
| ############################################################################### | |
| # import graph_tool.all as gt | |
| ##Navigate to the ./util/orca directory and compile orca.cpp | |
| # g++ -O2 -std=c++11 -o orca orca.cpp | |
| import os | |
| import copy | |
| import torch | |
| import torch.nn as nn | |
| import numpy as np | |
| import networkx as nx | |
| import subprocess as sp | |
| import concurrent.futures | |
| import pygsp as pg | |
| import secrets | |
| from string import ascii_uppercase, digits | |
| from datetime import datetime | |
| from scipy.linalg import eigvalsh | |
| from scipy.stats import chi2 | |
| from analysis.dist_helper import compute_mmd, gaussian_emd, gaussian, emd, gaussian_tv, disc | |
| from torch_geometric.utils import to_networkx | |
| import wandb | |
| from collections import defaultdict | |
| PRINT_TIME = False | |
| __all__ = ['degree_stats', 'clustering_stats', 'orbit_stats_all', 'spectral_stats', 'eval_acc_lobster_graph'] | |
def degree_worker(G):
    """Return the degree histogram of a single networkx graph as a numpy array."""
    return np.asarray(nx.degree_histogram(G))
def degree_stats(graph_ref_list, graph_pred_list, is_parallel=True, compute_emd=False):
    '''Compute the MMD distance between the degree distributions of two
    unordered sets of graphs.

    Args:
        graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated.
        is_parallel: histogram the graphs with a thread pool when True.
        compute_emd: use the Gaussian-EMD kernel (as in GraphRNN) instead of
            the Gaussian-TV kernel (as in GRAN).
    '''
    # Empty generated graphs have no degree histogram; drop them up front.
    pred_graphs = [G for G in graph_pred_list if G.number_of_nodes() != 0]

    prev = datetime.now()
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            sample_ref = list(executor.map(degree_worker, graph_ref_list))
        with concurrent.futures.ThreadPoolExecutor() as executor:
            sample_pred = list(executor.map(degree_worker, pred_graphs))
    else:
        sample_ref = [np.array(nx.degree_histogram(G)) for G in graph_ref_list]
        sample_pred = [np.array(nx.degree_histogram(G)) for G in pred_graphs]

    if compute_emd:
        # EMD option uses the same computation as GraphRNN; the alternative is
        # the TV-kernel MMD as computed by GRAN.
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
    else:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing degree mmd: ', elapsed)
    return mmd_dist
| ############################################################################### | |
def spectral_worker(G, n_eigvals=-1):
    """Spectral density of G's normalized Laplacian as a 200-bin histogram on [0, 2].

    Args:
        G: networkx graph.
        n_eigvals: if positive, keep only eigenvalues 1..n_eigvals (skipping
            the trivial zero eigenvalue); otherwise use the full spectrum.

    Returns:
        1D numpy array of length 200 that sums to 1.
    """
    try:
        eigs = eigvalsh(nx.normalized_laplacian_matrix(G).todense())
    except Exception:
        # Was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit.
        # Fall back to an all-zero spectrum only on genuine computation failures.
        eigs = np.zeros(G.number_of_nodes())
    if n_eigvals > 0:
        eigs = eigs[1:n_eigvals + 1]
    spectral_pmf, _ = np.histogram(eigs, bins=200, range=(-1e-5, 2), density=False)
    spectral_pmf = spectral_pmf / spectral_pmf.sum()
    return spectral_pmf
def get_spectral_pmf(eigs, max_eig):
    """Normalized 200-bin histogram of eigenvalues clipped to [0, max_eig]."""
    clipped = np.clip(eigs, 0, max_eig)
    counts, _ = np.histogram(clipped, bins=200, range=(-1e-5, max_eig), density=False)
    return counts / counts.sum()
def eigval_stats(eig_ref_list, eig_pred_list, max_eig=20, is_parallel=True, compute_emd=False):
    '''Compute the MMD distance between eigenvalue histograms of two unordered
    sets of graphs.

    Args:
        eig_ref_list, eig_pred_list: lists of eigenvalue arrays, one per graph.
        max_eig: eigenvalues are clipped to [0, max_eig] before histogramming.
        is_parallel: histogram the spectra with a thread pool when True.
        compute_emd: use the EMD kernel instead of the Gaussian-TV kernel.
    '''
    sample_ref = []
    sample_pred = []
    prev = datetime.now()
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for spectral_density in executor.map(get_spectral_pmf, eig_ref_list,
                                                 [max_eig for i in range(len(eig_ref_list))]):
                sample_ref.append(spectral_density)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # BUG FIX: the max_eig list must match the *pred* list length.
            # It was built from len(eig_ref_list), and executor.map truncates to
            # the shortest iterable, silently dropping predictions when
            # len(eig_pred_list) > len(eig_ref_list).
            for spectral_density in executor.map(get_spectral_pmf, eig_pred_list,
                                                 [max_eig for i in range(len(eig_pred_list))]):
                sample_pred.append(spectral_density)
    else:
        for i in range(len(eig_ref_list)):
            # BUG FIX: max_eig was omitted, raising TypeError on the serial path.
            spectral_temp = get_spectral_pmf(eig_ref_list[i], max_eig)
            sample_ref.append(spectral_temp)
        for i in range(len(eig_pred_list)):
            spectral_temp = get_spectral_pmf(eig_pred_list[i], max_eig)
            sample_pred.append(spectral_temp)
    if compute_emd:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd)
    else:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing eig mmd: ', elapsed)
    return mmd_dist
def eigh_worker(G):
    """Eigendecompose G's normalized Laplacian.

    Returns:
        (eigvals, eigvecs) tuple; all-zero arrays of matching shape if the
        decomposition fails.
    """
    L = nx.normalized_laplacian_matrix(G).todense()
    try:
        eigvals, eigvecs = np.linalg.eigh(L)
    except Exception:
        # Was a bare `except:`; keep the best-effort zero fallback but stop
        # swallowing KeyboardInterrupt/SystemExit.
        eigvals = np.zeros(L[0, :].shape)
        eigvecs = np.zeros(L.shape)
    return (eigvals, eigvecs)
def compute_list_eigh(graph_list, is_parallel=False):
    """Eigendecompose the normalized Laplacian of every graph in graph_list.

    Returns:
        (eigval_list, eigvec_list), both aligned with graph_list.
    """
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(eigh_worker, graph_list))
    else:
        results = [eigh_worker(G) for G in graph_list]
    eigval_list = [vals for vals, _ in results]
    eigvec_list = [vecs for _, vecs in results]
    return eigval_list, eigvec_list
def get_spectral_filter_worker(eigvec, eigval, filters, bound=1.4):
    """Histogram of spectral-filter energies for one graph.

    Evaluates each filter on the eigenvalues, forms the corresponding operator
    in the node basis, and histograms the squared entries (summed along the
    last axis) into 100 bins on [0, bound], one histogram per filter.
    """
    responses = filters.evaluate(eigval)
    operators = np.array([eigvec @ np.diag(resp) @ eigvec.T for resp in responses])
    energies = np.sum(operators ** 2, axis=2)
    # NOTE: the number of bins is a tunable choice.
    hists = [np.histogram(row, range=[0, bound], bins=100)[0] for row in energies]
    return np.array(hists).flatten()
def spectral_filter_stats(eigvec_ref_list, eigval_ref_list, eigvec_pred_list, eigval_pred_list, is_parallel=False,
                          compute_emd=False):
    '''Compute the MMD distance between spectral-filter energy histograms
    ("wavelet" statistics) of two sets of eigendecompositions.

    Args:
        eigvec_ref_list, eigval_ref_list: reference eigenvectors/eigenvalues.
        eigvec_pred_list, eigval_pred_list: predicted eigenvectors/eigenvalues.
        is_parallel: compute the histograms with a thread pool when True.
        compute_emd: use the Gaussian-EMD kernel instead of Gaussian-TV.
    '''
    prev = datetime.now()

    class DMG(object):
        """Dummy normalized graph: pygsp's Abspline only reads lmax from it."""
        lmax = 2

    n_filters = 12
    filters = pg.filters.Abspline(DMG, n_filters)
    bound = np.max(filters.evaluate(np.arange(0, 2, 0.01)))
    sample_ref = []
    sample_pred = []
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for spectral_density in executor.map(get_spectral_filter_worker, eigvec_ref_list, eigval_ref_list,
                                                 [filters for i in range(len(eigval_ref_list))],
                                                 [bound for i in range(len(eigval_ref_list))]):
                sample_ref.append(spectral_density)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # BUG FIX: the filters/bound argument lists must match the *pred*
            # list length; they were built from len(eigval_ref_list), and
            # executor.map truncates to the shortest iterable, silently
            # dropping predictions when pred is longer than ref.
            for spectral_density in executor.map(get_spectral_filter_worker, eigvec_pred_list, eigval_pred_list,
                                                 [filters for i in range(len(eigval_pred_list))],
                                                 [bound for i in range(len(eigval_pred_list))]):
                sample_pred.append(spectral_density)
    else:
        for i in range(len(eigval_ref_list)):
            try:
                spectral_temp = get_spectral_filter_worker(eigvec_ref_list[i], eigval_ref_list[i], filters, bound)
                sample_ref.append(spectral_temp)
            except Exception:
                # Was a bare `except:`; keep the best-effort skip, but do not
                # swallow KeyboardInterrupt/SystemExit.
                pass
        for i in range(len(eigval_pred_list)):
            try:
                spectral_temp = get_spectral_filter_worker(eigvec_pred_list[i], eigval_pred_list[i], filters, bound)
                sample_pred.append(spectral_temp)
            except Exception:
                pass
    if compute_emd:
        # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
    else:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing spectral filter stats: ', elapsed)
    return mmd_dist
def spectral_stats(graph_ref_list, graph_pred_list, is_parallel=True, n_eigvals=-1, compute_emd=False):
    '''Compute the MMD distance between Laplacian spectral densities of two
    unordered sets of graphs.

    Args:
        graph_ref_list, graph_pred_list: two lists of networkx graphs to be evaluated.
        is_parallel: compute the spectra with a thread pool when True.
        n_eigvals: forwarded to spectral_worker (positive values truncate the spectrum).
        compute_emd: use the Gaussian-EMD kernel (GraphRNN) instead of Gaussian-TV (GRAN).
    '''
    sample_ref = []
    sample_pred = []
    # in case an empty graph is generated
    graph_pred_list_remove_empty = [
        G for G in graph_pred_list if not G.number_of_nodes() == 0
    ]
    prev = datetime.now()
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for spectral_density in executor.map(spectral_worker, graph_ref_list,
                                                 [n_eigvals for i in graph_ref_list]):
                sample_ref.append(spectral_density)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # BUG FIX: the n_eigvals list must be as long as the *pred* list;
            # it was built from graph_ref_list, and executor.map truncates to
            # the shortest iterable, dropping predictions when pred > ref.
            for spectral_density in executor.map(spectral_worker, graph_pred_list_remove_empty,
                                                 [n_eigvals for i in graph_pred_list_remove_empty]):
                sample_pred.append(spectral_density)
    else:
        for i in range(len(graph_ref_list)):
            spectral_temp = spectral_worker(graph_ref_list[i], n_eigvals)
            sample_ref.append(spectral_temp)
        for i in range(len(graph_pred_list_remove_empty)):
            spectral_temp = spectral_worker(graph_pred_list_remove_empty[i], n_eigvals)
            sample_pred.append(spectral_temp)
    if compute_emd:
        # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd)
    else:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv)
    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing degree mmd: ', elapsed)
    return mmd_dist
| ############################################################################### | |
def clustering_worker(param):
    """Histogram one graph's clustering coefficients into `bins` bins on [0, 1].

    Args:
        param: (G, bins) tuple — packed so the function can be used with map().
    """
    G, bins = param
    coeffs = list(nx.clustering(G).values())
    hist, _ = np.histogram(coeffs, bins=bins, range=(0.0, 1.0), density=False)
    return hist
def clustering_stats(graph_ref_list,
                     graph_pred_list,
                     bins=100,
                     is_parallel=True, compute_emd=False):
    """Compute the MMD distance between clustering-coefficient histograms of
    two unordered sets of graphs.

    Args:
        graph_ref_list, graph_pred_list: two lists of networkx graphs.
        bins: number of histogram bins on [0, 1].
        is_parallel: compute the histograms with a thread pool when True.
        compute_emd: use the Gaussian-EMD kernel (GraphRNN) instead of
            Gaussian-TV (GRAN).
    """
    # Empty generated graphs have no clustering coefficients; drop them.
    pred_graphs = [G for G in graph_pred_list if G.number_of_nodes() != 0]
    prev = datetime.now()

    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            sample_ref = list(executor.map(clustering_worker,
                                           [(G, bins) for G in graph_ref_list]))
        with concurrent.futures.ThreadPoolExecutor() as executor:
            sample_pred = list(executor.map(clustering_worker,
                                            [(G, bins) for G in pred_graphs]))
    else:
        sample_ref = []
        for G in graph_ref_list:
            coeffs = list(nx.clustering(G).values())
            hist, _ = np.histogram(coeffs, bins=bins, range=(0.0, 1.0), density=False)
            sample_ref.append(hist)
        sample_pred = []
        for G in pred_graphs:
            coeffs = list(nx.clustering(G).values())
            hist, _ = np.histogram(coeffs, bins=bins, range=(0.0, 1.0), density=False)
            sample_pred.append(hist)

    if compute_emd:
        # EMD option uses the same computation as GraphRNN; the alternative is
        # the TV-kernel MMD as computed by GRAN.
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd, sigma=1.0 / 10, distance_scaling=bins)
    else:
        mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv, sigma=1.0 / 10)

    elapsed = datetime.now() - prev
    if PRINT_TIME:
        print('Time computing clustering mmd: ', elapsed)
    return mmd_dist
# Maps a motif/orbit name string to the list of column indices in the orca
# output that count that motif (orca's node-orbit numbering for graphlets of
# up to 4 nodes).
motif_to_indices = {
    '3path': [1, 2],
    '4cycle': [8],
}
# Marker line printed by the orca binary immediately before the per-node
# orbit-count matrix; orca() scans stdout for it.
COUNT_START_STR = 'orbit counts:'
def edge_list_reindexed(G):
    """Return G's edges with node ids remapped to consecutive integers 0..n-1.

    Nodes are keyed by their string form, in the order G.nodes() yields them.
    """
    node_to_index = {str(node): pos for pos, node in enumerate(G.nodes())}
    return [(node_to_index[str(u)], node_to_index[str(v)]) for (u, v) in G.edges()]
def orca(graph):
    """Run the compiled orca binary on `graph` and return per-node orbit counts.

    Writes the graph as an edge list to a uniquely named temp file next to this
    module, invokes ./orca/orca (must be compiled beforehand, see the header
    comment), parses the node-by-orbit count matrix from its stdout, and
    removes the temp file.

    Returns:
        numpy int array of shape (num_nodes, num_orbits).

    Raises:
        subprocess.CalledProcessError: if the orca binary exits non-zero.
    '''
    """
    tmp_fname = f'orca/tmp_{"".join(secrets.choice(ascii_uppercase + digits) for i in range(8))}.txt'
    tmp_fname = os.path.join(os.path.dirname(os.path.realpath(__file__)), tmp_fname)
    # Context manager ensures the handle is closed even if a write fails
    # (the original open()/close() pair leaked the handle on error).
    with open(tmp_fname, 'w') as f:
        f.write(
            str(graph.number_of_nodes()) + ' ' + str(graph.number_of_edges()) + '\n')
        for (u, v) in edge_list_reindexed(graph):
            f.write(str(u) + ' ' + str(v) + '\n')
    try:
        output = sp.check_output(
            [str(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'orca/orca')), 'node', '4', tmp_fname, 'std'])
    finally:
        # Remove the temp file even when orca fails (it used to leak then).
        try:
            os.remove(tmp_fname)
        except OSError:
            pass
    output = output.decode('utf8').strip()
    # Skip past the marker line plus its trailing newline(s).
    idx = output.find(COUNT_START_STR) + len(COUNT_START_STR) + 2
    output = output[idx:]
    node_orbit_counts = np.array([
        list(map(int,
                 node_cnts.strip().split(' ')))
        for node_cnts in output.strip('\n').split('\n')
    ])
    return node_orbit_counts
def motif_stats(graph_ref_list, graph_pred_list, motif_type='4cycle', ground_truth_match=None,
                bins=100, compute_emd=False):
    """MMD between per-graph motif counts (each normalized by node count).

    Args:
        graph_ref_list, graph_pred_list: lists of networkx graphs.
        motif_type: key into motif_to_indices ('3path' or '4cycle').
        ground_truth_match: if given, also tallies per-graph fractions of nodes
            whose motif count equals this value (collected but not returned).
        bins: unused here (kept from the histogram-based variant, see the
            commented-out np.histogram call below).
        compute_emd: see NOTE below — currently has no effect.
    """
    # graph motif counts (int for each graph)
    # normalized by graph size
    total_counts_ref = []
    total_counts_pred = []
    num_matches_ref = []
    num_matches_pred = []
    graph_pred_list_remove_empty = [G for G in graph_pred_list if not G.number_of_nodes() == 0]
    indices = motif_to_indices[motif_type]
    for G in graph_ref_list:
        orbit_counts = orca(G)
        # Sum the orbit columns belonging to the requested motif, per node.
        motif_counts = np.sum(orbit_counts[:, indices], axis=1)
        if ground_truth_match is not None:
            match_cnt = 0
            for elem in motif_counts:
                if elem == ground_truth_match:
                    match_cnt += 1
            num_matches_ref.append(match_cnt / G.number_of_nodes())
        # hist, _ = np.histogram(
        #        motif_counts, bins=bins, density=False)
        motif_temp = np.sum(motif_counts) / G.number_of_nodes()
        total_counts_ref.append(motif_temp)
    for G in graph_pred_list_remove_empty:
        orbit_counts = orca(G)
        motif_counts = np.sum(orbit_counts[:, indices], axis=1)
        if ground_truth_match is not None:
            match_cnt = 0
            for elem in motif_counts:
                if elem == ground_truth_match:
                    match_cnt += 1
            num_matches_pred.append(match_cnt / G.number_of_nodes())
        motif_temp = np.sum(motif_counts) / G.number_of_nodes()
        total_counts_pred.append(motif_temp)
    # Column vectors: compute_mmd with is_hist=False expects 2D samples.
    total_counts_ref = np.array(total_counts_ref)[:, None]
    total_counts_pred = np.array(total_counts_pred)[:, None]
    if compute_emd:
        # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
        # mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=emd, is_hist=False)
        # NOTE(review): both branches use the gaussian kernel, so compute_emd
        # currently changes nothing here — confirm whether the emd kernel
        # (commented out above) was intended.
        mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
    else:
        mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False)
    return mmd_dist
def orbit_stats_all(graph_ref_list, graph_pred_list, compute_emd=False):
    """MMD between per-graph orbit-count vectors (orca graphlet orbits up to
    size 4, summed over nodes and normalized by node count).

    Args:
        graph_ref_list, graph_pred_list: lists of networkx graphs.
        compute_emd: use the Gaussian kernel (GraphRNN-style) instead of
            Gaussian-TV (GRAN).
    """
    total_counts_ref = []
    total_counts_pred = []
    graph_pred_list_remove_empty = [
        G for G in graph_pred_list if not G.number_of_nodes() == 0
    ]
    for G in graph_ref_list:
        orbit_counts = orca(G)
        orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
        total_counts_ref.append(orbit_counts_graph)
    # BUG FIX: this loop iterated graph_pred_list, ignoring the empty-graph
    # filter computed above and dividing by zero on an empty generated graph.
    for G in graph_pred_list_remove_empty:
        orbit_counts = orca(G)
        orbit_counts_graph = np.sum(orbit_counts, axis=0) / G.number_of_nodes()
        total_counts_pred.append(orbit_counts_graph)
    total_counts_ref = np.array(total_counts_ref)
    total_counts_pred = np.array(total_counts_pred)
    if compute_emd:
        # EMD option uses the same computation as GraphRNN, the alternative is MMD as computed by GRAN
        mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian, is_hist=False, sigma=30.0)
    else:
        mmd_dist = compute_mmd(total_counts_ref, total_counts_pred, kernel=gaussian_tv, is_hist=False, sigma=30.0)
    return mmd_dist
def eval_acc_lobster_graph(G_list):
    """Fraction of graphs in G_list that are lobster graphs.

    Graphs are deep-copied first because is_lobster_graph prunes nodes.
    """
    copies = [copy.deepcopy(gg) for gg in G_list]
    hits = sum(1 for gg in copies if is_lobster_graph(gg))
    return hits / float(len(copies))
def eval_acc_tree_graph(G_list):
    """Fraction of graphs in G_list that are trees."""
    n_trees = sum(1 for gg in G_list if nx.is_tree(gg))
    return n_trees / float(len(G_list))
def eval_acc_grid_graph(G_list, grid_start=10, grid_end=20):
    """Fraction of graphs in G_list that are 2D grid graphs.

    Note: grid_start/grid_end are accepted for interface compatibility but are
    not used; is_grid_graph compares against all cached grids with a matching
    node count.
    """
    matches = sum(1 for gg in G_list if is_grid_graph(gg))
    return matches / float(len(G_list))
def eval_acc_sbm_graph(G_list, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000, is_parallel=True):
    """Mean SBM-likelihood score over G_list (fraction when strict=True).

    NOTE(review): this calls is_sbm_graph, which is commented out below
    because it depends on graph_tool — as the file stands, this function
    raises NameError at runtime. Restore the graph_tool import and the
    is_sbm_graph definition (or guard this metric) before using it.
    """
    count = 0.0
    if is_parallel:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Broadcast the scalar parameters so map() pairs one copy with each graph.
            for prob in executor.map(is_sbm_graph,
                                     [gg for gg in G_list], [p_intra for i in range(len(G_list))],
                                     [p_inter for i in range(len(G_list))],
                                     [strict for i in range(len(G_list))],
                                     [refinement_steps for i in range(len(G_list))]):
                count += prob
    else:
        for gg in G_list:
            count += is_sbm_graph(gg, p_intra=p_intra, p_inter=p_inter, strict=strict,
                                  refinement_steps=refinement_steps)
    return count / float(len(G_list))
def eval_acc_planar_graph(G_list):
    """Fraction of graphs in G_list that are connected planar graphs."""
    n_planar = sum(1 for gg in G_list if is_planar_graph(gg))
    return n_planar / float(len(G_list))
def is_planar_graph(G):
    """True iff G is connected and planar."""
    if not nx.is_connected(G):
        return False
    is_planar, _ = nx.check_planarity(G)
    return is_planar
def is_lobster_graph(G):
    """
    Check whether a given graph is a lobster graph.

    A lobster is a tree that reduces to a path after removing leaf nodes
    twice: lobster -> caterpillar -> path.
    """
    # A lobster must be a tree to begin with.
    if not nx.is_tree(G):
        return False
    G = G.copy()
    # Prune leaves twice.
    for _ in range(2):
        leaves = [n for n, d in G.degree() if d == 1]
        G.remove_nodes_from(leaves)
    num_nodes = len(G.nodes())
    degrees = [d for _, d in G.degree()]
    # sum of degrees equal to 1 == number of degree-1 nodes;
    # sum of degrees equal to 2 == 2 * number of degree-2 nodes.
    deg_one_sum = sum(d for d in degrees if d == 1)
    deg_two_sum = sum(d for d in degrees if d == 2)
    # Path with >= 2 nodes: two endpoints of degree 1, everything else degree 2.
    if deg_one_sum == 2 and deg_two_sum == 2 * (num_nodes - 2):
        return True
    # Degenerate remainder (empty or a single node) also counts as a path.
    if deg_one_sum == 0 and deg_two_sum == 0:
        return True
    return False
def is_grid_graph(G):
    """
    Check if the graph is grid, by comparing with all the real grids with the same node count.

    Side effect: on first call (when data/all_grids.pt does not exist) this
    builds every i x j grid for 2 <= i, j < 20, grouped by node count, and
    caches the dict to data/all_grids.pt via torch.save.
    NOTE(review): the cache path is relative to the current working directory,
    not to this file — confirm that is intended.
    """
    all_grid_file = f"data/all_grids.pt"
    if os.path.isfile(all_grid_file):
        all_grids = torch.load(all_grid_file)
    else:
        all_grids = {}
        for i in range(2, 20):
            for j in range(2, 20):
                G_grid = nx.grid_2d_graph(i, j)
                n_nodes = f"{len(G_grid.nodes())}"
                all_grids[n_nodes] = all_grids.get(n_nodes, []) + [G_grid]
        torch.save(all_grids, all_grid_file)
    n_nodes = f"{len(G.nodes())}"
    if n_nodes in all_grids:
        for G_grid in all_grids[n_nodes]:
            # Cheap degree-sequence pre-check before the exact isomorphism test.
            if nx.faster_could_be_isomorphic(G, G_grid):
                if nx.is_isomorphic(G, G_grid):
                    return True
        return False
    else:
        # No grid has this node count, so G cannot be a grid.
        return False
| # def is_sbm_graph(G, p_intra=0.3, p_inter=0.005, strict=True, refinement_steps=1000): | |
| # """ | |
| # Check if how closely given graph matches a SBM with given probabilites by computing mean probability of Wald test statistic for each recovered parameter | |
| # """ | |
| # adj = nx.adjacency_matrix(G).toarray() | |
| # idx = adj.nonzero() | |
| # g = gt.Graph() | |
| # g.add_edge_list(np.transpose(idx)) | |
| # try: | |
| # state = gt.minimize_blockmodel_dl(g) | |
| # except ValueError: | |
| # if strict: | |
| # return False | |
| # else: | |
| # return 0.0 | |
| # # Refine using merge-split MCMC | |
| # for i in range(refinement_steps): | |
| # state.multiflip_mcmc_sweep(beta=np.inf, niter=10) | |
| # b = state.get_blocks() | |
| # b = gt.contiguous_map(state.get_blocks()) | |
| # state = state.copy(b=b) | |
| # e = state.get_matrix() | |
| # n_blocks = state.get_nonempty_B() | |
| # node_counts = state.get_nr().get_array()[:n_blocks] | |
| # edge_counts = e.todense()[:n_blocks, :n_blocks] | |
| # if strict: | |
| # if (node_counts > 40).sum() > 0 or (node_counts < 20).sum() > 0 or n_blocks > 5 or n_blocks < 2: | |
| # return False | |
| # max_intra_edges = node_counts * (node_counts - 1) | |
| # est_p_intra = np.diagonal(edge_counts) / (max_intra_edges + 1e-6) | |
| # max_inter_edges = node_counts.reshape((-1, 1)) @ node_counts.reshape((1, -1)) | |
| # np.fill_diagonal(edge_counts, 0) | |
| # est_p_inter = edge_counts / (max_inter_edges + 1e-6) | |
| # W_p_intra = (est_p_intra - p_intra) ** 2 / (est_p_intra * (1 - est_p_intra) + 1e-6) | |
| # W_p_inter = (est_p_inter - p_inter) ** 2 / (est_p_inter * (1 - est_p_inter) + 1e-6) | |
| # W = W_p_inter.copy() | |
| # np.fill_diagonal(W, W_p_intra) | |
| # p = 1 - chi2.cdf(abs(W), 1) | |
| # p = p.mean() | |
| # if strict: | |
| # return p > 0.9 # p value < 10 % | |
| # else: | |
| # return p | |
def eval_fraction_isomorphic(fake_graphs, train_graphs):
    """Fraction of fake graphs isomorphic to at least one training graph."""
    matched = 0
    for fake_g in fake_graphs:
        # Cheap invariant pre-check, then the exact test; stop at first match.
        if any(nx.faster_could_be_isomorphic(fake_g, train_g)
               and nx.is_isomorphic(fake_g, train_g)
               for train_g in train_graphs):
            matched += 1
    return matched / float(len(fake_graphs))
def eval_fraction_unique(fake_graphs, precise=False):
    """Fraction of distinct isomorphism classes among the fake graphs.

    Each graph is compared against the representatives seen so far; duplicates
    are counted and not added as representatives. Empty graphs are never
    treated as duplicates. With precise=False a cheaper heuristic
    (could_be_isomorphic) replaces the exact isomorphism test.
    """
    exact_or_heuristic = nx.is_isomorphic if precise else nx.could_be_isomorphic
    count_non_unique = 0
    representatives = []
    for fake_g in fake_graphs:
        is_unique = True
        if fake_g.number_of_nodes() != 0:
            for seen in representatives:
                if nx.faster_could_be_isomorphic(fake_g, seen) and exact_or_heuristic(fake_g, seen):
                    count_non_unique += 1
                    is_unique = False
                    break
        if is_unique:
            representatives.append(fake_g)
    # Fraction of distinct isomorphism classes in the fake graphs.
    return (float(len(fake_graphs)) - count_non_unique) / float(len(fake_graphs))
def eval_fraction_unique_non_isomorphic_valid(fake_graphs, train_graphs, validity_func=(lambda x: True)):
    """Uniqueness/novelty/validity fractions for a set of generated graphs.

    Args:
        fake_graphs: generated networkx graphs.
        train_graphs: training-set networkx graphs (novelty reference).
        validity_func: predicate applied to unique, novel graphs (e.g.
            is_planar_graph); defaults to always-True.

    Returns:
        (frac_unique, frac_unique_non_isomorphic, frac_unique_non_isomorphic_valid),
        all relative to len(fake_graphs).
    """
    count_valid = 0
    count_isomorphic = 0
    count_non_unique = 0
    fake_evaluated = []
    for fake_g in fake_graphs:
        unique = True
        # Duplicate check against previously kept representatives.
        for fake_old in fake_evaluated:
            if nx.faster_could_be_isomorphic(fake_g, fake_old):
                if nx.is_isomorphic(fake_g, fake_old):
                    count_non_unique += 1
                    unique = False
                    break
        if unique:
            fake_evaluated.append(fake_g)
            non_isomorphic = True
            # Novelty check: only unique graphs are compared to the training set.
            for train_g in train_graphs:
                if nx.faster_could_be_isomorphic(fake_g, train_g):
                    if nx.is_isomorphic(fake_g, train_g):
                        count_isomorphic += 1
                        non_isomorphic = False
                        break
            if non_isomorphic:
                # Validity is only evaluated for unique AND novel graphs.
                if validity_func(fake_g):
                    count_valid += 1
    frac_unique = (float(len(fake_graphs)) - count_non_unique) / float(
        len(fake_graphs))  # Fraction of distinct isomorphism classes in the fake graphs
    frac_unique_non_isomorphic = (float(len(fake_graphs)) - count_non_unique - count_isomorphic) / float(
        len(fake_graphs))  # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set
    frac_unique_non_isomorphic_valid = count_valid / float(
        len(fake_graphs))  # Fraction of distinct isomorphism classes in the fake graphs that are not in the training set and are valid
    return frac_unique, frac_unique_non_isomorphic, frac_unique_non_isomorphic_valid
class SpectreSamplingMetrics(nn.Module):
    """SPECTRE-style sampling metrics between generated graphs and a data split.

    Computes the MMD statistics (degree / clustering / orbit / spectral) plus
    dataset-specific accuracy checks selected by `metrics_list`, printing and
    wandb-logging the results.

    Args:
        data_loaders: dict with 'train'/'val'/'test' torch_geometric loaders.
        compute_emd: use EMD-style kernels (GraphRNN) instead of TV (GRAN).
        metrics_list: subset of {'degree', 'spectre', 'clustering', 'motif',
            'orbit', 'sbm', 'planar'}.
    """

    def __init__(self, data_loaders, compute_emd, metrics_list):
        super().__init__()
        self.train_graphs = self.loader_to_nx(data_loaders['train'])
        self.val_graphs = self.loader_to_nx(data_loaders['val'])
        self.test_graphs = self.loader_to_nx(data_loaders['test'])
        self.num_graphs_test = len(self.test_graphs)
        self.num_graphs_val = len(self.val_graphs)
        self.compute_emd = compute_emd
        self.metrics_list = metrics_list

    def loader_to_nx(self, loader):
        """Flatten a torch_geometric loader into a list of undirected nx graphs."""
        networkx_graphs = []
        for i, batch in enumerate(loader):
            data_list = batch.to_data_list()
            for j, data in enumerate(data_list):
                networkx_graphs.append(to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True,
                                                   remove_self_loops=True))
        return networkx_graphs

    def forward(self, generated_graphs: list, local_rank, test=False):
        """Compute all configured metrics for `generated_graphs` and log them.

        Args:
            generated_graphs: list of (node_types, edge_types) tensor pairs;
                edge_types is interpreted as a dense adjacency matrix.
            local_rank: distributed rank; printing only happens on rank 0.
            test: compare against the test split instead of the val split.
        """
        reference_graphs = self.test_graphs if test else self.val_graphs
        if local_rank == 0:
            print(f"Computing sampling metrics between {len(generated_graphs)} generated graphs and {len(reference_graphs)}"
                  f" test graphs -- emd computation: {self.compute_emd}")
        networkx_graphs = []
        adjacency_matrices = []
        if local_rank == 0:
            print("Building networkx graphs...")
        for graph in generated_graphs:
            node_types, edge_types = graph
            A = edge_types.bool().cpu().numpy()
            adjacency_matrices.append(A)
            nx_graph = nx.from_numpy_array(A)
            networkx_graphs.append(nx_graph)
        # Persist the generated adjacencies for offline inspection.
        np.savez('generated_adjs.npz', *adjacency_matrices)
        to_log = {}
        if 'degree' in self.metrics_list:
            if local_rank == 0:
                print("Computing degree stats..")
            degree = degree_stats(reference_graphs, networkx_graphs, is_parallel=True,
                                  compute_emd=self.compute_emd)
            to_log['degree'] = degree
            if wandb.run:
                wandb.run.summary['degree'] = degree
        if 'spectre' in self.metrics_list:
            if local_rank == 0:
                print("Computing spectre stats...")
            spectre = spectral_stats(reference_graphs, networkx_graphs, is_parallel=True, n_eigvals=-1,
                                     compute_emd=self.compute_emd)
            to_log['spectre'] = spectre
            if wandb.run:
                wandb.run.summary['spectre'] = spectre
        if 'clustering' in self.metrics_list:
            if local_rank == 0:
                print("Computing clustering stats...")
            clustering = clustering_stats(reference_graphs, networkx_graphs, bins=100, is_parallel=True,
                                          compute_emd=self.compute_emd)
            to_log['clustering'] = clustering
            if wandb.run:
                wandb.run.summary['clustering'] = clustering
        if 'motif' in self.metrics_list:
            if local_rank == 0:
                print("Computing motif stats")
            motif = motif_stats(reference_graphs, networkx_graphs, motif_type='4cycle', ground_truth_match=None, bins=100,
                                compute_emd=self.compute_emd)
            to_log['motif'] = motif
            if wandb.run:
                wandb.run.summary['motif'] = motif
        if 'orbit' in self.metrics_list:
            if local_rank == 0:
                print("Computing orbit stats...")
            orbit = orbit_stats_all(reference_graphs, networkx_graphs, compute_emd=self.compute_emd)
            to_log['orbit'] = orbit
            if wandb.run:
                wandb.run.summary['orbit'] = orbit
        if 'sbm' in self.metrics_list:
            if local_rank == 0:
                print("Computing accuracy...")
            acc = eval_acc_sbm_graph(networkx_graphs, refinement_steps=100, strict=True)
            to_log['sbm_acc'] = acc
            if wandb.run:
                wandb.run.summary['sbmacc'] = acc
        if 'planar' in self.metrics_list:
            if local_rank == 0:
                print('Computing planar accuracy...')
            planar_acc = eval_acc_planar_graph(networkx_graphs)
            to_log['planar_acc'] = planar_acc
            if wandb.run:
                wandb.run.summary['planar_acc'] = planar_acc
        # BUG FIX: this condition was `if 'sbm' or 'planar' in self.metrics_list:`,
        # which Python parses as `('sbm') or (...)` — a non-empty string is always
        # truthy, so the fraction block ran for every metrics_list.
        if 'sbm' in self.metrics_list or 'planar' in self.metrics_list:
            if local_rank == 0:
                print("Computing all fractions...")
            # NOTE(review): is_sbm_graph is commented out in this file (it needs
            # graph_tool); the 'sbm' path below raises NameError until restored.
            frac_unique, frac_unique_non_isomorphic, fraction_unique_non_isomorphic_valid = eval_fraction_unique_non_isomorphic_valid(
                networkx_graphs, self.train_graphs, is_sbm_graph if 'sbm' in self.metrics_list else is_planar_graph)
            frac_non_isomorphic = 1.0 - eval_fraction_isomorphic(networkx_graphs, self.train_graphs)
            to_log.update({'sampling/frac_unique': frac_unique,
                           'sampling/frac_unique_non_iso': frac_unique_non_isomorphic,
                           'sampling/frac_unic_non_iso_valid': fraction_unique_non_isomorphic_valid,
                           'sampling/frac_non_iso': frac_non_isomorphic})
        if local_rank == 0:
            print("Sampling statistics", to_log)
        if wandb.run:
            wandb.log(to_log, commit=False)

    def reset(self):
        pass
def loader_to_nx(loader):
    """Map each example in a torch_geometric loader to its networkx graph,
    keyed by the example's prompt id.

    Returns:
        dict: prompt_id (int) -> one-element list containing the undirected
        networkx graph (self loops removed).

    NOTE(review): each key stores a single-element list, and a repeated
    prompt_id overwrites the earlier entry rather than appending — confirm
    both behaviors are intended.
    """
    networkx_graphs = {}
    for i, batch in enumerate(loader):
        data_list = batch.to_data_list()
        for j, data in enumerate(data_list):
            networkx_graphs[data.prompt_id.squeeze(0).item()] = [to_networkx(data, node_attrs=None, edge_attrs=None, to_undirected=True, remove_self_loops=True)]
    return networkx_graphs
def compute_metrics(generated_graphs, referenced_graphs):
    """Convert generated (node_types, edge_types) tensor pairs into networkx
    graphs and return them as a flat list.

    NOTE(review): despite the name, no metrics are computed here. The
    `adjacency_matrices` dict and the flattened `referenced_graphs` are built
    but never used, and `referenced_graphs` is only rebound locally — this
    looks like unfinished code; confirm the intended behavior.
    """
    networkx_graphs = defaultdict(list)
    adjacency_matrices = defaultdict(list)
    for key in generated_graphs:
        for graph in generated_graphs[key]:
            node_types, edge_types = graph
            # edge_types is treated as a dense adjacency matrix.
            A = edge_types.bool().cpu().numpy()
            nx_graph = nx.from_numpy_array(A)
            networkx_graphs[key].append(nx_graph)
            adjacency_matrices[key].append(A)
    new_referenced_graphs = []
    for key in referenced_graphs:
        new_referenced_graphs.extend(referenced_graphs[key])
    referenced_graphs = new_referenced_graphs
    nx_graphs = []
    for key in networkx_graphs:
        nx_graphs.extend(networkx_graphs[key])
    return nx_graphs
class Comm20SamplingMetrics(SpectreSamplingMetrics):
    """Sampling-metric preset for the Community-20 dataset (EMD kernels)."""

    def __init__(self, data_loaders):
        super().__init__(data_loaders=data_loaders,
                         compute_emd=True,
                         metrics_list=['degree', 'clustering', 'orbit'])
class PlanarSamplingMetrics(SpectreSamplingMetrics):
    """Sampling-metric preset for planar graphs (TV kernels + planarity check)."""

    def __init__(self, data_loaders):
        super().__init__(data_loaders=data_loaders,
                         compute_emd=False,
                         metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'planar'])
class SBMSamplingMetrics(SpectreSamplingMetrics):
    """Sampling-metric preset for SBM graphs (TV kernels + SBM accuracy)."""

    def __init__(self, data_loaders):
        super().__init__(data_loaders=data_loaders,
                         compute_emd=False,
                         metrics_list=['degree', 'clustering', 'orbit', 'spectre', 'sbm'])
class CrossDomainSamplingMetrics(SpectreSamplingMetrics):
    """Sampling-metric preset for cross-domain evaluation (TV kernels, no validity check)."""

    def __init__(self, data_loaders):
        super().__init__(data_loaders=data_loaders,
                         compute_emd=False,
                         metrics_list=['degree', 'clustering', 'orbit', 'spectre'])