Spaces:
Runtime error
Runtime error
| ############################################################################### | |
| # | |
| # Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation | |
| # | |
| ############################################################################### | |
| import pyemd | |
| import numpy as np | |
| import concurrent.futures | |
| from functools import partial | |
| from scipy.linalg import toeplitz | |
| def emd(x, y, distance_scaling=1.0): | |
| support_size = max(len(x), len(y)) | |
| d_mat = toeplitz(range(support_size)).astype(float) | |
| distance_mat = d_mat / distance_scaling | |
| # convert histogram values x and y to float, and make them equal len | |
| x = x.astype(float) | |
| y = y.astype(float) | |
| if len(x) < len(y): | |
| x = np.hstack((x, [0.0] * (support_size - len(x)))) | |
| elif len(y) < len(x): | |
| y = np.hstack((y, [0.0] * (support_size - len(y)))) | |
| emd = pyemd.emd(x, y, distance_mat) | |
| return emd | |
| def l2(x, y): | |
| dist = np.linalg.norm(x - y, 2) | |
| return dist | |
| def emd(x, y, sigma=1.0, distance_scaling=1.0): | |
| ''' EMD | |
| Args: | |
| x, y: 1D pmf of two distributions with the same support | |
| sigma: standard deviation | |
| ''' | |
| support_size = max(len(x), len(y)) | |
| d_mat = toeplitz(range(support_size)).astype(float) | |
| distance_mat = d_mat / distance_scaling | |
| # convert histogram values x and y to float, and make them equal len | |
| x = x.astype(float) | |
| y = y.astype(float) | |
| if len(x) < len(y): | |
| x = np.hstack((x, [0.0] * (support_size - len(x)))) | |
| elif len(y) < len(x): | |
| y = np.hstack((y, [0.0] * (support_size - len(y)))) | |
| return np.abs(pyemd.emd(x, y, distance_mat)) | |
| def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0): | |
| ''' Gaussian kernel with squared distance in exponential term replaced by EMD | |
| Args: | |
| x, y: 1D pmf of two distributions with the same support | |
| sigma: standard deviation | |
| ''' | |
| support_size = max(len(x), len(y)) | |
| d_mat = toeplitz(range(support_size)).astype(float) | |
| distance_mat = d_mat / distance_scaling | |
| # convert histogram values x and y to float, and make them equal len | |
| x = x.astype(float) | |
| y = y.astype(float) | |
| if len(x) < len(y): | |
| x = np.hstack((x, [0.0] * (support_size - len(x)))) | |
| elif len(y) < len(x): | |
| y = np.hstack((y, [0.0] * (support_size - len(y)))) | |
| emd = pyemd.emd(x, y, distance_mat) | |
| return np.exp(-emd * emd / (2 * sigma * sigma)) | |
| def gaussian(x, y, sigma=1.0): | |
| support_size = max(len(x), len(y)) | |
| # convert histogram values x and y to float, and make them equal len | |
| x = x.astype(float) | |
| y = y.astype(float) | |
| if len(x) < len(y): | |
| x = np.hstack((x, [0.0] * (support_size - len(x)))) | |
| elif len(y) < len(x): | |
| y = np.hstack((y, [0.0] * (support_size - len(y)))) | |
| dist = np.linalg.norm(x - y, 2) | |
| return np.exp(-dist * dist / (2 * sigma * sigma)) | |
| def gaussian_tv(x, y, sigma=1.0): | |
| support_size = max(len(x), len(y)) | |
| # convert histogram values x and y to float, and make them equal len | |
| x = x.astype(float) | |
| y = y.astype(float) | |
| if len(x) < len(y): | |
| x = np.hstack((x, [0.0] * (support_size - len(x)))) | |
| elif len(y) < len(x): | |
| y = np.hstack((y, [0.0] * (support_size - len(y)))) | |
| dist = np.abs(x - y).sum() / 2.0 | |
| return np.exp(-dist * dist / (2 * sigma * sigma)) | |
| def kernel_parallel_unpacked(x, samples2, kernel): | |
| d = 0 | |
| for s2 in samples2: | |
| d += kernel(x, s2) | |
| return d | |
| def kernel_parallel_worker(t): | |
| return kernel_parallel_unpacked(*t) | |
| def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs): | |
| ''' Discrepancy between 2 samples ''' | |
| d = 0 | |
| if not is_parallel: | |
| for s1 in samples1: | |
| for s2 in samples2: | |
| d += kernel(s1, s2, *args, **kwargs) | |
| else: | |
| with concurrent.futures.ThreadPoolExecutor() as executor: | |
| for dist in executor.map(kernel_parallel_worker, [ | |
| (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 | |
| ]): | |
| d += dist | |
| if len(samples1) * len(samples2) > 0: | |
| d /= len(samples1) * len(samples2) | |
| else: | |
| d = 1e+6 | |
| return d | |
| def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): | |
| ''' MMD between two samples ''' | |
| # normalize histograms into pmf | |
| if is_hist: | |
| samples1 = [s1 / (np.sum(s1) + 1e-6) for s1 in samples1] | |
| samples2 = [s2 / (np.sum(s2) + 1e-6) for s2 in samples2] | |
| return disc(samples1, samples1, kernel, *args, **kwargs) + disc(samples2, samples2, kernel, *args, **kwargs) - \ | |
| 2 * disc(samples1, samples2, kernel, *args, **kwargs) | |
| def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): | |
| ''' EMD between average of two samples ''' | |
| # normalize histograms into pmf | |
| if is_hist: | |
| samples1 = [np.mean(samples1)] | |
| samples2 = [np.mean(samples2)] | |
| return disc(samples1, samples2, kernel, *args, | |
| **kwargs), [samples1[0], samples2[0]] | |