import math
import time
from functools import partial
from threading import Thread

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from matplotlib.figure import Figure
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.datasets import make_blobs, make_circles, make_moons
from sklearn.datasets import make_moons, make_blobs
from sklearn.ensemble import IsolationForest
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDOneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.pipeline import make_pipeline
|
|
def get_groundtruth_model(X, labels):
    """Wrap known labels in an object exposing a ``labels_`` attribute.

    Args:
        X: Not used by this function.
        labels: The labels to expose on the returned object.

    Returns:
        An object whose ``labels_`` attribute is the ``labels`` argument.
    """

    class Dummy:
        """Minimal holder for a ``labels_`` attribute."""

        def __init__(self, y):
            # Store the constructor argument itself rather than reaching
            # into the enclosing function's scope.
            self.labels_ = y

    return Dummy(labels)
|
|
| |
# Width and height as a 2-tuple.
# NOTE(review): nothing in the visible part of the file reads FIGSIZE.
FIGSIZE = (10, 10)

# Created once at import time through pyplot.
# NOTE(review): nothing in the visible part of the file draws on this
# figure; confirm it is still needed before removing it.
figure = plt.figure(figsize=(25, 10))
|
|
|
|
def _detector_factories(outliers_fraction):
    """Map each detector's display name to a zero-argument factory for it."""
    return {
        "Robust covariance": lambda: EllipticEnvelope(contamination=outliers_fraction),
        "One-Class SVM": lambda: svm.OneClassSVM(
            nu=outliers_fraction, kernel="rbf", gamma=0.1
        ),
        "One-Class SVM (SGD)": lambda: make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(
                nu=outliers_fraction,
                shuffle=True,
                fit_intercept=True,
                random_state=42,
                tol=1e-6,
            ),
        ),
        "Isolation Forest": lambda: IsolationForest(
            contamination=outliers_fraction, random_state=42
        ),
        "Local Outlier Factor": lambda: LocalOutlierFactor(
            n_neighbors=35, contamination=outliers_fraction
        ),
    }


def _dataset_factories(n_samples, n_inliers):
    """Map each dataset's display name to a zero-argument factory for its points."""
    blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
    return {
        "Central Blob": lambda: make_blobs(
            centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params
        )[0],
        "Two Blobs": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params
        )[0],
        "Blob with Noise": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params
        )[0],
        # "Moons" and "Noise" are sized by n_samples, not n_inliers.
        "Moons": lambda: 4.0
        * (
            make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
            - np.array([0.5, 0.25])
        ),
        "Noise": lambda: 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
    }


def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Fit one anomaly detector on one synthetic dataset and plot the result.

    Only the requested dataset and estimator are built, and the estimator
    is fitted a single time.

    Args:
        input_data: Dataset name; one of "Central Blob", "Two Blobs",
            "Blob with Noise", "Moons", "Noise".
        outliers_fraction: Fraction of ``n_samples`` generated as uniform
            outliers; also passed to the estimator as ``contamination``
            (or ``nu`` for the One-Class SVM variants).
        n_samples: Sample count; coerced to ``int`` before use.
        clf_name: Detector name; one of "Robust covariance",
            "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest",
            "Local Outlier Factor".

    Returns:
        A ``matplotlib.figure.Figure`` holding the scatter plot, the
        decision boundary (omitted for "Local Outlier Factor") and the
        elapsed fitting time. The figure is not registered with pyplot,
        so it is released once the caller drops it.

    Raises:
        KeyError: If ``input_data`` or ``clf_name`` is not a known name.
    """
    # The dataset generators are given this value as a sample count;
    # coerce in case the caller hands over a float.
    n_samples = int(n_samples)
    n_outliers = int(outliers_fraction * n_samples)
    n_inliers = n_samples - n_outliers

    detectors = _detector_factories(outliers_fraction)
    clf = detectors[clf_name]()
    inliers = _dataset_factories(n_samples, n_inliers)[input_data]()

    rng = np.random.RandomState(42)
    X = np.concatenate(
        [inliers, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0
    )

    # Local Outlier Factor is labelled through fit_predict and gets no
    # decision boundary below; every other detector is fitted, then
    # queried with predict.
    is_lof = clf_name == "Local Outlier Factor"

    t0 = time.perf_counter()
    if is_lof:
        # For this detector the reported time covers fit_predict.
        y_pred = clf.fit_predict(X)
    else:
        clf.fit(X)
    t1 = time.perf_counter()
    if not is_lof:
        y_pred = clf.predict(X)

    # Build the figure directly instead of through pyplot so that
    # repeated calls do not accumulate open figures in pyplot's registry.
    fig = Figure(figsize=(len(detectors) * 2 + 4, 12.5))
    ax = fig.subplots()

    if not is_lof:
        xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        # Predictions are -1/+1, so the 0 level traces the boundary.
        ax.contour(xx, yy, Z, levels=[0], linewidths=10, colors="black")

    # Maps prediction -1 to the first colour and +1 to the second.
    colors = np.array(["#377eb8", "#ff7f00"])
    ax.scatter(X[:, 0], X[:, 1], s=100, color=colors[(y_pred + 1) // 2])

    ax.set_xlim(-7, 7)
    ax.set_ylim(-7, 7)
    ax.set_xticks(())
    ax.set_yticks(())
    ax.text(
        0.99,
        0.01,
        f"{t1 - t0:.2f}s".lstrip("0"),
        transform=ax.transAxes,
        size=60,
        horizontalalignment="right",
    )
    return fig
|
|
# One-line blurb passed to gr.Markdown below the page title.
description = (
    "Learn how different anomaly detection algorithms perform in different datasets."
)
|
|
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` by ``n_cols`` grid of Gradio containers.

    A ``gr.Row`` is opened for each row and a ``gr.Column`` for each cell
    within it. Every yield happens while both contexts are open, so
    whatever the caller does before requesting the next value runs inside
    that row and column.

    Args:
        n_rows: Number of ``gr.Row`` contexts to open.
        n_cols: Number of ``gr.Column`` contexts to open inside each row.

    Yields:
        ``None``, ``n_rows * n_cols`` times when fully consumed.
    """
    for _row in range(n_rows):
        with gr.Row():
            for _col in range(n_cols):
                with gr.Column():
                    yield
|
|
title = "🕵️♀️ compare anomaly detection algorithms 🕵️♂️"

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    input_models = [
        "Robust covariance",
        "One-Class SVM",
        "One-Class SVM (SGD)",
        "Isolation Forest",
        "Local Outlier Factor",
    ]
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
    )
    n_samples = gr.Slider(minimum=100, maximum=500, step=25, label="Number of Samples")
    # scikit-learn requires `contamination` to lie in (0, 0.5] for the
    # covariance, isolation-forest and LOF detectors, so the slider stops
    # at 0.5 instead of offering values that make fitting fail.
    outliers_fraction = gr.Slider(
        minimum=0.1, maximum=0.5, step=0.1, label="Fraction of Outliers"
    )

    # Positional order must match train_models(input_data,
    # outliers_fraction, n_samples, ...).
    controls = [input_data, outliers_fraction, n_samples]

    # input_models comes first in zip() so iteration stops as soon as the
    # models run out, without advancing the grid into an empty extra row.
    for input_model, _ in zip(input_models, iter_grid(5, 5)):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        # Redraw this model's plot whenever any control changes.
        for control in (input_data, n_samples, outliers_fraction):
            control.change(fn=fn, inputs=controls, outputs=plot)

# Queueing is enabled through Blocks.queue(); launch() no longer takes
# the `enable_queue` argument in current Gradio releases.
demo.queue().launch(debug=True)
|
|
|
|