diff --git "a/software_defect_prediction_final.ipynb" "b/software_defect_prediction_final.ipynb"
--- "a/software_defect_prediction_final.ipynb"
+++ "b/software_defect_prediction_final.ipynb"
@@ -1 +1,8451 @@
-PLACEHOLDER
\ No newline at end of file
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "56a937f6",
+ "metadata": {},
+ "source": [
+ "# Section 0 — Imports and Configuration\n",
+ "\n",
+ "All imports, seeds, and global configuration in one place."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "14b3703d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:15.648777Z",
+ "iopub.status.busy": "2026-05-07T14:55:15.648567Z",
+ "iopub.status.idle": "2026-05-07T14:55:18.479874Z",
+ "shell.execute_reply": "2026-05-07T14:55:18.478674Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Configuration loaded.\n",
+ "Phase 1: ['eclipse', 'mylyn']\n",
+ "Phase 2: ['equinox', 'lucene', 'pde']\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "import sys\n",
+ "import json\n",
+ "import random\n",
+ "import warnings\n",
+ "from collections import defaultdict\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "from scipy import stats\n",
+ "from tqdm.auto import tqdm\n",
+ "\n",
+ "# sklearn\n",
+ "from sklearn.model_selection import train_test_split, StratifiedKFold\n",
+ "from sklearn.preprocessing import FunctionTransformer, RobustScaler\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.utils.class_weight import compute_sample_weight\n",
+ "\n",
+ "# models\n",
+ "from sklearn.ensemble import (\n",
+ " RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier\n",
+ ")\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.svm import SVC\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.naive_bayes import GaussianNB\n",
+ "\n",
+ "# xgboost / lightgbm\n",
+ "try:\n",
+ " from xgboost import XGBClassifier\n",
+ "except Exception:\n",
+ " XGBClassifier = None\n",
+ "try:\n",
+ " from lightgbm import LGBMClassifier\n",
+ "except Exception:\n",
+ " LGBMClassifier = None\n",
+ "\n",
+ "# imbalanced-learn\n",
+ "from imblearn.over_sampling import SMOTE\n",
+ "\n",
+ "# metrics\n",
+ "from sklearn.metrics import (\n",
+ " f1_score, roc_auc_score, average_precision_score,\n",
+ " classification_report, confusion_matrix, roc_curve, precision_recall_curve\n",
+ ")\n",
+ "\n",
+ "# SHAP / LIME\n",
+ "import shap\n",
+ "from lime.lime_tabular import LimeTabularExplainer\n",
+ "\n",
+ "# Seeds: SEED is the single source of truth for reproducibility.\n",
+ "SEED = 42\n",
+ "np.random.seed(SEED)\n",
+ "random.seed(SEED)\n",
+ "\n",
+ "# Matplotlib defaults\n",
+ "plt.rcParams['figure.dpi'] = 100\n",
+ "plt.rcParams['savefig.dpi'] = 150\n",
+ "sns.set_palette(\"husl\")\n",
+ "\n",
+ "# ---------------- CONFIG ----------------\n",
+ "CONFIG = {\n",
+ "    'data_dir': 'data',\n",
+ "    # dataset key -> CSV file name inside data_dir\n",
+ "    'datasets': {\n",
+ "        'eclipse': 'eclipse.csv',\n",
+ "        'equinox': 'equinox.csv',\n",
+ "        'lucene': 'lucene.csv',\n",
+ "        'mylyn': 'mylyn.csv',\n",
+ "        'pde': 'pde.csv',\n",
+ "    },\n",
+ "    'phase1': ['eclipse', 'mylyn'],\n",
+ "    'phase2': ['equinox', 'lucene', 'pde'],\n",
+ "    # Reuse SEED so the estimators and the global RNGs cannot drift apart.\n",
+ "    'random_state': SEED,\n",
+ "    'cv_folds': 5,\n",
+ "    'test_size': 0.20,\n",
+ "    # NOTE(review): presumably the imbalance ratio above which SMOTE is applied -- confirm against the training cell.\n",
+ "    'smote_threshold': 2.0,\n",
+ "    # Columns dropped per dataset at load time; names are written as in the\n",
+ "    # raw CSV header, i.e. with the trailing ':'.\n",
+ "    'ZERO_VAR_FEATURES': {\n",
+ "        'lucene': [\n",
+ "            'numberOfNonTrivialBugsFoundUntil:',\n",
+ "            'numberOfMajorBugsFoundUntil:',\n",
+ "            'numberOfCriticalBugsFoundUntil:',\n",
+ "            'numberOfHighPriorityBugsFoundUntil:',\n",
+ "        ],\n",
+ "    },\n",
+ "}\n",
+ "\n",
+ "# Storage containers (module-level state filled in by later cells)\n",
+ "DATASETS = {}\n",
+ "RESULTS_PHASE1 = {}\n",
+ "BEST_MODEL = {}\n",
+ "TOP2_MODELS = {}\n",
+ "SHAP_WEIGHTS = {}\n",
+ "AVERAGED_SHAP_WEIGHTS = None\n",
+ "ALL_FEATURES = []\n",
+ "\n",
+ "print(\"Configuration loaded.\")\n",
+ "print(\"Phase 1:\", CONFIG['phase1'])\n",
+ "print(\"Phase 2:\", CONFIG['phase2'])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "746df5c7",
+ "metadata": {},
+ "source": [
+ "# Section 1 — Data Loading\n",
+ "\n",
+ "Load all 5 datasets, drop leakage columns, create binary target, drop zero-variance features per CONFIG, and report shapes + class distributions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f624d0f5",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:18.483498Z",
+ "iopub.status.busy": "2026-05-07T14:55:18.482981Z",
+ "iopub.status.idle": "2026-05-07T14:55:18.525217Z",
+ "shell.execute_reply": "2026-05-07T14:55:18.523969Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset: eclipse | shape=(997, 5) | buggy=206 | clean=791 | imbalance_ratio=3.84\n",
+ "Dataset: equinox | shape=(324, 5) | buggy=129 | clean=195 | imbalance_ratio=1.51\n",
+ " [lucene] Dropping zero-variance: ['numberOfNonTrivialBugsFoundUntil:', 'numberOfMajorBugsFoundUntil:', 'numberOfCriticalBugsFoundUntil:', 'numberOfHighPriorityBugsFoundUntil:']\n",
+ "Dataset: lucene | shape=(691, 1) | buggy=64 | clean=627 | imbalance_ratio=9.80\n",
+ "Dataset: mylyn | shape=(1862, 5) | buggy=245 | clean=1617 | imbalance_ratio=6.60\n",
+ "Dataset: pde | shape=(1497, 5) | buggy=209 | clean=1288 | imbalance_ratio=6.16\n",
+ "\n",
+ "All datasets loaded.\n"
+ ]
+ }
+ ],
+ "source": [
+ "def load_dataset(name, path):\n",
+ "    \"\"\"Load one defect dataset and split it into features and a binary target.\n",
+ "\n",
+ "    The file is read as a ';'-separated CSV. Unnamed/empty columns, the\n",
+ "    per-severity bug-count columns (target leakage), the `classname`\n",
+ "    identifier and the columns listed for `name` in\n",
+ "    CONFIG['ZERO_VAR_FEATURES'] are dropped.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    name : str\n",
+ "        Dataset key; selects the CONFIG['ZERO_VAR_FEATURES'] entry and is\n",
+ "        used in log/error messages.\n",
+ "    path : str\n",
+ "        Path of the CSV file to read.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    tuple (X, y, feature_names)\n",
+ "        X : DataFrame of the remaining columns, trailing ':' stripped from names.\n",
+ "        y : Series of 0/1 ints, 1 where `bugs` > 0.\n",
+ "        feature_names : list of the column names of X.\n",
+ "\n",
+ "    Raises\n",
+ "    ------\n",
+ "    KeyError\n",
+ "        If the file has no `bugs` column.\n",
+ "    \"\"\"\n",
+ "    df = pd.read_csv(path, sep=';', skipinitialspace=True)\n",
+ "    df.columns = df.columns.str.strip()\n",
+ "\n",
+ "    # Drop unnamed / empty columns\n",
+ "    unnamed = [c for c in df.columns if c == '' or c.startswith('Unnamed')]\n",
+ "    df = df.drop(columns=unnamed)\n",
+ "\n",
+ "    # Fail with a message that names the dataset instead of a bare KeyError('bugs').\n",
+ "    if 'bugs' not in df.columns:\n",
+ "        raise KeyError(f\"[{name}] required target column 'bugs' not found in {path}\")\n",
+ "\n",
+ "    # Binary target\n",
+ "    y = (df['bugs'] > 0).astype(int)\n",
+ "\n",
+ "    # Drop the target, the leakage columns and the identifier in one pass.\n",
+ "    # errors='ignore' treats the identifier like the leakage columns: a file\n",
+ "    # that lacks one of them still loads.\n",
+ "    leakage = ['nonTrivialBugs', 'majorBugs', 'criticalBugs', 'highPriorityBugs']\n",
+ "    df = df.drop(columns=['bugs', 'classname'] + leakage, errors='ignore')\n",
+ "\n",
+ "    # Drop zero-variance per CONFIG. Names are compared with the trailing ':'\n",
+ "    # removed on both sides, so the config entry matches whether or not it\n",
+ "    # carries the raw header's colon. Config order is kept for the log line.\n",
+ "    by_bare_name = {c.rstrip(':'): c for c in df.columns}\n",
+ "    configured = CONFIG['ZERO_VAR_FEATURES'].get(name, [])\n",
+ "    zv = [by_bare_name[c.rstrip(':')] for c in configured if c.rstrip(':') in by_bare_name]\n",
+ "    if zv:\n",
+ "        print(f\" [{name}] Dropping zero-variance: {zv}\")\n",
+ "        df = df.drop(columns=zv)\n",
+ "\n",
+ "    feature_names = [f.rstrip(':') for f in df.columns.tolist()]\n",
+ "    df.columns = feature_names\n",
+ "    return df, y, feature_names\n",
+ "\n",
+ "\n",
+ "# Load every configured dataset, register it in DATASETS and report its class balance.\n",
+ "for name in CONFIG['datasets']:\n",
+ "    fname = CONFIG['datasets'][name]\n",
+ "    path = os.path.join(CONFIG['data_dir'], fname)\n",
+ "    X, y, fnames = load_dataset(name, path)\n",
+ "    DATASETS[name] = {'X': X, 'y': y, 'feature_names': fnames}\n",
+ "\n",
+ "    n_buggy = int(y.sum())\n",
+ "    n_clean = int((y == 0).sum())\n",
+ "    # clean-to-buggy ratio; infinite when the dataset has no buggy rows\n",
+ "    if n_buggy > 0:\n",
+ "        ratio = n_clean / n_buggy\n",
+ "    else:\n",
+ "        ratio = float('inf')\n",
+ "    print(f\"Dataset: {name:8s} | shape={X.shape} | buggy={n_buggy} | clean={n_clean} | imbalance_ratio={ratio:.2f}\")\n",
+ "\n",
+ "print(\"\\nAll datasets loaded.\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b1c7557c",
+ "metadata": {},
+ "source": [
+ "# Section 2 — EDA (Concise Per-Dataset)\n",
+ "\n",
+ "For each dataset: class balance bar chart, feature boxplots (log-scaled), correlation heatmap, and a printed summary table — all in a single figure using `gridspec`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "9a73dd1f",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:18.527156Z",
+ "iopub.status.busy": "2026-05-07T14:55:18.526946Z",
+ "iopub.status.idle": "2026-05-07T14:55:24.479169Z",
+ "shell.execute_reply": "2026-05-07T14:55:24.477582Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "============================================================\n",
+ "EDA for dataset: eclipse\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved EDA figure: eda_eclipse.png\n",
+ "\n",
+ "--- Summary table for eclipse ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean | \n",
+ " median | \n",
+ " max | \n",
+ " skewness | \n",
+ " %zeros | \n",
+ " IQR_outliers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 11.64 | \n",
+ " 5.00 | \n",
+ " 214 | \n",
+ " 4.95 | \n",
+ " 11.3 | \n",
+ " 111 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 10.15 | \n",
+ " 4.00 | \n",
+ " 200 | \n",
+ " 4.94 | \n",
+ " 14.4 | \n",
+ " 114 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 1.14 | \n",
+ " 0.00 | \n",
+ " 38 | \n",
+ " 5.61 | \n",
+ " 64.7 | \n",
+ " 140 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.43 | \n",
+ " 0.00 | \n",
+ " 15 | \n",
+ " 5.34 | \n",
+ " 78.3 | \n",
+ " 216 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.46 | \n",
+ " 0.00 | \n",
+ " 10 | \n",
+ " 4.32 | \n",
+ " 72.6 | \n",
+ " 40 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean median max skewness %zeros \\\n",
+ "0 numberOfBugsFoundUntil 11.64 5.00 214 4.95 11.3 \n",
+ "1 numberOfNonTrivialBugsFoundUntil 10.15 4.00 200 4.94 14.4 \n",
+ "2 numberOfMajorBugsFoundUntil 1.14 0.00 38 5.61 64.7 \n",
+ "3 numberOfCriticalBugsFoundUntil 0.43 0.00 15 5.34 78.3 \n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.46 0.00 10 4.32 72.6 \n",
+ "\n",
+ " IQR_outliers \n",
+ "0 111 \n",
+ "1 114 \n",
+ "2 140 \n",
+ "3 216 \n",
+ "4 40 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "============================================================\n",
+ "EDA for dataset: equinox\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved EDA figure: eda_equinox.png\n",
+ "\n",
+ "--- Summary table for equinox ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean | \n",
+ " median | \n",
+ " max | \n",
+ " skewness | \n",
+ " %zeros | \n",
+ " IQR_outliers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 4.59 | \n",
+ " 2.00 | \n",
+ " 78 | \n",
+ " 4.95 | \n",
+ " 3.4 | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 4.30 | \n",
+ " 2.00 | \n",
+ " 71 | \n",
+ " 4.94 | \n",
+ " 4.3 | \n",
+ " 31 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.48 | \n",
+ " 0.00 | \n",
+ " 11 | \n",
+ " 4.90 | \n",
+ " 76.5 | \n",
+ " 76 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.22 | \n",
+ " 0.00 | \n",
+ " 4 | \n",
+ " 3.54 | \n",
+ " 85.5 | \n",
+ " 47 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.04 | \n",
+ " 0.00 | \n",
+ " 4 | \n",
+ " 9.64 | \n",
+ " 96.9 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean median max skewness %zeros \\\n",
+ "0 numberOfBugsFoundUntil 4.59 2.00 78 4.95 3.4 \n",
+ "1 numberOfNonTrivialBugsFoundUntil 4.30 2.00 71 4.94 4.3 \n",
+ "2 numberOfMajorBugsFoundUntil 0.48 0.00 11 4.90 76.5 \n",
+ "3 numberOfCriticalBugsFoundUntil 0.22 0.00 4 3.54 85.5 \n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.04 0.00 4 9.64 96.9 \n",
+ "\n",
+ " IQR_outliers \n",
+ "0 37 \n",
+ "1 31 \n",
+ "2 76 \n",
+ "3 47 \n",
+ "4 10 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "============================================================\n",
+ "EDA for dataset: lucene\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved EDA figure: eda_lucene.png\n",
+ "\n",
+ "--- Summary table for lucene ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean | \n",
+ " median | \n",
+ " max | \n",
+ " skewness | \n",
+ " %zeros | \n",
+ " IQR_outliers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 2.48 | \n",
+ " 1.00 | \n",
+ " 81 | \n",
+ " 8.17 | \n",
+ " 24.6 | \n",
+ " 48 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean median max skewness %zeros IQR_outliers\n",
+ "0 numberOfBugsFoundUntil 2.48 1.00 81 8.17 24.6 48"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "============================================================\n",
+ "EDA for dataset: mylyn\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved EDA figure: eda_mylyn.png\n",
+ "\n",
+ "--- Summary table for mylyn ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean | \n",
+ " median | \n",
+ " max | \n",
+ " skewness | \n",
+ " %zeros | \n",
+ " IQR_outliers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 7.83 | \n",
+ " 4.00 | \n",
+ " 197 | \n",
+ " 5.90 | \n",
+ " 1.1 | \n",
+ " 153 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 3.66 | \n",
+ " 2.00 | \n",
+ " 92 | \n",
+ " 6.19 | \n",
+ " 16.5 | \n",
+ " 169 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.32 | \n",
+ " 0.00 | \n",
+ " 13 | \n",
+ " 5.98 | \n",
+ " 79.0 | \n",
+ " 391 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.13 | \n",
+ " 0.00 | \n",
+ " 5 | \n",
+ " 5.24 | \n",
+ " 90.3 | \n",
+ " 180 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 4.30 | \n",
+ " 2.00 | \n",
+ " 85 | \n",
+ " 4.29 | \n",
+ " 27.0 | \n",
+ " 141 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean median max skewness %zeros \\\n",
+ "0 numberOfBugsFoundUntil 7.83 4.00 197 5.90 1.1 \n",
+ "1 numberOfNonTrivialBugsFoundUntil 3.66 2.00 92 6.19 16.5 \n",
+ "2 numberOfMajorBugsFoundUntil 0.32 0.00 13 5.98 79.0 \n",
+ "3 numberOfCriticalBugsFoundUntil 0.13 0.00 5 5.24 90.3 \n",
+ "4 numberOfHighPriorityBugsFoundUntil 4.30 2.00 85 4.29 27.0 \n",
+ "\n",
+ " IQR_outliers \n",
+ "0 153 \n",
+ "1 169 \n",
+ "2 391 \n",
+ "3 180 \n",
+ "4 141 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "============================================================\n",
+ "EDA for dataset: pde\n",
+ "============================================================\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved EDA figure: eda_pde.png\n",
+ "\n",
+ "--- Summary table for pde ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean | \n",
+ " median | \n",
+ " max | \n",
+ " skewness | \n",
+ " %zeros | \n",
+ " IQR_outliers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 3.88 | \n",
+ " 2.00 | \n",
+ " 232 | \n",
+ " 17.20 | \n",
+ " 17.0 | \n",
+ " 149 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 2.80 | \n",
+ " 1.00 | \n",
+ " 143 | \n",
+ " 12.46 | \n",
+ " 31.1 | \n",
+ " 133 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.24 | \n",
+ " 0.00 | \n",
+ " 8 | \n",
+ " 4.94 | \n",
+ " 85.2 | \n",
+ " 221 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.07 | \n",
+ " 0.00 | \n",
+ " 6 | \n",
+ " 8.38 | \n",
+ " 95.5 | \n",
+ " 68 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.06 | \n",
+ " 0.00 | \n",
+ " 6 | \n",
+ " 8.63 | \n",
+ " 95.7 | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean median max skewness %zeros \\\n",
+ "0 numberOfBugsFoundUntil 3.88 2.00 232 17.20 17.0 \n",
+ "1 numberOfNonTrivialBugsFoundUntil 2.80 1.00 143 12.46 31.1 \n",
+ "2 numberOfMajorBugsFoundUntil 0.24 0.00 8 4.94 85.2 \n",
+ "3 numberOfCriticalBugsFoundUntil 0.07 0.00 6 8.38 95.5 \n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.06 0.00 6 8.63 95.7 \n",
+ "\n",
+ " IQR_outliers \n",
+ "0 149 \n",
+ "1 133 \n",
+ "2 221 \n",
+ "3 68 \n",
+ "4 65 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def eda_dataset(name, data_dict, save_prefix='eda'):\n",
+ "    \"\"\"Render and save a one-page EDA figure for a dataset and display its summary table.\n",
+ "\n",
+ "    The figure holds a class-balance bar chart, horizontal boxplots of the\n",
+ "    log1p-transformed features, a Pearson correlation heatmap and a\n",
+ "    per-feature summary table. It is written to ``{save_prefix}_{name}.png``\n",
+ "    and then closed.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    name : str\n",
+ "        Dataset name; used in the figure title and the output file name.\n",
+ "    data_dict : dict\n",
+ "        Must provide 'X' (feature DataFrame), 'y' (0/1 target Series) and\n",
+ "        'feature_names' (columns of X to plot and summarise).\n",
+ "    save_prefix : str, default 'eda'\n",
+ "        Prefix of the PNG file name.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        One row per feature: mean, median, max, skewness and %zeros (as\n",
+ "        formatted strings) plus the IQR outlier count.\n",
+ "    \"\"\"\n",
+ "    X = data_dict['X']\n",
+ "    y = data_dict['y']\n",
+ "    features = data_dict['feature_names']\n",
+ "\n",
+ "    fig = plt.figure(figsize=(18, 14))\n",
+ "    gs = fig.add_gridspec(3, 2, height_ratios=[1, 1.2, 1.2])\n",
+ "\n",
+ "    # --- Class balance ---\n",
+ "    ax1 = fig.add_subplot(gs[0, 0])\n",
+ "    # Reindex on both labels so a dataset with a single class still yields\n",
+ "    # two bars (one of height 0) instead of a label/height length mismatch.\n",
+ "    counts = y.value_counts().reindex([0, 1], fill_value=0)\n",
+ "    bars = ax1.bar(['clean (0)', 'buggy (1)'], counts.values, color=['steelblue', 'coral'])\n",
+ "    ax1.set_title('Class Balance')\n",
+ "    ax1.set_ylabel('Count')\n",
+ "    for bar, val in zip(bars, counts.values):\n",
+ "        ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02*counts.max(),\n",
+ "                 str(val), ha='center', va='bottom', fontsize=11)\n",
+ "\n",
+ "    # --- Feature boxplots (log1p scale) ---\n",
+ "    ax2 = fig.add_subplot(gs[0, 1])\n",
+ "    X_log1p = np.log1p(X)\n",
+ "    vals = [X_log1p[c].dropna().values for c in features]\n",
+ "    bp = ax2.boxplot(vals, vert=False, patch_artist=True)\n",
+ "    # Label the boxes through the axis instead of boxplot(labels=...): that\n",
+ "    # keyword is deprecated since Matplotlib 3.9 (renamed to tick_labels).\n",
+ "    # Boxes are drawn at positions 1..n.\n",
+ "    ax2.set_yticks(list(range(1, len(features) + 1)))\n",
+ "    ax2.set_yticklabels(features)\n",
+ "    for patch in bp['boxes']:\n",
+ "        patch.set_facecolor('lightsteelblue')\n",
+ "    ax2.set_title('Feature Distributions (log1p scale)')\n",
+ "    ax2.set_xlabel('log1p(value)')\n",
+ "\n",
+ "    # --- Correlation heatmap ---\n",
+ "    ax3 = fig.add_subplot(gs[1, :])\n",
+ "    corr = X.corr(method='pearson')\n",
+ "    sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax3,\n",
+ "                vmin=-1, vmax=1, square=True)\n",
+ "    ax3.set_title('Pearson Correlation Heatmap (features only)')\n",
+ "\n",
+ "    # --- Summary table ---\n",
+ "    ax4 = fig.add_subplot(gs[2, :])\n",
+ "    ax4.axis('off')\n",
+ "    rows = []\n",
+ "    for col in features:\n",
+ "        s = X[col]\n",
+ "        pct_zeros = (s == 0).mean() * 100\n",
+ "        # Outliers by the 1.5 * IQR rule\n",
+ "        q1, q3 = s.quantile(0.25), s.quantile(0.75)\n",
+ "        iqr = q3 - q1\n",
+ "        low, high = q1 - 1.5*iqr, q3 + 1.5*iqr\n",
+ "        outlier_count = ((s < low) | (s > high)).sum()\n",
+ "        rows.append({\n",
+ "            'feature': col,\n",
+ "            'mean': f\"{s.mean():.2f}\",\n",
+ "            'median': f\"{s.median():.2f}\",\n",
+ "            'max': f\"{s.max():.0f}\",\n",
+ "            'skewness': f\"{s.skew():.2f}\",\n",
+ "            '%zeros': f\"{pct_zeros:.1f}\",\n",
+ "            'IQR_outliers': outlier_count,\n",
+ "        })\n",
+ "    summary_df = pd.DataFrame(rows)\n",
+ "    table = ax4.table(cellText=summary_df.values, colLabels=summary_df.columns,\n",
+ "                      loc='center', cellLoc='center')\n",
+ "    table.auto_set_font_size(False)\n",
+ "    table.set_fontsize(9)\n",
+ "    table.scale(1.2, 1.5)\n",
+ "    ax4.set_title('Feature Summary Table', y=0.95, pad=10)\n",
+ "\n",
+ "    fig.suptitle(f'EDA: {name}', fontsize=16, fontweight='bold')\n",
+ "    # Act on this figure explicitly rather than on pyplot's current figure.\n",
+ "    fig.tight_layout(rect=[0, 0, 1, 0.96])\n",
+ "    fname = f\"{save_prefix}_{name}.png\"\n",
+ "    fig.savefig(fname)\n",
+ "    plt.close(fig)\n",
+ "    print(f\"Saved EDA figure: {fname}\")\n",
+ "    print(f\"\\n--- Summary table for {name} ---\")\n",
+ "    display(summary_df)\n",
+ "    return summary_df\n",
+ "\n",
+ "\n",
+ "# Run the EDA for every loaded dataset, each under its own banner.\n",
+ "for name, data_dict in DATASETS.items():\n",
+ "    print(f\"\\n{'='*60}\")\n",
+ "    print(f\"EDA for dataset: {name}\")\n",
+ "    print(f\"{'='*60}\")\n",
+ "    eda_dataset(name, data_dict)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a0befbb",
+ "metadata": {},
+ "source": [
+ "# Section 3 — Preprocessing Pipelines\n",
+ "\n",
+ "Three pipelines:\n",
+ "- **Pipeline A**: tree-compatible (no scaling)\n",
+ "- **Pipeline B**: linear/distance-compatible (log1p + RobustScaler + optional SMOTE)\n",
+ "- **Pipeline C**: interpretability-optimised (log1p only, no SMOTE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "3388aba9",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:24.488412Z",
+ "iopub.status.busy": "2026-05-07T14:55:24.487683Z",
+ "iopub.status.idle": "2026-05-07T14:55:24.497158Z",
+ "shell.execute_reply": "2026-05-07T14:55:24.496173Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Pipeline A (tree): Pipeline(steps=[('identity', 'passthrough')])\n",
+ "\n",
+ "Preproc B (log1p+RobustScaler): Pipeline(steps=[('log1p', FunctionTransformer(func=)),\n",
+ " ('scaler', RobustScaler())])\n",
+ "\n",
+ "Pipeline C (interpretability): Pipeline(steps=[('log1p', FunctionTransformer(func=))])\n"
+ ]
+ }
+ ],
+ "source": [
+ "def make_pipeline_a():\n",
+ "    \"\"\"Pipeline A: a single 'passthrough' step, i.e. no transformation is applied.\"\"\"\n",
+ "    steps = [('identity', 'passthrough')]\n",
+ "    return Pipeline(steps)\n",
+ "\n",
+ "\n",
+ "def make_preproc_b():\n",
+ "    \"\"\"Build the preprocessing part of Pipeline B: log1p followed by RobustScaler.\"\"\"\n",
+ "    log_step = ('log1p', FunctionTransformer(np.log1p))\n",
+ "    scale_step = ('scaler', RobustScaler())\n",
+ "    return Pipeline([log_step, scale_step])\n",
+ "\n",
+ "\n",
+ "def make_pipeline_c():\n",
+ "    \"\"\"Pipeline C: a log1p transform only, with no scaling step.\"\"\"\n",
+ "    log_step = ('log1p', FunctionTransformer(np.log1p))\n",
+ "    return Pipeline([log_step])\n",
+ "\n",
+ "\n",
+ "def imbalance_ratio(y):\n",
+ "    \"\"\"Return count(label 0) / count(label 1) for non-negative integer labels `y`.\n",
+ "\n",
+ "    Returns float('inf') when label 1 does not occur.\n",
+ "    \"\"\"\n",
+ "    class_counts = np.bincount(y)\n",
+ "    has_positives = len(class_counts) >= 2 and class_counts[1] != 0\n",
+ "    if has_positives:\n",
+ "        return class_counts[0] / class_counts[1]\n",
+ "    return float('inf')\n",
+ "\n",
+ "\n",
+ "# Show the repr of each preprocessing pipeline as a quick sanity check.\n",
+ "for _label, _factory in (\n",
+ "    (\"Pipeline A (tree):\", make_pipeline_a),\n",
+ "    (\"\\nPreproc B (log1p+RobustScaler):\", make_preproc_b),\n",
+ "    (\"\\nPipeline C (interpretability):\", make_pipeline_c),\n",
+ "):\n",
+ "    print(_label, _factory())\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e4d6d76",
+ "metadata": {},
+ "source": [
+ "# Section 4 — Model Definitions\n",
+ "\n",
+ "Define all 8 models with assigned pipelines. XGBoost `scale_pos_weight` is computed per dataset at training time."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "4b9211dd",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:24.499876Z",
+ "iopub.status.busy": "2026-05-07T14:55:24.499597Z",
+ "iopub.status.idle": "2026-05-07T14:55:24.508565Z",
+ "shell.execute_reply": "2026-05-07T14:55:24.507645Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest -> pipeline A | RandomForestClassifier\n",
+ "XGBoost -> pipeline A | XGBClassifier\n",
+ "LightGBM -> pipeline A | LGBMClassifier\n",
+ "GradientBoosting -> pipeline A | GradientBoostingClassifier\n",
+ "ExtraTrees -> pipeline A | ExtraTreesClassifier\n",
+ "LogisticRegression -> pipeline B | LogisticRegression\n",
+ "SVM -> pipeline B | SVC\n",
+ "KNN -> pipeline B | KNeighborsClassifier\n"
+ ]
+ }
+ ],
+ "source": [
+ "def build_models(y_train=None):\n",
+ "    \"\"\"Create the candidate classifiers together with the pipeline each one uses.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    y_train : array of non-negative ints, optional\n",
+ "        Training labels. When given and label 1 occurs, XGBoost's\n",
+ "        scale_pos_weight is count(label 0) / count(label 1); otherwise 1.0.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    dict\n",
+ "        model name -> {'model': unfitted estimator, 'pipeline': 'A' or 'B'}.\n",
+ "        Entries whose estimator class is None (XGBoost / LightGBM) are left out.\n",
+ "    \"\"\"\n",
+ "    seed = CONFIG['random_state']\n",
+ "\n",
+ "    # Negative-to-positive ratio for XGBoost; stays 1.0 without usable labels.\n",
+ "    pos_weight = 1.0\n",
+ "    if y_train is not None:\n",
+ "        class_counts = np.bincount(y_train)\n",
+ "        if len(class_counts) > 1 and class_counts[1] > 0:\n",
+ "            pos_weight = class_counts[0] / class_counts[1]\n",
+ "\n",
+ "    # Optional estimators: None when the class itself is None.\n",
+ "    xgb_model = None\n",
+ "    if XGBClassifier is not None:\n",
+ "        xgb_model = XGBClassifier(\n",
+ "            n_estimators=300,\n",
+ "            eval_metric='logloss',\n",
+ "            scale_pos_weight=pos_weight,\n",
+ "            random_state=seed,\n",
+ "            n_jobs=-1,\n",
+ "        )\n",
+ "\n",
+ "    lgbm_model = None\n",
+ "    if LGBMClassifier is not None:\n",
+ "        lgbm_model = LGBMClassifier(\n",
+ "            n_estimators=300, class_weight='balanced',\n",
+ "            random_state=seed,\n",
+ "            n_jobs=-1, verbose=-1,\n",
+ "        )\n",
+ "\n",
+ "    # (name, estimator, pipeline key) -- list order is the order of the returned dict.\n",
+ "    candidates = [\n",
+ "        ('RandomForest',\n",
+ "         RandomForestClassifier(\n",
+ "             n_estimators=300, class_weight='balanced',\n",
+ "             random_state=seed, n_jobs=-1\n",
+ "         ),\n",
+ "         'A'),\n",
+ "        ('XGBoost', xgb_model, 'A'),\n",
+ "        ('LightGBM', lgbm_model, 'A'),\n",
+ "        ('GradientBoosting',\n",
+ "         GradientBoostingClassifier(n_estimators=200, random_state=seed),\n",
+ "         'A'),\n",
+ "        ('ExtraTrees',\n",
+ "         ExtraTreesClassifier(\n",
+ "             n_estimators=300, class_weight='balanced',\n",
+ "             random_state=seed, n_jobs=-1\n",
+ "         ),\n",
+ "         'A'),\n",
+ "        ('LogisticRegression',\n",
+ "         LogisticRegression(\n",
+ "             C=1.0, class_weight='balanced', max_iter=1000,\n",
+ "             random_state=seed\n",
+ "         ),\n",
+ "         'B'),\n",
+ "        ('SVM',\n",
+ "         SVC(\n",
+ "             kernel='rbf', C=1.0, probability=True,\n",
+ "             class_weight='balanced', random_state=seed\n",
+ "         ),\n",
+ "         'B'),\n",
+ "        ('KNN',\n",
+ "         KNeighborsClassifier(n_neighbors=7, metric='euclidean', n_jobs=-1),\n",
+ "         'B'),\n",
+ "    ]\n",
+ "\n",
+ "    return {\n",
+ "        label: {'model': estimator, 'pipeline': pipeline_key}\n",
+ "        for label, estimator, pipeline_key in candidates\n",
+ "        if estimator is not None\n",
+ "    }\n",
+ "\n",
+ "\n",
+ "# Smoke test: build the models on a toy label vector and list which pipeline each one uses.\n",
+ "_toy_labels = np.array([0, 0, 0, 1, 1])\n",
+ "MODELS_CHECK = build_models(_toy_labels)\n",
+ "for k in MODELS_CHECK:\n",
+ "    v = MODELS_CHECK[k]\n",
+ "    print(f\"{k:20s} -> pipeline {v['pipeline']} | {type(v['model']).__name__}\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4e7f4546",
+ "metadata": {},
+ "source": [
+ "# Section 5 — Training and Evaluation (Phase 1 datasets)\n",
+ "\n",
+ "Stratified split, 5-fold CV on train set, final test evaluation, model ranking, and comparative visualisation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "a26d2aca",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:55:24.510686Z",
+ "iopub.status.busy": "2026-05-07T14:55:24.510460Z",
+ "iopub.status.idle": "2026-05-07T14:56:03.673975Z",
+ "shell.execute_reply": "2026-05-07T14:56:03.672496Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "======================================================================\n",
+ "PHASE 1 — Dataset: eclipse\n",
+ "======================================================================\n",
+ "Train: (797, 5) | Test: (200, 5)\n",
+ "Train dist: [632 165] | Test dist: [159 41]\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "36e19e3b212f4e2a8ff4962b1d019b69",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "CV eclipse: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- CV Results (eclipse) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " f1_macro | \n",
+ " roc_auc | \n",
+ " pr_auc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " 0.7166 ± 0.0295 | \n",
+ " 0.7786 ± 0.0386 | \n",
+ " 0.5303 ± 0.0404 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " 0.6893 ± 0.0273 | \n",
+ " 0.7593 ± 0.0254 | \n",
+ " 0.4703 ± 0.0366 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " 0.6842 ± 0.0219 | \n",
+ " 0.8133 ± 0.0261 | \n",
+ " 0.6371 ± 0.0397 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " 0.6812 ± 0.0376 | \n",
+ " 0.7411 ± 0.0170 | \n",
+ " 0.5368 ± 0.0466 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " 0.6771 ± 0.0240 | \n",
+ " 0.7133 ± 0.0316 | \n",
+ " 0.5389 ± 0.0427 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " 0.6679 ± 0.0434 | \n",
+ " 0.7073 ± 0.0392 | \n",
+ " 0.5357 ± 0.0373 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " 0.6621 ± 0.0299 | \n",
+ " 0.6868 ± 0.0306 | \n",
+ " 0.5158 ± 0.0455 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " 0.6601 ± 0.0170 | \n",
+ " 0.7150 ± 0.0197 | \n",
+ " 0.4693 ± 0.0459 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model f1_macro roc_auc pr_auc\n",
+ "6 SVM 0.7166 ± 0.0295 0.7786 ± 0.0386 0.5303 ± 0.0404\n",
+ "7 KNN 0.6893 ± 0.0273 0.7593 ± 0.0254 0.4703 ± 0.0366\n",
+ "5 LogisticRegression 0.6842 ± 0.0219 0.8133 ± 0.0261 0.6371 ± 0.0397\n",
+ "0 RandomForest 0.6812 ± 0.0376 0.7411 ± 0.0170 0.5368 ± 0.0466\n",
+ "2 LightGBM 0.6771 ± 0.0240 0.7133 ± 0.0316 0.5389 ± 0.0427\n",
+ "3 GradientBoosting 0.6679 ± 0.0434 0.7073 ± 0.0392 0.5357 ± 0.0373\n",
+ "1 XGBoost 0.6621 ± 0.0299 0.6868 ± 0.0306 0.5158 ± 0.0455\n",
+ "4 ExtraTrees 0.6601 ± 0.0170 0.7150 ± 0.0197 0.4693 ± 0.0459"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "247614267d7b45eb8ca13b12ee3e4125",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Test eclipse: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- RandomForest on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8363 0.8994 0.8667 159\n",
+ " buggy 0.4483 0.3171 0.3714 41\n",
+ "\n",
+ " accuracy 0.7800 200\n",
+ " macro avg 0.6423 0.6082 0.6190 200\n",
+ "weighted avg 0.7567 0.7800 0.7651 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- XGBoost on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8477 0.8050 0.8258 159\n",
+ " buggy 0.3673 0.4390 0.4000 41\n",
+ "\n",
+ " accuracy 0.7300 200\n",
+ " macro avg 0.6075 0.6220 0.6129 200\n",
+ "weighted avg 0.7492 0.7300 0.7385 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LightGBM on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8533 0.8050 0.8285 159\n",
+ " buggy 0.3800 0.4634 0.4176 41\n",
+ "\n",
+ " accuracy 0.7350 200\n",
+ " macro avg 0.6167 0.6342 0.6230 200\n",
+ "weighted avg 0.7563 0.7350 0.7442 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- GradientBoosting on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8910 0.8742 0.8825 159\n",
+ " buggy 0.5455 0.5854 0.5647 41\n",
+ "\n",
+ " accuracy 0.8150 200\n",
+ " macro avg 0.7182 0.7298 0.7236 200\n",
+ "weighted avg 0.8202 0.8150 0.8174 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- ExtraTrees on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8457 0.8616 0.8536 159\n",
+ " buggy 0.4211 0.3902 0.4051 41\n",
+ "\n",
+ " accuracy 0.7650 200\n",
+ " macro avg 0.6334 0.6259 0.6293 200\n",
+ "weighted avg 0.7586 0.7650 0.7616 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LogisticRegression on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.9124 0.7862 0.8446 159\n",
+ " buggy 0.4603 0.7073 0.5577 41\n",
+ "\n",
+ " accuracy 0.7700 200\n",
+ " macro avg 0.6864 0.7467 0.7011 200\n",
+ "weighted avg 0.8197 0.7700 0.7858 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- SVM on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8846 0.8679 0.8762 159\n",
+ " buggy 0.5227 0.5610 0.5412 41\n",
+ "\n",
+ " accuracy 0.8050 200\n",
+ " macro avg 0.7037 0.7145 0.7087 200\n",
+ "weighted avg 0.8104 0.8050 0.8075 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- KNN on eclipse test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8926 0.8365 0.8636 159\n",
+ " buggy 0.4902 0.6098 0.5435 41\n",
+ "\n",
+ " accuracy 0.7900 200\n",
+ " macro avg 0.6914 0.7231 0.7036 200\n",
+ "weighted avg 0.8101 0.7900 0.7980 200\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Test Set Ranking (eclipse) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " f1_macro | \n",
+ " roc_auc | \n",
+ " pr_auc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " 0.723623 | \n",
+ " 0.746434 | \n",
+ " 0.522457 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " 0.708683 | \n",
+ " 0.771898 | \n",
+ " 0.595115 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " 0.703557 | \n",
+ " 0.749885 | \n",
+ " 0.572197 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " 0.701143 | \n",
+ " 0.784323 | \n",
+ " 0.627092 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " 0.629323 | \n",
+ " 0.719896 | \n",
+ " 0.388231 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " 0.623031 | \n",
+ " 0.730940 | \n",
+ " 0.465996 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " 0.619048 | \n",
+ " 0.724268 | \n",
+ " 0.471125 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " 0.612903 | \n",
+ " 0.683080 | \n",
+ " 0.452841 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model f1_macro roc_auc pr_auc\n",
+ "3 GradientBoosting 0.723623 0.746434 0.522457\n",
+ "6 SVM 0.708683 0.771898 0.595115\n",
+ "7 KNN 0.703557 0.749885 0.572197\n",
+ "5 LogisticRegression 0.701143 0.784323 0.627092\n",
+ "4 ExtraTrees 0.629323 0.719896 0.388231\n",
+ "2 LightGBM 0.623031 0.730940 0.465996\n",
+ "0 RandomForest 0.619048 0.724268 0.471125\n",
+ "1 XGBoost 0.612903 0.683080 0.452841"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top-1 (SHAP): GradientBoosting\n",
+ "Top-2 (LIME): ['GradientBoosting', 'SVM']\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved model_comparison_eclipse.png\n",
+ "\n",
+ "======================================================================\n",
+ "PHASE 1 — Dataset: mylyn\n",
+ "======================================================================\n",
+ "Train: (1489, 5) | Test: (373, 5)\n",
+ "Train dist: [1293 196] | Test dist: [324 49]\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e6a9373f078d4b079ff1e6f0a095b629",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "CV mylyn: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- CV Results (mylyn) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " f1_macro | \n",
+ " roc_auc | \n",
+ " pr_auc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " 0.6001 ± 0.0160 | \n",
+ " 0.6599 ± 0.0175 | \n",
+ " 0.2451 ± 0.0282 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " 0.5965 ± 0.0296 | \n",
+ " 0.6723 ± 0.0357 | \n",
+ " 0.3147 ± 0.0618 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " 0.5952 ± 0.0428 | \n",
+ " 0.6563 ± 0.0492 | \n",
+ " 0.2923 ± 0.0665 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " 0.5946 ± 0.0337 | \n",
+ " 0.7372 ± 0.0291 | \n",
+ " 0.3210 ± 0.0621 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " 0.5851 ± 0.0337 | \n",
+ " 0.7007 ± 0.0424 | \n",
+ " 0.2914 ± 0.0353 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " 0.5850 ± 0.0284 | \n",
+ " 0.6538 ± 0.0359 | \n",
+ " 0.2644 ± 0.0514 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " 0.5824 ± 0.0427 | \n",
+ " 0.6362 ± 0.0641 | \n",
+ " 0.2881 ± 0.0653 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " 0.5741 ± 0.0305 | \n",
+ " 0.6809 ± 0.0439 | \n",
+ " 0.3075 ± 0.0475 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model f1_macro roc_auc pr_auc\n",
+ "7 KNN 0.6001 ± 0.0160 0.6599 ± 0.0175 0.2451 ± 0.0282\n",
+ "0 RandomForest 0.5965 ± 0.0296 0.6723 ± 0.0357 0.3147 ± 0.0618\n",
+ "2 LightGBM 0.5952 ± 0.0428 0.6563 ± 0.0492 0.2923 ± 0.0665\n",
+ "6 SVM 0.5946 ± 0.0337 0.7372 ± 0.0291 0.3210 ± 0.0621\n",
+ "5 LogisticRegression 0.5851 ± 0.0337 0.7007 ± 0.0424 0.2914 ± 0.0353\n",
+ "4 ExtraTrees 0.5850 ± 0.0284 0.6538 ± 0.0359 0.2644 ± 0.0514\n",
+ "1 XGBoost 0.5824 ± 0.0427 0.6362 ± 0.0641 0.2881 ± 0.0653\n",
+ "3 GradientBoosting 0.5741 ± 0.0305 0.6809 ± 0.0439 0.3075 ± 0.0475"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "4f67f20befda43119e11318be4df03c5",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Test mylyn: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- RandomForest on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8860 0.8395 0.8621 324\n",
+ " buggy 0.2121 0.2857 0.2435 49\n",
+ "\n",
+ " accuracy 0.7668 373\n",
+ " macro avg 0.5491 0.5626 0.5528 373\n",
+ "weighted avg 0.7975 0.7668 0.7809 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- XGBoost on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8920 0.7901 0.8380 324\n",
+ " buggy 0.2093 0.3673 0.2667 49\n",
+ "\n",
+ " accuracy 0.7346 373\n",
+ " macro avg 0.5506 0.5787 0.5523 373\n",
+ "weighted avg 0.8023 0.7346 0.7629 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LightGBM on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8936 0.7778 0.8317 324\n",
+ " buggy 0.2088 0.3878 0.2714 49\n",
+ "\n",
+ " accuracy 0.7265 373\n",
+ " macro avg 0.5512 0.5828 0.5516 373\n",
+ "weighted avg 0.8037 0.7265 0.7581 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- GradientBoosting on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.9140 0.7870 0.8458 324\n",
+ " buggy 0.2660 0.5102 0.3497 49\n",
+ "\n",
+ " accuracy 0.7507 373\n",
+ " macro avg 0.5900 0.6486 0.5977 373\n",
+ "weighted avg 0.8288 0.7507 0.7806 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- ExtraTrees on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.8849 0.8302 0.8567 324\n",
+ " buggy 0.2029 0.2857 0.2373 49\n",
+ "\n",
+ " accuracy 0.7587 373\n",
+ " macro avg 0.5439 0.5580 0.5470 373\n",
+ "weighted avg 0.7953 0.7587 0.7753 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LogisticRegression on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.9000 0.7778 0.8344 324\n",
+ " buggy 0.2258 0.4286 0.2958 49\n",
+ "\n",
+ " accuracy 0.7319 373\n",
+ " macro avg 0.5629 0.6032 0.5651 373\n",
+ "weighted avg 0.8114 0.7319 0.7637 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- SVM on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.9193 0.8086 0.8604 324\n",
+ " buggy 0.2955 0.5306 0.3796 49\n",
+ "\n",
+ " accuracy 0.7721 373\n",
+ " macro avg 0.6074 0.6696 0.6200 373\n",
+ "weighted avg 0.8373 0.7721 0.7973 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- KNN on mylyn test set ---\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " clean 0.9085 0.8272 0.8659 324\n",
+ " buggy 0.2821 0.4490 0.3465 49\n",
+ "\n",
+ " accuracy 0.7775 373\n",
+ " macro avg 0.5953 0.6381 0.6062 373\n",
+ "weighted avg 0.8262 0.7775 0.7977 373\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- Test Set Ranking (mylyn) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " f1_macro | \n",
+ " roc_auc | \n",
+ " pr_auc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " 0.619994 | \n",
+ " 0.759826 | \n",
+ " 0.331761 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " 0.606185 | \n",
+ " 0.641377 | \n",
+ " 0.224083 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " 0.597711 | \n",
+ " 0.660872 | \n",
+ " 0.255290 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " 0.565106 | \n",
+ " 0.720773 | \n",
+ " 0.306504 | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " 0.552801 | \n",
+ " 0.605978 | \n",
+ " 0.184380 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " 0.552319 | \n",
+ " 0.574515 | \n",
+ " 0.220625 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " 0.551556 | \n",
+ " 0.610922 | \n",
+ " 0.191383 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " 0.546988 | \n",
+ " 0.574735 | \n",
+ " 0.154150 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model f1_macro roc_auc pr_auc\n",
+ "6 SVM 0.619994 0.759826 0.331761\n",
+ "7 KNN 0.606185 0.641377 0.224083\n",
+ "3 GradientBoosting 0.597711 0.660872 0.255290\n",
+ "5 LogisticRegression 0.565106 0.720773 0.306504\n",
+ "0 RandomForest 0.552801 0.605978 0.184380\n",
+ "1 XGBoost 0.552319 0.574515 0.220625\n",
+ "2 LightGBM 0.551556 0.610922 0.191383\n",
+ "4 ExtraTrees 0.546988 0.574735 0.154150"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top-1 (SHAP): SVM\n",
+ "Top-2 (LIME): ['SVM', 'KNN']\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved model_comparison_mylyn.png\n",
+ "\n",
+ "Phase 1 training complete.\n"
+ ]
+ }
+ ],
+ "source": [
+ "def apply_pipeline_b(X_train, y_train, X_test, apply_smote=False):\n",
+ "    \"\"\"Preprocess a train/test pair with pipeline B, optionally oversampling.\n",
+ "\n",
+ "    The preprocessor from ``make_preproc_b()`` is fitted on ``X_train`` only and\n",
+ "    then applied to ``X_test``. When ``apply_smote`` is true, SMOTE resamples the\n",
+ "    transformed training data; the test data is never resampled.\n",
+ "\n",
+ "    Returns ``(X_train_t, y_train_t, X_test_t, preprocessor)``.\n",
+ "    \"\"\"\n",
+ "    preproc = make_preproc_b()\n",
+ "    train_features = preproc.fit_transform(X_train)\n",
+ "    test_features = preproc.transform(X_test)\n",
+ "    if not apply_smote:\n",
+ "        # No resampling requested: labels pass through untouched.\n",
+ "        return train_features, y_train, test_features, preproc\n",
+ "    sampler = SMOTE(random_state=CONFIG['random_state'])\n",
+ "    train_features, train_labels = sampler.fit_resample(train_features, y_train)\n",
+ "    return train_features, train_labels, test_features, preproc\n",
+ "\n",
+ "\n",
+ "def evaluate_model(model, X_train, y_train, X_test, y_test, pipeline_key):\n",
+ "    \"\"\"Fit ``model`` on the training split and score it on the test split.\n",
+ "\n",
+ "    Pipeline 'A' transforms the data with ``make_pipeline_a()``; a\n",
+ "    GradientBoostingClassifier is additionally fitted with balanced sample\n",
+ "    weights. Pipeline 'B' goes through ``apply_pipeline_b`` and applies SMOTE\n",
+ "    when the training imbalance ratio exceeds ``CONFIG['smote_threshold']``.\n",
+ "    ``model`` is fitted in place.\n",
+ "\n",
+ "    Returns a dict with keys 'f1_macro', 'roc_auc', 'pr_auc', 'y_pred', 'y_prob'.\n",
+ "    Raises ValueError for an unknown ``pipeline_key``.\n",
+ "    \"\"\"\n",
+ "    train_ir = imbalance_ratio(y_train)\n",
+ "    use_smote = (pipeline_key == 'B') and (train_ir > CONFIG['smote_threshold'])\n",
+ "\n",
+ "    if pipeline_key == 'A':\n",
+ "        transformer = make_pipeline_a()\n",
+ "        X_fit = transformer.fit_transform(X_train)\n",
+ "        X_eval = transformer.transform(X_test)\n",
+ "        # Only GradientBoosting gets explicit class-balancing weights here.\n",
+ "        fit_kwargs = {}\n",
+ "        if isinstance(model, GradientBoostingClassifier):\n",
+ "            fit_kwargs['sample_weight'] = compute_sample_weight('balanced', y_train)\n",
+ "        model.fit(X_fit, y_train, **fit_kwargs)\n",
+ "    elif pipeline_key == 'B':\n",
+ "        X_fit, y_fit, X_eval, _ = apply_pipeline_b(\n",
+ "            X_train, y_train, X_test, apply_smote=use_smote\n",
+ "        )\n",
+ "        model.fit(X_fit, y_fit)\n",
+ "    else:\n",
+ "        raise ValueError(f\"Unknown pipeline key {pipeline_key}\")\n",
+ "\n",
+ "    predicted = model.predict(X_eval)\n",
+ "    # Column 1 of predict_proba is taken as the score for the positive class.\n",
+ "    positive_prob = model.predict_proba(X_eval)[:, 1]\n",
+ "    return {\n",
+ "        'f1_macro': f1_score(y_test, predicted, average='macro'),\n",
+ "        'roc_auc': roc_auc_score(y_test, positive_prob),\n",
+ "        'pr_auc': average_precision_score(y_test, positive_prob),\n",
+ "        'y_pred': predicted,\n",
+ "        'y_prob': positive_prob,\n",
+ "    }\n",
+ "\n",
+ "\n",
+ "def cross_val_model(model, X, y, pipeline_key):\n",
+ "    \"\"\"Stratified k-fold cross-validation of ``model`` on ``(X, y)``.\n",
+ "\n",
+ "    Each fold is scored by ``evaluate_model`` on a fresh, unfitted copy of\n",
+ "    ``model``, so cross-validation and the held-out evaluation share a single\n",
+ "    transform/fit/score code path. The estimator passed in is never fitted.\n",
+ "\n",
+ "    ``X`` and ``y`` must support ``.iloc`` positional indexing. The number of\n",
+ "    folds and the shuffle seed come from ``CONFIG``.\n",
+ "\n",
+ "    Returns a dict with '<metric>_mean' and '<metric>_std' for the metrics\n",
+ "    'f1_macro', 'roc_auc' and 'pr_auc' (std is the population std of the folds).\n",
+ "    Raises ValueError for an unknown ``pipeline_key``.\n",
+ "    \"\"\"\n",
+ "    skf = StratifiedKFold(n_splits=CONFIG['cv_folds'], shuffle=True,\n",
+ "                          random_state=CONFIG['random_state'])\n",
+ "    metric_names = ('f1_macro', 'roc_auc', 'pr_auc')\n",
+ "    fold_scores = {name: [] for name in metric_names}\n",
+ "\n",
+ "    for tr_idx, val_idx in skf.split(X, y):\n",
+ "        # Rebuild the estimator from its parameters so folds share no fitted state.\n",
+ "        fold_model = type(model)(**model.get_params())\n",
+ "        fold_res = evaluate_model(\n",
+ "            fold_model,\n",
+ "            X.iloc[tr_idx], y.iloc[tr_idx],\n",
+ "            X.iloc[val_idx], y.iloc[val_idx],\n",
+ "            pipeline_key,\n",
+ "        )\n",
+ "        for name in metric_names:\n",
+ "            fold_scores[name].append(fold_res[name])\n",
+ "\n",
+ "    summary = {}\n",
+ "    for name in metric_names:\n",
+ "        summary[f'{name}_mean'] = np.mean(fold_scores[name])\n",
+ "        summary[f'{name}_std'] = np.std(fold_scores[name])\n",
+ "    return summary\n",
+ "\n",
+ "\n",
+ "# --- Run Phase 1 ---\n",
+ "# Per Phase 1 dataset: stratified split, cross-validation on the training split,\n",
+ "# held-out test evaluation, model ranking and diagnostic plots. Every figure is\n",
+ "# saved and closed through its own Figure handle instead of pyplot's implicit\n",
+ "# current-figure state, so opening/closing the per-model ROC and PR figures can\n",
+ "# never change which figure the confusion-matrix grid is written from.\n",
+ "for ds_name in CONFIG['phase1']:\n",
+ "    print(f\"\\n{'='*70}\")\n",
+ "    print(f\"PHASE 1 — Dataset: {ds_name}\")\n",
+ "    print(f\"{'='*70}\")\n",
+ "\n",
+ "    data = DATASETS[ds_name]\n",
+ "    X, y = data['X'], data['y']\n",
+ "    X_train, X_test, y_train, y_test = train_test_split(\n",
+ "        X, y, test_size=CONFIG['test_size'], stratify=y,\n",
+ "        random_state=CONFIG['random_state']\n",
+ "    )\n",
+ "    print(f\"Train: {X_train.shape} | Test: {X_test.shape}\")\n",
+ "    print(f\"Train dist: {np.bincount(y_train)} | Test dist: {np.bincount(y_test)}\")\n",
+ "\n",
+ "    # Store the split on the dataset entry so later sections reuse these rows.\n",
+ "    DATASETS[ds_name]['X_train'] = X_train\n",
+ "    DATASETS[ds_name]['X_test'] = X_test\n",
+ "    DATASETS[ds_name]['y_train'] = y_train\n",
+ "    DATASETS[ds_name]['y_test'] = y_test\n",
+ "\n",
+ "    # 5.2 Cross-validation\n",
+ "    models = build_models(y_train.values)\n",
+ "    cv_results = []\n",
+ "    for mname, mdict in tqdm(models.items(), desc=f\"CV {ds_name}\"):\n",
+ "        cv = cross_val_model(mdict['model'], X_train, y_train, mdict['pipeline'])\n",
+ "        cv_results.append({\n",
+ "            'model': mname,\n",
+ "            'f1_macro': f\"{cv['f1_macro_mean']:.4f} ± {cv['f1_macro_std']:.4f}\",\n",
+ "            'roc_auc': f\"{cv['roc_auc_mean']:.4f} ± {cv['roc_auc_std']:.4f}\",\n",
+ "            'pr_auc': f\"{cv['pr_auc_mean']:.4f} ± {cv['pr_auc_std']:.4f}\",\n",
+ "            # Raw means are kept only for sorting; the formatted strings are displayed.\n",
+ "            'f1_macro_raw': cv['f1_macro_mean'],\n",
+ "            'roc_auc_raw': cv['roc_auc_mean'],\n",
+ "        })\n",
+ "    cv_df = pd.DataFrame(cv_results).sort_values('f1_macro_raw', ascending=False)\n",
+ "    print(f\"\\n--- CV Results ({ds_name}) ---\")\n",
+ "    display(cv_df[['model', 'f1_macro', 'roc_auc', 'pr_auc']])\n",
+ "\n",
+ "    # 5.3 Final evaluation on held-out test set\n",
+ "    test_results = {}\n",
+ "    fig, axes = plt.subplots(2, 4, figsize=(20, 10))\n",
+ "    axes = axes.flatten()\n",
+ "\n",
+ "    for ax_idx, (mname, mdict) in enumerate(tqdm(models.items(), desc=f\"Test {ds_name}\")):\n",
+ "        res = evaluate_model(mdict['model'], X_train, y_train, X_test, y_test, mdict['pipeline'])\n",
+ "        test_results[mname] = res\n",
+ "        print(f\"\\n--- {mname} on {ds_name} test set ---\")\n",
+ "        print(classification_report(y_test, res['y_pred'],\n",
+ "                                    target_names=['clean', 'buggy'], digits=4))\n",
+ "        cm = confusion_matrix(y_test, res['y_pred'])\n",
+ "        # The grid has a fixed number of panels; models beyond it are not drawn.\n",
+ "        if ax_idx < len(axes):\n",
+ "            ax = axes[ax_idx]\n",
+ "            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,\n",
+ "                        xticklabels=['clean', 'buggy'],\n",
+ "                        yticklabels=['clean', 'buggy'])\n",
+ "            ax.set_title(f'{mname} — CM')\n",
+ "            ax.set_ylabel('True')\n",
+ "            ax.set_xlabel('Predicted')\n",
+ "\n",
+ "        # ROC\n",
+ "        fpr, tpr, _ = roc_curve(y_test, res['y_prob'])\n",
+ "        roc_fig, roc_ax = plt.subplots(figsize=(5, 4))\n",
+ "        roc_ax.plot(fpr, tpr, label=f'{mname} (AUC={res[\"roc_auc\"]:.3f})')\n",
+ "        roc_ax.plot([0, 1], [0, 1], 'k--')\n",
+ "        roc_ax.set_xlabel('FPR')\n",
+ "        roc_ax.set_ylabel('TPR')\n",
+ "        roc_ax.set_title(f'ROC: {mname} — {ds_name}')\n",
+ "        roc_ax.legend()\n",
+ "        roc_fig.savefig(f\"roc_{ds_name}_{mname}.png\")\n",
+ "        plt.close(roc_fig)\n",
+ "\n",
+ "        # PR\n",
+ "        prec, rec, _ = precision_recall_curve(y_test, res['y_prob'])\n",
+ "        pr_fig, pr_ax = plt.subplots(figsize=(5, 4))\n",
+ "        pr_ax.plot(rec, prec, label=f'{mname} (AP={res[\"pr_auc\"]:.3f})')\n",
+ "        pr_ax.set_xlabel('Recall')\n",
+ "        pr_ax.set_ylabel('Precision')\n",
+ "        pr_ax.set_title(f'PR: {mname} — {ds_name}')\n",
+ "        pr_ax.legend()\n",
+ "        pr_fig.savefig(f\"pr_{ds_name}_{mname}.png\")\n",
+ "        plt.close(pr_fig)\n",
+ "\n",
+ "    # Hide panels that received no model (no-op when the grid is full).\n",
+ "    for spare_ax in axes[len(models):]:\n",
+ "        spare_ax.set_visible(False)\n",
+ "    fig.tight_layout()\n",
+ "    fig.savefig(f\"confusion_matrices_{ds_name}.png\")\n",
+ "    plt.close(fig)\n",
+ "\n",
+ "    # 5.4 Model ranking\n",
+ "    # NOTE(review): the ranking below (and therefore BEST_MODEL / TOP2_MODELS) is\n",
+ "    # chosen on the held-out test set, so the test scores reported for the\n",
+ "    # selected models are optimistically biased. Consider ranking on cv_df.\n",
+ "    test_summary = [\n",
+ "        {\n",
+ "            'model': mname,\n",
+ "            'f1_macro': res['f1_macro'],\n",
+ "            'roc_auc': res['roc_auc'],\n",
+ "            'pr_auc': res['pr_auc'],\n",
+ "        }\n",
+ "        for mname, res in test_results.items()\n",
+ "    ]\n",
+ "    test_df = pd.DataFrame(test_summary).sort_values(\n",
+ "        ['f1_macro', 'roc_auc'], ascending=[False, False]\n",
+ "    )\n",
+ "    print(f\"\\n--- Test Set Ranking ({ds_name}) ---\")\n",
+ "    display(test_df)\n",
+ "\n",
+ "    top1 = test_df.iloc[0]['model']\n",
+ "    top2 = [test_df.iloc[0]['model'], test_df.iloc[1]['model']]\n",
+ "    BEST_MODEL[ds_name] = top1\n",
+ "    TOP2_MODELS[ds_name] = top2\n",
+ "    RESULTS_PHASE1[ds_name] = test_results\n",
+ "    print(f\"Top-1 (SHAP): {top1}\")\n",
+ "    print(f\"Top-2 (LIME): {top2}\")\n",
+ "\n",
+ "    # 5.5 Comparative visualisation\n",
+ "    fig, ax = plt.subplots(figsize=(12, 6))\n",
+ "    x = np.arange(len(test_df))\n",
+ "    width = 0.25\n",
+ "    # One colour per model family; the three metrics differ by alpha and hatch.\n",
+ "    color_map = {\n",
+ "        'RandomForest': 'teal', 'ExtraTrees': 'teal',\n",
+ "        'XGBoost': 'coral', 'LightGBM': 'coral', 'GradientBoosting': 'coral',\n",
+ "        'LogisticRegression': '#DAA520', 'SVM': '#DAA520', 'KNN': '#DAA520',\n",
+ "    }\n",
+ "    colors = [color_map.get(m, 'gray') for m in test_df['model']]\n",
+ "    ax.bar(x - width, test_df['f1_macro'], width, label='F1-macro',\n",
+ "           color=colors, alpha=0.8, edgecolor='black')\n",
+ "    ax.bar(x, test_df['roc_auc'], width, label='ROC-AUC',\n",
+ "           color=colors, alpha=0.6, edgecolor='black', hatch='//')\n",
+ "    ax.bar(x + width, test_df['pr_auc'], width, label='PR-AUC',\n",
+ "           color=colors, alpha=0.4, edgecolor='black', hatch='xx')\n",
+ "    ax.set_xticks(x)\n",
+ "    ax.set_xticklabels(test_df['model'], rotation=45, ha='right')\n",
+ "    ax.set_ylabel('Score')\n",
+ "    ax.set_title(f'Model Comparison — {ds_name}')\n",
+ "    ax.legend()\n",
+ "    ax.set_ylim(0, 1.05)\n",
+ "    fig.tight_layout()\n",
+ "    fig.savefig(f\"model_comparison_{ds_name}.png\")\n",
+ "    plt.close(fig)\n",
+ "    print(f\"Saved model_comparison_{ds_name}.png\")\n",
+ "\n",
+ "print(\"\\nPhase 1 training complete.\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e5fe03a",
+ "metadata": {},
+ "source": [
+ "# Section 6 — LIME (Top-2 Models, Phase 1 Datasets)\n",
+ "\n",
+ "LIME explanations for the top-2 models on each Phase 1 dataset, including stability checks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "d3dfe5af",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:56:03.677280Z",
+ "iopub.status.busy": "2026-05-07T14:56:03.677043Z",
+ "iopub.status.idle": "2026-05-07T14:56:09.833455Z",
+ "shell.execute_reply": "2026-05-07T14:56:09.832470Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "======================================================================\n",
+ "LIME — Dataset: eclipse\n",
+ "======================================================================\n",
+ "\n",
+ "--- LIME for GradientBoosting ---\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_GradientBoosting_confidently_buggy.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_GradientBoosting_borderline.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_GradientBoosting_confidently_clean.png\n",
+ "\n",
+ "--- LIME Stability (eclipse, GradientBoosting, confidently_buggy) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_LIME_weight | \n",
+ " std | \n",
+ " stable | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.177890 | \n",
+ " 0.005512 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.037933 | \n",
+ " 0.001901 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.097461 | \n",
+ " 0.001851 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " -0.020249 | \n",
+ " 0.002427 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.011379 | \n",
+ " 0.001608 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_LIME_weight std stable\n",
+ "0 numberOfBugsFoundUntil 0.177890 0.005512 True\n",
+ "1 numberOfNonTrivialBugsFoundUntil 0.037933 0.001901 True\n",
+ "2 numberOfMajorBugsFoundUntil 0.097461 0.001851 True\n",
+ "3 numberOfCriticalBugsFoundUntil -0.020249 0.002427 False\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.011379 0.001608 False"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING: Unstable features: ['numberOfCriticalBugsFoundUntil', 'numberOfHighPriorityBugsFoundUntil']\n",
+ "\n",
+ "--- LIME for SVM ---\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_SVM_confidently_buggy.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_SVM_borderline.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_eclipse_SVM_confidently_clean.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LIME Stability (eclipse, SVM, confidently_buggy) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_LIME_weight | \n",
+ " std | \n",
+ " stable | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.108020 | \n",
+ " 0.000932 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.006968 | \n",
+ " 0.001553 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.009797 | \n",
+ " 0.000285 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.003714 | \n",
+ " 0.000333 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.019348 | \n",
+ " 0.002188 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_LIME_weight std stable\n",
+ "0 numberOfBugsFoundUntil 0.108020 0.000932 True\n",
+ "1 numberOfNonTrivialBugsFoundUntil 0.006968 0.001553 False\n",
+ "2 numberOfMajorBugsFoundUntil 0.009797 0.000285 True\n",
+ "3 numberOfCriticalBugsFoundUntil 0.003714 0.000333 True\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.019348 0.002188 False"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING: Unstable features: ['numberOfNonTrivialBugsFoundUntil', 'numberOfHighPriorityBugsFoundUntil']\n",
+ "\n",
+ "======================================================================\n",
+ "LIME — Dataset: mylyn\n",
+ "======================================================================\n",
+ "\n",
+ "--- LIME for SVM ---\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_SVM_confidently_buggy.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_SVM_borderline.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_SVM_confidently_clean.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LIME Stability (mylyn, SVM, confidently_buggy) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_LIME_weight | \n",
+ " std | \n",
+ " stable | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.205426 | \n",
+ " 0.001041 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " -0.116240 | \n",
+ " 0.001091 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.046075 | \n",
+ " 0.001564 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " -0.034608 | \n",
+ " 0.002826 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.029225 | \n",
+ " 0.001809 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_LIME_weight std stable\n",
+ "0 numberOfBugsFoundUntil 0.205426 0.001041 True\n",
+ "1 numberOfNonTrivialBugsFoundUntil -0.116240 0.001091 True\n",
+ "2 numberOfMajorBugsFoundUntil 0.046075 0.001564 True\n",
+ "3 numberOfCriticalBugsFoundUntil -0.034608 0.002826 True\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.029225 0.001809 True"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- LIME for KNN ---\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_KNN_confidently_buggy.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_KNN_borderline.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved LIME explanation: lime_mylyn_KNN_confidently_clean.png\n",
+ "\n",
+ "--- LIME Stability (mylyn, KNN, confidently_buggy) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_LIME_weight | \n",
+ " std | \n",
+ " stable | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.082644 | \n",
+ " 0.000496 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.063268 | \n",
+ " 0.001292 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.037995 | \n",
+ " 0.001511 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " -0.009260 | \n",
+ " 0.002500 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.105383 | \n",
+ " 0.001307 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_LIME_weight std stable\n",
+ "0 numberOfBugsFoundUntil 0.082644 0.000496 True\n",
+ "1 numberOfNonTrivialBugsFoundUntil 0.063268 0.001292 True\n",
+ "2 numberOfMajorBugsFoundUntil 0.037995 0.001511 True\n",
+ "3 numberOfCriticalBugsFoundUntil -0.009260 0.002500 False\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.105383 0.001307 True"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING: Unstable features: ['numberOfCriticalBugsFoundUntil']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# PHASE 1 LIME\n",
+ "# For each Phase 1 dataset, explain the top-2 ranked models on three test rows\n",
+ "# (most confidently buggy, closest to p=0.5, most confidently clean), then check\n",
+ "# how stable the LIME weights of the confidently-buggy row are across seeds.\n",
+ "for ds_name in CONFIG['phase1']:\n",
+ "    print(f\"\\n{'='*70}\")\n",
+ "    print(f\"LIME — Dataset: {ds_name}\")\n",
+ "    print(f\"{'='*70}\")\n",
+ "\n",
+ "    data = DATASETS[ds_name]\n",
+ "    X_train, X_test = data['X_train'], data['X_test']\n",
+ "    y_test = data['y_test']\n",
+ "    feature_names = data['feature_names']\n",
+ "\n",
+ "    pipe_c = make_pipeline_c()\n",
+ "    X_train_c = pipe_c.fit_transform(X_train).values\n",
+ "    X_test_c = pipe_c.transform(X_test).values\n",
+ "\n",
+ "    # Arguments shared by every explainer / explanation built for this dataset;\n",
+ "    # only the random seed varies between explainers.\n",
+ "    explainer_kwargs = dict(\n",
+ "        training_data=X_train_c,\n",
+ "        feature_names=feature_names,\n",
+ "        class_names=['clean', 'buggy'],\n",
+ "        mode='classification',\n",
+ "        discretize_continuous=False,\n",
+ "        sample_around_instance=True,\n",
+ "    )\n",
+ "    explain_kwargs = dict(num_features=len(feature_names), num_samples=5000)\n",
+ "\n",
+ "    # Row positions per true class do not depend on the model being explained.\n",
+ "    buggy_idx = np.where(y_test.values == 1)[0]\n",
+ "    clean_idx = np.where(y_test.values == 0)[0]\n",
+ "\n",
+ "    for mname in TOP2_MODELS[ds_name]:\n",
+ "        print(f\"\\n--- LIME for {mname} ---\")\n",
+ "        # NOTE(review): the model is rebuilt and refitted here on pipeline-C\n",
+ "        # features with a plain fit(), whereas Phase 1 ranked models fitted via\n",
+ "        # evaluate_model (pipeline A/B, with balanced sample weights or SMOTE\n",
+ "        # where applicable). Confirm the explained model is meant to differ.\n",
+ "        models = build_models(data['y_train'].values)\n",
+ "        model = models[mname]['model']\n",
+ "        model.fit(X_train_c, data['y_train'])\n",
+ "        probs = model.predict_proba(X_test_c)[:, 1]\n",
+ "\n",
+ "        conf_buggy_idx = int(buggy_idx[probs[buggy_idx].argmax()])\n",
+ "        conf_clean_idx = int(clean_idx[probs[clean_idx].argmin()])\n",
+ "        border_idx = int(np.abs(probs - 0.5).argmin())\n",
+ "        instances = {\n",
+ "            'confidently_buggy': conf_buggy_idx,\n",
+ "            'borderline': border_idx,\n",
+ "            'confidently_clean': conf_clean_idx,\n",
+ "        }\n",
+ "\n",
+ "        explainer = LimeTabularExplainer(\n",
+ "            random_state=CONFIG['random_state'], **explainer_kwargs\n",
+ "        )\n",
+ "\n",
+ "        for inst_label, idx in instances.items():\n",
+ "            true_label = int(y_test.values[idx])\n",
+ "            pred_prob = float(probs[idx])\n",
+ "            exp = explainer.explain_instance(\n",
+ "                X_test_c[idx], model.predict_proba, **explain_kwargs\n",
+ "            )\n",
+ "            fig = exp.as_pyplot_figure()\n",
+ "            fig.suptitle(\n",
+ "                f\"{ds_name} | {mname} | {inst_label} | idx={idx} | \"\n",
+ "                f\"true={true_label} | prob={pred_prob:.3f}\"\n",
+ "            )\n",
+ "            fig.tight_layout()\n",
+ "            fname = f\"lime_{ds_name}_{mname}_{inst_label}.png\"\n",
+ "            fig.savefig(fname)\n",
+ "            plt.close(fig)\n",
+ "            print(f\"Saved LIME explanation: {fname}\")\n",
+ "\n",
+ "        # 6.3 LIME stability check for confidently-buggy instance\n",
+ "        idx = conf_buggy_idx\n",
+ "        x_inst = X_test_c[idx]\n",
+ "        weights_runs = []\n",
+ "        for seed in [42, 123, 999]:\n",
+ "            seeded_explainer = LimeTabularExplainer(random_state=seed, **explainer_kwargs)\n",
+ "            seeded_exp = seeded_explainer.explain_instance(\n",
+ "                x_inst, model.predict_proba, **explain_kwargs\n",
+ "            )\n",
+ "            weight_by_feature = dict(seeded_exp.as_list())\n",
+ "            weights_runs.append([weight_by_feature.get(f, 0.0) for f in feature_names])\n",
+ "\n",
+ "        weights_arr = np.array(weights_runs)\n",
+ "        mean_w = weights_arr.mean(axis=0)\n",
+ "        std_w = weights_arr.std(axis=0)\n",
+ "        # Stable = std across seeds below 10% of |mean weight|; a (near-)zero mean\n",
+ "        # weight is always reported as unstable.\n",
+ "        stable_flags = [\n",
+ "            (s < 0.1 * abs(m)) if abs(m) > 1e-9 else False\n",
+ "            for m, s in zip(mean_w, std_w)\n",
+ "        ]\n",
+ "        stab_df = pd.DataFrame({\n",
+ "            'feature': feature_names,\n",
+ "            'mean_LIME_weight': mean_w,\n",
+ "            'std': std_w,\n",
+ "            'stable': stable_flags,\n",
+ "        })\n",
+ "        print(f\"\\n--- LIME Stability ({ds_name}, {mname}, confidently_buggy) ---\")\n",
+ "        display(stab_df)\n",
+ "        unstable = stab_df[~stab_df['stable']]['feature'].tolist()\n",
+ "        if unstable:\n",
+ "            print(f\"WARNING: Unstable features: {unstable}\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbf7d0d9",
+ "metadata": {},
+ "source": [
+ "# Section 7 — SHAP (Best Model, Phase 1 Datasets)\n",
+ "\n",
+ "SHAP analysis for the best model on each Phase 1 dataset. Extract mean absolute SHAP values as weight vectors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "7a6cc3dd",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:56:09.835841Z",
+ "iopub.status.busy": "2026-05-07T14:56:09.835580Z",
+ "iopub.status.idle": "2026-05-07T14:57:04.451464Z",
+ "shell.execute_reply": "2026-05-07T14:57:04.450574Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "======================================================================"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "SHAP — Dataset: eclipse | Best model: GradientBoosting\n",
+ "======================================================================\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2350/1849820119.py:53: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning.\n",
+ " shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n",
+ "/tmp/ipykernel_2350/1849820119.py:63: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning.\n",
+ " shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved shap_summary_eclipse.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- SHAP Weights (eclipse) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_abs_SHAP | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 1.124781 | \n",
+ " 0.511651 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.508046 | \n",
+ " 0.231105 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.317338 | \n",
+ " 0.144353 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.175032 | \n",
+ " 0.079620 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.073141 | \n",
+ " 0.033271 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_abs_SHAP weight\n",
+ "0 numberOfBugsFoundUntil 1.124781 0.511651\n",
+ "1 numberOfNonTrivialBugsFoundUntil 0.508046 0.231105\n",
+ "2 numberOfMajorBugsFoundUntil 0.317338 0.144353\n",
+ "3 numberOfCriticalBugsFoundUntil 0.175032 0.079620\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.073141 0.033271"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved shap_weights_eclipse.json\n",
+ "\n",
+ "======================================================================\n",
+ "SHAP — Dataset: mylyn | Best model: SVM\n",
+ "======================================================================\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "bb77565a12f3457f98a236d99e9d795e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/373 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/tmp/ipykernel_2350/1849820119.py:53: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning.\n",
+ " shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n",
+ "/tmp/ipykernel_2350/1849820119.py:63: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning.\n",
+ " shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved shap_summary_mylyn.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "--- SHAP Weights (mylyn) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " mean_abs_SHAP | \n",
+ " weight | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.120600 | \n",
+ " 0.450861 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.078152 | \n",
+ " 0.292171 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.029633 | \n",
+ " 0.110783 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.026053 | \n",
+ " 0.097398 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.013050 | \n",
+ " 0.048787 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature mean_abs_SHAP weight\n",
+ "0 numberOfBugsFoundUntil 0.120600 0.450861\n",
+ "4 numberOfHighPriorityBugsFoundUntil 0.078152 0.292171\n",
+ "2 numberOfMajorBugsFoundUntil 0.029633 0.110783\n",
+ "1 numberOfNonTrivialBugsFoundUntil 0.026053 0.097398\n",
+ "3 numberOfCriticalBugsFoundUntil 0.013050 0.048787"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved shap_weights_mylyn.json\n",
+ "\n",
+ "Saved shap_weights_phase1.json\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# PHASE 1 SHAP\n",
+ "# For every Phase 1 dataset: refit its best model on the make_pipeline_c() features,\n",
+ "# explain the test split with SHAP, save the plots, and store the normalised\n",
+ "# mean-|SHAP| values in SHAP_WEIGHTS (also written to JSON).\n",
+ "for ds_name in CONFIG['phase1']:\n",
+ "    print(f\"\\n{'='*70}\")\n",
+ "    print(f\"SHAP — Dataset: {ds_name} | Best model: {BEST_MODEL[ds_name]}\")\n",
+ "    print(f\"{'='*70}\")\n",
+ "\n",
+ "    data = DATASETS[ds_name]\n",
+ "    X_train = data['X_train']\n",
+ "    X_test = data['X_test']\n",
+ "    y_train = data['y_train']\n",
+ "    y_test = data['y_test']\n",
+ "    feature_names = data['feature_names']\n",
+ "\n",
+ "    # Fit the preprocessing on the training split only, then reuse it on the test split.\n",
+ "    pipe_c = make_pipeline_c()\n",
+ "    X_train_c = pipe_c.fit_transform(X_train).values\n",
+ "    X_test_c = pipe_c.transform(X_test).values\n",
+ "\n",
+ "    models = build_models(y_train.values)\n",
+ "    best_name = BEST_MODEL[ds_name]\n",
+ "    model = models[best_name]['model']\n",
+ "    model.fit(X_train_c, y_train)\n",
+ "\n",
+ "    # 7.1 Determine explainer type\n",
+ "    tree_types = (RandomForestClassifier, DecisionTreeClassifier,\n",
+ "                  ExtraTreesClassifier, GradientBoostingClassifier)\n",
+ "    if isinstance(model, tree_types):\n",
+ "        explainer = shap.TreeExplainer(model)\n",
+ "    elif XGBClassifier is not None and isinstance(model, XGBClassifier):\n",
+ "        explainer = shap.TreeExplainer(model)\n",
+ "    elif LGBMClassifier is not None and isinstance(model, LGBMClassifier):\n",
+ "        explainer = shap.TreeExplainer(model)\n",
+ "    elif isinstance(model, LogisticRegression):\n",
+ "        explainer = shap.LinearExplainer(model, X_train_c)\n",
+ "    else:\n",
+ "        # Model-agnostic fallback: explain predict_proba against a 100-row background sample.\n",
+ "        background = shap.sample(X_train_c, 100, random_state=CONFIG['random_state'])\n",
+ "        explainer = shap.KernelExplainer(model.predict_proba, background)\n",
+ "\n",
+ "    # 7.2 Compute SHAP values\n",
+ "    shap_vals = explainer.shap_values(X_test_c)\n",
+ "\n",
+ "    # Handle different return shapes\n",
+ "    if isinstance(shap_vals, list):\n",
+ "        # One array per class -> take positive class\n",
+ "        shap_values = shap_vals[1]\n",
+ "    elif shap_vals.ndim == 3:\n",
+ "        # (n_samples, n_features, n_classes) -> take positive class\n",
+ "        shap_values = shap_vals[:, :, 1]\n",
+ "    else:\n",
+ "        shap_values = shap_vals\n",
+ "\n",
+ "    # Mean |SHAP| per feature: drives both the top-3 selection and the weights in 7.4.\n",
+ "    mean_abs_shap = np.abs(shap_values).mean(axis=0)\n",
+ "\n",
+ "    # 7.3 SHAP plots\n",
+ "    # Bar summary\n",
+ "    plt.figure(figsize=(10, 6))\n",
+ "    shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n",
+ "                      plot_type='bar', show=False)\n",
+ "    plt.title(f\"SHAP Bar Summary — {ds_name}\")\n",
+ "    plt.tight_layout()\n",
+ "    plt.savefig(f\"shap_summary_{ds_name}.png\")\n",
+ "    plt.close()\n",
+ "    print(f\"Saved shap_summary_{ds_name}.png\")\n",
+ "\n",
+ "    # Beeswarm\n",
+ "    plt.figure(figsize=(10, 6))\n",
+ "    shap.summary_plot(shap_values, X_test_c, feature_names=feature_names,\n",
+ "                      plot_type='dot', show=False)\n",
+ "    plt.title(f\"SHAP Beeswarm — {ds_name}\")\n",
+ "    plt.tight_layout()\n",
+ "    plt.savefig(f\"shap_beeswarm_{ds_name}.png\")\n",
+ "    plt.close()\n",
+ "\n",
+ "    # Dependence plots for top-3 features\n",
+ "    top3_idx = np.argsort(mean_abs_shap)[-3:][::-1]\n",
+ "    for idx in top3_idx:\n",
+ "        # Pass our own Axes: dependence_plot creates a figure of its own when no `ax`\n",
+ "        # is given, which would leave a separately created figure open and empty.\n",
+ "        fig, ax = plt.subplots(figsize=(8, 5))\n",
+ "        shap.dependence_plot(int(idx), shap_values, X_test_c,\n",
+ "                             feature_names=feature_names, ax=ax, show=False)\n",
+ "        ax.set_title(f\"Dependence: {feature_names[idx]} — {ds_name}\")\n",
+ "        fig.tight_layout()\n",
+ "        fig.savefig(f\"shap_dependence_{ds_name}_{feature_names[idx]}.png\")\n",
+ "        plt.close(fig)\n",
+ "\n",
+ "    # Waterfall plots for 3 representative instances:\n",
+ "    # most confident true-buggy, closest to the 0.5 boundary, most confident true-clean.\n",
+ "    probs = model.predict_proba(X_test_c)[:, 1]\n",
+ "    buggy_idx = np.where(y_test.values == 1)[0]\n",
+ "    clean_idx = np.where(y_test.values == 0)[0]\n",
+ "    rep_indices = [\n",
+ "        int(buggy_idx[np.argmax(probs[buggy_idx])]),\n",
+ "        int(np.argmin(np.abs(probs - 0.5))),\n",
+ "        int(clean_idx[np.argmin(probs[clean_idx])]),\n",
+ "    ]\n",
+ "\n",
+ "    # Scalar base value that matches shap_values. When the explainer reports one\n",
+ "    # expected value per class, shap_values holds the positive class (index 1), so the\n",
+ "    # base value must be taken from index 1 as well; a single-output explainer has\n",
+ "    # only one entry.\n",
+ "    expected = np.atleast_1d(np.asarray(explainer.expected_value, dtype=float)).ravel()\n",
+ "    base = float(expected[1] if expected.size > 1 else expected[0])\n",
+ "\n",
+ "    for ridx in rep_indices:\n",
+ "        plt.figure(figsize=(10, 6))\n",
+ "        shap.waterfall_plot(shap.Explanation(\n",
+ "            values=shap_values[ridx],\n",
+ "            base_values=base,\n",
+ "            data=X_test_c[ridx],\n",
+ "            feature_names=feature_names,\n",
+ "        ), max_display=len(feature_names), show=False)\n",
+ "        plt.title(f\"Waterfall — {ds_name} — idx={ridx} — true={int(y_test.values[ridx])}\")\n",
+ "        plt.tight_layout()\n",
+ "        plt.savefig(f\"shap_waterfall_{ds_name}_idx{ridx}.png\")\n",
+ "        plt.close()\n",
+ "\n",
+ "    # 7.4 Extract SHAP weight vector (mean |SHAP| normalised to sum to 1)\n",
+ "    shap_weights = mean_abs_shap / mean_abs_shap.sum()\n",
+ "    SHAP_WEIGHTS[ds_name] = dict(zip(feature_names, shap_weights))\n",
+ "\n",
+ "    weight_df = pd.DataFrame({\n",
+ "        'feature': feature_names,\n",
+ "        'mean_abs_SHAP': mean_abs_shap,\n",
+ "        'weight': shap_weights,\n",
+ "    }).sort_values('weight', ascending=False)\n",
+ "    print(f\"\\n--- SHAP Weights ({ds_name}) ---\")\n",
+ "    display(weight_df)\n",
+ "\n",
+ "    with open(f\"shap_weights_{ds_name}.json\", 'w') as f:\n",
+ "        json.dump(SHAP_WEIGHTS[ds_name], f, indent=2)\n",
+ "    print(f\"Saved shap_weights_{ds_name}.json\")\n",
+ "\n",
+ "# Save merged phase1 SHAP weights\n",
+ "with open(\"shap_weights_phase1.json\", 'w') as f:\n",
+ "    json.dump(SHAP_WEIGHTS, f, indent=2)\n",
+ "print(\"\\nSaved shap_weights_phase1.json\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "23125db6",
+ "metadata": {},
+ "source": [
+ "# Section 8 — Cross-Dataset SHAP Weight Averaging\n",
+ "\n",
+ "Align feature spaces across Phase 1 datasets, handle missing features as NaN, and compute a canonical averaged weight vector."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "1aed1920",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:57:04.454361Z",
+ "iopub.status.busy": "2026-05-07T14:57:04.454039Z",
+ "iopub.status.idle": "2026-05-07T14:57:04.707461Z",
+ "shell.execute_reply": "2026-05-07T14:57:04.706524Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unified feature space (5 features): ['numberOfBugsFoundUntil', 'numberOfCriticalBugsFoundUntil', 'numberOfHighPriorityBugsFoundUntil', 'numberOfMajorBugsFoundUntil', 'numberOfNonTrivialBugsFoundUntil']\n",
+ "Saved averaged_shap_weights.json\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved averaged_shap_weights.png\n",
+ "\n",
+ "--- Averaged SHAP Weight Comparison ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " feature | \n",
+ " averaged_weight | \n",
+ " weight_eclipse | \n",
+ " weight_mylyn | \n",
+ " n_datasets_contributing | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " numberOfBugsFoundUntil | \n",
+ " 0.4813 | \n",
+ " 0.5117 | \n",
+ " 0.4509 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " numberOfCriticalBugsFoundUntil | \n",
+ " 0.0642 | \n",
+ " 0.0796 | \n",
+ " 0.0488 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " numberOfHighPriorityBugsFoundUntil | \n",
+ " 0.1627 | \n",
+ " 0.0333 | \n",
+ " 0.2922 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " numberOfMajorBugsFoundUntil | \n",
+ " 0.1276 | \n",
+ " 0.1444 | \n",
+ " 0.1108 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " numberOfNonTrivialBugsFoundUntil | \n",
+ " 0.1643 | \n",
+ " 0.2311 | \n",
+ " 0.0974 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " feature averaged_weight weight_eclipse \\\n",
+ "0 numberOfBugsFoundUntil 0.4813 0.5117 \n",
+ "1 numberOfCriticalBugsFoundUntil 0.0642 0.0796 \n",
+ "2 numberOfHighPriorityBugsFoundUntil 0.1627 0.0333 \n",
+ "3 numberOfMajorBugsFoundUntil 0.1276 0.1444 \n",
+ "4 numberOfNonTrivialBugsFoundUntil 0.1643 0.2311 \n",
+ "\n",
+ " weight_mylyn n_datasets_contributing \n",
+ "0 0.4509 2 \n",
+ "1 0.0488 2 \n",
+ "2 0.2922 2 \n",
+ "3 0.1108 2 \n",
+ "4 0.0974 2 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# 8.1 Build unified feature space\n",
+ "# Sorted union of every feature that received a SHAP weight in any Phase 1 dataset.\n",
+ "ALL_FEATURES = sorted(set().union(*(SHAP_WEIGHTS[ds] for ds in CONFIG['phase1'])))\n",
+ "print(f\"Unified feature space ({len(ALL_FEATURES)} features): {ALL_FEATURES}\")\n",
+ "\n",
+ "# One row per Phase 1 dataset, one column per unified feature; NaN marks a feature\n",
+ "# for which that dataset has no weight.\n",
+ "weight_matrix = np.array(\n",
+ "    [[SHAP_WEIGHTS[ds].get(feat, np.nan) for feat in ALL_FEATURES]\n",
+ "     for ds in CONFIG['phase1']],\n",
+ "    dtype=float,\n",
+ ")\n",
+ "\n",
+ "# 8.2 Averaged weight vector (NaN-aware)\n",
+ "# Average over the datasets that have the feature, then rescale so the weights sum to 1.\n",
+ "nan_mean_weights = np.nanmean(weight_matrix, axis=0)\n",
+ "AVERAGED_SHAP_WEIGHTS = nan_mean_weights / nan_mean_weights.sum()\n",
+ "avg_dict = dict(zip(ALL_FEATURES, AVERAGED_SHAP_WEIGHTS.tolist()))\n",
+ "\n",
+ "with open(\"averaged_shap_weights.json\", 'w') as fh:\n",
+ "    json.dump(avg_dict, fh, indent=2)\n",
+ "print(\"Saved averaged_shap_weights.json\")\n",
+ "\n",
+ "# Visualise: horizontal bar per feature, annotated with its averaged weight\n",
+ "fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "y_pos = np.arange(len(ALL_FEATURES))\n",
+ "ax.barh(y_pos, AVERAGED_SHAP_WEIGHTS, color='seagreen')\n",
+ "ax.set_yticks(y_pos)\n",
+ "ax.set_yticklabels(ALL_FEATURES)\n",
+ "ax.set(xlabel='Averaged SHAP Weight',\n",
+ "       title='Averaged SHAP Weights Across Phase 1 Datasets')\n",
+ "for bar_row, bar_len in enumerate(AVERAGED_SHAP_WEIGHTS):\n",
+ "    ax.text(bar_len + 0.005, bar_row, f\"{bar_len:.3f}\", va='center', fontsize=9)\n",
+ "fig.tight_layout()\n",
+ "fig.savefig(\"averaged_shap_weights.png\")\n",
+ "plt.close(fig)\n",
+ "print(\"Saved averaged_shap_weights.png\")\n",
+ "\n",
+ "# Table: averaged weight next to each dataset's own weight, plus how many datasets\n",
+ "# actually contributed a value for the feature.\n",
+ "comp_rows = []\n",
+ "for col, feat in enumerate(ALL_FEATURES):\n",
+ "    per_dataset = weight_matrix[:, col]\n",
+ "    row = {'feature': feat, 'averaged_weight': f\"{AVERAGED_SHAP_WEIGHTS[col]:.4f}\"}\n",
+ "    for ds, w in zip(CONFIG['phase1'], per_dataset):\n",
+ "        row[f'weight_{ds}'] = \"NaN\" if np.isnan(w) else f\"{w:.4f}\"\n",
+ "    row['n_datasets_contributing'] = int(np.sum(~np.isnan(per_dataset)))\n",
+ "    comp_rows.append(row)\n",
+ "\n",
+ "comp_df = pd.DataFrame(comp_rows)\n",
+ "print(\"\\n--- Averaged SHAP Weight Comparison ---\")\n",
+ "display(comp_df)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b2e7919",
+ "metadata": {},
+ "source": [
+ "# Section 9 — Phase 2: Apply Averaged SHAP Weights to Remaining Datasets\n",
+ "\n",
+ "For each Phase 2 dataset: load, align features, create original and SHAP-weighted variants, retrain all 8 models, and compare."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "49290102",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:57:04.709683Z",
+ "iopub.status.busy": "2026-05-07T14:57:04.709463Z",
+ "iopub.status.idle": "2026-05-07T14:57:18.321327Z",
+ "shell.execute_reply": "2026-05-07T14:57:18.320129Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "======================================================================\n",
+ "PHASE 2 — Dataset: equinox\n",
+ "======================================================================\n",
+ "Train: (259, 5) | Test: (65, 5)\n",
+ "Class dist: [156 103]\n",
+ "\n",
+ "--- Variant: original ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2bb2416ab15144c18b8b37104e7ef8a9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "equinox-original: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.6691 | ROC=0.7061 | PR=0.6571\n",
+ "XGBoost | F1=0.6832 | ROC=0.7377 | PR=0.6994\n",
+ "LightGBM | F1=0.6549 | ROC=0.7101 | PR=0.6685\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.6516 | ROC=0.6696 | PR=0.6349\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.6516 | ROC=0.6933 | PR=0.5868\n",
+ "LogisticRegression | F1=0.6691 | ROC=0.6972 | PR=0.6480\n",
+ "SVM | F1=0.6608 | ROC=0.6785 | PR=0.5299\n",
+ "KNN | F1=0.7006 | ROC=0.7618 | PR=0.6835\n",
+ "\n",
+ "--- Variant: weighted ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fc0f28da08a04721acc6065aa9bcb56c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "equinox-weighted: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.6691 | ROC=0.7081 | PR=0.6588\n",
+ "XGBoost | F1=0.6832 | ROC=0.7377 | PR=0.6994\n",
+ "LightGBM | F1=0.6549 | ROC=0.7101 | PR=0.6685\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.6516 | ROC=0.6696 | PR=0.6349\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.6516 | ROC=0.6933 | PR=0.5868\n",
+ "LogisticRegression | F1=0.6691 | ROC=0.6943 | PR=0.6407\n",
+ "SVM | F1=0.7032 | ROC=0.6460 | PR=0.4750\n",
+ "KNN | F1=0.6286 | ROC=0.6893 | PR=0.5707\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved phase2_comparison_equinox.png\n",
+ "\n",
+ "--- Delta Table (equinox) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " delta_f1_str | \n",
+ " delta_roc_str | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " +0.0000 | \n",
+ " +0.0020 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " +0.0000 | \n",
+ " -0.0030 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " +0.0424 | \n",
+ " -0.0325 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " -0.0720 | \n",
+ " -0.0725 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model delta_f1_str delta_roc_str\n",
+ "0 RandomForest +0.0000 +0.0020\n",
+ "1 XGBoost +0.0000 +0.0000\n",
+ "2 LightGBM +0.0000 +0.0000\n",
+ "3 GradientBoosting +0.0000 +0.0000\n",
+ "4 ExtraTrees +0.0000 +0.0000\n",
+ "5 LogisticRegression +0.0000 -0.0030\n",
+ "6 SVM +0.0424 -0.0325\n",
+ "7 KNN -0.0720 -0.0725"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Models benefiting from SHAP weighting: ['SVM']\n",
+ "Models hurt by SHAP weighting: ['KNN']\n",
+ "\n",
+ "======================================================================\n",
+ "PHASE 2 — Dataset: lucene\n",
+ "======================================================================\n",
+ "Train: (552, 5) | Test: (139, 5)\n",
+ "Class dist: [501 51]\n",
+ "\n",
+ "--- Variant: original ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "83f83c850b5748f1a4ea8bd69078cccf",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "lucene-original: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.5890 | ROC=0.7167 | PR=0.2375\n",
+ "XGBoost | F1=0.5832 | ROC=0.7314 | PR=0.2636\n",
+ "LightGBM | F1=0.5832 | ROC=0.7808 | PR=0.2749\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.5832 | ROC=0.7295 | PR=0.2625\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.5832 | ROC=0.7302 | PR=0.2625\n",
+ "LogisticRegression | F1=0.5854 | ROC=0.7912 | PR=0.3229\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SVM | F1=0.5212 | ROC=0.7772 | PR=0.2023\n",
+ "KNN | F1=0.5487 | ROC=0.7241 | PR=0.2277\n",
+ "\n",
+ "--- Variant: weighted ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a19d03918e994c6f8cab3f054c5a4108",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "lucene-weighted: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.5890 | ROC=0.7167 | PR=0.2375\n",
+ "XGBoost | F1=0.5832 | ROC=0.7314 | PR=0.2636\n",
+ "LightGBM | F1=0.5832 | ROC=0.7808 | PR=0.2749\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.5832 | ROC=0.7295 | PR=0.2625\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.5832 | ROC=0.7302 | PR=0.2625\n",
+ "LogisticRegression | F1=0.5854 | ROC=0.7912 | PR=0.3229\n",
+ "SVM | F1=0.5212 | ROC=0.7723 | PR=0.1958\n",
+ "KNN | F1=0.5487 | ROC=0.7241 | PR=0.2277\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved phase2_comparison_lucene.png\n",
+ "\n",
+ "--- Delta Table (lucene) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " delta_f1_str | \n",
+ " delta_roc_str | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " +0.0000 | \n",
+ " -0.0049 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model delta_f1_str delta_roc_str\n",
+ "0 RandomForest +0.0000 +0.0000\n",
+ "1 XGBoost +0.0000 +0.0000\n",
+ "2 LightGBM +0.0000 +0.0000\n",
+ "3 GradientBoosting +0.0000 +0.0000\n",
+ "4 ExtraTrees +0.0000 +0.0000\n",
+ "5 LogisticRegression +0.0000 +0.0000\n",
+ "6 SVM +0.0000 -0.0049\n",
+ "7 KNN +0.0000 +0.0000"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Models benefiting from SHAP weighting: []\n",
+ "Models hurt by SHAP weighting: []\n",
+ "\n",
+ "======================================================================\n",
+ "PHASE 2 — Dataset: pde\n",
+ "======================================================================\n",
+ "Train: (1197, 5) | Test: (300, 5)\n",
+ "Class dist: [1030 167]\n",
+ "\n",
+ "--- Variant: original ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d1434329ff284e2bb99208252de791f9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "pde-original: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.5635 | ROC=0.6445 | PR=0.2347\n",
+ "XGBoost | F1=0.5965 | ROC=0.6475 | PR=0.2845\n",
+ "LightGBM | F1=0.6296 | ROC=0.7664 | PR=0.3932\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.6028 | ROC=0.7074 | PR=0.3277\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.5810 | ROC=0.6564 | PR=0.2545\n",
+ "LogisticRegression | F1=0.5807 | ROC=0.7936 | PR=0.4034\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SVM | F1=0.5830 | ROC=0.7999 | PR=0.4107\n",
+ "KNN | F1=0.6706 | ROC=0.7801 | PR=0.3088\n",
+ "\n",
+ "--- Variant: weighted ---\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9a2021e1e6cf4dd484127c057cdfd263",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "pde-weighted: 0%| | 0/8 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomForest | F1=0.5661 | ROC=0.6437 | PR=0.2343\n",
+ "XGBoost | F1=0.5965 | ROC=0.6475 | PR=0.2845\n",
+ "LightGBM | F1=0.6296 | ROC=0.7664 | PR=0.3932\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GradientBoosting | F1=0.6028 | ROC=0.7088 | PR=0.3276\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ExtraTrees | F1=0.5810 | ROC=0.6564 | PR=0.2545\n",
+ "LogisticRegression | F1=0.6354 | ROC=0.7928 | PR=0.3839\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "SVM | F1=0.6071 | ROC=0.7928 | PR=0.3669\n",
+ "KNN | F1=0.6505 | ROC=0.7129 | PR=0.3043\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved phase2_comparison_pde.png\n",
+ "\n",
+ "--- Delta Table (pde) ---\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " model | \n",
+ " delta_f1_str | \n",
+ " delta_roc_str | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " RandomForest | \n",
+ " +0.0025 | \n",
+ " -0.0007 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " XGBoost | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " LightGBM | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " GradientBoosting | \n",
+ " +0.0000 | \n",
+ " +0.0014 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ExtraTrees | \n",
+ " +0.0000 | \n",
+ " +0.0000 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " LogisticRegression | \n",
+ " +0.0547 | \n",
+ " -0.0008 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " SVM | \n",
+ " +0.0240 | \n",
+ " -0.0071 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " KNN | \n",
+ " -0.0201 | \n",
+ " -0.0672 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " model delta_f1_str delta_roc_str\n",
+ "0 RandomForest +0.0025 -0.0007\n",
+ "1 XGBoost +0.0000 +0.0000\n",
+ "2 LightGBM +0.0000 +0.0000\n",
+ "3 GradientBoosting +0.0000 +0.0014\n",
+ "4 ExtraTrees +0.0000 +0.0000\n",
+ "5 LogisticRegression +0.0547 -0.0008\n",
+ "6 SVM +0.0240 -0.0071\n",
+ "7 KNN -0.0201 -0.0672"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Models benefiting from SHAP weighting: ['RandomForest', 'LogisticRegression', 'SVM']\n",
+ "Models hurt by SHAP weighting: ['KNN']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# PHASE 2\n",
+ "# Evaluate every model on each Phase 2 dataset twice (aligned raw features vs. the\n",
+ "# same features scaled by the averaged Phase 1 SHAP weights) and report the deltas.\n",
+ "RESULTS_PHASE2 = {}\n",
+ "\n",
+ "for ds_name in CONFIG['phase2']:\n",
+ "    print(f\"\\n{'='*70}\")\n",
+ "    print(f\"PHASE 2 — Dataset: {ds_name}\")\n",
+ "    print(f\"{'='*70}\")\n",
+ "\n",
+ "    data = DATASETS[ds_name]\n",
+ "    X, y, feature_names = data['X'], data['y'], data['feature_names']\n",
+ "\n",
+ "    # 9.1 Align to ALL_FEATURES (columns the dataset does not provide stay at 0.0)\n",
+ "    X_aligned = pd.DataFrame(0.0, index=X.index, columns=ALL_FEATURES)\n",
+ "    for col in (c for c in feature_names if c in ALL_FEATURES):\n",
+ "        X_aligned[col] = X[col].values\n",
+ "\n",
+ "    # 9.2 Apply averaged SHAP weights (each column multiplied by its weight)\n",
+ "    weight_vec = np.array([avg_dict.get(col, 0.0) for col in ALL_FEATURES])\n",
+ "    X_weighted = X_aligned.mul(weight_vec, axis='columns')\n",
+ "\n",
+ "    # Train/test split: one stratified split applied to both variants and the labels,\n",
+ "    # so original and weighted features always cover the same rows.\n",
+ "    (X_train_o, X_test_o,\n",
+ "     X_train_w, X_test_w,\n",
+ "     y_train, y_test) = train_test_split(\n",
+ "        X_aligned, X_weighted, y,\n",
+ "        test_size=CONFIG['test_size'], stratify=y,\n",
+ "        random_state=CONFIG['random_state'],\n",
+ "    )\n",
+ "    print(f\"Train: {X_train_o.shape} | Test: {X_test_o.shape}\")\n",
+ "    print(f\"Class dist: {np.bincount(y_train)}\")\n",
+ "\n",
+ "    variants = {\n",
+ "        'original': (X_train_o, X_test_o),\n",
+ "        'weighted': (X_train_w, X_test_w),\n",
+ "    }\n",
+ "    RESULTS_PHASE2[ds_name] = {name: {} for name in variants}\n",
+ "\n",
+ "    for variant, (X_tr, X_te) in variants.items():\n",
+ "        print(f\"\\n--- Variant: {variant} ---\")\n",
+ "        # Fresh, unfitted models for every variant.\n",
+ "        models = build_models(y_train.values)\n",
+ "        for mname, mdict in tqdm(models.items(), desc=f\"{ds_name}-{variant}\"):\n",
+ "            res = evaluate_model(mdict['model'], X_tr, y_train, X_te, y_test, mdict['pipeline'])\n",
+ "            RESULTS_PHASE2[ds_name][variant][mname] = res\n",
+ "            print(f\"{mname:20s} | F1={res['f1_macro']:.4f} | ROC={res['roc_auc']:.4f} | PR={res['pr_auc']:.4f}\")\n",
+ "\n",
+ "    # 9.4 Comparative analysis\n",
+ "    # One metrics frame per variant, joined on the model name.\n",
+ "    metric_cols = ('f1_macro', 'roc_auc', 'pr_auc')\n",
+ "    orig_df, wgt_df = (\n",
+ "        pd.DataFrame([\n",
+ "            {'model': m, **{k: scores[k] for k in metric_cols}}\n",
+ "            for m, scores in RESULTS_PHASE2[ds_name][label].items()\n",
+ "        ])\n",
+ "        for label in ('original', 'weighted')\n",
+ "    )\n",
+ "    merged = orig_df.merge(wgt_df, on='model', suffixes=('_orig', '_wgt'))\n",
+ "    # Positive delta = the SHAP-weighted variant scored higher.\n",
+ "    merged = merged.assign(\n",
+ "        delta_f1=merged['f1_macro_wgt'] - merged['f1_macro_orig'],\n",
+ "        delta_roc=merged['roc_auc_wgt'] - merged['roc_auc_orig'],\n",
+ "    )\n",
+ "\n",
+ "    # Grouped bar chart\n",
+ "    fig, ax = plt.subplots(figsize=(12, 6))\n",
+ "    x = np.arange(len(merged))\n",
+ "    width = 0.35\n",
+ "    ax.bar(x - width/2, merged['f1_macro_orig'], width, label='Original', color='steelblue')\n",
+ "    ax.bar(x + width/2, merged['f1_macro_wgt'], width, label='SHAP-weighted', color='coral')\n",
+ "    ax.set_xticks(x)\n",
+ "    ax.set_xticklabels(merged['model'], rotation=45, ha='right')\n",
+ "    ax.set(ylabel='F1-macro', title=f'Original vs SHAP-weighted — {ds_name}')\n",
+ "    ax.legend()\n",
+ "    fig.tight_layout()\n",
+ "    fig.savefig(f\"phase2_comparison_{ds_name}.png\")\n",
+ "    plt.close(fig)\n",
+ "    print(f\"Saved phase2_comparison_{ds_name}.png\")\n",
+ "\n",
+ "    # Delta table\n",
+ "    print(f\"\\n--- Delta Table ({ds_name}) ---\")\n",
+ "    delta_df = merged[['model', 'delta_f1', 'delta_roc']].copy()\n",
+ "    signed = \"{:+.4f}\".format\n",
+ "    delta_df['delta_f1_str'] = delta_df['delta_f1'].map(signed)\n",
+ "    delta_df['delta_roc_str'] = delta_df['delta_roc'].map(signed)\n",
+ "    display(delta_df[['model', 'delta_f1_str', 'delta_roc_str']])\n",
+ "    # Winners/losers are decided on the F1-macro delta only.\n",
+ "    winners = delta_df.loc[delta_df['delta_f1'] > 0, 'model'].tolist()\n",
+ "    losers = delta_df.loc[delta_df['delta_f1'] < 0, 'model'].tolist()\n",
+ "    print(f\"Models benefiting from SHAP weighting: {winners}\")\n",
+ "    print(f\"Models hurt by SHAP weighting: {losers}\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb11bc98",
+ "metadata": {},
+ "source": [
+ "# Section 10 — Summary and Cross-Dataset Report\n",
+ "\n",
+ "Master results table, cross-dataset consistency plot, SHAP comparison, and final printed summary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "9e9283a1",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T14:57:18.323682Z",
+ "iopub.status.busy": "2026-05-07T14:57:18.323425Z",
+ "iopub.status.idle": "2026-05-07T14:57:18.878584Z",
+ "shell.execute_reply": "2026-05-07T14:57:18.877327Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved results_summary.csv\n",
+ "Master results shape: (64, 7)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " dataset | \n",
+ " model | \n",
+ " variant | \n",
+ " f1_macro | \n",
+ " roc_auc | \n",
+ " pr_auc | \n",
+ " is_phase1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " eclipse | \n",
+ " RandomForest | \n",
+ " original | \n",
+ " 0.619048 | \n",
+ " 0.724268 | \n",
+ " 0.471125 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " eclipse | \n",
+ " XGBoost | \n",
+ " original | \n",
+ " 0.612903 | \n",
+ " 0.683080 | \n",
+ " 0.452841 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " eclipse | \n",
+ " LightGBM | \n",
+ " original | \n",
+ " 0.623031 | \n",
+ " 0.730940 | \n",
+ " 0.465996 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " eclipse | \n",
+ " GradientBoosting | \n",
+ " original | \n",
+ " 0.723623 | \n",
+ " 0.746434 | \n",
+ " 0.522457 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " eclipse | \n",
+ " ExtraTrees | \n",
+ " original | \n",
+ " 0.629323 | \n",
+ " 0.719896 | \n",
+ " 0.388231 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " eclipse | \n",
+ " LogisticRegression | \n",
+ " original | \n",
+ " 0.701143 | \n",
+ " 0.784323 | \n",
+ " 0.627092 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " eclipse | \n",
+ " SVM | \n",
+ " original | \n",
+ " 0.708683 | \n",
+ " 0.771898 | \n",
+ " 0.595115 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " eclipse | \n",
+ " KNN | \n",
+ " original | \n",
+ " 0.703557 | \n",
+ " 0.749885 | \n",
+ " 0.572197 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " mylyn | \n",
+ " RandomForest | \n",
+ " original | \n",
+ " 0.552801 | \n",
+ " 0.605978 | \n",
+ " 0.184380 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " mylyn | \n",
+ " XGBoost | \n",
+ " original | \n",
+ " 0.552319 | \n",
+ " 0.574515 | \n",
+ " 0.220625 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " dataset model variant f1_macro roc_auc pr_auc \\\n",
+ "0 eclipse RandomForest original 0.619048 0.724268 0.471125 \n",
+ "1 eclipse XGBoost original 0.612903 0.683080 0.452841 \n",
+ "2 eclipse LightGBM original 0.623031 0.730940 0.465996 \n",
+ "3 eclipse GradientBoosting original 0.723623 0.746434 0.522457 \n",
+ "4 eclipse ExtraTrees original 0.629323 0.719896 0.388231 \n",
+ "5 eclipse LogisticRegression original 0.701143 0.784323 0.627092 \n",
+ "6 eclipse SVM original 0.708683 0.771898 0.595115 \n",
+ "7 eclipse KNN original 0.703557 0.749885 0.572197 \n",
+ "8 mylyn RandomForest original 0.552801 0.605978 0.184380 \n",
+ "9 mylyn XGBoost original 0.552319 0.574515 0.220625 \n",
+ "\n",
+ " is_phase1 \n",
+ "0 True \n",
+ "1 True \n",
+ "2 True \n",
+ "3 True \n",
+ "4 True \n",
+ "5 True \n",
+ "6 True \n",
+ "7 True \n",
+ "8 True \n",
+ "9 True "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved cross_dataset_consistency.png\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved shap_comparison.png\n",
+ "\n",
+ "======================================================================\n",
+ "FINAL SUMMARY\n",
+ "======================================================================\n",
+ "Best model overall (highest mean F1-macro): SVM (mean F1=0.6320)\n",
+ "Most consistent model (lowest std F1-macro): LightGBM (std F1=0.0366)\n",
+ "Feature with highest averaged SHAP weight: numberOfBugsFoundUntil (weight=0.4813)\n",
+ "Mean Δ_F1 on Phase 2 (weighted - original): +0.0013\n",
+ "SHAP weighting IMPROVED performance on Phase 2 datasets on average.\n",
+ "======================================================================\n"
+ ]
+ }
+ ],
+ "source": [
+ "# 10.1 Master results table\n",
+ "# One row per (dataset, model, variant). Phase 1 results only exist for the\n",
+ "# 'original' variant; Phase 2 results exist for 'original' and 'weighted'.\n",
+ "result_sources = [\n",
+ "    (ds_name, 'original', RESULTS_PHASE1[ds_name], True)\n",
+ "    for ds_name in CONFIG['phase1']\n",
+ "] + [\n",
+ "    (ds_name, variant, RESULTS_PHASE2[ds_name][variant], False)\n",
+ "    for ds_name in CONFIG['phase2']\n",
+ "    for variant in ['original', 'weighted']\n",
+ "]\n",
+ "master_rows = [\n",
+ "    {\n",
+ "        'dataset': ds_name,\n",
+ "        'model': mname,\n",
+ "        'variant': variant,\n",
+ "        'f1_macro': res['f1_macro'],\n",
+ "        'roc_auc': res['roc_auc'],\n",
+ "        'pr_auc': res['pr_auc'],\n",
+ "        'is_phase1': is_phase1,\n",
+ "    }\n",
+ "    for ds_name, variant, results, is_phase1 in result_sources\n",
+ "    for mname, res in results.items()\n",
+ "]\n",
+ "\n",
+ "master_df = pd.DataFrame(master_rows)\n",
+ "master_df.to_csv(\"results_summary.csv\", index=False)\n",
+ "print(\"Saved results_summary.csv\")\n",
+ "print(f\"Master results shape: {master_df.shape}\")\n",
+ "display(master_df.head(10))\n",
+ "\n",
+ "# 10.2 Cross-dataset model consistency plot\n",
+ "# Keep every Phase 1 row (all 'original') and, for Phase 2, only the\n",
+ "# SHAP-weighted variant -- this is the mix named in the plot title.\n",
+ "plot_rows = master_df[master_df['is_phase1'] | (master_df['variant'] == 'weighted')]\n",
+ "consistency = defaultdict(dict)\n",
+ "for row in plot_rows.itertuples(index=False):\n",
+ "    consistency[row.model][row.dataset] = row.f1_macro\n",
+ "\n",
+ "all_datasets = CONFIG['phase1'] + CONFIG['phase2']\n",
+ "# model -> F1-macro per dataset, in all_datasets order (NaN where a model has\n",
+ "# no result for a dataset). Computed once and reused by the summary in 10.4.\n",
+ "model_f1 = {\n",
+ "    mname: [consistency[mname].get(ds, np.nan) for ds in all_datasets]\n",
+ "    for mname in sorted(consistency)\n",
+ "}\n",
+ "\n",
+ "fig, ax = plt.subplots(figsize=(12, 7))\n",
+ "x = np.arange(len(all_datasets))\n",
+ "for mname, vals in model_f1.items():\n",
+ "    ax.plot(x, vals, marker='o', label=mname)\n",
+ "ax.set_xticks(x)\n",
+ "ax.set_xticklabels(all_datasets)\n",
+ "ax.set_ylabel('F1-macro')\n",
+ "ax.set_title('Cross-Dataset Model Consistency (Phase 1 original + Phase 2 weighted)')\n",
+ "ax.legend()\n",
+ "ax.set_ylim(0, 1.05)\n",
+ "plt.tight_layout()\n",
+ "plt.savefig(\"cross_dataset_consistency.png\")\n",
+ "plt.close(fig)\n",
+ "print(\"Saved cross_dataset_consistency.png\")\n",
+ "\n",
+ "# 10.3 SHAP feature importance comparison\n",
+ "fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "# One bar per Phase 1 dataset plus one for the averaged weights. The bar width\n",
+ "# and tick offset are derived from that count so the group stays inside its\n",
+ "# slot and the tick stays centred for any number of Phase 1 datasets\n",
+ "# (two datasets give the previous fixed width of 0.25).\n",
+ "n_bars = len(CONFIG['phase1']) + 1\n",
+ "bar_width = 0.75 / n_bars\n",
+ "x = np.arange(len(ALL_FEATURES))\n",
+ "for i, ds_name in enumerate(CONFIG['phase1']):\n",
+ "    weights = [SHAP_WEIGHTS[ds_name].get(f, 0.0) for f in ALL_FEATURES]\n",
+ "    ax.bar(x + i*bar_width, weights, bar_width, label=ds_name)\n",
+ "ax.bar(x + (n_bars - 1)*bar_width, AVERAGED_SHAP_WEIGHTS,\n",
+ "       bar_width, label='averaged', color='black')\n",
+ "ax.set_xticks(x + bar_width * (n_bars - 1) / 2)\n",
+ "ax.set_xticklabels(ALL_FEATURES, rotation=45, ha='right')\n",
+ "ax.set_ylabel('SHAP Weight')\n",
+ "ax.set_title('SHAP Feature Importance Comparison')\n",
+ "ax.legend()\n",
+ "plt.tight_layout()\n",
+ "plt.savefig(\"shap_comparison.png\")\n",
+ "plt.close(fig)\n",
+ "print(\"Saved shap_comparison.png\")\n",
+ "\n",
+ "# 10.4 Final printed summary\n",
+ "print(\"\\n\" + \"=\"*70)\n",
+ "print(\"FINAL SUMMARY\")\n",
+ "print(\"=\"*70)\n",
+ "\n",
+ "# Mean / population std of F1-macro across datasets, ignoring missing entries.\n",
+ "model_mean_f1 = {mname: np.nanmean(vals) for mname, vals in model_f1.items()}\n",
+ "model_std_f1 = {mname: np.nanstd(vals) for mname, vals in model_f1.items()}\n",
+ "\n",
+ "best_overall = max(model_mean_f1, key=model_mean_f1.get)\n",
+ "print(f\"Best model overall (highest mean F1-macro): {best_overall} (mean F1={model_mean_f1[best_overall]:.4f})\")\n",
+ "\n",
+ "most_consistent = min(model_std_f1, key=model_std_f1.get)\n",
+ "print(f\"Most consistent model (lowest std F1-macro): {most_consistent} (std F1={model_std_f1[most_consistent]:.4f})\")\n",
+ "\n",
+ "top_feature = ALL_FEATURES[np.argmax(AVERAGED_SHAP_WEIGHTS)]\n",
+ "print(f\"Feature with highest averaged SHAP weight: {top_feature} (weight={AVERAGED_SHAP_WEIGHTS.max():.4f})\")\n",
+ "\n",
+ "# Paired F1-macro differences (weighted - original) over every Phase 2\n",
+ "# dataset/model combination.\n",
+ "phase2_deltas = [\n",
+ "    RESULTS_PHASE2[ds_name]['weighted'][mname]['f1_macro'] - orig_res['f1_macro']\n",
+ "    for ds_name in CONFIG['phase2']\n",
+ "    for mname, orig_res in RESULTS_PHASE2[ds_name]['original'].items()\n",
+ "]\n",
+ "mean_delta = np.mean(phase2_deltas)\n",
+ "print(f\"Mean Δ_F1 on Phase 2 (weighted - original): {mean_delta:+.4f}\")\n",
+ "# NOTE: the verdict below looks only at the sign of the mean delta; it is not\n",
+ "# a significance test, so very small deltas should be read with caution.\n",
+ "if mean_delta > 0:\n",
+ "    print(\"SHAP weighting IMPROVED performance on Phase 2 datasets on average.\")\n",
+ "else:\n",
+ "    print(\"SHAP weighting did NOT improve performance on Phase 2 datasets on average.\")\n",
+ "\n",
+ "print(\"=\"*70)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.12"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {
+ "0284da0f5b274e78838f38b3ef2dca55": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "02e2dc7a600a4c91a197341fa9d40e78": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "04ec49c98dd94569900101513328aa90": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "05235f0bc96547059ca6fe4457025389": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_10d60d1486b44fbc8bbd3faa54a6f915",
+ "placeholder": "",
+ "style": "IPY_MODEL_564a613aa6d94ba39345122e9b745f8b",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:02<00:00, 2.81it/s]"
+ }
+ },
+ "05d22bc8cd804154b2157d2a82a331ee": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0858affa4842445cac0f54e2a8f7253c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0879b02b60f64264b14d61f50ae2c36e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "0919568854c74666ba410773c46d6531": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0d1ee3fa705d42be8e41ceec681c4ce3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_5aadfc1b281a4e59abc5d87ae7c89ea4",
+ "placeholder": "",
+ "style": "IPY_MODEL_57fbb52b97684b428f933a6e02fb2228",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "equinox-original: 100%"
+ }
+ },
+ "0dc89b8c52b24ff4b1e6a0b45351d27c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0ecba73843334c449c6adc370c54391b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "10d60d1486b44fbc8bbd3faa54a6f915": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "110576e094ef44fcaa43bdfe287d65b0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_31ba4fe654af4fad98c1e09f29de0494",
+ "placeholder": "",
+ "style": "IPY_MODEL_cd03671898e64c7d8b47801776ce2b24",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 373/373 [00:49<00:00, 7.09it/s]"
+ }
+ },
+ "1458302bf50f4a2186435d0e0f2d49ad": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "160da6fb6320427591dfed49754b016b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "17e39b92dabf4c81a81bc5ae88796ad8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "1893bf392d76449daa021ddb69c0aa6a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_1458302bf50f4a2186435d0e0f2d49ad",
+ "placeholder": "",
+ "style": "IPY_MODEL_3c9899dbd3cb4695b21d2c4ea4686810",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:10<00:00, 1.10it/s]"
+ }
+ },
+ "198f2b2bb8ac4a19815ab28440e28af1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_160da6fb6320427591dfed49754b016b",
+ "placeholder": "",
+ "style": "IPY_MODEL_0ecba73843334c449c6adc370c54391b",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:05<00:00, 1.56it/s]"
+ }
+ },
+ "1d2208887ff84ef28b759fb284cc1c8a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1ebd7a176f7a498ba91eff163d6477f3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_2e6dcc8ce49349b3b01e560b91f2d5bc",
+ "placeholder": "",
+ "style": "IPY_MODEL_daf841ec919145349c4fe0ac8baf41ca",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "100%"
+ }
+ },
+ "1f4cb99e204a4b259e5a0bafbf45b1ad": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_8eff8d8991ae483c9b9334c9049f4c4b",
+ "placeholder": "",
+ "style": "IPY_MODEL_e0825ef7ec7240f2a010a6d67b62dca8",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "lucene-weighted: 100%"
+ }
+ },
+ "1fec56b1ec0f44bab464a367147f0d63": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2050bf4ed4754e5fa129740cc3440a00": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "20b779efcc864138a4a4d823fcb607b5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "22ad2409dd7f4e3e927b4ed51ab0b687": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2417bda2b35441aa813960c2d3853e62": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_de9024a569504bd4afd36143c315d361",
+ "placeholder": "",
+ "style": "IPY_MODEL_17e39b92dabf4c81a81bc5ae88796ad8",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:06<00:00, 1.30it/s]"
+ }
+ },
+ "247614267d7b45eb8ca13b12ee3e4125": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7140e4eee15d43b3b78e05505798ed43",
+ "IPY_MODEL_d606de86e26d45579dd670efbb6dca0f",
+ "IPY_MODEL_198f2b2bb8ac4a19815ab28440e28af1"
+ ],
+ "layout": "IPY_MODEL_a20759f9177f4340878ee83d8e199f65",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "26e7c61625fc41e4bfe28c54ae6d062e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "28bd53fb8811463096b598cb5ffe599d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_26e7c61625fc41e4bfe28c54ae6d062e",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_375fbe7d0d554fd0a72c890daf36f60b",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "2bb2416ab15144c18b8b37104e7ef8a9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_0d1ee3fa705d42be8e41ceec681c4ce3",
+ "IPY_MODEL_919f2ce85b5e49e2b1c170467e254b17",
+ "IPY_MODEL_b8f77d7dbbc1491690614d207daa44ac"
+ ],
+ "layout": "IPY_MODEL_b73469d3def44411a13c5371dc15c5e5",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "2c297559b06c46b1b6f073a675d90d0e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "2d415d65588b45fd9fc4d08239ae14fc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e6dcc8ce49349b3b01e560b91f2d5bc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "31639c90e81442a983107b10fb9163c0": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "31ba4fe654af4fad98c1e09f29de0494": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "346e868928b0421f80a3a70ae2bd40a3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "35055e1b7389436b88600679dbbc22eb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_6f4357e554ec4d98bc23bfddd3996007",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_ad58d70ddeff4b48a4155b8c3200f6d0",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "36e19e3b212f4e2a8ff4962b1d019b69": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_39b91419e0e1422a87a5cd4c0a6e4986",
+ "IPY_MODEL_fd5beca9762c4386a116cb6ea0601b9d",
+ "IPY_MODEL_1893bf392d76449daa021ddb69c0aa6a"
+ ],
+ "layout": "IPY_MODEL_672370ebe3bb44a488573bf617fad3b7",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "375fbe7d0d554fd0a72c890daf36f60b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "38d1f02d5ce643888933e703e9d2d89b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3933f5e4a7594d28849fa75577e9aa2b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_1fec56b1ec0f44bab464a367147f0d63",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_0879b02b60f64264b14d61f50ae2c36e",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "39b91419e0e1422a87a5cd4c0a6e4986": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_56bcb0086c664a2990b9a8e9f8669140",
+ "placeholder": "",
+ "style": "IPY_MODEL_2c297559b06c46b1b6f073a675d90d0e",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "CV eclipse: 100%"
+ }
+ },
+ "3bdd59c4fbd04a10a952e98f3ef690ea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "3c9899dbd3cb4695b21d2c4ea4686810": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "3d0781dbf4504a8dbb18872f4983879d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_38d1f02d5ce643888933e703e9d2d89b",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_04ec49c98dd94569900101513328aa90",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "442dc07ba2c04dfa8b082c16eefe8a51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_8dc69776a361482c9ccf03de5aa2a6e5",
+ "placeholder": "",
+ "style": "IPY_MODEL_85da01686e91495bbb563806acd71548",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:02<00:00, 2.96it/s]"
+ }
+ },
+ "4a1d247bfa6c41f09a7e86e274e8356c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_0dc89b8c52b24ff4b1e6a0b45351d27c",
+ "placeholder": "",
+ "style": "IPY_MODEL_ef513f3203f34b0588b26f821bd4c7bc",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:13<00:00, 1.30s/it]"
+ }
+ },
+ "4d2febe205f542f1a8351873555e2b2c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "4ef662a0223341eeaf94159b736f66bb": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "4f67f20befda43119e11318be4df03c5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_d4ab78cc3f874a9a83efd22c31587d45",
+ "IPY_MODEL_28bd53fb8811463096b598cb5ffe599d",
+ "IPY_MODEL_2417bda2b35441aa813960c2d3853e62"
+ ],
+ "layout": "IPY_MODEL_0919568854c74666ba410773c46d6531",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "51bc441a2ed34beca459f67f6f515d2b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "51d41c1f599546b892ad698782a365c0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "522cf0aca5074160a4d626771fb88607": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "564a613aa6d94ba39345122e9b745f8b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "56bcb0086c664a2990b9a8e9f8669140": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "57fbb52b97684b428f933a6e02fb2228": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "5aadfc1b281a4e59abc5d87ae7c89ea4": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "5ede1b85299748cfa21d04e2397c8aa6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_522cf0aca5074160a4d626771fb88607",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_fd7e2dcd71d94de8b1f1ca6bd1777c7d",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "605d13e302c849b7a0022fad5b851159": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_e5abd23bb547493a8820c0ab496a7533",
+ "max": 373.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_961efe5a5c634a7d83dd4d378d0dd9ed",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 373.0
+ }
+ },
+ "6083e74a95ca49cabe1652425a7f8e7e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_c35082e5a14145cc8faee9a29f32b967",
+ "placeholder": "",
+ "style": "IPY_MODEL_4ef662a0223341eeaf94159b736f66bb",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "pde-weighted: 100%"
+ }
+ },
+ "617e1b7eed9747e2988c9d572f209b0a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "672370ebe3bb44a488573bf617fad3b7": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6b0138ae2305411a98ab3b1a1616b33b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6e08471824604272acf89241db17436f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "6f4357e554ec4d98bc23bfddd3996007": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7140e4eee15d43b3b78e05505798ed43": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_d88824b36690468685e3d539b71b1c08",
+ "placeholder": "",
+ "style": "IPY_MODEL_3bdd59c4fbd04a10a952e98f3ef690ea",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "Test eclipse: 100%"
+ }
+ },
+ "79ab93a18efe40e380d1f663e9ed349b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7af6cb2e27ee4eac9b706dc21c32dd50": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8152e391854d41418417b1f9008208f6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "83f83c850b5748f1a4ea8bd69078cccf": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_df37ae06dd9b4f84bd6390a1486ea9c0",
+ "IPY_MODEL_a4961208765a4615b9936322ef0141a5",
+ "IPY_MODEL_e1321c32fe604168a3114e4efd86e1f5"
+ ],
+ "layout": "IPY_MODEL_22ad2409dd7f4e3e927b4ed51ab0b687",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "85da01686e91495bbb563806acd71548": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "8b829c3c176343c68badc2c60e955d5d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "8dc69776a361482c9ccf03de5aa2a6e5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "8eff8d8991ae483c9b9334c9049f4c4b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "919f2ce85b5e49e2b1c170467e254b17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_6b0138ae2305411a98ab3b1a1616b33b",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_2050bf4ed4754e5fa129740cc3440a00",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "922a74d8bfe245b493776e230f19e35f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_93a382e07d484cedbf2bd4c82c5b511f",
+ "placeholder": "",
+ "style": "IPY_MODEL_f94c9fbd8a2a48c088189d77428c43b6",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "equinox-weighted: 100%"
+ }
+ },
+ "93a382e07d484cedbf2bd4c82c5b511f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "961efe5a5c634a7d83dd4d378d0dd9ed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "9646791c601f422599eea54b25460718": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "9a2021e1e6cf4dd484127c057cdfd263": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6083e74a95ca49cabe1652425a7f8e7e",
+ "IPY_MODEL_35055e1b7389436b88600679dbbc22eb",
+ "IPY_MODEL_05235f0bc96547059ca6fe4457025389"
+ ],
+ "layout": "IPY_MODEL_fca5a24e30104db6b134da0596895588",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "9b852374f85146ea823f1ed5c3a06e5b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_0858affa4842445cac0f54e2a8f7253c",
+ "placeholder": "",
+ "style": "IPY_MODEL_6e08471824604272acf89241db17436f",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:01<00:00, 3.11it/s]"
+ }
+ },
+ "a19d03918e994c6f8cab3f054c5a4108": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1f4cb99e204a4b259e5a0bafbf45b1ad",
+ "IPY_MODEL_5ede1b85299748cfa21d04e2397c8aa6",
+ "IPY_MODEL_ca3f9f0f93574948a68229d0a5141d5d"
+ ],
+ "layout": "IPY_MODEL_dc429b44001e4edcbdebd2fb3fd1c973",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "a20759f9177f4340878ee83d8e199f65": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a4961208765a4615b9936322ef0141a5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_02e2dc7a600a4c91a197341fa9d40e78",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_346e868928b0421f80a3a70ae2bd40a3",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "a55c1e367ca54a74a222e7e18bfede16": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ad58d70ddeff4b48a4155b8c3200f6d0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "b65cd0e13eeb4fefb899be3f0a7f316c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "b73469d3def44411a13c5371dc15c5e5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b8f77d7dbbc1491690614d207daa44ac": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_05d22bc8cd804154b2157d2a82a331ee",
+ "placeholder": "",
+ "style": "IPY_MODEL_cc364820cffc48f1b34e0569b5708fbc",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:01<00:00, 3.20it/s]"
+ }
+ },
+ "b99273bb58a44526825c247e3a902f3c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_c2fd44f6263b414f9ddb886736d02d9d",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_9646791c601f422599eea54b25460718",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "bb77565a12f3457f98a236d99e9d795e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1ebd7a176f7a498ba91eff163d6477f3",
+ "IPY_MODEL_605d13e302c849b7a0022fad5b851159",
+ "IPY_MODEL_110576e094ef44fcaa43bdfe287d65b0"
+ ],
+ "layout": "IPY_MODEL_1d2208887ff84ef28b759fb284cc1c8a",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "bfae16dd28224fbabb7e1905db995c9e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "c2fd44f6263b414f9ddb886736d02d9d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c35082e5a14145cc8faee9a29f32b967": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c9899cc475b242b684fdef7203c75fbc": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ca3f9f0f93574948a68229d0a5141d5d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_79ab93a18efe40e380d1f663e9ed349b",
+ "placeholder": "",
+ "style": "IPY_MODEL_bfae16dd28224fbabb7e1905db995c9e",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:02<00:00, 4.17it/s]"
+ }
+ },
+ "cc364820cffc48f1b34e0569b5708fbc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "cd03671898e64c7d8b47801776ce2b24": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "d1434329ff284e2bb99208252de791f9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_f00ba37c0cc94e9d963f88a3e55a355c",
+ "IPY_MODEL_b99273bb58a44526825c247e3a902f3c",
+ "IPY_MODEL_442dc07ba2c04dfa8b082c16eefe8a51"
+ ],
+ "layout": "IPY_MODEL_ff61fd8b0bf44d4584cabd6680eef38e",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "d4ab78cc3f874a9a83efd22c31587d45": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_8152e391854d41418417b1f9008208f6",
+ "placeholder": "",
+ "style": "IPY_MODEL_51d41c1f599546b892ad698782a365c0",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "Test mylyn: 100%"
+ }
+ },
+ "d606de86e26d45579dd670efbb6dca0f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_a55c1e367ca54a74a222e7e18bfede16",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_e385ab9eb62d48d08d8276f721c0d980",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "d88824b36690468685e3d539b71b1c08": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "daf841ec919145349c4fe0ac8baf41ca": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "dc429b44001e4edcbdebd2fb3fd1c973": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "de9024a569504bd4afd36143c315d361": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "df37ae06dd9b4f84bd6390a1486ea9c0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_f6149532775647bba639b367269b4f34",
+ "placeholder": "",
+ "style": "IPY_MODEL_0284da0f5b274e78838f38b3ef2dca55",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "lucene-original: 100%"
+ }
+ },
+ "e0825ef7ec7240f2a010a6d67b62dca8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "e1321c32fe604168a3114e4efd86e1f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_7af6cb2e27ee4eac9b706dc21c32dd50",
+ "placeholder": "",
+ "style": "IPY_MODEL_51bc441a2ed34beca459f67f6f515d2b",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 8/8 [00:02<00:00, 4.01it/s]"
+ }
+ },
+ "e385ab9eb62d48d08d8276f721c0d980": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "e574f804ba554d1f8e97c1256ba9fa1c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_617e1b7eed9747e2988c9d572f209b0a",
+ "placeholder": "",
+ "style": "IPY_MODEL_8b829c3c176343c68badc2c60e955d5d",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "CV mylyn: 100%"
+ }
+ },
+ "e5abd23bb547493a8820c0ab496a7533": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e6a9373f078d4b079ff1e6f0a095b629": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_e574f804ba554d1f8e97c1256ba9fa1c",
+ "IPY_MODEL_3d0781dbf4504a8dbb18872f4983879d",
+ "IPY_MODEL_4a1d247bfa6c41f09a7e86e274e8356c"
+ ],
+ "layout": "IPY_MODEL_20b779efcc864138a4a4d823fcb607b5",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "ef513f3203f34b0588b26f821bd4c7bc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "f00ba37c0cc94e9d963f88a3e55a355c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_2d415d65588b45fd9fc4d08239ae14fc",
+ "placeholder": "",
+ "style": "IPY_MODEL_b65cd0e13eeb4fefb899be3f0a7f316c",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "pde-original: 100%"
+ }
+ },
+ "f6149532775647bba639b367269b4f34": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f94c9fbd8a2a48c088189d77428c43b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "fc0f28da08a04721acc6065aa9bcb56c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_922a74d8bfe245b493776e230f19e35f",
+ "IPY_MODEL_3933f5e4a7594d28849fa75577e9aa2b",
+ "IPY_MODEL_9b852374f85146ea823f1ed5c3a06e5b"
+ ],
+ "layout": "IPY_MODEL_c9899cc475b242b684fdef7203c75fbc",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "fca5a24e30104db6b134da0596895588": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fd5beca9762c4386a116cb6ea0601b9d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_31639c90e81442a983107b10fb9163c0",
+ "max": 8.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_4d2febe205f542f1a8351873555e2b2c",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 8.0
+ }
+ },
+ "fd7e2dcd71d94de8b1f1ca6bd1777c7d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "ff61fd8b0bf44d4584cabd6680eef38e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ }
+ },
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}