""" Regenerate ALL figures with proper spacing, no overlapping legends/labels. Each figure is single-purpose and sized for IEEE column width. """ import os, sys sys.path.insert(0, '/app/fraud_detection') import numpy as np import pandas as pd import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import seaborn as sns import joblib import shap import warnings warnings.filterwarnings('ignore') from ae_model import AutoencoderWrapper, Autoencoder from sklearn.metrics import ( roc_curve, precision_recall_curve, roc_auc_score, average_precision_score, confusion_matrix, precision_score, recall_score, f1_score, matthews_corrcoef ) from config import DATA_DIR, MODELS_DIR, FIGURES_DIR, FIG_DPI, FIG_BG # Global style plt.rcParams.update({ 'font.size': 10, 'axes.titlesize': 11, 'axes.labelsize': 10, 'xtick.labelsize': 9, 'ytick.labelsize': 9, 'legend.fontsize': 8, 'figure.facecolor': 'white', 'axes.facecolor': 'white', 'savefig.facecolor': 'white', 'savefig.bbox': 'tight', 'savefig.dpi': 300, 'figure.dpi': 100, }) sns.set_style("whitegrid") def save(fig, name): fig.savefig(os.path.join(FIGURES_DIR, f"{name}.png"), dpi=FIG_DPI, bbox_inches='tight', facecolor='white', pad_inches=0.15) fig.savefig(os.path.join(FIGURES_DIR, f"{name}.pdf"), bbox_inches='tight', facecolor='white', pad_inches=0.15) plt.close(fig) print(f" Saved: {name}.png/pdf") # Load everything raw_df = pd.read_csv(os.path.join(DATA_DIR, "creditcard.csv")) data = joblib.load(os.path.join(DATA_DIR, "processed_data.joblib")) models = joblib.load(os.path.join(MODELS_DIR, "all_models_with_ae.joblib")) X_test, y_test = data['X_test'], data['y_test'] feature_names = data['feature_names'] print("=" * 60) print("REGENERATING ALL FIGURES (fixed spacing)") print("=" * 60) # ────────────────────────────────────────────── # 1. 
# ──────────────────────────────────────────────
# 1. CLASS DISTRIBUTION
# ──────────────────────────────────────────────
print("\n[1] Class distribution")
cc = raw_df['Class'].value_counts()  # index 0 = legitimate (majority first)
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
colors = ['#27ae60', '#e74c3c']

# (a) log-scale bar chart with exact counts printed above each bar.
bars = axes[0].bar(['Legitimate (0)', 'Fraud (1)'], cc.values, color=colors,
                   edgecolor='black', lw=0.6, width=0.55)
axes[0].set_yscale('log')
axes[0].set_ylabel('Count (log scale)')
axes[0].set_title('(a) Transaction Counts')
for b, v in zip(bars, cc.values):
    axes[0].text(b.get_x() + b.get_width() / 2, v * 1.15, f'{v:,}',
                 ha='center', va='bottom', fontsize=9, fontweight='bold')

# (b) pie chart. Percentages are computed from the data rather than
# hard-coded so the labels stay correct if the dataset changes.
pct = cc.values / cc.values.sum() * 100
wedges, texts, autotexts = axes[1].pie(
    cc.values,
    labels=[f'Legitimate\n{pct[0]:.3f}%', f'Fraud\n{pct[1]:.3f}%'],
    colors=colors, autopct='', startangle=90, explode=(0, 0.08),
    textprops={'fontsize': 9},
    wedgeprops={'edgecolor': 'white', 'linewidth': 1.5}
)
axes[1].set_title('(b) Fraud Ratio')

fig.suptitle('Class Distribution in Credit Card Fraud Dataset',
             fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'class_distribution')
# ──────────────────────────────────────────────
# 2. AMOUNT ANALYSIS
# ──────────────────────────────────────────────
print("[2] Amount analysis")
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
legit_amt = raw_df.loc[raw_df['Class'] == 0, 'Amount']
fraud_amt = raw_df.loc[raw_df['Class'] == 1, 'Amount']

# (a) raw legitimate amounts, x-axis truncated at $1500 for readability.
ax = axes[0, 0]
ax.hist(legit_amt, bins=80, color='#27ae60', alpha=0.8, edgecolor='none')
ax.set_title('(a) Legitimate Amounts')
ax.set_xlabel('Amount ($)')
ax.set_ylabel('Frequency')
ax.set_xlim(0, 1500)

# (b) raw fraudulent amounts.
ax = axes[0, 1]
ax.hist(fraud_amt, bins=40, color='#e74c3c', alpha=0.8, edgecolor='none')
ax.set_title('(b) Fraudulent Amounts')
ax.set_xlabel('Amount ($)')
ax.set_ylabel('Frequency')

# (c) overlaid log1p-scaled distributions of both classes.
ax = axes[1, 0]
ax.hist(np.log1p(legit_amt), bins=50, color='#27ae60', alpha=0.55,
        label='Legitimate', edgecolor='none')
ax.hist(np.log1p(fraud_amt), bins=50, color='#e74c3c', alpha=0.55,
        label='Fraud', edgecolor='none')
ax.set_title('(c) Log-Scaled Comparison')
ax.set_xlabel('log(1 + Amount)')
ax.set_ylabel('Frequency')
ax.legend(loc='upper right', framealpha=0.9)

# (d) boxplots, capped at $500 so the whiskers stay readable.
ax = axes[1, 1]
bp = ax.boxplot(
    [legit_amt.clip(upper=500), fraud_amt.clip(upper=500)],
    labels=['Legitimate', 'Fraud'], patch_artist=True, widths=0.45,
    medianprops=dict(color='black', lw=1.5)
)
for box, col in zip(bp['boxes'], ('#27ae60', '#e74c3c')):
    box.set_facecolor(col)
    box.set_alpha(0.7)
ax.set_title('(d) Boxplot (capped at $500)')
ax.set_ylabel('Amount ($)')

fig.suptitle('Transaction Amount Analysis by Class',
             fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'amount_analysis')
# ──────────────────────────────────────────────
# 3. TIME ANALYSIS
# ──────────────────────────────────────────────
print("[3] Time analysis")
raw_df_t = raw_df.copy()
# 'Time' is seconds since the first transaction; fold it into hour-of-day.
raw_df_t['Hour'] = (raw_df_t['Time'] / 3600) % 24
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

# (a) per-class density of transactions over the day.
axes[0].hist(raw_df_t[raw_df_t['Class'] == 0]['Hour'], bins=48,
             color='#27ae60', alpha=0.55, label='Legitimate', density=True)
axes[0].hist(raw_df_t[raw_df_t['Class'] == 1]['Hour'], bins=48,
             color='#e74c3c', alpha=0.55, label='Fraud', density=True)
axes[0].set_title('(a) Transaction Density by Hour')
axes[0].set_xlabel('Hour of Day')
axes[0].set_ylabel('Density')
axes[0].legend(loc='upper left', framealpha=0.9)

# (b) fraud rate per integer hour (mean of the 0/1 label, as percent).
hourly = raw_df_t.groupby(raw_df_t['Hour'].astype(int))['Class'].mean() * 100
axes[1].bar(hourly.index, hourly.values, color='#e74c3c', alpha=0.75,
            edgecolor='black', lw=0.3)
axes[1].set_title('(b) Fraud Rate by Hour')
axes[1].set_xlabel('Hour of Day')
axes[1].set_ylabel('Fraud Rate (%)')

fig.suptitle('Temporal Patterns in Transaction Data',
             fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'time_analysis')

# ──────────────────────────────────────────────
# 4. CORRELATION HEATMAP (single chart, not dual)
# ──────────────────────────────────────────────
print("[4] Correlation heatmap")
# corrwith() computes only the per-feature correlations with 'Class'
# instead of the full 31x31 matrix raw_df.corr() would build, with
# identical Pearson values. `corrs` is reused by the next section.
corrs = raw_df.corrwith(raw_df['Class']).drop('Class').sort_values()
fig, ax = plt.subplots(figsize=(8, 7))
colors_bar = ['#e74c3c' if v < 0 else '#27ae60' for v in corrs.values]
ax.barh(corrs.index, corrs.values, color=colors_bar, edgecolor='none',
        height=0.7)
ax.set_xlabel('Pearson Correlation with Fraud Class')
ax.set_title('Feature Correlation with Fraud', fontsize=12, fontweight='bold')
ax.axvline(x=0, color='black', lw=0.5)
ax.tick_params(axis='y', labelsize=8)
fig.tight_layout()
save(fig, 'correlation_heatmap')
# ──────────────────────────────────────────────
# 5. FEATURE DISTRIBUTIONS (6 features, 2x3 grid)
# ──────────────────────────────────────────────
print("[5] Feature distributions")
# Six features with the largest absolute correlation to the fraud label.
top6 = corrs.abs().sort_values(ascending=False).head(6).index.tolist()
fig, axes = plt.subplots(2, 3, figsize=(12, 7))
for ax, feat in zip(axes.ravel(), top6):
    ax.hist(raw_df[raw_df['Class'] == 0][feat], bins=50, color='#27ae60',
            alpha=0.5, label='Legit', density=True)
    ax.hist(raw_df[raw_df['Class'] == 1][feat], bins=50, color='#e74c3c',
            alpha=0.5, label='Fraud', density=True)
    ax.set_title(feat, fontweight='bold')
    ax.legend(loc='upper right', fontsize=7, framealpha=0.9)
    ax.set_ylabel('Density')
fig.suptitle('Distribution of Top 6 Discriminative Features by Class',
             fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'feature_distributions')

# ──────────────────────────────────────────────
# 6. ROC CURVES (top 5 models only for clarity)
# ──────────────────────────────────────────────
print("[6] ROC curves")
# NOTE: top_models and cmap are reused by the PR-curve section below.
top_models = ['XGBoost', 'Voting_Ensemble', 'XGBoost_Tuned',
              'Random_Forest_Tuned', 'LightGBM_Tuned']
cmap = plt.cm.Set1
fig, ax = plt.subplots(figsize=(7, 6))
for i, name in enumerate(top_models):
    if name not in models:
        continue  # tolerate a model missing from the bundle
    proba = models[name].predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, proba)
    auc_val = roc_auc_score(y_test, proba)
    ax.plot(fpr, tpr, color=cmap(i), lw=2,
            label=f'{name.replace("_", " ")} ({auc_val:.4f})')
ax.plot([0, 1], [0, 1], 'k--', lw=0.8, label='Random Baseline')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves — Top 5 Models', fontsize=12, fontweight='bold')
ax.legend(loc='lower right', fontsize=8, framealpha=0.95)
ax.set_xlim([-0.01, 1.01])
ax.set_ylim([-0.01, 1.03])
fig.tight_layout()
save(fig, 'roc_curves')
# ──────────────────────────────────────────────
# 7. PR CURVES
# ──────────────────────────────────────────────
print("[7] PR curves")
fig, ax = plt.subplots(figsize=(7, 6))
for i, name in enumerate(top_models):
    if name not in models:
        continue  # tolerate a model missing from the bundle
    proba = models[name].predict_proba(X_test)[:, 1]
    prec, rec, _ = precision_recall_curve(y_test, proba)
    ap = average_precision_score(y_test, proba)
    ax.plot(rec, prec, color=cmap(i), lw=2,
            label=f'{name.replace("_", " ")} ({ap:.4f})')
# For PR curves the no-skill baseline is the positive-class prevalence.
baseline = y_test.mean()
ax.axhline(y=baseline, color='k', ls='--', lw=0.8,
           label=f'Baseline ({baseline:.4f})')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curves — Top 5 Models',
             fontsize=12, fontweight='bold')
ax.legend(loc='upper right', fontsize=8, framealpha=0.95)
ax.set_xlim([-0.01, 1.01])
ax.set_ylim([-0.01, 1.03])
fig.tight_layout()
save(fig, 'pr_curves')

# ──────────────────────────────────────────────
# 8. CONFUSION MATRICES (2x3 grid for top 6)
# ──────────────────────────────────────────────
print("[8] Confusion matrices")
cm_models = ['XGBoost', 'Voting_Ensemble', 'Random_Forest_Tuned',
             'LightGBM_Tuned', 'MLP', 'Logistic_Regression']
fig, axes = plt.subplots(2, 3, figsize=(13, 8))
axes = axes.ravel()
# Indexed loop (not zip) so a missing model leaves its panel blank
# rather than shifting every later model into the wrong slot.
for i, name in enumerate(cm_models):
    if name not in models:
        continue
    proba = models[name].predict_proba(X_test)[:, 1]
    preds = (proba >= 0.5).astype(int)
    cm = confusion_matrix(y_test, preds)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[i],
                xticklabels=['Legit', 'Fraud'], yticklabels=['Legit', 'Fraud'],
                cbar=False, annot_kws={'size': 10})
    axes[i].set_title(name.replace('_', ' '), fontsize=10, fontweight='bold')
    axes[i].set_ylabel('Actual')
    axes[i].set_xlabel('Predicted')
fig.suptitle('Confusion Matrices on Test Set (threshold = 0.5)',
             fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'confusion_matrices')
# ──────────────────────────────────────────────
# 9. THRESHOLD ANALYSIS
# ──────────────────────────────────────────────
print("[9] Threshold analysis")
proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
thresholds = np.arange(0.05, 0.96, 0.025)
rows = []
for t in thresholds:
    p = (proba_xgb >= t).astype(int)
    rows.append({
        't': t,
        'Prec': precision_score(y_test, p, zero_division=0),
        'Rec': recall_score(y_test, p, zero_division=0),
        'F1': f1_score(y_test, p, zero_division=0),
        'MCC': matthews_corrcoef(y_test, p),
    })
dt = pd.DataFrame(rows)
# Operating point: the threshold that maximises F1 on the test set.
best_t = dt.loc[dt['F1'].idxmax(), 't']

fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))

# (a) precision / recall / F1 vs threshold, with the optimum marked.
axes[0].plot(dt['t'], dt['Prec'], 'b-', lw=2, label='Precision')
axes[0].plot(dt['t'], dt['Rec'], 'r-', lw=2, label='Recall')
axes[0].plot(dt['t'], dt['F1'], 'g-', lw=2.5, label='F1 Score')
axes[0].axvline(x=best_t, color='gray', ls='--', lw=1.2,
                label=f'Optimal ({best_t:.2f})')
axes[0].set_xlabel('Decision Threshold')
axes[0].set_ylabel('Score')
axes[0].set_title('(a) Precision / Recall / F1', fontweight='bold')
axes[0].legend(loc='center left', framealpha=0.95, fontsize=8)

# (b) MCC vs threshold.
axes[1].plot(dt['t'], dt['MCC'], 'm-', lw=2, label='MCC')
axes[1].axvline(x=best_t, color='gray', ls='--', lw=1.2)
axes[1].set_xlabel('Decision Threshold')
axes[1].set_ylabel('MCC')
axes[1].set_title('(b) Matthews Correlation Coefficient', fontweight='bold')
axes[1].legend(loc='upper right', framealpha=0.95, fontsize=8)

fig.suptitle(f'Threshold Sensitivity Analysis — XGBoost (Optimal = {best_t:.2f})',
             fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'threshold_analysis')
# ──────────────────────────────────────────────
# 10. FEATURE IMPORTANCE (2x2)
# ──────────────────────────────────────────────
print("[10] Feature importance")
fig, axes = plt.subplots(2, 2, figsize=(12, 10))


def _plot_top12(ax, values, title, color, xlabel):
    # Horizontal bar chart of the 12 largest values, smallest at the bottom.
    order = np.argsort(values)[-12:]
    ax.barh(range(len(order)), values[order], color=color,
            edgecolor='none', height=0.7)
    ax.set_yticks(range(len(order)))
    ax.set_yticklabels([feature_names[j] for j in order], fontsize=8)
    ax.set_xlabel(xlabel)
    ax.set_title(title, fontweight='bold')


# (a)-(c): native importances of the tuned tree ensembles.
tree_map = {'(a) Random Forest': 'Random_Forest_Tuned',
            '(b) XGBoost': 'XGBoost_Tuned',
            '(c) LightGBM': 'LightGBM_Tuned'}
for idx, (title, key) in enumerate(tree_map.items()):
    _plot_top12(axes[idx // 2, idx % 2], models[key].feature_importances_,
                title, 'steelblue', 'Importance')

# (d): absolute logistic-regression coefficients as a linear baseline.
_plot_top12(axes[1, 1], np.abs(models['Logistic_Regression'].coef_[0]),
            '(d) Logistic Regression', 'coral', '|Coefficient|')

fig.suptitle('Feature Importance Across Models (Top 12)',
             fontsize=12, fontweight='bold', y=1.0)
fig.tight_layout()
save(fig, 'feature_importance')
# ──────────────────────────────────────────────
# 11. SHAP SUMMARY
# ──────────────────────────────────────────────
print("[11] SHAP summary")
explainer = shap.TreeExplainer(models['XGBoost'])
X_sample = X_test.iloc[:2000]  # subsample: SHAP on the full test set is slow
shap_vals = explainer.shap_values(X_sample)
# Some SHAP versions return one array per class; keep the fraud class.
if isinstance(shap_vals, list):
    shap_vals = shap_vals[1]
plt.figure(figsize=(9, 7))
shap.summary_plot(shap_vals, X_sample, feature_names=feature_names,
                  show=False, max_display=15, plot_size=None)
plt.title('SHAP Feature Impact on Fraud Prediction (XGBoost)',
          fontsize=11, fontweight='bold', pad=12)
plt.tight_layout()
# shap draws on the current figure, so grab it and go through the shared
# save() helper instead of duplicating the PNG/PDF savefig boilerplate.
save(plt.gcf(), 'shap_summary')

# ──────────────────────────────────────────────
# 12. SHAP TOP 10 BAR
# ──────────────────────────────────────────────
print("[12] SHAP top 10")
mean_shap = np.abs(shap_vals).mean(axis=0)
fi = pd.DataFrame({'Feature': feature_names, 'SHAP': mean_shap}) \
       .sort_values('SHAP', ascending=False)
top10 = fi.head(10)
fig, ax = plt.subplots(figsize=(7, 5))
# Reverse so the highest-impact feature sits at the top of the chart.
ax.barh(range(10), top10['SHAP'].values[::-1], color='steelblue',
        edgecolor='none', height=0.6)
ax.set_yticks(range(10))
ax.set_yticklabels(top10['Feature'].values[::-1])
ax.set_xlabel('Mean |SHAP Value|')
ax.set_title('Top 10 Features Driving Fraud Predictions',
             fontsize=11, fontweight='bold')
fig.tight_layout()
save(fig, 'shap_top10')
# ──────────────────────────────────────────────
# 13. LIME EXPLANATION
# ──────────────────────────────────────────────
print("[13] LIME explanation")
from lime.lime_tabular import LimeTabularExplainer

proba_all = models['XGBoost'].predict_proba(X_test)[:, 1]
fraud_idx = np.where(y_test == 1)[0]
# Prefer a fraud case the model actually catches; fall back to the first one.
sample_idx = next((idx for idx in fraud_idx if proba_all[idx] > 0.5),
                  fraud_idx[0])
X_np = X_test.values
lime_exp = LimeTabularExplainer(X_np, feature_names=feature_names,
                                class_names=['Legit', 'Fraud'],
                                discretize_continuous=True, random_state=42)
# Request the fraud class explicitly: with top_labels=1 only the
# *predicted* class gets an explanation, so as_list(label=1) raises
# KeyError whenever the fallback sample is predicted legitimate.
explanation = lime_exp.explain_instance(X_np[sample_idx],
                                        models['XGBoost'].predict_proba,
                                        num_features=12, labels=(1,))
exp_list = explanation.as_list(label=1)
feats = [f for f, w in exp_list]
weights = [w for f, w in exp_list]
# Red = pushes towards fraud, green = pushes towards legitimate.
cols = ['#e74c3c' if w > 0 else '#27ae60' for w in weights]
fig, ax = plt.subplots(figsize=(9, 6))
ax.barh(range(len(feats)), weights, color=cols, edgecolor='none', height=0.6)
ax.set_yticks(range(len(feats)))
ax.set_yticklabels(feats, fontsize=8)
ax.set_xlabel('Feature Contribution')
ax.set_title(f'LIME Explanation — Fraud Sample (P = {proba_all[sample_idx]:.4f})',
             fontsize=11, fontweight='bold')
ax.axvline(x=0, color='black', lw=0.5)
from matplotlib.patches import Patch
ax.legend(handles=[Patch(fc='#e74c3c', label='Increases fraud risk'),
                   Patch(fc='#27ae60', label='Decreases fraud risk')],
          loc='lower right', fontsize=8, framealpha=0.95)
fig.tight_layout()
save(fig, 'lime_explanation')
# ──────────────────────────────────────────────
# 14. ERROR ANALYSIS
# ──────────────────────────────────────────────
print("[14] Error analysis")
proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
preds = (proba_xgb >= 0.5).astype(int)
y_true = y_test.values
fn_mask = (preds == 0) & (y_true == 1)  # fraud the model missed
fp_mask = (preds == 1) & (y_true == 0)  # legit flagged as fraud

fig, axes = plt.subplots(1, 3, figsize=(14, 4.5))

# (a) score distribution of missed fraud, if any exists.
if fn_mask.any():
    axes[0].hist(proba_xgb[fn_mask], bins=15, color='#e74c3c', alpha=0.75,
                 edgecolor='black', lw=0.3)
    axes[0].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
    axes[0].set_title('(a) Missed Fraud (FN)', fontweight='bold')
    axes[0].set_xlabel('Predicted P(Fraud)')
    axes[0].set_ylabel('Count')
    axes[0].legend(fontsize=8)

# (b) score distribution of false alarms, if any exists.
if fp_mask.any():
    axes[1].hist(proba_xgb[fp_mask], bins=15, color='#f39c12', alpha=0.75,
                 edgecolor='black', lw=0.3)
    axes[1].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
    axes[1].set_title('(b) False Alarms (FP)', fontweight='bold')
    axes[1].set_xlabel('Predicted P(Fraud)')
    axes[1].set_ylabel('Count')
    axes[1].legend(fontsize=8)

# (c) overall score distribution by true class.
axes[2].hist(proba_xgb[y_true == 0], bins=50, color='#27ae60', alpha=0.5,
             label='Legit', density=True)
axes[2].hist(proba_xgb[y_true == 1], bins=50, color='#e74c3c', alpha=0.5,
             label='Fraud', density=True)
axes[2].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
axes[2].set_title('(c) Score Distribution by Class', fontweight='bold')
axes[2].set_xlabel('Predicted P(Fraud)')
axes[2].set_ylabel('Density')
axes[2].legend(fontsize=8, loc='upper center')

fig.suptitle('Error Analysis — XGBoost Predictions',
             fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'error_analysis')
# ──────────────────────────────────────────────
# 15. ARCHITECTURE DIAGRAM (cleaner)
# ──────────────────────────────────────────────
print("[15] Architecture diagram")
from matplotlib.patches import FancyBboxPatch

fig, ax = plt.subplots(figsize=(14, 9), facecolor='white')
ax.set_xlim(0, 14)
ax.set_ylim(0, 10)
ax.axis('off')

# Palette: input / processing / model / output / storage-monitoring.
c_in, c_proc, c_mod, c_out, c_stor = (
    '#3498db', '#27ae60', '#c0392b', '#f39c12', '#8e44ad')


def bx(x, y, w, h, txt, col, fs=9):
    """Draw a rounded, labelled box with lower-left corner at (x, y)."""
    patch = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.12",
                           fc=col, ec='#2c3e50', lw=1.5, alpha=0.88)
    ax.add_patch(patch)
    ax.text(x + w / 2, y + h / 2, txt, ha='center', va='center', fontsize=fs,
            fontweight='bold', color='white', multialignment='center')


def ar(x1, y1, x2, y2):
    """Draw an arrow from (x1, y1) to (x2, y2)."""
    ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
                arrowprops=dict(arrowstyle='->', color='#2c3e50', lw=1.8))


ax.text(7, 9.4, 'Fraud Detection System — End-to-End Architecture',
        ha='center', fontsize=14, fontweight='bold', color='#2c3e50')

# Row 1 — data ingestion pipeline.
bx(0.3, 7.8, 2.8, 0.9, 'Transaction\nInput', c_in, 10)
bx(3.8, 7.8, 2.8, 0.9, 'Feature\nEngineering\n(12 new features)', c_proc, 8)
bx(7.3, 7.8, 2.8, 0.9, 'RobustScaler\n(train-only fit)', c_proc, 8)
bx(10.8, 7.8, 2.8, 0.9, 'Drift\nMonitoring', c_stor, 9)
ar(3.1, 8.25, 3.8, 8.25)
ar(6.6, 8.25, 7.3, 8.25)
ar(10.1, 8.25, 10.8, 8.25)

# Row 2 — the model zoo, each box fed from the scaler.
model_names = ['LR', 'RF', 'XGBoost\n(BEST)', 'LightGBM', 'MLP', 'Auto-\nencoder']
for i, mn in enumerate(model_names):
    bx(0.3 + i * 2.3, 5.5, 2.0, 1.0, mn, c_mod, 8)
    ar(8.7, 7.8, 0.3 + i * 2.3 + 1.0, 6.5)

# Row 3 — tuning feeds the ensemble.
bx(2.5, 3.2, 3.5, 1.0, 'Optuna\nHyperparameter\nTuning (TPE)', c_stor, 9)
bx(7.5, 3.2, 3.5, 1.0, 'Voting Ensemble\n(XGB + LGBM + RF)', c_out, 9)
ar(6.0, 3.7, 7.5, 3.7)

# Row 4 — serving API and explainability layer.
bx(2.5, 1.0, 3.5, 1.0, 'FastAPI\nPOST /predict\n< 10 ms', c_in, 9)
bx(7.5, 1.0, 3.5, 1.0, 'Explainability\nSHAP + LIME', c_proc, 9)
ar(9.25, 3.2, 9.25, 2.0)
ar(4.25, 3.2, 4.25, 2.0)

fig.tight_layout()
save(fig, 'architecture_diagram')

print("\n" + "=" * 60)
print("ALL 15 FIGURES REGENERATED SUCCESSFULLY")
print("=" * 60)