# File size: 23,071 Bytes
#
# 083886c

"""
Regenerate ALL figures with proper spacing, no overlapping legends/labels.
Each figure is single-purpose and sized for IEEE column width.
"""
import os, sys
sys.path.insert(0, '/app/fraud_detection')
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import shap
import warnings
warnings.filterwarnings('ignore')

from ae_model import AutoencoderWrapper, Autoencoder
from sklearn.metrics import (
    roc_curve, precision_recall_curve, roc_auc_score,
    average_precision_score, confusion_matrix,
    precision_score, recall_score, f1_score, matthews_corrcoef
)
from config import DATA_DIR, MODELS_DIR, FIGURES_DIR, FIG_DPI, FIG_BG

# Global style
plt.rcParams.update({
    'font.size': 10,
    'axes.titlesize': 11,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 8,
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
    'savefig.facecolor': 'white',
    'savefig.bbox': 'tight',
    'savefig.dpi': 300,
    'figure.dpi': 100,
})
sns.set_style("whitegrid")

def save(fig, name):
    """Write ``fig`` to FIGURES_DIR as ``<name>.png`` and ``<name>.pdf``, then close it.

    Args:
        fig: The matplotlib figure to export.
        name: File stem (no extension) used for both output files.

    The PNG is rendered at ``FIG_DPI``; the PDF is written without an
    explicit dpi. Both use a tight bounding box, a white background and
    0.15 in of padding. A confirmation line is printed after the figure is
    closed.
    """
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(FIGURES_DIR, exist_ok=True)
    shared = dict(bbox_inches='tight', facecolor='white', pad_inches=0.15)
    fig.savefig(os.path.join(FIGURES_DIR, f"{name}.png"), dpi=FIG_DPI, **shared)
    fig.savefig(os.path.join(FIGURES_DIR, f"{name}.pdf"), **shared)
    plt.close(fig)
    print(f"  Saved: {name}.png/pdf")

# Load everything: the raw CSV, the processed bundle and the fitted models.
csv_path = os.path.join(DATA_DIR, "creditcard.csv")
processed_path = os.path.join(DATA_DIR, "processed_data.joblib")
models_path = os.path.join(MODELS_DIR, "all_models_with_ae.joblib")

raw_df = pd.read_csv(csv_path)
data = joblib.load(processed_path)
models = joblib.load(models_path)

# Pull the pieces used by the plotting sections out of the processed bundle.
X_test = data['X_test']
y_test = data['y_test']
feature_names = data['feature_names']

banner = "=" * 60
print(banner)
print("REGENERATING ALL FIGURES (fixed spacing)")
print(banner)

# ──────────────────────────────────────────────
# 1. CLASS DISTRIBUTION
# ──────────────────────────────────────────────
print("\n[1] Class distribution")
# value_counts() orders by frequency; reindex by class label (0, 1) so the
# counts always line up with the fixed 'Legitimate' / 'Fraud' labels below.
cc = raw_df['Class'].value_counts().reindex([0, 1], fill_value=0)
# Class shares in percent, derived from the data rather than hard-coded.
pct = cc / cc.sum() * 100
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
colors = ['#27ae60', '#e74c3c']

# (a) Bar chart of raw counts on a log axis, so both bars are readable.
bars = axes[0].bar(['Legitimate (0)', 'Fraud (1)'], cc.values, color=colors, edgecolor='black', lw=0.6, width=0.55)
axes[0].set_yscale('log')
axes[0].set_ylabel('Count (log scale)')
axes[0].set_title('(a) Transaction Counts')
for b, v in zip(bars, cc.values):
    # Place the count just above each bar (multiplicative offset on a log axis).
    axes[0].text(b.get_x()+b.get_width()/2, v*1.15, f'{v:,}', ha='center', va='bottom', fontsize=9, fontweight='bold')

# (b) Pie chart; percentages are embedded in the labels, so autopct stays empty.
wedges, texts, autotexts = axes[1].pie(
    cc.values, labels=[f'Legitimate\n{pct.loc[0]:.3f}%', f'Fraud\n{pct.loc[1]:.3f}%'],
    colors=colors, autopct='', startangle=90, explode=(0, 0.08),
    textprops={'fontsize': 9}, wedgeprops={'edgecolor': 'white', 'linewidth': 1.5}
)
axes[1].set_title('(b) Fraud Ratio')
fig.suptitle('Class Distribution in Credit Card Fraud Dataset', fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'class_distribution')

# ──────────────────────────────────────────────
# 2. AMOUNT ANALYSIS
# ──────────────────────────────────────────────
print("[2] Amount analysis")
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
legit_amt = raw_df[raw_df['Class']==0]['Amount']
fraud_amt = raw_df[raw_df['Class']==1]['Amount']

# (a) Restrict the bins to the displayed window. Without `range`, the 80 bins
# span the full amount range and only a few coarse bars fall inside 0-1500.
axes[0,0].hist(legit_amt, bins=80, range=(0, 1500), color='#27ae60', alpha=0.8, edgecolor='none')
axes[0,0].set_title('(a) Legitimate Amounts')
axes[0,0].set_xlabel('Amount ($)')
axes[0,0].set_ylabel('Frequency')
axes[0,0].set_xlim(0, 1500)

# (b) Fraud amounts over their full range.
axes[0,1].hist(fraud_amt, bins=40, color='#e74c3c', alpha=0.8, edgecolor='none')
axes[0,1].set_title('(b) Fraudulent Amounts')
axes[0,1].set_xlabel('Amount ($)')
axes[0,1].set_ylabel('Frequency')

# (c) Both classes overlaid on a log1p scale.
axes[1,0].hist(np.log1p(legit_amt), bins=50, color='#27ae60', alpha=0.55, label='Legitimate', edgecolor='none')
axes[1,0].hist(np.log1p(fraud_amt), bins=50, color='#e74c3c', alpha=0.55, label='Fraud', edgecolor='none')
axes[1,0].set_title('(c) Log-Scaled Comparison')
axes[1,0].set_xlabel('log(1 + Amount)')
axes[1,0].set_ylabel('Frequency')
axes[1,0].legend(loc='upper right', framealpha=0.9)

# (d) Boxplot with amounts clipped at 500 so the boxes stay visible.
bp = axes[1,1].boxplot(
    [legit_amt.clip(upper=500), fraud_amt.clip(upper=500)],
    patch_artist=True, widths=0.45,
    medianprops=dict(color='black', lw=1.5)
)
# Label the boxes through the axis rather than the `labels=` keyword, which is
# deprecated in recent Matplotlib; this form works on old and new versions.
axes[1,1].set_xticklabels(['Legitimate', 'Fraud'])
bp['boxes'][0].set_facecolor('#27ae60')
bp['boxes'][1].set_facecolor('#e74c3c')
for b in bp['boxes']:
    b.set_alpha(0.7)
axes[1,1].set_title('(d) Boxplot (capped at $500)')
axes[1,1].set_ylabel('Amount ($)')

fig.suptitle('Transaction Amount Analysis by Class', fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'amount_analysis')

# ──────────────────────────────────────────────
# 3. TIME ANALYSIS
# ──────────────────────────────────────────────
print("[3] Time analysis")
# Work on a copy so the derived 'Hour' column never leaks into raw_df.
raw_df_t = raw_df.copy()
raw_df_t['Hour'] = (raw_df_t['Time'] / 3600) % 24
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
density_ax, rate_ax = axes

# (a) Per-class density of transactions over the (fractional) hour value.
for cls, shade, tag in ((0, '#27ae60', 'Legitimate'), (1, '#e74c3c', 'Fraud')):
    class_hours = raw_df_t.loc[raw_df_t['Class'] == cls, 'Hour']
    density_ax.hist(class_hours, bins=48, color=shade, alpha=0.55, label=tag, density=True)
density_ax.set_title('(a) Transaction Density by Hour')
density_ax.set_xlabel('Hour of Day')
density_ax.set_ylabel('Density')
density_ax.legend(loc='upper left', framealpha=0.9)

# (b) Mean of the Class column per integer hour, expressed in percent.
hour_bucket = raw_df_t['Hour'].astype(int)
hourly = raw_df_t.groupby(hour_bucket)['Class'].mean() * 100
rate_ax.bar(hourly.index, hourly.values, color='#e74c3c', alpha=0.75, edgecolor='black', lw=0.3)
rate_ax.set_title('(b) Fraud Rate by Hour')
rate_ax.set_xlabel('Hour of Day')
rate_ax.set_ylabel('Fraud Rate (%)')

fig.suptitle('Temporal Patterns in Transaction Data', fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'time_analysis')

# ──────────────────────────────────────────────
# 4. CORRELATION HEATMAP  (single chart, not dual)
# ──────────────────────────────────────────────
print("[4] Correlation heatmap")
# Correlation of every column with the Class label, sorted ascending.
class_column = raw_df.corr()['Class']
corrs = class_column.drop('Class').sort_values()
fig, ax = plt.subplots(figsize=(8, 7))

# Red for negative correlations, green otherwise.
colors_bar = []
for value in corrs.values:
    colors_bar.append('#e74c3c' if value < 0 else '#27ae60')

ax.barh(corrs.index, corrs.values, color=colors_bar, edgecolor='none', height=0.7)
ax.set_xlabel('Pearson Correlation with Fraud Class')
ax.set_title('Feature Correlation with Fraud', fontsize=12, fontweight='bold')
# Thin zero line separating negative from positive bars.
ax.axvline(x=0, color='black', lw=0.5)
ax.tick_params(axis='y', labelsize=8)
fig.tight_layout()
save(fig, 'correlation_heatmap')

# ──────────────────────────────────────────────
# 5. FEATURE DISTRIBUTIONS (6 features, 2x3 grid)
# ──────────────────────────────────────────────
print("[5] Feature distributions")
# Six features with the largest absolute correlation to Class.
ranked_abs = corrs.abs().sort_values(ascending=False)
top6 = ranked_abs.head(6).index.tolist()
fig, axes = plt.subplots(2, 3, figsize=(12, 7))
axes = axes.ravel()
is_legit = raw_df['Class'] == 0
is_fraud = raw_df['Class'] == 1
for panel, feat in zip(axes, top6):
    # Overlay normalised histograms of the two classes for this feature.
    panel.hist(raw_df[is_legit][feat], bins=50, color='#27ae60', alpha=0.5, label='Legit', density=True)
    panel.hist(raw_df[is_fraud][feat], bins=50, color='#e74c3c', alpha=0.5, label='Fraud', density=True)
    panel.set_title(feat, fontweight='bold')
    panel.legend(loc='upper right', fontsize=7, framealpha=0.9)
    panel.set_ylabel('Density')
fig.suptitle('Distribution of Top 6 Discriminative Features by Class', fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'feature_distributions')

# ──────────────────────────────────────────────
# 6. ROC CURVES (top 5 models only for clarity)
# ──────────────────────────────────────────────
print("[6] ROC curves")
top_models = ['XGBoost', 'Voting_Ensemble', 'XGBoost_Tuned', 'Random_Forest_Tuned', 'LightGBM_Tuned']
cmap = plt.cm.Set1
fig, ax = plt.subplots(figsize=(7, 6))
for slot, model_key in enumerate(top_models):
    # Models absent from the loaded bundle are skipped; each keeps its own colour slot.
    if model_key not in models:
        continue
    scores = models[model_key].predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, scores)
    auc_val = roc_auc_score(y_test, scores)
    display_name = model_key.replace("_", " ")
    ax.plot(fpr, tpr, color=cmap(slot), lw=2, label=f'{display_name} ({auc_val:.4f})')
# Diagonal reference line for a random classifier.
ax.plot([0,1],[0,1],'k--', lw=0.8, label='Random Baseline')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curves — Top 5 Models', fontsize=12, fontweight='bold')
ax.legend(loc='lower right', fontsize=8, framealpha=0.95)
# Small margins so curves hugging the axes are not clipped.
ax.set_xlim([-0.01, 1.01])
ax.set_ylim([-0.01, 1.03])
fig.tight_layout()
save(fig, 'roc_curves')

# ──────────────────────────────────────────────
# 7. PR CURVES
# ──────────────────────────────────────────────
print("[7] PR curves")
fig, ax = plt.subplots(figsize=(7, 6))
for slot, model_key in enumerate(top_models):
    # Models absent from the loaded bundle are skipped.
    if model_key not in models:
        continue
    scores = models[model_key].predict_proba(X_test)[:, 1]
    prec, rec, _ = precision_recall_curve(y_test, scores)
    ap = average_precision_score(y_test, scores)
    display_name = model_key.replace("_", " ")
    ax.plot(rec, prec, color=cmap(slot), lw=2, label=f'{display_name} ({ap:.4f})')
# Horizontal reference at the mean of y_test (the positive-class rate).
baseline = y_test.mean()
ax.axhline(y=baseline, color='k', ls='--', lw=0.8, label=f'Baseline ({baseline:.4f})')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curves — Top 5 Models', fontsize=12, fontweight='bold')
ax.legend(loc='upper right', fontsize=8, framealpha=0.95)
ax.set_xlim([-0.01, 1.01])
ax.set_ylim([-0.01, 1.03])
fig.tight_layout()
save(fig, 'pr_curves')

# ──────────────────────────────────────────────
# 8. CONFUSION MATRICES (2x3 grid for top 6)
# ──────────────────────────────────────────────
print("[8] Confusion matrices")
cm_models = ['XGBoost', 'Voting_Ensemble', 'Random_Forest_Tuned', 'LightGBM_Tuned', 'MLP', 'Logistic_Regression']
fig, axes = plt.subplots(2, 3, figsize=(13, 8))
axes = axes.ravel()
for i, name in enumerate(cm_models):
    if name not in models:
        # Hide the unused panel instead of leaving an empty framed axes in the grid.
        axes[i].axis('off')
        continue
    proba = models[name].predict_proba(X_test)[:, 1]
    preds = (proba >= 0.5).astype(int)
    # Pin the label order so the matrix is always 2x2 and matches the
    # 'Legit' / 'Fraud' tick labels, even if one class is never predicted.
    cm = confusion_matrix(y_test, preds, labels=[0, 1])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[i],
                xticklabels=['Legit', 'Fraud'], yticklabels=['Legit', 'Fraud'],
                cbar=False, annot_kws={'size': 10})
    axes[i].set_title(name.replace('_', ' '), fontsize=10, fontweight='bold')
    axes[i].set_ylabel('Actual')
    axes[i].set_xlabel('Predicted')
fig.suptitle('Confusion Matrices on Test Set (threshold = 0.5)', fontsize=12, fontweight='bold', y=1.01)
fig.tight_layout()
save(fig, 'confusion_matrices')

# ──────────────────────────────────────────────
# 9. THRESHOLD ANALYSIS
# ──────────────────────────────────────────────
print("[9] Threshold analysis")
proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
thresholds = np.arange(0.05, 0.96, 0.025)


def _scores_at(cutoff):
    """Return precision/recall/F1/MCC on the test set at one decision cutoff."""
    hard = (proba_xgb >= cutoff).astype(int)
    return {
        't': cutoff,
        'Prec': precision_score(y_test, hard, zero_division=0),
        'Rec': recall_score(y_test, hard, zero_division=0),
        'F1': f1_score(y_test, hard, zero_division=0),
        'MCC': matthews_corrcoef(y_test, hard),
    }


rows = [_scores_at(cutoff) for cutoff in thresholds]
dt = pd.DataFrame(rows)
# Threshold at which F1 peaks across the sweep.
best_row = dt['F1'].idxmax()
best_t = dt.loc[best_row, 't']

fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))
prf_ax, mcc_ax = axes

# (a) Precision, recall and F1 against the threshold.
curve_specs = (
    ('Prec', 'b-', 2, 'Precision'),
    ('Rec', 'r-', 2, 'Recall'),
    ('F1', 'g-', 2.5, 'F1 Score'),
)
for column, fmt, width, caption in curve_specs:
    prf_ax.plot(dt['t'], dt[column], fmt, lw=width, label=caption)
prf_ax.axvline(x=best_t, color='gray', ls='--', lw=1.2, label=f'Optimal ({best_t:.2f})')
prf_ax.set_xlabel('Decision Threshold')
prf_ax.set_ylabel('Score')
prf_ax.set_title('(a) Precision / Recall / F1', fontweight='bold')
prf_ax.legend(loc='center left', framealpha=0.95, fontsize=8)

# (b) MCC against the threshold, with the same F1-optimal marker.
mcc_ax.plot(dt['t'], dt['MCC'], 'm-', lw=2, label='MCC')
mcc_ax.axvline(x=best_t, color='gray', ls='--', lw=1.2)
mcc_ax.set_xlabel('Decision Threshold')
mcc_ax.set_ylabel('MCC')
mcc_ax.set_title('(b) Matthews Correlation Coefficient', fontweight='bold')
mcc_ax.legend(loc='upper right', framealpha=0.95, fontsize=8)

fig.suptitle(f'Threshold Sensitivity Analysis — XGBoost (Optimal = {best_t:.2f})', fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'threshold_analysis')

# ──────────────────────────────────────────────
# 10. FEATURE IMPORTANCE (2x2)
# ──────────────────────────────────────────────
print("[10] Feature importance")
fig, axes = plt.subplots(2, 2, figsize=(12, 10))


def _draw_top12(panel, scores, heading, shade, x_caption):
    """Draw the 12 largest entries of ``scores`` as horizontal bars on ``panel``."""
    order = np.argsort(scores)[-12:]
    slots = range(len(order))
    panel.barh(slots, scores[order], color=shade, edgecolor='none', height=0.7)
    panel.set_yticks(slots)
    panel.set_yticklabels([feature_names[j] for j in order], fontsize=8)
    panel.set_xlabel(x_caption)
    panel.set_title(heading, fontweight='bold')


# Panels (a)-(c): the tree models' own feature_importances_ attribute.
tree_map = {'(a) Random Forest': 'Random_Forest_Tuned', '(b) XGBoost': 'XGBoost_Tuned', '(c) LightGBM': 'LightGBM_Tuned'}
for position, (heading, model_key) in enumerate(tree_map.items()):
    grid_row, grid_col = divmod(position, 2)
    _draw_top12(axes[grid_row, grid_col], models[model_key].feature_importances_,
                heading, 'steelblue', 'Importance')

# Panel (d): absolute logistic-regression coefficients.
lr = models['Logistic_Regression']
coefs = np.abs(lr.coef_[0])
_draw_top12(axes[1, 1], coefs, '(d) Logistic Regression', 'coral', '|Coefficient|')

fig.suptitle('Feature Importance Across Models (Top 12)', fontsize=12, fontweight='bold', y=1.0)
fig.tight_layout()
save(fig, 'feature_importance')

# ──────────────────────────────────────────────
# 11. SHAP SUMMARY
# ──────────────────────────────────────────────
print("[11] SHAP summary")
explainer = shap.TreeExplainer(models['XGBoost'])
# Explain only the first 2000 test rows.
X_sample = X_test.iloc[:2000]
shap_vals = explainer.shap_values(X_sample)
# Normalise to a 2-D (rows, features) array for class 1. Depending on the SHAP
# version and model, per-class output may arrive as a list of arrays or as one
# 3-D array with the class on the last axis; a 2-D result is left untouched.
if isinstance(shap_vals, list):
    shap_vals = shap_vals[1]
elif getattr(shap_vals, 'ndim', 2) == 3:
    shap_vals = shap_vals[:, :, 1]

plt.figure(figsize=(9, 7))
# plot_size=None keeps the figure size chosen above instead of SHAP's auto-sizing.
shap.summary_plot(shap_vals, X_sample, feature_names=feature_names, show=False, max_display=15, plot_size=None)
plt.title('SHAP Feature Impact on Fraud Prediction (XGBoost)', fontsize=11, fontweight='bold', pad=12)
plt.tight_layout()
# Export via the shared helper so PNG/PDF options match every other figure.
save(plt.gcf(), 'shap_summary')
# Close any other figures that may still be open after the SHAP plot.
plt.close('all')

# ──────────────────────────────────────────────
# 12. SHAP TOP 10 BAR
# ──────────────────────────────────────────────
print("[12] SHAP top 10")
# Mean absolute SHAP value per feature, largest first.
mean_shap = np.abs(shap_vals).mean(axis=0)
fi = pd.DataFrame({'Feature': feature_names, 'SHAP': mean_shap}).sort_values('SHAP', ascending=False)
top10 = fi.head(10)
# Size the chart from the rows actually available, so a dataset with fewer
# than 10 features does not cause a length mismatch in barh.
n_top = len(top10)
positions = range(n_top)
fig, ax = plt.subplots(figsize=(7, 5))
# Reverse the order so the most important feature is drawn at the top.
ax.barh(positions, top10['SHAP'].values[::-1], color='steelblue', edgecolor='none', height=0.6)
ax.set_yticks(positions)
ax.set_yticklabels(top10['Feature'].values[::-1])
ax.set_xlabel('Mean |SHAP Value|')
ax.set_title(f'Top {n_top} Features Driving Fraud Predictions', fontsize=11, fontweight='bold')
fig.tight_layout()
save(fig, 'shap_top10')

# ──────────────────────────────────────────────
# 13. LIME EXPLANATION
# ──────────────────────────────────────────────
print("[13] LIME explanation")
from lime.lime_tabular import LimeTabularExplainer
proba_all = models['XGBoost'].predict_proba(X_test)[:, 1]
fraud_idx = np.where(y_test == 1)[0]
# Prefer the first fraud row the model actually flags (P > 0.5); if none is
# flagged, fall back to the first fraud row.
sample_idx = next((i for i in fraud_idx if proba_all[i] > 0.5), None)
if sample_idx is None:
    sample_idx = fraud_idx[0]

X_np = X_test.values
# NOTE(review): the explainer's background data is the test set itself —
# confirm this is intended rather than the training split.
lime_exp = LimeTabularExplainer(X_np, feature_names=feature_names, class_names=['Legit', 'Fraud'], discretize_continuous=True, random_state=42)
# Explain the fraud class (label 1) explicitly. With top_labels=1 only the
# predicted class is explained, so as_list(label=1) raises KeyError whenever
# the fallback sample is predicted legitimate.
explanation = lime_exp.explain_instance(X_np[sample_idx], models['XGBoost'].predict_proba, num_features=12, labels=(1,))
exp_list = explanation.as_list(label=1)
feats = [f for f, w in exp_list]
weights = [w for f, w in exp_list]
# Red for positive weights, green otherwise.
cols = ['#e74c3c' if w > 0 else '#27ae60' for w in weights]

fig, ax = plt.subplots(figsize=(9, 6))
ax.barh(range(len(feats)), weights, color=cols, edgecolor='none', height=0.6)
ax.set_yticks(range(len(feats)))
ax.set_yticklabels(feats, fontsize=8)
ax.set_xlabel('Feature Contribution')
ax.set_title(f'LIME Explanation — Fraud Sample (P = {proba_all[sample_idx]:.4f})', fontsize=11, fontweight='bold')
ax.axvline(x=0, color='black', lw=0.5)
from matplotlib.patches import Patch
# Manual legend: the bars carry no labels, so build proxy patches for the two colours.
ax.legend(handles=[Patch(fc='#e74c3c', label='Increases fraud risk'), Patch(fc='#27ae60', label='Decreases fraud risk')],
          loc='lower right', fontsize=8, framealpha=0.95)
fig.tight_layout()
save(fig, 'lime_explanation')

# ──────────────────────────────────────────────
# 14. ERROR ANALYSIS
# ──────────────────────────────────────────────
print("[14] Error analysis")
proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
preds = (proba_xgb >= 0.5).astype(int)
truth = y_test.values
# False negatives: true label 1 predicted 0. False positives: the reverse.
fn_mask = (preds == 0) & (truth == 1)
fp_mask = (preds == 1) & (truth == 0)
fn_proba = proba_xgb[fn_mask]
fp_proba = proba_xgb[fp_mask]

fig, axes = plt.subplots(1, 3, figsize=(14, 4.5))

# Panels (a) and (b) share a layout and differ only in data, colour and title.
error_panels = (
    (axes[0], fn_mask, fn_proba, '#e74c3c', '(a) Missed Fraud (FN)'),
    (axes[1], fp_mask, fp_proba, '#f39c12', '(b) False Alarms (FP)'),
)
for panel, mask, scores, shade, heading in error_panels:
    # The histogram is drawn only when there is at least one such error.
    if mask.any():
        panel.hist(scores, bins=15, color=shade, alpha=0.75, edgecolor='black', lw=0.3)
    panel.axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
    panel.set_title(heading, fontweight='bold')
    panel.set_xlabel('Predicted P(Fraud)')
    panel.set_ylabel('Count')
    panel.legend(fontsize=8)

# Panel (c): normalised score histograms for each true class.
dist_ax = axes[2]
for cls, shade, tag in ((0, '#27ae60', 'Legit'), (1, '#e74c3c', 'Fraud')):
    dist_ax.hist(proba_xgb[truth == cls], bins=50, color=shade, alpha=0.5, label=tag, density=True)
dist_ax.axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
dist_ax.set_title('(c) Score Distribution by Class', fontweight='bold')
dist_ax.set_xlabel('Predicted P(Fraud)')
dist_ax.set_ylabel('Density')
dist_ax.legend(fontsize=8, loc='upper center')

fig.suptitle('Error Analysis — XGBoost Predictions', fontsize=12, fontweight='bold', y=1.02)
fig.tight_layout()
save(fig, 'error_analysis')

# ──────────────────────────────────────────────
# 15. ARCHITECTURE DIAGRAM (cleaner)
# ──────────────────────────────────────────────
print("[15] Architecture diagram")
from matplotlib.patches import FancyBboxPatch
# Blank 14 x 10 canvas in data units; the axes frame is hidden.
fig, ax = plt.subplots(figsize=(14, 9), facecolor='white')
ax.set_xlim(0, 14)
ax.set_ylim(0, 10)
ax.axis('off')

# Fill colours used by the diagram boxes.
c_in = '#3498db'
c_proc = '#27ae60'
c_mod = '#c0392b'
c_out = '#f39c12'
c_stor = '#8e44ad'

def bx(x, y, w, h, txt, col, fs=9):
    """Draw a rounded, filled box on the module-level ``ax`` with centred white bold text.

    Args:
        x, y: Lower-left corner of the box in data coordinates.
        w, h: Box width and height.
        txt: Label text (may contain newlines).
        col: Fill colour.
        fs: Label font size.
    """
    patch = FancyBboxPatch(
        (x, y), w, h,
        boxstyle="round,pad=0.12",
        fc=col, ec='#2c3e50', lw=1.5, alpha=0.88,
    )
    ax.add_patch(patch)
    centre_x = x+w/2
    centre_y = y+h/2
    ax.text(
        centre_x, centre_y, txt,
        ha='center', va='center',
        fontsize=fs, fontweight='bold', color='white',
        multialignment='center',
    )

def ar(x1,y1,x2,y2):
    """Draw an arrow on the module-level ``ax`` from (x1, y1) to (x2, y2)."""
    arrow_style = dict(arrowstyle='->', color='#2c3e50', lw=1.8)
    # An empty annotation: only the arrow between the two points is rendered.
    ax.annotate('', xy=(x2, y2), xytext=(x1, y1), arrowprops=arrow_style)

# Diagram title.
ax.text(7, 9.4, 'Fraud Detection System — End-to-End Architecture', ha='center', fontsize=14, fontweight='bold', color='#2c3e50')

# Row 1: four boxes across the top, then three left-to-right connectors.
row1_boxes = (
    (0.3, 'Transaction\nInput', c_in, 10),
    (3.8, 'Feature\nEngineering\n(12 new features)', c_proc, 8),
    (7.3, 'RobustScaler\n(train-only fit)', c_proc, 8),
    (10.8, 'Drift\nMonitoring', c_stor, 9),
)
for left, caption, shade, size in row1_boxes:
    bx(left, 7.8, 2.8, 0.9, caption, shade, size)
for start_x, end_x in ((3.1, 3.8), (6.6, 7.3), (10.1, 10.8)):
    ar(start_x, 8.25, end_x, 8.25)

# Row 2 — models: one box per model, each fed by an arrow starting at (8.7, 7.8).
model_names = ['LR', 'RF', 'XGBoost\n(BEST)', 'LightGBM', 'MLP', 'Auto-\nencoder']
for slot, caption in enumerate(model_names):
    left = 0.3 + slot*2.3
    bx(left, 5.5, 2.0, 1.0, caption, c_mod, 8)
    ar(8.7, 7.8, left+1.0, 6.5)

# Row 3: tuning box, ensemble box, and the connector between them.
bx(2.5, 3.2, 3.5, 1.0, 'Optuna\nHyperparameter\nTuning (TPE)', c_stor, 9)
bx(7.5, 3.2, 3.5, 1.0, 'Voting Ensemble\n(XGB + LGBM + RF)', c_out, 9)
ar(6.0, 3.7, 7.5, 3.7)

# Row 4: serving and explainability boxes, each reached by a vertical arrow from row 3.
bx(2.5, 1.0, 3.5, 1.0, 'FastAPI\nPOST /predict\n< 10 ms', c_in, 9)
bx(7.5, 1.0, 3.5, 1.0, 'Explainability\nSHAP + LIME', c_proc, 9)
for column_x in (9.25, 4.25):
    ar(column_x, 3.2, column_x, 2.0)

fig.tight_layout()
save(fig, 'architecture_diagram')

closing_rule = "=" * 60
print("\n" + closing_rule)
print("ALL 15 FIGURES REGENERATED SUCCESSFULLY")
print(closing_rule)