fraud-detection-system / regen_figures.py

Fix: comprehensive 18-page paper with detailed descriptions + fixed non-overlapping figures

083886c verified 9 days ago

23.1 kB

	"""
	Regenerate ALL figures with proper spacing, no overlapping legends/labels.
	Each figure is single-purpose and sized for IEEE column width.
	"""
	import os, sys
	sys.path.insert(0, '/app/fraud_detection')
	import numpy as np
	import pandas as pd
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt
	import seaborn as sns
	import joblib
	import shap
	import warnings
	warnings.filterwarnings('ignore')

	from ae_model import AutoencoderWrapper, Autoencoder
	from sklearn.metrics import (
	roc_curve, precision_recall_curve, roc_auc_score,
	average_precision_score, confusion_matrix,
	precision_score, recall_score, f1_score, matthews_corrcoef
	)
	from config import DATA_DIR, MODELS_DIR, FIGURES_DIR, FIG_DPI, FIG_BG

	# Global style — shared plotting defaults, set once before any figure is built.
	# Each plt.rc(group, key=value) call writes the rcParam "<group>.<key>".
	plt.rc('font', size=10)
	plt.rc('axes', titlesize=11, labelsize=10, facecolor='white')
	plt.rc('xtick', labelsize=9)
	plt.rc('ytick', labelsize=9)
	plt.rc('legend', fontsize=8)
	plt.rc('figure', facecolor='white', dpi=100)
	plt.rc('savefig', facecolor='white', bbox='tight', dpi=300)
	# Seaborn style is applied after the rcParams, keeping the original call order.
	sns.set_style("whitegrid")

	def save(fig, name):
		"""Export ``fig`` as ``<name>.png`` and ``<name>.pdf`` under FIGURES_DIR, then close it.

		Args:
			fig: Matplotlib figure to write out.
			name: File stem (no extension) used for both output files.
		"""
		# Create the output folder on demand; savefig raises FileNotFoundError
		# when the target directory does not exist yet.
		os.makedirs(FIGURES_DIR, exist_ok=True)
		export_opts = dict(bbox_inches='tight', facecolor='white', pad_inches=0.15)
		# Only the PNG gets the explicit FIG_DPI; the PDF call passes no dpi, as before.
		fig.savefig(os.path.join(FIGURES_DIR, f"{name}.png"), dpi=FIG_DPI, **export_opts)
		fig.savefig(os.path.join(FIGURES_DIR, f"{name}.pdf"), **export_opts)
		# Close the figure so it is not kept open after export.
		plt.close(fig)
		print(f" Saved: {name}.png/pdf")

	# Load everything: the raw CSV, the preprocessed data bundle and the fitted models.
	raw_df = pd.read_csv(os.path.join(DATA_DIR, "creditcard.csv"))
	data = joblib.load(os.path.join(DATA_DIR, "processed_data.joblib"))
	models = joblib.load(os.path.join(MODELS_DIR, "all_models_with_ae.joblib"))
	# Pull the pieces the figures below need out of the processed-data dict.
	X_test = data['X_test']
	y_test = data['y_test']
	feature_names = data['feature_names']

	# Three-line start banner, emitted with a single print call.
	print("=" * 60, "REGENERATING ALL FIGURES (fixed spacing)", "=" * 60, sep="\n")

	# ──────────────────────────────────────────────
	# 1. CLASS DISTRIBUTION
	# ──────────────────────────────────────────────
	print("\n[1] Class distribution")
	# Pin the row order to (Class 0, Class 1) so bars, colours and labels stay
	# aligned without relying on value_counts() sorting by frequency.
	cc = raw_df['Class'].value_counts().reindex([0, 1], fill_value=0)
	# Percentages are derived from the counts so the pie labels always match the CSV.
	pct = cc / cc.sum() * 100
	fig, axes = plt.subplots(1, 2, figsize=(10, 4))
	colors = ['#27ae60', '#e74c3c']
	bars = axes[0].bar(['Legitimate (0)', 'Fraud (1)'], cc.values, color=colors, edgecolor='black', lw=0.6, width=0.55)
	axes[0].set_yscale('log')
	axes[0].set_ylabel('Count (log scale)')
	axes[0].set_title('(a) Transaction Counts')
	for b, v in zip(bars, cc.values):
		# A zero count has no position on a log axis, so skip its annotation.
		if v > 0:
			# Multiplicative offset places the label just above the bar on the log scale.
			axes[0].text(b.get_x()+b.get_width()/2, v*1.15, f'{v:,}', ha='center', va='bottom', fontsize=9, fontweight='bold')

	# autopct='' is kept from the original so pie() still returns three values.
	wedges, texts, autotexts = axes[1].pie(
		cc.values,
		labels=[f'Legitimate\n{pct.iloc[0]:.3f}%', f'Fraud\n{pct.iloc[1]:.3f}%'],
		colors=colors, autopct='', startangle=90, explode=(0, 0.08),
		textprops={'fontsize': 9}, wedgeprops={'edgecolor': 'white', 'linewidth': 1.5}
	)
	axes[1].set_title('(b) Fraud Ratio')
	fig.suptitle('Class Distribution in Credit Card Fraud Dataset', fontsize=12, fontweight='bold', y=1.02)
	fig.tight_layout()
	save(fig, 'class_distribution')

	# ──────────────────────────────────────────────
	# 2. AMOUNT ANALYSIS
	# ──────────────────────────────────────────────
	print("[2] Amount analysis")
	fig, axes = plt.subplots(2, 2, figsize=(10, 8))
	# Split the Amount column by class once; all four panels reuse these series.
	legit_amt = raw_df[raw_df['Class']==0]['Amount']
	fraud_amt = raw_df[raw_df['Class']==1]['Amount']

	# (a) Raw legitimate amounts; the x-axis is cut at 1500.
	axes[0,0].hist(legit_amt, bins=80, color='#27ae60', alpha=0.8, edgecolor='none')
	axes[0,0].set_title('(a) Legitimate Amounts')
	axes[0,0].set_xlabel('Amount ($)')
	axes[0,0].set_ylabel('Frequency')
	axes[0,0].set_xlim(0, 1500)

	# (b) Raw fraudulent amounts over their full range.
	axes[0,1].hist(fraud_amt, bins=40, color='#e74c3c', alpha=0.8, edgecolor='none')
	axes[0,1].set_title('(b) Fraudulent Amounts')
	axes[0,1].set_xlabel('Amount ($)')
	axes[0,1].set_ylabel('Frequency')

	# (c) Both classes overlaid after a log1p transform of the amounts.
	axes[1,0].hist(np.log1p(legit_amt), bins=50, color='#27ae60', alpha=0.55, label='Legitimate', edgecolor='none')
	axes[1,0].hist(np.log1p(fraud_amt), bins=50, color='#e74c3c', alpha=0.55, label='Fraud', edgecolor='none')
	axes[1,0].set_title('(c) Log-Scaled Comparison')
	axes[1,0].set_xlabel('log(1 + Amount)')
	axes[1,0].set_ylabel('Frequency')
	axes[1,0].legend(loc='upper right', framealpha=0.9)

	# (d) Boxplot of amounts clipped at 500.
	bp = axes[1,1].boxplot(
		[legit_amt.clip(upper=500), fraud_amt.clip(upper=500)],
		patch_artist=True, widths=0.45,
		medianprops=dict(color='black', lw=1.5)
	)
	# boxplot's `labels=` keyword is deprecated since Matplotlib 3.9 (renamed to
	# `tick_labels=`), and this script silences the warning globally. Setting the
	# tick labels on the axis works on both old and new Matplotlib versions.
	axes[1,1].set_xticklabels(['Legitimate', 'Fraud'])
	bp['boxes'][0].set_facecolor('#27ae60')
	bp['boxes'][1].set_facecolor('#e74c3c')
	for b in bp['boxes']:
		b.set_alpha(0.7)
	axes[1,1].set_title('(d) Boxplot (capped at $500)')
	axes[1,1].set_ylabel('Amount ($)')

	fig.suptitle('Transaction Amount Analysis by Class', fontsize=12, fontweight='bold', y=1.01)
	fig.tight_layout()
	save(fig, 'amount_analysis')

	# ──────────────────────────────────────────────
	# 3. TIME ANALYSIS
	# ──────────────────────────────────────────────
	print("[3] Time analysis")
	# Work on a copy that carries an extra 'Hour' column: Time / 3600, wrapped modulo 24.
	raw_df_t = raw_df.assign(Hour=(raw_df['Time'] / 3600) % 24)
	fig, axes = plt.subplots(1, 2, figsize=(10, 4))

	# (a) Normalised hour histograms, one per class, drawn legitimate first.
	for cls, shade, tag in ((0, '#27ae60', 'Legitimate'), (1, '#e74c3c', 'Fraud')):
		hours = raw_df_t.loc[raw_df_t['Class'] == cls, 'Hour']
		axes[0].hist(hours, bins=48, color=shade, alpha=0.55, label=tag, density=True)
	axes[0].set_title('(a) Transaction Density by Hour')
	axes[0].set_xlabel('Hour of Day')
	axes[0].set_ylabel('Density')
	axes[0].legend(loc='upper left', framealpha=0.9)

	# (b) Mean of Class per integer hour, expressed as a percentage.
	hour_bucket = raw_df_t['Hour'].astype(int)
	hourly = raw_df_t.groupby(hour_bucket)['Class'].mean() * 100
	axes[1].bar(hourly.index, hourly.values, color='#e74c3c', alpha=0.75, edgecolor='black', lw=0.3)
	axes[1].set_title('(b) Fraud Rate by Hour')
	axes[1].set_xlabel('Hour of Day')
	axes[1].set_ylabel('Fraud Rate (%)')

	fig.suptitle('Temporal Patterns in Transaction Data', fontsize=12, fontweight='bold', y=1.02)
	fig.tight_layout()
	save(fig, 'time_analysis')

	# ──────────────────────────────────────────────
	# 4. CORRELATION HEATMAP (single chart, not dual)
	# ──────────────────────────────────────────────
	print("[4] Correlation heatmap")
	# Correlation of every column with 'Class', sorted ascending; the Class row itself is dropped.
	class_corr = raw_df.corr()['Class']
	corrs = class_corr.drop('Class').sort_values()
	fig, ax = plt.subplots(figsize=(8, 7))
	# Red for values below zero, green otherwise.
	colors_bar = []
	for v in corrs.values:
		colors_bar.append('#e74c3c' if v < 0 else '#27ae60')
	ax.barh(corrs.index, corrs.values, color=colors_bar, edgecolor='none', height=0.7)
	ax.set_xlabel('Pearson Correlation with Fraud Class')
	ax.set_title('Feature Correlation with Fraud', fontsize=12, fontweight='bold')
	# Thin vertical reference line at zero correlation.
	ax.axvline(x=0, color='black', lw=0.5)
	ax.tick_params(axis='y', labelsize=8)
	fig.tight_layout()
	save(fig, 'correlation_heatmap')

	# ──────────────────────────────────────────────
	# 5. FEATURE DISTRIBUTIONS (6 features, 2x3 grid)
	# ──────────────────────────────────────────────
	print("[5] Feature distributions")
	# The six features with the largest absolute correlation, strongest first.
	ranked_abs = corrs.abs().sort_values(ascending=False)
	top6 = ranked_abs.head(6).index.tolist()
	fig, axes = plt.subplots(2, 3, figsize=(12, 7))
	axes = axes.ravel()
	legit_rows = raw_df[raw_df['Class']==0]
	fraud_rows = raw_df[raw_df['Class']==1]
	# One panel per feature: normalised histograms of both classes overlaid.
	for panel, feat in zip(axes, top6):
		panel.hist(legit_rows[feat], bins=50, color='#27ae60', alpha=0.5, label='Legit', density=True)
		panel.hist(fraud_rows[feat], bins=50, color='#e74c3c', alpha=0.5, label='Fraud', density=True)
		panel.set_title(feat, fontweight='bold')
		panel.legend(loc='upper right', fontsize=7, framealpha=0.9)
		panel.set_ylabel('Density')
	fig.suptitle('Distribution of Top 6 Discriminative Features by Class', fontsize=12, fontweight='bold', y=1.01)
	fig.tight_layout()
	save(fig, 'feature_distributions')

	# ──────────────────────────────────────────────
	# 6. ROC CURVES (top 5 models only for clarity)
	# ──────────────────────────────────────────────
	print("[6] ROC curves")
	top_models = ['XGBoost', 'Voting_Ensemble', 'XGBoost_Tuned', 'Random_Forest_Tuned', 'LightGBM_Tuned']
	cmap = plt.cm.Set1
	fig, ax = plt.subplots(figsize=(7, 6))
	for i, name in enumerate(top_models):
		# Models absent from the loaded dict are skipped; a model's colour still
		# depends on its position in top_models, not on how many were drawn.
		if name in models:
			proba = models[name].predict_proba(X_test)[:, 1]
			fpr, tpr, _ = roc_curve(y_test, proba)
			auc_val = roc_auc_score(y_test, proba)
			pretty = name.replace("_", " ")
			ax.plot(fpr, tpr, color=cmap(i), lw=2, label=f'{pretty} ({auc_val:.4f})')
	# Diagonal reference line from (0, 0) to (1, 1).
	ax.plot([0, 1], [0, 1], 'k--', lw=0.8, label='Random Baseline')
	ax.set(xlabel='False Positive Rate', ylabel='True Positive Rate')
	ax.set_title('ROC Curves — Top 5 Models', fontsize=12, fontweight='bold')
	ax.legend(loc='lower right', fontsize=8, framealpha=0.95)
	# Axis limits extend slightly beyond [0, 1].
	ax.set_xlim(-0.01, 1.01)
	ax.set_ylim(-0.01, 1.03)
	fig.tight_layout()
	save(fig, 'roc_curves')

	# ──────────────────────────────────────────────
	# 7. PR CURVES
	# ──────────────────────────────────────────────
	print("[7] PR curves")
	fig, ax = plt.subplots(figsize=(7, 6))
	for i, name in enumerate(top_models):
		# Same skip rule and positional colour assignment as the ROC figure.
		if name in models:
			proba = models[name].predict_proba(X_test)[:, 1]
			prec, rec, _ = precision_recall_curve(y_test, proba)
			ap = average_precision_score(y_test, proba)
			pretty = name.replace("_", " ")
			ax.plot(rec, prec, color=cmap(i), lw=2, label=f'{pretty} ({ap:.4f})')
	# Horizontal reference line at the mean of y_test.
	baseline = y_test.mean()
	ax.axhline(y=baseline, color='k', ls='--', lw=0.8, label=f'Baseline ({baseline:.4f})')
	ax.set(xlabel='Recall', ylabel='Precision')
	ax.set_title('Precision-Recall Curves — Top 5 Models', fontsize=12, fontweight='bold')
	ax.legend(loc='upper right', fontsize=8, framealpha=0.95)
	ax.set_xlim(-0.01, 1.01)
	ax.set_ylim(-0.01, 1.03)
	fig.tight_layout()
	save(fig, 'pr_curves')

	# ──────────────────────────────────────────────
	# 8. CONFUSION MATRICES (2x3 grid for top 6)
	# ──────────────────────────────────────────────
	print("[8] Confusion matrices")
	cm_models = ['XGBoost', 'Voting_Ensemble', 'Random_Forest_Tuned', 'LightGBM_Tuned', 'MLP', 'Logistic_Regression']
	fig, axes = plt.subplots(2, 3, figsize=(13, 8))
	axes = axes.ravel()
	for i, name in enumerate(cm_models):
		if name not in models:
			# Turn the panel off instead of leaving an empty framed subplot
			# with default ticks in the saved figure.
			axes[i].axis('off')
			continue
		proba = models[name].predict_proba(X_test)[:, 1]
		# Hard labels at the 0.5 cut-off stated in the figure title.
		preds = (proba >= 0.5).astype(int)
		cm = confusion_matrix(y_test, preds)
		sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[i],
			xticklabels=['Legit', 'Fraud'], yticklabels=['Legit', 'Fraud'],
			cbar=False, annot_kws={'size': 10})
		axes[i].set_title(name.replace('_', ' '), fontsize=10, fontweight='bold')
		axes[i].set_ylabel('Actual')
		axes[i].set_xlabel('Predicted')
	fig.suptitle('Confusion Matrices on Test Set (threshold = 0.5)', fontsize=12, fontweight='bold', y=1.01)
	fig.tight_layout()
	save(fig, 'confusion_matrices')

	# ──────────────────────────────────────────────
	# 9. THRESHOLD ANALYSIS
	# ──────────────────────────────────────────────
	print("[9] Threshold analysis")
	proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
	thresholds = np.arange(0.05, 0.96, 0.025)
	# Score the XGBoost probabilities at every candidate cut-off.
	rows = []
	for t in thresholds:
		p = (proba_xgb >= t).astype(int)
		row = {'t': t}
		row['Prec'] = precision_score(y_test, p, zero_division=0)
		row['Rec'] = recall_score(y_test, p, zero_division=0)
		row['F1'] = f1_score(y_test, p, zero_division=0)
		row['MCC'] = matthews_corrcoef(y_test, p)
		rows.append(row)
	dt = pd.DataFrame(rows)
	# The threshold reported as "Optimal" is the one with the highest F1.
	best_t = dt.loc[dt['F1'].idxmax(), 't']

	fig, axes = plt.subplots(1, 2, figsize=(10, 4.5))
	left, right = axes[0], axes[1]
	# (a) Precision, recall and F1 against the threshold.
	for column, fmt, width, legend_text in (
		('Prec', 'b-', 2, 'Precision'),
		('Rec', 'r-', 2, 'Recall'),
		('F1', 'g-', 2.5, 'F1 Score'),
	):
		left.plot(dt['t'], dt[column], fmt, lw=width, label=legend_text)
	left.axvline(x=best_t, color='gray', ls='--', lw=1.2, label=f'Optimal ({best_t:.2f})')
	left.set_xlabel('Decision Threshold')
	left.set_ylabel('Score')
	left.set_title('(a) Precision / Recall / F1', fontweight='bold')
	left.legend(loc='center left', framealpha=0.95, fontsize=8)

	# (b) MCC against the threshold, with the same vertical marker (unlabelled here).
	right.plot(dt['t'], dt['MCC'], 'm-', lw=2, label='MCC')
	right.axvline(x=best_t, color='gray', ls='--', lw=1.2)
	right.set_xlabel('Decision Threshold')
	right.set_ylabel('MCC')
	right.set_title('(b) Matthews Correlation Coefficient', fontweight='bold')
	right.legend(loc='upper right', framealpha=0.95, fontsize=8)

	fig.suptitle(f'Threshold Sensitivity Analysis — XGBoost (Optimal = {best_t:.2f})', fontsize=12, fontweight='bold', y=1.02)
	fig.tight_layout()
	save(fig, 'threshold_analysis')

	# ──────────────────────────────────────────────
	# 10. FEATURE IMPORTANCE (2x2)
	# ──────────────────────────────────────────────
	print("[10] Feature importance")
	fig, axes = plt.subplots(2, 2, figsize=(12, 10))
	# Panel title -> key in `models`; these three fill the first three grid cells.
	tree_map = {'(a) Random Forest': 'Random_Forest_Tuned', '(b) XGBoost': 'XGBoost_Tuned', '(c) LightGBM': 'LightGBM_Tuned'}
	for idx, (title, key) in enumerate(tree_map.items()):
		# Row-major position of this panel in the 2x2 grid.
		r, c = idx // 2, idx % 2
		m = models[key]
		imp = m.feature_importances_
		# argsort is ascending, so the last 12 indices are the 12 largest importances
		# and the largest ends up as the top bar.
		top_idx = np.argsort(imp)[-12:]
		axes[r,c].barh(range(len(top_idx)), imp[top_idx], color='steelblue', edgecolor='none', height=0.7)
		axes[r,c].set_yticks(range(len(top_idx)))
		axes[r,c].set_yticklabels([feature_names[j] for j in top_idx], fontsize=8)
		axes[r,c].set_xlabel('Importance')
		axes[r,c].set_title(title, fontweight='bold')

	# Fourth panel: absolute logistic-regression coefficients, again the 12 largest.
	lr = models['Logistic_Regression']
	coefs = np.abs(lr.coef_[0])
	top_idx = np.argsort(coefs)[-12:]
	axes[1,1].barh(range(len(top_idx)), coefs[top_idx], color='coral', edgecolor='none', height=0.7)
	axes[1,1].set_yticks(range(len(top_idx)))
	axes[1,1].set_yticklabels([feature_names[j] for j in top_idx], fontsize=8)
	# Plain pipes: the previous '\|' was an invalid escape sequence and put
	# literal backslashes into the rendered axis label.
	axes[1,1].set_xlabel('|Coefficient|')
	axes[1,1].set_title('(d) Logistic Regression', fontweight='bold')
	fig.suptitle('Feature Importance Across Models (Top 12)', fontsize=12, fontweight='bold', y=1.0)
	fig.tight_layout()
	save(fig, 'feature_importance')

	# ──────────────────────────────────────────────
	# 11. SHAP SUMMARY
	# ──────────────────────────────────────────────
	print("[11] SHAP summary")
	explainer = shap.TreeExplainer(models['XGBoost'])
	# Only the first 2000 test rows are explained.
	X_sample = X_test.iloc[:2000]
	shap_vals = explainer.shap_values(X_sample)
	# When a list comes back, element 1 is kept (presumably the fraud class — confirm).
	shap_vals = shap_vals[1] if isinstance(shap_vals, list) else shap_vals

	plt.figure(figsize=(9, 7))
	# plot_size=None is passed so the figure size set above is used.
	shap.summary_plot(shap_vals, X_sample, feature_names=feature_names, show=False, max_display=15, plot_size=None)
	plt.title('SHAP Feature Impact on Fraud Prediction (XGBoost)', fontsize=11, fontweight='bold', pad=12)
	plt.tight_layout()
	# Saved through pyplot's current figure, with the same export options as save().
	export_opts = dict(bbox_inches='tight', facecolor='white', pad_inches=0.15)
	plt.savefig(os.path.join(FIGURES_DIR, "shap_summary.png"), dpi=FIG_DPI, **export_opts)
	plt.savefig(os.path.join(FIGURES_DIR, "shap_summary.pdf"), **export_opts)
	plt.close('all')
	print(" Saved: shap_summary.png/pdf")

	# ──────────────────────────────────────────────
	# 12. SHAP TOP 10 BAR
	# ──────────────────────────────────────────────
	print("[12] SHAP top 10")
	# Mean absolute SHAP value per feature, ranked from largest to smallest.
	mean_shap = np.abs(shap_vals).mean(axis=0)
	fi = pd.DataFrame({'Feature': feature_names, 'SHAP': mean_shap}).sort_values('SHAP', ascending=False)
	top10 = fi.head(10)
	# Use the actual row count so a model with fewer than 10 features still plots.
	n_bars = len(top10)
	fig, ax = plt.subplots(figsize=(7, 5))
	# Values and labels are reversed so the largest bar is drawn at the top.
	ax.barh(range(n_bars), top10['SHAP'].values[::-1], color='steelblue', edgecolor='none', height=0.6)
	ax.set_yticks(range(n_bars))
	ax.set_yticklabels(top10['Feature'].values[::-1])
	# Plain pipes: the previous '\|' was an invalid escape sequence and put
	# literal backslashes into the rendered axis label.
	ax.set_xlabel('Mean |SHAP Value|')
	ax.set_title('Top 10 Features Driving Fraud Predictions', fontsize=11, fontweight='bold')
	fig.tight_layout()
	save(fig, 'shap_top10')

	# ──────────────────────────────────────────────
	# 13. LIME EXPLANATION
	# ──────────────────────────────────────────────
	print("[13] LIME explanation")
	from lime.lime_tabular import LimeTabularExplainer
	proba_all = models['XGBoost'].predict_proba(X_test)[:, 1]
	# Positional indices of the test rows labelled 1.
	fraud_idx = np.where(y_test == 1)[0]
	# Prefer the first fraud row the model scores above 0.5;
	# if there is none, fall back to the first fraud row.
	sample_idx = None
	for idx in fraud_idx:
		if proba_all[idx] > 0.5:
			sample_idx = idx
			break
	if sample_idx is None:
		sample_idx = fraud_idx[0]

	X_np = X_test.values
	lime_exp = LimeTabularExplainer(X_np, feature_names=feature_names, class_names=['Legit', 'Fraud'], discretize_continuous=True, random_state=42)
	# Request label 1 explicitly. With top_labels=1 LIME explains only the highest
	# scoring label, so on the fallback path (top label 0) as_list(label=1) raised KeyError.
	explanation = lime_exp.explain_instance(X_np[sample_idx], models['XGBoost'].predict_proba, num_features=12, labels=(1,))
	exp_list = explanation.as_list(label=1)
	feats = [f for f, w in exp_list]
	weights = [w for f, w in exp_list]
	# Red for positive weights, green otherwise, matching the legend below.
	cols = ['#e74c3c' if w > 0 else '#27ae60' for w in weights]

	fig, ax = plt.subplots(figsize=(9, 6))
	ax.barh(range(len(feats)), weights, color=cols, edgecolor='none', height=0.6)
	ax.set_yticks(range(len(feats)))
	ax.set_yticklabels(feats, fontsize=8)
	ax.set_xlabel('Feature Contribution')
	ax.set_title(f'LIME Explanation — Fraud Sample (P = {proba_all[sample_idx]:.4f})', fontsize=11, fontweight='bold')
	ax.axvline(x=0, color='black', lw=0.5)
	from matplotlib.patches import Patch
	# Proxy patches supply the legend entries because the bars themselves carry no labels.
	ax.legend(handles=[Patch(fc='#e74c3c', label='Increases fraud risk'), Patch(fc='#27ae60', label='Decreases fraud risk')],
		loc='lower right', fontsize=8, framealpha=0.95)
	fig.tight_layout()
	save(fig, 'lime_explanation')

	# ──────────────────────────────────────────────
	# 14. ERROR ANALYSIS
	# ──────────────────────────────────────────────
	# Three panels for the XGBoost model at a 0.5 cut-off: scores of missed fraud
	# rows, scores of false alarms, and the per-class score distributions.
	# NOTE(review): nested indentation is missing in this copy of the file, so the
	# extent of the two `if ...sum() > 0:` bodies below cannot be read off the text.
	# Presumably only the hist() call is guarded — confirm against the original.
	print("[14] Error analysis")
	proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1]
	# Hard labels at the 0.5 cut-off.
	preds = (proba_xgb >= 0.5).astype(int)
	# False negatives: predicted 0 while the true label is 1.
	fn_mask = (preds == 0) & (y_test.values == 1)
	# False positives: predicted 1 while the true label is 0.
	fp_mask = (preds == 1) & (y_test.values == 0)
	fn_proba = proba_xgb[fn_mask]
	fp_proba = proba_xgb[fp_mask]

	fig, axes = plt.subplots(1, 3, figsize=(14, 4.5))
	# (a) Scores the model gave to fraud rows it missed.
	if fn_mask.sum() > 0:
	axes[0].hist(fn_proba, bins=15, color='#e74c3c', alpha=0.75, edgecolor='black', lw=0.3)
	axes[0].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
	axes[0].set_title('(a) Missed Fraud (FN)', fontweight='bold')
	axes[0].set_xlabel('Predicted P(Fraud)')
	axes[0].set_ylabel('Count')
	axes[0].legend(fontsize=8)

	# (b) Scores the model gave to legitimate rows it flagged.
	if fp_mask.sum() > 0:
	axes[1].hist(fp_proba, bins=15, color='#f39c12', alpha=0.75, edgecolor='black', lw=0.3)
	axes[1].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
	axes[1].set_title('(b) False Alarms (FP)', fontweight='bold')
	axes[1].set_xlabel('Predicted P(Fraud)')
	axes[1].set_ylabel('Count')
	axes[1].legend(fontsize=8)

	# (c) Normalised score histograms for each true class, with the 0.5 line.
	axes[2].hist(proba_xgb[y_test.values==0], bins=50, color='#27ae60', alpha=0.5, label='Legit', density=True)
	axes[2].hist(proba_xgb[y_test.values==1], bins=50, color='#e74c3c', alpha=0.5, label='Fraud', density=True)
	axes[2].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold')
	axes[2].set_title('(c) Score Distribution by Class', fontweight='bold')
	axes[2].set_xlabel('Predicted P(Fraud)')
	axes[2].set_ylabel('Density')
	axes[2].legend(fontsize=8, loc='upper center')

	fig.suptitle('Error Analysis — XGBoost Predictions', fontsize=12, fontweight='bold', y=1.02)
	fig.tight_layout()
	save(fig, 'error_analysis')

	# ──────────────────────────────────────────────
	# 15. ARCHITECTURE DIAGRAM (cleaner)
	# ──────────────────────────────────────────────
	print("[15] Architecture diagram")
	from matplotlib.patches import FancyBboxPatch
	fig, ax = plt.subplots(figsize=(14, 9), facecolor='white')
	# Fixed 14 x 10 drawing canvas with the axes hidden; all coordinates below use it.
	ax.set_xlim(0, 14)
	ax.set_ylim(0, 10)
	ax.axis('off')

	# Fill colours referenced by the boxes below.
	c_in, c_proc, c_mod, c_out, c_stor = '#3498db', '#27ae60', '#c0392b', '#f39c12', '#8e44ad'

	def bx(x, y, w, h, txt, col, fs=9):
		"""Draw a rounded box with lower-left corner (x, y), size w by h, and centred bold white text."""
		ax.add_patch(FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.12", fc=col, ec='#2c3e50', lw=1.5, alpha=0.88))
		ax.text(x + w/2, y + h/2, txt, ha='center', va='center', fontsize=fs, fontweight='bold', color='white', multialignment='center')

	def ar(x1,y1,x2,y2):
		"""Draw an arrow from (x1, y1) to (x2, y2)."""
		arrow_style = dict(arrowstyle='->', color='#2c3e50', lw=1.8)
		ax.annotate('', xy=(x2, y2), xytext=(x1, y1), arrowprops=arrow_style)

	ax.text(7, 9.4, 'Fraud Detection System — End-to-End Architecture', ha='center', fontsize=14, fontweight='bold', color='#2c3e50')

	# Row 1 — four boxes first, then the three connecting arrows (same draw order as before)
	for spec in (
		(0.3, 7.8, 2.8, 0.9, 'Transaction\nInput', c_in, 10),
		(3.8, 7.8, 2.8, 0.9, 'Feature\nEngineering\n(12 new features)', c_proc, 8),
		(7.3, 7.8, 2.8, 0.9, 'RobustScaler\n(train-only fit)', c_proc, 8),
		(10.8, 7.8, 2.8, 0.9, 'Drift\nMonitoring', c_stor, 9),
	):
		bx(*spec)
	for x_from, x_to in ((3.1, 3.8), (6.6, 7.3), (10.1, 10.8)):
		ar(x_from, 8.25, x_to, 8.25)

	# Row 2 — models: one box per model, each fed by an arrow starting at (8.7, 7.8)
	model_names = ['LR', 'RF', 'XGBoost\n(BEST)', 'LightGBM', 'MLP', 'Auto-\nencoder']
	for i, mn in enumerate(model_names):
		box_left = 0.3 + i*2.3
		bx(box_left, 5.5, 2.0, 1.0, mn, c_mod, 8)
		# The arrow ends at the top centre of the box (left edge + half the 2.0 width).
		ar(8.7, 7.8, box_left + 1.0, 6.5)

	# Row 3
	bx(2.5, 3.2, 3.5, 1.0, 'Optuna\nHyperparameter\nTuning (TPE)', c_stor, 9)
	bx(7.5, 3.2, 3.5, 1.0, 'Voting Ensemble\n(XGB + LGBM + RF)', c_out, 9)
	ar(6.0, 3.7, 7.5, 3.7)

	# Row 4
	bx(2.5, 1.0, 3.5, 1.0, 'FastAPI\nPOST /predict\n< 10 ms', c_in, 9)
	bx(7.5, 1.0, 3.5, 1.0, 'Explainability\nSHAP + LIME', c_proc, 9)
	# Vertical arrows from the row-3 boxes down to the row-4 boxes.
	ar(9.25, 3.2, 9.25, 2.0)
	ar(4.25, 3.2, 4.25, 2.0)

	fig.tight_layout()
	save(fig, 'architecture_diagram')

	# Closing banner: blank line, rule, message, rule — emitted with one print call.
	print("\n" + "=" * 60, "ALL 15 FIGURES REGENERATED SUCCESSFULLY", "=" * 60, sep="\n")