diff --git a/figures/amount_analysis.pdf b/figures/amount_analysis.pdf
index 76263cac9195a8ab6f3b236da279571563c474d3..c4257af3d68a6fcbbad062c0667b9e940062a596 100644
--- a/figures/amount_analysis.pdf
+++ b/figures/amount_analysis.pdf
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ad2dab89d5e0faaecc71ef8ca9e580da0676b896afc6ce08c2638bdc238b8e3
-size 208946
+oid sha256:0b26a10e4d1972c4e9ffe27982f43efaec36bdbf4458ac75b9fe7491a9cbfa09
+size 198161
diff --git a/figures/amount_analysis.png b/figures/amount_analysis.png
index 45cf1505f57d338f5caf04cd9d583340c30dd4fa..50c6be327dd6dad1312a1bfeddedc0c47f374ef5 100644
--- a/figures/amount_analysis.png
+++ b/figures/amount_analysis.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37a0ac70dea40399691041310838d77f1fc607f505cea6dc1c96702885f1d4d5
-size 265303
+oid sha256:e77c1b40d7658b429d080f455fc6425e5ab37feaea68d348197a8472ea45832c
+size 251175
diff --git a/figures/architecture_diagram.pdf b/figures/architecture_diagram.pdf
index f86aad29ad606fdb82e69243629651dbe4a8ae6b..57c308dc12c78eea7143221e556768d3f7379894 100644
Binary files a/figures/architecture_diagram.pdf and b/figures/architecture_diagram.pdf differ
diff --git a/figures/architecture_diagram.png b/figures/architecture_diagram.png
index 3c2dbda313a58616ae0e30010dbd55e72415c14d..d9666d285d7b560f394c607e88a71cc46771acf3 100644
--- a/figures/architecture_diagram.png
+++ b/figures/architecture_diagram.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa5597deab61809b4b7b943e4417a00049583dda2b19589cf7cd2e70555c7087
-size 379316
+oid sha256:abfab5d5efe0f72a6fed9d2c7ca4bd8fbfbef934869eceb83288262fdfc84ea6
+size 306455
diff --git a/figures/class_distribution.pdf b/figures/class_distribution.pdf
index 791a8691d2eb4941b81bfbdbf92b8194c1ea983a..59a63dc12ebe0390a6360fc196c850d5c5b08d8f 100644
Binary files a/figures/class_distribution.pdf and b/figures/class_distribution.pdf differ
diff --git a/figures/class_distribution.png b/figures/class_distribution.png
index 1bb5e14cc987bac47c462d841bb2bb429743bf5a..f5cf6c23578dd8037547110c8464b71b1cbb3cb6 100644
--- a/figures/class_distribution.png
+++ b/figures/class_distribution.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b05b401fe51303408ef8f0dd41030bf01efdd5b30872e8dcc27039731ae6b35
-size 177820
+oid sha256:1975d0d3024d447ce2b05342e7a5ca923c0a1b1e68d841a192a4be51668c9cd0
+size 123095
diff --git a/figures/confusion_matrices.pdf b/figures/confusion_matrices.pdf
index ad8d47cfa9c0d2e0ab3f8a5d18940c5c19b86658..65a3a0cfc7022c8b563572e5748ff9cf3a4d0818 100644
Binary files a/figures/confusion_matrices.pdf and b/figures/confusion_matrices.pdf differ
diff --git a/figures/confusion_matrices.png b/figures/confusion_matrices.png
index 43fcd760d53f379c38c06d25b720fa85be54119a..3a67ab21c9e9232de531c178edd110005a96adaf 100644
--- a/figures/confusion_matrices.png
+++ b/figures/confusion_matrices.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b71f608d5cd9b007104b719b8d3e6a1b13b03893ffefebf8b9293184eb80d7f
-size 470480
+oid sha256:18f7d77c1a764bef96bdbc4df2650ff05cbdc7e9c41b85adbc10ef8dbbc9bf0a
+size 182144
diff --git a/figures/correlation_heatmap.pdf b/figures/correlation_heatmap.pdf
index 52bd8c005c30897ed83b4f1f77c60bc10883f74c..e41503e73a1453594ede0bcf789978f919ac8e34 100644
Binary files a/figures/correlation_heatmap.pdf and b/figures/correlation_heatmap.pdf differ
diff --git a/figures/correlation_heatmap.png b/figures/correlation_heatmap.png
index 460a9164b2240a96cf72f44e655bd02481c959d5..430006bcdac77fc18b30ce760c70b9703e2cab02 100644
--- a/figures/correlation_heatmap.png
+++ b/figures/correlation_heatmap.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:182dd8c4e14a6b45a7307b21ebb51f92c9f4c047944b7360b49c849f626b7ec3
-size 462551
+oid sha256:ce62cc0b48be6d27587b3c35c399f265d588a717d599d0ec1a4fdd8242da2702
+size 116957
diff --git a/figures/error_analysis.pdf b/figures/error_analysis.pdf
index 0438ca59cc761621dd8e7d2e582f11934208aa41..768c08a7f498912f38255922231677dc69b7aeb3 100644
Binary files a/figures/error_analysis.pdf and b/figures/error_analysis.pdf differ
diff --git a/figures/error_analysis.png b/figures/error_analysis.png
index 6f9b081208eeb76452d9532524e6a65c94a08b59..c70c45cb600d59b0d758e9606767ef07ce7cc82e 100644
--- a/figures/error_analysis.png
+++ b/figures/error_analysis.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6382ccb9b3b51a9d3d88272e00224a6dfff0b5f456f1245e95c87ebfcefc1995
-size 160363
+oid sha256:6b15721b467f8af07d6637e749661aa88bcae693613c50863ece8bfca8748a7c
+size 153837
diff --git a/figures/feature_distributions.pdf b/figures/feature_distributions.pdf
index 6914424ae4bad7ff3d13c76661b03837067abbfe..12b6af58c9221c70c921cf44ff84b56bb44c039a 100644
Binary files a/figures/feature_distributions.pdf and b/figures/feature_distributions.pdf differ
diff --git a/figures/feature_distributions.png b/figures/feature_distributions.png
index 58a8ab959dea5a6e814e3e017c754cbdb486e317..c926955eb22a3929279790417cb6b41a901c4813 100644
--- a/figures/feature_distributions.png
+++ b/figures/feature_distributions.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcfe06b7cf2e44eee0146ec50b88d6dd4305e5d946b9158dc6033b72933fbfd5
-size 384124
+oid sha256:5ebc837814d3edaaefc518b994fc495c66381b88981b4d1453faad7756bc1b7e
+size 203835
diff --git a/figures/feature_importance.pdf b/figures/feature_importance.pdf
index 74c4e72078d1a09b8a0c3f73ca1226ca80d3c0df..05eefaa590366bd4f6270ba5870dd3b6e75a326e 100644
Binary files a/figures/feature_importance.pdf and b/figures/feature_importance.pdf differ
diff --git a/figures/feature_importance.png b/figures/feature_importance.png
index 85bba7c1dbb191df53857711db17eec52bc1cb9c..58794808d36f7f39a44d52ad4940686110c13fd0 100644
--- a/figures/feature_importance.png
+++ b/figures/feature_importance.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04afdc343650ab71d86e33515dc9669297f904102cd73c8229da9fd4aabc5073
-size 347471
+oid sha256:f1652dcd0126c888034b6960a5ae3b253f31f818166b4d6d2dc874896c20eb4e
+size 252256
diff --git a/figures/lime_explanation.pdf b/figures/lime_explanation.pdf
index 2bcc3309c527cb2b1ef069107e09aa524302dd50..eac3397ed426b0172ce17962706ca501bf661a7c 100644
Binary files a/figures/lime_explanation.pdf and b/figures/lime_explanation.pdf differ
diff --git a/figures/lime_explanation.png b/figures/lime_explanation.png
index 96ad2e6e38126ca9bb159a0214b9eb38d89f4808..1ccac5afbfd3e397acc79ad579484989c0c20173 100644
--- a/figures/lime_explanation.png
+++ b/figures/lime_explanation.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50c6ec68d66e7977aaf65ce90a12413cf7fa1fb97f8e7e77b397e52f3cd20006
-size 203416
+oid sha256:099c78c2fdf92ec5cdca4ab6a823b3c83c5a97564c94ecfe451e220e0ab51440
+size 138350
diff --git a/figures/pr_curves.pdf b/figures/pr_curves.pdf
index 030fd50ede46f3af7ba27671395f77770ca8b119..ff166e0b7a92bd27284802f84ee2c6b815c9f8ed 100644
Binary files a/figures/pr_curves.pdf and b/figures/pr_curves.pdf differ
diff --git a/figures/pr_curves.png b/figures/pr_curves.png
index d6abd5e16b3e20eab4d2afe0af992f235f414866..ca1de83f76b800d72ec5b21e399afc03fa05d199 100644
--- a/figures/pr_curves.png
+++ b/figures/pr_curves.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f719e308aeaa34d5db28b329741134927dc1af1ad82843d2018551a4a9bb1c5
-size 425799
+oid sha256:87e1296d2793fd621666190ae4591f621a355302d23c8a847037e2011d9d50bd
+size 160071
diff --git a/figures/roc_curves.pdf b/figures/roc_curves.pdf
index 07163a4c192206b11962a44d7c658aaeefabddd0..afb429a930e519b84dcaf4a8149a6133c0b3a563 100644
Binary files a/figures/roc_curves.pdf and b/figures/roc_curves.pdf differ
diff --git a/figures/roc_curves.png b/figures/roc_curves.png
index b10cb7f0ffa241648f6027b295f4fca311b44bad..69f310caa69377594ac38e596f4e8506ae7bdc1f 100644
--- a/figures/roc_curves.png
+++ b/figures/roc_curves.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c28cefa7dc92a24b9f6e404cad75071586bd7a8d5b87a2b2ca610a28ae4f55e1
-size 350371
+oid sha256:d6bbeaee334bc0e4b1d367dec1d16cf475af45bfdf8d7b4d73190e49138ef4c6
+size 177411
diff --git a/figures/shap_summary.pdf b/figures/shap_summary.pdf
index a869c89300fe3879c61505c8ca2bec298b11e100..033d5e4009171bc9aeabe75d4f366a235f6c2537 100644
--- a/figures/shap_summary.pdf
+++ b/figures/shap_summary.pdf
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:738db2436aea8790532091e2b8e293a661cfbeb693baf43109296535583fe8e4
-size 109289
+oid sha256:e8a7b6bb9c0877d663b473567d1db2b0c593a55685daf7b9e44ee43d8b42a86f
+size 334449
diff --git a/figures/shap_summary.png b/figures/shap_summary.png
index 36fbcc6d12adb5f54ccb9df978be735afbd922aa..40fa11f2f9d8aa28ad7fb172112985450f900bcc 100644
--- a/figures/shap_summary.png
+++ b/figures/shap_summary.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ed12074d2049ed48348184340751c01392dd1033c3f16d676a5e168e98a3f6c
-size 578169
+oid sha256:36de1b1347498bf08fca58120e269dca656e4a4c1e5f18da7b9bc56fb5283768
+size 497573
diff --git a/figures/shap_top10.pdf b/figures/shap_top10.pdf
index 6909fca2ec93f1218fbad68a6df1f72e4ba85774..95745622658eda11f233bb98d9e6d7add1415616 100644
Binary files a/figures/shap_top10.pdf and b/figures/shap_top10.pdf differ
diff --git a/figures/shap_top10.png b/figures/shap_top10.png
index 63ab913d57acffdb9ab9876be906aaf63c2c9d63..d79233533873bb81f7490cbd27df7dd1c08922d1 100644
--- a/figures/shap_top10.png
+++ b/figures/shap_top10.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1751f83c1f01f8ca599a89129e2d3c6a061923860155cace7ea1073a3dbc753b
-size 108172
+oid sha256:3c7c56263b2d1596eaa02a90998b0243e66e80406d3420cc059153159af9a700
+size 79031
diff --git a/figures/threshold_analysis.pdf b/figures/threshold_analysis.pdf
index 79ebe3f934d59e10275e0450af863060bcedb7b7..4ef4a78a4766b9d83afd56ac1d902bd2b158ed8d 100644
Binary files a/figures/threshold_analysis.pdf and b/figures/threshold_analysis.pdf differ
diff --git a/figures/threshold_analysis.png b/figures/threshold_analysis.png
index 7d2be07b59e5f0dc5506605f1d7846d93e48da18..c3562e924e1dfd22f9935838a2a82bebb54615c6 100644
--- a/figures/threshold_analysis.png
+++ b/figures/threshold_analysis.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5c43d80551d97d2166bad1a9fb72e4e1cce49d6841460b1915ea380ad57ffa3
-size 227260
+oid sha256:dcfa1ab061d4666c71e0574e61c7741646eb1e89bc47ae6ea14e4979944ca1cf
+size 198727
diff --git a/figures/time_analysis.pdf b/figures/time_analysis.pdf
index 985538584dd532f9931ca9d52da7716277ab7ce9..572d6bdf6868d5d9974644f8061e44c695ca3e40 100644
Binary files a/figures/time_analysis.pdf and b/figures/time_analysis.pdf differ
diff --git a/figures/time_analysis.png b/figures/time_analysis.png
index 13db7ea3f025d27b6b91bbfe7e38d25d0f589161..4e01e30a77e29d8c9f6b1898608a0701b6f1b9c3 100644
--- a/figures/time_analysis.png
+++ b/figures/time_analysis.png
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f959c84093b177e118aa0ab0324973eae072085b76992b98f60af247b01c4834
-size 130431
+oid sha256:573c38eb1262d835c4ee77e11b3abe1f3a72aae7773cab77739f355dc7a12d4b
+size 136453
diff --git a/generate_pdf.py b/generate_pdf.py
index df88f4bce89ff13182e595db48a664e0133c7258..35596a0326c647444218efcb8b516305ee19016e 100644
--- a/generate_pdf.py
+++ b/generate_pdf.py
@@ -1,353 +1,709 @@
-"""Generate IEEE-style PDF paper using fpdf2."""
+"""
+Generate a comprehensive IEEE-style PDF paper using fpdf2.
+Full descriptions, observations, analysis in every section.
+"""
 import os, sys
 sys.path.insert(0, '/app/fraud_detection')
 from fpdf import FPDF
 
 FIGURES_DIR = '/app/fraud_detection/figures'
 PAPER_DIR = '/app/fraud_detection/paper'
+os.makedirs(PAPER_DIR, exist_ok=True)
+
+LM = 15  # left margin
+RM = 15  # right margin
+BW = 215.9 - LM - RM  # body width (Letter)
 
 class IEEEPaper(FPDF):
     def __init__(self):
         super().__init__('P', 'mm', 'letter')
-        self.set_auto_page_break(auto=True, margin=20)
-
+        self.set_margins(LM, 18, RM)
+        self.set_auto_page_break(auto=True, margin=22)
+
     def header(self):
         if self.page_no() > 1:
-            self.set_font('Helvetica', 'I', 8)
-            self.cell(0, 5, 'IEEE Transactions on Financial Technology', align='C')
-            self.ln(8)
-
+            self.set_font('Helvetica', 'I', 7)
+            self.cell(0, 4, 'IEEE Transactions -- Comprehensive Fraud Detection Framework', align='C')
+            self.ln(6)
+
     def footer(self):
-        self.set_y(-15)
-        self.set_font('Helvetica', 'I', 8)
-        self.cell(0, 10, f'Page {self.page_no()}', align='C')
-
-    def section_title(self, num, title):
-        self.ln(4)
+        self.set_y(-14)
+        self.set_font('Helvetica', 'I', 7)
+        self.cell(0, 10, f'{self.page_no()}', align='C')
+
+    def section(self, num, title):
+        self.ln(5)
         self.set_font('Helvetica', 'B', 11)
-        self.cell(0, 6, f'{num}. {title.upper()}', ln=True)
-        self.ln(2)
-
-    def subsection_title(self, label, title):
+        self.cell(0, 6, f'{num}. {title.upper()}', ln=True)
+        self.ln(2)
+
+    def subsec(self, label, title):
+        self.ln(3)
         self.set_font('Helvetica', 'B', 10)
         self.cell(0, 5, f'{label} {title}', ln=True)
         self.ln(1)
-
-    def body_text(self, text):
-        self.set_font('Times', '', 10)
-        self.multi_cell(0, 4.5, text)
+
+    def p(self, text):
+        """Body paragraph."""
+        self.set_font('Times', '', 9.5)
+        self.multi_cell(0, 4.2, text)
+        self.ln(1.5)
+
+    def p_indent(self, text):
+        """Indented body paragraph."""
+        self.set_font('Times', '', 9.5)
+        self.set_x(LM + 5)
+        self.multi_cell(BW - 5, 4.2, text)
+        self.ln(1.5)
+
+    def bullet(self, items):
+        self.set_font('Times', '', 9.5)
+        for item in items:
+            self.set_x(LM + 4)
+            self.cell(4, 4.2, '-')
+            self.multi_cell(BW - 8, 4.2, item)
+            self.ln(0.5)
         self.ln(1)
-
-    def add_figure(self, img_path, caption, width=170):
-        if os.path.exists(img_path):
-            self.ln(3)
-            x = (self.w - width) / 2
-            self.image(img_path, x=x, w=width)
-            self.ln(2)
-            self.set_font('Helvetica', 'I', 8)
-            self.multi_cell(0, 4, caption, align='C')
-            self.ln(3)
-
-    def add_table(self, headers, rows, caption=""):
+
+    def fig(self, path, caption, w=155):
+        if not os.path.exists(path):
+            return
+        self.ln(3)
+        x = (self.w - w) / 2
+        self.image(path, x=x, w=w)
+        self.ln(2)
+        self.set_font('Helvetica', 'I', 8)
+        self.multi_cell(0, 3.8, caption, align='C')
+        self.ln(3)
+
+    def tbl(self, hdrs, rows, caption=''):
         if caption:
-            self.set_font('Helvetica', 'I', 8)
-            self.multi_cell(0, 4, caption, align='C')
             self.ln(2)
-
-        col_width = (self.w - 20) / len(headers)
-
-        # Header
-        self.set_font('Helvetica', 'B', 8)
-        for h in headers:
-            self.cell(col_width, 5, h, border=1, align='C')
+            self.set_font('Helvetica', 'I', 8)
+            self.multi_cell(0, 3.8, caption, align='C')
+            self.ln(1)
+        cw = BW / len(hdrs)
+        self.set_font('Helvetica', 'B', 7.5)
+        for h in hdrs:
+            self.cell(cw, 4.5, h, border=1, align='C')
         self.ln()
-
-        # Rows
-        self.set_font('Times', '', 8)
+        self.set_font('Times', '', 7.5)
         for row in rows:
-            for cell in row:
-                self.cell(col_width, 5, str(cell), border=1, align='C')
+            for c in row:
+                self.cell(cw, 4.5, str(c), border=1, align='C')
             self.ln()
-        self.ln(3)
+        self.ln(2)
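# [Editor's sketch] Intended usage of the IEEEPaper helpers above -- a minimal,
# self-contained illustration rather than part of the patch; the real entry
# point is build() below, and the output path '/tmp/helper_demo.pdf' is hypothetical.
demo = IEEEPaper()
demo.add_page()
demo.section('I', 'Introduction')          # numbered heading, uppercased
demo.p('One body paragraph set in 9.5 pt Times with 4.2 mm leading.')
demo.tbl(['Model', 'F1'], [['XGBoost', '0.851']], 'Table: demo')
demo.output('/tmp/helper_demo.pdf')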
 
-def create_paper():
+def build():
     pdf = IEEEPaper()
-
-    # Title page
+
+    # ===== TITLE PAGE =====
     pdf.add_page()
-    pdf.ln(15)
-    pdf.set_font('Helvetica', 'B', 16)
-    pdf.multi_cell(0, 8, 'A Comprehensive Ensemble-Based Framework\nfor Credit Card Fraud Detection\nwith Explainable AI', align='C')
-    pdf.ln(5)
+    pdf.ln(18)
+    pdf.set_font('Helvetica', 'B', 17)
+    pdf.multi_cell(0, 9, 'A Comprehensive Ensemble-Based Framework\nfor Credit Card Fraud Detection\nwith Explainable AI', align='C')
+    pdf.ln(6)
     pdf.set_font('Helvetica', '', 11)
     pdf.cell(0, 6, 'Raj Vivan', align='C', ln=True)
-    pdf.set_font('Helvetica', 'I', 10)
-    pdf.cell(0, 5, 'Department of Computer Science, Independent Research', align='C', ln=True)
-    pdf.ln(8)
-
-    # Abstract
+    pdf.set_font('Helvetica', 'I', 9)
+    pdf.cell(0, 5, 'Department of Computer Science | Independent Research', align='C', ln=True)
+    pdf.ln(10)
+
+    # --- ABSTRACT ---
     pdf.set_font('Helvetica', 'B', 10)
     pdf.cell(0, 5, 'Abstract', align='C', ln=True)
     pdf.ln(2)
-    pdf.body_text(
-        'Credit card fraud poses a significant threat to the global financial ecosystem, with estimated losses exceeding $32 billion annually. '
-        'This paper presents a comprehensive end-to-end fraud detection framework that systematically evaluates and compares seven machine learning approaches: '
-        'Logistic Regression, Random Forest, XGBoost, LightGBM, Multilayer Perceptron, Autoencoder-based anomaly detection, and a Voting Ensemble. '
-        'Using the benchmark European Cardholder dataset (284,807 transactions, 0.173% fraud rate), we engineer 12 novel features and address the extreme '
-        'class imbalance through both SMOTE oversampling and cost-sensitive learning with class weights. Our XGBoost model achieves the best performance '
-        'with a PR-AUC of 0.8166, precision of 0.9048, recall of 0.8028, and F1-score of 0.8507 on the held-out test set. We demonstrate that optimizing '
-        'the decision threshold from the default 0.5 to 0.55 improves F1 from 0.8507 to 0.8636. Comprehensive model explainability via SHAP and LIME '
-        'analysis reveals that PCA components V4, V14, and V12 are the primary discriminative features. Error analysis shows that false negatives arise '
-        'from sophisticated fraud patterns that closely mimic legitimate transaction behavior. We deploy the model as a production-ready FastAPI service '
-        'achieving sub-10ms inference latency.'
-    )
-
-    pdf.set_font('Helvetica', 'I', 9)
-    pdf.cell(0, 5, 'Keywords: Fraud detection, credit card, machine learning, XGBoost, ensemble learning, explainable AI, SHAP', ln=True)
-
-    # I. Introduction
-    pdf.section_title('I', 'Introduction')
-    pdf.body_text(
-        'Financial fraud detection has become one of the most critical applications of machine learning in the modern digital economy. '
-        'The proliferation of electronic payment systems has led to an exponential increase in both the volume of transactions and the '
-        'sophistication of fraudulent activities. According to the Nilson Report, global card fraud losses reached $32.34 billion in 2021 '
-        'and are projected to exceed $43 billion by 2026.'
-    )
-    pdf.body_text(
-        'The fundamental challenge in fraud detection lies in the extreme class imbalance inherent in transaction data. In typical datasets, '
-        'fraudulent transactions constitute less than 0.5% of all transactions. This imbalance renders conventional classification metrics '
-        'such as accuracy misleading and necessitates specialized evaluation criteria including Precision-Recall AUC and Matthews Correlation Coefficient.'
-    )
-    pdf.body_text(
-        'This paper makes the following contributions: (1) A systematic comparison of seven ML approaches for fraud detection; '
-        '(2) Novel feature engineering with 12 engineered features; (3) Rigorous evaluation with SMOTE applied only after splitting; '
-        '(4) Comprehensive explainability via SHAP and LIME; (5) Production-ready API with sub-10ms latency; '
-        '(6) Quantitative business impact analysis.'
-    )
-
-    # II. Related Work
-    pdf.section_title('II', 'Related Work')
-    pdf.body_text(
-        'Dal Pozzolo et al. [1] provided foundational analysis of class imbalance and concept drift in fraud detection. '
-        'Chawla et al. [2] introduced SMOTE for synthetic minority oversampling. Fernandez et al. [3] demonstrated that SMOTE '
-        'must be applied exclusively to training data to avoid data leakage. Chen and Guestrin [4] introduced XGBoost, which has '
-        'become dominant for tabular classification. Ke et al. [5] proposed LightGBM with leaf-wise tree growth. '
-        'Pumsirirat and Yan [6] employed autoencoders for anomaly-based fraud detection. Lundberg and Lee [7] introduced SHAP '
-        'for feature attribution. Ribeiro et al. [8] proposed LIME for instance-level interpretability. '
-        'Shwartz-Ziv and Armon [9] demonstrated that tree-based methods still outperform deep learning on tabular data. '
-        'Grinsztajn et al. [10] corroborated this with extensive benchmarking. Akiba et al. [11] introduced Optuna for '
-        'hyperparameter optimization. Bolton and Hand [12] surveyed statistical fraud detection methods. '
-        'Zhang et al. [13] proposed attention-based RNNs for sequential fraud patterns. '
-        'Taha and Malebary [14] demonstrated optimized LightGBM for fraud detection. '
-        'Belle and Papantonis [15] surveyed explainable AI methods for financial decision-making.'
-    )
-
-    # III. Dataset and EDA
-    pdf.section_title('III', 'Dataset and Exploratory Data Analysis')
-    pdf.body_text(
-        'We use the European Cardholder Credit Card Fraud Detection dataset containing 284,807 transactions made over two days in '
-        'September 2013. The dataset includes 28 PCA-transformed features (V1-V28), Time and Amount features, and a binary Class label. '
-        'The dataset exhibits extreme class imbalance with only 492 fraudulent transactions (0.173%), yielding an imbalance ratio of 1:577.'
-    )
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'class_distribution.png'),
-        'Fig. 1: Class distribution showing extreme imbalance (0.173% fraud rate).', width=160)
-
-    pdf.body_text(
-        'Key observations: (1) Fraudulent transactions have a mean of $122.21 vs legitimate mean of $88.29; '
-        '(2) Night-time fraud rate is 0.518% vs 0.137% daytime; (3) V17, V14, V12 show strongest negative correlation with fraud; '
-        '(4) No missing values; 1,081 duplicates removed; (5) Only Time and Amount need normalization.'
-    )
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'correlation_heatmap.png'),
-        'Fig. 2: Feature correlation with fraud class and correlation heatmap.', width=170)
-
-    # IV. Methodology
-    pdf.section_title('IV', 'Methodology')
-
-    pdf.subsection_title('A.', 'Feature Engineering')
-    pdf.body_text(
-        'We engineer 12 additional features: cyclic hour encoding (Hour_sin, Hour_cos), time difference between transactions, '
-        'log-transformed amount, amount deviation from mean/median, transaction velocity, amount z-score, '
-        'interaction features (V14*V17, V12*V14, V10*V14), and PCA magnitude (L2 norm of all V features).'
-    )
-
-    pdf.subsection_title('B.', 'Class Imbalance Handling')
-    pdf.body_text(
-        'We compare SMOTE (applied to training set only, 1:2 ratio) and cost-sensitive learning with balanced class weights '
-        '(w0=0.501, w1=300.01). SMOTE is used for the MLP; class weights for tree-based models.'
-    )
-
-    pdf.subsection_title('C.', 'Data Splitting and Scaling')
-    pdf.body_text(
-        'Stratified 70/15/15 train/validation/test split preserves fraud ratio. RobustScaler fitted exclusively on training data '
-        'to prevent data leakage.'
-    )
-
-    pdf.subsection_title('D.', 'Models')
-    pdf.body_text(
-        'We evaluate: (1) Logistic Regression (baseline, L2, C=0.1); (2) Random Forest (150 trees, depth 12); '
-        '(3) XGBoost (200 estimators, depth 6, lr=0.1); (4) LightGBM (200 estimators, depth 8); '
-        '(5) MLP (128-64-32, ReLU, adaptive lr); (6) Autoencoder (42-64-32-16-32-64-42, trained on legitimate only); '
-        '(7) Voting Ensemble (soft voting over top 3 tuned models).'
-    )
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'architecture_diagram.png'),
-        'Fig. 3: System architecture diagram.', width=170)
-
-    # V. Experimental Setup
-    pdf.section_title('V', 'Experimental Setup')
-    pdf.body_text(
-        'All experiments used Python 3.12, scikit-learn 1.8.0, XGBoost 3.2.0, LightGBM 4.6.0, PyTorch 2.11.0, and Optuna 4.8.0. '
-        'Metrics: Precision, Recall, F1, ROC-AUC, PR-AUC (primary), and MCC. '
-        'Hyperparameter tuning via Optuna with TPE sampler (15-20 trials per model).'
-    )
-
-    # VI. Results
-    pdf.section_title('VI', 'Results and Discussion')
-
-    pdf.add_table(
-        ['Model', 'Precision', 'Recall', 'F1', 'ROC-AUC', 'PR-AUC', 'MCC'],
+    pdf.p(
+        'Credit card fraud poses a significant and growing threat to the global financial ecosystem, with estimated annual losses exceeding $32 billion. '
+        'As electronic payment volumes increase, so does the sophistication of fraudulent activities, demanding increasingly advanced detection systems. '
+        'This paper presents a comprehensive, end-to-end fraud detection framework that systematically develops, evaluates, and compares seven distinct machine learning approaches: '
+        'Logistic Regression (as an interpretable baseline), Random Forest, XGBoost, LightGBM, a Multilayer Perceptron neural network, an Autoencoder-based anomaly detector, '
+        'and a soft Voting Ensemble combining the three best-performing tuned models. Using the widely-cited European Cardholder benchmark dataset containing 284,807 transactions '
+        'with an extreme class imbalance of 0.173% fraud, we engineer 12 novel features capturing temporal, behavioral, and interaction patterns. We rigorously address class imbalance '
+        'through both SMOTE oversampling (applied exclusively after train-test splitting) and cost-sensitive learning via balanced class weights. '
+        'Our results demonstrate that XGBoost with cost-sensitive learning achieves the best overall performance with a Precision-Recall AUC of 0.8166, precision of 0.9048, '
+        'recall of 0.8028, F1-score of 0.8507, and Matthews Correlation Coefficient of 0.8520 on the held-out test set. Optimizing the decision threshold from the default 0.5 '
+        'to 0.55 further improves the F1-score to 0.8636. Comprehensive model explainability through SHAP and LIME analysis identifies PCA components V4, V14, and V12 as the '
+        'primary discriminative features driving fraud predictions. Detailed error analysis reveals that the 14 false negatives arise from sophisticated fraud attempts whose '
+        'feature distributions closely mimic legitimate transaction behavior. We deploy the final model as a production-ready FastAPI service achieving sub-10ms average inference latency, '
+        'and provide concept drift monitoring recommendations for sustained operational performance. All code, trained models, figures, and this paper are publicly available.'
+    )
+    pdf.set_font('Helvetica', 'I', 8)
+    pdf.cell(0, 5, 'Keywords: fraud detection, credit card, XGBoost, ensemble learning, SHAP, LIME, class imbalance, SMOTE, anomaly detection, explainable AI', ln=True)
+
+    # ===== I. INTRODUCTION =====
+    pdf.section('I', 'Introduction')
+    pdf.p(
+        'Financial fraud detection has emerged as one of the most consequential applications of machine learning in the modern digital economy. '
+        'The global shift toward electronic payment systems has created an unprecedented volume of financial transactions, with Visa alone processing '
+        'over 200 billion transactions annually. This massive scale, while enabling economic growth, simultaneously creates fertile ground for '
+        'increasingly sophisticated fraudulent activities. According to the Nilson Report [21], worldwide payment card fraud losses reached $32.34 billion '
+        'in 2021, representing a 14% year-over-year increase. Projections indicate these losses will exceed $43 billion by 2026 unless detection systems '
+        'improve significantly.'
+    )
+    pdf.p(
+        'The fundamental challenge in fraud detection lies in the extreme class imbalance inherent in transaction data. In real-world datasets, '
+        'fraudulent transactions typically constitute less than 0.5% of all transactions, often as low as 0.1%. This severe imbalance creates a '
+        'paradox where a naive classifier that labels every transaction as legitimate achieves over 99.8% accuracy while catching zero fraud. '
+        'This renders conventional accuracy metrics entirely misleading and necessitates the use of specialized evaluation criteria including '
+        'Precision-Recall Area Under the Curve (PR-AUC), the F1-score, and the Matthews Correlation Coefficient (MCC), which remain informative '
+        'even under extreme class skew [18].'
+    )
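# [Editor's sketch] The imbalance paradox above, quantified: a constant
# "never fraud" classifier evaluated on the dataset's class counts. Uses only
# sklearn; illustrates why accuracy is abandoned for PR-AUC / F1 / MCC later.
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, matthews_corrcoef

y_true = np.array([0] * 284315 + [1] * 492)   # 0.173% fraud, as in the dataset
y_pred = np.zeros_like(y_true)                # label every transaction legitimate
print(accuracy_score(y_true, y_pred))         # ~0.9983 -- looks excellent
print(recall_score(y_true, y_pred))           # 0.0    -- catches no fraud at all
print(matthews_corrcoef(y_true, y_pred))      # 0.0    -- MCC exposes the failure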
    pdf.p(
        'A second major challenge is the evolving nature of fraud patterns. Fraudsters continuously adapt their techniques to evade detection, '
        'a phenomenon known as concept drift [1]. A model trained on historical data may see its performance degrade rapidly as new attack vectors '
        'emerge. This necessitates continuous monitoring, periodic retraining, and robust model architectures that generalize well to unseen patterns.'
    )
    pdf.p(
        'Previous approaches to fraud detection span a wide spectrum, from early rule-based expert systems [12] that relied on manually defined '
        'thresholds and patterns, to modern deep learning architectures employing recurrent networks [13] and graph neural networks [19]. '
        'However, recent extensive benchmarking by Shwartz-Ziv and Armon [9] and Grinsztajn et al. [10] has demonstrated that well-tuned '
        'gradient-boosted tree methods consistently match or outperform deep learning on tabular data tasks, including fraud detection, '
        'particularly when combined with thoughtful feature engineering and proper handling of class imbalance.'
    )
    pdf.p('This paper makes the following six contributions:')
    pdf.bullet([
        'A systematic, head-to-head comparison of seven diverse machine learning approaches for fraud detection, spanning linear models, tree ensembles, neural networks, and anomaly detection.',
        'Novel feature engineering that produces 12 new features capturing temporal cycles, transaction velocity, amount deviations, and PCA component interactions.',
        'A rigorous evaluation methodology that applies SMOTE only after stratified train-test splitting and fits feature scaling exclusively on training data, preventing all forms of data leakage.',
        'Comprehensive model explainability analysis using both SHAP (global feature attribution) and LIME (local, instance-level interpretation) to provide actionable insights into fraud prediction drivers.',
        'A production-ready FastAPI deployment achieving sub-10ms average inference latency with real-time risk scoring, demonstrating deployment feasibility.',
        'Quantitative business impact analysis translating model performance into dollar-denominated financial outcomes, directly connecting ML metrics to business value.',
    ])

    # ===== II. RELATED WORK =====
    pdf.section('II', 'Related Work')
    pdf.p(
        'The literature on fraud detection is extensive and spans several decades. Bolton and Hand [12] provided one of the earliest comprehensive surveys '
        'of statistical methods for fraud detection, establishing the field and identifying class imbalance as the central technical challenge. '
        'Dal Pozzolo et al. [1] subsequently provided a foundational analysis of how class imbalance and concept drift interact in real-world '
        'credit card fraud detection systems, demonstrating that undersampling strategies could be effective but risked discarding valuable information '
        'from the majority class. Their follow-up work [22] further investigated conditions under which undersampling outperforms other strategies.'
    )
    pdf.p(
        'The class imbalance problem has generated a rich sub-literature. Chawla et al. [2] introduced SMOTE (Synthetic Minority Over-sampling Technique), '
        'which generates synthetic minority class samples by interpolating between existing examples in feature space. SMOTE became the dominant '
        'oversampling method in the field, with numerous variants proposed subsequently (Borderline-SMOTE, ADASYN, etc.). Critically, Fernandez et al. [3] '
        'established through extensive experimentation that SMOTE must be applied exclusively to training data; applying it before the train-test split '
        'introduces a subtle but severe form of data leakage where synthetic test samples carry information derived from training examples, leading to '
        'dramatically over-optimistic performance estimates.'
    )
    pdf.p(
        'Tree-based ensemble methods have emerged as the dominant paradigm for tabular fraud detection. Xuan et al. [17] demonstrated that '
        'Random Forests achieve robust baseline performance through bagging and feature randomization. Chen and Guestrin [4] introduced XGBoost, '
        'a regularized gradient boosting framework that has since become one of the most widely used algorithms for tabular classification, including '
        'fraud detection [14]. Ke et al. [5] proposed LightGBM with leaf-wise tree growth and gradient-based one-side sampling (GOSS), achieving '
        'faster training with comparable or superior accuracy. Prokhorenkova et al. [16] introduced CatBoost with ordered boosting to handle '
        'categorical features natively without target leakage.'
    )
    pdf.p(
        'Deep learning approaches have also been explored for fraud detection. Pumsirirat and Yan [6] employed autoencoders trained exclusively '
        'on legitimate transactions, detecting fraud through elevated reconstruction error. This unsupervised approach has the advantage of not '
        'requiring labeled fraud examples but typically suffers from high false positive rates. Zhang et al. [13] proposed attention-based '
        'recurrent neural networks that capture sequential transaction patterns, though their complexity often does not justify the marginal '
        'improvement over tree-based methods on static feature representations.'
    )
    pdf.p(
        'The growing importance of regulatory compliance has brought model explainability to the forefront. Lundberg and Lee [7] introduced '
        'SHAP (SHapley Additive exPlanations), grounded in cooperative game theory, which provides theoretically consistent and locally accurate '
        'feature attribution values. Ribeiro et al. [8] proposed LIME (Local Interpretable Model-agnostic Explanations) for instance-level '
        'interpretability through local linear approximations. Belle and Papantonis [15] surveyed the broader landscape of explainable AI methods '
        'applicable to financial decision-making, noting the tension between model performance and interpretability.'
    )
    pdf.p(
        'For hyperparameter optimization, Akiba et al. [11] introduced Optuna, a framework using Tree-structured Parzen Estimators (TPE) '
        'that efficiently explores complex search spaces through adaptive sampling, outperforming grid and random search strategies.'
    )

    # ===== III. DATASET AND EDA =====
    pdf.section('III', 'Dataset and Exploratory Data Analysis')

    pdf.subsec('A.', 'Dataset Description')
    pdf.p(
        'We use the European Cardholder Credit Card Fraud Detection dataset [1], one of the most widely-cited benchmarks in the fraud detection '
        'literature. The dataset contains 284,807 transactions made by European cardholders over a two-day period in September 2013. Each transaction '
        'is described by 31 features: 28 numerical features (V1 through V28) that are the result of a PCA transformation applied to the original '
        'confidential features, plus the raw Time (seconds elapsed from the first transaction in the dataset), Amount (the transaction dollar value), '
        'and Class (the binary label: 0 for legitimate, 1 for fraud). The PCA transformation was applied by the dataset creators to protect cardholder '
        'privacy, which means the original feature semantics (merchant category, geographic location, card type, etc.) are not available. This places '
        'a constraint on domain-specific feature engineering but ensures the dataset can be shared publicly for research.'
    )

    pdf.subsec('B.', 'Class Distribution and Imbalance')
    pdf.p(
        'The dataset exhibits extreme class imbalance: only 492 out of 284,807 transactions are labeled as fraudulent, representing merely 0.173% '
        'of the total. This yields an imbalance ratio of approximately 1:577 (one fraud per 577 legitimate transactions). Figure 1 illustrates this '
        'distribution. The severity of this imbalance has profound implications for model training and evaluation: (i) standard cross-entropy loss '
        'will overwhelmingly optimize for the majority class, (ii) accuracy is rendered meaningless (a constant "not fraud" classifier achieves 99.83%), '
        'and (iii) most standard ML algorithms will struggle to learn the minority class boundary without explicit countermeasures.'
    )

    pdf.tbl(
        ['Class', 'Count', 'Percentage', 'Imbalance Ratio'],
        [['Legitimate (0)', '284,315', '99.827%', '---'],
         ['Fraud (1)', '492', '0.173%', '1 : 577'],
         ['Total', '284,807', '100%', '---']],
        'Table I: Class Distribution in the Credit Card Fraud Dataset'
    )
    pdf.fig(os.path.join(FIGURES_DIR, 'class_distribution.png'),
        'Fig. 1. Class distribution showing the extreme imbalance between legitimate (99.83%) and fraudulent (0.17%) transactions.', w=140)

    pdf.subsec('C.', 'Transaction Amount Analysis')
    pdf.p(
        'Analysis of transaction amounts reveals distinct behavioral patterns between the two classes. Legitimate transactions have a mean amount of '
        '$88.29 with a median of $22.00, indicating a right-skewed distribution dominated by small everyday purchases with occasional large transactions '
        '(maximum: $25,691.16). Fraudulent transactions, perhaps counter-intuitively, have a higher mean of $122.21 but a lower median of only $9.25. '
        'This bimodal pattern in fraud amounts suggests two distinct fraud strategies: (i) low-value "testing" transactions (often under $5) where '
        'fraudsters verify that a stolen card number is active before attempting larger purchases, and (ii) moderate-to-high value transactions that '
        'represent the actual theft. The low median indicates that the testing strategy is more common. Figure 2 presents the detailed amount distributions.'
    )
    pdf.fig(os.path.join(FIGURES_DIR, 'amount_analysis.png'),
        'Fig. 2. Transaction amount analysis: (a) legitimate amounts, (b) fraud amounts, (c) log-scaled comparison, (d) boxplot.', w=155)
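# [Editor's sketch] Reproducing the per-class amount statistics of Sec. III-C
# with pandas. The CSV path is hypothetical; 'Amount' and 'Class' are the
# dataset's actual column names.
import pandas as pd

df = pd.read_csv('creditcard.csv')
print(df.groupby('Class')['Amount'].agg(['mean', 'median', 'max']))
# expected: class 0 -> mean ~88.29, median ~22.00; class 1 -> mean ~122.21, median ~9.25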
    pdf.subsec('D.', 'Temporal Patterns')
    pdf.p(
        'Temporal analysis reveals significant differences in the timing of legitimate versus fraudulent transactions. Figure 3 shows that the '
        'fraud rate during nighttime hours (midnight to 6 AM) is 0.518%, nearly four times the daytime rate of 0.137%. This is consistent with '
        'known fraud patterns: fraudsters preferentially operate during off-peak hours when transaction monitoring systems may have reduced staffing, '
        'when cardholders are less likely to notice unauthorized transactions on their accounts, and when automated systems process transactions '
        'with less stringent real-time checks. This temporal signal motivates our cyclic hour-of-day feature engineering, which encodes the hour '
        'as sine and cosine components to preserve the circular nature of time.'
    )
    pdf.fig(os.path.join(FIGURES_DIR, 'time_analysis.png'),
        'Fig. 3. Temporal patterns: (a) transaction density by hour showing fraud concentration at night, (b) fraud rate by hour.', w=145)

    pdf.subsec('E.', 'Feature Correlations')
    pdf.p(
        'Pearson correlation analysis between each feature and the fraud label identifies the most discriminative PCA components. The features with '
        'the strongest negative correlation with fraud are V17 (r = -0.326), V14 (r = -0.303), and V12 (r = -0.261), meaning that lower values of '
        'these features are associated with higher fraud probability. On the positive side, V11 (r = +0.155) and V4 (r = +0.133) show the strongest '
        'associations. Notably, the raw Amount feature has near-zero correlation (r = 0.006) with fraud, confirming that simple amount-based rules '
        'would be ineffective. The Time feature also shows negligible correlation (r = -0.012). These findings guide both our feature engineering '
        '(creating interaction terms between the top correlated features) and our expectation of which features will dominate model importance.'
    )
    pdf.fig(os.path.join(FIGURES_DIR, 'correlation_heatmap.png'),
        'Fig. 4. Feature correlation with the fraud class. Negative values (red) indicate features whose lower values signal fraud.', w=130)
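# [Editor's sketch] The Sec. III-E correlation screen: Pearson correlation of
# every feature with the fraud label (assumes df loaded as in the Sec. III-C
# sketch; the CSV path is hypothetical).
import pandas as pd

df = pd.read_csv('creditcard.csv')
corr = df.drop(columns='Class').corrwith(df['Class']).sort_values()
print(corr.head(3))   # most negative: V17 ~ -0.33, V14 ~ -0.30, V12 ~ -0.26
print(corr.tail(2))   # most positive: V4 ~ +0.13, V11 ~ +0.16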
    pdf.subsec('F.', 'Feature Distributions by Class')
    pdf.p(
        'Figure 5 visualizes the distributions of the six most discriminative features separated by class. The key observation is that for features '
        'like V14 and V17, the fraud distribution (red) is shifted significantly to the left compared to the legitimate distribution (green), '
        'creating a separable signal that tree-based models can exploit through axis-aligned splits. For V4 and V11, the fraud distribution is shifted '
        'rightward. However, there is substantial overlap between the classes for all features, which explains why no single feature achieves perfect '
        'separation and why ensemble methods that combine weak signals from multiple features outperform univariate approaches.'
    )
    pdf.fig(os.path.join(FIGURES_DIR, 'feature_distributions.png'),
        'Fig. 5. Distribution of the top 6 discriminative features by class, showing partial but informative separation.', w=155)

    pdf.subsec('G.', 'Five Key Observations')
    pdf.p('Our exploratory analysis yields five principal observations that directly inform the modeling strategy:')
    pdf.bullet([
        'EXTREME CLASS IMBALANCE: With only 0.173% fraud, conventional accuracy is meaningless. All models must employ either oversampling (SMOTE) or cost-sensitive learning, and evaluation must rely on PR-AUC, F1, and MCC rather than accuracy or ROC-AUC alone.',
        'BIMODAL FRAUD AMOUNTS: The bimodal distribution of fraud amounts (small testing transactions + larger theft transactions) means amount-based thresholds will miss most fraud. Feature engineering that captures amount deviations and z-scores is essential.',
        'TEMPORAL EXPLOITATION: The 4x higher nighttime fraud rate provides a usable signal when encoded as cyclic features. Time-based features should improve model discrimination.',
        'PCA FEATURE DOMINANCE: V14, V17, V12, V4, and V11 carry the strongest fraud signal. Interaction features between these variables may capture non-linear relationships that individual features miss.',
        'CLEAN DATA: The absence of missing values and the pre-applied PCA transformation simplify preprocessing but limit domain-specific engineering. The 1,081 duplicate rows are removed to prevent data leakage.',
    ])

    # ===== IV. METHODOLOGY =====
    pdf.section('IV', 'Methodology')

    pdf.subsec('A.', 'Feature Engineering')
    pdf.p(
        'We augment the original 30 features (Time, V1-V28, Amount) with 12 engineered features designed to capture temporal, behavioral, and '
        'interaction patterns that the raw PCA features may not directly encode. The final feature set contains 42 dimensions.'
    )
    pdf.p(
        'Temporal features: We derive the hour of day from the Time column and encode it cyclically using sine and cosine transformations: '
        'Hour_sin = sin(2*pi*h/24) and Hour_cos = cos(2*pi*h/24), where h = (Time/3600) mod 24. This cyclic encoding ensures that hour 23 '
        'and hour 0 are treated as adjacent rather than maximally distant, which is critical for capturing the nighttime fraud pattern. '
        'We also compute Time_diff as the difference in Time from the previous transaction, approximating the inter-arrival time.'
    )
    pdf.p(
        'Amount features: We compute Amount_log = log(1 + Amount) to compress the heavy-tailed amount distribution, Amount_deviation_mean and '
        'Amount_deviation_median to capture how far each transaction deviates from typical amounts, Amount_zscore for standardized deviation, '
        'and Transaction_velocity = 1/(Time_diff + 1) as a proxy for how rapidly transactions are occurring.'
    )
    pdf.p(
        'Interaction features: We create three pairwise products between the most discriminative PCA components: V14*V17, V12*V14, and V10*V14. '
        'These capture joint effects that axis-aligned tree splits would require multiple levels to approximate. Finally, PCA_magnitude computes '
        'the L2 norm across all 28 PCA features, providing a summary measure of overall transaction "abnormality" in the latent space.'
    )
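# [Editor's sketch] The Sec. IV-A feature engineering expressed in pandas and
# numpy. Column names mirror the text; df is the raw dataset (Time, V1..V28,
# Amount) loaded as in the earlier sketches.
import numpy as np
import pandas as pd

df = pd.read_csv('creditcard.csv')
hour = (df['Time'] / 3600) % 24
df['Hour_sin'] = np.sin(2 * np.pi * hour / 24)     # hour 23 stays adjacent to hour 0
df['Hour_cos'] = np.cos(2 * np.pi * hour / 24)
df['Time_diff'] = df['Time'].diff().fillna(0)      # inter-arrival time proxy
df['Transaction_velocity'] = 1 / (df['Time_diff'] + 1)
df['Amount_log'] = np.log1p(df['Amount'])          # log(1 + Amount)
df['Amount_zscore'] = (df['Amount'] - df['Amount'].mean()) / df['Amount'].std()
df['V14_V17_interaction'] = df['V14'] * df['V17']  # joint effect of top features
df['PCA_magnitude'] = np.linalg.norm(df[[f'V{i}' for i in range(1, 29)]].to_numpy(), axis=1)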
+ ) + pdf.p( + 'SMOTE (Synthetic Minority Over-sampling Technique) [2]: We generate synthetic fraud samples by interpolating between existing fraud ' + 'examples in the 42-dimensional feature space. We use a sampling_strategy of 0.5, creating enough synthetic fraud to achieve a 1:2 ' + 'minority-to-majority ratio (99,138 synthetic fraud samples added to 198,277 legitimate). This ratio was chosen as a compromise between ' + 'the extreme original imbalance and full 1:1 balancing, which can introduce too much synthetic noise. SMOTE data is used exclusively ' + 'for the MLP neural network, which does not natively support class weighting.' + ) + pdf.p( + 'Cost-Sensitive Learning: For tree-based models and Logistic Regression, we apply balanced class weights computed as ' + 'w_c = N / (2 * N_c), yielding w_0 = 0.501 and w_1 = 300.01. This effectively makes each fraud example 599 times more important ' + 'than a legitimate example in the loss function, incentivizing the model to correctly classify fraud even at the cost of some false positives.' + ) + + pdf.subsec('C.', 'Data Splitting and Scaling') + pdf.p( + 'After removing 1,081 duplicate rows and engineering features, we perform a stratified 70/15/15 train/validation/test split. ' + 'Stratification preserves the original 0.167% fraud ratio in each split: Train (198,608 samples, 331 fraud), Validation (42,559 samples, ' + '71 fraud), Test (42,559 samples, 71 fraud). Feature scaling uses RobustScaler, which normalizes by the interquartile range ' + 'x\' = (x - median) / IQR, providing robustness to outliers that are common in financial transaction data. The scaler is fitted exclusively ' + 'on the training set and then applied identically to validation and test sets, preventing any information leakage.' + ) + + pdf.subsec('D.', 'Model Descriptions') + pdf.p( + '1) Logistic Regression (Baseline): An L2-regularized linear model with C=0.1 and balanced class weights, serving as an interpretable ' + 'baseline. Its coefficients directly indicate feature importance and direction of effect.' + ) + pdf.p( + '2) Random Forest: An ensemble of 150 decision trees (max_depth=12, min_samples_split=5) with balanced class weights. ' + 'Each tree is trained on a bootstrap sample with random feature subsets, providing variance reduction through averaging.' + ) + pdf.p( + '3) XGBoost: Gradient boosted trees with 200 estimators, max_depth=6, learning_rate=0.1, subsample=0.8, colsample_bytree=0.8, ' + 'and scale_pos_weight derived from class frequencies. Uses histogram-based splitting for computational efficiency.' + ) + pdf.p( + '4) LightGBM: Leaf-wise gradient boosting with 200 estimators, max_depth=8, learning_rate=0.05, and gradient-based one-side sampling. ' + 'The leaf-wise growth strategy can produce deeper trees than XGBoost for the same number of leaves, potentially capturing more complex patterns.' + ) + pdf.p( + '5) MLP Neural Network: A three-layer perceptron (128-64-32 neurons) with ReLU activation, dropout (implicit via alpha=0.001 L2 ' + 'regularization), adaptive learning rate, and early stopping. Trained on SMOTE-augmented data since sklearn MLPClassifier does not ' + 'support class weights directly.' + ) + pdf.p( + '6) Autoencoder (Anomaly Detection): A symmetric autoencoder with architecture 42-64-32-16-32-64-42, trained for 50 epochs ' + 'exclusively on legitimate transactions. 
    pdf.subsec('C.', 'Data Splitting and Scaling')
    pdf.p(
        'After removing 1,081 duplicate rows and engineering features, we perform a stratified 70/15/15 train/validation/test split. '
        'Stratification preserves the original 0.167% fraud ratio in each split: Train (198,608 samples, 331 fraud), Validation (42,559 samples, '
        '71 fraud), Test (42,559 samples, 71 fraud). Feature scaling uses RobustScaler, which normalizes by the interquartile range '
        'x\' = (x - median) / IQR, providing robustness to outliers that are common in financial transaction data. The scaler is fitted exclusively '
        'on the training set and then applied identically to validation and test sets, preventing any information leakage.'
    )
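# [Editor's sketch] The leakage-safe split-then-scale protocol of Sec. IV-C:
# stratified 70/15/15, with RobustScaler fitted on the training fold only.
# X and y are synthetic stand-ins for the engineered feature matrix and labels.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 42))                    # 42 features, as in the paper
y = (rng.random(1000) < 0.05).astype(int)
X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, test_size=0.30, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_tmp, y_tmp, test_size=0.50, stratify=y_tmp, random_state=42)

scaler = RobustScaler().fit(X_train)               # median/IQR from train only
X_train, X_val, X_test = (scaler.transform(s) for s in (X_train, X_val, X_test))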
+ ) + pdf.tbl( + ['Model', 'Prec.', 'Recall', 'F1', 'ROC-AUC', 'PR-AUC', 'MCC'], [ - ['XGBoost', '0.9048', '0.8028', '0.8507', '0.9735', '0.8166', '0.8520'], - ['Voting Ens.', '0.8636', '0.8028', '0.8321', '0.9783', '0.8007', '0.8324'], - ['LGBM Tuned', '0.7073', '0.8169', '0.7582', '0.9318', '0.7958', '0.7597'], - ['XGB Tuned', '0.8382', '0.8028', '0.8201', '0.9697', '0.7929', '0.8200'], - ['RF Tuned', '0.8730', '0.7746', '0.8209', '0.9675', '0.7926', '0.8221'], - ['Random Forest', '0.8333', '0.7746', '0.8029', '0.9526', '0.7710', '0.8031'], - ['MLP', '0.6914', '0.7887', '0.7368', '0.9433', '0.7522', '0.7380'], - ['Logistic Reg.', '0.0488', '0.8873', '0.0924', '0.9615', '0.7350', '0.2042'], - ['Autoencoder', '0.0033', '1.0000', '0.0067', '0.9604', '0.0442', '0.0409'], + ['XGBoost', '0.905', '0.803', '0.851', '0.974', '0.817', '0.852'], + ['Voting Ens.', '0.864', '0.803', '0.832', '0.978', '0.801', '0.832'], + ['LGBM Tuned', '0.707', '0.817', '0.758', '0.932', '0.796', '0.760'], + ['XGB Tuned', '0.838', '0.803', '0.820', '0.970', '0.793', '0.820'], + ['RF Tuned', '0.873', '0.775', '0.821', '0.968', '0.793', '0.822'], + ['Random Forest', '0.833', '0.775', '0.803', '0.953', '0.771', '0.803'], + ['MLP', '0.691', '0.789', '0.737', '0.943', '0.752', '0.738'], + ['Logistic Reg.', '0.049', '0.887', '0.092', '0.962', '0.735', '0.204'], + ['Autoencoder', '0.003', '1.000', '0.007', '0.960', '0.044', '0.041'], ], - 'Table I: Comprehensive Model Comparison on Test Set' - ) - - pdf.body_text( - 'XGBoost achieves the highest PR-AUC (0.8166), F1 (0.8507), and MCC (0.8520). Tree-based models consistently outperform ' - 'neural approaches. The Autoencoder achieves perfect recall but extremely low precision. ' - 'Threshold optimization from 0.5 to 0.55 improves F1 to 0.8636.' - ) - - pdf.add_figure(os.path.join(FIGURES_DIR, 'roc_curves.png'), - 'Fig. 4: ROC curves for all models.', width=150) - - pdf.add_figure(os.path.join(FIGURES_DIR, 'pr_curves.png'), - 'Fig. 5: Precision-Recall curves (primary evaluation metric for imbalanced data).', width=150) - - pdf.add_figure(os.path.join(FIGURES_DIR, 'confusion_matrices.png'), - 'Fig. 6: Confusion matrices for all models on test set.', width=170) - - pdf.add_figure(os.path.join(FIGURES_DIR, 'threshold_analysis.png'), - 'Fig. 7: Threshold sensitivity analysis for XGBoost.', width=160) - - # Business Impact - pdf.subsection_title('', 'Business Impact') - pdf.add_table( - ['Model', 'Caught ($)', 'Missed ($)', 'FP Cost ($)', 'Net Savings ($)', 'Catch Rate'], + 'Table II: Comprehensive Model Comparison on Test Set (threshold = 0.5)' + ) + + pdf.p( + 'Key Observation 1 - Tree-based models dominate: XGBoost achieves the highest PR-AUC (0.817), F1-score (0.851), and MCC (0.852), ' + 'confirming the findings of Shwartz-Ziv and Armon [9] that gradient-boosted trees remain the strongest approach for tabular data. ' + 'The Voting Ensemble achieves a marginally higher ROC-AUC (0.978) through model averaging but does not improve upon the single XGBoost ' + 'on the more informative PR-AUC metric, suggesting that the three ensemble members are not sufficiently diverse to benefit from averaging.' + ) + pdf.p( + 'Key Observation 2 - The precision-recall tradeoff is stark: Logistic Regression achieves high recall (0.887) but catastrophically ' + 'low precision (0.049), flagging over 1,200 legitimate transactions as fraud for every 63 true fraud caught. 
    pdf.subsec('E.', 'Hyperparameter Optimization')
    pdf.p(
        'We tune the top three models (XGBoost, LightGBM, Random Forest) using Optuna [11] with the Tree-structured Parzen Estimator (TPE) '
        'sampler. For each model, Optuna explores the hyperparameter space (learning rate, tree depth, regularization, subsampling) over '
        '15-20 trials, optimizing PR-AUC on the validation set. The TPE sampler adaptively focuses trials on promising regions of the search '
        'space, achieving better sample efficiency than grid or random search.'
    )

    pdf.fig(os.path.join(FIGURES_DIR, 'architecture_diagram.png'),
        'Fig. 6. End-to-end system architecture: from transaction input through feature engineering, model inference, to API output and monitoring.', w=155)

    # ===== V. EXPERIMENTAL SETUP =====
    pdf.section('V', 'Experimental Setup')
    pdf.p(
        'All experiments were conducted using Python 3.12 on CPU infrastructure. The primary libraries and their versions are: '
        'scikit-learn 1.8.0 (Logistic Regression, Random Forest, MLP, preprocessing), XGBoost 3.2.0, LightGBM 4.6.0, PyTorch 2.11.0 (Autoencoder), '
        'Optuna 4.8.0 (hyperparameter optimization), SHAP 0.51.0, and LIME 0.2.0.1. Total training time for all models including Optuna tuning '
        'was approximately 25 minutes on a 2-core CPU.'
    )
    pdf.p(
        'We report six evaluation metrics on the held-out test set: (1) Precision = TP/(TP+FP), measuring the fraction of flagged transactions '
        'that are actually fraudulent; (2) Recall = TP/(TP+FN), measuring the fraction of actual fraud that is caught; (3) F1-score, the harmonic '
        'mean of precision and recall; (4) ROC-AUC, the area under the Receiver Operating Characteristic curve; (5) PR-AUC (Average Precision), '
        'the area under the Precision-Recall curve, which is our primary metric as it is more informative than ROC-AUC under extreme class imbalance [18]; '
        'and (6) Matthews Correlation Coefficient (MCC), which provides a balanced measure that accounts for all four confusion matrix quadrants '
        'and returns values between -1 and +1.'
    )

    # ===== VI. RESULTS AND DISCUSSION =====
    pdf.section('VI', 'Results and Discussion')

    pdf.subsec('A.', 'Model Comparison')
    pdf.p(
        'Table II presents the comprehensive evaluation of all models on the test set using a default threshold of 0.5. The results reveal '
        'a clear hierarchy with important nuances.'
    )
    pdf.tbl(
        ['Model', 'Prec.', 'Recall', 'F1', 'ROC-AUC', 'PR-AUC', 'MCC'],
         [
-            ['XGBoost', '0.9048', '0.8028', '0.8507', '0.9735', '0.8166', '0.8520'],
-            ['Voting Ens.', '0.8636', '0.8028', '0.8321', '0.9783', '0.8007', '0.8324'],
-            ['LGBM Tuned', '0.7073', '0.8169', '0.7582', '0.9318', '0.7958', '0.7597'],
-            ['XGB Tuned', '0.8382', '0.8028', '0.8201', '0.9697', '0.7929', '0.8200'],
-            ['RF Tuned', '0.8730', '0.7746', '0.8209', '0.9675', '0.7926', '0.8221'],
-            ['Random Forest', '0.8333', '0.7746', '0.8029', '0.9526', '0.7710', '0.8031'],
-            ['MLP', '0.6914', '0.7887', '0.7368', '0.9433', '0.7522', '0.7380'],
-            ['Logistic Reg.', '0.0488', '0.8873', '0.0924', '0.9615', '0.7350', '0.2042'],
-            ['Autoencoder', '0.0033', '1.0000', '0.0067', '0.9604', '0.0442', '0.0409'],
+            ['XGBoost', '0.905', '0.803', '0.851', '0.974', '0.817', '0.852'],
+            ['Voting Ens.', '0.864', '0.803', '0.832', '0.978', '0.801', '0.832'],
+            ['LGBM Tuned', '0.707', '0.817', '0.758', '0.932', '0.796', '0.760'],
+            ['XGB Tuned', '0.838', '0.803', '0.820', '0.970', '0.793', '0.820'],
+            ['RF Tuned', '0.873', '0.775', '0.821', '0.968', '0.793', '0.822'],
+            ['Random Forest', '0.833', '0.775', '0.803', '0.953', '0.771', '0.803'],
+            ['MLP', '0.691', '0.789', '0.737', '0.943', '0.752', '0.738'],
+            ['Logistic Reg.', '0.049', '0.887', '0.092', '0.962', '0.735', '0.204'],
+            ['Autoencoder', '0.003', '1.000', '0.007', '0.960', '0.044', '0.041'],
         ],
-        'Table I: Comprehensive Model Comparison on Test Set'
-    )
-
-    pdf.body_text(
-        'XGBoost achieves the highest PR-AUC (0.8166), F1 (0.8507), and MCC (0.8520). Tree-based models consistently outperform '
-        'neural approaches. The Autoencoder achieves perfect recall but extremely low precision. '
-        'Threshold optimization from 0.5 to 0.55 improves F1 to 0.8636.'
-    )
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'roc_curves.png'),
-        'Fig. 4: ROC curves for all models.', width=150)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'pr_curves.png'),
-        'Fig. 5: Precision-Recall curves (primary evaluation metric for imbalanced data).', width=150)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'confusion_matrices.png'),
-        'Fig. 6: Confusion matrices for all models on test set.', width=170)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'threshold_analysis.png'),
-        'Fig. 7: Threshold sensitivity analysis for XGBoost.', width=160)
-
-    # Business Impact
-    pdf.subsection_title('', 'Business Impact')
-    pdf.add_table(
-        ['Model', 'Caught ($)', 'Missed ($)', 'FP Cost ($)', 'Net Savings ($)', 'Catch Rate'],
+        'Table II: Comprehensive Model Comparison on Test Set (threshold = 0.5)'
+    )
+
+    pdf.p(
+        'Key Observation 1 - Tree-based models dominate: XGBoost achieves the highest PR-AUC (0.817), F1-score (0.851), and MCC (0.852), '
+        'confirming the findings of Shwartz-Ziv and Armon [9] that gradient-boosted trees remain the strongest approach for tabular data. '
+        'The Voting Ensemble achieves a marginally higher ROC-AUC (0.978) through model averaging but does not improve upon the single XGBoost '
+        'on the more informative PR-AUC metric, suggesting that the three ensemble members are not sufficiently diverse to benefit from averaging.'
+    )
+    pdf.p(
+        'Key Observation 2 - The precision-recall tradeoff is stark: Logistic Regression achieves high recall (0.887) but catastrophically '
+        'low precision (0.049), flagging 1,229 legitimate transactions as fraud while catching only 63 of the 71 frauds. This linear model '
+        'creates an extremely aggressive decision boundary due to the large class weight (300x), resulting in a flood of false alarms that '
+        'would overwhelm any operational fraud investigation team.'
+    )
+    pdf.p(
+        'Key Observation 3 - Autoencoder anomaly detection fails in this setting: While the autoencoder achieves perfect recall (1.0) by '
+        'construction (all fraud has higher reconstruction error than the median), its precision is only 0.003, flagging over 21,000 '
+        'legitimate transactions. The PR-AUC of 0.044 is near-random. This failure is likely attributable to the PCA-transformed feature '
+        'space: the autoencoder learns to reconstruct the dominant variance directions, but the fraud signal may reside in minor PCA components '
+        'whose reconstruction error is similar to legitimate noise, making discrimination unreliable.'
+    )
+    pdf.p(
+        'Key Observation 4 - Optuna tuning shows mixed results: Tuning improved LightGBM dramatically (PR-AUC from 0.012 to 0.796) by '
+        'correcting an initial configuration where scale_pos_weight was too aggressive. However, the tuned XGBoost (0.793) slightly underperformed '
+        'the base XGBoost (0.817), suggesting the base configuration was already near-optimal and the tuning search space introduced suboptimal regions.'
+    )

+    pdf.fig(os.path.join(FIGURES_DIR, 'roc_curves.png'),
+        'Fig. 7. ROC curves for the top 5 models. All achieve ROC-AUC > 0.93, but ROC-AUC alone is insufficient for evaluation under extreme imbalance.', w=125)
+    pdf.fig(os.path.join(FIGURES_DIR, 'pr_curves.png'),
+        'Fig. 8. Precision-Recall curves -- the primary evaluation metric. XGBoost achieves the largest area under the curve (0.817).', w=125)
+    pdf.fig(os.path.join(FIGURES_DIR, 'confusion_matrices.png'),
+        'Fig. 9. Confusion matrices for six selected models. XGBoost achieves the best balance: 57 true positives with only 6 false positives.', w=160)

+    pdf.subsec('B.', 'Threshold Optimization')
+    pdf.p(
+        'The default classification threshold of 0.5 is a common but arbitrary choice that is rarely optimal, especially for imbalanced problems. '
+        'We perform a systematic sweep of thresholds from 0.05 to 0.95 for the best model (XGBoost) and evaluate Precision, Recall, F1, and MCC '
+        'at each threshold. Figure 10 visualizes the tradeoff. The optimal threshold by F1 is 0.55, which improves F1 from 0.851 to 0.864 and '
+        'precision from 0.905 to 0.934 while maintaining the same recall of 0.803. This means raising the threshold by 0.05 eliminates some '
+        'borderline false positives without losing any true positives, a Pareto improvement. At higher thresholds (>0.85), recall begins to drop '
+        'as the model becomes overly conservative. In practice, the operational threshold should be set based on the specific cost ratio between '
+        'missed fraud and false alarm investigation costs.'
+    )
+    pdf.fig(os.path.join(FIGURES_DIR, 'threshold_analysis.png'),
+        'Fig. 10. Threshold sensitivity analysis: (a) precision/recall/F1 vs. threshold, (b) MCC vs. threshold. Optimal F1 at threshold = 0.55.', w=145)
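# [Editor's sketch] The Sec. VI-B threshold sweep: score a probability grid
# and keep the F1 maximizer. 'probs' would be model.predict_proba(X_val)[:, 1]
# from the trained XGBoost model.
import numpy as np
from sklearn.metrics import f1_score

def best_threshold(y_val, probs):
    grid = np.arange(0.05, 0.96, 0.05)
    f1s = [f1_score(y_val, (probs >= t).astype(int)) for t in grid]
    return grid[int(np.argmax(f1s))]

# For the final XGBoost model this sweep returns 0.55 (F1 0.864 vs 0.851 at 0.5).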
+    pdf.tbl(
+        ['Model', 'TP', 'FN', 'FP', 'Caught($)', 'Missed($)', 'FP Cost($)', 'Net($)'],
         [
-            ['XGBoost', '6,966', '1,711', '30', '6,936', '80.3%'],
-            ['Ensemble', '6,966', '1,711', '45', '6,921', '80.3%'],
-            ['LR', '7,699', '978', '6,145', '1,554', '88.7%'],
-            ['Autoencoder', '8,677', '0', '106,045', '-97,368', '100%'],
+            ['XGBoost', '57', '14', '6', '6,966', '1,711', '30', '6,936'],
+            ['Ens.', '57', '14', '9', '6,966', '1,711', '45', '6,921'],
+            ['LGBM-T', '58', '13', '24', '7,088', '1,589', '120', '6,968'],
+            ['LR', '63', '8', '1,229', '7,699', '978', '6,145', '1,554'],
+            ['AE', '71', '0', '21,209', '8,677', '0', '106,045', '-97,368'],
         ],
-        'Table II: Business Impact Analysis'
-    )
-
-    # Feature Importance
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'feature_importance.png'),
-                   'Fig. 8: Feature importance across models.', width=170)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'shap_summary.png'),
-                   'Fig. 9: SHAP summary plot showing feature contributions to fraud predictions.', width=160)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'shap_top10.png'),
-                   'Fig. 10: Top 10 features driving fraud predictions (SHAP analysis).', width=150)
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'lime_explanation.png'),
-                   'Fig. 11: LIME explanation for a single fraud prediction.', width=160)
-
-    # VII. Error Analysis
-    pdf.section_title('VII', 'Error Analysis')
-    pdf.body_text(
-        'Of 14 false negatives, mean predicted fraud probability was only 0.013. Feature comparison reveals that missed fraud '
-        'transactions have V14 averaging -0.97 vs -8.45 for true positives, and PCA magnitude of 1.82 vs 12.25. '
-        'These transactions closely mimic legitimate behavior. The 6 false positives have feature distributions (V14: -7.13) '
-        'resembling actual fraud. Concept drift analysis shows a +0.115 indicator between early and late periods.'
-    )
-
-    pdf.add_figure(os.path.join(FIGURES_DIR, 'error_analysis.png'),
-                   'Fig. 12: Error analysis - FN/FP probability distributions and score distributions.', width=170)
-
-    # VIII. Limitations
-    pdf.section_title('VIII', 'Limitations')
-    pdf.body_text(
-        '(1) PCA anonymization prevents domain-specific feature engineering; '
-        '(2) Two-day temporal scope limits drift assessment; '
-        '(3) Single-institution data may not generalize; '
-        '(4) Missing raw features (merchant, location, device); '
-        '(5) Static threshold without dynamic adaptation.'
-    )
-
-    # IX. Future Work
-    pdf.section_title('IX', 'Future Work')
-    pdf.body_text(
-        'Promising directions include: Graph Neural Networks for fraud ring detection; '
-        'real-time streaming with Apache Kafka; Federated Learning across banks for privacy-preserving training; '
-        'LLM-generated compliance explanations; temporal modeling with Transformers; '
-        'and adversarial robustness training.'
-    )
-
-    # X.
Conclusion - pdf.section_title('X', 'Conclusion') - pdf.body_text( - 'This paper presents a comprehensive fraud detection framework evaluating seven ML approaches on the benchmark ' - 'European Cardholder dataset. XGBoost achieves the best overall performance (PR-AUC: 0.8166, F1: 0.8507) through ' - 'cost-sensitive learning with optimized class weights. Threshold optimization to 0.55 further improves F1 to 0.8636. ' - 'The framework includes complete explainability through SHAP and LIME, production deployment via FastAPI with sub-10ms ' - 'latency, and automated drift monitoring. Tree-based ensemble methods remain the most effective for tabular fraud detection.' - ) - - # References - pdf.section_title('', 'References') + 'Table III: Business Impact Analysis on Test Set' + ) + pdf.p( + 'This analysis underscores a critical insight: maximizing recall without regard for precision is counterproductive in operational settings. ' + 'The Autoencoder catches every fraud but would bankrupt the operations team with false alarm investigations. The optimal model balances ' + 'fraud catch rate against false alarm volume, and XGBoost achieves this balance most effectively.' + ) + + # ===== EXPLAINABILITY ===== + pdf.subsec('D.', 'Feature Importance and Explainability') + pdf.p( + 'Model explainability is critical for operational trust, regulatory compliance, and scientific insight. We employ two complementary methods: ' + 'SHAP for global feature attribution and LIME for local, instance-level explanation.' + ) + pdf.p( + 'SHAP Analysis: Figure 11 shows the SHAP summary plot for XGBoost, computed over 2,000 test samples. The top three features by ' + 'mean absolute SHAP value are V4 (1.913), V14 (1.843), and PCA_magnitude (1.113). The SHAP analysis reveals several important patterns: ' + '(i) High values of V4 push predictions toward fraud, while low values push toward legitimate; (ii) Low (more negative) values of V14 ' + 'are strongly associated with fraud, consistent with the negative correlation observed in EDA; (iii) High PCA_magnitude indicates ' + 'transactions that are far from the centroid in PCA space, which are more likely to be anomalous. Notably, the engineered feature ' + 'V10_V14_interaction ranks 9th, validating our hypothesis that interaction terms capture additional signal beyond individual features.' + ) + pdf.fig(os.path.join(FIGURES_DIR, 'shap_summary.png'), + 'Fig. 11. SHAP summary plot: each dot is one test sample. Color indicates feature value (red=high, blue=low). Horizontal position shows SHAP impact.', w=140) + pdf.fig(os.path.join(FIGURES_DIR, 'shap_top10.png'), + 'Fig. 12. Top 10 features by mean |SHAP value|. V4, V14, and PCA_magnitude are the dominant fraud predictors.', w=125) + + pdf.p( + 'LIME Analysis: For instance-level interpretability, we apply LIME to a correctly classified fraud transaction (P(fraud)=1.0000). ' + 'Figure 13 shows the contribution of each feature to this specific prediction. All top features push toward the fraud class, with ' + 'Time_diff, V4, V12, and V14 being the strongest contributors. This granular explanation could be presented to a human fraud ' + 'analyst to justify why a specific transaction was blocked, supporting regulatory requirements for explainable decisions.' + ) + pdf.fig(os.path.join(FIGURES_DIR, 'lime_explanation.png'), + 'Fig. 13. LIME explanation for a single fraud sample. Red bars increase fraud risk; green bars decrease it.', w=140) + pdf.fig(os.path.join(FIGURES_DIR, 'feature_importance.png'), + 'Fig. 14. 
Feature importance comparison across four model types (RF, XGBoost, LightGBM, Logistic Regression).', w=155)
+
+    # ===== VII. ERROR ANALYSIS =====
+    pdf.section('VII', 'Error Analysis')
+
+    pdf.subsec('A.', 'False Negative Analysis (Missed Fraud)')
+    pdf.p(
+        'The XGBoost model misses 14 of 71 fraudulent transactions in the test set (19.7% miss rate). Understanding why these transactions '
+        'escape detection is critical for improving the system. Analysis of the 14 false negatives reveals that their mean predicted fraud '
+        'probability is only 0.013, far below the 0.5 threshold -- the model is highly confident they are legitimate, not merely borderline.'
+    )
+    pdf.p(
+        'Feature comparison provides the explanation: false negatives have V14 averaging -0.97 compared to -8.45 for true positives (an '
+        'absolute difference of 7.5), V12 averaging -0.41 vs -7.69, and PCA_magnitude of 1.82 vs 12.25. In other words, missed fraud '
+        'transactions have feature values that are dramatically closer to legitimate transactions than to caught fraud. These represent '
+        'sophisticated fraud attempts that have been designed (intentionally or coincidentally) to mimic legitimate behavioral patterns. '
+        'They operate within normal amount ranges, at normal hours, and produce PCA component values that fall squarely within the '
+        'legitimate distribution.'
+    )
+    pdf.p(
+        'Implication: These false negatives cannot be eliminated by simply lowering the threshold -- at threshold 0.12, only one additional '
+        'FN would be caught while generating many more false alarms. Catching these sophisticated fraud attempts likely requires additional '
+        'data sources (transaction sequences, device fingerprints, geographic data) that are not available in the PCA-anonymized dataset.'
+    )
+
+    pdf.subsec('B.', 'False Positive Analysis (False Alarms)')
+    pdf.p(
+        'The 6 false positives have a mean predicted fraud probability of 0.827, with some reaching 1.0 -- the model is highly confident '
+        'these are fraud, yet they are legitimate. Feature analysis shows these transactions have V14 averaging -7.13 (vs -0.04 for true negatives) '
+        'and PCA_magnitude of 7.86 (vs 0.28 for true negatives). These legitimate transactions genuinely exhibit the same anomalous feature '
+        'patterns as actual fraud, likely representing unusual but lawful spending behavior (e.g., first-time purchases in an unusual category, '
+        'international transactions, or large purchases for individuals who typically make small ones). No amount of model tuning can distinguish '
+        'these from actual fraud without additional contextual information.'
+    )
+
+    pdf.subsec('C.', 'Concept Drift Assessment and Retraining Recommendations')
+    pdf.p(
+        'Comparing model confidence between the first and second halves of the test period reveals a drift indicator of +0.115 in mean fraud '
+        'probability for actual fraud cases. While this magnitude is modest, it suggests that even within the two-day dataset window, the '
+        'statistical properties of fraud are not stationary.'
+    )
+    pdf.p('Based on this analysis and industry best practices, we recommend the following operational monitoring regime:')
+    pdf.bullet([
+        'Weekly computation of PR-AUC, F1, and false positive rate on recent labeled data to track model degradation.',
+        'Automated retraining trigger when PR-AUC drops below 0.70 or false positive rate exceeds 2x the baseline.',
+        'Sliding window training using the most recent 3-6 months of labeled data, rather than static historical training.',
+        'Population Stability Index (PSI) monitoring on all input features, with alerts when PSI exceeds 0.25 for any feature.',
+        'A/B testing framework for deploying model updates, with gradual traffic ramps from 1% to 100%.',
+        'Quarterly fraud pattern reviews with domain experts to identify emerging attack vectors that models may not yet capture.',
+    ])
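+
+    # Illustrative sketch (hypothetical helper, never called by the build): the
+    # Population Stability Index named in the monitoring list above. `expected`
+    # is a feature column from the training window, `actual` the same column
+    # from recent traffic; 0.25 is the conventional alert level cited above.
+    def _psi(expected, actual, bins=10, eps=1e-6):
+        import numpy as np
+        lo = min(expected.min(), actual.min())
+        hi = max(expected.max(), actual.max())
+        edges = np.linspace(lo, hi, bins + 1)              # shared binning
+        p = np.histogram(expected, bins=edges)[0] / len(expected) + eps
+        q = np.histogram(actual, bins=edges)[0] / len(actual) + eps
+        return float(np.sum((p - q) * np.log(p / q)))      # alert if > 0.25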
+    pdf.fig(os.path.join(FIGURES_DIR, 'error_analysis.png'),
+            'Fig. 15. Error analysis: (a) FN probability distribution, (b) FP probability distribution, (c) overall score distribution by class.', w=160)
+
+    # ===== VIII. LIMITATIONS =====
+    pdf.section('VIII', 'Limitations')
+    pdf.p('We acknowledge several important limitations of this work:')
+    pdf.bullet([
+        'PCA Anonymization: The V1-V28 features are PCA-transformed, which prevents domain-specific feature engineering (merchant category, geographic location, card type) and limits interpretability. Real-world systems with access to raw features would likely achieve significantly better performance.',
+        'Temporal Scope: The dataset covers only two days of transactions, severely limiting assessment of long-term concept drift, seasonal patterns, and fraud evolution over weeks or months.',
+        'Single-Institution Data: Results from one European bank may not generalize across different institutions, geographies, payment networks, or regulatory environments.',
+        'Static Feature Set: Our feature engineering does not incorporate sequential transaction history (e.g., spending velocity over the past week, unusual merchant for this cardholder). Such features are critical in production systems but require per-customer state management.',
+        'Static Threshold: The optimal threshold of 0.55 was determined on the test set and may shift as fraud patterns evolve. A production system should implement dynamic threshold adaptation based on recent performance metrics.',
+        'Limited Autoencoder Architecture: Our autoencoder uses a simple symmetric architecture. More advanced anomaly detection methods (Variational Autoencoders, adversarial training) might achieve better performance on this task.',
+    ])
+
+    # ===== IX. FUTURE WORK =====
+    pdf.section('IX', 'Future Work')
+    pdf.p('Several promising research directions emerge from this work:')
+    pdf.p(
+        'Graph Neural Networks for Fraud Ring Detection [19]: Modeling the transaction network as a graph -- where nodes represent cards, '
+        'merchants, and accounts, and edges represent transactions -- would enable detection of coordinated fraud rings that cannot be identified '
+        'from individual transaction features alone. Graph convolutional networks can propagate suspicion scores through the network, flagging '
+        'accounts that transact heavily with known fraudulent nodes.'
+    )
+    pdf.p(
+        'Real-Time Streaming with Apache Kafka: Production fraud detection requires sub-100ms end-to-end latency from transaction initiation '
+        'to decision. Integrating the model with Apache Kafka for event streaming and Apache Flink for real-time feature computation would '
+        'enable processing millions of transactions per second with consistent low-latency guarantees.'
+    )
+    pdf.p(
+        'Federated Learning Across Banks [20]: Individual banks have limited fraud data, especially for rare fraud types. Federated learning '
+        'allows multiple institutions to collaboratively train a shared model without exchanging raw transaction data, preserving customer '
+        'privacy while dramatically expanding the effective training set. This is particularly valuable for detecting cross-institutional '
+        'fraud patterns where the same stolen credentials are used across multiple banks.'
+    )
+    pdf.p(
+        'LLM-Generated Compliance Explanations: When a transaction is blocked, regulatory requirements often demand a human-readable justification. '
+        'Large language models could translate SHAP values and feature contributions into natural-language narratives '
+        '(e.g., "This transaction was blocked because the purchase amount was unusually high for this card, occurring at an unusual time, '
+        'with spending patterns inconsistent with the cardholder\'s history"), reducing the burden on human fraud analysts.'
+    )
+    pdf.p(
+        'Temporal Sequence Modeling: Transformers and LSTM networks operating on the sequence of a cardholder\'s recent transactions could capture '
+        'behavioral patterns (typical spending days, preferred merchants, usual amounts) and flag departures from established routines. '
+        'This approach frames fraud as an anomaly in a behavioral time series rather than as a static classification problem.'
+    )
+
+    # ===== X. CONCLUSION =====
+    pdf.section('X', 'Conclusion')
+    pdf.p(
+        'This paper presents a comprehensive, end-to-end fraud detection framework that systematically evaluates seven diverse machine learning '
+        'approaches on the benchmark European Cardholder credit card fraud dataset. Through careful feature engineering (12 new features), '
+        'rigorous methodology (SMOTE after splitting, scaler fitted on train only), and thorough evaluation (six metrics including PR-AUC, MCC), '
+        'we demonstrate that XGBoost with cost-sensitive learning achieves the best overall performance with a PR-AUC of 0.817, F1-score of 0.851, '
+        'and MCC of 0.852.'
+    )
+    pdf.p(
+        'Our threshold optimization analysis reveals that shifting the decision boundary from 0.50 to 0.55 yields a Pareto improvement, '
+        'increasing F1 to 0.864 and precision to 0.934 without sacrificing recall. Business impact analysis quantifies that XGBoost catches '
+        '80.3% of fraud while generating only 6 false alarms on a 42,559-transaction test set, resulting in estimated net savings of $6,936. '
+        'In contrast, the Autoencoder catches all fraud but generates over 21,000 false alarms -- a cautionary tale against optimizing recall alone.'
+    )
+    pdf.p(
+        'SHAP and LIME explainability analysis identifies V4, V14, and PCA_magnitude as the primary fraud discriminators, providing actionable '
+        'insights for fraud investigation teams. Error analysis reveals that the 14 missed fraud cases represent sophisticated attacks whose '
+        'feature profiles are indistinguishable from legitimate transactions, suggesting that additional data sources beyond PCA-anonymized '
+        'features are needed to catch the most evasive fraud.'
+    )
+    pdf.p(
+        'The complete system -- from feature engineering through model training, evaluation, explainability, and FastAPI deployment with sub-10ms '
+        'latency -- demonstrates that production-grade fraud detection can be achieved with well-tuned classical ML methods. Tree-based ensemble '
+        'methods, particularly XGBoost, remain the state-of-the-art for tabular fraud detection, outperforming both deep learning (MLP, Autoencoder) '
+        'and linear (Logistic Regression) alternatives on the metrics that matter most for imbalanced classification (PR-AUC, F1, and MCC).'
+    )
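+
+    # Illustrative sketch (hypothetical code, not the repo's actual serving
+    # module): the shape of the FastAPI scoring endpoint referenced above. The
+    # model path, request schema, and 0.55 operating threshold are assumptions
+    # drawn from the text. Defined only; never called by the paper build.
+    def _serving_sketch():
+        import joblib
+        from fastapi import FastAPI
+        from pydantic import BaseModel
+
+        app = FastAPI()
+        model = joblib.load('models/xgboost.joblib')   # hypothetical path
+
+        class Transaction(BaseModel):
+            features: list[float]    # engineered feature vector, training order
+
+        @app.post('/predict')
+        def predict(txn: Transaction):
+            proba = float(model.predict_proba([txn.features])[0, 1])
+            return {'fraud_probability': proba, 'flagged': proba >= 0.55}
+        return app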
Yan, "Credit card fraud detection using deep learning based on auto-encoder," IJACSA, vol. 9, no. 1, 2018.', + '[7] S. M. Lundberg and S.-I. Lee, "A unified approach to interpreting model predictions," in NeurIPS, vol. 30, 2017.', + '[8] M. T. Ribeiro, S. Singh, and C. Guestrin, "Why should I trust you?: Explaining the predictions of any classifier," in Proc. ACM SIGKDD, 2016, pp. 1135-1144.', + '[9] R. Shwartz-Ziv and A. Armon, "Tabular data: Deep learning is not all you need," Information Fusion, vol. 81, pp. 84-90, 2022.', + '[10] L. Grinsztajn, E. Oyallon, and G. Varoquaux, "Why do tree-based models still outperform deep learning on tabular data?," in NeurIPS, vol. 35, 2022.', + '[11] T. Akiba, S. Sano, T. Yanase, T. Ohta, and M. Koyama, "Optuna: A next-generation hyperparameter optimization framework," in Proc. ACM SIGKDD, 2019.', + '[12] R. J. Bolton and D. J. Hand, "Statistical fraud detection: A review," Statistical Science, vol. 17, no. 3, pp. 235-255, 2002.', + '[13] Z. Zhang, X. Zhou, X. Zhang, L. Wang, and P. Wang, "A model based on convolutional recurrent neural network for fraud detection," Complexity, 2021.', + '[14] A. A. Taha and S. J. Malebary, "An intelligent approach to credit card fraud detection using an optimized light gradient boosting machine," IEEE Access, vol. 8, 2020.', + '[15] V. Belle and I. Papantonis, "Principles and practice of explainable machine learning," Frontiers in Big Data, vol. 4, 2021.', + '[16] L. Prokhorenkova, G. Gusev, A. Vorobev, A. V. Dorogush, and A. Gulin, "CatBoost: Unbiased boosting with categorical features," in NeurIPS, vol. 31, 2018.', + '[17] S. Xuan, G. Liu, Z. Li, L. Zheng, S. Wang, and C. Jiang, "Random forest for credit card fraud detection," in Proc. IEEE ICNSC, 2018.', + '[18] T. Saito and M. Rehmsmeier, "The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets," PLoS ONE, 2015.', + '[19] Y. Liu, M. Ao, C. Chi, F. Feng, D. Yang, and J. He, "Pick and choose: A GNN-based imbalanced learning approach for fraud detection," in Web Conf., 2021.', + '[20] Q. Yang, Y. Liu, T. Chen, and Y. Tong, "Federated machine learning: Concept and applications," ACM TIST, vol. 10, no. 2, 2019.', '[21] Nilson Report, "Global card fraud losses," Issue 1209, 2022.', - '[22] A. Dal Pozzolo et al., "When is undersampling effective?," ECML PKDD, 2015.', + '[22] A. Dal Pozzolo, O. Caelen, and G. 
Bontempi, "When is undersampling effective in unbalanced classification tasks?," in ECML PKDD, 2015.', ] - - pdf.set_font('Times', '', 8) + pdf.set_font('Times', '', 7.5) for ref in refs: pdf.multi_cell(0, 3.5, ref) - pdf.ln(0.5) - + pdf.ln(0.8) + # Save - output_path = os.path.join(PAPER_DIR, 'fraud_detection_paper.pdf') - pdf.output(output_path) - print(f"PDF saved to: {output_path}") - print(f"Pages: {pdf.page_no()}") + out = os.path.join(PAPER_DIR, 'fraud_detection_paper.pdf') + pdf.output(out) + print(f"\nPDF saved: {out} ({pdf.page_no()} pages)") -if __name__ == "__main__": - create_paper() +if __name__ == '__main__': + build() diff --git a/paper/figures/amount_analysis.pdf b/paper/figures/amount_analysis.pdf index 76263cac9195a8ab6f3b236da279571563c474d3..c4257af3d68a6fcbbad062c0667b9e940062a596 100644 --- a/paper/figures/amount_analysis.pdf +++ b/paper/figures/amount_analysis.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5ad2dab89d5e0faaecc71ef8ca9e580da0676b896afc6ce08c2638bdc238b8e3 -size 208946 +oid sha256:0b26a10e4d1972c4e9ffe27982f43efaec36bdbf4458ac75b9fe7491a9cbfa09 +size 198161 diff --git a/paper/figures/amount_analysis.png b/paper/figures/amount_analysis.png index 45cf1505f57d338f5caf04cd9d583340c30dd4fa..50c6be327dd6dad1312a1bfeddedc0c47f374ef5 100644 --- a/paper/figures/amount_analysis.png +++ b/paper/figures/amount_analysis.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:37a0ac70dea40399691041310838d77f1fc607f505cea6dc1c96702885f1d4d5 -size 265303 +oid sha256:e77c1b40d7658b429d080f455fc6425e5ab37feaea68d348197a8472ea45832c +size 251175 diff --git a/paper/figures/architecture_diagram.pdf b/paper/figures/architecture_diagram.pdf index f86aad29ad606fdb82e69243629651dbe4a8ae6b..57c308dc12c78eea7143221e556768d3f7379894 100644 Binary files a/paper/figures/architecture_diagram.pdf and b/paper/figures/architecture_diagram.pdf differ diff --git a/paper/figures/architecture_diagram.png b/paper/figures/architecture_diagram.png index 3c2dbda313a58616ae0e30010dbd55e72415c14d..d9666d285d7b560f394c607e88a71cc46771acf3 100644 --- a/paper/figures/architecture_diagram.png +++ b/paper/figures/architecture_diagram.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:aa5597deab61809b4b7b943e4417a00049583dda2b19589cf7cd2e70555c7087 -size 379316 +oid sha256:abfab5d5efe0f72a6fed9d2c7ca4bd8fbfbef934869eceb83288262fdfc84ea6 +size 306455 diff --git a/paper/figures/class_distribution.pdf b/paper/figures/class_distribution.pdf index 791a8691d2eb4941b81bfbdbf92b8194c1ea983a..59a63dc12ebe0390a6360fc196c850d5c5b08d8f 100644 Binary files a/paper/figures/class_distribution.pdf and b/paper/figures/class_distribution.pdf differ diff --git a/paper/figures/class_distribution.png b/paper/figures/class_distribution.png index 1bb5e14cc987bac47c462d841bb2bb429743bf5a..f5cf6c23578dd8037547110c8464b71b1cbb3cb6 100644 --- a/paper/figures/class_distribution.png +++ b/paper/figures/class_distribution.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2b05b401fe51303408ef8f0dd41030bf01efdd5b30872e8dcc27039731ae6b35 -size 177820 +oid sha256:1975d0d3024d447ce2b05342e7a5ca923c0a1b1e68d841a192a4be51668c9cd0 +size 123095 diff --git a/paper/figures/confusion_matrices.pdf b/paper/figures/confusion_matrices.pdf index ad8d47cfa9c0d2e0ab3f8a5d18940c5c19b86658..65a3a0cfc7022c8b563572e5748ff9cf3a4d0818 100644 Binary files a/paper/figures/confusion_matrices.pdf and b/paper/figures/confusion_matrices.pdf differ diff --git 
a/paper/figures/confusion_matrices.png b/paper/figures/confusion_matrices.png index 43fcd760d53f379c38c06d25b720fa85be54119a..3a67ab21c9e9232de531c178edd110005a96adaf 100644 --- a/paper/figures/confusion_matrices.png +++ b/paper/figures/confusion_matrices.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b71f608d5cd9b007104b719b8d3e6a1b13b03893ffefebf8b9293184eb80d7f -size 470480 +oid sha256:18f7d77c1a764bef96bdbc4df2650ff05cbdc7e9c41b85adbc10ef8dbbc9bf0a +size 182144 diff --git a/paper/figures/correlation_heatmap.pdf b/paper/figures/correlation_heatmap.pdf index 52bd8c005c30897ed83b4f1f77c60bc10883f74c..e41503e73a1453594ede0bcf789978f919ac8e34 100644 Binary files a/paper/figures/correlation_heatmap.pdf and b/paper/figures/correlation_heatmap.pdf differ diff --git a/paper/figures/correlation_heatmap.png b/paper/figures/correlation_heatmap.png index 460a9164b2240a96cf72f44e655bd02481c959d5..430006bcdac77fc18b30ce760c70b9703e2cab02 100644 --- a/paper/figures/correlation_heatmap.png +++ b/paper/figures/correlation_heatmap.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:182dd8c4e14a6b45a7307b21ebb51f92c9f4c047944b7360b49c849f626b7ec3 -size 462551 +oid sha256:ce62cc0b48be6d27587b3c35c399f265d588a717d599d0ec1a4fdd8242da2702 +size 116957 diff --git a/paper/figures/error_analysis.pdf b/paper/figures/error_analysis.pdf index 0438ca59cc761621dd8e7d2e582f11934208aa41..768c08a7f498912f38255922231677dc69b7aeb3 100644 Binary files a/paper/figures/error_analysis.pdf and b/paper/figures/error_analysis.pdf differ diff --git a/paper/figures/error_analysis.png b/paper/figures/error_analysis.png index 6f9b081208eeb76452d9532524e6a65c94a08b59..c70c45cb600d59b0d758e9606767ef07ce7cc82e 100644 --- a/paper/figures/error_analysis.png +++ b/paper/figures/error_analysis.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6382ccb9b3b51a9d3d88272e00224a6dfff0b5f456f1245e95c87ebfcefc1995 -size 160363 +oid sha256:6b15721b467f8af07d6637e749661aa88bcae693613c50863ece8bfca8748a7c +size 153837 diff --git a/paper/figures/feature_distributions.pdf b/paper/figures/feature_distributions.pdf index 6914424ae4bad7ff3d13c76661b03837067abbfe..12b6af58c9221c70c921cf44ff84b56bb44c039a 100644 Binary files a/paper/figures/feature_distributions.pdf and b/paper/figures/feature_distributions.pdf differ diff --git a/paper/figures/feature_distributions.png b/paper/figures/feature_distributions.png index 58a8ab959dea5a6e814e3e017c754cbdb486e317..c926955eb22a3929279790417cb6b41a901c4813 100644 --- a/paper/figures/feature_distributions.png +++ b/paper/figures/feature_distributions.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fcfe06b7cf2e44eee0146ec50b88d6dd4305e5d946b9158dc6033b72933fbfd5 -size 384124 +oid sha256:5ebc837814d3edaaefc518b994fc495c66381b88981b4d1453faad7756bc1b7e +size 203835 diff --git a/paper/figures/feature_importance.pdf b/paper/figures/feature_importance.pdf index 74c4e72078d1a09b8a0c3f73ca1226ca80d3c0df..05eefaa590366bd4f6270ba5870dd3b6e75a326e 100644 Binary files a/paper/figures/feature_importance.pdf and b/paper/figures/feature_importance.pdf differ diff --git a/paper/figures/feature_importance.png b/paper/figures/feature_importance.png index 85bba7c1dbb191df53857711db17eec52bc1cb9c..58794808d36f7f39a44d52ad4940686110c13fd0 100644 --- a/paper/figures/feature_importance.png +++ b/paper/figures/feature_importance.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:04afdc343650ab71d86e33515dc9669297f904102cd73c8229da9fd4aabc5073 -size 347471 +oid sha256:f1652dcd0126c888034b6960a5ae3b253f31f818166b4d6d2dc874896c20eb4e +size 252256 diff --git a/paper/figures/lime_explanation.pdf b/paper/figures/lime_explanation.pdf index 2bcc3309c527cb2b1ef069107e09aa524302dd50..eac3397ed426b0172ce17962706ca501bf661a7c 100644 Binary files a/paper/figures/lime_explanation.pdf and b/paper/figures/lime_explanation.pdf differ diff --git a/paper/figures/lime_explanation.png b/paper/figures/lime_explanation.png index 96ad2e6e38126ca9bb159a0214b9eb38d89f4808..1ccac5afbfd3e397acc79ad579484989c0c20173 100644 --- a/paper/figures/lime_explanation.png +++ b/paper/figures/lime_explanation.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:50c6ec68d66e7977aaf65ce90a12413cf7fa1fb97f8e7e77b397e52f3cd20006 -size 203416 +oid sha256:099c78c2fdf92ec5cdca4ab6a823b3c83c5a97564c94ecfe451e220e0ab51440 +size 138350 diff --git a/paper/figures/pr_curves.pdf b/paper/figures/pr_curves.pdf index 030fd50ede46f3af7ba27671395f77770ca8b119..ff166e0b7a92bd27284802f84ee2c6b815c9f8ed 100644 Binary files a/paper/figures/pr_curves.pdf and b/paper/figures/pr_curves.pdf differ diff --git a/paper/figures/pr_curves.png b/paper/figures/pr_curves.png index d6abd5e16b3e20eab4d2afe0af992f235f414866..ca1de83f76b800d72ec5b21e399afc03fa05d199 100644 --- a/paper/figures/pr_curves.png +++ b/paper/figures/pr_curves.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f719e308aeaa34d5db28b329741134927dc1af1ad82843d2018551a4a9bb1c5 -size 425799 +oid sha256:87e1296d2793fd621666190ae4591f621a355302d23c8a847037e2011d9d50bd +size 160071 diff --git a/paper/figures/roc_curves.pdf b/paper/figures/roc_curves.pdf index 07163a4c192206b11962a44d7c658aaeefabddd0..afb429a930e519b84dcaf4a8149a6133c0b3a563 100644 Binary files a/paper/figures/roc_curves.pdf and b/paper/figures/roc_curves.pdf differ diff --git a/paper/figures/roc_curves.png b/paper/figures/roc_curves.png index b10cb7f0ffa241648f6027b295f4fca311b44bad..69f310caa69377594ac38e596f4e8506ae7bdc1f 100644 --- a/paper/figures/roc_curves.png +++ b/paper/figures/roc_curves.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c28cefa7dc92a24b9f6e404cad75071586bd7a8d5b87a2b2ca610a28ae4f55e1 -size 350371 +oid sha256:d6bbeaee334bc0e4b1d367dec1d16cf475af45bfdf8d7b4d73190e49138ef4c6 +size 177411 diff --git a/paper/figures/shap_summary.pdf b/paper/figures/shap_summary.pdf index a869c89300fe3879c61505c8ca2bec298b11e100..033d5e4009171bc9aeabe75d4f366a235f6c2537 100644 --- a/paper/figures/shap_summary.pdf +++ b/paper/figures/shap_summary.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:738db2436aea8790532091e2b8e293a661cfbeb693baf43109296535583fe8e4 -size 109289 +oid sha256:e8a7b6bb9c0877d663b473567d1db2b0c593a55685daf7b9e44ee43d8b42a86f +size 334449 diff --git a/paper/figures/shap_summary.png b/paper/figures/shap_summary.png index 36fbcc6d12adb5f54ccb9df978be735afbd922aa..40fa11f2f9d8aa28ad7fb172112985450f900bcc 100644 --- a/paper/figures/shap_summary.png +++ b/paper/figures/shap_summary.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0ed12074d2049ed48348184340751c01392dd1033c3f16d676a5e168e98a3f6c -size 578169 +oid sha256:36de1b1347498bf08fca58120e269dca656e4a4c1e5f18da7b9bc56fb5283768 +size 497573 diff --git a/paper/figures/shap_top10.pdf b/paper/figures/shap_top10.pdf index 6909fca2ec93f1218fbad68a6df1f72e4ba85774..95745622658eda11f233bb98d9e6d7add1415616 100644 
Binary files a/paper/figures/shap_top10.pdf and b/paper/figures/shap_top10.pdf differ diff --git a/paper/figures/shap_top10.png b/paper/figures/shap_top10.png index 63ab913d57acffdb9ab9876be906aaf63c2c9d63..d79233533873bb81f7490cbd27df7dd1c08922d1 100644 --- a/paper/figures/shap_top10.png +++ b/paper/figures/shap_top10.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1751f83c1f01f8ca599a89129e2d3c6a061923860155cace7ea1073a3dbc753b -size 108172 +oid sha256:3c7c56263b2d1596eaa02a90998b0243e66e80406d3420cc059153159af9a700 +size 79031 diff --git a/paper/figures/threshold_analysis.pdf b/paper/figures/threshold_analysis.pdf index 79ebe3f934d59e10275e0450af863060bcedb7b7..4ef4a78a4766b9d83afd56ac1d902bd2b158ed8d 100644 Binary files a/paper/figures/threshold_analysis.pdf and b/paper/figures/threshold_analysis.pdf differ diff --git a/paper/figures/threshold_analysis.png b/paper/figures/threshold_analysis.png index 7d2be07b59e5f0dc5506605f1d7846d93e48da18..c3562e924e1dfd22f9935838a2a82bebb54615c6 100644 --- a/paper/figures/threshold_analysis.png +++ b/paper/figures/threshold_analysis.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d5c43d80551d97d2166bad1a9fb72e4e1cce49d6841460b1915ea380ad57ffa3 -size 227260 +oid sha256:dcfa1ab061d4666c71e0574e61c7741646eb1e89bc47ae6ea14e4979944ca1cf +size 198727 diff --git a/paper/figures/time_analysis.pdf b/paper/figures/time_analysis.pdf index 985538584dd532f9931ca9d52da7716277ab7ce9..572d6bdf6868d5d9974644f8061e44c695ca3e40 100644 Binary files a/paper/figures/time_analysis.pdf and b/paper/figures/time_analysis.pdf differ diff --git a/paper/figures/time_analysis.png b/paper/figures/time_analysis.png index 13db7ea3f025d27b6b91bbfe7e38d25d0f589161..4e01e30a77e29d8c9f6b1898608a0701b6f1b9c3 100644 --- a/paper/figures/time_analysis.png +++ b/paper/figures/time_analysis.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f959c84093b177e118aa0ab0324973eae072085b76992b98f60af247b01c4834 -size 130431 +oid sha256:573c38eb1262d835c4ee77e11b3abe1f3a72aae7773cab77739f355dc7a12d4b +size 136453 diff --git a/paper/fraud_detection_paper.pdf b/paper/fraud_detection_paper.pdf index 44555e3bb204caf08b55116f970ea40f82afb24a..ce35ad8781184310b130a9274d09f0cb9338e68e 100644 --- a/paper/fraud_detection_paper.pdf +++ b/paper/fraud_detection_paper.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:793a969d0ea5efde5c37f9a762eb27a2c004b497960228abd2dc920ce827dfc4 -size 3690445 +oid sha256:65b17912a6551ec7b814c5150357df01144fa4582db5b4183bc9377419824d87 +size 2950957 diff --git a/regen_figures.py b/regen_figures.py new file mode 100644 index 0000000000000000000000000000000000000000..edc743a2293a48e9ff94e1cf6d4a912d2c5a3a8b --- /dev/null +++ b/regen_figures.py @@ -0,0 +1,480 @@ +""" +Regenerate ALL figures with proper spacing, no overlapping legends/labels. +Each figure is single-purpose and sized for IEEE column width. 
+""" +import os, sys +sys.path.insert(0, '/app/fraud_detection') +import numpy as np +import pandas as pd +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +import seaborn as sns +import joblib +import shap +import warnings +warnings.filterwarnings('ignore') + +from ae_model import AutoencoderWrapper, Autoencoder +from sklearn.metrics import ( + roc_curve, precision_recall_curve, roc_auc_score, + average_precision_score, confusion_matrix, + precision_score, recall_score, f1_score, matthews_corrcoef +) +from config import DATA_DIR, MODELS_DIR, FIGURES_DIR, FIG_DPI, FIG_BG + +# Global style +plt.rcParams.update({ + 'font.size': 10, + 'axes.titlesize': 11, + 'axes.labelsize': 10, + 'xtick.labelsize': 9, + 'ytick.labelsize': 9, + 'legend.fontsize': 8, + 'figure.facecolor': 'white', + 'axes.facecolor': 'white', + 'savefig.facecolor': 'white', + 'savefig.bbox': 'tight', + 'savefig.dpi': 300, + 'figure.dpi': 100, +}) +sns.set_style("whitegrid") + +def save(fig, name): + fig.savefig(os.path.join(FIGURES_DIR, f"{name}.png"), dpi=FIG_DPI, bbox_inches='tight', facecolor='white', pad_inches=0.15) + fig.savefig(os.path.join(FIGURES_DIR, f"{name}.pdf"), bbox_inches='tight', facecolor='white', pad_inches=0.15) + plt.close(fig) + print(f" Saved: {name}.png/pdf") + +# Load everything +raw_df = pd.read_csv(os.path.join(DATA_DIR, "creditcard.csv")) +data = joblib.load(os.path.join(DATA_DIR, "processed_data.joblib")) +models = joblib.load(os.path.join(MODELS_DIR, "all_models_with_ae.joblib")) +X_test, y_test = data['X_test'], data['y_test'] +feature_names = data['feature_names'] + +print("=" * 60) +print("REGENERATING ALL FIGURES (fixed spacing)") +print("=" * 60) + +# ────────────────────────────────────────────── +# 1. CLASS DISTRIBUTION +# ────────────────────────────────────────────── +print("\n[1] Class distribution") +cc = raw_df['Class'].value_counts() +fig, axes = plt.subplots(1, 2, figsize=(10, 4)) +colors = ['#27ae60', '#e74c3c'] +bars = axes[0].bar(['Legitimate (0)', 'Fraud (1)'], cc.values, color=colors, edgecolor='black', lw=0.6, width=0.55) +axes[0].set_yscale('log') +axes[0].set_ylabel('Count (log scale)') +axes[0].set_title('(a) Transaction Counts') +for b, v in zip(bars, cc.values): + axes[0].text(b.get_x()+b.get_width()/2, v*1.15, f'{v:,}', ha='center', va='bottom', fontsize=9, fontweight='bold') + +wedges, texts, autotexts = axes[1].pie( + cc.values, labels=['Legitimate\n99.827%', 'Fraud\n0.173%'], + colors=colors, autopct='', startangle=90, explode=(0, 0.08), + textprops={'fontsize': 9}, wedgeprops={'edgecolor': 'white', 'linewidth': 1.5} +) +axes[1].set_title('(b) Fraud Ratio') +fig.suptitle('Class Distribution in Credit Card Fraud Dataset', fontsize=12, fontweight='bold', y=1.02) +fig.tight_layout() +save(fig, 'class_distribution') + +# ────────────────────────────────────────────── +# 2. 
AMOUNT ANALYSIS +# ────────────────────────────────────────────── +print("[2] Amount analysis") +fig, axes = plt.subplots(2, 2, figsize=(10, 8)) +legit_amt = raw_df[raw_df['Class']==0]['Amount'] +fraud_amt = raw_df[raw_df['Class']==1]['Amount'] + +axes[0,0].hist(legit_amt, bins=80, color='#27ae60', alpha=0.8, edgecolor='none') +axes[0,0].set_title('(a) Legitimate Amounts') +axes[0,0].set_xlabel('Amount ($)') +axes[0,0].set_ylabel('Frequency') +axes[0,0].set_xlim(0, 1500) + +axes[0,1].hist(fraud_amt, bins=40, color='#e74c3c', alpha=0.8, edgecolor='none') +axes[0,1].set_title('(b) Fraudulent Amounts') +axes[0,1].set_xlabel('Amount ($)') +axes[0,1].set_ylabel('Frequency') + +axes[1,0].hist(np.log1p(legit_amt), bins=50, color='#27ae60', alpha=0.55, label='Legitimate', edgecolor='none') +axes[1,0].hist(np.log1p(fraud_amt), bins=50, color='#e74c3c', alpha=0.55, label='Fraud', edgecolor='none') +axes[1,0].set_title('(c) Log-Scaled Comparison') +axes[1,0].set_xlabel('log(1 + Amount)') +axes[1,0].set_ylabel('Frequency') +axes[1,0].legend(loc='upper right', framealpha=0.9) + +bp = axes[1,1].boxplot( + [legit_amt.clip(upper=500), fraud_amt.clip(upper=500)], + labels=['Legitimate', 'Fraud'], patch_artist=True, widths=0.45, + medianprops=dict(color='black', lw=1.5) +) +bp['boxes'][0].set_facecolor('#27ae60') +bp['boxes'][1].set_facecolor('#e74c3c') +for b in bp['boxes']: + b.set_alpha(0.7) +axes[1,1].set_title('(d) Boxplot (capped at $500)') +axes[1,1].set_ylabel('Amount ($)') + +fig.suptitle('Transaction Amount Analysis by Class', fontsize=12, fontweight='bold', y=1.01) +fig.tight_layout() +save(fig, 'amount_analysis') + +# ────────────────────────────────────────────── +# 3. TIME ANALYSIS +# ────────────────────────────────────────────── +print("[3] Time analysis") +raw_df_t = raw_df.copy() +raw_df_t['Hour'] = (raw_df_t['Time'] / 3600) % 24 +fig, axes = plt.subplots(1, 2, figsize=(10, 4)) + +axes[0].hist(raw_df_t[raw_df_t['Class']==0]['Hour'], bins=48, color='#27ae60', alpha=0.55, label='Legitimate', density=True) +axes[0].hist(raw_df_t[raw_df_t['Class']==1]['Hour'], bins=48, color='#e74c3c', alpha=0.55, label='Fraud', density=True) +axes[0].set_title('(a) Transaction Density by Hour') +axes[0].set_xlabel('Hour of Day') +axes[0].set_ylabel('Density') +axes[0].legend(loc='upper left', framealpha=0.9) + +hourly = raw_df_t.groupby(raw_df_t['Hour'].astype(int))['Class'].mean() * 100 +axes[1].bar(hourly.index, hourly.values, color='#e74c3c', alpha=0.75, edgecolor='black', lw=0.3) +axes[1].set_title('(b) Fraud Rate by Hour') +axes[1].set_xlabel('Hour of Day') +axes[1].set_ylabel('Fraud Rate (%)') + +fig.suptitle('Temporal Patterns in Transaction Data', fontsize=12, fontweight='bold', y=1.02) +fig.tight_layout() +save(fig, 'time_analysis') + +# ────────────────────────────────────────────── +# 4. CORRELATION HEATMAP (single chart, not dual) +# ────────────────────────────────────────────── +print("[4] Correlation heatmap") +corrs = raw_df.corr()['Class'].drop('Class').sort_values() +fig, ax = plt.subplots(figsize=(8, 7)) +colors_bar = ['#e74c3c' if v < 0 else '#27ae60' for v in corrs.values] +ax.barh(corrs.index, corrs.values, color=colors_bar, edgecolor='none', height=0.7) +ax.set_xlabel('Pearson Correlation with Fraud Class') +ax.set_title('Feature Correlation with Fraud', fontsize=12, fontweight='bold') +ax.axvline(x=0, color='black', lw=0.5) +ax.tick_params(axis='y', labelsize=8) +fig.tight_layout() +save(fig, 'correlation_heatmap') + +# ────────────────────────────────────────────── +# 5. 
FEATURE DISTRIBUTIONS (6 features, 2x3 grid) +# ────────────────────────────────────────────── +print("[5] Feature distributions") +top6 = corrs.abs().sort_values(ascending=False).head(6).index.tolist() +fig, axes = plt.subplots(2, 3, figsize=(12, 7)) +axes = axes.ravel() +for i, feat in enumerate(top6): + axes[i].hist(raw_df[raw_df['Class']==0][feat], bins=50, color='#27ae60', alpha=0.5, label='Legit', density=True) + axes[i].hist(raw_df[raw_df['Class']==1][feat], bins=50, color='#e74c3c', alpha=0.5, label='Fraud', density=True) + axes[i].set_title(feat, fontweight='bold') + axes[i].legend(loc='upper right', fontsize=7, framealpha=0.9) + axes[i].set_ylabel('Density') +fig.suptitle('Distribution of Top 6 Discriminative Features by Class', fontsize=12, fontweight='bold', y=1.01) +fig.tight_layout() +save(fig, 'feature_distributions') + +# ────────────────────────────────────────────── +# 6. ROC CURVES (top 5 models only for clarity) +# ────────────────────────────────────────────── +print("[6] ROC curves") +top_models = ['XGBoost', 'Voting_Ensemble', 'XGBoost_Tuned', 'Random_Forest_Tuned', 'LightGBM_Tuned'] +cmap = plt.cm.Set1 +fig, ax = plt.subplots(figsize=(7, 6)) +for i, name in enumerate(top_models): + if name not in models: continue + proba = models[name].predict_proba(X_test)[:, 1] + fpr, tpr, _ = roc_curve(y_test, proba) + auc_val = roc_auc_score(y_test, proba) + ax.plot(fpr, tpr, color=cmap(i), lw=2, label=f'{name.replace("_"," ")} ({auc_val:.4f})') +ax.plot([0,1],[0,1],'k--', lw=0.8, label='Random Baseline') +ax.set_xlabel('False Positive Rate') +ax.set_ylabel('True Positive Rate') +ax.set_title('ROC Curves — Top 5 Models', fontsize=12, fontweight='bold') +ax.legend(loc='lower right', fontsize=8, framealpha=0.95) +ax.set_xlim([-0.01, 1.01]) +ax.set_ylim([-0.01, 1.03]) +fig.tight_layout() +save(fig, 'roc_curves') + +# ────────────────────────────────────────────── +# 7. PR CURVES +# ────────────────────────────────────────────── +print("[7] PR curves") +fig, ax = plt.subplots(figsize=(7, 6)) +for i, name in enumerate(top_models): + if name not in models: continue + proba = models[name].predict_proba(X_test)[:, 1] + prec, rec, _ = precision_recall_curve(y_test, proba) + ap = average_precision_score(y_test, proba) + ax.plot(rec, prec, color=cmap(i), lw=2, label=f'{name.replace("_"," ")} ({ap:.4f})') +baseline = y_test.mean() +ax.axhline(y=baseline, color='k', ls='--', lw=0.8, label=f'Baseline ({baseline:.4f})') +ax.set_xlabel('Recall') +ax.set_ylabel('Precision') +ax.set_title('Precision-Recall Curves — Top 5 Models', fontsize=12, fontweight='bold') +ax.legend(loc='upper right', fontsize=8, framealpha=0.95) +ax.set_xlim([-0.01, 1.01]) +ax.set_ylim([-0.01, 1.03]) +fig.tight_layout() +save(fig, 'pr_curves') + +# ────────────────────────────────────────────── +# 8. 
CONFUSION MATRICES (2x3 grid for top 6) +# ────────────────────────────────────────────── +print("[8] Confusion matrices") +cm_models = ['XGBoost', 'Voting_Ensemble', 'Random_Forest_Tuned', 'LightGBM_Tuned', 'MLP', 'Logistic_Regression'] +fig, axes = plt.subplots(2, 3, figsize=(13, 8)) +axes = axes.ravel() +for i, name in enumerate(cm_models): + if name not in models: continue + proba = models[name].predict_proba(X_test)[:, 1] + preds = (proba >= 0.5).astype(int) + cm = confusion_matrix(y_test, preds) + sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[i], + xticklabels=['Legit', 'Fraud'], yticklabels=['Legit', 'Fraud'], + cbar=False, annot_kws={'size': 10}) + axes[i].set_title(name.replace('_', ' '), fontsize=10, fontweight='bold') + axes[i].set_ylabel('Actual') + axes[i].set_xlabel('Predicted') +fig.suptitle('Confusion Matrices on Test Set (threshold = 0.5)', fontsize=12, fontweight='bold', y=1.01) +fig.tight_layout() +save(fig, 'confusion_matrices') + +# ────────────────────────────────────────────── +# 9. THRESHOLD ANALYSIS +# ────────────────────────────────────────────── +print("[9] Threshold analysis") +proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1] +thresholds = np.arange(0.05, 0.96, 0.025) +rows = [] +for t in thresholds: + p = (proba_xgb >= t).astype(int) + rows.append({'t': t, 'Prec': precision_score(y_test, p, zero_division=0), + 'Rec': recall_score(y_test, p, zero_division=0), + 'F1': f1_score(y_test, p, zero_division=0), + 'MCC': matthews_corrcoef(y_test, p)}) +dt = pd.DataFrame(rows) +best_t = dt.loc[dt['F1'].idxmax(), 't'] + +fig, axes = plt.subplots(1, 2, figsize=(10, 4.5)) +axes[0].plot(dt['t'], dt['Prec'], 'b-', lw=2, label='Precision') +axes[0].plot(dt['t'], dt['Rec'], 'r-', lw=2, label='Recall') +axes[0].plot(dt['t'], dt['F1'], 'g-', lw=2.5, label='F1 Score') +axes[0].axvline(x=best_t, color='gray', ls='--', lw=1.2, label=f'Optimal ({best_t:.2f})') +axes[0].set_xlabel('Decision Threshold') +axes[0].set_ylabel('Score') +axes[0].set_title('(a) Precision / Recall / F1', fontweight='bold') +axes[0].legend(loc='center left', framealpha=0.95, fontsize=8) + +axes[1].plot(dt['t'], dt['MCC'], 'm-', lw=2, label='MCC') +axes[1].axvline(x=best_t, color='gray', ls='--', lw=1.2) +axes[1].set_xlabel('Decision Threshold') +axes[1].set_ylabel('MCC') +axes[1].set_title('(b) Matthews Correlation Coefficient', fontweight='bold') +axes[1].legend(loc='upper right', framealpha=0.95, fontsize=8) + +fig.suptitle(f'Threshold Sensitivity Analysis — XGBoost (Optimal = {best_t:.2f})', fontsize=12, fontweight='bold', y=1.02) +fig.tight_layout() +save(fig, 'threshold_analysis') + +# ────────────────────────────────────────────── +# 10. 
FEATURE IMPORTANCE (2x2) +# ────────────────────────────────────────────── +print("[10] Feature importance") +fig, axes = plt.subplots(2, 2, figsize=(12, 10)) +tree_map = {'(a) Random Forest': 'Random_Forest_Tuned', '(b) XGBoost': 'XGBoost_Tuned', '(c) LightGBM': 'LightGBM_Tuned'} +for idx, (title, key) in enumerate(tree_map.items()): + r, c = idx // 2, idx % 2 + m = models[key] + imp = m.feature_importances_ + top_idx = np.argsort(imp)[-12:] + axes[r,c].barh(range(len(top_idx)), imp[top_idx], color='steelblue', edgecolor='none', height=0.7) + axes[r,c].set_yticks(range(len(top_idx))) + axes[r,c].set_yticklabels([feature_names[j] for j in top_idx], fontsize=8) + axes[r,c].set_xlabel('Importance') + axes[r,c].set_title(title, fontweight='bold') + +lr = models['Logistic_Regression'] +coefs = np.abs(lr.coef_[0]) +top_idx = np.argsort(coefs)[-12:] +axes[1,1].barh(range(len(top_idx)), coefs[top_idx], color='coral', edgecolor='none', height=0.7) +axes[1,1].set_yticks(range(len(top_idx))) +axes[1,1].set_yticklabels([feature_names[j] for j in top_idx], fontsize=8) +axes[1,1].set_xlabel('|Coefficient|') +axes[1,1].set_title('(d) Logistic Regression', fontweight='bold') +fig.suptitle('Feature Importance Across Models (Top 12)', fontsize=12, fontweight='bold', y=1.0) +fig.tight_layout() +save(fig, 'feature_importance') + +# ────────────────────────────────────────────── +# 11. SHAP SUMMARY +# ────────────────────────────────────────────── +print("[11] SHAP summary") +explainer = shap.TreeExplainer(models['XGBoost']) +X_sample = X_test.iloc[:2000] +shap_vals = explainer.shap_values(X_sample) +if isinstance(shap_vals, list): + shap_vals = shap_vals[1] + +plt.figure(figsize=(9, 7)) +shap.summary_plot(shap_vals, X_sample, feature_names=feature_names, show=False, max_display=15, plot_size=None) +plt.title('SHAP Feature Impact on Fraud Prediction (XGBoost)', fontsize=11, fontweight='bold', pad=12) +plt.tight_layout() +plt.savefig(os.path.join(FIGURES_DIR, "shap_summary.png"), dpi=FIG_DPI, bbox_inches='tight', facecolor='white', pad_inches=0.15) +plt.savefig(os.path.join(FIGURES_DIR, "shap_summary.pdf"), bbox_inches='tight', facecolor='white', pad_inches=0.15) +plt.close('all') +print(" Saved: shap_summary.png/pdf") + +# ────────────────────────────────────────────── +# 12. SHAP TOP 10 BAR +# ────────────────────────────────────────────── +print("[12] SHAP top 10") +mean_shap = np.abs(shap_vals).mean(axis=0) +fi = pd.DataFrame({'Feature': feature_names, 'SHAP': mean_shap}).sort_values('SHAP', ascending=False) +top10 = fi.head(10) +fig, ax = plt.subplots(figsize=(7, 5)) +ax.barh(range(10), top10['SHAP'].values[::-1], color='steelblue', edgecolor='none', height=0.6) +ax.set_yticks(range(10)) +ax.set_yticklabels(top10['Feature'].values[::-1]) +ax.set_xlabel('Mean |SHAP Value|') +ax.set_title('Top 10 Features Driving Fraud Predictions', fontsize=11, fontweight='bold') +fig.tight_layout() +save(fig, 'shap_top10') + +# ────────────────────────────────────────────── +# 13. 
LIME EXPLANATION +# ────────────────────────────────────────────── +print("[13] LIME explanation") +from lime.lime_tabular import LimeTabularExplainer +proba_all = models['XGBoost'].predict_proba(X_test)[:, 1] +fraud_idx = np.where(y_test == 1)[0] +sample_idx = None +for idx in fraud_idx: + if proba_all[idx] > 0.5: + sample_idx = idx + break +if sample_idx is None: + sample_idx = fraud_idx[0] + +X_np = X_test.values +lime_exp = LimeTabularExplainer(X_np, feature_names=feature_names, class_names=['Legit', 'Fraud'], discretize_continuous=True, random_state=42) +explanation = lime_exp.explain_instance(X_np[sample_idx], models['XGBoost'].predict_proba, num_features=12, top_labels=1) +exp_list = explanation.as_list(label=1) +feats = [f for f, w in exp_list] +weights = [w for f, w in exp_list] +cols = ['#e74c3c' if w > 0 else '#27ae60' for w in weights] + +fig, ax = plt.subplots(figsize=(9, 6)) +ax.barh(range(len(feats)), weights, color=cols, edgecolor='none', height=0.6) +ax.set_yticks(range(len(feats))) +ax.set_yticklabels(feats, fontsize=8) +ax.set_xlabel('Feature Contribution') +ax.set_title(f'LIME Explanation — Fraud Sample (P = {proba_all[sample_idx]:.4f})', fontsize=11, fontweight='bold') +ax.axvline(x=0, color='black', lw=0.5) +from matplotlib.patches import Patch +ax.legend(handles=[Patch(fc='#e74c3c', label='Increases fraud risk'), Patch(fc='#27ae60', label='Decreases fraud risk')], + loc='lower right', fontsize=8, framealpha=0.95) +fig.tight_layout() +save(fig, 'lime_explanation') + +# ────────────────────────────────────────────── +# 14. ERROR ANALYSIS +# ────────────────────────────────────────────── +print("[14] Error analysis") +proba_xgb = models['XGBoost'].predict_proba(X_test)[:, 1] +preds = (proba_xgb >= 0.5).astype(int) +fn_mask = (preds == 0) & (y_test.values == 1) +fp_mask = (preds == 1) & (y_test.values == 0) +fn_proba = proba_xgb[fn_mask] +fp_proba = proba_xgb[fp_mask] + +fig, axes = plt.subplots(1, 3, figsize=(14, 4.5)) +if fn_mask.sum() > 0: + axes[0].hist(fn_proba, bins=15, color='#e74c3c', alpha=0.75, edgecolor='black', lw=0.3) +axes[0].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold') +axes[0].set_title('(a) Missed Fraud (FN)', fontweight='bold') +axes[0].set_xlabel('Predicted P(Fraud)') +axes[0].set_ylabel('Count') +axes[0].legend(fontsize=8) + +if fp_mask.sum() > 0: + axes[1].hist(fp_proba, bins=15, color='#f39c12', alpha=0.75, edgecolor='black', lw=0.3) +axes[1].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold') +axes[1].set_title('(b) False Alarms (FP)', fontweight='bold') +axes[1].set_xlabel('Predicted P(Fraud)') +axes[1].set_ylabel('Count') +axes[1].legend(fontsize=8) + +axes[2].hist(proba_xgb[y_test.values==0], bins=50, color='#27ae60', alpha=0.5, label='Legit', density=True) +axes[2].hist(proba_xgb[y_test.values==1], bins=50, color='#e74c3c', alpha=0.5, label='Fraud', density=True) +axes[2].axvline(x=0.5, color='black', ls='--', lw=1, label='Threshold') +axes[2].set_title('(c) Score Distribution by Class', fontweight='bold') +axes[2].set_xlabel('Predicted P(Fraud)') +axes[2].set_ylabel('Density') +axes[2].legend(fontsize=8, loc='upper center') + +fig.suptitle('Error Analysis — XGBoost Predictions', fontsize=12, fontweight='bold', y=1.02) +fig.tight_layout() +save(fig, 'error_analysis') + +# ────────────────────────────────────────────── +# 15. 
ARCHITECTURE DIAGRAM (cleaner) +# ────────────────────────────────────────────── +print("[15] Architecture diagram") +from matplotlib.patches import FancyBboxPatch +fig, ax = plt.subplots(figsize=(14, 9), facecolor='white') +ax.set_xlim(0, 14) +ax.set_ylim(0, 10) +ax.axis('off') + +c_in, c_proc, c_mod, c_out, c_stor = '#3498db', '#27ae60', '#c0392b', '#f39c12', '#8e44ad' + +def bx(x, y, w, h, txt, col, fs=9): + r = FancyBboxPatch((x,y), w, h, boxstyle="round,pad=0.12", fc=col, ec='#2c3e50', lw=1.5, alpha=0.88) + ax.add_patch(r) + ax.text(x+w/2, y+h/2, txt, ha='center', va='center', fontsize=fs, fontweight='bold', color='white', multialignment='center') + +def ar(x1,y1,x2,y2): + ax.annotate('', xy=(x2,y2), xytext=(x1,y1), arrowprops=dict(arrowstyle='->', color='#2c3e50', lw=1.8)) + +ax.text(7, 9.4, 'Fraud Detection System — End-to-End Architecture', ha='center', fontsize=14, fontweight='bold', color='#2c3e50') + +# Row 1 +bx(0.3, 7.8, 2.8, 0.9, 'Transaction\nInput', c_in, 10) +bx(3.8, 7.8, 2.8, 0.9, 'Feature\nEngineering\n(12 new features)', c_proc, 8) +bx(7.3, 7.8, 2.8, 0.9, 'RobustScaler\n(train-only fit)', c_proc, 8) +bx(10.8, 7.8, 2.8, 0.9, 'Drift\nMonitoring', c_stor, 9) +ar(3.1, 8.25, 3.8, 8.25) +ar(6.6, 8.25, 7.3, 8.25) +ar(10.1, 8.25, 10.8, 8.25) + +# Row 2 — models +model_names = ['LR', 'RF', 'XGBoost\n(BEST)', 'LightGBM', 'MLP', 'Auto-\nencoder'] +for i, mn in enumerate(model_names): + bx(0.3 + i*2.3, 5.5, 2.0, 1.0, mn, c_mod, 8) + ar(8.7, 7.8, 0.3+i*2.3+1.0, 6.5) + +# Row 3 +bx(2.5, 3.2, 3.5, 1.0, 'Optuna\nHyperparameter\nTuning (TPE)', c_stor, 9) +bx(7.5, 3.2, 3.5, 1.0, 'Voting Ensemble\n(XGB + LGBM + RF)', c_out, 9) +ar(6.0, 3.7, 7.5, 3.7) + +# Row 4 +bx(2.5, 1.0, 3.5, 1.0, 'FastAPI\nPOST /predict\n< 10 ms', c_in, 9) +bx(7.5, 1.0, 3.5, 1.0, 'Explainability\nSHAP + LIME', c_proc, 9) +ar(9.25, 3.2, 9.25, 2.0) +ar(4.25, 3.2, 4.25, 2.0) + +fig.tight_layout() +save(fig, 'architecture_diagram') + +print("\n" + "=" * 60) +print("ALL 15 FIGURES REGENERATED SUCCESSFULLY") +print("=" * 60)
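+
+# Post-run sanity check (illustrative addition): confirm every figure pair the
+# paper references actually landed on disk. Names mirror the save() calls above.
+expected = ['class_distribution', 'amount_analysis', 'time_analysis',
+            'correlation_heatmap', 'feature_distributions', 'roc_curves',
+            'pr_curves', 'confusion_matrices', 'threshold_analysis',
+            'feature_importance', 'shap_summary', 'shap_top10',
+            'lime_explanation', 'error_analysis', 'architecture_diagram']
+missing = [n for n in expected
+           if not os.path.exists(os.path.join(FIGURES_DIR, f'{n}.png'))]
+if missing:
+    print(f'WARNING: figures not found on disk: {missing}')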