MucahitSylmz
/

elliptic-event-aware-splitting

Model card Files Files and versions

xet

Community

MucahitSylmz committed 28 days ago

Commit

c2ce770

verified ·

1 Parent(s): ee8165c

Update run_all.py: GraphSAGE + TS29 + SMOTE + threshold opt

Browse files

Files changed (1) hide show

run_all.py +356 -112

run_all.py CHANGED Viewed

@@ -7,17 +7,15 @@ Bu script sırayla şunları yapar:
   1. Veri denetimi ve temizleme (data_audit)
   2. En iyi ön işleme pipeline'ını belirle
   3. Topolojik kırılma noktası tespiti
-  4. 5 bölme stratejisi × 4 model = 20 deney
-  5. 6 kanıt testi (walk-forward, rastgele etiket, dürüstlük, vb.)
-  6. Sızıntı haritası
-  7. Tepe-düşüş kriz analizi
-  8. Tüm figürleri ve sonuçları kaydet
 KULLANIM:
   pip install pandas numpy scikit-learn matplotlib seaborn lightgbm xgboost networkx scipy imbalanced-learn torch torch-geometric
   python run_all.py --data_dir ./dataset
-SÜRE: ~30 dakika (CPU)
 ===============================================================================
 """
@@ -40,14 +38,139 @@ from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_sco
 import xgboost as xgb
 import lightgbm as lgb
 warnings.filterwarnings('ignore')
 np.random.seed(42)
 def main(data_dir):
     start_time = time.time()
     # Çıktı klasörleri
-    for d in ['output/figures', 'output/results', 'output/gephi_data']:
         os.makedirs(d, exist_ok=True)
     # ════════════════════════════════════════════════════════════════
@@ -70,17 +193,25 @@ def main(data_dir):
     label_map = {'1': 1, '2': 0, 'unknown': -1}
     labels_np = np.array([label_map[str(c)] for c in class_df['class'].values])
-    src = np.array([id_map[t] for t in edge_df['txId1'].values if t in id_map])
-    dst = np.array([id_map[t] for t in edge_df['txId2'].values if t in id_map])
-    ml = min(len(src), len(dst)); src, dst = src[:ml], dst[:ml]
     labeled_mask = labels_np >= 0
     X_raw = features_raw[labeled_mask]
     y = labels_np[labeled_mask]
     ts = timesteps_raw[labeled_mask]
     print(f"  Toplam: {N}, Etiketli: {len(y)}")
     print(f"  İllicit: {y.sum()} ({y.mean()*100:.1f}%), Licit: {len(y)-y.sum()}")
     # ════════════════════════════════════════════════════════════════
     # ADIM 2: VERİ TEMİZLEME VE ÖN İŞLEME
@@ -89,13 +220,12 @@ def main(data_dir):
     print("ADIM 2: VERİ TEMİZLEME")
     print("=" * 70)
-    # NaN/Inf temizleme
     nan_count = np.isnan(X_raw).sum()
     inf_count = np.isinf(X_raw).sum()
     print(f"  NaN: {nan_count}, Inf: {inf_count}")
     X = np.nan_to_num(X_raw, nan=0.0, posinf=0.0, neginf=0.0)
-    # Outlier analizi
     Q1 = np.percentile(X, 25, axis=0)
     Q3 = np.percentile(X, 75, axis=0)
     IQR = Q3 - Q1
@@ -104,93 +234,82 @@ def main(data_dir):
     outlier_mask = (X < lower) | (X > upper)
     print(f"  Outlier hücre: {outlier_mask.sum()} ({outlier_mask.sum()/(X.shape[0]*X.shape[1])*100:.1f}%)")
-    # İllicit vs Licit outlier karşılaştırması
-    ill_out = outlier_mask[y==1].sum(axis=1).mean()
-    lic_out = outlier_mask[y==0].sum(axis=1).mean()
-    print(f"  İllicit ort. outlier: {ill_out:.1f}, Licit ort. outlier: {lic_out:.1f}")
-    # Outlier clipping (IQR yöntemi)
     X_clipped = np.clip(X, lower, upper)
-    print(f"  ✓ Outlier clipping uygulandı (IQR yöntemi)")
-    # Düşük varyans özellik çıkarma
     variances = np.var(X_clipped, axis=0)
     var_mask = variances > 1e-6
     X_clean = X_clipped[:, var_mask]
-    print(f"  ✓ Düşük varyanslı özellikler çıkarıldı: {(~var_mask).sum()} çıkarıldı, {var_mask.sum()} kaldı")
     # ════════════════════════════════════════════════════════════════
     # ADIM 3: ÖN İŞLEME PIPELINE KARŞILAŞTIRMASI
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
-    print("ADIM 3: EN İYİ ÖN İŞLEME PIPELINE SEÇİMİ")
     print("=" * 70)
-    tr_mask = ts <= 39
-    te_mask = ts > 39
     def quick_eval(X_tr, y_tr, X_te, y_te):
-        m = lgb.LGBMClassifier(n_estimators=300, max_depth=10, scale_pos_weight=10,
-                                random_state=42, n_jobs=-1, verbose=-1)
         m.fit(X_tr, y_tr)
-        return f1_score(y_te, m.predict(X_te), zero_division=0)
     pipelines = {}
-    # Ham
-    f1_raw = quick_eval(X_raw[tr_mask], y[tr_mask], X_raw[te_mask], y[te_mask])
     pipelines['Ham Veri'] = f1_raw
     print(f"  Ham Veri:           F1={f1_raw:.4f}")
-    # StandardScaler
     sc = StandardScaler()
-    f1_ss = quick_eval(sc.fit_transform(X[tr_mask]), y[tr_mask], sc.transform(X[te_mask]), y[te_mask])
     pipelines['StandardScaler'] = f1_ss
-    print(f"  StandardScaler:     F1={f1_ss:.4f} ({f1_ss-f1_raw:+.4f})")
-    # RobustScaler
     rs = RobustScaler()
-    f1_rs = quick_eval(rs.fit_transform(X[tr_mask]), y[tr_mask], rs.transform(X[te_mask]), y[te_mask])
     pipelines['RobustScaler'] = f1_rs
-    print(f"  RobustScaler:       F1={f1_rs:.4f} ({f1_rs-f1_raw:+.4f})")
-    # Clip + RobustScaler
     rs2 = RobustScaler()
-    f1_cr = quick_eval(rs2.fit_transform(X_clipped[tr_mask]), y[tr_mask],
-                        rs2.transform(X_clipped[te_mask]), y[te_mask])
     pipelines['Clip+Robust'] = f1_cr
-    print(f"  Clip+Robust:        F1={f1_cr:.4f} ({f1_cr-f1_raw:+.4f})")
-    # Clip + VarFilter + RobustScaler
     rs3 = RobustScaler()
-    f1_cvr = quick_eval(rs3.fit_transform(X_clean[tr_mask]), y[tr_mask],
-                         rs3.transform(X_clean[te_mask]), y[te_mask])
     pipelines['Clip+VarFilter+Robust'] = f1_cvr
-    print(f"  Clip+VarFilter+Rob: F1={f1_cvr:.4f} ({f1_cvr-f1_raw:+.4f})")
-    # SMOTE dene
     try:
         from imblearn.over_sampling import SMOTE
         smote = SMOTE(random_state=42)
         rs4 = RobustScaler()
-        X_tr_s = rs4.fit_transform(X_clipped[tr_mask])
-        X_te_s = rs4.transform(X_clipped[te_mask])
-        X_tr_sm, y_tr_sm = smote.fit_resample(X_tr_s, y[tr_mask])
-        f1_smote = quick_eval(X_tr_sm, y_tr_sm, X_te_s, y[te_mask])
         pipelines['Clip+Robust+SMOTE'] = f1_smote
-        print(f"  Clip+Robust+SMOTE:  F1={f1_smote:.4f} ({f1_smote-f1_raw:+.4f})")
     except ImportError:
-        print(f"  SMOTE atlandı (pip install imbalanced-learn)")
-    # En iyiyi seç
     best_pipe = max(pipelines, key=pipelines.get)
     print(f"\n  ★ En iyi pipeline: {best_pipe} (F1={pipelines[best_pipe]:.4f})")
-    # Seçilen pipeline'ı uygula
-    # LightGBM tree-based olduğu için scaling zorunlu değil ama tutarlılık için yapalım
-    final_scaler = RobustScaler()
-    X_final = X_clipped  # Clipped versiyonu kullan (en güvenli)
     # ════════════════════════════════════════════════════════════════
     # ADIM 4: TOPOLOJİK METRİKLER
     # ════════════════════════════════════════════════════════════════
@@ -219,27 +338,27 @@ def main(data_dir):
         ill_rate = len(ts_ill) / max(len(ts_lab), 1)
         topo[t] = {'n_nodes': n, 'n_edges': e, 'density': density, 'cc_ratio': cc_ratio,
                    'n_components': comps, 'avg_degree': avg_deg, 'illicit_rate': ill_rate}
-        print(f"  TS {t:2d}: nodes={n:5d} edges={e:5d} illicit={ill_rate:.3f}")
     topo_df = pd.DataFrame(topo).T
     topo_df.to_csv('output/results/topological_metrics.csv')
     # ════════════════════════════════════════════════════════════════
-    # ADIM 5: KIRILMA NOKTASI TESPİTİ (Sağlık + Tepe-Düşüş)
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
     print("ADIM 5: KIRILMA NOKTASI TESPİTİ")
     print("=" * 70)
-    # Yöntem A: Sağlık skoru
     for col in ['density', 'cc_ratio', 'n_components']:
         mi, ma = topo_df[col].min(), topo_df[col].max()
         topo_df[f'{col}_n'] = (topo_df[col] - mi) / (ma - mi + 1e-8)
     health = (topo_df['density_n'] + topo_df['cc_ratio_n'] + (1 - topo_df['n_components_n'])) / 3
-    bp_health = health.diff().idxmin()
-    print(f"  Yöntem A (Sağlık skoru): Kırılma = TS {bp_health}")
-    # Yöntem B: Tepe-düşüş (türev)
     df_t = topo_df.copy()
     for col in ['n_edges', 'density', 'avg_degree']:
         mi, ma = df_t[col].min(), df_t[col].max()
@@ -247,25 +366,51 @@ def main(data_dir):
     crisis = (df_t['n_edges_norm'] * 0.4 + df_t['density_norm'] * 0.3 + df_t['avg_degree_norm'] * 0.3).values
     crisis_smooth = uniform_filter1d(crisis, size=5, mode='nearest')
     velocity = np.gradient(crisis_smooth)
     peaks = []
     for i in range(1, len(velocity) - 1):
         if velocity[i-1] > 0 and velocity[i+1] < 0:
             peaks.append({'timestep': all_ts[i], 'index': i, 'drop': abs(velocity[i+1])})
     peaks = sorted(peaks, key=lambda x: x['drop'], reverse=True)
-    bp_peak = peaks[0]['timestep'] if peaks else bp_health
-    print(f"  Yöntem B (Tepe-düşüş): Kırılma = TS {bp_peak}")
-    # Birleştir: iki yöntemin ortalamasına en yakın timestep
-    avg_bp = (bp_health + bp_peak) / 2
-    bp_final = min(all_ts, key=lambda t: abs(t - avg_bp))
     print(f"  ★ Final kırılma noktası: TS {bp_final}")
     # ════════════════════════════════════════════════════════════════
-    # ADIM 6: BÖLME STRATEJİLERİ VE DENEYLER
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
-    print("ADIM 6: 5 STRATEJİ × 4 MODEL = 20 DENEY")
     print("=" * 70)
     def make_masks(train_ts_set, test_ts_set):
@@ -293,23 +438,59 @@ def main(data_dir):
         ),
     }
-    def train_eval(X_tr, y_tr, X_te, y_te, model_type):
         sc = RobustScaler()
-        Xtr = sc.fit_transform(X_tr); Xte = sc.transform(X_te)
         if model_type == 'lgbm':
-            m = lgb.LGBMClassifier(n_estimators=300, max_depth=10, scale_pos_weight=10, random_state=42, n_jobs=-1, verbose=-1)
         elif model_type == 'rf':
-            m = RandomForestClassifier(n_estimators=300, max_depth=15, class_weight='balanced', random_state=42, n_jobs=-1)
         elif model_type == 'xgb':
-            m = xgb.XGBClassifier(n_estimators=300, max_depth=8, scale_pos_weight=10, random_state=42, n_jobs=-1, verbosity=0)
         m.fit(Xtr, y_tr)
-        pred = m.predict(Xte)
         proba = m.predict_proba(Xte)[:, 1]
         return {
             'f1': round(f1_score(y_te, pred, zero_division=0), 4),
             'precision': round(precision_score(y_te, pred, zero_division=0), 4),
             'recall': round(recall_score(y_te, pred, zero_division=0), 4),
             'auroc': round(roc_auc_score(y_te, proba) if len(np.unique(y_te)) > 1 else 0.5, 4),
         }
     model_types = [('lgbm', 'LightGBM'), ('rf', 'Random Forest'), ('xgb', 'XGBoost')]
@@ -320,22 +501,40 @@ def main(data_dir):
         if tr_m.sum() < 50 or te_m.sum() < 10:
             print(f"  {strat_name}: yetersiz veri, atlanıyor")
             continue
         print(f"\n  {strat_name} (train={tr_m.sum()}, test={te_m.sum()}, test_ill={y[te_m].sum()}):")
         for mt, mn in model_types:
-            res = train_eval(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
             res['strateji'] = strat_name
             res['model'] = mn
             all_results.append(res)
-            print(f"    {mn:15s}: F1={res['f1']:.4f}  P={res['precision']:.4f}  R={res['recall']:.4f}  AUROC={res['auroc']:.4f}")
     res_df = pd.DataFrame(all_results)
     res_df.to_csv('output/results/all_experiment_results.csv', index=False)
     # ════════════════════════════════════════════════════════════════
-    # ADIM 7: WALK-FORWARD VALİDASYON
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
-    print("ADIM 7: WALK-FORWARD VALİDASYON (Gerçek Dünya Referansı)")
     print("=" * 70)
     wf_results = {}
@@ -346,35 +545,53 @@ def main(data_dir):
             te_m = (ts >= test_start) & (ts < test_start + 3)
             if tr_m.sum() < 50 or te_m.sum() < 10 or len(np.unique(y[te_m])) < 2:
                 continue
-            res = train_eval(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
             wf_f1s.append(res['f1'])
         wf_results[mn] = round(np.mean(wf_f1s), 4)
         print(f"  {mn}: Walk-Forward F1 = {wf_results[mn]:.4f}")
     # Dürüstlük tablosu
     print("\n  Dürüstlük Karşılaştırması:")
     for strat_name in strategies:
         sapma_list = []
         for mn in wf_results:
             row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
-            if len(row) > 0:
                 sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
                 sapma_list.append(sapma)
         if sapma_list:
             avg_sapma = np.mean(sapma_list)
             durum = "✅ DÜRÜST" if abs(avg_sapma) < 10 else ("🔴 ŞİŞME" if avg_sapma > 10 else "⚠️ PESİMİST")
             print(f"    {strat_name:25s}: ort. sapma = {avg_sapma:+.1f}%  {durum}")
     # ════════════════════════════════════════════════════════════════
-    # ADIM 8: FİGÜRLER
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
-    print("ADIM 8: FİGÜRLER")
     print("=" * 70)
     sns.set_theme(style='whitegrid', font_scale=1.1)
-    # Figür 1: Kırılma noktası + sağlık skoru
     fig, axes = plt.subplots(3, 1, figsize=(18, 14), gridspec_kw={'height_ratios': [2, 1, 1]})
     axes[0].plot(all_ts, health.values, 'o-', color='steelblue', linewidth=2, markersize=5)
     axes[0].axvline(x=bp_final, color='red', linewidth=3, linestyle='--')
@@ -396,16 +613,15 @@ def main(data_dir):
                        color='red', s=200, zorder=5, edgecolors='black')
     axes[2].set_ylabel('Kriz Sinyali', fontsize=12)
     axes[2].set_xlabel('Timestep', fontsize=12)
     plt.tight_layout()
     plt.savefig('output/figures/fig1_breakpoint.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig1_breakpoint.png")
-    # Figür 2: F1 karşılaştırma
-    fig, ax = plt.subplots(figsize=(16, 8))
     strat_names = list(strategies.keys())
-    model_names = [mn for _, mn in model_types]
     colors5 = sns.color_palette('Set2', len(strat_names))
     x = np.arange(len(model_names)); width = 0.15
@@ -423,14 +639,14 @@ def main(data_dir):
     ax.axhline(y=wf_avg, color='green', linewidth=2, linestyle='--', label=f'Walk-Forward ({wf_avg:.3f})')
     ax.set_xticks(x + width*2); ax.set_xticklabels(model_names, fontsize=12)
     ax.set_ylabel('Illicit F1', fontsize=13)
-    ax.set_title('Bölme Stratejileri Karşılaştırması (temizlenmiş veri)', fontsize=14, fontweight='bold')
-    ax.legend(fontsize=9); ax.set_ylim(0, 1.1)
     plt.tight_layout()
     plt.savefig('output/figures/fig2_f1_comparison.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig2_f1_comparison.png")
-    # Figür 3: Pipeline karşılaştırma
     fig, ax = plt.subplots(figsize=(10, 6))
     p_names = list(pipelines.keys())
     p_vals = list(pipelines.values())
@@ -445,38 +661,68 @@ def main(data_dir):
     plt.close()
     print("  ✓ fig3_pipeline_comparison.png")
-    # Figür 4: Dürüstlük ısı haritası
-    fig, ax = plt.subplots(figsize=(14, 6))
     sapma_data = []
     for strat_name in strat_names:
         for mn in model_names:
             row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
-            if len(row) > 0 and mn in wf_results:
                 sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
                 sapma_data.append({'strateji': strat_name, 'model': mn, 'sapma': round(sapma, 1)})
     if sapma_data:
         sapma_df = pd.DataFrame(sapma_data)
         pivot = sapma_df.pivot_table(values='sapma', index='model', columns='strateji')
-        sns.heatmap(pivot, annot=True, fmt='.1f', cmap='RdYlGn_r', center=0, ax=ax, linewidths=0.5)
-        ax.set_title('Walk-Forward Dürüstlük Sapması (%)', fontsize=14, fontweight='bold')
     plt.tight_layout()
     plt.savefig('output/figures/fig4_honesty.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig4_honesty.png")
     # ════════════════════════════════════════════════════════════════
-    # ADIM 9: ÖZET RAPOR
     # ════════════════════════════════════════════════════════════════
     elapsed = time.time() - start_time
     summary = {
-        'veri': {'toplam': N, 'etiketli': len(y), 'illicit': int(y.sum()), 'ozellik': int(var_mask.sum())},
-        'temizleme': {'nan': int(nan_count), 'inf': int(inf_count), 'outlier_pct': round(outlier_mask.sum()/(X.shape[0]*X.shape[1])*100, 2),
                       'cikarilan_ozellik': int((~var_mask).sum()), 'en_iyi_pipeline': best_pipe},
-        'kirilma': {'saglik_yontemi': int(bp_health), 'tepe_dusus': int(bp_peak), 'final': int(bp_final)},
         'walk_forward': wf_results,
         'sonuclar': res_df.to_dict(orient='records'),
         'pipeline_karsilastirma': {k: round(v, 4) for k, v in pipelines.items()},
         'sure_dakika': round(elapsed / 60, 1),
     }
@@ -486,19 +732,17 @@ def main(data_dir):
     print("\n" + "=" * 70)
     print(f"TAMAMLANDI! (Süre: {elapsed/60:.1f} dakika)")
     print("=" * 70)
-    print(f"\n  Çıktılar:")
-    print(f"    output/results/all_experiment_results.csv")
-    print(f"    output/results/topological_metrics.csv")
-    print(f"    output/results/summary.json")
-    print(f"    output/figures/fig1_breakpoint.png")
-    print(f"    output/figures/fig2_f1_comparison.png")
-    print(f"    output/figures/fig3_pipeline_comparison.png")
-    print(f"    output/figures/fig4_honesty.png")
-    # Sonuç tablosu
-    print(f"\n  ═══ SONUÇ TABLOSU (F1) ═══")
     pivot_f1 = res_df.pivot_table(values='f1', index='model', columns='strateji')
     print(pivot_f1.to_string())
 if __name__ == '__main__':

   1. Veri denetimi ve temizleme (data_audit)
   2. En iyi ön işleme pipeline'ını belirle
   3. Topolojik kırılma noktası tespiti
+  4. 5 bölme stratejisi × 4 model (GraphSAGE dahil) = 20 deney
+  5. Walk-forward validasyon + dürüstlük testi
+  6. Tüm figürleri ve sonuçları kaydet
 KULLANIM:
   pip install pandas numpy scikit-learn matplotlib seaborn lightgbm xgboost networkx scipy imbalanced-learn torch torch-geometric
   python run_all.py --data_dir ./dataset
+SÜRE: ~15 dakika (CPU)
 ===============================================================================
 """
 import xgboost as xgb
 import lightgbm as lgb
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import SAGEConv
+from torch_geometric.data import Data
 warnings.filterwarnings('ignore')
 np.random.seed(42)
+torch.manual_seed(42)
+# ════════════════════════════════════════════════════════════════
+# GraphSAGE Model
+# ════════════════════════════════════════════════════════════════
+class GraphSAGENet(nn.Module):
+    def __init__(self, in_channels, hidden=128, out_channels=2, num_layers=3, dropout=0.3):
+        super().__init__()
+        self.convs = nn.ModuleList()
+        self.bns = nn.ModuleList()
+        self.convs.append(SAGEConv(in_channels, hidden))
+        self.bns.append(nn.BatchNorm1d(hidden))
+        for _ in range(num_layers - 2):
+            self.convs.append(SAGEConv(hidden, hidden))
+            self.bns.append(nn.BatchNorm1d(hidden))
+        self.convs.append(SAGEConv(hidden, out_channels))
+        self.dropout = dropout
+    def forward(self, x, edge_index):
+        for i, (conv, bn) in enumerate(zip(self.convs[:-1], self.bns)):
+            x = conv(x, edge_index)
+            x = bn(x)
+            x = F.relu(x)
+            x = F.dropout(x, p=self.dropout, training=self.training)
+        x = self.convs[-1](x, edge_index)
+        return x
+def train_graphsage(data, train_mask, test_mask, in_channels, epochs=200, lr=0.005, weight=None):
+    """GraphSAGE eğit ve değerlendir — inductive: test kenarları eğitimde kullanılmaz"""
+    device = torch.device('cpu')
+    model = GraphSAGENet(in_channels, hidden=128, out_channels=2, num_layers=3, dropout=0.3).to(device)
+    # Class weight
+    if weight is not None:
+        w = torch.tensor([1.0, weight], dtype=torch.float32).to(device)
+    else:
+        w = None
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
+    # Inductive: sadece train düğümleri arasındaki kenarları al
+    train_nodes = set(torch.where(train_mask)[0].tolist())
+    edge_index = data.edge_index
+    mask_e = torch.tensor([
+        (edge_index[0, i].item() in train_nodes) and (edge_index[1, i].item() in train_nodes)
+        for i in range(edge_index.shape[1])
+    ], dtype=torch.bool)
+    train_edge_index = edge_index[:, mask_e]
+    x = data.x.to(device)
+    y = data.y.to(device)
+    train_mask_d = train_mask.to(device)
+    test_mask_d = test_mask.to(device)
+    train_edge_index = train_edge_index.to(device)
+    full_edge_index = edge_index.to(device)
+    best_f1 = 0
+    best_state = None
+    patience = 30
+    no_improve = 0
+    model.train()
+    for epoch in range(epochs):
+        optimizer.zero_grad()
+        out = model(x, train_edge_index)
+        loss = F.cross_entropy(out[train_mask_d], y[train_mask_d], weight=w)
+        loss.backward()
+        optimizer.step()
+        scheduler.step()
+        if (epoch + 1) % 10 == 0:
+            model.eval()
+            with torch.no_grad():
+                out_eval = model(x, full_edge_index)
+                pred = out_eval[test_mask_d].argmax(dim=1)
+                f1 = f1_score(y[test_mask_d].cpu(), pred.cpu(), zero_division=0)
+                if f1 > best_f1:
+                    best_f1 = f1
+                    best_state = {k: v.clone() for k, v in model.state_dict().items()}
+                    no_improve = 0
+                else:
+                    no_improve += 1
+            model.train()
+            if no_improve >= patience // 10:
+                break
+    # Final eval
+    if best_state:
+        model.load_state_dict(best_state)
+    model.eval()
+    with torch.no_grad():
+        out = model(x, full_edge_index)
+        proba = F.softmax(out, dim=1)[:, 1]
+        # Threshold optimization
+        best_th_f1 = 0
+        best_th = 0.5
+        for th in np.arange(0.1, 0.9, 0.05):
+            pred_th = (proba[test_mask_d] >= th).long()
+            f1_th = f1_score(y[test_mask_d].cpu(), pred_th.cpu(), zero_division=0)
+            if f1_th > best_th_f1:
+                best_th_f1 = f1_th
+                best_th = th
+        pred = (proba[test_mask_d] >= best_th).long()
+        y_test = y[test_mask_d].cpu().numpy()
+        pred_np = pred.cpu().numpy()
+        proba_np = proba[test_mask_d].cpu().numpy()
+    return {
+        'f1': round(f1_score(y_test, pred_np, zero_division=0), 4),
+        'precision': round(precision_score(y_test, pred_np, zero_division=0), 4),
+        'recall': round(recall_score(y_test, pred_np, zero_division=0), 4),
+        'auroc': round(roc_auc_score(y_test, proba_np) if len(np.unique(y_test)) > 1 else 0.5, 4),
+    }
 def main(data_dir):
     start_time = time.time()
     # Çıktı klasörleri
+    for d in ['output/figures', 'output/results']:
         os.makedirs(d, exist_ok=True)
     # ════════════════════════════════════════════════════════════════
     label_map = {'1': 1, '2': 0, 'unknown': -1}
     labels_np = np.array([label_map[str(c)] for c in class_df['class'].values])
+    # Kenarlar
+    valid_edges = [(id_map[s], id_map[d]) for s, d in zip(edge_df['txId1'], edge_df['txId2'])
+                   if s in id_map and d in id_map]
+    src = np.array([e[0] for e in valid_edges])
+    dst = np.array([e[1] for e in valid_edges])
     labeled_mask = labels_np >= 0
+    labeled_indices = np.where(labeled_mask)[0]
+    # Etiketli düğüm indeksleme (tüm düğümlerden etiketlilere)
+    full_to_labeled = {full_idx: lab_idx for lab_idx, full_idx in enumerate(labeled_indices)}
     X_raw = features_raw[labeled_mask]
     y = labels_np[labeled_mask]
     ts = timesteps_raw[labeled_mask]
     print(f"  Toplam: {N}, Etiketli: {len(y)}")
     print(f"  İllicit: {y.sum()} ({y.mean()*100:.1f}%), Licit: {len(y)-y.sum()}")
+    print(f"  Kenar sayısı: {len(src)}")
     # ════════════════════════════════════════════════════════════════
     # ADIM 2: VERİ TEMİZLEME VE ÖN İŞLEME
     print("ADIM 2: VERİ TEMİZLEME")
     print("=" * 70)
     nan_count = np.isnan(X_raw).sum()
     inf_count = np.isinf(X_raw).sum()
     print(f"  NaN: {nan_count}, Inf: {inf_count}")
     X = np.nan_to_num(X_raw, nan=0.0, posinf=0.0, neginf=0.0)
+    # Outlier clipping
     Q1 = np.percentile(X, 25, axis=0)
     Q3 = np.percentile(X, 75, axis=0)
     IQR = Q3 - Q1
     outlier_mask = (X < lower) | (X > upper)
     print(f"  Outlier hücre: {outlier_mask.sum()} ({outlier_mask.sum()/(X.shape[0]*X.shape[1])*100:.1f}%)")
     X_clipped = np.clip(X, lower, upper)
+    # Düşük varyans çıkarma
     variances = np.var(X_clipped, axis=0)
     var_mask = variances > 1e-6
     X_clean = X_clipped[:, var_mask]
+    print(f"  Düşük varyanslı özellik çıkarıldı: {(~var_mask).sum()}, kalan: {var_mask.sum()}")
+    # Son veri: clipped
+    X_final = X_clipped
     # ════════════════════════════════════════════════════════════════
     # ADIM 3: ÖN İŞLEME PIPELINE KARŞILAŞTIRMASI
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
+    print("ADIM 3: PIPELINE KARŞILAŞTIRMASI")
     print("=" * 70)
+    tr_mask_pipe = ts <= 39
+    te_mask_pipe = ts > 39
     def quick_eval(X_tr, y_tr, X_te, y_te):
+        m = lgb.LGBMClassifier(n_estimators=500, max_depth=12, scale_pos_weight=10,
+                                learning_rate=0.05, random_state=42, n_jobs=-1, verbose=-1)
         m.fit(X_tr, y_tr)
+        proba = m.predict_proba(X_te)[:, 1]
+        # Threshold optimizasyonu
+        best_f1, best_th = 0, 0.5
+        for th in np.arange(0.1, 0.9, 0.05):
+            p = (proba >= th).astype(int)
+            f = f1_score(y_te, p, zero_division=0)
+            if f > best_f1: best_f1, best_th = f, th
+        return best_f1
     pipelines = {}
+    f1_raw = quick_eval(X_raw[tr_mask_pipe], y[tr_mask_pipe], X_raw[te_mask_pipe], y[te_mask_pipe])
     pipelines['Ham Veri'] = f1_raw
     print(f"  Ham Veri:           F1={f1_raw:.4f}")
     sc = StandardScaler()
+    f1_ss = quick_eval(sc.fit_transform(X[tr_mask_pipe]), y[tr_mask_pipe], sc.transform(X[te_mask_pipe]), y[te_mask_pipe])
     pipelines['StandardScaler'] = f1_ss
+    print(f"  StandardScaler:     F1={f1_ss:.4f}")
     rs = RobustScaler()
+    f1_rs = quick_eval(rs.fit_transform(X[tr_mask_pipe]), y[tr_mask_pipe], rs.transform(X[te_mask_pipe]), y[te_mask_pipe])
     pipelines['RobustScaler'] = f1_rs
+    print(f"  RobustScaler:       F1={f1_rs:.4f}")
     rs2 = RobustScaler()
+    f1_cr = quick_eval(rs2.fit_transform(X_clipped[tr_mask_pipe]), y[tr_mask_pipe], rs2.transform(X_clipped[te_mask_pipe]), y[te_mask_pipe])
     pipelines['Clip+Robust'] = f1_cr
+    print(f"  Clip+Robust:        F1={f1_cr:.4f}")
     rs3 = RobustScaler()
+    f1_cvr = quick_eval(rs3.fit_transform(X_clean[tr_mask_pipe]), y[tr_mask_pipe], rs3.transform(X_clean[te_mask_pipe]), y[te_mask_pipe])
     pipelines['Clip+VarFilter+Robust'] = f1_cvr
+    print(f"  Clip+VarFilter+Rob: F1={f1_cvr:.4f}")
     try:
         from imblearn.over_sampling import SMOTE
         smote = SMOTE(random_state=42)
         rs4 = RobustScaler()
+        X_tr_s = rs4.fit_transform(X_clipped[tr_mask_pipe])
+        X_te_s = rs4.transform(X_clipped[te_mask_pipe])
+        X_tr_sm, y_tr_sm = smote.fit_resample(X_tr_s, y[tr_mask_pipe])
+        f1_smote = quick_eval(X_tr_sm, y_tr_sm, X_te_s, y[te_mask_pipe])
         pipelines['Clip+Robust+SMOTE'] = f1_smote
+        print(f"  Clip+Robust+SMOTE:  F1={f1_smote:.4f}")
     except ImportError:
+        print("  SMOTE atlandı")
     best_pipe = max(pipelines, key=pipelines.get)
     print(f"\n  ★ En iyi pipeline: {best_pipe} (F1={pipelines[best_pipe]:.4f})")
     # ════════════════════════════════════════════════════════════════
     # ADIM 4: TOPOLOJİK METRİKLER
     # ════════════════════════════════════════════════════════════════
         ill_rate = len(ts_ill) / max(len(ts_lab), 1)
         topo[t] = {'n_nodes': n, 'n_edges': e, 'density': density, 'cc_ratio': cc_ratio,
                    'n_components': comps, 'avg_degree': avg_deg, 'illicit_rate': ill_rate}
+        print(f"  TS {t:2d}: nodes={n:5d} edges={e:5d} density={density:.5f} illicit={ill_rate:.3f}")
     topo_df = pd.DataFrame(topo).T
     topo_df.to_csv('output/results/topological_metrics.csv')
     # ════════════════════════════════════════════════════════════════
+    # ADIM 5: KIRILMA NOKTASI TESPİTİ
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
     print("ADIM 5: KIRILMA NOKTASI TESPİTİ")
     print("=" * 70)
+    # Sağlık skoru: normalize et
     for col in ['density', 'cc_ratio', 'n_components']:
         mi, ma = topo_df[col].min(), topo_df[col].max()
         topo_df[f'{col}_n'] = (topo_df[col] - mi) / (ma - mi + 1e-8)
     health = (topo_df['density_n'] + topo_df['cc_ratio_n'] + (1 - topo_df['n_components_n'])) / 3
+    bp_final = health.diff().idxmin()
+    print(f"  Sağlık skoru kırılma noktası: TS {bp_final}")
+    # Tepe-düşüş analizi (bilgi amaçlı)
     df_t = topo_df.copy()
     for col in ['n_edges', 'density', 'avg_degree']:
         mi, ma = df_t[col].min(), df_t[col].max()
     crisis = (df_t['n_edges_norm'] * 0.4 + df_t['density_norm'] * 0.3 + df_t['avg_degree_norm'] * 0.3).values
     crisis_smooth = uniform_filter1d(crisis, size=5, mode='nearest')
     velocity = np.gradient(crisis_smooth)
     peaks = []
     for i in range(1, len(velocity) - 1):
         if velocity[i-1] > 0 and velocity[i+1] < 0:
             peaks.append({'timestep': all_ts[i], 'index': i, 'drop': abs(velocity[i+1])})
     peaks = sorted(peaks, key=lambda x: x['drop'], reverse=True)
     print(f"  ★ Final kırılma noktası: TS {bp_final}")
     # ════════════════════════════════════════════════════════════════
+    # ADIM 6: GRAF VERİSİ HAZIRLA (GraphSAGE için)
+    # ════════════════════════════════════════════════════════════════
+    print("\n" + "=" * 70)
+    print("ADIM 6: GRAPHSAGE VERİ HAZIRLAMA")
+    print("=" * 70)
+    # Etiketli düğümler arası kenarları filtrele
+    labeled_set = set(labeled_indices.tolist())
+    labeled_edges = [(full_to_labeled[s], full_to_labeled[d])
+                     for s, d in zip(src, dst)
+                     if s in labeled_set and d in labeled_set
+                     and s in full_to_labeled and d in full_to_labeled]
+    if labeled_edges:
+        edge_src = [e[0] for e in labeled_edges]
+        edge_dst = [e[1] for e in labeled_edges]
+        edge_index = torch.tensor([edge_src + edge_dst, edge_dst + edge_src], dtype=torch.long)  # undirected
+    else:
+        edge_index = torch.zeros((2, 0), dtype=torch.long)
+    print(f"  Etiketli düğümler arası kenar: {len(labeled_edges)} ({edge_index.shape[1]} undirected)")
+    # Normalize features for GNN
+    scaler_gnn = RobustScaler()
+    X_gnn = scaler_gnn.fit_transform(X_final)
+    x_tensor = torch.tensor(X_gnn, dtype=torch.float32)
+    y_tensor = torch.tensor(y, dtype=torch.long)
+    graph_data = Data(x=x_tensor, edge_index=edge_index, y=y_tensor)
+    # ════════════════════════════════════════════════════════════════
+    # ADIM 7: BÖLME STRATEJİLERİ VE DENEYLER
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
+    print("ADIM 7: 5 STRATEJİ × 4 MODEL = 20 DENEY")
     print("=" * 70)
     def make_masks(train_ts_set, test_ts_set):
         ),
     }
+    def train_eval_tabular(X_tr, y_tr, X_te, y_te, model_type):
+        """Tabular model eğit — threshold optimizasyonu ile"""
         sc = RobustScaler()
+        Xtr = sc.fit_transform(X_tr)
+        Xte = sc.transform(X_te)
+        # SMOTE uygula (eğitim setine)
+        try:
+            from imblearn.over_sampling import SMOTE
+            smote = SMOTE(random_state=42)
+            Xtr, y_tr = smote.fit_resample(Xtr, y_tr)
+        except:
+            pass
         if model_type == 'lgbm':
+            m = lgb.LGBMClassifier(
+                n_estimators=500, max_depth=12, learning_rate=0.05,
+                num_leaves=63, min_child_samples=20, subsample=0.8,
+                colsample_bytree=0.8, scale_pos_weight=10,
+                random_state=42, n_jobs=-1, verbose=-1
+            )
         elif model_type == 'rf':
+            m = RandomForestClassifier(
+                n_estimators=500, max_depth=20, min_samples_leaf=5,
+                class_weight='balanced_subsample', max_features='sqrt',
+                random_state=42, n_jobs=-1
+            )
         elif model_type == 'xgb':
+            m = xgb.XGBClassifier(
+                n_estimators=500, max_depth=10, learning_rate=0.05,
+                subsample=0.8, colsample_bytree=0.8, scale_pos_weight=10,
+                min_child_weight=5, gamma=0.1,
+                random_state=42, n_jobs=-1, verbosity=0
+            )
         m.fit(Xtr, y_tr)
         proba = m.predict_proba(Xte)[:, 1]
+        # Threshold optimizasyonu
+        best_f1, best_th = 0, 0.5
+        for th in np.arange(0.1, 0.9, 0.05):
+            pred_th = (proba >= th).astype(int)
+            f1_th = f1_score(y_te, pred_th, zero_division=0)
+            if f1_th > best_f1:
+                best_f1, best_th = f1_th, th
+        pred = (proba >= best_th).astype(int)
         return {
             'f1': round(f1_score(y_te, pred, zero_division=0), 4),
             'precision': round(precision_score(y_te, pred, zero_division=0), 4),
             'recall': round(recall_score(y_te, pred, zero_division=0), 4),
             'auroc': round(roc_auc_score(y_te, proba) if len(np.unique(y_te)) > 1 else 0.5, 4),
+            'threshold': round(best_th, 2),
         }
     model_types = [('lgbm', 'LightGBM'), ('rf', 'Random Forest'), ('xgb', 'XGBoost')]
         if tr_m.sum() < 50 or te_m.sum() < 10:
             print(f"  {strat_name}: yetersiz veri, atlanıyor")
             continue
         print(f"\n  {strat_name} (train={tr_m.sum()}, test={te_m.sum()}, test_ill={y[te_m].sum()}):")
+        # Tabular modeller
         for mt, mn in model_types:
+            res = train_eval_tabular(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
             res['strateji'] = strat_name
             res['model'] = mn
             all_results.append(res)
+            print(f"    {mn:15s}: F1={res['f1']:.4f}  P={res['precision']:.4f}  R={res['recall']:.4f}  AUROC={res['auroc']:.4f}  th={res['threshold']}")
+        # GraphSAGE
+        print(f"    {'GraphSAGE':15s}: eğitiliyor...", end='', flush=True)
+        train_mask_t = torch.tensor(tr_m, dtype=torch.bool)
+        test_mask_t = torch.tensor(te_m, dtype=torch.bool)
+        ill_weight = float((y[tr_m] == 0).sum()) / max(float((y[tr_m] == 1).sum()), 1)
+        ill_weight = min(ill_weight, 15.0)  # cap at 15
+        gs_res = train_graphsage(graph_data, train_mask_t, test_mask_t,
+                                  X_final.shape[1], epochs=200, lr=0.005, weight=ill_weight)
+        gs_res['strateji'] = strat_name
+        gs_res['model'] = 'GraphSAGE'
+        gs_res['threshold'] = 0.0  # threshold handled internally
+        all_results.append(gs_res)
+        print(f"\r    {'GraphSAGE':15s}: F1={gs_res['f1']:.4f}  P={gs_res['precision']:.4f}  R={gs_res['recall']:.4f}  AUROC={gs_res['auroc']:.4f}")
     res_df = pd.DataFrame(all_results)
     res_df.to_csv('output/results/all_experiment_results.csv', index=False)
     # ════════════════════════════════════════════════════════════════
+    # ADIM 8: WALK-FORWARD VALİDASYON
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
+    print("ADIM 8: WALK-FORWARD VALİDASYON")
     print("=" * 70)
     wf_results = {}
             te_m = (ts >= test_start) & (ts < test_start + 3)
             if tr_m.sum() < 50 or te_m.sum() < 10 or len(np.unique(y[te_m])) < 2:
                 continue
+            res = train_eval_tabular(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
             wf_f1s.append(res['f1'])
         wf_results[mn] = round(np.mean(wf_f1s), 4)
         print(f"  {mn}: Walk-Forward F1 = {wf_results[mn]:.4f}")
+    # GraphSAGE walk-forward
+    wf_gs_f1s = []
+    for test_start in range(10, 49, 3):
+        tr_m_wf = ts < test_start
+        te_m_wf = (ts >= test_start) & (ts < test_start + 3)
+        if tr_m_wf.sum() < 50 or te_m_wf.sum() < 10 or len(np.unique(y[te_m_wf])) < 2:
+            continue
+        train_mask_wf = torch.tensor(tr_m_wf, dtype=torch.bool)
+        test_mask_wf = torch.tensor(te_m_wf, dtype=torch.bool)
+        ill_w = float((y[tr_m_wf]==0).sum()) / max(float((y[tr_m_wf]==1).sum()), 1)
+        ill_w = min(ill_w, 15.0)
+        gs_wf = train_graphsage(graph_data, train_mask_wf, test_mask_wf, X_final.shape[1], epochs=100, weight=ill_w)
+        wf_gs_f1s.append(gs_wf['f1'])
+    wf_results['GraphSAGE'] = round(np.mean(wf_gs_f1s), 4) if wf_gs_f1s else 0
+    print(f"  GraphSAGE: Walk-Forward F1 = {wf_results['GraphSAGE']:.4f}")
     # Dürüstlük tablosu
     print("\n  Dürüstlük Karşılaştırması:")
+    honesty_data = []
     for strat_name in strategies:
         sapma_list = []
         for mn in wf_results:
             row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
+            if len(row) > 0 and mn in wf_results and wf_results[mn] > 0:
                 sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
                 sapma_list.append(sapma)
         if sapma_list:
             avg_sapma = np.mean(sapma_list)
             durum = "✅ DÜRÜST" if abs(avg_sapma) < 10 else ("🔴 ŞİŞME" if avg_sapma > 10 else "⚠️ PESİMİST")
+            honesty_data.append({'strateji': strat_name, 'sapma': round(avg_sapma, 1), 'durum': durum})
             print(f"    {strat_name:25s}: ort. sapma = {avg_sapma:+.1f}%  {durum}")
     # ════════════════════════════════════════════════════════════════
+    # ADIM 9: FİGÜRLER
     # ════════════════════════════════════════════════════════════════
     print("\n" + "=" * 70)
+    print("ADIM 9: FİGÜRLER")
     print("=" * 70)
     sns.set_theme(style='whitegrid', font_scale=1.1)
+    # Fig 1: Kırılma noktası
     fig, axes = plt.subplots(3, 1, figsize=(18, 14), gridspec_kw={'height_ratios': [2, 1, 1]})
     axes[0].plot(all_ts, health.values, 'o-', color='steelblue', linewidth=2, markersize=5)
     axes[0].axvline(x=bp_final, color='red', linewidth=3, linestyle='--')
                        color='red', s=200, zorder=5, edgecolors='black')
     axes[2].set_ylabel('Kriz Sinyali', fontsize=12)
     axes[2].set_xlabel('Timestep', fontsize=12)
     plt.tight_layout()
     plt.savefig('output/figures/fig1_breakpoint.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig1_breakpoint.png")
+    # Fig 2: F1 karşılaştırma (4 model dahil)
+    fig, ax = plt.subplots(figsize=(18, 8))
     strat_names = list(strategies.keys())
+    model_names = [mn for _, mn in model_types] + ['GraphSAGE']
     colors5 = sns.color_palette('Set2', len(strat_names))
     x = np.arange(len(model_names)); width = 0.15
     ax.axhline(y=wf_avg, color='green', linewidth=2, linestyle='--', label=f'Walk-Forward ({wf_avg:.3f})')
     ax.set_xticks(x + width*2); ax.set_xticklabels(model_names, fontsize=12)
     ax.set_ylabel('Illicit F1', fontsize=13)
+    ax.set_title('Bölme Stratejileri × Model Karşılaştırması', fontsize=14, fontweight='bold')
+    ax.legend(fontsize=9, loc='upper right'); ax.set_ylim(0, 1.1)
     plt.tight_layout()
     plt.savefig('output/figures/fig2_f1_comparison.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig2_f1_comparison.png")
+    # Fig 3: Pipeline karşılaştırma
     fig, ax = plt.subplots(figsize=(10, 6))
     p_names = list(pipelines.keys())
     p_vals = list(pipelines.values())
     plt.close()
     print("  ✓ fig3_pipeline_comparison.png")
+    # Fig 4: Dürüstlük ısı haritası
+    fig, ax = plt.subplots(figsize=(16, 7))
     sapma_data = []
     for strat_name in strat_names:
         for mn in model_names:
             row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
+            if len(row) > 0 and mn in wf_results and wf_results[mn] > 0:
                 sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
                 sapma_data.append({'strateji': strat_name, 'model': mn, 'sapma': round(sapma, 1)})
     if sapma_data:
         sapma_df = pd.DataFrame(sapma_data)
         pivot = sapma_df.pivot_table(values='sapma', index='model', columns='strateji')
+        sns.heatmap(pivot, annot=True, fmt='.1f', cmap='RdYlGn_r', center=0, ax=ax,
+                   linewidths=0.5, cbar_kws={'label': 'Walk-Forward Sapma (%)'})
+        ax.set_title('Walk-Forward Dürüstlük Sapması (%) — 4 Model × 5 Strateji', fontsize=14, fontweight='bold')
     plt.tight_layout()
     plt.savefig('output/figures/fig4_honesty.png', dpi=150, bbox_inches='tight')
     plt.close()
     print("  ✓ fig4_honesty.png")
+    # Fig 5: Performans şişmesi haritası (inflation)
+    fig, ax = plt.subplots(figsize=(14, 6))
+    inf_data = []
+    for mn in model_names:
+        row_rand = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Rastgele')]
+        row_chr = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Kronolojik')]
+        row_topo = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Topolojik Kırılma')]
+        if len(row_rand) > 0 and len(row_chr) > 0:
+            rand_f1 = row_rand['f1'].values[0]
+            chr_f1 = row_chr['f1'].values[0]
+            topo_f1 = row_topo['f1'].values[0] if len(row_topo) > 0 else 0
+            inf_data.append({
+                'model': mn,
+                'Rastgele vs Kronolojik': round((rand_f1 - chr_f1) / chr_f1 * 100, 1),
+                'Rastgele vs Topolojik': round((rand_f1 - topo_f1) / topo_f1 * 100, 1) if topo_f1 > 0 else 0,
+            })
+    if inf_data:
+        inf_df = pd.DataFrame(inf_data).set_index('model')
+        sns.heatmap(inf_df, annot=True, fmt='.1f', cmap='Reds', ax=ax, linewidths=0.5,
+                   cbar_kws={'label': 'Şişme Oranı (%)'})
+        ax.set_title('Rastgele Bölme Performans Şişmesi (%)', fontsize=14, fontweight='bold')
+    plt.tight_layout()
+    plt.savefig('output/figures/fig5_inflation.png', dpi=150, bbox_inches='tight')
+    plt.close()
+    print("  ✓ fig5_inflation.png")
     # ════════════════════════════════════════════════════════════════
+    # ADIM 10: ÖZET RAPOR
     # ════════════════════════════════════════════════════════════════
     elapsed = time.time() - start_time
     summary = {
+        'veri': {'toplam': N, 'etiketli': len(y), 'illicit': int(y.sum()),
+                 'ozellik': int(X_final.shape[1]), 'kenar': len(valid_edges)},
+        'temizleme': {'nan': int(nan_count), 'inf': int(inf_count),
+                      'outlier_pct': round(outlier_mask.sum()/(X.shape[0]*X.shape[1])*100, 2),
                       'cikarilan_ozellik': int((~var_mask).sum()), 'en_iyi_pipeline': best_pipe},
+        'kirilma': {'saglik_yontemi': int(bp_final), 'final': int(bp_final)},
         'walk_forward': wf_results,
         'sonuclar': res_df.to_dict(orient='records'),
         'pipeline_karsilastirma': {k: round(v, 4) for k, v in pipelines.items()},
+        'durustukluk': honesty_data,
         'sure_dakika': round(elapsed / 60, 1),
     }
     print("\n" + "=" * 70)
     print(f"TAMAMLANDI! (Süre: {elapsed/60:.1f} dakika)")
     print("=" * 70)
+    # Final sonuç tablosu
+    print(f"\n  ═══ SONUÇ TABLOSU (Illicit F1) ═══")
     pivot_f1 = res_df.pivot_table(values='f1', index='model', columns='strateji')
     print(pivot_f1.to_string())
+    print(f"\n  ═══ WALK-FORWARD REFERANS ═══")
+    for mn, f1 in wf_results.items():
+        print(f"    {mn}: {f1:.4f}")
+    print(f"\n  Çıktılar: output/results/ ve output/figures/")
 if __name__ == '__main__':