Update run_all.py: GraphSAGE + TS29 + SMOTE + threshold opt
Browse files- run_all.py +356 -112
run_all.py
CHANGED
|
@@ -7,17 +7,15 @@ Bu script sırayla şunları yapar:
|
|
| 7 |
1. Veri denetimi ve temizleme (data_audit)
|
| 8 |
2. En iyi ön işleme pipeline'ını belirle
|
| 9 |
3. Topolojik kırılma noktası tespiti
|
| 10 |
-
4. 5 bölme stratejisi × 4 model = 20 deney
|
| 11 |
-
5.
|
| 12 |
-
6.
|
| 13 |
-
7. Tepe-düşüş kriz analizi
|
| 14 |
-
8. Tüm figürleri ve sonuçları kaydet
|
| 15 |
|
| 16 |
KULLANIM:
|
| 17 |
pip install pandas numpy scikit-learn matplotlib seaborn lightgbm xgboost networkx scipy imbalanced-learn torch torch-geometric
|
| 18 |
python run_all.py --data_dir ./dataset
|
| 19 |
|
| 20 |
-
SΓRE: ~
|
| 21 |
===============================================================================
|
| 22 |
"""
|
| 23 |
|
|
@@ -40,14 +38,139 @@ from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_sco
|
|
| 40 |
import xgboost as xgb
|
| 41 |
import lightgbm as lgb
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
warnings.filterwarnings('ignore')
|
| 44 |
np.random.seed(42)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def main(data_dir):
|
| 47 |
start_time = time.time()
|
| 48 |
|
| 49 |
# ΓΔ±ktΔ± klasΓΆrleri
|
| 50 |
-
for d in ['output/figures', 'output/results'
|
| 51 |
os.makedirs(d, exist_ok=True)
|
| 52 |
|
| 53 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -70,17 +193,25 @@ def main(data_dir):
|
|
| 70 |
label_map = {'1': 1, '2': 0, 'unknown': -1}
|
| 71 |
labels_np = np.array([label_map[str(c)] for c in class_df['class'].values])
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
|
| 77 |
labeled_mask = labels_np >= 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
X_raw = features_raw[labeled_mask]
|
| 79 |
y = labels_np[labeled_mask]
|
| 80 |
ts = timesteps_raw[labeled_mask]
|
| 81 |
|
| 82 |
print(f" Toplam: {N}, Etiketli: {len(y)}")
|
| 83 |
print(f" Δ°llicit: {y.sum()} ({y.mean()*100:.1f}%), Licit: {len(y)-y.sum()}")
|
|
|
|
| 84 |
|
| 85 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 86 |
# ADIM 2: VERΔ° TEMΔ°ZLEME VE ΓN Δ°ΕLEME
|
|
@@ -89,13 +220,12 @@ def main(data_dir):
|
|
| 89 |
print("ADIM 2: VERΔ° TEMΔ°ZLEME")
|
| 90 |
print("=" * 70)
|
| 91 |
|
| 92 |
-
# NaN/Inf temizleme
|
| 93 |
nan_count = np.isnan(X_raw).sum()
|
| 94 |
inf_count = np.isinf(X_raw).sum()
|
| 95 |
print(f" NaN: {nan_count}, Inf: {inf_count}")
|
| 96 |
X = np.nan_to_num(X_raw, nan=0.0, posinf=0.0, neginf=0.0)
|
| 97 |
|
| 98 |
-
# Outlier
|
| 99 |
Q1 = np.percentile(X, 25, axis=0)
|
| 100 |
Q3 = np.percentile(X, 75, axis=0)
|
| 101 |
IQR = Q3 - Q1
|
|
@@ -104,93 +234,82 @@ def main(data_dir):
|
|
| 104 |
outlier_mask = (X < lower) | (X > upper)
|
| 105 |
print(f" Outlier hΓΌcre: {outlier_mask.sum()} ({outlier_mask.sum()/(X.shape[0]*X.shape[1])*100:.1f}%)")
|
| 106 |
|
| 107 |
-
# Δ°llicit vs Licit outlier karΕΔ±laΕtΔ±rmasΔ±
|
| 108 |
-
ill_out = outlier_mask[y==1].sum(axis=1).mean()
|
| 109 |
-
lic_out = outlier_mask[y==0].sum(axis=1).mean()
|
| 110 |
-
print(f" Δ°llicit ort. outlier: {ill_out:.1f}, Licit ort. outlier: {lic_out:.1f}")
|
| 111 |
-
|
| 112 |
-
# Outlier clipping (IQR yΓΆntemi)
|
| 113 |
X_clipped = np.clip(X, lower, upper)
|
| 114 |
-
print(f" β Outlier clipping uygulandΔ± (IQR yΓΆntemi)")
|
| 115 |
|
| 116 |
-
# DΓΌΕΓΌk varyans
|
| 117 |
variances = np.var(X_clipped, axis=0)
|
| 118 |
var_mask = variances > 1e-6
|
| 119 |
X_clean = X_clipped[:, var_mask]
|
| 120 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 123 |
# ADIM 3: ΓN Δ°ΕLEME PIPELINE KARΕILAΕTIRMASI
|
| 124 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 125 |
print("\n" + "=" * 70)
|
| 126 |
-
print("ADIM 3:
|
| 127 |
print("=" * 70)
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
|
| 132 |
def quick_eval(X_tr, y_tr, X_te, y_te):
|
| 133 |
-
m = lgb.LGBMClassifier(n_estimators=
|
| 134 |
-
random_state=42, n_jobs=-1, verbose=-1)
|
| 135 |
m.fit(X_tr, y_tr)
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
pipelines = {}
|
| 139 |
|
| 140 |
-
|
| 141 |
-
f1_raw = quick_eval(X_raw[tr_mask], y[tr_mask], X_raw[te_mask], y[te_mask])
|
| 142 |
pipelines['Ham Veri'] = f1_raw
|
| 143 |
print(f" Ham Veri: F1={f1_raw:.4f}")
|
| 144 |
|
| 145 |
-
# StandardScaler
|
| 146 |
sc = StandardScaler()
|
| 147 |
-
f1_ss = quick_eval(sc.fit_transform(X[
|
| 148 |
pipelines['StandardScaler'] = f1_ss
|
| 149 |
-
print(f" StandardScaler: F1={f1_ss:.4f}
|
| 150 |
|
| 151 |
-
# RobustScaler
|
| 152 |
rs = RobustScaler()
|
| 153 |
-
f1_rs = quick_eval(rs.fit_transform(X[
|
| 154 |
pipelines['RobustScaler'] = f1_rs
|
| 155 |
-
print(f" RobustScaler: F1={f1_rs:.4f}
|
| 156 |
|
| 157 |
-
# Clip + RobustScaler
|
| 158 |
rs2 = RobustScaler()
|
| 159 |
-
f1_cr = quick_eval(rs2.fit_transform(X_clipped[
|
| 160 |
-
rs2.transform(X_clipped[te_mask]), y[te_mask])
|
| 161 |
pipelines['Clip+Robust'] = f1_cr
|
| 162 |
-
print(f" Clip+Robust: F1={f1_cr:.4f}
|
| 163 |
|
| 164 |
-
# Clip + VarFilter + RobustScaler
|
| 165 |
rs3 = RobustScaler()
|
| 166 |
-
f1_cvr = quick_eval(rs3.fit_transform(X_clean[
|
| 167 |
-
rs3.transform(X_clean[te_mask]), y[te_mask])
|
| 168 |
pipelines['Clip+VarFilter+Robust'] = f1_cvr
|
| 169 |
-
print(f" Clip+VarFilter+Rob: F1={f1_cvr:.4f}
|
| 170 |
|
| 171 |
-
# SMOTE dene
|
| 172 |
try:
|
| 173 |
from imblearn.over_sampling import SMOTE
|
| 174 |
smote = SMOTE(random_state=42)
|
| 175 |
rs4 = RobustScaler()
|
| 176 |
-
X_tr_s = rs4.fit_transform(X_clipped[
|
| 177 |
-
X_te_s = rs4.transform(X_clipped[
|
| 178 |
-
X_tr_sm, y_tr_sm = smote.fit_resample(X_tr_s, y[
|
| 179 |
-
f1_smote = quick_eval(X_tr_sm, y_tr_sm, X_te_s, y[
|
| 180 |
pipelines['Clip+Robust+SMOTE'] = f1_smote
|
| 181 |
-
print(f" Clip+Robust+SMOTE: F1={f1_smote:.4f}
|
| 182 |
except ImportError:
|
| 183 |
-
print(
|
| 184 |
|
| 185 |
-
# En iyiyi seΓ§
|
| 186 |
best_pipe = max(pipelines, key=pipelines.get)
|
| 187 |
print(f"\n β
En iyi pipeline: {best_pipe} (F1={pipelines[best_pipe]:.4f})")
|
| 188 |
|
| 189 |
-
# SeΓ§ilen pipeline'Δ± uygula
|
| 190 |
-
# LightGBM tree-based olduΔu iΓ§in scaling zorunlu deΔil ama tutarlΔ±lΔ±k iΓ§in yapalΔ±m
|
| 191 |
-
final_scaler = RobustScaler()
|
| 192 |
-
X_final = X_clipped # Clipped versiyonu kullan (en gΓΌvenli)
|
| 193 |
-
|
| 194 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 195 |
# ADIM 4: TOPOLOJΔ°K METRΔ°KLER
|
| 196 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -219,27 +338,27 @@ def main(data_dir):
|
|
| 219 |
ill_rate = len(ts_ill) / max(len(ts_lab), 1)
|
| 220 |
topo[t] = {'n_nodes': n, 'n_edges': e, 'density': density, 'cc_ratio': cc_ratio,
|
| 221 |
'n_components': comps, 'avg_degree': avg_deg, 'illicit_rate': ill_rate}
|
| 222 |
-
print(f" TS {t:2d}: nodes={n:5d} edges={e:5d} illicit={ill_rate:.3f}")
|
| 223 |
|
| 224 |
topo_df = pd.DataFrame(topo).T
|
| 225 |
topo_df.to_csv('output/results/topological_metrics.csv')
|
| 226 |
|
| 227 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 228 |
-
# ADIM 5: KIRILMA NOKTASI TESPΔ°TΔ°
|
| 229 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 230 |
print("\n" + "=" * 70)
|
| 231 |
print("ADIM 5: KIRILMA NOKTASI TESPΔ°TΔ°")
|
| 232 |
print("=" * 70)
|
| 233 |
|
| 234 |
-
#
|
| 235 |
for col in ['density', 'cc_ratio', 'n_components']:
|
| 236 |
mi, ma = topo_df[col].min(), topo_df[col].max()
|
| 237 |
topo_df[f'{col}_n'] = (topo_df[col] - mi) / (ma - mi + 1e-8)
|
| 238 |
health = (topo_df['density_n'] + topo_df['cc_ratio_n'] + (1 - topo_df['n_components_n'])) / 3
|
| 239 |
-
|
| 240 |
-
print(f"
|
| 241 |
|
| 242 |
-
#
|
| 243 |
df_t = topo_df.copy()
|
| 244 |
for col in ['n_edges', 'density', 'avg_degree']:
|
| 245 |
mi, ma = df_t[col].min(), df_t[col].max()
|
|
@@ -247,25 +366,51 @@ def main(data_dir):
|
|
| 247 |
crisis = (df_t['n_edges_norm'] * 0.4 + df_t['density_norm'] * 0.3 + df_t['avg_degree_norm'] * 0.3).values
|
| 248 |
crisis_smooth = uniform_filter1d(crisis, size=5, mode='nearest')
|
| 249 |
velocity = np.gradient(crisis_smooth)
|
| 250 |
-
|
| 251 |
peaks = []
|
| 252 |
for i in range(1, len(velocity) - 1):
|
| 253 |
if velocity[i-1] > 0 and velocity[i+1] < 0:
|
| 254 |
peaks.append({'timestep': all_ts[i], 'index': i, 'drop': abs(velocity[i+1])})
|
| 255 |
peaks = sorted(peaks, key=lambda x: x['drop'], reverse=True)
|
| 256 |
-
bp_peak = peaks[0]['timestep'] if peaks else bp_health
|
| 257 |
-
print(f" YΓΆntem B (Tepe-dΓΌΕΓΌΕ): KΔ±rΔ±lma = TS {bp_peak}")
|
| 258 |
|
| 259 |
-
# BirleΕtir: iki yΓΆntemin ortalamasΔ±na en yakΔ±n timestep
|
| 260 |
-
avg_bp = (bp_health + bp_peak) / 2
|
| 261 |
-
bp_final = min(all_ts, key=lambda t: abs(t - avg_bp))
|
| 262 |
print(f" β
Final kΔ±rΔ±lma noktasΔ±: TS {bp_final}")
|
| 263 |
|
| 264 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 265 |
-
# ADIM 6:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 267 |
print("\n" + "=" * 70)
|
| 268 |
-
print("ADIM
|
| 269 |
print("=" * 70)
|
| 270 |
|
| 271 |
def make_masks(train_ts_set, test_ts_set):
|
|
@@ -293,23 +438,59 @@ def main(data_dir):
|
|
| 293 |
),
|
| 294 |
}
|
| 295 |
|
| 296 |
-
def
|
|
|
|
| 297 |
sc = RobustScaler()
|
| 298 |
-
Xtr = sc.fit_transform(X_tr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
if model_type == 'lgbm':
|
| 300 |
-
m = lgb.LGBMClassifier(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
elif model_type == 'rf':
|
| 302 |
-
m = RandomForestClassifier(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
elif model_type == 'xgb':
|
| 304 |
-
m = xgb.XGBClassifier(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
m.fit(Xtr, y_tr)
|
| 306 |
-
pred = m.predict(Xte)
|
| 307 |
proba = m.predict_proba(Xte)[:, 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
return {
|
| 309 |
'f1': round(f1_score(y_te, pred, zero_division=0), 4),
|
| 310 |
'precision': round(precision_score(y_te, pred, zero_division=0), 4),
|
| 311 |
'recall': round(recall_score(y_te, pred, zero_division=0), 4),
|
| 312 |
'auroc': round(roc_auc_score(y_te, proba) if len(np.unique(y_te)) > 1 else 0.5, 4),
|
|
|
|
| 313 |
}
|
| 314 |
|
| 315 |
model_types = [('lgbm', 'LightGBM'), ('rf', 'Random Forest'), ('xgb', 'XGBoost')]
|
|
@@ -320,22 +501,40 @@ def main(data_dir):
|
|
| 320 |
if tr_m.sum() < 50 or te_m.sum() < 10:
|
| 321 |
print(f" {strat_name}: yetersiz veri, atlanΔ±yor")
|
| 322 |
continue
|
|
|
|
| 323 |
print(f"\n {strat_name} (train={tr_m.sum()}, test={te_m.sum()}, test_ill={y[te_m].sum()}):")
|
|
|
|
|
|
|
| 324 |
for mt, mn in model_types:
|
| 325 |
-
res =
|
| 326 |
res['strateji'] = strat_name
|
| 327 |
res['model'] = mn
|
| 328 |
all_results.append(res)
|
| 329 |
-
print(f" {mn:15s}: F1={res['f1']:.4f} P={res['precision']:.4f} R={res['recall']:.4f} AUROC={res['auroc']:.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
res_df = pd.DataFrame(all_results)
|
| 332 |
res_df.to_csv('output/results/all_experiment_results.csv', index=False)
|
| 333 |
|
| 334 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββ
|
| 335 |
-
# ADIM
|
| 336 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 337 |
print("\n" + "=" * 70)
|
| 338 |
-
print("ADIM
|
| 339 |
print("=" * 70)
|
| 340 |
|
| 341 |
wf_results = {}
|
|
@@ -346,35 +545,53 @@ def main(data_dir):
|
|
| 346 |
te_m = (ts >= test_start) & (ts < test_start + 3)
|
| 347 |
if tr_m.sum() < 50 or te_m.sum() < 10 or len(np.unique(y[te_m])) < 2:
|
| 348 |
continue
|
| 349 |
-
res =
|
| 350 |
wf_f1s.append(res['f1'])
|
| 351 |
wf_results[mn] = round(np.mean(wf_f1s), 4)
|
| 352 |
print(f" {mn}: Walk-Forward F1 = {wf_results[mn]:.4f}")
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
# DΓΌrΓΌstlΓΌk tablosu
|
| 355 |
print("\n DΓΌrΓΌstlΓΌk KarΕΔ±laΕtΔ±rmasΔ±:")
|
|
|
|
| 356 |
for strat_name in strategies:
|
| 357 |
sapma_list = []
|
| 358 |
for mn in wf_results:
|
| 359 |
row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
|
| 360 |
-
if len(row) > 0:
|
| 361 |
sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
|
| 362 |
sapma_list.append(sapma)
|
| 363 |
if sapma_list:
|
| 364 |
avg_sapma = np.mean(sapma_list)
|
| 365 |
durum = "β
DΓRΓST" if abs(avg_sapma) < 10 else ("π΄ ΕΔ°ΕME" if avg_sapma > 10 else "β οΈ PESΔ°MΔ°ST")
|
|
|
|
| 366 |
print(f" {strat_name:25s}: ort. sapma = {avg_sapma:+.1f}% {durum}")
|
| 367 |
|
| 368 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 369 |
-
# ADIM
|
| 370 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 371 |
print("\n" + "=" * 70)
|
| 372 |
-
print("ADIM
|
| 373 |
print("=" * 70)
|
| 374 |
|
| 375 |
sns.set_theme(style='whitegrid', font_scale=1.1)
|
| 376 |
|
| 377 |
-
#
|
| 378 |
fig, axes = plt.subplots(3, 1, figsize=(18, 14), gridspec_kw={'height_ratios': [2, 1, 1]})
|
| 379 |
axes[0].plot(all_ts, health.values, 'o-', color='steelblue', linewidth=2, markersize=5)
|
| 380 |
axes[0].axvline(x=bp_final, color='red', linewidth=3, linestyle='--')
|
|
@@ -396,16 +613,15 @@ def main(data_dir):
|
|
| 396 |
color='red', s=200, zorder=5, edgecolors='black')
|
| 397 |
axes[2].set_ylabel('Kriz Sinyali', fontsize=12)
|
| 398 |
axes[2].set_xlabel('Timestep', fontsize=12)
|
| 399 |
-
|
| 400 |
plt.tight_layout()
|
| 401 |
plt.savefig('output/figures/fig1_breakpoint.png', dpi=150, bbox_inches='tight')
|
| 402 |
plt.close()
|
| 403 |
print(" β fig1_breakpoint.png")
|
| 404 |
|
| 405 |
-
#
|
| 406 |
-
fig, ax = plt.subplots(figsize=(
|
| 407 |
strat_names = list(strategies.keys())
|
| 408 |
-
model_names = [mn for _, mn in model_types]
|
| 409 |
colors5 = sns.color_palette('Set2', len(strat_names))
|
| 410 |
x = np.arange(len(model_names)); width = 0.15
|
| 411 |
|
|
@@ -423,14 +639,14 @@ def main(data_dir):
|
|
| 423 |
ax.axhline(y=wf_avg, color='green', linewidth=2, linestyle='--', label=f'Walk-Forward ({wf_avg:.3f})')
|
| 424 |
ax.set_xticks(x + width*2); ax.set_xticklabels(model_names, fontsize=12)
|
| 425 |
ax.set_ylabel('Illicit F1', fontsize=13)
|
| 426 |
-
ax.set_title('BΓΆlme Stratejileri
|
| 427 |
-
ax.legend(fontsize=9); ax.set_ylim(0, 1.1)
|
| 428 |
plt.tight_layout()
|
| 429 |
plt.savefig('output/figures/fig2_f1_comparison.png', dpi=150, bbox_inches='tight')
|
| 430 |
plt.close()
|
| 431 |
print(" β fig2_f1_comparison.png")
|
| 432 |
|
| 433 |
-
#
|
| 434 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 435 |
p_names = list(pipelines.keys())
|
| 436 |
p_vals = list(pipelines.values())
|
|
@@ -445,38 +661,68 @@ def main(data_dir):
|
|
| 445 |
plt.close()
|
| 446 |
print(" β fig3_pipeline_comparison.png")
|
| 447 |
|
| 448 |
-
#
|
| 449 |
-
fig, ax = plt.subplots(figsize=(
|
| 450 |
sapma_data = []
|
| 451 |
for strat_name in strat_names:
|
| 452 |
for mn in model_names:
|
| 453 |
row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
|
| 454 |
-
if len(row) > 0 and mn in wf_results:
|
| 455 |
sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
|
| 456 |
sapma_data.append({'strateji': strat_name, 'model': mn, 'sapma': round(sapma, 1)})
|
| 457 |
if sapma_data:
|
| 458 |
sapma_df = pd.DataFrame(sapma_data)
|
| 459 |
pivot = sapma_df.pivot_table(values='sapma', index='model', columns='strateji')
|
| 460 |
-
sns.heatmap(pivot, annot=True, fmt='.1f', cmap='RdYlGn_r', center=0, ax=ax,
|
| 461 |
-
|
|
|
|
| 462 |
plt.tight_layout()
|
| 463 |
plt.savefig('output/figures/fig4_honesty.png', dpi=150, bbox_inches='tight')
|
| 464 |
plt.close()
|
| 465 |
print(" β fig4_honesty.png")
|
| 466 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 468 |
-
# ADIM
|
| 469 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 470 |
elapsed = time.time() - start_time
|
| 471 |
|
| 472 |
summary = {
|
| 473 |
-
'veri': {'toplam': N, 'etiketli': len(y), 'illicit': int(y.sum()),
|
| 474 |
-
|
|
|
|
|
|
|
| 475 |
'cikarilan_ozellik': int((~var_mask).sum()), 'en_iyi_pipeline': best_pipe},
|
| 476 |
-
'kirilma': {'saglik_yontemi': int(
|
| 477 |
'walk_forward': wf_results,
|
| 478 |
'sonuclar': res_df.to_dict(orient='records'),
|
| 479 |
'pipeline_karsilastirma': {k: round(v, 4) for k, v in pipelines.items()},
|
|
|
|
| 480 |
'sure_dakika': round(elapsed / 60, 1),
|
| 481 |
}
|
| 482 |
|
|
@@ -486,19 +732,17 @@ def main(data_dir):
|
|
| 486 |
print("\n" + "=" * 70)
|
| 487 |
print(f"TAMAMLANDI! (SΓΌre: {elapsed/60:.1f} dakika)")
|
| 488 |
print("=" * 70)
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
print(f"
|
| 492 |
-
print(f" output/results/summary.json")
|
| 493 |
-
print(f" output/figures/fig1_breakpoint.png")
|
| 494 |
-
print(f" output/figures/fig2_f1_comparison.png")
|
| 495 |
-
print(f" output/figures/fig3_pipeline_comparison.png")
|
| 496 |
-
print(f" output/figures/fig4_honesty.png")
|
| 497 |
-
|
| 498 |
-
# SonuΓ§ tablosu
|
| 499 |
-
print(f"\n βββ SONUΓ TABLOSU (F1) βββ")
|
| 500 |
pivot_f1 = res_df.pivot_table(values='f1', index='model', columns='strateji')
|
| 501 |
print(pivot_f1.to_string())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
|
| 504 |
if __name__ == '__main__':
|
|
|
|
| 7 |
1. Veri denetimi ve temizleme (data_audit)
|
| 8 |
2. En iyi ön işleme pipeline'ını belirle
|
| 9 |
3. Topolojik kırılma noktası tespiti
|
| 10 |
+
4. 5 bölme stratejisi × 4 model (GraphSAGE dahil) = 20 deney
|
| 11 |
+
5. Walk-forward validasyon + dürüstlük testi
|
| 12 |
+
6. Tüm figürleri ve sonuçları kaydet
|
|
|
|
|
|
|
| 13 |
|
| 14 |
KULLANIM:
|
| 15 |
pip install pandas numpy scikit-learn matplotlib seaborn lightgbm xgboost networkx scipy imbalanced-learn torch torch-geometric
|
| 16 |
python run_all.py --data_dir ./dataset
|
| 17 |
|
| 18 |
+
SÜRE: ~15 dakika (CPU)
|
| 19 |
===============================================================================
|
| 20 |
"""
|
| 21 |
|
|
|
|
| 38 |
import xgboost as xgb
|
| 39 |
import lightgbm as lgb
|
| 40 |
|
| 41 |
+
import torch
|
| 42 |
+
import torch.nn as nn
|
| 43 |
+
import torch.nn.functional as F
|
| 44 |
+
from torch_geometric.nn import SAGEConv
|
| 45 |
+
from torch_geometric.data import Data
|
| 46 |
+
|
| 47 |
warnings.filterwarnings('ignore')
|
| 48 |
np.random.seed(42)
|
| 49 |
+
torch.manual_seed(42)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 53 |
+
# GraphSAGE Model
|
| 54 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 55 |
+
class GraphSAGENet(nn.Module):
    """Stack of ``SAGEConv`` layers that produces per-node class scores.

    Every layer except the last is followed by batch normalisation, ReLU and
    dropout. The last layer returns its raw output (no activation), which the
    training code in this file feeds to ``F.cross_entropy`` / ``F.softmax``.

    Args:
        in_channels: Size of each input node feature vector.
        hidden: Width of every intermediate layer.
        out_channels: Number of scores produced per node.
        num_layers: Requested number of ``SAGEConv`` layers. An input layer
            and an output layer are always created, so any value below 2
            still yields a two-layer network.
        dropout: Dropout probability applied after each intermediate layer
            (only active while the module is in training mode).
    """

    def __init__(self, in_channels, hidden=128, out_channels=2, num_layers=3, dropout=0.3):
        super().__init__()
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        # Layers are created in conv/bn order so that parameter initialisation
        # consumes the (seeded) RNG in a fixed, reproducible sequence.
        self.convs.append(SAGEConv(in_channels, hidden))
        self.bns.append(nn.BatchNorm1d(hidden))
        for _ in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden, hidden))
            self.bns.append(nn.BatchNorm1d(hidden))

        # The output layer has no matching BatchNorm: len(convs) == len(bns) + 1.
        self.convs.append(SAGEConv(hidden, out_channels))
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return per-node scores for features ``x`` on graph ``edge_index``."""
        # zip() pairs each intermediate conv with its BatchNorm; the final
        # conv is excluded here and applied on its own below.
        for conv, bn in zip(self.convs[:-1], self.bns):
            x = conv(x, edge_index)
            x = bn(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.convs[-1](x, edge_index)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def train_graphsage(data, train_mask, test_mask, in_channels, epochs=200, lr=0.005, weight=None,
                    val_mask=None):
    """Train a GraphSAGE classifier and report metrics on the test nodes.

    Training is inductive: message passing during training only uses edges
    whose two endpoints are both training nodes. Evaluation uses the full
    edge set.

    Every 10 epochs the model is scored (F1) on the *selection* nodes and the
    best-scoring weights are kept; training stops after 3 consecutive
    evaluations without improvement. After training, the decision threshold
    on the positive-class probability is tuned on the same selection nodes.

    NOTE(review): when ``val_mask`` is not given, the selection nodes are the
    test nodes themselves (the original behaviour). Checkpoint selection and
    threshold tuning then see the test labels, so the returned metrics are
    optimistically biased. Pass a held-out ``val_mask`` for an unbiased
    estimate.

    Args:
        data: Graph object exposing ``x`` (node features), ``edge_index``
            (two rows: source and destination node indices) and ``y``
            (node labels used as class indices).
        train_mask: Boolean node mask selecting the training nodes.
        test_mask: Boolean node mask selecting the nodes to report on.
        in_channels: Number of input features per node.
        epochs: Maximum number of training epochs.
        lr: Initial Adam learning rate (cosine-annealed over ``epochs``).
        weight: Optional loss weight for class 1; class 0 has weight 1.0.
        val_mask: Optional boolean node mask used for checkpoint selection
            and threshold tuning instead of ``test_mask``.

    Returns:
        dict with ``f1``, ``precision``, ``recall`` and ``auroc`` on the test
        nodes, each rounded to 4 decimals. ``auroc`` is 0.5 when the test
        labels contain a single class.
    """
    device = torch.device('cpu')
    model = GraphSAGENet(in_channels, hidden=128, out_channels=2, num_layers=3, dropout=0.3).to(device)

    # Per-class loss weights: [class 0, class 1].
    class_weights = None
    if weight is not None:
        class_weights = torch.tensor([1.0, weight], dtype=torch.float32).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    # Inductive setting: keep only edges whose both endpoints are training
    # nodes. A boolean gather replaces the former per-edge Python loop.
    edge_index = data.edge_index
    in_train = train_mask.to(edge_index.device).bool()
    keep_edge = in_train[edge_index[0]] & in_train[edge_index[1]]
    train_edge_index = edge_index[:, keep_edge].to(device)
    full_edge_index = edge_index.to(device)

    x = data.x.to(device)
    y = data.y.to(device)
    train_mask_d = train_mask.to(device)
    test_mask_d = test_mask.to(device)

    # Nodes used for checkpoint selection and threshold tuning.
    select_mask_d = test_mask_d if val_mask is None else val_mask.to(device)
    y_select = y[select_mask_d].cpu().numpy()

    eval_every = 10
    patience = 30  # expressed in epochs
    max_stale_evals = patience // eval_every
    best_f1 = 0
    best_state = None
    no_improve = 0

    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(x, train_edge_index)
        loss = F.cross_entropy(out[train_mask_d], y[train_mask_d], weight=class_weights)
        loss.backward()
        optimizer.step()
        scheduler.step()

        if (epoch + 1) % eval_every != 0:
            continue

        model.eval()
        with torch.no_grad():
            pred = model(x, full_edge_index)[select_mask_d].argmax(dim=1)
        f1 = f1_score(y_select, pred.cpu().numpy(), zero_division=0)
        model.train()

        if f1 > best_f1:
            best_f1 = f1
            # clone() so later optimizer steps do not overwrite the snapshot.
            best_state = {k: v.clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1
        if no_improve >= max_stale_evals:
            break

    # Restore the best checkpoint; if F1 never rose above 0 the last weights
    # are used as-is.
    if best_state is not None:
        model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        proba = F.softmax(model(x, full_edge_index), dim=1)[:, 1]

    # Threshold optimisation on the selection nodes; ties keep the lowest
    # threshold, and 0.5 is used when no threshold yields F1 > 0.
    proba_select = proba[select_mask_d]
    best_th_f1 = 0
    best_th = 0.5
    for th in np.arange(0.1, 0.9, 0.05):
        pred_th = (proba_select >= th).long()
        f1_th = f1_score(y_select, pred_th.cpu().numpy(), zero_division=0)
        if f1_th > best_th_f1:
            best_th_f1 = f1_th
            best_th = th

    proba_test = proba[test_mask_d]
    y_test = y[test_mask_d].cpu().numpy()
    pred_np = (proba_test >= best_th).long().cpu().numpy()
    proba_np = proba_test.cpu().numpy()

    return {
        'f1': round(f1_score(y_test, pred_np, zero_division=0), 4),
        'precision': round(precision_score(y_test, pred_np, zero_division=0), 4),
        'recall': round(recall_score(y_test, pred_np, zero_division=0), 4),
        'auroc': round(roc_auc_score(y_test, proba_np) if len(np.unique(y_test)) > 1 else 0.5, 4),
    }
|
| 167 |
+
|
| 168 |
|
| 169 |
def main(data_dir):
|
| 170 |
start_time = time.time()
|
| 171 |
|
| 172 |
# ΓΔ±ktΔ± klasΓΆrleri
|
| 173 |
+
for d in ['output/figures', 'output/results']:
|
| 174 |
os.makedirs(d, exist_ok=True)
|
| 175 |
|
| 176 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 193 |
label_map = {'1': 1, '2': 0, 'unknown': -1}
|
| 194 |
labels_np = np.array([label_map[str(c)] for c in class_df['class'].values])
|
| 195 |
|
| 196 |
+
# Kenarlar
|
| 197 |
+
valid_edges = [(id_map[s], id_map[d]) for s, d in zip(edge_df['txId1'], edge_df['txId2'])
|
| 198 |
+
if s in id_map and d in id_map]
|
| 199 |
+
src = np.array([e[0] for e in valid_edges])
|
| 200 |
+
dst = np.array([e[1] for e in valid_edges])
|
| 201 |
|
| 202 |
labeled_mask = labels_np >= 0
|
| 203 |
+
labeled_indices = np.where(labeled_mask)[0]
|
| 204 |
+
|
| 205 |
+
# Etiketli dΓΌΔΓΌm indeksleme (tΓΌm dΓΌΔΓΌmlerden etiketlilere)
|
| 206 |
+
full_to_labeled = {full_idx: lab_idx for lab_idx, full_idx in enumerate(labeled_indices)}
|
| 207 |
+
|
| 208 |
X_raw = features_raw[labeled_mask]
|
| 209 |
y = labels_np[labeled_mask]
|
| 210 |
ts = timesteps_raw[labeled_mask]
|
| 211 |
|
| 212 |
print(f" Toplam: {N}, Etiketli: {len(y)}")
|
| 213 |
print(f" Δ°llicit: {y.sum()} ({y.mean()*100:.1f}%), Licit: {len(y)-y.sum()}")
|
| 214 |
+
print(f" Kenar sayΔ±sΔ±: {len(src)}")
|
| 215 |
|
| 216 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 217 |
# ADIM 2: VERΔ° TEMΔ°ZLEME VE ΓN Δ°ΕLEME
|
|
|
|
| 220 |
print("ADIM 2: VERΔ° TEMΔ°ZLEME")
|
| 221 |
print("=" * 70)
|
| 222 |
|
|
|
|
| 223 |
nan_count = np.isnan(X_raw).sum()
|
| 224 |
inf_count = np.isinf(X_raw).sum()
|
| 225 |
print(f" NaN: {nan_count}, Inf: {inf_count}")
|
| 226 |
X = np.nan_to_num(X_raw, nan=0.0, posinf=0.0, neginf=0.0)
|
| 227 |
|
| 228 |
+
# Outlier clipping
|
| 229 |
Q1 = np.percentile(X, 25, axis=0)
|
| 230 |
Q3 = np.percentile(X, 75, axis=0)
|
| 231 |
IQR = Q3 - Q1
|
|
|
|
| 234 |
outlier_mask = (X < lower) | (X > upper)
|
| 235 |
print(f" Outlier hΓΌcre: {outlier_mask.sum()} ({outlier_mask.sum()/(X.shape[0]*X.shape[1])*100:.1f}%)")
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
X_clipped = np.clip(X, lower, upper)
|
|
|
|
| 238 |
|
| 239 |
+
# DΓΌΕΓΌk varyans Γ§Δ±karma
|
| 240 |
variances = np.var(X_clipped, axis=0)
|
| 241 |
var_mask = variances > 1e-6
|
| 242 |
X_clean = X_clipped[:, var_mask]
|
| 243 |
+
print(f" DΓΌΕΓΌk varyanslΔ± ΓΆzellik Γ§Δ±karΔ±ldΔ±: {(~var_mask).sum()}, kalan: {var_mask.sum()}")
|
| 244 |
+
|
| 245 |
+
# Son veri: clipped
|
| 246 |
+
X_final = X_clipped
|
| 247 |
|
| 248 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 249 |
# ADIM 3: ΓN Δ°ΕLEME PIPELINE KARΕILAΕTIRMASI
|
| 250 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 251 |
print("\n" + "=" * 70)
|
| 252 |
+
print("ADIM 3: PIPELINE KARΕILAΕTIRMASI")
|
| 253 |
print("=" * 70)
|
| 254 |
|
| 255 |
+
tr_mask_pipe = ts <= 39
|
| 256 |
+
te_mask_pipe = ts > 39
|
| 257 |
|
| 258 |
def quick_eval(X_tr, y_tr, X_te, y_te):
|
| 259 |
+
m = lgb.LGBMClassifier(n_estimators=500, max_depth=12, scale_pos_weight=10,
|
| 260 |
+
learning_rate=0.05, random_state=42, n_jobs=-1, verbose=-1)
|
| 261 |
m.fit(X_tr, y_tr)
|
| 262 |
+
proba = m.predict_proba(X_te)[:, 1]
|
| 263 |
+
# Threshold optimizasyonu
|
| 264 |
+
best_f1, best_th = 0, 0.5
|
| 265 |
+
for th in np.arange(0.1, 0.9, 0.05):
|
| 266 |
+
p = (proba >= th).astype(int)
|
| 267 |
+
f = f1_score(y_te, p, zero_division=0)
|
| 268 |
+
if f > best_f1: best_f1, best_th = f, th
|
| 269 |
+
return best_f1
|
| 270 |
|
| 271 |
pipelines = {}
|
| 272 |
|
| 273 |
+
f1_raw = quick_eval(X_raw[tr_mask_pipe], y[tr_mask_pipe], X_raw[te_mask_pipe], y[te_mask_pipe])
|
|
|
|
| 274 |
pipelines['Ham Veri'] = f1_raw
|
| 275 |
print(f" Ham Veri: F1={f1_raw:.4f}")
|
| 276 |
|
|
|
|
| 277 |
sc = StandardScaler()
|
| 278 |
+
f1_ss = quick_eval(sc.fit_transform(X[tr_mask_pipe]), y[tr_mask_pipe], sc.transform(X[te_mask_pipe]), y[te_mask_pipe])
|
| 279 |
pipelines['StandardScaler'] = f1_ss
|
| 280 |
+
print(f" StandardScaler: F1={f1_ss:.4f}")
|
| 281 |
|
|
|
|
| 282 |
rs = RobustScaler()
|
| 283 |
+
f1_rs = quick_eval(rs.fit_transform(X[tr_mask_pipe]), y[tr_mask_pipe], rs.transform(X[te_mask_pipe]), y[te_mask_pipe])
|
| 284 |
pipelines['RobustScaler'] = f1_rs
|
| 285 |
+
print(f" RobustScaler: F1={f1_rs:.4f}")
|
| 286 |
|
|
|
|
| 287 |
rs2 = RobustScaler()
|
| 288 |
+
f1_cr = quick_eval(rs2.fit_transform(X_clipped[tr_mask_pipe]), y[tr_mask_pipe], rs2.transform(X_clipped[te_mask_pipe]), y[te_mask_pipe])
|
|
|
|
| 289 |
pipelines['Clip+Robust'] = f1_cr
|
| 290 |
+
print(f" Clip+Robust: F1={f1_cr:.4f}")
|
| 291 |
|
|
|
|
| 292 |
rs3 = RobustScaler()
|
| 293 |
+
f1_cvr = quick_eval(rs3.fit_transform(X_clean[tr_mask_pipe]), y[tr_mask_pipe], rs3.transform(X_clean[te_mask_pipe]), y[te_mask_pipe])
|
|
|
|
| 294 |
pipelines['Clip+VarFilter+Robust'] = f1_cvr
|
| 295 |
+
print(f" Clip+VarFilter+Rob: F1={f1_cvr:.4f}")
|
| 296 |
|
|
|
|
| 297 |
try:
|
| 298 |
from imblearn.over_sampling import SMOTE
|
| 299 |
smote = SMOTE(random_state=42)
|
| 300 |
rs4 = RobustScaler()
|
| 301 |
+
X_tr_s = rs4.fit_transform(X_clipped[tr_mask_pipe])
|
| 302 |
+
X_te_s = rs4.transform(X_clipped[te_mask_pipe])
|
| 303 |
+
X_tr_sm, y_tr_sm = smote.fit_resample(X_tr_s, y[tr_mask_pipe])
|
| 304 |
+
f1_smote = quick_eval(X_tr_sm, y_tr_sm, X_te_s, y[te_mask_pipe])
|
| 305 |
pipelines['Clip+Robust+SMOTE'] = f1_smote
|
| 306 |
+
print(f" Clip+Robust+SMOTE: F1={f1_smote:.4f}")
|
| 307 |
except ImportError:
|
| 308 |
+
print(" SMOTE atlandΔ±")
|
| 309 |
|
|
|
|
| 310 |
best_pipe = max(pipelines, key=pipelines.get)
|
| 311 |
print(f"\n β
En iyi pipeline: {best_pipe} (F1={pipelines[best_pipe]:.4f})")
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 314 |
# ADIM 4: TOPOLOJİK METRİKLER
|
| 315 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 338 |
ill_rate = len(ts_ill) / max(len(ts_lab), 1)
|
| 339 |
topo[t] = {'n_nodes': n, 'n_edges': e, 'density': density, 'cc_ratio': cc_ratio,
|
| 340 |
'n_components': comps, 'avg_degree': avg_deg, 'illicit_rate': ill_rate}
|
| 341 |
+
print(f" TS {t:2d}: nodes={n:5d} edges={e:5d} density={density:.5f} illicit={ill_rate:.3f}")
|
| 342 |
|
| 343 |
topo_df = pd.DataFrame(topo).T
|
| 344 |
topo_df.to_csv('output/results/topological_metrics.csv')
|
| 345 |
|
| 346 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 347 |
+
# ADIM 5: KIRILMA NOKTASI TESPİTİ
|
| 348 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 349 |
print("\n" + "=" * 70)
|
| 350 |
print("ADIM 5: KIRILMA NOKTASI TESPΔ°TΔ°")
|
| 351 |
print("=" * 70)
|
| 352 |
|
| 353 |
+
# Sağlık skoru: normalize et
|
| 354 |
for col in ['density', 'cc_ratio', 'n_components']:
|
| 355 |
mi, ma = topo_df[col].min(), topo_df[col].max()
|
| 356 |
topo_df[f'{col}_n'] = (topo_df[col] - mi) / (ma - mi + 1e-8)
|
| 357 |
health = (topo_df['density_n'] + topo_df['cc_ratio_n'] + (1 - topo_df['n_components_n'])) / 3
|
| 358 |
+
bp_final = health.diff().idxmin()
|
| 359 |
+
print(f" SaΔlΔ±k skoru kΔ±rΔ±lma noktasΔ±: TS {bp_final}")
|
| 360 |
|
| 361 |
+
# Tepe-düşüş analizi (bilgi amaçlı)
|
| 362 |
df_t = topo_df.copy()
|
| 363 |
for col in ['n_edges', 'density', 'avg_degree']:
|
| 364 |
mi, ma = df_t[col].min(), df_t[col].max()
|
|
|
|
| 366 |
crisis = (df_t['n_edges_norm'] * 0.4 + df_t['density_norm'] * 0.3 + df_t['avg_degree_norm'] * 0.3).values
|
| 367 |
crisis_smooth = uniform_filter1d(crisis, size=5, mode='nearest')
|
| 368 |
velocity = np.gradient(crisis_smooth)
|
|
|
|
| 369 |
peaks = []
|
| 370 |
for i in range(1, len(velocity) - 1):
|
| 371 |
if velocity[i-1] > 0 and velocity[i+1] < 0:
|
| 372 |
peaks.append({'timestep': all_ts[i], 'index': i, 'drop': abs(velocity[i+1])})
|
| 373 |
peaks = sorted(peaks, key=lambda x: x['drop'], reverse=True)
|
|
|
|
|
|
|
| 374 |
|
|
|
|
|
|
|
|
|
|
| 375 |
print(f" β
Final kΔ±rΔ±lma noktasΔ±: TS {bp_final}")
|
| 376 |
|
| 377 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 378 |
+
# ADIM 6: GRAF VERİSİ HAZIRLA (GraphSAGE için)
|
| 379 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 380 |
+
print("\n" + "=" * 70)
|
| 381 |
+
print("ADIM 6: GRAPHSAGE VERΔ° HAZIRLAMA")
|
| 382 |
+
print("=" * 70)
|
| 383 |
+
|
| 384 |
+
# Etiketli düğümler arası kenarları filtrele
|
| 385 |
+
labeled_set = set(labeled_indices.tolist())
|
| 386 |
+
labeled_edges = [(full_to_labeled[s], full_to_labeled[d])
|
| 387 |
+
for s, d in zip(src, dst)
|
| 388 |
+
if s in labeled_set and d in labeled_set
|
| 389 |
+
and s in full_to_labeled and d in full_to_labeled]
|
| 390 |
+
|
| 391 |
+
if labeled_edges:
|
| 392 |
+
edge_src = [e[0] for e in labeled_edges]
|
| 393 |
+
edge_dst = [e[1] for e in labeled_edges]
|
| 394 |
+
edge_index = torch.tensor([edge_src + edge_dst, edge_dst + edge_src], dtype=torch.long) # undirected
|
| 395 |
+
else:
|
| 396 |
+
edge_index = torch.zeros((2, 0), dtype=torch.long)
|
| 397 |
+
|
| 398 |
+
print(f" Etiketli dΓΌΔΓΌmler arasΔ± kenar: {len(labeled_edges)} ({edge_index.shape[1]} undirected)")
|
| 399 |
+
|
| 400 |
+
# Normalize features for GNN
|
| 401 |
+
scaler_gnn = RobustScaler()
|
| 402 |
+
X_gnn = scaler_gnn.fit_transform(X_final)
|
| 403 |
+
|
| 404 |
+
x_tensor = torch.tensor(X_gnn, dtype=torch.float32)
|
| 405 |
+
y_tensor = torch.tensor(y, dtype=torch.long)
|
| 406 |
+
|
| 407 |
+
graph_data = Data(x=x_tensor, edge_index=edge_index, y=y_tensor)
|
| 408 |
+
|
| 409 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 410 |
+
# ADIM 7: BÖLME STRATEJİLERİ VE DENEYLER
|
| 411 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 412 |
print("\n" + "=" * 70)
|
| 413 |
+
print("ADIM 7: 5 STRATEJΔ° Γ 4 MODEL = 20 DENEY")
|
| 414 |
print("=" * 70)
|
| 415 |
|
| 416 |
def make_masks(train_ts_set, test_ts_set):
|
|
|
|
| 438 |
),
|
| 439 |
}
|
| 440 |
|
| 441 |
+
def train_eval_tabular(X_tr, y_tr, X_te, y_te, model_type):
    """Train one tabular classifier and score it with a tuned decision threshold.

    Steps: robust-scale the features (scaler fitted on the training split
    only), oversample the training split with SMOTE when possible, fit the
    requested model, then sweep decision thresholds and report metrics at the
    threshold with the highest F1.

    Args:
        X_tr: Training feature matrix.
        y_tr: Training labels (binary; the positive class is label 1).
        X_te: Evaluation feature matrix.
        y_te: Evaluation labels.
        model_type: One of ``'lgbm'``, ``'rf'`` or ``'xgb'``.

    Returns:
        dict with keys ``'f1'``, ``'precision'``, ``'recall'``, ``'auroc'``
        (each rounded to 4 decimals) and ``'threshold'`` (rounded to 2).
        ``'auroc'`` is 0.5 when ``y_te`` contains a single class.

    Raises:
        ValueError: If ``model_type`` is not a supported identifier.

    Note:
        The threshold is selected against ``y_te`` itself, so the reported
        F1/precision/recall are optimistic (best-case) estimates.
    """
    # Fail fast on a bad identifier instead of an unbound-name error at fit time.
    if model_type not in ('lgbm', 'rf', 'xgb'):
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'lgbm', 'rf' or 'xgb'"
        )

    sc = RobustScaler()
    Xtr = sc.fit_transform(X_tr)
    Xte = sc.transform(X_te)

    # Apply SMOTE to the training split only. This is best-effort: if
    # imbalanced-learn is missing or resampling fails, training proceeds on the
    # original split. `except Exception` (rather than a bare `except`) lets
    # KeyboardInterrupt / SystemExit propagate.
    try:
        from imblearn.over_sampling import SMOTE
        smote = SMOTE(random_state=42)
        Xtr, y_tr = smote.fit_resample(Xtr, y_tr)
    except Exception:
        pass

    if model_type == 'lgbm':
        m = lgb.LGBMClassifier(
            n_estimators=500, max_depth=12, learning_rate=0.05,
            num_leaves=63, min_child_samples=20, subsample=0.8,
            colsample_bytree=0.8, scale_pos_weight=10,
            random_state=42, n_jobs=-1, verbose=-1
        )
    elif model_type == 'rf':
        m = RandomForestClassifier(
            n_estimators=500, max_depth=20, min_samples_leaf=5,
            class_weight='balanced_subsample', max_features='sqrt',
            random_state=42, n_jobs=-1
        )
    else:  # 'xgb' (validated above)
        m = xgb.XGBClassifier(
            n_estimators=500, max_depth=10, learning_rate=0.05,
            subsample=0.8, colsample_bytree=0.8, scale_pos_weight=10,
            min_child_weight=5, gamma=0.1,
            random_state=42, n_jobs=-1, verbosity=0
        )

    m.fit(Xtr, y_tr)
    # Probability of the positive class (column 1 of predict_proba).
    proba = m.predict_proba(Xte)[:, 1]

    # Threshold sweep: 0.10, 0.15, ... (np.arange excludes the 0.9 endpoint).
    # Falls back to 0.5 when no threshold yields a positive F1.
    best_f1, best_th = 0, 0.5
    for th in np.arange(0.1, 0.9, 0.05):
        pred_th = (proba >= th).astype(int)
        f1_th = f1_score(y_te, pred_th, zero_division=0)
        if f1_th > best_f1:
            best_f1, best_th = f1_th, th

    pred = (proba >= best_th).astype(int)
    return {
        'f1': round(f1_score(y_te, pred, zero_division=0), 4),
        'precision': round(precision_score(y_te, pred, zero_division=0), 4),
        'recall': round(recall_score(y_te, pred, zero_division=0), 4),
        # AUROC is undefined with a single class present; report chance level.
        'auroc': round(roc_auc_score(y_te, proba) if len(np.unique(y_te)) > 1 else 0.5, 4),
        'threshold': round(best_th, 2),
    }
|
| 495 |
|
| 496 |
model_types = [('lgbm', 'LightGBM'), ('rf', 'Random Forest'), ('xgb', 'XGBoost')]
|
|
|
|
| 501 |
if tr_m.sum() < 50 or te_m.sum() < 10:
|
| 502 |
print(f" {strat_name}: yetersiz veri, atlanΔ±yor")
|
| 503 |
continue
|
| 504 |
+
|
| 505 |
print(f"\n {strat_name} (train={tr_m.sum()}, test={te_m.sum()}, test_ill={y[te_m].sum()}):")
|
| 506 |
+
|
| 507 |
+
# Tabular modeller
|
| 508 |
for mt, mn in model_types:
|
| 509 |
+
res = train_eval_tabular(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
|
| 510 |
res['strateji'] = strat_name
|
| 511 |
res['model'] = mn
|
| 512 |
all_results.append(res)
|
| 513 |
+
print(f" {mn:15s}: F1={res['f1']:.4f} P={res['precision']:.4f} R={res['recall']:.4f} AUROC={res['auroc']:.4f} th={res['threshold']}")
|
| 514 |
+
|
| 515 |
+
# GraphSAGE
|
| 516 |
+
print(f" {'GraphSAGE':15s}: eΔitiliyor...", end='', flush=True)
|
| 517 |
+
train_mask_t = torch.tensor(tr_m, dtype=torch.bool)
|
| 518 |
+
test_mask_t = torch.tensor(te_m, dtype=torch.bool)
|
| 519 |
+
ill_weight = float((y[tr_m] == 0).sum()) / max(float((y[tr_m] == 1).sum()), 1)
|
| 520 |
+
ill_weight = min(ill_weight, 15.0) # cap at 15
|
| 521 |
+
|
| 522 |
+
gs_res = train_graphsage(graph_data, train_mask_t, test_mask_t,
|
| 523 |
+
X_final.shape[1], epochs=200, lr=0.005, weight=ill_weight)
|
| 524 |
+
gs_res['strateji'] = strat_name
|
| 525 |
+
gs_res['model'] = 'GraphSAGE'
|
| 526 |
+
gs_res['threshold'] = 0.0 # threshold handled internally
|
| 527 |
+
all_results.append(gs_res)
|
| 528 |
+
print(f"\r {'GraphSAGE':15s}: F1={gs_res['f1']:.4f} P={gs_res['precision']:.4f} R={gs_res['recall']:.4f} AUROC={gs_res['auroc']:.4f}")
|
| 529 |
|
| 530 |
res_df = pd.DataFrame(all_results)
|
| 531 |
res_df.to_csv('output/results/all_experiment_results.csv', index=False)
|
| 532 |
|
| 533 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββ
|
| 534 |
+
# ADIM 8: WALK-FORWARD VALİDASYON
|
| 535 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 536 |
print("\n" + "=" * 70)
|
| 537 |
+
print("ADIM 8: WALK-FORWARD VALΔ°DASYON")
|
| 538 |
print("=" * 70)
|
| 539 |
|
| 540 |
wf_results = {}
|
|
|
|
| 545 |
te_m = (ts >= test_start) & (ts < test_start + 3)
|
| 546 |
if tr_m.sum() < 50 or te_m.sum() < 10 or len(np.unique(y[te_m])) < 2:
|
| 547 |
continue
|
| 548 |
+
res = train_eval_tabular(X_final[tr_m], y[tr_m], X_final[te_m], y[te_m], mt)
|
| 549 |
wf_f1s.append(res['f1'])
|
| 550 |
wf_results[mn] = round(np.mean(wf_f1s), 4)
|
| 551 |
print(f" {mn}: Walk-Forward F1 = {wf_results[mn]:.4f}")
|
| 552 |
|
| 553 |
+
# GraphSAGE walk-forward
|
| 554 |
+
wf_gs_f1s = []
|
| 555 |
+
for test_start in range(10, 49, 3):
|
| 556 |
+
tr_m_wf = ts < test_start
|
| 557 |
+
te_m_wf = (ts >= test_start) & (ts < test_start + 3)
|
| 558 |
+
if tr_m_wf.sum() < 50 or te_m_wf.sum() < 10 or len(np.unique(y[te_m_wf])) < 2:
|
| 559 |
+
continue
|
| 560 |
+
train_mask_wf = torch.tensor(tr_m_wf, dtype=torch.bool)
|
| 561 |
+
test_mask_wf = torch.tensor(te_m_wf, dtype=torch.bool)
|
| 562 |
+
ill_w = float((y[tr_m_wf]==0).sum()) / max(float((y[tr_m_wf]==1).sum()), 1)
|
| 563 |
+
ill_w = min(ill_w, 15.0)
|
| 564 |
+
gs_wf = train_graphsage(graph_data, train_mask_wf, test_mask_wf, X_final.shape[1], epochs=100, weight=ill_w)
|
| 565 |
+
wf_gs_f1s.append(gs_wf['f1'])
|
| 566 |
+
wf_results['GraphSAGE'] = round(np.mean(wf_gs_f1s), 4) if wf_gs_f1s else 0
|
| 567 |
+
print(f" GraphSAGE: Walk-Forward F1 = {wf_results['GraphSAGE']:.4f}")
|
| 568 |
+
|
| 569 |
# Dürüstlük tablosu
|
| 570 |
print("\n DΓΌrΓΌstlΓΌk KarΕΔ±laΕtΔ±rmasΔ±:")
|
| 571 |
+
honesty_data = []
|
| 572 |
for strat_name in strategies:
|
| 573 |
sapma_list = []
|
| 574 |
for mn in wf_results:
|
| 575 |
row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
|
| 576 |
+
if len(row) > 0 and mn in wf_results and wf_results[mn] > 0:
|
| 577 |
sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
|
| 578 |
sapma_list.append(sapma)
|
| 579 |
if sapma_list:
|
| 580 |
avg_sapma = np.mean(sapma_list)
|
| 581 |
durum = "β
DΓRΓST" if abs(avg_sapma) < 10 else ("π΄ ΕΔ°ΕME" if avg_sapma > 10 else "β οΈ PESΔ°MΔ°ST")
|
| 582 |
+
honesty_data.append({'strateji': strat_name, 'sapma': round(avg_sapma, 1), 'durum': durum})
|
| 583 |
print(f" {strat_name:25s}: ort. sapma = {avg_sapma:+.1f}% {durum}")
|
| 584 |
|
| 585 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 586 |
+
# ADIM 9: FİGÜRLER
|
| 587 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 588 |
print("\n" + "=" * 70)
|
| 589 |
+
print("ADIM 9: FΔ°GΓRLER")
|
| 590 |
print("=" * 70)
|
| 591 |
|
| 592 |
sns.set_theme(style='whitegrid', font_scale=1.1)
|
| 593 |
|
| 594 |
+
# Fig 1: Kırılma noktası
|
| 595 |
fig, axes = plt.subplots(3, 1, figsize=(18, 14), gridspec_kw={'height_ratios': [2, 1, 1]})
|
| 596 |
axes[0].plot(all_ts, health.values, 'o-', color='steelblue', linewidth=2, markersize=5)
|
| 597 |
axes[0].axvline(x=bp_final, color='red', linewidth=3, linestyle='--')
|
|
|
|
| 613 |
color='red', s=200, zorder=5, edgecolors='black')
|
| 614 |
axes[2].set_ylabel('Kriz Sinyali', fontsize=12)
|
| 615 |
axes[2].set_xlabel('Timestep', fontsize=12)
|
|
|
|
| 616 |
plt.tight_layout()
|
| 617 |
plt.savefig('output/figures/fig1_breakpoint.png', dpi=150, bbox_inches='tight')
|
| 618 |
plt.close()
|
| 619 |
print(" β fig1_breakpoint.png")
|
| 620 |
|
| 621 |
+
# Fig 2: F1 karşılaştırma (4 model dahil)
|
| 622 |
+
fig, ax = plt.subplots(figsize=(18, 8))
|
| 623 |
strat_names = list(strategies.keys())
|
| 624 |
+
model_names = [mn for _, mn in model_types] + ['GraphSAGE']
|
| 625 |
colors5 = sns.color_palette('Set2', len(strat_names))
|
| 626 |
x = np.arange(len(model_names)); width = 0.15
|
| 627 |
|
|
|
|
| 639 |
ax.axhline(y=wf_avg, color='green', linewidth=2, linestyle='--', label=f'Walk-Forward ({wf_avg:.3f})')
|
| 640 |
ax.set_xticks(x + width*2); ax.set_xticklabels(model_names, fontsize=12)
|
| 641 |
ax.set_ylabel('Illicit F1', fontsize=13)
|
| 642 |
+
ax.set_title('BΓΆlme Stratejileri Γ Model KarΕΔ±laΕtΔ±rmasΔ±', fontsize=14, fontweight='bold')
|
| 643 |
+
ax.legend(fontsize=9, loc='upper right'); ax.set_ylim(0, 1.1)
|
| 644 |
plt.tight_layout()
|
| 645 |
plt.savefig('output/figures/fig2_f1_comparison.png', dpi=150, bbox_inches='tight')
|
| 646 |
plt.close()
|
| 647 |
print(" β fig2_f1_comparison.png")
|
| 648 |
|
| 649 |
+
# Fig 3: Pipeline karşılaştırma
|
| 650 |
fig, ax = plt.subplots(figsize=(10, 6))
|
| 651 |
p_names = list(pipelines.keys())
|
| 652 |
p_vals = list(pipelines.values())
|
|
|
|
| 661 |
plt.close()
|
| 662 |
print(" β fig3_pipeline_comparison.png")
|
| 663 |
|
| 664 |
+
# Fig 4: Dürüstlük ısı haritası
|
| 665 |
+
fig, ax = plt.subplots(figsize=(16, 7))
|
| 666 |
sapma_data = []
|
| 667 |
for strat_name in strat_names:
|
| 668 |
for mn in model_names:
|
| 669 |
row = res_df[(res_df['strateji'] == strat_name) & (res_df['model'] == mn)]
|
| 670 |
+
if len(row) > 0 and mn in wf_results and wf_results[mn] > 0:
|
| 671 |
sapma = ((row['f1'].values[0] - wf_results[mn]) / wf_results[mn]) * 100
|
| 672 |
sapma_data.append({'strateji': strat_name, 'model': mn, 'sapma': round(sapma, 1)})
|
| 673 |
if sapma_data:
|
| 674 |
sapma_df = pd.DataFrame(sapma_data)
|
| 675 |
pivot = sapma_df.pivot_table(values='sapma', index='model', columns='strateji')
|
| 676 |
+
sns.heatmap(pivot, annot=True, fmt='.1f', cmap='RdYlGn_r', center=0, ax=ax,
|
| 677 |
+
linewidths=0.5, cbar_kws={'label': 'Walk-Forward Sapma (%)'})
|
| 678 |
+
ax.set_title('Walk-Forward DΓΌrΓΌstlΓΌk SapmasΔ± (%) β 4 Model Γ 5 Strateji', fontsize=14, fontweight='bold')
|
| 679 |
plt.tight_layout()
|
| 680 |
plt.savefig('output/figures/fig4_honesty.png', dpi=150, bbox_inches='tight')
|
| 681 |
plt.close()
|
| 682 |
print(" β fig4_honesty.png")
|
| 683 |
|
| 684 |
+
# Fig 5: Performans şişmesi haritası (inflation)
|
| 685 |
+
fig, ax = plt.subplots(figsize=(14, 6))
|
| 686 |
+
inf_data = []
|
| 687 |
+
for mn in model_names:
|
| 688 |
+
row_rand = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Rastgele')]
|
| 689 |
+
row_chr = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Kronolojik')]
|
| 690 |
+
row_topo = res_df[(res_df['model'] == mn) & (res_df['strateji'] == 'Topolojik KΔ±rΔ±lma')]
|
| 691 |
+
if len(row_rand) > 0 and len(row_chr) > 0:
|
| 692 |
+
rand_f1 = row_rand['f1'].values[0]
|
| 693 |
+
chr_f1 = row_chr['f1'].values[0]
|
| 694 |
+
topo_f1 = row_topo['f1'].values[0] if len(row_topo) > 0 else 0
|
| 695 |
+
inf_data.append({
|
| 696 |
+
'model': mn,
|
| 697 |
+
'Rastgele vs Kronolojik': round((rand_f1 - chr_f1) / chr_f1 * 100, 1),
|
| 698 |
+
'Rastgele vs Topolojik': round((rand_f1 - topo_f1) / topo_f1 * 100, 1) if topo_f1 > 0 else 0,
|
| 699 |
+
})
|
| 700 |
+
if inf_data:
|
| 701 |
+
inf_df = pd.DataFrame(inf_data).set_index('model')
|
| 702 |
+
sns.heatmap(inf_df, annot=True, fmt='.1f', cmap='Reds', ax=ax, linewidths=0.5,
|
| 703 |
+
cbar_kws={'label': 'ΕiΕme OranΔ± (%)'})
|
| 704 |
+
ax.set_title('Rastgele BΓΆlme Performans ΕiΕmesi (%)', fontsize=14, fontweight='bold')
|
| 705 |
+
plt.tight_layout()
|
| 706 |
+
plt.savefig('output/figures/fig5_inflation.png', dpi=150, bbox_inches='tight')
|
| 707 |
+
plt.close()
|
| 708 |
+
print(" β fig5_inflation.png")
|
| 709 |
+
|
| 710 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 711 |
+
# ADIM 10: ÖZET RAPOR
|
| 712 |
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 713 |
elapsed = time.time() - start_time
|
| 714 |
|
| 715 |
summary = {
|
| 716 |
+
'veri': {'toplam': N, 'etiketli': len(y), 'illicit': int(y.sum()),
|
| 717 |
+
'ozellik': int(X_final.shape[1]), 'kenar': len(valid_edges)},
|
| 718 |
+
'temizleme': {'nan': int(nan_count), 'inf': int(inf_count),
|
| 719 |
+
'outlier_pct': round(outlier_mask.sum()/(X.shape[0]*X.shape[1])*100, 2),
|
| 720 |
'cikarilan_ozellik': int((~var_mask).sum()), 'en_iyi_pipeline': best_pipe},
|
| 721 |
+
'kirilma': {'saglik_yontemi': int(bp_final), 'final': int(bp_final)},
|
| 722 |
'walk_forward': wf_results,
|
| 723 |
'sonuclar': res_df.to_dict(orient='records'),
|
| 724 |
'pipeline_karsilastirma': {k: round(v, 4) for k, v in pipelines.items()},
|
| 725 |
+
'durustukluk': honesty_data,
|
| 726 |
'sure_dakika': round(elapsed / 60, 1),
|
| 727 |
}
|
| 728 |
|
|
|
|
| 732 |
print("\n" + "=" * 70)
|
| 733 |
print(f"TAMAMLANDI! (SΓΌre: {elapsed/60:.1f} dakika)")
|
| 734 |
print("=" * 70)
|
| 735 |
+
|
| 736 |
+
# Final sonuç tablosu
|
| 737 |
+
print(f"\n βββ SONUΓ TABLOSU (Illicit F1) βββ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
pivot_f1 = res_df.pivot_table(values='f1', index='model', columns='strateji')
|
| 739 |
print(pivot_f1.to_string())
|
| 740 |
+
|
| 741 |
+
print(f"\n βββ WALK-FORWARD REFERANS βββ")
|
| 742 |
+
for mn, f1 in wf_results.items():
|
| 743 |
+
print(f" {mn}: {f1:.4f}")
|
| 744 |
+
|
| 745 |
+
print(f"\n ΓΔ±ktΔ±lar: output/results/ ve output/figures/")
|
| 746 |
|
| 747 |
|
| 748 |
if __name__ == '__main__':
|