"""run_pipeline: trigger pipeline stages 1-3 and/or the holdout evaluation benchmark."""
| import os | |
| import sys | |
| import argparse | |
| import subprocess | |
| import logging | |
| logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(name)s | %(levelname)s | %(message)s") | |
| logger = logging.getLogger("run_pipeline") | |
def execute_stage(stage_num):
    """Run a single pipeline stage script in a subprocess.

    Parameters
    ----------
    stage_num : int
        Stage number (1, 2, or 3). Any other value is logged as an error
        and the function returns without exiting.

    Exits the whole process with status 1 when the stage script is
    missing on disk or the subprocess returns a non-zero exit code.
    """
    logger.info(f"========== TRIGGERING STAGE {stage_num} ==========")
    # Dict dispatch instead of a hard-coded if/elif chain: adding a new
    # stage becomes a one-line change.
    stage_scripts = {
        1: "src/stage1_ingestion.py",
        2: "src/stage2_preprocessing.py",
        3: "src/stage3_training.py",
    }
    script = stage_scripts.get(stage_num)
    if script is None:
        logger.error(f"Unknown Stage: {stage_num}")
        return
    if not os.path.exists(script):
        logger.error(f"Cannot find script: {script}")
        sys.exit(1)
    # Use the current interpreter so the stage runs inside the same
    # environment/virtualenv as this orchestrator.
    res = subprocess.run([sys.executable, script])
    if res.returncode != 0:
        logger.error(f"Stage {stage_num} failed!")
        sys.exit(1)
    logger.info(f"========== STAGE {stage_num} FINISHED ==========\n")
def execute_evaluation():
    """Benchmark the full inference pipeline on the stratified holdout split.

    Loads ``data/splits/df_holdout.csv``, runs ``predict_article`` on every
    row with RAG disabled, maps the 4-tier verdict down to a binary label,
    and logs accuracy, a classification report, and the confusion matrix.

    Exits the process with status 1 if the holdout CSV is missing.
    """
    logger.info("========== TRIGGERING FINAL HOLD-OUT BENCHMARK ==========")
    import pandas as pd
    import numpy as np
    from tqdm import tqdm
    from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
    # Needs to be imported late so it doesn't fail if dependencies aren't setup
    from src.stage4_inference import predict_article

    df_path = "data/splits/df_holdout.csv"
    if not os.path.exists(df_path):
        logger.error(f"Holdout file missing at {df_path}. Run Stages 1-3 first.")
        sys.exit(1)

    df = pd.read_csv(df_path)
    logger.info(f"Loaded {len(df)} Stratified Holdout records.")
    y_true = df["binary_label"].values
    y_pred = []
    logger.info("Executing isolated pipeline inference across holdout targets (RAG safely bypassed)...")
    logger.info("NOTE: Since this evaluates the entire heavy 4-model ensemble locally, it may take several minutes.")
    # The row index is unused, so discard it with `_`.
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Benchmarking Evaluator"):
        # We manually map the inference parameters directly into the ultimate test pipeline
        res = predict_article(
            title=row.get("title", ""),
            text=row.get("text", ""),
            source_domain=row.get("source_domain", ""),
            published_date=row.get("published_date", ""),
            mode="full",
            trigger_rag=False,
        )
        # New 4-tier verdict mapping:
        #   TRUE / UNCERTAIN     -> 1 (real news)
        #   LIKELY FALSE / FALSE -> 0 (fake news)
        pred_label = 1 if res["verdict"] in ("TRUE", "UNCERTAIN") else 0
        y_pred.append(pred_label)
    y_pred = np.array(y_pred)
    acc = accuracy_score(y_true, y_pred)
    # Banner has no interpolation, so no f-prefix needed.
    logger.info("\n================ BENCHMARK RESULTS ================")
    logger.info(f"Final Architecture Accuracy: {acc * 100:.2f}%")
    logger.info("\n" + classification_report(y_true, y_pred, target_names=["Fake News (0)", "True News (1)"]))
    logger.info(f"Confusion Matrix:\n{confusion_matrix(y_true, y_pred)}")
    logger.info("===================================================\n")
if __name__ == "__main__":
    # CLI entry point: run selected pipeline stages and/or the holdout benchmark.
    cli = argparse.ArgumentParser(description="Fake News Detection System Pipeline")
    cli.add_argument("--stage", nargs="+", type=int, choices=[1, 2, 3], help="Specify stages to run (e.g. --stage 1 2 3)")
    cli.add_argument("--eval", action="store_true", help="Evaluate the architecture natively on the stratified holdout benchmark")
    opts = cli.parse_args()

    requested = opts.stage or []
    for stage in requested:
        execute_stage(stage)
    if opts.eval:
        execute_evaluation()
    # Nothing requested at all: show usage instead of silently doing nothing.
    if not requested and not opts.eval:
        cli.print_help()