# ╔══════════════════════════════════════════════════════════════════════════════╗
# ║  MANAGER NOTEBOOK — Impact of school holidays on XGB predictions            ║
# ║  Simple version: before/after correction, intended for the manager          ║
# ╚══════════════════════════════════════════════════════════════════════════════╝

# ════════════════════════════════════════════════════════════════════════════════
# CELL 1 — Imports
# ════════════════════════════════════════════════════════════════════════════════
import warnings
from datetime import date

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

# ════════════════════════════════════════════════════════════════════════════════
# CELL 2 — School holiday calendar + DR→zone mapping
# ════════════════════════════════════════════════════════════════════════════════

# School year ("YYYY-YYYY") -> zone ("A"/"B"/"C") -> list of (first day, last day)
# holiday ranges. Both bounds are inclusive (see `is_vacances`).
VACANCES = {
    "2023-2024": {
        "A": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 17), date(2024, 3, 3)),
            (date(2024, 4, 13), date(2024, 4, 28)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "B": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 24), date(2024, 3, 10)),
            (date(2024, 4, 20), date(2024, 5, 5)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "C": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 10), date(2024, 2, 25)),
            (date(2024, 4, 6), date(2024, 4, 21)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
    },
    "2024-2025": {
        "A": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 8), date(2025, 2, 23)),
            (date(2025, 4, 5), date(2025, 4, 20)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "B": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 22), date(2025, 3, 9)),
            (date(2025, 4, 19), date(2025, 5, 4)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "C": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 15), date(2025, 3, 2)),
            (date(2025, 4, 12), date(2025, 4, 27)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
    },
    "2025-2026": {
        "A": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 14), date(2026, 3, 1)),
            (date(2026, 4, 4), date(2026, 4, 19)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "B": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 21), date(2026, 3, 8)),
            (date(2026, 4, 11), date(2026, 4, 26)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "C": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 7), date(2026, 2, 22)),
            (date(2026, 3, 28), date(2026, 4, 12)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
    },
    "2026-2027": {
        "A": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 13), date(2027, 2, 28)),
            (date(2027, 4, 3), date(2027, 4, 18)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "B": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 20), date(2027, 3, 7)),
            (date(2027, 4, 10), date(2027, 4, 25)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "C": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 6), date(2027, 2, 21)),
            (date(2027, 3, 27), date(2027, 4, 11)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
    },
}

# DR code -> holiday zone. Codes missing from this table fall back to zone "C"
# (see `get_zone`).
DR_TO_ZONE = {
    "SIR": "A", "AUV": "A", "ALP": "A", "PCH": "A", "LIM": "A",
    "AQN": "A", "PYL": "A", "BRG": "A", "AFC": "A",
    "PIC": "B", "NPC": "B", "PAS": "B", "CAZ": "B", "CAR": "B",
    "NOR": "B", "BRE": "B", "CEN": "B", "PDL": "B", "LOR": "C",
    "MPS": "C", "LRO": "C", "NMP": "C", "PAR": "C", "IFE": "C", "IFO": "C",
}

# Month -> holiday period name, used by `get_periode_vacances`.
_PERIODE_PAR_MOIS = {
    10: "Toussaint", 11: "Toussaint",
    12: "Noel", 1: "Noel",
    2: "Hiver", 3: "Hiver",
    4: "Printemps", 5: "Printemps",
    7: "Ete", 8: "Ete", 9: "Ete",
}


def get_zone(dr: str) -> str:
    """Return the holiday zone of a DR code, defaulting to "C" when unknown."""
    return DR_TO_ZONE.get(dr, "C")


def get_annee_scolaire(d: date) -> str:
    """Return the school-year key ("YYYY-YYYY") containing date `d`.

    A school year is considered to start on September 1st.
    """
    if d.month >= 9:
        return f"{d.year}-{d.year+1}"
    return f"{d.year-1}-{d.year}"


def is_vacances(d: date, zone: str, vac: dict) -> bool:
    """Tell whether `d` falls inside a holiday range of `zone`.

    Args:
        d: Day to test.
        zone: Zone key looked up in `vac`.
        vac: One school-year calendar, i.e. zone -> list of (start, end).

    Returns:
        True when `start <= d <= end` for at least one range of the zone.
    """
    return any(debut <= d <= fin for debut, fin in vac.get(zone, []))


def get_periode_vacances(d: date, vac: dict) -> str:
    """Name the holiday period that `d` belongs to in any of the zones A/B/C.

    The name is derived from the month in which the matching range *ends*
    rather than from the month of `d`: a spring break starting in late March
    (e.g. 2026-03-28 → 2026-04-12) would otherwise be reported as "Hiver".
    If the end month has no name, the month of `d` is used as a fallback.

    Args:
        d: Day to classify.
        vac: One school-year calendar, i.e. zone -> list of (start, end).

    Returns:
        One of "Toussaint", "Noel", "Hiver", "Printemps", "Ete", or
        "Hors_vacances" when no named range contains `d`.
    """
    for zone in ("A", "B", "C"):
        for debut, fin in vac.get(zone, []):
            if not debut <= d <= fin:
                continue
            label = _PERIODE_PAR_MOIS.get(fin.month) or _PERIODE_PAR_MOIS.get(d.month)
            if label:
                return label
    return "Hors_vacances"


def _calendriers_candidats(d: date) -> list:
    """Return the school-year calendars that may contain `d`.

    This is the calendar of `d`'s own school year, plus the previous one for
    September dates: a summer break can end in early September (2023-2024
    ends on 2024-09-01) while `get_annee_scolaire` has already switched years.
    """
    annees = [get_annee_scolaire(d)]
    if d.month == 9:
        annees.append(f"{d.year-1}-{d.year}")
    return [VACANCES[a] for a in annees if a in VACANCES]


def add_vacances(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` enriched with school-holiday columns.

    Requires the columns "Date" and "DR". The input frame is not modified.

    Added / rewritten columns:
        Date: parsed with `pd.to_datetime` and made timezone-naive.
        zone_vacances: zone of the row's DR (`get_zone`).
        annee_scolaire: school-year key of the row's date.
        is_vacances_zone: bool, the date is a holiday in the row's own zone.
        periode_vacances: period name if the date is a holiday in any zone,
            otherwise "Hors_vacances".
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"]).dt.tz_localize(None)
    df["zone_vacances"] = df["DR"].apply(get_zone)

    jours = [ts.date() for ts in df["Date"]]
    zones = df["zone_vacances"].tolist()
    df["annee_scolaire"] = [get_annee_scolaire(j) for j in jours]

    def _en_vacances(jour, zone):
        return any(is_vacances(jour, zone, cal) for cal in _calendriers_candidats(jour))

    def _periode(jour):
        for cal in _calendriers_candidats(jour):
            label = get_periode_vacances(jour, cal)
            if label != "Hors_vacances":
                return label
        return "Hors_vacances"

    # Evaluate each distinct key once: frames usually hold many rows per day,
    # and plain lists (unlike a row-wise `apply`) also work on an empty frame.
    vac_par_cle = {cle: _en_vacances(*cle) for cle in set(zip(jours, zones))}
    periode_par_jour = {j: _periode(j) for j in set(jours)}

    # A bool ndarray keeps the column usable as a mask (`~`, `&`) downstream.
    df["is_vacances_zone"] = np.array(
        [vac_par_cle[cle] for cle in zip(jours, zones)], dtype=bool
    )
    df["periode_vacances"] = [periode_par_jour[j] for j in jours]
    return df


# ════════════════════════════════════════════════════════════════════════════════
# CELL 3 — Simple report for the manager
# ════════════════════════════════════════════════════════════════════════════════

def rapport_manager(df: pd.DataFrame) -> pd.DataFrame:
    """Print a before/after report of the school-holiday post-processing.

    A single multiplicative factor is estimated on past holiday rows
    (mean of "count" divided by mean of "prediction_XGB", the denominator
    being floored at 1) and applied to every holiday row that has a
    prediction, past or future.

    Args:
        df: Frame with the columns "Date", "DR", "count" and
            "prediction_XGB". Rows with `count > 0` and a non-null
            prediction are treated as past observations.

    Returns:
        The frame produced by `add_vacances`, with an extra
        "prediction_XGB_corrige" column. That column is absent when the
        function returns early because there is no usable past row or no
        past holiday row.
    """
    df = add_vacances(df)

    m_passe = (df["count"] > 0) & df["prediction_XGB"].notna()
    if not m_passe.any():
        print("❌ Aucune donnée passée avec prédiction valide.")
        return df

    m_vac_passe = m_passe & df["is_vacances_zone"]
    if not m_vac_passe.any():
        print("❌ Aucun jour de vacances trouvé.")
        return df

    facteur_global = (
        df.loc[m_vac_passe, "count"].mean()
        / max(df.loc[m_vac_passe, "prediction_XGB"].mean(), 1)
    )

    df["prediction_XGB_corrige"] = df["prediction_XGB"].astype(float)
    m_v_all = df["is_vacances_zone"] & df["prediction_XGB"].notna()
    df.loc[m_v_all, "prediction_XGB_corrige"] = (
        df.loc[m_v_all, "prediction_XGB"] * facteur_global
    )

    # The evaluation subset must be taken *after* the corrected column exists;
    # slicing it earlier leaves it without "prediction_XGB_corrige" and every
    # table below fails with a KeyError.
    dfp = df[m_passe].copy()
    m_v = dfp["is_vacances_zone"]

    print("=" * 65)
    print("📊 IMPACT POST-PROCESSING VACANCES SCOLAIRES")
    print("=" * 65)
    print(f"\n📅 Données analysées : {len(dfp):,} jours passés")
    print(f"🏖️ Jours en vacances : {m_v.sum():,}")
    print(f"📚 Jours hors vacances : {(~m_v).sum():,}")

    print(f"\n{'─'*65}")
    print("📉 VOLUMES MOYENS PRÉDITS — AVANT vs APRÈS CORRECTION")
    print(f"{'─'*65}")
    res = []
    for label, mask in [
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        if mask.sum() < 2:
            continue
        sub = dfp[mask]
        avant = sub["prediction_XGB"].mean()
        apres = sub["prediction_XGB_corrige"].mean()
        baisse = ((apres - avant) / max(avant, 1)) * 100
        res.append({
            "Période": label,
            "n jours": int(mask.sum()),
            "Avant correction": round(avant, 1),
            "Après correction": round(apres, 1),
            "Différence": f"{baisse:+.1f}%",
        })
    print(pd.DataFrame(res).to_string(index=False))

    if m_v.sum() > 0 and (~m_v).sum() > 0:
        moyenne_vac = dfp.loc[m_v, "count"].mean()
        moyenne_hors = dfp.loc[~m_v, "count"].mean()
        baisse_reelle = ((moyenne_vac - moyenne_hors) / max(moyenne_hors, 1)) * 100
        print(f"\n🔴 Baisse RÉELLE des volumes en vacances : {baisse_reelle:.1f}%")
        print(f"💡 Correction appliquée : facteur ×{facteur_global:.4f} (baisse de {(1-facteur_global)*100:.1f}%)")

    def mae(y_true, y_pred):
        """Mean absolute error between two array-likes."""
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))

    print(f"\n{'─'*65}")
    print("📈 PRÉCISION (MAE) — AVANT vs APRÈS")
    print(f"{'─'*65}")
    res_mae = []
    for label, mask in [
        ("Toutes périodes", pd.Series(True, index=dfp.index)),
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        if mask.sum() < 2:
            continue
        yt = dfp.loc[mask, "count"].values
        mae_avant = mae(yt, dfp.loc[mask, "prediction_XGB"].values)
        mae_apres = mae(yt, dfp.loc[mask, "prediction_XGB_corrige"].values)
        gain = ((mae_avant - mae_apres) / max(mae_avant, 1)) * 100
        res_mae.append({
            "Période": label,
            "MAE avant": round(mae_avant, 2),
            "MAE après": round(mae_apres, 2),
            "Gain": f"{gain:+.1f}%",
        })
    print(pd.DataFrame(res_mae).to_string(index=False))

    print(f"\n{'='*65}")
    print("✅ RÉSUMÉ")
    print(f"{'='*65}")
    print(f" • Facteur correcteur : ×{facteur_global:.4f}")
    print(f" • Appliqué sur : {m_v_all.sum():,} jours en vacances (passés + futurs)")
    print(f" • Hors vacances : inchangé")
    print(f" • Impact : les prédictions en vacances sont corrigées à la baisse")
    print(f" pour refléter la baisse réelle observée sur le passé.")
    return df


# ════════════════════════════════════════════════════════════════════════════════
# CELL 4 — Execution
# ════════════════════════════════════════════════════════════════════════════════
# df = rapport_manager(df)