madm96
/

correction-vacances-scolaires

ml-intern

Model card Files Files and versions

xet

Community

madm96 committed 3 days ago

Commit

2ecce3e

verified ·

1 Parent(s): 973f376

Upload notebook_manager.py

Browse files

Files changed (1) hide show

notebook_manager.py +93 -385

notebook_manager.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # ╔══════════════════════════════════════════════════════════════════════════════╗
-# ║  NOTEBOOK : Analyse Vacances - VERSION MANAGER                               ║
-# ║  IPython.display, graphes, tableaux markdown                               ║
 # ╚══════════════════════════════════════════════════════════════════════════════╝
 # ════════════════════════════════════════════════════════════════════════════════
@@ -12,25 +12,9 @@ from datetime import date
 import warnings
 warnings.filterwarnings("ignore")
-try:
-    from IPython.display import display, Markdown
-    IPYTHON_OK = True
-except ImportError:
-    IPYTHON_OK = False
-    def display(x): print(x)
-    def Markdown(x): return x
-try:
-    import matplotlib.pyplot as plt
-    MATPLOTLIB_OK = True
-except ImportError:
-    MATPLOTLIB_OK = False
-    print("⚠️ matplotlib non installé → pas de graphes. pip install matplotlib")
 # ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 2 — Calendrier vacances scolaires (2023-2027)
 # ════════════════════════════════════════════════════════════════════════════════
 VACANCES = {
     "2023-2024": {
         "A": [(date(2023,10,21),date(2023,11,5)), (date(2023,12,23),date(2024,1,7)),
@@ -67,38 +51,34 @@ VACANCES = {
     },
     "2026-2027": {
         "A": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
-              (date(2027,2,14),date(2027,3,1)), (date(2027,4,4),date(2027,4,19)),
               (date(2027,7,3),date(2027,8,31))],
         "B": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
-              (date(2027,2,21),date(2027,3,8)), (date(2027,4,11),date(2027,4,26)),
               (date(2027,7,3),date(2027,8,31))],
         "C": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
-              (date(2027,2,7),date(2027,2,22)), (date(2027,3,28),date(2027,4,12)),
               (date(2027,7,3),date(2027,8,31))],
     },
 }
 DR_TO_ZONE = {
-    "Besancon": "A", "Bordeaux": "A", "Clermont-Ferrand": "A",
-    "Dijon": "A", "Grenoble": "A", "Lyon": "A", "Limoges": "A", "Poitiers": "A",
-    "Aix-Marseille": "B", "Amiens": "B", "Caen": "B", "Lille": "B",
-    "Nantes": "B", "Nice": "B", "Orleans-Tours": "B", "Reims": "B",
-    "Rennes": "B", "Rouen": "B", "Strasbourg": "B",
-    "Creteil": "C", "Montpellier": "C", "Nancy-Metz": "C",
-    "Paris": "C", "Toulouse": "C", "Versailles": "C",
-    "AFC": "C",
 }
 def get_zone(dr): return DR_TO_ZONE.get(dr, "C")
 def is_vacances(d, zone, vac):
     for debut, fin in vac.get(zone, []):
         if debut <= d <= fin: return True
     return False
-def get_annee_scolaire(d):
-    return f"{d.year}-{d.year+1}" if d.month >= 9 else f"{d.year-1}-{d.year}"
 def get_periode_vacances(d, vac):
     for zone in ["A","B","C"]:
         for debut, fin in vac.get(zone, []):
@@ -117,387 +97,115 @@ def add_vacances(df):
     df["zone_vacances"] = df["DR"].apply(get_zone)
     df["annee_scolaire"] = df["Date"].apply(lambda d: get_annee_scolaire(d.date()))
     def _vac(row):
-        d = row["Date"].date()
-        return is_vacances(d, row["zone_vacances"], VACANCES.get(row["annee_scolaire"], {}))
     def _per(row):
-        d = row["Date"].date()
-        return get_periode_vacances(d, VACANCES.get(row["annee_scolaire"], {}))
     df["is_vacances_zone"] = df.apply(_vac, axis=1)
     df["periode_vacances"] = df.apply(_per, axis=1)
     return df
 # ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 3 — Métriques (vocabulaire métier)
-# ════════════════════════════════════════════════════════════════════════════════
-def ecart_absolu(y_true, y_pred):
-    return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))
-def ecart_relatif_pct(y_true, y_pred):
-    yt, yp = np.asarray(y_true), np.asarray(y_pred)
-    return np.mean(np.abs((yt - yp) / np.maximum(yt, 1))) * 100
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 4 — Analyse globale avec IPython.display
 # ════════════════════════════════════════════════════════════════════════════════
-def display_md(text):
-    """Affiche du texte/Markdown via IPython.display si dispo."""
-    if IPYTHON_OK:
-        display(Markdown(text))
-    else:
-        print(text)
-def display_df(df, title=None):
-    """Affiche un DataFrame formaté via IPython.display."""
-    if title:
-        display_md(f"### {title}")
-    if IPYTHON_OK:
-        # Style pour mise en évidence
-        styled = df.style.set_properties(**{'text-align': 'center'})
-        styled = styled.set_table_styles([
-            {'selector': 'th', 'props': [('text-align', 'center'), ('font-weight', 'bold'), ('background-color', '#f0f0f0')]}
-        ])
-        display(styled)
-    else:
-        print(df.to_string(index=False))
-def analyse_globale(df):
     dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
-    if len(dfp) == 0:
-        display_md("❌ **Aucune donnée passée avec prédiction valide.**")
-        return None
-    mask_v = dfp["is_vacances_zone"]
-    mask_h = ~mask_v
-    rows = []
-    for mask, label in [(mask_v, "Vacances scolaires"), (mask_h, "Hors vacances")]:
-        sub = dfp[mask]
-        if len(sub) == 0: continue
-        yt, yp = sub["count"].values, sub["prediction_XGB"].values
-        rows.append({
-            "Periode": label,
-            "Nb_jours": len(sub),
-            "Vol_reel": round(yt.mean(), 1),
-            "Vol_pred": round(yp.mean(), 1),
-            "Surprediction_%": round(((yp.mean() - yt.mean()) / max(yt.mean(), 1)) * 100, 1),
-            "Ecart_Absolu": round(ecart_absolu(yt, yp), 1),
-            "Ecart_Relatif_%": round(ecart_relatif_pct(yt, yp), 1),
-        })
-    df_res = pd.DataFrame(rows)
-    display_md("""
-## 📊 EFFET VACANCES SCOLAIRES — RÉSULTATS AVANT CORRECTION
-**Procédure :**
-1. Identification des jours de vacances scolaires par zone (A/B/C)
-2. Comparaison volume réel d'appels vs prédiction XGBoost
-3. Métriques :
-   - **Ecart_Absolu** = erreur moyenne en nombre d'appels/jour
-   - **Ecart_Relatif_%** = erreur moyenne relative (% du volume réel)
-4. Correction = ajustement multiplicatif uniquement sur jours de vacances
-    """)
-    display_df(df_res, "📋 TABLEAU RÉCAPITULATIF")
-    if len(df_res) >= 2:
-        row_v = df_res[df_res["Periode"] == "Vacances scolaires"].iloc[0]
-        row_h = df_res[df_res["Periode"] == "Hors vacances"].iloc[0]
-        baisse = ((row_v["Vol_reel"] - row_h["Vol_reel"]) / max(row_h["Vol_reel"], 1)) * 100
-        display_md(f"""
-## 📈 INTERPRÉTATION MÉTIER
-→ Pendant les vacances scolaires, le volume **baisse de {abs(baisse):.1f}%**
-  - **{row_v['Vol_reel']:.0f}** appels/jour en vacances
-  - **{row_h['Vol_reel']:.0f}** appels/jour hors vacances
-→ Le modèle {'**sur-prédit**' if row_v['Surprediction_%'] > 0 else '**sous-prédit**'}
-  de **{abs(row_v['Surprediction_%']):.1f}%** en période de vacances
-  → Il ne capte pas complètement cette baisse
-→ **Ecart_Absolu** = **{row_v['Ecart_Absolu']:.1f}** appels/jour en vacances
-  (marge d'erreur de **{row_v['Ecart_Relatif_%']:.1f}%** du volume réel)
-        """)
-    return df_res
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 5 — Analyse par sous-type d'accueil
-# ════════════════════════════════════════════════════════════════════════════════
-def analyse_par_sous_type(df):
-    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
-    rows = []
-    for st in sorted(dfp["sous_type_accueil"].dropna().unique()):
-        for periode_label, mask_base in [
-            ("Vacances", dfp["is_vacances_zone"]),
-            ("Hors_vacances", ~dfp["is_vacances_zone"])
-        ]:
-            mask = mask_base & (dfp["sous_type_accueil"] == st)
-            if mask.sum() < 5: continue
-            sub = dfp[mask]
-            yt, yp = sub["count"].values, sub["prediction_XGB"].values
-            rows.append({
-                "Sous_type": st,
-                "Periode": periode_label,
-                "Nb_jours": len(sub),
-                "Vol_reel": round(yt.mean(), 1),
-                "Vol_pred": round(yp.mean(), 1),
-                "Surprediction_%": round(((yp.mean() - yt.mean()) / max(yt.mean(), 1)) * 100, 1),
-                "Ecart_Absolu": round(ecart_absolu(yt, yp), 1),
-                "Ecart_Relatif_%": round(ecart_relatif_pct(yt, yp), 1),
-            })
-    df_st = pd.DataFrame(rows)
-    display_md("## 📊 ANALYSE PAR SOUS-TYPE D'ACCUEIL")
-    if len(df_st) > 0:
-        display_df(df_st, "📋 Détail par sous-type")
-        # Tableau markdown aussi
-        display_md("### 📋 Synthèse par sous-type (compact)")
-        md_lines = ["| Sous-type | Baisse vacances | Ecart Absolu (vac) | Ecart Relatif (vac) |"]
-        md_lines.append("|---|---|---|---|")
-        for st in sorted(df_st["Sous_type"].unique()):
-            sub = df_st[df_st["Sous_type"] == st]
-            vac = sub[sub["Periode"] == "Vacances"]
-            hors = sub[sub["Periode"] == "Hors_vacances"]
-            if len(vac) > 0 and len(hors) > 0:
-                baisse = ((vac.iloc[0]["Vol_reel"] - hors.iloc[0]["Vol_reel"])
-                          / max(hors.iloc[0]["Vol_reel"], 1)) * 100
-                md_lines.append(
-                    f"| **{st}** | {baisse:+.1f}% | {vac.iloc[0]['Ecart_Absolu']:.1f} appels "
-                    f"| {vac.iloc[0]['Ecart_Relatif_%']:.1f}% |"
-                )
-        display_md("\n".join(md_lines))
-    else:
-        display_md("❌ Pas assez de données par sous-type.")
-    return df_st
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 6 — Calcul facteurs + correction
-# ════════════════════════════════════════════════════════════════════════════════
-def calcule_facteurs(df):
-    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
-    facteurs = {}
-    m_v = dfp["is_vacances_zone"]
-    if m_v.sum() > 0:
-        facteurs[("GLOBAL", "ALL")] = dfp.loc[m_v, "count"].mean() / max(dfp.loc[m_v, "prediction_XGB"].mean(), 1)
-    for zone in ["A", "B", "C"]:
-        for st in dfp["sous_type_accueil"].dropna().unique():
-            m = (dfp["zone_vacances"]==zone) & (dfp["sous_type_accueil"]==st) & dfp["is_vacances_zone"]
-            if m.sum() < 3: continue
-            f = dfp.loc[m, "count"].mean() / max(dfp.loc[m, "prediction_XGB"].mean(), 1)
-            facteurs[(zone, st)] = f
-    return facteurs
-def corrige_predictions(df, facteurs):
-    df = df.copy()
-    df["prediction_XGB_corrige"] = df["prediction_XGB"].astype(float)
-    m_v = df["is_vacances_zone"]
-    for zone in ["A", "B", "C"]:
-        for st in df["sous_type_accueil"].dropna().unique():
-            m = m_v & (df["zone_vacances"]==zone) & (df["sous_type_accueil"]==st)
-            if not m.any(): continue
-            f = facteurs.get((zone, st), facteurs.get(("GLOBAL","ALL"), 1.0))
-            df.loc[m, "prediction_XGB_corrige"] = df.loc[m, "prediction_XGB"] * f
-    return df
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 7 — Évaluation avant/après avec IPython.display
-# ════════════════════════════════════════════════════════════════════════════════
-def evalue_correction(df):
-    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
-    rows = []
     for label, mask in [
-        ("Toutes_periodes", pd.Series([True]*len(dfp), index=dfp.index)),
-        ("Vacances", dfp["is_vacances_zone"]),
-        ("Hors_vacances", ~dfp["is_vacances_zone"]),
     ]:
         if mask.sum() < 2: continue
-        yt = dfp.loc[mask, "count"].values
-        y_avant = dfp.loc[mask, "prediction_XGB"].values
-        y_apres = dfp.loc[mask, "prediction_XGB_corrige"].values
-        ea_avant = ecart_absolu(yt, y_avant)
-        ea_apres = ecart_absolu(yt, y_apres)
-        er_avant = ecart_relatif_pct(yt, y_avant)
-        er_apres = ecart_relatif_pct(yt, y_apres)
-        gain = ((ea_avant - ea_apres) / max(ea_avant, 1)) * 100
-        rows.append({
-            "Periode": label,
-            "Nb_jours": mask.sum(),
-            "Ecart_Absolu_avant": round(ea_avant, 2),
-            "Ecart_Absolu_apres": round(ea_apres, 2),
-            "Gain_Ecart_Absolu_%": round(gain, 1),
-            "Ecart_Relatif_%_avant": round(er_avant, 1),
-            "Ecart_Relatif_%_apres": round(er_apres, 1),
         })
-    df_eval = pd.DataFrame(rows)
-    display_md("## 📊 ÉVALUATION : AVANT vs APRÈS CORRECTION")
-    display_df(df_eval, "📋 Résultats")
-    vac_row = df_eval[df_eval["Periode"] == "Vacances"]
-    if len(vac_row) > 0:
-        gain_vac = vac_row.iloc[0]["Gain_Ecart_Absolu_%"]
-        ea_av = vac_row.iloc[0]["Ecart_Absolu_avant"]
-        ea_ap = vac_row.iloc[0]["Ecart_Absolu_apres"]
-        display_md(f"""
-## 📈 INTERPRÉTATION
-→ Sur les jours de vacances :
-  - **Ecart_Absolu** passe de **{ea_av:.2f}** → **{ea_ap:.2f}** appels/jour
-  - **Gain de {gain_vac:.1f}%** sur la précision des prédictions en vacances
-→ Hors vacances : **aucune modification**
-  - La correction ne touche QUE les jours identifiés comme vacances
-→ Le facteur correcteur est appliqué **sans re-entraîner** le modèle
-  - Post-processing uniquement, aucun impact sur le modèle XGBoost
-        """)
-    return df_eval
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 8 — Graphes pour le manager
-# ════════════════════════════════════════════════════════════════════════════════
-def graphes_manager(df, dr_filtre=None, st_filtre=None):
-    if not MATPLOTLIB_OK:
-        display_md("❌ **matplotlib non installé.** `pip install matplotlib`")
-        return
-    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
-    if dr_filtre: dfp = dfp[dfp["DR"] == dr_filtre]
-    if st_filtre: dfp = dfp[dfp["sous_type_accueil"] == st_filtre]
-    dfp = dfp.sort_values("Date")
-    if len(dfp) == 0:
-        display_md("❌ **Pas de données pour ce filtre.**")
-        return
-    fig, axes = plt.subplots(3, 1, figsize=(14, 12))
-    titre = f"DR={dr_filtre}, Type={st_filtre}" if (dr_filtre or st_filtre) else "Global"
-    # --- GRAPHE 1 : Série temporelle ---
-    ax1 = axes[0]
-    ax1.plot(dfp["Date"], dfp["count"], label="Réel", color="black", linewidth=1.5)
-    ax1.plot(dfp["Date"], dfp["prediction_XGB"], label="XGB avant", color="orange", alpha=0.8, linewidth=1)
-    if "prediction_XGB_corrige" in dfp.columns:
-        ax1.plot(dfp["Date"], dfp["prediction_XGB_corrige"], label="XGB corrigé", color="green", alpha=0.8, linewidth=1)
-    vac = dfp[dfp["is_vacances_zone"]]
-    if len(vac) > 0:
-        for _, r in vac.iterrows():
-            ax1.axvline(r["Date"], color="red", alpha=0.03)
-    ax1.set_title(f"Volume d'appels — {titre}", fontsize=12, fontweight='bold')
-    ax1.set_ylabel("Appels / jour")
-    ax1.legend(loc="upper left")
-    ax1.grid(True, alpha=0.3)
-    # --- GRAPHE 2 : Boxplot par période ---
-    ax2 = axes[1]
-    data_box, labels_box, colors_box = [], [], []
-    for periode in ["Toussaint", "Noel", "Hiver", "Printemps", "Ete", "Hors_vacances"]:
-        mask = dfp["periode_vacances"] == periode
-        if mask.sum() < 3: continue
-        data_box.append(dfp.loc[mask, "count"].values)
-        labels_box.append(periode)
-        colors_box.append("lightcoral" if periode != "Hors_vacances" else "lightblue")
-    bp = ax2.boxplot(data_box, labels=labels_box, patch_artist=True)
-    for patch, color in zip(bp["boxes"], colors_box):
-        patch.set_facecolor(color)
-    ax2.set_title("Distribution des volumes par période", fontsize=12, fontweight='bold')
-    ax2.set_ylabel("Appels / jour")
-    ax2.grid(True, alpha=0.3, axis="y")
-    # --- GRAPHE 3 : Erreur avant/après ---
-    ax3 = axes[2]
-    periodes, ea_avant, ea_apres = [], [], []
-    for periode in ["Toussaint", "Noel", "Hiver", "Printemps", "Ete"]:
-        mask = dfp["periode_vacances"] == periode
-        if mask.sum() < 3: continue
         yt = dfp.loc[mask, "count"].values
-        yp_av = dfp.loc[mask, "prediction_XGB"].values
-        periodes.append(periode)
-        ea_avant.append(ecart_absolu(yt, yp_av))
-        if "prediction_XGB_corrige" in dfp.columns:
-            yp_ap = dfp.loc[mask, "prediction_XGB_corrige"].values
-            ea_apres.append(ecart_absolu(yt, yp_ap))
-        else:
-            ea_apres.append(ecart_absolu(yt, yp_av))
-    x = np.arange(len(periodes))
-    width = 0.35
-    bars1 = ax3.bar(x - width/2, ea_avant, width, label="Avant correction", color="orange", alpha=0.8)
-    bars2 = ax3.bar(x + width/2, ea_apres, width, label="Après correction", color="green", alpha=0.8)
-    ax3.set_title("Ecart Absolu par période (avant vs après correction)", fontsize=12, fontweight='bold')
-    ax3.set_ylabel("Ecart Absolu (appels/jour)")
-    ax3.set_xticks(x)
-    ax3.set_xticklabels(periodes)
-    ax3.legend()
-    ax3.grid(True, alpha=0.3, axis="y")
-    # Valeurs sur barres
-    for bar in bars1:
-        height = bar.get_height()
-        ax3.annotate(f'{height:.1f}', xy=(bar.get_x() + bar.get_width() / 2, height),
-                    xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=8)
-    for bar in bars2:
-        height = bar.get_height()
-        ax3.annotate(f'{height:.1f}', xy=(bar.get_x() + bar.get_width() / 2, height),
-                    xytext=(0, 3), textcoords="offset points", ha='center', va='bottom', fontsize=8)
-    plt.tight_layout()
-    plt.show()
-    display_md("💾 **Sauvegarde :** `plt.savefig('vacances_manager.png', dpi=150, bbox_inches='tight')`")
-# ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 9 — Pipeline complet
-# ════════════════════════════════════════════════════════════════════════════════
-def pipeline_manager(df):
-    display_md("""
-🔵══════════════════════════════════════════════════════════════════════════════🔵
-## ANALYSE VACANCES SCOLAIRES — RAPPORT MANAGER
-🔵══════════════════════════════════════════════════════════════════════════════🔵
-    """)
-    df_global = analyse_globale(df)
-    df_st = analyse_par_sous_type(df)
-    if MATPLOTLIB_OK:
-        display_md("### 📊 Génération des graphes...")
-        graphes_manager(df)
-    facteurs = calcule_facteurs(df)
-    display_md(f"""
-### 🔧 Facteur correcteur
-- **Global** = `{facteurs.get(('GLOBAL','ALL'), 1.0):.4f}`
-- Formule = Volume_reel_vacances / Volume_pred_vacances
-    """)
-    df = corrige_predictions(df, facteurs)
-    df_eval = evalue_correction(df)
-    return df, df_global, df_st, df_eval, facteurs
 # ════════════════════════════════════════════════════════════════════════════════
-# CELLULE 10 — Exécution
 # ════════════════════════════════════════════════════════════════════════════════
-# df = add_vacances(df)
-# df, global_res, st_res, eval_res, facteurs = pipeline_manager(df)

 # ╔══════════════════════════════════════════════════════════════════════════════╗
+# ║  NOTEBOOK MANAGER — Impact Vacances Scolaires sur Prédictions XGB            ║
+# ║  Version simple : avant/après correction à destination du manager            ║
 # ╚══════════════════════════════════════════════════════════════════════════════╝
 # ════════════════════════════════════════════════════════════════════════════════
 import warnings
 warnings.filterwarnings("ignore")
 # ════════════════════════════════════════════════════════════════════════════════
+# CELLULE 2 — Calendrier vacances scolaires + mapping DR→zone
 # ════════════════════════════════════════════════════════════════════════════════
 VACANCES = {
     "2023-2024": {
         "A": [(date(2023,10,21),date(2023,11,5)), (date(2023,12,23),date(2024,1,7)),
     },
     "2026-2027": {
         "A": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
+              (date(2027,2,13),date(2027,2,28)), (date(2027,4,3),date(2027,4,18)),
               (date(2027,7,3),date(2027,8,31))],
         "B": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
+              (date(2027,2,20),date(2027,3,7)), (date(2027,4,10),date(2027,4,25)),
               (date(2027,7,3),date(2027,8,31))],
         "C": [(date(2026,10,17),date(2026,11,1)), (date(2026,12,19),date(2027,1,3)),
+              (date(2027,2,6),date(2027,2,21)), (date(2027,3,27),date(2027,4,11)),
               (date(2027,7,3),date(2027,8,31))],
     },
 }
 DR_TO_ZONE = {
+    "SIR": "A", "AUV": "A", "ALP": "A", "PCH": "A", "LIM": "A",
+    "AQN": "A", "PYL": "A", "BRG": "A", "AFC": "A",
+    "PIC": "B", "NPC": "B", "PAS": "B", "CAZ": "B", "CAR": "B",
+    "NOR": "B", "BRE": "B", "CEN": "B", "PDL": "B",
+    "LOR": "C", "MPS": "C", "LRO": "C", "NMP": "C",
+    "PAR": "C", "IFE": "C", "IFO": "C",
 }
 def get_zone(dr): return DR_TO_ZONE.get(dr, "C")
+def get_annee_scolaire(d): return f"{d.year}-{d.year+1}" if d.month >= 9 else f"{d.year-1}-{d.year}"
 def is_vacances(d, zone, vac):
     for debut, fin in vac.get(zone, []):
         if debut <= d <= fin: return True
     return False
 def get_periode_vacances(d, vac):
     for zone in ["A","B","C"]:
         for debut, fin in vac.get(zone, []):
     df["zone_vacances"] = df["DR"].apply(get_zone)
     df["annee_scolaire"] = df["Date"].apply(lambda d: get_annee_scolaire(d.date()))
     def _vac(row):
+        return is_vacances(row["Date"].date(), row["zone_vacances"], VACANCES.get(row["annee_scolaire"], {}))
     def _per(row):
+        return get_periode_vacances(row["Date"].date(), VACANCES.get(row["annee_scolaire"], {}))
     df["is_vacances_zone"] = df.apply(_vac, axis=1)
     df["periode_vacances"] = df.apply(_per, axis=1)
     return df
 # ════════════════════════════════════════════════════════════════════════════════
+# CELLULE 3 — Rapport simple pour le manager
 # ════════════════════════════════════════════════════════════════════════════════
+def rapport_manager(df):
+    """
+    Rapport simple : montre la baisse des prédictions après post-processing.
+    """
+    df = add_vacances(df)
     dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
+    if len(dfp) == 0:
+        print("❌ Aucune donnée passée avec prédiction valide.")
+        return df
+    m_v = dfp["is_vacances_zone"]
+    if m_v.sum() == 0:
+        print("❌ Aucun jour de vacances trouvé.")
+        return df
+    facteur_global = dfp.loc[m_v, "count"].mean() / max(dfp.loc[m_v, "prediction_XGB"].mean(), 1)
+    df["prediction_XGB_corrige"] = df["prediction_XGB"].astype(float)
+    m_v_all = df["is_vacances_zone"] & df["prediction_XGB"].notna()
+    df.loc[m_v_all, "prediction_XGB_corrige"] = df.loc[m_v_all, "prediction_XGB"] * facteur_global
+    print("=" * 65)
+    print("📊 IMPACT POST-PROCESSING VACANCES SCOLAIRES")
+    print("=" * 65)
+    print(f"\n📅 Données analysées : {len(dfp):,} jours passés")
+    print(f"🏖️  Jours en vacances  : {m_v.sum():,}")
+    print(f"📚 Jours hors vacances : {(~m_v).sum():,}")
+    print(f"\n{'─'*65}")
+    print("📉 VOLUMES MOYENS PRÉDITS — AVANT vs APRÈS CORRECTION")
+    print(f"{'─'*65}")
+    res = []
     for label, mask in [
+        ("Vacances scolaires", dfp["is_vacances_zone"]),
+        ("Hors vacances", ~dfp["is_vacances_zone"]),
     ]:
         if mask.sum() < 2: continue
+        sub = dfp[mask]
+        avant = sub["prediction_XGB"].mean()
+        apres = sub["prediction_XGB_corrige"].mean()
+        baisse = ((apres - avant) / max(avant, 1)) * 100
+        res.append({
+            "Période": label,
+            "n jours": int(mask.sum()),
+            "Avant correction": round(avant, 1),
+            "Après correction": round(apres, 1),
+            "Différence": f"{baisse:+.1f}%"
         })
+    print(pd.DataFrame(res).to_string(index=False))
+    if m_v.sum() > 0 and (~m_v).sum() > 0:
+        baisse_reelle = ((dfp.loc[m_v, "count"].mean() - dfp.loc[~m_v, "count"].mean())
+                         / max(dfp.loc[~m_v, "count"].mean(), 1)) * 100
+        print(f"\n🔴 Baisse RÉELLE des volumes en vacances : {baisse_reelle:.1f}%")
+        print(f"💡 Correction appliquée : facteur ×{facteur_global:.4f} (baisse de {(1-facteur_global)*100:.1f}%)")
+    def mae(y_true, y_pred):
+        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))
+    print(f"\n{'─'*65}")
+    print("📈 PRÉCISION (MAE) — AVANT vs APRÈS")
+    print(f"{'─'*65}")
+    res_mae = []
+    for label, mask in [
+        ("Toutes périodes", pd.Series([True]*len(dfp), index=dfp.index)),
+        ("Vacances scolaires", dfp["is_vacances_zone"]),
+        ("Hors vacances", ~dfp["is_vacances_zone"]),
+    ]:
+        if mask.sum() < 2: continue
         yt = dfp.loc[mask, "count"].values
+        mae_avant = mae(yt, dfp.loc[mask, "prediction_XGB"].values)
+        mae_apres = mae(yt, dfp.loc[mask, "prediction_XGB_corrige"].values)
+        gain = ((mae_avant - mae_apres) / max(mae_avant, 1)) * 100
+        res_mae.append({
+            "Période": label,
+            "MAE avant": round(mae_avant, 2),
+            "MAE après": round(mae_apres, 2),
+            "Gain": f"{gain:+.1f}%"
+        })
+    print(pd.DataFrame(res_mae).to_string(index=False))
+    print(f"\n{'='*65}")
+    print("✅ RÉSUMÉ")
+    print(f"{'='*65}")
+    print(f"   • Facteur correcteur : ×{facteur_global:.4f}")
+    print(f"   • Appliqué sur : {m_v_all.sum():,} jours en vacances (passés + futurs)")
+    print(f"   • Hors vacances : inchangé")
+    print(f"   • Impact : les prédictions en vacances sont corrigées à la baisse")
+    print(f"     pour refléter la baisse réelle observée sur le passé.")
+    return df
 # ════════════════════════════════════════════════════════════════════════════════
+# CELLULE 4 — Exécution
 # ════════════════════════════════════════════════════════════════════════════════
+# df = rapport_manager(df)