# Source: correction-vacances-scolaires / notebook_manager.py
# madm96's picture
# Upload notebook_manager.py
# 2ecce3e verified
# ╔══════════════════════════════════════════════════════════════════════════════╗
# ║ NOTEBOOK MANAGER — Impact Vacances Scolaires sur Prédictions XGB ║
# ║ Version simple : avant/après correction à destination du manager ║
# ╚══════════════════════════════════════════════════════════════════════════════╝
# ════════════════════════════════════════════════════════════════════════════════
# CELLULE 1 — Imports
# ════════════════════════════════════════════════════════════════════════════════
import pandas as pd
import numpy as np
from datetime import date
import warnings
warnings.filterwarnings("ignore")
# ════════════════════════════════════════════════════════════════════════════════
# CELLULE 2 — Calendrier vacances scolaires + mapping DR→zone
# ════════════════════════════════════════════════════════════════════════════════
# School-holiday calendar.
# Layout: school year ("YYYY-YYYY") -> zone ("A" / "B" / "C") -> list of
# (first_day, last_day) pairs. Within each zone the periods are listed in the
# order autumn, Christmas, winter, spring, summer.
# NOTE(review): these dates are hand-entered. Several zone-specific winter and
# spring periods (e.g. zones A and B in 2024-2025) look inconsistent with the
# published official calendar — TODO confirm before relying on them.
VACANCES = {
    "2023-2024": {
        "A": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 17), date(2024, 3, 3)),
            (date(2024, 4, 13), date(2024, 4, 28)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "B": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 24), date(2024, 3, 10)),
            (date(2024, 4, 20), date(2024, 5, 5)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "C": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 10), date(2024, 2, 25)),
            (date(2024, 4, 6), date(2024, 4, 21)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
    },
    "2024-2025": {
        "A": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 8), date(2025, 2, 23)),
            (date(2025, 4, 5), date(2025, 4, 20)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "B": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 22), date(2025, 3, 9)),
            (date(2025, 4, 19), date(2025, 5, 4)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "C": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 15), date(2025, 3, 2)),
            (date(2025, 4, 12), date(2025, 4, 27)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
    },
    "2025-2026": {
        "A": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 14), date(2026, 3, 1)),
            (date(2026, 4, 4), date(2026, 4, 19)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "B": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 21), date(2026, 3, 8)),
            (date(2026, 4, 11), date(2026, 4, 26)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "C": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 7), date(2026, 2, 22)),
            (date(2026, 3, 28), date(2026, 4, 12)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
    },
    "2026-2027": {
        "A": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 13), date(2027, 2, 28)),
            (date(2027, 4, 3), date(2027, 4, 18)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "B": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 20), date(2027, 3, 7)),
            (date(2027, 4, 10), date(2027, 4, 25)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "C": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 6), date(2027, 2, 21)),
            (date(2027, 3, 27), date(2027, 4, 11)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
    },
}
# Holiday zone ("A" / "B" / "C") for each DR code, declared zone by zone and
# flattened into a DR -> zone lookup.
DR_TO_ZONE = {
    code_dr: zone
    for zone, codes in (
        ("A", ("SIR", "AUV", "ALP", "PCH", "LIM", "AQN", "PYL", "BRG", "AFC")),
        ("B", ("PIC", "NPC", "PAS", "CAZ", "CAR", "NOR", "BRE", "CEN", "PDL")),
        ("C", ("LOR", "MPS", "LRO", "NMP", "PAR", "IFE", "IFO")),
    )
    for code_dr in codes
}
def get_zone(dr):
    """Return the holiday zone of DR code ``dr``.

    Codes absent from ``DR_TO_ZONE`` fall back to zone "C".
    """
    if dr in DR_TO_ZONE:
        return DR_TO_ZONE[dr]
    return "C"
def get_annee_scolaire(d):
    """Return the school-year label ``"YYYY-YYYY"`` that contains ``d``.

    A school year starts in September: months 9-12 belong to the year that
    starts in ``d.year``, months 1-8 to the one that started the year before.
    """
    annee_debut = d.year if d.month >= 9 else d.year - 1
    return f"{annee_debut}-{annee_debut + 1}"
def is_vacances(d, zone, vac):
    """Tell whether ``d`` falls inside one of ``zone``'s holiday periods.

    Args:
        d: The day to test.
        zone: Zone key to look up in ``vac``.
        vac: Mapping zone -> list of ``(first_day, last_day)`` pairs.

    Returns:
        True when ``d`` lies within a period (both bounds included), False
        otherwise, including when ``zone`` is missing from ``vac``.
    """
    return any(premier <= d <= dernier for premier, dernier in vac.get(zone, []))
def get_periode_vacances(d, vac):
    """Return the name of the holiday period that contains ``d`` in any zone.

    Zones are scanned in the order A, B, C and the first period containing
    ``d`` that can be named wins. The name is derived from the month of the
    period's midpoint rather than from the month of ``d`` itself, so every
    day of a given period gets the same label even when the period straddles
    a month boundary (a spring break starting in late March, a summer break
    ending on 1 September, ...).

    Args:
        d: The day to classify.
        vac: Mapping zone -> list of ``(first_day, last_day)`` pairs.

    Returns:
        One of "Toussaint", "Noel", "Hiver", "Printemps", "Ete", or
        "Hors_vacances" when no nameable period contains ``d``.
    """
    noms_par_mois = {
        10: "Toussaint", 11: "Toussaint",
        12: "Noel", 1: "Noel",
        2: "Hiver", 3: "Hiver",
        4: "Printemps", 5: "Printemps",
        7: "Ete", 8: "Ete",
    }
    for zone in ("A", "B", "C"):
        for debut, fin in vac.get(zone, []):
            if not debut <= d <= fin:
                continue
            # date + timedelta only uses whole days, so this is the middle day.
            milieu = debut + (fin - debut) // 2
            nom = noms_par_mois.get(milieu.month)
            if nom is not None:
                return nom
            # Period centred on a month without a name: keep searching.
    return "Hors_vacances"
def add_vacances(df):
    """Return a copy of ``df`` enriched with school-holiday columns.

    Added / rewritten columns:
        Date: parsed with ``pd.to_datetime`` and made timezone-naive.
        zone_vacances: holiday zone of the row's ``DR`` (via ``get_zone``).
        annee_scolaire: school-year label of the row's date.
        is_vacances_zone: True when the date is a holiday in the row's zone.
        periode_vacances: holiday name from ``get_periode_vacances`` (any
            zone), or "Hors_vacances".

    Args:
        df: DataFrame with at least the columns ``Date`` and ``DR``.

    Returns:
        A new DataFrame; the input is not modified.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"]).dt.tz_localize(None)
    df["zone_vacances"] = df["DR"].apply(get_zone)

    jours = [horodatage.date() for horodatage in df["Date"]]
    df["annee_scolaire"] = [get_annee_scolaire(jour) for jour in jours]

    # Match against the periods of every school year at once. Looking only
    # under the row's own school-year key misses periods that run past the
    # September cut-off of get_annee_scolaire (the 2023-2024 summer break ends
    # on 2024-09-01, which is labelled "2024-2025").
    calendrier = {}
    for zones in VACANCES.values():
        for zone, periodes in zones.items():
            calendrier.setdefault(zone, []).extend(periodes)

    # Plain comprehensions instead of DataFrame.apply(axis=1): same values,
    # and the columns are still created correctly for an empty frame.
    df["is_vacances_zone"] = np.array(
        [is_vacances(jour, zone, calendrier)
         for jour, zone in zip(jours, df["zone_vacances"])],
        dtype=bool,
    )
    df["periode_vacances"] = [get_periode_vacances(jour, calendrier) for jour in jours]
    return df
# ════════════════════════════════════════════════════════════════════════════════
# CELLULE 3 — Rapport simple pour le manager
# ════════════════════════════════════════════════════════════════════════════════
def rapport_manager(df):
    """Print a before/after report of the school-holiday correction.

    The correction multiplies ``prediction_XGB`` on holiday rows by a single
    global factor: mean ``count`` divided by mean ``prediction_XGB``, both
    taken over "past" holiday rows (rows with ``count > 0`` and a non-null
    prediction). Rows outside holidays are left unchanged.

    The report (printed to stdout, in French) shows the mean predicted volume
    and the MAE against ``count`` before and after the correction.

    Args:
        df: DataFrame with at least the columns ``Date``, ``DR``, ``count``
            and ``prediction_XGB``.

    Returns:
        The frame produced by ``add_vacances``. When the report could be
        computed it also carries ``prediction_XGB_corrige``; when there is no
        past row or no past holiday row, a message is printed and the frame
        is returned without that column.
    """
    def _variation_pct(ecart, base):
        # Relative change in percent; undefined (NaN) when the base is zero.
        return (ecart / abs(base)) * 100 if base else float("nan")

    def mae(y_true, y_pred):
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))

    df = add_vacances(df)

    # "Past" rows: an observed volume exists and the model gave a prediction.
    passe = (df["count"] > 0) & df["prediction_XGB"].notna()
    if not passe.any():
        print("❌ Aucune donnée passée avec prédiction valide.")
        return df

    en_vacances = df["is_vacances_zone"].astype(bool)
    vacances_passees = passe & en_vacances
    if not vacances_passees.any():
        print("❌ Aucun jour de vacances trouvé.")
        return df

    moyenne_reelle = df.loc[vacances_passees, "count"].mean()
    moyenne_predite = df.loc[vacances_passees, "prediction_XGB"].mean()
    # A multiplicative factor is only meaningful for a positive mean
    # prediction; otherwise leave the predictions untouched.
    facteur_global = moyenne_reelle / moyenne_predite if moyenne_predite > 0 else 1.0

    df["prediction_XGB_corrige"] = df["prediction_XGB"].astype(float)
    m_v_all = en_vacances & df["prediction_XGB"].notna()
    df.loc[m_v_all, "prediction_XGB_corrige"] *= facteur_global

    # Build the evaluation subset only now, so that it contains the corrected
    # column used by every table below.
    dfp = df.loc[passe].copy()
    m_v = dfp["is_vacances_zone"].to_numpy(dtype=bool)
    n_vacances = int(m_v.sum())
    n_hors = int((~m_v).sum())

    print("=" * 65)
    print("📊 IMPACT POST-PROCESSING VACANCES SCOLAIRES")
    print("=" * 65)
    print(f"\n📅 Données analysées : {len(dfp):,} jours passés")
    print(f"🏖️ Jours en vacances : {n_vacances:,}")
    print(f"📚 Jours hors vacances : {n_hors:,}")

    print(f"\n{'─'*65}")
    print("📉 VOLUMES MOYENS PRÉDITS — AVANT vs APRÈS CORRECTION")
    print(f"{'─'*65}")
    res = []
    for label, mask in [
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        if mask.sum() < 2:
            continue
        sub = dfp.loc[mask]
        avant = sub["prediction_XGB"].mean()
        apres = sub["prediction_XGB_corrige"].mean()
        baisse = _variation_pct(apres - avant, avant)
        res.append({
            "Période": label,
            "n jours": int(mask.sum()),
            "Avant correction": round(avant, 1),
            "Après correction": round(apres, 1),
            "Différence": f"{baisse:+.1f}%",
        })
    print(pd.DataFrame(res).to_string(index=False))

    if n_vacances > 0 and n_hors > 0:
        reel_vacances = dfp.loc[m_v, "count"].mean()
        reel_hors = dfp.loc[~m_v, "count"].mean()
        baisse_reelle = _variation_pct(reel_vacances - reel_hors, reel_hors)
        print(f"\n🔴 Baisse RÉELLE des volumes en vacances : {baisse_reelle:.1f}%")
        print(f"💡 Correction appliquée : facteur ×{facteur_global:.4f} (baisse de {(1-facteur_global)*100:.1f}%)")

    print(f"\n{'─'*65}")
    print("📈 PRÉCISION (MAE) — AVANT vs APRÈS")
    print(f"{'─'*65}")
    res_mae = []
    for label, mask in [
        ("Toutes périodes", np.ones(len(dfp), dtype=bool)),
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        if mask.sum() < 2:
            continue
        yt = dfp.loc[mask, "count"].values
        mae_avant = mae(yt, dfp.loc[mask, "prediction_XGB"].values)
        mae_apres = mae(yt, dfp.loc[mask, "prediction_XGB_corrige"].values)
        gain = _variation_pct(mae_avant - mae_apres, mae_avant)
        res_mae.append({
            "Période": label,
            "MAE avant": round(mae_avant, 2),
            "MAE après": round(mae_apres, 2),
            "Gain": f"{gain:+.1f}%",
        })
    print(pd.DataFrame(res_mae).to_string(index=False))

    print(f"\n{'='*65}")
    print("✅ RÉSUMÉ")
    print(f"{'='*65}")
    print(f" • Facteur correcteur : ×{facteur_global:.4f}")
    print(f" • Appliqué sur : {int(m_v_all.sum()):,} jours en vacances (passés + futurs)")
    print(" • Hors vacances : inchangé")
    print(" • Impact : les prédictions en vacances sont corrigées à la baisse")
    print(" pour refléter la baisse réelle observée sur le passé.")
    return df
# ════════════════════════════════════════════════════════════════════════════════
# CELLULE 4 — Exécution
# ════════════════════════════════════════════════════════════════════════════════
# df = rapport_manager(df)