| |
| |
| |
| |
|
|
| |
| |
| |
| import pandas as pd |
| import numpy as np |
| from datetime import date |
| import warnings |
| warnings.filterwarnings("ignore") |
|
|
| |
| |
| |
# School-holiday calendar, keyed by school year ("YYYY-YYYY") and then by
# zone ("A", "B", "C").  Each zone maps to a list of (first day, last day)
# pairs; both bounds are treated as inclusive by the lookup helpers.
#
# NOTE(review): the 2024-2025 winter and spring windows of zones A and B look
# swapped compared with the official calendar, and the later years may have
# been extrapolated the same way -- confirm against the published calendar
# before relying on these dates.  Values are reproduced unchanged here.
VACANCES = {
    "2023-2024": {
        "A": [
            (date(2023, 10, 21), date(2023, 11, 5)),   # Toussaint
            (date(2023, 12, 23), date(2024, 1, 7)),    # Noel
            (date(2024, 2, 17), date(2024, 3, 3)),     # Hiver
            (date(2024, 4, 13), date(2024, 4, 28)),    # Printemps
            (date(2024, 7, 6), date(2024, 9, 1)),      # Ete
        ],
        "B": [
            (date(2023, 10, 21), date(2023, 11, 5)),   # Toussaint
            (date(2023, 12, 23), date(2024, 1, 7)),    # Noel
            (date(2024, 2, 24), date(2024, 3, 10)),    # Hiver
            (date(2024, 4, 20), date(2024, 5, 5)),     # Printemps
            (date(2024, 7, 6), date(2024, 9, 1)),      # Ete
        ],
        "C": [
            (date(2023, 10, 21), date(2023, 11, 5)),   # Toussaint
            (date(2023, 12, 23), date(2024, 1, 7)),    # Noel
            (date(2024, 2, 10), date(2024, 2, 25)),    # Hiver
            (date(2024, 4, 6), date(2024, 4, 21)),     # Printemps
            (date(2024, 7, 6), date(2024, 9, 1)),      # Ete
        ],
    },
    "2024-2025": {
        "A": [
            (date(2024, 10, 19), date(2024, 11, 3)),   # Toussaint
            (date(2024, 12, 21), date(2025, 1, 5)),    # Noel
            (date(2025, 2, 8), date(2025, 2, 23)),     # Hiver
            (date(2025, 4, 5), date(2025, 4, 20)),     # Printemps
            (date(2025, 7, 5), date(2025, 8, 31)),     # Ete
        ],
        "B": [
            (date(2024, 10, 19), date(2024, 11, 3)),   # Toussaint
            (date(2024, 12, 21), date(2025, 1, 5)),    # Noel
            (date(2025, 2, 22), date(2025, 3, 9)),     # Hiver
            (date(2025, 4, 19), date(2025, 5, 4)),     # Printemps
            (date(2025, 7, 5), date(2025, 8, 31)),     # Ete
        ],
        "C": [
            (date(2024, 10, 19), date(2024, 11, 3)),   # Toussaint
            (date(2024, 12, 21), date(2025, 1, 5)),    # Noel
            (date(2025, 2, 15), date(2025, 3, 2)),     # Hiver
            (date(2025, 4, 12), date(2025, 4, 27)),    # Printemps
            (date(2025, 7, 5), date(2025, 8, 31)),     # Ete
        ],
    },
    "2025-2026": {
        "A": [
            (date(2025, 10, 18), date(2025, 11, 2)),   # Toussaint
            (date(2025, 12, 20), date(2026, 1, 4)),    # Noel
            (date(2026, 2, 14), date(2026, 3, 1)),     # Hiver
            (date(2026, 4, 4), date(2026, 4, 19)),     # Printemps
            (date(2026, 7, 4), date(2026, 8, 31)),     # Ete
        ],
        "B": [
            (date(2025, 10, 18), date(2025, 11, 2)),   # Toussaint
            (date(2025, 12, 20), date(2026, 1, 4)),    # Noel
            (date(2026, 2, 21), date(2026, 3, 8)),     # Hiver
            (date(2026, 4, 11), date(2026, 4, 26)),    # Printemps
            (date(2026, 7, 4), date(2026, 8, 31)),     # Ete
        ],
        "C": [
            (date(2025, 10, 18), date(2025, 11, 2)),   # Toussaint
            (date(2025, 12, 20), date(2026, 1, 4)),    # Noel
            (date(2026, 2, 7), date(2026, 2, 22)),     # Hiver
            (date(2026, 3, 28), date(2026, 4, 12)),    # Printemps
            (date(2026, 7, 4), date(2026, 8, 31)),     # Ete
        ],
    },
    "2026-2027": {
        "A": [
            (date(2026, 10, 17), date(2026, 11, 1)),   # Toussaint
            (date(2026, 12, 19), date(2027, 1, 3)),    # Noel
            (date(2027, 2, 13), date(2027, 2, 28)),    # Hiver
            (date(2027, 4, 3), date(2027, 4, 18)),     # Printemps
            (date(2027, 7, 3), date(2027, 8, 31)),     # Ete
        ],
        "B": [
            (date(2026, 10, 17), date(2026, 11, 1)),   # Toussaint
            (date(2026, 12, 19), date(2027, 1, 3)),    # Noel
            (date(2027, 2, 20), date(2027, 3, 7)),     # Hiver
            (date(2027, 4, 10), date(2027, 4, 25)),    # Printemps
            (date(2027, 7, 3), date(2027, 8, 31)),     # Ete
        ],
        "C": [
            (date(2026, 10, 17), date(2026, 11, 1)),   # Toussaint
            (date(2026, 12, 19), date(2027, 1, 3)),    # Noel
            (date(2027, 2, 6), date(2027, 2, 21)),     # Hiver
            (date(2027, 3, 27), date(2027, 4, 11)),    # Printemps
            (date(2027, 7, 3), date(2027, 8, 31)),     # Ete
        ],
    },
}
|
|
# Holiday zone of each DR code, built from one code list per zone so that the
# membership of a zone can be read at a glance.
# NOTE(review): if "LOR" and "AFC" stand for Lorraine and Alsace /
# Franche-Comte, their zone assignment deserves a check -- confirm with the
# data owner.
DR_TO_ZONE = {
    code: zone
    for zone, codes in (
        ("A", ("SIR", "AUV", "ALP", "PCH", "LIM", "AQN", "PYL", "BRG", "AFC")),
        ("B", ("PIC", "NPC", "PAS", "CAZ", "CAR", "NOR", "BRE", "CEN", "PDL")),
        ("C", ("LOR", "MPS", "LRO", "NMP", "PAR", "IFE", "IFO")),
    )
    for code in codes
}
|
|
def get_zone(dr):
    """Return the holiday zone ("A", "B" or "C") registered for a DR code.

    Codes that are absent from ``DR_TO_ZONE`` fall back to zone "C".
    """
    zone = DR_TO_ZONE.get(dr)
    return "C" if zone is None else zone
def get_annee_scolaire(d):
    """Return the school-year label ("YYYY-YYYY") that contains ``d``.

    A school year is taken to start in September: dates from September to
    December belong to the year starting that calendar year, dates from
    January to August to the year that started the previous calendar year.
    """
    premiere_annee = d.year if d.month >= 9 else d.year - 1
    return f"{premiere_annee}-{premiere_annee + 1}"
|
|
def is_vacances(d, zone, vac):
    """Tell whether ``d`` falls inside a holiday period of ``zone``.

    ``vac`` maps a zone to a list of ``(debut, fin)`` pairs; both bounds are
    inclusive.  A zone missing from ``vac`` has no holidays.
    """
    periodes = vac.get(zone, [])
    return any(debut <= d <= fin for debut, fin in periodes)
|
|
def get_periode_vacances(d, vac):
    """Return the name of the holiday period that contains ``d``.

    Zones are scanned in the order A, B, C and the first period containing
    ``d`` decides the label, so the result tells whether *any* zone is on
    holiday on that day, not a particular one.

    The label is derived from the month in which the matching period
    *starts*, not from the month of ``d`` itself.  This keeps a spring break
    that begins in late March labelled "Printemps" (instead of "Hiver") and a
    summer break that runs into early September labelled "Ete" (instead of
    "Hors_vacances").

    Args:
        d: The ``datetime.date`` to classify.
        vac: Mapping zone -> list of ``(debut, fin)`` date pairs, both bounds
            inclusive.

    Returns:
        One of "Toussaint", "Noel", "Hiver", "Printemps", "Ete", or
        "Hors_vacances" when no period contains ``d``.
    """
    periode_par_mois_debut = {
        10: "Toussaint", 11: "Toussaint",
        12: "Noel", 1: "Noel",
        2: "Hiver",
        3: "Printemps", 4: "Printemps", 5: "Printemps",
        6: "Ete", 7: "Ete", 8: "Ete",
    }
    for zone in ("A", "B", "C"):
        for debut, fin in vac.get(zone, []):
            if debut <= d <= fin:
                periode = periode_par_mois_debut.get(debut.month)
                # A period starting in an unmapped month (September) is
                # skipped and the scan continues, as before.
                if periode is not None:
                    return periode
    return "Hors_vacances"
|
|
def add_vacances(df):
    """Return a copy of ``df`` enriched with school-holiday columns.

    Reads the ``Date`` and ``DR`` columns.  ``Date`` is converted to
    timezone-naive timestamps, and four columns are added:

    * ``zone_vacances``    -- zone returned by ``get_zone`` for the row's DR;
    * ``annee_scolaire``   -- label returned by ``get_annee_scolaire``;
    * ``is_vacances_zone`` -- True when the date is a holiday in the row's zone;
    * ``periode_vacances`` -- label returned by ``get_periode_vacances``
      (a holiday in any zone), or "Hors_vacances".

    The summer break is stored under the school year that is ending and can
    run into September (2023-2024 ends on 2024-09-01), while
    ``get_annee_scolaire`` already assigns September dates to the new school
    year.  September dates are therefore also checked against the previous
    school year's calendar so those days are not missed.

    Args:
        df: DataFrame with at least ``Date`` and ``DR`` columns.

    Returns:
        A new DataFrame; the input is not modified.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"]).dt.tz_localize(None)
    df["zone_vacances"] = df["DR"].apply(get_zone)

    # Plain date objects, computed once and reused for every lookup below.
    jours = list(df["Date"].dt.date)
    annees = [get_annee_scolaire(jour) for jour in jours]
    df["annee_scolaire"] = pd.Series(annees, index=df.index, dtype=object)

    def _calendriers(jour, annee):
        """Calendars that may contain ``jour``: its school year, plus the
        previous one for September dates (tail of the summer break)."""
        calendriers = [VACANCES.get(annee, {})]
        if jour.month == 9:
            calendriers.append(VACANCES.get(f"{jour.year - 1}-{jour.year}", {}))
        return calendriers

    en_vacances = []
    periodes = []
    for jour, zone, annee in zip(jours, df["zone_vacances"], annees):
        calendriers = _calendriers(jour, annee)
        en_vacances.append(any(is_vacances(jour, zone, cal) for cal in calendriers))

        periode = "Hors_vacances"
        for cal in calendriers:
            periode = get_periode_vacances(jour, cal)
            if periode != "Hors_vacances":
                break
        periodes.append(periode)

    # Explicit dtypes keep the columns well-typed even for an empty frame.
    df["is_vacances_zone"] = pd.Series(en_vacances, index=df.index, dtype=bool)
    df["periode_vacances"] = pd.Series(periodes, index=df.index, dtype=object)
    return df
|
|
| |
| |
| |
|
|
def rapport_manager(df):
    """Print a manager-facing report on the school-holiday post-processing.

    The frame is first enriched by ``add_vacances``.  A single global factor
    is estimated on the evaluation rows (``count`` > 0 and a non-null
    ``prediction_XGB``, reported as "jours passés") that fall in holidays:
    mean observed ``count`` divided by mean ``prediction_XGB``, with the
    denominator floored at 1.  Every holiday-day prediction, evaluation row
    or not, is multiplied by that factor and stored in
    ``prediction_XGB_corrige``; other rows keep their prediction.  Mean
    predicted volumes and MAE before/after the correction are printed to
    stdout.

    Args:
        df: DataFrame with ``count`` and ``prediction_XGB`` columns, plus the
            ``Date`` and ``DR`` columns read by ``add_vacances``.

    Returns:
        The enriched copy of ``df``.  ``prediction_XGB_corrige`` is present
        only when a factor could be estimated; on the two early exits (no
        evaluation row, or no evaluation row in holidays) the frame is
        returned without it.
    """
    # NOTE(review): the report strings were restored from mis-decoded text.
    # Accented letters and typographic symbols are unambiguous; pictograms
    # whose bytes were lost were re-chosen on a best-effort basis -- confirm
    # against the original file if the exact glyphs matter.
    df = add_vacances(df)

    passe = (df["count"] > 0) & df["prediction_XGB"].notna()
    if not passe.any():
        print("⚠ Aucune donnée passée avec prédiction valide.")
        return df

    vacances_passees = passe & df["is_vacances_zone"]
    if not vacances_passees.any():
        print("⚠ Aucun jour de vacances trouvé.")
        return df

    facteur_global = (
        df.loc[vacances_passees, "count"].mean()
        / max(df.loc[vacances_passees, "prediction_XGB"].mean(), 1)
    )

    # The corrected column must exist BEFORE the evaluation subset is taken:
    # the tables below read it from that subset.
    prediction = df["prediction_XGB"].astype(float)
    m_v_all = df["is_vacances_zone"] & df["prediction_XGB"].notna()
    df["prediction_XGB_corrige"] = prediction.where(~m_v_all, prediction * facteur_global)

    dfp = df[passe].copy()
    m_v = dfp["is_vacances_zone"]

    cadre = "=" * 65
    filet = "─" * 65

    print(cadre)
    print("📊 IMPACT POST-PROCESSING VACANCES SCOLAIRES")
    print(cadre)
    print(f"\n📅 Données analysées : {len(dfp):,} jours passés")
    print(f"🏖️ Jours en vacances : {m_v.sum():,}")
    print(f"📆 Jours hors vacances : {(~m_v).sum():,}")

    print(f"\n{filet}")
    print("📉 VOLUMES MOYENS PRÉDITS — AVANT vs APRÈS CORRECTION")
    print(filet)

    res = []
    for label, mask in [
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        # A mean over fewer than two days is not reported.
        if mask.sum() < 2:
            continue
        sub = dfp[mask]
        avant = sub["prediction_XGB"].mean()
        apres = sub["prediction_XGB_corrige"].mean()
        ecart = ((apres - avant) / max(avant, 1)) * 100
        res.append({
            "Période": label,
            "n jours": int(mask.sum()),
            "Avant correction": round(avant, 1),
            "Après correction": round(apres, 1),
            "Différence": f"{ecart:+.1f}%",
        })

    print(pd.DataFrame(res).to_string(index=False))

    # Holiday days are guaranteed at this point; only the non-holiday side
    # can still be empty.
    if (~m_v).sum() > 0:
        moyenne_hors_vacances = dfp.loc[~m_v, "count"].mean()
        baisse_reelle = (
            (dfp.loc[m_v, "count"].mean() - moyenne_hors_vacances)
            / max(moyenne_hors_vacances, 1)
        ) * 100
        print(f"\n🔴 Baisse RÉELLE des volumes en vacances : {baisse_reelle:.1f}%")
        print(f"💡 Correction appliquée : facteur ×{facteur_global:.4f} (baisse de {(1-facteur_global)*100:.1f}%)")

    def mae(y_true, y_pred):
        """Mean absolute error between two array-likes of equal length."""
        return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))

    print(f"\n{filet}")
    print("🎯 PRÉCISION (MAE) — AVANT vs APRÈS")
    print(filet)

    res_mae = []
    for label, mask in [
        ("Toutes périodes", pd.Series(True, index=dfp.index)),
        ("Vacances scolaires", m_v),
        ("Hors vacances", ~m_v),
    ]:
        if mask.sum() < 2:
            continue
        yt = dfp.loc[mask, "count"].values
        mae_avant = mae(yt, dfp.loc[mask, "prediction_XGB"].values)
        mae_apres = mae(yt, dfp.loc[mask, "prediction_XGB_corrige"].values)
        gain = ((mae_avant - mae_apres) / max(mae_avant, 1)) * 100
        res_mae.append({
            "Période": label,
            "MAE avant": round(mae_avant, 2),
            "MAE après": round(mae_apres, 2),
            "Gain": f"{gain:+.1f}%",
        })

    print(pd.DataFrame(res_mae).to_string(index=False))

    print(f"\n{cadre}")
    print("✅ RÉSUMÉ")
    print(cadre)
    print(f" • Facteur correcteur : ×{facteur_global:.4f}")
    print(f" • Appliqué sur : {m_v_all.sum():,} jours en vacances (passés + futurs)")
    print(" • Hors vacances : inchangé")
    print(" • Impact : les prédictions en vacances sont corrigées à la baisse")
    print(" pour refléter la baisse réelle observée sur le passé.")

    return df
|
|
| |
| |
| |
| |
|
|