madm96 committed on
Commit
5e747dc
·
verified ·
1 Parent(s): 8debfb5

Upload notebook_manager.py

Browse files
Files changed (1) hide show
  1. notebook_manager.py +459 -0
notebook_manager.py ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ╔══════════════════════════════════════════════════════════════════════════════╗
2
+ # ║  NOTEBOOK : Analyse Vacances - VERSION MANAGER                               ║
3
+ # ║  Graphes, tableaux markdown, explications métier                             ║
4
+ # ╚══════════════════════════════════════════════════════════════════════════════╝
5
+
6
+ # ══════════════════════════════════════════════════════════════════════════════
7
+ # CELLULE 1 — Imports
8
+ # ══════════════════════════════════════════════════════════════════════════════
9
import warnings
from datetime import date

import numpy as np
import pandas as pd

# NOTE(review): this silences every warning process-wide — confirm that is intended.
warnings.filterwarnings("ignore")

# matplotlib is optional: MATPLOTLIB_OK records whether charts can be drawn.
try:
    import matplotlib.pyplot as plt
    MATPLOTLIB_OK = True
except ImportError:
    MATPLOTLIB_OK = False
    print("⚠️ matplotlib non installé → pas de graphes. pip install matplotlib")
21
+
22
+ # ══════════════════════════════════════════════════════════════════════════════
23
+ # CELLULE 2 — Calendrier vacances scolaires (2023-2027)
24
+ # ══════════════════════════════════════════════════════════════════════════════
25
+
26
# School-holiday calendar: school year -> zone ("A"/"B"/"C") -> list of
# (first_day, last_day) periods, both bounds inclusive, in chronological order.
# NOTE(review): the 2026-2027 entries follow the 2025-2026 pattern shifted by
# about a year — verify them against the official calendar.
VACANCES = {
    "2023-2024": {
        "A": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 17), date(2024, 3, 3)),
            (date(2024, 4, 13), date(2024, 4, 28)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "B": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 24), date(2024, 3, 10)),
            (date(2024, 4, 20), date(2024, 5, 5)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
        "C": [
            (date(2023, 10, 21), date(2023, 11, 5)),
            (date(2023, 12, 23), date(2024, 1, 7)),
            (date(2024, 2, 10), date(2024, 2, 25)),
            (date(2024, 4, 6), date(2024, 4, 21)),
            (date(2024, 7, 6), date(2024, 9, 1)),
        ],
    },
    "2024-2025": {
        "A": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 8), date(2025, 2, 23)),
            (date(2025, 4, 5), date(2025, 4, 20)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "B": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 22), date(2025, 3, 9)),
            (date(2025, 4, 19), date(2025, 5, 4)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
        "C": [
            (date(2024, 10, 19), date(2024, 11, 3)),
            (date(2024, 12, 21), date(2025, 1, 5)),
            (date(2025, 2, 15), date(2025, 3, 2)),
            (date(2025, 4, 12), date(2025, 4, 27)),
            (date(2025, 7, 5), date(2025, 8, 31)),
        ],
    },
    "2025-2026": {
        "A": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 14), date(2026, 3, 1)),
            (date(2026, 4, 4), date(2026, 4, 19)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "B": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 21), date(2026, 3, 8)),
            (date(2026, 4, 11), date(2026, 4, 26)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
        "C": [
            (date(2025, 10, 18), date(2025, 11, 2)),
            (date(2025, 12, 20), date(2026, 1, 4)),
            (date(2026, 2, 7), date(2026, 2, 22)),
            (date(2026, 3, 28), date(2026, 4, 12)),
            (date(2026, 7, 4), date(2026, 8, 31)),
        ],
    },
    "2026-2027": {
        "A": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 14), date(2027, 3, 1)),
            (date(2027, 4, 4), date(2027, 4, 19)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "B": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 21), date(2027, 3, 8)),
            (date(2027, 4, 11), date(2027, 4, 26)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
        "C": [
            (date(2026, 10, 17), date(2026, 11, 1)),
            (date(2026, 12, 19), date(2027, 1, 3)),
            (date(2027, 2, 7), date(2027, 2, 22)),
            (date(2027, 3, 28), date(2027, 4, 12)),
            (date(2027, 7, 3), date(2027, 8, 31)),
        ],
    },
}
72
+
73
# Maps a DR name (as found in the "DR" column) to its school-holiday zone.
DR_TO_ZONE = {
    # Zone A
    "Besancon": "A",
    "Bordeaux": "A",
    "Clermont-Ferrand": "A",
    "Dijon": "A",
    "Grenoble": "A",
    "Lyon": "A",
    "Limoges": "A",
    "Poitiers": "A",
    # Zone B
    "Aix-Marseille": "B",
    "Amiens": "B",
    "Caen": "B",
    "Lille": "B",
    "Nantes": "B",
    "Nice": "B",
    "Orleans-Tours": "B",
    "Reims": "B",
    "Rennes": "B",
    "Rouen": "B",
    "Strasbourg": "B",
    # Zone C
    "Creteil": "C",
    "Montpellier": "C",
    "Nancy-Metz": "C",
    "Paris": "C",
    "Toulouse": "C",
    "Versailles": "C",
    "AFC": "C",
}
83
+
84
def get_zone(dr):
    """Return the school-holiday zone ("A", "B" or "C") of a DR name.

    Names that are not keys of ``DR_TO_ZONE`` fall back to zone "C".
    """
    return DR_TO_ZONE.get(dr, "C")
85
+
86
def is_vacances(d, zone, vac):
    """Tell whether day ``d`` falls inside a holiday period of ``zone``.

    Args:
        d: The day to test; must be comparable with the period bounds.
        zone: Zone key looked up in ``vac``.
        vac: Mapping zone -> list of ``(first_day, last_day)`` pairs, both
            bounds inclusive. A missing zone means "no holidays".

    Returns:
        True if ``d`` lies within at least one period, else False.
    """
    return any(debut <= d <= fin for debut, fin in vac.get(zone, []))
90
+
91
def get_annee_scolaire(d):
    """Return the school-year label ("YYYY-YYYY") containing day ``d``.

    A school year is taken to start on 1 September: months 9-12 belong to
    the year starting that calendar year, months 1-8 to the previous one.
    """
    annee_debut = d.year if d.month >= 9 else d.year - 1
    return f"{annee_debut}-{annee_debut + 1}"
93
+
94
def get_periode_vacances(d, vac):
    """Name the holiday period that day ``d`` belongs to, in any zone.

    The label comes from the month in which the matching period *starts*,
    not from the month of ``d``. A spring break starting in late March is
    therefore "Printemps" on its March days too, and a summer break running
    into early September stays "Ete".

    Args:
        d: The day to classify; must be comparable with the period bounds.
        vac: Mapping zone -> list of ``(first_day, last_day)`` pairs, both
            bounds inclusive. Zones "A", "B" and "C" are scanned in order.

    Returns:
        One of "Toussaint", "Noel", "Hiver", "Printemps", "Ete", or
        "Hors_vacances" when no period of any zone contains ``d``.
    """
    periode_par_mois_debut = {
        10: "Toussaint", 11: "Toussaint",
        12: "Noel", 1: "Noel",
        2: "Hiver",
        3: "Printemps", 4: "Printemps", 5: "Printemps",
        6: "Ete", 7: "Ete", 8: "Ete",
    }
    for zone in ("A", "B", "C"):
        for debut, fin in vac.get(zone, []):
            if debut <= d <= fin:
                nom = periode_par_mois_debut.get(debut.month)
                if nom is not None:
                    return nom
                # Period with an unexpected start month: keep searching.
    return "Hors_vacances"
105
+
106
def add_vacances(df):
    """Return a copy of ``df`` enriched with school-holiday columns.

    Added or rewritten columns:
        * ``Date``: parsed to datetime, timezone information dropped.
        * ``zone_vacances``: zone of the row's ``DR`` (via ``get_zone``).
        * ``annee_scolaire``: school-year label of the row's date.
        * ``is_vacances_zone``: True when the date is a holiday in the
          row's own zone.
        * ``periode_vacances``: period name from ``get_periode_vacances``
          (matches a holiday in *any* zone), else "Hors_vacances".

    Holiday lookups use all school years of ``VACANCES`` merged per zone.
    Looking only under the row's school-year label missed days such as
    2024-09-01, which the calendar lists in the 2023-2024 summer break but
    whose label is "2024-2025".

    Args:
        df: DataFrame with at least ``Date`` and ``DR`` columns.

    Returns:
        A new DataFrame; the input is not modified.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"]).dt.tz_localize(None)
    df["zone_vacances"] = df["DR"].apply(get_zone)

    jours = df["Date"].dt.date
    df["annee_scolaire"] = jours.apply(get_annee_scolaire)

    # Flatten the per-year calendars into one list of periods per zone.
    calendrier = {
        zone: [periode
               for annee in VACANCES.values()
               for periode in annee.get(zone, [])]
        for zone in ("A", "B", "C")
    }

    # Plain comprehensions instead of DataFrame.apply(axis=1): same result,
    # and an empty frame simply yields empty columns.
    df["is_vacances_zone"] = pd.Series(
        [is_vacances(d, zone, calendrier)
         for d, zone in zip(jours, df["zone_vacances"])],
        index=df.index, dtype=bool,
    )
    df["periode_vacances"] = pd.Series(
        [get_periode_vacances(d, calendrier) for d in jours],
        index=df.index, dtype=object,
    )
    return df
120
+
121
+ # ══════════════════════════════════════════════════════════════════════════════
122
+ # CELLULE 3 — Métriques (vocabulaire métier)
123
+ # ══════════════════════════════════════════════════════════════════════════════
124
+
125
def ecart_absolu(y_true, y_pred):
    """Return the mean absolute difference between ``y_true`` and ``y_pred``.

    Both inputs are converted to float arrays first, so unsigned-integer
    inputs cannot wrap around when subtracted.
    """
    reel = np.asarray(y_true, dtype=float)
    prevu = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(reel - prevu))
127
+
128
def ecart_relatif_pct(y_true, y_pred):
    """Return the mean absolute relative error, in percent.

    Each absolute error is divided by ``max(y_true, 1)``, so true values
    below 1 (including 0) use a denominator of 1. Inputs are converted to
    float arrays first so unsigned-integer inputs cannot wrap around.
    """
    reel = np.asarray(y_true, dtype=float)
    prevu = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((reel - prevu) / np.maximum(reel, 1))) * 100
131
+
132
+ # ══════════════════════════════════════════════════════════════════════════════
133
+ # CELLULE 4 — Analyse globale avec explications
134
+ # ══════════════════════════════════════════════════════════════════════════════
135
+
136
def analyse_globale(df):
    """Compare actual and predicted volumes on holiday vs non-holiday rows.

    Only rows with ``count > 0`` and a non-null ``prediction_XGB`` are used.
    They are split on ``is_vacances_zone``; a summary table and a short
    interpretation are printed.

    Args:
        df: DataFrame with ``count``, ``prediction_XGB`` and
            ``is_vacances_zone`` columns.

    Returns:
        A DataFrame with one row per non-empty group ("Vacances scolaires",
        "Hors vacances"), or ``None`` when no row passes the filter.
    """
    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
    if dfp.empty:
        print("❌ Aucune donnée passée avec prédiction valide.")
        return None

    # .eq(True) always yields a real boolean mask, so `~` is a logical NOT
    # even if the flag column is stored as object dtype.
    en_vacances = dfp["is_vacances_zone"].eq(True)

    rows = []
    for mask, label in [(en_vacances, "Vacances scolaires"),
                        (~en_vacances, "Hors vacances")]:
        sub = dfp[mask]
        if sub.empty:
            continue
        yt, yp = sub["count"].values, sub["prediction_XGB"].values
        rows.append({
            "Periode": label,
            "Nb_jours": len(sub),
            "Vol_reel": round(yt.mean(), 1),
            "Vol_pred": round(yp.mean(), 1),
            "Surprediction_%": round(((yp.mean() - yt.mean()) / max(yt.mean(), 1)) * 100, 1),
            "Ecart_Absolu": round(ecart_absolu(yt, yp), 1),
            "Ecart_Relatif_%": round(ecart_relatif_pct(yt, yp), 1),
        })

    df_res = pd.DataFrame(rows)

    print("\n" + "=" * 75)
    print("📊 EFFET VACANCES SCOLAIRES — RÉSULTATS AVANT CORRECTION")
    print("=" * 75)
    print("""
PROCÉDURE :
  1. Identification des jours de vacances scolaires par zone (A/B/C)
  2. Comparaison volume réel d'appels vs prédiction XGBoost
  3. Métriques :
     • Ecart_Absolu    = erreur moyenne en nombre d'appels/jour
     • Ecart_Relatif_% = erreur moyenne relative (% du volume réel)
  4. Correction = ajustement multiplicatif uniquement sur jours de vacances
""")

    print("\n📋 TABLEAU RÉCAPITULATIF (markdown)\n")
    try:
        print(df_res.to_markdown(index=False))
    except ImportError:
        # DataFrame.to_markdown needs the optional 'tabulate' package.
        print(df_res.to_string(index=False))

    if len(df_res) >= 2:
        row_v = df_res[df_res["Periode"] == "Vacances scolaires"].iloc[0]
        row_h = df_res[df_res["Periode"] == "Hors vacances"].iloc[0]
        baisse = ((row_v["Vol_reel"] - row_h["Vol_reel"]) / max(row_h["Vol_reel"], 1)) * 100
        # Word the message after the actual direction of the change instead
        # of always announcing a drop.
        verbe, nom = ("baisse", "baisse") if baisse < 0 else ("augmente", "hausse")

        print(f"""
📈 INTERPRÉTATION MÉTIER :

→ Pendant les vacances scolaires, le volume {verbe} de {abs(baisse):.1f}%
  ({row_v['Vol_reel']:.0f} appels/jour vs {row_h['Vol_reel']:.0f} hors vacances)

→ Le modèle {'sur-prédit' if row_v['Surprediction_%'] > 0 else 'sous-prédit'}
  de {abs(row_v['Surprediction_%']):.1f}% en période de vacances
→ Il ne capte pas complètement cette {nom}

→ Ecart_Absolu = {row_v['Ecart_Absolu']:.1f} appels/jour en vacances
  (marge d'erreur de {row_v['Ecart_Relatif_%']:.1f}% du volume réel)
""")

    return df_res
198
+
199
+ # ══════════════════════════════════════════════════════════════════════════════
200
+ # CELLULE 5 — Analyse par sous-type d'accueil
201
+ # ══════════════════════════════════════════════════════════════════════════════
202
+
203
def analyse_par_sous_type(df):
    """Break the holiday / non-holiday comparison down by ``sous_type_accueil``.

    Only rows with ``count > 0`` and a non-null ``prediction_XGB`` are used.
    A (sub-type, period) group is reported only when it has at least 5 rows.
    Prints the detailed table and a one-line summary per sub-type that has
    both a holiday and a non-holiday row.

    Args:
        df: DataFrame with ``count``, ``prediction_XGB``,
            ``is_vacances_zone`` and ``sous_type_accueil`` columns.

    Returns:
        A DataFrame with one row per reported (sub-type, period) group;
        empty when no group qualifies.
    """
    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()

    # .eq(True) guarantees a boolean mask so that `~` is a logical NOT.
    en_vacances = dfp["is_vacances_zone"].eq(True)
    periodes = [("Vacances", en_vacances), ("Hors_vacances", ~en_vacances)]

    rows = []
    for st in sorted(dfp["sous_type_accueil"].dropna().unique()):
        du_type = dfp["sous_type_accueil"] == st
        for periode_label, mask_base in periodes:
            mask = mask_base & du_type
            if mask.sum() < 5:
                continue
            sub = dfp[mask]
            yt, yp = sub["count"].values, sub["prediction_XGB"].values
            rows.append({
                "Sous_type": st,
                "Periode": periode_label,
                "Nb_jours": len(sub),
                "Vol_reel": round(yt.mean(), 1),
                "Vol_pred": round(yp.mean(), 1),
                "Surprediction_%": round(((yp.mean() - yt.mean()) / max(yt.mean(), 1)) * 100, 1),
                "Ecart_Absolu": round(ecart_absolu(yt, yp), 1),
                "Ecart_Relatif_%": round(ecart_relatif_pct(yt, yp), 1),
            })

    df_st = pd.DataFrame(rows)

    print("\n" + "=" * 75)
    print("📊 ANALYSE PAR SOUS-TYPE D'ACCUEIL")
    print("=" * 75)

    if len(df_st) > 0:
        print("\n📋 Détail par sous-type (markdown)\n")
        try:
            print(df_st.to_markdown(index=False))
        except ImportError:
            # DataFrame.to_markdown needs the optional 'tabulate' package.
            print(df_st.to_string(index=False))

        print("\n📋 Synthèse par sous-type :\n")
        for st in sorted(df_st["Sous_type"].unique()):
            sub = df_st[df_st["Sous_type"] == st]
            vac = sub[sub["Periode"] == "Vacances"]
            hors = sub[sub["Periode"] == "Hors_vacances"]
            if len(vac) > 0 and len(hors) > 0:
                ligne_vac, ligne_hors = vac.iloc[0], hors.iloc[0]
                baisse = ((ligne_vac["Vol_reel"] - ligne_hors["Vol_reel"])
                          / max(ligne_hors["Vol_reel"], 1)) * 100
                print(f"  • {st:<25} : baisse vacances = {baisse:+.1f}% | "
                      f"Ecart vac = {ligne_vac['Ecart_Absolu']:.1f} appels "
                      f"({ligne_vac['Ecart_Relatif_%']:.1f}%)")

    return df_st
250
+
251
+ # ══════════════════════════════════════════════════════════════════════════════
252
+ # CELLULE 6 — Calcul facteurs + correction
253
+ # ══════════════════════════════════════════════════════════════════════════════
254
+
255
def calcule_facteurs(df):
    """Compute multiplicative correction factors from holiday rows.

    Only rows with ``count > 0``, a non-null ``prediction_XGB`` and a true
    ``is_vacances_zone`` contribute. A factor is
    ``mean(count) / max(mean(prediction_XGB), 1)``.

    Args:
        df: DataFrame with ``count``, ``prediction_XGB``,
            ``is_vacances_zone``, ``zone_vacances`` and
            ``sous_type_accueil`` columns.

    Returns:
        A dict holding ``("GLOBAL", "ALL")`` (all holiday rows) plus one
        ``(zone, sous_type)`` entry per zone "A"/"B"/"C" and sub-type with
        at least 3 holiday rows. Empty when there is no holiday row.
    """
    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())]
    vac = dfp[dfp["is_vacances_zone"].eq(True)]

    facteurs = {}
    if vac.empty:
        return facteurs

    def _ratio(sub):
        # NOTE(review): the floor of 1 guards against division by zero but
        # also flattens the ratio when the mean prediction is below 1.
        return sub["count"].mean() / max(sub["prediction_XGB"].mean(), 1)

    facteurs[("GLOBAL", "ALL")] = _ratio(vac)

    sous_types = dfp["sous_type_accueil"].dropna().unique()
    for zone in ("A", "B", "C"):
        vac_zone = vac[vac["zone_vacances"] == zone]
        for st in sous_types:
            sub = vac_zone[vac_zone["sous_type_accueil"] == st]
            if len(sub) < 3:
                continue  # too few rows for a reliable factor
            facteurs[(zone, st)] = _ratio(sub)
    return facteurs
268
+
269
def corrige_predictions(df, facteurs):
    """Return a copy of ``df`` with a corrected prediction column.

    ``prediction_XGB_corrige`` equals ``prediction_XGB`` (as float) on
    non-holiday rows. On holiday rows it is ``prediction_XGB`` multiplied by
    the ``(zone, sous_type)`` factor when one exists, otherwise by the
    ``("GLOBAL", "ALL")`` factor, otherwise by 1.0.

    The global fallback is applied to every holiday row first, so rows whose
    ``sous_type_accueil`` is missing are corrected too instead of being
    skipped.

    Args:
        df: DataFrame with ``prediction_XGB``, ``is_vacances_zone``,
            ``zone_vacances`` and ``sous_type_accueil`` columns.
        facteurs: Dict as returned by ``calcule_facteurs``.

    Returns:
        A new DataFrame; the input is not modified.
    """
    df = df.copy()
    base = df["prediction_XGB"].astype(float)
    df["prediction_XGB_corrige"] = base

    en_vacances = df["is_vacances_zone"].eq(True)
    cle_globale = ("GLOBAL", "ALL")

    # 1) Fallback: global factor on every holiday row.
    facteur_global = facteurs.get(cle_globale, 1.0)
    df.loc[en_vacances, "prediction_XGB_corrige"] = base[en_vacances] * facteur_global

    # 2) Override with the specific (zone, sub-type) factor where one exists.
    for cle, facteur in facteurs.items():
        if cle == cle_globale:
            continue
        zone, st = cle
        cible = en_vacances & (df["zone_vacances"] == zone) & (df["sous_type_accueil"] == st)
        if cible.any():
            df.loc[cible, "prediction_XGB_corrige"] = base[cible] * facteur
    return df
280
+
281
+ # ══════════════════════════════════════════════════════════════════════════════
282
+ # CELLULE 7 — Évaluation avant/après avec markdown
283
+ # ══════════════════════════════════════════════════════════════════════════════
284
+
285
def evalue_correction(df):
    """Compare prediction error before and after the holiday correction.

    Only rows with ``count > 0`` and a non-null ``prediction_XGB`` are used.
    Metrics are computed for all rows, holiday rows and non-holiday rows;
    a group with fewer than 2 rows is skipped. Prints the table and an
    interpretation of the holiday row.

    Args:
        df: DataFrame with ``count``, ``prediction_XGB``,
            ``prediction_XGB_corrige`` and ``is_vacances_zone`` columns.

    Returns:
        A DataFrame with one row per evaluated group. It always carries the
        result columns, even when no group qualifies.
    """
    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
    # .eq(True) guarantees a boolean mask so that `~` is a logical NOT.
    en_vacances = dfp["is_vacances_zone"].eq(True)

    colonnes = [
        "Periode", "Nb_jours",
        "Ecart_Absolu_avant", "Ecart_Absolu_apres", "Gain_Ecart_Absolu_%",
        "Ecart_Relatif_%_avant", "Ecart_Relatif_%_apres",
    ]
    groupes = [
        ("Toutes_periodes", pd.Series(True, index=dfp.index, dtype=bool)),
        ("Vacances", en_vacances),
        ("Hors_vacances", ~en_vacances),
    ]

    rows = []
    for label, mask in groupes:
        if mask.sum() < 2:
            continue
        yt = dfp.loc[mask, "count"].values
        y_avant = dfp.loc[mask, "prediction_XGB"].values
        y_apres = dfp.loc[mask, "prediction_XGB_corrige"].values

        ea_avant = ecart_absolu(yt, y_avant)
        ea_apres = ecart_absolu(yt, y_apres)
        er_avant = ecart_relatif_pct(yt, y_avant)
        er_apres = ecart_relatif_pct(yt, y_apres)
        # Relative gain w.r.t. the error before correction. Dividing by
        # max(ea_avant, 1) understated it whenever the error was below 1;
        # the gain is undefined (NaN) when there was no error to begin with.
        if ea_avant == 0:
            gain = float("nan")
        else:
            gain = (ea_avant - ea_apres) / ea_avant * 100

        rows.append({
            "Periode": label,
            "Nb_jours": int(mask.sum()),
            "Ecart_Absolu_avant": round(ea_avant, 2),
            "Ecart_Absolu_apres": round(ea_apres, 2),
            "Gain_Ecart_Absolu_%": round(gain, 1),
            "Ecart_Relatif_%_avant": round(er_avant, 1),
            "Ecart_Relatif_%_apres": round(er_apres, 1),
        })

    # Explicit columns keep df_eval["Periode"] valid when `rows` is empty.
    df_eval = pd.DataFrame(rows, columns=colonnes)

    print("\n" + "=" * 75)
    print("📊 ÉVALUATION : AVANT vs APRÈS CORRECTION")
    print("=" * 75)
    print("\n📋 Résultats (markdown)\n")
    try:
        print(df_eval.to_markdown(index=False))
    except ImportError:
        # DataFrame.to_markdown needs the optional 'tabulate' package.
        print(df_eval.to_string(index=False))

    vac_row = df_eval[df_eval["Periode"] == "Vacances"]
    if len(vac_row) > 0:
        gain_vac = vac_row.iloc[0]["Gain_Ecart_Absolu_%"]
        ea_av = vac_row.iloc[0]["Ecart_Absolu_avant"]
        ea_ap = vac_row.iloc[0]["Ecart_Absolu_apres"]
        print(f"""
📈 INTERPRÉTATION :

→ Sur les jours de vacances :
  Ecart_Absolu passe de {ea_av:.2f} à {ea_ap:.2f} appels/jour
→ Gain de {gain_vac:.1f}% sur la précision des prédictions

→ Hors vacances : aucune modification
→ La correction ne touche QUE les jours identifiés comme vacances

→ Le facteur correcteur est appliqué sans re-entraîner le modèle
  (post-processing uniquement)
""")

    return df_eval
343
+
344
+ # ══════════════════════════════════════════════════════════════════════════════
345
+ # CELLULE 8 — Graphes pour le manager
346
+ # ══════════════════════════════════════════════════════════════════════════════
347
+
348
def graphes_manager(df, dr_filtre=None, st_filtre=None):
    """Draw the three summary charts (time series, boxplot, error bars).

    Only rows with ``count > 0`` and a non-null ``prediction_XGB`` are
    plotted, optionally restricted to one DR and/or one sub-type. Does
    nothing except print a message when matplotlib is unavailable or the
    filtered data is empty.

    Args:
        df: DataFrame with ``Date``, ``DR``, ``count``, ``prediction_XGB``,
            ``is_vacances_zone``, ``periode_vacances`` and
            ``sous_type_accueil`` columns; ``prediction_XGB_corrige`` is
            used when present.
        dr_filtre: If truthy, keep only rows whose ``DR`` equals it.
        st_filtre: If truthy, keep only rows whose ``sous_type_accueil``
            equals it.

    Returns:
        None.
    """
    if not MATPLOTLIB_OK:
        print("❌ matplotlib non installé. pip install matplotlib")
        return

    dfp = df[(df["count"] > 0) & (df["prediction_XGB"].notna())].copy()
    if dr_filtre:
        dfp = dfp[dfp["DR"] == dr_filtre]
    if st_filtre:
        dfp = dfp[dfp["sous_type_accueil"] == st_filtre]
    dfp = dfp.sort_values("Date")

    if dfp.empty:
        print("❌ Pas de données pour ce filtre")
        return

    a_corrige = "prediction_XGB_corrige" in dfp.columns
    fig, axes = plt.subplots(3, 1, figsize=(14, 12))
    titre = f"DR={dr_filtre}, Type={st_filtre}" if (dr_filtre or st_filtre) else "Global"

    # --- Chart 1: time series ---
    ax1 = axes[0]
    ax1.plot(dfp["Date"], dfp["count"], label="Réel", color="black", linewidth=1)
    ax1.plot(dfp["Date"], dfp["prediction_XGB"], label="XGB avant", color="orange", alpha=0.8)
    if a_corrige:
        ax1.plot(dfp["Date"], dfp["prediction_XGB_corrige"], label="XGB corrigé", color="green", alpha=0.8)

    # One faint red line per holiday row; overlapping lines darken the band.
    for jour in dfp.loc[dfp["is_vacances_zone"].eq(True), "Date"]:
        ax1.axvline(jour, color="red", alpha=0.03)

    ax1.set_title(f"Volume d'appels — {titre}")
    ax1.set_ylabel("Appels / jour")
    ax1.legend(loc="upper left")
    ax1.grid(True, alpha=0.3)

    # --- Chart 2: boxplot per period ---
    ax2 = axes[1]
    data_box, labels_box, colors_box = [], [], []
    for periode in ["Toussaint", "Noel", "Hiver", "Printemps", "Ete", "Hors_vacances"]:
        mask = dfp["periode_vacances"] == periode
        if mask.sum() < 3:
            continue
        data_box.append(dfp.loc[mask, "count"].values)
        labels_box.append(periode)
        colors_box.append("lightcoral" if periode != "Hors_vacances" else "lightblue")

    # boxplot() raises when it gets no group, so only draw when at least one
    # period has enough rows. Tick labels are set explicitly because the
    # `labels=` keyword is deprecated in recent Matplotlib releases.
    if data_box:
        bp = ax2.boxplot(data_box, patch_artist=True)
        ax2.set_xticks(range(1, len(labels_box) + 1))
        ax2.set_xticklabels(labels_box)
        for patch, color in zip(bp["boxes"], colors_box):
            patch.set_facecolor(color)
    ax2.set_title("Distribution des volumes par période")
    ax2.set_ylabel("Appels / jour")
    ax2.grid(True, alpha=0.3, axis="y")

    # --- Chart 3: absolute error before/after ---
    ax3 = axes[2]
    # Without a corrected column the "after" bars repeat the "before" values.
    col_apres = "prediction_XGB_corrige" if a_corrige else "prediction_XGB"
    periodes, ea_avant, ea_apres = [], [], []
    for periode in ["Toussaint", "Noel", "Hiver", "Printemps", "Ete"]:
        mask = dfp["periode_vacances"] == periode
        if mask.sum() < 3:
            continue
        yt = dfp.loc[mask, "count"].values
        periodes.append(periode)
        ea_avant.append(ecart_absolu(yt, dfp.loc[mask, "prediction_XGB"].values))
        ea_apres.append(ecart_absolu(yt, dfp.loc[mask, col_apres].values))

    x = np.arange(len(periodes))
    width = 0.35
    ax3.bar(x - width / 2, ea_avant, width, label="Avant", color="orange", alpha=0.8)
    ax3.bar(x + width / 2, ea_apres, width, label="Après", color="green", alpha=0.8)
    ax3.set_title("Ecart Absolu par période (avant vs après correction)")
    ax3.set_ylabel("Ecart Absolu (appels/jour)")
    ax3.set_xticks(x)
    ax3.set_xticklabels(periodes)
    ax3.legend()
    ax3.grid(True, alpha=0.3, axis="y")

    plt.tight_layout()
    plt.show()
    print("💾 Sauvegarde : plt.savefig('vacances_manager.png', dpi=150, bbox_inches='tight')")
429
+
430
+ # ══════════════════════════════════════════════════════════════════════════════
431
+ # CELLULE 9 — Pipeline complet
432
+ # ══════════════════════════════════════════════════════════════════════════════
433
+
434
def pipeline_manager(df):
    """Run the full holiday analysis and return every intermediate result.

    Steps: global analysis, per-sub-type analysis, factor computation,
    prediction correction, charts, before/after evaluation. Charts are drawn
    *after* the correction so that the corrected curve and the "after" bars
    reflect ``prediction_XGB_corrige``.

    Args:
        df: DataFrame already enriched with the holiday columns (see the
            commented example at the end of the file, which calls
            ``add_vacances`` first).

    Returns:
        Tuple ``(df, df_global, df_st, df_eval, facteurs)`` where ``df`` is
        the corrected copy of the input.
    """
    print("\n🔵" + "═" * 73 + "🔵")
    print("   ANALYSE VACANCES SCOLAIRES — RAPPORT MANAGER")
    print("🔵" + "═" * 73 + "🔵")

    df_global = analyse_globale(df)
    df_st = analyse_par_sous_type(df)

    facteurs = calcule_facteurs(df)
    print(f"\n🔧 Facteur correcteur global = {facteurs.get(('GLOBAL','ALL'), 1.0):.4f}")
    print("   (≈ Volume_reel_vacances / Volume_pred_vacances)")

    df = corrige_predictions(df, facteurs)

    if MATPLOTLIB_OK:
        print("\n📊 Génération des graphes...")
        graphes_manager(df)

    df_eval = evalue_correction(df)

    return df, df_global, df_st, df_eval, facteurs
454
+
455
+ # ══════════════════════════════════════════════════════════════════════════════
456
+ # CELLULE 10 — Exécution
457
+ # ══════════════════════════════════════════════════════════════════════════════
458
+ # df = add_vacances(df)
459
+ # df, global_res, st_res, eval_res, facteurs = pipeline_manager(df)