cloud450 committed on
Commit
8f078dd
Β·
verified Β·
1 Parent(s): aa58d6a

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ shap_dot_plot.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,77 @@
1
  ---
2
- title: Severity Score
3
- emoji: πŸ“š
4
  colorFrom: yellow
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.12.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Severity Of Score
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Pothole Severity Scoring
3
+ emoji: πŸ•³οΈ
4
  colorFrom: yellow
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 4.0.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
+ # πŸ›£οΈ Pothole Severity Scoring Pipeline
13
+
14
+ Active ML pipeline for generating synthetic civic data and training an XGBoost-based regression model to predict pothole severity scores ($S \in [0,1]$).
15
+
16
+ ## πŸš€ Quick Start
17
+
18
+ 1. **Install Dependencies**:
19
+ ```bash
20
+ pip install numpy pandas scikit-learn xgboost shap matplotlib joblib
21
+ ```
22
+
23
+ 2. **Run Pipeline**:
24
+ ```bash
25
+ python severity_model_pipeline.py
26
+ ```
27
+
28
+ ## πŸ—οΈ Project Structure
29
+
30
+ | File | Description |
31
+ | :--- | :--- |
32
+ | `severity_model_pipeline.py` | Main end-to-end pipeline script. |
33
+ | `synthetic_pothole_data.csv` | The generated dataset (10k samples). |
34
+ | `severity_model.json` | Trained XGBoost model (Native JSON format). |
35
+ | `feature_scaler.pkl` | MinMaxScaler for normalizing real-time features. |
36
+ | `feature_list.json` | JSON list ensuring correct feature ordering during inference. |
37
+ | `shap_bar_plot.png` | Global feature importance visualization. |
38
+ | `shap_dot_plot.png` | Detailed SHAP summary plot showing feature impact. |
39
+
40
+ ## πŸ“Š Feature Definitions
41
+
42
+ All features are normalized within the range `[0, 1]`:
43
+
44
+ - **A**: Defect area ratio (size relative to image).
45
+ - **D**: Defect density (fragmentation level).
46
+ - **C**: Centrality (distance from road center).
47
+ - **Q**: Detection confidence (CV confidence score).
48
+ - **M**: Multi-user confirmation score (crowdsourced weight).
49
+ - **T**: Temporal persistence (time since detection).
50
+ - **R**: Traffic importance (Highway: 1.0, Main: 0.7, Local: 0.4).
51
+ - **P**: Proximity to critical infrastructure (Hospitals, schools).
52
+ - **F**: Recurrence frequency (historical patch failure).
53
+ - **X**: Resolution failure score (reopen count).
54
+
55
+ ## 🧠 Model Logic
56
+
57
+ - **Ground Truth Foundation**:
58
+ $S_{base} = 0.28A + 0.10D + 0.14C + 0.04Q + 0.08M + 0.07T + 0.09R + 0.10P + 0.06F + 0.04X$
59
+ - **Infrastructure Boost**: $K = 1 + 0.5P$
60
+ - **Final Target**: $S = \min(1, S_{base} * K + \text{Gaussian Noise})$
61
+
62
+ ---
63
+
64
+ ## πŸ› οΈ Inference Usage
65
+
66
+ You can use the `predict_severity` function within `severity_model_pipeline.py` to get predictions:
67
+
68
+ ```python
69
+ from severity_model_pipeline import predict_severity, load_inference_artefacts
70
+
71
+ # Load trained components
72
+ model, scaler, features = load_inference_artefacts()
73
+
74
+ # Predict
75
+ result = predict_severity(my_data_dict, model, scaler, features)
76
+ print(f"Severity: {result['score']} ({result['label']})")
77
+ ```
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import xgboost as xgb
3
+ import joblib
4
+ import json
5
+ import numpy as np
6
+ import os
7
+
8
+ # --- Load Assets ---
9
+ MODEL_PATH = "severity_model.json"
10
+ SCALER_PATH = "feature_scaler.pkl"
11
+ FEATURES_PATH = "feature_list.json"
12
+
13
def load_resources():
    """Load the persisted XGBoost model, fitted scaler, and ordered feature list.

    Returns a (model, scaler, feature_names) tuple read from the module-level
    artefact paths.
    """
    regressor = xgb.XGBRegressor()
    regressor.load_model(MODEL_PATH)
    fitted_scaler = joblib.load(SCALER_PATH)
    with open(FEATURES_PATH) as handle:
        feature_order = json.load(handle)
    return regressor, fitted_scaler, feature_order
20
+
21
+ model, scaler, feature_names = load_resources()
22
+
23
def get_label(score):
    """Map a numeric severity score in [0, 1] to a human-readable priority label."""
    if score >= 0.66:
        return "High πŸ”΄"
    elif score >= 0.33:
        return "Medium 🟑"
    return "Low 🟒"
27
+
28
def predict(*args):
    """Score one pothole from positional slider values.

    Args arrive in the same order as ``feature_names``; returns a tuple of
    (score rounded to 4 decimals, priority label).
    """
    # Pair each positional value with its feature name, then rebuild the row
    # in canonical feature order for the scaler/model.
    by_name = dict(zip(feature_names, args))
    vector = np.array([[by_name[name] for name in feature_names]], dtype=np.float32)

    raw = float(model.predict(scaler.transform(vector))[0])
    # Clamp to the model's valid output range [0, 1].
    clamped = min(1, max(0, raw))

    return round(clamped, 4), get_label(clamped)
39
+
40
# --- UI Setup ---
# Module-level Gradio app: one slider per model feature, wired to predict(),
# which returns (score, label) for the two output widgets.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ•³οΈ Pothole Severity Predictor (Civic AI)")
    gr.Markdown("Adjust the sliders below to simulate pothole features and predict repair priority.")

    with gr.Row():
        with gr.Column():
            # Slider order must match feature_list.json (A, D, C, Q, M, T, R, P, F, X)
            # because predict() zips its positional args with feature_names.
            a = gr.Slider(0, 1, value=0.1, label="Area Ratio (A)", info="Size of pothole")
            d = gr.Slider(0, 1, value=0.1, label="Density (D)", info="Fragmentation")
            c = gr.Slider(0, 1, value=0.5, label="Centrality (C)", info="0=Edge, 1=Center")
            q = gr.Slider(0, 1, value=0.9, label="Confidence (Q)", info="CV Model Certainty")
            m = gr.Slider(0, 1, value=0.1, label="Confirmations (M)", info="User reports")
        with gr.Column():
            t = gr.Slider(0, 1, value=0.1, label="Persistence (T)", info="Wait time")
            r = gr.Slider(0, 1, value=0.4, label="Road Type (R)", info="0.4:Local, 1.0:Highway")
            p = gr.Slider(0, 1, value=0.1, label="Critical Infra (P)", info="Proximity to hospitals/schools")
            f = gr.Slider(0, 1, value=0.1, label="Recurrence (F)", info="Historical failure")
            x = gr.Slider(0, 1, value=0.0, label="Reopen Count (X)", info="Failed repairs")

    btn = gr.Button("Calculate Severity Score", variant="primary")

    with gr.Row():
        out_score = gr.Number(label="Severity Score (0-1)")
        out_label = gr.Textbox(label="Priority Level")

    # Inputs listed in canonical feature order A..X; outputs fill the two widgets above.
    btn.click(predict, inputs=[a, d, c, q, m, t, r, p, f, x], outputs=[out_score, out_label])

if __name__ == "__main__":
    demo.launch()
feature_list.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "A",
3
+ "D",
4
+ "C",
5
+ "Q",
6
+ "M",
7
+ "T",
8
+ "R",
9
+ "P",
10
+ "F",
11
+ "X"
12
+ ]
feature_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98bee969099864217324a3154bd7e0e65ef2a167d6616feb66e656c2a853cc7f
3
+ size 1351
priority_queue.py ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ =============================================================================
3
+ CIVIC ISSUE MANAGEMENT β€” PRIORITY QUEUE SYSTEM
4
+ =============================================================================
5
+ A production-grade Priority Queue for managing civic issues (potholes),
6
+ prioritized by a composite score evaluating Severity, SLA Breach,
7
+ Escalation Status, and Reopen Frequency.
8
+
9
+ Features:
10
+ - Global Queue, Ward-specific Queues, and Contractor-specific Queues.
11
+ - O(log N) task insertion and updates.
12
+ - Real-time SLA breach overrides and explicit emergency handling.
13
+ - Smart lazy-deletion to maintain computational efficiency during updates.
14
+ =============================================================================
15
+ """
16
+
17
+ import heapq
18
+ import itertools
19
+ from dataclasses import dataclass
20
+ from datetime import datetime, timedelta
21
+ import random
22
+ from typing import Dict, List, Optional
23
+
24
+
25
+ # =============================================================================
26
+ # DATA STRUCTURES & CONFIGURATION
27
+ # =============================================================================
28
+
29
+ @dataclass
30
+ class CivicTask:
31
+ task_id: str
32
+ severity_score: float
33
+ severity_label: str
34
+ created_at: datetime
35
+ days_pending: int
36
+ sla_days: int
37
+ ward: str
38
+ contractor_id: str
39
+ is_escalated: bool
40
+ reopen_count: int
41
+ emergency_override: bool = False
42
+
43
+ def compute_priority(self) -> float:
44
+ """
45
+ Computes the priority score based on the specified formula:
46
+ Priority = (Sev * 0.6) + (SLA Breach * 0.2) + (Escalation * 0.1) + (Reopen * 0.1)
47
+ """
48
+ if self.emergency_override:
49
+ return float('inf') # Highest conceivable priority
50
+
51
+ # SLA breach factor computation
52
+ if self.days_pending <= self.sla_days:
53
+ sla_breach_factor = 0.0
54
+ else:
55
+ sla_breach_factor = min(1.0, (self.days_pending - self.sla_days) / self.sla_days)
56
+
57
+ # Escalation factor
58
+ escalation_factor = 1.0 if self.is_escalated else 0.0
59
+
60
+ # Reopen factor
61
+ reopen_factor = min(1.0, self.reopen_count / 3.0)
62
+
63
+ # Final Priority Score
64
+ priority_score = (
65
+ (self.severity_score * 0.6) +
66
+ (sla_breach_factor * 0.2) +
67
+ (escalation_factor * 0.1) +
68
+ (reopen_factor * 0.1)
69
+ )
70
+ return priority_score
71
+
72
+ def get_priority_reason(self) -> str:
73
+ """Helper to generate a human-readable explanation of why this is prioritized."""
74
+ if self.emergency_override:
75
+ return "🚨 EMERGENCY OVERRIDE"
76
+
77
+ reasons = []
78
+ if self.severity_score >= 0.66:
79
+ reasons.append("πŸ”₯ High Severity")
80
+ if self.days_pending > self.sla_days:
81
+ reasons.append(f"⏳ SLA Breach (+{self.days_pending - self.sla_days} days)")
82
+ if self.is_escalated:
83
+ reasons.append("πŸ“£ Escalated")
84
+ if self.reopen_count > 0:
85
+ reasons.append(f"πŸ” Reopened ({self.reopen_count}x)")
86
+
87
+ return " | ".join(reasons) if reasons else "βœ… Standard Processing"
88
+
89
+
90
+ # =============================================================================
91
+ # QUEUE IMPLEMENTATION
92
+ # =============================================================================
93
+
94
+ class PriorityQueue:
95
+ """
96
+ Min-heap implementation storing negative priorities to act as a Max-Heap.
97
+ Implements lazy deletion for O(1) removals and O(log N) updates.
98
+ """
99
+ def __init__(self, name: str):
100
+ self.name = name
101
+ self.pq = [] # list of entries arranged in a heap
102
+ self.entry_finder = {} # mapping of tasks to entries
103
+ self.REMOVED = '<removed-task>' # placeholder for a removed task
104
+ self.counter = itertools.count() # unique sequence count for tie-breaking
105
+
106
+ def add_task(self, task: CivicTask):
107
+ """Add a new task or update the priority of an existing task."""
108
+ if task.task_id in self.entry_finder:
109
+ self.remove_task(task.task_id)
110
+
111
+ score = task.compute_priority()
112
+ count = next(self.counter)
113
+
114
+ # Store negative score so the smallest (most negative) bubbles to the top
115
+ entry = [-score, count, task]
116
+ self.entry_finder[task.task_id] = entry
117
+ heapq.heappush(self.pq, entry)
118
+
119
+ def remove_task(self, task_id: str):
120
+ """Mark an existing task as REMOVED. Doesn't break heap structure."""
121
+ entry = self.entry_finder.pop(task_id, None)
122
+ if entry is not None:
123
+ entry[-1] = self.REMOVED
124
+
125
+ def pop_task(self) -> Optional[CivicTask]:
126
+ """Remove and return the lowest priority task. Raises KeyError if empty."""
127
+ while self.pq:
128
+ score, count, task = heapq.heappop(self.pq)
129
+ if task is not self.REMOVED:
130
+ del self.entry_finder[task.task_id]
131
+ return task
132
+ return None
133
+
134
+ def peek_top(self) -> Optional[CivicTask]:
135
+ """Look at the highest priority task without removing it."""
136
+ while self.pq:
137
+ score, count, task = self.pq[0]
138
+ if task is not self.REMOVED:
139
+ return task
140
+ heapq.heappop(self.pq) # Clean up removed items floating at the top
141
+ return None
142
+
143
+ def reprioritize_all(self):
144
+ """Re-evaluate all priority scores. Required when time passes (SLA changes)."""
145
+ valid_tasks = [entry[-1] for entry in self.entry_finder.values() if entry[-1] is not self.REMOVED]
146
+ self.pq = []
147
+ self.entry_finder = {}
148
+ for task in valid_tasks:
149
+ self.add_task(task)
150
+
151
+ def get_sorted_tasks(self) -> List[CivicTask]:
152
+ """Return all valid tasks sorted by priority (Read-only, doesn't pop)."""
153
+ valid_entries = [e for e in self.entry_finder.values() if e[-1] is not self.REMOVED]
154
+ valid_entries.sort(key=lambda x: (x[0], x[1]))
155
+ return [e[-1] for e in valid_entries]
156
+
157
+
158
class CivicDispatchSystem:
    """Routes every task into a global queue plus per-ward and per-contractor queues."""
    def __init__(self):
        self.global_queue = PriorityQueue("Global Queue")
        self.ward_queues: Dict[str, PriorityQueue] = {}
        self.contractor_queues: Dict[str, PriorityQueue] = {}
        self.task_registry: Dict[str, CivicTask] = {}

    def _ward_queue(self, ward: str) -> PriorityQueue:
        """Fetch (creating on demand) the priority queue for *ward*."""
        if ward not in self.ward_queues:
            self.ward_queues[ward] = PriorityQueue(f"Ward-{ward}")
        return self.ward_queues[ward]

    def _contractor_queue(self, contractor_id: str) -> PriorityQueue:
        """Fetch (creating on demand) the priority queue for *contractor_id*."""
        if contractor_id not in self.contractor_queues:
            self.contractor_queues[contractor_id] = PriorityQueue(f"Contractor-{contractor_id}")
        return self.contractor_queues[contractor_id]

    def add_task(self, task: CivicTask):
        """Register *task* and insert it into the global, ward, and contractor queues."""
        self.task_registry[task.task_id] = task
        self.global_queue.add_task(task)
        self._ward_queue(task.ward).add_task(task)
        self._contractor_queue(task.contractor_id).add_task(task)

    def get_next_task(self) -> Optional[CivicTask]:
        """Pop and return the highest-priority task system-wide (None if empty)."""
        task = self.global_queue.pop_task()
        if task:
            self._sync_removals(task.task_id, task.ward, task.contractor_id)
        return task

    def remove_task(self, task_id: str):
        """Delete a task from the registry and every queue it appears in."""
        task = self.task_registry.get(task_id)
        if task is not None:
            self.global_queue.remove_task(task_id)
            self._sync_removals(task_id, task.ward, task.contractor_id)

    def _sync_removals(self, task_id: str, ward: str, contractor_id: str):
        """Drop a task from the registry and its ward/contractor sub-queues."""
        self.task_registry.pop(task_id, None)
        if ward in self.ward_queues:
            self.ward_queues[ward].remove_task(task_id)
        if contractor_id in self.contractor_queues:
            self.contractor_queues[contractor_id].remove_task(task_id)

    def update_task(self, task_id: str, updates: dict):
        """Apply attribute updates to a task and re-queue it to refresh priorities."""
        task = self.task_registry.get(task_id)
        if task is None:
            return
        for attr, value in updates.items():
            if hasattr(task, attr):
                setattr(task, attr, value)
        self.add_task(task)  # re-insertion recomputes the stored priority

    def reprioritize_system(self):
        """Recompute priorities in every queue (call after bulk or time-based changes)."""
        self.global_queue.reprioritize_all()
        for queue in self.ward_queues.values():
            queue.reprioritize_all()
        for queue in self.contractor_queues.values():
            queue.reprioritize_all()
216
+
217
+
218
+ # =============================================================================
219
+ # SIMULATION ENGINE
220
+ # =============================================================================
221
+
222
def generate_random_tasks(num_tasks: int) -> List[CivicTask]:
    """Create *num_tasks* randomized CivicTask records for simulation runs.

    Random draws are made in a fixed order per task (score, created_at,
    days_pending, ward, contractor, escalation, reopen) so a seeded run is
    reproducible.
    """
    wards = ["North", "South", "East", "West", "Central"]
    contractors = ["AlphaRepairs", "CityFix", "OmegaPaving"]

    tasks = []
    for idx in range(num_tasks):
        sev = round(random.uniform(0.1, 0.95), 2)
        if sev > 0.66:
            sev_label = "High"
        elif sev > 0.33:
            sev_label = "Medium"
        else:
            sev_label = "Low"

        tasks.append(CivicTask(
            task_id=f"TSK-{idx:04d}",
            severity_score=sev,
            severity_label=sev_label,
            created_at=datetime.now() - timedelta(days=random.randint(0, 10)),
            days_pending=random.randint(0, 15),
            sla_days=10,
            ward=random.choice(wards),
            contractor_id=random.choice(contractors),
            is_escalated=random.random() > 0.85,  # ~15% of tasks are escalated
            reopen_count=random.randint(0, 5) if random.random() > 0.8 else 0,
        ))
    return tasks
245
+
246
+
247
def run_simulation():
    """Demo driver: load 50 random tasks, then walk through the main scenarios
    (time passing / SLA breaches, an emergency override, and a per-contractor view),
    printing the queue state at each step."""
    print("="*70)
    print("  πŸš€ INITIALIZING SYSTEM & INSERTING TASKS")
    print("="*70)
    system = CivicDispatchSystem()
    tasks = generate_random_tasks(50)

    for t in tasks:
        system.add_task(t)

    print(f"βœ… Loaded {len(tasks)} tasks.")

    print("\n" + "="*70)
    print("  πŸ† TOP 10 TASKS IN GLOBAL QUEUE")
    print("="*70)
    # Non-destructive view: get_sorted_tasks() does not pop from the heap.
    top_tasks = system.global_queue.get_sorted_tasks()[:10]
    for idx, t in enumerate(top_tasks, start=1):
        score = t.compute_priority()
        print(f"{idx:-2d} | [{score:.4f}] {t.task_id:<8} | Sev: {t.severity_score:.2f} ({t.severity_label:<6}) | "
              f"Wait: {t.days_pending}/{t.sla_days}d | {t.get_priority_reason()}")

    print("\n" + "="*70)
    print("  ⏱️ SIMULATING TIME PASSING (+5 DAYS)")
    print("="*70)
    # Fast forward 5 days for all tasks left in queue
    for task in system.task_registry.values():
        task.days_pending += 5
    # Heap entries cache priorities, so a bulk mutation requires a full rebuild.
    system.reprioritize_system()

    print("Re-evaluating priorities after SLA changes...\n")
    new_top = system.global_queue.peek_top()
    print(f"πŸ†• NEW TOP TASK: {new_top.task_id} (Score: {new_top.compute_priority():.4f})")
    print(f"Reason: {new_top.get_priority_reason()}")

    print("\n" + "="*70)
    print("  πŸ”₯ SIMULATING EMERGENCY OVERRIDE")
    print("="*70)
    # Pick a random low priority task and make it an emergency
    low_priority_task = system.global_queue.get_sorted_tasks()[-1]
    print(f"Targeting bottom task {low_priority_task.task_id} (Score: {low_priority_task.compute_priority():.4f})")

    # emergency_override=True makes compute_priority() return +inf.
    system.update_task(low_priority_task.task_id, {"emergency_override": True})

    emergency_top = system.global_queue.peek_top()
    print(f"🚨 CURRENT TOP TASK: {emergency_top.task_id} (Score: {emergency_top.compute_priority()})")
    print(f"Reason: {emergency_top.get_priority_reason()}")

    print("\n" + "="*70)
    print("  πŸ‘· PROCESSING TASKS BY CONTRACTOR (AlphaRepairs)")
    print("="*70)
    alpha_q = system.contractor_queues.get("AlphaRepairs")
    if alpha_q:
        c_tasks = alpha_q.get_sorted_tasks()[:5]
        for t in c_tasks:
            print(f"[{t.compute_priority():.4f}] {t.task_id} | {t.get_priority_reason()}")

if __name__ == "__main__":
    run_simulation()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ numpy
2
+ pandas
3
+ scikit-learn
4
+ xgboost
5
+ joblib
6
+ gradio
7
+ shap
8
+ matplotlib
severity_model.json ADDED
The diff for this file is too large to render. See raw diff
 
severity_model_pipeline.py ADDED
@@ -0,0 +1,550 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ =============================================================================
3
+ CIVIC ISSUE DETECTION β€” POTHOLE SEVERITY SCORING PIPELINE
4
+ =============================================================================
5
+ Produces a trained XGBoost regression model that predicts severity S ∈ [0,1]
6
+ from 10 engineered features derived from a civic-issue detection system.
7
+
8
+ Pipeline Stages
9
+ ---------------
10
+ 1. Synthetic dataset generation (10 000 samples, realistic distributions)
11
+ 2. Ground-truth severity formula (weighted sum + infrastructure boost + noise)
12
+ 3. Model training (XGBoost Regressor, 80/20 split)
13
+ 4. Evaluation (RMSE, MAE, RΒ²)
14
+ 5. Interpretability (SHAP summary + top-feature analysis)
15
+ 6. Artefact export (severity_model.json, scaler, feature list)
16
+ 7. Inference function (predict_severity β†’ score + label)
17
+ =============================================================================
18
+ """
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Imports
22
+ # ---------------------------------------------------------------------------
23
+ import json
24
+ import os
25
+ import warnings
26
+
27
+ import matplotlib.pyplot as plt
28
+ import numpy as np
29
+ import pandas as pd
30
+ import shap
31
+ import xgboost as xgb
32
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
33
+ from sklearn.model_selection import train_test_split
34
+ from sklearn.preprocessing import MinMaxScaler
35
+ import joblib
36
+
37
# NOTE(review): blanket warning suppression hides real deprecations; consider narrowing.
warnings.filterwarnings("ignore")

# Ensure reproducible results
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # legacy global seed; functions below also pass seeds to default_rng
42
+
43
+
44
+ # =============================================================================
45
+ # STEP 1 β€” GENERATE SYNTHETIC DATASET
46
+ # =============================================================================
47
+
48
def generate_synthetic_dataset(n_samples: int = 10_000, seed: int = RANDOM_SEED) -> pd.DataFrame:
    """
    Generate a synthetic dataset with realistic feature distributions for
    pothole severity modelling.

    Feature definitions (all in [0, 1]):
        A β€” defect area ratio
        D β€” defect density
        C β€” centrality (closeness to road centre)
        Q β€” detection confidence
        M β€” multi-user confirmation score
        T β€” temporal persistence
        R β€” traffic importance (road hierarchy)
        P β€” proximity to critical infrastructure
        F β€” recurrence frequency
        X β€” resolution failure score

    Parameters
    ----------
    n_samples : number of rows to draw
    seed : seed for the NumPy Generator, for reproducibility
    """
    rng = np.random.default_rng(seed)
    n = n_samples

    # Columns are inserted in canonical feature order; each draw below
    # happens in the same sequence as before so seeded runs are unchanged.
    columns = {}

    # A: skewed small (most potholes are small)
    columns["A"] = rng.beta(2, 8, n)
    # D: low-to-moderate, sparse
    columns["D"] = rng.beta(1.5, 6, n)
    # C: uniform β€” a pothole can sit anywhere laterally
    columns["C"] = rng.uniform(0, 1, n)
    # Q: high-biased (detectors are usually confident)
    columns["Q"] = rng.beta(8, 2, n)
    # M: sparse confirmations
    columns["M"] = rng.beta(1.2, 8, n)
    # T: right-skewed (few very old issues)
    columns["T"] = rng.beta(1.5, 5, n)

    # R: categorical road hierarchy mapped to numeric levels
    road_levels = rng.choice(
        [1.0, 0.7, 0.4],         # highway, main road, local street
        size=n,
        p=[0.10, 0.35, 0.55],    # realistic road-type proportions
    )
    columns["R"] = road_levels.astype(float)

    # P: mostly low, few high
    columns["P"] = rng.beta(1, 10, n)
    # F: low recurrence frequency
    columns["F"] = rng.beta(1.2, 9, n)
    # X: very low resolution-failure rate
    columns["X"] = rng.beta(1, 15, n)

    return pd.DataFrame(columns)
118
+
119
+
120
+ # =============================================================================
121
+ # STEP 2 β€” GROUND-TRUTH SEVERITY FORMULA
122
+ # =============================================================================
123
+
124
def compute_severity(df: pd.DataFrame, noise_std: float = 0.03, seed: int = RANDOM_SEED) -> pd.Series:
    """
    Compute ground-truth severity scores.

    S_base is a weighted sum of the ten features (weights below sum to 1.0),
    boosted by the infrastructure-proximity multiplier K = 1 + 0.5 * P,
    perturbed with Gaussian noise of std *noise_std*, and clamped into [0, 1].

    Returns a Series named "severity" aligned with *df*'s index.
    """
    rng = np.random.default_rng(seed)

    # Per-feature weights for the severity base, in canonical feature order.
    weights = {
        "A": 0.28, "D": 0.10, "C": 0.14, "Q": 0.04, "M": 0.08,
        "T": 0.07, "R": 0.09, "P": 0.10, "F": 0.06, "X": 0.04,
    }
    s_base = sum(w * df[col] for col, w in weights.items())

    # Critical-infrastructure proximity multiplier
    k = 1 + 0.5 * df["P"]

    # Boost, add Gaussian noise, clamp to the valid range.
    noise = rng.normal(loc=0, scale=noise_std, size=len(df))
    s = np.clip(s_base * k + noise, 0, 1)

    return pd.Series(s, name="severity", index=df.index)
165
+
166
+
167
+ # =============================================================================
168
+ # STEP 3 β€” TRAIN XGBOOST MODEL
169
+ # =============================================================================
170
+
171
# Canonical feature ordering shared by training and inference.
FEATURE_COLS = ["A", "D", "C", "Q", "M", "T", "R", "P", "F", "X"]

def build_and_train_model(
    X_train: np.ndarray,
    y_train: np.ndarray,
    seed: int = RANDOM_SEED,
) -> xgb.XGBRegressor:
    """
    Train an XGBoost regressor with fixed hyperparameters on the training split.

    No tuning loop is performed here; wrap with GridSearchCV / Optuna for
    production hyper-parameter optimisation.
    """
    hyperparams = {
        "objective": "reg:squarederror",
        "n_estimators": 200,
        "max_depth": 5,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": seed,
        "verbosity": 0,
        "n_jobs": -1,
    }
    model = xgb.XGBRegressor(**hyperparams)

    print("── Training XGBoost Regressor …")
    model.fit(X_train, y_train)
    print("   Training complete.\n")
    return model
200
+
201
+
202
+ # =============================================================================
203
+ # STEP 4 β€” EVALUATION
204
+ # =============================================================================
205
+
206
def evaluate_model(
    model: xgb.XGBRegressor,
    X_test: np.ndarray,
    y_test: np.ndarray,
    feature_names: list[str],
) -> dict:
    """
    Score the model on the held-out split and print a summary.

    Prints RMSE, MAE, RΒ² and a gain-based feature-importance ranking; returns
    a dict with keys ``rmse``, ``mae``, ``r2`` and ``importance`` (a DataFrame
    sorted by descending importance).
    """
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("=" * 50)
    print("  MODEL EVALUATION METRICS")
    print("=" * 50)
    print(f"  RMSE : {rmse:.6f}")
    print(f"  MAE  : {mae:.6f}")
    print(f"  RΒ²   : {r2:.6f}")
    print("=" * 50)

    # Gain-based importances, highest first.
    importance_df = (
        pd.DataFrame({"Feature": feature_names, "Importance": model.feature_importances_})
        .sort_values("Importance", ascending=False)
        .reset_index(drop=True)
    )

    print("\n  FEATURE IMPORTANCE RANKING (gain)")
    print("  " + "-" * 36)
    for _, row in importance_df.iterrows():
        bar = "β–ˆ" * int(row["Importance"] * 100)
        print(f"  {row['Feature']:>3}  {row['Importance']:.4f}  {bar}")
    print()

    return {"rmse": rmse, "mae": mae, "r2": r2, "importance": importance_df}
246
+
247
+
248
+ # =============================================================================
249
+ # STEP 5 β€” SHAP INTERPRETABILITY
250
+ # =============================================================================
251
+
252
def run_shap_analysis(
    model: xgb.XGBRegressor,
    X_test: np.ndarray,
    feature_names: list[str],
    output_dir: str = ".",
) -> None:
    """
    Generate SHAP summary plots (bar + beeswarm) and print the mean |SHAP|
    feature ranking.

    Saves shap_bar_plot.png and shap_dot_plot.png into *output_dir*, then
    checks whether A, C, P β€” the heaviest ground-truth weights β€” dominate
    the top-3 SHAP features.
    """
    print("── Running SHAP analysis …")

    # TreeExplainer computes exact SHAP values for tree ensembles like XGBoost.
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)

    # ── Summary bar plot ──────────────────────────────────────────────────
    plt.figure(figsize=(10, 6))
    shap.summary_plot(
        shap_values,
        X_test,
        feature_names=feature_names,
        plot_type="bar",
        show=False,  # keep the figure open so we can retitle and save it
    )
    plt.title("SHAP Feature Importance β€” Mean |SHAP value|", fontsize=14, fontweight="bold")
    plt.tight_layout()
    bar_path = os.path.join(output_dir, "shap_bar_plot.png")
    plt.savefig(bar_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"   Saved: {bar_path}")

    # ── Beeswarm / dot summary plot ───────────────────────────────────────
    plt.figure(figsize=(10, 6))
    shap.summary_plot(
        shap_values,
        X_test,
        feature_names=feature_names,
        show=False,
    )
    plt.title("SHAP Summary Plot β€” Impact on Severity Score", fontsize=14, fontweight="bold")
    plt.tight_layout()
    dot_path = os.path.join(output_dir, "shap_dot_plot.png")
    plt.savefig(dot_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"   Saved: {dot_path}\n")

    # ── Mean |SHAP| ranking ───────────────────────────────────────────────
    mean_shap = np.abs(shap_values).mean(axis=0)
    shap_df = (
        pd.DataFrame({"Feature": feature_names, "Mean|SHAP|": mean_shap})
        .sort_values("Mean|SHAP|", ascending=False)
        .reset_index(drop=True)
    )

    print("  SHAP MEAN |VALUE| RANKING")
    print("  " + "-" * 36)
    top3 = shap_df["Feature"].head(3).tolist()
    for rank, (_, row) in enumerate(shap_df.iterrows(), start=1):
        # Tag the features the ground-truth formula weights most heavily.
        tag = " β—€ dominant" if row["Feature"] in ["A", "C", "P"] else ""
        print(f"  #{rank:<2} {row['Feature']:>3}  {row['Mean|SHAP|']:.5f}{tag}")
    print()

    # Verify dominance of A, C, P
    expected_dominant = {"A", "C", "P"}
    actual_top3 = set(top3)
    overlap = expected_dominant & actual_top3
    if len(overlap) >= 2:
        print(f"  βœ… Dominance check PASSED β€” {overlap} appear in top-3 SHAP features.")
    else:
        print(f"  ⚠️ Dominance check NOTE β€” top-3 are {top3}; "
              "model learned different patterns from the data.")
    print()
324
+
325
+
326
# =============================================================================
# STEP 6 — SAVE MODEL & ARTEFACTS
# =============================================================================

def save_artefacts(
    model: xgb.XGBRegressor,
    scaler: MinMaxScaler | None,
    feature_names: list[str],
    output_dir: str = ".",
) -> None:
    """
    Persist the trained pipeline components to *output_dir*.

    Written artefacts:
        severity_model.json — XGBoost model (native JSON format)
        feature_scaler.pkl  — fitted MinMaxScaler (or a None sentinel)
        feature_list.json   — ordered list of feature names
    """
    os.makedirs(output_dir, exist_ok=True)

    # Model in XGBoost's portable native-JSON format.
    model_path = os.path.join(output_dir, "severity_model.json")
    model.save_model(model_path)
    print(f"── Model saved: {model_path}")

    # Scaler pickled via joblib; a None scaler is dumped as-is as a sentinel.
    scaler_path = os.path.join(output_dir, "feature_scaler.pkl")
    joblib.dump(scaler, scaler_path)
    print(f"── Scaler saved: {scaler_path}")

    # Ordered feature names so inference can rebuild the input vector exactly.
    feature_path = os.path.join(output_dir, "feature_list.json")
    with open(feature_path, "w") as fp:
        json.dump(feature_names, fp, indent=2)
    print(f"── Feature list saved: {feature_path}\n")

# =============================================================================
# STEP 7 — INFERENCE FUNCTION
# =============================================================================

def load_inference_artefacts(
    model_path: str = "severity_model.json",
    scaler_path: str = "feature_scaler.pkl",
    feature_list_path: str = "feature_list.json",
) -> tuple[xgb.XGBRegressor, MinMaxScaler | None, list[str]]:
    """
    Reload the persisted model, scaler, and feature list for inference.

    Counterpart of ``save_artefacts``; the default paths match the
    filenames that function writes.
    """
    # Build an empty regressor, then hydrate it from the native-JSON dump.
    regressor = xgb.XGBRegressor()
    regressor.load_model(model_path)

    fitted_scaler = joblib.load(scaler_path)

    with open(feature_list_path) as fp:
        names = json.load(fp)

    return regressor, fitted_scaler, names

+ def _severity_label(score: float) -> str:
383
+ """
384
+ Assign a human-readable label to a numeric severity score.
385
+
386
+ Thresholds (domain-tunable):
387
+ Low : score < 0.33
388
+ Medium : 0.33 ≀ score < 0.66
389
+ High : score β‰₯ 0.66
390
+ """
391
+ if score < 0.33:
392
+ return "Low"
393
+ elif score < 0.66:
394
+ return "Medium"
395
+ else:
396
+ return "High"
397
+
398
+
399
def predict_severity(
    features_dict: dict,
    model: xgb.XGBRegressor,
    scaler: MinMaxScaler | None,
    feature_names: list[str],
) -> dict:
    """
    Score a single pothole observation.

    Parameters
    ----------
    features_dict : dict
        Maps every name in *feature_names* to a raw (pre-scaling) float.
    model : trained XGBRegressor
    scaler : fitted MinMaxScaler, or None when inputs are already scaled
    feature_names : ordered list of feature column names

    Returns
    -------
    dict
        "score" : float — predicted severity in [0, 1], rounded to 4 dp
        "label" : str   — "Low" | "Medium" | "High"

    Raises
    ------
    ValueError
        If any required feature is absent from *features_dict*.
    """
    # Fail fast on incomplete input.
    missing = set(feature_names) - set(features_dict)
    if missing:
        raise ValueError(f"Missing features in input dict: {missing}")

    # Assemble a single-row matrix in the canonical feature order.
    vector = np.array(
        [[features_dict[name] for name in feature_names]], dtype=np.float32
    )

    # Normalise with the training-time scaler when one is supplied.
    if scaler is not None:
        vector = scaler.transform(vector)

    # Model output, clamped into the valid [0, 1] severity range.
    raw = float(model.predict(vector)[0])
    clamped = min(1.0, max(0.0, raw))

    return {"score": round(clamped, 4), "label": _severity_label(clamped)}

# =============================================================================
# MAIN PIPELINE RUNNER
# =============================================================================

def main(output_dir: str = ".") -> None:
    """
    Run the full pothole-severity pipeline end-to-end.

    Steps:
        1. generate the synthetic dataset (and persist it as CSV)
        2. fit a MinMaxScaler over the feature columns
        3. split into 80/20 train/test sets
        4. train the XGBoost regressor
        5. evaluate on the held-out test set
        6. run SHAP interpretability and save plots
        7. save model artefacts, then print a few sample predictions

    Parameters
    ----------
    output_dir : str
        Directory that receives every generated file (dataset, SHAP plots,
        model, scaler, feature list).
    """
    print("\n" + "=" * 60)
    print(" CIVIC POTHOLE SEVERITY SCORING — FULL ML PIPELINE")
    print("=" * 60 + "\n")

    # ── 1. Generate dataset ──────────────────────────────────────────────
    print("── [1/7] Generating synthetic dataset …")
    df = generate_synthetic_dataset(n_samples=10_000)
    y = compute_severity(df)

    # Save the dataset for persistence/user inspection.
    full_dataset = df.copy()
    full_dataset["severity"] = y
    dataset_path = os.path.join(output_dir, "synthetic_pothole_data.csv")
    full_dataset.to_csv(dataset_path, index=False)

    print(f" Dataset shape : {df.shape}")
    print(f" Dataset saved to: {dataset_path}")
    print(f" Severity stats: mean={y.mean():.4f}, std={y.std():.4f}, "
          f"min={y.min():.4f}, max={y.max():.4f}\n")

    # ── 2. Feature scaling ───────────────────────────────────────────────
    print("── [2/7] Scaling features (MinMaxScaler) …")
    # NOTE: Features are already in [0, 1] by construction, but we fit a
    # scaler so the inference function can handle raw un-normalised inputs
    # if the production system requires it.
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(df[FEATURE_COLS])
    print(" Scaling complete.\n")

    # ── 3. Train / test split ────────────────────────────────────────────
    print("── [3/7] Splitting data (80 % train / 20 % test) …")
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.20, random_state=RANDOM_SEED
    )
    print(f" Train samples : {len(X_train)}")
    print(f" Test samples : {len(X_test)}\n")

    # ── 4. Train model ───────────────────────────────────────────────────
    print("── [4/7] Training model …")
    model = build_and_train_model(X_train, y_train)

    # ── 5. Evaluate ──────────────────────────────────────────────────────
    print("── [5/7] Evaluating model …\n")
    # Metrics are printed by evaluate_model itself; the return value was
    # previously bound to an unused local, so it is no longer captured.
    evaluate_model(model, X_test, y_test, FEATURE_COLS)

    # ── 6. SHAP ──────────────────────────────────────────────────────────
    print("── [6/7] SHAP interpretability …\n")
    run_shap_analysis(model, X_test, FEATURE_COLS, output_dir=output_dir)

    # ── 7. Save artefacts ────────────────────────────────────────────────
    print("── [7/7] Saving model artefacts …")
    save_artefacts(model, scaler, FEATURE_COLS, output_dir=output_dir)

    # ── Sample predictions ───────────────────────────────────────────────
    print("=" * 60)
    print(" SAMPLE PREDICTIONS")
    print("=" * 60)

    # Hand-crafted cases spanning the severity spectrum; values are ordered
    # to match FEATURE_COLS.
    sample_cases = [
        {
            "name": "Minor Local-Street Pothole",
            "features": dict(zip(FEATURE_COLS,
                [0.05, 0.08, 0.30, 0.90, 0.05, 0.10, 0.40, 0.02, 0.03, 0.01])),
        },
        {
            "name": "Moderate Main-Road Pothole",
            "features": dict(zip(FEATURE_COLS,
                [0.25, 0.20, 0.55, 0.75, 0.35, 0.40, 0.70, 0.15, 0.20, 0.10])),
        },
        {
            "name": "Severe Highway near Hospital",
            "features": dict(zip(FEATURE_COLS,
                [0.70, 0.55, 0.85, 0.95, 0.80, 0.75, 1.00, 0.90, 0.65, 0.40])),
        },
        {
            "name": "Recurring Pothole (high reopen)",
            "features": dict(zip(FEATURE_COLS,
                [0.40, 0.35, 0.60, 0.80, 0.50, 0.85, 0.70, 0.30, 0.75, 0.80])),
        },
    ]

    for case in sample_cases:
        result = predict_severity(
            features_dict=case["features"],
            model=model,
            scaler=scaler,
            feature_names=FEATURE_COLS,
        )
        print(f"\n 📍 {case['name']}")
        feature_str = ", ".join(f"{k}={v}" for k, v in case["features"].items())
        print(f" Features : {feature_str}")
        print(f" Score : {result['score']:.4f}")
        print(f" Label : {result['label']}")

    print("\n" + "=" * 60)
    print(" PIPELINE COMPLETE")
    print(f" Output artefacts → {os.path.abspath(output_dir)}")
    print("=" * 60 + "\n")

if __name__ == "__main__":
    # All artefacts (dataset, plots, model files) land next to this script.
    _script_path = os.path.abspath(__file__)
    OUTPUT_DIR = os.path.dirname(_script_path)
    main(output_dir=OUTPUT_DIR)
shap_bar_plot.png ADDED
shap_dot_plot.png ADDED

Git LFS Details

  • SHA256: ed5f94e557b4cc5eafd8a992a94cb75db874fc4c8e79c8a237947e5b003fc7d7
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
simulation_output.txt ADDED
Binary file (5.09 kB). View file
 
synthetic_pothole_data.csv ADDED
The diff for this file is too large to render. See raw diff