Esvanth committed on
Commit
efe8b7d
·
1 Parent(s): 512b5c0

Save changes before rebase

Browse files
task2_segmentation.py CHANGED
@@ -3,8 +3,6 @@ EcoCart Customer Segmentation — Bias Detection & Mitigation
3
  Task 2 — Demonstrates urban-rural bias in K-Means segmentation and
4
  applies reweighing to fix it.
5
 
6
- NCI MSCAI | Fundamentals of AI TABA 2026
7
-
8
  Run: python3 task2_segmentation.py
9
  Out: bias_before_after.png, disparate_impact.png
10
  """
@@ -17,8 +15,7 @@ from sklearn.preprocessing import StandardScaler
17
 
18
  RNG = np.random.default_rng(42)
19
 
20
-
21
- # ── 1. Generate biased customer data ────────────────────────
22
  # Urban customers have more data, higher frequency, higher spend — mimicking
23
  # a real scenario where the platform launched in cities first.
24
 
@@ -60,7 +57,6 @@ def segment(df, features=["freq", "spend", "recency"]):
60
  df["segment"] = df["cluster"].map(label_map)
61
  return df
62
 
63
-
64
  # ── 3. Bias metrics ────────────────────────────────────────
65
  def compute_fairness(df):
66
  urban = df[df.region == "urban"]
@@ -75,7 +71,6 @@ def compute_fairness(df):
75
  "fair": di >= 0.8,
76
  }
77
 
78
-
79
  # ── 4. Mitigation: reweigh + balanced re-sample ────────────
80
  def mitigate(df):
81
  """
@@ -126,24 +121,20 @@ def mitigate(df):
126
  target_rural_high = int(target_rate * n_rural)
127
  current_rural_high = ((balanced[rural_mask].segment == "High Value")).sum()
128
  need = target_rural_high - current_rural_high
129
-
130
  if need > 0:
131
  # Promote from Medium first, then Low Value
132
  candidates = balanced[rural_mask & (balanced.segment != "High Value")]
133
  if len(candidates) > 0:
134
  promote = candidates.nlargest(min(need, len(candidates)), "adj_spend").index
135
  balanced.loc[promote, "segment"] = "High Value"
136
-
137
  return balanced
138
 
139
 
140
  # ── 5. Plots ────────────────────────────────────────────────
141
  SEG_COLORS = {"High Value": "#10b981", "Medium": "#f59e0b", "Low Value": "#ef4444"}
142
-
143
  def plot_before_after(before_df, after_df, before_fair, after_fair):
144
  fig, axes = plt.subplots(1, 2, figsize=(14, 5.5))
145
  fig.patch.set_facecolor("#0d1117")
146
-
147
  for ax, df, fair, title in [
148
  (axes[0], before_df, before_fair, "BEFORE mitigation (biased)"),
149
  (axes[1], after_df, after_fair, "AFTER mitigation (reweighed + adjusted)"),
@@ -172,18 +163,15 @@ def plot_before_after(before_df, after_df, before_fair, after_fair):
172
  bbox_inches="tight", facecolor="#0d1117")
173
  plt.close()
174
 
175
-
176
  def plot_di(before_fair, after_fair):
177
  fig, ax = plt.subplots(figsize=(8, 4))
178
  fig.patch.set_facecolor("#0d1117")
179
  ax.set_facecolor("#0d1117")
180
-
181
  cats = ["Urban β†’ High", "Rural β†’ High", "Disparate Impact"]
182
  before_vals = [before_fair["urban_high_pct"], before_fair["rural_high_pct"],
183
  before_fair["disparate_impact"] * 100]
184
  after_vals = [after_fair["urban_high_pct"], after_fair["rural_high_pct"],
185
  after_fair["disparate_impact"] * 100]
186
-
187
  x = range(len(cats))
188
  w = 0.35
189
  ax.bar([i - w/2 for i in x], before_vals, w, label="Before", color="#ef4444", alpha=0.85)
 
3
  Task 2 — Demonstrates urban-rural bias in K-Means segmentation and
4
  applies reweighing to fix it.
5
 
 
 
6
  Run: python3 task2_segmentation.py
7
  Out: bias_before_after.png, disparate_impact.png
8
  """
 
15
 
16
  RNG = np.random.default_rng(42)
17
 
18
+ # 1. Generate biased customer data
 
19
  # Urban customers have more data, higher frequency, higher spend — mimicking
20
  # a real scenario where the platform launched in cities first.
21
 
 
57
  df["segment"] = df["cluster"].map(label_map)
58
  return df
59
 
 
60
  # ── 3. Bias metrics ────────────────────────────────────────
61
  def compute_fairness(df):
62
  urban = df[df.region == "urban"]
 
71
  "fair": di >= 0.8,
72
  }
73
 
 
74
  # ── 4. Mitigation: reweigh + balanced re-sample ────────────
75
  def mitigate(df):
76
  """
 
121
  target_rural_high = int(target_rate * n_rural)
122
  current_rural_high = ((balanced[rural_mask].segment == "High Value")).sum()
123
  need = target_rural_high - current_rural_high
 
124
  if need > 0:
125
  # Promote from Medium first, then Low Value
126
  candidates = balanced[rural_mask & (balanced.segment != "High Value")]
127
  if len(candidates) > 0:
128
  promote = candidates.nlargest(min(need, len(candidates)), "adj_spend").index
129
  balanced.loc[promote, "segment"] = "High Value"
 
130
  return balanced
131
 
132
 
133
  # ── 5. Plots ────────────────────────────────────────────────
134
  SEG_COLORS = {"High Value": "#10b981", "Medium": "#f59e0b", "Low Value": "#ef4444"}
 
135
  def plot_before_after(before_df, after_df, before_fair, after_fair):
136
  fig, axes = plt.subplots(1, 2, figsize=(14, 5.5))
137
  fig.patch.set_facecolor("#0d1117")
 
138
  for ax, df, fair, title in [
139
  (axes[0], before_df, before_fair, "BEFORE mitigation (biased)"),
140
  (axes[1], after_df, after_fair, "AFTER mitigation (reweighed + adjusted)"),
 
163
  bbox_inches="tight", facecolor="#0d1117")
164
  plt.close()
165
 
 
166
  def plot_di(before_fair, after_fair):
167
  fig, ax = plt.subplots(figsize=(8, 4))
168
  fig.patch.set_facecolor("#0d1117")
169
  ax.set_facecolor("#0d1117")
 
170
  cats = ["Urban β†’ High", "Rural β†’ High", "Disparate Impact"]
171
  before_vals = [before_fair["urban_high_pct"], before_fair["rural_high_pct"],
172
  before_fair["disparate_impact"] * 100]
173
  after_vals = [after_fair["urban_high_pct"], after_fair["rural_high_pct"],
174
  after_fair["disparate_impact"] * 100]
 
175
  x = range(len(cats))
176
  w = 0.35
177
  ax.bar([i - w/2 for i in x], before_vals, w, label="Before", color="#ef4444", alpha=0.85)
task3_4_routing.py CHANGED
@@ -2,9 +2,6 @@
2
  EcoCart Route Optimisation Prototype
3
  Tasks 3 & 4 — BFS, DFS, A*, IDA* on a weighted delivery network
4
  + Green Routing mode (CO2-weighted edges for sustainability)
5
-
6
- NCI MSCAI | Fundamentals of AI TABA 2026
7
-
8
  Run: python3 task3_4_routing.py
9
  Out: network_map.png, algo_comparison.png, green_vs_fast.png
10
  """
@@ -28,10 +25,8 @@ NODES = {
28
  "R7":(6.5,6.0,"rural"),"R8":(9.0,7.0,"rural"),"R9":(11.0,6.0,"rural"),
29
  "R10":(8.0,5.5,"rural"),
30
  }
31
-
32
  def _dist(a, b):
33
  return math.hypot(NODES[a][0]-NODES[b][0], NODES[a][1]-NODES[b][1])
34
-
35
  _PAIRS = [
36
  ("U1","U2"),("U2","U3"),("U1","U4"),("U2","U4"),("U2","U5"),
37
  ("U3","U6"),("U4","U5"),("U5","U6"),("U4","U7"),("U5","U8"),
@@ -41,10 +36,8 @@ _PAIRS = [
41
  ("R7","R8"),("R8","R9"),("R6","R9"),("R8","R10"),("R5","R8"),
42
  ("U3","R1"),("U10","R4"),("U6","R1"),("U9","R7"),
43
  ]
44
-
45
- # Road distance ≈ 1.15× straight-line
46
  EDGES = [(a, b, round(_dist(a,b)*1.15, 2)) for a, b in _PAIRS]
47
-
48
  # CO2 cost per edge: urban roads have traffic → higher emissions per km
49
  # Rural roads: 0.12 kg CO2/km; Urban roads: 0.21 kg CO2/km
50
  def _co2(a, b, km):
@@ -53,7 +46,6 @@ def _co2(a, b, km):
53
  return round(km * rate, 3)
54
 
55
  CO2_EDGES = [(a, b, _co2(a, b, w)) for a, b, w in EDGES]
56
-
57
  ADJ_KM = {n: [] for n in NODES}
58
  ADJ_CO2 = {n: [] for n in NODES}
59
  for i, (a, b, w) in enumerate(EDGES):
@@ -66,7 +58,6 @@ for i, (a, b, w) in enumerate(EDGES):
66
  # ── 2. Algorithms ───────────────────────────────────────────
67
  def heuristic(n, goal, scale=1.0):
68
  return _dist(n, goal) * scale
69
-
70
  def bfs(start, goal, adj=ADJ_KM):
71
  expanded = 0
72
  q = deque([(start, [start])])
@@ -82,7 +73,6 @@ def bfs(start, goal, adj=ADJ_KM):
82
  seen.add(nb)
83
  q.append((nb, path + [nb]))
84
  return None, math.inf, expanded
85
-
86
  def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
87
  expanded = 0
88
  stack = [(start, [start])]
@@ -100,7 +90,6 @@ def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
100
  seen.add(nb)
101
  stack.append((nb, path + [nb]))
102
  return None, math.inf, expanded
103
-
104
  def astar(start, goal, adj=ADJ_KM, h_scale=1.0):
105
  expanded, counter = 0, 0
106
  heap = [(heuristic(start, goal, h_scale), 0.0, counter, start, [start])]
@@ -143,7 +132,6 @@ def ida_star(start, goal, adj=ADJ_KM, h_scale=1.0):
143
  path.pop()
144
  visited.remove(nb)
145
  return None, nxt
146
-
147
  bound = heuristic(start, goal, h_scale)
148
  while True:
149
  r, t = _dfs(start, 0.0, bound, [start], {start})
@@ -152,7 +140,6 @@ def ida_star(start, goal, adj=ADJ_KM, h_scale=1.0):
152
  if t == math.inf:
153
  return None, math.inf, expanded[0]
154
  bound = t
155
-
156
  def _edge_w(a, b, adj):
157
  for nb, w in adj[a]:
158
  if nb == b:
@@ -179,7 +166,6 @@ def benchmark(algo, start, goal, adj=ADJ_KM, repeats=20):
179
  "cost": cost,
180
  "path": path,
181
  }
182
-
183
  OD_URBAN = [("U1","U10"),("U7","U6"),("U2","U9"),("U1","U9"),("U3","U8")]
184
  OD_RURAL = [("R1","R9"),("R2","R8"),("R3","R10"),("R1","R6"),("R4","R9")]
185
 
@@ -192,7 +178,6 @@ def plot_network():
192
  G.add_edge(a, b, weight=w)
193
  pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
194
  colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
195
-
196
  fig, ax = plt.subplots(figsize=(13, 6))
197
  ax.set_facecolor("#0d1117")
198
  fig.patch.set_facecolor("#0d1117")
@@ -213,7 +198,6 @@ def plot_network():
213
  facecolor="#0d1117")
214
  plt.close()
215
 
216
-
217
  def plot_comparison(results):
218
  metrics = [("Runtime (ms)", "ms"), ("Nodes expanded", "expanded"), ("Peak memory (KB)", "kb")]
219
  fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
@@ -240,30 +224,24 @@ def plot_comparison(results):
240
  plt.savefig("output/algo_comparison.png", dpi=150,
241
  bbox_inches="tight", facecolor="#0d1117")
242
  plt.close()
243
-
244
-
245
  def plot_green_vs_fast():
246
  """Compare fastest route (A* on km) vs greenest route (A* on CO2)."""
247
  pairs = [("U1", "R9"), ("U7", "R6"), ("R1", "U10")]
248
  fig, axes = plt.subplots(1, 3, figsize=(15, 5))
249
  fig.patch.set_facecolor("#0d1117")
250
-
251
  G = nx.Graph()
252
  for n, (x, y, _) in NODES.items():
253
  G.add_node(n, pos=(x, y))
254
  for a, b, w in EDGES:
255
  G.add_edge(a, b)
256
  pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
257
-
258
  for ax, (s, g) in zip(axes, pairs):
259
  ax.set_facecolor("#0d1117")
260
  fast_path, fast_km, _ = astar(s, g, ADJ_KM)
261
  green_path, green_co2, _ = astar(s, g, ADJ_CO2, h_scale=0.10)
262
-
263
  # Compute cross-metrics
264
  fast_co2 = sum(_edge_w(fast_path[i], fast_path[i+1], ADJ_CO2) for i in range(len(fast_path)-1))
265
  green_km = sum(_edge_w(green_path[i], green_path[i+1], ADJ_KM) for i in range(len(green_path)-1))
266
-
267
  colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
268
  nx.draw(G, pos, ax=ax, with_labels=True, node_color=colors,
269
  node_size=300, font_size=7, font_weight="bold",
@@ -290,7 +268,6 @@ def plot_green_vs_fast():
290
  bbox_inches="tight", facecolor="#0d1117")
291
  plt.close()
292
 
293
-
294
  # ── 5. Main ─────────────────────────────────────────────────
295
  def main():
296
  print("="*70)
 
2
  EcoCart Route Optimisation Prototype
3
  Tasks 3 & 4 — BFS, DFS, A*, IDA* on a weighted delivery network
4
  + Green Routing mode (CO2-weighted edges for sustainability)
 
 
 
5
  Run: python3 task3_4_routing.py
6
  Out: network_map.png, algo_comparison.png, green_vs_fast.png
7
  """
 
25
  "R7":(6.5,6.0,"rural"),"R8":(9.0,7.0,"rural"),"R9":(11.0,6.0,"rural"),
26
  "R10":(8.0,5.5,"rural"),
27
  }
 
28
  def _dist(a, b):
29
  return math.hypot(NODES[a][0]-NODES[b][0], NODES[a][1]-NODES[b][1])
 
30
  _PAIRS = [
31
  ("U1","U2"),("U2","U3"),("U1","U4"),("U2","U4"),("U2","U5"),
32
  ("U3","U6"),("U4","U5"),("U5","U6"),("U4","U7"),("U5","U8"),
 
36
  ("R7","R8"),("R8","R9"),("R6","R9"),("R8","R10"),("R5","R8"),
37
  ("U3","R1"),("U10","R4"),("U6","R1"),("U9","R7"),
38
  ]
39
+ # Road distance = 1.15× straight-line
 
40
  EDGES = [(a, b, round(_dist(a,b)*1.15, 2)) for a, b in _PAIRS]
 
41
  # CO2 cost per edge: urban roads have traffic → higher emissions per km
42
  # Rural roads: 0.12 kg CO2/km; Urban roads: 0.21 kg CO2/km
43
  def _co2(a, b, km):
 
46
  return round(km * rate, 3)
47
 
48
  CO2_EDGES = [(a, b, _co2(a, b, w)) for a, b, w in EDGES]
 
49
  ADJ_KM = {n: [] for n in NODES}
50
  ADJ_CO2 = {n: [] for n in NODES}
51
  for i, (a, b, w) in enumerate(EDGES):
 
58
  # ── 2. Algorithms ───────────────────────────────────────────
59
  def heuristic(n, goal, scale=1.0):
60
  return _dist(n, goal) * scale
 
61
  def bfs(start, goal, adj=ADJ_KM):
62
  expanded = 0
63
  q = deque([(start, [start])])
 
73
  seen.add(nb)
74
  q.append((nb, path + [nb]))
75
  return None, math.inf, expanded
 
76
  def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
77
  expanded = 0
78
  stack = [(start, [start])]
 
90
  seen.add(nb)
91
  stack.append((nb, path + [nb]))
92
  return None, math.inf, expanded
 
93
  def astar(start, goal, adj=ADJ_KM, h_scale=1.0):
94
  expanded, counter = 0, 0
95
  heap = [(heuristic(start, goal, h_scale), 0.0, counter, start, [start])]
 
132
  path.pop()
133
  visited.remove(nb)
134
  return None, nxt
 
135
  bound = heuristic(start, goal, h_scale)
136
  while True:
137
  r, t = _dfs(start, 0.0, bound, [start], {start})
 
140
  if t == math.inf:
141
  return None, math.inf, expanded[0]
142
  bound = t
 
143
  def _edge_w(a, b, adj):
144
  for nb, w in adj[a]:
145
  if nb == b:
 
166
  "cost": cost,
167
  "path": path,
168
  }
 
169
  OD_URBAN = [("U1","U10"),("U7","U6"),("U2","U9"),("U1","U9"),("U3","U8")]
170
  OD_RURAL = [("R1","R9"),("R2","R8"),("R3","R10"),("R1","R6"),("R4","R9")]
171
 
 
178
  G.add_edge(a, b, weight=w)
179
  pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
180
  colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
 
181
  fig, ax = plt.subplots(figsize=(13, 6))
182
  ax.set_facecolor("#0d1117")
183
  fig.patch.set_facecolor("#0d1117")
 
198
  facecolor="#0d1117")
199
  plt.close()
200
 
 
201
  def plot_comparison(results):
202
  metrics = [("Runtime (ms)", "ms"), ("Nodes expanded", "expanded"), ("Peak memory (KB)", "kb")]
203
  fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
 
224
  plt.savefig("output/algo_comparison.png", dpi=150,
225
  bbox_inches="tight", facecolor="#0d1117")
226
  plt.close()
 
 
227
  def plot_green_vs_fast():
228
  """Compare fastest route (A* on km) vs greenest route (A* on CO2)."""
229
  pairs = [("U1", "R9"), ("U7", "R6"), ("R1", "U10")]
230
  fig, axes = plt.subplots(1, 3, figsize=(15, 5))
231
  fig.patch.set_facecolor("#0d1117")
 
232
  G = nx.Graph()
233
  for n, (x, y, _) in NODES.items():
234
  G.add_node(n, pos=(x, y))
235
  for a, b, w in EDGES:
236
  G.add_edge(a, b)
237
  pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
 
238
  for ax, (s, g) in zip(axes, pairs):
239
  ax.set_facecolor("#0d1117")
240
  fast_path, fast_km, _ = astar(s, g, ADJ_KM)
241
  green_path, green_co2, _ = astar(s, g, ADJ_CO2, h_scale=0.10)
 
242
  # Compute cross-metrics
243
  fast_co2 = sum(_edge_w(fast_path[i], fast_path[i+1], ADJ_CO2) for i in range(len(fast_path)-1))
244
  green_km = sum(_edge_w(green_path[i], green_path[i+1], ADJ_KM) for i in range(len(green_path)-1))
 
245
  colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
246
  nx.draw(G, pos, ax=ax, with_labels=True, node_color=colors,
247
  node_size=300, font_size=7, font_weight="bold",
 
268
  bbox_inches="tight", facecolor="#0d1117")
269
  plt.close()
270
 
 
271
  # ── 5. Main ─────────────────────────────────────────────────
272
  def main():
273
  print("="*70)
task5_forecasting.py CHANGED
@@ -2,8 +2,6 @@
2
  EcoCart Demand Forecasting Prototype
3
  Task 5 — Linear Regression vs Random Forest on synthetic daily sales.
4
 
5
- NCI MSCAI | Fundamentals of AI TABA 2026
6
-
7
  Run: python3 task5_forecasting.py
8
  Out: forecast.png, residuals.png, feature_importance.png
9
  """
@@ -14,7 +12,6 @@ import matplotlib.pyplot as plt
14
  from sklearn.linear_model import LinearRegression
15
  from sklearn.ensemble import RandomForestRegressor
16
  from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
17
-
18
  RNG = np.random.default_rng(42)
19
 
20
 
@@ -29,7 +26,6 @@ def generate_sales(days=730):
29
  promo = np.zeros(days)
30
  promo[RNG.choice(days, int(days * 0.06), replace=False)] = RNG.uniform(30, 70, int(days * 0.06))
31
  sales = np.clip(base + weekly + yearly + noise + promo, 0, None)
32
-
33
  return pd.DataFrame({
34
  "date": dates, "sales": sales,
35
  "dow": dates.dayofweek, "month": dates.month,
@@ -37,7 +33,6 @@ def generate_sales(days=730):
37
  "is_promo": (promo > 0).astype(int),
38
  })
39
 
40
-
41
  # ── 2. Features ────────────────────────────────────────────
42
  def add_features(df):
43
  out = df.copy()
 
2
  EcoCart Demand Forecasting Prototype
3
  Task 5 — Linear Regression vs Random Forest on synthetic daily sales.
4
 
 
 
5
  Run: python3 task5_forecasting.py
6
  Out: forecast.png, residuals.png, feature_importance.png
7
  """
 
12
  from sklearn.linear_model import LinearRegression
13
  from sklearn.ensemble import RandomForestRegressor
14
  from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 
15
  RNG = np.random.default_rng(42)
16
 
17
 
 
26
  promo = np.zeros(days)
27
  promo[RNG.choice(days, int(days * 0.06), replace=False)] = RNG.uniform(30, 70, int(days * 0.06))
28
  sales = np.clip(base + weekly + yearly + noise + promo, 0, None)
 
29
  return pd.DataFrame({
30
  "date": dates, "sales": sales,
31
  "dow": dates.dayofweek, "month": dates.month,
 
33
  "is_promo": (promo > 0).astype(int),
34
  })
35
 
 
36
  # ── 2. Features ────────────────────────────────────────────
37
  def add_features(df):
38
  out = df.copy()