Spaces:

Esvanth
/

EcoCartAI

Sleeping

App Files Files Community

Esvanth committed 8 days ago

Commit

efe8b7d

1 Parent(s): 512b5c0

Save changes before rebase

Browse files

Files changed (3) hide show

task2_segmentation.py +1 -13
task3_4_routing.py +1 -24
task5_forecasting.py +0 -5

task2_segmentation.py CHANGED Viewed

@@ -3,8 +3,6 @@ EcoCart Customer Segmentation — Bias Detection & Mitigation
 Task 2 — Demonstrates urban-rural bias in K-Means segmentation and
           applies reweighing to fix it.
-NCI MSCAI | Fundamentals of AI TABA 2026
 Run:  python3 task2_segmentation.py
 Out:  bias_before_after.png, disparate_impact.png
 """
@@ -17,8 +15,7 @@ from sklearn.preprocessing import StandardScaler
 RNG = np.random.default_rng(42)
-# ── 1. Generate biased customer data ────────────────────────
 # Urban customers have more data, higher frequency, higher spend — mimicking
 # a real scenario where the platform launched in cities first.
@@ -60,7 +57,6 @@ def segment(df, features=["freq", "spend", "recency"]):
     df["segment"] = df["cluster"].map(label_map)
     return df
 # ── 3. Bias metrics ────────────────────────────────────────
 def compute_fairness(df):
     urban = df[df.region == "urban"]
@@ -75,7 +71,6 @@ def compute_fairness(df):
         "fair": di >= 0.8,
     }
 # ── 4. Mitigation: reweigh + balanced re-sample ────────────
 def mitigate(df):
     """
@@ -126,24 +121,20 @@ def mitigate(df):
     target_rural_high = int(target_rate * n_rural)
     current_rural_high = ((balanced[rural_mask].segment == "High Value")).sum()
     need = target_rural_high - current_rural_high
     if need > 0:
         # Promote from Medium first, then Low Value
         candidates = balanced[rural_mask & (balanced.segment != "High Value")]
         if len(candidates) > 0:
             promote = candidates.nlargest(min(need, len(candidates)), "adj_spend").index
             balanced.loc[promote, "segment"] = "High Value"
     return balanced
 # ── 5. Plots ────────────────────────────────────────────────
 SEG_COLORS = {"High Value": "#10b981", "Medium": "#f59e0b", "Low Value": "#ef4444"}
 def plot_before_after(before_df, after_df, before_fair, after_fair):
     fig, axes = plt.subplots(1, 2, figsize=(14, 5.5))
     fig.patch.set_facecolor("#0d1117")
     for ax, df, fair, title in [
         (axes[0], before_df, before_fair, "BEFORE mitigation (biased)"),
         (axes[1], after_df,  after_fair,  "AFTER mitigation (reweighed + adjusted)"),
@@ -172,18 +163,15 @@ def plot_before_after(before_df, after_df, before_fair, after_fair):
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 def plot_di(before_fair, after_fair):
     fig, ax = plt.subplots(figsize=(8, 4))
     fig.patch.set_facecolor("#0d1117")
     ax.set_facecolor("#0d1117")
     cats = ["Urban → High", "Rural → High", "Disparate Impact"]
     before_vals = [before_fair["urban_high_pct"], before_fair["rural_high_pct"],
                    before_fair["disparate_impact"] * 100]
     after_vals  = [after_fair["urban_high_pct"],  after_fair["rural_high_pct"],
                    after_fair["disparate_impact"] * 100]
     x = range(len(cats))
     w = 0.35
     ax.bar([i - w/2 for i in x], before_vals, w, label="Before", color="#ef4444", alpha=0.85)

 Task 2 — Demonstrates urban-rural bias in K-Means segmentation and
           applies reweighing to fix it.
 Run:  python3 task2_segmentation.py
 Out:  bias_before_after.png, disparate_impact.png
 """
 RNG = np.random.default_rng(42)
+# 1. Generate biased customer data
 # Urban customers have more data, higher frequency, higher spend — mimicking
 # a real scenario where the platform launched in cities first.
     df["segment"] = df["cluster"].map(label_map)
     return df
 # ── 3. Bias metrics ────────────────────────────────────────
 def compute_fairness(df):
     urban = df[df.region == "urban"]
         "fair": di >= 0.8,
     }
 # ── 4. Mitigation: reweigh + balanced re-sample ────────────
 def mitigate(df):
     """
     target_rural_high = int(target_rate * n_rural)
     current_rural_high = ((balanced[rural_mask].segment == "High Value")).sum()
     need = target_rural_high - current_rural_high
     if need > 0:
         # Promote from Medium first, then Low Value
         candidates = balanced[rural_mask & (balanced.segment != "High Value")]
         if len(candidates) > 0:
             promote = candidates.nlargest(min(need, len(candidates)), "adj_spend").index
             balanced.loc[promote, "segment"] = "High Value"
     return balanced
 # ── 5. Plots ────────────────────────────────────────────────
 SEG_COLORS = {"High Value": "#10b981", "Medium": "#f59e0b", "Low Value": "#ef4444"}
 def plot_before_after(before_df, after_df, before_fair, after_fair):
     fig, axes = plt.subplots(1, 2, figsize=(14, 5.5))
     fig.patch.set_facecolor("#0d1117")
     for ax, df, fair, title in [
         (axes[0], before_df, before_fair, "BEFORE mitigation (biased)"),
         (axes[1], after_df,  after_fair,  "AFTER mitigation (reweighed + adjusted)"),
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 def plot_di(before_fair, after_fair):
     fig, ax = plt.subplots(figsize=(8, 4))
     fig.patch.set_facecolor("#0d1117")
     ax.set_facecolor("#0d1117")
     cats = ["Urban → High", "Rural → High", "Disparate Impact"]
     before_vals = [before_fair["urban_high_pct"], before_fair["rural_high_pct"],
                    before_fair["disparate_impact"] * 100]
     after_vals  = [after_fair["urban_high_pct"],  after_fair["rural_high_pct"],
                    after_fair["disparate_impact"] * 100]
     x = range(len(cats))
     w = 0.35
     ax.bar([i - w/2 for i in x], before_vals, w, label="Before", color="#ef4444", alpha=0.85)

task3_4_routing.py CHANGED Viewed

@@ -2,9 +2,6 @@
 EcoCart Route Optimisation Prototype
 Tasks 3 & 4 — BFS, DFS, A*, IDA* on a weighted delivery network
               + Green Routing mode (CO2-weighted edges for sustainability)
-NCI MSCAI | Fundamentals of AI TABA 2026
 Run:  python3 task3_4_routing.py
 Out:  network_map.png, algo_comparison.png, green_vs_fast.png
 """
@@ -28,10 +25,8 @@ NODES = {
     "R7":(6.5,6.0,"rural"),"R8":(9.0,7.0,"rural"),"R9":(11.0,6.0,"rural"),
     "R10":(8.0,5.5,"rural"),
 }
 def _dist(a, b):
     return math.hypot(NODES[a][0]-NODES[b][0], NODES[a][1]-NODES[b][1])
 _PAIRS = [
     ("U1","U2"),("U2","U3"),("U1","U4"),("U2","U4"),("U2","U5"),
     ("U3","U6"),("U4","U5"),("U5","U6"),("U4","U7"),("U5","U8"),
@@ -41,10 +36,8 @@ _PAIRS = [
     ("R7","R8"),("R8","R9"),("R6","R9"),("R8","R10"),("R5","R8"),
     ("U3","R1"),("U10","R4"),("U6","R1"),("U9","R7"),
 ]
-# Road distance ≈ 1.15× straight-line
 EDGES = [(a, b, round(_dist(a,b)*1.15, 2)) for a, b in _PAIRS]
 # CO2 cost per edge: urban roads have traffic → higher emissions per km
 # Rural roads: 0.12 kg CO2/km;  Urban roads: 0.21 kg CO2/km
 def _co2(a, b, km):
@@ -53,7 +46,6 @@ def _co2(a, b, km):
     return round(km * rate, 3)
 CO2_EDGES = [(a, b, _co2(a, b, w)) for a, b, w in EDGES]
 ADJ_KM = {n: [] for n in NODES}
 ADJ_CO2 = {n: [] for n in NODES}
 for i, (a, b, w) in enumerate(EDGES):
@@ -66,7 +58,6 @@ for i, (a, b, w) in enumerate(EDGES):
 # ── 2. Algorithms ───────────────────────────────────────────
 def heuristic(n, goal, scale=1.0):
     return _dist(n, goal) * scale
 def bfs(start, goal, adj=ADJ_KM):
     expanded = 0
     q = deque([(start, [start])])
@@ -82,7 +73,6 @@ def bfs(start, goal, adj=ADJ_KM):
                 seen.add(nb)
                 q.append((nb, path + [nb]))
     return None, math.inf, expanded
 def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
     expanded = 0
     stack = [(start, [start])]
@@ -100,7 +90,6 @@ def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
                 seen.add(nb)
                 stack.append((nb, path + [nb]))
     return None, math.inf, expanded
 def astar(start, goal, adj=ADJ_KM, h_scale=1.0):
     expanded, counter = 0, 0
     heap = [(heuristic(start, goal, h_scale), 0.0, counter, start, [start])]
@@ -143,7 +132,6 @@ def ida_star(start, goal, adj=ADJ_KM, h_scale=1.0):
             path.pop()
             visited.remove(nb)
         return None, nxt
     bound = heuristic(start, goal, h_scale)
     while True:
         r, t = _dfs(start, 0.0, bound, [start], {start})
@@ -152,7 +140,6 @@ def ida_star(start, goal, adj=ADJ_KM, h_scale=1.0):
         if t == math.inf:
             return None, math.inf, expanded[0]
         bound = t
 def _edge_w(a, b, adj):
     for nb, w in adj[a]:
         if nb == b:
@@ -179,7 +166,6 @@ def benchmark(algo, start, goal, adj=ADJ_KM, repeats=20):
         "cost": cost,
         "path": path,
     }
 OD_URBAN = [("U1","U10"),("U7","U6"),("U2","U9"),("U1","U9"),("U3","U8")]
 OD_RURAL = [("R1","R9"),("R2","R8"),("R3","R10"),("R1","R6"),("R4","R9")]
@@ -192,7 +178,6 @@ def plot_network():
         G.add_edge(a, b, weight=w)
     pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
     colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
     fig, ax = plt.subplots(figsize=(13, 6))
     ax.set_facecolor("#0d1117")
     fig.patch.set_facecolor("#0d1117")
@@ -213,7 +198,6 @@ def plot_network():
                 facecolor="#0d1117")
     plt.close()
 def plot_comparison(results):
     metrics = [("Runtime (ms)", "ms"), ("Nodes expanded", "expanded"), ("Peak memory (KB)", "kb")]
     fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
@@ -240,30 +224,24 @@ def plot_comparison(results):
     plt.savefig("output/algo_comparison.png", dpi=150,
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 def plot_green_vs_fast():
     """Compare fastest route (A* on km) vs greenest route (A* on CO2)."""
     pairs = [("U1", "R9"), ("U7", "R6"), ("R1", "U10")]
     fig, axes = plt.subplots(1, 3, figsize=(15, 5))
     fig.patch.set_facecolor("#0d1117")
     G = nx.Graph()
     for n, (x, y, _) in NODES.items():
         G.add_node(n, pos=(x, y))
     for a, b, w in EDGES:
         G.add_edge(a, b)
     pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
     for ax, (s, g) in zip(axes, pairs):
         ax.set_facecolor("#0d1117")
         fast_path, fast_km, _ = astar(s, g, ADJ_KM)
         green_path, green_co2, _ = astar(s, g, ADJ_CO2, h_scale=0.10)
         # Compute cross-metrics
         fast_co2 = sum(_edge_w(fast_path[i], fast_path[i+1], ADJ_CO2) for i in range(len(fast_path)-1))
         green_km = sum(_edge_w(green_path[i], green_path[i+1], ADJ_KM) for i in range(len(green_path)-1))
         colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
         nx.draw(G, pos, ax=ax, with_labels=True, node_color=colors,
                 node_size=300, font_size=7, font_weight="bold",
@@ -290,7 +268,6 @@ def plot_green_vs_fast():
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 # ── 5. Main ─────────────────────────────────────────────────
 def main():
     print("="*70)

 EcoCart Route Optimisation Prototype
 Tasks 3 & 4 — BFS, DFS, A*, IDA* on a weighted delivery network
               + Green Routing mode (CO2-weighted edges for sustainability)
 Run:  python3 task3_4_routing.py
 Out:  network_map.png, algo_comparison.png, green_vs_fast.png
 """
     "R7":(6.5,6.0,"rural"),"R8":(9.0,7.0,"rural"),"R9":(11.0,6.0,"rural"),
     "R10":(8.0,5.5,"rural"),
 }
 def _dist(a, b):
     return math.hypot(NODES[a][0]-NODES[b][0], NODES[a][1]-NODES[b][1])
 _PAIRS = [
     ("U1","U2"),("U2","U3"),("U1","U4"),("U2","U4"),("U2","U5"),
     ("U3","U6"),("U4","U5"),("U5","U6"),("U4","U7"),("U5","U8"),
     ("R7","R8"),("R8","R9"),("R6","R9"),("R8","R10"),("R5","R8"),
     ("U3","R1"),("U10","R4"),("U6","R1"),("U9","R7"),
 ]
+# Road distance = 1.15× straight-line
 EDGES = [(a, b, round(_dist(a,b)*1.15, 2)) for a, b in _PAIRS]
 # CO2 cost per edge: urban roads have traffic → higher emissions per km
 # Rural roads: 0.12 kg CO2/km;  Urban roads: 0.21 kg CO2/km
 def _co2(a, b, km):
     return round(km * rate, 3)
 CO2_EDGES = [(a, b, _co2(a, b, w)) for a, b, w in EDGES]
 ADJ_KM = {n: [] for n in NODES}
 ADJ_CO2 = {n: [] for n in NODES}
 for i, (a, b, w) in enumerate(EDGES):
 # ── 2. Algorithms ───────────────────────────────────────────
 def heuristic(n, goal, scale=1.0):
     return _dist(n, goal) * scale
 def bfs(start, goal, adj=ADJ_KM):
     expanded = 0
     q = deque([(start, [start])])
                 seen.add(nb)
                 q.append((nb, path + [nb]))
     return None, math.inf, expanded
 def dfs(start, goal, adj=ADJ_KM, depth_limit=50):
     expanded = 0
     stack = [(start, [start])]
                 seen.add(nb)
                 stack.append((nb, path + [nb]))
     return None, math.inf, expanded
 def astar(start, goal, adj=ADJ_KM, h_scale=1.0):
     expanded, counter = 0, 0
     heap = [(heuristic(start, goal, h_scale), 0.0, counter, start, [start])]
             path.pop()
             visited.remove(nb)
         return None, nxt
     bound = heuristic(start, goal, h_scale)
     while True:
         r, t = _dfs(start, 0.0, bound, [start], {start})
         if t == math.inf:
             return None, math.inf, expanded[0]
         bound = t
 def _edge_w(a, b, adj):
     for nb, w in adj[a]:
         if nb == b:
         "cost": cost,
         "path": path,
     }
 OD_URBAN = [("U1","U10"),("U7","U6"),("U2","U9"),("U1","U9"),("U3","U8")]
 OD_RURAL = [("R1","R9"),("R2","R8"),("R3","R10"),("R1","R6"),("R4","R9")]
         G.add_edge(a, b, weight=w)
     pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
     colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
     fig, ax = plt.subplots(figsize=(13, 6))
     ax.set_facecolor("#0d1117")
     fig.patch.set_facecolor("#0d1117")
                 facecolor="#0d1117")
     plt.close()
 def plot_comparison(results):
     metrics = [("Runtime (ms)", "ms"), ("Nodes expanded", "expanded"), ("Peak memory (KB)", "kb")]
     fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
     plt.savefig("output/algo_comparison.png", dpi=150,
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 def plot_green_vs_fast():
     """Compare fastest route (A* on km) vs greenest route (A* on CO2)."""
     pairs = [("U1", "R9"), ("U7", "R6"), ("R1", "U10")]
     fig, axes = plt.subplots(1, 3, figsize=(15, 5))
     fig.patch.set_facecolor("#0d1117")
     G = nx.Graph()
     for n, (x, y, _) in NODES.items():
         G.add_node(n, pos=(x, y))
     for a, b, w in EDGES:
         G.add_edge(a, b)
     pos = {n: (NODES[n][0], NODES[n][1]) for n in NODES}
     for ax, (s, g) in zip(axes, pairs):
         ax.set_facecolor("#0d1117")
         fast_path, fast_km, _ = astar(s, g, ADJ_KM)
         green_path, green_co2, _ = astar(s, g, ADJ_CO2, h_scale=0.10)
         # Compute cross-metrics
         fast_co2 = sum(_edge_w(fast_path[i], fast_path[i+1], ADJ_CO2) for i in range(len(fast_path)-1))
         green_km = sum(_edge_w(green_path[i], green_path[i+1], ADJ_KM) for i in range(len(green_path)-1))
         colors = ["#ef4444" if NODES[n][2] == "urban" else "#10b981" for n in NODES]
         nx.draw(G, pos, ax=ax, with_labels=True, node_color=colors,
                 node_size=300, font_size=7, font_weight="bold",
                 bbox_inches="tight", facecolor="#0d1117")
     plt.close()
 # ── 5. Main ─────────────────────────────────────────────────
 def main():
     print("="*70)

task5_forecasting.py CHANGED Viewed

@@ -2,8 +2,6 @@
 EcoCart Demand Forecasting Prototype
 Task 5 — Linear Regression vs Random Forest on synthetic daily sales.
-NCI MSCAI | Fundamentals of AI TABA 2026
 Run:  python3 task5_forecasting.py
 Out:  forecast.png, residuals.png, feature_importance.png
 """
@@ -14,7 +12,6 @@ import matplotlib.pyplot as plt
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 RNG = np.random.default_rng(42)
@@ -29,7 +26,6 @@ def generate_sales(days=730):
     promo  = np.zeros(days)
     promo[RNG.choice(days, int(days * 0.06), replace=False)] = RNG.uniform(30, 70, int(days * 0.06))
     sales = np.clip(base + weekly + yearly + noise + promo, 0, None)
     return pd.DataFrame({
         "date": dates, "sales": sales,
         "dow": dates.dayofweek, "month": dates.month,
@@ -37,7 +33,6 @@ def generate_sales(days=730):
         "is_promo": (promo > 0).astype(int),
     })
 # ── 2. Features ────────────────────────────────────────────
 def add_features(df):
     out = df.copy()

 EcoCart Demand Forecasting Prototype
 Task 5 — Linear Regression vs Random Forest on synthetic daily sales.
 Run:  python3 task5_forecasting.py
 Out:  forecast.png, residuals.png, feature_importance.png
 """
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 RNG = np.random.default_rng(42)
     promo  = np.zeros(days)
     promo[RNG.choice(days, int(days * 0.06), replace=False)] = RNG.uniform(30, 70, int(days * 0.06))
     sales = np.clip(base + weekly + yearly + noise + promo, 0, None)
     return pd.DataFrame({
         "date": dates, "sales": sales,
         "dow": dates.dayofweek, "month": dates.month,
         "is_promo": (promo > 0).astype(int),
     })
 # ── 2. Features ────────────────────────────────────────────
 def add_features(df):
     out = df.copy()