Spaces:

rohan-debayan
/

sf-taxi-equity-dashboard-plotlydash

Sleeping

App Files Files Community

Debayan Mandal committed on 25 days ago

Commit

acda8b7

1 Parent(s): 139c8bc

Initial Dashboard Upload

Browse files

Files changed (8) hide show

.gitignore +4 -0
Dockerfile +24 -0
README.md +57 -7
app.py +790 -0
assets/styles.css +42 -0
dashboard_helpers.py +471 -0
data_pipeline.py +274 -0
requirements.txt +10 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+*.db
+*.csv
+__pycache__/
+*.pyc

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+# Base image: slim Debian variant of Python 3.11.
+FROM python:3.11-slim
+# Native GDAL / GEOS / PROJ packages (presumably required by the geospatial
+# Python dependencies - confirm against requirements.txt). The apt lists are
+# removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgdal-dev \
+    gdal-bin \
+    libgeos-dev \
+    libproj-dev \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Requirements are copied and installed before the sources so this layer is
+# reused when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY data_pipeline.py .
+COPY dashboard_helpers.py .
+COPY app.py .
+COPY assets/ assets/
+# The data pipeline runs at build time (per the README it builds sf_dashboard.db).
+RUN python data_pipeline.py
+# 7860 matches app_port in README.md and the gunicorn bind address below.
+EXPOSE 7860
+# app:server is the WSGI object exposed by app.py.
+CMD ["gunicorn", "app:server", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120"]

README.md CHANGED Viewed

@@ -1,12 +1,62 @@
 ---
-title: Sf Taxi Equity Dashboard Plotlydash
-emoji: 🦀
 colorFrom: blue
-colorTo: green
 sdk: docker
-pinned: false
-license: mit
-short_description: A Plotly Dash Web App for SF Taxi Neighborhoods
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SF Taxi Mobility Equity Dashboard
+emoji: 🚕
 colorFrom: blue
+colorTo: yellow
 sdk: docker
+app_port: 7860
+pinned: True
 ---
+# SF Taxi Mobility Equity Dashboard
+An interactive Plotly Dash dashboard analyzing spatial equity in San Francisco taxi services. Compares **Street-Hail** vs **App-Based** trip patterns across SF's 41 Analysis Neighborhoods and evaluates service representation relative to demographic baselines using Representative Ratios.
+_Debayan Mandal_
+## Features
+- **Interactive choropleth maps** — click any neighborhood to cross-filter all views
+- **Representative Ratio visualizations** — bar chart and heatmap showing the central equity metric (overrepresentation vs underrepresentation by demographic group)
+- **Neighborhood detail panel** — click to see full profile: trips, demographics, deviations, and trends
+- **Monthly comparison** — side-by-side difference maps revealing temporal trends
+- **Dynamic narrative** — auto-generated equity insights that update with your selections
+- **CSV data export** — download filtered trip + demographic data
+- **Publication-ready image export** — high-resolution PNG via the camera icon on each map
+- **Guided tour** — step-by-step walkthrough for non-technical audiences
+- **Colorblind-safe palettes** — Viridis, Cividis, and RdBu scales
+## Data Sources
+- **SF Taxi Trips**: [DataSF Taxi Trips (m8hk-2ipk)](https://data.sfgov.org/Transportation/Taxi-Trips/m8hk-2ipk/)
+- **SF Analysis Neighborhoods**: [DataSF Analysis Neighborhoods (j2bu-swwd)](https://data.sfgov.org/resource/j2bu-swwd.geojson)
+- **Census Demographics**: [ACS 5-Year 2022, Table B02001](https://api.census.gov/data/2022/acs/acs5.html), Block Groups for SF County (FIPS 06075)
+- **Block Group Geometries**: [TIGER/Line 2022](https://www2.census.gov/geo/tiger/TIGER2022/BG/tl_2022_06_bg.zip)
+## Local Setup
+```bash
+pip install -r requirements.txt
+python data_pipeline.py      # builds sf_dashboard.db
+python app.py                # serves the dashboard at http://localhost:7860
+```
+## Docker
+```bash
+docker build -t sf-taxi-dashboard .
+docker run -p 7860:7860 sf-taxi-dashboard
+```
+Then open http://localhost:7860.
+## Architecture
+| File | Purpose |
+|------|---------|
+| `data_pipeline.py` | Downloads taxi trips, neighborhoods, and census data; builds `sf_dashboard.db` |
+| `dashboard_helpers.py` | Plotly figure builders and data query helpers |
+| `app.py` | Plotly Dash application layout and callbacks |
+| `assets/styles.css` | Custom CSS |
+| `requirements.txt` | Python dependencies |
+| `Dockerfile` | Containerization for Hugging Face Spaces |

app.py ADDED Viewed

	@@ -0,0 +1,790 @@

+import dash
+from dash import dcc, html, Input, Output, State, callback, ctx, dash_table
+import dash_bootstrap_components as dbc
+import duckdb
+from dashboard_helpers import (
+    get_neighborhood_geojson,
+    get_all_neighborhoods,
+    build_trip_choropleth,
+    build_demo_choropleth,
+    build_rr_bar_chart,
+    build_rr_heatmap,
+    build_neighborhood_profile,
+    build_comparison_map,
+    get_trip_stats_df,
+    get_download_csv,
+    _GRAPH_CONFIG,
+)
+DB_PATH = "sf_dashboard.db"
+def get_con():
+    c = duckdb.connect(DB_PATH, read_only=True)
+    c.install_extension("spatial")
+    c.load_extension("spatial")
+    return c
+# Startup-only connection: load the static data the layout needs, then close it.
+_init_con = get_con()
+# Neighborhood polygons as a GeoJSON dict, shared by every map callback.
+GEOJSON = get_neighborhood_geojson(_init_con)
+NEIGHBORHOODS = get_all_neighborhoods(_init_con)
+# Distinct month labels present in the pickup table, ordered by the month column.
+MONTHS = _init_con.sql(
+    "SELECT DISTINCT month FROM trip_counts_pu ORDER BY month"
+).df()["month"].tolist()
+# Only the first row of city_baselines is read for the two baseline percentages.
+baseline_df = _init_con.sql("SELECT * FROM city_baselines").df()
+BASELINE_WHITE = float(baseline_df["baseline_white_pct"].iloc[0])
+BASELINE_ASIAN = float(baseline_df["baseline_asian_pct"].iloc[0])
+_init_con.close()
+app = dash.Dash(
+    __name__,
+    external_stylesheets=[dbc.themes.DARKLY],
+    meta_tags=[
+        {"name": "viewport", "content": "width=device-width, initial-scale=1"}
+    ],
+    title="SF Taxi Mobility Equity Dashboard",
+)
+server = app.server
+sidebar = dbc.Card(
+    [
+        html.H4("Controls", className="text-center mb-3"),
+        html.Hr(),
+        dbc.Label("Month"),
+        dcc.Dropdown(
+            id="month-selector",
+            options=[{"label": m, "value": m} for m in MONTHS],
+            value="Jan2024",
+            clearable=False,
+            className="mb-3",
+        ),
+        dbc.Label("Hail Type"),
+        dbc.Checklist(
+            id="hail-type-filter",
+            options=[
+                {"label": " Street-Hail", "value": "Street"},
+                {"label": " App-Based", "value": "App"},
+            ],
+            value=["Street", "App"],
+            className="mb-3",
+        ),
+        html.Hr(),
+        html.Div(id="selected-nhood-display", className="mb-3"),
+        dbc.Button(
+            "Reset Selection",
+            id="reset-selection-btn",
+            color="secondary",
+            size="sm",
+            className="w-100 mb-2",
+        ),
+        dbc.Button(
+            "Download CSV",
+            id="download-btn",
+            color="info",
+            size="sm",
+            className="w-100 mb-2",
+        ),
+        dbc.Button(
+            "Download GeoJSON",
+            id="download-geojson-btn",
+            color="success",
+            size="sm",
+            className="w-100 mb-2",
+        ),
+        dcc.Download(id="csv-download"),
+        dcc.Download(id="geojson-download"),
+    ],
+    body=True,
+    id="sidebar",
+    className="bg-dark",
+    style={
+        "position": "sticky",
+        "top": "10px",
+        "height": "calc(100vh - 20px)",
+        "overflowY": "auto",
+    },
+)
+_STAT_CARD_STYLE = {"height": "100%"}
+insights_banner = dbc.Row(
+    [
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H3(id="stat-total-trips", className="text-center mb-0"),
+                    html.P("Total Trips", className="text-center text-muted small"),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+                style=_STAT_CARD_STYLE,
+            ),
+            md=3,
+        ),
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H3(id="stat-top-nhood", className="text-center mb-0",
+                             style={"fontSize": "1.6rem"}),
+                    html.P("Most Served", className="text-center text-muted small"),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+                style=_STAT_CARD_STYLE,
+            ),
+            md=3,
+        ),
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H3(id="stat-rr-white", className="text-center mb-0"),
+                    html.P("White RR", className="text-center text-muted small"),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+                style=_STAT_CARD_STYLE,
+            ),
+            md=3,
+        ),
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H3(id="stat-rr-asian", className="text-center mb-0"),
+                    html.P("Asian RR", className="text-center text-muted small"),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+                style=_STAT_CARD_STYLE,
+            ),
+            md=3,
+        ),
+    ],
+    id="insights-banner",
+    className="mb-2 g-2",
+)
+narrative_row = dbc.Row(
+    dbc.Col(
+        html.P(
+            id="narrative-text",
+            className="text-center fst-italic",
+            style={"color": "#adb5bd", "fontSize": "0.95rem"},
+        ),
+    ),
+    className="mb-3",
+)
+trip_maps = dbc.Row(
+    [
+        dbc.Col(
+            dcc.Graph(id="street-pu-map", config=_GRAPH_CONFIG),
+            md=6,
+        ),
+        dbc.Col(
+            dcc.Graph(id="app-pu-map", config=_GRAPH_CONFIG),
+            md=6,
+        ),
+        dbc.Col(
+            dcc.Graph(id="street-do-map", config=_GRAPH_CONFIG),
+            md=6,
+            className="mt-2",
+        ),
+        dbc.Col(
+            dcc.Graph(id="app-do-map", config=_GRAPH_CONFIG),
+            md=6,
+            className="mt-2",
+        ),
+    ],
+    className="mb-3",
+)
+demographics_tab = dbc.Row(
+    [
+        dbc.Col(dcc.Graph(id="white-deviation-map", config=_GRAPH_CONFIG), md=6),
+        dbc.Col(dcc.Graph(id="asian-deviation-map", config=_GRAPH_CONFIG), md=6),
+    ]
+)
+rr_tab = html.Div(
+    [
+        dbc.Row(
+            [
+                dbc.Col(dcc.Graph(id="rr-bar-chart", config=_GRAPH_CONFIG), md=7),
+                dbc.Col(dcc.Graph(id="rr-heatmap", config=_GRAPH_CONFIG), md=5),
+            ]
+        ),
+    ],
+    id="rr-section",
+)
+comparison_tab = html.Div(
+    [
+        dbc.Row(
+            [
+                dbc.Col(
+                    [
+                        dbc.Label("Month A"),
+                        dcc.Dropdown(
+                            id="comp-month-a",
+                            options=[{"label": m, "value": m} for m in MONTHS],
+                            value="Jan2024",
+                            clearable=False,
+                        ),
+                    ],
+                    md=3,
+                ),
+                dbc.Col(
+                    [
+                        dbc.Label("Month B"),
+                        dcc.Dropdown(
+                            id="comp-month-b",
+                            options=[{"label": m, "value": m} for m in MONTHS],
+                            value="Mar2024",
+                            clearable=False,
+                        ),
+                    ],
+                    md=3,
+                ),
+                dbc.Col(
+                    [
+                        dbc.Label("Hail Type"),
+                        dcc.Dropdown(
+                            id="comp-hail",
+                            options=[
+                                {"label": "Street-Hail", "value": "Street"},
+                                {"label": "App-Based", "value": "App"},
+                            ],
+                            value="Street",
+                            clearable=False,
+                        ),
+                    ],
+                    md=3,
+                ),
+                dbc.Col(
+                    [
+                        dbc.Label("Metric"),
+                        dcc.Dropdown(
+                            id="comp-metric",
+                            options=[
+                                {"label": "Pickups", "value": "pu"},
+                                {"label": "Drop-offs", "value": "do"},
+                            ],
+                            value="pu",
+                            clearable=False,
+                        ),
+                    ],
+                    md=3,
+                ),
+            ],
+            className="mb-3",
+        ),
+        dbc.Row(dbc.Col(dcc.Graph(id="comparison-map", config=_GRAPH_CONFIG))),
+    ]
+)
+top10_section = dbc.Row(
+    [
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H5(
+                        id="street-stats-title",
+                        className="text-center",
+                    ),
+                    dbc.Row(
+                        [
+                            dbc.Col(
+                                [
+                                    html.H6("Pickups", className="text-center text-muted"),
+                                    html.Div(id="street-pu-table"),
+                                ],
+                                md=6,
+                            ),
+                            dbc.Col(
+                                [
+                                    html.H6("Drop-offs", className="text-center text-muted"),
+                                    html.Div(id="street-do-table"),
+                                ],
+                                md=6,
+                            ),
+                        ]
+                    ),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+            ),
+            md=6,
+        ),
+        dbc.Col(
+            dbc.Card(
+                [
+                    html.H5(
+                        id="app-stats-title",
+                        className="text-center",
+                    ),
+                    dbc.Row(
+                        [
+                            dbc.Col(
+                                [
+                                    html.H6("Pickups", className="text-center text-muted"),
+                                    html.Div(id="app-pu-table"),
+                                ],
+                                md=6,
+                            ),
+                            dbc.Col(
+                                [
+                                    html.H6("Drop-offs", className="text-center text-muted"),
+                                    html.Div(id="app-do-table"),
+                                ],
+                                md=6,
+                            ),
+                        ]
+                    ),
+                ],
+                body=True,
+                className="bg-dark border-secondary",
+            ),
+            md=6,
+        ),
+    ],
+    className="mt-3",
+)
+analysis_tabs = dbc.Tabs(
+    [
+        dbc.Tab(demographics_tab, label="Demographics", tab_id="tab-demo"),
+        dbc.Tab(rr_tab, label="Representative Ratios", tab_id="tab-rr"),
+        dbc.Tab(comparison_tab, label="Monthly Comparison", tab_id="tab-comp"),
+    ],
+    id="analysis-tabs",
+    active_tab="tab-demo",
+    className="mb-3",
+)
+nhood_offcanvas = dbc.Offcanvas(
+    html.Div(id="nhood-detail-content"),
+    id="nhood-offcanvas",
+    title="Neighborhood Profile",
+    placement="end",
+    is_open=False,
+    style={"width": "400px", "backgroundColor": "#303030"},
+)
+app.layout = dbc.Container(
+    [
+        dcc.Store(id="selected-neighborhood", data=None),
+        nhood_offcanvas,
+        # Header
+        dbc.Row(
+            dbc.Col(
+                [
+                    html.H2(
+                        "SF Taxi Mobility Equity Dashboard",
+                        className="text-center mt-3 mb-1",
+                    ),
+                    html.P(
+                        "Analyzing whether Street-Hail and App-Based taxi services "
+                        "in San Francisco are equitably distributed across "
+                        "neighborhoods with different demographic compositions.",
+                        className="text-center text-muted mb-3",
+                        style={"maxWidth": "700px", "margin": "0 auto"},
+                    ),
+                ]
+            )
+        ),
+        # Body: sidebar + main
+        dbc.Row(
+            [
+                dbc.Col(sidebar, md=2, className="pe-1"),
+                dbc.Col(
+                    [
+                        insights_banner,
+                        narrative_row,
+                        html.H5("Trip Distribution Maps", className="mb-2"),
+                        trip_maps,
+                        html.H5("Analysis", className="mb-2"),
+                        analysis_tabs,
+                        html.H5("Top 10 Neighborhoods", className="mb-2"),
+                        top10_section,
+                    ],
+                    md=10,
+                ),
+            ]
+        ),
+        # Footer
+        dbc.Row(
+            dbc.Col(
+                html.P(
+                    [
+                        "Data: ",
+                        html.A("DataSF Taxi Trips", href="https://data.sfgov.org/Transportation/Taxi-Trips/m8hk-2ipk/", target="_blank"),
+                        " | ",
+                        html.A("ACS 2022", href="https://api.census.gov/data/2022/acs/acs5.html", target="_blank"),
+                        " | Debayan Mandal",
+                    ],
+                    className="text-center text-muted small mt-4 mb-3",
+                )
+            )
+        ),
+    ],
+    fluid=True,
+)
+@callback(
+    Output("street-pu-map", "figure"),
+    Output("app-pu-map", "figure"),
+    Output("street-do-map", "figure"),
+    Output("app-do-map", "figure"),
+    Input("month-selector", "value"),
+    Input("hail-type-filter", "value"),
+    Input("selected-neighborhood", "data"),
+)
+def update_trip_maps(month, hail_types, sel_nhood):
+    hail_types = hail_types or ["Street", "App"]
+    con = get_con()
+    figs = []
+    for ht, metric in [
+        ("Street", "pu"),
+        ("App", "pu"),
+        ("Street", "do"),
+        ("App", "do"),
+    ]:
+        if ht in hail_types:
+            figs.append(
+                build_trip_choropleth(con, GEOJSON, ht, month, metric, sel_nhood)
+            )
+        else:
+            import plotly.graph_objects as go
+            fig = go.Figure()
+            fig.update_layout(
+                title=f"{ht} {'Pickups' if metric == 'pu' else 'Drop-offs'} (filtered out)",
+                paper_bgcolor="rgba(0,0,0,0)",
+                font_color="#e0e0e0",
+                height=420,
+            )
+            figs.append(fig)
+    return figs[0], figs[1], figs[2], figs[3]
+@callback(
+    Output("selected-neighborhood", "data"),
+    Input("street-pu-map", "clickData"),
+    Input("app-pu-map", "clickData"),
+    Input("street-do-map", "clickData"),
+    Input("app-do-map", "clickData"),
+    Input("white-deviation-map", "clickData"),
+    Input("asian-deviation-map", "clickData"),
+    Input("reset-selection-btn", "n_clicks"),
+    prevent_initial_call=True,
+)
+def sync_map_selection(c1, c2, c3, c4, c5, c6, reset):
+    trigger = ctx.triggered_id
+    if trigger == "reset-selection-btn":
+        return None
+    for click in [c1, c2, c3, c4, c5, c6]:
+        if click and trigger in [
+            "street-pu-map", "app-pu-map", "street-do-map", "app-do-map",
+            "white-deviation-map", "asian-deviation-map",
+        ]:
+            try:
+                return click["points"][0]["customdata"][0]
+            except (KeyError, IndexError, TypeError):
+                try:
+                    return click["points"][0]["location"]
+                except (KeyError, IndexError, TypeError):
+                    pass
+    return dash.no_update
+@callback(
+    Output("selected-nhood-display", "children"),
+    Input("selected-neighborhood", "data"),
+)
+def display_selected_nhood(nhood):
+    if nhood:
+        return dbc.Alert(
+            [html.Strong("Selected: "), nhood],
+            color="info",
+            className="py-2 mb-0",
+        )
+    return html.P("Click a neighborhood on any map", className="text-muted small")
+@callback(
+    Output("stat-total-trips", "children"),
+    Output("stat-top-nhood", "children"),
+    Output("stat-rr-white", "children"),
+    Output("stat-rr-asian", "children"),
+    Output("narrative-text", "children"),
+    Input("month-selector", "value"),
+    Input("hail-type-filter", "value"),
+)
+def update_insights(month, hail_types):
+    hail_types = hail_types or ["Street", "App"]
+    ht_filter = ", ".join(f"'{h}'" for h in hail_types)
+    con = get_con()
+    total = con.sql(f"""
+        SELECT SUM(trips_pu) AS total
+        FROM trip_counts_pu
+        WHERE month = '{month}' AND hail_type IN ({ht_filter})
+    """).df()["total"].iloc[0]
+    total = int(total) if total else 0
+    top = con.sql(f"""
+        SELECT nhood, SUM(trips_pu) AS t
+        FROM trip_counts_pu
+        WHERE month = '{month}' AND hail_type IN ({ht_filter})
+        GROUP BY nhood ORDER BY t DESC LIMIT 1
+    """).df()
+    top_nhood = top["nhood"].iloc[0] if not top.empty else "N/A"
+    rr = con.sql(f"""
+        SELECT AVG(RR_white_PU) AS rr_w, AVG(RR_asian_PU) AS rr_a
+        FROM representative_ratios
+        WHERE month = '{month}' AND hail_type IN ({ht_filter})
+    """).df()
+    rr_w = round(float(rr["rr_w"].iloc[0]), 2) if not rr.empty and rr["rr_w"].iloc[0] else 0
+    rr_a = round(float(rr["rr_a"].iloc[0]), 2) if not rr.empty and rr["rr_a"].iloc[0] else 0
+    # Dynamic narrative
+    parts = [f"In {month}, {total:,} taxi trips were recorded across SF."]
+    parts.append(f"{top_nhood} was the most served neighborhood.")
+    if rr_w > 1.0:
+        parts.append(
+            f"White-majority neighborhoods received {rr_w:.2f}x their expected "
+            f"share of service,"
+        )
+    if rr_a < 1.0:
+        parts.append(
+            f"and Asian-majority neighborhoods received {rr_a:.2f}x."
+        )
+    narrative = " ".join(parts)
+    return f"{total:,}", top_nhood, f"{rr_w:.2f}x", f"{rr_a:.2f}x", narrative
+@callback(
+    Output("white-deviation-map", "figure"),
+    Output("asian-deviation-map", "figure"),
+    Input("selected-neighborhood", "data"),
+)
+def update_demo_maps(sel_nhood):
+    con = get_con()
+    w = build_demo_choropleth(
+        con, GEOJSON, "white_pct", BASELINE_WHITE,
+        f"White Pop. Deviation ({BASELINE_WHITE:.1f}% baseline)",
+        sel_nhood,
+    )
+    a = build_demo_choropleth(
+        con, GEOJSON, "asian_pct", BASELINE_ASIAN,
+        f"Asian Pop. Deviation ({BASELINE_ASIAN:.1f}% baseline)",
+        sel_nhood,
+    )
+    return w, a
+@callback(
+    Output("rr-bar-chart", "figure"),
+    Output("rr-heatmap", "figure"),
+    Input("month-selector", "value"),
+)
+def update_rr(month):
+    con = get_con()
+    return build_rr_bar_chart(con, month), build_rr_heatmap(con)
+@callback(
+    Output("comparison-map", "figure"),
+    Input("comp-month-a", "value"),
+    Input("comp-month-b", "value"),
+    Input("comp-hail", "value"),
+    Input("comp-metric", "value"),
+)
+def update_comparison(month_a, month_b, hail, metric):
+    con = get_con()
+    return build_comparison_map(con, GEOJSON, hail, metric, month_a, month_b)
+@callback(
+    Output("nhood-offcanvas", "is_open"),
+    Output("nhood-detail-content", "children"),
+    Input("selected-neighborhood", "data"),
+    State("month-selector", "value"),
+)
+def update_nhood_panel(nhood, month):
+    if not nhood:
+        return False, []
+    con = get_con()
+    profile = build_neighborhood_profile(con, nhood, month)
+    demo = profile["demographics"]
+    children = [
+        html.H4(profile["name"]),
+        html.Hr(),
+    ]
+    if demo:
+        children.extend(
+            [
+                html.H6("Demographics"),
+                html.P(f"Population: {demo['total_pop']:,}"),
+                html.P(f"White: {demo['white_pct']}% (deviation: {demo['white_dev']:+.1f} pp)"),
+                html.P(f"Black: {demo['black_pct']}%"),
+                html.P(f"Asian: {demo['asian_pct']}% (deviation: {demo['asian_dev']:+.1f} pp)"),
+                html.Hr(),
+            ]
+        )
+    if profile["trips"]:
+        children.append(html.H6(f"Trips ({month})"))
+        for key, val in sorted(profile["trips"].items()):
+            if month in key:
+                label = key.replace(f"_{month}", "").replace("_", " ")
+                children.append(html.P(f"{label}: {val:,}"))
+        children.append(html.Hr())
+    if profile["trend_fig"]:
+        children.append(
+            dcc.Graph(
+                figure=profile["trend_fig"],
+                config={"displayModeBar": False},
+                style={"height": "280px"},
+            )
+        )
+    return True, children
+def _make_table(df):
+    if df.empty:
+        return html.P("No data", className="text-muted")
+    return dash_table.DataTable(
+        data=df.to_dict("records"),
+        columns=[{"name": c, "id": c} for c in df.columns],
+        style_table={"overflowX": "auto"},
+        style_header={
+            "backgroundColor": "#375a7f",
+            "color": "white",
+            "fontWeight": "bold",
+            "textAlign": "center",
+        },
+        style_cell={
+            "backgroundColor": "#303030",
+            "color": "#e0e0e0",
+            "textAlign": "center",
+            "padding": "6px",
+            "fontSize": "0.85rem",
+        },
+        style_data_conditional=[
+            {
+                "if": {"row_index": 0},
+                "backgroundColor": "#3a506b",
+                "fontWeight": "bold",
+            }
+        ],
+        page_size=10,
+    )
+@callback(
+    Output("street-stats-title", "children"),
+    Output("street-pu-table", "children"),
+    Output("street-do-table", "children"),
+    Output("app-stats-title", "children"),
+    Output("app-pu-table", "children"),
+    Output("app-do-table", "children"),
+    Input("month-selector", "value"),
+)
+def update_top10(month):
+    con = get_con()
+    s_pu = get_trip_stats_df(con, "Street", month, "pu")
+    s_do = get_trip_stats_df(con, "Street", month, "do")
+    a_pu = get_trip_stats_df(con, "App", month, "pu")
+    a_do = get_trip_stats_df(con, "App", month, "do")
+    return (
+        f"Street-Hail Top 10 ({month})",
+        _make_table(s_pu),
+        _make_table(s_do),
+        f"App-Based Top 10 ({month})",
+        _make_table(a_pu),
+        _make_table(a_do),
+    )
+@callback(
+    Output("csv-download", "data"),
+    Input("download-btn", "n_clicks"),
+    State("month-selector", "value"),
+    State("hail-type-filter", "value"),
+    State("selected-neighborhood", "data"),
+    prevent_initial_call=True,
+)
+def trigger_download(n_clicks, month, hail_types, nhood):
+    con = get_con()
+    df = get_download_csv(con, month, hail_types, nhood)
+    filename = f"sf_taxi_data_{month}"
+    if nhood:
+        filename += f"_{nhood.replace(' ', '_').replace('/', '_')}"
+    return dcc.send_data_frame(df.to_csv, f"{filename}.csv", index=False)
+@callback(
+    Output("geojson-download", "data"),
+    Input("download-geojson-btn", "n_clicks"),
+    State("month-selector", "value"),
+    State("hail-type-filter", "value"),
+    State("selected-neighborhood", "data"),
+    prevent_initial_call=True,
+)
+def trigger_geojson_download(n_clicks, month, hail_types, nhood):
+    import json
+    import copy
+    con = get_con()
+    hail_types = hail_types or ["Street", "App"]
+    ht_filter = ", ".join(f"'{h}'" for h in hail_types)
+    nhood_clause = f"AND n.nhood = '{nhood}'" if nhood else ""
+    # Get trip + demographic data per neighborhood
+    data_df = con.sql(f"""
+        SELECT n.nhood,
+               COALESCE(SUM(pu.trips_pu), 0) AS total_pickups,
+               COALESCE(SUM(td.trips_do), 0) AS total_dropoffs,
+               nd.total_pop, nd.white_pct, nd.black_pct, nd.asian_pct
+        FROM neighborhoods n
+        LEFT JOIN trip_counts_pu pu
+          ON pu.nhood = n.nhood AND pu.month = '{month}'
+          AND pu.hail_type IN ({ht_filter})
+        LEFT JOIN trip_counts_do td
+          ON td.nhood = n.nhood AND td.month = '{month}'
+          AND td.hail_type IN ({ht_filter})
+        LEFT JOIN nhood_demographics nd ON nd.nhood = n.nhood
+        WHERE 1=1 {nhood_clause}
+        GROUP BY n.nhood, nd.total_pop, nd.white_pct, nd.black_pct, nd.asian_pct
+    """).df()
+    data_map = {row["nhood"]: row.to_dict() for _, row in data_df.iterrows()}
+    geojson = copy.deepcopy(GEOJSON)
+    # Filter to selected neighborhood if one is chosen
+    if nhood:
+        geojson["features"] = [
+            f for f in geojson["features"]
+            if f["properties"]["nhood"] == nhood
+        ]
+    # Enrich features with data
+    for feat in geojson["features"]:
+        name = feat["properties"]["nhood"]
+        if name in data_map:
+            d = data_map[name]
+            feat["properties"]["month"] = month
+            feat["properties"]["total_pickups"] = int(d["total_pickups"])
+            feat["properties"]["total_dropoffs"] = int(d["total_dropoffs"])
+            feat["properties"]["total_pop"] = int(d["total_pop"]) if d["total_pop"] else 0
+            feat["properties"]["white_pct"] = round(float(d["white_pct"]), 1) if d["white_pct"] else 0
+            feat["properties"]["black_pct"] = round(float(d["black_pct"]), 1) if d["black_pct"] else 0
+            feat["properties"]["asian_pct"] = round(float(d["asian_pct"]), 1) if d["asian_pct"] else 0
+    filename = f"sf_taxi_{month}"
+    if nhood:
+        filename += f"_{nhood.replace(' ', '_').replace('/', '_')}"
+    return dict(
+        content=json.dumps(geojson, indent=2),
+        filename=f"{filename}.geojson",
+        type="application/geo+json",
+    )
+# Direct execution (python app.py): serve on all interfaces at port 7860 with
+# debug mode off. The Dockerfile starts gunicorn with "app:server" instead.
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860, debug=False)

assets/styles.css ADDED Viewed

	@@ -0,0 +1,42 @@

+/* Cards: soft glow on hover, animated over 0.2s. */
+.card {
+    transition: box-shadow 0.2s ease;
+}
+.card:hover {
+    box-shadow: 0 0 12px rgba(55, 90, 127, 0.3);
+}
+/* Pin the Plotly mode bar to the top-right corner of each graph. */
+.js-plotly-plot .plotly .modebar {
+    top: 4px !important;
+    right: 4px !important;
+}
+/* Off-canvas panel: dark background with light text. */
+.offcanvas {
+    background-color: #303030 !important;
+    color: #e0e0e0 !important;
+}
+/* Invert the close button colors inside the off-canvas header. */
+.offcanvas-header .btn-close {
+    filter: invert(1);
+}
+/* Force black text on the dropdown value and trigger elements. */
+.dash-dropdown-value,
+.dash-dropdown-value-item,
+.dash-dropdown-trigger {
+    color: #000 !important;
+}
+body {
+    overflow-y: auto;
+}
+/* Rounded corners for data tables; overflow is clipped to the radius. */
+.dash-table-container .dash-spreadsheet-container {
+    border-radius: 4px;
+    overflow: hidden;
+}
+/* At widths up to 768px the sidebar is no longer sticky or full-height. */
+@media (max-width: 768px) {
+    #sidebar {
+        position: relative !important;
+        height: auto !important;
+        margin-bottom: 1rem;
+    }
+}

dashboard_helpers.py ADDED Viewed

	@@ -0,0 +1,471 @@

+import json
+import functools
+import pandas as pd
+import geopandas as gpd
+import plotly.express as px
+import plotly.graph_objects as go
+_geojson_cache = {}
+def get_neighborhood_geojson(con) -> dict:
+    if "geojson" not in _geojson_cache:
+        df = con.sql(
+            "SELECT nhood, ST_AsText(geometry) AS geometry FROM neighborhoods"
+        ).df()
+        gdf = gpd.GeoDataFrame(
+            df,
+            geometry=gpd.GeoSeries.from_wkt(df["geometry"]),
+            crs="EPSG:4326",
+        )
+        _geojson_cache["geojson"] = json.loads(gdf.to_json())
+    return _geojson_cache["geojson"]
+def get_all_neighborhoods(con) -> list[str]:
+    return (
+        con.sql("SELECT DISTINCT nhood FROM neighborhoods ORDER BY nhood")
+        .df()["nhood"]
+        .tolist()
+    )
+# Shared map defaults used by the figure builders in this module.
+_MAP_CENTER = {"lat": 37.76, "lon": -122.44}
+_MAP_ZOOM = 11
+_MAP_STYLE = "carto-darkmatter"
+# Margins leave 32 px at the top; all other sides are flush.
+_MAP_MARGIN = dict(l=0, r=0, t=32, b=0)
+_MAP_HEIGHT = 420
+# dcc.Graph config: mode bar always shown; the image-export button produces a
+# 1200x800 PNG at scale 2.
+_GRAPH_CONFIG = {
+    "toImageButtonOptions": {
+        "format": "png",
+        "width": 1200,
+        "height": 800,
+        "scale": 2,
+    },
+    "displayModeBar": True,
+}
+def _highlight_trace(con, geojson, nhood):
+    for feat in geojson["features"]:
+        if feat["properties"]["nhood"] == nhood:
+            geom = feat["geometry"]
+            coords = (
+                geom["coordinates"][0]
+                if geom["type"] == "Polygon"
+                else geom["coordinates"][0][0]
+            )
+            lons = [c[0] for c in coords] + [None]
+            lats = [c[1] for c in coords] + [None]
+            return go.Scattermapbox(
+                lon=lons,
+                lat=lats,
+                mode="lines",
+                line=dict(width=3, color="#ff4444"),
+                hoverinfo="skip",
+                showlegend=False,
+            )
+    return None
+def build_trip_choropleth(
+    con, geojson, hail_type, month, metric, selected_nhood=None
+):
+    if metric == "pu":
+        table, col = "trip_counts_pu", "trips_pu"
+        color_scale = "Viridis"
+        title = f"{'Street-Hail' if hail_type == 'Street' else 'App-Based'} Pickups"
+    else:
+        table, col = "trip_counts_do", "trips_do"
+        color_scale = "Cividis"
+        title = f"{'Street-Hail' if hail_type == 'Street' else 'App-Based'} Drop-offs"
+    df = con.sql(f"""
+        SELECT n.nhood, COALESCE(t.{col}, 0) AS trips
+        FROM neighborhoods n
+        LEFT JOIN {table} t
+          ON t.nhood = n.nhood
+         AND t.hail_type = '{hail_type}'
+         AND t.month = '{month}'
+    """).df()
+    fig = px.choropleth_mapbox(
+        df,
+        geojson=geojson,
+        locations="nhood",
+        featureidkey="properties.nhood",
+        color="trips",
+        color_continuous_scale=color_scale,
+        mapbox_style=_MAP_STYLE,
+        center=_MAP_CENTER,
+        zoom=_MAP_ZOOM,
+        opacity=0.75,
+        hover_data={"nhood": True, "trips": True},
+        title=f"{title} ({month})",
+    )
+    fig.update_traces(
+        customdata=df[["nhood"]].values,
+        hovertemplate="<b>%{customdata[0]}</b><br>Trips: %{z:,}<extra></extra>",
+    )
+    if selected_nhood:
+        trace = _highlight_trace(con, geojson, selected_nhood)
+        if trace:
+            fig.add_trace(trace)
+    fig.update_layout(
+        margin=_MAP_MARGIN,
+        height=_MAP_HEIGHT,
+        coloraxis_colorbar=dict(title="Trips", thickness=15, len=0.6),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font_color="#e0e0e0",
+    )
+    return fig
+def build_demo_choropleth(
+    con, geojson, column, baseline_val, legend_title, selected_nhood=None
+):
+    df = con.sql(f"""
+        SELECT nd.nhood,
+               ROUND(nd.{column} - {baseline_val}, 1) AS deviation
+        FROM nhood_demographics nd
+        JOIN neighborhoods n ON nd.nhood = n.nhood
+        WHERE nd.total_pop > 0
+    """).df()
+    max_abs = max(abs(df["deviation"].min()), abs(df["deviation"].max()), 1)
+    fig = px.choropleth_mapbox(
+        df,
+        geojson=geojson,
+        locations="nhood",
+        featureidkey="properties.nhood",
+        color="deviation",
+        color_continuous_scale="RdBu",
+        range_color=[-max_abs, max_abs],
+        color_continuous_midpoint=0,
+        mapbox_style=_MAP_STYLE,
+        center=_MAP_CENTER,
+        zoom=_MAP_ZOOM,
+        opacity=0.75,
+        title=legend_title,
+    )
+    fig.update_traces(
+        customdata=df[["nhood", "deviation"]].values,
+        hovertemplate=(
+            "<b>%{customdata[0]}</b><br>"
+            "Deviation: %{customdata[1]:+.1f} pp<extra></extra>"
+        ),
+    )
+    if selected_nhood:
+        trace = _highlight_trace(con, geojson, selected_nhood)
+        if trace:
+            fig.add_trace(trace)
+    fig.update_layout(
+        margin=_MAP_MARGIN,
+        height=_MAP_HEIGHT,
+        coloraxis_colorbar=dict(title="Dev (pp)", thickness=15, len=0.6),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font_color="#e0e0e0",
+    )
+    return fig
+def build_rr_bar_chart(con, month):
+    df = con.sql(f"""
+        SELECT hail_type, month,
+               RR_white_PU, RR_asian_PU, RR_white_DO, RR_asian_DO
+        FROM representative_ratios
+        WHERE month = '{month}'
+    """).df()
+    if df.empty:
+        return go.Figure().update_layout(
+            title="No data", paper_bgcolor="rgba(0,0,0,0)"
+        )
+    rows = []
+    for _, r in df.iterrows():
+        for metric_col, label in [
+            ("RR_white_PU", "White: Pickups"),
+            ("RR_white_DO", "White: Drop-offs"),
+            ("RR_asian_PU", "Asian: Pickups"),
+            ("RR_asian_DO", "Asian: Drop-offs"),
+        ]:
+            rows.append(
+                {
+                    "Hail Type": r["hail_type"],
+                    "Metric": label,
+                    "RR": round(float(r[metric_col]), 3),
+                }
+            )
+    plot_df = pd.DataFrame(rows)
+    fig = px.bar(
+        plot_df,
+        x="Metric",
+        y="RR",
+        color="Hail Type",
+        barmode="group",
+        color_discrete_map={"Street": "#636EFA", "App": "#EF553B"},
+        title=f"Representative Ratios: {month}",
+    )
+    fig.add_hline(
+        y=1.0,
+        line_dash="dash",
+        line_color="#ffd700",
+        annotation_text="Perfect Representation (1.0)",
+        annotation_position="top left",
+        annotation_font_color="#ffd700",
+    )
+    fig.update_layout(
+        yaxis_title="Representative Ratio",
+        xaxis_title="",
+        height=420,
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font_color="#e0e0e0",
+        legend=dict(orientation="h", yanchor="bottom", y=1.02, x=0.5, xanchor="center"),
+    )
+    fig.update_yaxes(gridcolor="rgba(255,255,255,0.1)")
+    return fig
+def build_rr_heatmap(con):
+    df = con.sql(
+        "SELECT * FROM representative_ratios ORDER BY hail_type, month"
+    ).df()
+    if df.empty:
+        return go.Figure().update_layout(
+            title="No data", paper_bgcolor="rgba(0,0,0,0)"
+        )
+    labels = []
+    z_vals = []
+    for _, r in df.iterrows():
+        row_label = f"{r['hail_type']}: {r['month']}"
+        labels.append(row_label)
+        z_vals.append(
+            [
+                round(float(r["RR_white_PU"]), 3),
+                round(float(r["RR_asian_PU"]), 3),
+                round(float(r["RR_white_DO"]), 3),
+                round(float(r["RR_asian_DO"]), 3),
+            ]
+        )
+    col_labels = ["White PU", "Asian PU", "White DO", "Asian DO"]
+    fig = go.Figure(
+        data=go.Heatmap(
+            z=z_vals,
+            x=col_labels,
+            y=labels,
+            colorscale="RdBu",
+            zmid=1.0,
+            text=z_vals,
+            texttemplate="%{text:.3f}",
+            textfont=dict(size=12),
+            hovertemplate=(
+                "<b>%{y}</b><br>%{x}: %{z:.3f}<extra></extra>"
+            ),
+            colorbar=dict(title="RR", thickness=15),
+        )
+    )
+    fig.update_layout(
+        title="Representative Ratios: All Months",
+        height=350,
+        margin=dict(l=0, r=0, t=40, b=0),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font_color="#e0e0e0",
+        xaxis=dict(side="top"),
+    )
+    return fig
+def build_neighborhood_profile(con, nhood, month):
+    demo = con.sql(f"""
+        SELECT total_pop, white_pop, black_pop, asian_pop,
+               white_pct, black_pct, asian_pct
+        FROM nhood_demographics
+        WHERE nhood = '{nhood}'
+    """).df()
+    baselines = con.sql("SELECT * FROM city_baselines").df()
+    trips_pu = con.sql(f"""
+        SELECT hail_type, month, trips_pu
+        FROM trip_counts_pu
+        WHERE nhood = '{nhood}'
+        ORDER BY month, hail_type
+    """).df()
+    trips_do = con.sql(f"""
+        SELECT hail_type, month, trips_do
+        FROM trip_counts_do
+        WHERE nhood = '{nhood}'
+        ORDER BY month, hail_type
+    """).df()
+    profile = {"name": nhood, "demographics": {}, "trips": {}, "trend_fig": None}
+    if not demo.empty:
+        d = demo.iloc[0]
+        bw = float(baselines["baseline_white_pct"].iloc[0])
+        ba = float(baselines["baseline_asian_pct"].iloc[0])
+        profile["demographics"] = {
+            "total_pop": int(d["total_pop"]),
+            "white_pct": round(float(d["white_pct"]), 1),
+            "black_pct": round(float(d["black_pct"]), 1),
+            "asian_pct": round(float(d["asian_pct"]), 1),
+            "white_dev": round(float(d["white_pct"]) - bw, 1),
+            "asian_dev": round(float(d["asian_pct"]) - ba, 1),
+        }
+    for _, r in trips_pu.iterrows():
+        key = f"{r['hail_type']}_PU_{r['month']}"
+        profile["trips"][key] = int(r["trips_pu"])
+    for _, r in trips_do.iterrows():
+        key = f"{r['hail_type']}_DO_{r['month']}"
+        profile["trips"][key] = int(r["trips_do"])
+    # Mini trend chart
+    trend_rows = []
+    for _, r in trips_pu.iterrows():
+        trend_rows.append(
+            {"Month": r["month"], "Type": f"{r['hail_type']} PU", "Trips": int(r["trips_pu"])}
+        )
+    for _, r in trips_do.iterrows():
+        trend_rows.append(
+            {"Month": r["month"], "Type": f"{r['hail_type']} DO", "Trips": int(r["trips_do"])}
+        )
+    if trend_rows:
+        trend_df = pd.DataFrame(trend_rows)
+        trend_fig = px.bar(
+            trend_df,
+            x="Month",
+            y="Trips",
+            color="Type",
+            barmode="group",
+            title=f"Trip Trends: {nhood}",
+            height=280,
+        )
+        trend_fig.update_layout(
+            paper_bgcolor="rgba(0,0,0,0)",
+            plot_bgcolor="rgba(0,0,0,0)",
+            font_color="#e0e0e0",
+            margin=dict(l=0, r=0, t=40, b=0),
+            legend=dict(orientation="h", y=-0.2),
+        )
+        trend_fig.update_yaxes(gridcolor="rgba(255,255,255,0.1)")
+        profile["trend_fig"] = trend_fig
+    return profile
+def get_trip_stats_df(con, hail_type, month, metric):
+    if metric == "pu":
+        table, col, alias = "trip_counts_pu", "trips_pu", "Pickups"
+    else:
+        table, col, alias = "trip_counts_do", "trips_do", "Drop-offs"
+    return con.sql(f"""
+        SELECT n.nhood AS Neighborhood, t.{col} AS "{alias}"
+        FROM {table} t
+        JOIN neighborhoods n ON t.nhood = n.nhood
+        WHERE t.hail_type = '{hail_type}' AND t.month = '{month}'
+        ORDER BY t.{col} DESC
+        LIMIT 10
+    """).df()
+def get_download_csv(con, month, hail_types, nhood=None):
+    ht_filter = ", ".join(f"'{h}'" for h in hail_types) if hail_types else "'Street','App'"
+    nhood_clause = f"AND n.nhood = '{nhood}'" if nhood else ""
+    pu = con.sql(f"""
+        SELECT n.nhood, t.hail_type, t.month, t.trips_pu,
+               nd.total_pop, nd.white_pct, nd.black_pct, nd.asian_pct
+        FROM trip_counts_pu t
+        JOIN neighborhoods n ON t.nhood = n.nhood
+        JOIN nhood_demographics nd ON n.nhood = nd.nhood
+        WHERE t.month = '{month}'
+          AND t.hail_type IN ({ht_filter})
+          {nhood_clause}
+        ORDER BY t.trips_pu DESC
+    """).df()
+    do = con.sql(f"""
+        SELECT n.nhood, t.hail_type, t.month, t.trips_do
+        FROM trip_counts_do t
+        JOIN neighborhoods n ON t.nhood = n.nhood
+        WHERE t.month = '{month}'
+          AND t.hail_type IN ({ht_filter})
+          {nhood_clause}
+    """).df()
+    merged = pd.merge(
+        pu,
+        do[["nhood", "hail_type", "month", "trips_do"]],
+        on=["nhood", "hail_type", "month"],
+        how="outer",
+    )
+    return merged.fillna(0)
+def build_comparison_map(con, geojson, hail_type, metric, month_a, month_b):
+    if metric == "pu":
+        table, col = "trip_counts_pu", "trips_pu"
+        label = "Pickups"
+    else:
+        table, col = "trip_counts_do", "trips_do"
+        label = "Drop-offs"
+    df = con.sql(f"""
+        WITH a AS (
+            SELECT nhood, {col} AS trips_a
+            FROM {table}
+            WHERE hail_type = '{hail_type}' AND month = '{month_a}'
+        ),
+        b AS (
+            SELECT nhood, {col} AS trips_b
+            FROM {table}
+            WHERE hail_type = '{hail_type}' AND month = '{month_b}'
+        )
+        SELECT n.nhood,
+               COALESCE(b.trips_b, 0) - COALESCE(a.trips_a, 0) AS diff
+        FROM neighborhoods n
+        LEFT JOIN a ON n.nhood = a.nhood
+        LEFT JOIN b ON n.nhood = b.nhood
+    """).df()
+    max_abs = max(abs(df["diff"].min()), abs(df["diff"].max()), 1)
+    fig = px.choropleth_mapbox(
+        df,
+        geojson=geojson,
+        locations="nhood",
+        featureidkey="properties.nhood",
+        color="diff",
+        color_continuous_scale="RdBu",
+        range_color=[-max_abs, max_abs],
+        color_continuous_midpoint=0,
+        mapbox_style=_MAP_STYLE,
+        center=_MAP_CENTER,
+        zoom=_MAP_ZOOM,
+        opacity=0.75,
+        title=f"{hail_type} {label}: {month_b} vs {month_a}",
+    )
+    fig.update_traces(
+        customdata=df[["nhood", "diff"]].values,
+        hovertemplate=(
+            "<b>%{customdata[0]}</b><br>"
+            "Change: %{customdata[1]:+d} trips<extra></extra>"
+        ),
+    )
+    fig.update_layout(
+        margin=_MAP_MARGIN,
+        height=_MAP_HEIGHT,
+        coloraxis_colorbar=dict(title="Change", thickness=15, len=0.6),
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font_color="#e0e0e0",
+    )
+    return fig

data_pipeline.py ADDED Viewed

	@@ -0,0 +1,274 @@

+import io
+import os
+import pathlib
+import duckdb
+import pandas as pd
+import geopandas as gpd
+import requests
+# Build-time pipeline: everything below writes into this DuckDB database file.
+con = duckdb.connect("sf_dashboard.db")
+con.install_extension("httpfs")
+con.load_extension("httpfs")
+# The spatial extension provides the ST_* functions used in the queries below.
+con.install_extension("spatial")
+con.load_extension("spatial")
+NHOOD_URL = "https://data.sfgov.org/resource/j2bu-swwd.geojson"
+print("[1/6] Loading SF Analysis Neighborhoods ...")
+nhoods = gpd.read_file(NHOOD_URL)
+nhoods_df = nhoods[["nhood", "geometry"]].copy()
+nhoods_df = nhoods_df.to_crs("EPSG:4326")
+nhoods_utm  = nhoods_df.to_crs("EPSG:26910")
+print(f"      Loaded {len(nhoods_df)} neighborhoods")
+# Register as DuckDB table
+# Geometries are converted to WKB bytes so ST_GeomFromWKB can rebuild them.
+# The SQL names the DataFrame by its Python variable (`nhoods_df`), so the
+# variable must not be renamed without updating the query.
+nhoods_df["geometry"] = nhoods_df["geometry"].apply(lambda g: bytes(g.wkb))
+con.sql("""
+    CREATE OR REPLACE TABLE neighborhoods AS
+    SELECT nhood, ST_GeomFromWKB(geometry)::GEOMETRY AS geometry
+    FROM nhoods_df
+""")
+# Register UTM version
+# Same pattern for the EPSG:26910 copy; the SQL names `nhoods_utm` directly.
+nhoods_utm["geometry"] = nhoods_utm["geometry"].apply(lambda g: bytes(g.wkb))
+con.sql("""
+    CREATE OR REPLACE TABLE neighborhoods_utm AS
+    SELECT nhood, ST_GeomFromWKB(geometry)::GEOMETRY AS geometry
+    FROM nhoods_utm
+""")
+SOCRATA_BASE = "https://data.sfgov.org/resource/m8hk-2ipk.csv"
+MONTHS = [
+    ("Jan2024", "2024-01-01T00:00:00", "2024-01-31T23:59:59"),
+    ("Feb2024", "2024-02-01T00:00:00", "2024-02-29T23:59:59"),
+    ("Mar2024", "2024-03-01T00:00:00", "2024-03-31T23:59:59"),
+    ("Apr2024", "2024-04-01T00:00:00", "2024-04-30T23:59:59"),
+    ("May2024", "2024-05-01T00:00:00", "2024-05-31T23:59:59"),
+    ("Jun2024", "2024-06-01T00:00:00", "2024-06-30T23:59:59"),
+    ("Jul2024", "2024-07-01T00:00:00", "2024-07-31T23:59:59"),
+    ("Aug2024", "2024-08-01T00:00:00", "2024-08-31T23:59:59"),
+    ("Sep2024", "2024-09-01T00:00:00", "2024-09-30T23:59:59"),
+    ("Oct2024", "2024-10-01T00:00:00", "2024-10-31T23:59:59"),
+    ("Nov2024", "2024-11-01T00:00:00", "2024-11-30T23:59:59"),
+    ("Dec2024", "2024-12-01T00:00:00", "2024-12-31T23:59:59"),
+]
+LIMIT = 1000
+print("[2/6] Downloading SF taxi trips ...")
+if not os.path.exists("raw_trips.csv"):
+    all_trips = []
+    for month_label, start, end in MONTHS:
+        OFFSET = 0
+        while True:
+            params = {
+                "$where": f"start_time_local between '{start}' and '{end}'",
+                "$limit": LIMIT,
+                "$offset": OFFSET,
+                "$order": "start_time_local"
+            }
+            response = requests.get(SOCRATA_BASE, params=params, timeout=30)
+            df = pd.read_csv(io.StringIO(response.text))
+            df["month"] = month_label
+            all_trips.append(df)
+            if len(df) < LIMIT:
+                break
+            OFFSET += LIMIT
+        print(f"      {month_label}: {len(df)} rows")
+    trips_df = pd.concat(all_trips, ignore_index=True)
+    trips_df.to_csv("raw_trips.csv", index=False)
+else:
+    trips_df = pd.read_csv("raw_trips.csv")
+# Drop rows with missing or zero coordinates
+trips_df = trips_df.dropna(
+    subset=[
+        "pickup_location_latitude", "pickup_location_longitude",
+        "dropoff_location_latitude", "dropoff_location_longitude",
+    ]
+)
+trips_df = trips_df[
+    (trips_df["pickup_location_latitude"] != 0)
+    & (trips_df["pickup_location_longitude"] != 0)
+    & (trips_df["dropoff_location_latitude"] != 0)
+    & (trips_df["dropoff_location_longitude"] != 0)
+]
+# Normalise hail_type to two categories
+def normalise_hail_type(hail_type):
+    """Map "street" and "dispatch" to "Street"; every other value becomes "App".
+    The match is exact and case-sensitive, so missing values and unexpected
+    spellings also land in "App".
+    """
+    if hail_type in ["street","dispatch"]:
+        return "Street"
+    else:
+        return "App"
+trips_df["hail_type"] = trips_df["hail_type"].apply(normalise_hail_type)
+# Drop trips whose qa_flags contain any of these codes. Flags are split on "-";
+# a missing qa_flags value is treated as having no flags.
+# NOTE(review): the meaning of each code is not documented here - confirm
+# against the dataset's data dictionary.
+bad_flags = ['DR', 'FTR', 'ST', 'ET']
+trips_df = trips_df[
+    ~trips_df['qa_flags'].fillna('').apply(
+        lambda flags: any(f in flags.split('-') for f in bad_flags)
+    )
+]
+# The SQL names the DataFrame by its Python variable (`trips_df`).
+con.sql("CREATE OR REPLACE TABLE raw_trips AS SELECT * FROM trips_df")
+print("[3/6] Spatial join: pickup points to neighborhoods ...")
+# Count pickups per (hail_type, month, neighborhood). A trip whose pickup point
+# intersects no neighborhood polygon is dropped by the inner join.
+con.sql("""
+    CREATE OR REPLACE TABLE trip_counts_pu AS
+    SELECT
+        t.hail_type,
+        t.month,
+        n.nhood,
+        COUNT(*) AS trips_pu
+    FROM raw_trips AS t
+    JOIN neighborhoods AS n
+      ON ST_Intersects(
+          n.geometry,
+          ST_Point(t.pickup_location_longitude, t.pickup_location_latitude)::GEOMETRY
+    )
+    GROUP BY t.hail_type, t.month, n.nhood
+""")
+print("[3/6] Spatial join: dropoff points to neighborhoods ...")
+# Same aggregation keyed on the drop-off point.
+con.sql("""
+    CREATE OR REPLACE TABLE trip_counts_do AS
+    SELECT
+        t.hail_type,
+        t.month,
+        n.nhood,
+        COUNT(*) AS trips_do
+    FROM raw_trips AS t
+    JOIN neighborhoods AS n
+      ON ST_Intersects(
+          n.geometry,
+          ST_Point(t.dropoff_location_longitude, t.dropoff_location_latitude)::GEOMETRY
+      )
+    GROUP BY t.hail_type, t.month, n.nhood
+""")
+# Progress output only: the five neighborhoods with the most pickups overall.
+top5 = con.sql("""
+    SELECT nhood, SUM(trips_pu) AS total
+    FROM trip_counts_pu GROUP BY nhood ORDER BY total DESC LIMIT 5
+""").df()
+print("      Top 5 pickup neighborhoods:")
+print(top5.to_string(index=False))
+print("[4/6] Computing neighborhood demographics ...")
+# ACS 5-Year 2022, block groups in SF County (state=06, county=075)
+response = requests.get(
+    "https://api.census.gov/data/2022/acs/acs5",
+    params={
+        "get": "B02001_001E,B02001_002E,B02001_003E,B02001_005E",
+        "ucgid": "pseudo(0500000US06075$1500000)"
+    },
+    timeout=30
+)
+data = response.json()
+census_df = pd.DataFrame(data[1:], columns=data[0])
+# Convert to numeric
+for col in ["B02001_001E", "B02001_002E", "B02001_003E", "B02001_005E"]:
+    census_df[col] = pd.to_numeric(census_df[col], errors="coerce")
+census_df = census_df.rename(columns={
+    "B02001_001E": "total_pop",
+    "B02001_002E": "white_pop",
+    "B02001_003E": "black_pop",
+    "B02001_005E": "asian_pop",
+})
+census_df["GEOID"] = census_df["ucgid"].str[-12:]
+BG_URL = "https://www2.census.gov/geo/tiger/TIGER2022/BG/tl_2022_06_bg.zip"
+bg_gdf = gpd.read_file(BG_URL)
+bg_gdf = bg_gdf[bg_gdf["COUNTYFP"] == "075"]  # SF county only
+bg_gdf = bg_gdf[["GEOID", "geometry"]].copy()
+bg_gdf = bg_gdf.to_crs("EPSG:4326")
+# Merge census data with geometries
+census_gdf = bg_gdf.merge(
+    census_df[["GEOID", "total_pop", "white_pop", "black_pop", "asian_pop"]],
+    on="GEOID",
+    how="inner"
+)
+census_db = pd.DataFrame(census_gdf)
+census_db["geometry"] = census_gdf["geometry"].apply(lambda g: bytes(g.wkb))
+con.register("census_raw", census_db)
+con.sql("""
+    CREATE OR REPLACE TABLE census_blocks AS
+    SELECT GEOID, total_pop, white_pop, black_pop, asian_pop,
+           ST_GeomFromWKB(geometry)::GEOMETRY AS geometry
+    FROM census_raw
+""")
+con.sql("""
+    CREATE OR REPLACE TABLE nhood_demographics AS
+    SELECT
+        n.nhood,
+        SUM(cb.total_pop)  AS total_pop,
+        SUM(cb.white_pop)  AS white_pop,
+        SUM(cb.black_pop)  AS black_pop,
+        SUM(cb.asian_pop)  AS asian_pop,
+        CASE WHEN SUM(cb.total_pop) > 0
+             THEN 100.0 * SUM(cb.white_pop) / SUM(cb.total_pop)
+             ELSE 0 END AS white_pct,
+        CASE WHEN SUM(cb.total_pop) > 0
+             THEN 100.0 * SUM(cb.black_pop) / SUM(cb.total_pop)
+             ELSE 0 END AS black_pct,
+        CASE WHEN SUM(cb.total_pop) > 0
+             THEN 100.0 * SUM(cb.asian_pop) / SUM(cb.total_pop)
+             ELSE 0 END AS asian_pct
+    FROM census_blocks AS cb
+    JOIN neighborhoods AS n
+      ON ST_Intersects(n.geometry, cb.geometry)
+    GROUP BY n.nhood
+""")
+con.sql("SELECT * FROM nhood_demographics ORDER BY total_pop DESC LIMIT 10").df()
+print("[5/6] Computing city-wide baselines ...")
+# City-wide share of each group, as a percentage rounded to 2 decimals,
+# computed from the neighborhood totals (populated neighborhoods only).
+baseline_df = con.sql("""
+    SELECT
+        ROUND(100.0 * SUM(white_pop) / SUM(total_pop), 2) AS baseline_white_pct,
+        ROUND(100.0 * SUM(black_pop) / SUM(total_pop), 2) AS baseline_black_pct,
+        ROUND(100.0 * SUM(asian_pop) / SUM(total_pop), 2) AS baseline_asian_pct
+    FROM nhood_demographics
+    WHERE total_pop > 0
+""").df()
+# The SQL names the DataFrame by its Python variable (`baseline_df`).
+con.sql("CREATE OR REPLACE TABLE city_baselines AS SELECT * FROM baseline_df")
+print(f"      Baselines: {baseline_df.to_dict('records')[0]}")
+# Scalars for the ratio queries that follow; `bb` is not used in the visible code.
+bw = float(baseline_df["baseline_white_pct"].iloc[0])
+bb = float(baseline_df["baseline_black_pct"].iloc[0])
+ba = float(baseline_df["baseline_asian_pct"].iloc[0])
+print("[6/6] Computing representative ratios ...")
+# Representative Ratio per (hail_type, month): the trip-weighted average of the
+# neighborhoods' group percentage, divided by the city-wide baseline percentage.
+# `bw` and `ba` are floats computed above and formatted into the SQL text.
+rr_pu_df = con.sql(f"""
+    SELECT tp.hail_type, tp.month,
+        SUM(tp.trips_pu * nd.white_pct) * 1.0
+            / SUM(tp.trips_pu) / {bw} AS RR_white_PU,
+        SUM(tp.trips_pu * nd.asian_pct) * 1.0
+            / SUM(tp.trips_pu) / {ba} AS RR_asian_PU
+    FROM trip_counts_pu AS tp
+    JOIN nhood_demographics AS nd ON tp.nhood = nd.nhood
+    WHERE nd.total_pop > 0
+    GROUP BY tp.hail_type, tp.month
+""").df()
+# Same computation weighted by drop-offs.
+rr_do_df = con.sql(f"""
+    SELECT td.hail_type, td.month,
+        SUM(td.trips_do * nd.white_pct) * 1.0
+            / SUM(td.trips_do) / {bw} AS RR_white_DO,
+        SUM(td.trips_do * nd.asian_pct) * 1.0
+            / SUM(td.trips_do) / {ba} AS RR_asian_DO
+    FROM trip_counts_do AS td
+    JOIN nhood_demographics AS nd ON td.nhood = nd.nhood
+    WHERE nd.total_pop > 0
+    GROUP BY td.hail_type, td.month
+""").df()
+# Outer merge keeps a (hail_type, month) pair that appears on only one side;
+# the other side's ratios are then missing for that row.
+rr_combined = pd.merge(
+    rr_pu_df, rr_do_df, on=["hail_type", "month"], how="outer"
+)
+# The SQL names the DataFrame by its Python variable (`rr_combined`).
+con.sql("CREATE OR REPLACE TABLE representative_ratios AS SELECT * FROM rr_combined")
+print("\nPipeline complete. Database: sf_dashboard.db")
+print("Representative ratios:")
+print(rr_combined.to_string(index=False))
+con.close()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+dash>=2.14.0
+dash-bootstrap-components>=1.5.0
+plotly>=5.18.0
+duckdb>=1.0.0
+pandas>=2.0.0
+geopandas>=0.14.0
+shapely>=2.0.0
+pyproj>=3.6.0
+requests>=2.28.0
+gunicorn>=21.2.0