Debayan Mandal commited on
Commit ·
dd79a40
1
Parent(s): c0878af
Initial Dashboard Deployment
Browse files- .gitignore +21 -0
- Dockerfile +35 -0
- README.md +45 -6
- app.py +80 -0
- dashboard_helpers.py +74 -0
- data_pipeline.py +132 -0
- requirements.txt +9 -0
.gitignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python Cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Databases and Downloaded Archives
|
| 7 |
+
*.db
|
| 8 |
+
*.db.wal
|
| 9 |
+
*.zip
|
| 10 |
+
|
| 11 |
+
# Jupyter Notebook Checkpoints
|
| 12 |
+
.ipynb_checkpoints/
|
| 13 |
+
|
| 14 |
+
# Virtual Environments
|
| 15 |
+
venv/
|
| 16 |
+
env/
|
| 17 |
+
.env
|
| 18 |
+
|
| 19 |
+
# OS Generated Files
|
| 20 |
+
.DS_Store
|
| 21 |
+
Thumbs.db
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.11-slim

# System deps for GDAL/geopandas
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgdal-dev \
    gdal-bin \
    libgeos-dev \
    libproj-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first so this layer is cached across code-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY data_pipeline.py .
COPY dashboard_helpers.py .
# BUG FIX: CMD below launches app.py, but the original copied app.ipynb,
# leaving app.py missing from the image so the container could not start.
COPY app.py .

# Pre-download the census database so it is baked into the image
RUN python -c "import leafmap; \
    leafmap.download_file( \
        'https://opengeos.org/data/duckdb/nyc_data.db.zip', \
        unzip=True, \
        overwrite=True \
    )"
# Build processed_dashboard.db at image-build time so app startup is instant
RUN python data_pipeline.py

# Expose the port HF Spaces expects
EXPOSE 7860

# Launch Solara
CMD ["solara", "run", "app.py", "--host=0.0.0.0", "--port=7860"]
README.md
CHANGED
|
@@ -1,12 +1,51 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 🦀
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
pinned:
|
| 8 |
license: mit
|
| 9 |
-
short_description: The NYC Mobility Dashboard powered by Solara
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: NYC Taxi Mobility Equity Dashboard_Solara
|
|
|
|
| 3 |
colorFrom: blue
|
| 4 |
+
colorTo: green
|
| 5 |
sdk: docker
|
| 6 |
+
pinned: true
|
| 7 |
license: mit
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# NYC Mobility Equity Dashboard
|
| 11 |
+
_Debayan Mandal_
|
| 12 |
+
|
| 13 |
+
An interactive Solara dashboard analyzing whether **FHV** and
|
| 14 |
+
**Yellow Taxi** services are equitably distributed across NYC neighborhoods
|
| 15 |
+
relative to demographic baselines.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- **Pickup & Drop-off choropleth maps** for FHV and Yellow Taxi by month
|
| 20 |
+
- **Diverging demographic maps** showing deviation from citywide population baselines
|
| 21 |
+
- **Top-10 stats tables** split by service and direction
|
| 22 |
+
- **Month dropdown** to explore Jan–Mar 2025 data
|
| 23 |
+
|
| 24 |
+
## Data Sources
|
| 25 |
+
|
| 26 |
+
- [NYC TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page) (parquet via CloudFront)
|
| 27 |
+
- [NYC Taxi Zones](https://data.source.coop/cholmes/nyc-taxi-zones/) (Source Cooperative)
|
| 28 |
+
- [NYC Census Blocks](https://opengeos.org/data/duckdb/nyc_data.db.zip) (OpenGeos)
|
| 29 |
+
|
| 30 |
+
## Architecture
|
| 31 |
+
|
| 32 |
+
| File | Purpose |
|
| 33 |
+
|------|---------|
|
| 34 |
+
| `data_pipeline.py` | DuckDB setup, spatial joins, trip ingestion, relative risk |
|
| 35 |
+
| `dashboard_helpers.py` | Reusable map and stats-table builder functions |
|
| 36 |
+
| `app.py` | Main Solara application and UI component script |
|
| 37 |
+
| `Dockerfile` | Container setup for Hugging Face Spaces deployment |
|
| 38 |
+
|
| 39 |
+
## Local Development
|
| 40 |
+
|
| 41 |
+
To run this dashboard locally, you must first build the pre-computed DuckDB database, and then launch the Solara server.
|
| 42 |
+
|
| 43 |
+
```bash
|
| 44 |
+
# 1. Install dependencies
|
| 45 |
+
pip install -r requirements.txt
|
| 46 |
+
|
| 47 |
+
# 2. Run the pipeline to fetch data and generate processed_dashboard.db
|
| 48 |
+
python data_pipeline.py
|
| 49 |
+
|
| 50 |
+
# 3. Launch the hot-reloading Solara development server
solara run app.py
```
app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import solara
import duckdb
from dashboard_helpers import build_trip_map, build_demo_map, build_stats

# 1. Connect to our pre-baked database in read-only mode
# NOTE(review): assumes data_pipeline.py has already produced
# processed_dashboard.db (the Dockerfile runs it at build time).
con = duckdb.connect('processed_dashboard.db', read_only=True)
# The spatial extension provides the ST_* functions used by the helper queries.
con.install_extension('spatial')
con.load_extension('spatial')

# 2. Grab baselines
# city_baselines holds citywide demographic percentages written by the
# pipeline; convert from percent (0-100) to fraction (0-1) for later use.
baseline_df = con.sql("SELECT * FROM city_baselines").df()
baseline_white = float(baseline_df["baseline_white_pct"].iloc[0]) / 100.0
baseline_black = float(baseline_df["baseline_black_pct"].iloc[0]) / 100.0

# 3. Define our Reactive State
# Components that read selected_month.value re-render when the value changes.
selected_month = solara.reactive("Feb2025")
# 4. Build Reusable Components
@solara.component
def TaxiMap(service, metric, cmap, title):
    """Card wrapping one trip-volume choropleth for the selected month."""
    # Reading .value subscribes this component to month changes.
    current_month = selected_month.value
    trip_map = build_trip_map(con, service, current_month, metric, cmap, title)

    with solara.Card(title=f"{title} ({current_month})"):
        solara.display(trip_map)
@solara.component
def ServiceStatsCard(service, cmap_pu, cmap_do):
    """Side-by-side top-10 pickup and drop-off tables for one service."""
    month = selected_month.value
    # Build both (header, table) pairs up front, pickups first.
    sections = [
        build_stats(con, service, month, 'pu', cmap_pu, 'Pickups'),
        build_stats(con, service, month, 'do', cmap_do, 'Drop-offs'),
    ]
    with solara.Card():
        solara.Markdown(f"### {service} Top 10 Destinations ({month})", style={"text-align": "center"})
        with solara.Row():
            for heading, table in sections:
                with solara.Column():
                    solara.display(heading)
                    solara.display(table)
@solara.component
def DemographicMap(column, baseline_val, title):
    """Card wrapping a diverging map of deviation from a citywide baseline."""
    deviation_map = build_demo_map(con, column, baseline_val, title)
    with solara.Card(title=title):
        solara.display(deviation_map)
# 5. Build the Main Page Layout
@solara.component
def Page():
    """Top-level layout: header, month selector, map grid, then stats grid."""
    solara.Title("NYC Taxi Mobility Equity Dashboard")

    with solara.Column():
        with solara.Column(align="center"):
            # Header
            solara.Markdown("# NYC Taxi Mobility Equity Dashboard", style={"text-align": "center"})
            solara.Markdown(
                "<div style='text-align: center;'>This interactive dashboard analyzes whether <b>FHV</b> and <b>Yellow Taxi</b> services are over- or under-represented in areas with different demographic compositions relative to the NYC baseline.</div>"
            )
            # UI Control
            solara.Select(label="Select Month", value=selected_month, values=['Jan2025', 'Feb2025', 'Mar2025'])

        # Grid Layout: rows 1-2 are trip maps (pickups then drop-offs),
        # row 3 is the demographic deviation pair.
        with solara.GridFixed(columns=2):
            for svc, direction, palette, heading in (
                ('FHV', 'pu', 'Blues', 'FHV Pickups'),
                ('Yellow', 'pu', 'Blues', 'Yellow Pickups'),
                ('FHV', 'do', 'Greens', 'FHV Drop-offs'),
                ('Yellow', 'do', 'Greens', 'Yellow Drop-offs'),
            ):
                TaxiMap(svc, direction, palette, heading)

            DemographicMap('white_pct', baseline_white * 100, f'White Pop. Deviation ({baseline_white*100:.1f}%)')
            DemographicMap('black_pct', baseline_black * 100, f'Black Pop. Deviation ({baseline_black*100:.1f}%)')

        with solara.Column(align="center"):
            with solara.GridFixed(columns=2):
                ServiceStatsCard('FHV', 'Blues', 'YlOrBr')
                ServiceStatsCard('Yellow', 'Greens', 'YlOrBr')
dashboard_helpers.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import geopandas as gpd
|
| 2 |
+
import ipywidgets as widgets
|
| 3 |
+
import leafmap
|
| 4 |
+
|
# Trip-volume choropleth
def build_trip_map(con, service, month, metric, cmap, legend_prefix):
    """Build a leafmap choropleth of trip counts per taxi zone.

    ``metric`` selects pickups (``"pu"``) vs. drop-offs (anything else);
    ``service`` and ``month`` filter the pre-aggregated count tables.
    """
    is_pickup = metric == "pu"
    table = "trip_counts_pu" if is_pickup else "trip_counts_do"
    col = "trips_pu" if is_pickup else "trips_do"

    # Zone geometry is stored in UTM (EPSG:26918); reproject to lon/lat
    # (OGC:CRS84) and export as WKT for GeoPandas to parse.
    records = con.sql(f"""
        SELECT tz.zone, t.{col} AS trips,
               ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM {table} AS t
        JOIN taxi_zones_utm AS tz ON t.LocationID = tz.LocationID
        WHERE t.service = '{service}' AND t.month = '{month}'
    """).df()

    zones = gpd.GeoDataFrame(
        records,
        geometry=gpd.GeoSeries.from_wkt(records["geometry"]),
        crs="EPSG:4326",
    )

    trip_map = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    trip_map.layout.height = "400px"
    trip_map.add_basemap("CartoDB.DarkMatter")
    trip_map.add_data(
        zones, column="trips", cmap=cmap,
        legend_title=f"{legend_prefix} ({month})",
    )
    return trip_map
# Demographic baseline choropleth
def build_demo_map(con, column, baseline_val, legend_title):
    """Map each zone's deviation of ``column`` (a percentage) from the
    citywide baseline value, on a diverging RdYlBu scale."""
    # Zones with zero population are excluded; their percentages would be
    # undefined upstream.
    records = con.sql(f"""
        SELECT zd.TaxiZone AS zone,
               ROUND(zd.{column} - {baseline_val}, 1) AS deviation,
               ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM zone_demographics AS zd
        JOIN taxi_zones_utm AS tz ON zd.LocationID = tz.LocationID
        WHERE zd.TotalPop > 0
    """).df()

    zones = gpd.GeoDataFrame(
        records,
        geometry=gpd.GeoSeries.from_wkt(records["geometry"]),
        crs="EPSG:4326",
    )

    demo_map = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    demo_map.layout.height = "400px"
    demo_map.add_basemap("CartoDB.DarkMatter")
    demo_map.add_data(
        zones, column="deviation", cmap="RdYlBu",
        legend_title=legend_title,
    )
    return demo_map
# Top-10 stats table
def build_stats(con, service, month, metric, cmap, label):
    """Return ``(header_widget, styled_df)`` for the top 10 zones of one
    service/month, ranked by pickup or drop-off count per ``metric``."""
    is_pickup = metric == "pu"
    table = "trip_counts_pu" if is_pickup else "trip_counts_do"
    col = "trips_pu" if is_pickup else "trips_do"
    alias = "Pickups" if is_pickup else "Dropoffs"

    top10 = con.sql(f"""
        SELECT tz.zone AS Neighborhood, t.{col} AS {alias}
        FROM {table} t
        JOIN taxi_zones_utm tz ON t.LocationID = tz.LocationID
        WHERE t.service = '{service}' AND t.month = '{month}'
        ORDER BY t.{col} DESC LIMIT 10
    """).df()

    heading = widgets.HTML(
        f"<h3 style='text-align:center;'>{service} Top 10 {label} ({month})</h3>"
    )
    # Color the count column and drop the integer index for display.
    shaded = top10.style.background_gradient(cmap=cmap, subset=[alias]).hide(axis="index")
    return heading, shaded
data_pipeline.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import pathlib
import duckdb
import pandas as pd
import leafmap

# DuckDB connection & extensions
# httpfs lets DuckDB read remote parquet over HTTP; spatial provides ST_*.
con = duckdb.connect('processed_dashboard.db')
con.install_extension("httpfs")
con.load_extension("httpfs")
con.install_extension("spatial")
con.load_extension("spatial")

# Taxi-zone geometry
# A view (not a table) — the parquet is re-read over the network on each scan.
TAXI_ZONES_URL = (
    "https://data.source.coop/cholmes/nyc-taxi-zones/taxi_zones.parquet"
)
con.sql(f"CREATE OR REPLACE VIEW taxi_zones AS SELECT * FROM '{TAXI_ZONES_URL}'")

# NYC census blocks
# Download once; the Dockerfile pre-bakes this file so the check is a no-op
# inside the image build.
DB_PATH = pathlib.Path("nyc_data.db")
if not DB_PATH.exists():
    leafmap.download_file(
        "https://opengeos.org/data/duckdb/nyc_data.db.zip",
        unzip=True,
        overwrite=True,
    )
con.execute("ATTACH 'nyc_data.db' AS nyc_data (READ_ONLY)")
# Compute demographics
# Reproject taxi zones from NY State Plane (EPSG:2263) to UTM 18N
# (EPSG:26918) — presumably to match the census blocks' CRS for the
# spatial join below; TODO confirm nyc_census_blocks is in 26918.
con.sql("""
CREATE OR REPLACE TABLE taxi_zones_utm AS
SELECT * EXCLUDE (geometry),
       ST_GeomFromWKB(ST_AsWKB(ST_Transform(geometry, 'EPSG:2263', 'EPSG:26918'))) AS geometry
FROM taxi_zones
""")

# Aggregate census-block populations into each taxi zone.
# NOTE(review): ST_Intersects means a block straddling a zone boundary
# contributes its full population to EVERY zone it touches, so totals can
# double-count border blocks.
con.sql("""
CREATE OR REPLACE TABLE zone_demographics AS
SELECT
    tz.LocationID,
    tz.zone AS TaxiZone,
    tz.borough AS Borough,
    SUM(cb.popn_total) AS TotalPop,
    SUM(cb.popn_white) AS WhitePop,
    SUM(cb.popn_black) AS BlackPop,
    100.0 * SUM(cb.popn_white) / SUM(cb.popn_total) AS white_pct,
    100.0 * SUM(cb.popn_black) / SUM(cb.popn_total) AS black_pct
FROM nyc_data.nyc_census_blocks AS cb
JOIN taxi_zones_utm AS tz ON ST_Intersects(tz.geometry, cb.geom)
GROUP BY tz.LocationID, tz.zone, tz.borough
""")

# Citywide baseline percentages (single-row DataFrame).
baseline_df = con.sql("""
SELECT
    ROUND(100.0 * SUM(popn_white) / SUM(popn_total), 2) AS baseline_white_pct,
    ROUND(100.0 * SUM(popn_black) / SUM(popn_total), 2) AS baseline_black_pct
FROM nyc_data.nyc_census_blocks
""").df()
# DuckDB's replacement scan resolves the local variable name `baseline_df`;
# this persists the baselines so app.py can read them at serve time.
con.sql("CREATE OR REPLACE TABLE city_baselines AS SELECT * FROM baseline_df")

# Convert percent (0-100) to fraction (0-1) for downstream arithmetic.
baseline_white: float = float(baseline_df["baseline_white_pct"].iloc[0]) / 100.0
baseline_black: float = float(baseline_df["baseline_black_pct"].iloc[0]) / 100.0
# Trip data ingestion
# The FHV and Yellow TLC schemas capitalize their location-ID columns
# differently, so look the exact column name up per service.
_pu_field = {"FHV": "PUlocationID", "Yellow": "PULocationID"}
_do_field = {"FHV": "DOlocationID", "Yellow": "DOLocationID"}

TRIP_URLS = {
    "FHV_Jan2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-01.parquet",
    "FHV_Feb2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-02.parquet",
    "FHV_Mar2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-03.parquet",
    "Yellow_Jan2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet",
    "Yellow_Feb2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-02.parquet",
    "Yellow_Mar2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-03.parquet",
}

# Long-format count tables: one row per (service, month, zone).
con.execute(
    "CREATE OR REPLACE TABLE trip_counts_pu "
    "(service VARCHAR, month VARCHAR, LocationID INTEGER, trips_pu BIGINT)"
)
con.execute(
    "CREATE OR REPLACE TABLE trip_counts_do "
    "(service VARCHAR, month VARCHAR, LocationID INTEGER, trips_do BIGINT)"
)

# Aggregate each remote parquet directly over HTTP (httpfs) — only the
# counts land locally, never the raw trips.
for key, url in TRIP_URLS.items():
    service, month = key.split("_")
    pu, do = _pu_field[service], _do_field[service]
    # IDs 0, 264 and 265 are excluded — presumably the TLC unknown/N-A
    # zones; verify against the official taxi-zone lookup table.
    con.sql(
        f"INSERT INTO trip_counts_pu "
        f"SELECT '{service}', '{month}', CAST({pu} AS INTEGER), COUNT(*) "
        f"FROM '{url}' "
        f"WHERE {pu} IS NOT NULL AND CAST({pu} AS INTEGER) NOT IN (0, 264, 265) "
        f"GROUP BY {pu}"
    )
    con.sql(
        f"INSERT INTO trip_counts_do "
        f"SELECT '{service}', '{month}', CAST({do} AS INTEGER), COUNT(*) "
        f"FROM '{url}' "
        f"WHERE {do} IS NOT NULL AND CAST({do} AS INTEGER) NOT IN (0, 264, 265) "
        f"GROUP BY {do}"
    )
# Representative ratio summary
# RR = (trip-weighted demographic share of served zones) / citywide share.
# RR > 1 means the service over-represents that group's areas.
bw = baseline_white * 100
bb = baseline_black * 100

rr_pu_df = con.sql(f"""
SELECT tp.service, tp.month,
       SUM(tp.trips_pu * zd.white_pct) * 1.0 / SUM(tp.trips_pu) / {bw} AS RR_white_PU,
       SUM(tp.trips_pu * zd.black_pct) * 1.0 / SUM(tp.trips_pu) / {bb} AS RR_black_PU
FROM trip_counts_pu AS tp
JOIN zone_demographics AS zd ON tp.LocationID = zd.LocationID
WHERE zd.TotalPop > 0
GROUP BY tp.service, tp.month
""").df()

rr_do_df = con.sql(f"""
SELECT td.service, td.month,
       SUM(td.trips_do * zd.white_pct) * 1.0 / SUM(td.trips_do) / {bw} AS RR_white_DO,
       SUM(td.trips_do * zd.black_pct) * 1.0 / SUM(td.trips_do) / {bb} AS RR_black_DO
FROM trip_counts_do AS td
JOIN zone_demographics AS zd ON td.LocationID = zd.LocationID
WHERE zd.TotalPop > 0
GROUP BY td.service, td.month
""").df()

# outer merge keeps any (service, month) that appears on only one side.
rr_combined: pd.DataFrame = pd.merge(
    rr_pu_df, rr_do_df, on=["service", "month"], how="outer"
)

# FIX: the summary was previously computed and then discarded when the
# script exited; persist it so the dashboard or later analysis can read it.
con.sql("CREATE OR REPLACE TABLE rr_summary AS SELECT * FROM rr_combined")

# FIX: close the writable connection so DuckDB checkpoints its WAL —
# otherwise a stale .db.wal can linger before app.py reopens the file
# in read-only mode.
con.close()
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
solara>=1.32.0
|
| 2 |
+
duckdb>=1.0.0
|
| 3 |
+
pandas>=2.0.0
|
| 4 |
+
geopandas>=0.14.0
|
| 5 |
+
leafmap>=0.36.0
|
| 6 |
+
ipywidgets>=8.0.0
|
| 7 |
+
shapely>=2.0.0
|
| 8 |
+
pyproj>=3.6.0
|
| 9 |
+
mapclassify>=2.5.0
|