Debayan Mandal committed on
Commit
dd79a40
·
1 Parent(s): c0878af

Initial Dashboard Deployment

Browse files
Files changed (7) hide show
  1. .gitignore +21 -0
  2. Dockerfile +35 -0
  3. README.md +45 -6
  4. app.py +80 -0
  5. dashboard_helpers.py +74 -0
  6. data_pipeline.py +132 -0
  7. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python Cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Databases and Downloaded Archives
7
+ *.db
8
+ *.db.wal
9
+ *.zip
10
+
11
+ # Jupyter Notebook Checkpoints
12
+ .ipynb_checkpoints/
13
+
14
+ # Virtual Environments
15
+ venv/
16
+ env/
17
+ .env
18
+
19
+ # OS Generated Files
20
+ .DS_Store
21
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# System libraries for GDAL/geopandas. The apt lists are removed in the same
# layer so they do not bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgdal-dev \
        gdal-bin \
        libgeos-dev \
        libproj-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps before copying the sources so this layer stays cached
# when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files. CMD below launches app.py, so app.py is the file
# that has to be present in the image.
COPY data_pipeline.py .
COPY dashboard_helpers.py .
COPY app.py .

# Pre-download the census database so it is baked into the image, then run
# the pipeline that writes processed_dashboard.db at build time.
RUN python -c "import leafmap; \
leafmap.download_file( \
    'https://opengeos.org/data/duckdb/nyc_data.db.zip', \
    unzip=True, \
    overwrite=True \
)"
RUN python data_pipeline.py

# Expose the port HF Spaces expects
EXPOSE 7860

# Launch Solara
CMD ["solara", "run", "app.py", "--host=0.0.0.0", "--port=7860"]
README.md CHANGED
@@ -1,12 +1,51 @@
1
  ---
2
- title: Nyc Mobility Solara Dashboard
3
- emoji: 🦀
4
  colorFrom: blue
5
- colorTo: indigo
6
  sdk: docker
7
- pinned: false
8
  license: mit
9
- short_description: The NYC Mobility Dashboard powered by Solara
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: NYC Taxi Mobility Equity Dashboard_Solara
 
3
  colorFrom: blue
4
+ colorTo: green
5
  sdk: docker
6
+ pinned: true
7
  license: mit
 
8
  ---
9
 
10
+ # NYC Mobility Equity Dashboard
11
+ _Debayan Mandal_
12
+
13
+ An interactive Solara dashboard analyzing whether **FHV** and
14
+ **Yellow Taxi** services are equitably distributed across NYC neighborhoods
15
+ relative to demographic baselines.
16
+
17
+ ## Features
18
+
19
+ - **Pickup & Drop-off choropleth maps** for FHV and Yellow Taxi by month
20
+ - **Diverging demographic maps** showing deviation from citywide population baselines
21
+ - **Top-10 stats tables** split by service and direction
22
+ - **Month dropdown** to explore Jan–Mar 2025 data
23
+
24
+ ## Data Sources
25
+
26
+ - [NYC TLC Trip Record Data](https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page) (parquet via CloudFront)
27
+ - [NYC Taxi Zones](https://data.source.coop/cholmes/nyc-taxi-zones/) (Source Cooperative)
28
+ - [NYC Census Blocks](https://opengeos.org/data/duckdb/nyc_data.db.zip) (OpenGeos)
29
+
30
+ ## Architecture
31
+
32
+ | File | Purpose |
33
+ |------|---------|
34
+ | `data_pipeline.py` | DuckDB setup, spatial joins, trip ingestion, relative risk |
35
+ | `dashboard_helpers.py` | Reusable map and stats-table builder functions |
36
+ | `app.py` | Main Solara application and UI component script |
37
+ | `Dockerfile` | Container setup for Hugging Face Spaces deployment |
38
+
39
+ ## Local Development
40
+
41
+ To run this dashboard locally, you must first build the pre-computed DuckDB database, and then launch the Solara server.
42
+
43
+ ```bash
44
+ # 1. Install dependencies
45
+ pip install -r requirements.txt
46
+
47
+ # 2. Run the pipeline to fetch data and generate processed_dashboard.db
48
+ python data_pipeline.py
49
+
50
+ # 3. Launch the hot-reloading Solara development server
51
+ solara run app.py
52
+ ```
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import solara
2
+ import duckdb
3
+ from dashboard_helpers import build_trip_map, build_demo_map, build_stats
4
+
5
# 1. Open the pre-built database read-only and enable the spatial extension,
#    which the helper queries rely on (ST_Transform / ST_AsText).
con = duckdb.connect("processed_dashboard.db", read_only=True)
con.install_extension("spatial")
con.load_extension("spatial")

# 2. Read the citywide baselines; they are stored as percentages and kept
#    here as fractions.
baseline_df = con.sql("SELECT * FROM city_baselines").df()
_baseline_row = baseline_df.iloc[0]
baseline_white = float(_baseline_row["baseline_white_pct"]) / 100.0
baseline_black = float(_baseline_row["baseline_black_pct"]) / 100.0

# 3. Reactive state shared by every component that depends on the month.
selected_month = solara.reactive("Feb2025")
17
+
18
+ # 4. Build Reusable Components
19
@solara.component
def TaxiMap(service, metric, cmap, title):
    """Render one trip-count choropleth inside a titled card.

    The month is read from ``selected_month``. ``title`` is used both as the
    legend prefix handed to ``build_trip_map`` and, with the month appended,
    as the card title.
    """
    current_month = selected_month.value
    card_title = f"{title} ({current_month})"
    trip_map = build_trip_map(con, service, current_month, metric, cmap, title)

    with solara.Card(title=card_title):
        solara.display(trip_map)
26
+
27
@solara.component
def ServiceStatsCard(service, cmap_pu, cmap_do):
    """Render the pickup and drop-off top-10 tables for one service.

    The month is read from ``selected_month``. ``cmap_pu`` and ``cmap_do``
    are the colormap names forwarded to ``build_stats`` for the pickup and
    drop-off tables respectively.
    """
    active_month = selected_month.value
    # Both (header, table) pairs are built up front, pickups first, before
    # any UI element is emitted.
    panels = [
        build_stats(con, service, active_month, 'pu', cmap_pu, 'Pickups'),
        build_stats(con, service, active_month, 'do', cmap_do, 'Drop-offs'),
    ]
    with solara.Card():
        solara.Markdown(
            f"### {service} Top 10 Destinations ({active_month})",
            style={"text-align": "center"},
        )
        with solara.Row():
            # One column per direction: header widget above its table.
            for heading, table in panels:
                with solara.Column():
                    solara.display(heading)
                    solara.display(table)
41
+
42
@solara.component
def DemographicMap(column, baseline_val, title):
    """Render one demographic-deviation choropleth inside a titled card.

    ``column`` and ``baseline_val`` are forwarded to ``build_demo_map``;
    ``title`` is used as both the map legend title and the card title.
    """
    deviation_map = build_demo_map(con, column, baseline_val, title)
    with solara.Card(title=title):
        solara.display(deviation_map)
47
+
48
+ # 5. Build the Main Page Layout
49
@solara.component
def Page():
    """Top-level page: header, month selector, map grid and stats cards.

    The month selector is bound to the module-level ``selected_month``
    reactive, which ``TaxiMap`` and ``ServiceStatsCard`` read.

    NOTE(review): the container nesting below was reconstructed from a
    flattened diff view in which indentation was lost -- confirm the
    Column/GridFixed hierarchy against the real file.
    """
    solara.Title("NYC Taxi Mobility Equity Dashboard")

    with solara.Column():
        with solara.Column(align="center"):
            # Header
            solara.Markdown("# NYC Taxi Mobility Equity Dashboard", style={"text-align": "center"})
            solara.Markdown(
                "<div style='text-align: center;'>This interactive dashboard analyzes whether <b>FHV</b> and <b>Yellow Taxi</b> services are over- or under-represented in areas with different demographic compositions relative to the NYC baseline.</div>"
            )
            # UI Control
            solara.Select(label="Select Month", value=selected_month, values=['Jan2025', 'Feb2025', 'Mar2025'])

        # Grid Layout
        with solara.GridFixed(columns=2):
            # Row 1: Pickups
            TaxiMap('FHV', 'pu', 'Blues', 'FHV Pickups')
            TaxiMap('Yellow', 'pu', 'Blues', 'Yellow Pickups')

            # Row 2: Drop-offs
            TaxiMap('FHV', 'do', 'Greens', 'FHV Drop-offs')
            TaxiMap('Yellow', 'do', 'Greens', 'Yellow Drop-offs')

            # Row 3: Demographics
            # The baselines are held as fractions, so they are scaled back to
            # percentages to match the *_pct columns.
            DemographicMap('white_pct', baseline_white * 100, f'White Pop. Deviation ({baseline_white*100:.1f}%)')
            DemographicMap('black_pct', baseline_black * 100, f'Black Pop. Deviation ({baseline_black*100:.1f}%)')

        with solara.Column(align="center"):
            with solara.GridFixed(columns=2):
                ServiceStatsCard('FHV', 'Blues', 'YlOrBr')
                ServiceStatsCard('Yellow', 'Greens', 'YlOrBr')
dashboard_helpers.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import geopandas as gpd
2
+ import ipywidgets as widgets
3
+ import leafmap
4
+
5
# Trip-volume choropleth
def build_trip_map(con, service, month, metric, cmap, legend_prefix):
    """Build a leafmap choropleth of trip counts per taxi zone.

    Args:
        con: DuckDB connection exposing ``trip_counts_pu``,
            ``trip_counts_do`` and ``taxi_zones_utm`` with the spatial
            extension loaded.
        service: Value matched against the ``service`` column.
        month: Value matched against the ``month`` column.
        metric: ``"pu"`` selects the pickup table; any other value selects
            the drop-off table.
        cmap: Colormap name passed to ``Map.add_data``.
        legend_prefix: Legend title prefix; the month is appended to it.

    Returns:
        The configured ``leafmap.Map``.
    """
    if metric == "pu":
        table, col = "trip_counts_pu", "trips_pu"
    else:
        table, col = "trip_counts_do", "trips_do"

    # ``table`` and ``col`` come from the fixed mapping above, so they are
    # safe to interpolate. ``service`` and ``month`` are bound as parameters
    # rather than spliced into the SQL text.
    query = f"""
        SELECT tz.zone, t.{col} AS trips,
               ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM {table} AS t
        JOIN taxi_zones_utm AS tz ON t.LocationID = tz.LocationID
        WHERE t.service = ? AND t.month = ?
    """
    df = con.sql(query, params=[service, month]).df()

    # Geometry arrives as WKT already transformed to lon/lat (CRS84).
    gdf = gpd.GeoDataFrame(
        df, geometry=gpd.GeoSeries.from_wkt(df["geometry"]), crs="EPSG:4326"
    )
    m = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    m.layout.height = "400px"
    m.add_basemap("CartoDB.DarkMatter")
    m.add_data(
        gdf, column="trips", cmap=cmap,
        legend_title=f"{legend_prefix} ({month})",
    )
    return m
31
+
32
# Demographic baseline choropleth
def build_demo_map(con, column, baseline_val, legend_title):
    """Build a leafmap choropleth of a demographic column's deviation.

    For every zone in ``zone_demographics`` with ``TotalPop > 0`` the
    deviation is ``column - baseline_val`` rounded to one decimal. Zone
    geometry is transformed from EPSG:26918 to OGC:CRS84 and read back as
    WKT. Returns the configured ``leafmap.Map``.
    """
    query = f"""
        SELECT zd.TaxiZone AS zone,
               ROUND(zd.{column} - {baseline_val}, 1) AS deviation,
               ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM zone_demographics AS zd
        JOIN taxi_zones_utm AS tz ON zd.LocationID = tz.LocationID
        WHERE zd.TotalPop > 0
    """
    rows = con.sql(query).df()

    shapes = gpd.GeoSeries.from_wkt(rows["geometry"])
    zones = gpd.GeoDataFrame(rows, geometry=shapes, crs="EPSG:4326")

    demo_map = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    demo_map.layout.height = "400px"
    demo_map.add_basemap("CartoDB.DarkMatter")
    demo_map.add_data(
        zones,
        column="deviation",
        cmap="RdYlBu",
        legend_title=legend_title,
    )
    return demo_map
54
+
55
# Top-10 stats table
def build_stats(con, service, month, metric, cmap, label):
    """Build the header widget and styled top-10 table for one direction.

    Args:
        con: DuckDB connection exposing ``trip_counts_pu``,
            ``trip_counts_do`` and ``taxi_zones_utm``.
        service: Value matched against the ``service`` column.
        month: Value matched against the ``month`` column.
        metric: ``"pu"`` selects the pickup table; any other value selects
            the drop-off table.
        cmap: Colormap name for the count column's background gradient.
        label: Human-readable direction name shown in the header.

    Returns:
        ``(header, styled)``: an ``ipywidgets.HTML`` heading and a pandas
        ``Styler`` holding the ten zones with the highest counts.
    """
    import html  # local import: only needed to escape the header text

    if metric == "pu":
        table, col, alias = "trip_counts_pu", "trips_pu", "Pickups"
    else:
        table, col, alias = "trip_counts_do", "trips_do", "Dropoffs"

    # Identifiers come from the fixed mapping above. ``service`` and
    # ``month`` are bound as parameters rather than spliced into the SQL.
    query = f"""
        SELECT tz.zone AS Neighborhood, t.{col} AS {alias}
        FROM {table} t
        JOIN taxi_zones_utm tz ON t.LocationID = tz.LocationID
        WHERE t.service = ? AND t.month = ?
        ORDER BY t.{col} DESC LIMIT 10
    """
    df = con.sql(query, params=[service, month]).df()

    # Escape the interpolated values so they cannot inject markup into the
    # heading; plain labels such as "FHV" or "Pickups" are unchanged.
    safe_service = html.escape(str(service))
    safe_label = html.escape(str(label))
    safe_month = html.escape(str(month))
    header = widgets.HTML(
        f"<h3 style='text-align:center;'>{safe_service} Top 10 {safe_label} ({safe_month})</h3>"
    )
    styled = df.style.background_gradient(cmap=cmap, subset=[alias]).hide(axis="index")
    return header, styled
data_pipeline.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pathlib
3
+ import duckdb
4
+ import pandas as pd
5
+ import leafmap
6
+
7
# DuckDB connection & extensions
# Everything below is written into this database file, which app.py opens
# read-only. httpfs is needed to query the remote parquet files, spatial for
# the ST_* functions.
con = duckdb.connect('processed_dashboard.db')
con.install_extension("httpfs")
con.load_extension("httpfs")
con.install_extension("spatial")
con.load_extension("spatial")

# Taxi-zone geometry
TAXI_ZONES_URL = (
    "https://data.source.coop/cholmes/nyc-taxi-zones/taxi_zones.parquet"
)
con.sql(f"CREATE OR REPLACE VIEW taxi_zones AS SELECT * FROM '{TAXI_ZONES_URL}'")

# NYC census blocks
# Download the archive only when the unpacked database is not already on disk.
DB_PATH = pathlib.Path("nyc_data.db")
if not DB_PATH.exists():
    leafmap.download_file(
        "https://opengeos.org/data/duckdb/nyc_data.db.zip",
        unzip=True,
        overwrite=True,
    )
con.execute("ATTACH 'nyc_data.db' AS nyc_data (READ_ONLY)")


# Compute demographics
# Materialise the zones reprojected from EPSG:2263 to EPSG:26918.
# NOTE(review): presumably 26918 is the CRS of nyc_census_blocks.geom, which
# the join below compares against -- confirm.
con.sql("""
    CREATE OR REPLACE TABLE taxi_zones_utm AS
    SELECT * EXCLUDE (geometry),
           ST_GeomFromWKB(ST_AsWKB(ST_Transform(geometry, 'EPSG:2263', 'EPSG:26918'))) AS geometry
    FROM taxi_zones
""")

# NOTE(review): the ST_Intersects join pairs a block with every zone it
# touches, so a block straddling a boundary is counted in more than one zone
# -- confirm this is intended.
# NULLIF keeps the percentages NULL for zones with zero population instead of
# relying on the engine's division-by-zero behaviour.
con.sql("""
    CREATE OR REPLACE TABLE zone_demographics AS
    SELECT
        tz.LocationID,
        tz.zone AS TaxiZone,
        tz.borough AS Borough,
        SUM(cb.popn_total) AS TotalPop,
        SUM(cb.popn_white) AS WhitePop,
        SUM(cb.popn_black) AS BlackPop,
        100.0 * SUM(cb.popn_white) / NULLIF(SUM(cb.popn_total), 0) AS white_pct,
        100.0 * SUM(cb.popn_black) / NULLIF(SUM(cb.popn_total), 0) AS black_pct
    FROM nyc_data.nyc_census_blocks AS cb
    JOIN taxi_zones_utm AS tz ON ST_Intersects(tz.geometry, cb.geom)
    GROUP BY tz.LocationID, tz.zone, tz.borough
""")

# Citywide baselines, in percent, computed over all census blocks.
baseline_df = con.sql("""
    SELECT
        ROUND(100.0 * SUM(popn_white) / SUM(popn_total), 2) AS baseline_white_pct,
        ROUND(100.0 * SUM(popn_black) / SUM(popn_total), 2) AS baseline_black_pct
    FROM nyc_data.nyc_census_blocks
""").df()
# The query refers to the ``baseline_df`` DataFrame above by its Python name.
con.sql("CREATE OR REPLACE TABLE city_baselines AS SELECT * FROM baseline_df")

baseline_white: float = float(baseline_df["baseline_white_pct"].iloc[0]) / 100.0
baseline_black: float = float(baseline_df["baseline_black_pct"].iloc[0]) / 100.0

# Trip data ingestion
# The two services spell their location-ID columns with different casing.
_pu_field = {"FHV": "PUlocationID", "Yellow": "PULocationID"}
_do_field = {"FHV": "DOlocationID", "Yellow": "DOLocationID"}

TRIP_URLS = {
    "FHV_Jan2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-01.parquet",
    "FHV_Feb2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-02.parquet",
    "FHV_Mar2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-03.parquet",
    "Yellow_Jan2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet",
    "Yellow_Feb2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-02.parquet",
    "Yellow_Mar2025": "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-03.parquet",
}

con.execute(
    "CREATE OR REPLACE TABLE trip_counts_pu "
    "(service VARCHAR, month VARCHAR, LocationID INTEGER, trips_pu BIGINT)"
)
con.execute(
    "CREATE OR REPLACE TABLE trip_counts_do "
    "(service VARCHAR, month VARCHAR, LocationID INTEGER, trips_do BIGINT)"
)

for key, url in TRIP_URLS.items():
    service, month = key.split("_")
    pu, do = _pu_field[service], _do_field[service]
    for table_name, id_col in (("trip_counts_pu", pu), ("trip_counts_do", do)):
        # Group by the same INTEGER cast that is selected, so each
        # (service, month, LocationID) yields exactly one row even if the raw
        # column holds values that cast to the same integer.
        zone_id = f"CAST({id_col} AS INTEGER)"
        con.sql(
            f"INSERT INTO {table_name} "
            f"SELECT '{service}', '{month}', {zone_id}, COUNT(*) "
            f"FROM '{url}' "
            f"WHERE {id_col} IS NOT NULL AND {zone_id} NOT IN (0, 264, 265) "
            f"GROUP BY {zone_id}"
        )

# Representative ratio summary
# Trip-weighted average of each zone's demographic percentage, divided by the
# citywide baseline percentage.
bw = baseline_white * 100
bb = baseline_black * 100

rr_pu_df = con.sql(f"""
    SELECT tp.service, tp.month,
           SUM(tp.trips_pu * zd.white_pct) * 1.0 / SUM(tp.trips_pu) / {bw} AS RR_white_PU,
           SUM(tp.trips_pu * zd.black_pct) * 1.0 / SUM(tp.trips_pu) / {bb} AS RR_black_PU
    FROM trip_counts_pu AS tp
    JOIN zone_demographics AS zd ON tp.LocationID = zd.LocationID
    WHERE zd.TotalPop > 0
    GROUP BY tp.service, tp.month
""").df()

rr_do_df = con.sql(f"""
    SELECT td.service, td.month,
           SUM(td.trips_do * zd.white_pct) * 1.0 / SUM(td.trips_do) / {bw} AS RR_white_DO,
           SUM(td.trips_do * zd.black_pct) * 1.0 / SUM(td.trips_do) / {bb} AS RR_black_DO
    FROM trip_counts_do AS td
    JOIN zone_demographics AS zd ON td.LocationID = zd.LocationID
    WHERE zd.TotalPop > 0
    GROUP BY td.service, td.month
""").df()

# One row per (service, month) carrying both pickup and drop-off ratios.
rr_combined: pd.DataFrame = pd.merge(
    rr_pu_df, rr_do_df, on=["service", "month"], how="outer"
)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ solara>=1.32.0
2
+ duckdb>=1.0.0
3
+ pandas>=2.0.0
4
+ geopandas>=0.14.0
5
+ leafmap>=0.36.0
6
+ ipywidgets>=8.0.0
7
+ shapely>=2.0.0
8
+ pyproj>=3.6.0
9
+ mapclassify>=2.5.0