cmande62 committed on
Commit
d77a6bf
·
1 Parent(s): a3c731b

Initial dashboard deployment

Browse files
Files changed (6) hide show
  1. Dockerfile +35 -0
  2. README.md +26 -4
  3. app.py +86 -0
  4. dashboard_helpers.py +61 -0
  5. data_pipeline.py +146 -0
  6. requirements.txt +0 -0
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the NYC Taxi Mobility Equity Dashboard (Hugging Face Docker Space).
FROM python:3.11-slim

# System deps for GDAL/geopandas
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgdal-dev \
        gdal-bin \
        libgeos-dev \
        libproj-dev \
    && rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces run the container as UID 1000. Build and run as that
# user so everything written while baking the data (the database file and the
# DuckDB extensions installed into the user's home) is readable at runtime.
RUN useradd --create-home --uid 1000 user \
    && mkdir /app \
    && chown user:user /app
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /app

# Install Python deps.
# NOTE: requirements.txt must list the packages the application files import
# (solara, duckdb, leafmap, geopandas, pandas, requests).
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir --user -r requirements.txt

# Copy application files
COPY --chown=user:user data_pipeline.py dashboard_helpers.py app.py ./

# Bake processed_dashboard.db. data_pipeline.py downloads nyc_data.db itself
# when it is missing, so no separate pre-download step is needed. The raw
# inputs are removed in the same layer so they do not bloat the image; app.py
# only opens processed_dashboard.db.
RUN python data_pipeline.py \
    && rm -rf trip_data nyc_data.db nyc_data.db.zip

# Expose the port HF Spaces expects
EXPOSE 7860

# Launch Solara
CMD ["solara", "run", "app.py", "--host=0.0.0.0", "--port=7860"]
README.md CHANGED
@@ -1,11 +1,33 @@
1
  ---
2
- title: HuggingFaceNYCTaxi
3
- emoji: 🏆
4
  colorFrom: blue
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: NYC Taxi Mobility Equity Dashboard
 
3
  colorFrom: blue
4
+ colorTo: green
5
  sdk: docker
6
  pinned: false
7
  license: mit
8
  ---
9
 
10
+ # NYC Taxi Mobility Equity Dashboard
11
+
12
+ An interactive Solara dashboard analyzing whether **FHV** and **Yellow Taxi**
13
+ services are equitably distributed across NYC neighborhoods relative to
14
+ demographic baselines.
15
+
16
+ ## Features
17
+
18
+ - Pickup & drop-off choropleth maps for FHV and Yellow Taxi
19
+ - Diverging demographic maps showing deviation from citywide baseline
20
+ - Top-10 stats tables split by service and direction
21
+ - Month selector to explore Jan-Mar 2025 data
22
+
23
+ ## Data Sources
24
+
25
+ - NYC Taxi Zone geometry: data.source.coop
26
+ - NYC Census Blocks: opengeos.org
27
+ - Trip Records: NYC TLC (Jan-Mar 2025)
28
+
29
+ ## Architecture
30
+
31
+ - `data_pipeline.py`: one-time data processing, writes to `processed_dashboard.db`
32
+ - `dashboard_helpers.py`: reusable map and stats functions
33
+ - `app.py`: Solara UI components and layout
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import solara
2
+ import duckdb
3
+ from dashboard_helpers import build_trip_map, build_demo_map, build_stats
4
+
5
# Open the database baked by data_pipeline.py. Read-only: the app never writes.
con = duckdb.connect('processed_dashboard.db', read_only=True)
# The map queries call ST_* functions, which come from the spatial extension.
con.install_extension('spatial')
con.load_extension('spatial')

# Citywide baselines are stored as percentages; keep them here as fractions.
baseline_df = con.sql("SELECT * FROM city_baselines").df()
_baseline_row = baseline_df.iloc[0]
baseline_white = float(_baseline_row['baseline_white_pct']) / 100.0
baseline_black = float(_baseline_row['baseline_black_pct']) / 100.0

# Month currently selected in the UI; components that read .value re-render
# when it changes.
selected_month = solara.reactive('Feb2025')
17
+
18
+
19
@solara.component
def TaxiMap(service, metric, cmap, title):
    """Render a card holding the trip-count map for the selected month.

    Args:
        service: Service name passed through to ``build_trip_map``.
        metric: ``'pu'`` for pickups; any other value selects drop-offs.
        cmap: Colormap name for the choropleth.
        title: Card title and legend prefix; the month is appended.
    """
    # Reading .value subscribes this component to month changes.
    current_month = selected_month.value
    trip_map = build_trip_map(con, service, current_month, metric, cmap, title)
    card_title = f'{title} ({current_month})'
    with solara.Card(title=card_title):
        solara.display(trip_map)
25
+
26
+
27
@solara.component
def DemographicMap(column, baseline_val, title):
    """Render a card holding the deviation-from-baseline map for one column.

    Args:
        column: Column of ``zone_demographics`` to compare with the baseline.
        baseline_val: Baseline value subtracted from ``column``.
        title: Used both as the card title and as the map legend title.
    """
    deviation_map = build_demo_map(con, column, baseline_val, title)
    with solara.Card(title=title):
        solara.display(deviation_map)
32
+
33
+
34
@solara.component
def ServiceStatsCard(service, cmap_pu, cmap_do):
    """Render side-by-side top-10 pickup and drop-off tables for a service.

    Args:
        service: Service name passed through to ``build_stats``.
        cmap_pu: Colormap for the pickups table gradient.
        cmap_do: Colormap for the drop-offs table gradient.
    """
    # Reading .value subscribes this component to month changes.
    current_month = selected_month.value
    # Each entry is a (markdown heading, styled table) pair; pickups first.
    panels = [
        build_stats(con, service, current_month, 'pu', cmap_pu, 'Pickups'),
        build_stats(con, service, current_month, 'do', cmap_do, 'Drop-offs'),
    ]
    centered = {'text-align': 'center'}
    with solara.Card():
        with solara.Row():
            for heading, table in panels:
                with solara.Column():
                    solara.Markdown(heading, style=centered)
                    solara.display(table)
47
+
48
+
49
@solara.component
def Page():
    """Lay out the dashboard: header, month selector, maps and stats tables."""
    intro_html = (
        '<div style="text-align:center;">This interactive dashboard analyzes whether '
        '<b>FHV</b> and <b>Yellow Taxi</b> services are over- or under-represented '
        'in areas with different demographic compositions relative to the NYC baseline.</div>'
    )
    month_choices = ['Jan2025', 'Feb2025', 'Mar2025']
    # (service, metric, colormap, title) for each cell of the trip-map grid.
    trip_panels = (
        ('FHV', 'pu', 'Blues', 'FHV Pickups'),
        ('Yellow', 'pu', 'Blues', 'Yellow Pickups'),
        ('FHV', 'do', 'Greens', 'FHV Drop-offs'),
        ('Yellow', 'do', 'Greens', 'Yellow Drop-offs'),
    )
    # Baselines are held as fractions at module level; the maps use percent.
    white_pct = baseline_white * 100
    black_pct = baseline_black * 100

    solara.Title('NYC Taxi Mobility Equity Dashboard')

    with solara.Column():
        with solara.Column(align='center'):
            solara.Markdown(
                '# NYC Taxi Mobility Equity Dashboard',
                style={'text-align': 'center'},
            )
            solara.Markdown(intro_html)
            solara.Select(
                label='Select Month',
                value=selected_month,
                values=month_choices,
            )

        # Trip maps grid
        with solara.GridFixed(columns=2):
            for service, metric, cmap, title in trip_panels:
                TaxiMap(service, metric, cmap, title)

        # Demographic baseline maps
        with solara.GridFixed(columns=2):
            DemographicMap(
                'white_pct',
                white_pct,
                f'White Pop. Deviation from Baseline ({white_pct:.1f}%)',
            )
            DemographicMap(
                'black_pct',
                black_pct,
                f'Black Pop. Deviation from Baseline ({black_pct:.1f}%)',
            )

        # Stats tables
        with solara.Column(align='center'):
            with solara.GridFixed(columns=2):
                ServiceStatsCard('FHV', 'Blues', 'Greens')
                ServiceStatsCard('Yellow', 'YlOrBr', 'YlOrBr')
dashboard_helpers.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import geopandas as gpd
2
+ import leafmap
3
+
4
def build_trip_map(con, service, month, metric, cmap, legend_prefix):
    """Build a choropleth of per-zone trip counts for one service and month.

    Args:
        con: Open DuckDB connection to the processed dashboard database; the
            query uses spatial (``ST_*``) functions.
        service: Value matched against the ``service`` column.
        month: Value matched against the ``month`` column.
        metric: ``'pu'`` reads pickups; any other value reads drop-offs.
        cmap: Colormap name passed to ``leafmap.Map.add_data``.
        legend_prefix: Legend text; the month is appended in parentheses.

    Returns:
        A ``leafmap.Map``. When no rows match, the map has no data layer.
    """
    if metric == 'pu':
        table, col = 'trip_counts_pu', 'trips_pu'
    else:
        table, col = 'trip_counts_do', 'trips_do'

    # table/col come from the fixed pairs above, so interpolating them is
    # safe. service and month may originate from the UI and are bound as
    # parameters rather than spliced into the SQL text.
    query = f"""
        SELECT
            tz.zone,
            t.{col} AS trips,
            ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM {table} AS t
        JOIN taxi_zones_utm AS tz ON t.LocationID = tz.LocationID
        WHERE t.service = ? AND t.month = ?
    """
    df = con.execute(query, [service, month]).df()
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.GeoSeries.from_wkt(df['geometry']),
        crs='EPSG:4326',
    )

    m = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    m.layout.height = '400px'
    m.add_basemap('CartoDB.DarkMatter')
    # Nothing to classify or colour when the filter matched no rows.
    if not gdf.empty:
        m.add_data(gdf, column='trips', cmap=cmap,
                   legend_title=f'{legend_prefix} ({month})')
    return m
25
+
26
+
27
def build_demo_map(con, column, baseline_val, legend_title):
    """Build a diverging map of a demographic column's deviation from baseline.

    Args:
        con: Open DuckDB connection to the processed dashboard database; the
            query uses spatial (``ST_*``) functions.
        column: Name of the ``zone_demographics`` column to compare.
        baseline_val: Value subtracted from ``column`` for every zone.
        legend_title: Legend title for the data layer.

    Returns:
        A ``leafmap.Map`` coloured by ``deviation`` (rounded to one decimal).
        Zones with ``TotalPop`` of zero are excluded.

    Raises:
        ValueError: If ``column`` is not a plain identifier.
    """
    # A column name cannot be bound as a parameter, so require a bare
    # identifier before interpolating it into the SQL text.
    if not isinstance(column, str) or not column.isidentifier():
        raise ValueError(f'Invalid column name: {column!r}')

    query = f"""
        SELECT
            zd.TaxiZone AS zone,
            ROUND(zd.{column} - ?, 1) AS deviation,
            ST_AsText(ST_Transform(tz.geometry, 'EPSG:26918', 'OGC:CRS84')) AS geometry
        FROM zone_demographics AS zd
        JOIN taxi_zones_utm AS tz ON zd.LocationID = tz.LocationID
        WHERE zd.TotalPop > 0
    """
    df = con.execute(query, [float(baseline_val)]).df()
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.GeoSeries.from_wkt(df['geometry']),
        crs='EPSG:4326',
    )

    m = leafmap.Map(center=[40.7, -73.9], zoom=10, draw_control=False)
    m.layout.height = '400px'
    m.add_basemap('CartoDB.DarkMatter')
    m.add_data(gdf, column='deviation', cmap='RdYlBu', legend_title=legend_title)
    return m
43
+
44
+
45
def build_stats(con, service, month, metric, cmap, label):
    """Build the top-10 zones table for one service, month and direction.

    Args:
        con: Open DuckDB connection to the processed dashboard database.
        service: Value matched against the ``service`` column.
        month: Value matched against the ``month`` column.
        metric: ``'pu'`` reads pickups; any other value reads drop-offs.
        cmap: Colormap name for the count column's background gradient.
        label: Direction label used in the heading text.

    Returns:
        A ``(header, styled)`` tuple: a markdown heading string and a styled
        table of the ten zones with the highest counts, index hidden.
    """
    if metric == 'pu':
        table, col, alias = 'trip_counts_pu', 'trips_pu', 'Pickups'
    else:
        table, col, alias = 'trip_counts_do', 'trips_do', 'Dropoffs'

    # table/col/alias come from the fixed tuples above, so interpolating them
    # is safe. service and month may originate from the UI and are bound as
    # parameters rather than spliced into the SQL text.
    query = f"""
        SELECT
            tz.zone AS Neighborhood,
            t.{col} AS {alias}
        FROM {table} t
        JOIN taxi_zones_utm tz ON t.LocationID = tz.LocationID
        WHERE t.service = ? AND t.month = ?
        ORDER BY t.{col} DESC LIMIT 10
    """
    df = con.execute(query, [service, month]).df()
    header = f'### {service} Top 10 {label} ({month})'
    styled = df.style.background_gradient(cmap=cmap, subset=[alias]).hide(axis='index')
    return header, styled
data_pipeline.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pathlib
3
+ import duckdb
4
+ import pandas as pd
5
+ import leafmap
6
+ import requests
7
+
8
# Open (or create) the output database and enable the extensions used below:
# httpfs for reading remote files, spatial for the ST_* functions.
con = duckdb.connect('processed_dashboard.db')
for _extension in ('httpfs', 'spatial'):
    con.install_extension(_extension)
    con.load_extension(_extension)

# Taxi-zone geometry, exposed as a view over the remote parquet file
TAXI_ZONES_URL = 'https://data.source.coop/cholmes/nyc-taxi-zones/taxi_zones.parquet'
con.sql(f"CREATE OR REPLACE VIEW taxi_zones AS SELECT * FROM '{TAXI_ZONES_URL}'")

# NYC census blocks: fetch the DuckDB file once, then attach it read-only
DB_PATH = pathlib.Path('nyc_data.db')
if not DB_PATH.exists():
    leafmap.download_file(
        'https://opengeos.org/data/duckdb/nyc_data.db.zip',
        unzip=True,
        overwrite=True,
    )

con.execute(f"ATTACH '{DB_PATH}' AS nyc_data (READ_ONLY)")
29
+
30
# Reproject taxi zones from EPSG:2263 to EPSG:26918 (UTM) for the spatial join
con.sql("""
    CREATE OR REPLACE TABLE taxi_zones_utm AS
    SELECT * EXCLUDE (geometry),
           ST_Transform(geometry, 'EPSG:2263', 'EPSG:26918') AS geometry
    FROM taxi_zones
""")

# Zone demographics via spatial join
# NOTE(review): ST_Intersects matches a census block to every zone it touches,
# so a block on a zone boundary is summed into more than one zone - confirm
# this is intended.
con.sql("""
    CREATE OR REPLACE TABLE zone_demographics AS
    SELECT
        tz.LocationID,
        tz.zone AS TaxiZone,
        tz.borough AS Borough,
        SUM(cb.popn_total) AS TotalPop,
        SUM(cb.popn_white) AS WhitePop,
        SUM(cb.popn_black) AS BlackPop,
        100.0 * SUM(cb.popn_white) / SUM(cb.popn_total) AS white_pct,
        100.0 * SUM(cb.popn_black) / SUM(cb.popn_total) AS black_pct
    FROM nyc_data.nyc_census_blocks AS cb
    JOIN taxi_zones_utm AS tz ON ST_Intersects(tz.geometry, cb.geom)
    GROUP BY tz.LocationID, tz.zone, tz.borough
""")

# Citywide baselines, saved into the db for app.py to read later.
# The table is created directly in SQL so it holds exactly the rounded
# percentages, instead of values that went through a Python /100, *100 round
# trip and were re-interpolated into SQL text.
con.execute("""
    CREATE OR REPLACE TABLE city_baselines AS
    SELECT
        ROUND(100.0 * SUM(popn_white) / SUM(popn_total), 2) AS baseline_white_pct,
        ROUND(100.0 * SUM(popn_black) / SUM(popn_total), 2) AS baseline_black_pct
    FROM nyc_data.nyc_census_blocks
""")
baseline_df = con.sql("SELECT * FROM city_baselines").df()

# Fractions (0-1); later steps multiply by 100 where a percentage is needed.
baseline_white = float(baseline_df['baseline_white_pct'].iloc[0]) / 100.0
baseline_black = float(baseline_df['baseline_black_pct'].iloc[0]) / 100.0
73
+
74
# Trip data
# The location columns are spelled differently in the FHV and Yellow files.
pu_field = {'FHV': 'PUlocationID', 'Yellow': 'PULocationID'}
do_field = {'FHV': 'DOlocationID', 'Yellow': 'DOLocationID'}
trip_urls = {
    'FHV_Jan2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-01.parquet',
    'FHV_Feb2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-02.parquet',
    'FHV_Mar2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/fhv_tripdata_2025-03.parquet',
    'Yellow_Jan2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-01.parquet',
    'Yellow_Feb2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-02.parquet',
    'Yellow_Mar2025': 'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2025-03.parquet',
}

con.execute("CREATE OR REPLACE TABLE trip_counts_pu (service VARCHAR, month VARCHAR, LocationID INTEGER, trips_pu BIGINT)")
con.execute("CREATE OR REPLACE TABLE trip_counts_do (service VARCHAR, month VARCHAR, LocationID INTEGER, trips_do BIGINT)")

DATA_DIR = 'trip_data'
os.makedirs(DATA_DIR, exist_ok=True)

for key, url in trip_urls.items():
    local_path = os.path.join(DATA_DIR, f'{key}.parquet')
    if not os.path.exists(local_path):
        print(f'Downloading {key}...')
        # Stream to a temporary name and rename only after the download has
        # finished, so an interrupted run never leaves a truncated file that
        # the exists() check above would later accept as complete.
        partial_path = f'{local_path}.part'
        with requests.get(url, stream=True, timeout=120) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, local_path)
    else:
        print(f'Already exists: {local_path}')

    service, month = key.split('_')
    pu, do = pu_field[service], do_field[service]
    # Same aggregation for both directions. Group by the cast value that is
    # actually stored so each LocationID yields exactly one row. IDs 0, 264
    # and 265 are excluded.
    for direction, field in (('pu', pu), ('do', do)):
        con.execute(f"""
            INSERT INTO trip_counts_{direction}
            SELECT '{service}', '{month}', CAST({field} AS INTEGER), COUNT(*)
            FROM '{local_path}'
            WHERE {field} IS NOT NULL AND CAST({field} AS INTEGER) NOT IN (0, 264, 265)
            GROUP BY CAST({field} AS INTEGER)
        """)
118
+
119
# Representative ratios
def _representation_ratios(trips_table, trips_col, suffix):
    """Return trip-weighted demographic shares divided by the city baseline.

    One row per (service, month) with columns ``RR_white_<suffix>`` and
    ``RR_black_<suffix>``. Zones with no population are excluded.
    """
    return con.sql(f"""
        SELECT
            t.service, t.month,
            SUM(t.{trips_col} * zd.white_pct) * 1.0 / SUM(t.{trips_col}) / {baseline_white*100} AS RR_white_{suffix},
            SUM(t.{trips_col} * zd.black_pct) * 1.0 / SUM(t.{trips_col}) / {baseline_black*100} AS RR_black_{suffix}
        FROM {trips_table} AS t
        JOIN zone_demographics AS zd ON t.LocationID = zd.LocationID
        WHERE zd.TotalPop > 0
        GROUP BY t.service, t.month
    """).df()


rr_pu_df = _representation_ratios('trip_counts_pu', 'trips_pu', 'PU')
rr_do_df = _representation_ratios('trip_counts_do', 'trips_do', 'DO')

rr_combined = pd.merge(rr_pu_df, rr_do_df, on=['service', 'month'], how='outer')

# Register the DataFrame under a name that differs from the target table.
# With the same name, an rr_combined table left by a previous run would be
# selected instead of the DataFrame, and the table would be rebuilt from its
# own stale contents.
con.register('rr_combined_df', rr_combined)
con.execute("CREATE OR REPLACE TABLE rr_combined AS SELECT * FROM rr_combined_df")
con.unregister('rr_combined_df')

print('Pipeline complete. processed_dashboard.db is ready.')
con.close()
requirements.txt ADDED
File without changes