| import streamlit as st |
| import pandas as pd |
| import plotly.express as px |
| import plotly.graph_objects as go |
| from datetime import datetime |
| import os |
|
|
# Page-level settings, applied at import time before any other Streamlit call
# in this file.
# NOTE(review): the previous icon literal was a mojibake remnant ("π"); the
# car emoji is a best-fit replacement - confirm the intended icon.
st.set_page_config(
    page_title="Urban Traffic Flow Dashboard",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
|
|
@st.cache_data
def load_data():
    """Read the traffic CSV stored next to this script and add time features.

    The ``Timestamp`` column is parsed to datetimes and four derived columns
    are appended: ``Hour``, ``DayOfWeek`` (day name), ``Date`` and
    ``IsWeekend`` (True for Saturday/Sunday).

    Returns:
        The enriched DataFrame.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, "urban_traffic_flow_with_target.csv"))
    frame["Timestamp"] = pd.to_datetime(frame["Timestamp"])

    stamps = frame["Timestamp"].dt
    day_names = stamps.day_name()
    return frame.assign(
        Hour=stamps.hour,
        DayOfWeek=day_names,
        Date=stamps.date,
        IsWeekend=day_names.isin(["Saturday", "Sunday"]),
    )
|
|
|
|
def main():
    """Render the Urban Traffic Flow dashboard page.

    Loads the dataset with ``load_data``, draws the sidebar filters, applies
    them, and renders the KPI row, the four analysis tabs and the automatic
    insights. If the dataset is empty, or no rows survive the filters, a
    warning is shown instead of the charts.
    """
    # NOTE(review): the emoji literals in this function were mojibake
    # ("π..."). The light bulb, traffic light, alarm clock and calendar were
    # recoverable from the surviving bytes; the others are best-fit
    # replacements - confirm against the intended design.
    st.title("🚗 Urban Traffic Flow Dashboard")
    st.markdown(
        "Explore urban traffic patterns, congestion levels, and temporal trends"
    )

    df = load_data()
    if df.empty:
        st.warning("The traffic dataset contains no records.")
        return

    filters = _render_sidebar_filters(df)
    filtered_df = _apply_filters(df, filters)

    # Every aggregate below is meaningless on an empty frame (NaN KPIs, a
    # division by zero in the peak ratio, empty pivots), so stop early.
    if filtered_df.empty:
        st.warning(
            "No records match the current filters. "
            "Adjust the sidebar filters to see data."
        )
        return

    _render_kpis(filtered_df)
    st.markdown("---")

    tab1, tab2, tab3, tab4 = st.tabs(
        [
            "📈 Temporal Trends",
            "📍 Location Analysis",
            "📊 Distribution",
            "🏆 Top Zones",
        ]
    )
    with tab1:
        _render_temporal_tab(filtered_df)
    with tab2:
        _render_location_tab(filtered_df)
    with tab3:
        _render_distribution_tab(filtered_df)
    with tab4:
        _render_top_zones_tab(filtered_df)

    st.markdown("---")
    st.subheader("💡 Automatic Insights")
    for insight in _build_insights(filtered_df):
        st.markdown(f"- {insight}")


def _show_chart(fig, height, **layout):
    """Apply the shared white template plus ``layout`` and draw ``fig`` full-width."""
    fig.update_layout(template="plotly_white", height=height, **layout)
    st.plotly_chart(fig, use_container_width=True)


def _render_sidebar_filters(df):
    """Draw the sidebar filter widgets and return the selections as a dict."""
    with st.sidebar:
        st.header("🔍 Filters")

        min_date = df["Timestamp"].min().date()
        max_date = df["Timestamp"].max().date()
        date_range = st.date_input(
            "Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date,
        )

        location_options = sorted(df["Location"].unique())
        selected_locations = st.multiselect(
            "Select Locations",
            options=location_options,
            default=location_options,
        )

        peak_options = sorted(df["Peak_Off_Peak"].unique())
        peak_filter = st.multiselect(
            "Peak/Off-Peak",
            options=peak_options,
            default=peak_options,
        )

        # List weekdays in calendar order rather than alphabetically.
        calendar_order = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        days_present = set(df["DayOfWeek"].unique())
        day_options = [day for day in calendar_order if day in days_present]
        day_filter = st.multiselect(
            "Day of Week",
            options=day_options,
            default=day_options,
        )

        congestion_filter = st.slider(
            "Min Congestion Level", min_value=0, max_value=5, value=0, step=1
        )

    return {
        "date_range": date_range,
        "locations": selected_locations,
        "peak": peak_filter,
        "days": day_filter,
        "min_congestion": congestion_filter,
    }


def _apply_filters(df, filters):
    """Return the rows of ``df`` matching the sidebar selections.

    An empty multiselect means "no restriction" for that column.
    """
    keep = pd.Series(True, index=df.index)

    date_range = filters["date_range"]
    # The date widget yields fewer than two dates while the user is still
    # picking a range; skip the date filter until both ends are set.
    if isinstance(date_range, (tuple, list)) and len(date_range) == 2:
        start_date, end_date = date_range
        # "Date" is precomputed by load_data, so .dt.date is not redone here.
        keep &= df["Date"].between(start_date, end_date)

    for column, key in (
        ("Location", "locations"),
        ("Peak_Off_Peak", "peak"),
        ("DayOfWeek", "days"),
    ):
        chosen = filters[key]
        if chosen:
            keep &= df[column].isin(chosen)

    keep &= df["Congestion_Level"] >= filters["min_congestion"]
    return df[keep]


def _render_kpis(filtered_df):
    """Render the four headline metrics for the (non-empty) filtered data."""
    st.subheader("📊 Key Performance Indicators")

    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)

    with kpi_col1:
        st.metric(
            "Total Vehicle Count",
            f"{filtered_df['Vehicle_Count'].sum():,.0f}",
            help="Total number of vehicles recorded",
        )

    with kpi_col2:
        st.metric(
            "Avg Vehicle Speed",
            f"{filtered_df['Vehicle_Speed'].mean():.1f} km/h",
            help="Average speed across all locations",
        )

    with kpi_col3:
        st.metric(
            "Avg Congestion Level",
            f"{filtered_df['Congestion_Level'].mean():.1f}",
            help="Average congestion level (0-5 scale)",
        )

    with kpi_col4:
        # Mean of a boolean mask is the share of True rows.
        peak_share = (filtered_df["Peak_Off_Peak"] == "Peak").mean() * 100
        st.metric(
            "Peak Hours Ratio",
            f"{peak_share:.1f}%",
            help="Percentage of peak hour observations",
        )


def _render_temporal_tab(filtered_df):
    """Render the hourly-average chart and the per-location time series."""
    st.subheader("Hourly Traffic Patterns")

    hourly_avg = (
        filtered_df.groupby("Hour")[["Vehicle_Count", "Vehicle_Speed"]]
        .mean()
        .reset_index()
    )

    fig_hourly = go.Figure()
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Count"],
            mode="lines+markers",
            name="Avg Vehicle Count",
            line=dict(color="#1f77b4", width=3),
            yaxis="y",
        )
    )
    # Speed is drawn against a secondary axis on the right.
    fig_hourly.add_trace(
        go.Scatter(
            x=hourly_avg["Hour"],
            y=hourly_avg["Vehicle_Speed"],
            mode="lines+markers",
            name="Avg Speed (km/h)",
            line=dict(color="#2ca02c", width=3),
            yaxis="y2",
        )
    )
    _show_chart(
        fig_hourly,
        500,
        title="Average Traffic by Hour of Day",
        xaxis_title="Hour",
        yaxis_title="Vehicle Count",
        yaxis2=dict(title="Speed (km/h)", overlaying="y", side="right"),
        hovermode="x unified",
    )

    st.subheader("Traffic Evolution Over Time")

    time_series = filtered_df.groupby(["Timestamp", "Location"], as_index=False)[
        "Vehicle_Count"
    ].sum()

    fig_ts = px.line(
        time_series,
        x="Timestamp",
        y="Vehicle_Count",
        color="Location",
        title="Traffic Volume Over Time by Location",
        labels={"Vehicle_Count": "Vehicle Count", "Timestamp": "Time"},
    )
    _show_chart(fig_ts, 500, hovermode="x unified")


def _render_location_tab(filtered_df):
    """Render per-location volume and speed bars plus the hour/location heatmap."""
    location_stats = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Vehicle_Count": "sum",
                "Vehicle_Speed": "mean",
                "Congestion_Level": "mean",
            }
        )
        .reset_index()
    )

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Traffic by Location")
        fig_loc = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Count",
            title="Total Vehicle Count by Location",
            color="Vehicle_Count",
            color_continuous_scale="Blues",
            labels={"Vehicle_Count": "Total Count"},
        )
        _show_chart(fig_loc, 400)

    with col2:
        st.subheader("Avg Speed by Location")
        fig_speed = px.bar(
            location_stats,
            x="Location",
            y="Vehicle_Speed",
            title="Average Speed by Location",
            color="Vehicle_Speed",
            color_continuous_scale="RdYlGn",
            labels={"Vehicle_Speed": "Speed (km/h)"},
        )
        _show_chart(fig_speed, 400)

    st.subheader("Congestion Heatmap: Hour vs Location")

    heatmap_data = filtered_df.pivot_table(
        values="Congestion_Level", index="Hour", columns="Location", aggfunc="mean"
    )
    fig_heatmap = px.imshow(
        heatmap_data,
        labels=dict(x="Location", y="Hour", color="Avg Congestion Level"),
        title="Average Congestion Level by Hour and Location",
        color_continuous_scale="RdYlGn_r",
        aspect="auto",
    )
    _show_chart(fig_heatmap, 500)


def _render_distribution_tab(filtered_df):
    """Render box plots per location and the congestion-level distributions."""
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Vehicle Count Distribution")
        fig_count_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Count",
            title="Vehicle Count Distribution by Location",
            color="Location",
        )
        _show_chart(fig_count_box, 400, showlegend=False)

    with col2:
        st.subheader("Speed Distribution")
        fig_speed_box = px.box(
            filtered_df,
            x="Location",
            y="Vehicle_Speed",
            title="Speed Distribution by Location",
            color="Location",
        )
        _show_chart(fig_speed_box, 400, showlegend=False)

    st.subheader("Congestion Level Distribution")

    congestion_dist = (
        filtered_df["Congestion_Level"].value_counts().sort_index().reset_index()
    )
    congestion_dist.columns = ["Congestion_Level", "Count"]

    fig_congestion = px.bar(
        congestion_dist,
        x="Congestion_Level",
        y="Count",
        title="Distribution of Congestion Levels",
        color="Congestion_Level",
        color_continuous_scale="Reds",
        labels={
            "Count": "Number of Records",
            "Congestion_Level": "Congestion Level",
        },
    )
    _show_chart(fig_congestion, 400)

    st.subheader("Congestion by Peak/Off-Peak")

    fig_peak = px.box(
        filtered_df,
        x="Peak_Off_Peak",
        y="Congestion_Level",
        title="Congestion Level: Peak vs Off-Peak",
        color="Peak_Off_Peak",
    )
    _show_chart(fig_peak, 400, showlegend=False)


def _render_top_zones_tab(filtered_df):
    """Render the congestion ranking table and the busiest/slowest top-5 charts."""
    st.subheader("Most Congested Locations")

    location_congestion = (
        filtered_df.groupby("Location")
        .agg(
            {
                "Congestion_Level": "mean",
                "Vehicle_Count": "mean",
                "Vehicle_Speed": "mean",
            }
        )
        .round(2)
        .reset_index()
        # Descending, so the most congested location heads the table.
        .sort_values("Congestion_Level", ascending=False)
    )
    st.dataframe(location_congestion, use_container_width=True, hide_index=True)

    st.subheader("Top 5 Busiest Locations")

    top_locations = (
        filtered_df.groupby("Location")["Vehicle_Count"]
        .sum()
        .sort_values(ascending=False)
        .head(5)
        .reset_index()
    )
    fig_top = px.bar(
        top_locations,
        x="Vehicle_Count",
        y="Location",
        orientation="h",
        title="Top 5 Locations by Total Traffic Volume",
        color="Vehicle_Count",
        color_continuous_scale="Blues",
    )
    # On a horizontal bar chart "total ascending" places the largest bar on top.
    _show_chart(fig_top, 400, yaxis={"categoryorder": "total ascending"})

    st.subheader("Slowest Locations (Lowest Avg Speed)")

    slowest_locations = (
        filtered_df.groupby("Location")["Vehicle_Speed"]
        .mean()
        .sort_values()
        .head(5)
        .reset_index()
    )
    fig_slow = px.bar(
        slowest_locations,
        x="Vehicle_Speed",
        y="Location",
        orientation="h",
        title="Top 5 Slowest Locations",
        color="Vehicle_Speed",
        color_continuous_scale="Reds_r",
    )
    # "total descending" places the smallest bar (the slowest location) on top.
    _show_chart(fig_slow, 400, yaxis={"categoryorder": "total descending"})


def _build_insights(filtered_df):
    """Return markdown insight strings computed from the filtered data.

    An insight is skipped when the data it needs is missing (for example no
    weekend rows), rather than reporting NaN or a misleading zero.
    """
    insights = []
    if filtered_df.empty:
        return insights

    hourly_mean = filtered_df.groupby("Hour")["Vehicle_Count"].mean().dropna()
    if not hourly_mean.empty:
        peak_hour = hourly_mean.idxmax()
        insights.append(
            f"🕐 **Peak traffic hour**: {peak_hour}:00 - {peak_hour + 1}:00 "
            f"with avg {hourly_mean.max():.0f} vehicles"
        )

    location_totals = filtered_df.groupby("Location")["Vehicle_Count"].sum()
    if not location_totals.empty:
        busiest_loc = location_totals.idxmax()
        busiest_count = location_totals.max()
        insights.append(
            f"📍 **Busiest location**: {busiest_loc} with {busiest_count:,.0f} total vehicles"
        )

    avg_congestion = filtered_df["Congestion_Level"].mean()
    if pd.notna(avg_congestion):
        if avg_congestion < 2:
            congestion_status = "Low"
        elif avg_congestion < 4:
            congestion_status = "Moderate"
        else:
            congestion_status = "High"
        insights.append(
            f"🚦 **Overall congestion**: {congestion_status} (avg level: {avg_congestion:.1f}/5)"
        )

    is_weekend = filtered_df["IsWeekend"]
    weekday_avg = filtered_df.loc[~is_weekend, "Vehicle_Count"].mean()
    weekend_avg = filtered_df.loc[is_weekend, "Vehicle_Count"].mean()
    # Needs both groups and a non-zero baseline; otherwise there is nothing
    # meaningful to compare.
    if pd.notna(weekday_avg) and pd.notna(weekend_avg) and weekend_avg > 0:
        diff_pct = (weekday_avg - weekend_avg) / weekend_avg * 100
        direction = "more" if diff_pct > 0 else "less"
        insights.append(
            f"📅 **Weekday vs Weekend**: Weekdays have {abs(diff_pct):.1f}% {direction} traffic on average"
        )

    period = filtered_df["Peak_Off_Peak"]
    peak_congestion = filtered_df.loc[period == "Peak", "Congestion_Level"].mean()
    off_peak_congestion = filtered_df.loc[
        period == "Off-Peak", "Congestion_Level"
    ].mean()
    if pd.notna(peak_congestion) and pd.notna(off_peak_congestion):
        gap = peak_congestion - off_peak_congestion
        direction = "higher" if gap >= 0 else "lower"
        insights.append(
            f"⏰ **Peak hours**: Congestion is {abs(gap):.1f} levels {direction} during peak hours"
        )

    return insights
|
|
|
|
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|
|