"""Streamlit dashboard summarising classified Dota chat messages.

Loads the message records from ``FILE_PATH``, lets the user filter them by
region and lobby type in the sidebar, and renders KPI metrics, three count
bar charts and an expandable raw-data table for the filtered rows.
"""

import json

import altair as alt
import pandas as pd
import streamlit as st

# ---------------------------
# CONFIG
# ---------------------------
FILE_PATH = "src/data_final_version.json"

# Every column the dashboard reads below; validated once after loading so a
# malformed or empty data file produces a readable message, not a KeyError.
REQUIRED_COLUMNS = (
    "lobby_type",
    "region",
    "message_classified",
    "match_id",
    "duration_game",
)

# Must stay the first Streamlit command executed by the script.
st.set_page_config(layout="wide")
st.title("Dota Toxic Messages Dashboard")


# ---------------------------
# LOAD DATA
# ---------------------------
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the JSON file at ``FILE_PATH`` and return it as a DataFrame.

    Returns:
        A DataFrame built directly from the parsed JSON payload.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or cannot be turned into a frame.
    """
    with open(FILE_PATH, "r", encoding="utf-8") as f:
        raw = json.load(f)
    return pd.DataFrame(raw)


try:
    df = load_data()
except (OSError, ValueError) as exc:
    # Show the problem inside the app instead of a raw traceback.
    st.error(f"Could not load data from '{FILE_PATH}': {exc}")
    st.stop()

missing_columns = [name for name in REQUIRED_COLUMNS if name not in df.columns]
if missing_columns:
    st.error(
        "The data file is missing required columns: "
        + ", ".join(missing_columns)
    )
    st.stop()

# ---------------------------
# CLEAN DATA
# ---------------------------
# NOTE: st.cache_data hands back a fresh copy on each rerun (see Streamlit
# docs), so these in-place assignments do not alter the cached frame.
df["lobby_type"] = df["lobby_type"].fillna("Unknown")
df["region"] = df["region"].fillna("Unknown")
df["message_classified"] = df["message_classified"].fillna("unknown")

# ---------------------------
# SIDEBAR FILTERS
# ---------------------------
st.sidebar.header("Filters")

# Compute each option list once; it serves as both the choices and the
# default selection (everything selected).
region_options = df["region"].unique()
lobby_options = df["lobby_type"].unique()

regions = st.sidebar.multiselect(
    "Region",
    options=region_options,
    default=region_options,
)
lobbies = st.sidebar.multiselect(
    "Lobby Type",
    options=lobby_options,
    default=lobby_options,
)

df_filtered = df[
    (df["region"].isin(regions)) & (df["lobby_type"].isin(lobbies))
]

# ---------------------------
# KPI
# ---------------------------
col1, col2, col3 = st.columns(3)

# The mean of an empty (or all-missing) column is NaN; show a placeholder
# rather than rendering "nan" in the metric.
avg_duration = df_filtered["duration_game"].mean()
avg_duration_label = "N/A" if pd.isna(avg_duration) else round(avg_duration, 1)

col1.metric("Total Messages", len(df_filtered))
col2.metric("Unique Matches", df_filtered["match_id"].nunique())
col3.metric("Avg Duration", avg_duration_label)

if df_filtered.empty:
    # Nothing to plot: explain why instead of drawing three blank charts.
    st.info("No messages match the selected filters.")
    st.stop()

# ---------------------------
# CHART 1 - Toxic categories
# ---------------------------
st.subheader("Toxic Message Distribution")

# Horizontal bars, one per category, longest bar first.
chart1 = alt.Chart(df_filtered).mark_bar().encode(
    x=alt.X("count()", title="Count"),
    y=alt.Y("message_classified", sort="-x", title="Category"),
    color="message_classified",
)
st.altair_chart(chart1, use_container_width=True)

# ---------------------------
# CHART 2 - By Lobby Type
# ---------------------------
st.subheader("Toxicity by Lobby Type")

chart2 = alt.Chart(df_filtered).mark_bar().encode(
    x="lobby_type",
    y="count()",
    color="lobby_type",
)
st.altair_chart(chart2, use_container_width=True)

# ---------------------------
# CHART 3 - By Region
# ---------------------------
st.subheader("Toxicity by Region")

# Vertical bars, one per region, tallest bar first.
chart3 = alt.Chart(df_filtered).mark_bar().encode(
    x=alt.X("region", sort="-y"),
    y="count()",
    color="region",
)
st.altair_chart(chart3, use_container_width=True)

# ---------------------------
# RAW DATA (optional)
# ---------------------------
with st.expander("Show raw data"):
    st.dataframe(df_filtered)