FP / src /streamlit_app.py
ND18's picture
Update src/streamlit_app.py
c9b0532 verified
import streamlit as st
import json
import pandas as pd
import altair as alt
# ---------------------------
# CONFIG
# ---------------------------
# Relative path of the JSON dataset that load_data() opens.
FILE_PATH = "src/data_final_version.json"
# Heading rendered at the top of the page.
PAGE_TITLE = "Dota Toxic Messages Dashboard"

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(layout="wide")
st.title(PAGE_TITLE)
# ---------------------------
# LOAD DATA
# ---------------------------
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the JSON file at ``FILE_PATH`` and return it as a DataFrame.

    The file is decoded as UTF-8 and the parsed JSON is handed directly to
    the ``pd.DataFrame`` constructor. The function is wrapped in
    ``st.cache_data``.

    Returns:
        A DataFrame built from the parsed JSON content.
    """
    with open(FILE_PATH, mode="r", encoding="utf-8") as source:
        records = json.load(source)
    frame = pd.DataFrame(records)
    return frame
df = load_data()
# ---------------------------
# CLEAN DATA
# ---------------------------
# Replace missing values in the three categorical columns with a placeholder
# label (note the lowercase placeholder used for the classification column).
for _column, _placeholder in (
    ("lobby_type", "Unknown"),
    ("region", "Unknown"),
    ("message_classified", "unknown"),
):
    df[_column] = df[_column].fillna(_placeholder)
# ---------------------------
# SIDEBAR FILTERS
# ---------------------------
# Each multiselect offers every distinct value of its column and starts with
# all of them selected.
region_choices = df["region"].unique()
lobby_choices = df["lobby_type"].unique()

with st.sidebar:
    st.header("Filters")
    regions = st.multiselect(
        "Region",
        options=region_choices,
        default=region_choices,
    )
    lobbies = st.multiselect(
        "Lobby Type",
        options=lobby_choices,
        default=lobby_choices,
    )

# Keep only the rows whose region AND lobby type are both currently selected.
region_mask = df["region"].isin(regions)
lobby_mask = df["lobby_type"].isin(lobbies)
df_filtered = df[region_mask & lobby_mask]
# ---------------------------
# KPI
# ---------------------------
# Three headline figures computed from the filtered rows, one per column.
col1, col2, col3 = st.columns(3)
col1.metric("Total Messages", len(df_filtered))
col2.metric("Unique Matches", df_filtered["match_id"].nunique())

# The mean is NaN when the filters match no rows (or every duration is
# missing); show a readable placeholder instead of the literal "nan".
avg_duration = df_filtered["duration_game"].mean()
col3.metric(
    "Avg Duration",
    "N/A" if pd.isna(avg_duration) else round(avg_duration, 1),
)
# ---------------------------
# CHART 1 - Toxic categories
# ---------------------------
st.subheader("Toxic Message Distribution")

# Horizontal bars: one bar per classification, ordered by descending count.
count_axis = alt.X("count()", title="Count")
category_axis = alt.Y("message_classified", sort="-x", title="Category")
category_color = alt.Color("message_classified")

chart1 = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(x=count_axis, y=category_axis, color=category_color)
)
st.altair_chart(chart1, use_container_width=True)
# ---------------------------
# CHART 2 - By Lobby Type
# ---------------------------
st.subheader("Toxicity by Lobby Type")

# Vertical bars: number of filtered rows per lobby type, coloured by lobby type.
chart2 = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(
        alt.X("lobby_type"),
        alt.Y("count()"),
        alt.Color("lobby_type"),
    )
)
st.altair_chart(chart2, use_container_width=True)
# ---------------------------
# CHART 3 - By Region
# ---------------------------
st.subheader("Toxicity by Region")

# Vertical bars: number of filtered rows per region, ordered by descending count.
region_bars = alt.Chart(df_filtered).mark_bar()
chart3 = region_bars.encode(
    x=alt.X("region", sort="-y"),
    y=alt.Y("count()"),
    color=alt.Color("region"),
)
st.altair_chart(chart3, use_container_width=True)
# ---------------------------
# RAW DATA (optional)
# ---------------------------
# Expander that holds the filtered rows as a table.
raw_data_panel = st.expander("Show raw data")
raw_data_panel.dataframe(df_filtered)