# DT / src /streamlit_app.py
# ND18's picture
# Update src/streamlit_app.py
# 31a5d8c verified
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
# ======================
# PAGE CONFIG
# ======================
# The page configuration is applied before any other Streamlit element is
# emitted; the title, introduction and sample-size notice follow in order.
page_settings = {"page_title": "MOBA Toxicity Analysis", "layout": "wide"}
st.set_page_config(**page_settings)

st.title("Toxicity in MOBA Matches")

intro_text = """
This dashboard explores the relationship between toxic chat messages
and match outcomes in MOBA games (Dota 2).
"""
st.markdown(intro_text)

sample_size_notice = (
    "This analysis is based on a limited sample of matches. "
    "For some game modes or lobby types, results may be missing or not statistically reliable."
)
st.warning(sample_size_notice)
# ======================
# LOAD DATA
# ======================
@st.cache_data
def load_data():
    """Read the Excel workbook with the Dota chat dataset into a DataFrame.

    The function is wrapped in ``st.cache_data``; the path is relative to
    the process working directory.
    """
    dataset_path = "src/Final_Dataset_Dota.xlsx"
    dataset = pd.read_excel(dataset_path)
    return dataset


df = load_data()
# ======================
# TOXICITY DEFINITION
# ======================
# Values of the "Message_Clean_Classified" column that count as toxic.
negative_labels = [
    "piece of shit", "retarded", "Retarded",
    "Bitch", "Clowns", "report",
    "End Fast", "end fast"
]

# Match labels case-insensitively (and ignoring surrounding whitespace).
# The list above already enumerates some case variants by hand
# ("retarded"/"Retarded", "End Fast"/"end fast"); an exact match would still
# silently miss every variant that is not listed, e.g. "bitch" or "Report".
_negative_labels_normalized = {
    label.strip().casefold() for label in negative_labels
}
# astype("string") keeps missing values as <NA> (never matched by isin) and
# makes the .str accessor safe even if the column is not of object dtype.
_message_labels_normalized = (
    df["Message_Clean_Classified"]
    .astype("string")
    .str.strip()
    .str.casefold()
)
df["is_negative"] = (
    _message_labels_normalized.isin(_negative_labels_normalized).astype(bool)
)

# "time" is divided by 60 to obtain minutes.
# NOTE(review): assumes "time" is expressed in seconds - confirm.
df["time_min"] = df["time"] / 60

# "Win"/"Lose" is derived from `radiant_win` only, i.e. it is the outcome from
# the Radiant side's point of view; rows with any other value become NaN.
df["outcome"] = df["radiant_win"].map({True: "Win", False: "Lose"})
# ======================
# FILTERS (SAFE)
# ======================
with st.sidebar:
    st.header("Context filters")

    game_mode_options = sorted(df["game_mode"].dropna().unique())
    game_mode_filter = st.multiselect("Game mode", game_mode_options)

    lobby_type_options = sorted(df["lobby_type"].dropna().unique())
    lobby_type_filter = st.multiselect("Lobby type", lobby_type_options)

# An empty selection means "no restriction" for that column, so only the
# non-empty selections narrow the working copy down.
df_f = df.copy()
for filter_column, selected_values in (
    ("game_mode", game_mode_filter),
    ("lobby_type", lobby_type_filter),
):
    if selected_values:
        df_f = df_f[df_f[filter_column].isin(selected_values)]

# Nothing left to plot: tell the user and end this script run here.
if df_f.empty:
    st.warning("No data available for the selected filters.")
    st.stop()
# ======================
# GRAPH 1 – TOXICITY vs MATCH OUTCOME
# ======================
st.subheader("Toxicity vs Match Outcome")

# One row per (match, outcome) holding the number of toxic messages in it.
toxicity_per_match = (
    df_f.groupby(["match_id", "outcome"])["is_negative"]
    .sum()
    .reset_index(name="toxic_messages")
)

fig1, ax1 = plt.subplots(figsize=(6, 4))
sns.boxplot(
    data=toxicity_per_match,
    x="outcome",
    y="toxic_messages",
    # Fixed category order: without it the box order follows the order in
    # which outcomes happen to appear in the data and can flip between
    # filter selections.
    order=["Win", "Lose"],
    ax=ax1,
)
ax1.set_xlabel("Match outcome")
ax1.set_ylabel("Number of toxic messages")
st.pyplot(fig1)
# Figures created through pyplot stay registered until explicitly closed.
# The script is executed again on every rerun, so an unclosed figure would
# accumulate in memory each time.
plt.close(fig1)
# ======================
# GRAPH 2 – TOXICITY BY GAME PHASE
# ======================
# Section heading for the bar chart of toxic message counts per game phase.
st.subheader("Toxicity by Game Phase")
def game_phase(t, *, early_end=10, mid_end=25):
    """Classify a match time into a named game phase.

    Args:
        t: Match time; the caller passes minutes (the ``time_min`` column).
        early_end: Exclusive upper bound of the early game, same unit as ``t``.
        mid_end: Exclusive upper bound of the mid game, same unit as ``t``.

    Returns:
        ``"Early game"`` when ``t < early_end``, ``"Mid game"`` when
        ``early_end <= t < mid_end``, ``"Late game"`` otherwise, or ``None``
        when ``t`` is missing (``None``/NaN).
    """
    # A NaN fails every "<" comparison, so without this guard a message with
    # no timestamp would fall through and be counted as "Late game".
    if pd.isna(t):
        return None
    if t < early_end:
        return "Early game"
    if t < mid_end:
        return "Mid game"
    return "Late game"
# Chronological phase order. groupby() sorts its keys alphabetically, which
# would draw the bars as Early / Late / Mid.
PHASE_ORDER = ["Early game", "Mid game", "Late game"]

# Work on a copy so that adding the column never writes into a filtered view.
df_f = df_f.copy()
df_f["phase"] = df_f["time_min"].apply(game_phase)

# Number of toxic messages per phase. Reindexing on PHASE_ORDER puts the rows
# in chronological order and keeps phases without any toxic message as an
# explicit 0 instead of dropping them from the chart.
phase_stats = (
    df_f[df_f["is_negative"]]
    .groupby("phase")
    .size()
    .reindex(PHASE_ORDER, fill_value=0)
    .rename_axis("phase")
    .reset_index(name="toxic_messages")
)

fig2, ax2 = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=phase_stats,
    x="phase",
    y="toxic_messages",
    order=PHASE_ORDER,
    ax=ax2,
)
ax2.set_xlabel("Game phase")
ax2.set_ylabel("Number of toxic messages")
st.pyplot(fig2)
# Figures created through pyplot stay registered until explicitly closed.
# The script is executed again on every rerun, so an unclosed figure would
# accumulate in memory each time.
plt.close(fig2)
# ======================
# GRAPH 3 – TOXICITY HEATMAP BY REGION
# ======================
st.subheader("Toxicity by Region")

# Per-region message counts computed over *all* messages of the region, so a
# region without a single toxic message is kept with a rate of 0.0 instead of
# disappearing from the chart.
region_stats = (
    df_f.groupby("region")["is_negative"]
    .agg(toxic_messages="sum", total_messages="size")
    .reset_index()
)
region_stats["toxicity_rate"] = (
    region_stats["toxic_messages"] / region_stats["total_messages"]
)

# One row per region, single "toxicity_rate" column.
heatmap_data = region_stats.set_index("region")[["toxicity_rate"]]

if heatmap_data.empty:
    # sns.heatmap cannot draw an empty frame; show a message rather than
    # letting the page fail when no row has a region.
    st.info("No region data available for the selected filters.")
else:
    fig3, ax3 = plt.subplots(figsize=(4, 6))
    sns.heatmap(
        heatmap_data,
        cmap="Reds",
        linewidths=0.5,
        cbar_kws={"label": "Toxicity rate"},
        ax=ax3,
    )
    ax3.set_xlabel("")
    ax3.set_ylabel("Region")
    st.pyplot(fig3)
    # Figures created through pyplot stay registered until explicitly
    # closed. The script is executed again on every rerun, so an unclosed
    # figure would accumulate in memory each time.
    plt.close(fig3)
# ======================
# FOOTER
# ======================
# A Markdown "---" is a thematic break, i.e. a horizontal rule.
st.markdown("---")