"""Streamlit dashboard relating toxic chat messages to match outcomes (Dota 2)."""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# ======================
# PAGE CONFIG
# ======================
# Page configuration is the first Streamlit call made by this script.
st.set_page_config(
    page_title="MOBA Toxicity Analysis",
    layout="wide",
)

st.title("Toxicity in MOBA Matches")

st.markdown(
    " This dashboard explores the relationship between toxic chat messages "
    "and match outcomes in MOBA games (Dota 2). "
)

st.warning(
    "This analysis is based on a limited sample of matches. "
    "For some game modes or lobby types, results may be missing "
    "or not statistically reliable."
)


# ======================
# LOAD DATA
# ======================
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the chat dataset from the Excel workbook.

    The result is wrapped in ``st.cache_data`` so the workbook is not
    re-read on every script rerun.

    Returns:
        The DataFrame produced by ``pd.read_excel`` for
        ``src/Final_Dataset_Dota.xlsx`` (path is relative to the working
        directory the app is started from).
    """
    return pd.read_excel("src/Final_Dataset_Dota.xlsx")


df = load_data()

# ======================
# TOXICITY DEFINITION
# ======================
negative_labels = [
    "piece of shit",
    "retarded",
    "Retarded",
    "Bitch",
    "Clowns",
    "report",
    "End Fast",
    "end fast",
]

# Compare labels case-insensitively and ignoring surrounding whitespace, so a
# label is recognised whatever its casing (the list above only enumerates a
# few casings by hand). Missing values become the string "nan" and never match.
negative_label_keys = {label.strip().casefold() for label in negative_labels}
df["is_negative"] = (
    df["Message_Clean_Classified"]
    .astype(str)
    .str.strip()
    .str.casefold()
    .isin(negative_label_keys)
)

# "time" is divided by 60 to obtain minutes.
df["time_min"] = df["time"] / 60

# NOTE(review): "Win"/"Lose" is derived from radiant_win only, i.e. it is the
# Radiant side's result for the match — confirm this is the intended reading.
df["outcome"] = df["radiant_win"].map({True: "Win", False: "Lose"})

# ======================
# FILTERS (SAFE)
# ======================
with st.sidebar:
    st.header("Context filters")

    game_mode_filter = st.multiselect(
        "Game mode",
        sorted(df["game_mode"].dropna().unique()),
    )

    lobby_type_filter = st.multiselect(
        "Lobby type",
        sorted(df["lobby_type"].dropna().unique()),
    )

# Apply a filter only when its selection is non-empty; an empty selection
# means "no restriction" for that column.
df_f = df.copy()
for column, selected in (
    ("game_mode", game_mode_filter),
    ("lobby_type", lobby_type_filter),
):
    if selected:
        df_f = df_f[df_f[column].isin(selected)]

# Safety fallback: nothing below can be drawn from an empty selection.
if df_f.empty:
    st.warning("No data available for the selected filters.")
    st.stop()

# ======================
# GRAPH 1 – TOXICITY vs MATCH OUTCOME
# ======================
st.subheader("Toxicity vs Match Outcome")

# One row per (match, outcome) with the count of messages flagged as toxic.
toxicity_per_match = (
    df_f.groupby(["match_id", "outcome"])["is_negative"]
    .sum()
    .reset_index(name="toxic_messages")
)
fig1, ax1 = plt.subplots(figsize=(6, 4))
sns.boxplot(
    data=toxicity_per_match,
    x="outcome",
    y="toxic_messages",
    ax=ax1,
)
ax1.set_xlabel("Match outcome")
ax1.set_ylabel("Number of toxic messages")
st.pyplot(fig1)
# Close the figure once rendered so reruns do not accumulate open figures.
plt.close(fig1)

# ======================
# GRAPH 2 – TOXICITY BY GAME PHASE
# ======================
st.subheader("Toxicity by Game Phase")

# Chronological order used for both the counts table and the bar chart;
# without it the phases would be sorted alphabetically.
PHASE_ORDER = ["Early game", "Mid game", "Late game"]


def game_phase(t):
    """Classify a time in minutes into a game phase.

    Args:
        t: Time in minutes (may be NaN/None).

    Returns:
        "Early game" for t < 10, "Mid game" for 10 <= t < 25,
        "Late game" otherwise, or None when ``t`` is missing so that rows
        without a timestamp are not counted as late-game messages.
    """
    if pd.isna(t):
        return None
    if t < 10:
        return "Early game"
    if t < 25:
        return "Mid game"
    return "Late game"


# Work on a copy: df_f may be a filtered slice, and a column is added below.
df_f = df_f.copy()
df_f["phase"] = df_f["time_min"].apply(game_phase)

# Count toxic messages per phase; reindex so every phase is present (0 when
# it has no toxic messages) and in chronological order.
phase_stats = (
    df_f.loc[df_f["is_negative"], "phase"]
    .value_counts()
    .reindex(PHASE_ORDER, fill_value=0)
    .rename_axis("phase")
    .reset_index(name="toxic_messages")
)

fig2, ax2 = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=phase_stats,
    x="phase",
    y="toxic_messages",
    order=PHASE_ORDER,
    ax=ax2,
)
ax2.set_xlabel("Game phase")
ax2.set_ylabel("Number of toxic messages")
st.pyplot(fig2)
plt.close(fig2)

# ======================
# GRAPH 3 – TOXICITY HEATMAP BY REGION
# ======================
st.subheader("Toxicity by Region")

# Aggregate over ALL messages per region so that regions without any toxic
# message are kept with a rate of 0 instead of being dropped.
region_stats = (
    df_f.groupby("region")["is_negative"]
    .agg(toxic_messages="sum", total_messages="size")
    .reset_index()
)
region_stats["toxicity_rate"] = (
    region_stats["toxic_messages"] / region_stats["total_messages"]
)

# Single-column table indexed by region, the shape the heatmap is drawn from.
heatmap_data = region_stats.set_index("region")[["toxicity_rate"]]

if heatmap_data.empty:
    # No row has a usable region value; a heatmap cannot be drawn from an
    # empty table.
    st.info("No region data available for the selected filters.")
else:
    fig3, ax3 = plt.subplots(figsize=(4, 6))
    sns.heatmap(
        heatmap_data,
        cmap="Reds",
        linewidths=0.5,
        cbar_kws={"label": "Toxicity rate"},
        ax=ax3,
    )
    ax3.set_xlabel("")
    ax3.set_ylabel("Region")
    st.pyplot(fig3)
    plt.close(fig3)

# ======================
# FOOTER
# ======================
st.markdown("---")