| import pandas as pd |
| import matplotlib.pyplot as plt |
| import seaborn as sns |
| import streamlit as st |
|
|
| |
| |
| |
# Page-level configuration followed by the static introduction that is shown
# above every chart. The page configuration call comes before any other
# Streamlit command in this script.
_PAGE_OPTIONS = {
    "page_title": "MOBA Toxicity Analysis",
    "layout": "wide",
}
st.set_page_config(**_PAGE_OPTIONS)

st.title("Toxicity in MOBA Matches")

_INTRO_TEXT = """
This dashboard explores the relationship between toxic chat messages
and match outcomes in MOBA games (Dota 2).
"""
st.markdown(_INTRO_TEXT)

# Caveat about sample size, rendered as a warning box under the introduction.
_SAMPLE_CAVEAT = (
    "This analysis is based on a limited sample of matches. "
    "For some game modes or lobby types, results may be missing or not statistically reliable."
)
st.warning(_SAMPLE_CAVEAT)
|
|
| |
| |
| |
@st.cache_data
def load_data():
    """Read the chat dataset from the Excel workbook and return it as a DataFrame.

    The function is wrapped in ``st.cache_data``, so Streamlit serves the
    cached result on reruns instead of reading the workbook again.

    The path is relative, so it is resolved against the current working
    directory of the process running the app.
    """
    workbook_path = "src/Final_Dataset_Dota.xlsx"
    dataset = pd.read_excel(workbook_path)
    return dataset
|
|
# Load the raw chat log; the helper columns every chart relies on are derived
# from it below.
df = load_data()

# Values of "Message_Clean_Classified" that are counted as toxic. Matching is
# exact and case-sensitive, which is why case variants are listed separately.
negative_labels = [
    "piece of shit",
    "retarded",
    "Retarded",
    "Bitch",
    "Clowns",
    "report",
    "End Fast",
    "end fast",
]

df = df.assign(
    # True where the message label is one of the toxic labels above.
    is_negative=df["Message_Clean_Classified"].isin(negative_labels),
    # "time" divided by 60 (presumably seconds -> minutes; confirm the unit).
    time_min=df["time"].div(60),
    # Outcome label derived from "radiant_win", i.e. seen from the Radiant side.
    # Values other than True/False map to NaN.
    outcome=df["radiant_win"].map({True: "Win", False: "Lose"}),
)
|
|
| |
| |
| |
# Sidebar widgets. Each multiselect offers the sorted, non-null distinct values
# of its column; the selections are stored for the filtering step that follows.
st.sidebar.header("Context filters")

game_mode_filter = st.sidebar.multiselect(
    "Game mode",
    options=sorted(df["game_mode"].dropna().unique()),
)

lobby_type_filter = st.sidebar.multiselect(
    "Lobby type",
    options=sorted(df["lobby_type"].dropna().unique()),
)
|
|
| |
# Narrow the data to the sidebar selections. An empty selection is falsy, so
# that column is left unfiltered.
df_f = df.copy()

for column, chosen in (
    ("game_mode", game_mode_filter),
    ("lobby_type", lobby_type_filter),
):
    if chosen:
        df_f = df_f[df_f[column].isin(chosen)]

# Nothing left to plot: tell the user and halt this script run here.
if df_f.empty:
    st.warning("No data available for the selected filters.")
    st.stop()
|
|
| |
| |
| |
st.subheader("Toxicity vs Match Outcome")

# One row per (match, outcome) pair with the number of messages flagged as
# toxic. Summing the boolean "is_negative" column counts the True values.
# NOTE: "outcome" is derived from "radiant_win", so "Win"/"Lose" is the result
# from the Radiant side's perspective.
toxicity_per_match = (
    df_f.groupby(["match_id", "outcome"])["is_negative"]
    .sum()
    .reset_index(name="toxic_messages")
)

# Distribution of per-match toxic message counts, split by outcome.
fig1, ax1 = plt.subplots(figsize=(6, 4))
sns.boxplot(
    data=toxicity_per_match,
    x="outcome",
    y="toxic_messages",
    ax=ax1,
)
ax1.set_xlabel("Match outcome")
ax1.set_ylabel("Number of toxic messages")

st.pyplot(fig1)
# pyplot keeps every figure it creates registered until it is closed. The
# script runs again on each widget interaction, so close the figure after
# rendering to avoid accumulating one open figure per rerun.
plt.close(fig1)
|
|
| |
| |
| |
# Section heading for the chart that breaks toxic messages down by game phase.
st.subheader("Toxicity by Game Phase")
|
|
def game_phase(t):
    """Return the game-phase label for a match time expressed in minutes.

    Args:
        t: Match time in minutes. May be negative, ``None`` or NaN.

    Returns:
        ``"Early game"`` for ``t < 10``, ``"Mid game"`` for ``10 <= t < 25``,
        ``"Late game"`` for ``t >= 25``, and ``None`` when ``t`` is missing.
    """
    # A NaN fails both "<" comparisons and would otherwise fall through to
    # "Late game" (and None would raise TypeError); report it as missing.
    if pd.isna(t):
        return None
    if t < 10:
        return "Early game"
    if t < 25:
        return "Mid game"
    return "Late game"
|
|
# Take a fresh copy before adding a column: df_f may be the result of a
# boolean-mask selection, and assigning into that can raise pandas'
# SettingWithCopyWarning.
df_f = df_f.copy()
df_f["phase"] = df_f["time_min"].apply(game_phase)

# Chronological display order. groupby sorts its keys alphabetically
# ("Early game", "Late game", "Mid game"), so the order is imposed explicitly.
phase_order = ["Early game", "Mid game", "Late game"]

# Number of toxic messages per phase. Reindexing onto phase_order keeps the
# chronological order and shows phases without toxic messages as 0 instead of
# dropping them from the chart.
phase_stats = (
    df_f[df_f["is_negative"]]
    .groupby("phase")
    .size()
    .reindex(phase_order, fill_value=0)
    .rename_axis("phase")
    .reset_index(name="toxic_messages")
)

fig2, ax2 = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=phase_stats,
    x="phase",
    y="toxic_messages",
    order=phase_order,
    ax=ax2,
)
ax2.set_xlabel("Game phase")
ax2.set_ylabel("Number of toxic messages")

st.pyplot(fig2)
# pyplot keeps figures registered until they are closed; close after rendering
# so reruns of the script do not accumulate open figures.
plt.close(fig2)
|
|
| |
| |
| |
st.subheader("Toxicity by Region")

# Toxic message count per region (only regions with at least one toxic message).
region_toxic = (
    df_f[df_f["is_negative"]]
    .groupby("region")
    .size()
    .reset_index(name="toxic_messages")
)

# Total message count per region (every region present in the filtered data).
region_total = (
    df_f.groupby("region")
    .size()
    .reset_index(name="total_messages")
)

# Merge starting from the totals so that regions without any toxic message are
# kept; their missing toxic count becomes 0 and their rate is 0 rather than
# the region vanishing from the chart.
region_stats = (
    region_total.merge(region_toxic, on="region", how="left")
    .fillna({"toxic_messages": 0})
    .astype({"toxic_messages": int})
)
region_stats["toxicity_rate"] = (
    region_stats["toxic_messages"] / region_stats["total_messages"]
)

# One-column frame indexed by region: the layout the heatmap draws, one cell
# per region. Each region occurs exactly once, so no aggregation is needed.
heatmap_data = region_stats.set_index("region")[["toxicity_rate"]]

if heatmap_data.empty:
    # No row has a region value, so there is nothing to draw; show a notice
    # instead of handing an empty frame to the heatmap.
    st.info("No region data available for the selected filters.")
else:
    fig3, ax3 = plt.subplots(figsize=(4, 6))
    sns.heatmap(
        heatmap_data,
        cmap="Reds",
        linewidths=0.5,
        cbar_kws={"label": "Toxicity rate"},
        ax=ax3,
    )

    ax3.set_xlabel("")
    ax3.set_ylabel("Region")

    st.pyplot(fig3)
    # pyplot keeps figures registered until they are closed; close after
    # rendering so reruns of the script do not accumulate open figures.
    plt.close(fig3)
|
|
| |
| |
| |
# "---" is Markdown for a horizontal rule: draws a divider after the charts.
st.markdown("---")