Spaces:

ND18
/

DT

Running

App Files Files Community

DT / src /streamlit_app.py

ND18

Update src/streamlit_app.py

31a5d8c verified about 2 months ago

raw

history blame contribute delete

3.97 kB

	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import streamlit as st

	# ======================
	# PAGE CONFIG
	# ======================
	# Tab title and wide layout, followed by the page heading.
	st.set_page_config(page_title="MOBA Toxicity Analysis", layout="wide")

	st.title("Toxicity in MOBA Matches")

	# Introductory blurb, held in its own literal so the call stays short.
	intro_markdown = """
	This dashboard explores the relationship between toxic chat messages
	and match outcomes in MOBA games (Dota 2).
	"""
	st.markdown(intro_markdown)

	# Up-front caveat about the limited sample behind the charts.
	sample_caveat = (
	    "This analysis is based on a limited sample of matches. "
	    "For some game modes or lobby types, results may be missing "
	    "or not statistically reliable."
	)
	st.warning(sample_caveat)

	# ======================
	# LOAD DATA
	# ======================
	@st.cache_data
	def load_data():
	    """Read the match chat workbook into a DataFrame.

	    Decorated with ``st.cache_data`` so Streamlit can reuse the parsed
	    result instead of re-reading the Excel file on every rerun.

	    Returns:
	        The DataFrame produced by ``pd.read_excel`` for
	        ``src/Final_Dataset_Dota.xlsx`` (resolved against the current
	        working directory).
	    """
	    dataset = pd.read_excel("src/Final_Dataset_Dota.xlsx")
	    return dataset


	df = load_data()

	# ======================
	# TOXICITY DEFINITION
	# ======================
	# Values of "Message_Clean_Classified" that are treated as toxic. The
	# match is exact and case-sensitive, which is why some labels are listed
	# in two spellings.
	# NOTE(review): only some labels have both casings — confirm whether
	# variants such as "bitch" or "Report" can occur in the data.
	negative_labels = [
	    "piece of shit",
	    "retarded",
	    "Retarded",
	    "Bitch",
	    "Clowns",
	    "report",
	    "End Fast",
	    "end fast",
	]

	# Derived columns consumed by the filters and charts below.
	message_label = df["Message_Clean_Classified"]
	# "Win"/"Lose" follows radiant_win, i.e. it is the Radiant side's result.
	outcome_names = {True: "Win", False: "Lose"}

	df["is_negative"] = message_label.isin(negative_labels)
	# NOTE(review): assumes "time" is recorded in seconds — confirm.
	df["time_min"] = df["time"].div(60)
	df["outcome"] = df["radiant_win"].map(outcome_names)

	# ======================
	# FILTERS (SAFE)
	# ======================
	with st.sidebar:
	    st.header("Context filters")

	    game_mode_options = sorted(df["game_mode"].dropna().unique())
	    game_mode_filter = st.multiselect("Game mode", game_mode_options)

	    lobby_type_options = sorted(df["lobby_type"].dropna().unique())
	    lobby_type_filter = st.multiselect("Lobby type", lobby_type_options)

	# An empty selection means "no restriction" on that column.
	df_f = df.copy()
	active_filters = (
	    ("game_mode", game_mode_filter),
	    ("lobby_type", lobby_type_filter),
	)
	for column, selected in active_filters:
	    if selected:
	        df_f = df_f[df_f[column].isin(selected)]

	# Halt the script when the chosen combination matches no rows, so the
	# charts below never receive an empty frame.
	if df_f.empty:
	    st.warning("No data available for the selected filters.")
	    st.stop()

	# ======================
	# GRAPH 1 – TOXICITY vs MATCH OUTCOME
	# ======================
	st.subheader("Toxicity vs Match Outcome")

	# One row per (match, outcome): how many of its messages were flagged
	# toxic. Summing the boolean "is_negative" column counts the True values.
	toxicity_per_match = (
	    df_f.groupby(["match_id", "outcome"])["is_negative"]
	    .sum()
	    .reset_index(name="toxic_messages")
	)

	fig1, ax1 = plt.subplots(figsize=(6, 4))
	sns.boxplot(
	    data=toxicity_per_match,
	    x="outcome",
	    y="toxic_messages",
	    ax=ax1,
	)
	ax1.set_xlabel("Match outcome")
	ax1.set_ylabel("Number of toxic messages")

	st.pyplot(fig1)
	# pyplot keeps every figure created by plt.subplots() registered until it
	# is closed; the script reruns on each interaction, so release the figure
	# once it has been rendered instead of letting figures pile up.
	plt.close(fig1)

	# ======================
	# GRAPH 2 – TOXICITY BY GAME PHASE
	# ======================
	# Heading for the per-phase bar chart built further down.
	st.subheader("Toxicity by Game Phase")

	def game_phase(t):
	    """Classify a match time, given in minutes, into a coarse game phase.

	    Args:
	        t: Match time in minutes (the caller passes values of the
	            ``time_min`` column). May be missing (NaN/None).

	    Returns:
	        "Early game" when t < 10, "Mid game" when 10 <= t < 25,
	        "Late game" when t >= 25, and None when t is missing.
	    """
	    # Every comparison against NaN is False, so without this guard a
	    # missing timestamp would fall through and be counted as "Late game".
	    if pd.isna(t):
	        return None
	    if t < 10:
	        return "Early game"
	    if t < 25:
	        return "Mid game"
	    return "Late game"

	# Copy before adding a column so the assignment never targets a slice of df.
	df_f = df_f.copy()
	df_f["phase"] = df_f["time_min"].apply(game_phase)

	# Chronological x-axis order. groupby() alone sorts the phase labels
	# alphabetically (Early, Late, Mid) and omits phases that have no toxic
	# messages, so the counts are re-indexed onto the full ordered list.
	phase_order = ["Early game", "Mid game", "Late game"]

	phase_stats = (
	    df_f[df_f["is_negative"]]
	    .groupby("phase")
	    .size()
	    .reindex(phase_order, fill_value=0)
	    .rename_axis("phase")
	    .reset_index(name="toxic_messages")
	)

	fig2, ax2 = plt.subplots(figsize=(6, 4))
	sns.barplot(
	    data=phase_stats,
	    x="phase",
	    y="toxic_messages",
	    order=phase_order,
	    ax=ax2,
	)
	ax2.set_xlabel("Game phase")
	ax2.set_ylabel("Number of toxic messages")

	st.pyplot(fig2)
	# Release the figure after rendering; pyplot otherwise keeps it registered
	# across script reruns.
	plt.close(fig2)

	# ======================
	# GRAPH 3 – TOXICITY HEATMAP BY REGION
	# ======================
	st.subheader("Toxicity by Region")

	# Toxic and total message counts per region in one pass. Aggregating over
	# all rows (rather than starting from the toxic rows only) keeps regions
	# whose toxicity rate is 0 and avoids handing an empty frame to the
	# heatmap when the current selection contains no toxic messages.
	region_stats = (
	    df_f.groupby("region")["is_negative"]
	    .agg(toxic_messages="sum", total_messages="size")
	    .reset_index()
	)
	# Every group has at least one row, so the denominator is never zero.
	region_stats["toxicity_rate"] = (
	    region_stats["toxic_messages"] / region_stats["total_messages"]
	)

	# Single-column frame indexed by region: one heatmap cell per region.
	heatmap_data = region_stats.set_index("region")[["toxicity_rate"]]

	fig3, ax3 = plt.subplots(figsize=(4, 6))
	sns.heatmap(
	    heatmap_data,
	    cmap="Reds",
	    linewidths=0.5,
	    cbar_kws={"label": "Toxicity rate"},
	    ax=ax3,
	)

	ax3.set_xlabel("")
	ax3.set_ylabel("Region")

	st.pyplot(fig3)
	# Release the figure after rendering; pyplot otherwise keeps it registered
	# across script reruns.
	plt.close(fig3)

	# ======================
	# FOOTER
	# ======================
	# A bare "---" is Markdown for a horizontal rule, closing off the page.
	st.markdown("---")