ND18 committed on
Commit
c9b0532
·
verified ·
1 Parent(s): effc62c

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +105 -105
src/streamlit_app.py CHANGED
@@ -1,106 +1,106 @@
1
- import streamlit as st
2
- import json
3
- import pandas as pd
4
- import altair as alt
5
-
6
- # ---------------------------
7
- # CONFIG
8
- # ---------------------------
9
- FILE_PATH = "data_final_version.json"
10
-
11
- st.set_page_config(layout="wide")
12
- st.title("Dota Toxic Messages Dashboard")
13
-
14
- # ---------------------------
15
- # LOAD DATA
16
- # ---------------------------
17
- @st.cache_data
18
- def load_data():
19
- with open(FILE_PATH, "r", encoding="utf-8") as f:
20
- raw = json.load(f)
21
- return pd.DataFrame(raw)
22
-
23
- df = load_data()
24
-
25
- # ---------------------------
26
- # CLEAN DATA
27
- # ---------------------------
28
- df["lobby_type"] = df["lobby_type"].fillna("Unknown")
29
- df["region"] = df["region"].fillna("Unknown")
30
- df["message_classified"] = df["message_classified"].fillna("unknown")
31
-
32
- # ---------------------------
33
- # SIDEBAR FILTERS
34
- # ---------------------------
35
- st.sidebar.header("Filters")
36
-
37
- regions = st.sidebar.multiselect(
38
- "Region",
39
- options=df["region"].unique(),
40
- default=df["region"].unique()
41
- )
42
-
43
- lobbies = st.sidebar.multiselect(
44
- "Lobby Type",
45
- options=df["lobby_type"].unique(),
46
- default=df["lobby_type"].unique()
47
- )
48
-
49
- df_filtered = df[
50
- (df["region"].isin(regions)) &
51
- (df["lobby_type"].isin(lobbies))
52
- ]
53
-
54
- # ---------------------------
55
- # KPI
56
- # ---------------------------
57
- col1, col2, col3 = st.columns(3)
58
-
59
- col1.metric("Total Messages", len(df_filtered))
60
- col2.metric("Unique Matches", df_filtered["match_id"].nunique())
61
- col3.metric("Avg Duration", round(df_filtered["duration_game"].mean(), 1))
62
-
63
- # ---------------------------
64
- # CHART 1 - Toxic categories
65
- # ---------------------------
66
- st.subheader("Toxic Message Distribution")
67
-
68
- chart1 = alt.Chart(df_filtered).mark_bar().encode(
69
- x=alt.X("count()", title="Count"),
70
- y=alt.Y("message_classified", sort="-x", title="Category"),
71
- color="message_classified"
72
- )
73
-
74
- st.altair_chart(chart1, use_container_width=True)
75
-
76
- # ---------------------------
77
- # CHART 2 - By Lobby Type
78
- # ---------------------------
79
- st.subheader("Toxicity by Lobby Type")
80
-
81
- chart2 = alt.Chart(df_filtered).mark_bar().encode(
82
- x="lobby_type",
83
- y="count()",
84
- color="lobby_type"
85
- )
86
-
87
- st.altair_chart(chart2, use_container_width=True)
88
-
89
- # ---------------------------
90
- # CHART 3 - By Region
91
- # ---------------------------
92
- st.subheader("Toxicity by Region")
93
-
94
- chart3 = alt.Chart(df_filtered).mark_bar().encode(
95
- x=alt.X("region", sort="-y"),
96
- y="count()",
97
- color="region"
98
- )
99
-
100
- st.altair_chart(chart3, use_container_width=True)
101
-
102
- # ---------------------------
103
- # RAW DATA (optional)
104
- # ---------------------------
105
- with st.expander("Show raw data"):
106
  st.dataframe(df_filtered)
 
1
+ import streamlit as st
2
+ import json
3
+ import pandas as pd
4
+ import altair as alt
5
+
6
+ # ---------------------------
7
+ # CONFIG
8
+ # ---------------------------
9
+ FILE_PATH = "src/data_final_version.json"
10
+
11
+ st.set_page_config(layout="wide")
12
+ st.title("Dota Toxic Messages Dashboard")
13
+
14
+ # ---------------------------
15
+ # LOAD DATA
16
+ # ---------------------------
17
+ @st.cache_data
18
+ def load_data():
19
+ with open(FILE_PATH, "r", encoding="utf-8") as f:
20
+ raw = json.load(f)
21
+ return pd.DataFrame(raw)
22
+
23
+ df = load_data()
24
+
25
+ # ---------------------------
26
+ # CLEAN DATA
27
+ # ---------------------------
28
+ df["lobby_type"] = df["lobby_type"].fillna("Unknown")
29
+ df["region"] = df["region"].fillna("Unknown")
30
+ df["message_classified"] = df["message_classified"].fillna("unknown")
31
+
32
+ # ---------------------------
33
+ # SIDEBAR FILTERS
34
+ # ---------------------------
35
+ st.sidebar.header("Filters")
36
+
37
+ regions = st.sidebar.multiselect(
38
+ "Region",
39
+ options=df["region"].unique(),
40
+ default=df["region"].unique()
41
+ )
42
+
43
+ lobbies = st.sidebar.multiselect(
44
+ "Lobby Type",
45
+ options=df["lobby_type"].unique(),
46
+ default=df["lobby_type"].unique()
47
+ )
48
+
49
+ df_filtered = df[
50
+ (df["region"].isin(regions)) &
51
+ (df["lobby_type"].isin(lobbies))
52
+ ]
53
+
54
+ # ---------------------------
55
+ # KPI
56
+ # ---------------------------
57
+ col1, col2, col3 = st.columns(3)
58
+
59
+ col1.metric("Total Messages", len(df_filtered))
60
+ col2.metric("Unique Matches", df_filtered["match_id"].nunique())
61
+ col3.metric("Avg Duration", round(df_filtered["duration_game"].mean(), 1))
62
+
63
+ # ---------------------------
64
+ # CHART 1 - Toxic categories
65
+ # ---------------------------
66
+ st.subheader("Toxic Message Distribution")
67
+
68
+ chart1 = alt.Chart(df_filtered).mark_bar().encode(
69
+ x=alt.X("count()", title="Count"),
70
+ y=alt.Y("message_classified", sort="-x", title="Category"),
71
+ color="message_classified"
72
+ )
73
+
74
+ st.altair_chart(chart1, use_container_width=True)
75
+
76
+ # ---------------------------
77
+ # CHART 2 - By Lobby Type
78
+ # ---------------------------
79
+ st.subheader("Toxicity by Lobby Type")
80
+
81
+ chart2 = alt.Chart(df_filtered).mark_bar().encode(
82
+ x="lobby_type",
83
+ y="count()",
84
+ color="lobby_type"
85
+ )
86
+
87
+ st.altair_chart(chart2, use_container_width=True)
88
+
89
+ # ---------------------------
90
+ # CHART 3 - By Region
91
+ # ---------------------------
92
+ st.subheader("Toxicity by Region")
93
+
94
+ chart3 = alt.Chart(df_filtered).mark_bar().encode(
95
+ x=alt.X("region", sort="-y"),
96
+ y="count()",
97
+ color="region"
98
+ )
99
+
100
+ st.altair_chart(chart3, use_container_width=True)
101
+
102
+ # ---------------------------
103
+ # RAW DATA (optional)
104
+ # ---------------------------
105
+ with st.expander("Show raw data"):
106
  st.dataframe(df_filtered)