Intention committed on
Commit
fdf8278
·
0 Parent(s):
Files changed (6) hide show
  1. .gitignore +129 -0
  2. .streamlit/config.toml +8 -0
  3. README.md +2 -0
  4. app.py +149 -0
  5. requirements.txt +8 -0
  6. sample_chat.json +62 -0
.gitignore ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
.streamlit/config.toml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [server]
2
+ port = 8501
3
+
4
+ [browser]
5
+ gatherUsageStats = false
6
+
7
+ [theme]
8
+ base="light"
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Intention Study
2
+
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ import scrubadub
5
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
6
+ from pymongo.mongo_client import MongoClient
7
+ from pymongo.server_api import ServerApi
8
+ from datetime import datetime
9
+ from uuid import uuid4
10
+
11
+ # -----------------------------
12
+ # Consent Setup
13
+ # -----------------------------
14
st.set_page_config(page_title="ChatGPT Log Analyzer", page_icon="🤖")

# Seed the radio's backing state so the blank option is selected on first load.
if "consent" not in st.session_state:
    st.session_state["consent"] = ""

# The consent form is drawn inside a placeholder so that it can be removed
# from the page as soon as the visitor has answered.
consent_box = st.empty()
with consent_box.container(), st.expander("Consent", expanded=True):
    st.markdown("##### Take Part in Our Study")
    st.markdown("""
    Please consider participating in our research study on ChatGPT interactions.
    In this study, you will be asked to upload ChatGPT logs. These will be analyzed for sentiment, redacted to remove personal information, and stored in a research database.

    **You must be 18 years or older to participate.**
    You can still use the app without sharing your data by clicking **'No, I do not consent'**.
    """)

    consent_options = ["", "Yes, I consent", "No, I do not consent"]
    st.radio(
        "**Do you consent to participating in this study and sharing anonymized information?**",
        consent_options,
        key="consent",
        horizontal=True,
    )

answer = st.session_state["consent"]

# Either explicit answer dismisses the form; the blank default keeps it visible.
if answer in ("Yes, I consent", "No, I do not consent"):
    consent_box.empty()

if answer == "No, I do not consent":
    st.warning("⚠️ You did not consent. You can still use the app, but your logs will not be stored.")
elif answer == "Yes, I consent":
    # The ID is generated once per session: a timestamp prefix plus a random UUID.
    if "id" not in st.session_state:
        stamp = datetime.now().strftime('%Y%m-%d%H-%M-')
        st.session_state["id"] = stamp + str(uuid4())
    st.success("✅ You consented to participate.")
    st.info(f"Your anonymized ID is: **{st.session_state.id}**. Keep this if you want your data deleted later.")
47
+
48
+ # -----------------------------
49
+ # Parser Function
50
+ # -----------------------------
51
def parse_chatgpt_export(data):
    """Flatten a ChatGPT export into a DataFrame with one row per message.

    Args:
        data: Decoded export JSON. Expected to be a dict whose
            "conversations" entry is a list of conversation dicts, each
            carrying "id", "title" and a "mapping" of message id to
            message dict ("author", "content", "create_time").

    Returns:
        A pandas DataFrame with the columns conversation_id, title,
        message_id, role, content and create_time. The columns are present
        even when the export contains no messages.
    """
    columns = [
        "conversation_id",
        "title",
        "message_id",
        "role",
        "content",
        "create_time",
    ]
    rows = []

    # `or` fallbacks (rather than .get defaults) also cover keys that are
    # present but explicitly null in the JSON.
    for conv in data.get("conversations") or []:
        if not isinstance(conv, dict):
            continue
        conv_id = conv.get("id")
        title = conv.get("title")

        for msg_id, msg in (conv.get("mapping") or {}).items():
            if not isinstance(msg, dict):
                continue
            author = msg.get("author") or {}
            content = msg.get("content") or {}
            parts = content.get("parts") or []
            # Only textual parts can be joined, scrubbed and scored; any
            # non-string part (e.g. a structured attachment) is skipped.
            text = "\n".join(part for part in parts if isinstance(part, str))

            rows.append({
                "conversation_id": conv_id,
                "title": title,
                "message_id": msg_id,
                "role": author.get("role", "unknown"),
                "content": text,
                "create_time": msg.get("create_time"),
            })

    # Fixing the column list keeps df["title"] etc. valid for empty exports.
    return pd.DataFrame(rows, columns=columns)
75
+
76
+ # -----------------------------
77
+ # File Upload
78
+ # -----------------------------
79
uploaded_file = st.file_uploader("Upload ChatGPT export (.json)", type=["json"])

if uploaded_file:
    # A malformed upload should produce a readable error, not a traceback.
    try:
        data = json.load(uploaded_file)
    except (json.JSONDecodeError, UnicodeDecodeError):
        st.error("The uploaded file is not valid JSON")
        st.stop()

    if isinstance(data, dict) and "conversations" in data:
        df = parse_chatgpt_export(data)
    else:
        st.error("Unsupported JSON structure")
        st.stop()

    # Nothing to page through; also avoids indexing a frame without rows.
    if df.empty:
        st.warning("No messages found in the uploaded export.")
        st.stop()

    # NOTE(review): conversations are keyed by title here, so conversations
    # that share a title are shown (and saved) as a single conversation.
    convo_titles = df["title"].unique()

    # -----------------------------
    # Session State for Pagination
    # -----------------------------
    if "convo_index" not in st.session_state:
        st.session_state.convo_index = 0
    # The stored index can be stale after a smaller export is uploaded.
    st.session_state.convo_index = min(
        st.session_state.convo_index, len(convo_titles) - 1
    )

    # -----------------------------
    # Dropdown Selection
    # -----------------------------
    selected_title = st.selectbox(
        "Select Conversation",
        options=convo_titles,
        index=st.session_state.convo_index,
    )
    st.session_state.convo_index = list(convo_titles).index(selected_title)

    # -----------------------------
    # Pagination Buttons
    # -----------------------------
    col1, col2, col3 = st.columns([1, 2, 1])
    with col1:
        if st.button("⬅ Previous") and st.session_state.convo_index > 0:
            st.session_state.convo_index -= 1
            selected_title = convo_titles[st.session_state.convo_index]
    with col3:
        if st.button("Next ➡") and st.session_state.convo_index < len(convo_titles) - 1:
            st.session_state.convo_index += 1
            selected_title = convo_titles[st.session_state.convo_index]

    # -----------------------------
    # Filter Selected Conversation
    # -----------------------------
    # A missing title never compares equal to itself, so match it explicitly.
    if pd.isna(selected_title):
        title_mask = df["title"].isna()
    else:
        title_mask = df["title"] == selected_title
    convo_df = df[title_mask].copy()

    # -----------------------------
    # Scrub + Sentiment
    # -----------------------------
    cleaner = scrubadub.Scrubber()
    convo_df["redacted"] = convo_df["content"].apply(lambda x: cleaner.clean(str(x)))

    analyzer = SentimentIntensityAnalyzer()
    convo_df["sentiment"] = convo_df["content"].apply(
        lambda x: analyzer.polarity_scores(str(x))["compound"]
    )

    # -----------------------------
    # Show Conversation
    # -----------------------------
    st.subheader(f"💬 Conversation ({st.session_state.convo_index+1}/{len(convo_titles)}): {selected_title}")
    st.dataframe(convo_df[["role", "redacted", "sentiment", "create_time"]])

    # -----------------------------
    # Optional: Save to MongoDB
    # -----------------------------
    # Storage is offered only to consenting participants, matching the
    # consent notice shown at the top of the page.
    if st.session_state.get("consent") != "Yes, I consent":
        st.info("Saving to the research database is only available after you consent to the study.")
    elif st.button("📥 Save Conversation to Database"):
        # Persist redacted text only: the raw "content" column and the
        # unredacted per-message title are dropped before serialising.
        stored_messages = convo_df.drop(columns=["content", "title"]).to_dict(orient="records")
        stored_title = None if pd.isna(selected_title) else cleaner.clean(str(selected_title))
        record = {
            # Lets a participant's records be found if they ask for deletion.
            "participant_id": st.session_state.get("id"),
            "conversation_id": convo_df["conversation_id"].iloc[0],
            "title": stored_title,
            "inserted_at": datetime.utcnow(),
            "messages": stored_messages,
        }
        with MongoClient(st.secrets["mongo"], server_api=ServerApi('1')) as client:
            client.bridge.app.insert_one(record)
        st.success(f"✅ Conversation '{selected_title}' saved to MongoDB.")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.23.0
2
+ pymongo[srv]==3.12
3
+ datetime
4
+ uuid
5
+ numpy
6
+ scrubadub
7
+ textblob
8
+ pandas
9
+ vaderSentiment
sample_chat.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "conversations": [
3
+ {
4
+ "id": "conv_123456",
5
+ "title": "Trip Planning",
6
+ "create_time": "2023-08-01T12:34:56.000Z",
7
+ "mapping": {
8
+ "msg_1": {
9
+ "id": "msg_1",
10
+ "author": { "role": "user" },
11
+ "create_time": "2023-08-01T12:34:56.000Z",
12
+ "content": {
13
+ "content_type": "text",
14
+ "parts": [
15
+ "Hi, my name is John Doe and my email is john.doe@example.com. Can you help me plan a trip?"
16
+ ]
17
+ }
18
+ },
19
+ "msg_2": {
20
+ "id": "msg_2",
21
+ "author": { "role": "assistant" },
22
+ "create_time": "2023-08-01T12:35:10.000Z",
23
+ "content": {
24
+ "content_type": "text",
25
+ "parts": [
26
+ "Of course! Where would you like to travel?"
27
+ ]
28
+ }
29
+ }
30
+ }
31
+ },
32
+ {
33
+ "id": "conv_654321",
34
+ "title": "Job Advice",
35
+ "create_time": "2023-09-01T15:00:00.000Z",
36
+ "mapping": {
37
+ "msg_1": {
38
+ "id": "msg_1",
39
+ "author": { "role": "user" },
40
+ "create_time": "2023-09-01T15:00:01.000Z",
41
+ "content": {
42
+ "content_type": "text",
43
+ "parts": [
44
+ "Hi, I’m Jane Smith. I’m applying for jobs and my phone is (555) 222-9999. Can you review my resume?"
45
+ ]
46
+ }
47
+ },
48
+ "msg_2": {
49
+ "id": "msg_2",
50
+ "author": { "role": "assistant" },
51
+ "create_time": "2023-09-01T15:00:20.000Z",
52
+ "content": {
53
+ "content_type": "text",
54
+ "parts": [
55
+ "Sure! Please paste your resume text here and I’ll give feedback."
56
+ ]
57
+ }
58
+ }
59
+ }
60
+ }
61
+ ]
62
+ }