saim1309 committed on
Commit
1b37b0d
·
verified ·
1 Parent(s): d04bd80

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +1197 -0
  3. getscene_ai.sqlite +3 -0
  4. requirements.txt +5 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ getscene_ai.sqlite filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,1197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import openai
4
+ import json
5
+ import numpy as np
6
+ from bs4 import BeautifulSoup
7
+ from datetime import datetime, timedelta
8
+ import re
9
+ import requests
10
+ import uuid
11
+ from typing import List, Dict, Any
12
+ from dotenv import load_dotenv
13
+
14
+ import os
15
+
16
# ============================================================================
# CONFIGURATION
# ============================================================================
# Use environment variable for API key (set in Hugging Face Secrets)
load_dotenv()  # loads variables from .env into environment

openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail fast at startup rather than on the first OpenAI API call.
if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found in .env file")
# SQLite database file used by all fetch_* / session helpers below.
DB_PATH = "getscene_ai.sqlite"
# OpenAI embedding model used for FAQ/workshop similarity search.
EMBED_MODEL = "text-embedding-3-small"

# Store session ID for the conversation
session_id = str(uuid.uuid4())

# Cache for workshop data and embeddings (refreshed at most once per day).
workshop_cache = {
    'data': [],            # scraped workshop dicts
    'embeddings': [],      # embedding vector per workshop (parallel list)
    'last_updated': None,  # datetime of last successful scrape
    'cache_duration': timedelta(hours=24)
}

# ============================================================================
# KEYWORD LISTS FOR ROUTING
# ============================================================================

# Messages containing these route to the empathetic "support mode" prompt.
EMOTIONAL_KEYWORDS = [
    'stuck', 'frustrated', 'discouraged', 'overwhelmed', 'scared',
    'nervous', 'anxious', 'worried', 'fear', 'doubt', 'confidence',
    'insecure', 'lost', 'confused', 'struggling', 'hard time',
    'giving up', 'burnout', 'rejection', 'failed', 'can\'t',
    'feeling', 'feel', 'emotional', 'depressed', 'sad', 'unmotivated',
    'hopeless', 'stressed', 'pressure', 'imposter'
]

# Career/action-oriented intent: triggers workshop and program recommendations.
ACTION_KEYWORDS = [
    'get an agent', 'find agent', 'need agent', 'want agent', 'sign with agent',
    'more auditions', 'book', 'booking', 'callbacks', 'improve',
    'better', 'self-tape', 'materials', 'headshots', 'reel',
    'network', 'connections', 'industry', 'career', 'strategy',
    'agent prep', 'total agent prep', 'workshop', 'class', 'training',
    'results', 'success', 'grow', 'advance', 'level up'
]

# Studio policy questions (refunds, attendance, rescheduling, cancellation).
POLICY_KEYWORDS = [
    'refund', 'refunds', 'money back',
    'attend', 'attendance', 'miss', 'missed', 'missing', 'absent',
    'late', 'lateness', 'tardy',
    'reschedule', 'change date', 'move class',
    'credit', 'credits',
    'cancel', 'cancellation', 'canceling',
    'policy', 'policies'
]

# Phrases signalling the user wants a longer, more detailed answer.
DETAIL_SYNONYMS = [
    'detail', 'details', 'explain', 'elaborate', 'tell me more',
    'more info', 'describe', 'thorough', 'comprehensive'
]

# System-prompt preamble shared by every LLM prompt built in this module.
PERSONA_INSTRUCTION = """
You are a warm, encouraging mentor at Get Scene Studios. Your goal is to help actors navigate their careers with confidence.
- Sound natural and human, not scripted or robotic. Use conversational transitions like "I'd suggest starting with..." or "A great way to approach this is..."
- Be encouraging but practical. Acknowledge that the acting journey is a marathon, not a sprint.
- Help the user THINK: Instead of just giving an answer, add a brief "mentorship flourish" that explains the value of a recommendation (e.g., "This workshop is great because it gets you comfortable with the pressure of a real callback.")
"""
83
+
84
+ # ============================================================================
85
+ # HELPER FUNCTIONS
86
+ # ============================================================================
87
+
88
def calculate_workshop_confidence(w: Dict) -> float:
    """Score how complete a scraped workshop record is, from 0.0 to 1.0.

    Title and instructor carry the most weight; date, time and source URL
    contribute the rest. Empty-string fields count as missing.
    """
    weights = (
        ('title', 0.3),
        ('instructor_name', 0.3),
        ('date', 0.2),
        ('time', 0.1),
        ('source_url', 0.1),
    )
    total = 0.0
    for field, weight in weights:
        if w.get(field):
            total += weight
    return round(total, 2)
97
+
98
def get_embedding(text):
    """Return the OpenAI embedding vector for *text* using EMBED_MODEL.

    Newlines are flattened to spaces before embedding, per OpenAI guidance.
    """
    cleaned = text.replace("\n", " ").strip()
    result = openai.embeddings.create(input=[cleaned], model=EMBED_MODEL)
    return result.data[0].embedding
102
+
103
def cosine_similarity(a, b):
    """Cosine similarity of two vectors.

    Returns 0.0 when either vector has zero magnitude. This matters here
    because get_current_workshops inserts an all-zero placeholder embedding
    when embedding generation fails; the original code divided by zero for
    such vectors and produced NaN, poisoning the similarity ranking.
    """
    a = np.array(a)
    b = np.array(b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom
107
+
108
+ # ============================================================================
109
+ # WORKSHOP SCRAPING FUNCTIONS
110
+ # ============================================================================
111
+
112
def scrape_workshops_from_squarespace(url: str) -> List[Dict[str, str]]:
    """Extract workshops using robust Squarespace JSON + HTML parsing system.

    Strategy: first hit the Squarespace ``?format=json`` endpoint and parse
    the embedded page HTML; if that yields nothing, fall back to scraping the
    rendered page directly. Returns a list of workshop dicts, or [] on any
    network/parsing failure (errors are logged, never raised).
    """
    # Browser-like User-Agent so Squarespace does not block the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # First try the Squarespace JSON API
        json_url = f"{url}?format=json"
        print(f"πŸ” Trying Squarespace JSON API: {json_url}")

        response = requests.get(json_url, headers=headers, timeout=10)
        if response.status_code == 200:
            try:
                json_data = response.json()
                workshops = extract_workshops_from_json(json_data, json_url)
                if workshops:
                    print(f"βœ… Extracted {len(workshops)} workshops from JSON API")
                    return workshops
                else:
                    print("❌ No workshops found in JSON, falling back to HTML")
            except json.JSONDecodeError:
                print("❌ Invalid JSON response, falling back to HTML")

        # Fallback to HTML scraping if JSON fails
        print(f"πŸ“„ Falling back to HTML scraping for {url}")
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        workshops = parse_workshops_from_html(soup, url)

        if workshops:
            print(f"βœ… Extracted {len(workshops)} workshops from HTML parsing")
            return workshops
        else:
            print("❌ No workshops found in HTML")
            return []

    except Exception as e:
        # Best-effort scraper: swallow all errors so a site outage cannot
        # crash the app; callers treat [] as "no data".
        print(f"❌ Error scraping workshops from {url}: {e}")
        return []
154
+
155
def extract_workshops_from_json(data: Any, source_url: str) -> List[Dict[str, str]]:
    """Pull workshop records out of a Squarespace ``?format=json`` payload.

    Squarespace embeds the rendered page HTML under the 'mainContent' key;
    that HTML is re-parsed with the regular HTML pipeline. Returns [] when
    the payload is not a dict or carries no usable mainContent string.
    """
    if not isinstance(data, dict):
        return []

    main_content_html = data.get('mainContent')
    if not isinstance(main_content_html, str):
        return []

    print(f"🎯 Found mainContent HTML! Length: {len(main_content_html)} characters")

    soup = BeautifulSoup(main_content_html, 'html.parser')
    return parse_workshops_from_html(soup, source_url)
171
+
172
def parse_workshops_from_html(soup, source_url: str) -> List[Dict[str, str]]:
    """Enhanced HTML parsing specifically for workshop content.

    Scans likely Squarespace item/card containers, keeps text blocks that
    mention workshop-related keywords, and parses each into a workshop dict.
    Exact-duplicate container texts and near-duplicate workshop records
    (same instructor/date/title) are filtered out.
    """
    workshops = []
    workshop_texts = set()  # raw container texts already processed

    print(f"πŸ” ENHANCED HTML PARSING:")

    # Method 1: Find individual workshop containers by class-name heuristics.
    potential_containers = soup.find_all(['div', 'section', 'article'],
        attrs={'class': re.compile(r'(item|card|product|workshop|class)', re.I)})

    print(f" Found {len(potential_containers)} potential workshop containers")

    for container in potential_containers:
        workshop_text = container.get_text(strip=True)

        # Skip trivially short blocks and exact repeats of earlier containers.
        if len(workshop_text) < 30 or workshop_text in workshop_texts:
            continue

        # Only attempt regex parsing when the text looks workshop-related.
        if any(keyword in workshop_text.lower() for keyword in ['with', 'casting', 'director', 'agent', 'perfect submission', 'crush the callback', 'get scene']):
            workshop = extract_single_workshop_from_text(workshop_text, source_url)
            if workshop and not is_duplicate_workshop(workshop, workshops):
                workshops.append(workshop)
                workshop_texts.add(workshop_text)

    print(f"🎯 TOTAL UNIQUE WORKSHOPS FOUND: {len(workshops)}")
    return workshops
199
+
200
def extract_single_workshop_from_text(text: str, source_url: str) -> Dict[str, str]:
    """Extract workshop info from a single text block.

    Strips price/badge noise, then tries three regex patterns in priority
    order (title-first, professional-first, bare casting-director form).
    Returns a workshop dict from the first pattern that matches, or None.
    """

    # Clean up the text: drop "$1,234.00" prices and status badges, then
    # collapse all whitespace runs to single spaces.
    text = re.sub(r'\$[0-9,]+\.00', '', text)
    text = re.sub(r'Featured|Sold Out', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip()
    # NOTE(review): newlines were already collapsed by \s+ above, so this
    # substitution is effectively a no-op.
    text = re.sub(r'\n+', ' ', text)

    # The list index doubles as the pattern id passed to parse_pattern_match,
    # which interprets the capture groups differently per pattern.
    patterns = [
        # Pattern A: "Title with Professional Name on Date @ Time"
        r'((?:The\s+)?(?:Perfect\s+Submission|Crush\s+the\s+Callback|Get\s+Scene\s+360?))\s+with\s+((?:Casting\s+Director|CD|DDO\s+Agent|Manager|Director|Producer|Agent|Acting\s+Coach|Talent\s+Agent|Executive\s+Casting\s+Producer|Atlanta\s+Models\s+&\s+Talent\s+President)\s+[A-Za-z\s\-]+?)\s+on\s+(\w+\s+\d+(?:st|nd|rd|th)?)\s*[@\s]*([0-9:]+\s*(?:AM|PM))?',

        # Pattern B: "Professional Name, Title on Date @ Time"
        r'((?:Atlanta\s+Models\s+&\s+Talent\s+President|Talent\s+Agent|Casting\s+Director|Casting\s+Associate|Manager|Director|Producer|Agent|Executive\s+Casting\s+Producer)\s+[A-Za-z\s\-]+?),\s+((?:The\s+)?(?:Perfect\s+Submission|Crush\s+the\s+Callback|Get\s+Scene\s+360?))\s+on\s+(\w+\s+\d+(?:st|nd|rd|th)?)\s*[@\s]*([0-9:]+\s*(?:AM|PM))?',

        # Pattern C: "Casting Director Name, Date at Time"
        r'(Casting\s+Director|Casting\s+Associate)\s+([A-Za-z\s\-]+?),\s+(\w+\s+\d+(?:st|nd|rd|th)?)\s*(?:at\s+)?([0-9:]+\s*(?:AM|PM))?',
    ]

    for i, pattern in enumerate(patterns):
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return parse_pattern_match(match, i, source_url)

    return None
226
+
227
def parse_pattern_match(match, pattern_index: int, source_url: str) -> Dict[str, str]:
    """Parse a regex match based on pattern type.

    *pattern_index* selects the capture-group layout used by
    extract_single_workshop_from_text: 0 = title-first, 1 = professional-first,
    2 = bare casting-director form. Returns a workshop dict, or None when the
    match lacks an instructor name or date, or any parsing step raises.
    """
    try:
        if pattern_index == 0: # Pattern A: groups are (title, professional, date, time)
            workshop_title = match.group(1).strip()
            professional_full = match.group(2).strip()
            date_str = match.group(3).strip()
            time_str = match.group(4).strip() if match.group(4) else ""

            # Expand the "CD" abbreviation before splitting title from name.
            if professional_full.startswith('CD '):
                professional_full = 'Casting Director ' + professional_full[3:]

            instructor_title, instructor_name = parse_professional_info(professional_full)

        elif pattern_index == 1: # Pattern B: groups are (professional, title, date, time)
            professional_full = match.group(1).strip()
            workshop_title = match.group(2).strip()
            date_str = match.group(3).strip()
            time_str = match.group(4).strip() if match.group(4) else ""

            instructor_title, instructor_name = parse_professional_info(professional_full)

        elif pattern_index == 2: # Pattern C: groups are (title-role, name, date, time)
            instructor_title = match.group(1).strip()
            instructor_name = match.group(2).strip()
            date_str = match.group(3).strip()
            time_str = match.group(4).strip() if match.group(4) else ""
            # Pattern C carries no explicit workshop title, so use a generic one.
            workshop_title = "Casting Workshop"

        # NOTE(review): a pattern_index outside 0-2 would hit an
        # UnboundLocalError below; callers only pass 0-2.
        if instructor_name and date_str:
            # Create full_text for embedding
            full_text = f"{workshop_title} with {instructor_title} {instructor_name}"
            if date_str:
                full_text += f" on {date_str}"
            if time_str:
                full_text += f" at {clean_time(time_str)}"

            return {
                'title': workshop_title,
                'instructor_name': instructor_name,
                'instructor_title': instructor_title,
                'date': date_str,
                'time': clean_time(time_str),
                'full_text': full_text,
                'source_url': source_url
            }

    except Exception as e:
        print(f"Error parsing pattern match: {e}")

    return None
278
+
279
def parse_professional_info(professional_full: str) -> tuple:
    """Split a string like 'Casting Director Jane Doe' into (title, name).

    Handles both "Title Name" and "Name, Title" orderings for a fixed set of
    known multi-word titles, then falls back to single-word title detection,
    then to treating the first word as the title. Returns ('', original)
    when nothing can be split.
    """
    professional_full = re.sub(r'\s+', ' ', professional_full).strip()

    # Known multi-word titles, checked in order (most specific first so e.g.
    # 'Talent Agent' never shadows 'Atlanta Models & Talent President').
    for known_title in ('Atlanta Models & Talent President',
                        'Executive Casting Producer',
                        'Casting Director',
                        'Casting Associate',
                        'DDO Agent',
                        'Talent Agent',
                        'Acting Coach'):
        pos = professional_full.find(known_title)
        if pos == -1:
            continue
        if pos == 0:
            # "Title Name" -> name is everything after the title.
            return known_title, professional_full[len(known_title):].strip()
        # "Name, Title" -> name is everything before the title.
        return known_title, professional_full[:pos].strip().rstrip(',')

    # Fallback: look for a lone title word, optionally qualified by the word
    # immediately before it (e.g. "Casting" + "Director").
    words = professional_full.split()
    for idx, word in enumerate(words):
        if word not in ('Manager', 'Director', 'Producer', 'Agent', 'Coach', 'President'):
            continue
        if idx > 0 and words[idx - 1] in ('Casting', 'Talent', 'Executive', 'DDO', 'Acting'):
            found_title = f"{words[idx - 1]} {word}"
            remaining = words[:idx - 1] + words[idx + 1:]
        else:
            found_title = word
            remaining = words[:idx] + words[idx + 1:]
        return found_title, ' '.join(remaining).strip()

    # Last resort: first word is the title, the rest is the name.
    if len(words) >= 2:
        return words[0], ' '.join(words[1:])

    return '', professional_full
327
+
328
def clean_time(time_str: str) -> str:
    """Normalize a time string to 'H:MM AM/PM'.

    Missing minutes become ':00'; unrecognized input is returned stripped;
    empty input yields ''.
    """
    if not time_str:
        return ""

    m = re.search(r'(\d{1,2}):?(\d{0,2})\s*(AM|PM)', time_str, re.IGNORECASE)
    if not m:
        # Nothing that looks like a clock time: pass through, trimmed.
        return time_str.strip()

    hour = m.group(1)
    minute = m.group(2) or "00"
    meridiem = m.group(3).upper()
    return f"{hour}:{minute} {meridiem}"
341
+
342
def is_duplicate_workshop(new_workshop: Dict, existing_workshops: List[Dict]) -> bool:
    """Return True if *new_workshop* matches an already-collected record.

    Two records are duplicates when instructor and date match (case- and
    whitespace-insensitively) and their titles are identical, both contain
    'workshop', or one title contains the other.
    """
    new_name = new_workshop.get('instructor_name', '').strip().lower()
    new_date = new_workshop.get('date', '').strip().lower()
    new_title = new_workshop.get('title', '').strip().lower()

    for existing in existing_workshops:
        if existing.get('instructor_name', '').strip().lower() != new_name:
            continue
        if existing.get('date', '').strip().lower() != new_date:
            continue

        existing_title = existing.get('title', '').strip().lower()
        titles_match = (
            existing_title == new_title
            or ('workshop' in existing_title and 'workshop' in new_title)
            or existing_title in new_title
            or new_title in existing_title
        )
        if titles_match:
            return True

    return False
356
+
357
+ # ============================================================================
358
+ # DATABASE FUNCTIONS
359
+ # ============================================================================
360
+
361
def fetch_all_embeddings(table):
    """Load (id, full_text, embedding) tuples from *table*.

    The embedding column is stored as JSON text; rows whose embedding is
    missing or not valid JSON are skipped. The connection is closed even if
    the query raises (the original leaked it), and the bare ``except:`` is
    narrowed so real errors (KeyboardInterrupt, programming bugs) surface.

    NOTE: *table* is interpolated into the SQL, so it must come from trusted
    code paths, never from user input.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        cur.execute(f"SELECT id, full_text, embedding FROM {table}")
        rows = cur.fetchall()
    finally:
        conn.close()

    parsed = []
    for row_id, full_text, embedding_json in rows:
        try:
            parsed.append((row_id, full_text, json.loads(embedding_json)))
        except (TypeError, ValueError):
            # NULL or corrupt embedding JSON: skip the row.
            continue
    return parsed
374
+
375
def fetch_row_by_id(table, row_id):
    """Fetch one row from *table* by id, as a plain dict ({} when absent)."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row  # enables dict(row) conversion below
    cursor = conn.cursor()
    cursor.execute(f"SELECT * FROM {table} WHERE id = ?", (row_id,))
    result = cursor.fetchone()
    conn.close()
    if result is None:
        return {}
    return dict(result)
383
+
384
def get_session_state(session_id: str) -> Dict[str, Any]:
    """Load the user_sessions row for *session_id*, or a fresh default state."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    cur.execute("SELECT * FROM user_sessions WHERE session_id = ?", (session_id,))
    row = cur.fetchone()
    conn.close()

    if row is None:
        # No prior history for this session: return the default blank state.
        return {
            "preference": None,
            "msg_count": 0,
            "clarification_count": 0,
            "knowledge_context": "{}",
        }
    return dict(row)
395
+
396
def update_session_state(session_id: str, preference: str = None, increment_count: bool = True, increment_clarification: bool = False, reset_clarification: bool = False, knowledge_update: Dict = None):
    """Update session state with Knowledge Dictionary support.

    Upserts the user_sessions row for *session_id*:
    - overrides the stored format preference only when *preference* is given,
    - bumps the message counter (and clarification counter per the flags),
    - merges *knowledge_update* into the stored knowledge_context JSON,
    - once the message count exceeds 10, wipes preference/knowledge and
      restarts the counters (the "10-Message Memory Rule").
    """
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()

    # Check if exists
    cur.execute("SELECT preference, msg_count, clarification_count, knowledge_context FROM user_sessions WHERE session_id = ?", (session_id,))
    row = cur.fetchone()

    current_knowledge = {}
    if row:
        curr_pref, curr_count, curr_clarification, curr_knowledge_json = row
        try:
            current_knowledge = json.loads(curr_knowledge_json)
        except:
            # Corrupt stored JSON: start from an empty knowledge dict.
            current_knowledge = {}

        # Keep the existing preference unless the caller supplies a new one.
        new_pref = preference if preference else curr_pref
        new_count = curr_count + 1 if increment_count else curr_count

        # 10-Message Memory Rule: Reset if we hit the limit
        if new_count > 10:
            print(f"πŸ”„ Session {session_id} reached 10 messages. Resetting memory context.")
            new_count = 1
            new_pref = None
            current_knowledge = {}
            new_clarification = 0
        else:
            # reset takes precedence over increment when both flags are set.
            new_clarification = curr_clarification
            if reset_clarification:
                new_clarification = 0
            elif increment_clarification:
                new_clarification = curr_clarification + 1

        # Merge knowledge updates
        if knowledge_update:
            current_knowledge.update(knowledge_update)

        new_knowledge_json = json.dumps(current_knowledge)

        cur.execute("""
            UPDATE user_sessions
            SET preference = ?, msg_count = ?, clarification_count = ?, knowledge_context = ?, last_updated = CURRENT_TIMESTAMP
            WHERE session_id = ?
        """, (new_pref, new_count, new_clarification, new_knowledge_json, session_id))
    else:
        # First message for this session: insert a fresh row.
        new_pref = preference
        new_count = 1 if increment_count else 0
        new_clarification = 1 if increment_clarification else 0

        if knowledge_update:
            current_knowledge.update(knowledge_update)
        new_knowledge_json = json.dumps(current_knowledge)

        cur.execute("""
            INSERT INTO user_sessions (session_id, preference, msg_count, clarification_count, knowledge_context)
            VALUES (?, ?, ?, ?, ?)
        """, (session_id, new_pref, new_count, new_clarification, new_knowledge_json))

    conn.commit()
    conn.close()
457
+
458
def update_knowledge_from_question(session_id: str, question: str):
    """Extract attributes and update knowledge dictionary.

    Detects the user's format preference and topic from *question* via
    detect_preference / detect_question_category (defined elsewhere in this
    file) and persists any findings into the session's knowledge_context.
    Returns the dict of updates applied (possibly empty).
    """
    updates = {}

    # Extract Format (Online vs In-Studio preference)
    pref = detect_preference(question)
    if pref:
        updates['format'] = pref

    # Extract Topic
    cats = detect_question_category(question)
    if cats:
        # Prioritize specific topics over generic ones
        priority_topics = ['agent_seeking', 'beginner', 'audition_help', 'mentorship', 'pricing']
        for topic in priority_topics:
            if topic in cats:
                updates['topic'] = topic
                break
        # Fall back to the first detected category when none are priority.
        if 'topic' not in updates and cats:
            updates['topic'] = cats[0]

    if updates:
        # Persist without consuming a message from the 10-message window.
        update_session_state(session_id, knowledge_update=updates, increment_count=False)
        return updates
    return {}
483
+
484
def fetch_all_faq_embeddings():
    """Load all FAQ rows as (id, question, answer, embedding) tuples.

    The embedding column is JSON text; rows that fail to parse are skipped.
    """
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute("SELECT id, question, answer, embedding FROM faq_entries")
    rows = cur.fetchall()
    conn.close()

    parsed = []
    for faq_id, question, answer, embedding_json in rows:
        try:
            embedding = json.loads(embedding_json)
        except Exception:
            # Corrupt/NULL embedding: drop the row from similarity search.
            continue
        parsed.append((faq_id, question, answer, embedding))
    return parsed
497
+
498
+ # ============================================================================
499
+ # WORKSHOP FUNCTIONS
500
+ # ============================================================================
501
+
502
def get_current_workshops():
    """Get current workshops with caching.

    Returns (workshops, embeddings) as parallel lists. Serves the module-level
    workshop_cache when it is younger than cache_duration; otherwise re-scrapes
    both schedule pages, drops low-confidence records, embeds the survivors and
    refreshes the cache. Falls back to the stale cache (or empty lists) when
    scraping yields nothing usable.
    """
    global workshop_cache

    now = datetime.now()

    # Check if cache is still valid
    if (workshop_cache['last_updated'] and
        now - workshop_cache['last_updated'] < workshop_cache['cache_duration'] and
        workshop_cache['data']):
        print("Using cached workshop data")
        return workshop_cache['data'], workshop_cache['embeddings']

    print("Fetching fresh workshop data...")

    # Use robust Squarespace scraping system
    online_workshops = scrape_workshops_from_squarespace("https://www.getscenestudios.com/online")
    instudio_workshops = scrape_workshops_from_squarespace("https://www.getscenestudios.com/instudio")

    all_workshops = online_workshops + instudio_workshops

    # Data Integrity: Validate and score workshops
    valid_workshops = []
    total_score = 0
    for w in all_workshops:
        conf = calculate_workshop_confidence(w)
        # 0.8 threshold requires at least title + instructor + date.
        if conf >= 0.8:
            valid_workshops.append(w)
            total_score += conf
        else:
            print(f"⚠️ Rejecting weak record (Confidence: {conf}): {w.get('title', 'Unknown')}", flush=True)

    avg_conf = total_score / len(valid_workshops) if valid_workshops else 0
    print(f"πŸ“Š DATA INTEGRITY: Found {len(all_workshops)} total, {len(valid_workshops)} valid (Confidence >= 0.8)", flush=True)
    print(f"πŸ“ˆ Retrieval Confidence: {avg_conf:.2f} (Average)", flush=True)

    all_workshops = valid_workshops

    if not all_workshops:
        # Scrape produced nothing usable: prefer stale cached data over nothing.
        if workshop_cache['data']:
            print("Scraping failed, using cached data")
            return workshop_cache['data'], workshop_cache['embeddings']
        else:
            print("No workshop data available")
            return [], []

    # Generate embeddings for workshops
    workshop_embeddings = []
    for workshop in all_workshops:
        try:
            embedding = get_embedding(workshop['full_text'])
            workshop_embeddings.append(embedding)
        except Exception as e:
            print(f"Error generating embedding for workshop: {e}")
            # Zero-vector placeholder keeps the lists aligned; 1536 matches
            # the text-embedding-3-small output dimension.
            workshop_embeddings.append([0] * 1536)

    # Update cache
    workshop_cache['data'] = all_workshops
    workshop_cache['embeddings'] = workshop_embeddings
    workshop_cache['last_updated'] = now

    print(f"Cached {len(all_workshops)} workshops")
    return all_workshops, workshop_embeddings
565
+
566
def find_top_workshops(user_embedding, k=3):
    """Return the *k* workshops most similar to *user_embedding*.

    Each result is a (score, index, full_text, workshop_dict) tuple, ordered
    best-first. Returns [] when no workshop data is available.
    """
    workshops, workshop_embeddings = get_current_workshops()
    if not workshops:
        return []

    scored = []
    for idx, (workshop, emb) in enumerate(zip(workshops, workshop_embeddings)):
        try:
            similarity = cosine_similarity(user_embedding, emb)
            scored.append((similarity, idx, workshop['full_text'], workshop))
        except Exception as e:
            # A bad embedding should not sink the whole ranking.
            print(f"Error calculating similarity: {e}")
            continue

    # Tuples sort by score first; the unique index breaks ties before the
    # (unorderable) dict element is ever compared.
    scored.sort(reverse=True)
    return scored[:k]
584
+
585
def find_top_k_matches(user_embedding, dataset, k=3):
    """Rank *dataset* entries by similarity to *user_embedding*; return top *k*.

    *dataset* holds (id, text, embedding) tuples; results are
    (score, id, text) tuples, best-first.
    """
    scored = [
        (cosine_similarity(user_embedding, emb), entry_id, text)
        for entry_id, text, emb in dataset
    ]
    scored.sort(reverse=True)
    return scored[:k]
592
+
593
+ # ============================================================================
594
+ # PROMPT BUILDING FUNCTIONS
595
+ # ============================================================================
596
+
597
def generate_enriched_links(row):
    """Build a one-item list with a markdown link to a podcast episode.

    *row* is a podcast DB row with 'youtube_url', 'guest_name' and
    'highlight_json' (JSON list whose first item may carry a 'summary').
    The original crashed with TypeError/JSONDecodeError when highlight_json
    was NULL or corrupt, and KeyError when the first highlight lacked a
    'summary' key; all three now fall back to the generic blurb.
    """
    base_url = row.get("youtube_url")
    guest_name = row.get("guest_name", "")

    # Tolerate missing/NULL/corrupt highlight JSON instead of raising.
    try:
        highlights = json.loads(row.get("highlight_json") or "[]")
    except (TypeError, ValueError):
        highlights = []
    summary = highlights[0].get("summary", "") if highlights else ""

    # Truncate summary to first sentence only, capped at 120 characters.
    if summary:
        first_sentence = summary.split('.')[0] + '.'

        if len(first_sentence) > 120:
            short_summary = first_sentence[:117] + "..."
        else:
            short_summary = first_sentence
    else:
        short_summary = "Industry insights for actors"

    markdown = f"🎧 [Watch {guest_name}'s episode here]({base_url}) - {short_summary}"
    return [markdown]
616
+
617
+ def build_enhanced_prompt(user_question, context_results, top_workshops, user_preference=None, enriched_podcast_links=None, wants_details=False, current_topic=None):
618
+ """Builds the system prompt with strict formatting rules."""
619
+
620
+ # Free classes are ONLY available online (never in-studio)
621
+ free_class_url = "https://www.getscenestudios.com/online"
622
+
623
+ single_podcast = ""
624
+
625
+ # helper for clean links
626
+ def format_workshop(w):
627
+ if not w.get('title') or not w.get('instructor_name') or not w.get('date'):
628
+ return None
629
+
630
+ link = "https://www.getscenestudios.com/instudio" if "/instudio" in w.get('source_url', '') else "https://www.getscenestudios.com/online"
631
+
632
+ # User Preference Filtering
633
+ if user_preference:
634
+ w_type = "Online" if "online" in w.get('source_url', '') else "In-Studio"
635
+ if user_preference.lower() != w_type.lower():
636
+ return None
637
+
638
+ return f"- [{w['title']}]({link}) with {w['instructor_name']} ({w.get('time', '')}) on {w['date']}"
639
+
640
+ # Prepare workshop list (Top 3 max to display, but check top 10 for better filtering)
641
+ workshop_lines = []
642
+ if top_workshops:
643
+ for _, _, _, w_data in top_workshops[:10]: # Check top 10, take top 3 valid after filtering
644
+ formatted = format_workshop(w_data)
645
+ if formatted:
646
+ workshop_lines.append(formatted)
647
+
648
+
649
+ workshop_text = ""
650
+ if workshop_lines:
651
+ workshop_text = "\n".join(workshop_lines[:3])
652
+ else:
653
+ # Fallback link should respect user preference
654
+ if user_preference and user_preference.lower() == 'instudio':
655
+ workshop_text = "Check our schedule for current availability at https://www.getscenestudios.com/instudio"
656
+ else:
657
+ workshop_text = "Check our schedule for current availability at https://www.getscenestudios.com/online"
658
+
659
+ # Handle missing podcast data strictly
660
+ if not enriched_podcast_links:
661
+ single_podcast = "Our latest industry insights are available on YouTube: https://www.youtube.com/@GetSceneStudios"
662
+ else:
663
+ single_podcast = enriched_podcast_links[0]
664
+
665
+ # --- EMOTIONAL / SUPPORT MODE CHECK ---
666
+ is_emotional = detect_response_type(user_question) == "support"
667
+
668
+ if is_emotional:
669
+ prompt = f"""{PERSONA_INSTRUCTION}
670
+
671
+ You are acting in SUPPORT MODE. Provide a response with extra empathy.
672
+
673
+ CRITICAL INSTRUCTIONS:
674
+ - Answer with EMPATHY and ENCOURAGEMENT (2-3 sentences)
675
+ - Acknowledge their feelings ("stuck", "frustrated", etc.)
676
+ - DO NOT upsell paid workshops in this response.
677
+ - Offer ONE gentle step: a free resource (podcast or free class).
678
+
679
+ USER'S QUESTION: {user_question}
680
+
681
+ REQUIRED RESPONSE FORMAT:
682
+ [Your empathetic, supportive response]
683
+
684
+ Here's a free resource that might help:
685
+ 1. Podcast episode: {single_podcast}
686
+ 2. Or join our free class: {free_class_url}
687
+
688
+ Questions? Contact info@getscenestudios.com"""
689
+ return prompt
690
+
691
+ # --- STANDARD LOGIC FOR CONTEXT SNIPPET ---
692
+ question_lower = user_question.lower()
693
+ context_snippet = ""
694
+
695
+ # Priority 1: Direct Keywords in current question
696
+ detected_topic = None
697
+ if any(word in question_lower for word in ['agent', 'representation', 'rep', 'manager']):
698
+ detected_topic = 'agent'
699
+ elif any(word in question_lower for word in ['beginner', 'new', 'start', 'beginning']):
700
+ detected_topic = 'beginner'
701
+ elif any(word in question_lower for word in ['callback', 'audition', 'tape', 'self-tape', 'booking']):
702
+ detected_topic = 'audition'
703
+ elif any(word in question_lower for word in ['mentorship', 'coaching']):
704
+ detected_topic = 'mentorship'
705
+ elif any(word in question_lower for word in ['price', 'cost', 'how much']):
706
+ detected_topic = 'pricing'
707
+
708
+ # Priority 2: Fallback to session context if current question is ambiguous
709
+ if not detected_topic and current_topic:
710
+ topic_map = {
711
+ 'agent_seeking': 'agent',
712
+ 'beginner': 'beginner',
713
+ 'audition_help': 'audition',
714
+ 'mentorship': 'mentorship',
715
+ 'pricing': 'pricing'
716
+ }
717
+ detected_topic = topic_map.get(current_topic)
718
+
719
+ # Assign snippet based on topic
720
+ if detected_topic == 'agent':
721
+ context_snippet = "Get Scene Studios has helped 1000+ actors land representation. Total Agent Prep offers live practice with working agents (age 16+, limited to 12 actors)."
722
+ elif detected_topic == 'beginner':
723
+ context_snippet = "Get Scene Studios specializes in getting actors audition-ready fast with camera technique and professional self-tape skills."
724
+ elif detected_topic == 'audition':
725
+ context_snippet = "Get Scene offers Crush the Callback (Zoom simulation) and Perfect Submission (self-tape mastery) for actors refining their technique."
726
+ elif detected_topic == 'mentorship':
727
+ context_snippet = "Working Actor Mentorship is a 6-month program ($3,000) with structured feedback and industry access."
728
+ elif detected_topic == 'pricing':
729
+ context_snippet = "Get Scene Studios pricing varies by program. Most workshops cap at 12-14 actors for personalized feedback."
730
+ else:
731
+ context_snippet = "Get Scene Studios (founded by Jesse Malinowski) offers training for TV/film actors at all levels."
732
+
733
+ preference_instruction = ""
734
+ if not user_preference:
735
+ preference_instruction = """
736
+ IMPORTANT: We need to know if the user prefers "Online" or "In-Studio" workshops.
737
+ If their question implies a location or they haven't specified, ask: "Are you looking for Online or In-Studio training?" as part of your response.
738
+ """
739
+ else:
740
+ preference_instruction = f"""
741
+ USER PREFERENCE KNOWN: {user_preference.upper()}
742
+ 1. DO NOT ask "Online or In-Studio" again.
743
+ 2. Ensure your recommendations align with {user_preference.upper()} where possible.
744
+ """
745
+
746
+ # Brevity & Cognitive Load: Direct instructions based on user intent
747
+ detail_instruction = "Answer the user's question briefly (2-3 sentences max, ≀150 words total)."
748
+ if wants_details:
749
+ detail_instruction = "Provide a detailed and thorough explanation for the user's request, but keep it structured and readable."
750
+
751
+ prompt = f"""{PERSONA_INSTRUCTION}
752
+
753
+ {context_snippet}
754
+
755
+ CRITICAL INSTRUCTIONS:
756
+ - {detail_instruction}
757
+ - Use natural, human transitions between your answer and the recommendations.
758
+ - For each recommendation, add a tiny bit of "mentor advice" on why it helps.
759
+ - Then ALWAYS provide exactly these three numbered recommendations (1. 2. 3.):
760
+ - Use ONLY the provided links - do not invent recommendations
761
+ - Focus on clean, readable formatting.{preference_instruction}
762
+
763
+ USER'S QUESTION: {user_question}
764
+
765
+ REQUIRED RESPONSE FORMAT:
766
+ [Your brief answer to their question]
767
+
768
+ Here's your path forward:
769
+ 1. Free class (start here, no credit card required): {free_class_url}
770
+ 2. Recommended podcast episode:
771
+ {single_podcast}
772
+ 3. Relevant paid workshop:
773
+ {workshop_text}
774
+
775
+ Questions? Contact info@getscenestudios.com"""
776
+
777
+ return prompt
778
+
779
+ # ============================================================================
780
+ # DETECTION FUNCTIONS
781
+ # ============================================================================
782
+
783
+ def detect_question_category(question):
784
+ """Categorize user questions for better context injection"""
785
+ question_lower = question.lower()
786
+
787
+ categories = {
788
+ 'agent_seeking': ['agent', 'representation', 'rep', 'manager', 'get an agent'],
789
+ 'beginner': ['beginner', 'new', 'start', 'beginning', 'first time', 'never acted'],
790
+ 'audition_help': ['audition', 'callback', 'tape', 'self-tape', 'submission'],
791
+ 'mentorship': ['mentorship', 'coaching', 'intensive', 'mentor', 'one-on-one'],
792
+ 'pricing': ['price', 'cost', 'pricing', '$', 'money', 'payment', 'fee'],
793
+ 'classes': ['class', 'workshop', 'training', 'course', 'learn'],
794
+ 'membership': ['membership', 'join', 'member', 'gsp', 'plus'],
795
+ 'technical': ['self-tape', 'equipment', 'lighting', 'editing', 'camera']
796
+ }
797
+
798
+ detected = []
799
+ for category, keywords in categories.items():
800
+ if any(keyword in question_lower for keyword in keywords):
801
+ detected.append(category)
802
+
803
+ return detected
804
+
805
+ def detect_response_type(question):
806
+ """Detect if question is emotional/support vs action/results oriented"""
807
+ question_lower = question.lower()
808
+
809
+ emotional_count = sum(1 for word in EMOTIONAL_KEYWORDS if word in question_lower)
810
+ action_count = sum(1 for word in ACTION_KEYWORDS if word in question_lower)
811
+
812
+ if emotional_count > 0 and emotional_count >= action_count:
813
+ return "support"
814
+ return "standard"
815
+
816
+ def detect_policy_issue(question):
817
+ """Detect if question violates hard policy rules"""
818
+ question_lower = question.lower()
819
+ return any(word in question_lower for word in POLICY_KEYWORDS)
820
+
821
+ def detect_preference(question):
822
+ """Detect if user is stating a preference"""
823
+ q_lower = question.lower()
824
+ if 'online' in q_lower and 'studio' not in q_lower:
825
+ return 'online'
826
+ if ('studio' in q_lower or 'person' in q_lower or 'atlanta' in q_lower) and 'online' not in q_lower:
827
+ return 'instudio'
828
+ return None
829
+
830
+ def get_contextual_business_info(categories):
831
+ """Return relevant business information based on detected question categories"""
832
+
833
+ context_map = {
834
+ 'agent_seeking': {
835
+ 'programs': ['Total Agent Prep', 'Working Actor Mentorship'],
836
+ 'key_info': 'Live pitch practice with real agents, Actors Access optimization',
837
+ 'journey': 'Total Agent Prep β†’ GSP β†’ Mentorship for sustained progress'
838
+ },
839
+ 'beginner': {
840
+ 'programs': ['Free Classes', 'Get Scene 360', 'Get Scene Plus'],
841
+ 'key_info': 'Start with holistic foundation, build consistency',
842
+ 'journey': 'Free class β†’ Get Scene 360 β†’ GSP membership'
843
+ },
844
+ 'audition_help': {
845
+ 'programs': ['Perfect Submission', 'Crush the Callback', 'Audition Insight'],
846
+ 'key_info': 'Self-tape mastery, callback simulation, pro feedback',
847
+ 'journey': 'Perfect Submission β†’ GSP for ongoing Audition Insight'
848
+ },
849
+ 'mentorship': {
850
+ 'programs': ['Working Actor Mentorship'],
851
+ 'key_info': '6-month intensive with structured feedback and accountability',
852
+ 'journey': 'Ready for commitment β†’ WAM β†’ Advanced workshops'
853
+ }
854
+ }
855
+
856
+ relevant_info = {}
857
+ for category in categories:
858
+ if category in context_map:
859
+ relevant_info[category] = context_map[category]
860
+
861
+ return relevant_info
862
+
863
+ # ============================================================================
864
+ # MAIN CHATBOT LOGIC
865
+ # ============================================================================
866
+
867
+ def process_question(question: str, current_session_id: str):
868
+ """Main function to process user questions - replaces Flask /ask endpoint"""
869
+
870
+ if not question:
871
+ return "Question is required"
872
+
873
+ # 0. HARD POLICY CHECK
874
+ if detect_policy_issue(question):
875
+ conn = sqlite3.connect(DB_PATH)
876
+ cur = conn.cursor()
877
+ cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))
878
+ conn.commit()
879
+ conn.close()
880
+
881
+ return "Please email info@getscenestudios.com."
882
+
883
+ # 1. Handle Session & Knowledge State
884
+ update_knowledge_from_question(current_session_id, question)
885
+
886
+ session_state = get_session_state(current_session_id)
887
+
888
+ try:
889
+ knowledge = json.loads(session_state.get('knowledge_context', '{}'))
890
+ except:
891
+ knowledge = {}
892
+
893
+ user_preference = knowledge.get('format')
894
+ current_topic = knowledge.get('topic')
895
+
896
+ if not user_preference:
897
+ user_preference = session_state.get('preference')
898
+
899
+ update_session_state(current_session_id, increment_count=True)
900
+
901
+ # Create embedding of user question
902
+ user_embedding = get_embedding(question)
903
+
904
+ # Check FAQ embeddings first
905
+ faq_data = fetch_all_faq_embeddings()
906
+ top_faqs = []
907
+
908
+ for entry_id, question_text, answer_text, emb in faq_data:
909
+ score = cosine_similarity(user_embedding, emb)
910
+ top_faqs.append((score, entry_id, question_text, answer_text))
911
+ top_faqs.sort(reverse=True)
912
+
913
+ faq_threshold = 0.85
914
+ ambiguous_threshold = 0.70
915
+
916
+ # If high-confidence FAQ match found
917
+ if top_faqs and top_faqs[0][0] >= faq_threshold:
918
+ update_session_state(current_session_id, reset_clarification=True, increment_count=False)
919
+
920
+ best_score, faq_id, question_text, answer_text = top_faqs[0]
921
+
922
+ mentor_framing_start = "That's a great question! Here's the information on that:"
923
+ mentor_framing_end = "I hope that clears things up! Remember, every bit of knowledge helps you steer your career in the right direction."
924
+
925
+ enhanced_answer = f"{mentor_framing_start}\n\n{answer_text}"
926
+
927
+ categories = detect_question_category(question)
928
+ contextual_info = get_contextual_business_info(categories)
929
+
930
+ if contextual_info:
931
+ next_steps = []
932
+ for category, info in contextual_info.items():
933
+ next_steps.append(f"A great next step for you: {info['journey']}")
934
+
935
+ if next_steps:
936
+ enhanced_answer += f"\n\n{chr(10).join(next_steps)}"
937
+
938
+ enhanced_answer += f"\n\n{mentor_framing_end}\n\nQuestions? Contact info@getscenestudios.com"
939
+
940
+ # Log question
941
+ conn = sqlite3.connect(DB_PATH)
942
+ cur = conn.cursor()
943
+ cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))
944
+ conn.commit()
945
+ conn.close()
946
+
947
+ return enhanced_answer
948
+
949
+ elif top_faqs and top_faqs[0][0] >= ambiguous_threshold:
950
+ # AMBIGUOUS ZONE
951
+ needs_clarification = False
952
+
953
+ if not user_preference:
954
+ needs_clarification = True
955
+
956
+ is_generic_query = any(w in question.lower() for w in ['price', 'cost', 'how much', 'schedule', 'when'])
957
+ if is_generic_query and not current_topic:
958
+ needs_clarification = True
959
+
960
+ clarification_count = session_state.get('clarification_count', 0)
961
+ if clarification_count > 0:
962
+ needs_clarification = False
963
+
964
+ if needs_clarification:
965
+ update_session_state(current_session_id, increment_clarification=True, increment_count=False)
966
+ best_match_q = top_faqs[0][2]
967
+ return f"Did you mean: {best_match_q}?"
968
+
969
+ # Auto-Resolve
970
+ update_session_state(current_session_id, reset_clarification=True, increment_count=False)
971
+
972
+ best_score, faq_id, question_text, answer_text = top_faqs[0]
973
+
974
+ categories = detect_question_category(question)
975
+ contextual_info = get_contextual_business_info(categories)
976
+
977
+ enhanced_answer = answer_text
978
+ if contextual_info:
979
+ next_steps = []
980
+ for category, info in contextual_info.items():
981
+ next_steps.append(f"Next step: Consider {info['journey']}")
982
+
983
+ if next_steps:
984
+ enhanced_answer += f"\n\n{chr(10).join(next_steps)}"
985
+ enhanced_answer += f"\n\nQuestions? Contact info@getscenestudios.com"
986
+
987
+ conn = sqlite3.connect(DB_PATH)
988
+ cur = conn.cursor()
989
+ cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))
990
+ conn.commit()
991
+ conn.close()
992
+
993
+ return enhanced_answer
994
+
995
+ else:
996
+ # 3. HALLUCINATION GUARD
997
+ categories = detect_question_category(question)
998
+
999
+ has_session_context = (current_topic is not None) or (user_preference is not None)
1000
+
1001
+ is_acting_related = (
1002
+ len(categories) > 0 or
1003
+ detect_response_type(question) == "support" or
1004
+ any(k in question.lower() for k in ACTION_KEYWORDS) or
1005
+ any(k in question.lower() for k in ['class', 'workshop', 'coaching', 'studio', 'acting', 'online', 'person', 'atlanta', 'training']) or
1006
+ (has_session_context and len(question.split()) <= 10)
1007
+ )
1008
+
1009
+ if not is_acting_related:
1010
+ return "I'm not exactly sure about that. Please email info@getscenestudios.com so a member of our team can get you the most accurate answer!"
1011
+
1012
+ # 4. LLM PATH
1013
+ update_session_state(current_session_id, reset_clarification=True, increment_count=False)
1014
+ podcast_data = fetch_all_embeddings("podcast_episodes")
1015
+ top_workshops = find_top_workshops(user_embedding, k=10)
1016
+ top_podcasts = find_top_k_matches(user_embedding, podcast_data, k=3)
1017
+
1018
+ enriched_podcast_links = []
1019
+ for _, podcast_id, _ in top_podcasts:
1020
+ row = fetch_row_by_id("podcast_episodes", podcast_id)
1021
+ enriched_podcast_links.extend(generate_enriched_links(row))
1022
+
1023
+ if not enriched_podcast_links:
1024
+ fallback = fetch_row_by_id("podcast_episodes", podcast_data[0][0])
1025
+ enriched_podcast_links = generate_enriched_links(fallback)
1026
+
1027
+ # 5. Brevity & Detail Detection
1028
+ wants_details = any(syn in question.lower() for syn in DETAIL_SYNONYMS)
1029
+
1030
+ final_prompt = build_enhanced_prompt(
1031
+ question,
1032
+ None,
1033
+ top_workshops,
1034
+ user_preference=user_preference,
1035
+ enriched_podcast_links=enriched_podcast_links,
1036
+ wants_details=wants_details,
1037
+ current_topic=current_topic
1038
+ )
1039
+
1040
+ response = openai.chat.completions.create(
1041
+ model="gpt-4",
1042
+ messages=[
1043
+ {"role": "system", "content": final_prompt},
1044
+ {"role": "user", "content": question}
1045
+ ]
1046
+ )
1047
+
1048
+ # Log question
1049
+ conn = sqlite3.connect(DB_PATH)
1050
+ cur = conn.cursor()
1051
+ cur.execute("INSERT INTO question_logs (question) VALUES (?)", (question,))
1052
+ conn.commit()
1053
+ conn.close()
1054
+
1055
+ return response.choices[0].message.content.strip()
1056
+
1057
+ # ============================================================================
1058
+ # GRADIO INTERFACE
1059
+ # ============================================================================
1060
+
1061
+ def chat_with_bot(message, history):
1062
+ """
1063
+ Process message directly without Flask API
1064
+
1065
+ Args:
1066
+ message: User's current message
1067
+ history: Chat history (list of message dictionaries)
1068
+
1069
+ Returns:
1070
+ Updated history with new exchange
1071
+ """
1072
+ global session_id
1073
+
1074
+ if not message.strip():
1075
+ return history
1076
+
1077
+ try:
1078
+ # Process question directly
1079
+ bot_reply = process_question(message, session_id)
1080
+ except Exception as e:
1081
+ bot_reply = f"❌ Error: {str(e)}"
1082
+
1083
+ # Append to history in Gradio 6.0 format
1084
+ history.append({"role": "user", "content": message})
1085
+ history.append({"role": "assistant", "content": bot_reply})
1086
+ return history
1087
+
1088
+ def reset_session():
1089
+ """Reset session ID for new conversation"""
1090
+ global session_id
1091
+ session_id = str(uuid.uuid4())
1092
+ return [], f"πŸ”„ New session started: {session_id[:8]}..."
1093
+
1094
+ # Create Gradio interface
1095
+ with gr.Blocks(title="Get Scene Studios Chatbot") as demo:
1096
+
1097
+ gr.Markdown(
1098
+ """
1099
+ # 🎬 Get Scene Studios AI Chatbot
1100
+
1101
+ Ask questions about acting classes, workshops and more!
1102
+ """
1103
+ )
1104
+
1105
+ # Session info display
1106
+ session_info = gr.Textbox(
1107
+ label="Current Session ID",
1108
+ value=f"Session: {session_id[:8]}...",
1109
+ interactive=False,
1110
+ scale=1
1111
+ )
1112
+
1113
+ # Chatbot interface
1114
+ chatbot = gr.Chatbot(
1115
+ label="Conversation",
1116
+ height=500
1117
+ )
1118
+
1119
+ # Input area
1120
+ with gr.Row():
1121
+ msg = gr.Textbox(
1122
+ label="Your Message",
1123
+ lines=2,
1124
+ scale=4
1125
+ )
1126
+ submit_btn = gr.Button("Send πŸ“€", scale=1, variant="primary")
1127
+
1128
+ # Action buttons
1129
+ with gr.Row():
1130
+ clear_btn = gr.Button("Clear Chat πŸ—‘οΈ", scale=1)
1131
+ reset_btn = gr.Button("New Session πŸ”„", scale=1)
1132
+
1133
+ # Example questions
1134
+ # gr.Examples(
1135
+ # examples=[
1136
+ # "How much does it cost?",
1137
+ # "I want to get an agent",
1138
+ # "I'm a beginner, where should I start?",
1139
+ # "Tell me about your workshops",
1140
+ # "Do you have online classes?",
1141
+ # "What's the difference between Perfect Submission and Crush the Callback?",
1142
+ # "I prefer in-studio training",
1143
+ # "Tell me about mentorship programs"
1144
+ # ],
1145
+ # inputs=msg,
1146
+ # label="πŸ’‘ Try these example questions:"
1147
+ # )
1148
+
1149
+ # Event handlers
1150
+ submit_btn.click(
1151
+ fn=chat_with_bot,
1152
+ inputs=[msg, chatbot],
1153
+ outputs=[chatbot]
1154
+ ).then(
1155
+ fn=lambda: "",
1156
+ inputs=None,
1157
+ outputs=[msg]
1158
+ )
1159
+
1160
+ msg.submit(
1161
+ fn=chat_with_bot,
1162
+ inputs=[msg, chatbot],
1163
+ outputs=[chatbot]
1164
+ ).then(
1165
+ fn=lambda: "",
1166
+ inputs=None,
1167
+ outputs=[msg]
1168
+ )
1169
+
1170
+ clear_btn.click(
1171
+ fn=lambda: [],
1172
+ inputs=None,
1173
+ outputs=[chatbot]
1174
+ )
1175
+
1176
+ reset_btn.click(
1177
+ fn=reset_session,
1178
+ inputs=None,
1179
+ outputs=[chatbot, session_info]
1180
+ )
1181
+
1182
+ # Launch the app
1183
+ if __name__ == "__main__":
1184
+ print("\n" + "="*60)
1185
+ print("🎬 Get Scene Studios Chatbot")
1186
+ print("="*60)
1187
+ print("\nβœ… No Flask API needed - all processing is done directly!")
1188
+ print("🌐 Gradio interface will open in your browser")
1189
+ print("="*60 + "\n")
1190
+
1191
+ demo.launch(
1192
+ server_name="127.0.0.1",
1193
+ server_port=7860,
1194
+ share=False,
1195
+ show_error=True,
1196
+ theme=gr.themes.Soft()
1197
+ )
getscene_ai.sqlite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecdd5a663c15ed4d70bc7bf82a8920d4adafc288508d682df0093df81f532c6d
3
+ size 10178560
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ openai
3
+ numpy
4
+ beautifulsoup4
5
+ requests