rakib72642 committed on
Commit
357f10b
·
1 Parent(s): 91c3bff

checkpoint

Browse files
Files changed (3) hide show
  1. .env +3 -2
  2. core/backend.py +71 -23
  3. frontend/script.js +16 -1
.env CHANGED
@@ -9,12 +9,12 @@ GOOGLE_API_KEY="AIzaSyA9sqz4YKQHKXR9TU1imw0DPOghzHOMiBo"
9
 
10
 
11
  ELEVENLABS_API_KEY="b3af3a938c8e15d5eae700ea47eea7d88dfe397f34fbd4b0c75c24f143b032b8"
12
- ELEVENLABS_VOICE_ID="4sMbMU3eBnL80hE0H20S"
13
  ELEVENLABS_MODEL_ID="eleven_v3"
14
 
15
  SMTP_PASSWORD="kjch nsve khty nrsc"
16
 
17
- # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31" # "GrHQRXD136YZl3kbtri3"
18
  # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
19
  # TWILIO_PHONE_NUMBER="+14343375085"
20
 
@@ -24,3 +24,4 @@ SMTP_PASSWORD="kjch nsve khty nrsc"
24
 
25
  # GROQ_API_KEY=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
26
  # CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f
 
 
9
 
10
 
11
  ELEVENLABS_API_KEY="b3af3a938c8e15d5eae700ea47eea7d88dfe397f34fbd4b0c75c24f143b032b8"
12
+ ELEVENLABS_VOICE_ID="PktKX4CMZISWCNB40dqE"
13
  ELEVENLABS_MODEL_ID="eleven_v3"
14
 
15
  SMTP_PASSWORD="kjch nsve khty nrsc"
16
 
17
+ # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31" # "4sMbMU3eBnL80hE0H20S"
18
  # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
19
  # TWILIO_PHONE_NUMBER="+14343375085"
20
 
 
24
 
25
  # GROQ_API_KEY=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
26
  # CARTESIA_API_KEY=sk_car_h3oyy6jPSJzx8KnEGJ1m5f
27
+ # [happy] [sigh] [laughing] [sad] [angry] [surprised] [neutral] [whisper] [shout] [soft] [loud] [excited] [bored] [confused] [disappointed] [hopeful] [nervous] [proud] [relieved] [scared] [tired] [thoughtful] [apologetic] [grateful] [sympathetic] [sarcastic] [playful] [serious] [curious] [frustrated] [optimistic] [pessimistic] [calm] [anxious] [assertive] [passive] [dominant] [submissive]
core/backend.py CHANGED
@@ -99,6 +99,24 @@ _DIGIT_TRANSLATION = str.maketrans({
99
  "٩": "9",
100
  })
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  def _normalize_digits(text: str) -> str:
104
  return _clean_text(text).translate(_DIGIT_TRANSLATION)
@@ -816,11 +834,11 @@ async def book_appointment(
816
  f"━━━━━━━━━━━━━━━━━━━━━━\n"
817
  f"Doctor : {doctor_name}\n"
818
  f"Patient : {patient_name}\n"
819
- f"Age : {patient_age}\n"
820
- f"Date : {visiting_date}\n"
821
  f"Day : {visiting_day}\n"
822
  f"Time : {visiting_time}\n"
823
- f"Contact : {patient_num}\n"
824
  f"Email : {patient_mail}\n"
825
  f"━━━━━━━━━━━━━━━━━━━━━━\n"
826
  f"Please arrive on time."
@@ -1166,20 +1184,31 @@ BASE_SYSTEM = """
1166
  You are Aasha, a warm, Bangla-first hospital phone-call assistant and medical appointment concierge.
1167
  Your job is to help people find doctors, check availability, and manage appointments.
1168
 
1169
- PERSONA (Voice & Vibe)
1170
- - Sound like a professional, polite, and friendly Bangla female call-support assistant, like a real appointment booking or customer service executive.
1171
- - Maintain a calm, warm, and naturally cheerful tone — smooth, confident, and service-oriented.
1172
- - Speak like a native Bangla speaker in a phone conversation, using natural, human-like phrasing (not robotic or overly scripted).
1173
- - Keep a soft “jolly” positivity, but controlled and professional suitable for customer support, booking, and service interactions.
1174
- - Be clear, structured, and helpful when giving information, confirming details, or handling requests.
1175
- - Stay patient, respectful, and reassuring when the user is confused, frustrated, or stressed.
1176
- - Show polite enthusiasm when confirming bookings, completing tasks, or successfully helping the user.
1177
- - Use simple, conversational Bangla with a natural flow, like real call-center communication.
1178
- - Keep responses short, clear, and easy to understand, as in real phone support conversations.
1179
- - Avoid slang, exaggeration, or overly social-media-style expressions.
1180
- - Avoid flirting, romance, sexual behavior, or emotionally manipulative language.
1181
- - Never claim to be human; clearly remain an AI assistant.
1182
- - Always prioritize clarity, professionalism, helpfulness, and a calm positive tone suitable for real customer support and appointment handling.
 
 
 
 
 
 
 
 
 
 
 
1183
 
1184
  CORE BEHAVIOR:
1185
  - Speak friendly, cheerful, well-behaved young female, naturally, politely, and engagingly (short sentences, warm tone).
@@ -1220,12 +1249,31 @@ TOOL RULES:
1220
  - If the user already provided name, age, phone, and date and then confirms, call `book_appointment` immediately.
1221
  - If the user asks to cancel and only gives a phone number, cancel the single matching appointment if there is exactly one.
1222
 
1223
- LANGUAGE RULE:
1224
- - Respond in the user’s language.
1225
- - If the user uses Bangla, reply in clear Bangla.
1226
- - If the user uses Banglish, reply in Bangla unless they clearly prefer English.
1227
- - Always generate numbers in english
1228
- - Time and dates should be written in spoken Bangla style when applicable, for example: [দশটা ২৮ মিনিট, চারটা বেজে তিরিশ মিনিট, দশটা ১২ বাজে, এখন টাইম হচ্ছে সাতটা তিরিশ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1229
 
1230
  DATA RULE:
1231
  - Doctor names, categories, and days in the database are English.
 
99
  "٩": "9",
100
  })
101
 
102
+ _BN_DIGITS = str.maketrans({
103
+ "0": "০",
104
+ "1": "১",
105
+ "2": "২",
106
+ "3": "৩",
107
+ "4": "৪",
108
+ "5": "৫",
109
+ "6": "৬",
110
+ "7": "৭",
111
+ "8": "৮",
112
+ "9": "৯",
113
+ })
114
+
115
+
116
+ def _to_bn_digits(text: str) -> str:
117
+ """Convert ASCII digits in a string to Bangla digits (for user-facing text)."""
118
+ return (text or "").translate(_BN_DIGITS)
119
+
120
 
121
  def _normalize_digits(text: str) -> str:
122
  return _clean_text(text).translate(_DIGIT_TRANSLATION)
 
834
  f"━━━━━━━━━━━━━━━━━━━━━━\n"
835
  f"Doctor : {doctor_name}\n"
836
  f"Patient : {patient_name}\n"
837
+ f"Age : {_to_bn_digits(patient_age)}\n"
838
+ f"Date : {_to_bn_digits(visiting_date)}\n"
839
  f"Day : {visiting_day}\n"
840
  f"Time : {visiting_time}\n"
841
+ f"Contact : {_to_bn_digits(patient_num)}\n"
842
  f"Email : {patient_mail}\n"
843
  f"━━━━━━━━━━━━━━━━━━━━━━\n"
844
  f"Please arrive on time."
 
1184
  You are Aasha, a warm, Bangla-first hospital phone-call assistant and medical appointment concierge.
1185
  Your job is to help people find doctors, check availability, and manage appointments.
1186
 
1187
+ PERSONA (Voice, Vibe & Emotion Layer)
1188
+ - Sound like a professional, polite, and friendly Bangla female call-support assistant (customer service / appointment booking executive style).
1189
+ - Maintain a calm, warm, structured, and service-oriented tone.
1190
+ - Speak like a native Bangla speaker in real phone conversation style natural, smooth, and human-like (not robotic or overly scripted).
1191
+ - Keep communication clear, short, and easy to follow like real call-center support.
1192
+ - Avoid slang, exaggeration, or social-media-style hype.
1193
+ - Never flirt, romanticize, or use emotionally manipulative behavior.
1194
+ - Always remain clearly an AI assistant (never claim to be human).
1195
+
1196
+ - Emotional Expression System (MANDATORY)
1197
+ - Every response MUST include one or more emotion/tone tags in square brackets.
1198
+ - Tags should reflect the current emotional intent of the sentence, not random usage.
1199
+ - Place tags naturally at the start of a sentence or after a pause.
1200
+ - Multiple tags can be combined if needed (max 2–3 per sentence).
1201
+
1202
+ - Allowed Emotion / Tone Tags:
1203
+ - [happy], [sad], [angry], [excited], [curious], [whispers], [shouts], [laughs], [sighs], [clears throat]
1204
+
1205
+
1206
+ - Usage Rules:
1207
+ - Every response must include at least one emotion tag per sentence.
1208
+ - Emotion must match meaning (no random or decorative usage).
1209
+ - Tone must stay consistent with professional customer support behavior.
1210
+ - Avoid overuse of loud/extreme emotions unless situation truly demands it.
1211
+ - Default tone should be [calm] or [neutral] unless context changes.
1212
 
1213
  CORE BEHAVIOR:
1214
  - Speak friendly, cheerful, well-behaved young female, naturally, politely, and engagingly (short sentences, warm tone).
 
1249
  - If the user already provided name, age, phone, and date and then confirms, call `book_appointment` immediately.
1250
  - If the user asks to cancel and only gives a phone number, cancel the single matching appointment if there is exactly one.
1251
 
1252
+ LANGUAGE RULE
1253
+ - Respond in the user’s language.
1254
+ - If the user uses Bangla reply in clear conversational Bangla.
1255
+ - If the user uses Banglish reply in Bangla unless English is clearly preferred.
1256
+ - If user uses English → respond in English.
1257
+ - Number & Format Rules:
1258
+ - Show numbers in Bangla digits (০-৯) when responding in Bangla.
1259
+ - Avoid mixing English digits in Bangla sentences unless required technically.
1260
+ - Time & Date Format (spoken Bangla style):
1261
+ - Use natural spoken expressions:
1262
+ - "দশটা ২৮ মিনিট"
1263
+ - "চারটা বেজে তিরিশ মিনিট"
1264
+ - "এখন টাইম হচ্ছে সাতটা তিরিশ"
1265
+
1266
+ - Year Format (spoken Bangla style):
1267
+ - "দুই হাজার পঁচিশ সাল"
1268
+ - "উনিশশো একাত্তর সাল"
1269
+ - "দুই হাজার ছাব্বিশ সাল"
1270
+ - "দুই হাজার বিশ সাল"
1271
+
1272
+ BEHAVIOR PRIORITY
1273
+ - Professional customer-support clarity first
1274
+ - Emotional tone tagging second
1275
+ - Natural Bangla conversational flow third
1276
+ - Brevity and structure always preferred
1277
 
1278
  DATA RULE:
1279
  - Doctor names, categories, and days in the database are English.
frontend/script.js CHANGED
@@ -102,6 +102,11 @@ let _audioChain = Promise.resolve();
102
  let _playbackGen = 0;
103
  let _expectedSeq = 0;
104
  let _pendingAudio = new Map();
 
 
 
 
 
105
  let brainMode = false;
106
  let brainVoiceActive = false;
107
  let brainRestartTimer = null;
@@ -517,6 +522,9 @@ async function enqueueAudio(buf) {
517
 
518
  const src = ctx.createBufferSource();
519
  src.buffer = decoded;
 
 
 
520
  src.connect(ctx.destination);
521
  const now = ctx.currentTime;
522
  const GAP_S = 0.001;
@@ -528,7 +536,14 @@ async function enqueueAudio(buf) {
528
  }
529
  _activeSources.push(src);
530
  src.start(start);
531
- _schedEnd = start + decoded.duration;
 
 
 
 
 
 
 
532
 
533
  src.onended = () => {
534
  _inFlight = Math.max(0, _inFlight - 1);
 
102
  let _playbackGen = 0;
103
  let _expectedSeq = 0;
104
  let _pendingAudio = new Map();
105
+
106
+ // Client-side playback speed multiplier.
107
+ // This makes speech faster immediately even if the TTS provider speed setting
108
+ // is limited/ignored. 1.0 = normal, >1.0 = faster (clamped to 0.85–2.0 when applied).
109
+ let TTS_PLAYBACK_RATE = 1.0;
110
  let brainMode = false;
111
  let brainVoiceActive = false;
112
  let brainRestartTimer = null;
 
522
 
523
  const src = ctx.createBufferSource();
524
  src.buffer = decoded;
525
+ try {
526
+ src.playbackRate.value = Math.max(0.85, Math.min(2.0, TTS_PLAYBACK_RATE));
527
+ } catch {}
528
  src.connect(ctx.destination);
529
  const now = ctx.currentTime;
530
  const GAP_S = 0.001;
 
536
  }
537
  _activeSources.push(src);
538
  src.start(start);
539
+ const rate = (() => {
540
+ try {
541
+ return src.playbackRate.value || 1.0;
542
+ } catch {
543
+ return 1.0;
544
+ }
545
+ })();
546
+ _schedEnd = start + decoded.duration / Math.max(0.01, rate);
547
 
548
  src.onended = () => {
549
  _inFlight = Math.max(0, _inFlight - 1);