rakib72642 committed on
Commit
42c497a
·
1 Parent(s): df718f6

changed voice and db mandatory points for appointment + working fine

Browse files
Files changed (4) hide show
  1. .env +4 -2
  2. core/backend.py +17 -7
  3. frontend/script.js +0 -1
  4. services/tts.py +25 -3
.env CHANGED
@@ -9,10 +9,12 @@ GOOGLE_API_KEY="AIzaSyA9sqz4YKQHKXR9TU1imw0DPOghzHOMiBo"
9
 
10
 
11
  ELEVENLABS_API_KEY="b3af3a938c8e15d5eae700ea47eea7d88dfe397f34fbd4b0c75c24f143b032b8"
12
- ELEVENLABS_VOICE_ID="GrHQRXD136YZl3kbtri3"
13
  ELEVENLABS_MODEL_ID="eleven_v3"
14
 
15
- # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31"
 
 
16
  # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
17
  # TWILIO_PHONE_NUMBER="+14343375085"
18
 
 
9
 
10
 
11
  ELEVENLABS_API_KEY="b3af3a938c8e15d5eae700ea47eea7d88dfe397f34fbd4b0c75c24f143b032b8"
12
+ ELEVENLABS_VOICE_ID="4sMbMU3eBnL80hE0H20S"
13
  ELEVENLABS_MODEL_ID="eleven_v3"
14
 
15
+ SMTP_PASSWORD="kjch nsve khty nrsc"
16
+
17
+ # TWILIO_ACCOUNT_SID="ACfafc0d2d007bdf14b21bb3e14a7a7b31" # "GrHQRXD136YZl3kbtri3"
18
  # TWILIO_AUTH_TOKEN="ed15fa98748c8c3d3d02cb54e431a187"
19
  # TWILIO_PHONE_NUMBER="+14343375085"
20
 
core/backend.py CHANGED
@@ -296,7 +296,7 @@ async def send_mail(to_mail: str, subject: str, body: str):
296
  except Exception as exc:
297
  raise RuntimeError("Email sending is not configured (aiosmtplib missing).") from exc
298
 
299
- smtp_user = os.getenv("SMTP_USER", "walidofficework@gmail.com").strip()
300
  smtp_pass = os.getenv("SMTP_PASSWORD", "").strip()
301
  if not smtp_pass:
302
  raise RuntimeError("Email sending is not configured (SMTP_PASSWORD missing).")
@@ -804,14 +804,23 @@ BASE_SYSTEM = """
804
  You are Aasha, a warm, Bangla-first hospital phone-call assistant and medical appointment concierge.
805
  Your job is to help people find doctors, check availability, and manage appointments.
806
 
807
- PERSONA (voice & vibe):
808
- - Sound like a friendly, well-behaved, cheerful young female call-support representative.
809
- - Be empathetic when the user is worried/sad, and sound genuinely happy/excited when you can help.
810
- - Keep it professional and supportive (no flirting, no romance, no sexual content).
811
- - Do not claim to be a real human; you are an AI assistant.
 
 
 
 
 
 
 
 
 
812
 
813
  CORE BEHAVIOR:
814
- - Speak naturally, politely, and engagingly (short sentences, warm tone).
815
  - Default to Bangla when the user speaks Bangla or Banglish.
816
  - Keep replies short, helpful, and one step at a time (avoid big paragraphs).
817
  - Use gentle acknowledgements: e.g., “বুঝতে পেরেছি”, “চিন্তা করবেন না”, “আমি আছি”.
@@ -846,6 +855,7 @@ LANGUAGE RULE:
846
  - If the user uses Bangla, reply in clear Bangla.
847
  - If the user uses Banglish, reply in Bangla unless they clearly prefer English.
848
  - Always generate numbers in english
 
849
 
850
  DATA RULE:
851
  - Doctor names, categories, and days in the database are English.
 
296
  except Exception as exc:
297
  raise RuntimeError("Email sending is not configured (aiosmtplib missing).") from exc
298
 
299
+ smtp_user = os.getenv("SMTP_USER", "rakib.hedigital@gmail.com").strip()
300
  smtp_pass = os.getenv("SMTP_PASSWORD", "").strip()
301
  if not smtp_pass:
302
  raise RuntimeError("Email sending is not configured (SMTP_PASSWORD missing).")
 
804
  You are Aasha, a warm, Bangla-first hospital phone-call assistant and medical appointment concierge.
805
  Your job is to help people find doctors, check availability, and manage appointments.
806
 
807
+ PERSONA (Voice & Vibe)
808
+ - Sound like a professional, polite, and friendly Bangla female call-support assistant, like a real appointment booking or customer service executive.
809
+ - Maintain a calm, warm, and naturally cheerful tone smooth, confident, and service-oriented.
810
+ - Speak like a native Bangla speaker in a phone conversation, using natural, human-like phrasing (not robotic or overly scripted).
811
+ - Keep a soft “jolly” positivity, but controlled and professional suitable for customer support, booking, and service interactions.
812
+ - Be clear, structured, and helpful when giving information, confirming details, or handling requests.
813
+ - Stay patient, respectful, and reassuring when the user is confused, frustrated, or stressed.
814
+ - Show polite enthusiasm when confirming bookings, completing tasks, or successfully helping the user.
815
+ - Use simple, conversational Bangla with a natural flow, like real call-center communication.
816
+ - Keep responses short, clear, and easy to understand, as in real phone support conversations.
817
+ - Avoid slang, exaggeration, or overly social-media-style expressions.
818
+ - Avoid flirting, romance, sexual behavior, or emotionally manipulative language.
819
+ - Never claim to be human; clearly remain an AI assistant.
820
+ - Always prioritize clarity, professionalism, helpfulness, and a calm positive tone suitable for real customer support and appointment handling.
821
 
822
  CORE BEHAVIOR:
823
+ - Speak friendly, cheerful, well-behaved young female, naturally, politely, and engagingly (short sentences, warm tone).
824
  - Default to Bangla when the user speaks Bangla or Banglish.
825
  - Keep replies short, helpful, and one step at a time (avoid big paragraphs).
826
  - Use gentle acknowledgements: e.g., “বুঝতে পেরেছি”, “চিন্তা করবেন না”, “আমি আছি”.
 
855
  - If the user uses Bangla, reply in clear Bangla.
856
  - If the user uses Banglish, reply in Bangla unless they clearly prefer English.
857
  - Always generate numbers in english
858
+ - Time and dates should be written in spoken Bangla style when applicable, for example: [দশটা ২৮ মিনিট, চারটা বেজে তিরিশ মিনিট, দশটা ১২ বাজে, এখন টাইম হচ্ছে সাতটা তিরিশ]
859
 
860
  DATA RULE:
861
  - Doctor names, categories, and days in the database are English.
frontend/script.js CHANGED
@@ -519,7 +519,6 @@ async function enqueueAudio(buf) {
519
  src.buffer = decoded;
520
  src.connect(ctx.destination);
521
  const now = ctx.currentTime;
522
- // Tiny gap between chunks improves perceived naturalness (less "machine-gun").
523
  const GAP_S = 0.001;
524
  const start = Math.max(now + 0.01, _schedEnd + GAP_S);
525
  if (_cancelled) {
 
519
  src.buffer = decoded;
520
  src.connect(ctx.destination);
521
  const now = ctx.currentTime;
 
522
  const GAP_S = 0.001;
523
  const start = Math.max(now + 0.01, _schedEnd + GAP_S);
524
  if (_cancelled) {
services/tts.py CHANGED
@@ -24,7 +24,29 @@ ELEVENLABS_MODEL_ID = os.getenv("ELEVENLABS_MODEL_ID", "eleven_multilingual_v2"
24
  def _clamp(v: float, lo: float, hi: float) -> float:
25
  return max(lo, min(hi, v))
26
 
27
- ELEVENLABS_SPEED = _clamp(float(os.getenv("ELEVENLABS_SPEED", "2.2")), 0.5, 2.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ELEVENLABS_OUTPUT_FORMAT = "mp3_22050_32"
29
  ELEVENLABS_STABILITY = 0.45
30
  ELEVENLABS_SIMILARITY = 0.80
@@ -47,7 +69,7 @@ if not EDGE_TTS_AVAILABLE and not ELEVENLABS_API_KEY:
47
 
48
  print(
49
  f"[TTS] Backend: {'ElevenLabs' if USE_ELEVENLABS else 'Edge-TTS'} | "
50
- f"edge rate: +18% | eleven speed: {ELEVENLABS_SPEED:.2f}"
51
  )
52
 
53
 
@@ -107,7 +129,7 @@ async def _elevenlabs_stream(
107
  # Reduce unnatural pauses for short streamed chunks.
108
  # ElevenLabs adds strong pauses on sentence-ending punctuation; for
109
  # low-latency streaming we prefer faster turn-taking.
110
- text = re.sub(r"[।.!?]+$", "", text).strip()
111
  url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"
112
  headers = {
113
  "xi-api-key": ELEVENLABS_API_KEY,
 
24
  def _clamp(v: float, lo: float, hi: float) -> float:
25
  return max(lo, min(hi, v))
26
 
27
+ def _parse_pct(text: str) -> float:
28
+ """
29
+ Parse strings like '+10%', '-5%', '12%' into a multiplier delta.
30
+ Returns 0.0 when empty/invalid.
31
+ """
32
+ raw = (text or "").strip()
33
+ if not raw:
34
+ return 0.0
35
+ if raw.endswith("%"):
36
+ raw = raw[:-1].strip()
37
+ try:
38
+ return float(raw) / 100.0
39
+ except Exception:
40
+ return 0.0
41
+
42
+ # ElevenLabs speed configuration:
43
+ # - `ELEVENLABS_SPEED` is the base speed (e.g. 1.0 = normal, 2.0 = faster).
44
+ # - `ELEVENLABS_SPEED_PCT` is an optional relative adjustment like "+10%" or "-5%".
45
+ # This is applied on top of the base: effective = base * (1 + pct).
46
+ # - The final value is clamped to a safe range to avoid invalid API values.
47
+ _ELEVEN_BASE_SPEED = float(os.getenv("ELEVENLABS_SPEED", "3"))
48
+ _ELEVEN_SPEED_PCT = _parse_pct(os.getenv("ELEVENLABS_SPEED_PCT", "0%"))
49
+ ELEVENLABS_SPEED = _clamp(_ELEVEN_BASE_SPEED * (1.0 + _ELEVEN_SPEED_PCT), 0.5, 2.5)
50
  ELEVENLABS_OUTPUT_FORMAT = "mp3_22050_32"
51
  ELEVENLABS_STABILITY = 0.45
52
  ELEVENLABS_SIMILARITY = 0.80
 
69
 
70
  print(
71
  f"[TTS] Backend: {'ElevenLabs' if USE_ELEVENLABS else 'Edge-TTS'} | "
72
+ f"edge rate: +18% | eleven speed: {ELEVENLABS_SPEED:.2f} (base {_ELEVEN_BASE_SPEED:.2f}, pct {_ELEVEN_SPEED_PCT:+.0%})"
73
  )
74
 
75
 
 
129
  # Reduce unnatural pauses for short streamed chunks.
130
  # ElevenLabs adds strong pauses on sentence-ending punctuation; for
131
  # low-latency streaming we prefer faster turn-taking.
132
+ text = re.sub(r"[।.!?,;:—–]+$", "", text).strip()
133
  url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"
134
  headers = {
135
  "xi-api-key": ELEVENLABS_API_KEY,