Isshi14 commited on
Commit
620c87c
·
verified ·
1 Parent(s): 09c6c01

Update script_gen.py

Browse files
Files changed (1) hide show
  1. script_gen.py +20 -13
script_gen.py CHANGED
@@ -2,10 +2,11 @@
2
  VoiceVerse AI — Script Generation Module.
3
 
4
  Generates spoken-style scripts from retrieved document chunks
5
- using Mistral-7B-Instruct via the Hugging Face Inference API.
6
 
7
  Design decisions:
8
- - Serverless HF Inference API avoids loading a 14 GB model locally
 
9
  - Prompt template enforces podcast/narration structure
10
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
11
  - Temperature 0.7 balances creativity with factual grounding
@@ -16,7 +17,7 @@ from huggingface_hub import InferenceClient
16
  from utils import logger
17
 
18
  # — Configuration ————————————————————————————————————
19
- MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
20
  MAX_NEW_TOKENS = 1024
21
  TEMPERATURE = 0.7
22
 
@@ -80,7 +81,7 @@ def generate_script(
80
  # Combine chunks into a single context block
81
  context = "\n\n".join(context_chunks)
82
 
83
- # Truncate if too long (Mistral context window is 32k, but we want focused input)
84
  max_context_chars = 6000
85
  if len(context) > max_context_chars:
86
  context = context[:max_context_chars]
@@ -94,22 +95,28 @@ def generate_script(
94
 
95
  client = _get_client()
96
 
97
- # Call the model using chat completion (conversational task)
98
- response = client.chat_completion(
 
99
  model=MODEL_ID,
100
- messages=[
101
- {"role": "system", "content": SYSTEM_PROMPT},
102
- {"role": "user", "content": user_message},
103
- ],
104
- max_tokens=MAX_NEW_TOKENS,
105
  temperature=TEMPERATURE,
106
  top_p=0.9,
 
107
  )
108
 
109
- script = response.choices[0].message.content.strip()
110
 
111
  if not script:
112
  raise RuntimeError("The model returned an empty script. Please try again.")
113
 
114
  logger.info("Script generated: %d characters", len(script))
115
- return script
 
 
 
 
 
 
 
 
 
2
  VoiceVerse AI — Script Generation Module.
3
 
4
  Generates spoken-style scripts from retrieved document chunks
5
+ using SmolLM3-3B via the Hugging Face Inference API.
6
 
7
  Design decisions:
8
+ - Serverless HF Inference API avoids loading a large model locally
9
+ - SmolLM3-3B is deployed on the free hf-inference provider
10
  - Prompt template enforces podcast/narration structure
11
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
12
  - Temperature 0.7 balances creativity with factual grounding
 
17
  from utils import logger
18
 
19
  # — Configuration ————————————————————————————————————
20
+ MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
21
  MAX_NEW_TOKENS = 1024
22
  TEMPERATURE = 0.7
23
 
 
81
  # Combine chunks into a single context block
82
  context = "\n\n".join(context_chunks)
83
 
84
+ # Truncate if too long
85
  max_context_chars = 6000
86
  if len(context) > max_context_chars:
87
  context = context[:max_context_chars]
 
95
 
96
  client = _get_client()
97
 
98
+ # Call the model using text_generation
99
+ response = client.text_generation(
100
+ prompt=_format_prompt(SYSTEM_PROMPT, user_message),
101
  model=MODEL_ID,
102
+ max_new_tokens=MAX_NEW_TOKENS,
 
 
 
 
103
  temperature=TEMPERATURE,
104
  top_p=0.9,
105
+ do_sample=True,
106
  )
107
 
108
+ script = response.strip()
109
 
110
  if not script:
111
  raise RuntimeError("The model returned an empty script. Please try again.")
112
 
113
  logger.info("Script generated: %d characters", len(script))
114
+ return script
115
+
116
+
117
+ def _format_prompt(system: str, user: str) -> str:
118
+ """
119
+ Format a prompt for SmolLM3 instruction following.
120
+ Uses a simple system + user format.
121
+ """
122
+ return f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"