Update script_gen.py
script_gen.py  (+20 -13, CHANGED)
@@ -2,10 +2,11 @@
 VoiceVerse AI – Script Generation Module.

 Generates spoken-style scripts from retrieved document chunks
-using
+using SmolLM3-3B via the Hugging Face Inference API.

 Design decisions:
-- Serverless HF Inference API avoids loading a
+- Serverless HF Inference API avoids loading a large model locally
+- SmolLM3-3B is deployed on the free hf-inference provider
 - Prompt template enforces podcast/narration structure
 - Max 1024 new tokens keeps scripts a reasonable length for TTS
 - Temperature 0.7 balances creativity with factual grounding
@@ -16,7 +17,7 @@ from huggingface_hub import InferenceClient
 from utils import logger

 # ─ Configuration ────────────────────────────────────
-MODEL_ID = "
+MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
 MAX_NEW_TOKENS = 1024
 TEMPERATURE = 0.7

@@ -80,7 +81,7 @@ def generate_script(
     # Combine chunks into a single context block
     context = "\n\n".join(context_chunks)

-    # Truncate if too long
+    # Truncate if too long
     max_context_chars = 6000
     if len(context) > max_context_chars:
         context = context[:max_context_chars]
@@ -94,22 +95,28 @@

     client = _get_client()

-    # Call the model using
-    response = client.
+    # Call the model using text_generation
+    response = client.text_generation(
+        prompt=_format_prompt(SYSTEM_PROMPT, user_message),
         model=MODEL_ID,
-
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": user_message},
-        ],
-        max_tokens=MAX_NEW_TOKENS,
+        max_new_tokens=MAX_NEW_TOKENS,
         temperature=TEMPERATURE,
         top_p=0.9,
+        do_sample=True,
     )

-    script = response.
+    script = response.strip()

     if not script:
         raise RuntimeError("The model returned an empty script. Please try again.")

     logger.info("Script generated: %d characters", len(script))
-    return script
+    return script
+
+
+def _format_prompt(system: str, user: str) -> str:
+    """
+    Format a prompt for SmolLM3 instruction following.
+    Uses a simple system + user format.
+    """
+    return f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"
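For reference, the template added at the bottom of the diff is fully determined by the f-string in _format_prompt. With illustrative placeholder arguments, _format_prompt("Be brief.", "Summarize the document.") returns the following ChatML-style string:

<|im_start|>system
Be brief.<|im_end|>
<|im_start|>user
Summarize the document.<|im_end|>
<|im_start|>assistant

The trailing open assistant turn cues the model to generate the assistant reply, since text_generation sends this raw string rather than a structured chat-completion message list as the previous call did.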
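The diff only shows the changed hunks, so the sketch below is a minimal, self-contained approximation of the new call path rather than the module itself: SYSTEM_PROMPT, the user message wording, _get_client, the HF_TOKEN environment variable, and the example chunks are assumptions standing in for parts of script_gen.py that the diff does not show.

# Minimal sketch of the updated generation path (assumed names noted above).
import os

from huggingface_hub import InferenceClient

MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
MAX_NEW_TOKENS = 1024
TEMPERATURE = 0.7
SYSTEM_PROMPT = "You write engaging spoken-style narration scripts."  # placeholder


def _format_prompt(system: str, user: str) -> str:
    # Same ChatML-style template the module now defines.
    return (
        f"<|im_start|>system\n{system}<|im_end|>\n"
        f"<|im_start|>user\n{user}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )


def _get_client() -> InferenceClient:
    # Assumption: the real _get_client() wraps InferenceClient roughly like this.
    return InferenceClient(token=os.environ.get("HF_TOKEN"))


def generate_script_sketch(context_chunks: list[str]) -> str:
    # Combine and truncate the retrieved chunks, as in the module.
    context = "\n\n".join(context_chunks)[:6000]
    user_message = f"Write a spoken-style script from this material:\n\n{context}"  # placeholder

    client = _get_client()
    response = client.text_generation(
        prompt=_format_prompt(SYSTEM_PROMPT, user_message),
        model=MODEL_ID,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=TEMPERATURE,
        top_p=0.9,
        do_sample=True,
    )
    script = response.strip()
    if not script:
        raise RuntimeError("The model returned an empty script. Please try again.")
    return script


if __name__ == "__main__":
    print(generate_script_sketch(["First retrieved chunk.", "Second retrieved chunk."]))

Switching from a chat-style call to text_generation means the chat template must be applied by hand before the request is sent, which is exactly what the new _format_prompt helper does.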