Isshi14 commited on
Commit
620c87c
·
verified ·
1 Parent(s): 09c6c01

Update script_gen.py

Browse files
Files changed (1) hide show
  1. script_gen.py +20 -13
script_gen.py CHANGED
@@ -2,10 +2,11 @@
2
  VoiceVerse AI — Script Generation Module.
3
 
4
  Generates spoken-style scripts from retrieved document chunks
5
- using Mistral-7B-Instruct via the Hugging Face Inference API.
6
 
7
  Design decisions:
8
- - Serverless HF Inference API avoids loading a 14 GB model locally
 
9
  - Prompt template enforces podcast/narration structure
10
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
11
  - Temperature 0.7 balances creativity with factual grounding
@@ -16,7 +17,7 @@ from huggingface_hub import InferenceClient
16
  from utils import logger
17
 
18
  # — Configuration ————————————————————————————————————
19
- MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
20
  MAX_NEW_TOKENS = 1024
21
  TEMPERATURE = 0.7
22
 
@@ -80,7 +81,7 @@ def generate_script(
80
  # Combine chunks into a single context block
81
  context = "\n\n".join(context_chunks)
82
 
83
- # Truncate if too long (Mistral context window is 32k, but we want focused input)
84
  max_context_chars = 6000
85
  if len(context) > max_context_chars:
86
  context = context[:max_context_chars]
@@ -94,22 +95,28 @@ def generate_script(
94
 
95
  client = _get_client()
96
 
97
- # Call the model using chat completion (conversational task)
98
- response = client.chat_completion(
 
99
  model=MODEL_ID,
100
- messages=[
101
- {"role": "system", "content": SYSTEM_PROMPT},
102
- {"role": "user", "content": user_message},
103
- ],
104
- max_tokens=MAX_NEW_TOKENS,
105
  temperature=TEMPERATURE,
106
  top_p=0.9,
 
107
  )
108
 
109
- script = response.choices[0].message.content.strip()
110
 
111
  if not script:
112
  raise RuntimeError("The model returned an empty script. Please try again.")
113
 
114
  logger.info("Script generated: %d characters", len(script))
115
- return script
 
 
 
 
 
 
 
 
 
2
  VoiceVerse AI — Script Generation Module.
3
 
4
  Generates spoken-style scripts from retrieved document chunks
5
+ using SmolLM3-3B via the Hugging Face Inference API.
6
 
7
  Design decisions:
8
+ - Serverless HF Inference API avoids loading a large model locally
9
+ - SmolLM3-3B is deployed on the free hf-inference provider
10
  - Prompt template enforces podcast/narration structure
11
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
12
  - Temperature 0.7 balances creativity with factual grounding
 
17
  from utils import logger
18
 
19
  # — Configuration ————————————————————————————————————
20
+ MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
21
  MAX_NEW_TOKENS = 1024
22
  TEMPERATURE = 0.7
23
 
 
81
  # Combine chunks into a single context block
82
  context = "\n\n".join(context_chunks)
83
 
84
+ # Truncate if too long
85
  max_context_chars = 6000
86
  if len(context) > max_context_chars:
87
  context = context[:max_context_chars]
 
95
 
96
  client = _get_client()
97
 
98
+ # Call the model using text_generation
99
+ response = client.text_generation(
100
+ prompt=_format_prompt(SYSTEM_PROMPT, user_message),
101
  model=MODEL_ID,
102
+ max_new_tokens=MAX_NEW_TOKENS,
 
 
 
 
103
  temperature=TEMPERATURE,
104
  top_p=0.9,
105
+ do_sample=True,
106
  )
107
 
108
+ script = response.strip()
109
 
110
  if not script:
111
  raise RuntimeError("The model returned an empty script. Please try again.")
112
 
113
  logger.info("Script generated: %d characters", len(script))
114
+ return script
115
+
116
+
117
+ def _format_prompt(system: str, user: str) -> str:
118
+ """
119
+ Format a prompt for SmolLM3 instruction following.
120
+ Uses a simple system + user format.
121
+ """
122
+ return f"<|im_start|>system\n{system}<|im_end|>\n<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"