Isshi14 committed on
Commit
94a026e
·
verified ·
1 Parent(s): 71b59d0

Update script_gen.py

Browse files
Files changed (1) hide show
  1. script_gen.py +76 -22
script_gen.py CHANGED
@@ -9,43 +9,91 @@ Design decisions:
9
  - SmolLM3-3B is deployed on the free hf-inference provider
10
  - Prompt template enforces podcast/narration structure
11
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
12
- - Temperature 0.7 balances creativity with factual grounding
 
13
  """
14
 
15
  import os
 
16
  from huggingface_hub import InferenceClient
17
  from utils import logger
18
 
19
  # β€” Configuration β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
20
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
21
  MAX_NEW_TOKENS = 1024
22
- TEMPERATURE = 0.7
23
 
24
  # β€” Prompt Template β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
25
 
26
- SYSTEM_PROMPT = """You are a professional podcast script writer. Your job is to transform document content into an engaging spoken script for a podcast-style narration.
27
 
28
- Rules you MUST follow:
29
- 1. Write ONLY for listening, not reading. Use conversational, natural language.
30
- 2. Structure the script with: a compelling hook/introduction, the main narrative, and a clear closing/takeaway.
31
- 3. Use short sentences. Vary sentence length for rhythm.
32
- 4. Include natural transitions like "Now, here's where it gets interesting..." or "Let's break this down..."
33
- 5. Add brief pauses indicated by "..." where the speaker should pause for effect.
34
- 6. NEVER include headers, bullet points, or any visual formatting.
35
- 7. NEVER mention "the document" or "the text" — speak as if you're the expert sharing knowledge.
36
- 8. Ground ALL content strictly in the provided context. Do NOT invent facts.
37
- 9. If information is insufficient, acknowledge it naturally: "There's still much to explore here..."
38
- 10. Keep the tone warm, engaging, and authoritative — like a great podcast host."""
39
 
40
- USER_PROMPT_TEMPLATE = """Based on the following content, create an engaging spoken script for a podcast-style narration.
41
 
42
- --- CONTENT FROM DOCUMENT ---
43
  {context}
44
- --- END OF CONTENT ---
45
 
46
- Topic focus: {topic}
47
 
48
- Write the complete spoken script now. Remember: this will be read aloud, so make it sound natural and engaging."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  # β€” Script Generation β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
@@ -110,10 +158,16 @@ def generate_script(
110
  top_p=0.9,
111
  )
112
 
113
- script = response.choices[0].message.content.strip()
114
 
115
- if not script:
116
  raise RuntimeError("The model returned an empty script. Please try again.")
117
 
118
- logger.info("Script generated: %d characters", len(script))
 
 
 
 
 
 
119
  return script
 
9
  - SmolLM3-3B is deployed on the free hf-inference provider
10
  - Prompt template enforces podcast/narration structure
11
  - Max 1024 new tokens keeps scripts a reasonable length for TTS
12
+ - Temperature 0.4 keeps output grounded and factual
13
+ - Post-processing strips markdown/XML artifacts for clean TTS
14
  """
15
 
16
  import os
17
+ import re
18
  from huggingface_hub import InferenceClient
19
  from utils import logger
20
 
21
  # β€” Configuration β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
22
  MODEL_ID = "HuggingFaceTB/SmolLM3-3B"
23
  MAX_NEW_TOKENS = 1024
24
+ TEMPERATURE = 0.4
25
 
26
  # β€” Prompt Template β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
27
 
28
+ SYSTEM_PROMPT = """You are a podcast script writer. Convert the provided document content into a spoken narration script.
29
 
30
+ CRITICAL RULES:
31
+ 1. ONLY use facts, ideas, and information that appear in the provided content. Do NOT add any outside knowledge or make up details.
32
+ 2. Write in plain text only. No markdown, no headers, no bullet points, no asterisks, no hashtags, no HTML tags, no XML tags.
33
+ 3. Write naturally as if speaking aloud. Use short sentences and conversational language.
34
+ 4. Structure: brief intro, main points from the content, brief conclusion.
35
+ 5. Never say "the document says" or "according to the text". Speak as if you are the expert.
36
+ 6. If the content is limited, keep the script short rather than inventing information.
37
+ 7. Do NOT use any special formatting or tags like <think>, **, ##, etc.
38
+ 8. Output ONLY the spoken script text, nothing else."""
 
 
39
 
40
+ USER_PROMPT_TEMPLATE = """Here is the document content to convert into a spoken podcast script:
41
 
42
+ --- CONTENT ---
43
  {context}
44
+ --- END ---
45
 
46
+ Topic: {topic}
47
 
48
+ Now write ONLY the spoken script based strictly on the content above. Do not add information that is not in the content."""
49
+
50
+
51
+ # β€” Post-processing β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
52
+
53
+ def _clean_script_for_tts(text: str) -> str:
54
+ """
55
+ Remove markdown, XML/HTML tags, and other artifacts that would be
56
+ read aloud by TTS engines.
57
+ """
58
+ # Remove <think>...</think> blocks entirely (SmolLM3 reasoning traces)
59
+ text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
60
+
61
+ # Remove any remaining XML/HTML-style tags
62
+ text = re.sub(r'<[^>]+>', '', text)
63
+
64
+ # Remove markdown headers (# ## ### etc.)
65
+ text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)
66
+
67
+ # Remove markdown bold/italic markers
68
+ text = re.sub(r'\*{1,3}([^*]+)\*{1,3}', r'\1', text)
69
+ text = re.sub(r'_{1,3}([^_]+)_{1,3}', r'\1', text)
70
+
71
+ # Remove markdown links [text](url) -> text
72
+ text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
73
+
74
+ # Remove markdown code blocks and inline code
75
+ text = re.sub(r'```[^`]*```', '', text, flags=re.DOTALL)
76
+ text = re.sub(r'`([^`]+)`', r'\1', text)
77
+
78
+ # Remove bullet point markers
79
+ text = re.sub(r'^[\s]*[-*+]\s+', '', text, flags=re.MULTILINE)
80
+
81
+ # Remove numbered list markers
82
+ text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE)
83
+
84
+ # Remove blockquote markers
85
+ text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE)
86
+
87
+ # Remove horizontal rules
88
+ text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
89
+
90
+ # Collapse multiple newlines into one
91
+ text = re.sub(r'\n{3,}', '\n\n', text)
92
+
93
+ # Collapse multiple spaces
94
+ text = re.sub(r' {2,}', ' ', text)
95
+
96
+ return text.strip()
97
 
98
 
99
  # β€” Script Generation β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
 
158
  top_p=0.9,
159
  )
160
 
161
+ raw_script = response.choices[0].message.content.strip()
162
 
163
+ if not raw_script:
164
  raise RuntimeError("The model returned an empty script. Please try again.")
165
 
166
+ # Clean the script for TTS (remove markdown, XML tags, etc.)
167
+ script = _clean_script_for_tts(raw_script)
168
+
169
+ if not script:
170
+ raise RuntimeError("Script was empty after cleaning. Please try again.")
171
+
172
+ logger.info("Script generated: %d chars (raw: %d chars)", len(script), len(raw_script))
173
  return script