github-actions[bot] committed on
Commit
561adc1
·
1 Parent(s): 82e7376

deploy: switch to chatterbox requirements @ 1e57710

Browse files
Files changed (2) hide show
  1. steps/lang/_shared.py +1 -1
  2. steps/s3_translate.py +64 -24
steps/lang/_shared.py CHANGED
@@ -10,7 +10,7 @@ from dotenv import load_dotenv
10
  load_dotenv()
11
 
12
  POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
13
- MODEL = os.getenv("POLLEN_MODEL", "gemini-search")
14
 
15
 
16
  def build_client() -> OpenAI:
 
10
  load_dotenv()
11
 
12
  POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
13
+ MODEL = os.getenv("POLLEN_MODEL", "openai-large")
14
 
15
 
16
  def build_client() -> OpenAI:
steps/s3_translate.py CHANGED
@@ -21,30 +21,70 @@ def _translate_batch(segments: list[dict], target_language: str) -> list[dict]:
21
 
22
  # Default prompt (generic, works for most languages)
23
  default_prompt = (
24
- f"You are a professional voice-over translator. "
25
- f"Translate the following numbered lines into {target_language}.\n\n"
26
- f"CRITICAL — DURATION CONSTRAINT:\n"
27
- f"Each line shows its spoken duration in brackets (e.g. [4.6s]). "
28
- f"The translation will be spoken by TTS and MUST fit within that duration.\n"
29
- f"STRICT RULE: Your translation MUST have FEWER words than the original English. "
30
- f"If the English has 10 words, aim for 7-8 words maximum.\n"
31
- f"Every word must earn its place — if removing a word doesn't lose core meaning, remove it. "
32
- f"Paraphrase aggressively. Use shorter synonyms. Merge clauses. "
33
- f"A concise translation that fits the time is ALWAYS better than a complete one that overflows.\n\n"
34
- f"TTS COMPATIBILITY — IMPORTANT:\n"
35
- f"The TTS model struggles with long sentences that have multiple commas or clauses. "
36
- f"Restructure into short, direct sentences — but the TOTAL text must still fit the duration shown in brackets. "
37
- f"Do NOT add extra words or content when restructuring. The goal is simpler phrasing, not more text.\n"
38
- f"AVOID: 'She found her husband, the true owner of her heart, in her third marriage at age 40.'\n"
39
- f"PREFER: 'She found her true love. It was her third marriage, at age 40.'\n"
40
- f"Each output line is still ONE item in the array (one per input line). "
41
- f"You may use multiple short sentences within that single line, but it must all fit the original duration.\n\n"
42
- f"TRANSLATION PURITY:\n"
43
- f"Do NOT leave any English words in the translation. Translate ALL interjections and filler words (Oh, Ah, Hmm, Well, So, Right, etc.) into {target_language}.\n\n"
44
- f"Return ONLY a JSON array of translated strings, in order, no extra text. "
45
- f"Do NOT include the duration prefix or numbering in the output — only the translated text itself. "
46
- f'Example input: 1. [3.0s] Hello\n2. [2.5s] Goodbye '
47
- f'Example output: ["Hola", "Adiós"]'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  )
49
 
50
  # Let language-specific handler override the prompt if needed
 
21
 
22
  # Default prompt (generic, works for most languages)
23
  default_prompt = (
24
+ f"You are a voice-over dubbing writer — not a translator. "
25
+ f"Your job is to write what a native {target_language} speaker would *actually say out loud* "
26
+ f"in a casual, natural conversation. Forget the source words. Capture the meaning, tone, and energy.\n\n"
27
+
28
+ f"INPUT FORMAT:\n"
29
+ f"Numbered lines with a spoken duration in brackets, e.g.: 1. [4.6s] Hello there\n\n"
30
+
31
+ f"OUTPUT FORMAT:\n"
32
+ f"A JSON array of strings — one per input line, in order. No numbering, no brackets, no extra text.\n"
33
+ f'Example: ["¿Qué tal?", "Adiós"]\n\n'
34
+
35
+ f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
36
+ f"SCORING RUBRIC — evaluate every line against these before outputting:\n"
37
+ f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
38
+
39
+ f"[1] NATURALNESS — weight: HIGH\n"
40
+ f" Would a native speaker actually say this in real life?\n"
41
+ f" ✗ Fail: dictionary phrasing, formal register, textbook grammar\n"
42
+ f" ✓ Pass: contractions, colloquial rhythm, everyday vocabulary\n"
43
+ f" Ask yourself: 'Would I hear this in a TV show or on the street?' If no → rewrite.\n\n"
44
+
45
+ f"[2] SPOKEN FIT — weight: CRITICAL\n"
46
+ f" The line will be read by TTS within the duration shown in brackets.\n"
47
+ f" Fewer words is almost always safer. Aim for 70–80% of the original word count.\n"
48
+ f" ✗ Fail: translation is longer or same length as the English\n"
49
+ f" ✓ Pass: shorter, with no loss of core meaning or emotional tone\n"
50
+ f" Trick: cut filler, merge ideas, use contractions and short-form spoken words.\n\n"
51
+
52
+ f"[3] TTS READABILITY — weight: HIGH\n"
53
+ f" Long sentences with multiple commas trip up TTS engines.\n"
54
+ f" ✗ Fail: 'She met him, her true love, on a rainy evening, in the city she once fled.'\n"
55
+ f" ✓ Pass: 'She met him on a rainy evening. Her true love. In the city she once fled.'\n"
56
+ f" Short beats. Natural pauses. Each sentence punches clean.\n\n"
57
+
58
+ f"[4] EMOTIONAL REGISTER — weight: HIGH\n"
59
+ f" Match the tone of the original: casual, urgent, tender, funny, sarcastic — whatever it is.\n"
60
+ f" ✗ Fail: a sarcastic line becomes polite; a tender moment becomes clinical\n"
61
+ f" ✓ Pass: the emotional texture is preserved even if the words are completely different\n\n"
62
+
63
+ f"[5] TRANSLATION PURITY — weight: MEDIUM\n"
64
+ f" Zero English words in the output. None.\n"
65
+ f" This includes: filler words (Oh, Hmm, Well, So, Right), names used as exclamations, "
66
+ f"brand-style interjections. Find the {target_language} equivalent every time.\n"
67
+ f" ✗ Fail: 'Oh, lo siento' / 'Hmm, je ne sais pas'\n"
68
+ f" ✓ Pass: 'Ay, lo siento' / 'Eh bien, je ne sais pas'\n\n"
69
+
70
+ f"[6] WORD-FOR-WORD TRAP — weight: HIGH (avoid this)\n"
71
+ f" Do NOT translate word by word. No one speaks that way.\n"
72
+ f" ✗ Fail (Spanish): 'Ella encontró a su esposo verdadero en su tercer matrimonio a los 40 años'\n"
73
+ f" ✓ Pass (Spanish): 'A los 40, en su tercer intento, encontró al amor de su vida'\n"
74
+ f" Restructure freely. The target language has its own natural word order — use it.\n\n"
75
+
76
+ f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
77
+ f"BEFORE RETURNING OUTPUT:\n"
78
+ f"For each line, silently run this checklist:\n"
79
+ f" ░ Would a native speaker say this naturally out loud?\n"
80
+ f" ░ Is it shorter than the English original?\n"
81
+ f" ░ Are there any commas that create awkward TTS pauses? → break into short sentences\n"
82
+ f" ░ Does the emotional tone match?\n"
83
+ f" ░ Are there any English words hiding in the output?\n"
84
+ f"If any box fails → rewrite that line. Then output.\n"
85
+ f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
86
+
87
+ f"Return ONLY the JSON array. No preamble, no explanation, no duration prefixes."
88
  )
89
 
90
  # Let language-specific handler override the prompt if needed