Spaces:
Running on Zero
Running on Zero
github-actions[bot] committed on
Commit ·
561adc1
1
Parent(s): 82e7376
deploy: switch to chatterbox requirements @ 1e57710
Browse files- steps/lang/_shared.py +1 -1
- steps/s3_translate.py +64 -24
steps/lang/_shared.py
CHANGED
|
@@ -10,7 +10,7 @@ from dotenv import load_dotenv
|
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
|
| 13 |
-
MODEL = os.getenv("POLLEN_MODEL", "
|
| 14 |
|
| 15 |
|
| 16 |
def build_client() -> OpenAI:
|
|
|
|
| 10 |
load_dotenv()
|
| 11 |
|
| 12 |
POLLINATIONS_BASE = "https://gen.pollinations.ai/v1"
|
| 13 |
+
MODEL = os.getenv("POLLEN_MODEL", "openai-large")
|
| 14 |
|
| 15 |
|
| 16 |
def build_client() -> OpenAI:
|
steps/s3_translate.py
CHANGED
|
@@ -21,30 +21,70 @@ def _translate_batch(segments: list[dict], target_language: str) -> list[dict]:
|
|
| 21 |
|
| 22 |
# Default prompt (generic, works for most languages)
|
| 23 |
default_prompt = (
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
)
|
| 49 |
|
| 50 |
# Let language-specific handler override the prompt if needed
|
|
|
|
| 21 |
|
| 22 |
# Default prompt (generic, works for most languages)
|
| 23 |
default_prompt = (
|
| 24 |
+
f"You are a voice-over dubbing writer — not a translator. "
|
| 25 |
+
f"Your job is to write what a native {target_language} speaker would *actually say out loud* "
|
| 26 |
+
f"in a casual, natural conversation. Forget the source words. Capture the meaning, tone, and energy.\n\n"
|
| 27 |
+
|
| 28 |
+
f"INPUT FORMAT:\n"
|
| 29 |
+
f"Numbered lines with a spoken duration in brackets, e.g.: 1. [4.6s] Hello there\n\n"
|
| 30 |
+
|
| 31 |
+
f"OUTPUT FORMAT:\n"
|
| 32 |
+
f"A JSON array of strings — one per input line, in order. No numbering, no brackets, no extra text.\n"
|
| 33 |
+
f'Example: ["¿Qué tal?", "Adiós"]\n\n'
|
| 34 |
+
|
| 35 |
+
f"βββββββββββββββββββββββββββββββββββββ\n"
|
| 36 |
+
f"SCORING RUBRIC — evaluate every line against these before outputting:\n"
|
| 37 |
+
f"βββββββββββββββββββββββββββββββββββββ\n\n"
|
| 38 |
+
|
| 39 |
+
f"[1] NATURALNESS — weight: HIGH\n"
|
| 40 |
+
f" Would a native speaker actually say this in real life?\n"
|
| 41 |
+
f" ✗ Fail: dictionary phrasing, formal register, textbook grammar\n"
|
| 42 |
+
f" ✓ Pass: contractions, colloquial rhythm, everyday vocabulary\n"
|
| 43 |
+
f" Ask yourself: 'Would I hear this in a TV show or on the street?' If no → rewrite.\n\n"
|
| 44 |
+
|
| 45 |
+
f"[2] SPOKEN FIT — weight: CRITICAL\n"
|
| 46 |
+
f" The line will be read by TTS within the duration shown in brackets.\n"
|
| 47 |
+
f" Fewer words is almost always safer. Aim for 70–80% of the original word count.\n"
|
| 48 |
+
f" ✗ Fail: translation is longer or same length as the English\n"
|
| 49 |
+
f" ✓ Pass: shorter, with no loss of core meaning or emotional tone\n"
|
| 50 |
+
f" Trick: cut filler, merge ideas, use contractions and short-form spoken words.\n\n"
|
| 51 |
+
|
| 52 |
+
f"[3] TTS READABILITY — weight: HIGH\n"
|
| 53 |
+
f" Long sentences with multiple commas trip up TTS engines.\n"
|
| 54 |
+
f" ✗ Fail: 'She met him, her true love, on a rainy evening, in the city she once fled.'\n"
|
| 55 |
+
f" ✓ Pass: 'She met him on a rainy evening. Her true love. In the city she once fled.'\n"
|
| 56 |
+
f" Short beats. Natural pauses. Each sentence punches clean.\n\n"
|
| 57 |
+
|
| 58 |
+
f"[4] EMOTIONAL REGISTER — weight: HIGH\n"
|
| 59 |
+
f" Match the tone of the original: casual, urgent, tender, funny, sarcastic — whatever it is.\n"
|
| 60 |
+
f" ✗ Fail: a sarcastic line becomes polite; a tender moment becomes clinical\n"
|
| 61 |
+
f" ✓ Pass: the emotional texture is preserved even if the words are completely different\n\n"
|
| 62 |
+
|
| 63 |
+
f"[5] TRANSLATION PURITY — weight: MEDIUM\n"
|
| 64 |
+
f" Zero English words in the output. None.\n"
|
| 65 |
+
f" This includes: filler words (Oh, Hmm, Well, So, Right), names used as exclamations, "
|
| 66 |
+
f"brand-style interjections. Find the {target_language} equivalent every time.\n"
|
| 67 |
+
f" ✗ Fail: 'Oh, lo siento' / 'Hmm, je ne sais pas'\n"
|
| 68 |
+
f" ✓ Pass: 'Ay, lo siento' / 'Eh bien, je ne sais pas'\n\n"
|
| 69 |
+
|
| 70 |
+
f"[6] WORD-FOR-WORD TRAP — weight: HIGH (avoid this)\n"
|
| 71 |
+
f" Do NOT translate word by word. No one speaks that way.\n"
|
| 72 |
+
f" ✗ Fail (Spanish): 'Ella encontró a su esposo verdadero en su tercer matrimonio a los 40 años'\n"
|
| 73 |
+
f" ✓ Pass (Spanish): 'A los 40, en su tercer intento, encontró al amor de su vida'\n"
|
| 74 |
+
f" Restructure freely. The target language has its own natural word order — use it.\n\n"
|
| 75 |
+
|
| 76 |
+
f"βββββββββββββββββββββββββββββββββββββ\n"
|
| 77 |
+
f"BEFORE RETURNING OUTPUT:\n"
|
| 78 |
+
f"For each line, silently run this checklist:\n"
|
| 79 |
+
f" □ Would a native speaker say this naturally out loud?\n"
|
| 80 |
+
f" □ Is it shorter than the English original?\n"
|
| 81 |
+
f" □ Are there any commas that create awkward TTS pauses? → break into short sentences\n"
|
| 82 |
+
f" □ Does the emotional tone match?\n"
|
| 83 |
+
f" □ Are there any English words hiding in the output?\n"
|
| 84 |
+
f"If any box fails → rewrite that line. Then output.\n"
|
| 85 |
+
f"βββββββββββββββββββββββββββββββββββββ\n\n"
|
| 86 |
+
|
| 87 |
+
f"Return ONLY the JSON array. No preamble, no explanation, no duration prefixes."
|
| 88 |
)
|
| 89 |
|
| 90 |
# Let language-specific handler override the prompt if needed
|