github-actions[bot] committed on
Commit
96e0666
·
1 Parent(s): 1fa323e

deploy: switch to chatterbox requirements @ 0fae627

Browse files
packages.txt CHANGED
@@ -1,2 +1,4 @@
1
  ffmpeg
2
  nodejs
 
 
 
1
  ffmpeg
2
  nodejs
3
+ fonts-noto-core
4
+ fonts-noto-cjk
server.py CHANGED
@@ -302,12 +302,24 @@ def _queue_status_for(job_id: str) -> str | None:
302
 
303
 
304
  def _config_languages() -> list[str]:
305
- """Expose supported language names from the pipeline."""
306
  from pipeline import LANGUAGE_CODES
307
 
308
  return list(LANGUAGE_CODES.keys())
309
 
310
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  async def _artifact_reaper_loop():
312
  """Delete stale per-job artifact directories from ARTIFACTS_ROOT."""
313
  while True:
@@ -563,6 +575,8 @@ async def config():
563
  "max_file_size_mb": MAX_FILE_SIZE_MB,
564
  "max_duration_sec": MAX_DURATION_SEC,
565
  "languages": _config_languages(),
 
 
566
  "tts_models": [TTS_ENGINE],
567
  "tts_engine": TTS_ENGINE,
568
  }
 
302
 
303
 
304
  def _config_languages() -> list[str]:
305
+ """Expose supported language names from the pipeline (Chatterbox set)."""
306
  from pipeline import LANGUAGE_CODES
307
 
308
  return list(LANGUAGE_CODES.keys())
309
 
310
 
311
+ def _chatterbox_language_options() -> list[dict]:
312
+ from pipeline import LANGUAGE_CODES
313
+
314
+ return [{"name": name, "code": code} for name, code in LANGUAGE_CODES.items()]
315
+
316
+
317
+ def _omnivoice_language_options() -> list[dict]:
318
+ from steps.lang.omnivoice_languages import OMNIVOICE_LANGUAGE_CODES
319
+
320
+ return [{"name": name, "code": code} for name, code in OMNIVOICE_LANGUAGE_CODES.items()]
321
+
322
+
323
  async def _artifact_reaper_loop():
324
  """Delete stale per-job artifact directories from ARTIFACTS_ROOT."""
325
  while True:
 
575
  "max_file_size_mb": MAX_FILE_SIZE_MB,
576
  "max_duration_sec": MAX_DURATION_SEC,
577
  "languages": _config_languages(),
578
+ "chatterbox_languages": _chatterbox_language_options(),
579
+ "omnivoice_languages": _omnivoice_language_options(),
580
  "tts_models": [TTS_ENGINE],
581
  "tts_engine": TTS_ENGINE,
582
  }
steps/lang/omnivoice_languages.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTO-GENERATED from k2-fsa/OmniVoice omnivoice/utils/lang_map.py
2
+ # Source: https://github.com/k2-fsa/OmniVoice/blob/master/omnivoice/utils/lang_map.py
3
+ """Omnivoice-supported languages (display name -> Omnivoice language id)."""
4
+
5
+ OMNIVOICE_LANGUAGE_CODES: dict[str, str] = {
6
+ "Abadi": "kbt",
7
+ "Abkhazian": "ab",
8
+ "Abron": "abr",
9
+ "Abua": "abn",
10
+ "Adamawa Fulfulde": "fub",
11
+ "Adyghe": "ady",
12
+ "Afade": "aal",
13
+ "Afrikaans": "af",
14
+ "Agwagwune": "yay",
15
+ "Aja (Benin)": "ajg",
16
+ "Akebu": "keu",
17
+ "Alago": "ala",
18
+ "Albanian": "sq",
19
+ "Algerian Arabic": "arq",
20
+ "Algerian Saharan Arabic": "aao",
21
+ "Ambo-Pasco Quechua": "qva",
22
+ "Ambonese Malay": "abs",
23
+ "Amdo Tibetan": "adx",
24
+ "Amharic": "am",
25
+ "Anaang": "anw",
26
+ "Angika": "anp",
27
+ "Antankarana Malagasy": "xmv",
28
+ "Aragonese": "an",
29
+ "Arbëreshë Albanian": "aae",
30
+ "Arequipa-La Unión Quechua": "qxu",
31
+ "Armenian": "hy",
32
+ "Ashe": "ahs",
33
+ "Ashéninka Perené": "prq",
34
+ "Askopan": "eiv",
35
+ "Assamese": "as",
36
+ "Asturian": "ast",
37
+ "Atayal": "tay",
38
+ "Awak": "awo",
39
+ "Ayacucho Quechua": "quy",
40
+ "Azerbaijani": "az",
41
+ "Baatonum": "bba",
42
+ "Bacama": "bcy",
43
+ "Bade": "bde",
44
+ "Bafia": "ksf",
45
+ "Bafut": "bfd",
46
+ "Bagirmi Fulfulde": "fui",
47
+ "Bago-Kusuntu": "bqg",
48
+ "Baharna Arabic": "abv",
49
+ "Bakoko": "bkh",
50
+ "Balanta-Ganja": "bjt",
51
+ "Balti": "bft",
52
+ "Bamenyam": "bce",
53
+ "Bamun": "bax",
54
+ "Bangwinji": "bsj",
55
+ "Banjar": "bjn",
56
+ "Bankon": "abb",
57
+ "Baoulé": "bci",
58
+ "Bara Malagasy": "bhr",
59
+ "Barok": "bjk",
60
+ "Basa (Cameroon)": "bas",
61
+ "Basa (Nigeria)": "bzw",
62
+ "Bashkir": "ba",
63
+ "Basque": "eu",
64
+ "Batak Mandailing": "btm",
65
+ "Batanga": "bnm",
66
+ "Bateri": "btv",
67
+ "Bats": "bbl",
68
+ "Bayot": "bda",
69
+ "Bebele": "beb",
70
+ "Belarusian": "be",
71
+ "Bengali": "bn",
72
+ "Betawi": "bew",
73
+ "Bhili": "bhb",
74
+ "Bhojpuri": "bho",
75
+ "Bilur": "bxf",
76
+ "Bima": "bhp",
77
+ "Bodo": "brx",
78
+ "Boghom": "bux",
79
+ "Bokyi": "bky",
80
+ "Bomu": "bmq",
81
+ "Bondei": "bou",
82
+ "Borgu Fulfulde": "fue",
83
+ "Bosnian": "bs",
84
+ "Brahui": "brh",
85
+ "Braj": "bra",
86
+ "Breton": "br",
87
+ "Buduma": "bdm",
88
+ "Buginese": "bug",
89
+ "Bukharic": "bhh",
90
+ "Bulgarian": "bg",
91
+ "Bulu (Cameroon)": "bum",
92
+ "Bundeli": "bns",
93
+ "Bunun": "bnn",
94
+ "Bura-Pabir": "bwr",
95
+ "Burak": "bys",
96
+ "Burmese": "my",
97
+ "Burushaski": "bsk",
98
+ "Cacaloxtepec Mixtec": "miu",
99
+ "Cajatambo North Lima Quechua": "qvl",
100
+ "Cakfem-Mushere": "cky",
101
+ "Cameroon Pidgin": "wes",
102
+ "Campidanese Sardinian": "sro",
103
+ "Cantonese": "yue",
104
+ "Catalan": "ca",
105
+ "Cebuano": "ceb",
106
+ "Cen": "cen",
107
+ "Central Kurdish": "ckb",
108
+ "Central Nahuatl": "nhn",
109
+ "Central Pame": "pbs",
110
+ "Central Pashto": "pst",
111
+ "Central Puebla Nahuatl": "ncx",
112
+ "Central Tarahumara": "tar",
113
+ "Central Yupik": "esu",
114
+ "Central-Eastern Niger Fulfulde": "fuq",
115
+ "Chadian Arabic": "shu",
116
+ "Chichewa": "ny",
117
+ "Chichicapan Zapotec": "zpv",
118
+ "Chiga": "cgg",
119
+ "Chimalapa Zoque": "zoh",
120
+ "Chimborazo Highland Quichua": "qug",
121
+ "Chinese": "zh",
122
+ "Chiquián Ancash Quechua": "qxa",
123
+ "Chitwania Tharu": "the",
124
+ "Chokwe": "cjk",
125
+ "Chuvash": "cv",
126
+ "Cibak": "ckl",
127
+ "Coastal Konjo": "kjc",
128
+ "Copainalá Zoque": "zoc",
129
+ "Cornish": "kw",
130
+ "Corongo Ancash Quechua": "qwa",
131
+ "Croatian": "hr",
132
+ "Cross River Mbembe": "mfn",
133
+ "Cuyamecalco Mixtec": "xtu",
134
+ "Czech": "cs",
135
+ "Dadiya": "dbd",
136
+ "Dagbani": "dag",
137
+ "Dameli": "dml",
138
+ "Danish": "da",
139
+ "Dargwa": "dar",
140
+ "Dazaga": "dzg",
141
+ "Deccan": "dcc",
142
+ "Degema": "deg",
143
+ "Dera (Nigeria)": "kna",
144
+ "Dghwede": "dgh",
145
+ "Dhatki": "mki",
146
+ "Dhivehi": "dv",
147
+ "Dhofari Arabic": "adf",
148
+ "Dijim-Bwilim": "cfa",
149
+ "Dogri": "dgo",
150
+ "Domaaki": "dmk",
151
+ "Dotyali": "dty",
152
+ "Duala": "dua",
153
+ "Dutch": "nl",
154
+ "DũYa": "ldb",
155
+ "Dyula": "dyu",
156
+ "Eastern Balochi": "bgp",
157
+ "Eastern Bolivian Guaraní": "gui",
158
+ "Eastern Egyptian Bedawi Arabic": "avl",
159
+ "Eastern Krahn": "kqo",
160
+ "Eastern Mari": "mhr",
161
+ "Eastern Yiddish": "ydd",
162
+ "Ebrié": "ebr",
163
+ "Eggon": "ego",
164
+ "Egyptian Arabic": "arz",
165
+ "Ejagham": "etu",
166
+ "Eleme": "elm",
167
+ "Eloyi": "afo",
168
+ "Embu": "ebu",
169
+ "English": "en",
170
+ "Erzya": "myv",
171
+ "Esan": "ish",
172
+ "Esperanto": "eo",
173
+ "Estonian": "et",
174
+ "Eton (Cameroon)": "eto",
175
+ "Ewondo": "ewo",
176
+ "Extremaduran": "ext",
177
+ "Fang (Equatorial Guinea)": "fan",
178
+ "Fanti": "fat",
179
+ "Farefare": "gur",
180
+ "Fe'fe'": "fmp",
181
+ "Filipino": "fil",
182
+ "Filomena Mata-Coahuitlán Totonac": "tlp",
183
+ "Finnish": "fi",
184
+ "Fipa": "fip",
185
+ "French": "fr",
186
+ "Fulah": "ff",
187
+ "Galician": "gl",
188
+ "Gambian Wolof": "wof",
189
+ "Ganda": "lg",
190
+ "Garhwali": "gbm",
191
+ "Gawar-Bati": "gwt",
192
+ "Gawri": "gwc",
193
+ "Gbagyi": "gbr",
194
+ "Gbari": "gby",
195
+ "Geji": "gyz",
196
+ "Gen": "gej",
197
+ "Georgian": "ka",
198
+ "German": "de",
199
+ "Geser-Gorom": "ges",
200
+ "Gheg Albanian": "aln",
201
+ "Ghomálá'": "bbj",
202
+ "Gidar": "gid",
203
+ "Glavda": "glw",
204
+ "Goan Konkani": "gom",
205
+ "Goaria": "gig",
206
+ "Goemai": "ank",
207
+ "Gola": "gol",
208
+ "Greek": "el",
209
+ "Guarani": "gn",
210
+ "Guduf-Gava": "gdf",
211
+ "Guerrero Amuzgo": "amu",
212
+ "Gujarati": "gu",
213
+ "Gujari": "gju",
214
+ "Gulf Arabic": "afb",
215
+ "Gurgula": "ggg",
216
+ "Gusii": "guz",
217
+ "Gusilay": "gsl",
218
+ "Gweno": "gwe",
219
+ "Güilá Zapotec": "ztu",
220
+ "Hadothi": "hoj",
221
+ "Hahon": "hah",
222
+ "Haitian": "ht",
223
+ "Hakha Chin": "cnh",
224
+ "Hakö": "hao",
225
+ "Halia": "hla",
226
+ "Hausa": "ha",
227
+ "Hawaiian": "haw",
228
+ "Hazaragi": "haz",
229
+ "Hebrew": "he",
230
+ "Hemba": "hem",
231
+ "Herero": "hz",
232
+ "Highland Konjo": "kjk",
233
+ "Hijazi Arabic": "acw",
234
+ "Hindi": "hi",
235
+ "Huarijio": "var",
236
+ "Huautla Mazatec": "mau",
237
+ "Huaxcaleca Nahuatl": "nhq",
238
+ "Huba": "hbb",
239
+ "Huitepec Mixtec": "mxs",
240
+ "Hula": "hul",
241
+ "Hungarian": "hu",
242
+ "Hunjara-Kaina Ke": "hkk",
243
+ "Hwana": "hwo",
244
+ "Ibibio": "ibb",
245
+ "Icelandic": "is",
246
+ "Idakho-Isukha-Tiriki": "ida",
247
+ "Idoma": "idu",
248
+ "Igbo": "ig",
249
+ "Igo": "ahl",
250
+ "Ikposo": "kpo",
251
+ "Ikwere": "ikw",
252
+ "Imbabura Highland Quichua": "qvi",
253
+ "Indonesian": "id",
254
+ "Indus Kohistani": "mvy",
255
+ "Interlingua (International Auxiliary Language Association)": "ia",
256
+ "Inupiaq": "ik",
257
+ "Irish": "ga",
258
+ "Iron Ossetic": "os",
259
+ "Isekiri": "its",
260
+ "Isoko": "iso",
261
+ "Italian": "it",
262
+ "Ito": "itw",
263
+ "Itzá": "itz",
264
+ "Ixtayutla Mixtec": "vmj",
265
+ "Izon": "ijc",
266
+ "Jambi Malay": "jax",
267
+ "Japanese": "ja",
268
+ "Jaqaru": "jqr",
269
+ "Jauja Wanca Quechua": "qxw",
270
+ "Jaunsari": "jns",
271
+ "Javanese": "jv",
272
+ "Jiba": "juo",
273
+ "Jju": "kaj",
274
+ "Judeo-Moroccan Arabic": "aju",
275
+ "Juxtlahuaca Mixtec": "vmc",
276
+ "Kabardian": "kbd",
277
+ "Kabras": "lkb",
278
+ "Kabuverdianu": "kea",
279
+ "Kabyle": "kab",
280
+ "Kachi Koli": "gjk",
281
+ "Kairak": "ckr",
282
+ "Kalabari": "ijn",
283
+ "Kalasha": "kls",
284
+ "Kalenjin": "kln",
285
+ "Kalkoti": "xka",
286
+ "Kamba": "kam",
287
+ "Kamo": "kcq",
288
+ "Kanauji": "bjj",
289
+ "Kanembu": "kbl",
290
+ "Kannada": "kn",
291
+ "Karekare": "kai",
292
+ "Kashmiri": "ks",
293
+ "Kathoriya Tharu": "tkt",
294
+ "Kati": "bsh",
295
+ "Kazakh": "kk",
296
+ "Keiyo": "eyo",
297
+ "Khams Tibetan": "khg",
298
+ "Khana": "ogo",
299
+ "Khetrani": "xhe",
300
+ "Khmer": "km",
301
+ "Khowar": "khw",
302
+ "Kinga": "zga",
303
+ "Kinnauri": "kfk",
304
+ "Kinyarwanda": "rw",
305
+ "Kirghiz": "ky",
306
+ "Kirya-Konzəl": "fkk",
307
+ "Kochila Tharu": "thq",
308
+ "Kohistani Shina": "plk",
309
+ "Kohumono": "bcs",
310
+ "Kok Borok": "trp",
311
+ "Kol (Papua New Guinea)": "kol",
312
+ "Kom (Cameroon)": "bkm",
313
+ "Koma": "kmy",
314
+ "Konkani": "knn",
315
+ "Konzo": "koo",
316
+ "Korean": "ko",
317
+ "Korwa": "kfp",
318
+ "Kota (India)": "kfe",
319
+ "Koti": "eko",
320
+ "Kuanua": "ksd",
321
+ "Kuanyama": "kj",
322
+ "Kui (India)": "uki",
323
+ "Kulung (Nigeria)": "bbu",
324
+ "Kuot": "kto",
325
+ "Kushi": "kuh",
326
+ "Kwambi": "kwm",
327
+ "Kwasio": "nmg",
328
+ "Lala-Roba": "lla",
329
+ "Lamang": "hia",
330
+ "Lao": "lo",
331
+ "Larike-Wakasihu": "alo",
332
+ "Lasi": "lss",
333
+ "Latgalian": "ltg",
334
+ "Latvian": "lv",
335
+ "Levantine Arabic": "apc",
336
+ "Liana-Seti": "ste",
337
+ "Liberia Kpelle": "xpe",
338
+ "Liberian English": "lir",
339
+ "Libyan Arabic": "ayl",
340
+ "Ligurian": "lij",
341
+ "Lijili": "mgi",
342
+ "Lingala": "ln",
343
+ "Lithuanian": "lt",
344
+ "Loarki": "lrk",
345
+ "Logooli": "rag",
346
+ "Logudorese Sardinian": "src",
347
+ "Loja Highland Quichua": "qvj",
348
+ "Loloda": "loa",
349
+ "Longuda": "lnu",
350
+ "Loxicha Zapotec": "ztp",
351
+ "Luba-Lulua": "lua",
352
+ "Luo": "luo",
353
+ "Lushai": "lus",
354
+ "Luxembourgish": "lb",
355
+ "Maasina Fulfulde": "ffm",
356
+ "Maba (Chad)": "mde",
357
+ "Macedo-Romanian": "rup",
358
+ "Macedonian": "mk",
359
+ "Mada (Cameroon)": "mxu",
360
+ "Mafa": "maf",
361
+ "Maithili": "mai",
362
+ "Malay": "ms",
363
+ "Malayalam": "ml",
364
+ "Mali": "gcc",
365
+ "Malinaltepec Me'phaa": "tcf",
366
+ "Maltese": "mt",
367
+ "Mandara": "tbf",
368
+ "Mandjak": "mfv",
369
+ "Manggarai": "mqy",
370
+ "Manipuri": "mni",
371
+ "Mansoanka": "msw",
372
+ "Manx": "gv",
373
+ "Maori": "mi",
374
+ "Marathi": "mr",
375
+ "Marghi Central": "mrt",
376
+ "Marghi South": "mfm",
377
+ "Maria (India)": "mrr",
378
+ "Marwari (Pakistan)": "mve",
379
+ "Masana": "mcn",
380
+ "Masikoro Malagasy": "msh",
381
+ "Matsés": "mcf",
382
+ "Mazaltepec Zapotec": "zpy",
383
+ "Mazatlán Mazatec": "vmz",
384
+ "Mazatlán Mixe": "mzl",
385
+ "Mbe": "mfo",
386
+ "Mbo (Cameroon)": "mbo",
387
+ "Mbum": "mdd",
388
+ "Medumba": "byv",
389
+ "Mekeo": "mek",
390
+ "Meru": "mer",
391
+ "Mesopotamian Arabic": "acm",
392
+ "Mewari": "mtr",
393
+ "Min Nan Chinese": "nan",
394
+ "Mingrelian": "xmf",
395
+ "Mitlatongo Mixtec": "vmm",
396
+ "Miya": "mkf",
397
+ "Mokpwe": "bri",
398
+ "Moksha": "mdf",
399
+ "Mom Jango": "ver",
400
+ "Mongolian": "mn",
401
+ "Moroccan Arabic": "ary",
402
+ "Motu": "meu",
403
+ "Mpiemo": "mcx",
404
+ "Mpumpong": "mgg",
405
+ "Mundang": "mua",
406
+ "Mungaka": "mhk",
407
+ "Musey": "mse",
408
+ "Musgu": "mug",
409
+ "Musi": "mui",
410
+ "Naba": "mne",
411
+ "Najdi Arabic": "ars",
412
+ "Nalik": "nal",
413
+ "Nawdm": "nmz",
414
+ "Ndonga": "ng",
415
+ "Neapolitan": "nap",
416
+ "Nepali": "npi",
417
+ "Ngamo": "nbh",
418
+ "Ngas": "anc",
419
+ "Ngiemboon": "nnh",
420
+ "Ngizim": "ngi",
421
+ "Ngomba": "jgo",
422
+ "Ngombale": "nla",
423
+ "Nigerian Fulfulde": "fuv",
424
+ "Nigerian Pidgin": "pcm",
425
+ "Nimadi": "noe",
426
+ "Nobiin": "fia",
427
+ "North Mesopotamian Arabic": "ayp",
428
+ "North Moluccan Malay": "max",
429
+ "Northern Betsimisaraka Malagasy": "bmm",
430
+ "Northern Hindko": "hno",
431
+ "Northern Kurdish": "kmr",
432
+ "Northern Pame": "pmq",
433
+ "Northern Pashto": "pbu",
434
+ "Northern Uzbek": "uzn",
435
+ "Northwest Gbaya": "gya",
436
+ "Norwegian": "no",
437
+ "Norwegian Bokmål": "nb",
438
+ "Norwegian Nynorsk": "nn",
439
+ "Notsi": "ncf",
440
+ "Nyankpa": "yes",
441
+ "Nyungwe": "nyu",
442
+ "Nzanyi": "nja",
443
+ "Nüpode Huitoto": "hux",
444
+ "Occitan": "oc",
445
+ "Od": "odk",
446
+ "Odia": "ory",
447
+ "Odual": "odu",
448
+ "Omani Arabic": "acx",
449
+ "Orizaba Nahuatl": "nlv",
450
+ "Orma": "orc",
451
+ "Ormuri": "oru",
452
+ "Oromo": "om",
453
+ "Pahari-Potwari": "phr",
454
+ "Paiwan": "pwn",
455
+ "Panjabi": "pa",
456
+ "Papuan Malay": "pmy",
457
+ "Parkari Koli": "kvx",
458
+ "Pedi": "nso",
459
+ "Pero": "pip",
460
+ "Persian": "fa",
461
+ "Petats": "pex",
462
+ "Phalura": "phl",
463
+ "Piemontese": "pms",
464
+ "Piya-Kwonci": "piy",
465
+ "Plateau Malagasy": "plt",
466
+ "Polish": "pl",
467
+ "Poqomam": "poc",
468
+ "Portuguese": "pt",
469
+ "Pulaar": "fuc",
470
+ "Pular": "fuf",
471
+ "Puno Quechua": "qxp",
472
+ "Pushto": "ps",
473
+ "Pökoot": "pko",
474
+ "Qaqet": "byx",
475
+ "Quiotepec Chinantec": "chq",
476
+ "Rana Tharu": "thr",
477
+ "Rangi": "lag",
478
+ "Rapoisi": "kyx",
479
+ "Ratahan": "rth",
480
+ "Rayón Zoque": "zor",
481
+ "Romanian": "ro",
482
+ "Romansh": "rm",
483
+ "Rombo": "rof",
484
+ "Rotokas": "roo",
485
+ "Rukai": "dru",
486
+ "Russian": "ru",
487
+ "Sacapulteco": "quv",
488
+ "Saidi Arabic": "aec",
489
+ "Sakalava Malagasy": "skg",
490
+ "Sakizaya": "szy",
491
+ "Saleman": "sau",
492
+ "Samba Daka": "ccg",
493
+ "Samba Leko": "ndi",
494
+ "San Felipe Otlaltepec Popoloca": "pow",
495
+ "San Francisco Del Mar Huave": "hue",
496
+ "San Juan Atzingo Popoloca": "poe",
497
+ "San Martín Itunyoso Triqui": "trq",
498
+ "San Miguel El Grande Mixtec": "mig",
499
+ "Sansi": "ssi",
500
+ "Sanskrit": "sa",
501
+ "Santa Ana de Tusi Pasco Quechua": "qxt",
502
+ "Santa Catarina Albarradas Zapotec": "ztn",
503
+ "Santali": "sat",
504
+ "Santiago del Estero Quichua": "qus",
505
+ "Saposa": "sps",
506
+ "Saraiki": "skr",
507
+ "Sardinian": "sc",
508
+ "Saya": "say",
509
+ "Sediq": "trv",
510
+ "Serbian": "sr",
511
+ "Seri": "sei",
512
+ "Shina": "scl",
513
+ "Shona": "sn",
514
+ "Siar-Lak": "sjr",
515
+ "Sibe": "nco",
516
+ "Sicilian": "scn",
517
+ "Sihuas Ancash Quechua": "qws",
518
+ "Sikkimese": "sip",
519
+ "Sinaugoro": "snc",
520
+ "Sindhi": "sd",
521
+ "Sindhi Bhil": "sbn",
522
+ "Sinhala": "si",
523
+ "Sinicahua Mixtec": "xti",
524
+ "Sipacapense": "qum",
525
+ "Siwai": "siw",
526
+ "Slovak": "sk",
527
+ "Slovenian": "sl",
528
+ "Solos": "sol",
529
+ "Somali": "so",
530
+ "Soninke": "snk",
531
+ "South Giziga": "giz",
532
+ "South Ucayali Ashéninka": "cpy",
533
+ "Southeastern Nochixtlán Mixtec": "mxy",
534
+ "Southern Betsimisaraka Malagasy": "bzc",
535
+ "Southern Pashto": "pbt",
536
+ "Southern Pastaza Quechua": "qup",
537
+ "Soyaltepec Mazatec": "vmp",
538
+ "Spanish": "es",
539
+ "Standard Arabic": "arb",
540
+ "Standard Moroccan Tamazight": "zgh",
541
+ "Sudanese Arabic": "apd",
542
+ "Sulka": "sua",
543
+ "Svan": "sva",
544
+ "Swahili": "sw",
545
+ "Swedish": "sv",
546
+ "Tae'": "rob",
547
+ "Tahaggart Tamahaq": "thv",
548
+ "Taita": "dav",
549
+ "Tajik": "tg",
550
+ "Tamil": "ta",
551
+ "Tandroy-Mahafaly Malagasy": "tdx",
552
+ "Tangale": "tan",
553
+ "Tanosy Malagasy": "txy",
554
+ "Tarok": "yer",
555
+ "Tatar": "tt",
556
+ "Tedaga": "tuq",
557
+ "Telugu": "te",
558
+ "Tem": "kdh",
559
+ "Teop": "tio",
560
+ "Tepeuxila Cuicatec": "cux",
561
+ "Tepinapa Chinantec": "cte",
562
+ "Tera": "ttr",
563
+ "Terei": "buo",
564
+ "Termanu": "twu",
565
+ "Tesaka Malagasy": "tkg",
566
+ "Tetelcingo Nahuatl": "nhg",
567
+ "Teutila Cuicatec": "cut",
568
+ "Thai": "th",
569
+ "Tibetan": "bo",
570
+ "Tidaá Mixtec": "mtx",
571
+ "Tidore": "tvo",
572
+ "Tigak": "tgc",
573
+ "Tigre": "tig",
574
+ "Tigrinya": "ti",
575
+ "Tilquiapan Zapotec": "zts",
576
+ "Tinputz": "tpz",
577
+ "Tlacoapa Me'phaa": "tpl",
578
+ "Tlacoatzintepec Chinantec": "ctl",
579
+ "Tlingit": "tli",
580
+ "Toki Pona": "tok",
581
+ "Tomoip": "tqp",
582
+ "Tondano": "tdn",
583
+ "Tonsea": "txs",
584
+ "Tooro": "ttj",
585
+ "Torau": "ttu",
586
+ "Torwali": "trw",
587
+ "Tsimihety Malagasy": "xmw",
588
+ "Tsotso": "lto",
589
+ "Tswana": "tn",
590
+ "Tugen": "tuy",
591
+ "Tuki": "bag",
592
+ "Tula": "tul",
593
+ "Tulu": "tcy",
594
+ "Tunen": "tvu",
595
+ "Tungag": "lcm",
596
+ "Tunisian Arabic": "aeb",
597
+ "Tupuri": "tui",
598
+ "Turkana": "tuv",
599
+ "Turkish": "tr",
600
+ "Turkmen": "tk",
601
+ "Tututepec Mixtec": "mtu",
602
+ "Twi": "tw",
603
+ "Ubaghara": "byc",
604
+ "Uighur": "ug",
605
+ "Ukrainian": "uk",
606
+ "Umbundu": "umb",
607
+ "Upper Sorbian": "hsb",
608
+ "Urdu": "ur",
609
+ "Ushojo": "ush",
610
+ "Uzbek": "uz",
611
+ "Vai": "vai",
612
+ "Vietnamese": "vi",
613
+ "Votic": "vot",
614
+ "Võro": "vro",
615
+ "Waci Gbe": "wci",
616
+ "Wadiyara Koli": "kxp",
617
+ "Waja": "wja",
618
+ "Wakhi": "wbl",
619
+ "Wanga": "lwg",
620
+ "Wapan": "juk",
621
+ "Warji": "wji",
622
+ "Welsh": "cy",
623
+ "Wemale": "weo",
624
+ "Western Frisian": "fy",
625
+ "Western Highland Purepecha": "pua",
626
+ "Western Juxtlahuaca Mixtec": "jmx",
627
+ "Western Maninkakan": "mlq",
628
+ "Western Mari": "mrj",
629
+ "Western Niger Fulfulde": "fuh",
630
+ "Western Panjabi": "pnb",
631
+ "Wolof": "wo",
632
+ "Wuzlam": "udl",
633
+ "Xanaguía Zapotec": "ztg",
634
+ "Xhosa": "xh",
635
+ "Yace": "ekr",
636
+ "Yakut": "sah",
637
+ "Yalahatan": "jal",
638
+ "Yanahuanca Pasco Quechua": "qur",
639
+ "Yangben": "yav",
640
+ "Yaqui": "yaq",
641
+ "Yauyos Quechua": "qux",
642
+ "Yekhee": "ets",
643
+ "Yiddish": "yi",
644
+ "Yidgha": "ydg",
645
+ "Yoruba": "yo",
646
+ "Yutanduchi Mixtec": "mab",
647
+ "Zacatlán-Ahuacatlán-Tepetzintla Nahuatl": "nhi",
648
+ "Zarma": "dje",
649
+ "Zaza": "zza",
650
+ "Zulu": "zu",
651
+ "Ömie": "aom",
652
+ }
steps/s6_captions.py CHANGED
@@ -155,7 +155,7 @@ def generate_captions(
155
  wrap_style = 2 if is_rtl else 0
156
  # Tahoma has reliable Arabic/Urdu shaping across macOS/Windows/Linux ffmpeg
157
  # builds; Arial often lacks the glyph coverage on headless Linux.
158
- font = "Tahoma" if is_rtl else "Arial"
159
  # Encoding 178 = Windows Arabic codepage — hints libass font selection.
160
  encoding = 178 if is_rtl else 0
161
 
 
155
  wrap_style = 2 if is_rtl else 0
156
  # Tahoma has reliable Arabic/Urdu shaping across macOS/Windows/Linux ffmpeg
157
  # builds; Arial often lacks the glyph coverage on headless Linux, so non-RTL captions use Noto Sans instead.
158
+ font = "Tahoma" if is_rtl else "Noto Sans"
159
  # Encoding 178 = Windows Arabic codepage — hints libass font selection.
160
  encoding = 178 if is_rtl else 0
161