boffire committed on
Commit
2c13ff5
·
verified ·
1 Parent(s): eaf6981

Update src/gradio_app.py

Browse files
Files changed (1) hide show
  1. src/gradio_app.py +105 -13
src/gradio_app.py CHANGED
@@ -6,6 +6,7 @@ import tempfile
6
  import re as regex
7
  import yt_dlp
8
  import glob
 
9
 
10
  # --- Configuration ---
11
  MAX_SIZE_MB = "50"
@@ -13,6 +14,11 @@ MAX_SECONDS = 60
13
  LIBRE_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
14
  TRANSLATE_URL = "https://imsidag-community-libretranslate-kabyle.hf.space/translate"
15
 
 
 
 
 
 
16
  # --- Translation Logic ---
17
  def translate_to_english(text):
18
  if not text or any(symbol in text for symbol in ["⚠️", "❌"]):
@@ -81,6 +87,52 @@ def download_soundcloud_audio(url: str) -> str:
81
  except Exception as e:
82
  raise RuntimeError(f"yt-dlp failed: {str(e)}")
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # --- Unified Processing Logic ---
85
  def process_audio(audio_file):
86
  """Handles validation -> Transcription -> Translation."""
@@ -124,6 +176,24 @@ def process_soundcloud(url):
124
 
125
  return transcript, translation
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # --- Build Gradio UI ---
128
  with gr.Blocks(title="🎙️ Mmeslay") as demo:
129
  gr.Markdown(
@@ -132,7 +202,7 @@ with gr.Blocks(title="🎙️ Mmeslay") as demo:
132
  ### Kabyle ASR & Translation
133
  *Powered by Squeezeformer (ASR) and LibreTranslate (NMT)*
134
 
135
- Upload a Kabyle audio file, record directly, **or paste a SoundCloud link** to get a transcript and English translation.
136
  """
137
  )
138
 
@@ -170,28 +240,50 @@ with gr.Blocks(title="🎙️ Mmeslay") as demo:
170
  inputs=audio_input,
171
  )
172
 
173
- with gr.Tab("🎵 SoundCloud Link"):
174
  with gr.Row():
175
  with gr.Column(scale=1):
176
- sc_url = gr.Textbox(
177
- label="SoundCloud URL",
178
- placeholder="https://soundcloud.com/artist/track",
179
- lines=1
180
  )
181
- sc_btn = gr.Button("🚀 Download & Transcribe", variant="primary", size="lg")
 
182
 
183
  with gr.Column(scale=2):
184
- text_output_2 = gr.Textbox(label="Transcription (Kabyle)", lines=5)
185
- translation_output_2 = gr.Textbox(
186
  label="LibreTranslate (English)",
187
  lines=5,
188
  placeholder="English LibreTranslate translation will appear here..."
189
  )
190
 
191
- sc_btn.click(
192
- fn=process_soundcloud,
193
- inputs=sc_url,
194
- outputs=[text_output_2, translation_output_2],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  )
196
 
197
  gr.Markdown(
 
6
  import re as regex
7
  import yt_dlp
8
  import glob
9
+ import random
10
 
11
  # --- Configuration ---
12
  MAX_SIZE_MB = "50"
 
14
  LIBRE_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
15
  TRANSLATE_URL = "https://imsidag-community-libretranslate-kabyle.hf.space/translate"
16
 
17
+ # --- Dataset Configuration ---
18
+ DATASET_REPO = "boffire/kabyle-synth-voice"
19
+ DATASET_AUDIO_BASE_URL = f"https://huggingface.co/datasets/{DATASET_REPO}/resolve/main/audio"
20
+ DATASET_API_TREE_URL = f"https://huggingface.co/api/datasets/{DATASET_REPO}/tree/main/audio"
21
+
22
  # --- Translation Logic ---
23
  def translate_to_english(text):
24
  if not text or any(symbol in text for symbol in ["⚠️", "❌"]):
 
87
  except Exception as e:
88
  raise RuntimeError(f"yt-dlp failed: {str(e)}")
89
 
90
+ # --- Dataset Random Sample Logic ---
91
+ _audio_files_cache = None
92
+
93
def get_dataset_audio_files():
    """Return the names of the ``.wav`` files listed under the dataset's ``audio/`` folder.

    The listing is requested from ``DATASET_API_TREE_URL`` and, once a
    non-empty result has been obtained, kept in the module-level
    ``_audio_files_cache`` so later calls do not hit the network again.

    Returns:
        list[str]: File names with the leading ``audio/`` folder removed.

    Raises:
        RuntimeError: If the HTTP request fails or the response cannot be
            interpreted as a list of tree entries.
    """
    global _audio_files_cache
    if _audio_files_cache is not None:
        return _audio_files_cache

    prefix = "audio/"
    try:
        resp = requests.get(DATASET_API_TREE_URL, timeout=15)
        resp.raise_for_status()
        items = resp.json()

        files = []
        for item in items:
            path = item.get("path", "")
            if item.get("type") != "file" or not path.endswith(".wav"):
                continue
            # Strip only the leading folder; str.replace() would also remove
            # any later "audio/" occurrence inside the path.
            if path.startswith(prefix):
                path = path[len(prefix):]
            files.append(path)
    except Exception as e:
        raise RuntimeError(f"Failed to fetch dataset file list: {e}") from e

    # Do not cache an empty listing: it would make every later call report
    # "no files" until the process restarts, even if the dataset is fine.
    if files:
        _audio_files_cache = files
    return files
113
+
114
def download_random_dataset_sample() -> str:
    """Download one randomly chosen dataset audio file to a temporary file.

    The file name is picked with ``random.choice`` from
    ``get_dataset_audio_files()`` and fetched from ``DATASET_AUDIO_BASE_URL``.

    Returns:
        str: Path of the downloaded file. The caller is responsible for
        deleting it.

    Raises:
        RuntimeError: If the dataset listing is empty, cannot be fetched, or
            the download fails.
    """
    files = get_dataset_audio_files()
    if not files:
        raise RuntimeError("No audio files found in the dataset.")

    filename = random.choice(files)
    file_url = f"{DATASET_AUDIO_BASE_URL}/{filename}"

    # Use a unique temp file instead of a fixed "dataset_<filename>" path:
    # two concurrent requests that pick the same sample would otherwise
    # overwrite or delete each other's file, and the server-supplied name
    # never becomes part of a local path.
    suffix = os.path.splitext(filename)[1] or ".wav"
    fd, local_path = tempfile.mkstemp(prefix="dataset_", suffix=suffix)

    try:
        # Both the file handle and the streamed response are closed on exit,
        # so the HTTP connection is released even if the transfer fails.
        with os.fdopen(fd, "wb") as f, requests.get(
            file_url, timeout=30, stream=True
        ) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
        return local_path
    except Exception as e:
        # Do not leave a partial download behind.
        try:
            os.remove(local_path)
        except OSError:
            pass
        raise RuntimeError(f"Failed to download {filename}: {e}") from e
135
+
136
  # --- Unified Processing Logic ---
137
  def process_audio(audio_file):
138
  """Handles validation -> Transcription -> Translation."""
 
176
 
177
  return transcript, translation
178
 
179
def process_random_dataset():
    """Download a random dataset sample and run it through ``process_audio``.

    Returns:
        tuple[str, str]: ``(transcript, translation)`` as produced by
        ``process_audio``. If the sample cannot be downloaded, the first
        element is a "❌ Dataset Error: ..." message and the second is empty.
    """
    try:
        audio_path = download_random_dataset_sample()
    except Exception as e:
        return f"❌ Dataset Error: {str(e)}", ""

    try:
        transcript, translation = process_audio(audio_path)
    finally:
        # Remove the temp file even when process_audio raises; a file that is
        # already gone (or cannot be removed) is not worth failing over.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    return transcript, translation
196
+
197
  # --- Build Gradio UI ---
198
  with gr.Blocks(title="🎙️ Mmeslay") as demo:
199
  gr.Markdown(
 
202
  ### Kabyle ASR & Translation
203
  *Powered by Squeezeformer (ASR) and LibreTranslate (NMT)*
204
 
205
+ Upload a Kabyle audio file, record directly, **or pick a random sample** from the Kabyle Synth Voice dataset to get a transcript and English translation.
206
  """
207
  )
208
 
 
240
  inputs=audio_input,
241
  )
242
 
243
+ with gr.Tab("🎲 Random Dataset Sample"):
244
  with gr.Row():
245
  with gr.Column(scale=1):
246
+ gr.Markdown(
247
+ """
248
+ Click the button below to fetch a **random audio sample** from the [Kabyle Synth Voice](https://huggingface.co/datasets/boffire/kabyle-synth-voice) dataset.
249
+ """
250
  )
251
+ random_btn = gr.Button("🎲 Pick Random & Transcribe", variant="primary", size="lg")
252
+ dataset_status = gr.Textbox(label="Status", interactive=False, value="Ready")
253
 
254
  with gr.Column(scale=2):
255
+ text_output_3 = gr.Textbox(label="Transcription (Kabyle)", lines=5)
256
+ translation_output_3 = gr.Textbox(
257
  label="LibreTranslate (English)",
258
  lines=5,
259
  placeholder="English LibreTranslate translation will appear here..."
260
  )
261
 
262
def process_random_with_status():
    """Stream status updates while fetching and transcribing a random sample.

    Generator used as the click handler of the random-sample button.

    Yields:
        tuple[str, str, str]: ``(status, transcript, translation)``.
        Transcript and translation stay empty until the final "✅ Done!"
        update, or permanently if the download fails.
    """
    yield "⏳ Fetching random sample...", "", ""
    try:
        audio_path = download_random_dataset_sample()
    except Exception as e:
        yield f"❌ Dataset Error: {str(e)}", "", ""
        return

    try:
        # The yield sits inside the try so the temp file is also removed when
        # the generator is closed here (e.g. the request is cancelled).
        yield "⏳ Transcribing...", "", ""
        transcript, translation = process_audio(audio_path)
    finally:
        # Best-effort cleanup; runs even if process_audio raises.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    yield "✅ Done!", transcript, translation
282
+
283
+ random_btn.click(
284
+ fn=process_random_with_status,
285
+ inputs=[],
286
+ outputs=[dataset_status, text_output_3, translation_output_3],
287
  )
288
 
289
  gr.Markdown(