github-actions[bot] committed on
Commit
ba2e30f
·
1 Parent(s): e92ff79

Auto-deploy from GitHub: 18b54f17939a0679569c10197cff43c713b55b9e

Browse files
Files changed (1) hide show
  1. app.py +72 -6
app.py CHANGED
@@ -33,9 +33,10 @@ def init_db():
33
  caption TEXT,
34
  created_at TEXT NOT NULL,
35
  processed_at TEXT,
 
 
36
  hide_from_ui INTEGER DEFAULT 0)'''
37
  )
38
-
39
  conn.commit()
40
  conn.close()
41
 
@@ -140,12 +141,17 @@ def worker_loop():
140
  print(f"🔄 Running STT on: {os.path.abspath(filepath)}")
141
  command = f"""cd {CWD} && {PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {STT_MODEL_NAME}"""
142
 
143
- subprocess.run(
 
 
144
  command,
145
  shell=True,
146
  executable="/bin/bash",
147
- check=True,
 
148
  cwd=CWD,
 
 
149
  env={
150
  **os.environ,
151
  'PYTHONUNBUFFERED': '1',
@@ -154,6 +160,53 @@ def worker_loop():
154
  }
155
  )
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  # Read transcription result
158
  output_path = f'{CWD}/temp_dir/output_transcription.json'
159
  with open(output_path, 'r') as file:
@@ -189,6 +242,15 @@ def worker_loop():
189
  print(f"⚠️ Worker error: {str(e)}")
190
  time.sleep(POLL_INTERVAL)
191
 
 
 
 
 
 
 
 
 
 
192
  def update_status(file_id, status, caption=None, error=None):
193
  """Update the status of a file in the database"""
194
  conn = sqlite3.connect('audio_captions.db')
@@ -196,12 +258,12 @@ def update_status(file_id, status, caption=None, error=None):
196
 
197
  if status == 'completed':
198
  c.execute('''UPDATE audio_files
199
- SET status = ?, caption = ?, processed_at = ?
200
  WHERE id = ?''',
201
  (status, caption, datetime.now().isoformat(), file_id))
202
  elif status == 'failed':
203
  c.execute('''UPDATE audio_files
204
- SET status = ?, caption = ?, processed_at = ?
205
  WHERE id = ?''',
206
  (status, f"Error: {error}", datetime.now().isoformat(), file_id))
207
  else:
@@ -318,9 +380,11 @@ def get_files():
318
  'id': row['id'],
319
  'filename': row['filename'],
320
  'status': row['status'],
321
- 'caption': row['caption'],
322
  'created_at': row['created_at'],
323
  'processed_at': row['processed_at'],
 
 
324
  'queue_position': queue_position,
325
  'estimated_start_seconds': estimated_start_seconds
326
  })
@@ -371,6 +435,8 @@ def get_file(file_id):
371
  'caption': row['caption'],
372
  'created_at': row['created_at'],
373
  'processed_at': row['processed_at'],
 
 
374
  'queue_position': queue_position,
375
  'estimated_start_seconds': estimated_start_seconds
376
  })
 
33
  caption TEXT,
34
  created_at TEXT NOT NULL,
35
  processed_at TEXT,
36
+ progress INTEGER DEFAULT 0,
37
+ progress_text TEXT,
38
  hide_from_ui INTEGER DEFAULT 0)'''
39
  )
 
40
  conn.commit()
41
  conn.close()
42
 
 
141
  print(f"🔄 Running STT on: {os.path.abspath(filepath)}")
142
  command = f"""cd {CWD} && {PYTHON_PATH} --input {shlex.quote(os.path.abspath(filepath))} --model {STT_MODEL_NAME}"""
143
 
144
+ import re
145
+
146
+ process = subprocess.Popen(
147
  command,
148
  shell=True,
149
  executable="/bin/bash",
150
+ stdout=subprocess.PIPE,
151
+ stderr=subprocess.STDOUT,
152
  cwd=CWD,
153
+ text=True,
154
+ bufsize=1,
155
  env={
156
  **os.environ,
157
  'PYTHONUNBUFFERED': '1',
 
160
  }
161
  )
162
 
163
+
164
+ current_chunk = 1
165
+ total_chunks = 1
166
+
167
+ for line in process.stdout:
168
+ print(line, end='')
169
+
170
+ # Track chunk progress
171
+ chunk_match = re.search(r'Processing chunk (\d+)/(\d+)', line)
172
+ if chunk_match:
173
+ try:
174
+ current_chunk = int(chunk_match.group(1))
175
+ total_chunks = int(chunk_match.group(2))
176
+ except: pass
177
+
178
+ # Generic percentage matcher
179
+ percent_match = re.search(r'(\d+)%', line)
180
+ if percent_match:
181
+ try:
182
+ percent = int(percent_match.group(1))
183
+ if 'audio' in line.lower() or 'extract' in line.lower():
184
+ update_progress(file_id, percent // 2, "Extracting audio...")
185
+ elif 'transcrib' in line.lower() or 'model' in line.lower():
186
+ # Calculate overall transcription progress based on chunks
187
+ chunk_base = ((current_chunk - 1) / total_chunks) * 100
188
+ chunk_progress = (percent / total_chunks)
189
+ overall_transcription_progress = chunk_base + chunk_progress
190
+
191
+ # Remap so 50-100% of the overall bar is transcription
192
+ overall_progress = int(50 + (overall_transcription_progress / 2))
193
+ update_progress(file_id, overall_progress, f"Transcribing... (Chunk {current_chunk}/{total_chunks})")
194
+ else:
195
+ update_progress(file_id, percent, "Processing...")
196
+ except: pass
197
+
198
+ # Stage matchers
199
+ if 'extracting audio' in line.lower():
200
+ update_progress(file_id, 10, "Extracting audio...")
201
+ elif 'transcription started' in line.lower() and total_chunks == 1:
202
+ update_progress(file_id, 50, "Transcribing started...")
203
+ elif 'model loaded' in line.lower():
204
+ update_progress(file_id, 20, "Model loaded...")
205
+
206
+ process.wait()
207
+ if process.returncode != 0:
208
+ raise Exception(f"STT process failed with return code {process.returncode}")
209
+
210
  # Read transcription result
211
  output_path = f'{CWD}/temp_dir/output_transcription.json'
212
  with open(output_path, 'r') as file:
 
242
  print(f"⚠️ Worker error: {str(e)}")
243
  time.sleep(POLL_INTERVAL)
244
 
245
+ def update_progress(file_id, progress, progress_text=None):
246
+ """Update the progress of a file in the database"""
247
+ conn = sqlite3.connect('audio_captions.db')
248
+ c = conn.cursor()
249
+ c.execute('UPDATE audio_files SET progress = ?, progress_text = ? WHERE id = ?',
250
+ (progress, progress_text, file_id))
251
+ conn.commit()
252
+ conn.close()
253
+
254
  def update_status(file_id, status, caption=None, error=None):
255
  """Update the status of a file in the database"""
256
  conn = sqlite3.connect('audio_captions.db')
 
258
 
259
  if status == 'completed':
260
  c.execute('''UPDATE audio_files
261
+ SET status = ?, caption = ?, processed_at = ?, progress = 100, progress_text = 'Completed'
262
  WHERE id = ?''',
263
  (status, caption, datetime.now().isoformat(), file_id))
264
  elif status == 'failed':
265
  c.execute('''UPDATE audio_files
266
+ SET status = ?, caption = ?, processed_at = ?, progress_text = 'Failed'
267
  WHERE id = ?''',
268
  (status, f"Error: {error}", datetime.now().isoformat(), file_id))
269
  else:
 
380
  'id': row['id'],
381
  'filename': row['filename'],
382
  'status': row['status'],
383
+ 'caption': "HIDDEN_IN_LIST_VIEW", # Don't send full captions in list view
384
  'created_at': row['created_at'],
385
  'processed_at': row['processed_at'],
386
+ 'progress': row['progress'] or 0,
387
+ 'progress_text': row['progress_text'],
388
  'queue_position': queue_position,
389
  'estimated_start_seconds': estimated_start_seconds
390
  })
 
435
  'caption': row['caption'],
436
  'created_at': row['created_at'],
437
  'processed_at': row['processed_at'],
438
+ 'progress': row['progress'] or 0,
439
+ 'progress_text': row['progress_text'],
440
  'queue_position': queue_position,
441
  'estimated_start_seconds': estimated_start_seconds
442
  })