Spaces:

Hammad712
/

lid

Sleeping

App Files Files Community

Hammad712 committed 7 days ago

Commit

ab82fde

verified ·

1 Parent(s): 5599463

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -45

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import torch.nn.functional as F
 import numpy as np
 import onnxruntime as ort
 import soundfile as sf
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -24,62 +25,69 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# 🚨 Use Absolute Path
 MODEL_PATH = "/app/local_model/pakistani_lid_v3.onnx"
-logger.info(f"🚀 Attempting to load model from: {MODEL_PATH}")
 try:
-    if not os.path.exists(MODEL_PATH):
-        # List files for debugging in logs if it fails
-        logger.error(f"Files in /app/local_model: {os.listdir('/app/local_model') if os.path.exists('/app/local_model') else 'Dir not found'}")
-        raise FileNotFoundError(f"Model file missing at {MODEL_PATH}")
-    # Load with mmap to save RAM
-    session_options = ort.SessionOptions()
-    session = ort.InferenceSession(MODEL_PATH, sess_options=session_options, providers=['CPUExecutionProvider'])
     logger.info("✅ Engine is LIVE and Ready!")
 except Exception as e:
-    logger.error(f"❌ Initialization Error: {e}")
     raise e
 labels = ("balochi", "english", "pashto", "sindhi", "urdu")
 id2label = {i: label for i, label in enumerate(labels)}
-def predict_audio(audio_path):
-    data, sr = sf.read(audio_path)
-    waveform = torch.from_numpy(data).float()
-    if waveform.ndim == 2:
-        waveform = waveform.T.mean(dim=0, keepdim=True)
-    else:
-        waveform = waveform.unsqueeze(0)
-    if sr != 16000:
-        waveform = torchaudio.functional.resample(waveform, sr, 16000)
-    target_frames = 16000 * 15
-    waveform = waveform[:, :target_frames]
-    waveform = (waveform / waveform.abs().max().clamp(min=1e-6)) - waveform.mean()
-    waveform = waveform / waveform.std().clamp(min=1e-6)
-    length = waveform.shape[1]
-    mask = torch.zeros(target_frames, dtype=torch.long)
-    if length < target_frames:
-        mask[:length] = 1
-        waveform = F.pad(waveform, (0, target_frames - length))
-    else:
-        mask[:] = 1
-    ort_inputs = {
-        "input_values": waveform.numpy(),
-        "attention_mask": mask.unsqueeze(0).numpy()
-    }
-    logits = session.run(None, ort_inputs)[0]
-    probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
-    pred_id = np.argmax(probs, axis=1)[0]
-    return id2label[pred_id], float(probs[0][pred_id])
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
@@ -87,9 +95,12 @@ async def predict(file: UploadFile = File(...)):
     try:
         with open(temp_path, "wb") as f:
             f.write(await file.read())
         lang, conf = predict_audio(temp_path)
-        os.remove(temp_path)
         return {"success": True, "language": lang.upper(), "confidence": round(conf * 100, 2)}
     except Exception as e:
         logger.error(f"Inference Error: {e}")
         if os.path.exists(temp_path): os.remove(temp_path)

 import numpy as np
 import onnxruntime as ort
 import soundfile as sf
+import subprocess
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
     allow_headers=["*"],
 )
+# Use Absolute Path (Success from previous log!)
 MODEL_PATH = "/app/local_model/pakistani_lid_v3.onnx"
+logger.info(f"🚀 Loading pre-baked ONNX model from: {MODEL_PATH}")
 try:
+    session = ort.InferenceSession(MODEL_PATH, providers=['CPUExecutionProvider'])
     logger.info("✅ Engine is LIVE and Ready!")
 except Exception as e:
+    logger.error(f"❌ Failed to load model: {e}")
     raise e
 labels = ("balochi", "english", "pashto", "sindhi", "urdu")
 id2label = {i: label for i, label in enumerate(labels)}
+def predict_audio(input_path):
+    clean_wav_path = "cleaned_audio.wav"
+    try:
+        # 🛠️ THE FIX: Use FFmpeg to convert ANY format (WebM, OGG, etc.) to Standard WAV
+        # This handles the "Format not recognised" error
+        subprocess.run([
+            'ffmpeg', '-y', '-i', input_path,
+            '-ar', '16000', '-ac', '1', clean_wav_path
+        ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        # Now read the standard WAV
+        data, sr = sf.read(clean_wav_path)
+        waveform = torch.from_numpy(data).float()
+        if waveform.ndim == 1:
+            waveform = waveform.unsqueeze(0)
+        # Audio Preprocessing
+        target_frames = 16000 * 15
+        if waveform.shape[1] > target_frames:
+            waveform = waveform[:, :target_frames]
+        waveform = (waveform / waveform.abs().max().clamp(min=1e-6)) - waveform.mean()
+        waveform = waveform / waveform.std().clamp(min=1e-6)
+        length = waveform.shape[1]
+        mask = torch.zeros(target_frames, dtype=torch.long)
+        if length < target_frames:
+            mask[:length] = 1
+            waveform = F.pad(waveform, (0, target_frames - length))
+        else:
+            mask[:] = 1
+        # ONNX Inference
+        ort_inputs = {
+            "input_values": waveform.numpy(),
+            "attention_mask": mask.unsqueeze(0).numpy()
+        }
+        logits = session.run(None, ort_inputs)[0]
+        probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
+        pred_id = np.argmax(probs, axis=1)[0]
+        if os.path.exists(clean_wav_path): os.remove(clean_wav_path)
+        return id2label[pred_id], float(probs[0][pred_id])
+    except Exception as e:
+        if os.path.exists(clean_wav_path): os.remove(clean_wav_path)
+        raise e
 @app.post("/predict")
 async def predict(file: UploadFile = File(...)):
     try:
         with open(temp_path, "wb") as f:
             f.write(await file.read())
         lang, conf = predict_audio(temp_path)
+        if os.path.exists(temp_path): os.remove(temp_path)
         return {"success": True, "language": lang.upper(), "confidence": round(conf * 100, 2)}
     except Exception as e:
         logger.error(f"Inference Error: {e}")
         if os.path.exists(temp_path): os.remove(temp_path)