Upload 3 files
Browse files- Dockerfile +13 -0
- main.py +120 -0
- requirements.txt +7 -0
Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CPU-only image for the SenseVoice ONNX FastAPI service.
FROM python:3.8-slim

WORKDIR /app

COPY requirements.txt .
COPY main.py .
COPY iic iic/

RUN pip install --no-cache-dir --upgrade pip
# The requirement spec MUST be quoted: an unquoted `>=` is parsed by the
# shell as an output redirection (`pip install torch > =1.13`), silently
# installing an unconstrained `torch` instead of the pinned minimum.
RUN pip install --no-cache-dir "torch>=1.13" torchaudio --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir -r requirements.txt

CMD ["python", "main.py"]
|
main.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
Author: 一铭
|
| 4 |
+
Date : 2024-08-28
|
| 5 |
+
|
| 6 |
+
Github: https://github.com/HG-ha
|
| 7 |
+
Home : https://api2.wer.plus
|
| 8 |
+
|
| 9 |
+
Description:
|
| 10 |
+
From ali dharma school project: https://github.com/FunAudioLLM/SenseVoice
|
| 11 |
+
|
| 12 |
+
This program serves the ONNX-packaged model through FastAPI, providing an interface that reads audio from a network URL or an uploaded file and predicts its textual content.
|
| 13 |
+
|
| 14 |
+
If you need to use CUDA, install onnxruntime-gpu instead of onnxruntime.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import librosa
|
| 18 |
+
import numpy as np
|
| 19 |
+
import aiohttp
|
| 20 |
+
from fastapi import FastAPI, Form, UploadFile, HTTPException
|
| 21 |
+
from pydantic import HttpUrl, ValidationError, BaseModel, Field
|
| 22 |
+
from typing import List, Union
|
| 23 |
+
from funasr_onnx import SenseVoiceSmall
|
| 24 |
+
from funasr_onnx.utils.postprocess_utils import rich_transcription_postprocess
|
| 25 |
+
from io import BytesIO
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class ApiResponse(BaseModel):
    """Response schema for the /extract_text endpoint."""

    # Human-readable status string; always "input processed successfully" on success.
    message: str = Field(..., description="Status message indicating the success of the operation.")
    # Transcription run through rich_transcription_postprocess (markup labels removed).
    results: str = Field(..., description="Remove label output")
    # Raw model output (res[0]) including label tags.
    label_result: str = Field(..., description="Default output")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ASGI application instance; served by uvicorn in the __main__ block below.
app = FastAPI()
|
| 35 |
+
|
| 36 |
+
async def from_url_load_audio(audio: HttpUrl) -> BytesIO:
    """Download an audio file from a URL and return it as an in-memory buffer.

    Args:
        audio: URL of the remote audio resource.

    Returns:
        BytesIO holding the raw downloaded bytes.  (The original annotation
        claimed ``np.array``, but the function has always returned BytesIO.)

    Raises:
        HTTPException: 400 when the server does not answer with HTTP 200.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(
            audio,
            # Browser-like User-Agent: some hosts reject bare client requests.
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
            },
        ) as response:
            if response.status != 200:
                raise HTTPException(
                    status_code=400,
                    # Was "Failed to download image" — this service fetches audio.
                    detail=f"Failed to download audio: {response.status}",
                )
            audio_bytes = await response.read()
            return BytesIO(audio_bytes)
|
| 51 |
+
|
| 52 |
+
@app.post("/extract_text", response_model=ApiResponse)
async def upload_url(url: Union[HttpUrl, None] = Form(None), file: Union[UploadFile, None] = Form(None)):
    """Transcribe audio supplied either as an uploaded file or as a URL.

    Exactly one of *file* / *url* should be given; *file* wins when both are.

    Returns:
        dict matching ApiResponse: status message, post-processed text, raw output.

    Raises:
        HTTPException: 400 for a bad/missing source or validation failure,
        500 for download or inference errors.
    """
    if file:
        audio = BytesIO(await file.read())
    elif url:
        try:
            audio = await from_url_load_audio(str(url))
        except HTTPException:
            # Let the downloader's own 400 propagate instead of rewrapping it
            # as a generic 500 (the broad handler below would have caught it).
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
    else:
        # BUG FIX: the original `return HTTPException(400, ...)` returned the
        # exception object as a 200 body (violating response_model); it must
        # be raised to produce a real 400 response.
        raise HTTPException(status_code=400, detail={"error": "No valid audio source provided."})
    try:
        # `model` and `language` are module-level globals initialised in the
        # __main__ block of this file.
        res = model(audio, language=language, use_itn=True)
        return {
            "message": "input processed successfully",
            "results": rich_transcription_postprocess(res[0]),
            "label_result": res[0],
        }
    except ValidationError as e:
        raise HTTPException(status_code=400, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":

    # --- Model configuration --------------------------------------------
    model_dir = "iic/SenseVoiceSmall"
    device_id = 0  # GPU 0; presumably funasr_onnx falls back to CPU when absent — TODO confirm
    batch_size = 16
    language = "auto"  # read as a module-level global by the /extract_text endpoint
    quantize = True  # Quantized model (model_quant.onnx): smaller and faster, accuracy may suffer
    # quantize = False  # Standard model: model.onnx

    # Override the built-in load_data method to fix an np.ndarray accuracy
    # bug: passing a pre-loaded librosa array straight through made accuracy
    # for non-Chinese languages extremely poor, so ndarray input is returned
    # as-is and everything else is decoded via librosa.load.
    def load_data(self, wav_content: Union[str, np.ndarray, List[str], BytesIO], fs: int = None) -> List:
        # `source` may be a filesystem path or a file-like object (BytesIO);
        # librosa.load accepts both.
        def load_wav(source) -> np.ndarray:
            waveform, _ = librosa.load(source, sr=fs)
            return waveform

        if isinstance(wav_content, np.ndarray):
            return [wav_content]
        if isinstance(wav_content, str):
            return [load_wav(wav_content)]
        if isinstance(wav_content, list):
            return [load_wav(path) for path in wav_content]
        if isinstance(wav_content, BytesIO):
            return [load_wav(wav_content)]
        # BUG FIX: the accepted-types list previously omitted BytesIO.
        raise TypeError(f"The type of {wav_content} is not in [str, np.ndarray, list, BytesIO]")

    SenseVoiceSmall.load_data = load_data

    model = SenseVoiceSmall(
        model_dir,
        quantize=quantize,
        device_id=device_id,
        batch_size=batch_size,
    )

    print("\n\nDocs: http://127.0.0.1:8000/docs\n")
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
funasr_onnx==0.4.1
|
| 2 |
+
fastapi==0.112.2
|
| 3 |
+
numpy==1.26.4
|
| 4 |
+
uvicorn==0.30.6
|
| 5 |
+
librosa==0.10.2
|
| 6 |
+
aiohttp==3.10.5
|
| 7 |
+
python-multipart==0.0.9
|