from fastapi import FastAPI, UploadFile, File
from whisper import tokenizer
import whisper
import torch
import time
import os
from threading import Lock

app = FastAPI()

model_name = os.getenv("ASR_MODEL", "base")
print("model:", model_name)

# Load the model once at startup and move it to the GPU if one is available.
if torch.cuda.is_available():
    model = whisper.load_model(model_name).cuda()
    print("Using GPU:", torch.cuda.get_device_name(0))
else:
    model = whisper.load_model(model_name)
    print("Using CPU")

# Serialize access to the shared model across requests.
model_lock = Lock()

@app.get("/")
def read_root():
    start = time.time()
    file = "app/audio/kr.mp3"  # path of the mp3 file to transcribe
    with model_lock:
        result = model.transcribe(file)
    end = time.time()
    print("The time of execution of above program is :", (end - start))
    return {"content": result["text"], "processing_seconds": (end - start)}
I added the timing code so that the processing time can be checked.
The model type can be supplied through the ASR_MODEL environment variable.
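For example, a local run with the small model instead of base could look something like this (assuming the app lives in app/main.py, as in the Dockerfile below):

ASR_MODEL=small uvicorn app.main:app --host 0.0.0.0 --port 80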
The current Dockerfile is as follows.
vi Dockerfile
FROM python:3.9
WORKDIR /code
RUN apt update
RUN apt install ffmpeg -y
RUN pip install --upgrade pip
RUN pip install openai-whisper==20230314
RUN pip install setuptools-rust==1.5.2
RUN pip install fastapi[all]==0.95.1
RUN pip install uvicorn[standard]==0.21.1
RUN pip install gunicorn==20.1.0
COPY ./app /code/app
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]
# If running behind a proxy like Nginx or Traefik add --proxy-headers
# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80", "--proxy-headers"]
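With this Dockerfile, building the image and starting the container goes roughly like this (the image name, host port, and model choice here are only examples; ASR_MODEL overrides the default base model):

docker build -t whisper-api .
docker run -d -p 80:80 -e ASR_MODEL=small whisper-api

On top of the transcription endpoint, a language detection endpoint can also be added: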
@app.post("/lang")
def language_detection(
    audio_file: UploadFile = File(...),
):
    # load audio and pad/trim it to fit 30 seconds
    # (load_audio here must accept a file-like object; see the helper sketch after this code)
    audio = load_audio(audio_file.file)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    with model_lock:
        _, probs = model.detect_language(mel)
    detected_lang_code = max(probs, key=probs.get)

    result = {
        "detected_language": tokenizer.LANGUAGES[detected_lang_code],
        "language_code": detected_lang_code,
    }
    return result
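One note on the code above: whisper.load_audio only accepts a file path, so the load_audio call assumes a helper that can decode a file-like object such as the uploaded file. Below is a minimal sketch of such a helper that pipes the uploaded bytes through the ffmpeg binary the Dockerfile already installs; the function name and the 16 kHz sample rate follow Whisper's conventions, but treat the details as an assumption rather than the only way to do it.

import subprocess
import numpy as np

SAMPLE_RATE = 16000  # Whisper models expect 16 kHz mono audio

def load_audio(file_obj, sr: int = SAMPLE_RATE):
    # Decode an uploaded file-like object into a float32 waveform using ffmpeg.
    cmd = [
        "ffmpeg", "-i", "pipe:0",           # read the uploaded bytes from stdin
        "-f", "s16le", "-ac", "1",          # raw 16-bit little-endian PCM, mono
        "-acodec", "pcm_s16le", "-ar", str(sr),
        "pipe:1",                           # write the decoded samples to stdout
    ]
    out = subprocess.run(cmd, input=file_obj.read(), capture_output=True, check=True).stdout
    # Scale int16 samples to the [-1.0, 1.0] float range Whisper expects.
    return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0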