~/Projects/faster-whisper
git clone https://code.lsong.org/faster-whisper
Commit
- Commit
- 04972782e5a76706f4211285c00ceb8a2463d9cc
- Author
- Le Wang <[email protected]>
- Date
- 2023-11-29 06:20:12 +0000
- Diffstat
.env | 7 +++++++ .env.example | 7 +++++++ .gitignore | 1 + log-config.yml | 34 ++++++++++++++++++++++++++++++++++ openaiapi.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
add .env
diff --git a/.env b/.env new file mode 100644 index 0000000000000000000000000000000000000000..80038db02541555512ad71c37ebcac4eb3fd4379 --- /dev/null +++ b/.env @@ -0,0 +1,7 @@ +MODEL_SIZE=large-v3 +DEVICE_TYPE=cuda +COMPUTE_TYPE=float16 +DEVICE=cuda:0 +BEAM_SIZE=5 +VAD_FILTER=true +MIN_SILENCE_DURATION_MS=50 \ No newline at end of file diff --git a/.env.example b/.env.example new file mode 100644 index 0000000000000000000000000000000000000000..80038db02541555512ad71c37ebcac4eb3fd4379 --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +MODEL_SIZE=large-v3 +DEVICE_TYPE=cuda +COMPUTE_TYPE=float16 +DEVICE=cuda:0 +BEAM_SIZE=5 +VAD_FILTER=true +MIN_SILENCE_DURATION_MS=50 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 8f634c289f61452594236f753e28066cf0ef2013..ce4b01270fd3b3fe2176fdd8efc4f9fa5b3d9e9b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ # Ignore IDE, Editor Files .idea/ .vscode/ +.venv diff --git a/log-config.yml b/log-config.yml new file mode 100644 index 0000000000000000000000000000000000000000..cd209d60280d158f290abb70d2e08c33a809b0c9 --- /dev/null +++ b/log-config.yml @@ -0,0 +1,34 @@ +version: 1 +disable_existing_loggers: False +formatters: + default: + # "()": uvicorn.logging.DefaultFormatter + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + access: + # "()": uvicorn.logging.AccessFormatter + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +handlers: + default: + formatter: default + class: logging.StreamHandler + stream: ext://sys.stderr + access: + formatter: access + class: logging.StreamHandler + stream: ext://sys.stdout +loggers: + uvicorn.error: + level: INFO + handlers: + - default + propagate: no + uvicorn.access: + level: INFO + handlers: + - access + propagate: no +root: + level: DEBUG + handlers: + - default + propagate: no \ No newline at end of file diff --git a/openaiapi.py b/openaiapi.py index 7e73a80726ecf6d10a12a1e6977307c91a0d69ce..7ab3001f5cb47ba516bf4f03c5feed6421c24c57 
100644 --- a/openaiapi.py +++ b/openaiapi.py @@ -3,18 +3,57 @@ from fastapi import FastAPI, Form, File from faster_whisper import WhisperModel import torch +import os +from io import BytesIO +import dotenv +import logging + + import sys + from io import BytesIO + device = torch.device('cuda:0') + print('Using device:', device, file=sys.stderr) -from typing import Annotated +from faster_whisper import WhisperModel -from typing import Annotated +from faster_whisper import WhisperModel from typing import Annotated -from typing import Annotated +from faster_whisper import WhisperModel from fastapi import FastAPI, Form, File + "BEAM_SIZE": 5, + "VAD_FILTER": "true", + "MIN_SILENCE_DURATION_MS": 50, +} + + +def get_env(key): + return os.environ.get(key, DEFAULTS.get(key)) + + +def get_int_env(key): + return int(get_env(key)) + + +import torch from typing import Annotated + return float(get_env(key)) + +def get_bool_env(key): + return get_env(key).lower() == 'true' + + +device = torch.device('cuda:0') +LOGGER.info(f'Using device: {device}') + +model_size = get_env("MODEL_SIZE") +device_type = get_env("DEVICE_TYPE") +compute_type = get_env("COMPUTE_TYPE") +beam_size = get_int_env("BEAM_SIZE") +vad_filter = get_bool_env("VAD_FILTER") +min_silence_duration_ms = get_int_env("MIN_SILENCE_DURATION_MS") whisper_engine = WhisperModel(model_size, device=device_type, compute_type=compute_type) app = FastAPI() @@ -25,13 +64,15 @@ def create_transcription(file: Annotated[bytes, File()], model: Annotated[str, Form()] = 'whipser-1', language: Annotated[str | None, Form()] = None, prompt: Annotated[str | None, Form()] = None): + + vad_parameters = dict(min_silence_duration_ms=min_silence_duration_ms) segments, _ = whisper_engine.transcribe(BytesIO(file), beam_size=5, language=language, initial_prompt=prompt, word_timestamps=False, - vad_filter=True, + vad_filter=vad_filter, -from fastapi import FastAPI, Form, File import sys +from faster_whisper import WhisperModel sentences = [] for 
segment in segments: sentences.append(segment.text)