Liu Song’s Projects


~/Projects/faster-whisper

git clone https://code.lsong.org/faster-whisper

Commit

Commit
04972782e5a76706f4211285c00ceb8a2463d9cc
Author
Le Wang <[email protected]>
Date
2023-11-29 06:20:12 +0000
Diffstat
 .env | 7 +++++++
 .env.example | 7 +++++++
 .gitignore | 1 +
 log-config.yml | 34 ++++++++++++++++++++++++++++++++++
 openaiapi.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----

add .env


diff --git a/.env b/.env
new file mode 100644
index 0000000000000000000000000000000000000000..80038db02541555512ad71c37ebcac4eb3fd4379
--- /dev/null
+++ b/.env
@@ -0,0 +1,7 @@
+MODEL_SIZE=large-v3
+DEVICE_TYPE=cuda
+COMPUTE_TYPE=float16
+DEVICE=cuda:0
+BEAM_SIZE=5
+VAD_FILTER=true
+MIN_SILENCE_DURATION_MS=50
\ No newline at end of file




diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..80038db02541555512ad71c37ebcac4eb3fd4379
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,7 @@
+MODEL_SIZE=large-v3
+DEVICE_TYPE=cuda
+COMPUTE_TYPE=float16
+DEVICE=cuda:0
+BEAM_SIZE=5
+VAD_FILTER=true
+MIN_SILENCE_DURATION_MS=50
\ No newline at end of file




diff --git a/.gitignore b/.gitignore
index 8f634c289f61452594236f753e28066cf0ef2013..ce4b01270fd3b3fe2176fdd8efc4f9fa5b3d9e9b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@
 # Ignore IDE, Editor Files
 .idea/
 .vscode/
+.venv




diff --git a/log-config.yml b/log-config.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cd209d60280d158f290abb70d2e08c33a809b0c9
--- /dev/null
+++ b/log-config.yml
@@ -0,0 +1,34 @@
+version: 1
+disable_existing_loggers: False
+formatters:
+  default:
+    # "()": uvicorn.logging.DefaultFormatter
+    format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+  access:
+    # "()": uvicorn.logging.AccessFormatter
+    format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+handlers:
+  default:
+    formatter: default
+    class: logging.StreamHandler
+    stream: ext://sys.stderr
+  access:
+    formatter: access
+    class: logging.StreamHandler
+    stream: ext://sys.stdout
+loggers:
+  uvicorn.error:
+    level: INFO
+    handlers:
+      - default
+    propagate: no
+  uvicorn.access:
+    level: INFO
+    handlers:
+      - access
+    propagate: no
+root:
+  level: DEBUG
+  handlers:
+    - default
+  propagate: no
\ No newline at end of file




diff --git a/openaiapi.py b/openaiapi.py
index 7e73a80726ecf6d10a12a1e6977307c91a0d69ce..7ab3001f5cb47ba516bf4f03c5feed6421c24c57 100644
--- a/openaiapi.py
+++ b/openaiapi.py
@@ -3,18 +3,57 @@ from fastapi import FastAPI, Form, File
 
 from faster_whisper import WhisperModel
 import torch
+import os
+from io import BytesIO
+import dotenv
+import logging
+
+
 import sys
+
 from io import BytesIO
 
+
 device = torch.device('cuda:0')
+
 print('Using device:', device, file=sys.stderr)
 from typing import Annotated
 from fastapi import FastAPI, Form, File
-model_size = "large-v3"
-device_type = "cuda"
-compute_type = "float16"
+
+dotenv.load_dotenv()
+
+LOGGER = logging.getLogger(__name__)
+
+DEFAULTS = {
+    "MODEL_SIZE": "large-v3",
+    "DEVICE_TYPE": "cuda",
+    "COMPUTE_TYPE": "float16",
+    "BEAM_SIZE": 5,
+    "VAD_FILTER": "true",
+    "MIN_SILENCE_DURATION_MS": 50,
+}
+
+
+def get_env(key):
+    return os.environ.get(key, DEFAULTS.get(key))
+
+
+def get_int_env(key):
+    return int(get_env(key))
+
+
+def get_float_env(key):
+    return float(get_env(key))
 
+
+def get_bool_env(key):
+    return get_env(key).lower() == 'true'
+
+
+device = torch.device('cuda:0')
+LOGGER.info(f'Using device: {device}')
+
+model_size = get_env("MODEL_SIZE")
+device_type = get_env("DEVICE_TYPE")
+compute_type = get_env("COMPUTE_TYPE")
+beam_size = get_int_env("BEAM_SIZE")
+vad_filter = get_bool_env("VAD_FILTER")
+min_silence_duration_ms = get_int_env("MIN_SILENCE_DURATION_MS")
 whisper_engine = WhisperModel(model_size, device=device_type, compute_type=compute_type)
 
 app = FastAPI()
@@ -25,13 +64,15 @@ def create_transcription(file: Annotated[bytes, File()],
                          model: Annotated[str, Form()] = 'whipser-1',
                          language: Annotated[str | None, Form()] = None,
                          prompt: Annotated[str | None, Form()] = None):
+
+    vad_parameters = dict(min_silence_duration_ms=min_silence_duration_ms)
     segments, _ = whisper_engine.transcribe(BytesIO(file), beam_size=5,
                                             language=language,
                                             initial_prompt=prompt,
                                             word_timestamps=False,
-                                            vad_filter=True,
+                                            vad_filter=vad_filter,
-                                            vad_parameters=dict(min_silence_duration_ms=500))
+                                            vad_parameters=vad_parameters)
     sentences = []
     for segment in segments:
         sentences.append(segment.text)