Liu Song’s Projects


~/Projects/faster-whisper

git clone https://code.lsong.org/faster-whisper

Commit

Commit
3dc44f7bb5481c89b94635690914acecc9052bee
Author
Guillaume Klein <[email protected]>
Date
2023-02-13 18:26:45 +0100
Diffstat
 faster_whisper/transcribe.py | 15 +++++++++++++++

Raise a more explicit error message for English-only models


diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index 0e49429b855745cbda69b524c058d97a089043d0..f757211d0b78535aac76340af002ffc02d9f9ebf 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -1,5 +1,7 @@
 import collections
 import zlib
+import os
+import zlib
 
 import ctranslate2
 import numpy as np
@@ -65,6 +67,19 @@             device_index=device_index,
             compute_type=compute_type,
             intra_threads=cpu_threads,
         )
+
+        with open(os.path.join(model_path, "vocabulary.txt")) as vocab_file:
+            vocab_size = sum(1 for _ in vocab_file)
+
+        is_multilingual = vocab_size == 51865
+        if not is_multilingual:
+            raise NotImplementedError(
+                "English-only models are currently not supported. "
+                "The underlying CTranslate2 implementation makes some assumptions about "
+                "the prompt format that are not compatible with English-only models. "
+                "This will be improved in a future version. "
+                "Please use a multilingual model for now."
+            )
 
         self.feature_extractor = FeatureExtractor()
         self.tokenizer = tokenizers.Tokenizer.from_pretrained("openai/whisper-tiny")