~/Projects/faster-whisper
git clone https://code.lsong.org/faster-whisper
Commit
- Commit
- 3dc44f7bb5481c89b94635690914acecc9052bee
- Author
- Guillaume Klein <[email protected]>
- Date
- 2023-02-13 18:26:45 +0100
- Diffstat
faster_whisper/transcribe.py | 15 +++++++++++++++
Raise a more explicit error message for English-only models
diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index 0e49429b855745cbda69b524c058d97a089043d0..f757211d0b78535aac76340af002ffc02d9f9ebf 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -1,5 +1,7 @@ import collections import zlib +import os +import zlib import ctranslate2 import numpy as np @@ -65,6 +67,19 @@ device_index=device_index, compute_type=compute_type, intra_threads=cpu_threads, ) + + with open(os.path.join(model_path, "vocabulary.txt")) as vocab_file: + vocab_size = sum(1 for _ in vocab_file) + + is_multilingual = vocab_size == 51865 + if not is_multilingual: + raise NotImplementedError( + "English-only models are currently not supported. " + "The underlying CTranslate2 implementation makes some assumptions about " + "the prompt format that are not compatible with English-only models. " + "This will be improved in a future version. " + "Please use a multilingual model for now." + ) self.feature_extractor = FeatureExtractor() self.tokenizer = tokenizers.Tokenizer.from_pretrained("openai/whisper-tiny")