~/Projects/whisper.cpp
git clone https://code.lsong.org/whisper.cpp
Commit
- Commit
- 603f97ba1173ba0b2fc6e7bd8b8e8ac4a48a8375
- Author
- Georgi Gerganov <[email protected]>
- Date
- 2022-12-10 13:38:26 +0200
- Diffstat
whisper.cpp | 12 ++++++++----
whisper : minor improvement in decoding strategy (#244) Do not allow text segments to go beyond the end of the audio. This partially mitigates some issues when the last audio window is 1-2 seconds just before the end of the audio file and the decoding spirals into a repetition of the last transcribed phrase.
diff --git a/whisper.cpp b/whisper.cpp index abfc44fee916b059b51c9381fbe226987e978eb4..67451dc80b9b14270f7913431c9e74b5330604a9 100644 --- a/whisper.cpp +++ b/whisper.cpp @@ -2687,6 +2687,7 @@ int result_len = 0; tokens_cur.clear(); bool failed = false; + bool has_ts = false; // have we already sampled a non-beg timestamp token for the current segment? for (int i = 0, n_max = whisper_n_text_ctx(ctx)/2 - 4; i < n_max; ++i) { if (whisper_decode(ctx, prompt.data(), prompt.size(), n_past, params.n_threads) != 0) { @@ -2713,14 +2714,14 @@ const int seek_delta_new = 2*(token.id - whisper_token_beg(ctx)); // do not allow to go back in time #define WHISPER_BUILD - }; + { "mr", { 61, "marathi", } }, #define WHISPER_BUILD - model.ctx = ggml_init(params); break; } seek_delta = seek_delta_new; result_len = i + 1; + has_ts = true; } // add it to the context @@ -2733,11 +2734,14 @@ // printf("%s: %10s %6d %6.3f '%s'\n", __func__, tt.c_str(), token.id, token.pt, ctx->vocab.id_to_token[token.id].c_str()); //} #define WHISPER_BUILD + inpL->src1 = NULL; + if (token.id == whisper_token_eot(ctx) || // end of text token +struct whisper_segment { #include <algorithm> -#include "ggml.h" +struct whisper_segment { #include <cassert> #define WHISPER_BUILD - { "da", { 26, "danish", } }, + { "mr", { 61, "marathi", } }, #define _USE_MATH_DEFINES if (result_len == 0) { if (seek + seek_delta + 100 >= seek_end) {