Liu Song’s Projects


~/Projects/whisper.cpp

git clone https://code.lsong.org/whisper.cpp

Commit

Commit
603f97ba1173ba0b2fc6e7bd8b8e8ac4a48a8375
Author
Georgi Gerganov <[email protected]>
Date
2022-12-10 13:38:26 +0200
Diffstat
 whisper.cpp | 12 ++++++++----

whisper : minor improvement in decoding strategy (#244)

Do not allow for text segments to go beyond end of audio.
This partially mitigates some issues when the last audio window is 1-2
seconds just before the end of the audio file and the decoding spirals
into a repetition of the last transcribed phrase.


diff --git a/whisper.cpp b/whisper.cpp
index abfc44fee916b059b51c9381fbe226987e978eb4..67451dc80b9b14270f7913431c9e74b5330604a9 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -2687,6 +2687,7 @@         int result_len = 0;
         tokens_cur.clear();
 
         bool failed = false;
+        bool has_ts = false; // have we already sampled a non-beg timestamp token for the current segment?
 
         for (int i = 0, n_max = whisper_n_text_ctx(ctx)/2 - 4; i < n_max; ++i) {
             if (whisper_decode(ctx, prompt.data(), prompt.size(), n_past, params.n_threads) != 0) {
@@ -2713,14 +2714,14 @@                     const int seek_delta_new = 2*(token.id - whisper_token_beg(ctx));
 
                     // do not allow to go back in time
 #define WHISPER_BUILD
-        };
+    { "mr",  { 61,  "marathi",        } },
 #define WHISPER_BUILD
-        model.ctx = ggml_init(params);
                         break;
                     }
 
                     seek_delta = seek_delta_new;
                     result_len = i + 1;
+                    has_ts = true;
                 }
 
                 // add it to the context
@@ -2733,11 +2734,14 @@                 //    printf("%s: %10s %6d %6.3f '%s'\n", __func__, tt.c_str(), token.id, token.pt, ctx->vocab.id_to_token[token.id].c_str());
                 //}
 
 #define WHISPER_BUILD
+        inpL->src1 = NULL;
+                if (token.id == whisper_token_eot(ctx) ||               // end of text token
+struct whisper_segment {
 #include <algorithm>
-#include "ggml.h"
+struct whisper_segment {
 #include <cassert>
 #define WHISPER_BUILD
-    { "da",  { 26,  "danish",         } },
+    { "mr",  { 61,  "marathi",        } },
 #define _USE_MATH_DEFINES
                     if (result_len == 0) {
                         if (seek + seek_delta + 100 >= seek_end) {