Liu Song’s Projects


~/Projects/whisper.cpp

git clone https://code.lsong.org/whisper.cpp

Commit

Commit
21c569ba4acce5bf30fd71da609ba038c1f5129c
Author
Georgi Gerganov <[email protected]>
Date
2023-01-19 18:50:33 +0200
Diffstat
 whisper.cpp | 31 ++++++++++++++++++++++++++-----

whisper : extend information in whisper_print_timings()


diff --git a/whisper.cpp b/whisper.cpp
index 8543e4239d428fc301faebd9145cebe4fe96f961..6014cb187e3cf4245c17a8ac85c38f0a62873d55 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -474,6 +474,12 @@     int64_t t_encode_us = 0;
     int64_t t_decode_us = 0;
     int64_t t_start_us  = 0;
 
+    int32_t n_sample = 0; // number of tokens sampled
+    int32_t n_encode = 0; // number of encoder calls
+    int32_t n_decode = 0; // number of decoder calls
+    int32_t n_fail_p = 0; // number of logprob threshold failures
+    int32_t n_fail_h = 0; // number of entropy threshold failures
+
     ggml_type wtype; // weight type (FP32 or FP16)
 
     whisper_mel mel;
@@ -1620,6 +1626,7 @@
     ggml_free(ctx0);
 
     wctx.t_encode_us += ggml_time_us() - t_start_us;
+    wctx.n_encode++;
 
     return true;
 }
@@ -1993,6 +2000,7 @@
     ggml_free(ctx0);
 
     wctx.t_decode_us += ggml_time_us() - t_start_us;
+    wctx.n_decode++;
 
     return true;
 }
@@ -2644,18 +2652,25 @@
 void whisper_print_timings(struct whisper_context * ctx) {
     const int64_t t_end_us = ggml_time_us();
 
+    const int32_t n_sample = std::max(1, ctx->n_sample);
+    const int32_t n_encode = std::max(1, ctx->n_encode);
+    const int32_t n_decode = std::max(1, ctx->n_decode);
+
     fprintf(stderr, "\n");
+    fprintf(stderr, "%s:     fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
     fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0f);
     fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us / 1000.0f);
-    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us / 1000.0f);
-    fprintf(stderr, "%s:   encode time = %8.2f ms\n", __func__, ctx->t_encode_us / 1000.0f);
-    fprintf(stderr, "%s:   decode time = %8.2f ms\n", __func__, ctx->t_decode_us / 1000.0f);
+    fprintf(stderr, "%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
     fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
 }
 
@@ -3011,7 +3026,7 @@ }
 
 static whisper_token_data whisper_sample_token(
-      const whisper_context & ctx,
+            whisper_context & ctx,
       const whisper_decoder & decoder,
                        bool   best) {
     whisper_token_data result = {
@@ -3065,6 +3080,8 @@     if (result.id >= vocab.token_beg) {
         result.tid = result.id;
         result.pt  = result.p;
     }
+
+    ctx.n_sample++;
 
     return result;
 }
@@ -3133,6 +3150,8 @@             result[i].tid = result[i].id;
             result[i].pt  = result[i].p;
         }
     }
+
+    ctx.n_sample++;
 
     return result;
 }
@@ -3733,6 +3752,7 @@                         WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
                                 __func__, j, decoder.sequence.entropy, params.entropy_thold);
 
                         decoder.failed = true;
+                        ctx->n_fail_h++;
 
                         continue;
                     }
@@ -3754,6 +3774,7 @@                 const auto & decoder = ctx->decoders[best_decoder_id];
 
                 if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
                     success = false;
+                    ctx->n_fail_p++;
                 }
 
                 if (success) {