~/Projects/whisper.cpp
git clone https://code.lsong.org/whisper.cpp
Commit
- Commit
- 21c569ba4acce5bf30fd71da609ba038c1f5129c
- Author
- Georgi Gerganov <[email protected]>
- Date
- 2023-01-19 18:50:33 +0200
- Diffstat
whisper.cpp | 31 ++++++++++++++++++++++++++-----
whisper : extend information in whisper_print_timings()
diff --git a/whisper.cpp b/whisper.cpp
index 8543e4239d428fc301faebd9145cebe4fe96f961..6014cb187e3cf4245c17a8ac85c38f0a62873d55 100644
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -474,6 +474,12 @@
     int64_t t_encode_us = 0;
     int64_t t_decode_us = 0;
     int64_t t_start_us  = 0;

+    int32_t n_sample = 0; // number of tokens sampled
+    int32_t n_encode = 0; // number of encoder calls
+    int32_t n_decode = 0; // number of decoder calls
+    int32_t n_fail_p = 0; // number of logprob threshold failures
+    int32_t n_fail_h = 0; // number of entropy threshold failures
+
     ggml_type wtype; // weight type (FP32 or FP16)

     whisper_mel mel;
@@ -1620,6 +1626,7 @@
     ggml_free(ctx0);

     wctx.t_encode_us += ggml_time_us() - t_start_us;
+    wctx.n_encode++;

     return true;
 }
@@ -1993,6 +2000,7 @@
     ggml_free(ctx0);

     wctx.t_decode_us += ggml_time_us() - t_start_us;
+    wctx.n_decode++;

     return true;
 }
@@ -2644,18 +2652,25 @@
 void whisper_print_timings(struct whisper_context * ctx) {
     const int64_t t_end_us = ggml_time_us();

+    const int32_t n_sample = std::max(1, ctx->n_sample);
+    const int32_t n_encode = std::max(1, ctx->n_encode);
+    const int32_t n_decode = std::max(1, ctx->n_decode);
+
     fprintf(stderr, "\n");
+    fprintf(stderr, "%s:     fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
     fprintf(stderr, "%s:     load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
     fprintf(stderr, "%s:      mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
-    fprintf(stderr, "%s:   sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
-    fprintf(stderr, "%s:   encode time = %8.2f ms\n", __func__, ctx->t_encode_us/1000.0f);
-    fprintf(stderr, "%s:   decode time = %8.2f ms\n", __func__, ctx->t_decode_us/1000.0f);
+    fprintf(stderr, "%s:   sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
+    fprintf(stderr, "%s:   encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
+    fprintf(stderr, "%s:   decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
     fprintf(stderr, "%s:    total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
 }
@@ -3011,7 +3026,7 @@
 }

 static whisper_token_data whisper_sample_token(
-    const whisper_context & ctx,
+          whisper_context & ctx,
     const whisper_decoder & decoder,
     bool best) {
@@ -3065,6 +3080,8 @@
     if (result.id >= vocab.token_beg) {
         result.tid = result.id;
         result.pt  = result.p;
     }
+
+    ctx.n_sample++;

     return result;
 }
@@ -3133,6 +3150,8 @@
             result[i].tid = result[i].id;
             result[i].pt  = result[i].p;
         }
     }
+
+    ctx.n_sample++;

     return result;
 }
@@ -3733,6 +3752,7 @@
                 WHISPER_PRINT_DEBUG("%s: decoder %2d: failed due to entropy %8.5f < %8.5f\n",
                         __func__, j, decoder.sequence.entropy, params.entropy_thold);

                 decoder.failed = true;
+                ctx->n_fail_h++;

                 continue;
             }
@@ -3754,6 +3774,7 @@
         const auto & decoder = ctx->decoders[best_decoder_id];

         if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
             success = false;
+            ctx->n_fail_p++;
         }

         if (success) {