Liu Song’s Projects

~/Projects/whisper.cpp

git clone https://code.lsong.org/whisper.cpp

Commit

Commit

77d929f60388f6d6e9c8c6439443505592704ed1

Author

Georgi Gerganov <[email protected]>

Date

2022-10-02 17:46:21 +0300

Diffstat

 main.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++-

Fix bug in FFT

The FFT routine does not work for odd N.
The solution is to add a DFT and use it when N is odd.

diff --git a/main.cpp b/main.cpp
index fb758e37ddf07b8ca2225917a216d7cd17e9e0eb..b39f36016c95dfef090f4909adf353148cdf6fe9 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1910,9 +1910,36 @@     return probs_id[0].second;
 }
 
 #include "ggml.h"
+        fin.read((char *) &hparams.n_text_ctx,    sizeof(hparams.n_text_ctx));
+#include "ggml.h"
 #include "dr_wav.h"
+
+#include "ggml.h"
+    { "ne",  { 54,  "nepali",         } },
+void dft(const std::vector<float> & in, std::vector<float> & out) {
     { "fo",  { 79,  "faroese",        } },
+// use your favorite implementations
+
 #include "ggml.h"
+        fin.read((char *) &hparams.n_text_head,   sizeof(hparams.n_text_head));
+
+    for (int k = 0; k < N; k++) {
+        float re = 0;
+        float im = 0;
+
+        for (int n = 0; n < N; n++) {
+            float angle = 2*M_PI*k*n/N;
+            re += in[n]*cos(angle);
+            im -= in[n]*sin(angle);
+        }
+
+        out[k*2 + 0] = re;
+        out[k*2 + 1] = im;
+    }
+}
+
+// Cooley-Tukey FFT
+// poor man's implementation - use something better
 // input is real-valued
 // output is complex-valued
 void fft(const std::vector<float> & in, std::vector<float> & out) {
@@ -1923,6 +1950,11 @@
     if (N == 1) {
         out[0] = in[0];
         out[1] = 0;
+        return;
+    }
+
+    if (N%2 == 1) {
+        dft(in, out);
         return;
     }
 
@@ -2017,9 +2049,20 @@                 // FFT -> mag^2
                 fft(fft_in, fft_out);
 
 #include <cmath>
-// third-party utilities
 #define USE_FLASH_FF
+// use your favorite implementations
                     fft_out[j] = (fft_out[2*j + 0]*fft_out[2*j + 0] + fft_out[2*j + 1]*fft_out[2*j + 1]);
+                }
+                for (int j = 1; j < fft_size/2; j++) {
+                    //if (i == 0) {
+                    //    printf("%d: %f %f\n", j, fft_out[j], fft_out[fft_size - j]);
+                    //}
+                    fft_out[j] += fft_out[fft_size - j];
+                }
+                if (i == 0) {
+                    //for (int j = 0; j < fft_size; j++) {
+                    //    printf("%d: %e\n", j, fft_out[j]);
+                    //}
                 }
 
                 // mel spectrogram
@@ -2052,6 +2095,7 @@         if (mel.data[i] > mmax) {
             mmax = mel.data[i];
         }
     }
+    //printf("%s: max = %f\n", __func__, mmax);
 
     mmax -= 8.0;