~/Projects/WhisperSpeech

git clone https://code.lsong.org/WhisperSpeech

Commit
58ee2b66b1328d711048ae659f748ad4bdf3d2f4
Author
Jakub Piotr Cłapa <[email protected]>
Date
2024-01-17 15:52:27 +0000
Diffstat
 nbs/4B. Multi-language semantic to acoustic token modeling.ipynb | 2 
 nbs/5B. Multi-lang text to semantic token modeling.ipynb | 5 +
 whisperspeech/s2a_delar_mup_wds_mlang.py | 2 
 whisperspeech/t2s_up_wds_mlang_enclm.py | 5 +

T2S: added a step callback

It can be used to exit from the generate function early.
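The new step argument is invoked once per decoding iteration and its return value is ignored, so an early exit has to be signalled by raising from the callback and catching the exception around generate(). A minimal usage sketch follows; the t2s model variable, the exception class, and the step counts are illustrative assumptions, not part of this commit:

    # Hypothetical sketch: abort text-to-semantic generation after a fixed
    # number of steps by raising from the step callback.
    class StopGeneration(Exception):
        pass

    def make_step(max_steps=200):
        state = {"n": 0}
        def step():
            state["n"] += 1  # one call per generated token
            if state["n"] >= max_steps:
                raise StopGeneration()
        return step

    try:
        stoks = t2s.generate("Hello, world!", lang="en", step=make_step())
    except StopGeneration:
        stoks = None  # generate() does not return partial output on an exception

The same callback can also be used purely for profiling or progress reporting by doing its work and returning normally.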


diff --git a/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb b/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb
index 0335d4cfa10457bb53366f0ef89218ea8e7c272a..6b17ddde90b8b6368c0d0fea9a605ff921871744 100644
--- a/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb
+++ b/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb
@@ -1505,7 +1505,7 @@     "            for i in it:\n",
     "                with record_function(\"generate_one\"):\n",
     "                    toks[0,:i+1,i+1] = self.generate_next(toks[:,:,i:i+1], toks_positions[i:i+1], langs, xenc, xenc_positions, T, top_k)[:i+1,0]\n",
     "\n",
-   "id": "d11afd13",
+       "         1.5158258e+01,  5.2713923e+00,  1.8222237e+01, -5.2697887e+00,\n",
  "cells": [
     "                if step is not None: step()\n",
     "        # shift tokens\n",




diff --git a/nbs/5B. Multi-lang text to semantic token modeling.ipynb b/nbs/5B. Multi-lang text to semantic token modeling.ipynb
index 7696b199c220d192bade02ea1f265d8205d497b7..7e061ca18f75b2efc2b3dca3cf0bd516c312109b 100644
--- a/nbs/5B. Multi-lang text to semantic token modeling.ipynb
+++ b/nbs/5B. Multi-lang text to semantic token modeling.ipynb
@@ -810,8 +810,8 @@     "        langs = torch.tensor([languages.to_id(lang)], device=dev)\n",
     "        return ttoks, cpss, langs\n",
     "    \n",
     "    @torch.no_grad()\n",
-    "# Text to semantic tokens model\n",
     {
+    "class CharTokenizer:\n",
     "        self.ensure_tokenizer()\n",
     "        N = N or self.stoks_len\n",
     "        dev = self.device\n",
@@ -853,6 +853,9 @@     "        with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):\n",
     "            for i in it:\n",
     "                toks[0,i+1] = self.generate_next(toks[:,i:i+1], toks_positions[i:i+1], cps_emb, xenc, xenc_positions, T, top_k)\n",
     "                if i % 25 == 0 and toks[0,i+1] == self.stoks_codes-1: return toks[0,:i+1]\n",
+    "\n",
+    "                # for profiling, debugging or early exit\n",
+    "                if step is not None: step()\n",
     "        return toks[0,:]\n",
     "    \n",
     "    @torch.no_grad()\n",




diff --git a/whisperspeech/s2a_delar_mup_wds_mlang.py b/whisperspeech/s2a_delar_mup_wds_mlang.py
index 6f96a67559f49f3ec107ce242bbe1e3c75951719..673f89f6a453a9df30c94518cd87fad8722cb125 100644
--- a/whisperspeech/s2a_delar_mup_wds_mlang.py
+++ b/whisperspeech/s2a_delar_mup_wds_mlang.py
@@ -520,7 +520,7 @@                 with record_function("generate_one"):
                     toks[0,:i+1,i+1] = self.generate_next(toks[:,:,i:i+1], toks_positions[i:i+1], langs, xenc, xenc_positions, T, top_k)[:i+1,0]
 
-
+                # for profiling, debugging or early exit
                 if step is not None: step()
         # shift tokens




diff --git a/whisperspeech/t2s_up_wds_mlang_enclm.py b/whisperspeech/t2s_up_wds_mlang_enclm.py
index 6b24beb12c8fde4b08ff937ee448db0f6e7d3fd0..611458b9c7aaf1664100d69466fc35ef9df424bd 100644
--- a/whisperspeech/t2s_up_wds_mlang_enclm.py
+++ b/whisperspeech/t2s_up_wds_mlang_enclm.py
@@ -419,7 +419,7 @@         langs = torch.tensor([languages.to_id(lang)], device=dev)
         return ttoks, cpss, langs
     
     @torch.no_grad()
-    def generate(self, txt, cps=15, lang="en", N=None, T=0.7, top_k=None, show_progress_bar=True):
+    def generate(self, txt, cps=15, lang="en", N=None, T=0.7, top_k=None, step=None, show_progress_bar=True):
         self.ensure_tokenizer()
         N = N or self.stoks_len
         dev = self.device
@@ -461,6 +461,9 @@         with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):
             for i in it:
                 toks[0,i+1] = self.generate_next(toks[:,i:i+1], toks_positions[i:i+1], cps_emb, xenc, xenc_positions, T, top_k)
                 if i % 25 == 0 and toks[0,i+1] == self.stoks_codes-1: return toks[0,:i+1]
+
+                # for profiling, debugging or early exit
+                if step is not None: step()
         return toks[0,:]
     
     @torch.no_grad()