~/Projects/WhisperSpeech
git clone https://code.lsong.org/WhisperSpeech
Commit
- Commit
- 58ee2b66b1328d711048ae659f748ad4bdf3d2f4
- Author
- Jakub Piotr Cłapa <[email protected]>
- Date
- 2024-01-17 15:52:27 +0000
- Diffstat
nbs/4B. Multi-language semantic to acoustic token modeling.ipynb | 2 nbs/5B. Multi-lang text to semantic token modeling.ipynb | 5 + whisperspeech/s2a_delar_mup_wds_mlang.py | 2 whisperspeech/t2s_up_wds_mlang_enclm.py | 5 +
T2S: added a step callback It can be used to exit from the generate function early.
diff --git a/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb b/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb index 0335d4cfa10457bb53366f0ef89218ea8e7c272a..6b17ddde90b8b6368c0d0fea9a605ff921871744 100644 --- a/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb +++ b/nbs/4B. Multi-language semantic to acoustic token modeling.ipynb @@ -1505,7 +1505,7 @@ "            for i in it:\n", "                with record_function(\"generate_one\"):\n", "                    toks[0,:i+1,i+1] = self.generate_next(toks[:,:,i:i+1], toks_positions[i:i+1], langs, xenc, xenc_positions, T, top_k)[:i+1,0]\n", - "\n", + "                if step is not None: step()\n", "\n", "                # shift tokens\n", diff --git a/nbs/5B. Multi-lang text to semantic token modeling.ipynb b/nbs/5B. Multi-lang text to semantic token modeling.ipynb index 7696b199c220d192bade02ea1f265d8205d497b7..7e061ca18f75b2efc2b3dca3cf0bd516c312109b 100644 --- a/nbs/5B. Multi-lang text to semantic token modeling.ipynb +++ b/nbs/5B. 
Multi-lang text to semantic token modeling.ipynb @@ -810,8 +810,8 @@ "        langs = torch.tensor([languages.to_id(lang)], device=dev)\n", "        return ttoks, cpss, langs\n", "    \n", "    @torch.no_grad()\n", - "    def generate(self, txt, cps=15, lang=\"en\", N=None, T=0.7, top_k=None, show_progress_bar=True):\n", + "    def generate(self, txt, cps=15, lang=\"en\", N=None, T=0.7, top_k=None, step=None, show_progress_bar=True):\n", "        self.ensure_tokenizer()\n", "        N = N or self.stoks_len\n", "        dev = self.device\n", @@ -853,6 +853,9 @@ "        with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True):\n", "            for i in it:\n", "                toks[0,i+1] = self.generate_next(toks[:,i:i+1], toks_positions[i:i+1], cps_emb, xenc, xenc_positions, T, top_k)\n", "                if i % 25 == 0 and toks[0,i+1] == self.stoks_codes-1: return toks[0,:i+1]\n", + "\n", + "                # for profiling, debugging or early exit\n", + "                if step is not None: step()\n", "        return toks[0,:]\n", "    \n", "    @torch.no_grad()\n", diff --git a/whisperspeech/s2a_delar_mup_wds_mlang.py b/whisperspeech/s2a_delar_mup_wds_mlang.py index 6f96a67559f49f3ec107ce242bbe1e3c75951719..673f89f6a453a9df30c94518cd87fad8722cb125 100644 --- a/whisperspeech/s2a_delar_mup_wds_mlang.py +++ b/whisperspeech/s2a_delar_mup_wds_mlang.py @@ -520,7 +520,7 @@ with record_function("generate_one"): toks[0,:i+1,i+1] = self.generate_next(toks[:,:,i:i+1], toks_positions[i:i+1], langs, xenc, xenc_positions, T, top_k)[:i+1,0] - + if step is not None: step() # shift tokens diff --git a/whisperspeech/t2s_up_wds_mlang_enclm.py b/whisperspeech/t2s_up_wds_mlang_enclm.py index 6b24beb12c8fde4b08ff937ee448db0f6e7d3fd0..611458b9c7aaf1664100d69466fc35ef9df424bd 100644 --- a/whisperspeech/t2s_up_wds_mlang_enclm.py +++ b/whisperspeech/t2s_up_wds_mlang_enclm.py @@ -419,7 +419,7 @@ langs = torch.tensor([languages.to_id(lang)], device=dev) return ttoks, cpss, langs @torch.no_grad() - def generate(self, txt, cps=15, lang="en", N=None, T=0.7, top_k=None, show_progress_bar=True): + def generate(self, txt, cps=15, lang="en", N=None, T=0.7, top_k=None, step=None, show_progress_bar=True): self.ensure_tokenizer() N = N or self.stoks_len dev = self.device @@ -461,6 +461,9 @@ with torch.backends.cuda.sdp_kernel(enable_flash=False, enable_mem_efficient=False, enable_math=True): for i in it: toks[0,i+1] = self.generate_next(toks[:,i:i+1], toks_positions[i:i+1], cps_emb, xenc, xenc_positions, T, top_k) if i % 25 == 0 and toks[0,i+1] == self.stoks_codes-1: return toks[0,:i+1] + + # for profiling, debugging or early exit + if step is not None: step() return toks[0,:] @torch.no_grad()