~/Projects/WhisperSpeech
git clone https://code.lsong.org/WhisperSpeech
Commit
- Commit: 08722a7de357ae5f378e18b4ba89adf8fa5b2dd0
- Author: Jakub Piotr Cłapa <[email protected]>
- Date: 2023-07-13 17:24:31 +0000
- Diffstat
  nbs/B1. Training.ipynb             | 36 +++++++++++++++++++----------
  nbs/B2. Training (Lightning).ipynb | 37 +++++++++++++++++++++----------
  spear_tts_pytorch/train.py         | 32 +++++++++++++++++++++-----
  spear_tts_pytorch/train_multi.py   | 38 +++++++++++++++++++++----------
Added support for μP training optimizer adjustments
diff --git a/nbs/B1. Training.ipynb b/nbs/B1. Training.ipynb
index 34567bd6108c07de5c7a1dfdc554c23d651ae8e5..8cf086e34b1c3e522faf882655830a108246f216 100644
--- a/nbs/B1. Training.ipynb
+++ b/nbs/B1. Training.ipynb
@@ -150,35 +150,45 @@
     "    avg_train_loss = torch.nan\n",
     "    \n",
     "    try:\n",
     "        scheduler = None\n",
+    "\n",
     "        all_params = set(model.parameters())\n",
+    "        customized_params = set()\n",
+    "        groups = []\n",
+    "        group_map = {}\n",
+    "        for name,m in model.named_modules():\n",
+    "            if hasattr(m, 'no_weight_decay') or hasattr(m, 'lr_scale'):\n",
+    "                customized_params |= set(m.parameters())\n",
+    "                m_wd = 0 if hasattr(m, 'no_weight_decay') else weight_decay\n",
+    "                m_lr = lr * getattr(m, 'lr_scale', 1)\n",
+    "                group = group_map.get((m_wd, m_lr), None)\n",
+    "                if group is None:\n",
+    "                    group = {\"params\": [], \"names\": [], \"weight_decay\": m_wd, \"lr\": m_lr}\n",
+    "                    groups.append(group)\n",
+    "                    group_map[(m_wd, m_lr)] = group\n",
+    "                group['params'] += m.parameters()\n",
+    "                group['names'].append(name)\n",
+    "        \n",
+    "        other_params = all_params - customized_params\n",
+    "        param_groups = groups + [\n",
+    "            {\"names\": [\"other\"], \"params\": list(other_params), \"weight_decay\": weight_decay },\n",
+    "        ]\n",
+    "\n",
+    "        optimizer = torch.optim.AdamW(lr=lr, betas=(0.9, 0.95), fused=device!='cpu', params=param_groups)\n",
     "        scaler = torch.cuda.amp.GradScaler(enabled=half)\n",
     "        scheduler = torch.optim.lr_scheduler.OneCycleLR(\n",
     "            optimizer, max_lr=lr, pct_start=pct_start, steps_per_epoch=len(train_loader), epochs=epochs,\n",
diff --git a/nbs/B2. Training (Lightning).ipynb b/nbs/B2. Training (Lightning).ipynb
index 63dc264d7da6b490e9cc63b8707079bb3d2c8d3a..7522e848ec97be532034cc77f586d094c2501451 100644
--- a/nbs/B2. Training (Lightning).ipynb
+++ b/nbs/B2. Training (Lightning).ipynb
@@ -68,33 +68,46 @@
     "        self.model_hparams = model_hparams\n",
     "    \n",
     "    def configure_optimizers(self):\n",
     "        \"\"\" Initialize AdamW optimizer\"\"\"\n",
-    "        all_params = set(self.model.parameters())\n",
+    "        all_params = set(self.model.parameters())\n",
+    "        customized_params = set()\n",
+    "        groups = []\n",
+    "        group_map = {}\n",
+    "        for name,m in self.model.named_modules():\n",
+    "            if hasattr(m, 'no_weight_decay') or hasattr(m, 'lr_scale'):\n",
+    "                customized_params |= set(m.parameters())\n",
+    "                m_wd = 0 if hasattr(m, 'no_weight_decay') else weight_decay\n",
+    "                m_lr = lr * getattr(m, 'lr_scale', 1)\n",
+    "                group = group_map.get((m_wd, m_lr), None)\n",
+    "                if group is None:\n",
+    "                    group = {\"params\": [], \"names\": [], \"weight_decay\": m_wd, \"lr\": m_lr}\n",
+    "                    groups.append(group)\n",
+    "                    group_map[(m_wd, m_lr)] = group\n",
+    "                group['params'] += m.parameters()\n",
+    "                group['names'].append(name)\n",
+    "        \n",
+    "        other_params = all_params - customized_params\n",
+    "        param_groups = groups + [\n",
+    "            {\"names\": [\"other\"], \"params\": list(other_params), \"weight_decay\": weight_decay },\n",
+    "        ]\n",
+    "\n",
+    "        optimizer = torch.optim.AdamW(lr=self.model_hparams['lr0'], betas=(0.9, 0.95),\n",
+    "                                      fused=True, params=param_groups)\n",
     "        \n",
     "        # modified from https://github.com/Lightning-AI/lightning/issues/5449#issuecomment-1501597319\n",
     "        def num_steps_per_epoch() -> int:\n",
diff --git a/spear_tts_pytorch/train.py b/spear_tts_pytorch/train.py
index b5868ad9608a85432d8b7dbea3b176ee82453ef0..6645f077cc1d7184efb0cc80df0d4ff8f2344f1d 100644
--- a/spear_tts_pytorch/train.py
+++ b/spear_tts_pytorch/train.py
@@ -14,6 +14,7 @@
 import fastprogress
 import numpy as np
 import pylab as plt
+import math
 
 import IPython
@@ -108,32 +109,49 @@
     avg_train_loss = torch.nan
 
     try:
         scheduler = None
+
         all_params = set(model.parameters())
+        customized_params = set()
+        groups = []
+        group_map = {}
+        for name,m in model.named_modules():
+            if hasattr(m, 'no_weight_decay') or hasattr(m, 'lr_scale'):
+                customized_params |= set(m.parameters())
+                m_wd = 0 if hasattr(m, 'no_weight_decay') else weight_decay
+                m_lr = lr * getattr(m, 'lr_scale', 1)
+                group = group_map.get((m_wd, m_lr), None)
+                if group is None:
+                    group = {"params": [], "names": [], "weight_decay": m_wd, "lr": m_lr}
+                    groups.append(group)
+                    group_map[(m_wd, m_lr)] = group
+                group['params'] += m.parameters()
+                group['names'].append(name)
+
+        other_params = all_params - customized_params
+        param_groups = groups + [
+            {"names": ["other"], "params": list(other_params), "weight_decay": weight_decay },
+        ]
+
+        optimizer = torch.optim.AdamW(lr=lr, betas=(0.9, 0.95), fused=device!='cpu', params=param_groups)
         scaler = torch.cuda.amp.GradScaler(enabled=half)
         scheduler = torch.optim.lr_scheduler.OneCycleLR(
             optimizer, max_lr=lr, pct_start=pct_start, steps_per_epoch=len(train_loader), epochs=epochs,
diff --git a/spear_tts_pytorch/train_multi.py b/spear_tts_pytorch/train_multi.py
index 81b1fcdfc7399aec268958730366a830b51f647b..bca07219fa0d5614ecfe64ea61fb24686e734415 100644
--- a/spear_tts_pytorch/train_multi.py
+++ b/spear_tts_pytorch/train_multi.py
@@ -35,33 +35,45 @@
         self.model_hparams = model_hparams
 
     def configure_optimizers(self):
         """ Initialize AdamW optimizer"""
-        all_params = set(self.model.parameters())
+        all_params = set(self.model.parameters())
+        customized_params = set()
+        groups = []
+        group_map = {}
+        for name,m in self.model.named_modules():
+            if hasattr(m, 'no_weight_decay') or hasattr(m, 'lr_scale'):
+                customized_params |= set(m.parameters())
+                m_wd = 0 if hasattr(m, 'no_weight_decay') else weight_decay
+                m_lr = lr * getattr(m, 'lr_scale', 1)
+                group = group_map.get((m_wd, m_lr), None)
+                if group is None:
+                    group = {"params": [], "names": [], "weight_decay": m_wd, "lr": m_lr}
+                    groups.append(group)
+                    group_map[(m_wd, m_lr)] = group
+                group['params'] += m.parameters()
+                group['names'].append(name)
+
+        other_params = all_params - customized_params
+        param_groups = groups + [
+            {"names": ["other"], "params": list(other_params), "weight_decay": weight_decay },
+        ]
+
+        optimizer = torch.optim.AdamW(lr=self.model_hparams['lr0'], betas=(0.9, 0.95),
+                                      fused=True, params=param_groups)
 
         # modified from https://github.com/Lightning-AI/lightning/issues/5449#issuecomment-1501597319
         def num_steps_per_epoch() -> int:
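The μP adjustments in this commit hinge on two per-module attributes, lr_scale and no_weight_decay: any submodule that defines them gets its parameters pulled out of the default AdamW group and placed into a group with a scaled learning rate and/or zero weight decay. Below is a minimal, self-contained sketch of that grouping logic. The Embedding/ScaledLinear classes, attribute values, and hyperparameters are hypothetical illustrations; only the grouping scheme itself follows the diff above, and fused AdamW is omitted so the snippet runs on CPU.

import torch
import torch.nn as nn

# Hypothetical modules: all the trainer requires is that a module expose
# `no_weight_decay` and/or `lr_scale` attributes.
class Embedding(nn.Embedding):
    no_weight_decay = True        # exclude these weights from weight decay

class ScaledLinear(nn.Linear):
    lr_scale = 0.1                # μP-style per-module learning-rate scale

model = nn.Sequential(Embedding(100, 16), ScaledLinear(16, 16), nn.Linear(16, 4))

lr, weight_decay = 1e-3, 0.1
all_params = set(model.parameters())
customized_params, groups, group_map = set(), [], {}

for name, m in model.named_modules():
    if hasattr(m, 'no_weight_decay') or hasattr(m, 'lr_scale'):
        customized_params |= set(m.parameters())
        m_wd = 0 if hasattr(m, 'no_weight_decay') else weight_decay
        m_lr = lr * getattr(m, 'lr_scale', 1)
        # reuse one group per (weight_decay, lr) combination
        group = group_map.get((m_wd, m_lr))
        if group is None:
            group = {"params": [], "names": [], "weight_decay": m_wd, "lr": m_lr}
            group_map[(m_wd, m_lr)] = group
            groups.append(group)
        group["params"] += m.parameters()
        group["names"].append(name)

# everything that did not ask for special treatment keeps the defaults
other_params = all_params - customized_params
param_groups = groups + [
    {"names": ["other"], "params": list(other_params), "weight_decay": weight_decay},
]

optimizer = torch.optim.AdamW(param_groups, lr=lr, betas=(0.9, 0.95))
for g in optimizer.param_groups:
    print(g["names"], "lr:", g["lr"], "wd:", g["weight_decay"])

Running this prints one group per (weight_decay, lr) combination plus the "other" group, which is what lets a OneCycleLR schedule and weight decay apply with different scales to different parts of the model.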