diff --git a/NeuralAudio/NAMModel.h b/NeuralAudio/NAMModel.h index a9c5ff7..178ed0b 100644 --- a/NeuralAudio/NAMModel.h +++ b/NeuralAudio/NAMModel.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace NeuralAudio { @@ -14,6 +15,8 @@ namespace NeuralAudio NAMModel() { nam::activations::Activation::enable_fast_tanh(); + + /* Start from the static defaultQualityScaleFactor value. */ slimmableSize = defaultQualityScaleFactor; } ~NAMModel() @@ -36,9 +39,46 @@ namespace NeuralAudio namModel = nam::get_dsp(modelJson); + /* NOTE(review): the dynamic_cast target type is not visible in this view (angle-bracket text appears to have been stripped) - confirm against the real header. */ auto* slim = dynamic_cast(namModel.get()); + + if (slim != nullptr) + { + /* NOTE(review): isSlimmable is only ever assigned true here and is not cleared when the cast yields nullptr - confirm whether one model object can be loaded more than once. */ isSlimmable = true; + + /* Push the currently stored factor into the freshly loaded model. */ slim->SetSlimmableSize(slimmableSize); + } + return true; } + /* Reports isSlimmable, which is set when the loaded model passed the dynamic_cast check at load time. */ bool HasQualityScaling() + { + return isSlimmable; + } + + /* Returns the stored slimmableSize; no check of isSlimmable is made. */ float GetQualityScaleFactor() + { + return slimmableSize; + } + + /* Does nothing unless HasQualityScaling() is true and scaleFactor differs from the stored value; otherwise stores it and, when a model is loaded, forwards it via SetSlimmableSize. The value is not range-checked here. */ void SetQualityScaleFactor(float scaleFactor) + { + if (HasQualityScaling()) + { + if (slimmableSize != scaleFactor) + { + slimmableSize = scaleFactor; + + if (namModel != nullptr) + { + auto* slim = dynamic_cast(namModel.get()); + + /* NOTE(review): slim is dereferenced without a null check, unlike the load path - confirm the cast cannot fail while isSlimmable is true. */ slim->SetSlimmableSize(slimmableSize); + } + } + } + } + void Process(float* input, float* output, size_t numSamples) { namModel->process(&input, &output, (int)numSamples); @@ -51,5 +91,7 @@ namespace NeuralAudio private: std::unique_ptr namModel = nullptr; + /* Last requested scale factor; initialised to 1.0f and overwritten in the constructor. */ float slimmableSize = 1.0f; + /* True once a loaded model passed the slimmable dynamic_cast check. */ bool isSlimmable = false; }; } \ No newline at end of file diff --git a/NeuralAudio/NeuralModel.h b/NeuralAudio/NeuralModel.h index 0e1483b..c60abc5 100644 --- a/NeuralAudio/NeuralModel.h +++ b/NeuralAudio/NeuralModel.h @@ -56,11 +56,30 @@ namespace NeuralAudio defaultMaxAudioBufferSize = maxSize; } + /* Overwrites the static defaultQualityScaleFactor; NAMModel reads that value in its constructor, so presumably only models constructed afterwards pick it up - TODO confirm. */ static void SetDefaultQualityScaleFactor(float scaleFactor) + { + defaultQualityScaleFactor = scaleFactor; + } + virtual EModelLoadMode GetLoadMode() { return EModelLoadMode::Internal; } + /* Base default: reports no quality scaling. */ virtual bool HasQualityScaling() + { + return false; + } + + /* Base default: always returns 1.0f. */ virtual float GetQualityScaleFactor() + { + return 1.0f; + } + + /* Base default: the requested factor is ignored. */ virtual void SetQualityScaleFactor(float scaleFactor) + { + } + virtual bool IsStatic() { return false; 
@@ -115,6 +134,7 @@ namespace NeuralAudio inline static EModelLoadMode lstmLoadMode = EModelLoadMode::Internal; inline static EModelLoadMode wavenetLoadMode = EModelLoadMode::Internal; inline static int defaultMaxAudioBufferSize = 128; + inline static float defaultQualityScaleFactor = 1.0f; void Prewarm(size_t numSamples, size_t blockSize) { diff --git a/README.md b/README.md index 2140259..a341924 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,36 @@ You can check which implementation was actually used to load the model with ```m **NOTE:** Because of compile time and executable size considerations, only the internal, NAM Core and dynamic RTNeural implementations are built by default. If you want to use RTNeural, it is recommended that you add ```-DBUILD_STATIC_RTNEURAL=ON``` to your cmake commandline. This will create static model implementations for the same sets of WaveNet and LSTM models as the internal implementation, and results in increased performance. Internal static LSTM model support is also off by default - to turn it on use ```-DBUILD_INTERNAL_STATIC_LSTM=ON```. +## Setting model quality scaling factor + +Some models (notably, slimmable NAM A2 models) support quality scaling - trading off quality for performance. + +The quality scaling factor is a floating-point value in the range 0.0 (highest performance) to 1.0 (highest quality). + +To set the default quality scaling factor, do: + +``` +NeuralAudio::NeuralModel::SetDefaultQualityScaleFactor(scaleFactor); +``` + +To check if a model supports quality scaling, do: + +``` +if (model->HasQualityScaling()) ... +``` + +To set the quality scaling factor for a model, do: + +``` +model->SetQualityScaleFactor(scaleFactor); +``` + +To get the quality scaling factor for a model, do: + +``` +float scaleFactor = model->GetQualityScaleFactor(); +``` + ## Getting the model receptive field size WaveNet models have a fixed receptive field size (ie: size of the input that the output depends on).