Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions NeuralAudio/NAMModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <NAM/get_dsp.h>
#include <NAM/dsp.h>
#include <NAM/registry.h>
#include <NAM/slimmable.h>

namespace NeuralAudio
{
Expand All @@ -14,6 +15,8 @@ namespace NeuralAudio
/// Constructs a NAM model wrapper with no model loaded yet.
///
/// Seeds this instance's quality scale factor from the shared
/// defaultQualityScaleFactor and switches the NAM activation code over to its
/// fast tanh implementation.
NAMModel()
{
    slimmableSize = defaultQualityScaleFactor;

    nam::activations::Activation::enable_fast_tanh();
}

~NAMModel()
Expand All @@ -36,9 +39,46 @@ namespace NeuralAudio

namModel = nam::get_dsp(modelJson);

auto* slim = dynamic_cast<nam::SlimmableModel*>(namModel.get());

if (slim != nullptr)
{
isSlimmable = true;

slim->SetSlimmableSize(slimmableSize);
}

return true;
}

// Reports whether quality scaling is available for this model.
// Returns the isSlimmable flag, which the model-loading code sets to true when
// the loaded DSP can be cast to nam::SlimmableModel.
bool HasQualityScaling()
{
return isSlimmable;
}

// Returns the quality scale factor currently stored in slimmableSize.
// The stored value is returned regardless of whether the loaded model
// supports quality scaling.
float GetQualityScaleFactor()
{
return slimmableSize;
}

/// Stores a new quality scale factor and forwards it to the loaded model.
///
/// The call is a no-op when HasQualityScaling() is false or when the requested
/// factor equals the one already stored. The README describes the factor as
/// ranging from 0.0 (highest performance) to 1.0 (highest quality); the value
/// is not clamped here.
///
/// @param scaleFactor New quality scale factor to store and apply.
void SetQualityScaleFactor(float scaleFactor)
{
    // Exact float comparison is intentional: it only detects "same value set again".
    if (!HasQualityScaling() || (slimmableSize == scaleFactor))
    {
        return;
    }

    slimmableSize = scaleFactor;

    if (namModel == nullptr)
    {
        return;
    }

    // dynamic_cast yields nullptr when the current model is not a
    // nam::SlimmableModel, so check the result instead of dereferencing it
    // unconditionally.
    auto* slim = dynamic_cast<nam::SlimmableModel*>(namModel.get());

    if (slim != nullptr)
    {
        slim->SetSlimmableSize(slimmableSize);
    }
}

void Process(float* input, float* output, size_t numSamples)
{
namModel->process(&input, &output, (int)numSamples);
Expand All @@ -51,5 +91,7 @@ namespace NeuralAudio

private:
// Owned NAM DSP instance; null until the loader assigns the result of nam::get_dsp().
std::unique_ptr<nam::DSP> namModel = nullptr;
// Quality scale factor passed to SlimmableModel::SetSlimmableSize().
// The constructor overwrites this initial value with defaultQualityScaleFactor.
float slimmableSize = 1.0f;
// Set to true by the loader when the loaded model casts to nam::SlimmableModel.
bool isSlimmable = false;
};
}
20 changes: 20 additions & 0 deletions NeuralAudio/NeuralModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,30 @@ namespace NeuralAudio
defaultMaxAudioBufferSize = maxSize;
}

// Sets the shared default quality scale factor (defaultQualityScaleFactor).
// The value is stored exactly as given; no range check is applied here.
static void SetDefaultQualityScaleFactor(float scaleFactor)
{
defaultQualityScaleFactor = scaleFactor;
}

// Reports which implementation loaded this model.
// The base implementation returns EModelLoadMode::Internal; the method is
// virtual so derived models can report a different mode.
virtual EModelLoadMode GetLoadMode()
{
return EModelLoadMode::Internal;
}

// Reports whether this model supports quality scaling.
// The base implementation returns false; derived models that support scaling
// override it.
virtual bool HasQualityScaling()
{
return false;
}

// Returns the model's quality scale factor.
// The base implementation always returns 1.0f.
virtual float GetQualityScaleFactor()
{
return 1.0f;
}

// Sets the model's quality scale factor.
// The base implementation is intentionally empty: scaleFactor is ignored.
// Derived models that support scaling override this method.
virtual void SetQualityScaleFactor(float scaleFactor)
{
}

virtual bool IsStatic()
{
return false;
Expand Down Expand Up @@ -115,6 +134,7 @@ namespace NeuralAudio
// Static defaults shared by all models.
// NOTE(review): presumably the load modes used for LSTM and WaveNet models respectively - confirm against the loader.
inline static EModelLoadMode lstmLoadMode = EModelLoadMode::Internal;
inline static EModelLoadMode wavenetLoadMode = EModelLoadMode::Internal;
inline static int defaultMaxAudioBufferSize = 128;
// Written by SetDefaultQualityScaleFactor(); initial value is 1.0f.
inline static float defaultQualityScaleFactor = 1.0f;

void Prewarm(size_t numSamples, size_t blockSize)
{
Expand Down
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,36 @@ You can check which implementation was actually used to load the model with ```m

**NOTE:** Because of compile time and executable size considerations, only the internal, NAM Core and dynamic RTNeural implementations are built by default. If you want to use RTNeural, it is recommended that you add ```-DBUILD_STATIC_RTNEURAL=ON``` to your cmake command line. This will create static model implementations for the same sets of WaveNet and LSTM models as the internal implementation, and results in increased performance. Internal static LSTM model support is also off by default - to turn it on use ```-DBUILD_INTERNAL_STATIC_LSTM=ON```.

## Setting model quality scaling factor

Some models (notably, slimmable NAM A2 models) support quality scaling - trading off quality for performance.

The quality scaling factor is a floating-point value ranging from 0.0 (highest performance) to 1.0 (highest quality).

To set the default quality scaling factor, do:

```
NeuralAudio::NeuralModel::SetDefaultQualityScaleFactor(scaleFactor);
```

To check if a model supports quality scaling, do:

```
if (model->HasQualityScaling()) ...
```

To set the quality scaling factor for a model, do:

```
model->SetQualityScaleFactor(scaleFactor);
```

To get the quality scaling factor for a model, do:

```
float scaleFactor = model->GetQualityScaleFactor();
```

## Getting the model receptive field size

WaveNet models have a fixed receptive field size (i.e., the size of the input that the output depends on).
Expand Down
Loading