Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(FetchContent)
FetchContent_Declare(
signalsmith-linear
GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git
GIT_TAG 0.2.3
GIT_TAG 0.3.0
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(signalsmith-linear)
Expand Down
17 changes: 10 additions & 7 deletions cmd/Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
all: out/stretch

DEV_FLAGS := --semitones=4 --time=0.667 --asymmetry=0.5

dev: out/stretch
./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 $(DEV_FLAGS)
./out/stretch inputs/dev.wav out/dev-512.wav --process-chunk=512 $(DEV_FLAGS)
./out/stretch inputs/dev.wav out/dev-100.wav --process-chunk=100 $(DEV_FLAGS)
./out/stretch inputs/dev.wav out/dev-2048-sc.wav --process-chunk=2048 --split-computation $(DEV_FLAGS)
./out/stretch inputs/dev.wav out/dev-512-sc.wav --process-chunk=512 --split-computation $(DEV_FLAGS)
./out/stretch inputs/dev.wav out/dev-100-sc.wav --process-chunk=100 --split-computation $(DEV_FLAGS)

out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx
mkdir -p out
g++ -std=c++11 -O3 -g \
Expand Down Expand Up @@ -27,13 +37,6 @@ examples: out/stretch

TEST_WAV ?= "inputs/voice.wav"

dev: out/stretch
out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav
out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=2 --formant-base=100 $(TEST_WAV) out/shift-fc-f2-fb100.wav

clean:
rm -rf out

Expand Down
71 changes: 54 additions & 17 deletions cmd/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ using SignalsmithStretch = signalsmith::stretch::SignalsmithStretch<float>;

#include "./util/simple-args.h"
#include "./util/wav.h"
#include "./util/stopwatch.h"

#include "plot/plot.h"

int main(int argc, char* argv[]) {
SimpleArgs args(argc, argv);
Expand All @@ -19,13 +22,15 @@ int main(int argc, char* argv[]) {

std::string inputWav = args.arg<std::string>("input.wav", "16-bit WAV file");
std::string outputWav = args.arg<std::string>("output.wav", "output WAV file");
double time = args.flag<double>("time", "time-stretch factor", 1);
double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
bool formantComp = args.hasFlag("formant-comp", "formant compensation");
double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 100);
double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
double time = args.flag<double>("time", "time-stretch factor", 1);
double asymmetry = args.flag<double>("asymmetry", "asymmetrical STFT analysis (0-1)", 0);
bool splitComputation = args.hasFlag("split-computation", "distributes the computation more evenly (but higher latency)");
int processChunkSize = args.flag<int>("process-chunk", "process chunk size in samples", -1);
args.errorExit(); // exits on error, or with `--help`

std::cout << inputWav << " -> " << outputWav << "\n";
Expand All @@ -42,7 +47,7 @@ int main(int argc, char* argv[]) {
outWav.resize(outputLength);

SignalsmithStretch stretch;
stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
stretch.configure(int(inWav.channels), inWav.sampleRate*0.12, inWav.sampleRate*0.03, splitComputation, asymmetry);
stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
stretch.setFormantSemitones(formants, formantComp);
stretch.setFormantBase(formantBase/inWav.sampleRate);
Expand All @@ -56,30 +61,62 @@ int main(int argc, char* argv[]) {
// First, an "output seek", where we provide a chunk of input.
// This is suitable for starting playback of a sample at a given playback rate.
auto seekLength = stretch.outputSeekLength(1/time);
signalsmith::Stopwatch stopwatch;
stretch.outputSeek(inWav, seekLength);
double seekTime = stopwatch.seconds(stopwatch.lap());
// At this point, the next output samples we get will correspond to the beginning of the audio file.

// We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`).
int outputIndex = outputLength - stretch.intervalSamples();
int outputMainBlockLength = outputLength - stretch.intervalSamples();
// And this is how much input we'll need for that
int inputMainBlockLength = outputMainBlockLength/time;

// Stretch's internal output position is slightly ahead of the output samples we get
int outputPos = outputIndex + stretch.outputLatency();
// Time-map: where do we want the input position to be at that moment?
int inputPos = std::round(outputPos/time);
// And therefore which input samples do we need to be supplying?
int inputIndex = inputPos + stretch.inputLatency();

// In this particular case, our `inputPos` will be at the end of the file
// and `inputIndex` will be beyond the end, so we pad with 0s to have enough input
inWav.resize(inputIndex);
// This zero-pads the input, since we'll go past the end of it
inWav.resize(inputMainBlockLength + seekLength);

// OK, go for it
// Main block of processing
inWav.offset = seekLength;
stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex);
if (processChunkSize <= 0) {
stretch.process(inWav, inputMainBlockLength, outWav, outputMainBlockLength);
} else {
// Plot computation time for each chunk
signalsmith::plot::Plot2D timePlot(500, 200);
timePlot.x.major(0);
timePlot.y.major(0);
timePlot.y.minor(0.01*processChunkSize/inWav.sampleRate, "1%");
timePlot.y.minor(0.02*processChunkSize/inWav.sampleRate, "2%");
auto &timeLine = timePlot.line();
auto &timeLineSeek = timePlot.line().fillToY(0);
timeLine.add(outWav.offset, 0); // output seek
timeLineSeek.add(0, 0);
timeLineSeek.add(0, seekTime);
timeLineSeek.add(inWav.offset, seekTime);
timeLineSeek.add(inWav.offset, 0);

float residue = 0.f;
while (outWav.offset < size_t(outputMainBlockLength)) {
int outputSamples = std::min<int>(processChunkSize, outputMainBlockLength - outWav.offset);
float inputPrecise = outputSamples/time + residue;
int inputSamples = std::round(inputPrecise);
residue = inputPrecise - inputSamples;

stopwatch.startLap();
stretch.process(inWav, inputSamples, outWav, outputSamples);
double time = stopwatch.seconds(stopwatch.lap());
timeLine.add(outWav.offset, time);
timeLine.add(outWav.offset + outputSamples, time);

inWav.offset += inputSamples;
outWav.offset += outputSamples;
}

timeLine.add(outWav.offset, 0);
timePlot.write(outputWav + ".svg");
}

// And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks.
outWav.offset = outputIndex;
stretch.flush(outWav, outputLength - outputIndex);
outWav.offset = outputMainBlockLength;
stretch.flush(outWav, outputLength - outputMainBlockLength);
outWav.offset = 0;

if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
Expand Down
Loading