diff --git a/cmd/Makefile b/cmd/Makefile index 6887a65..30fdfb2 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -11,19 +11,19 @@ out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx # Uses input files from: https://signalsmith-audio.co.uk/code/stretch/inputs.zip examples: out/stretch mkdir -p out/examples - inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 - inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 - inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 - inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 - inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 - inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 - inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 - inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 - inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 - inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 - inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 - inputs/run-all.sh out/examples/t2- out/stretch --time=2 - inputs/run-all.sh out/examples/t4- out/stretch --time=4 + inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 --exact + inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 --exact + inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 --exact + inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 --exact + inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 --exact + inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 --exact + inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 --exact + inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 --exact + inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 --exact + inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 --exact + inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 --exact + inputs/run-all.sh out/examples/t2- out/stretch --time=2 --exact + 
inputs/run-all.sh out/examples/t4- out/stretch --time=4 --exact TEST_WAV ?= "inputs/voice.wav" diff --git a/cmd/main.cpp b/cmd/main.cpp index 8cdec21..af3435b 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -123,26 +123,34 @@ int main(int argc, char* argv[]) { std::cout << "\tallocated " << (initMemory.allocBytes/1000) << "kB, freed " << (initMemory.freeBytes/1000) << "kB\n"; } - // pad the input at the end, since we'll be reading slightly ahead - size_t paddedInputLength = inputLength + stretch.inputLatency(); - inWav.samples.resize(paddedInputLength*inWav.channels); - // pad the output at the end, since we have output latency as well - int tailSamples = exactLength ? stretch.outputLatency() : (stretch.outputLatency() + stretch.inputLatency()); // if we don't need exact length, add a bit more output to catch any wobbles past the end - int paddedOutputLength = outputLength + tailSamples; - outWav.samples.resize(paddedOutputLength*outWav.channels); - signalsmith::MemoryTracker processMemory; - stopwatch.start(); - // The simplest way to deal with input latency (when have access to the audio buffer) is to always be slightly ahead in the input - stretch.seek(inWav, stretch.inputLatency(), 1/time); - inWav.offset += stretch.inputLatency(); - // Process it all in one call, although it works just the same if we split into smaller blocks - stretch.process(inWav, int(inputLength), outWav, int(outputLength)); - // Read the last bit of output without giving it any more input - outWav.offset += outputLength; - stretch.flush(outWav, tailSamples); - outWav.offset -= outputLength; + if (exactLength) { + outWav.samples.resize(outputLength*outWav.channels); + stopwatch.start(); + processMemory = {}; + stretch.exact(inWav, int(inputLength), outWav, int(outputLength)); + } else { + // pad the input at the end, since we'll be reading slightly ahead + size_t paddedInputLength = inputLength + stretch.inputLatency(); + inWav.samples.resize(paddedInputLength*inWav.channels); + // pad the 
output at the end, since we have output latency as well + int tailSamples = exactLength ? stretch.outputLatency() : (stretch.outputLatency() + stretch.inputLatency()); // if we don't need exact length, add a bit more output to catch any wobbles past the end + int paddedOutputLength = outputLength + tailSamples; + outWav.samples.resize(paddedOutputLength*outWav.channels); + + stopwatch.start(); + // The simplest way to deal with input latency (when we have access to the audio buffer) is to always be slightly ahead in the input + stretch.seek(inWav, stretch.inputLatency(), 1/time); + inWav.offset += stretch.inputLatency(); + // Process it all in one call, although it works just the same if we split into smaller blocks + processMemory = {}; + stretch.process(inWav, int(inputLength), outWav, int(outputLength)); + // Read the last bit of output without giving it any more input + outWav.offset += outputLength; + stretch.flush(outWav, tailSamples); + outWav.offset -= outputLength; + } double processSeconds = stopwatch.lap(); double processRate = (inWav.length()/inWav.sampleRate)/processSeconds; @@ -154,20 +162,6 @@ int main(int argc, char* argv[]) { if (processMemory) args.errorExit("allocated during process()"); } - if (exactLength) { - // The start has some extra output - we could just trim it, but we might as well fold it back into the output - for (size_t c = 0; c < outWav.channels; ++c) { - for (int i = 0; i < stretch.outputLatency(); ++i) { - double trimmed = outWav[stretch.outputLatency() - 1 - i][c]; - outWav[stretch.outputLatency() + i][c] -= trimmed; // reversed in time and negated - } - } - // Skips the output - outWav.offset += stretch.outputLatency(); - - // the `.flush()` call already handled foldback stuff at the end (since we asked for a shorter `tailSamples`) - } - #ifdef PROFILE_PLOT_CHUNKS signalsmith::plot::Figure figure; auto &plot = figure(0, 0).plot(400, 150); diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 024dcbb..6a0a869 100644 --- 
a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -402,6 +402,69 @@ struct SignalsmithStretch { } } } + + template<class Inputs, class Outputs> + bool exact(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { + if (outputSamples < outputLatency()*2) return false; // too short for this + + struct ZeroPaddedInput { + Inputs &inputs; + int offset, length; + + struct Channel { + ZeroPaddedInput &zpi; + int channel; + + Sample operator[](int i) { + if (zpi.offset + i < zpi.length) return zpi.inputs[channel][zpi.offset + i]; + return 0; + } + }; + + Channel operator[](int c){ + return {*this, c}; + } + } zpi{inputs, inputLatency(), inputSamples}; + seek(inputs, inputLatency(), Sample(inputSamples)/outputSamples); // start positioned on the centre of the input + process(zpi, inputSamples, outputs, outputSamples); + + // Fold the first bit of the output back onto itself + for (int c = 0; c < channels; ++c) { + auto &&channel = outputs[c]; + for (int i = 0; i < std::min(outputSamples - outputLatency(), outputLatency()); ++i) { + channel[i + outputLatency()] -= channel[outputLatency() - 1 - i]; + } + } + // Shuffle everything along to compensate for output latency + for (int c = 0; c < channels; ++c) { + auto &&channel = outputs[c]; + for (int i = 0; i < outputSamples - outputLatency(); ++i) { + channel[i] = channel[i + outputLatency()]; + } + } + + struct OffsetOutput { + Outputs &outputs; + int offset; + + struct Channel { + OffsetOutput &oo; + int channel; + + decltype(outputs[0][0]) operator[](int i) { + return oo.outputs[channel][oo.offset + i]; + } + }; + + Channel operator[](int c){ + return {*this, c}; + } + } oo{outputs, outputSamples - outputLatency()}; + // Get the final chunk - extra output is already folded back as part of this + flush(oo, outputLatency()); + return true; + } + private: bool _splitComputation = false; struct {