From c3fcda8563eca1bf65076775e6619fecc4f8182f Mon Sep 17 00:00:00 2001 From: Geraint Date: Sun, 10 Aug 2025 20:13:54 +0100 Subject: [PATCH] Add `.outputSeek()` for playing back samples --- cmd/main.cpp | 38 ++++++++++++++------ signalsmith-stretch.h | 83 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 99 insertions(+), 22 deletions(-) diff --git a/cmd/main.cpp b/cmd/main.cpp index 133b174..9e1a956 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -51,18 +51,36 @@ int main(int argc, char* argv[]) { stretch.exact(inWav, int(inputLength), outWav, int(outputLength)); - However, we'll do it in separate stages to demonstrate more of the API. - */ - - // First, an "output seek" - // This is suitable for starting playback of a sample at a given playback rate: - auto seekSamples = stretch.outputSeekSamples(1/time); - stretch.outputSeek(inWav, seekSamples); - // At this point, the next output samples we get will correspond to the beginning of the audio file - + However, we'll do it in separate stages to show more of the API. */ + // First, an "output seek", where we provide a chunk of input. + // This is suitable for starting playback of a sample at a given playback rate. + auto seekLength = stretch.outputSeekLength(1/time); + stretch.outputSeek(inWav, seekLength); + // At this point, the next output samples we get will correspond to the beginning of the audio file. - stretch.exact(inWav, int(inputLength), outWav, int(outputLength)); + // We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`). + int outputIndex = outputLength - stretch.outputLatency(); + + // Stretch's internal output position is slightly ahead of the output samples we get + int outputPos = outputLength + stretch.outputLatency(); + // Time-map: where do we want the input position to be at that moment? + int inputPos = std::round(outputPos/time); + // And therefore which input samples do we need to be supplying? 
+ int inputIndex = inputPos + stretch.inputLatency(); + + // In this particular case, our `inputPos` will be at the end of the file + // and `inputIndex` will be beyond the end, so we pad with 0s to have enough input + inWav.resize(inputIndex); + + // OK, go for it + inWav.offset = seekLength; + stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); + + // And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks. + outWav.offset = outputIndex; + stretch.flush(outWav, outputLength - outputIndex); + outWav.offset = 0; if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV"); } diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 97efc7c..7d0a73d 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -37,13 +37,8 @@ struct SignalsmithStretch { SignalsmithStretch() : randomEngine(std::random_device{}()) {} SignalsmithStretch(long seed) : randomEngine(seed) {} - - int blockSamples() const { - return int(stft.blockSamples()); - } - int intervalSamples() const { - return int(stft.defaultInterval()); - } + + // The difference between the internal position (centre of a block) and the input samples you're supplying int inputLatency() const { return int(stft.analysisLatency()); } @@ -81,7 +76,6 @@ struct SignalsmithStretch { stft.reset(0.1); stashedInput = stft.input; stashedOutput = stft.output; - tmpBuffer.resize(blockSamples + intervalSamples); bands = int(stft.bands()); channelBands.assign(bands*channels, Band()); @@ -94,6 +88,18 @@ struct SignalsmithStretch { blockProcess = {}; formantMetric.resize(bands + 2); + + tmpBuffer.resize(std::max(outputLatency()*channels, blockSamples + intervalSamples)); + } + // For querying the existing config + int blockSamples() const { + return int(stft.blockSamples()); + } + int intervalSamples() const { + return int(stft.defaultInterval()); + } + bool splitComputation() const { + return _splitComputation; } /// Frequency 
multiplier, and optional tonality limit (as multiple of sample-rate) @@ -126,8 +132,9 @@ struct SignalsmithStretch { void setFormantBase(Sample baseFreq=0) { formantBaseFreq = baseFreq; } - - // Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval + + // Provide previous input ("pre-roll") to smoothly change the input location without interrupting the output. This doesn't do any calculation, just copies input to a buffer. + // You should ideally feed it `seekLength()` frames of input, unless it's directly after a `.reset()` (in which case `.outputSeek()` might be a better choice) template void seek(Inputs &&inputs, int inputSamples, double playbackRate) { tmpBuffer.resize(0); @@ -155,7 +162,60 @@ struct SignalsmithStretch { didSeek = true; seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval(); } + int seekLength() const { + return int(stft.blockSamples() + stft.defaultInterval()); + } + // Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample. + // The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that. 
+ template + void outputSeek(Inputs &&inputs, int inputLength) { + // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset + reset(); + // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll + int surplusInput = std::max(inputLength - inputLatency(), 0); + Sample playbackRate = surplusInput/Sample(outputLatency()); + + // Move the input position to the start of the sound + int seekSamples = inputLength - surplusInput; + seek(inputs, seekSamples, playbackRate); + + // Awkward proxy classes to avoid copying/allocating anything + struct OffsetInput { + Inputs &inputs; + int offset; + + struct Channel { + Inputs &inputs; + int channel; + int offset; + + Sample operator[](int i) { + return Sample(inputs[channel][i + offset]); + } + }; + Channel operator[](int c) { + return {inputs, c, offset}; + } + } offsetInput{inputs, seekSamples}; + tmpBuffer.resize(outputLatency()*channels); + struct PreRollOutput { + Sample *samples; + int length; + + Sample * operator[](int c) { + return samples + c*length; + } + } preRollOutput{tmpBuffer.data(), outputLatency()}; + + // Use the surplus input to produce pre-roll output + process(offsetInput, surplusInput, preRollOutput, outputLatency()); + // TODO: put the thing down, flip it and reverse it + } + int outputSeekLength(Sample playbackRate) const { + return inputLatency() + playbackRate*outputLatency(); + } + template void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_START @@ -383,7 +443,6 @@ struct SignalsmithStretch { stft.readOutput(c, plainOutput, tmpBuffer.data()); auto &&outputChannel = outputs[c]; for (int i = 0; i < plainOutput; ++i) { - // TODO: plain output should be gain- outputChannel[i] = tmpBuffer[i]; } tmpBuffer.resize(foldedBackOutput); @@ -392,7 +451,7 @@ struct SignalsmithStretch { outputChannel[outputSamples - 1 - i] -= tmpBuffer[i]; } } - stft.reset(0.1); + 
stft.reset(0.1f); // Reset the phase-vocoder stuff, so the next block gets a fresh start for (int c = 0; c < channels; ++c) {