Add .outputSeek() for playing back samples

2025-08-10 20:13:54 +01:00 · 2025-08-10 20:13:54 +01:00 · c3fcda8563
commit c3fcda8563
parent 12de19e05d
2 changed files with 99 additions and 22 deletions
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@ -51,18 +51,36 @@ int main(int argc, char* argv[]) {
 	
 		stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
 		
-	However, we'll do it in separate stages to demonstrate more of the API.
-	*/
+	However, we'll do it in separate stages to show more of the API. */
 	
-	// First, an "output seek"
-	// This is suitable for starting playback of a sample at a given playback rate:
-	auto seekSamples = stretch.outputSeekSamples(1/time);
-	stretch.outputSeek(inWav, seekSamples);
-	// At this point, the next output samples we get will correspond to the beginning of the audio file
+	// First, an "output seek", where we provide a chunk of input.
+	// This is suitable for starting playback of a sample at a given playback rate.
+	auto seekLength = stretch.outputSeekLength(1/time);
+	stretch.outputSeek(inWav, seekLength);
+	// At this point, the next output samples we get will correspond to the beginning of the audio file.

+	// We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`).
+	int outputIndex = outputLength - stretch.outputLatency();

+	// Stretch's internal output position runs `outputLatency()` ahead of the output samples we get, so once we've received `outputIndex` samples it sits exactly at `outputLength`
+	int outputPos = outputIndex + stretch.outputLatency();
+	// Time-map: where do we want the input position to be at that moment?
+	int inputPos = std::round(outputPos/time);
+	// And therefore which input samples do we need to be supplying?
+	int inputIndex = inputPos + stretch.inputLatency();
 	
-	stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
+	// In this particular case, our `inputPos` will be at the end of the file
+	// and `inputIndex` will be beyond the end, so we pad with 0s to have enough input
+	inWav.resize(inputIndex);
+
+	// OK, go for it
+	inWav.offset = seekLength;
+	stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex);
+	
+	// And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks.
+	outWav.offset = outputIndex;
+	stretch.flush(outWav, outputLength - outputIndex);
+	outWav.offset = 0;

 	if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
 }
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@ -38,12 +38,7 @@ struct SignalsmithStretch {
 	SignalsmithStretch() : randomEngine(std::random_device{}()) {}
 	SignalsmithStretch(long seed) : randomEngine(seed) {}
 		
-	int blockSamples() const {
-		return int(stft.blockSamples());
-	}
-	int intervalSamples() const {
-		return int(stft.defaultInterval());
-	}
+	// The difference between the internal position (centre of a block) and the input samples you're supplying
 	int inputLatency() const {
 		return int(stft.analysisLatency());
 	}
@ -81,7 +76,6 @@ struct SignalsmithStretch {
 		stft.reset(0.1);
 		stashedInput = stft.input;
 		stashedOutput = stft.output;
-		tmpBuffer.resize(blockSamples + intervalSamples);

 		bands = int(stft.bands());
 		channelBands.assign(bands*channels, Band());
@ -94,6 +88,18 @@ struct SignalsmithStretch {

 		blockProcess = {};
 		formantMetric.resize(bands + 2);
+
+		tmpBuffer.resize(std::max(outputLatency()*channels, blockSamples + intervalSamples));
+	}
+	// For querying the existing config
+	int blockSamples() const {
+		return int(stft.blockSamples());
+	}
+	int intervalSamples() const {
+		return int(stft.defaultInterval());
+	}
+	bool splitComputation() const {
+		return _splitComputation;
 	}

 	/// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
@ -127,7 +133,8 @@ struct SignalsmithStretch {
 		formantBaseFreq = baseFreq;
 	}
 	
-	// Provide previous input ("pre-roll"), without affecting the speed calculation.  You should ideally feed it one block-length + one interval
+	// Provide previous input ("pre-roll") to smoothly change the input location without interrupting the output.  This doesn't do any calculation, just copies input to a buffer.
+	// You should ideally feed it `seekLength()` frames of input, unless it's directly after a `.reset()` (in which case `.outputSeek()` might be a better choice)
 	template<class Inputs>
 	void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
 		tmpBuffer.resize(0);
@ -155,6 +162,59 @@ struct SignalsmithStretch {
 		didSeek = true;
 		seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
 	}
+	int seekLength() const {
+		return int(stft.blockSamples() + stft.defaultInterval());
+	}
+	
+	// Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample.
+	// The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that.
+	template<class Inputs>
+	void outputSeek(Inputs &&inputs, int inputLength) {
+		// TODO: add fade-out parameter to avoid clicks, instead of doing a full reset
+		reset();
+		// Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll
+		int surplusInput = std::max<int>(inputLength - inputLatency(), 0);
+		Sample playbackRate = surplusInput/Sample(outputLatency());
+
+		// Move the input position to the start of the sound
+		int seekSamples = inputLength - surplusInput;
+		seek(inputs, seekSamples, playbackRate);
+		
+		// Awkward proxy classes to avoid copying/allocating anything
+		struct OffsetInput {
+			Inputs &inputs;
+			int offset;
+
+			struct Channel {
+				Inputs &inputs;
+				int channel;
+				int offset;
+				
+				Sample operator[](int i) {
+					return Sample(inputs[channel][i + offset]);
+				}
+			};
+			Channel operator[](int c) {
+				return {inputs, c, offset};
+			}
+		} offsetInput{inputs, seekSamples};
+		tmpBuffer.resize(outputLatency()*channels);
+		struct PreRollOutput {
+			Sample *samples;
+			int length;
+			
+			Sample * operator[](int c) {
+				return samples + c*length;
+			}
+		} preRollOutput{tmpBuffer.data(), outputLatency()};
+		
+		// Use the surplus input to produce pre-roll output
+		process(offsetInput, surplusInput, preRollOutput, outputLatency());
+		// TODO: put the thing down, flip it and reverse it
+	}
+	int outputSeekLength(Sample playbackRate) const {
+		return inputLatency() + playbackRate*outputLatency();
+	}

 	template<class Inputs, class Outputs>
 	void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
@ -383,7 +443,6 @@ struct SignalsmithStretch {
 			stft.readOutput(c, plainOutput, tmpBuffer.data());
 			auto &&outputChannel = outputs[c];
 			for (int i = 0; i < plainOutput; ++i) {
-				// TODO: plain output should be gain-
 				outputChannel[i] = tmpBuffer[i];
 			}
 			tmpBuffer.resize(foldedBackOutput);
@ -392,7 +451,7 @@ struct SignalsmithStretch {
 				outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
 			}
 		}
-		stft.reset(0.1);
+		stft.reset(0.1f);

 		// Reset the phase-vocoder stuff, so the next block gets a fresh start
 		for (int c = 0; c < channels; ++c) {