From 8c3852cae3a574c92d22614f0b35f80434bc4c67 Mon Sep 17 00:00:00 2001
From: Geraint Luff <geraint@signalsmith-audio.co.uk>
Date: Thu, 27 Feb 2025 14:38:11 +0000
Subject: [PATCH] Refactor chunked computation to be neater, fix some int
 warnings

---
 cmd/main.cpp          |   4 +-
 cmd/util/wav.h        |  57 +++++++++++------------
 signalsmith-stretch.h | 102 +++++++++++++++++++++---------------------
 3 files changed, 82 insertions(+), 81 deletions(-)
diff --git a/cmd/main.cpp b/cmd/main.cpp
index 2b01b79..acc80b4 100644
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
 	signalsmith::Stopwatch stopwatch;
 
 	stopwatch.start();
-	stretch.presetDefault(inWav.channels, inWav.sampleRate);
+	stretch.presetDefault(int(inWav.channels), inWav.sampleRate);
 	stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
 	double initSeconds = stopwatch.lap();
 
@@ -130,7 +130,7 @@ int main(int argc, char* argv[]) {
 	stretch.seek(inWav, stretch.inputLatency(), 1/time);
 	inWav.offset += stretch.inputLatency();
 	// Process it all in one call, although it works just the same if we split into smaller blocks
-	stretch.process(inWav, inputLength, outWav, outputLength);
+	stretch.process(inWav, int(inputLength), outWav, int(outputLength));
 	// Read the last bit of output without giving it any more input
 	outWav.offset += outputLength;
 	stretch.flush(outWav, tailSamples);
diff --git a/cmd/util/wav.h b/cmd/util/wav.h
index 2a716a4..fd195a2 100644
--- a/cmd/util/wav.h
+++ b/cmd/util/wav.h
@@ -66,39 +66,40 @@ public:
 		}
 	};
 	
-	unsigned int sampleRate = 48000;
-	unsigned int channels = 1, offset = 0;
+	size_t sampleRate = 48000;
+	size_t channels = 1, offset = 0;
 	std::vector<double> samples;
-	int length() const {
-		return samples.size()/channels - offset;
+	size_t length() const {
+		size_t perChannel = samples.size()/channels;
+		return (perChannel >= offset) ? perChannel - offset : 0;
 	}
-	void resize(int length) {
+	void resize(size_t length) {
 		samples.resize((offset + length)*channels, 0);
 	}
 	template<bool isConst>
 	class ChannelReader {
 		using CSample = typename std::conditional<isConst, const double, double>::type;
 		CSample *data;
-		int stride;
+		size_t stride;
 	public:
-		ChannelReader(CSample *samples, int channels) : data(samples), stride(channels) {}
+		ChannelReader(CSample *samples, size_t channels) : data(samples), stride(channels) {}
 		
-		CSample & operator [](int i) {
+		CSample & operator [](size_t i) {
 			return data[i*stride];
 		}
 	};
-	ChannelReader<false> operator [](int c) {
+	ChannelReader<false> operator [](size_t c) {
 		return ChannelReader<false>(samples.data() + offset*channels + c, channels);
 	}
-	ChannelReader<true> operator [](int c) const {
+	ChannelReader<true> operator [](size_t c) const {
 		return ChannelReader<true>(samples.data() + offset*channels + c, channels);
 	}
 	
 	Result result = Result(Result::Code::OK);
 
 	Wav() {}
-	Wav(double sampleRate, int channels) : sampleRate(sampleRate), channels(channels) {}
-	Wav(double sampleRate, int channels, const std::vector<double> &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {}
+	Wav(double sampleRate, size_t channels) : sampleRate(sampleRate), channels(channels) {}
+	Wav(double sampleRate, size_t channels, const std::vector<double> &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {}
 	Wav(std::string filename) {
 		result = read(filename).warn();
 	}
@@ -141,9 +142,9 @@ public:
 				sampleRate = read32(file);
 				if (sampleRate < 1) return result = Result(Result::Code::FORMAT_ERROR, "Cannot have zero sampleRate");
 
-				unsigned int expectedBytesPerSecond = read32(file);
-				unsigned int bytesPerFrame = read16(file);
-				unsigned int bitsPerSample = read16(file);
+				size_t expectedBytesPerSecond = read32(file);
+				size_t bytesPerFrame = read16(file);
+				size_t bitsPerSample = read16(file);
 				if (!formatIsValid(formatInt, bitsPerSample)) return result = Result(Result::Code::UNSUPPORTED, "Unsupported format:bits: " + std::to_string(formatInt) + ":" + std::to_string(bitsPerSample));
 				// Since it's plain WAVE, we can do some extra checks for consistency
 				if (bitsPerSample*channels != bytesPerFrame*8) return result = Result(Result::Code::FORMAT_ERROR, "Format sizes don't add up");
@@ -191,7 +192,7 @@ public:
 		file.open(filename, std::ios::binary);
 		if (!file.is_open()) return result = Result(Result::Code::IO_ERROR, "Failed to open file: " + filename);
 		
-		int bytesPerSample;
+		size_t bytesPerSample;
 		switch (format) {
 		case Format::PCM:
 			bytesPerSample = 2;
@@ -199,30 +200,30 @@ public:
 		}
 		
 		// File size - 44 bytes is RIFF header, "fmt" block, and "data" block header
-		unsigned int dataLength = (samples.size() - offset*channels)*bytesPerSample;
-		unsigned int fileLength = 44 + dataLength;
+		size_t dataLength = (samples.size() - offset*channels)*bytesPerSample;
+		size_t fileLength = 44 + dataLength;
 
 		// RIFF chunk
 		write32(file, value_RIFF);
-		write32(file, fileLength - 8); // File length, excluding the RIFF header
+		write32(file, uint32_t(fileLength - 8)); // File length, excluding the RIFF header
 		write32(file, value_WAVE);
 		// "fmt " block
 		write32(file, value_fmt);
 		write32(file, 16); // block length
-		write16(file, (uint16_t)format);
-		write16(file, channels);
-		write32(file, sampleRate);
-		unsigned int expectedBytesPerSecond = sampleRate*channels*bytesPerSample;
-		write32(file, expectedBytesPerSecond);
-		write16(file, channels*bytesPerSample); // Bytes per frame
-		write16(file, bytesPerSample*8); // bist per sample
+		write16(file, uint16_t(format));
+		write16(file, uint16_t(channels));
+		write32(file, uint32_t(sampleRate));
+		size_t expectedBytesPerSecond = sampleRate*channels*bytesPerSample;
+		write32(file, uint32_t(expectedBytesPerSecond));
+		write16(file, uint16_t(channels*bytesPerSample)); // Bytes per frame
+		write16(file, uint16_t(bytesPerSample*8)); // bits per sample
 		
 		// "data" block
 		write32(file, value_data);
-		write32(file, dataLength);
+		write32(file, uint32_t(dataLength));
 		switch (format) {
 		case Format::PCM:
-			for (unsigned int i = offset*channels; i < samples.size(); i++) {
+			for (size_t i = offset*channels; i < samples.size(); i++) {
 				double value = samples[i]*32768;
 				if (value > 32767) value = 32767;
 				if (value <= -32768) value = -32768;
diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h
index 7814c95..5c1811b 100644
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@@ -41,16 +41,16 @@ struct SignalsmithStretch {
 	SignalsmithStretch(long seed) : randomEngine(seed) {}
 
 	int blockSamples() const {
-		return stft.blockSamples();
+		return int(stft.blockSamples());
 	}
 	int intervalSamples() const {
-		return stft.defaultInterval();
+		return int(stft.defaultInterval());
 	}
 	int inputLatency() const {
-		return stft.blockSamples() - stft.analysisOffset();
+		return int(stft.analysisLatency());
 	}
 	int outputLatency() const {
-		return stft.synthesisOffset() + stft.defaultInterval();
+		return int(stft.synthesisLatency() + stft.defaultInterval());
 	}
 	
 	void reset() {
@@ -84,7 +84,7 @@ struct SignalsmithStretch {
 		stashedOutput = stft.output;
 		tmpBuffer.resize(blockSamples + intervalSamples);
 
-		bands = stft.bands();
+		bands = int(stft.bands());
 		channelBands.assign(bands*channels, Band());
 		
 		peaks.reserve(bands/2);
@@ -122,7 +122,7 @@ struct SignalsmithStretch {
 		tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
 
 		int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
-		int padStart = tmpBuffer.size() - (inputSamples - startIndex); // start position in tmpBuffer
+		int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
 
 		Sample totalEnergy = 0;
 		for (int c = 0; c < channels; ++c) {
@@ -152,7 +152,7 @@ struct SignalsmithStretch {
 		int prevCopiedInput = 0;
 		auto copyInput = [&](int toIndex){
 
-			int length = std::min<int>(stft.blockSamples() + stft.defaultInterval(), toIndex - prevCopiedInput);
+			int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
 			tmpBuffer.resize(length);
 			int offset = toIndex - length;
 			for (int c = 0; c < channels; ++c) {
@@ -217,13 +217,48 @@ struct SignalsmithStretch {
 		
 		for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
 			Sample processRatio = Sample(blockProcess.samplesSinceLast)/stft.defaultInterval();
-			size_t processToStep = std::min<size_t>(blockProcess.steps, blockProcess.steps*processRatio);
+			if (processRatio >= 1) { // we're ready to start a new block
+				processRatio = 0;
+				blockProcess.step = 0;
+				blockProcess.steps = 0; // how many steps
+				blockProcess.samplesSinceLast = 0;
+				
+				// Time to process a spectrum!  Where should it come from in the input?
+				int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
+				int inputInterval = inputOffset - prevInputOffset;
+				prevInputOffset = inputOffset;
+				
+				copyInput(inputOffset);
+				stashedInput = stft.input; // save the input state, since that's what we'll analyse later
+				stashedOutput = stft.output; // save the current output, and read from it
+				stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
+
+				blockProcess.newSpectrum = didSeek || (inputInterval > 0);
+				blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
+				if (blockProcess.newSpectrum) {
+					// make sure the previous input is the correct distance in the past (give or take 1 sample)
+					blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
+					if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
+
+					// analyse a new input
+					blockProcess.steps += stft.analyseSteps() + 1;
+				}
+
+				blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
+				didSeek = false;
+
+				updateProcessSpectrumSteps();
+				blockProcess.steps += processSpectrumSteps;
+
+				blockProcess.steps += stft.synthesiseSteps() + 1;
+			}
+			size_t processToStep = std::min<size_t>(blockProcess.steps, (blockProcess.steps + 1)*processRatio);
+			
 			while (blockProcess.step < processToStep) {
 				size_t step = blockProcess.step++;
 #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP
 				SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP(step, blockProcess.steps);
 #endif
-				
 				if (blockProcess.newSpectrum) {
 					if (blockProcess.reanalysePrev) {
 						// analyse past input
@@ -294,41 +329,6 @@ struct SignalsmithStretch {
 					continue;
 				}
 			}
-			if (processRatio >= 1) { // we *should* have just written a block, and are now ready to start a new one
-				blockProcess.step = 0;
-				blockProcess.steps = 0; // how many steps
-				blockProcess.samplesSinceLast = 0;
-				
-				// Time to process a spectrum!  Where should it come from in the input?
-				int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
-				int inputInterval = inputOffset - prevInputOffset;
-				prevInputOffset = inputOffset;
-				
-				copyInput(inputOffset);
-				stashedInput = stft.input; // save the input state, since that's what we'll analyse later
-				stashedOutput = stft.output; // save the current output, and read from it
-				stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
-
-				blockProcess.newSpectrum = didSeek || (inputInterval > 0);
-				blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
-				if (blockProcess.newSpectrum) {
-					// make sure the previous input is the correct distance in the past (give or take 1 sample)
-					blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
-					if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
-
-					// analyse a new input
-					blockProcess.steps += stft.analyseSteps() + 1;
-				}
-
-				blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
-				didSeek = false;
-
-				updateProcessSpectrumSteps();
-				blockProcess.steps += processSpectrumSteps;
-
-				blockProcess.steps += stft.synthesiseSteps() + 1;
-				blockProcess.steps += 1; // planning the next block
-			}
 #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP
 			SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP();
 #endif
@@ -355,7 +355,7 @@ struct SignalsmithStretch {
 	// Read the remaining output, providing no further input.  `outputSamples` should ideally be at least `.outputLatency()`
 	template<class Outputs>
 	void flush(Outputs &&outputs, int outputSamples) {
-		int plainOutput = std::min<int>(outputSamples, stft.blockSamples());
+		int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
 		int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
 		stft.finishOutput(1);
 		for (int c = 0; c < channels; ++c) {
@@ -495,7 +495,7 @@ private:
 	}
 
 	// If RandomEngine=void, use std::default_random_engine;
-	using RandomEngineImpl = std::conditional<
+	using RandomEngineImpl = typename std::conditional<
 		std::is_void<RandomEngine>::value,
 		std::default_random_engine,
 		RandomEngine
@@ -527,7 +527,7 @@ private:
 
 		if (blockProcess.newSpectrum) {
 			if (step < size_t(channels)) {
-				int channel = step;
+				int channel = int(step);
 				auto bins = bandsForChannel(channel);
 
 				Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI));
@@ -572,7 +572,7 @@ private:
 			return;
 		}
 		if (step < size_t(channels)) {
-			size_t c = step;
+			int c = int(step);
 			Band *bins = bandsForChannel(c);
 			auto *predictions = predictionsForChannel(c);
 			for (int b = 0; b < bands; ++b) {
@@ -598,9 +598,9 @@ private:
 
 		if (step < splitMainPrediction) {
 			// Re-predict using phase differences between frequencies
-			int chunk = step;
-			int startB = bands*chunk/splitMainPrediction;
-			int endB = bands*(chunk + 1)/splitMainPrediction;
+			size_t chunk = step;
+			int startB = int(bands*chunk/splitMainPrediction);
+			int endB = int(bands*(chunk + 1)/splitMainPrediction);
 			for (int b = startB; b < endB; ++b) {
 				// Find maximum-energy channel and calculate that
 				int maxChannel = 0;