From 8c3852cae3a574c92d22614f0b35f80434bc4c67 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Thu, 27 Feb 2025 14:38:11 +0000 Subject: [PATCH] Refactor chunked computation to be neater, fix some int warnings --- cmd/main.cpp | 4 +- cmd/util/wav.h | 57 +++++++++++------------ signalsmith-stretch.h | 102 +++++++++++++++++++++--------------------- 3 files changed, 82 insertions(+), 81 deletions(-) diff --git a/cmd/main.cpp b/cmd/main.cpp index 2b01b79..acc80b4 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -105,7 +105,7 @@ int main(int argc, char* argv[]) { signalsmith::Stopwatch stopwatch; stopwatch.start(); - stretch.presetDefault(inWav.channels, inWav.sampleRate); + stretch.presetDefault(int(inWav.channels), inWav.sampleRate); stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate); double initSeconds = stopwatch.lap(); @@ -130,7 +130,7 @@ int main(int argc, char* argv[]) { stretch.seek(inWav, stretch.inputLatency(), 1/time); inWav.offset += stretch.inputLatency(); // Process it all in one call, although it works just the same if we split into smaller blocks - stretch.process(inWav, inputLength, outWav, outputLength); + stretch.process(inWav, int(inputLength), outWav, int(outputLength)); // Read the last bit of output without giving it any more input outWav.offset += outputLength; stretch.flush(outWav, tailSamples); diff --git a/cmd/util/wav.h b/cmd/util/wav.h index 2a716a4..fd195a2 100644 --- a/cmd/util/wav.h +++ b/cmd/util/wav.h @@ -66,39 +66,40 @@ public: } }; - unsigned int sampleRate = 48000; - unsigned int channels = 1, offset = 0; + size_t sampleRate = 48000; + size_t channels = 1, offset = 0; std::vector samples; - int length() const { - return samples.size()/channels - offset; + size_t length() const { + size_t perChannel = samples.size()/channels; + return (perChannel >= offset) ? perChannel - offset : 0; } - void resize(int length) { + void resize(size_t length) { samples.resize((offset + length)*channels, 0); } template class ChannelReader { using CSample = typename std::conditional::type; CSample *data; - int stride; + size_t stride; public: - ChannelReader(CSample *samples, int channels) : data(samples), stride(channels) {} + ChannelReader(CSample *samples, size_t channels) : data(samples), stride(channels) {} - CSample & operator [](int i) { + CSample & operator [](size_t i) { return data[i*stride]; } }; - ChannelReader operator [](int c) { + ChannelReader operator [](size_t c) { return ChannelReader(samples.data() + offset*channels + c, channels); } - ChannelReader operator [](int c) const { + ChannelReader operator [](size_t c) const { return ChannelReader(samples.data() + offset*channels + c, channels); } Result result = Result(Result::Code::OK); Wav() {} - Wav(double sampleRate, int channels) : sampleRate(sampleRate), channels(channels) {} - Wav(double sampleRate, int channels, const std::vector &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {} + Wav(double sampleRate, size_t channels) : sampleRate(sampleRate), channels(channels) {} + Wav(double sampleRate, size_t channels, const std::vector &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {} Wav(std::string filename) { result = read(filename).warn(); } @@ -141,9 +142,9 @@ public: sampleRate = read32(file); if (sampleRate < 1) return result = Result(Result::Code::FORMAT_ERROR, "Cannot have zero sampleRate"); - unsigned int expectedBytesPerSecond = read32(file); - unsigned int bytesPerFrame = read16(file); - unsigned int bitsPerSample = read16(file); + size_t expectedBytesPerSecond = read32(file); + size_t bytesPerFrame = read16(file); + size_t bitsPerSample = read16(file); if (!formatIsValid(formatInt, bitsPerSample)) return result = Result(Result::Code::UNSUPPORTED, "Unsupported format:bits: " + std::to_string(formatInt) + ":" + std::to_string(bitsPerSample)); // Since it's plain WAVE, we can do some extra checks for consistency if (bitsPerSample*channels != bytesPerFrame*8) return result = Result(Result::Code::FORMAT_ERROR, "Format sizes don't add up"); @@ -191,7 +192,7 @@ public: file.open(filename, std::ios::binary); if (!file.is_open()) return result = Result(Result::Code::IO_ERROR, "Failed to open file: " + filename); - int bytesPerSample; + size_t bytesPerSample; switch (format) { case Format::PCM: bytesPerSample = 2; @@ -199,30 +200,30 @@ public: } // File size - 44 bytes is RIFF header, "fmt" block, and "data" block header - unsigned int dataLength = (samples.size() - offset*channels)*bytesPerSample; - unsigned int fileLength = 44 + dataLength; + size_t dataLength = (samples.size() - offset*channels)*bytesPerSample; + size_t fileLength = 44 + dataLength; // RIFF chunk write32(file, value_RIFF); - write32(file, fileLength - 8); // File length, excluding the RIFF header + write32(file, uint32_t(fileLength - 8)); // File length, excluding the RIFF header write32(file, value_WAVE); // "fmt " block write32(file, value_fmt); write32(file, 16); // block length - write16(file, (uint16_t)format); - write16(file, channels); - write32(file, sampleRate); - unsigned int expectedBytesPerSecond = sampleRate*channels*bytesPerSample; - write32(file, expectedBytesPerSecond); - write16(file, channels*bytesPerSample); // Bytes per frame - write16(file, bytesPerSample*8); // bist per sample + write16(file, uint16_t(format)); + write16(file, uint16_t(channels)); + write32(file, uint32_t(sampleRate)); + size_t expectedBytesPerSecond = sampleRate*channels*bytesPerSample; + write32(file, uint32_t(expectedBytesPerSecond)); + write16(file, uint16_t(channels*bytesPerSample)); // Bytes per frame + write16(file, uint16_t(bytesPerSample*8)); // bist per sample // "data" block write32(file, value_data); - write32(file, dataLength); + write32(file, uint32_t(dataLength)); switch (format) { case Format::PCM: - for (unsigned int i = offset*channels; i < samples.size(); i++) { + for (size_t i = offset*channels; i < samples.size(); i++) { double value = samples[i]*32768; if (value > 32767) value = 32767; if (value <= -32768) value = -32768; diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 7814c95..5c1811b 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -41,16 +41,16 @@ struct SignalsmithStretch { SignalsmithStretch(long seed) : randomEngine(seed) {} int blockSamples() const { - return stft.blockSamples(); + return int(stft.blockSamples()); } int intervalSamples() const { - return stft.defaultInterval(); + return int(stft.defaultInterval()); } int inputLatency() const { - return stft.blockSamples() - stft.analysisOffset(); + return int(stft.analysisLatency()); } int outputLatency() const { - return stft.synthesisOffset() + stft.defaultInterval(); + return int(stft.synthesisLatency() + stft.defaultInterval()); } void reset() { @@ -84,7 +84,7 @@ struct SignalsmithStretch { stashedOutput = stft.output; tmpBuffer.resize(blockSamples + intervalSamples); - bands = stft.bands(); + bands = int(stft.bands()); channelBands.assign(bands*channels, Band()); peaks.reserve(bands/2); @@ -122,7 +122,7 @@ struct SignalsmithStretch { tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval()); int startIndex = std::max(0, inputSamples - int(tmpBuffer.size())); // start position in input - int padStart = tmpBuffer.size() - (inputSamples - startIndex); // start position in tmpBuffer + int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer Sample totalEnergy = 0; for (int c = 0; c < channels; ++c) { @@ -152,7 +152,7 @@ struct SignalsmithStretch { int prevCopiedInput = 0; auto copyInput = [&](int toIndex){ - int length = std::min(stft.blockSamples() + stft.defaultInterval(), toIndex - prevCopiedInput); + int length = std::min(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput); tmpBuffer.resize(length); int offset = toIndex - length; for (int c = 0; c < channels; ++c) { @@ -217,13 +217,48 @@ struct SignalsmithStretch { for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) { Sample processRatio = Sample(blockProcess.samplesSinceLast)/stft.defaultInterval(); - size_t processToStep = std::min(blockProcess.steps, blockProcess.steps*processRatio); + if (processRatio >= 1) { // we're ready to start a new block + processRatio = 0; + blockProcess.step = 0; + blockProcess.steps = 0; // how many steps + blockProcess.samplesSinceLast = 0; + + // Time to process a spectrum! Where should it come from in the input? + int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples); + int inputInterval = inputOffset - prevInputOffset; + prevInputOffset = inputOffset; + + copyInput(inputOffset); + stashedInput = stft.input; // save the input state, since that's what we'll analyse later + stashedOutput = stft.output; // save the current output, and read from it + stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis + + blockProcess.newSpectrum = didSeek || (inputInterval > 0); + blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1; + if (blockProcess.newSpectrum) { + // make sure the previous input is the correct distance in the past (give or take 1 sample) + blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1; + if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1; + + // analyse a new input + blockProcess.steps += stft.analyseSteps() + 1; + } + + blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max(1, inputInterval); + didSeek = false; + + updateProcessSpectrumSteps(); + blockProcess.steps += processSpectrumSteps; + + blockProcess.steps += stft.synthesiseSteps() + 1; + } + size_t processToStep = std::min(blockProcess.steps, (blockProcess.steps + 1)*processRatio); + while (blockProcess.step < processToStep) { size_t step = blockProcess.step++; #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP(step, blockProcess.steps); #endif - if (blockProcess.newSpectrum) { if (blockProcess.reanalysePrev) { // analyse past input @@ -294,41 +329,6 @@ struct SignalsmithStretch { continue; } } - if (processRatio >= 1) { // we *should* have just written a block, and are now ready to start a new one - blockProcess.step = 0; - blockProcess.steps = 0; // how many steps - blockProcess.samplesSinceLast = 0; - - // Time to process a spectrum! Where should it come from in the input? - int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples); - int inputInterval = inputOffset - prevInputOffset; - prevInputOffset = inputOffset; - - copyInput(inputOffset); - stashedInput = stft.input; // save the input state, since that's what we'll analyse later - stashedOutput = stft.output; // save the current output, and read from it - stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis - - blockProcess.newSpectrum = didSeek || (inputInterval > 0); - blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1; - if (blockProcess.newSpectrum) { - // make sure the previous input is the correct distance in the past (give or take 1 sample) - blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1; - if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1; - - // analyse a new input - blockProcess.steps += stft.analyseSteps() + 1; - } - - blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max(1, inputInterval); - didSeek = false; - - updateProcessSpectrumSteps(); - blockProcess.steps += processSpectrumSteps; - - blockProcess.steps += stft.synthesiseSteps() + 1; - blockProcess.steps += 1; // planning the next block - } #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP(); #endif @@ -355,7 +355,7 @@ struct SignalsmithStretch { // Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()` template void flush(Outputs &&outputs, int outputSamples) { - int plainOutput = std::min(outputSamples, stft.blockSamples()); + int plainOutput = std::min(outputSamples, int(stft.blockSamples())); int foldedBackOutput = std::min(outputSamples, int(stft.blockSamples()) - plainOutput); stft.finishOutput(1); for (int c = 0; c < channels; ++c) { @@ -495,7 +495,7 @@ private: } // If RandomEngine=void, use std::default_random_engine; - using RandomEngineImpl = std::conditional< + using RandomEngineImpl = typename std::conditional< std::is_void::value, std::default_random_engine, RandomEngine @@ -527,7 +527,7 @@ private: if (blockProcess.newSpectrum) { if (step < size_t(channels)) { - int channel = step; + int channel = int(step); auto bins = bandsForChannel(channel); Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI)); @@ -572,7 +572,7 @@ private: return; } if (step < size_t(channels)) { - size_t c = step; + int c = int(step); Band *bins = bandsForChannel(c); auto *predictions = predictionsForChannel(c); for (int b = 0; b < bands; ++b) { @@ -598,9 +598,9 @@ private: if (step < splitMainPrediction) { // Re-predict using phase differences between frequencies - int chunk = step; - int startB = bands*chunk/splitMainPrediction; - int endB = bands*(chunk + 1)/splitMainPrediction; + size_t chunk = step; + int startB = int(bands*chunk/splitMainPrediction); + int endB = int(bands*(chunk + 1)/splitMainPrediction); for (int b = startB; b < endB; ++b) { // Find maximum-energy channel and calculate that int maxChannel = 0;