Refactor chunked computation to be neater, fix some int warnings
This commit is contained in:
parent
1ad2839a04
commit
8c3852cae3
@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
|
||||
signalsmith::Stopwatch stopwatch;
|
||||
|
||||
stopwatch.start();
|
||||
stretch.presetDefault(inWav.channels, inWav.sampleRate);
|
||||
stretch.presetDefault(int(inWav.channels), inWav.sampleRate);
|
||||
stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
|
||||
double initSeconds = stopwatch.lap();
|
||||
|
||||
@ -130,7 +130,7 @@ int main(int argc, char* argv[]) {
|
||||
stretch.seek(inWav, stretch.inputLatency(), 1/time);
|
||||
inWav.offset += stretch.inputLatency();
|
||||
// Process it all in one call, although it works just the same if we split into smaller blocks
|
||||
stretch.process(inWav, inputLength, outWav, outputLength);
|
||||
stretch.process(inWav, int(inputLength), outWav, int(outputLength));
|
||||
// Read the last bit of output without giving it any more input
|
||||
outWav.offset += outputLength;
|
||||
stretch.flush(outWav, tailSamples);
|
||||
|
||||
@ -66,39 +66,40 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
unsigned int sampleRate = 48000;
|
||||
unsigned int channels = 1, offset = 0;
|
||||
size_t sampleRate = 48000;
|
||||
size_t channels = 1, offset = 0;
|
||||
std::vector<double> samples;
|
||||
int length() const {
|
||||
return samples.size()/channels - offset;
|
||||
size_t length() const {
|
||||
size_t perChannel = samples.size()/channels;
|
||||
return (perChannel >= offset) ? perChannel - offset : 0;
|
||||
}
|
||||
void resize(int length) {
|
||||
void resize(size_t length) {
|
||||
samples.resize((offset + length)*channels, 0);
|
||||
}
|
||||
template<bool isConst>
|
||||
class ChannelReader {
|
||||
using CSample = typename std::conditional<isConst, const double, double>::type;
|
||||
CSample *data;
|
||||
int stride;
|
||||
size_t stride;
|
||||
public:
|
||||
ChannelReader(CSample *samples, int channels) : data(samples), stride(channels) {}
|
||||
ChannelReader(CSample *samples, size_t channels) : data(samples), stride(channels) {}
|
||||
|
||||
CSample & operator [](int i) {
|
||||
CSample & operator [](size_t i) {
|
||||
return data[i*stride];
|
||||
}
|
||||
};
|
||||
ChannelReader<false> operator [](int c) {
|
||||
ChannelReader<false> operator [](size_t c) {
|
||||
return ChannelReader<false>(samples.data() + offset*channels + c, channels);
|
||||
}
|
||||
ChannelReader<true> operator [](int c) const {
|
||||
ChannelReader<true> operator [](size_t c) const {
|
||||
return ChannelReader<true>(samples.data() + offset*channels + c, channels);
|
||||
}
|
||||
|
||||
Result result = Result(Result::Code::OK);
|
||||
|
||||
Wav() {}
|
||||
Wav(double sampleRate, int channels) : sampleRate(sampleRate), channels(channels) {}
|
||||
Wav(double sampleRate, int channels, const std::vector<double> &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {}
|
||||
Wav(double sampleRate, size_t channels) : sampleRate(sampleRate), channels(channels) {}
|
||||
Wav(double sampleRate, size_t channels, const std::vector<double> &samples) : sampleRate(sampleRate), channels(channels), samples(samples) {}
|
||||
Wav(std::string filename) {
|
||||
result = read(filename).warn();
|
||||
}
|
||||
@ -141,9 +142,9 @@ public:
|
||||
sampleRate = read32(file);
|
||||
if (sampleRate < 1) return result = Result(Result::Code::FORMAT_ERROR, "Cannot have zero sampleRate");
|
||||
|
||||
unsigned int expectedBytesPerSecond = read32(file);
|
||||
unsigned int bytesPerFrame = read16(file);
|
||||
unsigned int bitsPerSample = read16(file);
|
||||
size_t expectedBytesPerSecond = read32(file);
|
||||
size_t bytesPerFrame = read16(file);
|
||||
size_t bitsPerSample = read16(file);
|
||||
if (!formatIsValid(formatInt, bitsPerSample)) return result = Result(Result::Code::UNSUPPORTED, "Unsupported format:bits: " + std::to_string(formatInt) + ":" + std::to_string(bitsPerSample));
|
||||
// Since it's plain WAVE, we can do some extra checks for consistency
|
||||
if (bitsPerSample*channels != bytesPerFrame*8) return result = Result(Result::Code::FORMAT_ERROR, "Format sizes don't add up");
|
||||
@ -191,7 +192,7 @@ public:
|
||||
file.open(filename, std::ios::binary);
|
||||
if (!file.is_open()) return result = Result(Result::Code::IO_ERROR, "Failed to open file: " + filename);
|
||||
|
||||
int bytesPerSample;
|
||||
size_t bytesPerSample;
|
||||
switch (format) {
|
||||
case Format::PCM:
|
||||
bytesPerSample = 2;
|
||||
@ -199,30 +200,30 @@ public:
|
||||
}
|
||||
|
||||
// File size - 44 bytes is RIFF header, "fmt" block, and "data" block header
|
||||
unsigned int dataLength = (samples.size() - offset*channels)*bytesPerSample;
|
||||
unsigned int fileLength = 44 + dataLength;
|
||||
size_t dataLength = (samples.size() - offset*channels)*bytesPerSample;
|
||||
size_t fileLength = 44 + dataLength;
|
||||
|
||||
// RIFF chunk
|
||||
write32(file, value_RIFF);
|
||||
write32(file, fileLength - 8); // File length, excluding the RIFF header
|
||||
write32(file, uint32_t(fileLength - 8)); // File length, excluding the RIFF header
|
||||
write32(file, value_WAVE);
|
||||
// "fmt " block
|
||||
write32(file, value_fmt);
|
||||
write32(file, 16); // block length
|
||||
write16(file, (uint16_t)format);
|
||||
write16(file, channels);
|
||||
write32(file, sampleRate);
|
||||
unsigned int expectedBytesPerSecond = sampleRate*channels*bytesPerSample;
|
||||
write32(file, expectedBytesPerSecond);
|
||||
write16(file, channels*bytesPerSample); // Bytes per frame
|
||||
write16(file, bytesPerSample*8); // bist per sample
|
||||
write16(file, uint16_t(format));
|
||||
write16(file, uint16_t(channels));
|
||||
write32(file, uint32_t(sampleRate));
|
||||
size_t expectedBytesPerSecond = sampleRate*channels*bytesPerSample;
|
||||
write32(file, uint32_t(expectedBytesPerSecond));
|
||||
write16(file, uint16_t(channels*bytesPerSample)); // Bytes per frame
|
||||
write16(file, uint16_t(bytesPerSample*8)); // bist per sample
|
||||
|
||||
// "data" block
|
||||
write32(file, value_data);
|
||||
write32(file, dataLength);
|
||||
write32(file, uint32_t(dataLength));
|
||||
switch (format) {
|
||||
case Format::PCM:
|
||||
for (unsigned int i = offset*channels; i < samples.size(); i++) {
|
||||
for (size_t i = offset*channels; i < samples.size(); i++) {
|
||||
double value = samples[i]*32768;
|
||||
if (value > 32767) value = 32767;
|
||||
if (value <= -32768) value = -32768;
|
||||
|
||||
@ -41,16 +41,16 @@ struct SignalsmithStretch {
|
||||
SignalsmithStretch(long seed) : randomEngine(seed) {}
|
||||
|
||||
int blockSamples() const {
|
||||
return stft.blockSamples();
|
||||
return int(stft.blockSamples());
|
||||
}
|
||||
int intervalSamples() const {
|
||||
return stft.defaultInterval();
|
||||
return int(stft.defaultInterval());
|
||||
}
|
||||
int inputLatency() const {
|
||||
return stft.blockSamples() - stft.analysisOffset();
|
||||
return int(stft.analysisLatency());
|
||||
}
|
||||
int outputLatency() const {
|
||||
return stft.synthesisOffset() + stft.defaultInterval();
|
||||
return int(stft.synthesisLatency() + stft.defaultInterval());
|
||||
}
|
||||
|
||||
void reset() {
|
||||
@ -84,7 +84,7 @@ struct SignalsmithStretch {
|
||||
stashedOutput = stft.output;
|
||||
tmpBuffer.resize(blockSamples + intervalSamples);
|
||||
|
||||
bands = stft.bands();
|
||||
bands = int(stft.bands());
|
||||
channelBands.assign(bands*channels, Band());
|
||||
|
||||
peaks.reserve(bands/2);
|
||||
@ -122,7 +122,7 @@ struct SignalsmithStretch {
|
||||
tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
|
||||
|
||||
int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
|
||||
int padStart = tmpBuffer.size() - (inputSamples - startIndex); // start position in tmpBuffer
|
||||
int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
|
||||
|
||||
Sample totalEnergy = 0;
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
@ -152,7 +152,7 @@ struct SignalsmithStretch {
|
||||
int prevCopiedInput = 0;
|
||||
auto copyInput = [&](int toIndex){
|
||||
|
||||
int length = std::min<int>(stft.blockSamples() + stft.defaultInterval(), toIndex - prevCopiedInput);
|
||||
int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
|
||||
tmpBuffer.resize(length);
|
||||
int offset = toIndex - length;
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
@ -217,13 +217,48 @@ struct SignalsmithStretch {
|
||||
|
||||
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
|
||||
Sample processRatio = Sample(blockProcess.samplesSinceLast)/stft.defaultInterval();
|
||||
size_t processToStep = std::min<size_t>(blockProcess.steps, blockProcess.steps*processRatio);
|
||||
if (processRatio >= 1) { // we're ready to start a new block
|
||||
processRatio = 0;
|
||||
blockProcess.step = 0;
|
||||
blockProcess.steps = 0; // how many steps
|
||||
blockProcess.samplesSinceLast = 0;
|
||||
|
||||
// Time to process a spectrum! Where should it come from in the input?
|
||||
int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
|
||||
int inputInterval = inputOffset - prevInputOffset;
|
||||
prevInputOffset = inputOffset;
|
||||
|
||||
copyInput(inputOffset);
|
||||
stashedInput = stft.input; // save the input state, since that's what we'll analyse later
|
||||
stashedOutput = stft.output; // save the current output, and read from it
|
||||
stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
|
||||
|
||||
blockProcess.newSpectrum = didSeek || (inputInterval > 0);
|
||||
blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
|
||||
if (blockProcess.newSpectrum) {
|
||||
// make sure the previous input is the correct distance in the past (give or take 1 sample)
|
||||
blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
|
||||
if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
|
||||
|
||||
// analyse a new input
|
||||
blockProcess.steps += stft.analyseSteps() + 1;
|
||||
}
|
||||
|
||||
blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
|
||||
didSeek = false;
|
||||
|
||||
updateProcessSpectrumSteps();
|
||||
blockProcess.steps += processSpectrumSteps;
|
||||
|
||||
blockProcess.steps += stft.synthesiseSteps() + 1;
|
||||
}
|
||||
size_t processToStep = std::min<size_t>(blockProcess.steps, (blockProcess.steps + 1)*processRatio);
|
||||
|
||||
while (blockProcess.step < processToStep) {
|
||||
size_t step = blockProcess.step++;
|
||||
#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP
|
||||
SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP(step, blockProcess.steps);
|
||||
#endif
|
||||
|
||||
if (blockProcess.newSpectrum) {
|
||||
if (blockProcess.reanalysePrev) {
|
||||
// analyse past input
|
||||
@ -294,41 +329,6 @@ struct SignalsmithStretch {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (processRatio >= 1) { // we *should* have just written a block, and are now ready to start a new one
|
||||
blockProcess.step = 0;
|
||||
blockProcess.steps = 0; // how many steps
|
||||
blockProcess.samplesSinceLast = 0;
|
||||
|
||||
// Time to process a spectrum! Where should it come from in the input?
|
||||
int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
|
||||
int inputInterval = inputOffset - prevInputOffset;
|
||||
prevInputOffset = inputOffset;
|
||||
|
||||
copyInput(inputOffset);
|
||||
stashedInput = stft.input; // save the input state, since that's what we'll analyse later
|
||||
stashedOutput = stft.output; // save the current output, and read from it
|
||||
stft.moveOutput(stft.defaultInterval()); // the actual input jumps forward in time by one interval, ready for the synthesis
|
||||
|
||||
blockProcess.newSpectrum = didSeek || (inputInterval > 0);
|
||||
blockProcess.mappedFrequencies = customFreqMap || freqMultiplier != 1;
|
||||
if (blockProcess.newSpectrum) {
|
||||
// make sure the previous input is the correct distance in the past (give or take 1 sample)
|
||||
blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1;
|
||||
if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1;
|
||||
|
||||
// analyse a new input
|
||||
blockProcess.steps += stft.analyseSteps() + 1;
|
||||
}
|
||||
|
||||
blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
|
||||
didSeek = false;
|
||||
|
||||
updateProcessSpectrumSteps();
|
||||
blockProcess.steps += processSpectrumSteps;
|
||||
|
||||
blockProcess.steps += stft.synthesiseSteps() + 1;
|
||||
blockProcess.steps += 1; // planning the next block
|
||||
}
|
||||
#ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP
|
||||
SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP();
|
||||
#endif
|
||||
@ -355,7 +355,7 @@ struct SignalsmithStretch {
|
||||
// Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()`
|
||||
template<class Outputs>
|
||||
void flush(Outputs &&outputs, int outputSamples) {
|
||||
int plainOutput = std::min<int>(outputSamples, stft.blockSamples());
|
||||
int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
|
||||
int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
|
||||
stft.finishOutput(1);
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
@ -495,7 +495,7 @@ private:
|
||||
}
|
||||
|
||||
// If RandomEngine=void, use std::default_random_engine;
|
||||
using RandomEngineImpl = std::conditional<
|
||||
using RandomEngineImpl = typename std::conditional<
|
||||
std::is_void<RandomEngine>::value,
|
||||
std::default_random_engine,
|
||||
RandomEngine
|
||||
@ -527,7 +527,7 @@ private:
|
||||
|
||||
if (blockProcess.newSpectrum) {
|
||||
if (step < size_t(channels)) {
|
||||
int channel = step;
|
||||
int channel = int(step);
|
||||
auto bins = bandsForChannel(channel);
|
||||
|
||||
Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI));
|
||||
@ -572,7 +572,7 @@ private:
|
||||
return;
|
||||
}
|
||||
if (step < size_t(channels)) {
|
||||
size_t c = step;
|
||||
int c = int(step);
|
||||
Band *bins = bandsForChannel(c);
|
||||
auto *predictions = predictionsForChannel(c);
|
||||
for (int b = 0; b < bands; ++b) {
|
||||
@ -598,9 +598,9 @@ private:
|
||||
|
||||
if (step < splitMainPrediction) {
|
||||
// Re-predict using phase differences between frequencies
|
||||
int chunk = step;
|
||||
int startB = bands*chunk/splitMainPrediction;
|
||||
int endB = bands*(chunk + 1)/splitMainPrediction;
|
||||
size_t chunk = step;
|
||||
int startB = int(bands*chunk/splitMainPrediction);
|
||||
int endB = int(bands*(chunk + 1)/splitMainPrediction);
|
||||
for (int b = startB; b < endB; ++b) {
|
||||
// Find maximum-energy channel and calculate that
|
||||
int maxChannel = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user