It sounds OK, but doesn't null with the previous one

This commit is contained in:
Geraint 2025-02-05 17:51:42 +00:00
parent 0e31507194
commit 37f855cc7f
2 changed files with 99 additions and 112 deletions

View File

@ -6,6 +6,7 @@ out/stretch: ../signalsmith-stretch.h main.cpp util/*.h util/*.hxx ../dsp/*.h
-Wall -Wextra -Wfatal-errors -Wpedantic -pedantic-errors \ -Wall -Wextra -Wfatal-errors -Wpedantic -pedantic-errors \
main.cpp -o out/stretch main.cpp -o out/stretch
# Uses input files from: https://signalsmith-audio.co.uk/code/stretch/inputs.zip
examples: out/stretch examples: out/stretch
mkdir -p out/examples mkdir -p out/examples
inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 inputs/run-all.sh out/examples/u2- out/stretch --semitones=2
@ -28,5 +29,10 @@ clean:
### Example use of CMake ### Example use of CMake
cmake: cmake:
cmake -B build -G Xcode # CMAKE_BUILD_TYPE is needed for single-config generators (e.g. Makefiles)
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release cmake --build build --config Release
# Copy to out/ so that `make clean cmake examples` works
mkdir -p out
cp build/Release/* out/

View File

@ -1,10 +1,12 @@
#ifndef SIGNALSMITH_STRETCH_H #ifndef SIGNALSMITH_STRETCH_H
#define SIGNALSMITH_STRETCH_H #define SIGNALSMITH_STRETCH_H
#include "dsp/spectral.h" //#include "dsp/spectral.h"
#include "dsp/delay.h" //#include "dsp/delay.h"
#include "dsp/perf.h" #include "dsp/perf.h"
SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 0); // Check version is compatible SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 0); // Check version is compatible
#include "linear/stft.h"
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
@ -20,26 +22,24 @@ struct SignalsmithStretch {
SignalsmithStretch(long seed) : randomEngine(seed) {} SignalsmithStretch(long seed) : randomEngine(seed) {}
int blockSamples() const { int blockSamples() const {
return stft.windowSize(); return stft.blockSamples();
} }
int intervalSamples() const { int intervalSamples() const {
return stft.interval(); return stft.defaultInterval();
} }
int inputLatency() const { int inputLatency() const {
return stft.windowSize()/2; return stft.blockSamples() - stft.analysisOffset();
} }
int outputLatency() const { int outputLatency() const {
return stft.windowSize() - inputLatency(); return stft.synthesisOffset();
} }
void reset() { void reset() {
stft.reset(); stft.reset(0.1);
inputBuffer.reset();
prevInputOffset = -1; prevInputOffset = -1;
channelBands.assign(channelBands.size(), Band()); channelBands.assign(channelBands.size(), Band());
silenceCounter = 0; silenceCounter = 0;
didSeek = false; didSeek = false;
flushed = true;
} }
// Configures using a default preset // Configures using a default preset
@ -53,11 +53,12 @@ struct SignalsmithStretch {
// Manual setup // Manual setup
void configure(int nChannels, int blockSamples, int intervalSamples) { void configure(int nChannels, int blockSamples, int intervalSamples) {
channels = nChannels; channels = nChannels;
stft.setWindow(stft.kaiser, true); stft.configure(channels, channels, blockSamples, intervalSamples + 1);
stft.resize(channels, blockSamples, intervalSamples); stft.setInterval(intervalSamples, stft.kaiser);
stft.reset(0.1);
tmpBuffer.resize(blockSamples + intervalSamples);
bands = stft.bands(); bands = stft.bands();
inputBuffer.resize(channels, blockSamples + intervalSamples + 1);
timeBuffer.assign(stft.fftSize(), 0);
channelBands.assign(bands*channels, Band()); channelBands.assign(bands*channels, Band());
peaks.reserve(bands/2); peaks.reserve(bands/2);
@ -89,29 +90,47 @@ struct SignalsmithStretch {
// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval // Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
template<class Inputs> template<class Inputs>
void seek(Inputs &&inputs, int inputSamples, double playbackRate) { void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
inputBuffer.reset(); tmpBuffer.resize(0);
tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
Sample totalEnergy = 0; Sample totalEnergy = 0;
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto &&inputChannel = inputs[c]; auto &&inputChannel = inputs[c];
auto &&bufferChannel = inputBuffer[c]; int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size()));
int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
for (int i = startIndex; i < inputSamples; ++i) { for (int i = startIndex; i < inputSamples; ++i) {
Sample s = inputChannel[i]; Sample s = inputChannel[i];
totalEnergy += s*s; totalEnergy += s*s;
bufferChannel[i] = s; tmpBuffer[i - startIndex] = s;
} }
stft.writeInput(c, 0, tmpBuffer.size(), tmpBuffer.data());
} }
if (totalEnergy >= noiseFloor) { if (totalEnergy >= noiseFloor) {
silenceCounter = 0; silenceCounter = 0;
silenceFirst = true; silenceFirst = true;
} }
inputBuffer += inputSamples;
didSeek = true; didSeek = true;
seekTimeFactor = (playbackRate*stft.interval() > 1) ? 1/playbackRate : stft.interval(); seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
} }
template<class Inputs, class Outputs> template<class Inputs, class Outputs>
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
int prevCopiedInput = 0;
auto copyInput = [&](int toIndex){
int length = std::min<int>(stft.blockSamples() + stft.defaultInterval(), toIndex - prevCopiedInput);
tmpBuffer.resize(length);
int offset = toIndex - length;
for (int c = 0; c < channels; ++c) {
auto &&inputBuffer = inputs[c];
for (int i = 0; i < length; ++i) {
tmpBuffer[i] = inputBuffer[i + offset];
}
stft.writeInput(c, length, tmpBuffer.data());
}
stft.moveInput(toIndex - prevCopiedInput);
prevCopiedInput = toIndex;
};
Sample totalEnergy = 0; Sample totalEnergy = 0;
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto &&inputChannel = inputs[c]; auto &&inputChannel = inputs[c];
@ -121,9 +140,10 @@ struct SignalsmithStretch {
} }
} }
if (totalEnergy < noiseFloor) { if (totalEnergy < noiseFloor) {
if (silenceCounter >= 2*stft.windowSize()) { if (silenceCounter >= 2*stft.blockSamples()) {
if (silenceFirst) { if (silenceFirst) { // first block of silence processing
silenceFirst = false; silenceFirst = false;
//stft.reset();
for (auto &b : channelBands) { for (auto &b : channelBands) {
b.input = b.prevInput = b.output = 0; b.input = b.prevInput = b.output = 0;
b.inputEnergy = 0; b.inputEnergy = 0;
@ -148,15 +168,7 @@ struct SignalsmithStretch {
} }
// Store input in history buffer // Store input in history buffer
for (int c = 0; c < channels; ++c) { copyInput(inputSamples);
auto &&inputChannel = inputs[c];
auto &&bufferChannel = inputBuffer[c];
int startIndex = std::max<int>(0, inputSamples - stft.windowSize() - stft.interval());
for (int i = startIndex; i < inputSamples; ++i) {
bufferChannel[i] = inputChannel[i];
}
}
inputBuffer += inputSamples;
return; return;
} else { } else {
silenceCounter += inputSamples; silenceCounter += inputSamples;
@ -167,117 +179,87 @@ struct SignalsmithStretch {
} }
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) { for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
stft.ensureValid(outputIndex, [&](int outputOffset) { if (stft.samplesSinceSynthesis() >= stft.defaultInterval()) {
// Time to process a spectrum! Where should it come from in the input? // Time to process a spectrum! Where should it come from in the input?
int inputOffset = std::round(outputOffset*Sample(inputSamples)/outputSamples) - stft.windowSize(); int inputOffset = std::round(outputIndex*Sample(inputSamples)/outputSamples);
int inputInterval = inputOffset - prevInputOffset; int inputInterval = inputOffset - prevInputOffset;
prevInputOffset = inputOffset; prevInputOffset = inputOffset;
copyInput(inputOffset);
bool newSpectrum = didSeek || (inputInterval > 0); bool newSpectrum = didSeek || (inputInterval > 0);
if (newSpectrum) { if (newSpectrum) {
for (int c = 0; c < channels; ++c) { if (didSeek || inputInterval != int(stft.samplesSinceAnalysis())) { // make sure the previous input is the correct distance in the past
// Copy from the history buffer, if needed stft.analyse(stft.defaultInterval());
auto &&bufferChannel = inputBuffer[c]; // Copy previous analysis to our band objects
for (int i = 0; i < -inputOffset; ++i) {
timeBuffer[i] = bufferChannel[i + inputOffset];
}
// Copy the rest from the input
auto &&inputChannel = inputs[c];
for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
timeBuffer[i] = inputChannel[i + inputOffset];
}
stft.analyse(c, timeBuffer);
}
flushed = false; // TODO: first block after a flush should be gain-compensated
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c); auto channelBands = bandsForChannel(c);
auto &&spectrumBands = stft.spectrum[c]; auto *spectrumBands = stft.spectrum(c);
for (int b = 0; b < bands; ++b) {
channelBands[b].input = spectrumBands[b];
}
}
if (didSeek || inputInterval != stft.interval()) { // make sure the previous input is the correct distance in the past
int prevIntervalOffset = inputOffset - stft.interval();
for (int c = 0; c < channels; ++c) {
// Copy from the history buffer, if needed
auto &&bufferChannel = inputBuffer[c];
for (int i = 0; i < std::min(-prevIntervalOffset, stft.windowSize()); ++i) {
timeBuffer[i] = bufferChannel[i + prevIntervalOffset];
}
// Copy the rest from the input
auto &&inputChannel = inputs[c];
for (int i = std::max<int>(0, -prevIntervalOffset); i < stft.windowSize(); ++i) {
timeBuffer[i] = inputChannel[i + prevIntervalOffset];
}
stft.analyse(c, timeBuffer);
}
for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c);
auto &&spectrumBands = stft.spectrum[c];
for (int b = 0; b < bands; ++b) { for (int b = 0; b < bands; ++b) {
channelBands[b].prevInput = spectrumBands[b]; channelBands[b].prevInput = spectrumBands[b];
} }
} }
} }
stft.analyse();
// Copy analysed spectrum into our band objects
for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c);
auto *spectrumBands = stft.spectrum(c);
for (int b = 0; b < bands; ++b) {
channelBands[b].input = spectrumBands[b];
}
}
} }
Sample timeFactor = didSeek ? seekTimeFactor : stft.interval()/std::max<Sample>(1, inputInterval); Sample timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
processSpectrum(newSpectrum, timeFactor); processSpectrum(newSpectrum, timeFactor);
didSeek = false; didSeek = false;
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c); auto channelBands = bandsForChannel(c);
auto &&spectrumBands = stft.spectrum[c]; auto *spectrumBands = stft.spectrum(c);
for (int b = 0; b < bands; ++b) { for (int b = 0; b < bands; ++b) {
spectrumBands[b] = channelBands[b].output; spectrumBands[b] = channelBands[b].output;
} }
} }
}); stft.synthesise();
};
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto &&outputChannel = outputs[c]; auto &&outputChannel = outputs[c];
auto &&stftChannel = stft[c]; Sample v = 0;
outputChannel[outputIndex] = stftChannel[outputIndex]; stft.readOutput(c, 1, &v);
outputChannel[outputIndex] = v;
} }
stft.moveOutput(1);
} }
// Store input in history buffer copyInput(inputSamples);
for (int c = 0; c < channels; ++c) {
auto &&inputChannel = inputs[c];
auto &&bufferChannel = inputBuffer[c];
int startIndex = std::max<int>(0, inputSamples - stft.windowSize());
for (int i = startIndex; i < inputSamples; ++i) {
bufferChannel[i] = inputChannel[i];
}
}
inputBuffer += inputSamples;
stft += outputSamples;
prevInputOffset -= inputSamples; prevInputOffset -= inputSamples;
} }
// Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()` // Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()`
template<class Outputs> template<class Outputs>
void flush(Outputs &&outputs, int outputSamples) { void flush(Outputs &&outputs, int outputSamples) {
int plainOutput = std::min<int>(outputSamples, stft.windowSize()); int plainOutput = std::min<int>(outputSamples, stft.blockSamples());
int foldedBackOutput = std::min<int>(outputSamples, stft.windowSize() - plainOutput); int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
tmpBuffer.resize(plainOutput);
stft.readOutput(c, plainOutput, tmpBuffer.data());
auto &&outputChannel = outputs[c]; auto &&outputChannel = outputs[c];
auto &&stftChannel = stft[c];
for (int i = 0; i < plainOutput; ++i) { for (int i = 0; i < plainOutput; ++i) {
// TODO: plain output should be gain- // TODO: plain output should be gain-
outputChannel[i] = stftChannel[i]; outputChannel[i] = tmpBuffer[i];
} }
tmpBuffer.resize(foldedBackOutput);
stft.readOutput(c, plainOutput, foldedBackOutput, tmpBuffer.data());
for (int i = 0; i < foldedBackOutput; ++i) { for (int i = 0; i < foldedBackOutput; ++i) {
outputChannel[outputSamples - 1 - i] -= stftChannel[plainOutput + i]; outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
}
for (int i = 0; i < plainOutput + foldedBackOutput; ++i) {
stftChannel[i] = 0;
} }
} }
// Skip the output we just used/cleared stft.reset(0.1);
stft += plainOutput + foldedBackOutput;
// Reset the phase-vocoder stuff, so the next block gets a fresh start // Reset the phase-vocoder stuff, so the next block gets a fresh start
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c); auto channelBands = bandsForChannel(c);
@ -285,31 +267,30 @@ struct SignalsmithStretch {
channelBands[b].prevInput = channelBands[b].output = 0; channelBands[b].prevInput = channelBands[b].output = 0;
} }
} }
flushed = true;
} }
private: private:
using Complex = std::complex<Sample>; using Complex = std::complex<Sample>;
static constexpr Sample noiseFloor{1e-15}; static constexpr Sample noiseFloor{1e-15};
static constexpr Sample maxCleanStretch{2}; // time-stretch ratio before we start randomising phases static constexpr Sample maxCleanStretch{2}; // time-stretch ratio before we start randomising phases
int silenceCounter = 0; size_t silenceCounter = 0;
bool silenceFirst = true; bool silenceFirst = true;
Sample freqMultiplier = 1, freqTonalityLimit = 0.5; Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
std::function<Sample(Sample)> customFreqMap = nullptr; std::function<Sample(Sample)> customFreqMap = nullptr;
signalsmith::spectral::STFT<Sample> stft{0, 1, 1}; signalsmith::linear::DynamicSTFT<Sample, false, true> stft;
signalsmith::delay::MultiBuffer<Sample> inputBuffer; std::vector<Sample> tmpBuffer;
int channels = 0, bands = 0; int channels = 0, bands = 0;
int prevInputOffset = -1; int prevInputOffset = -1;
std::vector<Sample> timeBuffer; bool didSeek = false;
bool didSeek = false, flushed = true;
Sample seekTimeFactor = 1; Sample seekTimeFactor = 1;
Sample bandToFreq(Sample b) const { Sample bandToFreq(Sample b) const {
return (b + Sample(0.5))/stft.fftSize(); return stft.binToFreq(b);
} }
Sample freqToBand(Sample f) const { Sample freqToBand(Sample f) const {
return f*stft.fftSize() - Sample(0.5); return stft.freqToBin(f);
} }
struct Band { struct Band {
@ -395,9 +376,9 @@ private:
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto bins = bandsForChannel(c); auto bins = bandsForChannel(c);
Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.interval()*Sample(2*M_PI)); Complex rot = std::polar(Sample(1), bandToFreq(0)*stft.defaultInterval()*Sample(2*M_PI));
Sample freqStep = bandToFreq(1) - bandToFreq(0); Sample freqStep = bandToFreq(1) - bandToFreq(0);
Complex rotStep = std::polar(Sample(1), freqStep*stft.interval()*Sample(2*M_PI)); Complex rotStep = std::polar(Sample(1), freqStep*stft.defaultInterval()*Sample(2*M_PI));
for (int b = 0; b < bands; ++b) { for (int b = 0; b < bands; ++b) {
auto &bin = bins[b]; auto &bin = bins[b];
@ -408,7 +389,7 @@ private:
} }
} }
Sample smoothingBins = Sample(stft.fftSize())/stft.interval(); Sample smoothingBins = Sample(stft.fftSamples())/stft.defaultInterval();
int longVerticalStep = std::round(smoothingBins); int longVerticalStep = std::round(smoothingBins);
if (customFreqMap || freqMultiplier != 1) { if (customFreqMap || freqMultiplier != 1) {
findPeaks(smoothingBins); findPeaks(smoothingBins);