Bypasses processing when given silent input
This commit is contained in:
parent
901df7bf97
commit
31a4c2b5ba
12
README.md
12
README.md
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
This is a C++11 library for pitch and time stretching, using the final approach from the ADC22 presentation _Four Ways To Write A Pitch-Shifter_.
|
This is a C++11 library for pitch and time stretching, using the final approach from the ADC22 presentation _Four Ways To Write A Pitch-Shifter_.
|
||||||
|
|
||||||
It's still a work-in-progress: the pitch-shifting is fine, but the time-stretching isn't finished.
|
It can handle a wide range of pitch-shifts (multiple octaves), but time-stretching sounds best for more modest changes (between 0.5x and 2x).
|
||||||
|
|
||||||
## How to use it
|
## How to use it
|
||||||
|
|
||||||
@ -20,7 +20,7 @@ The easiest way to configure is `.presetDefault()`:
|
|||||||
stretch.presetDefault(channels, sampleRate);
|
stretch.presetDefault(channels, sampleRate);
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to test out different block-sizes etc. then you can use `.configure()` manually, and even change `.freqWeight`/`.timeWeight`/`.channelWeight`.
|
If you want to test out different block-sizes etc. then you can use `.configure()` manually.
|
||||||
|
|
||||||
### Processing (and resetting)
|
### Processing (and resetting)
|
||||||
|
|
||||||
@ -54,6 +54,14 @@ You can set a "tonality limit", which uses a non-linear frequency map to preserv
|
|||||||
stretch.setTransposeSemitones(4, 8000/sampleRate);
|
stretch.setTransposeSemitones(4, 8000/sampleRate);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Alternatively, you can set a custom frequency map, mapping input frequencies to output frequencies (both normalised against the sample-rate):
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
stretch.setFreqMap([](float inputFreq) {
|
||||||
|
return inputFreq*2; // up one octave
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
## Compiling
|
## Compiling
|
||||||
|
|
||||||
Just include `signalsmith-stretch.h` in your build.
|
Just include `signalsmith-stretch.h` in your build.
|
||||||
|
|||||||
@ -4,8 +4,10 @@
|
|||||||
#include "dsp/spectral.h"
|
#include "dsp/spectral.h"
|
||||||
#include "dsp/delay.h"
|
#include "dsp/delay.h"
|
||||||
#include "dsp/curves.h"
|
#include "dsp/curves.h"
|
||||||
|
SIGNALSMITH_DSP_VERSION_CHECK(1, 3, 3); // Check version is compatible
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
namespace signalsmith { namespace stretch {
|
namespace signalsmith { namespace stretch {
|
||||||
|
|
||||||
@ -30,14 +32,15 @@ struct SignalsmithStretch {
|
|||||||
inputBuffer.reset();
|
inputBuffer.reset();
|
||||||
prevInputOffset = -1;
|
prevInputOffset = -1;
|
||||||
channelBands.assign(channelBands.size(), Band());
|
channelBands.assign(channelBands.size(), Band());
|
||||||
|
silenceCounter = 2*stft.windowSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Configures using a default preset
|
// Configures using a default preset
|
||||||
void presetDefault(int nChannels, Sample sampleRate) {
|
void presetDefault(int nChannels, Sample sampleRate) {
|
||||||
configure(nChannels, sampleRate*0.12, sampleRate*0.03);
|
configure(nChannels, sampleRate*0.12, sampleRate*0.03);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Manual setup
|
// Manual setup
|
||||||
void configure(int nChannels, int blockSamples, int intervalSamples) {
|
void configure(int nChannels, int blockSamples, int intervalSamples) {
|
||||||
channels = nChannels;
|
channels = nChannels;
|
||||||
stft.resize(channels, blockSamples, intervalSamples);
|
stft.resize(channels, blockSamples, intervalSamples);
|
||||||
@ -61,7 +64,59 @@ struct SignalsmithStretch {
|
|||||||
|
|
||||||
template<class Inputs, class Outputs>
|
template<class Inputs, class Outputs>
|
||||||
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
|
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
|
||||||
Sample timeScaling = Sample(inputSamples)/outputSamples;
|
Sample totalEnergy = 0;
|
||||||
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
auto &&inputChannel = inputs[c];
|
||||||
|
for (int i = 0; i < inputSamples; ++i) {
|
||||||
|
Sample s = inputChannel[i];
|
||||||
|
totalEnergy += s*s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (totalEnergy < noiseFloor) {
|
||||||
|
if (silenceCounter >= 2*stft.windowSize()) {
|
||||||
|
if (silenceFirst) {
|
||||||
|
silenceFirst = false;
|
||||||
|
for (auto &b : channelBands) {
|
||||||
|
b.input = b.prevInput = b.output = b.prevOutput = 0;
|
||||||
|
b.inputEnergy = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inputSamples > 0) {
|
||||||
|
// copy from the input, wrapping around if needed
|
||||||
|
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
|
||||||
|
int inputIndex = outputIndex%inputSamples;
|
||||||
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
outputs[c][outputIndex] = inputs[c][inputIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
auto &&outputChannel = outputs[c];
|
||||||
|
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
|
||||||
|
outputChannel[outputIndex] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store input in history buffer
|
||||||
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
auto &&inputChannel = inputs[c];
|
||||||
|
auto &&bufferChannel = inputBuffer[c];
|
||||||
|
int startIndex = std::max<int>(0, inputSamples - stft.windowSize());
|
||||||
|
for (int i = startIndex; i < inputSamples; ++i) {
|
||||||
|
bufferChannel[i] = inputChannel[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inputBuffer += inputSamples;
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
silenceCounter += inputSamples;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
silenceCounter = 0;
|
||||||
|
silenceFirst = true;
|
||||||
|
}
|
||||||
|
|
||||||
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
|
for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
|
||||||
stft.ensureValid(outputIndex, [&](int outputOffset) {
|
stft.ensureValid(outputIndex, [&](int outputOffset) {
|
||||||
@ -83,7 +138,6 @@ struct SignalsmithStretch {
|
|||||||
for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
|
for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
|
||||||
timeBuffer[i] = inputChannel[i + inputOffset];
|
timeBuffer[i] = inputChannel[i + inputOffset];
|
||||||
}
|
}
|
||||||
|
|
||||||
stft.analyse(c, timeBuffer);
|
stft.analyse(c, timeBuffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -111,6 +165,9 @@ struct SignalsmithStretch {
|
|||||||
auto &&outputChannel = outputs[c];
|
auto &&outputChannel = outputs[c];
|
||||||
auto &&stftChannel = stft[c];
|
auto &&stftChannel = stft[c];
|
||||||
outputChannel[outputIndex] = stftChannel[outputIndex];
|
outputChannel[outputIndex] = stftChannel[outputIndex];
|
||||||
|
|
||||||
|
// Debug:
|
||||||
|
outputChannel[outputIndex] *= -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,14 +193,25 @@ struct SignalsmithStretch {
|
|||||||
} else {
|
} else {
|
||||||
freqTonalityLimit = 1;
|
freqTonalityLimit = 1;
|
||||||
}
|
}
|
||||||
|
customFreqMap = nullptr;
|
||||||
}
|
}
|
||||||
void setTransposeSemitones(Sample semitones, Sample tonalityLimit=0) {
|
void setTransposeSemitones(Sample semitones, Sample tonalityLimit=0) {
|
||||||
setTransposeFactor(std::pow(2, semitones/12), tonalityLimit);
|
setTransposeFactor(std::pow(2, semitones/12), tonalityLimit);
|
||||||
|
customFreqMap = nullptr;
|
||||||
|
}
|
||||||
|
// Sets a custom frequency map - should be monotonically increasing
|
||||||
|
void setFreqMap(std::function<Sample(Sample)> inputToOutput) {
|
||||||
|
customFreqMap = inputToOutput;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static constexpr Sample noiseFloor{1e-15};
|
||||||
|
int silenceCounter = 0;
|
||||||
|
bool silenceFirst = true;
|
||||||
|
|
||||||
using Complex = std::complex<Sample>;
|
using Complex = std::complex<Sample>;
|
||||||
Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
|
Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
|
||||||
|
std::function<Sample(Sample)> customFreqMap = nullptr;
|
||||||
|
|
||||||
signalsmith::spectral::STFT<Sample> stft{0, 1, 1};
|
signalsmith::spectral::STFT<Sample> stft{0, 1, 1};
|
||||||
signalsmith::delay::MultiBuffer<Sample> inputBuffer;
|
signalsmith::delay::MultiBuffer<Sample> inputBuffer;
|
||||||
@ -211,7 +279,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct Peak {
|
struct Peak {
|
||||||
Sample input, output, energy;
|
Sample input, output;
|
||||||
|
|
||||||
bool operator< (const Peak &other) const {
|
bool operator< (const Peak &other) const {
|
||||||
return output < other.output;
|
return output < other.output;
|
||||||
@ -241,14 +309,15 @@ private:
|
|||||||
int bands = stft.bands();
|
int bands = stft.bands();
|
||||||
|
|
||||||
Sample rate = outputInterval/std::max<Sample>(1, inputInterval);
|
Sample rate = outputInterval/std::max<Sample>(1, inputInterval);
|
||||||
|
rate = std::min<Sample>(2, rate); // For now, limit the intra-block time stretching to 2x
|
||||||
|
|
||||||
if (inputInterval > 0) {
|
if (inputInterval > 0) {
|
||||||
for (int c = 0; c < channels; ++c) {
|
for (int c = 0; c < channels; ++c) {
|
||||||
auto bins = bandsForChannel(c);
|
auto bins = bandsForChannel(c);
|
||||||
for (int b = 0; b < stft.bands(); ++b) {
|
for (int b = 0; b < stft.bands(); ++b) {
|
||||||
auto &bin = bins[b];
|
auto &bin = bins[b];
|
||||||
bins[b].prevOutput *= rotPrevOutput[b];
|
bin.prevOutput *= rotPrevOutput[b];
|
||||||
bins[b].prevInput *= rotPrevInput[b];
|
bin.prevInput *= rotPrevInput[b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -295,7 +364,7 @@ private:
|
|||||||
predictions[b] = prediction;
|
predictions[b] = prediction;
|
||||||
|
|
||||||
// Rough output prediction based on phase-vocoder, sensitive to previous input/output magnitude
|
// Rough output prediction based on phase-vocoder, sensitive to previous input/output magnitude
|
||||||
outputBin.output = prediction.freqPrediction/(prediction.energy + Sample(1e-10));
|
outputBin.output = prediction.freqPrediction/(prediction.energy + noiseFloor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int b = 0; b < stft.bands(); ++b) {
|
for (int b = 0; b < stft.bands(); ++b) {
|
||||||
@ -340,7 +409,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Sample phaseNorm = std::norm(phase);
|
Sample phaseNorm = std::norm(phase);
|
||||||
if (phaseNorm > 1e-15) {
|
if (phaseNorm > noiseFloor) {
|
||||||
outputBin.output = phase*std::sqrt(prediction.energy/phaseNorm);
|
outputBin.output = phase*std::sqrt(prediction.energy/phaseNorm);
|
||||||
} else {
|
} else {
|
||||||
outputBin.output = prediction.input;
|
outputBin.output = prediction.input;
|
||||||
@ -352,12 +421,12 @@ private:
|
|||||||
auto &channelBin = bandsForChannel(c)[b];
|
auto &channelBin = bandsForChannel(c)[b];
|
||||||
auto &channelPrediction = predictionsForChannel(c)[b];
|
auto &channelPrediction = predictionsForChannel(c)[b];
|
||||||
|
|
||||||
Complex channelTwist = prediction.input*std::conj(channelPrediction.input);
|
Complex channelTwist = channelPrediction.input*std::conj(prediction.input);
|
||||||
Complex channelPhase = outputBin.output*channelTwist;
|
Complex channelPhase = outputBin.output*channelTwist;
|
||||||
|
|
||||||
Sample channelPhaseNorm = std::norm(channelPhase);
|
Sample channelPhaseNorm = std::norm(channelPhase);
|
||||||
if (channelPhaseNorm > 1e-15) {
|
if (channelPhaseNorm > noiseFloor) {
|
||||||
channelBin.output = channelPhase*std::sqrt(prediction.energy/channelPhaseNorm);
|
channelBin.output = channelPhase*std::sqrt(channelPrediction.energy/channelPhaseNorm);
|
||||||
} else {
|
} else {
|
||||||
channelBin.output = channelPrediction.input;
|
channelBin.output = channelPrediction.input;
|
||||||
}
|
}
|
||||||
@ -365,10 +434,14 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (inputInterval > 0) {
|
||||||
for (auto &bin : channelBands) {
|
for (auto &bin : channelBands) {
|
||||||
bin.prevOutput = bin.output;
|
bin.prevOutput = bin.output;
|
||||||
bin.prevInput = bin.input;
|
bin.prevInput = bin.input;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
for (auto &bin : channelBands) bin.prevOutput = bin.output;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Produces smoothed energy across all channels
|
// Produces smoothed energy across all channels
|
||||||
@ -399,7 +472,8 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Sample defaultFreqMap(Sample freq) const {
|
Sample mapFreq(Sample freq) const {
|
||||||
|
if (customFreqMap) return customFreqMap(freq);
|
||||||
if (freq > freqTonalityLimit) {
|
if (freq > freqTonalityLimit) {
|
||||||
Sample diff = freq - freqTonalityLimit;
|
Sample diff = freq - freqTonalityLimit;
|
||||||
return freqTonalityLimit*freqMultiplier + diff;
|
return freqTonalityLimit*freqMultiplier + diff;
|
||||||
@ -429,7 +503,7 @@ private:
|
|||||||
}
|
}
|
||||||
Sample avgFreq = freqSum/(stft.fftSize()*energySum);
|
Sample avgFreq = freqSum/(stft.fftSize()*energySum);
|
||||||
Sample avgEnergy = energySum/(end - start);
|
Sample avgEnergy = energySum/(end - start);
|
||||||
peaks.emplace_back(Peak{avgFreq*stft.fftSize(), defaultFreqMap(avgFreq)*stft.fftSize(), avgEnergy});
|
peaks.emplace_back(Peak{avgFreq*stft.fftSize(), mapFreq(avgFreq)*stft.fftSize()});
|
||||||
|
|
||||||
start = end;
|
start = end;
|
||||||
}
|
}
|
||||||
@ -438,6 +512,12 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void updateOutputMap(Sample peakWidthBins) {
|
void updateOutputMap(Sample peakWidthBins) {
|
||||||
|
if (peaks.empty()) {
|
||||||
|
for (int b = 0; b < stft.bands(); ++b) {
|
||||||
|
outputMap[b] = {Sample(b), 1};
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
Sample linearZoneBins = peakWidthBins*Sample(0.5);
|
Sample linearZoneBins = peakWidthBins*Sample(0.5);
|
||||||
Sample bottomOffset = peaks[0].input - peaks[0].output;
|
Sample bottomOffset = peaks[0].input - peaks[0].output;
|
||||||
for (int b = 0; b < std::min<int>(stft.bands(), peaks[0].output); ++b) {
|
for (int b = 0; b < std::min<int>(stft.bands(), peaks[0].output); ++b) {
|
||||||
@ -449,7 +529,7 @@ private:
|
|||||||
Sample nextStart = next.output - linearZoneBins;
|
Sample nextStart = next.output - linearZoneBins;
|
||||||
if (nextStart < prevEnd) nextStart = prevEnd = (nextStart + prevEnd)*Sample(0.5);
|
if (nextStart < prevEnd) nextStart = prevEnd = (nextStart + prevEnd)*Sample(0.5);
|
||||||
signalsmith::curves::Linear<Sample> segment(prevEnd, nextStart, prev.input + linearZoneBins, next.input - linearZoneBins);
|
signalsmith::curves::Linear<Sample> segment(prevEnd, nextStart, prev.input + linearZoneBins, next.input - linearZoneBins);
|
||||||
Sample segmentGrad = ((prev.input + linearZoneBins) - (next.input - linearZoneBins))/(prevEnd - nextStart + Sample(1e-10));
|
Sample segmentGrad = ((prev.input + linearZoneBins) - (next.input - linearZoneBins))/(prevEnd - nextStart + noiseFloor);
|
||||||
|
|
||||||
prevEnd = std::max<Sample>(0, std::min<Sample>(stft.bands(), prevEnd));
|
prevEnd = std::max<Sample>(0, std::min<Sample>(stft.bands(), prevEnd));
|
||||||
nextStart = std::max<Sample>(0, std::min<Sample>(stft.bands(), nextStart));
|
nextStart = std::max<Sample>(0, std::min<Sample>(stft.bands(), nextStart));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user