Bypasses processing when given silent input

2022-11-29 15:26:44 +00:00 · 2022-11-29 15:26:44 +00:00 · 31a4c2b5ba
commit 31a4c2b5ba
parent 901df7bf97
2 changed files with 108 additions and 20 deletions
--- a/README.md
+++ b/README.md
@ -2,7 +2,7 @@
 This is a C++11 library for pitch and time stretching, using the final approach from the ADC22 presentation _Four Ways To Write A Pitch-Shifter_.
-It's still a work-in-progress: the pitch-shifting is fine, but the time-stretching isn't finished.
+It can handle a wide range of pitch-shifts (multiple octaves), but time-stretching sounds best for more modest changes (between 0.5x and 2x).
 ## How to use it
@ -20,7 +20,7 @@ The easiest way to configure is `.presetDefault()`:
 stretch.presetDefault(channels, sampleRate);
 ```
-If you want to test out different block-sizes etc. then you can use `.configure()` manually, and even change `.freqWeight`/`.timeWeight`/`.channelWeight`.
+If you want to test out different block-sizes etc. then you can use `.configure()` manually.
 ### Processing (and resetting)
@ -54,6 +54,14 @@ You can set a "tonality limit", which uses a non-linear frequency map to preserv
 stretch.setTransposeSemitones(4, 8000/sampleRate);
 ```
 Alternatively, you can set a custom frequency map, which maps input frequencies to output frequencies (both normalised against the sample rate, i.e. `freq/sampleRate`):
 ```cpp
 stretch.setFreqMap([](float inputFreq) {
 	return inputFreq*2; // up one octave
 });
 ```
 ## Compiling
 Just include `signalsmith-stretch.h` in your build.
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@ -4,8 +4,10 @@
 #include "dsp/spectral.h"
 #include "dsp/delay.h"
 #include "dsp/curves.h"
 SIGNALSMITH_DSP_VERSION_CHECK(1, 3, 3); // Check version is compatible
 #include <vector>
 #include <algorithm>
 #include <functional>
 namespace signalsmith { namespace stretch {
@ -30,14 +32,15 @@ struct SignalsmithStretch {
 		inputBuffer.reset();
 		prevInputOffset = -1;
 		channelBands.assign(channelBands.size(), Band());
 		silenceCounter = 2*stft.windowSize();
 	}
-	/// Configures using a default preset
+	// Configures using a default preset
 	// The block length is 0.12x the sample rate and the interval between blocks is 0.03x
 	// (120ms / 30ms when `sampleRate` is in Hz); both are truncated to whole samples.
 	void presetDefault(int nChannels, Sample sampleRate) {
 		const int blockSamples = sampleRate*0.12;
 		const int intervalSamples = sampleRate*0.03;
 		configure(nChannels, blockSamples, intervalSamples);
 	}
-	/// Manual setup
+	// Manual setup
 	void configure(int nChannels, int blockSamples, int intervalSamples) {
 		channels = nChannels;
 		stft.resize(channels, blockSamples, intervalSamples);
@ -61,7 +64,59 @@ struct SignalsmithStretch {
 	template<class Inputs, class Outputs>
 	void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
-		Sample timeScaling = Sample(inputSamples)/outputSamples;
+		Sample totalEnergy = 0;
 		for (int c = 0; c < channels; ++c) {
 			auto &&inputChannel = inputs[c];
 			for (int i = 0; i < inputSamples; ++i) {
 				Sample s = inputChannel[i];
 				totalEnergy += s*s;
 			}
 		}
 		if (totalEnergy < noiseFloor) {
 			if (silenceCounter >= 2*stft.windowSize()) {
 				if (silenceFirst) {
 					silenceFirst = false;
 					for (auto &b : channelBands) {
 						b.input = b.prevInput = b.output = b.prevOutput = 0;
 						b.inputEnergy = 0;
 					}
 				}
 				if (inputSamples > 0) {
 					// copy from the input, wrapping around if needed
 					for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
 						int inputIndex = outputIndex%inputSamples;
 						for (int c = 0; c < channels; ++c) {
 							outputs[c][outputIndex] = inputs[c][inputIndex];
 						}
 					}
 				} else {
 					for (int c = 0; c < channels; ++c) {
 						auto &&outputChannel = outputs[c];
 						for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
 							outputChannel[outputIndex] = 0;
 						}
 					}
 				}
 				// Store input in history buffer
 				for (int c = 0; c < channels; ++c) {
 					auto &&inputChannel = inputs[c];
 					auto &&bufferChannel = inputBuffer[c];
 					int startIndex = std::max<int>(0, inputSamples - stft.windowSize());
 					for (int i = startIndex; i < inputSamples; ++i) {
 						bufferChannel[i] = inputChannel[i];
 					}
 				}
 				inputBuffer += inputSamples;
 				return;
 			} else {
 				silenceCounter += inputSamples;
 			}
 		} else {
 			silenceCounter = 0;
 			silenceFirst = true;
 		}
 		for (int outputIndex = 0; outputIndex < outputSamples; ++outputIndex) {
 			stft.ensureValid(outputIndex, [&](int outputOffset) {
@ -83,7 +138,6 @@ struct SignalsmithStretch {
 						for (int i = std::max<int>(0, -inputOffset); i < stft.windowSize(); ++i) {
 							timeBuffer[i] = inputChannel[i + inputOffset];
 						}
 						stft.analyse(c, timeBuffer);
 					}
 				}
@ -111,6 +165,9 @@ struct SignalsmithStretch {
 				auto &&outputChannel = outputs[c];
 				auto &&stftChannel = stft[c];
 				// Pass the synthesised sample through unmodified. (A leftover debug statement used to
 				// negate it here, which inverted the polarity of all processed audio relative to the
 				// silence-bypass path, which copies the input to the output unchanged.)
 				outputChannel[outputIndex] = stftChannel[outputIndex];
 			}
 		}
@ -136,14 +193,25 @@ struct SignalsmithStretch {
 		} else {
 			freqTonalityLimit = 1;
 		}
 		customFreqMap = nullptr;
 	}
 	// Sets the pitch-shift in semitones: converts it to the frequency ratio 2^(semitones/12)
 	// and forwards that, together with `tonalityLimit`, to setTransposeFactor().
 	// Any custom frequency map is cleared afterwards.
 	void setTransposeSemitones(Sample semitones, Sample tonalityLimit=0) {
 		const auto octaves = semitones/12;
 		setTransposeFactor(std::pow(2, octaves), tonalityLimit);
 		customFreqMap = nullptr;
 	}
 	// Sets a custom frequency map - should be monotonically increasing
 	// Installs `inputToOutput` as the frequency map, replacing the built-in one.
 	// Passing an empty function (or nullptr) restores the built-in map.
 	void setFreqMap(std::function<Sample(Sample)> inputToOutput) {
 		// The argument is already our own copy (taken by value), so swap it into place instead of
 		// copy-assigning: this avoids a second copy of the callable (and any allocation it needs).
 		// The previously installed map ends up in the parameter and is destroyed on return.
 		customFreqMap.swap(inputToOutput);
 	}
 private:
 	// Energy threshold: process() treats a block as silent when its summed squared samples fall below this.
 	// Also reused as a small epsilon in divisions and norm comparisons.
 	static constexpr Sample noiseFloor{1e-15};
 	// Input samples accumulated over consecutive silent process() calls; zeroed by any non-silent block.
 	// process() only bypasses once this reaches 2*stft.windowSize().
 	int silenceCounter = 0;
 	// True until the band state has been cleared for the current run of silence (re-armed by non-silent input)
 	bool silenceFirst = true;
 	using Complex = std::complex<Sample>;
 	// Parameters of the built-in frequency map, used when no custom map is set
 	Sample freqMultiplier = 1, freqTonalityLimit = 0.5;
 	// Optional user-supplied input->output frequency map (see setFreqMap()); empty means use the built-in map
 	std::function<Sample(Sample)> customFreqMap = nullptr;
 	signalsmith::spectral::STFT<Sample> stft{0, 1, 1};
 	// History of recent input samples, one buffer per channel
 	signalsmith::delay::MultiBuffer<Sample> inputBuffer;
@ -211,7 +279,7 @@ private:
 	}
 	struct Peak {
-		Sample input, output, energy;
+		Sample input, output;
 		bool operator< (const Peak &other) const {
 			return output < other.output;
@ -241,14 +309,15 @@ private:
 		int bands = stft.bands();
 		Sample rate = outputInterval/std::max<Sample>(1, inputInterval);
 		rate = std::min<Sample>(2, rate); // For now, limit the intra-block time stretching to 2x
 		if (inputInterval > 0) {
 			for (int c = 0; c < channels; ++c) {
 				auto bins = bandsForChannel(c);
 				for (int b = 0; b < stft.bands(); ++b) {
 					auto &bin = bins[b];
-					bins[b].prevOutput *= rotPrevOutput[b];
+					bin.prevOutput *= rotPrevOutput[b];
-					bins[b].prevInput *= rotPrevInput[b];
+					bin.prevInput *= rotPrevInput[b];
 				}
 			}
 		}
@ -295,7 +364,7 @@ private:
 				predictions[b] = prediction;
 				// Rough output prediction based on phase-vocoder, sensitive to previous input/output magnitude
-				outputBin.output = prediction.freqPrediction/(prediction.energy + Sample(1e-10));
+				outputBin.output = prediction.freqPrediction/(prediction.energy + noiseFloor);
 			}
 		}
 		for (int b = 0; b < stft.bands(); ++b) {
@ -340,7 +409,7 @@ private:
 			}
 			Sample phaseNorm = std::norm(phase);
-			if (phaseNorm > 1e-15) {
+			if (phaseNorm > noiseFloor) {
 				outputBin.output = phase*std::sqrt(prediction.energy/phaseNorm);
 			} else {
 				outputBin.output = prediction.input;
@ -352,12 +421,12 @@ private:
 					auto &channelBin = bandsForChannel(c)[b];
 					auto &channelPrediction = predictionsForChannel(c)[b];
-					Complex channelTwist = prediction.input*std::conj(channelPrediction.input);
+					Complex channelTwist = channelPrediction.input*std::conj(prediction.input);
 					Complex channelPhase = outputBin.output*channelTwist;
 					Sample channelPhaseNorm = std::norm(channelPhase);
-					if (channelPhaseNorm > 1e-15) {
+					if (channelPhaseNorm > noiseFloor) {
-						channelBin.output = channelPhase*std::sqrt(prediction.energy/channelPhaseNorm);
+						channelBin.output = channelPhase*std::sqrt(channelPrediction.energy/channelPhaseNorm);
 					} else {
 						channelBin.output = channelPrediction.input;
 					}
@ -365,10 +434,14 @@ private:
 			}
 		}
 		if (inputInterval > 0) {
 			for (auto &bin : channelBands) {
 				bin.prevOutput = bin.output;
 				bin.prevInput = bin.input;
 			}
 		} else {
 			for (auto &bin : channelBands) bin.prevOutput = bin.output;
 		}
 	}
 	// Produces smoothed energy across all channels
@ -399,7 +472,8 @@ private:
 		}
 	}
-	Sample defaultFreqMap(Sample freq) const {
+	Sample mapFreq(Sample freq) const {
 		if (customFreqMap) return customFreqMap(freq);
 		if (freq > freqTonalityLimit) {
 			Sample diff = freq - freqTonalityLimit;
 			return freqTonalityLimit*freqMultiplier + diff;
@ -429,7 +503,7 @@ private:
 				}
 				Sample avgFreq = freqSum/(stft.fftSize()*energySum);
 				Sample avgEnergy = energySum/(end - start);
-				peaks.emplace_back(Peak{avgFreq*stft.fftSize(), defaultFreqMap(avgFreq)*stft.fftSize(), avgEnergy});
+				peaks.emplace_back(Peak{avgFreq*stft.fftSize(), mapFreq(avgFreq)*stft.fftSize()});
 				start = end;
 			}
@ -438,6 +512,12 @@ private:
 	}
 	void updateOutputMap(Sample peakWidthBins) {
 		if (peaks.empty()) {
 			for (int b = 0; b < stft.bands(); ++b) {
 				outputMap[b] = {Sample(b), 1};
 			}
 			return;
 		}
 		Sample linearZoneBins = peakWidthBins*Sample(0.5);
 		Sample bottomOffset = peaks[0].input - peaks[0].output;
 		for (int b = 0; b < std::min<int>(stft.bands(), peaks[0].output); ++b) {
@ -449,7 +529,7 @@ private:
 			Sample nextStart = next.output - linearZoneBins;
 			if (nextStart < prevEnd) nextStart = prevEnd = (nextStart + prevEnd)*Sample(0.5);
 			signalsmith::curves::Linear<Sample> segment(prevEnd, nextStart, prev.input + linearZoneBins, next.input - linearZoneBins);
-			Sample segmentGrad = ((prev.input + linearZoneBins) - (next.input - linearZoneBins))/(prevEnd - nextStart + Sample(1e-10));
+			Sample segmentGrad = ((prev.input + linearZoneBins) - (next.input - linearZoneBins))/(prevEnd - nextStart + noiseFloor);
 			prevEnd = std::max<Sample>(0, std::min<Sample>(stft.bands(), prevEnd));
 			nextStart = std::max<Sample>(0, std::min<Sample>(stft.bands(), nextStart));