Add .setFormantBase() method (specified relative to sample-rate)

2025-02-12 17:58:43 +00:00 · 2025-02-12 17:58:43 +00:00 · 97594ea3eb
commit 97594ea3eb
parent 26a3a75deb
3 changed files with 41 additions and 29 deletions
--- a/cmd/Makefile
+++ b/cmd/Makefile
@ -31,6 +31,7 @@ dev: out/stretch
 	out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
+	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav

 clean:
 	rm -rf out
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@ -76,6 +76,7 @@ int main(int argc, char* argv[]) {
 	double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
 	double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
 	bool formantComp = args.hasFlag("formant-comp", "formant compensation");
+	double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 0);
 	double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
 	double time = args.flag<double>("time", "time-stretch factor", 1);
 	bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
@ -112,6 +113,7 @@ int main(int argc, char* argv[]) {
 	stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
 	stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
 	stretch.setFormantSemitones(formants, formantComp);
+	stretch.setFormantBase(formantBase/inWav.sampleRate);
 	double initSeconds = stopwatch.lap();

 	initMemory = initMemory.diff();
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@ -120,6 +120,10 @@ struct SignalsmithStretch {
 	void setFormantSemitones(Sample semitones, bool compensatePitch=false) {
 		setFormantFactor(std::pow(2, semitones/12), compensatePitch);
 	}
+	// Sets a fixed base frequency for the formant analysis, specified relative to the sample-rate (e.g. Hz / sampleRate). 0 (or any non-positive value) means attempting to detect the pitch
+	void setFormantBase(Sample baseFreq=0) {
+		formantBaseFreq = baseFreq;
+	}

 	// Provide previous input ("pre-roll"), without affecting the speed calculation.  You should ideally feed it one block-length + one interval
 	template<class Inputs>
@ -826,6 +830,7 @@ private:
 	Sample freqEstimateWeight = 0;
 	
 	std::vector<Sample> formantMetric;
+	Sample formantBaseFreq = 0;
 	void updateFormants(size_t) {
 		for (auto &e : formantMetric) e = 0;
 		for (int c = 0; c < channels; ++c) {
@ -835,6 +840,8 @@ private:
 			}
 		}

+		Sample freqEstimate = freqToBand(formantBaseFreq);
+		if (formantBaseFreq <= 0) {
 			// 3 highest peaks in the input
 			std::array<int, 3> peakIndices{0, 0, 0};
 			for (int b = 1; b < bands - 1; ++b) {
@ -869,7 +876,9 @@ private:
 			// Smooth it out a bit
 			freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
 			freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
-		Sample freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
+			
+			freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
+		}
 	
 		for (int b = 0; b < bands; ++b) {
 			formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b]));