diff --git a/cmd/Makefile b/cmd/Makefile
index 54be081..d2985bb 100644
--- a/cmd/Makefile
+++ b/cmd/Makefile
@@ -31,6 +31,7 @@ dev: out/stretch
 	out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
+	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
 
 clean:
 	rm -rf out
diff --git a/cmd/main.cpp b/cmd/main.cpp
index 47dad4d..3a1f2b1 100644
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@@ -76,6 +76,7 @@ int main(int argc, char* argv[]) {
 	double semitones = args.flag("semitones", "pitch-shift amount", 0);
 	double formants = args.flag("formant", "formant-shift amount (semitones)", 0);
 	bool formantComp = args.hasFlag("formant-comp", "formant compensation");
+	double formantBase = args.flag("formant-base", "formant base frequency (Hz, 0=auto)", 0);
 	double tonality = args.flag("tonality", "tonality limit (Hz)", 8000);
 	double time = args.flag("time", "time-stretch factor", 1);
 	bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
@@ -112,6 +113,7 @@ int main(int argc, char* argv[]) {
 	stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
 	stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
 	stretch.setFormantSemitones(formants, formantComp);
+	stretch.setFormantBase(formantBase/inWav.sampleRate);
 
 	double initSeconds = stopwatch.lap();
 	initMemory = initMemory.diff();
diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h
index 6d68040..da98cef 100644
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@@ -120,6 +120,10 @@ struct SignalsmithStretch {
 	void setFormantSemitones(Sample semitones, bool compensatePitch=false) {
 		setFormantFactor(std::pow(2, semitones/12), compensatePitch);
 	}
+	// Fixed formant base frequency, relative to the sample rate (Hz / sampleRate); 0 or less means attempting to detect the pitch
+	void setFormantBase(Sample baseFreq=0) {
+		formantBaseFreq = baseFreq;
+	}
 
 	// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
 	template
@@ -826,6 +830,7 @@ private:
 	Sample freqEstimateWeight = 0;
 
 	std::vector<Sample> formantMetric;
+	Sample formantBaseFreq = 0;
 	void updateFormants(size_t) {
 		for (auto &e : formantMetric) e = 0;
 		for (int c = 0; c < channels; ++c) {
@@ -834,42 +839,46 @@ private:
 				formantMetric[b] += bins[b].inputEnergy;
 			}
 		}
-
-		// 3 highest peaks in the input
-		std::array<int, 3> peakIndices{0, 0, 0};
-		for (int b = 1; b < bands - 1; ++b) {
-			Sample e = formantMetric[b];
-			// local maxima only
-			if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
-			
-			if (e > formantMetric[peakIndices[0]]) {
-				if (e > formantMetric[peakIndices[1]]) {
-					if (e > formantMetric[peakIndices[2]]) {
-						peakIndices = {peakIndices[1], peakIndices[2], b};
+		
+		Sample freqEstimate = freqToBand(formantBaseFreq);
+		if (formantBaseFreq <= 0) {
+			// 3 highest peaks in the input
+			std::array<int, 3> peakIndices{0, 0, 0};
+			for (int b = 1; b < bands - 1; ++b) {
+				Sample e = formantMetric[b];
+				// local maxima only
+				if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
+				
+				if (e > formantMetric[peakIndices[0]]) {
+					if (e > formantMetric[peakIndices[1]]) {
+						if (e > formantMetric[peakIndices[2]]) {
+							peakIndices = {peakIndices[1], peakIndices[2], b};
+						} else {
+							peakIndices = {peakIndices[1], b, peakIndices[2]};
+						}
 					} else {
-						peakIndices = {peakIndices[1], b, peakIndices[2]};
+						peakIndices[0] = b;
 					}
-				} else {
-					peakIndices[0] = b;
 				}
 			}
-		}
-		
-		// VERY rough pitch estimation
-		int peakEstimate = peakIndices[2];
-		if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
-			int diff = std::abs(peakEstimate - peakIndices[1]);
-			if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
-			if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
-				int diff = std::abs(peakEstimate - peakIndices[0]);
+			
+			// VERY rough pitch estimation
+			int peakEstimate = peakIndices[2];
+			if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
+				int diff = std::abs(peakEstimate - peakIndices[1]);
 				if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
+				if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
+					int diff = std::abs(peakEstimate - peakIndices[0]);
+					if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
+				}
 			}
+			Sample weight = formantMetric[peakIndices[2]];
+			// Smooth it out a bit
+			freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
+			freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
+			
+			freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
 		}
-		Sample weight = formantMetric[peakIndices[2]];
-		// Smooth it out a bit
-		freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
-		freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
-		Sample freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
 
 		for (int b = 0; b < bands; ++b) {
 			formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b]));