Add .setFormantBase() method (specified relative to sample-rate)

This commit is contained in:
Geraint 2025-02-12 17:58:43 +00:00 committed by Geraint
parent 26a3a75deb
commit 97594ea3eb
3 changed files with 41 additions and 29 deletions

View File

@ -31,6 +31,7 @@ dev: out/stretch
out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav
out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
clean:
rm -rf out

View File

@ -76,6 +76,7 @@ int main(int argc, char* argv[]) {
double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
bool formantComp = args.hasFlag("formant-comp", "formant compensation");
double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 0);
double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
double time = args.flag<double>("time", "time-stretch factor", 1);
bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
@ -112,6 +113,7 @@ int main(int argc, char* argv[]) {
stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
stretch.setFormantSemitones(formants, formantComp);
stretch.setFormantBase(formantBase/inWav.sampleRate);
double initSeconds = stopwatch.lap();
initMemory = initMemory.diff();

View File

@ -120,6 +120,10 @@ struct SignalsmithStretch {
// Sets the formant shift as a musical interval instead of a raw frequency ratio.
// `semitones` is converted to a multiplier of 2^(semitones/12) (so 12 semitones doubles it),
// and that multiplier is handed to setFormantFactor() together with `compensatePitch`.
void setFormantSemitones(Sample semitones, bool compensatePitch=false) {
	const auto formantRatio = std::pow(2, semitones/12);
	setFormantFactor(formantRatio, compensatePitch);
}
// Stores the base frequency used as the reference for the formant analysis.
// The value is relative to the sample-rate (e.g. the command-line tool passes Hz / sampleRate).
// A value of 0 (the default) or below means no fixed base is used: the formant update
// then attempts to detect the pitch from the input's spectral peaks instead.
void setFormantBase(Sample baseFreq=0) {
	this->formantBaseFreq = baseFreq;
}
// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
template<class Inputs>
@ -826,6 +830,7 @@ private:
Sample freqEstimateWeight = 0;
std::vector<Sample> formantMetric;
Sample formantBaseFreq = 0;
void updateFormants(size_t) {
for (auto &e : formantMetric) e = 0;
for (int c = 0; c < channels; ++c) {
@ -835,6 +840,8 @@ private:
}
}
Sample freqEstimate = freqToBand(formantBaseFreq);
if (formantBaseFreq <= 0) {
// 3 highest peaks in the input
std::array<int, 3> peakIndices{0, 0, 0};
for (int b = 1; b < bands - 1; ++b) {
@ -869,7 +876,9 @@ private:
// Smooth it out a bit
freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
Sample freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
}
for (int b = 0; b < bands; ++b) {
formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b]));