Add .setFormantBase() method (specified relative to sample-rate)

This commit is contained in:
Geraint 2025-02-12 17:58:43 +00:00 committed by Geraint
parent 26a3a75deb
commit 97594ea3eb
3 changed files with 41 additions and 29 deletions

View File

@ -31,6 +31,7 @@ dev: out/stretch
out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav
out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
clean: clean:
rm -rf out rm -rf out

View File

@ -76,6 +76,7 @@ int main(int argc, char* argv[]) {
double semitones = args.flag<double>("semitones", "pitch-shift amount", 0); double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0); double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
bool formantComp = args.hasFlag("formant-comp", "formant compensation"); bool formantComp = args.hasFlag("formant-comp", "formant compensation");
double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 0);
double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000); double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
double time = args.flag<double>("time", "time-stretch factor", 1); double time = args.flag<double>("time", "time-stretch factor", 1);
bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length"); bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
@ -112,6 +113,7 @@ int main(int argc, char* argv[]) {
stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation); stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate); stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
stretch.setFormantSemitones(formants, formantComp); stretch.setFormantSemitones(formants, formantComp);
stretch.setFormantBase(formantBase/inWav.sampleRate);
double initSeconds = stopwatch.lap(); double initSeconds = stopwatch.lap();
initMemory = initMemory.diff(); initMemory = initMemory.diff();

View File

@ -120,6 +120,10 @@ struct SignalsmithStretch {
void setFormantSemitones(Sample semitones, bool compensatePitch=false) { void setFormantSemitones(Sample semitones, bool compensatePitch=false) {
setFormantFactor(std::pow(2, semitones/12), compensatePitch); setFormantFactor(std::pow(2, semitones/12), compensatePitch);
} }
// Sets the base frequency used when updating the formant estimate, specified relative to the sample-rate (i.e. Hz divided by sample-rate).
// 0 (the default) or any non-positive value means attempting to detect the pitch from the input instead.
void setFormantBase(Sample baseFreq=0) {
formantBaseFreq = baseFreq;
}
// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval // Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
template<class Inputs> template<class Inputs>
@ -826,6 +830,7 @@ private:
Sample freqEstimateWeight = 0; Sample freqEstimateWeight = 0;
std::vector<Sample> formantMetric; std::vector<Sample> formantMetric;
Sample formantBaseFreq = 0;
void updateFormants(size_t) { void updateFormants(size_t) {
for (auto &e : formantMetric) e = 0; for (auto &e : formantMetric) e = 0;
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
@ -834,42 +839,46 @@ private:
formantMetric[b] += bins[b].inputEnergy; formantMetric[b] += bins[b].inputEnergy;
} }
} }
// 3 highest peaks in the input Sample freqEstimate = freqToBand(formantBaseFreq);
std::array<int, 3> peakIndices{0, 0, 0}; if (formantBaseFreq <= 0) {
for (int b = 1; b < bands - 1; ++b) { // 3 highest peaks in the input
Sample e = formantMetric[b]; std::array<int, 3> peakIndices{0, 0, 0};
// local maxima only for (int b = 1; b < bands - 1; ++b) {
if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue; Sample e = formantMetric[b];
// local maxima only
if (e > formantMetric[peakIndices[0]]) { if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
if (e > formantMetric[peakIndices[1]]) {
if (e > formantMetric[peakIndices[2]]) { if (e > formantMetric[peakIndices[0]]) {
peakIndices = {peakIndices[1], peakIndices[2], b}; if (e > formantMetric[peakIndices[1]]) {
if (e > formantMetric[peakIndices[2]]) {
peakIndices = {peakIndices[1], peakIndices[2], b};
} else {
peakIndices = {peakIndices[1], b, peakIndices[2]};
}
} else { } else {
peakIndices = {peakIndices[1], b, peakIndices[2]}; peakIndices[0] = b;
} }
} else {
peakIndices[0] = b;
} }
} }
}
// VERY rough pitch estimation
// VERY rough pitch estimation int peakEstimate = peakIndices[2];
int peakEstimate = peakIndices[2]; if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) { int diff = std::abs(peakEstimate - peakIndices[1]);
int diff = std::abs(peakEstimate - peakIndices[1]);
if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
int diff = std::abs(peakEstimate - peakIndices[0]);
if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff; if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
int diff = std::abs(peakEstimate - peakIndices[0]);
if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
}
} }
Sample weight = formantMetric[peakIndices[2]];
// Smooth it out a bit
freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
} }
Sample weight = formantMetric[peakIndices[2]];
// Smooth it out a bit
freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
Sample freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
for (int b = 0; b < bands; ++b) { for (int b = 0; b < bands; ++b) {
formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b])); formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b]));