From b84e9cf5e9166514369966d75c6ab15bec0fd3c6 Mon Sep 17 00:00:00 2001
From: Geraint
Date: Fri, 18 Apr 2025 21:03:15 +0100
Subject: [PATCH] Refactor, split formants into 3 computation steps

---
Review notes (text between the "---" marker and the first "diff --git"
line is commentary, not part of the change).

What the hunks below do:
  * cmd/Makefile: adds one more invocation to the `dev` target
    (--formant=2 --formant-base=100).
  * cmd/main.cpp: defines PROFILE_PLOT_CHUNKS directly above the
    `#ifdef PROFILE_PLOT_CHUNKS` guard, so the guarded code is now
    always compiled in.
    NOTE(review): looks like a local debugging switch - confirm it is
    meant to be committed.
  * signalsmith-stretch.h:
    - Formant work is no longer counted with `++blockProcess.steps`;
      instead 3 is added to processSpectrumSteps, and processSpectrum()
      routes its first three steps to updateFormants(step) before
      subtracting 3 and continuing with the other steps.
    - The removed updateFormants(size_t) began with an unconditional
      `return;`, so the rest of its body never ran.
    - The added updateFormants(size_t step):
        step 0  sums inputEnergy over all channels into formantMetric,
                sets freqEstimate (freqToBand(formantBaseFreq), or
                estimateFrequency() when formantBaseFreq <= 0), then
                takes the square root of every entry;
        step 1  smooths formantMetric with a backward pass followed by a
                forward pass, repeated twice;
        other   scales each band's inputEnergy in every channel by the
                squared target/input ratio of the smoothed metric.
    - The peak-picking / rough pitch code moves into the new
      estimateFrequency(), which returns the smoothed estimate instead
      of assigning it to a local.
    - Constants differ from the removed code, not only the structure:
      sqrt(sqrt(x)) -> sqrt(x), ratio^4 -> ratio^2, slew factor
      0.71 -> 0.5, smoothing repeats 1 -> 2.

Points to verify:
  NOTE(review): `Sample freqEstimate;` is added without an initializer.
    It is written in step 0 and read in step 1, so this presumably
    relies on the steps always running in order - confirm.
  NOTE(review): getFormant clamps `band` to `bands` and then reads
    formantMetric[floorBand + 1], i.e. an index of up to bands + 1.
    Presumably formantMetric is sized to at least bands + 2 elsewhere;
    that sizing is not visible in this patch - confirm.
  NOTE(review): this copy appears to have lost its original line breaks
    and indentation, and text inside angle brackets seems to have been
    stripped (e.g. nothing follows `#include`, and `std::vector` has no
    element type). The layout below is a best-effort reconstruction;
    blank-line positions and indent depth are assumed. Check against the
    original commit before applying.

 cmd/Makefile          |   1 +
 cmd/main.cpp          |   1 +
 signalsmith-stretch.h | 188 ++++++++++++++++++++++--------------------
 3 files changed, 99 insertions(+), 91 deletions(-)

diff --git a/cmd/Makefile b/cmd/Makefile
index d2985bb..6887a65 100644
--- a/cmd/Makefile
+++ b/cmd/Makefile
@@ -32,6 +32,7 @@ dev: out/stretch
 	out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
 	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
+	out/stretch --time=0.8 --semitones=10 --formant-comp --formant=2 --formant-base=100 $(TEST_WAV) out/shift-fc-f2-fb100.wav
 
 clean:
 	rm -rf out
diff --git a/cmd/main.cpp b/cmd/main.cpp
index 3a1f2b1..8cdec21 100644
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@@ -2,6 +2,7 @@
 #include
 #define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n";
 
+#define PROFILE_PLOT_CHUNKS
 #ifdef PROFILE_PLOT_CHUNKS
 size_t activeStepIndex = 0;
 void profileProcessStart(int, int);
diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h
index a77b8cd..172147d 100644
--- a/signalsmith-stretch.h
+++ b/signalsmith-stretch.h
@@ -257,7 +257,6 @@ struct SignalsmithStretch {
 			}
 
 			blockProcess.processFormants = formantMultiplier != 1 || (formantCompensation && blockProcess.mappedFrequencies);
-			if (blockProcess.processFormants) ++blockProcess.steps;
 
 			blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max(1, inputInterval);
 			didSeek = false;
@@ -540,6 +539,7 @@ private:
 		processSpectrumSteps += channels; // preliminary phase-vocoder prediction
 		processSpectrumSteps += splitMainPrediction;
 		if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput
+		if (blockProcess.processFormants) processSpectrumSteps += 3;
 	}
 	void processSpectrum(size_t step) {
 		Sample timeFactor = blockProcess.timeFactor;
@@ -598,10 +598,11 @@ private:
 			return;
 		}
 		if (blockProcess.processFormants) {
-			if (step-- == 0) {
-				updateFormants(0);
+			if (step < 3) {
+				updateFormants(step);
 				return;
 			}
+			step -= 3;
 		}
 		// Preliminary output prediction from phase-vocoder
 		if (step < size_t(channels)) {
@@ -836,99 +837,104 @@ private:
 
 	Sample freqEstimateWeighted = 0;
 	Sample freqEstimateWeight = 0;
+	Sample estimateFrequency() {
+		// 3 highest peaks in the input
+		std::array peakIndices{0, 0, 0};
+		for (int b = 1; b < bands - 1; ++b) {
+			Sample e = formantMetric[b];
+			// local maxima only
+			if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
+
+			if (e > formantMetric[peakIndices[0]]) {
+				if (e > formantMetric[peakIndices[1]]) {
+					if (e > formantMetric[peakIndices[2]]) {
+						peakIndices = {peakIndices[1], peakIndices[2], b};
+					} else {
+						peakIndices = {peakIndices[1], b, peakIndices[2]};
+					}
+				} else {
+					peakIndices[0] = b;
+				}
+			}
+		}
+
+		// VERY rough pitch estimation
+		int peakEstimate = peakIndices[2];
+		if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
+			int diff = std::abs(peakEstimate - peakIndices[1]);
+			if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
+			if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
+				int diff = std::abs(peakEstimate - peakIndices[0]);
+				if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
+			}
+		}
+		Sample weight = formantMetric[peakIndices[2]];
+		// Smooth it out a bit
+		freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
+		freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
+
+		return freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
+	}
+
+	Sample freqEstimate;
 
 	std::vector formantMetric;
 	Sample formantBaseFreq = 0;
-	void updateFormants(size_t) {
-		return;
-		for (auto &e : formantMetric) e = 0;
-		for (int c = 0; c < channels; ++c) {
-			Band *bins = bandsForChannel(c);
-			for (int b = 0; b < bands; ++b) {
-				formantMetric[b] += bins[b].inputEnergy;
-			}
-		}
-
-		Sample freqEstimate = freqToBand(formantBaseFreq);
-		if (formantBaseFreq <= 0) {
-			// 3 highest peaks in the input
-			std::array peakIndices{0, 0, 0};
-			for (int b = 1; b < bands - 1; ++b) {
-				Sample e = formantMetric[b];
-				// local maxima only
-				if (e < formantMetric[b - 1] || e <= formantMetric[b + 1]) continue;
-
-				if (e > formantMetric[peakIndices[0]]) {
-					if (e > formantMetric[peakIndices[1]]) {
-						if (e > formantMetric[peakIndices[2]]) {
-							peakIndices = {peakIndices[1], peakIndices[2], b};
-						} else {
-							peakIndices = {peakIndices[1], b, peakIndices[2]};
-						}
-					} else {
-						peakIndices[0] = b;
-					}
-				}
-			}
-
-			// VERY rough pitch estimation
-			int peakEstimate = peakIndices[2];
-			if (formantMetric[peakIndices[1]] > formantMetric[peakIndices[2]]*0.1) {
-				int diff = std::abs(peakEstimate - peakIndices[1]);
-				if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
-				if (formantMetric[peakIndices[0]] > formantMetric[peakIndices[2]]*0.01) {
-					int diff = std::abs(peakEstimate - peakIndices[0]);
-					if (diff > peakEstimate/8 && diff < peakEstimate*7/8) peakEstimate = peakEstimate%diff;
-				}
-			}
-			Sample weight = formantMetric[peakIndices[2]];
-			// Smooth it out a bit
-			freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
-			freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
-
-			freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
-		}
-
-		for (int b = 0; b < bands; ++b) {
-			formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b]));
-		}
-		Sample slew = 1/(freqEstimate*0.71 + 1);
-		Sample e = 0;
-		for (int repeat = 0; repeat < 1; ++repeat) {
-			for (int b = bands - 1; b >= 0; --b) {
-				e += (formantMetric[b] - e)*slew;
-				formantMetric[b] = e;
-			}
-			for (int b = 0; b < bands; ++b) {
-				e += (formantMetric[b] - e)*slew;
-				formantMetric[b] = e;
-			}
-		}
-
-		auto getFormant = [&](Sample band) -> Sample {
-			if (band < 0) return 0;
-			band = std::min(band, bands);
-			int floorBand = std::floor(band);
-			Sample fracBand = band - floorBand;
-			Sample low = formantMetric[floorBand], high = formantMetric[floorBand + 1];
-			return low + (high - low)*fracBand;
-		};
-
-		for (int b = 0; b < bands; ++b) {
-			Sample inputF = bandToFreq(b);
-			Sample outputF = formantCompensation ? mapFreq(inputF) : inputF;
-			outputF = invMapFormant(outputF);
-
-			Sample inputE = formantMetric[b];
-			Sample targetE = getFormant(freqToBand(outputF));
-
-			Sample formantRatio = targetE/(inputE + Sample(1e-30));
-			Sample energyRatio = (formantRatio*formantRatio)*(formantRatio*formantRatio);
-
+	void updateFormants(size_t step) {
+		if (step-- == 0) {
+			for (auto &e : formantMetric) e = 0;
 			for (int c = 0; c < channels; ++c) {
 				Band *bins = bandsForChannel(c);
-				// This is what's used to decide the output energy, so this affects the output
-				bins[b].inputEnergy *= energyRatio;
+				for (int b = 0; b < bands; ++b) {
+					formantMetric[b] += bins[b].inputEnergy;
+				}
+			}
+
+			freqEstimate = freqToBand(formantBaseFreq);
+			if (formantBaseFreq <= 0) freqEstimate = estimateFrequency();
+
+			for (int b = 0; b < bands; ++b) {
+				formantMetric[b] = std::sqrt(formantMetric[b]);
+			}
+		} else if (step-- == 0) {
+			Sample slew = 1/(freqEstimate*0.5 + 1);
+			Sample e = 0;
+			for (size_t repeat = 0; repeat < 2; ++repeat) {
+				for (int b = bands - 1; b >= 0; --b) {
+					e += (formantMetric[b] - e)*slew;
+					formantMetric[b] = e;
+				}
+				for (int b = 0; b < bands; ++b) {
+					e += (formantMetric[b] - e)*slew;
+					formantMetric[b] = e;
+				}
+			}
+		} else {
+			auto getFormant = [&](Sample band) -> Sample {
+				if (band < 0) return 0;
+				band = std::min(band, bands);
+				int floorBand = std::floor(band);
+				Sample fracBand = band - floorBand;
+				Sample low = formantMetric[floorBand], high = formantMetric[floorBand + 1];
+				return low + (high - low)*fracBand;
+			};
+
+			for (int b = 0; b < bands; ++b) {
+				Sample inputF = bandToFreq(b);
+				Sample outputF = formantCompensation ? mapFreq(inputF) : inputF;
+				outputF = invMapFormant(outputF);
+
+				Sample inputE = formantMetric[b];
+				Sample targetE = getFormant(freqToBand(outputF));
+
+				Sample formantRatio = targetE/(inputE + Sample(1e-30));
+				Sample energyRatio = formantRatio*formantRatio;
+
+				for (int c = 0; c < channels; ++c) {
+					Band *bins = bandsForChannel(c);
+					// This is what's used to decide the output energy, so this affects the output
+					bins[b].inputEnergy *= energyRatio;
+				}
 			}
 		}
 	}