Refactor, split formants into 3 computation steps

This commit is contained in:
Geraint 2025-04-18 21:03:15 +01:00
parent 004a52b30d
commit b84e9cf5e9
3 changed files with 99 additions and 91 deletions

View File

@ -32,6 +32,7 @@ dev: out/stretch
out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav
out/stretch --time=0.8 --semitones=10 --formant-comp --formant=2 --formant-base=100 $(TEST_WAV) out/shift-fc-f2-fb100.wav
clean: clean:
rm -rf out rm -rf out

View File

@ -2,6 +2,7 @@
#include <iostream> #include <iostream>
#define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n"; #define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n";
#define PROFILE_PLOT_CHUNKS
#ifdef PROFILE_PLOT_CHUNKS #ifdef PROFILE_PLOT_CHUNKS
size_t activeStepIndex = 0; size_t activeStepIndex = 0;
void profileProcessStart(int, int); void profileProcessStart(int, int);

View File

@ -257,7 +257,6 @@ struct SignalsmithStretch {
} }
blockProcess.processFormants = formantMultiplier != 1 || (formantCompensation && blockProcess.mappedFrequencies); blockProcess.processFormants = formantMultiplier != 1 || (formantCompensation && blockProcess.mappedFrequencies);
if (blockProcess.processFormants) ++blockProcess.steps;
blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval); blockProcess.timeFactor = didSeek ? seekTimeFactor : stft.defaultInterval()/std::max<Sample>(1, inputInterval);
didSeek = false; didSeek = false;
@ -540,6 +539,7 @@ private:
processSpectrumSteps += channels; // preliminary phase-vocoder prediction processSpectrumSteps += channels; // preliminary phase-vocoder prediction
processSpectrumSteps += splitMainPrediction; processSpectrumSteps += splitMainPrediction;
if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput if (blockProcess.newSpectrum) processSpectrumSteps += 1; // .input -> .prevInput
if (blockProcess.processFormants) processSpectrumSteps += 3;
} }
void processSpectrum(size_t step) { void processSpectrum(size_t step) {
Sample timeFactor = blockProcess.timeFactor; Sample timeFactor = blockProcess.timeFactor;
@ -598,10 +598,11 @@ private:
return; return;
} }
if (blockProcess.processFormants) { if (blockProcess.processFormants) {
if (step-- == 0) { if (step < 3) {
updateFormants(0); updateFormants(step);
return; return;
} }
step -= 3;
} }
// Preliminary output prediction from phase-vocoder // Preliminary output prediction from phase-vocoder
if (step < size_t(channels)) { if (step < size_t(channels)) {
@ -836,21 +837,7 @@ private:
Sample freqEstimateWeighted = 0; Sample freqEstimateWeighted = 0;
Sample freqEstimateWeight = 0; Sample freqEstimateWeight = 0;
Sample estimateFrequency() {
std::vector<Sample> formantMetric;
Sample formantBaseFreq = 0;
void updateFormants(size_t) {
return;
for (auto &e : formantMetric) e = 0;
for (int c = 0; c < channels; ++c) {
Band *bins = bandsForChannel(c);
for (int b = 0; b < bands; ++b) {
formantMetric[b] += bins[b].inputEnergy;
}
}
Sample freqEstimate = freqToBand(formantBaseFreq);
if (formantBaseFreq <= 0) {
// 3 highest peaks in the input // 3 highest peaks in the input
std::array<int, 3> peakIndices{0, 0, 0}; std::array<int, 3> peakIndices{0, 0, 0};
for (int b = 1; b < bands - 1; ++b) { for (int b = 1; b < bands - 1; ++b) {
@ -886,15 +873,33 @@ private:
freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25; freqEstimateWeighted += (peakEstimate*weight - freqEstimateWeighted)*0.25;
freqEstimateWeight += (weight - freqEstimateWeight)*0.25; freqEstimateWeight += (weight - freqEstimateWeight)*0.25;
freqEstimate = freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30)); return freqEstimateWeighted/(freqEstimateWeight + Sample(1e-30));
} }
Sample freqEstimate;
std::vector<Sample> formantMetric;
Sample formantBaseFreq = 0;
void updateFormants(size_t step) {
if (step-- == 0) {
for (auto &e : formantMetric) e = 0;
for (int c = 0; c < channels; ++c) {
Band *bins = bandsForChannel(c);
for (int b = 0; b < bands; ++b) { for (int b = 0; b < bands; ++b) {
formantMetric[b] = std::sqrt(std::sqrt(formantMetric[b])); formantMetric[b] += bins[b].inputEnergy;
} }
Sample slew = 1/(freqEstimate*0.71 + 1); }
freqEstimate = freqToBand(formantBaseFreq);
if (formantBaseFreq <= 0) freqEstimate = estimateFrequency();
for (int b = 0; b < bands; ++b) {
formantMetric[b] = std::sqrt(formantMetric[b]);
}
} else if (step-- == 0) {
Sample slew = 1/(freqEstimate*0.5 + 1);
Sample e = 0; Sample e = 0;
for (int repeat = 0; repeat < 1; ++repeat) { for (size_t repeat = 0; repeat < 2; ++repeat) {
for (int b = bands - 1; b >= 0; --b) { for (int b = bands - 1; b >= 0; --b) {
e += (formantMetric[b] - e)*slew; e += (formantMetric[b] - e)*slew;
formantMetric[b] = e; formantMetric[b] = e;
@ -904,7 +909,7 @@ private:
formantMetric[b] = e; formantMetric[b] = e;
} }
} }
} else {
auto getFormant = [&](Sample band) -> Sample { auto getFormant = [&](Sample band) -> Sample {
if (band < 0) return 0; if (band < 0) return 0;
band = std::min<Sample>(band, bands); band = std::min<Sample>(band, bands);
@ -923,7 +928,7 @@ private:
Sample targetE = getFormant(freqToBand(outputF)); Sample targetE = getFormant(freqToBand(outputF));
Sample formantRatio = targetE/(inputE + Sample(1e-30)); Sample formantRatio = targetE/(inputE + Sample(1e-30));
Sample energyRatio = (formantRatio*formantRatio)*(formantRatio*formantRatio); Sample energyRatio = formantRatio*formantRatio;
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
Band *bins = bandsForChannel(c); Band *bins = bandsForChannel(c);
@ -932,6 +937,7 @@ private:
} }
} }
} }
}
}; };
}} // namespace }} // namespace