497 lines
16 KiB
C++
497 lines
16 KiB
C++
#include "./common.h"
|
|
|
|
#ifndef SIGNALSMITH_DSP_SPECTRAL_H
|
|
#define SIGNALSMITH_DSP_SPECTRAL_H
|
|
|
|
#include "./perf.h"
|
|
#include "./fft.h"
|
|
#include "./windows.h"
|
|
#include "./delay.h"
|
|
|
|
#include <cmath>
|
|
|
|
namespace signalsmith {
|
|
namespace spectral {
|
|
/** @defgroup Spectral Spectral Processing
|
|
@brief Tools for frequency-domain manipulation of audio signals
|
|
|
|
@{
|
|
@file
|
|
*/
|
|
|
|
/** @brief An FFT with built-in windowing and round-trip scaling
|
|
|
|
This uses a Modified Real FFT, which applies half-bin shift before the transform. The result therefore has `N/2` bins, centred at the frequencies: `(i + 0.5)/N`.
|
|
|
|
This avoids the awkward (real-valued) bands for DC-offset and Nyquist.
|
|
*/
|
|
template<typename Sample>
|
|
class WindowedFFT {
|
|
using MRFFT = signalsmith::fft::ModifiedRealFFT<Sample>;
|
|
using Complex = std::complex<Sample>;
|
|
MRFFT mrfft{2};
|
|
|
|
std::vector<Sample> fftWindow;
|
|
std::vector<Sample> timeBuffer;
|
|
int offsetSamples = 0;
|
|
public:
|
|
/// Returns a fast FFT size <= `size`
|
|
static int fastSizeAbove(int size, int divisor=1) {
|
|
return MRFFT::fastSizeAbove(size/divisor)*divisor;
|
|
}
|
|
/// Returns a fast FFT size >= `size`
|
|
static int fastSizeBelow(int size, int divisor=1) {
|
|
return MRFFT::fastSizeBelow(1 + (size - 1)/divisor)*divisor;
|
|
}
|
|
|
|
WindowedFFT() {}
|
|
WindowedFFT(int size, int rotateSamples=0) {
|
|
setSize(size, rotateSamples);
|
|
}
|
|
template<class WindowFn>
|
|
WindowedFFT(int size, WindowFn fn, Sample windowOffset=0.5, int rotateSamples=0) {
|
|
setSize(size, fn, windowOffset, rotateSamples);
|
|
}
|
|
|
|
/// Sets the size, returning the window for modification (initially all 1s)
|
|
std::vector<Sample> & setSizeWindow(int size, int rotateSamples=0) {
|
|
mrfft.setSize(size);
|
|
fftWindow.assign(size, 1);
|
|
timeBuffer.resize(size);
|
|
offsetSamples = rotateSamples;
|
|
if (offsetSamples < 0) offsetSamples += size; // TODO: for a negative rotation, the other half of the result is inverted
|
|
return fftWindow;
|
|
}
|
|
/// Sets the FFT size, with a user-defined functor for the window
|
|
template<class WindowFn>
|
|
void setSize(int size, WindowFn fn, Sample windowOffset=0.5, int rotateSamples=0) {
|
|
setSizeWindow(size, rotateSamples);
|
|
|
|
Sample invSize = 1/(Sample)size;
|
|
for (int i = 0; i < size; ++i) {
|
|
Sample r = (i + windowOffset)*invSize;
|
|
fftWindow[i] = fn(r);
|
|
}
|
|
}
|
|
/// Sets the size (using the default Blackman-Harris window)
|
|
void setSize(int size, int rotateSamples=0) {
|
|
setSize(size, [](double x) {
|
|
double phase = 2*M_PI*x;
|
|
// Blackman-Harris
|
|
return 0.35875 - 0.48829*std::cos(phase) + 0.14128*std::cos(phase*2) - 0.01168*std::cos(phase*3);
|
|
}, Sample(0.5), rotateSamples);
|
|
}
|
|
|
|
const std::vector<Sample> & window() const {
|
|
return this->fftWindow;
|
|
}
|
|
int size() const {
|
|
return mrfft.size();
|
|
}
|
|
|
|
/// Performs an FFT, with windowing and rotation (if enabled)
|
|
template<bool withWindow=true, bool withScaling=false, class Input, class Output>
|
|
void fft(Input &&input, Output &&output) {
|
|
int fftSize = size();
|
|
const Sample norm = (withScaling ? 1/(Sample)fftSize : 1);
|
|
for (int i = 0; i < offsetSamples; ++i) {
|
|
// Inverted polarity since we're using the MRFFT
|
|
timeBuffer[i + fftSize - offsetSamples] = -input[i]*norm*(withWindow ? fftWindow[i] : Sample(1));
|
|
}
|
|
for (int i = offsetSamples; i < fftSize; ++i) {
|
|
timeBuffer[i - offsetSamples] = input[i]*norm*(withWindow ? fftWindow[i] : Sample(1));
|
|
}
|
|
mrfft.fft(timeBuffer, output);
|
|
}
|
|
/// Performs an FFT (no windowing or rotation)
|
|
template<class Input, class Output>
|
|
void fftRaw(Input &&input, Output &&output) {
|
|
mrfft.fft(input, output);
|
|
}
|
|
|
|
/// Inverse FFT, with windowing, 1/N scaling and rotation (if enabled)
|
|
template<bool withWindow=true, bool withScaling=true, class Input, class Output>
|
|
void ifft(Input &&input, Output &&output) {
|
|
mrfft.ifft(input, timeBuffer);
|
|
int fftSize = mrfft.size();
|
|
const Sample norm = (withScaling ? 1/(Sample)fftSize : 1);
|
|
|
|
for (int i = 0; i < offsetSamples; ++i) {
|
|
// Inverted polarity since we're using the MRFFT
|
|
output[i] = -timeBuffer[i + fftSize - offsetSamples]*norm*(withWindow ? fftWindow[i] : Sample(1));
|
|
}
|
|
for (int i = offsetSamples; i < fftSize; ++i) {
|
|
output[i] = timeBuffer[i - offsetSamples]*norm*(withWindow ? fftWindow[i] : Sample(1));
|
|
}
|
|
}
|
|
/// Performs an IFFT (no windowing, scaling or rotation)
|
|
template<class Input, class Output>
|
|
void ifftRaw(Input &&input, Output &&output) {
|
|
mrfft.ifft(input, output);
|
|
}
|
|
};
|
|
|
|
/** STFT synthesis, built on a `MultiBuffer`.
|
|
|
|
Any window length and block interval is supported, but the FFT size may be rounded up to a faster size (by zero-padding). It uses a heuristically-optimal Kaiser window modified for perfect-reconstruction.
|
|
|
|
\diagram{stft-aliasing-simulated.svg,Simulated bad-case aliasing (random phase-shift for each band) for overlapping ratios}
|
|
|
|
There is a "latest valid index", and you can read the output up to one `historyLength` behind this (see `.resize()`). You can read up to one window-length _ahead_ to get partially-summed future output.
|
|
|
|
\diagram{stft-buffer-validity.svg}
|
|
|
|
You move the valid index along using `.ensureValid()`, passing in a functor which provides spectra (using `.analyse()` and/or direct modification through `.spectrum[c]`):
|
|
|
|
\code
|
|
void processSample(...) {
|
|
stft.ensureValid([&](int) {
|
|
// Here, we introduce (1 - windowSize) of latency
|
|
stft.analyse(inputBuffer.view(1 - windowSize));
|
|
});
|
|
// read as a MultiBuffer
|
|
auto result = stft.at(0);
|
|
++stft; // also moves the latest valid index
|
|
}
|
|
|
|
void processBlock(...) {
|
|
// assuming `historyLength` == blockSize
|
|
stft.ensureValid(blockSize, [&](int blockStartIndex) {
|
|
int inputStart = blockStartIndex + (1 - windowSize);
|
|
stft.analyse(inputBuffer.view(inputStart));
|
|
});
|
|
auto earliestValid = stft.at(0);
|
|
auto latestValid = stft.at(blockSize);
|
|
stft += blockSize;
|
|
}
|
|
\endcode
|
|
|
|
The index passed to this functor will be greater than the previous valid index, and `<=` the index you pass in. Therefore, if you call `.ensureValid()` every sample, it can only ever be `0`.
|
|
*/
|
|
template<typename Sample>
|
|
class STFT : public signalsmith::delay::MultiBuffer<Sample> {
|
|
|
|
using Super = signalsmith::delay::MultiBuffer<Sample>;
|
|
using Complex = std::complex<Sample>;
|
|
|
|
int channels = 0, _windowSize = 0, _fftSize = 0, _interval = 1;
|
|
int validUntilIndex = 0;
|
|
|
|
class MultiSpectrum {
|
|
int channels, stride;
|
|
std::vector<Complex> buffer;
|
|
public:
|
|
MultiSpectrum() : MultiSpectrum(0, 0) {}
|
|
MultiSpectrum(int channels, int bands) : channels(channels), stride(bands), buffer(channels*bands, 0) {}
|
|
|
|
void resize(int nChannels, int nBands) {
|
|
channels = nChannels;
|
|
stride = nBands;
|
|
buffer.assign(channels*stride, 0);
|
|
}
|
|
|
|
void reset() {
|
|
buffer.assign(buffer.size(), 0);
|
|
}
|
|
|
|
void swap(MultiSpectrum &other) {
|
|
using std::swap;
|
|
swap(buffer, other.buffer);
|
|
}
|
|
|
|
Complex * operator [](int channel) {
|
|
return buffer.data() + channel*stride;
|
|
}
|
|
const Complex * operator [](int channel) const {
|
|
return buffer.data() + channel*stride;
|
|
}
|
|
};
|
|
std::vector<Sample> timeBuffer;
|
|
|
|
bool rotate = false;
|
|
void resizeInternal(int newChannels, int windowSize, int newInterval, int historyLength, int zeroPadding) {
|
|
Super::resize(newChannels,
|
|
windowSize /* for output summing */
|
|
+ newInterval /* so we can read `windowSize` ahead (we'll be at most `interval-1` from the most recent block */
|
|
+ historyLength);
|
|
|
|
int fftSize = fft.fastSizeAbove(windowSize + zeroPadding);
|
|
|
|
this->channels = newChannels;
|
|
_windowSize = windowSize;
|
|
this->_fftSize = fftSize;
|
|
this->_interval = newInterval;
|
|
validUntilIndex = -1;
|
|
|
|
setWindow(windowShape, rotate);
|
|
|
|
spectrum.resize(channels, fftSize/2);
|
|
timeBuffer.resize(fftSize);
|
|
}
|
|
public:
|
|
enum class Window {kaiser, acg};
|
|
/// \deprecated use `.setWindow()` which actually updates the window when you change it
|
|
Window windowShape = Window::kaiser;
|
|
// for convenience
|
|
static constexpr Window kaiser = Window::kaiser;
|
|
static constexpr Window acg = Window::acg;
|
|
|
|
/** Swaps between the default (Kaiser) shape and Approximate Confined Gaussian (ACG).
|
|
\diagram{stft-windows.svg,Default (Kaiser) windows and partial cumulative sum}
|
|
The ACG has better rolloff since its edges go to 0:
|
|
\diagram{stft-windows-acg.svg,ACG windows and partial cumulative sum}
|
|
However, it generally has worse performance in terms of total sidelobe energy, affecting worst-case aliasing levels for (most) higher overlap ratios:
|
|
\diagram{stft-aliasing-simulated-acg.svg,Simulated bad-case aliasing for ACG windows - compare with above}*/
|
|
// TODO: these should both be set before resize()
|
|
void setWindow(Window shape, bool rotateToZero=false) {
|
|
windowShape = shape;
|
|
rotate = rotateToZero;
|
|
|
|
auto &window = fft.setSizeWindow(_fftSize, rotateToZero ? _windowSize/2 : 0);
|
|
if (windowShape == Window::kaiser) {
|
|
using Kaiser = ::signalsmith::windows::Kaiser;
|
|
/// Roughly optimal Kaiser for STFT analysis (forced to perfect reconstruction)
|
|
auto kaiser = Kaiser::withBandwidth(_windowSize/double(_interval), true);
|
|
kaiser.fill(window, _windowSize);
|
|
} else {
|
|
using Confined = ::signalsmith::windows::ApproximateConfinedGaussian;
|
|
auto confined = Confined::withBandwidth(_windowSize/double(_interval));
|
|
confined.fill(window, _windowSize);
|
|
}
|
|
::signalsmith::windows::forcePerfectReconstruction(window, _windowSize, _interval);
|
|
|
|
// TODO: fill extra bits of an input buffer with NaN/Infinity, to break this, and then fix by adding zero-padding to WindowedFFT (as opposed to zero-valued window sections)
|
|
for (int i = _windowSize; i < _fftSize; ++i) {
|
|
window[i] = 0;
|
|
}
|
|
}
|
|
|
|
using Spectrum = MultiSpectrum;
|
|
Spectrum spectrum;
|
|
WindowedFFT<Sample> fft;
|
|
|
|
STFT() {}
|
|
/// Parameters passed straight to `.resize()`
|
|
STFT(int channels, int windowSize, int interval, int historyLength=0, int zeroPadding=0) {
|
|
resize(channels, windowSize, interval, historyLength, zeroPadding);
|
|
}
|
|
|
|
/// Sets the channel-count, FFT size and interval.
|
|
void resize(int nChannels, int windowSize, int interval, int historyLength=0, int zeroPadding=0) {
|
|
resizeInternal(nChannels, windowSize, interval, historyLength, zeroPadding);
|
|
}
|
|
|
|
int windowSize() const {
|
|
return _windowSize;
|
|
}
|
|
int fftSize() const {
|
|
return _fftSize;
|
|
}
|
|
int interval() const {
|
|
return _interval;
|
|
}
|
|
/// Returns the (analysis and synthesis) window
|
|
decltype(fft.window()) window() const {
|
|
return fft.window();
|
|
}
|
|
/// Calculates the effective window for the partially-summed future output (relative to the most recent block)
|
|
std::vector<Sample> partialSumWindow(bool includeLatestBlock=true) const {
|
|
const auto &w = window();
|
|
std::vector<Sample> result(_windowSize, 0);
|
|
int firstOffset = (includeLatestBlock ? 0 : _interval);
|
|
for (int offset = firstOffset; offset < _windowSize; offset += _interval) {
|
|
for (int i = 0; i < _windowSize - offset; ++i) {
|
|
Sample value = w[i + offset];
|
|
result[i] += value*value;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/// Resets everything - since we clear the output sum, it will take `windowSize` samples to get proper output.
|
|
void reset() {
|
|
Super::reset();
|
|
spectrum.reset();
|
|
validUntilIndex = -1;
|
|
}
|
|
|
|
/** Generates valid output up to the specified index (or 0), using the callback as many times as needed.
|
|
|
|
The callback should be a functor accepting a single integer argument, which is the index for which a spectrum is required.
|
|
|
|
The block created from these spectra will start at this index in the output, plus `.latency()`.
|
|
*/
|
|
template<class AnalysisFn>
|
|
void ensureValid(int i, AnalysisFn fn) {
|
|
while (validUntilIndex < i) {
|
|
int blockIndex = validUntilIndex + 1;
|
|
fn(blockIndex);
|
|
|
|
auto output = this->view(blockIndex);
|
|
for (int c = 0; c < channels; ++c) {
|
|
auto channel = output[c];
|
|
|
|
// Clear out the future sum, a window-length and an interval ahead
|
|
for (int wi = _windowSize; wi < _windowSize + _interval; ++wi) {
|
|
channel[wi] = 0;
|
|
}
|
|
|
|
// Add in the IFFT'd result
|
|
fft.ifft(spectrum[c], timeBuffer);
|
|
for (int wi = 0; wi < _windowSize; ++wi) {
|
|
channel[wi] += timeBuffer[wi];
|
|
}
|
|
}
|
|
validUntilIndex += _interval;
|
|
}
|
|
}
|
|
/// The same as above, assuming index 0
|
|
template<class AnalysisFn>
|
|
void ensureValid(AnalysisFn fn) {
|
|
return ensureValid(0, fn);
|
|
}
|
|
/// Returns the next invalid index (a.k.a. the index of the next block)
|
|
int nextInvalid() const {
|
|
return validUntilIndex + 1;
|
|
}
|
|
|
|
/** Analyse a multi-channel input, for any type where `data[channel][index]` returns samples
|
|
|
|
Results can be read/edited using `.spectrum`. */
|
|
template<class Data>
|
|
void analyse(Data &&data) {
|
|
for (int c = 0; c < channels; ++c) {
|
|
fft.fft(data[c], spectrum[c]);
|
|
}
|
|
}
|
|
template<class Data>
|
|
void analyse(int c, Data &&data) {
|
|
fft.fft(data, spectrum[c]);
|
|
}
|
|
/// Analyse without windowing or zero-rotation
|
|
template<class Data>
|
|
void analyseRaw(Data &&data) {
|
|
for (int c = 0; c < channels; ++c) {
|
|
fft.fftRaw(data[c], spectrum[c]);
|
|
}
|
|
}
|
|
template<class Data>
|
|
void analyseRaw(int c, Data &&data) {
|
|
fft.fftRaw(data, spectrum[c]);
|
|
}
|
|
|
|
int bands() const {
|
|
return _fftSize/2;
|
|
}
|
|
|
|
/** Internal latency (between the block-index requested in `.ensureValid()` and its position in the output)
|
|
|
|
Currently unused, but it's in here to allow for a future implementation which spreads the FFT calculations out across each interval.*/
|
|
int latency() {
|
|
return 0;
|
|
}
|
|
|
|
// @name Shift the underlying buffer (moving the "valid" index accordingly)
|
|
// @{
|
|
STFT & operator ++() {
|
|
Super::operator ++();
|
|
validUntilIndex--;
|
|
return *this;
|
|
}
|
|
STFT & operator +=(int i) {
|
|
Super::operator +=(i);
|
|
validUntilIndex -= i;
|
|
return *this;
|
|
}
|
|
STFT & operator --() {
|
|
Super::operator --();
|
|
validUntilIndex++;
|
|
return *this;
|
|
}
|
|
STFT & operator -=(int i) {
|
|
Super::operator -=(i);
|
|
validUntilIndex += i;
|
|
return *this;
|
|
}
|
|
// @}
|
|
|
|
typename Super::MutableView operator ++(int postIncrement) {
|
|
auto result = Super::operator ++(postIncrement);
|
|
validUntilIndex--;
|
|
return result;
|
|
}
|
|
typename Super::MutableView operator --(int postIncrement) {
|
|
auto result = Super::operator --(postIncrement);
|
|
validUntilIndex++;
|
|
return result;
|
|
}
|
|
};
|
|
|
|
/** STFT processing, with input/output.
|
|
Before calling `.ensureValid(index)`, you should make sure the input is filled up to `index`.
|
|
*/
|
|
template<typename Sample>
|
|
class ProcessSTFT : public STFT<Sample> {
|
|
using Super = STFT<Sample>;
|
|
public:
|
|
signalsmith::delay::MultiBuffer<Sample> input;
|
|
|
|
ProcessSTFT(int inChannels, int outChannels, int windowSize, int interval, int historyLength=0) {
|
|
resize(inChannels, outChannels, windowSize, interval, historyLength);
|
|
}
|
|
|
|
/** Alter the spectrum, using input up to this point, for the output block starting from this point.
|
|
Sub-classes should replace this with whatever processing is desired. */
|
|
virtual void processSpectrum(int /*blockIndex*/) {}
|
|
|
|
/// Sets the input/output channels, FFT size and interval.
|
|
void resize(int inChannels, int outChannels, int windowSize, int interval, int historyLength=0) {
|
|
Super::resize(outChannels, windowSize, interval, historyLength);
|
|
input.resize(inChannels, windowSize + interval + historyLength);
|
|
}
|
|
void reset(Sample value=Sample()) {
|
|
Super::reset(value);
|
|
input.reset(value);
|
|
}
|
|
|
|
/// Internal latency, including buffering samples for analysis.
|
|
int latency() {
|
|
return Super::latency() + (this->windowSize() - 1);
|
|
}
|
|
|
|
void ensureValid(int i=0) {
|
|
Super::ensureValid(i, [&](int blockIndex) {
|
|
this->analyse(input.view(blockIndex - this->windowSize() + 1));
|
|
this->processSpectrum(blockIndex);
|
|
});
|
|
}
|
|
|
|
// @name Shift the output, input, and valid index.
|
|
// @{
|
|
ProcessSTFT & operator ++() {
|
|
Super::operator ++();
|
|
++input;
|
|
return *this;
|
|
}
|
|
ProcessSTFT & operator +=(int i) {
|
|
Super::operator +=(i);
|
|
input += i;
|
|
return *this;
|
|
}
|
|
ProcessSTFT & operator --() {
|
|
Super::operator --();
|
|
--input;
|
|
return *this;
|
|
}
|
|
ProcessSTFT & operator -=(int i) {
|
|
Super::operator -=(i);
|
|
input -= i;
|
|
return *this;
|
|
}
|
|
// @}
|
|
};
|
|
|
|
/** @} */
|
|
}} // signalsmith::spectral::
|
|
#endif // include guard
|