// Listing metadata: 586 lines, 18 KiB, C++

#include "./common.h"
#ifndef SIGNALSMITH_FFT_V5
#define SIGNALSMITH_FFT_V5
#include "./perf.h"
#include <vector>
#include <complex>
#include <cmath>
namespace signalsmith { namespace fft {
/** @defgroup FFT FFT (complex and real)
@brief Fourier transforms (complex and real)
@{
@file
*/
namespace _fft_impl {
/// Returns the real part of `c` by reading element 0 of its underlying storage.
/// For the floating-point specialisations the standard guarantees that a `std::complex<V>` can be accessed as an array of two `V`, so this read is well-defined.
template <typename V>
SIGNALSMITH_INLINE V complexReal(const std::complex<V> &c) {
	// Named cast which keeps the `const`, rather than a C-style cast which silently drops it
	return reinterpret_cast<const V *>(&c)[0];
}
/// Returns the imaginary part of `c` by reading element 1 of its underlying storage.
/// For the floating-point specialisations the standard guarantees that a `std::complex<V>` can be accessed as an array of two `V`, so this read is well-defined.
template <typename V>
SIGNALSMITH_INLINE V complexImag(const std::complex<V> &c) {
	// Named cast which keeps the `const`, rather than a C-style cast which silently drops it
	return reinterpret_cast<const V *>(&c)[1];
}
// std::complex's own multiplication has to deal with Inf/NaN edge-cases, which makes it non-inlineable - so this is a plain hand-written product
/// Returns `a*b`, or `a*conj(b)` when `conjugateSecond` is true.
template <bool conjugateSecond=false, typename V>
SIGNALSMITH_INLINE std::complex<V> complexMul(const std::complex<V> &a, const std::complex<V> &b) {
	const V aReal = complexReal(a);
	const V aImag = complexImag(a);
	const V bReal = complexReal(b);
	const V bImag = complexImag(b);
	if (conjugateSecond) {
		// a * conj(b)
		return std::complex<V>{
			bReal*aReal + bImag*aImag,
			bReal*aImag - bImag*aReal
		};
	}
	// a * b
	return std::complex<V>{
		aReal*bReal - aImag*bImag,
		aReal*bImag + aImag*bReal
	};
}
/// Returns `a + i*b`, or `a - i*b` when `flipped` is true, without performing a full complex multiplication.
template<bool flipped, typename V>
SIGNALSMITH_INLINE std::complex<V> complexAddI(const std::complex<V> &a, const std::complex<V> &b) {
	const V aReal = complexReal(a);
	const V aImag = complexImag(a);
	const V bReal = complexReal(b);
	const V bImag = complexImag(b);
	if (flipped) {
		// a - i*b
		return std::complex<V>{
			aReal + bImag,
			aImag - bReal
		};
	}
	// a + i*b
	return std::complex<V>{
		aReal - bImag,
		aImag + bReal
	};
}
// Iterator lookup via SFINAE: the specialisation is selected whenever std::begin() accepts a T, otherwise the value is assumed to be an iterator already
/// Fallback case: hands back the value unchanged.
template<typename T, typename=void>
struct GetIterator {
	static T get(const T &value) {
		return value;
	}
};
/// Range case: `std::begin(T)` is well-formed, so return the range's begin iterator.
template<typename T>
struct GetIterator<T, decltype(void(std::begin(std::declval<T>())))> {
	static auto get(const T &range) -> decltype(std::begin(range)) {
		return std::begin(range);
	}
};
/// Returns `GetIterator<T>::get(t)`: `std::begin(t)` when that is valid for `T`, otherwise `t` itself.
/// NOTE(review): the return type is supplied by `SIGNALSMITH_AUTO_RETURN`, which is defined outside this file - presumably a deduced-return-type helper; confirm.
template<typename T>
SIGNALSMITH_AUTO_RETURN(getIterator(T &&t), GetIterator<T>::get(t))
}
/** Floating-point FFT implementation.
It is fast for 2^a * 3^b.
Here are the peak and RMS errors for `float`/`double` computation:
\diagram{fft-errors.svg Simulated errors for pure-tone harmonic inputs\, compared to a theoretical upper bound from "Roundoff error analysis of the fast Fourier transform" (G. Ramos, 1971)}
*/
template<typename V=double>
class FFT {
using complex = std::complex<V>;
size_t _size;
std::vector<complex> workingVector;
/// Which butterfly routine a plan step uses: `step2`/`step3`/`step4` select the specialised routines for those factors, `generic` handles any other factor.
enum class StepType {
generic, step2, step3, step4
};
/// One stage of the FFT plan, as produced by `addPlanSteps()`.
struct Step {
// Which butterfly routine runs this step
StepType type;
// Radix of this step (number of points combined per butterfly)
size_t factor;
// Offset added to the output iterator before the step runs
size_t startIndex;
// Number of butterflies per block; also used as the stride between a butterfly's points
size_t innerRepeats;
// Number of consecutive blocks (each `factor*innerRepeats` points) the step processes
size_t outerRepeats;
// Offset of this step's first twiddle in `twiddleVector`
size_t twiddleIndex;
};
std::vector<size_t> factors;
std::vector<Step> plan;
std::vector<complex> twiddleVector;
/// One entry of the input permutation. `permute()` writes `data[from] = input[to]`, so `from` indexes the working buffer and `to` indexes the caller's input.
struct PermutationPair {size_t from, to;};
std::vector<PermutationPair> permutation;
/// Recursively appends plan steps for a sub-transform of `length` points starting at offset `start`, using the factors from `factorIndex` onwards.
/// `repeats` is the number of consecutive blocks this level's step covers (it becomes the step's `outerRepeats`).
/// Steps for the smaller sub-transforms are appended to `plan` before this level's own step.
void addPlanSteps(size_t factorIndex, size_t start, size_t length, size_t repeats) {
if (factorIndex >= factors.size()) return;
size_t factor = factors[factorIndex];
// Two consecutive factors of 2 are merged into a single radix-4 step
if (factorIndex + 1 < factors.size()) {
if (factors[factorIndex] == 2 && factors[factorIndex + 1] == 2) {
++factorIndex;
factor = 4;
}
}
size_t subLength = length/factor;
// Defaults to `generic`, with new twiddles starting at the current end of `twiddleVector`
Step mainStep{StepType::generic, factor, start, subLength, repeats, twiddleVector.size()};
if (factor == 2) mainStep.type = StepType::step2;
if (factor == 3) mainStep.type = StepType::step3;
if (factor == 4) mainStep.type = StepType::step4;
// Twiddles
// Re-use the twiddles of an earlier step which has the same factor and sub-length, if there is one
bool foundStep = false;
for (const Step &existingStep : plan) {
if (existingStep.factor == mainStep.factor && existingStep.innerRepeats == mainStep.innerRepeats) {
foundStep = true;
mainStep.twiddleIndex = existingStep.twiddleIndex;
break;
}
}
// Otherwise append `factor` twiddles for each of the `subLength` positions: exp(-2*pi*i * position*f/length)
if (!foundStep) {
for (size_t i = 0; i < subLength; ++i) {
for (size_t f = 0; f < factor; ++f) {
double phase = 2*M_PI*i*f/length;
complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
twiddleVector.push_back(twiddle);
}
}
}
// A non-repeated step whose sub-blocks each exceed 64KiB gets every sub-block planned separately
// (presumably so each sub-transform's working set stays cache-sized - TODO confirm).
// Otherwise a single set of steps covers all `factor` sub-blocks at once.
if (repeats == 1 && sizeof(complex)*subLength > 65536) {
for (size_t i = 0; i < factor; ++i) {
addPlanSteps(factorIndex + 1, start + i*subLength, subLength, 1);
}
} else {
addPlanSteps(factorIndex + 1, start, subLength, repeats*factor);
}
// This level's step goes last, so it runs after the sub-transforms it combines
plan.push_back(mainStep);
}
void setPlan() {
factors.resize(0);
size_t size = _size, factor = 2;
while (size > 1) {
if (size%factor == 0) {
factors.push_back(factor);
size /= factor;
} else if (factor > sqrt(size)) {
factor = size;
} else {
++factor;
}
}
plan.resize(0);
twiddleVector.resize(0);
addPlanSteps(0, 0, _size, 1);
permutation.resize(0);
permutation.push_back(PermutationPair{0, 0});
size_t indexLow = 0, indexHigh = factors.size();
size_t inputStepLow = _size, outputStepLow = 1;
size_t inputStepHigh = 1, outputStepHigh = _size;
while (outputStepLow*inputStepHigh < _size) {
size_t f, inputStep, outputStep;
if (outputStepLow <= inputStepHigh) {
f = factors[indexLow++];
inputStep = (inputStepLow /= f);
outputStep = outputStepLow;
outputStepLow *= f;
} else {
f = factors[--indexHigh];
inputStep = inputStepHigh;
inputStepHigh *= f;
outputStep = (outputStepHigh /= f);
}
size_t oldSize = permutation.size();
for (size_t i = 1; i < f; ++i) {
for (size_t j = 0; j < oldSize; ++j) {
PermutationPair pair = permutation[j];
pair.from += i*inputStep;
pair.to += i*outputStep;
permutation.push_back(pair);
}
}
}
}
template<typename RandomAccessIterator>
void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) {
complex *working = workingVector.data();
const size_t stride = step.innerRepeats;
for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
RandomAccessIterator data = origData;
const complex *twiddles = twiddleVector.data() + step.twiddleIndex;
const size_t factor = step.factor;
for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) {
for (size_t i = 0; i < step.factor; ++i) {
working[i] = _fft_impl::complexMul(data[i*stride], twiddles[i]);
}
for (size_t f = 0; f < factor; ++f) {
complex sum = working[0];
for (size_t i = 1; i < factor; ++i) {
double phase = 2*M_PI*f*i/factor;
complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
sum += _fft_impl::complexMul(working[i], twiddle);
}
data[f*stride] = sum;
}
++data;
twiddles += factor;
}
origData += step.factor*step.innerRepeats;
}
}
/// Radix-2 step: in-place 2-point butterflies over `step.outerRepeats` blocks of `2*step.innerRepeats` points.
template<typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) {
	const size_t stride = step.innerRepeats;
	const complex *firstTwiddle = twiddleVector.data() + step.twiddleIndex;
	for (size_t outer = 0; outer < step.outerRepeats; ++outer) {
		RandomAccessIterator cursor = origData;
		const complex *pairTwiddles = firstTwiddle;
		for (size_t inner = 0; inner < stride; ++inner) {
			const complex A = cursor[0];
			const complex B = _fft_impl::complexMul(cursor[stride], pairTwiddles[1]);
			cursor[0] = A + B;
			cursor[stride] = A - B;
			++cursor;
			pairTwiddles += 2;
		}
		origData += 2*stride;
	}
}
/// Radix-3 step: in-place 3-point butterflies over `step.outerRepeats` blocks of `3*step.innerRepeats` points.
template<typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) {
	// exp(-2*pi*i/3) = -1/2 - i*sqrt(3)/2
	constexpr complex factor3 = {-0.5, -0.8660254037844386};
	const size_t stride = step.innerRepeats;
	const complex *firstTwiddle = twiddleVector.data() + step.twiddleIndex;
	for (size_t outer = 0; outer < step.outerRepeats; ++outer) {
		RandomAccessIterator cursor = origData;
		const complex *tripleTwiddles = firstTwiddle;
		for (size_t inner = 0; inner < stride; ++inner) {
			const complex A = cursor[0];
			const complex B = _fft_impl::complexMul(cursor[stride], tripleTwiddles[1]);
			const complex C = _fft_impl::complexMul(cursor[stride*2], tripleTwiddles[2]);
			// The two non-zero bins share these terms, differing only in the sign of the i*imagSum part
			const complex realSum = A + (B + C)*factor3.real();
			const complex imagSum = (B - C)*factor3.imag();
			cursor[0] = A + B + C;
			cursor[stride] = _fft_impl::complexAddI<false>(realSum, imagSum);
			cursor[stride*2] = _fft_impl::complexAddI<true>(realSum, imagSum);
			++cursor;
			tripleTwiddles += 3;
		}
		origData += 3*stride;
	}
}
/// Radix-4 step (planned when two adjacent factors of 2 are merged): in-place 4-point butterflies over `step.outerRepeats` blocks of `4*step.innerRepeats` points.
template<typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) {
const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
for (size_t outerRepeat = 0; outerRepeat < step.outerRepeats; ++outerRepeat) {
// Every block uses the same twiddles, so restart from the step's first twiddle
const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0];
// Note the pairing: the point at `stride` uses twiddles[2], and the point at `stride*2` uses twiddles[1]
// (presumably because the two merged radix-2 stages leave the middle inputs swapped - TODO confirm)
complex C = _fft_impl::complexMul(data[stride], twiddles[2]);
complex B = _fft_impl::complexMul(data[stride*2], twiddles[1]);
complex D = _fft_impl::complexMul(data[stride*3], twiddles[3]);
complex sumAC = A + C, sumBD = B + D;
complex diffAC = A - C, diffBD = B - D;
data[0] = sumAC + sumBD;
// complexAddI<true> gives diffAC - i*diffBD
data[stride] = _fft_impl::complexAddI<true>(diffAC, diffBD);
data[stride*2] = sumAC - sumBD;
// complexAddI<false> gives diffAC + i*diffBD
data[stride*3] = _fft_impl::complexAddI<false>(diffAC, diffBD);
twiddles += 4;
}
origData += 4*stride;
}
}
/// Copies `input` into `data` in the pre-computed permuted order, ready for the in-place plan steps.
/// When `conjugateFlip` is set, every input index except 0 is mirrored to `_size - index`; `Task` sets this for the inverse transform.
/// Does nothing for a zero-length FFT.
template<typename InputIterator, typename OutputIterator>
void permute(bool conjugateFlip, InputIterator input, OutputIterator data) {
	// A zero-length transform has no element 0, so the buffers must not be touched at all
	if (_size == 0) return;
	// Index 0 maps to itself in both directions
	data[0] = input[0];
	if (conjugateFlip) {
		for (size_t i = 1; i < permutation.size(); ++i) {
			auto &pair = permutation[i];
			data[pair.from] = input[_size - pair.to];
		}
	} else {
		for (size_t i = 1; i < permutation.size(); ++i) {
			auto &pair = permutation[i];
			data[pair.from] = input[pair.to];
		}
	}
}
template<typename InputIterator, typename OutputIterator>
struct Task {
FFT &fft;
bool inverse;
InputIterator input;
OutputIterator output;
void operator()(int stepIndex) {
if (stepIndex == 0) {
fft.permute(inverse, input, output);
} else {
auto &step = fft.plan[stepIndex - 1];
switch (step.type) {
case StepType::generic:
fft.fftStepGeneric(output + step.startIndex, step);
break;
case StepType::step2:
fft.fftStep2(output + step.startIndex, step);
break;
case StepType::step3:
fft.fftStep3(output + step.startIndex, step);
break;
case StepType::step4:
fft.fftStep4(output + step.startIndex, step);
break;
}
}
}
};
/// Wraps a `Task` for this FFT in a `signalsmith::perf::SegmentedTask`.
/// The second argument is `plan.size() + 1`, matching the step indices `Task` accepts (0 = permutation, 1..plan.size() = plan steps).
/// The `Task` stores a reference to this FFT and copies of both iterators.
template<typename InputIterator, typename OutputIterator>
signalsmith::perf::SegmentedTask<Task<InputIterator, OutputIterator>> makeTask(bool inverse, InputIterator &&input, OutputIterator &&output) {
return {{*this, inverse, input, output}, int(plan.size() + 1)};
}
/// Whether `size` is marked as a fast size in the small lookup table below (which covers 0-31).
/// Sizes of 32 or more are outside the table and always report `false`; `fastSizeAbove()`/`fastSizeBelow()` halve larger sizes into range before asking.
static bool validSize(size_t size) {
	constexpr size_t tableSize = 32;
	constexpr static bool filter[tableSize] = {
		1, 1, 1, 1, 1, 0, 1, 0, 1, 1, // 0-9
		0, 0, 1, 0, 0, 0, 1, 0, 1, 0, // 10-19
		0, 0, 0, 0, 1, 0, 0, 0, 0, 0, // 20-29
		0, 0
	};
	// Guard the lookup, so an out-of-range size can never read past the end of the table
	return size < tableSize && filter[size];
}
public:
/// Returns a fast size which is at least `size`.
/// The size is halved (rounding up) until it is below 32, bumped up to the next entry accepted by `validSize()` (or to 32), then scaled back up by the same power of two.
static size_t fastSizeAbove(size_t size) {
	size_t scale = 1;
	for (; size >= 32; scale *= 2) {
		// Halve, rounding up, so the final result cannot fall below the request
		size = (size - 1)/2 + 1;
	}
	while (size < 32 && !validSize(size)) ++size;
	return scale*size;
}
/// Returns a fast size which is at most `size`.
/// The size is halved (rounding down) until it is below 32, reduced to the previous entry accepted by `validSize()`, then scaled back up by the same power of two.
static size_t fastSizeBelow(size_t size) {
	size_t scale = 1;
	for (; size >= 32; scale *= 2) {
		// Halve, rounding down, so the final result cannot exceed the request
		size /= 2;
	}
	while (size > 1 && !validSize(size)) --size;
	return scale*size;
}
/// Creates an FFT of the given size.
/// A positive `fastDirection` rounds the size up to a fast size, a negative one rounds it down, and 0 uses the size as given.
FFT(size_t size, int fastDirection=0) : _size(0) {
	if (fastDirection > 0) {
		size = fastSizeAbove(size);
	} else if (fastDirection < 0) {
		size = fastSizeBelow(size);
	}
	this->setSize(size);
}
/// Sets the transform size, resizing the scratch buffer and rebuilding the plan only when the size actually changes.
/// Returns the size now in use.
size_t setSize(size_t size) {
	// Nothing to rebuild if the size is unchanged
	if (size == _size) return _size;
	_size = size;
	workingVector.resize(size);
	setPlan();
	return _size;
}
/// Sets the size to `fastSizeAbove(size)` and returns the size now in use.
size_t setFastSizeAbove(size_t size) {
return setSize(fastSizeAbove(size));
}
/// Sets the size to `fastSizeBelow(size)` and returns the size now in use.
size_t setFastSizeBelow(size_t size) {
return setSize(fastSizeBelow(size));
}
/// The current transform size. Returned by const reference to the member, so the reference reflects later `setSize()` calls.
const size_t & size() const {
return _size;
}
/// Runs the forward transform from `input` to `output` by building a task and invoking it immediately.
/// Each argument may be anything `std::begin()` accepts, or an iterator/pointer (see `getIterator()`); elements are indexed up to `size() - 1`.
/// NOTE(review): `permute()` writes `output` while still reading `input`, so the two presumably must not be the same buffer - confirm.
/// NOTE(review): the meaning of the `1` passed to the task is defined by `signalsmith::perf` (not visible here) - confirm.
template<typename Input, typename Output>
void fft(Input &&input, Output &&output) {
return task(false, input, output)(1);
}
/// Runs the inverse transform from `input` to `output`: the same plan as `fft()`, but with the input permutation mirrored.
/// NOTE(review): none of the steps visible in this file apply any scaling, so `ifft(fft(x))` is presumably `size()*x` - confirm.
/// NOTE(review): the meaning of the `1` passed to the task is defined by `signalsmith::perf` (not visible here) - confirm.
template<typename Input, typename Output>
void ifft(Input &&input, Output &&output) {
return task(true, input, output)(1);
}
/// Builds (without running) a segmented task for a forward (`inverse == false`) or inverse transform.
/// `input` and `output` are first converted with `getIterator()`, so containers and iterators are both accepted.
/// The task holds a reference to this FFT and copies of the iterators, so both must stay valid for as long as the task is used.
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(task(bool inverse, Input &&input, Output &&output),
makeTask(inverse, _fft_impl::getIterator(input), _fft_impl::getIterator(output))
)
};
/// Bit flags for the `optionFlags` template parameter of `RealFFT`.
struct FFTOptions {
// Selects the "modified" real FFT, which adds per-sample rotations and offsets the twiddle phase by half a bin (see `RealFFT::setSize()`)
static constexpr int halfFreqShift = 1;
};
template<typename V, int optionFlags=0>
class RealFFT {
static constexpr bool modified = (optionFlags&FFTOptions::halfFreqShift);
using complex = std::complex<V>;
std::vector<complex> complexBuffer1, complexBuffer2;
std::vector<complex> twiddlesMinusI;
std::vector<complex> modifiedRotations;
FFT<V> complexFft;
template<typename Input>
void fftPackInput(Input input) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
if (modified) {
complexBuffer1[i] = _fft_impl::complexMul({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
} else {
complexBuffer1[i] = {input[2*i], input[2*i + 1]};
}
}
}
template<typename Output>
void fftOutputBufferfly(Output output) {
if (!modified) output[0] = {
complexBuffer2[0].real() + complexBuffer2[0].imag(),
complexBuffer2[0].real() - complexBuffer2[0].imag()
};
size_t hSize = complexFft.size();
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
complex evenRotMinusI = _fft_impl::complexMul(evenI, twiddlesMinusI[i]);
output[i] = odd + evenRotMinusI;
output[conjI] = conj(odd - evenRotMinusI);
}
}
/// Converts the half-spectrum `input` into the packed complex sequence in `complexBuffer1`, ready for the inverse complex FFT.
/// This mirrors `fftOutputBufferfly()`, but multiplies by the conjugate twiddle and applies no factor of 0.5.
template<typename Input>
void ifftInputBufferfly(Input input) {
size_t hSize = complexFft.size();
// Unmodified transform: input[0] holds two values packed as (real, imag), which are un-mixed by sum and difference
if (!modified) complexBuffer1[0] = {
input[0].real() + input[0].imag(),
input[0].real() - input[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
// Index of the mirrored partner bin
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
// Both inputs are read before either output is written
complex v = input[i], v2 = input[conjI];
complex odd = v + conj(v2);
complex evenRotMinusI = v - conj(v2);
// complexMul<true> multiplies by the conjugate of the twiddle, undoing the forward rotation
complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
complexBuffer1[i] = odd + evenI;
complexBuffer1[conjI] = conj(odd - evenI);
}
}
template<typename Output>
void ifftUnpackOutput(Output output) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
complex v = complexBuffer2[i];
if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
output[2*i] = v.real();
output[2*i + 1] = v.imag();
}
}
public:
/// Returns an even size of at least `size`: twice the complex FFT's fast size for half of `size` (rounded up).
static size_t fastSizeAbove(size_t size) {
return FFT<V>::fastSizeAbove((size + 1)/2)*2;
}
/// Returns an even size of at most `size`: twice the complex FFT's fast size for half of `size` (rounded down).
static size_t fastSizeBelow(size_t size) {
return FFT<V>::fastSizeBelow(size/2)*2;
}
/// Creates a real FFT of the given size, with a minimum of 2.
/// A positive `fastDirection` rounds the size up to a fast size, a negative one rounds it down, and 0 uses the size as given.
RealFFT(size_t size=0, int fastDirection=0) : complexFft(0) {
	if (fastDirection > 0) {
		size = fastSizeAbove(size);
	} else if (fastDirection < 0) {
		size = fastSizeBelow(size);
	}
	this->setSize(std::max<size_t>(size, 2));
}
/// Sets the real transform size, resizing the buffers and recomputing the twiddles (plus the per-sample rotations for the modified transform).
/// An odd `size` is rounded down to the even size below it, so that the tables always match the length actually transformed (as reported by `size()`).
/// Returns the result of resizing the internal complex FFT, i.e. *half* the real size.
size_t setSize(size_t size) {
	// The transform runs on size/2 complex points, so only even lengths are meaningful.
	// Without this, an odd size would produce twiddles for `size` but a transform of `size - 1` points.
	size -= size%2;
	const size_t hSize = size/2;
	complexBuffer1.resize(hSize);
	complexBuffer2.resize(hSize);

	// One twiddle per mirrored bin pair handled by the butterflies (indices 0..hSize/2)
	const size_t hhSize = size/4 + 1;
	twiddlesMinusI.resize(hhSize);
	for (size_t i = 0; i < hhSize; ++i) {
		// The modified transform offsets the phase by half a bin
		V rotPhase = -2*M_PI*(modified ? i + 0.5 : i)/size;
		// (sin, -cos) is exp(i*rotPhase) multiplied by -i
		twiddlesMinusI[i] = {std::sin(rotPhase), -std::cos(rotPhase)};
	}
	if (modified) {
		// Rotation for each packed complex input sample
		modifiedRotations.resize(hSize);
		for (size_t i = 0; i < hSize; ++i) {
			V rotPhase = -2*M_PI*i/size;
			modifiedRotations[i] = {std::cos(rotPhase), std::sin(rotPhase)};
		}
	}
	return complexFft.setSize(hSize);
}
/// Sets the size to `fastSizeAbove(size)` and returns whatever `setSize()` returns for it.
size_t setFastSizeAbove(size_t size) {
return setSize(fastSizeAbove(size));
}
/// Sets the size to `fastSizeBelow(size)` and returns whatever `setSize()` returns for it.
size_t setFastSizeBelow(size_t size) {
return setSize(fastSizeBelow(size));
}
/// The real transform size: twice the size of the internal complex FFT.
size_t size() const {
return complexFft.size()*2;
}
/// Builds (without running) a segmented task for the forward real FFT, chaining three stages:
/// 1. `fftPackInput` packs the real input as complex pairs into `complexBuffer1`;
/// 2. the half-size complex FFT transforms `complexBuffer1` into `complexBuffer2`;
/// 3. `fftOutputBufferfly` converts that result into the half-spectrum written to `output`.
/// `input` and `output` go through `getIterator()`, so containers and iterators are both accepted.
/// NOTE(review): the trailing `1` arguments and the `.then()` semantics come from `signalsmith::perf` (not visible here) - presumably a step count per stage; confirm.
/// NOTE(review): the bound callables hold `this` and the iterators, so the task presumably must not outlive this object or the buffers - confirm.
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(fftTask(Input &&input, Output &&output),
signalsmith::perf::segmentTask(std::bind(
&RealFFT::fftPackInput<decltype(_fft_impl::getIterator(input))>,
this,
_fft_impl::getIterator(input)
), 1)
.then(complexFft.task(false, complexBuffer1, complexBuffer2))
.then(std::bind(
&RealFFT::fftOutputBufferfly<decltype(_fft_impl::getIterator(output))>,
this,
_fft_impl::getIterator(output)
), 1)
)
/// Runs the forward real FFT immediately: reads `size()` real samples from `input` and writes complex bins to `output`.
/// For the unmodified transform, `output[0]` carries two values packed as (real, imag) - see `fftOutputBufferfly()`.
/// NOTE(review): the meaning of the `1` passed to the task is defined by `signalsmith::perf` (not visible here) - confirm.
template<typename Input, typename Output>
void fft(Input &&input, Output &&output) {
fftTask(std::forward<Input>(input), std::forward<Output>(output))(1);
}
/// Builds (without running) a segmented task for the inverse real FFT, chaining three stages:
/// 1. `ifftInputBufferfly` converts the half-spectrum `input` into `complexBuffer1`;
/// 2. the half-size inverse complex FFT transforms `complexBuffer1` into `complexBuffer2`;
/// 3. `ifftUnpackOutput` writes the interleaved real samples to `output`.
/// `input` and `output` go through `getIterator()`, so containers and iterators are both accepted.
/// NOTE(review): the trailing `1` arguments and the `.then()` semantics come from `signalsmith::perf` (not visible here) - presumably a step count per stage; confirm.
/// NOTE(review): the bound callables hold `this` and the iterators, so the task presumably must not outlive this object or the buffers - confirm.
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(ifftTask(Input &&input, Output &&output),
signalsmith::perf::segmentTask(std::bind(
&RealFFT::ifftInputBufferfly<decltype(_fft_impl::getIterator(input))>,
this,
_fft_impl::getIterator(input)
), 1)
.then(complexFft.task(true, complexBuffer1, complexBuffer2))
.then(std::bind(
&RealFFT::ifftUnpackOutput<decltype(_fft_impl::getIterator(output))>,
this,
_fft_impl::getIterator(output)
), 1)
)
/// Runs the inverse real FFT immediately: reads complex bins from `input` and writes `size()` real samples to `output`.
/// NOTE(review): no scaling is applied by the stages visible in this file, so the round trip is presumably not normalised - confirm.
/// NOTE(review): the meaning of the `1` passed to the task is defined by `signalsmith::perf` (not visible here) - confirm.
template<typename Input, typename Output>
void ifft(Input &&input, Output &&output) {
ifftTask(std::forward<Input>(input), std::forward<Output>(output))(1);
}
};
/// Convenience type: a `RealFFT` with the `FFTOptions::halfFreqShift` option enabled, inheriting all of its constructors.
template<typename V>
struct ModifiedRealFFT : public RealFFT<V, FFTOptions::halfFreqShift> {
using RealFFT<V, FFTOptions::halfFreqShift>::RealFFT;
};
/// @}
}} // namespace
#endif // include guard