Update DSP library

This commit is contained in:
Geraint 2025-01-29 15:50:30 +00:00
parent 3b4ca143ce
commit 851a478ce0
5 changed files with 109 additions and 233 deletions

View File

@ -28,5 +28,5 @@ clean:
### Example use of CMake ### Example use of CMake
cmake: cmake:
cmake -B build -G Xcode -D CMAKE_RUNTIME_OUTPUT_DIRECTORY=../out cmake -B build -G Xcode
cmake --build build --config Release cmake --build build --config Release

View File

@ -22,7 +22,7 @@ Delay delayLine(1024);
You can add a compile-time version-check to make sure you have a compatible version of the library: You can add a compile-time version-check to make sure you have a compatible version of the library:
```cpp ```cpp
#include "dsp/envelopes.h" #include "dsp/envelopes.h"
SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 0) SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 1)
``` ```
### Development / contributing ### Development / contributing

View File

@ -2,18 +2,13 @@
#define SIGNALSMITH_DSP_COMMON_H #define SIGNALSMITH_DSP_COMMON_H
#if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099) #if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099)
# error Apple Clang 16.0.0 is broken, and generates completely incorrect code for some SIMD operations. -ffast-math makes it worse, so if you HAVE to use this version of Clang, you can't enable -ffast-math. # error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math.
#endif #endif
#ifndef M_PI #ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288 #define M_PI 3.14159265358979323846264338327950288
#endif #endif
// C++11 doesn't have full auto return types, but this is enough
#ifndef SIGNALSMITH_AUTO_RETURN
# define SIGNALSMITH_AUTO_RETURN(signature, expr) auto signature -> decltype(expr) {return expr;}
#endif
namespace signalsmith { namespace signalsmith {
/** @defgroup Common Common /** @defgroup Common Common
@brief Definitions and helper classes used by the rest of the library @brief Definitions and helper classes used by the rest of the library
@ -24,8 +19,8 @@ namespace signalsmith {
#define SIGNALSMITH_DSP_VERSION_MAJOR 1 #define SIGNALSMITH_DSP_VERSION_MAJOR 1
#define SIGNALSMITH_DSP_VERSION_MINOR 6 #define SIGNALSMITH_DSP_VERSION_MINOR 6
#define SIGNALSMITH_DSP_VERSION_PATCH 0 #define SIGNALSMITH_DSP_VERSION_PATCH 1
#define SIGNALSMITH_DSP_VERSION_STRING "1.6.0" #define SIGNALSMITH_DSP_VERSION_STRING "1.6.1"
/** Version compatability check. /** Version compatability check.
\code{.cpp} \code{.cpp}
@ -48,5 +43,5 @@ namespace signalsmith {
} // signalsmith:: } // signalsmith::
#else #else
// If we've already included it, check it's the same version // If we've already included it, check it's the same version
static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 0, "multiple versions of the Signalsmith DSP library"); static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 1, "multiple versions of the Signalsmith DSP library");
#endif // include guard #endif // include guard

247
dsp/fft.h
View File

@ -29,7 +29,7 @@ namespace signalsmith { namespace fft {
} }
// Complex multiplication has edge-cases around Inf/NaN - handling those properly makes std::complex non-inlineable, so we use our own // Complex multiplication has edge-cases around Inf/NaN - handling those properly makes std::complex non-inlineable, so we use our own
template <bool conjugateSecond=false, typename V> template <bool conjugateSecond, typename V>
SIGNALSMITH_INLINE std::complex<V> complexMul(const std::complex<V> &a, const std::complex<V> &b) { SIGNALSMITH_INLINE std::complex<V> complexMul(const std::complex<V> &a, const std::complex<V> &b) {
V aReal = complexReal(a), aImag = complexImag(a); V aReal = complexReal(a), aImag = complexImag(a);
V bReal = complexReal(b), bImag = complexImag(b); V bReal = complexReal(b), bImag = complexImag(b);
@ -68,8 +68,6 @@ namespace signalsmith { namespace fft {
return std::begin(t); return std::begin(t);
} }
}; };
template<typename T>
SIGNALSMITH_AUTO_RETURN(getIterator(T &&t), GetIterator<T>::get(t))
} }
/** Floating-point FFT implementation. /** Floating-point FFT implementation.
@ -197,7 +195,7 @@ namespace signalsmith { namespace fft {
} }
} }
template<typename RandomAccessIterator> template<bool inverse, typename RandomAccessIterator>
void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) { void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) {
complex *working = workingVector.data(); complex *working = workingVector.data();
const size_t stride = step.innerRepeats; const size_t stride = step.innerRepeats;
@ -209,14 +207,14 @@ namespace signalsmith { namespace fft {
const size_t factor = step.factor; const size_t factor = step.factor;
for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) { for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) {
for (size_t i = 0; i < step.factor; ++i) { for (size_t i = 0; i < step.factor; ++i) {
working[i] = _fft_impl::complexMul(data[i*stride], twiddles[i]); working[i] = _fft_impl::complexMul<inverse>(data[i*stride], twiddles[i]);
} }
for (size_t f = 0; f < factor; ++f) { for (size_t f = 0; f < factor; ++f) {
complex sum = working[0]; complex sum = working[0];
for (size_t i = 1; i < factor; ++i) { for (size_t i = 1; i < factor; ++i) {
double phase = 2*M_PI*f*i/factor; double phase = 2*M_PI*f*i/factor;
complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))}; complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
sum += _fft_impl::complexMul(working[i], twiddle); sum += _fft_impl::complexMul<inverse>(working[i], twiddle);
} }
data[f*stride] = sum; data[f*stride] = sum;
} }
@ -227,7 +225,7 @@ namespace signalsmith { namespace fft {
} }
} }
template<typename RandomAccessIterator> template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) { SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) {
const size_t stride = step.innerRepeats; const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -235,7 +233,7 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles; const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) { for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0]; complex A = data[0];
complex B = _fft_impl::complexMul(data[stride], twiddles[1]); complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
data[0] = A + B; data[0] = A + B;
data[stride] = A - B; data[stride] = A - B;
@ -245,9 +243,9 @@ namespace signalsmith { namespace fft {
} }
} }
template<typename RandomAccessIterator> template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) { SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) {
constexpr complex factor3 = {-0.5, -0.8660254037844386}; constexpr complex factor3 = {-0.5, inverse ? 0.8660254037844386 : -0.8660254037844386};
const size_t stride = step.innerRepeats; const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -255,8 +253,8 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles; const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) { for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0]; complex A = data[0];
complex B = _fft_impl::complexMul(data[stride], twiddles[1]); complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
complex C = _fft_impl::complexMul(data[stride*2], twiddles[2]); complex C = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[2]);
complex realSum = A + (B + C)*factor3.real(); complex realSum = A + (B + C)*factor3.real();
complex imagSum = (B - C)*factor3.imag(); complex imagSum = (B - C)*factor3.imag();
@ -271,7 +269,7 @@ namespace signalsmith { namespace fft {
} }
} }
template<typename RandomAccessIterator> template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) { SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) {
const size_t stride = step.innerRepeats; const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -280,17 +278,17 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles; const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) { for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0]; complex A = data[0];
complex C = _fft_impl::complexMul(data[stride], twiddles[2]); complex C = _fft_impl::complexMul<inverse>(data[stride], twiddles[2]);
complex B = _fft_impl::complexMul(data[stride*2], twiddles[1]); complex B = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[1]);
complex D = _fft_impl::complexMul(data[stride*3], twiddles[3]); complex D = _fft_impl::complexMul<inverse>(data[stride*3], twiddles[3]);
complex sumAC = A + C, sumBD = B + D; complex sumAC = A + C, sumBD = B + D;
complex diffAC = A - C, diffBD = B - D; complex diffAC = A - C, diffBD = B - D;
data[0] = sumAC + sumBD; data[0] = sumAC + sumBD;
data[stride] = _fft_impl::complexAddI<true>(diffAC, diffBD); data[stride] = _fft_impl::complexAddI<!inverse>(diffAC, diffBD);
data[stride*2] = sumAC - sumBD; data[stride*2] = sumAC - sumBD;
data[stride*3] = _fft_impl::complexAddI<false>(diffAC, diffBD); data[stride*3] = _fft_impl::complexAddI<inverse>(diffAC, diffBD);
twiddles += 4; twiddles += 4;
} }
@ -299,54 +297,33 @@ namespace signalsmith { namespace fft {
} }
template<typename InputIterator, typename OutputIterator> template<typename InputIterator, typename OutputIterator>
void permute(bool conjugateFlip, InputIterator input, OutputIterator data) { void permute(InputIterator input, OutputIterator data) {
data[0] = input[0]; for (auto pair : permutation) {
if (conjugateFlip) {
for (size_t i = 1; i < permutation.size(); ++i) {
auto &pair = permutation[i];
data[pair.from] = input[_size - pair.to];
}
} else {
for (size_t i = 1; i < permutation.size(); ++i) {
auto &pair = permutation[i];
data[pair.from] = input[pair.to]; data[pair.from] = input[pair.to];
} }
} }
}
template<typename InputIterator, typename OutputIterator> template<bool inverse, typename InputIterator, typename OutputIterator>
struct Task { void run(InputIterator &&input, OutputIterator &&data) {
FFT &fft; permute(input, data);
bool inverse;
InputIterator input;
OutputIterator output;
void operator()(int stepIndex) { for (const Step &step : plan) {
if (stepIndex == 0) {
fft.permute(inverse, input, output);
} else {
auto &step = fft.plan[stepIndex - 1];
switch (step.type) { switch (step.type) {
case StepType::generic: case StepType::generic:
fft.fftStepGeneric(output + step.startIndex, step); fftStepGeneric<inverse>(data + step.startIndex, step);
break; break;
case StepType::step2: case StepType::step2:
fft.fftStep2(output + step.startIndex, step); fftStep2<inverse>(data + step.startIndex, step);
break; break;
case StepType::step3: case StepType::step3:
fft.fftStep3(output + step.startIndex, step); fftStep3<inverse>(data + step.startIndex, step);
break; break;
case StepType::step4: case StepType::step4:
fft.fftStep4(output + step.startIndex, step); fftStep4<inverse>(data + step.startIndex, step);
break; break;
} }
} }
} }
};
template<typename InputIterator, typename OutputIterator>
signalsmith::perf::SegmentedTask<Task<InputIterator, OutputIterator>> makeTask(bool inverse, InputIterator &&input, OutputIterator &&output) {
return {{*this, inverse, input, output}, int(plan.size() + 1)};
}
static bool validSize(size_t size) { static bool validSize(size_t size) {
constexpr static bool filter[32] = { constexpr static bool filter[32] = {
@ -405,20 +382,19 @@ namespace signalsmith { namespace fft {
return _size; return _size;
} }
template<typename Input, typename Output> template<typename InputIterator, typename OutputIterator>
void fft(Input &&input, Output &&output) { void fft(InputIterator &&input, OutputIterator &&output) {
return task(false, input, output)(1); auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
return run<false>(inputIter, outputIter);
} }
template<typename Input, typename Output> template<typename InputIterator, typename OutputIterator>
void ifft(Input &&input, Output &&output) { void ifft(InputIterator &&input, OutputIterator &&output) {
return task(true, input, output)(1); auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
return run<true>(inputIter, outputIter);
} }
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(task(bool inverse, Input &&input, Output &&output),
makeTask(inverse, _fft_impl::getIterator(input), _fft_impl::getIterator(output))
)
}; };
struct FFTOptions { struct FFTOptions {
@ -434,66 +410,6 @@ namespace signalsmith { namespace fft {
std::vector<complex> twiddlesMinusI; std::vector<complex> twiddlesMinusI;
std::vector<complex> modifiedRotations; std::vector<complex> modifiedRotations;
FFT<V> complexFft; FFT<V> complexFft;
template<typename Input>
void fftPackInput(Input input) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
if (modified) {
complexBuffer1[i] = _fft_impl::complexMul({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
} else {
complexBuffer1[i] = {input[2*i], input[2*i + 1]};
}
}
}
template<typename Output>
void fftOutputBufferfly(Output output) {
if (!modified) output[0] = {
complexBuffer2[0].real() + complexBuffer2[0].imag(),
complexBuffer2[0].real() - complexBuffer2[0].imag()
};
size_t hSize = complexFft.size();
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
complex evenRotMinusI = _fft_impl::complexMul(evenI, twiddlesMinusI[i]);
output[i] = odd + evenRotMinusI;
output[conjI] = conj(odd - evenRotMinusI);
}
}
template<typename Input>
void ifftInputBufferfly(Input input) {
size_t hSize = complexFft.size();
if (!modified) complexBuffer1[0] = {
input[0].real() + input[0].imag(),
input[0].real() - input[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex v = input[i], v2 = input[conjI];
complex odd = v + conj(v2);
complex evenRotMinusI = v - conj(v2);
complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
complexBuffer1[i] = odd + evenI;
complexBuffer1[conjI] = conj(odd - evenI);
}
}
template<typename Output>
void ifftUnpackOutput(Output output) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
complex v = complexBuffer2[i];
if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
output[2*i] = v.real();
output[2*i + 1] = v.imag();
}
}
public: public:
static size_t fastSizeAbove(size_t size) { static size_t fastSizeAbove(size_t size) {
return FFT<V>::fastSizeAbove((size + 1)/2)*2; return FFT<V>::fastSizeAbove((size + 1)/2)*2;
@ -526,8 +442,7 @@ namespace signalsmith { namespace fft {
} }
} }
size_t result = complexFft.setSize(size/2); return complexFft.setSize(size/2);
return result*2;
} }
size_t setFastSizeAbove(size_t size) { size_t setFastSizeAbove(size_t size) {
return setSize(fastSizeAbove(size)); return setSize(fastSizeAbove(size));
@ -539,42 +454,62 @@ namespace signalsmith { namespace fft {
return complexFft.size()*2; return complexFft.size()*2;
} }
template<typename Input, typename Output> template<typename InputIterator, typename OutputIterator>
SIGNALSMITH_AUTO_RETURN(fftTask(Input &&input, Output &&output), void fft(InputIterator &&input, OutputIterator &&output) {
signalsmith::perf::segmentTask(std::bind( size_t hSize = complexFft.size();
&RealFFT::fftPackInput<decltype(_fft_impl::getIterator(input))>, for (size_t i = 0; i < hSize; ++i) {
this, if (modified) {
_fft_impl::getIterator(input) complexBuffer1[i] = _fft_impl::complexMul<false>({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
), 1) } else {
.then(complexFft.task(false, complexBuffer1, complexBuffer2)) complexBuffer1[i] = {input[2*i], input[2*i + 1]};
.then(std::bind( }
&RealFFT::fftOutputBufferfly<decltype(_fft_impl::getIterator(output))>,
this,
_fft_impl::getIterator(output)
), 1)
)
template<typename Input, typename Output>
void fft(Input &&input, Output &&output) {
fftTask(std::forward<Input>(input), std::forward<Output>(output))(1);
} }
template<typename Input, typename Output> complexFft.fft(complexBuffer1.data(), complexBuffer2.data());
SIGNALSMITH_AUTO_RETURN(ifftTask(Input &&input, Output &&output),
signalsmith::perf::segmentTask(std::bind( if (!modified) output[0] = {
&RealFFT::ifftInputBufferfly<decltype(_fft_impl::getIterator(input))>, complexBuffer2[0].real() + complexBuffer2[0].imag(),
this, complexBuffer2[0].real() - complexBuffer2[0].imag()
_fft_impl::getIterator(input) };
), 1) for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
.then(complexFft.task(true, complexBuffer1, complexBuffer2)) size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
.then(std::bind(
&RealFFT::ifftUnpackOutput<decltype(_fft_impl::getIterator(output))>, complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
this, complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
_fft_impl::getIterator(output) complex evenRotMinusI = _fft_impl::complexMul<false>(evenI, twiddlesMinusI[i]);
), 1)
) output[i] = odd + evenRotMinusI;
template<typename Input, typename Output> output[conjI] = conj(odd - evenRotMinusI);
void ifft(Input &&input, Output &&output) { }
ifftTask(std::forward<Input>(input), std::forward<Output>(output))(1); }
template<typename InputIterator, typename OutputIterator>
void ifft(InputIterator &&input, OutputIterator &&output) {
size_t hSize = complexFft.size();
if (!modified) complexBuffer1[0] = {
input[0].real() + input[0].imag(),
input[0].real() - input[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex v = input[i], v2 = input[conjI];
complex odd = v + conj(v2);
complex evenRotMinusI = v - conj(v2);
complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
complexBuffer1[i] = odd + evenI;
complexBuffer1[conjI] = conj(odd - evenI);
}
complexFft.ifft(complexBuffer1.data(), complexBuffer2.data());
for (size_t i = 0; i < hSize; ++i) {
complex v = complexBuffer2[i];
if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
output[2*i] = v.real();
output[2*i + 1] = v.imag();
}
} }
}; };

View File

@ -4,7 +4,6 @@
#define SIGNALSMITH_DSP_PERF_H #define SIGNALSMITH_DSP_PERF_H
#include <complex> #include <complex>
#include <functional>
#if defined(__SSE__) || defined(_M_X64) #if defined(__SSE__) || defined(_M_X64)
# include <xmmintrin.h> # include <xmmintrin.h>
@ -79,59 +78,6 @@ namespace perf {
class StopDenormals {}; // FIXME: add for other architectures class StopDenormals {}; // FIXME: add for other architectures
#endif #endif
/// Packs a "runner" lambda into an object that can be called repeatedly to do work in chunks
template<class BoundFn>
class SegmentedTask {
BoundFn fn;
int steps;
int nextStep = 0;
template<class ThenFn>
struct Then {
int fn1Steps;
BoundFn fn1;
ThenFn fn2;
void operator()(int step) {
if (step < fn1Steps) {
fn1(step);
} else {
fn2(step - fn1Steps);
}
}
};
template<typename Fn2> // all SegmentedTasks are in cahoots
friend class SegmentedTask;
public:
SegmentedTask(BoundFn fn, int steps) : fn(fn), steps(steps) {}
/// Completes the step up to the ratio (0-1)
void operator()(float ratio) {
int endStep = std::round(ratio*steps);
while (nextStep < endStep) {
fn(nextStep++);
}
}
void reset() { // So you can run the task again with the same arguments later
nextStep = 0;
}
template<class Fn>
SegmentedTask<Then<Fn>> then(SegmentedTask<Fn> next) {
return then(next.fn, next.steps);
}
template<class Fn>
SegmentedTask<Then<Fn>> then(Fn nextFn, int nextSteps) {
return {{steps, fn, nextFn}, steps + nextSteps};
}
};
template<class BoundFn>
auto segmentTask(BoundFn fn, int steps) -> SegmentedTask<BoundFn> {
return {fn, steps};
}
/** @} */ /** @} */
}} // signalsmith::perf:: }} // signalsmith::perf::