Update DSP library

This commit is contained in:
Geraint 2025-01-29 15:50:30 +00:00
parent 3b4ca143ce
commit 851a478ce0
5 changed files with 109 additions and 233 deletions

View File

@ -28,5 +28,5 @@ clean:
### Example use of CMake
cmake:
cmake -B build -G Xcode -D CMAKE_RUNTIME_OUTPUT_DIRECTORY=../out
cmake -B build -G Xcode
cmake --build build --config Release

View File

@ -22,7 +22,7 @@ Delay delayLine(1024);
You can add a compile-time version-check to make sure you have a compatible version of the library:
```cpp
#include "dsp/envelopes.h"
SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 0)
SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 1)
```
### Development / contributing

View File

@ -2,18 +2,13 @@
#define SIGNALSMITH_DSP_COMMON_H
#if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099)
# error Apple Clang 16.0.0 is broken, and generates completely incorrect code for some SIMD operations. -ffast-math makes it worse, so if you HAVE to use this version of Clang, you can't enable -ffast-math.
# error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math.
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif
// C++11 doesn't have full auto return types, but this is enough
#ifndef SIGNALSMITH_AUTO_RETURN
# define SIGNALSMITH_AUTO_RETURN(signature, expr) auto signature -> decltype(expr) {return expr;}
#endif
namespace signalsmith {
/** @defgroup Common Common
@brief Definitions and helper classes used by the rest of the library
@ -24,8 +19,8 @@ namespace signalsmith {
#define SIGNALSMITH_DSP_VERSION_MAJOR 1
#define SIGNALSMITH_DSP_VERSION_MINOR 6
#define SIGNALSMITH_DSP_VERSION_PATCH 0
#define SIGNALSMITH_DSP_VERSION_STRING "1.6.0"
#define SIGNALSMITH_DSP_VERSION_PATCH 1
#define SIGNALSMITH_DSP_VERSION_STRING "1.6.1"
/** Version compatability check.
\code{.cpp}
@ -48,5 +43,5 @@ namespace signalsmith {
} // signalsmith::
#else
// If we've already included it, check it's the same version
static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 0, "multiple versions of the Signalsmith DSP library");
static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 1, "multiple versions of the Signalsmith DSP library");
#endif // include guard

271
dsp/fft.h
View File

@ -29,7 +29,7 @@ namespace signalsmith { namespace fft {
}
// Complex multiplication has edge-cases around Inf/NaN - handling those properly makes std::complex non-inlineable, so we use our own
template <bool conjugateSecond=false, typename V>
template <bool conjugateSecond, typename V>
SIGNALSMITH_INLINE std::complex<V> complexMul(const std::complex<V> &a, const std::complex<V> &b) {
V aReal = complexReal(a), aImag = complexImag(a);
V bReal = complexReal(b), bImag = complexImag(b);
@ -68,8 +68,6 @@ namespace signalsmith { namespace fft {
return std::begin(t);
}
};
template<typename T>
SIGNALSMITH_AUTO_RETURN(getIterator(T &&t), GetIterator<T>::get(t))
}
/** Floating-point FFT implementation.
@ -197,7 +195,7 @@ namespace signalsmith { namespace fft {
}
}
template<typename RandomAccessIterator>
template<bool inverse, typename RandomAccessIterator>
void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) {
complex *working = workingVector.data();
const size_t stride = step.innerRepeats;
@ -209,14 +207,14 @@ namespace signalsmith { namespace fft {
const size_t factor = step.factor;
for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) {
for (size_t i = 0; i < step.factor; ++i) {
working[i] = _fft_impl::complexMul(data[i*stride], twiddles[i]);
working[i] = _fft_impl::complexMul<inverse>(data[i*stride], twiddles[i]);
}
for (size_t f = 0; f < factor; ++f) {
complex sum = working[0];
for (size_t i = 1; i < factor; ++i) {
double phase = 2*M_PI*f*i/factor;
complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))};
sum += _fft_impl::complexMul(working[i], twiddle);
sum += _fft_impl::complexMul<inverse>(working[i], twiddle);
}
data[f*stride] = sum;
}
@ -227,7 +225,7 @@ namespace signalsmith { namespace fft {
}
}
template<typename RandomAccessIterator>
template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) {
const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -235,7 +233,7 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0];
complex B = _fft_impl::complexMul(data[stride], twiddles[1]);
complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
data[0] = A + B;
data[stride] = A - B;
@ -245,9 +243,9 @@ namespace signalsmith { namespace fft {
}
}
template<typename RandomAccessIterator>
template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) {
constexpr complex factor3 = {-0.5, -0.8660254037844386};
constexpr complex factor3 = {-0.5, inverse ? 0.8660254037844386 : -0.8660254037844386};
const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -255,8 +253,8 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0];
complex B = _fft_impl::complexMul(data[stride], twiddles[1]);
complex C = _fft_impl::complexMul(data[stride*2], twiddles[2]);
complex B = _fft_impl::complexMul<inverse>(data[stride], twiddles[1]);
complex C = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[2]);
complex realSum = A + (B + C)*factor3.real();
complex imagSum = (B - C)*factor3.imag();
@ -271,7 +269,7 @@ namespace signalsmith { namespace fft {
}
}
template<typename RandomAccessIterator>
template<bool inverse, typename RandomAccessIterator>
SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) {
const size_t stride = step.innerRepeats;
const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex;
@ -280,17 +278,17 @@ namespace signalsmith { namespace fft {
const complex* twiddles = origTwiddles;
for (RandomAccessIterator data = origData; data < origData + stride; ++data) {
complex A = data[0];
complex C = _fft_impl::complexMul(data[stride], twiddles[2]);
complex B = _fft_impl::complexMul(data[stride*2], twiddles[1]);
complex D = _fft_impl::complexMul(data[stride*3], twiddles[3]);
complex C = _fft_impl::complexMul<inverse>(data[stride], twiddles[2]);
complex B = _fft_impl::complexMul<inverse>(data[stride*2], twiddles[1]);
complex D = _fft_impl::complexMul<inverse>(data[stride*3], twiddles[3]);
complex sumAC = A + C, sumBD = B + D;
complex diffAC = A - C, diffBD = B - D;
data[0] = sumAC + sumBD;
data[stride] = _fft_impl::complexAddI<true>(diffAC, diffBD);
data[stride] = _fft_impl::complexAddI<!inverse>(diffAC, diffBD);
data[stride*2] = sumAC - sumBD;
data[stride*3] = _fft_impl::complexAddI<false>(diffAC, diffBD);
data[stride*3] = _fft_impl::complexAddI<inverse>(diffAC, diffBD);
twiddles += 4;
}
@ -299,53 +297,32 @@ namespace signalsmith { namespace fft {
}
template<typename InputIterator, typename OutputIterator>
void permute(bool conjugateFlip, InputIterator input, OutputIterator data) {
data[0] = input[0];
if (conjugateFlip) {
for (size_t i = 1; i < permutation.size(); ++i) {
auto &pair = permutation[i];
data[pair.from] = input[_size - pair.to];
}
} else {
for (size_t i = 1; i < permutation.size(); ++i) {
auto &pair = permutation[i];
data[pair.from] = input[pair.to];
}
void permute(InputIterator input, OutputIterator data) {
for (auto pair : permutation) {
data[pair.from] = input[pair.to];
}
}
template<typename InputIterator, typename OutputIterator>
struct Task {
FFT &fft;
bool inverse;
InputIterator input;
OutputIterator output;
template<bool inverse, typename InputIterator, typename OutputIterator>
void run(InputIterator &&input, OutputIterator &&data) {
permute(input, data);
void operator()(int stepIndex) {
if (stepIndex == 0) {
fft.permute(inverse, input, output);
} else {
auto &step = fft.plan[stepIndex - 1];
switch (step.type) {
case StepType::generic:
fft.fftStepGeneric(output + step.startIndex, step);
break;
case StepType::step2:
fft.fftStep2(output + step.startIndex, step);
break;
case StepType::step3:
fft.fftStep3(output + step.startIndex, step);
break;
case StepType::step4:
fft.fftStep4(output + step.startIndex, step);
break;
}
for (const Step &step : plan) {
switch (step.type) {
case StepType::generic:
fftStepGeneric<inverse>(data + step.startIndex, step);
break;
case StepType::step2:
fftStep2<inverse>(data + step.startIndex, step);
break;
case StepType::step3:
fftStep3<inverse>(data + step.startIndex, step);
break;
case StepType::step4:
fftStep4<inverse>(data + step.startIndex, step);
break;
}
}
};
template<typename InputIterator, typename OutputIterator>
signalsmith::perf::SegmentedTask<Task<InputIterator, OutputIterator>> makeTask(bool inverse, InputIterator &&input, OutputIterator &&output) {
return {{*this, inverse, input, output}, int(plan.size() + 1)};
}
static bool validSize(size_t size) {
@ -405,20 +382,19 @@ namespace signalsmith { namespace fft {
return _size;
}
template<typename Input, typename Output>
void fft(Input &&input, Output &&output) {
return task(false, input, output)(1);
template<typename InputIterator, typename OutputIterator>
void fft(InputIterator &&input, OutputIterator &&output) {
auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
return run<false>(inputIter, outputIter);
}
template<typename Input, typename Output>
void ifft(Input &&input, Output &&output) {
return task(true, input, output)(1);
template<typename InputIterator, typename OutputIterator>
void ifft(InputIterator &&input, OutputIterator &&output) {
auto inputIter = _fft_impl::GetIterator<InputIterator>::get(input);
auto outputIter = _fft_impl::GetIterator<OutputIterator>::get(output);
return run<true>(inputIter, outputIter);
}
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(task(bool inverse, Input &&input, Output &&output),
makeTask(inverse, _fft_impl::getIterator(input), _fft_impl::getIterator(output))
)
};
struct FFTOptions {
@ -434,66 +410,6 @@ namespace signalsmith { namespace fft {
std::vector<complex> twiddlesMinusI;
std::vector<complex> modifiedRotations;
FFT<V> complexFft;
template<typename Input>
void fftPackInput(Input input) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
if (modified) {
complexBuffer1[i] = _fft_impl::complexMul({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
} else {
complexBuffer1[i] = {input[2*i], input[2*i + 1]};
}
}
}
template<typename Output>
void fftOutputBufferfly(Output output) {
if (!modified) output[0] = {
complexBuffer2[0].real() + complexBuffer2[0].imag(),
complexBuffer2[0].real() - complexBuffer2[0].imag()
};
size_t hSize = complexFft.size();
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
complex evenRotMinusI = _fft_impl::complexMul(evenI, twiddlesMinusI[i]);
output[i] = odd + evenRotMinusI;
output[conjI] = conj(odd - evenRotMinusI);
}
}
template<typename Input>
void ifftInputBufferfly(Input input) {
size_t hSize = complexFft.size();
if (!modified) complexBuffer1[0] = {
input[0].real() + input[0].imag(),
input[0].real() - input[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex v = input[i], v2 = input[conjI];
complex odd = v + conj(v2);
complex evenRotMinusI = v - conj(v2);
complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
complexBuffer1[i] = odd + evenI;
complexBuffer1[conjI] = conj(odd - evenI);
}
}
template<typename Output>
void ifftUnpackOutput(Output output) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
complex v = complexBuffer2[i];
if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
output[2*i] = v.real();
output[2*i + 1] = v.imag();
}
}
public:
static size_t fastSizeAbove(size_t size) {
return FFT<V>::fastSizeAbove((size + 1)/2)*2;
@ -526,8 +442,7 @@ namespace signalsmith { namespace fft {
}
}
size_t result = complexFft.setSize(size/2);
return result*2;
return complexFft.setSize(size/2);
}
size_t setFastSizeAbove(size_t size) {
return setSize(fastSizeAbove(size));
@ -538,43 +453,63 @@ namespace signalsmith { namespace fft {
size_t size() const {
return complexFft.size()*2;
}
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(fftTask(Input &&input, Output &&output),
signalsmith::perf::segmentTask(std::bind(
&RealFFT::fftPackInput<decltype(_fft_impl::getIterator(input))>,
this,
_fft_impl::getIterator(input)
), 1)
.then(complexFft.task(false, complexBuffer1, complexBuffer2))
.then(std::bind(
&RealFFT::fftOutputBufferfly<decltype(_fft_impl::getIterator(output))>,
this,
_fft_impl::getIterator(output)
), 1)
)
template<typename Input, typename Output>
void fft(Input &&input, Output &&output) {
fftTask(std::forward<Input>(input), std::forward<Output>(output))(1);
template<typename InputIterator, typename OutputIterator>
void fft(InputIterator &&input, OutputIterator &&output) {
size_t hSize = complexFft.size();
for (size_t i = 0; i < hSize; ++i) {
if (modified) {
complexBuffer1[i] = _fft_impl::complexMul<false>({input[2*i], input[2*i + 1]}, modifiedRotations[i]);
} else {
complexBuffer1[i] = {input[2*i], input[2*i + 1]};
}
}
complexFft.fft(complexBuffer1.data(), complexBuffer2.data());
if (!modified) output[0] = {
complexBuffer2[0].real() + complexBuffer2[0].imag(),
complexBuffer2[0].real() - complexBuffer2[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5;
complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5;
complex evenRotMinusI = _fft_impl::complexMul<false>(evenI, twiddlesMinusI[i]);
output[i] = odd + evenRotMinusI;
output[conjI] = conj(odd - evenRotMinusI);
}
}
template<typename Input, typename Output>
SIGNALSMITH_AUTO_RETURN(ifftTask(Input &&input, Output &&output),
signalsmith::perf::segmentTask(std::bind(
&RealFFT::ifftInputBufferfly<decltype(_fft_impl::getIterator(input))>,
this,
_fft_impl::getIterator(input)
), 1)
.then(complexFft.task(true, complexBuffer1, complexBuffer2))
.then(std::bind(
&RealFFT::ifftUnpackOutput<decltype(_fft_impl::getIterator(output))>,
this,
_fft_impl::getIterator(output)
), 1)
)
template<typename Input, typename Output>
void ifft(Input &&input, Output &&output) {
ifftTask(std::forward<Input>(input), std::forward<Output>(output))(1);
template<typename InputIterator, typename OutputIterator>
void ifft(InputIterator &&input, OutputIterator &&output) {
size_t hSize = complexFft.size();
if (!modified) complexBuffer1[0] = {
input[0].real() + input[0].imag(),
input[0].real() - input[0].imag()
};
for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) {
size_t conjI = modified ? (hSize - 1 - i) : (hSize - i);
complex v = input[i], v2 = input[conjI];
complex odd = v + conj(v2);
complex evenRotMinusI = v - conj(v2);
complex evenI = _fft_impl::complexMul<true>(evenRotMinusI, twiddlesMinusI[i]);
complexBuffer1[i] = odd + evenI;
complexBuffer1[conjI] = conj(odd - evenI);
}
complexFft.ifft(complexBuffer1.data(), complexBuffer2.data());
for (size_t i = 0; i < hSize; ++i) {
complex v = complexBuffer2[i];
if (modified) v = _fft_impl::complexMul<true>(v, modifiedRotations[i]);
output[2*i] = v.real();
output[2*i + 1] = v.imag();
}
}
};

View File

@ -4,7 +4,6 @@
#define SIGNALSMITH_DSP_PERF_H
#include <complex>
#include <functional>
#if defined(__SSE__) || defined(_M_X64)
# include <xmmintrin.h>
@ -79,59 +78,6 @@ namespace perf {
class StopDenormals {}; // FIXME: add for other architectures
#endif
/// Packs a "runner" lambda into an object that can be called repeatedly to do work in chunks
template<class BoundFn>
class SegmentedTask {
BoundFn fn;
int steps;
int nextStep = 0;
template<class ThenFn>
struct Then {
int fn1Steps;
BoundFn fn1;
ThenFn fn2;
void operator()(int step) {
if (step < fn1Steps) {
fn1(step);
} else {
fn2(step - fn1Steps);
}
}
};
template<typename Fn2> // all SegmentedTasks are in cahoots
friend class SegmentedTask;
public:
SegmentedTask(BoundFn fn, int steps) : fn(fn), steps(steps) {}
/// Completes the step up to the ratio (0-1)
void operator()(float ratio) {
int endStep = std::round(ratio*steps);
while (nextStep < endStep) {
fn(nextStep++);
}
}
void reset() { // So you can run the task again with the same arguments later
nextStep = 0;
}
template<class Fn>
SegmentedTask<Then<Fn>> then(SegmentedTask<Fn> next) {
return then(next.fn, next.steps);
}
template<class Fn>
SegmentedTask<Then<Fn>> then(Fn nextFn, int nextSteps) {
return {{steps, fn, nextFn}, steps + nextSteps};
}
};
template<class BoundFn>
auto segmentTask(BoundFn fn, int steps) -> SegmentedTask<BoundFn> {
return {fn, steps};
}
/** @} */
}} // signalsmith::perf::