diff --git a/cmd/Makefile b/cmd/Makefile index 76a2078..07eb1c3 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -28,5 +28,5 @@ clean: ### Example use of CMake cmake: - cmake -B build -G Xcode -D CMAKE_RUNTIME_OUTPUT_DIRECTORY=../out + cmake -B build -G Xcode cmake --build build --config Release \ No newline at end of file diff --git a/dsp/README.md b/dsp/README.md index 2811d20..6363d74 100644 --- a/dsp/README.md +++ b/dsp/README.md @@ -22,7 +22,7 @@ Delay delayLine(1024); You can add a compile-time version-check to make sure you have a compatible version of the library: ```cpp #include "dsp/envelopes.h" -SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 0) +SIGNALSMITH_DSP_VERSION_CHECK(1, 6, 1) ``` ### Development / contributing diff --git a/dsp/common.h b/dsp/common.h index c0fde1e..4526309 100644 --- a/dsp/common.h +++ b/dsp/common.h @@ -2,18 +2,13 @@ #define SIGNALSMITH_DSP_COMMON_H #if defined(__FAST_MATH__) && (__apple_build_version__ >= 16000000) && (__apple_build_version__ <= 16000099) -# error Apple Clang 16.0.0 is broken, and generates completely incorrect code for some SIMD operations. -ffast-math makes it worse, so if you HAVE to use this version of Clang, you can't enable -ffast-math. +# error Apple Clang 16.0.0 generates incorrect SIMD for ARM. If you HAVE to use this version of Clang, turn off -ffast-math. #endif #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif -// C++11 doesn't have full auto return types, but this is enough -#ifndef SIGNALSMITH_AUTO_RETURN -# define SIGNALSMITH_AUTO_RETURN(signature, expr) auto signature -> decltype(expr) {return expr;} -#endif - namespace signalsmith { /** @defgroup Common Common @brief Definitions and helper classes used by the rest of the library @@ -24,8 +19,8 @@ namespace signalsmith { #define SIGNALSMITH_DSP_VERSION_MAJOR 1 #define SIGNALSMITH_DSP_VERSION_MINOR 6 -#define SIGNALSMITH_DSP_VERSION_PATCH 0 -#define SIGNALSMITH_DSP_VERSION_STRING "1.6.0" +#define SIGNALSMITH_DSP_VERSION_PATCH 1 +#define SIGNALSMITH_DSP_VERSION_STRING "1.6.1" /** Version compatability check. \code{.cpp} @@ -48,5 +43,5 @@ namespace signalsmith { } // signalsmith:: #else // If we've already included it, check it's the same version -static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 0, "multiple versions of the Signalsmith DSP library"); +static_assert(SIGNALSMITH_DSP_VERSION_MAJOR == 1 && SIGNALSMITH_DSP_VERSION_MINOR == 6 && SIGNALSMITH_DSP_VERSION_PATCH == 1, "multiple versions of the Signalsmith DSP library"); #endif // include guard diff --git a/dsp/fft.h b/dsp/fft.h index d9a7e80..5897586 100644 --- a/dsp/fft.h +++ b/dsp/fft.h @@ -29,7 +29,7 @@ namespace signalsmith { namespace fft { } // Complex multiplication has edge-cases around Inf/NaN - handling those properly makes std::complex non-inlineable, so we use our own - template + template SIGNALSMITH_INLINE std::complex complexMul(const std::complex &a, const std::complex &b) { V aReal = complexReal(a), aImag = complexImag(a); V bReal = complexReal(b), bImag = complexImag(b); @@ -68,8 +68,6 @@ namespace signalsmith { namespace fft { return std::begin(t); } }; - template - SIGNALSMITH_AUTO_RETURN(getIterator(T &&t), GetIterator::get(t)) } /** Floating-point FFT implementation. @@ -197,7 +195,7 @@ namespace signalsmith { namespace fft { } } - template + template void fftStepGeneric(RandomAccessIterator &&origData, const Step &step) { complex *working = workingVector.data(); const size_t stride = step.innerRepeats; @@ -209,14 +207,14 @@ namespace signalsmith { namespace fft { const size_t factor = step.factor; for (size_t repeat = 0; repeat < step.innerRepeats; ++repeat) { for (size_t i = 0; i < step.factor; ++i) { - working[i] = _fft_impl::complexMul(data[i*stride], twiddles[i]); + working[i] = _fft_impl::complexMul(data[i*stride], twiddles[i]); } for (size_t f = 0; f < factor; ++f) { complex sum = working[0]; for (size_t i = 1; i < factor; ++i) { double phase = 2*M_PI*f*i/factor; complex twiddle = {V(std::cos(phase)), V(-std::sin(phase))}; - sum += _fft_impl::complexMul(working[i], twiddle); + sum += _fft_impl::complexMul(working[i], twiddle); } data[f*stride] = sum; } @@ -227,7 +225,7 @@ namespace signalsmith { namespace fft { } } - template + template SIGNALSMITH_INLINE void fftStep2(RandomAccessIterator &&origData, const Step &step) { const size_t stride = step.innerRepeats; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; @@ -235,7 +233,7 @@ namespace signalsmith { namespace fft { const complex* twiddles = origTwiddles; for (RandomAccessIterator data = origData; data < origData + stride; ++data) { complex A = data[0]; - complex B = _fft_impl::complexMul(data[stride], twiddles[1]); + complex B = _fft_impl::complexMul(data[stride], twiddles[1]); data[0] = A + B; data[stride] = A - B; @@ -245,9 +243,9 @@ namespace signalsmith { namespace fft { } } - template + template SIGNALSMITH_INLINE void fftStep3(RandomAccessIterator &&origData, const Step &step) { - constexpr complex factor3 = {-0.5, -0.8660254037844386}; + constexpr complex factor3 = {-0.5, inverse ? 0.8660254037844386 : -0.8660254037844386}; const size_t stride = step.innerRepeats; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; @@ -255,8 +253,8 @@ namespace signalsmith { namespace fft { const complex* twiddles = origTwiddles; for (RandomAccessIterator data = origData; data < origData + stride; ++data) { complex A = data[0]; - complex B = _fft_impl::complexMul(data[stride], twiddles[1]); - complex C = _fft_impl::complexMul(data[stride*2], twiddles[2]); + complex B = _fft_impl::complexMul(data[stride], twiddles[1]); + complex C = _fft_impl::complexMul(data[stride*2], twiddles[2]); complex realSum = A + (B + C)*factor3.real(); complex imagSum = (B - C)*factor3.imag(); @@ -271,7 +269,7 @@ namespace signalsmith { namespace fft { } } - template + template SIGNALSMITH_INLINE void fftStep4(RandomAccessIterator &&origData, const Step &step) { const size_t stride = step.innerRepeats; const complex *origTwiddles = twiddleVector.data() + step.twiddleIndex; @@ -280,17 +278,17 @@ namespace signalsmith { namespace fft { const complex* twiddles = origTwiddles; for (RandomAccessIterator data = origData; data < origData + stride; ++data) { complex A = data[0]; - complex C = _fft_impl::complexMul(data[stride], twiddles[2]); - complex B = _fft_impl::complexMul(data[stride*2], twiddles[1]); - complex D = _fft_impl::complexMul(data[stride*3], twiddles[3]); + complex C = _fft_impl::complexMul(data[stride], twiddles[2]); + complex B = _fft_impl::complexMul(data[stride*2], twiddles[1]); + complex D = _fft_impl::complexMul(data[stride*3], twiddles[3]); complex sumAC = A + C, sumBD = B + D; complex diffAC = A - C, diffBD = B - D; data[0] = sumAC + sumBD; - data[stride] = _fft_impl::complexAddI(diffAC, diffBD); + data[stride] = _fft_impl::complexAddI(diffAC, diffBD); data[stride*2] = sumAC - sumBD; - data[stride*3] = _fft_impl::complexAddI(diffAC, diffBD); + data[stride*3] = _fft_impl::complexAddI(diffAC, diffBD); twiddles += 4; } @@ -299,53 +297,32 @@ namespace signalsmith { namespace fft { } template - void permute(bool conjugateFlip, InputIterator input, OutputIterator data) { - data[0] = input[0]; - if (conjugateFlip) { - for (size_t i = 1; i < permutation.size(); ++i) { - auto &pair = permutation[i]; - data[pair.from] = input[_size - pair.to]; - } - } else { - for (size_t i = 1; i < permutation.size(); ++i) { - auto &pair = permutation[i]; - data[pair.from] = input[pair.to]; - } + void permute(InputIterator input, OutputIterator data) { + for (auto pair : permutation) { + data[pair.from] = input[pair.to]; } } - template - struct Task { - FFT &fft; - bool inverse; - InputIterator input; - OutputIterator output; + template + void run(InputIterator &&input, OutputIterator &&data) { + permute(input, data); - void operator()(int stepIndex) { - if (stepIndex == 0) { - fft.permute(inverse, input, output); - } else { - auto &step = fft.plan[stepIndex - 1]; - switch (step.type) { - case StepType::generic: - fft.fftStepGeneric(output + step.startIndex, step); - break; - case StepType::step2: - fft.fftStep2(output + step.startIndex, step); - break; - case StepType::step3: - fft.fftStep3(output + step.startIndex, step); - break; - case StepType::step4: - fft.fftStep4(output + step.startIndex, step); - break; - } + for (const Step &step : plan) { + switch (step.type) { + case StepType::generic: + fftStepGeneric(data + step.startIndex, step); + break; + case StepType::step2: + fftStep2(data + step.startIndex, step); + break; + case StepType::step3: + fftStep3(data + step.startIndex, step); + break; + case StepType::step4: + fftStep4(data + step.startIndex, step); + break; } } - }; - template - signalsmith::perf::SegmentedTask> makeTask(bool inverse, InputIterator &&input, OutputIterator &&output) { - return {{*this, inverse, input, output}, int(plan.size() + 1)}; } static bool validSize(size_t size) { @@ -405,20 +382,19 @@ namespace signalsmith { namespace fft { return _size; } - template - void fft(Input &&input, Output &&output) { - return task(false, input, output)(1); + template + void fft(InputIterator &&input, OutputIterator &&output) { + auto inputIter = _fft_impl::GetIterator::get(input); + auto outputIter = _fft_impl::GetIterator::get(output); + return run(inputIter, outputIter); } - template - void ifft(Input &&input, Output &&output) { - return task(true, input, output)(1); + template + void ifft(InputIterator &&input, OutputIterator &&output) { + auto inputIter = _fft_impl::GetIterator::get(input); + auto outputIter = _fft_impl::GetIterator::get(output); + return run(inputIter, outputIter); } - - template - SIGNALSMITH_AUTO_RETURN(task(bool inverse, Input &&input, Output &&output), - makeTask(inverse, _fft_impl::getIterator(input), _fft_impl::getIterator(output)) - ) }; struct FFTOptions { @@ -434,66 +410,6 @@ namespace signalsmith { namespace fft { std::vector twiddlesMinusI; std::vector modifiedRotations; FFT complexFft; - - template - void fftPackInput(Input input) { - size_t hSize = complexFft.size(); - for (size_t i = 0; i < hSize; ++i) { - if (modified) { - complexBuffer1[i] = _fft_impl::complexMul({input[2*i], input[2*i + 1]}, modifiedRotations[i]); - } else { - complexBuffer1[i] = {input[2*i], input[2*i + 1]}; - } - } - } - template - void fftOutputBufferfly(Output output) { - if (!modified) output[0] = { - complexBuffer2[0].real() + complexBuffer2[0].imag(), - complexBuffer2[0].real() - complexBuffer2[0].imag() - }; - size_t hSize = complexFft.size(); - for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) { - size_t conjI = modified ? (hSize - 1 - i) : (hSize - i); - - complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5; - complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5; - complex evenRotMinusI = _fft_impl::complexMul(evenI, twiddlesMinusI[i]); - - output[i] = odd + evenRotMinusI; - output[conjI] = conj(odd - evenRotMinusI); - } - } - template - void ifftInputBufferfly(Input input) { - size_t hSize = complexFft.size(); - if (!modified) complexBuffer1[0] = { - input[0].real() + input[0].imag(), - input[0].real() - input[0].imag() - }; - for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) { - size_t conjI = modified ? (hSize - 1 - i) : (hSize - i); - complex v = input[i], v2 = input[conjI]; - - complex odd = v + conj(v2); - complex evenRotMinusI = v - conj(v2); - complex evenI = _fft_impl::complexMul(evenRotMinusI, twiddlesMinusI[i]); - - complexBuffer1[i] = odd + evenI; - complexBuffer1[conjI] = conj(odd - evenI); - } - } - template - void ifftUnpackOutput(Output output) { - size_t hSize = complexFft.size(); - for (size_t i = 0; i < hSize; ++i) { - complex v = complexBuffer2[i]; - if (modified) v = _fft_impl::complexMul(v, modifiedRotations[i]); - output[2*i] = v.real(); - output[2*i + 1] = v.imag(); - } - } - public: static size_t fastSizeAbove(size_t size) { return FFT::fastSizeAbove((size + 1)/2)*2; @@ -526,8 +442,7 @@ namespace signalsmith { namespace fft { } } - size_t result = complexFft.setSize(size/2); - return result*2; + return complexFft.setSize(size/2); } size_t setFastSizeAbove(size_t size) { return setSize(fastSizeAbove(size)); @@ -538,43 +453,63 @@ namespace signalsmith { namespace fft { size_t size() const { return complexFft.size()*2; } - - template - SIGNALSMITH_AUTO_RETURN(fftTask(Input &&input, Output &&output), - signalsmith::perf::segmentTask(std::bind( - &RealFFT::fftPackInput, - this, - _fft_impl::getIterator(input) - ), 1) - .then(complexFft.task(false, complexBuffer1, complexBuffer2)) - .then(std::bind( - &RealFFT::fftOutputBufferfly, - this, - _fft_impl::getIterator(output) - ), 1) - ) - template - void fft(Input &&input, Output &&output) { - fftTask(std::forward(input), std::forward(output))(1); + + template + void fft(InputIterator &&input, OutputIterator &&output) { + size_t hSize = complexFft.size(); + for (size_t i = 0; i < hSize; ++i) { + if (modified) { + complexBuffer1[i] = _fft_impl::complexMul({input[2*i], input[2*i + 1]}, modifiedRotations[i]); + } else { + complexBuffer1[i] = {input[2*i], input[2*i + 1]}; + } + } + + complexFft.fft(complexBuffer1.data(), complexBuffer2.data()); + + if (!modified) output[0] = { + complexBuffer2[0].real() + complexBuffer2[0].imag(), + complexBuffer2[0].real() - complexBuffer2[0].imag() + }; + for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) { + size_t conjI = modified ? (hSize - 1 - i) : (hSize - i); + + complex odd = (complexBuffer2[i] + conj(complexBuffer2[conjI]))*(V)0.5; + complex evenI = (complexBuffer2[i] - conj(complexBuffer2[conjI]))*(V)0.5; + complex evenRotMinusI = _fft_impl::complexMul(evenI, twiddlesMinusI[i]); + + output[i] = odd + evenRotMinusI; + output[conjI] = conj(odd - evenRotMinusI); + } } - - template - SIGNALSMITH_AUTO_RETURN(ifftTask(Input &&input, Output &&output), - signalsmith::perf::segmentTask(std::bind( - &RealFFT::ifftInputBufferfly, - this, - _fft_impl::getIterator(input) - ), 1) - .then(complexFft.task(true, complexBuffer1, complexBuffer2)) - .then(std::bind( - &RealFFT::ifftUnpackOutput, - this, - _fft_impl::getIterator(output) - ), 1) - ) - template - void ifft(Input &&input, Output &&output) { - ifftTask(std::forward(input), std::forward(output))(1); + + template + void ifft(InputIterator &&input, OutputIterator &&output) { + size_t hSize = complexFft.size(); + if (!modified) complexBuffer1[0] = { + input[0].real() + input[0].imag(), + input[0].real() - input[0].imag() + }; + for (size_t i = modified ? 0 : 1; i <= hSize/2; ++i) { + size_t conjI = modified ? (hSize - 1 - i) : (hSize - i); + complex v = input[i], v2 = input[conjI]; + + complex odd = v + conj(v2); + complex evenRotMinusI = v - conj(v2); + complex evenI = _fft_impl::complexMul(evenRotMinusI, twiddlesMinusI[i]); + + complexBuffer1[i] = odd + evenI; + complexBuffer1[conjI] = conj(odd - evenI); + } + + complexFft.ifft(complexBuffer1.data(), complexBuffer2.data()); + + for (size_t i = 0; i < hSize; ++i) { + complex v = complexBuffer2[i]; + if (modified) v = _fft_impl::complexMul(v, modifiedRotations[i]); + output[2*i] = v.real(); + output[2*i + 1] = v.imag(); + } } }; diff --git a/dsp/perf.h b/dsp/perf.h index c678ce7..779c7b7 100644 --- a/dsp/perf.h +++ b/dsp/perf.h @@ -4,7 +4,6 @@ #define SIGNALSMITH_DSP_PERF_H #include -#include #if defined(__SSE__) || defined(_M_X64) # include @@ -79,59 +78,6 @@ namespace perf { class StopDenormals {}; // FIXME: add for other architectures #endif - /// Packs a "runner" lambda into an object that can be called repeatedly to do work in chunks - template - class SegmentedTask { - BoundFn fn; - int steps; - int nextStep = 0; - - template - struct Then { - int fn1Steps; - BoundFn fn1; - ThenFn fn2; - - void operator()(int step) { - if (step < fn1Steps) { - fn1(step); - } else { - fn2(step - fn1Steps); - } - } - }; - template // all SegmentedTasks are in cahoots - friend class SegmentedTask; - public: - SegmentedTask(BoundFn fn, int steps) : fn(fn), steps(steps) {} - - /// Completes the step up to the ratio (0-1) - void operator()(float ratio) { - int endStep = std::round(ratio*steps); - while (nextStep < endStep) { - fn(nextStep++); - } - } - - void reset() { // So you can run the task again with the same arguments later - nextStep = 0; - } - - template - SegmentedTask> then(SegmentedTask next) { - return then(next.fn, next.steps); - } - template - SegmentedTask> then(Fn nextFn, int nextSteps) { - return {{steps, fn, nextFn}, steps + nextSteps}; - } - }; - template - auto segmentTask(BoundFn fn, int steps) -> SegmentedTask { - return {fn, steps}; - } - - /** @} */ }} // signalsmith::perf::