diff --git a/CMakeLists.txt b/CMakeLists.txt
index fb91d9c..6624bb7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,7 +7,7 @@ include(FetchContent)
 FetchContent_Declare(
 	signalsmith-linear
 	GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git
-	GIT_TAG c600e0420d260469566c41e1ccb64f89ee439dd3
+	GIT_TAG 0.1.0
 	GIT_SHALLOW ON
 )
 FetchContent_MakeAvailable(signalsmith-linear)
diff --git a/cmd/main.cpp b/cmd/main.cpp
index 53f57cd..cd28805 100644
--- a/cmd/main.cpp
+++ b/cmd/main.cpp
@@ -2,6 +2,16 @@
 #include <iostream>
 #define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n";
 
+size_t activeStepIndex = 0; // index into processStopwatches of the step currently being timed
+void profileProcessStart(int, int); // forward declarations: the macros below name these hooks before the library header is included
+void profileProcessEndStep();
+void profileProcessStep(size_t, size_t);
+void profileProcessEnd();
+#define SIGNALSMITH_STRETCH_PROFILE_PROCESS_START profileProcessStart
+#define SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP profileProcessStep
+#define SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP profileProcessEndStep
+#define SIGNALSMITH_STRETCH_PROFILE_PROCESS_END profileProcessEnd
+
 #include "signalsmith-stretch/signalsmith-stretch.h"
 
 #include "./util/stopwatch.h"
@@ -9,8 +19,43 @@
 #include "./util/simple-args.h"
 #include "./util/wav.h"
 
+#include "plot/plot.h"
+std::vector<signalsmith::Stopwatch> processStopwatches; // one stopwatch per step, indexed by step number. NOTE(review): the element type was missing from the extracted patch and is inferred from the Stopwatch calls made on the elements - confirm
+signalsmith::Stopwatch processStopwatchStart, processStopwatchEnd; // time before the first step / after the last step
+bool started = false; // true from profileProcessStart() until the next profileProcessEndStep()
+bool activeStep = false; // true while processStopwatches[activeStepIndex] has an open lap
+void profileProcessStart(int /*inputSamples*/, int /*outputSamples*/) { // begins timing the work that precedes the first step
+	activeStep = false;
+	started = true;
+	processStopwatchStart.startLap();
+}
+void profileProcessEndStep() { // closes whichever lap is open (current step, else the start lap), then opens the "end" lap
+	if (activeStep) {
+		activeStep = false;
+		processStopwatches[activeStepIndex].lap();
+	} else if (started) {
+		started = false;
+		processStopwatchStart.lap();
+	}
+	processStopwatchEnd.startLap(); // started after every step; profileProcessEnd() calls lap() on it
+}
+void profileProcessStep(size_t step, size_t count) { // finishes the previous step (if any) and starts timing step `step` of `count`
+	profileProcessEndStep();
+	activeStep = true;
+	activeStepIndex = step;
+	if (processStopwatches.size() < count) { // grow on demand; never shrinks
+		processStopwatches.resize(count);
+	}
+	
processStopwatches[step].startLap(); // open the lap for this step; closed by the next profileProcessEndStep()
+}
+void profileProcessEnd() { // closes the lap that profileProcessEndStep() opened on processStopwatchEnd
+	processStopwatchEnd.lap();
+}
+
 int main(int argc, char* argv[]) {
 	signalsmith::stretch::SignalsmithStretch stretch; // optional cheaper RNG for performance comparison
+
+	processStopwatches.reserve(1000); // NOTE(review): presumably so the resize() in profileProcessStep() does not reallocate while processing - confirm
 
 	SimpleArgs args(argc, argv);
 
@@ -56,7 +101,7 @@ int main(int argc, char* argv[]) {
 	stopwatch.start();
 	stretch.presetDefault(inWav.channels, inWav.sampleRate);
 	stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
-	double initSeconds = stopwatch.seconds(stopwatch.lap());
+	double initSeconds = stopwatch.lap(); // lap() result is used directly as seconds (no stopwatch.seconds() conversion any more)
 	initMemory = initMemory.diff();
 
 	std::cout << "Setup:\n\t" << initSeconds << "s\n";
@@ -85,7 +130,7 @@ int main(int argc, char* argv[]) {
 	stretch.flush(outWav, tailSamples);
 	outWav.offset -= outputLength;
 
-	double processSeconds = stopwatch.seconds(stopwatch.lap());
+	double processSeconds = stopwatch.lap(); // same change as initSeconds above
 	double processRate = (inWav.length()/inWav.sampleRate)/processSeconds;
 	double processPercent = 100/processRate;
 	processMemory = processMemory.diff();
@@ -109,6 +154,31 @@ int main(int argc, char* argv[]) {
 		// the `.flush()` call already handled foldback stuff at the end (since we asked for a shorter `tailSamples`)
 	}
 
+	signalsmith::plot::Plot2D plot(400, 150); // chart of each step stopwatch's total(), written to profile.svg
+	plot.x.major(0, "").label("step");
+	plot.y.major(0).label("time spent");
+	auto &line = plot.line().fillToY(0); // per-step totals
+	auto &extraLine = plot.line().fillToY(0); // start/end totals, drawn either side of the steps
+	for (size_t i = 0; i < processStopwatches.size(); ++i) {
+		double time = processStopwatches[i].total();
+		if (i%5 == 0) {
+			plot.x.tick(i + 0.5, std::to_string(i)); // label every 5th step, centred between i and i + 1
+		} else {
+			plot.x.tick(i + 0.5, ""); // unlabelled tick
+		}
+		line.add(i, time); // flat segment over [i, i + 1] at this step's total
+		line.add(i + 1, time);
+	}
+	extraLine.add(-1, 0); // box over [-1, 0] with height processStopwatchStart.total()
+	extraLine.add(-1, processStopwatchStart.total());
+	extraLine.add(0, processStopwatchStart.total());
+	extraLine.add(0, 0);
+	extraLine.add(processStopwatches.size(), 0); // box over [size, size + 1] with height processStopwatchEnd.total()
+	extraLine.add(processStopwatches.size(), processStopwatchEnd.total());
+	
extraLine.add(processStopwatches.size() + 1, processStopwatchEnd.total());
+	extraLine.add(processStopwatches.size() + 1, 0);
+	plot.write("profile.svg");
+
 	if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
 
 	if (compareReference && prevWav.result) {
diff --git a/cmd/util/stop-denormals.h b/cmd/util/stop-denormals.h
new file mode 100644
index 0000000..c9a7bb5
--- /dev/null
+++ b/cmd/util/stop-denormals.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#if defined(__SSE__) || defined(_M_X64)
+# include <xmmintrin.h> // _mm_getcsr() / _mm_setcsr()
+	// Sets Flush-to-Zero and Denormals-Are-Zero in MXCSR while an instance is alive; the destructor writes back the saved register value.
+	class StopDenormals {
+		unsigned int controlStatusRegister; // MXCSR as read before the constructor modified it
+	public:
+		StopDenormals() : controlStatusRegister(_mm_getcsr()) {
+			_mm_setcsr(controlStatusRegister|0x8040); // Flush-to-Zero and Denormals-Are-Zero
+		}
+		~StopDenormals() {
+			_mm_setcsr(controlStatusRegister);
+		}
+	};
+#elif (defined (__ARM_NEON) || defined (__ARM_NEON__))
+# include <cstdint> // std::uintptr_t
+	// Sets Flush-to-Zero in FPCR while an instance is alive; the destructor writes back the saved register value.
+	class StopDenormals {
+		std::uintptr_t status; // FPCR as read before the constructor modified it
+	public:
+		StopDenormals() {
+			std::uintptr_t asmStatus;
+			asm volatile("mrs %0, fpcr" : "=r"(asmStatus));
+			status = asmStatus; // save the ORIGINAL value (not the modified one) so the destructor really undoes the change
+			asmStatus |= 0x01000000U; // Flush to Zero
+			asm volatile("msr fpcr, %0" : : "ri"(asmStatus));
+		}
+		~StopDenormals() {
+			std::uintptr_t asmStatus = status;
+			asm volatile("msr fpcr, %0" : : "ri"(asmStatus));
+		}
+	};
+#else
+# if __cplusplus >= 202302L
+# warning "The `StopDenormals` class doesn't do anything for this architecture"
+# endif
+	class StopDenormals {}; // FIXME: add for other architectures
+#endif
diff --git a/cmd/util/stopwatch.h b/cmd/util/stopwatch.h
index e87dab2..72c4130 100644
--- a/cmd/util/stopwatch.h
+++ b/cmd/util/stopwatch.h
@@ -6,37 +6,40 @@
 #include
 #include
 
-// We want CPU time, not wall-clock time, so we can't use `std::chrono::high_resolution_clock`
-#ifdef WINDOWS
+#ifdef WINDOWS // completely untested! 
# include <windows.h>
 namespace signalsmith {
 class Stopwatch {
 	using Time = __int64;
+	using Duration = Time; // difference of two counter readings; toSeconds() divides it by the counter frequency
 	inline Time now() {
 		LARGE_INTEGER result;
 		QueryPerformanceCounter(&result);
 		return result.QuadPart;
 	}
-	static double timeToSeconds(double t) {
+	static double toSeconds(Duration t) {
 		LARGE_INTEGER freq;
 		QueryPerformanceFrequency(&freq);
-		return t/double(freq);
+		return t/double(freq.QuadPart); // LARGE_INTEGER is a union, so it cannot be cast to double; the 64-bit value is in .QuadPart (as now() already uses)
 	}
 #else
-# include <ctime>
+# include <chrono>
 namespace signalsmith {
 class Stopwatch {
-	using Time = std::clock_t;
+	using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; // NOTE(review): the template arguments were missing from the extracted patch; reconstructed as "high-resolution clock if steady, else steady_clock" - confirm against upstream
+	using Time = Clock::time_point;
+	using Duration = std::chrono::duration<double>; // double-valued seconds, so count() below is already in seconds. NOTE(review): `<double>` reconstructed from toSeconds() returning count() as double - confirm
+
 	inline Time now() {
-		return std::clock();
+		return Clock::now();
 	}
-	static double timeToSeconds(double t) {
-		return t/double(CLOCKS_PER_SEC);
+	static double toSeconds(Duration duration) {
+		return duration.count();
 	}
 #endif
 	std::atomic