Compare commits

...

10 Commits

20 changed files with 461 additions and 1234 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "cmd/util"]
path = cmd/util
url = https://github.com/geraintluff/util.git

View File

@ -7,7 +7,7 @@ include(FetchContent)
FetchContent_Declare(
signalsmith-linear
GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git
GIT_TAG 0.1.2
GIT_TAG 0.2.3
GIT_SHALLOW ON
)
FetchContent_MakeAvailable(signalsmith-linear)

View File

@ -149,7 +149,7 @@ The algorithm has a lot of number-crunching, so Debug builds are much slower (up
### Dependencies and `#define`s
This uses the [Signalsmith Linear](https://github.com/Signalsmith-Audio/linear) library for FFTs and other speedups. There are [flags]([Linear repo](https://github.com/Signalsmith-Audio/linear?tab=readme-ov-file#building)) to enable Accelerate (`SIGNALSMITH_USE_ACCELERATE`) or IPP (`SIGNALSMITH_USE_IPP`).
This uses the [Signalsmith Linear](https://github.com/Signalsmith-Audio/linear) library for FFTs and other speedups. There are [flags](https://github.com/Signalsmith-Audio/linear?tab=readme-ov-file#building) to use faster FFT implementations (`SIGNALSMITH_USE_ACCELERATE`/`SIGNALSMITH_USE_IPP`/`SIGNALSMITH_USE_PFFFT`/`SIGNALSMITH_USE_PFFFT_DOUBLE`), and corresponding CMake options.
## License

3
SUPPORT.txt Normal file
View File

@ -0,0 +1,3 @@
# See: https://github.com/geraintluff/SUPPORT.txt
2030-01-01 Geraint Luff <geraint@signalsmith-audio.co.uk>

View File

@ -11,19 +11,19 @@ out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx
# Uses input files from: https://signalsmith-audio.co.uk/code/stretch/inputs.zip
examples: out/stretch
mkdir -p out/examples
inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 --exact
inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 --exact
inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 --exact
inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 --exact
inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 --exact
inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 --exact
inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 --exact
inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 --exact
inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 --exact
inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 --exact
inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 --exact
inputs/run-all.sh out/examples/t2- out/stretch --time=2 --exact
inputs/run-all.sh out/examples/t4- out/stretch --time=4 --exact
inputs/run-all.sh out/examples/u2- out/stretch --semitones=2
inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2
inputs/run-all.sh out/examples/u4- out/stretch --semitones=4
inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4
inputs/run-all.sh out/examples/u8- out/stretch --semitones=8
inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8
inputs/run-all.sh out/examples/u16- out/stretch --semitones=16
inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16
inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8
inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2
inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5
inputs/run-all.sh out/examples/t2- out/stretch --time=2
inputs/run-all.sh out/examples/t4- out/stretch --time=4
TEST_WAV ?= "inputs/voice.wav"

234
cmd/main-dev.cpp Normal file
View File

@ -0,0 +1,234 @@
// helper for debugging
#include <iostream>
#define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n";
#define PROFILE_PLOT_CHUNKS
#ifdef PROFILE_PLOT_CHUNKS
size_t activeStepIndex = 0;
void profileProcessStart(int, int);
void profileProcessEndStep();
void profileProcessStep(size_t, size_t);
void profileProcessEnd();
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_START profileProcessStart
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP profileProcessStep
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP profileProcessEndStep
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_END profileProcessEnd
#endif
#include "signalsmith-stretch/signalsmith-stretch.h"
#include "./util/stopwatch.h"
#include "./util/memory-tracker.hxx"
#include "./util/simple-args.h"
#include "./util/wav.h"
#ifdef PROFILE_PLOT_CHUNKS
#include "plot/plot.h"
std::vector<signalsmith::Stopwatch> processStopwatches;
signalsmith::Stopwatch processStopwatchStart, processStopwatchEnd;
bool started = false;
bool activeStep = false;
// Profiling hook installed as SIGNALSMITH_STRETCH_PROFILE_PROCESS_START.
// Resets the step state and starts a lap on the "start" stopwatch; both sample-count arguments are ignored.
void profileProcessStart(int /*inputSamples*/, int /*outputSamples*/) {
activeStep = false;
started = true; // tells profileProcessEndStep() that the "start" stopwatch has a lap in progress
processStopwatchStart.startLap();
}
// Profiling hook installed as SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP.
// Finishes the lap of whichever section is open (a numbered step, otherwise the initial "start" section),
// then starts a lap on the "end" stopwatch.
void profileProcessEndStep() {
if (activeStep) {
activeStep = false;
processStopwatches[activeStepIndex].lap();
} else if (started) {
started = false;
processStopwatchStart.lap();
}
// Started unconditionally; the matching lap() call is in profileProcessEnd()
processStopwatchEnd.startLap();
}
// Profiling hook installed as SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP.
// Closes any section that is still open, then starts timing step `step` (one stopwatch per step index).
void profileProcessStep(size_t step, size_t count) {
profileProcessEndStep();
activeStep = true;
activeStepIndex = step;
// Grow the stopwatch list so it has at least `count` entries (it is never shrunk here)
if (processStopwatches.size() < count) {
processStopwatches.resize(count);
}
// NOTE(review): assumes step < count, otherwise this indexes out of range - confirm against the caller
processStopwatches[step].startLap();
}
// Profiling hook installed as SIGNALSMITH_STRETCH_PROFILE_PROCESS_END.
// Finishes the "end" stopwatch lap that profileProcessEndStep() started.
void profileProcessEnd() {
processStopwatchEnd.lap();
}
#endif
// Development/profiling version of the command-line tool.
//
// Reads a WAV file, pitch/time-stretches it according to the command-line flags, reports setup and
// processing time (plus memory use when the tracker is implemented), optionally writes a per-step
// timing plot to "profile.svg", writes the output WAV, and finally compares the result against a
// previous "-reference.wav" render when one exists (or can be created from an existing output).
//
// Exits through `args.errorExit()` on bad arguments, unreadable/unwritable WAV files, allocations
// during processing, or when the output differs too much from the reference.
int main(int argc, char* argv[]) {
    signalsmith::stretch::SignalsmithStretch<float/*, std::ranlux48_base*/> stretch; // optional cheaper RNG for performance comparison
#ifdef PROFILE_PLOT_CHUNKS
    processStopwatches.reserve(1000);
#endif

    SimpleArgs args(argc, argv);
    if (args.hasFlag("v", "prints the version")) {
        std::cout << stretch.version[0] << "." << stretch.version[1] << "." << stretch.version[2] << "\n";
        return 0;
    }
    std::string inputWav = args.arg<std::string>("input.wav", "16-bit WAV file");
    std::string outputWav = args.arg<std::string>("output.wav", "output WAV file");
    double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
    double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
    bool formantComp = args.hasFlag("formant-comp", "formant compensation");
    double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 0);
    double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
    double time = args.flag<double>("time", "time-stretch factor", 1);
    bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
    bool splitComputation = args.hasFlag("split-computation", "distributes the computation more evenly (but higher latency)");
    args.errorExit();

    std::cout << Console::Bright << inputWav << Console::Reset;
    std::cout << " -> ";
    std::cout << Console::Bright << outputWav << Console::Reset << "\n";
    std::cout << "\tsemitones: " << semitones << "\n\t time: " << time << "x" << (exactLength ? " (exact)" : "") << "\n\t tonality: " << tonality << "Hz\n";

    Wav inWav;
    std::cout << inputWav << " -> " << outputWav << "\n";
    if (!inWav.read(inputWav).warn()) args.errorExit("failed to read WAV");
    size_t inputLength = inWav.samples.size()/inWav.channels;

    Wav prevWav; // Used during development, to compare against known-good previous render
    bool compareReference = (time <= 1.6);
    if (compareReference && !prevWav.read(outputWav + "-reference.wav")) {
        // No reference yet: promote an existing output file (if any) to be the reference
        if (prevWav.read(outputWav)) {
            prevWav.write(outputWav + "-reference.wav");
        }
    }

    Wav outWav;
    outWav.channels = inWav.channels;
    outWav.sampleRate = inWav.sampleRate;
    int outputLength = std::round(inputLength*time);

    signalsmith::MemoryTracker initMemory;
    signalsmith::Stopwatch stopwatch;

    stopwatch.start();
    stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
    stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
    stretch.setFormantSemitones(formants, formantComp);
    stretch.setFormantBase(formantBase/inWav.sampleRate);
    double initSeconds = stopwatch.lap();

    initMemory = initMemory.diff();
    std::cout << "Setup:\n\t" << initSeconds << "s\n";
    if (initMemory.implemented) {
        std::cout << "\tallocated " << (initMemory.allocBytes/1000) << "kB, freed " << (initMemory.freeBytes/1000) << "kB\n";
    }

    signalsmith::MemoryTracker processMemory;
    if (exactLength) {
        outWav.samples.resize(outputLength*outWav.channels);
        stopwatch.start();
        processMemory = {};
        stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
    } else {
        // pad the input at the end, since we'll be reading slightly ahead
        size_t paddedInputLength = inputLength + stretch.inputLatency();
        inWav.samples.resize(paddedInputLength*inWav.channels);
        // pad the output at the end, since we have output latency as well.
        // We don't need an exact length on this path, so add a bit more output to catch any wobbles past the end
        int tailSamples = stretch.outputLatency() + stretch.inputLatency();
        int paddedOutputLength = outputLength + tailSamples;
        outWav.samples.resize(paddedOutputLength*outWav.channels);

        stopwatch.start();
        // The simplest way to deal with input latency (when we have access to the audio buffer) is to always be slightly ahead in the input
        stretch.seek(inWav, stretch.inputLatency(), 1/time);
        inWav.offset += stretch.inputLatency();
        // Process it all in one call, although it works just the same if we split into smaller blocks
        processMemory = {};
        stretch.process(inWav, int(inputLength), outWav, int(outputLength));
        // Read the last bit of output without giving it any more input
        outWav.offset += outputLength;
        stretch.flush(outWav, tailSamples);
        outWav.offset -= outputLength;
    }

    double processSeconds = stopwatch.lap();
    double processRate = (inWav.length()/inWav.sampleRate)/processSeconds;
    double processPercent = 100/processRate;
    processMemory = processMemory.diff();
    std::cout << "Process:\n\t" << processSeconds << "s, " << processRate << "x realtime, " << processPercent << "% CPU\n";
    if (processMemory.implemented) {
        std::cout << "\tallocated " << (processMemory.allocBytes/1000) << "kB, freed " << (processMemory.freeBytes/1000) << "kB\n";
        if (processMemory) args.errorExit("allocated during process()");
    }

#ifdef PROFILE_PLOT_CHUNKS
    // Plot the time spent in each processing step, plus the cumulative total
    const size_t stepCount = processStopwatches.size();
    // Left edge of the final step's column (clamped so an empty profile doesn't wrap around)
    const size_t lastStepStart = (stepCount > 0) ? stepCount - 1 : 0;

    signalsmith::plot::Figure figure;
    auto &plot = figure(0, 0).plot(400, 150);
    plot.x.blank().label("step");
    plot.y.major(0, "");
    plot.title("computation time");
    auto &cumulativePlot = figure(1, 0).plot(150, 150);
    cumulativePlot.x.major(stepCount, "");
    cumulativePlot.y.major(0, "");
    cumulativePlot.title("cumulative");

    auto &line = plot.line().fillToY(0);
    auto &extraLine = plot.line().fillToY(0);
    auto &cumulativeLine = cumulativePlot.line();
    auto &flatLine = cumulativePlot.line();

    double cumulativeTime = 0;
    line.add(0, 0);
    cumulativeLine.add(0, 0);
    for (size_t i = 0; i < stepCount; ++i) {
        double stepTime = processStopwatches[i].total();
        // Only every fifth tick gets a label
        if (i%5 == 0) {
            plot.x.tick(i + 0.5, std::to_string(i));
        } else {
            plot.x.tick(i + 0.5, "");
        }
        line.add(i, stepTime);
        line.add(i + 1, stepTime);
        cumulativeTime += stepTime;
        cumulativeLine.add(i, cumulativeTime);
        cumulativeLine.add(i + 1, cumulativeTime);
    }
    line.add(stepCount, 0);

    // Time outside the numbered steps: drawn over the first and last columns
    extraLine.add(0, 0);
    extraLine.add(0, processStopwatchStart.total());
    extraLine.add(1, processStopwatchStart.total());
    extraLine.add(1, 0);
    extraLine.add(lastStepStart, 0);
    extraLine.add(lastStepStart, processStopwatchEnd.total());
    extraLine.add(stepCount, processStopwatchEnd.total());
    extraLine.add(stepCount, 0);

    // Straight line from zero to the total, for comparison with the cumulative curve
    flatLine.add(0, 0);
    flatLine.add(stepCount, cumulativeTime);
    figure.write("profile.svg");
#endif

    if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");

    if (compareReference && prevWav.result) {
        // Re-read what we just wrote, so both sides of the comparison come from files on disk
        outWav.read(outputWav);
        if (prevWav.length() != outWav.length()) args.errorExit("lengths differ");
        double diff2 = 0;
        for (size_t i = 0; i < prevWav.samples.size(); ++i) {
            double diff = prevWav.samples[i] - outWav.samples[i];
            diff2 += diff*diff;
        }
        diff2 /= prevWav.samples.size();
        double diffDb = 10*std::log10(diff2);

        std::cout << "Reference:\n\tdifference: ";
        // Check the tightest threshold first, otherwise the green case can never be reached
        if (diff2 < 1e-10) {
            std::cout << Console::Green; // below -100dB
        } else if (diff2 < 1e-6) {
            std::cout << Console::Yellow; // below -60dB: passes, but not negligible
        } else {
            std::cout << Console::Red;
        }
        std::cout << Console::Bright << diffDb << Console::Reset << " dB\n";
        if (diffDb > -60) args.errorExit("too much difference\n");
    }
}

View File

@ -2,233 +2,85 @@
#include <iostream>
#define LOG_EXPR(expr) std::cout << #expr << " = " << (expr) << "\n";
#define PROFILE_PLOT_CHUNKS
#ifdef PROFILE_PLOT_CHUNKS
size_t activeStepIndex = 0;
void profileProcessStart(int, int);
void profileProcessEndStep();
void profileProcessStep(size_t, size_t);
void profileProcessEnd();
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_START profileProcessStart
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_STEP profileProcessStep
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP profileProcessEndStep
# define SIGNALSMITH_STRETCH_PROFILE_PROCESS_END profileProcessEnd
#endif
#include "signalsmith-stretch/signalsmith-stretch.h"
using SignalsmithStretch = signalsmith::stretch::SignalsmithStretch<float>;
#include "./util/stopwatch.h"
#include "./util/memory-tracker.hxx"
#include "./util/simple-args.h"
#include "./util/wav.h"
#ifdef PROFILE_PLOT_CHUNKS
#include "plot/plot.h"
std::vector<signalsmith::Stopwatch> processStopwatches;
signalsmith::Stopwatch processStopwatchStart, processStopwatchEnd;
bool started = false;
bool activeStep = false;
void profileProcessStart(int /*inputSamples*/, int /*outputSamples*/) {
activeStep = false;
started = true;
processStopwatchStart.startLap();
}
void profileProcessEndStep() {
if (activeStep) {
activeStep = false;
processStopwatches[activeStepIndex].lap();
} else if (started) {
started = false;
processStopwatchStart.lap();
}
processStopwatchEnd.startLap();
}
void profileProcessStep(size_t step, size_t count) {
profileProcessEndStep();
activeStep = true;
activeStepIndex = step;
if (processStopwatches.size() < count) {
processStopwatches.resize(count);
}
processStopwatches[step].startLap();
}
void profileProcessEnd() {
processStopwatchEnd.lap();
}
#endif
int main(int argc, char* argv[]) {
signalsmith::stretch::SignalsmithStretch<float/*, std::ranlux48_base*/> stretch; // optional cheaper RNG for performance comparison
#ifdef PROFILE_PLOT_CHUNKS
processStopwatches.reserve(1000);
#endif
SimpleArgs args(argc, argv);
if (args.hasFlag("v", "prints the version")) {
std::cout << stretch.version[0] << "." << stretch.version[1] << "." << stretch.version[2] << "\n";
auto &version = SignalsmithStretch::version;
std::cout << version[0] << "." << version[1] << "." << version[2] << "\n";
return 0;
}
std::string inputWav = args.arg<std::string>("input.wav", "16-bit WAV file");
std::string outputWav = args.arg<std::string>("output.wav", "output WAV file");
double semitones = args.flag<double>("semitones", "pitch-shift amount", 0);
double formants = args.flag<double>("formant", "formant-shift amount (semitones)", 0);
bool formantComp = args.hasFlag("formant-comp", "formant compensation");
double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 0);
double formantBase = args.flag<double>("formant-base", "formant base frequency (Hz, 0=auto)", 100);
double tonality = args.flag<double>("tonality", "tonality limit (Hz)", 8000);
double time = args.flag<double>("time", "time-stretch factor", 1);
bool exactLength = args.hasFlag("exact", "trims the start/end so the output has the correct length");
bool splitComputation = args.hasFlag("split-computation", "distributes the computation more evenly (but higher latency)");
args.errorExit();
args.errorExit(); // exits on error, or with `--help`
std::cout << Console::Bright << inputWav << Console::Reset;
std::cout << " -> ";
std::cout << Console::Bright << outputWav << Console::Reset << "\n";
std::cout << "\tsemitones: " << semitones << "\n\t time: " << time << "x" << (exactLength ? " (exact)" : "") << "\n\t tonality: " << tonality << "Hz\n";
std::cout << inputWav << " -> " << outputWav << "\n";
Wav inWav;
std::cout << inputWav << " -> " << outputWav << "\n";
if (!inWav.read(inputWav).warn()) args.errorExit("failed to read WAV");
size_t inputLength = inWav.samples.size()/inWav.channels;
Wav prevWav; // Used during development, to compare against known-good previous render
bool compareReference = (time <= 1.6);
if (compareReference && !prevWav.read(outputWav + "-reference.wav")) {
if (prevWav.read(outputWav)) {
prevWav.write(outputWav + "-reference.wav");
}
}
size_t inputLength = inWav.length();
size_t outputLength = std::round(inputLength*time);
Wav outWav;
outWav.channels = inWav.channels;
outWav.sampleRate = inWav.sampleRate;
int outputLength = std::round(inputLength*time);
outWav.resize(outputLength);
signalsmith::MemoryTracker initMemory;
signalsmith::Stopwatch stopwatch;
stopwatch.start();
SignalsmithStretch stretch;
stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation);
stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate);
stretch.setFormantSemitones(formants, formantComp);
stretch.setFormantBase(formantBase/inWav.sampleRate);
double initSeconds = stopwatch.lap();
initMemory = initMemory.diff();
std::cout << "Setup:\n\t" << initSeconds << "s\n";
if (initMemory.implemented) {
std::cout << "\tallocated " << (initMemory.allocBytes/1000) << "kB, freed " << (initMemory.freeBytes/1000) << "kB\n";
}
/* Since the WAV helper allows sample access like `wav[c][index]`, we could just call:
signalsmith::MemoryTracker processMemory;
if (exactLength) {
outWav.samples.resize(outputLength*outWav.channels);
stopwatch.start();
processMemory = {};
stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
} else {
// pad the input at the end, since we'll be reading slightly ahead
size_t paddedInputLength = inputLength + stretch.inputLatency();
inWav.samples.resize(paddedInputLength*inWav.channels);
// pad the output at the end, since we have output latency as well
int tailSamples = exactLength ? stretch.outputLatency() : (stretch.outputLatency() + stretch.inputLatency()); // if we don't need exact length, add a bit more output to catch any wobbles past the end
int paddedOutputLength = outputLength + tailSamples;
outWav.samples.resize(paddedOutputLength*outWav.channels);
stopwatch.start();
// The simplest way to deal with input latency (when have access to the audio buffer) is to always be slightly ahead in the input
stretch.seek(inWav, stretch.inputLatency(), 1/time);
inWav.offset += stretch.inputLatency();
// Process it all in one call, although it works just the same if we split into smaller blocks
processMemory = {};
stretch.process(inWav, int(inputLength), outWav, int(outputLength));
// Read the last bit of output without giving it any more input
outWav.offset += outputLength;
stretch.flush(outWav, tailSamples);
outWav.offset -= outputLength;
}
However, we'll do it in separate stages to show more of the API. */
double processSeconds = stopwatch.lap();
double processRate = (inWav.length()/inWav.sampleRate)/processSeconds;
double processPercent = 100/processRate;
processMemory = processMemory.diff();
std::cout << "Process:\n\t" << processSeconds << "s, " << processRate << "x realtime, " << processPercent << "% CPU\n";
if (processMemory.implemented) {
std::cout << "\tallocated " << (processMemory.allocBytes/1000) << "kB, freed " << (processMemory.freeBytes/1000) << "kB\n";
if (processMemory) args.errorExit("allocated during process()");
}
// First, an "output seek", where we provide a chunk of input.
// This is suitable for starting playback of a sample at a given playback rate.
auto seekLength = stretch.outputSeekLength(1/time);
stretch.outputSeek(inWav, seekLength);
// At this point, the next output samples we get will correspond to the beginning of the audio file.
#ifdef PROFILE_PLOT_CHUNKS
signalsmith::plot::Figure figure;
auto &plot = figure(0, 0).plot(400, 150);
plot.x.blank().label("step");
plot.y.major(0, "");
plot.title("computation time");
auto &cumulativePlot = figure(1, 0).plot(150, 150);
cumulativePlot.x.major(processStopwatches.size(), "");
cumulativePlot.y.major(0, "");
cumulativePlot.title("cumulative");
auto &line = plot.line().fillToY(0);
auto &extraLine = plot.line().fillToY(0);
auto &cumulativeLine = cumulativePlot.line();
auto &flatLine = cumulativePlot.line();
double cumulativeTime = 0;
line.add(0, 0);
cumulativeLine.add(0, 0);
for (size_t i = 0; i < processStopwatches.size(); ++i) {
double time = processStopwatches[i].total();
if (i%5 == 0) {
plot.x.tick(i + 0.5, std::to_string(i));
} else {
plot.x.tick(i + 0.5, "");
}
line.add(i, time);
line.add(i + 1, time);
// We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`).
int outputIndex = outputLength - stretch.intervalSamples();
cumulativeTime += time;
cumulativeLine.add(i, cumulativeTime);
cumulativeLine.add(i + 1, cumulativeTime);
}
line.add(processStopwatches.size(), 0);
extraLine.add(0, 0);
extraLine.add(0, processStopwatchStart.total());
extraLine.add(1, processStopwatchStart.total());
extraLine.add(1, 0);
extraLine.add(processStopwatches.size() - 1, 0);
extraLine.add(processStopwatches.size() - 1, processStopwatchEnd.total());
extraLine.add(processStopwatches.size(), processStopwatchEnd.total());
extraLine.add(processStopwatches.size(), 0);
flatLine.add(0, 0);
flatLine.add(processStopwatches.size(), cumulativeTime);
figure.write("profile.svg");
#endif
// Stretch's internal output position is slightly ahead of the output samples we get
int outputPos = outputIndex + stretch.outputLatency();
// Time-map: where do we want the input position to be at that moment?
int inputPos = std::round(outputPos/time);
// And therefore which input samples do we need to be supplying?
int inputIndex = inputPos + stretch.inputLatency();
// In this particular case, our `inputPos` will be at the end of the file
// and `inputIndex` will be beyond the end, so we pad with 0s to have enough input
inWav.resize(inputIndex);
// OK, go for it
inWav.offset = seekLength;
stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex);
// And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks.
outWav.offset = outputIndex;
stretch.flush(outWav, outputLength - outputIndex);
outWav.offset = 0;
if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
if (compareReference && prevWav.result) {
outWav.read(outputWav);
if (prevWav.length() != outWav.length()) args.errorExit("lengths differ");
double diff2 = 0;
for (size_t i = 0; i < prevWav.samples.size(); ++i) {
double diff = prevWav.samples[i] - outWav.samples[i];
diff2 += diff*diff;
}
diff2 /= prevWav.samples.size();
double diffDb = 10*std::log10(diff2);
std::cout << "Reference:\n\tdifference: ";
if (diff2 < 1e-6) {
std::cout << Console::Yellow;
} else if (diff2 < 1e-10) {
std::cout << Console::Green;
} else {
std::cout << Console::Red;
}
std::cout << Console::Bright << diffDb << Console::Reset << " dB\n";
if (diffDb > -60) args.errorExit("too much difference\n");
}
}

1
cmd/util Submodule

@ -0,0 +1 @@
Subproject commit aeb4e31077a453566b58fff1c7e7e998ac824157

View File

@ -1,41 +0,0 @@
#pragma once
#ifndef _CONSOLE_COLOURS_H
#define _CONSOLE_COLOURS_H
#include <string>
// ANSI escape sequences for styling terminal output, e.g. `std::cout << Console::Red << "error" << Console::Reset`.
//
// These are `const` so that each one has internal linkage: this header defines them (rather than just
// declaring them), so non-const globals would be defined again in every translation unit that
// includes it, and fail to link.
namespace Console {
    // Text attributes
    const std::string Reset = "\x1b[0m";
    const std::string Bright = "\x1b[1m";
    const std::string Dim = "\x1b[2m";
    const std::string Underscore = "\x1b[4m";
    const std::string Blink = "\x1b[5m";
    const std::string Reverse = "\x1b[7m";
    const std::string Hidden = "\x1b[8m";

    // Text colours
    namespace Foreground {
        const std::string Black = "\x1b[30m";
        const std::string Red = "\x1b[31m";
        const std::string Green = "\x1b[32m";
        const std::string Yellow = "\x1b[33m";
        const std::string Blue = "\x1b[34m";
        const std::string Magenta = "\x1b[35m";
        const std::string Cyan = "\x1b[36m";
        const std::string White = "\x1b[37m";
    }

    // Background colours
    namespace Background {
        const std::string Black = "\x1b[40m";
        const std::string Red = "\x1b[41m";
        const std::string Green = "\x1b[42m";
        const std::string Yellow = "\x1b[43m";
        const std::string Blue = "\x1b[44m";
        const std::string Magenta = "\x1b[45m";
        const std::string Cyan = "\x1b[46m";
        const std::string White = "\x1b[47m";
    }

    // Foreground colours are also available directly, e.g. `Console::Red`
    using namespace Foreground;
}
#endif

View File

@ -1,29 +0,0 @@
/* Currently only working/tested on Mac. You need to compile in `memory-tracker.cpp` as well, which does the actual stuff */
#ifndef SIGNALSMITH_UTIL_MEMORY_TRACKER_H
#define SIGNALSMITH_UTIL_MEMORY_TRACKER_H
#include <cstddef>
namespace signalsmith {
// A snapshot of allocation counters, or (as returned by `.diff()`) the change between two snapshots.
struct MemoryTracker {
    // Whether the implementation actually tracks memory or not
    static const bool implemented;

    size_t allocBytes, freeBytes, currentBytes;

    // Defined in the implementation file, which fills in the counters
    MemoryTracker();

    // The change in allocated/freed bytes between this snapshot and a fresh one taken now
    MemoryTracker diff() const {
        MemoryTracker latest;
        size_t allocDelta = latest.allocBytes - allocBytes;
        size_t freeDelta = latest.freeBytes - freeBytes;
        return MemoryTracker(allocDelta, freeDelta);
    }

    // True if anything was allocated or freed (intended for `.diff()` results)
    operator bool() const {
        return !(allocBytes == 0 && freeBytes == 0);
    }
private:
    // Builds a tracker from explicit counts; `currentBytes` is their difference
    MemoryTracker(size_t allocated, size_t freed) : allocBytes(allocated), freeBytes(freed), currentBytes(allocated - freed) {}
};
} // namespace
#endif // include guard

View File

@ -1,118 +0,0 @@
#include "./memory-tracker.h"
#if !defined(__has_include) || !__has_include(<dlfcn.h>)
// Fallback if we don't have <dlfcn.h>, which we use to get the existing methods
// Fallback constructor: nothing is tracked, so every snapshot reports zero bytes allocated and freed
signalsmith::MemoryTracker::MemoryTracker() : signalsmith::MemoryTracker::MemoryTracker(0, 0) {}
const bool signalsmith::MemoryTracker::implemented = false;
#else
const bool signalsmith::MemoryTracker::implemented = true;
#include <cstdlib>
#include <cstddef>
#include <dlfcn.h>
#include <cassert>
#include <utility>
namespace signalsmith {
namespace memory_tracker {
static size_t memoryTrackerAllocCounter = 0;
static size_t memoryTrackerFreeCounter = 0;
static void * (*originalCalloc)(size_t, size_t) = nullptr;
static void * (*originalMalloc)(size_t) = nullptr;
static void * (*originalRealloc)(void*, size_t) = nullptr;
static void (*originalFree)(void*) = nullptr;
// Makes sure `fn` points at the next definition of `symbolName` in library search order
// (looked up with dlsym/RTLD_NEXT the first time only). Exits with status 1 if the lookup fails.
template<class Fn>
static void cacheOriginal(Fn& fn, const char *symbolName) {
    if (fn) return; // already resolved
    fn = (Fn)dlsym(RTLD_NEXT, symbolName);
    if (!fn) exit(1);
}

// Resolves `fn` if needed (see cacheOriginal), then calls it with the given arguments and returns its result.
template<class Fn, typename ...Args>
auto callOriginal(Fn& fn, const char *symbolName, Args &&...args)
        -> decltype(fn(std::forward<Args>(args)...)) {
    cacheOriginal<Fn>(fn, symbolName);
    return fn(std::forward<Args>(args)...);
}
static constexpr size_t extraInfoBytes = sizeof(std::max_align_t)*2;
// Records an allocation: adds `size` to the allocation counter, and writes the size and the original
// (un-offset) pointer into the two size_t slots just before `offsetPointer`.
// Returns `offsetPointer`, or nullptr (recording nothing) if the underlying allocation failed.
void * storeAllocInfo(void *offsetPointer, void *originalPointer, size_t size) {
    if (originalPointer == nullptr) return nullptr;
    memoryTrackerAllocCounter += size;

    assert(size_t(offsetPointer)%sizeof(size_t) == 0); // make sure it's aligned to size_t
    size_t *header = (size_t *)offsetPointer;
    header[-2] = size;
    header[-1] = size_t(originalPointer);
    return offsetPointer;
}
// Reads back the size stored two size_t slots before `ptr` (which must be size_t-aligned)
size_t getAllocSize(void *ptr) {
    assert(size_t(ptr)%sizeof(size_t) == 0);
    const size_t *sizeSlot = static_cast<const size_t *>(ptr) - 2;
    return *sizeSlot;
}
// Reads back the original pointer stored one size_t slot before `ptr` (which must be size_t-aligned)
void * getAllocPointer(void *ptr) {
    assert(size_t(ptr)%sizeof(size_t) == 0);
    size_t storedAddress = *(static_cast<size_t *>(ptr) - 1);
    return reinterpret_cast<void *>(storedAddress);
}
}} // namespaces
extern "C" {
void * malloc(size_t size) {
void *ptr = signalsmith::memory_tracker::callOriginal(signalsmith::memory_tracker::originalMalloc, "malloc", size + signalsmith::memory_tracker::extraInfoBytes);
return signalsmith::memory_tracker::storeAllocInfo((unsigned char *)ptr + signalsmith::memory_tracker::extraInfoBytes, ptr, size);
}
void * calloc(size_t size, size_t count) {
size_t extraCount = (signalsmith::memory_tracker::extraInfoBytes + size - 1)/size; // enough extra items to store what we need
void *ptr = signalsmith::memory_tracker::callOriginal(signalsmith::memory_tracker::originalCalloc, "calloc", size, count + extraCount);
return signalsmith::memory_tracker::storeAllocInfo((unsigned char *)ptr + size*extraCount, ptr, size*count);
}
void * realloc(void *ptr, size_t size) {
void *originalPtr = signalsmith::memory_tracker::getAllocPointer(ptr);
auto pointerOffset = (unsigned char *)ptr - (unsigned char *)originalPtr;
size_t originalSize = signalsmith::memory_tracker::getAllocSize(ptr);
signalsmith::memory_tracker::memoryTrackerFreeCounter += originalSize;
ptr = signalsmith::memory_tracker::callOriginal(signalsmith::memory_tracker::originalRealloc, "realloc", originalPtr, size + pointerOffset);
return signalsmith::memory_tracker::storeAllocInfo((unsigned char *)ptr + pointerOffset, ptr, size);
}
// Tracked replacement for free(): adds the block's recorded size to the free counter, then releases
// the underlying allocation (which starts at the stored original pointer, not at `ptr`).
void free(void *ptr) {
    // free(NULL) is required to be a no-op, and there is no header to read
    if (!ptr) return;
    void *originalPtr = signalsmith::memory_tracker::getAllocPointer(ptr);
    size_t originalSize = signalsmith::memory_tracker::getAllocSize(ptr);
    signalsmith::memory_tracker::memoryTrackerFreeCounter += originalSize;
    signalsmith::memory_tracker::callOriginal(signalsmith::memory_tracker::originalFree, "free", originalPtr);
}
}
#include <new>
// Route C++ allocations through malloc()/free(), so they are counted by the tracked versions.

// Throws std::bad_alloc on failure: the throwing forms of operator new must never return null.
void * operator new(size_t size) {
    if (void *ptr = malloc(size)) return ptr;
    throw std::bad_alloc();
}
void * operator new[](size_t size) {
    if (void *ptr = malloc(size)) return ptr;
    throw std::bad_alloc();
}
// Deleting a null pointer is valid and must do nothing, so it never reaches free()
void operator delete(void *ptr) noexcept {
    if (ptr) free(ptr);
}
void operator delete[](void *ptr) noexcept {
    if (ptr) free(ptr);
}
// Tracking constructor: snapshots the running totals of bytes allocated and freed so far
signalsmith::MemoryTracker::MemoryTracker() : signalsmith::MemoryTracker::MemoryTracker(signalsmith::memory_tracker::memoryTrackerAllocCounter, signalsmith::memory_tracker::memoryTrackerFreeCounter) {}
#endif // check for <dlfcn.h>

View File

@ -1,322 +0,0 @@
#include <iostream>
#include <string>
#include <cstring>
#include <vector>
#include <map>
#include <set>
#include <cstdlib> // exit() and codes
#include "console-colours.h"
/** Expected use:
SimpleArgs args(argc, argv);
// positional argument
std::string foo = args.arg<std::string>("foo");
// optional argument
std::string bar = args.arg<std::string>("bar", "a string for Bar", "default");
// --flag=value
double baz = args.flag<double>("baz", "an optional flag", 5);
// Exits if "foo" not supplied
args.errorExit();
If you have multiple commands, each with their own options:
// Switches based on a command
if (args.command("bink", "Bink description")) {
// collect arguments for the command
}
// Exits with a help message (and list of commands) if no command matched
args.errorCommand();
By default, a flag of "-h" (or a command of "help", if any commands are used) prints a help message. To override:
SimpleArgs args(argc, argv);
args.helpFlag("h");
args.helpCommand("help");
**/
class SimpleArgs {
int argc;
const char* const* argv;
template<typename T>
T valueFromString(const char *arg);
std::string parsedCommand;
// One entry in the help text. The same struct is used for commands, arguments and flags.
struct Keywords {
std::string keyword; // name as printed by help()
std::string description; // optional; help() prints it after " - " when non-empty
bool isHelp; // for commands: help() appends " [command...]" when set
};
std::vector<Keywords> keywordOptions;
std::vector<Keywords> argDetails;
std::vector<Keywords> flagOptions;
std::set<std::string> flagSet;
// Forgets the commands and flags collected so far (the argument details are kept)
void clearKeywords() {
    flagOptions.clear();
    flagSet.clear();
    keywordOptions.clear();
}
bool helpMode = false;
bool checkedHelpCommand = false;
bool hasError = false;
std::string errorMessage;
// Records an error message. Only the first error is kept - later ones are ignored.
void setError(std::string message) {
    if (hasError) return;
    errorMessage = message;
    hasError = true;
}
std::map<std::string, std::string> flagMap;
void consumeFlags() {
while (index < argc && std::strlen(argv[index]) > 1 && argv[index][0] == '-') {
const char* arg = argv[index++];
size_t length = strlen(arg);
size_t keyStart = 1, keyEnd = keyStart + 1;
size_t valueStart = keyEnd;
// If it's "--long-arg" format
if (length > 1 && arg[1] == '-') {
keyStart++;
while (keyEnd < length && arg[keyEnd] != '=') {
keyEnd++;
}
valueStart = keyEnd;
if (keyEnd < length) valueStart++;
}
std::string key = std::string(arg + keyStart, keyEnd - keyStart);
std::string value = std::string(arg + valueStart);
if (key == "help") {
helpMode = true;
}
flagMap[key] = value;
}
}
int index = 1;
public:
// Stores the argument list, and takes the command name (for the usage text) from argv[0],
// with any directory part (up to the last '/' or '\') stripped off.
SimpleArgs(int argc, const char* const argv[]) : argc(argc), argv(argv) {
    std::string program = argv[0];
    size_t lastSlash = program.find_last_of("\\/");
    parsedCommand = (lastSlash == std::string::npos) ? program : program.substr(lastSlash + 1);
}
void help(std::ostream& out=std::cerr) const {
std::string parsedCommand = this->parsedCommand;
if (keywordOptions.size() > 0) {
parsedCommand += std::string(" <command>");
}
out << "Usage:\n\t" << parsedCommand << "\n\n";
if (keywordOptions.size() > 0) {
out << "Commands:\n";
for (unsigned int i = 0; i < keywordOptions.size(); i++) {
out << "\t" << keywordOptions[i].keyword;
if (keywordOptions[i].isHelp) out << " [command...]";
if (keywordOptions[i].description.size()) out << " - " << keywordOptions[i].description;
out << "\n";
}
out << "\n";
}
if (argDetails.size() > 0) {
out << "Arguments:\n";
for (Keywords const &arg : argDetails) {
out << "\t" << arg.keyword;
if (arg.description.size()) out << " - " << arg.description;
out << "\n";
}
out << "\n";
}
if (flagOptions.size() > 0) {
out << "Options: " << Console::Dim << "(--arg=value)" << Console::Reset << "\n";
for (Keywords const &pair : flagOptions) {
out << "\t" << (pair.keyword.length() > 1 ? "--" : "-") << pair.keyword;
if (pair.description.size()) out << " - " << pair.description;
out << "\n";
}
out << "\n";
}
}
bool isHelp() const {
return helpMode;
}
bool finished() const {
return index >= argc;
}
std::string peek() const {
return (index >= argc) ? "" : argv[index];
}
int errorExit(std::ostream& out=std::cerr) const {
if (hasError || helpMode) {
help(out);
if (!helpMode) {
out << Console::Red << errorMessage << Console::Reset << "\n";
}
std::exit((!helpMode && hasError) ? EXIT_FAILURE : EXIT_SUCCESS);
}
return 0;
}
int errorExit(std::string forcedError, std::ostream& out=std::cerr) const {
if (hasError) return errorExit(out); // Argument errors take priority
out << Console::Red << forcedError << Console::Reset << "\n";
std::exit(EXIT_FAILURE);
return 0;
}
int errorCommand(std::string message="", std::ostream& out=std::cerr) const {
if (keywordOptions.size()) {
// We expected a command, but didn't match on any
if (helpMode) return errorExit(out);
if (index >= argc) help(out);
if (message.length() == 0) {
message = (index < argc) ? std::string("Unknown command: ") + argv[index] : "Missing command";
}
errorExit(message, out);
}
return 0;
}
template<typename T=std::string>
T arg(std::string name, std::string longName, T defaultValue) {
consumeFlags();
if (index < argc) clearKeywords();
parsedCommand += std::string(" [") + name + "]";
argDetails.push_back(Keywords{name, longName, false});
if (index >= argc) return defaultValue;
return valueFromString<T>(argv[index++]);
}
template<typename T=std::string>
T arg(std::string name, std::string longName="") {
consumeFlags();
if (index < argc) clearKeywords();
parsedCommand += std::string(" <") + name + ">";
argDetails.push_back(Keywords{name, longName, false});
if (index >= argc) {
if (longName.length() > 0) {
setError("Missing " + longName + " <" + name + ">");
} else {
setError("Missing argument <" + name + ">");
}
return T();
}
return valueFromString<T>(argv[index++]);
}
bool command(std::string keyword, std::string description="", bool isHelp=false) {
consumeFlags();
if (index == 1) {
helpCommand();
}
if (index < argc && !keyword.compare(argv[index])) {
clearKeywords();
index++;
if (!isHelp) parsedCommand += std::string(" ") + keyword;
return true;
}
keywordOptions.push_back(Keywords{keyword, description, isHelp});
return false;
}
bool helpCommand(std::string keyword="help") {
if (!checkedHelpCommand && index == 1) {
keywordOptions.push_back(Keywords{keyword, "", true});
if (index < argc && !keyword.compare(argv[index])) {
index++;
helpMode = true;
}
}
checkedHelpCommand = true;
return helpMode;
}
template<typename T=std::string>
T flag(std::string key, std::string description, T defaultValue) {
consumeFlags();
if (!hasFlag(key, description)) return defaultValue;
auto iterator = flagMap.find(key);
return valueFromString<T>(iterator->second.c_str());
}
template<typename T=std::string>
T flag(std::string key, T defaultValue) {
consumeFlags();
if (!hasFlag(key, "")) return defaultValue;
auto iterator = flagMap.find(key);
return valueFromString<T>(iterator->second.c_str());
}
template<typename T=std::string>
T flag(std::string key) {
return flag<T>(key, T());
}
bool hasFlag(std::string key, std::string description="") {
consumeFlags();
auto iterator = flagSet.find(key);
if (iterator == flagSet.end()) {
flagSet.insert(key);
flagOptions.push_back(Keywords{key, description, false});
} else if (description.length() > 0) {
bool found = false;
for (auto &option : flagOptions) {
if (option.keyword == key) {
option.description = description;
found = true;
break;
}
}
if (!found) {
flagOptions.push_back(Keywords{key, description, false});
}
}
auto mapIterator = flagMap.find(key);
return mapIterator != flagMap.end();
}
bool helpFlag(std::string key, std::string description="shows this help") {
consumeFlags();
hasFlag(key, description);
auto iterator = flagMap.find(key);
helpMode = (iterator != flagMap.end());
return helpMode;
}
};
// Conversions from a raw argument string to each supported value type.
// Full specialisations are ordinary (non-template) functions, so they're marked `inline`
// to let this header be included from more than one translation unit without duplicate definitions.
// The numeric versions use std::sto*, which throw on unparseable input.
template<>
inline std::string SimpleArgs::valueFromString(const char *arg) {
    return arg;
}
// Returns the argument pointer itself (no copy is made)
template<>
inline const char * SimpleArgs::valueFromString(const char *arg) {
    return arg;
}
template<>
inline int SimpleArgs::valueFromString(const char *arg) {
    return std::stoi(arg);
}
template<>
inline long SimpleArgs::valueFromString(const char *arg) {
    return std::stol(arg);
}
template<>
inline unsigned long SimpleArgs::valueFromString(const char *arg) {
    return std::stoul(arg);
}
template<>
inline float SimpleArgs::valueFromString(const char *arg) {
    return std::stof(arg);
}
template<>
inline double SimpleArgs::valueFromString(const char *arg) {
    return std::stod(arg);
}

View File

@ -1,34 +0,0 @@
#pragma once
// `StopDenormals` is a scope guard: constructing it switches the floating-point unit to flush denormals,
// and destroying it puts back the control register value which was in place on construction.
#if defined(__SSE__) || defined(_M_X64)
#   include <xmmintrin.h> // _mm_getcsr() / _mm_setcsr()
class StopDenormals {
    unsigned int controlStatusRegister; // MXCSR value from before we changed it
public:
    StopDenormals() : controlStatusRegister(_mm_getcsr()) {
        _mm_setcsr(controlStatusRegister|0x8040); // Flush-to-Zero and Denormals-Are-Zero
    }
    ~StopDenormals() {
        _mm_setcsr(controlStatusRegister);
    }
};
#elif (defined (__ARM_NEON) || defined (__ARM_NEON__))
#   include <cstdint> // uintptr_t
class StopDenormals {
    uintptr_t status; // FPCR value from before we changed it
public:
    StopDenormals() {
        uintptr_t asmStatus;
        asm volatile("mrs %0, fpcr" : "=r"(asmStatus));
        // Remember the original value *before* setting the bit, so the destructor really restores it
        status = asmStatus;
        asmStatus |= 0x01000000U; // Flush to Zero
        // `msr fpcr` only takes a register operand, so the constraint is "r"
        asm volatile("msr fpcr, %0" : : "r"(asmStatus));
    }
    ~StopDenormals() {
        uintptr_t asmStatus = status;
        asm volatile("msr fpcr, %0" : : "r"(asmStatus));
    }
};
#else
#   if __cplusplus >= 202302L
#       warning "The `StopDenormals` class doesn't do anything for this architecture"
#   endif
class StopDenormals {}; // FIXME: add for other architectures
#endif

View File

@ -1,107 +0,0 @@
#ifndef SIGNALSMITH_STOPWATCH_UTIL_H
#define SIGNALSMITH_STOPWATCH_UTIL_H
#include <limits>
#include <cmath>
#include <atomic>
#include <algorithm>
// `signalsmith::Stopwatch` times repeated "laps", keeping the count, sum, sum of squares and best lap.
// When constructed with `compensate=true` it first measures the cost of an empty lap, and subtracts
// that overhead from `total()`, `mean()` and `best()`.
#ifdef WINDOWS // completely untested!
#   include <windows.h>
namespace signalsmith {
class Stopwatch {
    using Time = __int64;
    using Duration = Time;
    inline Time now() {
        LARGE_INTEGER result;
        QueryPerformanceCounter(&result);
        return result.QuadPart;
    }
    static double toSeconds(Duration t) {
        LARGE_INTEGER freq;
        QueryPerformanceFrequency(&freq);
        // LARGE_INTEGER is a union, so the counts-per-second value has to be read from .QuadPart
        return t/double(freq.QuadPart);
    }
#else
#   include <chrono>
#   include <type_traits> // std::conditional
namespace signalsmith {
class Stopwatch {
    // Use the high-resolution clock only when it's steady, otherwise fall back to steady_clock
    using Clock = std::conditional<std::chrono::high_resolution_clock::is_steady, std::chrono::high_resolution_clock, std::chrono::steady_clock>::type;
    using Time = Clock::time_point;
    using Duration = std::chrono::duration<double>;

    inline Time now() {
        return Clock::now();
    }
    static double toSeconds(Duration duration) {
        return duration.count();
    }
#endif

    std::atomic<Time> lapStart; // the atomic store/load should act as barriers for reordering operations
    double lapBest, lapTotal, lapTotal2; // shortest lap, sum of laps, sum of squared laps (seconds)
    double lapOverhead = 0; // measured cost of an empty lap
    int lapCount = 0;

public:
    /// With `compensate`, times 1000 empty laps to estimate the measurement overhead before starting.
    Stopwatch(bool compensate=true) {
        if (compensate) {
            start();
            const int repeats = 1000;
            for (int i = 0; i < repeats; ++i) {
                startLap();
                lap();
            }
            lapOverhead = (double)lapTotal/lapCount;
        }
        start();
    }
    // Explicit because std::atomic<> can't be copied/moved
    Stopwatch(const Stopwatch &other) : lapBest(other.lapBest), lapTotal(other.lapTotal), lapTotal2(other.lapTotal2), lapOverhead(other.lapOverhead), lapCount(other.lapCount) {
        lapStart.store(other.lapStart.load());
    }

    /// Clears the statistics (keeping the measured overhead) and starts a lap.
    void start() {
        lapCount = 0;
        lapTotal = lapTotal2 = 0;
        lapBest = std::numeric_limits<double>::max();
        startLap();
    }
    /// Restarts the current lap without recording anything.
    void startLap() {
        lapStart.store(now());
    }
    /// Ends the current lap, records it, starts the next one, and returns the (uncompensated) lap time in seconds.
    double lap() {
        double diff = toSeconds(now() - lapStart.load());

        if (diff < lapBest) lapBest = diff;
        lapCount++;
        lapTotal += diff;
        lapTotal2 += diff*diff;

        startLap();
        return diff;
    }
    /// Sum of all laps minus the per-lap overhead, never negative.
    double total() const {
        return std::max(0.0, lapTotal - lapCount*lapOverhead);
    }
    /// `total()` divided by the number of laps (not meaningful before the first lap).
    double mean() const {
        return total()/lapCount;
    }
    /// Variance of the raw (uncompensated) lap times, never negative.
    double var() const {
        double m = (double)lapTotal/lapCount, m2 = (double)lapTotal2/lapCount;
        return std::max(0.0, m2 - m*m);
    }
    double std() const {
        return std::sqrt(var());
    }
    /// Shortest lap minus the overhead, never negative.
    double best() const {
        return std::max(0.0, lapBest - lapOverhead);
    }
    /// The mean minus `deviations` standard deviations, but never below `best()`.
    double optimistic(double deviations=1) const {
        return std::max(best(), mean() - std()*deviations);
    }
};

} //namespace
#endif // include guard

View File

@ -1,255 +0,0 @@
#ifndef RIFF_WAVE_H_
#define RIFF_WAVE_H_
#include <vector>
#include <iostream>
#include <fstream>
// TODO: something better here that doesn't assume little-endian architecture
// Stream helpers for fixed-width unsigned integers stored least-significant byte first.
// (The template parameter isn't consulted by any of the functions.)
template<bool littleEndian=true>
struct BigEndian {
    // Reads two bytes and combines them, low byte first
    static uint32_t read16(std::istream& in) {
        unsigned char bytes[2];
        in.read(reinterpret_cast<char *>(bytes), sizeof(bytes));
        const uint32_t low = bytes[0], high = bytes[1];
        return low | (high << 8);
    }
    // Reads four bytes and combines them, low byte first
    static uint32_t read32(std::istream& in) {
        unsigned char bytes[4];
        in.read(reinterpret_cast<char *>(bytes), sizeof(bytes));
        uint32_t combined = 0;
        for (int b = 3; b >= 0; --b) {
            combined = (combined << 8) | uint32_t(bytes[b]);
        }
        return combined;
    }

    // Writes the two bytes of `value`, low byte first
    static void write16(std::ostream& out, uint16_t value) {
        char bytes[2];
        bytes[0] = char(value & 0xFF);
        bytes[1] = char((value >> 8) & 0xFF);
        out.write(bytes, sizeof(bytes));
    }
    // Writes the four bytes of `value`, low byte first
    static void write32(std::ostream& out, uint32_t value) {
        char bytes[4];
        for (int b = 0; b < 4; ++b) {
            bytes[b] = char((value >> (8*b)) & 0xFF);
        }
        out.write(bytes, sizeof(bytes));
    }
};
/// Reads and writes 16-bit PCM RIFF/WAVE files.
/// Audio is held in `samples` as interleaved doubles (16-bit values divided by 32768); `offset` is a
/// number of leading frames which `length()`, `operator[]` and `write()` skip over.
class Wav : BigEndian<true> {
    // Little-endian versions of text values
    uint32_t value_RIFF = 0x46464952;
    uint32_t value_WAVE = 0x45564157;
    uint32_t value_fmt = 0x20746d66;
    uint32_t value_data = 0x61746164;

    using BigEndian<true>::read16;
    using BigEndian<true>::read32;
    using BigEndian<true>::write16;
    using BigEndian<true>::write32;

public:
    /// Outcome of a read/write: a code plus a human-readable reason.
    struct Result {
        enum class Code {
            OK = 0,
            IO_ERROR,
            FORMAT_ERROR,
            UNSUPPORTED,
            WEIRD_CONFIG
        };
        Code code = Code::OK;
        std::string reason;

        Result(Code code, std::string reason="") : code(code), reason(reason) {}
        // Used to neatly test for success
        explicit operator bool () const {
            return code == Code::OK;
        }
        /// Prints the reason if this is a failure; returns itself so it can be chained.
        const Result & warn(std::ostream& output=std::cerr) const {
            if (!(bool)*this) {
                output << "WAV error: " << reason << std::endl;
            }
            return *this;
        }
    };

    size_t sampleRate = 48000;
    size_t channels = 1, offset = 0;
    std::vector<double> samples;
    /// Number of frames after `offset` (0 if the offset is past the end).
    size_t length() const {
        size_t perChannel = samples.size()/channels;
        return (perChannel >= offset) ? perChannel - offset : 0;
    }
    /// Resizes to `length` frames after `offset`; new samples are zero.
    void resize(size_t length) {
        samples.resize((offset + length)*channels, 0);
    }

    /// Strided view of one channel within the interleaved samples.
    template<bool isConst>
    class ChannelReader {
        using CSample = typename std::conditional<isConst, const double, double>::type;
        CSample *data;
        size_t stride;
    public:
        ChannelReader(CSample *samples, size_t channels) : data(samples), stride(channels) {}

        CSample & operator [](size_t i) {
            return data[i*stride];
        }
    };
    /// Channel `c`, indexed by frame (starting at `offset`).
    ChannelReader<false> operator [](size_t c) {
        return ChannelReader<false>(samples.data() + offset*channels + c, channels);
    }
    ChannelReader<true> operator [](size_t c) const {
        return ChannelReader<true>(samples.data() + offset*channels + c, channels);
    }

    /// Result of the most recent read/write.
    Result result = Result(Result::Code::OK);

    Wav() {}
    Wav(double sampleRate, size_t channels) : sampleRate(size_t(sampleRate)), channels(channels) {}
    Wav(double sampleRate, size_t channels, const std::vector<double> &samples) : sampleRate(size_t(sampleRate)), channels(channels), samples(samples) {}
    /// Reads `filename` immediately, printing a warning to stderr on failure (check `result`).
    Wav(std::string filename) {
        result = read(filename).warn();
    }

    enum class Format {
        PCM=1
    };
    /// Only 16-bit PCM is supported.
    bool formatIsValid(uint16_t format, uint16_t bits) const {
        if (format == (uint16_t)Format::PCM) {
            if (bits == 16) {
                return true;
            }
        }
        return false;
    }

    /// Loads `filename`, replacing `sampleRate`, `channels` and `samples`, and resetting `offset` to 0.
    Result read(std::string filename) {
        std::ifstream file;
        file.open(filename, std::ios::binary);
        if (!file.is_open()) return result = Result(Result::Code::IO_ERROR, "Failed to open file: " + filename);

        // RIFF chunk
        if (read32(file) != value_RIFF) return result = Result(Result::Code::FORMAT_ERROR, "Input is not a RIFF file");
        read32(file); // File length - we don't check this
        if (read32(file) != value_WAVE) return result = Result(Result::Code::FORMAT_ERROR, "Input is not a plain WAVE file");

        auto blockStart = file.tellg(); // start of the blocks - we will seek back to here periodically
        bool hasFormat = false, hasData = false;

        Format format = Format::PCM; // Shouldn't matter, we should always read the `fmt ` chunk before `data`
        while (!file.eof()) {
            auto blockType = read32(file), blockLength = read32(file);
            if (file.eof()) break;
            if (!hasFormat && blockType == value_fmt) {
                auto formatInt = read16(file);
                format = (Format)formatInt;
                channels = read16(file);
                if (channels < 1) return result = Result(Result::Code::FORMAT_ERROR, "Cannot have zero channels");

                sampleRate = read32(file);
                if (sampleRate < 1) return result = Result(Result::Code::FORMAT_ERROR, "Cannot have zero sampleRate");

                size_t expectedBytesPerSecond = read32(file);
                size_t bytesPerFrame = read16(file);
                size_t bitsPerSample = read16(file);
                if (!formatIsValid(formatInt, bitsPerSample)) return result = Result(Result::Code::UNSUPPORTED, "Unsupported format:bits: " + std::to_string(formatInt) + ":" + std::to_string(bitsPerSample));
                // Since it's plain WAVE, we can do some extra checks for consistency
                if (bitsPerSample*channels != bytesPerFrame*8) return result = Result(Result::Code::FORMAT_ERROR, "Format sizes don't add up");
                if (expectedBytesPerSecond != sampleRate*bytesPerFrame) return result = Result(Result::Code::FORMAT_ERROR, "Format sizes don't add up");

                hasFormat = true;
                // Rescan from the first block, in case `data` came before `fmt `
                file.clear();
                file.seekg(blockStart);
            } else if (hasFormat && blockType == value_data) {
                std::vector<double> decoded(0);
                switch (format) {
                case Format::PCM:
                    decoded.reserve(blockLength/2);
                    for (size_t i = 0; i < blockLength/2; ++i) {
                        uint16_t value = read16(file);
                        if (file.eof()) break;
                        // Reinterpret as signed 16-bit, then scale by 1/32768
                        if (value >= 32768) {
                            decoded.push_back(((double)value - 65536)/32768);
                        } else {
                            decoded.push_back((double)value/32768);
                        }
                    }
                }
                // Zero-pad a truncated final frame
                while (decoded.size()%channels != 0) {
                    decoded.push_back(0);
                }
                this->samples.swap(decoded);
                offset = 0;
                hasData = true;
            } else {
                // Either we don't recognise this block, or we've already handled its type - skip it.
                // RIFF chunks are padded to an even size, and the pad byte isn't counted in the length.
                file.ignore(std::streamsize(blockLength) + std::streamsize(blockLength & 1));
            }
        }
        if (!hasFormat) return result = Result(Result::Code::FORMAT_ERROR, "missing `fmt ` block");
        if (!hasData) return result = Result(Result::Code::FORMAT_ERROR, "missing `data` block");
        return result = Result(Result::Code::OK);
    }

    /// Writes the frames after `offset` to `filename`, clipping samples to the 16-bit range.
    Result write(std::string filename, Format format=Format::PCM) {
        if (channels == 0 || channels > 65535) return result = Result(Result::Code::WEIRD_CONFIG, "Invalid channel count");
        if (sampleRate <= 0 || sampleRate > 0xFFFFFFFFu) return result = Result(Result::Code::WEIRD_CONFIG, "Invalid sample rate");

        size_t bytesPerSample = 0;
        switch (format) {
        case Format::PCM:
            bytesPerSample = 2;
            break;
        }
        // Checked before opening, so an unknown format value doesn't truncate an existing file
        if (bytesPerSample == 0) return result = Result(Result::Code::UNSUPPORTED, "Unsupported format");

        // An offset past the end means "no audio", not a wrapped-around unsigned length
        size_t firstSample = offset*channels;
        if (firstSample > samples.size()) firstSample = samples.size();

        // File size - 44 bytes is RIFF header, "fmt" block, and "data" block header
        size_t dataLength = (samples.size() - firstSample)*bytesPerSample;
        // The RIFF length field (36 + data length) is only 32 bits
        if (dataLength > 0xFFFFFFFFu - 36) return result = Result(Result::Code::WEIRD_CONFIG, "Too much audio data for a WAV file");
        size_t fileLength = 44 + dataLength;

        std::ofstream file;
        file.open(filename, std::ios::binary);
        if (!file.is_open()) return result = Result(Result::Code::IO_ERROR, "Failed to open file: " + filename);

        // RIFF chunk
        write32(file, value_RIFF);
        write32(file, uint32_t(fileLength - 8)); // File length, excluding the RIFF header
        write32(file, value_WAVE);
        // "fmt " block
        write32(file, value_fmt);
        write32(file, 16); // block length
        write16(file, uint16_t(format));
        write16(file, uint16_t(channels));
        write32(file, uint32_t(sampleRate));
        size_t expectedBytesPerSecond = sampleRate*channels*bytesPerSample;
        write32(file, uint32_t(expectedBytesPerSecond));
        write16(file, uint16_t(channels*bytesPerSample)); // Bytes per frame
        write16(file, uint16_t(bytesPerSample*8)); // bits per sample

        // "data" block
        write32(file, value_data);
        write32(file, uint32_t(dataLength));
        switch (format) {
        case Format::PCM:
            for (size_t i = firstSample; i < samples.size(); i++) {
                double value = samples[i]*32768;
                if (value > 32767) value = 32767;
                if (value <= -32768) value = -32768;
                if (value < 0) value += 65536; // two's-complement encoding of negative values
                write16(file, (uint16_t)value);
            }
            break;
        }

        // Report failed writes (e.g. a full disk) instead of claiming success
        file.flush();
        if (!file) return result = Result(Result::Code::IO_ERROR, "Failed to write file: " + filename);
        return result = Result(Result::Code::OK);
    }

    /// Replaces the audio with the average of all channels.
    void makeMono() {
        std::vector<double> newSamples(samples.size()/channels, 0);

        for (size_t channel = 0; channel < channels; ++channel) {
            for (size_t i = 0; i < newSamples.size(); ++i) {
                newSamples[i] += samples[i*channels + channel];
            }
        }
        for (size_t i = 0; i < newSamples.size(); ++i) {
            newSamples[i] /= channels;
        }

        channels = 1;
        samples.swap(newSamples);
    }
};
#endif // RIFF_WAVE_H_

View File

@ -33,17 +33,12 @@ namespace _impl {
template<typename Sample=float, class RandomEngine=void>
struct SignalsmithStretch {
static constexpr size_t version[3] = {1, 3, 1};
static constexpr size_t version[3] = {1, 3, 2};
SignalsmithStretch() : randomEngine(std::random_device{}()) {}
SignalsmithStretch(long seed) : randomEngine(seed) {}
int blockSamples() const {
return int(stft.blockSamples());
}
int intervalSamples() const {
return int(stft.defaultInterval());
}
// The difference between the internal position (centre of a block) and the input samples you're supplying
int inputLatency() const {
return int(stft.analysisLatency());
}
@ -81,7 +76,6 @@ struct SignalsmithStretch {
stft.reset(0.1);
stashedInput = stft.input;
stashedOutput = stft.output;
tmpBuffer.resize(blockSamples + intervalSamples);
bands = int(stft.bands());
channelBands.assign(bands*channels, Band());
@ -94,6 +88,19 @@ struct SignalsmithStretch {
blockProcess = {};
formantMetric.resize(bands + 2);
tmpProcessBuffer.resize(blockSamples + intervalSamples);
tmpPreRollBuffer.resize(outputLatency()*channels);
}
// For querying the existing config
// The STFT's block length in samples, as an `int`
int blockSamples() const {
    const auto length = stft.blockSamples();
    return static_cast<int>(length);
}
// The STFT's default interval in samples, as an `int`
int intervalSamples() const {
    const auto interval = stft.defaultInterval();
    return static_cast<int>(interval);
}
// Returns the stored `_splitComputation` setting
bool splitComputation() const {
return _splitComputation;
}
/// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
@ -127,14 +134,15 @@ struct SignalsmithStretch {
formantBaseFreq = baseFreq;
}
// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
// Provide previous input ("pre-roll") to smoothly change the input location without interrupting the output. This doesn't do any calculation, just copies input to a buffer.
// You should ideally feed it `seekLength()` frames of input, unless it's directly after a `.reset()` (in which case `.outputSeek()` might be a better choice)
template<class Inputs>
void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
tmpBuffer.resize(0);
tmpBuffer.resize(stft.blockSamples() + stft.defaultInterval());
tmpProcessBuffer.resize(0);
tmpProcessBuffer.resize(stft.blockSamples() + stft.defaultInterval());
int startIndex = std::max<int>(0, inputSamples - int(tmpBuffer.size())); // start position in input
int padStart = int(tmpBuffer.size() + startIndex) - inputSamples; // start position in tmpBuffer
int startIndex = std::max<int>(0, inputSamples - int(tmpProcessBuffer.size())); // start position in input
int padStart = int(tmpProcessBuffer.size() + startIndex) - inputSamples; // start position in tmpProcessBuffer
Sample totalEnergy = 0;
for (int c = 0; c < channels; ++c) {
@ -142,12 +150,12 @@ struct SignalsmithStretch {
for (int i = startIndex; i < inputSamples; ++i) {
Sample s = inputChannel[i];
totalEnergy += s*s;
tmpBuffer[i - startIndex + padStart] = s;
tmpProcessBuffer[i - startIndex + padStart] = s;
}
stft.writeInput(c, tmpBuffer.size(), tmpBuffer.data());
stft.writeInput(c, tmpProcessBuffer.size(), tmpProcessBuffer.data());
}
stft.moveInput(tmpBuffer.size());
stft.moveInput(tmpProcessBuffer.size());
if (totalEnergy >= noiseFloor) {
silenceCounter = 0;
silenceFirst = true;
@ -155,6 +163,48 @@ struct SignalsmithStretch {
didSeek = true;
seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
}
// One STFT block plus one default interval, in samples
int seekLength() const {
    const auto frames = stft.blockSamples() + stft.defaultInterval();
    return int(frames);
}
// Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample.
// The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that.
template<class Inputs>
void outputSeek(Inputs &&inputs, int inputLength) {
// TODO: add fade-out parameter to avoid clicks, instead of doing a full reset
reset();
// Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll
int surplusInput = std::max<int>(inputLength - inputLatency(), 0);
Sample playbackRate = surplusInput/Sample(outputLatency());
// Move the input position to the start of the sound
int seekSamples = inputLength - surplusInput;
seek(inputs, seekSamples, playbackRate);
tmpPreRollBuffer.resize(outputLatency()*channels);
struct BufferOutput {
Sample *samples;
int length;
Sample * operator[](int c) {
return samples + c*length;
}
} preRollOutput{tmpPreRollBuffer.data(), outputLatency()};
// Use the surplus input to produce pre-roll output
OffsetIO<Inputs> offsetInput{inputs, seekSamples};
process(offsetInput, surplusInput, preRollOutput, preRollOutput.length);
// put the thing down, flip it and reverse it
for (auto &v : tmpPreRollBuffer) v = -v;
for (int c = 0; c < channels; ++c) {
std::reverse(preRollOutput[c], preRollOutput[c] + preRollOutput.length);
stft.addOutput(c, preRollOutput.length, preRollOutput[c]);
}
}
// Input length to hand to `.outputSeek()` for a given playback rate:
// the input latency, plus enough input (at that rate) to cover the output latency.
int outputSeekLength(Sample playbackRate) const {
    const auto length = inputLatency() + playbackRate*outputLatency();
    return int(length); // truncated to whole samples
}
template<class Inputs, class Outputs>
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
@ -165,14 +215,14 @@ struct SignalsmithStretch {
auto copyInput = [&](int toIndex){
int length = std::min<int>(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput);
tmpBuffer.resize(length);
tmpProcessBuffer.resize(length);
int offset = toIndex - length;
for (int c = 0; c < channels; ++c) {
auto &&inputBuffer = inputs[c];
for (int i = 0; i < length; ++i) {
tmpBuffer[i] = inputBuffer[i + offset];
tmpProcessBuffer[i] = inputBuffer[i + offset];
}
stft.writeInput(c, length, tmpBuffer.data());
stft.writeInput(c, length, tmpProcessBuffer.data());
}
stft.moveInput(length);
prevCopiedInput = toIndex;
@ -372,28 +422,38 @@ struct SignalsmithStretch {
#endif
}
// Read the remaining output, providing no further input. `outputSamples` should ideally be at least `.outputLatency()`
// Read the remaining output, providing no further input. If `outputSamples` is more than one interval, it will compute additional blocks assuming a zero-valued input
template<class Outputs>
void flush(Outputs &&outputs, int outputSamples) {
int plainOutput = std::min<int>(outputSamples, int(stft.blockSamples()));
int foldedBackOutput = std::min<int>(outputSamples, int(stft.blockSamples()) - plainOutput);
void flush(Outputs &&outputs, int outputSamples, Sample playbackRate=0) {
struct Zeros {
struct Channel {
Sample operator[](int) {
return 0;
}
};
Channel operator[](int) {
return {};
}
} zeros;
// If we're asked for more than an interval of extra output, then zero-pad the input
int outputBlock = std::max<int>(0, outputSamples - stft.defaultInterval());
if (outputBlock > 0) process(zeros, outputBlock*playbackRate, outputs, outputBlock);
int tailSamples = outputSamples - outputBlock; // at most one interval
tmpProcessBuffer.resize(tailSamples);
stft.finishOutput(1);
for (int c = 0; c < channels; ++c) {
tmpBuffer.resize(plainOutput);
stft.readOutput(c, plainOutput, tmpBuffer.data());
stft.readOutput(c, tailSamples, tmpProcessBuffer.data());
auto &&outputChannel = outputs[c];
for (int i = 0; i < plainOutput; ++i) {
// TODO: plain output should be gain-
outputChannel[i] = tmpBuffer[i];
for (int i = 0; i < tailSamples; ++i) {
outputChannel[outputBlock + i] = tmpProcessBuffer[i];
}
tmpBuffer.resize(foldedBackOutput);
stft.readOutput(c, plainOutput, foldedBackOutput, tmpBuffer.data());
for (int i = 0; i < foldedBackOutput; ++i) {
outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
stft.readOutput(c, tailSamples, tailSamples, tmpProcessBuffer.data());
for (int i = 0; i < tailSamples; ++i) {
outputChannel[outputBlock + tailSamples - 1 - i] -= tmpProcessBuffer[i];
}
}
stft.reset(0.1);
stft.reset(0.1f);
// Reset the phase-vocoder stuff, so the next block gets a fresh start
for (int c = 0; c < channels; ++c) {
auto channelBands = bandsForChannel(c);
@ -403,65 +463,30 @@ struct SignalsmithStretch {
}
}
// Process a complete audio buffer all in one go
template<class Inputs, class Outputs>
bool exact(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
if (outputSamples < outputLatency()*2) return false; // too short for this
struct ZeroPaddedInput {
Inputs &inputs;
int offset, length;
struct Channel {
ZeroPaddedInput &zpi;
int channel;
Sample operator[](int i) {
if (zpi.offset + i < zpi.length) return zpi.inputs[channel][zpi.offset + i];
return 0;
Sample playbackRate = inputSamples/Sample(outputSamples);
auto seekLength = outputSeekLength(playbackRate);
if (inputSamples < seekLength) {
// too short for this - zero the output just to be polite
for (int c = 0; c < channels; ++c) {
auto &&channel = outputs[c];
for (int i = 0; i < outputSamples; ++i) {
channel[i] = 0;
}
};
Channel operator[](int c){
return {*this, c};
}
} zpi{inputs, inputLatency(), inputSamples};
seek(inputs, inputLatency(), Sample(inputSamples)/outputSamples); // start positioned on the centre of the input
process(zpi, inputSamples, outputs, outputSamples);
// Fold the first bit of the input back onto itself
for (int c = 0; c < channels; ++c) {
auto &&channel = outputs[c];
for (int i = 0; i < std::min<int>(outputSamples - outputLatency(), outputLatency()); ++i) {
channel[i + outputLatency()] -= channel[outputLatency() - 1 - i];
}
}
// Shuffle everything along to compensate for output latency
for (int c = 0; c < channels; ++c) {
auto &&channel = outputs[c];
for (int i = 0; i < outputSamples - outputLatency(); ++i) {
channel[i] = channel[i + outputLatency()];
}
return false;
}
struct OffsetOutput {
Outputs &outputs;
int offset;
outputSeek(inputs, seekLength);
struct Channel {
OffsetOutput &oo;
int channel;
int outputIndex = outputSamples - seekLength/playbackRate;
OffsetIO<Inputs> offsetInput{inputs, seekLength};
process(offsetInput, inputSamples - seekLength, outputs, outputIndex);
decltype(outputs[0][0]) operator[](int i) {
return oo.outputs[channel][oo.offset + i];
}
};
Channel operator[](int c){
return {*this, c};
}
} oo{outputs, outputSamples - outputLatency()};
// Get the final chunk - extra output is already folded back as part of this
flush(oo, outputLatency());
OffsetIO<Outputs> offsetOutput{outputs, outputIndex};
flush(offsetOutput, outputSamples - outputIndex, playbackRate);
return true;
}
@ -496,7 +521,7 @@ private:
typename STFT::Input stashedInput;
typename STFT::Output stashedOutput;
std::vector<Sample> tmpBuffer;
std::vector<Sample> tmpProcessBuffer, tmpPreRollBuffer;
int channels = 0, bands = 0;
int prevInputOffset = -1;
@ -956,20 +981,27 @@ private:
freqEstimate = freqToBand(formantBaseFreq);
if (formantBaseFreq <= 0) freqEstimate = estimateFrequency();
for (int b = 0; b < bands; ++b) {
formantMetric[b] = std::sqrt(formantMetric[b]);
}
} else if (step-- == 0) {
Sample slew = 1/(freqEstimate*0.5 + 1);
Sample decay = 1 - 1/(freqEstimate*0.5 + 1);
Sample e = 0;
for (size_t repeat = 0; repeat < 2; ++repeat) {
for (int b = bands - 1; b >= 0; --b) {
e += (formantMetric[b] - e)*slew;
e = std::max(formantMetric[b], e*decay);
formantMetric[b] = e;
}
for (int b = 0; b < bands; ++b) {
e += (formantMetric[b] - e)*slew;
e = std::max(formantMetric[b], e*decay);
formantMetric[b] = e;
}
}
decay = 1/decay;
for (size_t repeat = 0; repeat < 2; ++repeat) {
for (int b = bands - 1; b >= 0; --b) {
e = std::min(formantMetric[b], e*decay);
formantMetric[b] = e;
}
for (int b = 0; b < bands; ++b) {
e = std::min(formantMetric[b], e*decay);
formantMetric[b] = e;
}
}
@ -992,7 +1024,7 @@ private:
Sample targetE = getFormant(freqToBand(outputF));
Sample formantRatio = targetE/(inputE + Sample(1e-30));
Sample energyRatio = formantRatio*formantRatio;
Sample energyRatio = formantRatio;
for (int c = 0; c < channels; ++c) {
Band *bins = bandsForChannel(c);
@ -1002,6 +1034,26 @@ private:
}
}
}
// Proxy class to avoid copying/allocating anything:
// wraps a multi-channel buffer (indexed as `io[channel][sample]`) so that sample index `i` refers to `i + offset` in the original.
template<class Io>
struct OffsetIO {
    Io &io;
    int offset;

    // One channel of the wrapped buffer, with the offset applied on each access
    struct Channel {
        Io &source;
        int channelIndex;
        int shift;

        auto operator[](int i) -> decltype(source[0][0]) {
            return source[channelIndex][i + shift];
        }
    };

    Channel operator[](int c) {
        Channel view{io, c, offset};
        return view;
    }
};
};
}} // namespace

View File

@ -91,15 +91,11 @@ function registerWorkletProcessor(Module, audioNodeKey) {
latestSegment = this.timeMap.pop();
}
let obj = {
active: latestSegment.active,
let obj = Object.assign({}, latestSegment);
Object.assign(obj, {
input: null,
output: outputTime,
rate: latestSegment.rate,
semitones: latestSegment.semitones,
loopStart: latestSegment.loopStart,
loopEnd: latestSegment.loopEnd
};
});
Object.assign(obj, objIn);
if (obj.input === null) {
let rate = (latestSegment.active ? latestSegment.rate : 0);

View File

@ -92,15 +92,11 @@ function registerWorkletProcessor(Module, audioNodeKey) {
latestSegment = this.timeMap.pop();
}
let obj = {
active: latestSegment.active,
let obj = Object.assign({}, latestSegment);
Object.assign(obj, {
input: null,
output: outputTime,
rate: latestSegment.rate,
semitones: latestSegment.semitones,
loopStart: latestSegment.loopStart,
loopEnd: latestSegment.loopEnd
};
});
Object.assign(obj, objIn);
if (obj.input === null) {
let rate = (latestSegment.active ? latestSegment.rate : 0);

View File

@ -1,6 +1,6 @@
{
"name": "signalsmith-stretch",
"version": "1.3.1",
"version": "1.3.2",
"description": "JS/WASM release of the Signalsmith Stretch library",
"main": "SignalsmithStretch.mjs",
"exports": {

View File

@ -72,15 +72,11 @@ function registerWorkletProcessor(Module, audioNodeKey) {
latestSegment = this.timeMap.pop();
}
let obj = {
active: latestSegment.active,
let obj = Object.assign({}, latestSegment);
Object.assign(obj, {
input: null,
output: outputTime,
rate: latestSegment.rate,
semitones: latestSegment.semitones,
loopStart: latestSegment.loopStart,
loopEnd: latestSegment.loopEnd
};
});
Object.assign(obj, objIn);
if (obj.input === null) {
let rate = (latestSegment.active ? latestSegment.rate : 0);