Add .outputSeek() for playing back samples
This commit is contained in:
parent
12de19e05d
commit
c3fcda8563
34
cmd/main.cpp
34
cmd/main.cpp
@ -51,18 +51,36 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
|
stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
|
||||||
|
|
||||||
However, we'll do it in separate stages to demonstrate more of the API.
|
However, we'll do it in separate stages to show more of the API. */
|
||||||
*/
|
|
||||||
|
|
||||||
// First, an "output seek"
|
// First, an "output seek", where we provide a chunk of input.
|
||||||
// This is suitable for starting playback of a sample at a given playback rate:
|
// This is suitable for starting playback of a sample at a given playback rate.
|
||||||
auto seekSamples = stretch.outputSeekSamples(1/time);
|
auto seekLength = stretch.outputSeekLength(1/time);
|
||||||
stretch.outputSeek(inWav, seekSamples);
|
stretch.outputSeek(inWav, seekLength);
|
||||||
// At this point, the next output samples we get will correspond to the beginning of the audio file
|
// At this point, the next output samples we get will correspond to the beginning of the audio file.
|
||||||
|
|
||||||
|
// We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`).
|
||||||
|
int outputIndex = outputLength - stretch.outputLatency();
|
||||||
|
|
||||||
|
// Stretch's internal output position is slightly ahead of the output samples we get
|
||||||
|
int outputPos = outputLength + stretch.outputLatency();
|
||||||
|
// Time-map: where do we want the input position to be at that moment?
|
||||||
|
int inputPos = std::round(outputPos/time);
|
||||||
|
// And therefore which input samples do we need to be supplying?
|
||||||
|
int inputIndex = inputPos + stretch.inputLatency();
|
||||||
|
|
||||||
stretch.exact(inWav, int(inputLength), outWav, int(outputLength));
|
// In this particular case, our `inputPos` will be at the end of the file
|
||||||
|
// and `inputIndex` will be beyond the end, so we pad with 0s to have enough input
|
||||||
|
inWav.resize(inputIndex);
|
||||||
|
|
||||||
|
// OK, go for it
|
||||||
|
inWav.offset = seekLength;
|
||||||
|
stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex);
|
||||||
|
|
||||||
|
// And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks.
|
||||||
|
outWav.offset = outputIndex;
|
||||||
|
stretch.flush(outWav, outputLength - outputIndex);
|
||||||
|
outWav.offset = 0;
|
||||||
|
|
||||||
if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
|
if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -38,12 +38,7 @@ struct SignalsmithStretch {
|
|||||||
SignalsmithStretch() : randomEngine(std::random_device{}()) {}
|
SignalsmithStretch() : randomEngine(std::random_device{}()) {}
|
||||||
SignalsmithStretch(long seed) : randomEngine(seed) {}
|
SignalsmithStretch(long seed) : randomEngine(seed) {}
|
||||||
|
|
||||||
int blockSamples() const {
|
// The difference between the internal position (centre of a block) and the input samples you're supplying
|
||||||
return int(stft.blockSamples());
|
|
||||||
}
|
|
||||||
int intervalSamples() const {
|
|
||||||
return int(stft.defaultInterval());
|
|
||||||
}
|
|
||||||
int inputLatency() const {
|
int inputLatency() const {
|
||||||
return int(stft.analysisLatency());
|
return int(stft.analysisLatency());
|
||||||
}
|
}
|
||||||
@ -81,7 +76,6 @@ struct SignalsmithStretch {
|
|||||||
stft.reset(0.1);
|
stft.reset(0.1);
|
||||||
stashedInput = stft.input;
|
stashedInput = stft.input;
|
||||||
stashedOutput = stft.output;
|
stashedOutput = stft.output;
|
||||||
tmpBuffer.resize(blockSamples + intervalSamples);
|
|
||||||
|
|
||||||
bands = int(stft.bands());
|
bands = int(stft.bands());
|
||||||
channelBands.assign(bands*channels, Band());
|
channelBands.assign(bands*channels, Band());
|
||||||
@ -94,6 +88,18 @@ struct SignalsmithStretch {
|
|||||||
|
|
||||||
blockProcess = {};
|
blockProcess = {};
|
||||||
formantMetric.resize(bands + 2);
|
formantMetric.resize(bands + 2);
|
||||||
|
|
||||||
|
tmpBuffer.resize(std::max(outputLatency()*channels, blockSamples + intervalSamples));
|
||||||
|
}
|
||||||
|
// For querying the existing config
|
||||||
|
int blockSamples() const {
|
||||||
|
return int(stft.blockSamples());
|
||||||
|
}
|
||||||
|
int intervalSamples() const {
|
||||||
|
return int(stft.defaultInterval());
|
||||||
|
}
|
||||||
|
bool splitComputation() const {
|
||||||
|
return _splitComputation;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
|
/// Frequency multiplier, and optional tonality limit (as multiple of sample-rate)
|
||||||
@ -127,7 +133,8 @@ struct SignalsmithStretch {
|
|||||||
formantBaseFreq = baseFreq;
|
formantBaseFreq = baseFreq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Provide previous input ("pre-roll"), without affecting the speed calculation. You should ideally feed it one block-length + one interval
|
// Provide previous input ("pre-roll") to smoothly change the input location without interrupting the output. This doesn't do any calculation, just copies input to a buffer.
|
||||||
|
// You should ideally feed it `seekLength()` frames of input, unless it's directly after a `.reset()` (in which case `.outputSeek()` might be a better choice)
|
||||||
template<class Inputs>
|
template<class Inputs>
|
||||||
void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
|
void seek(Inputs &&inputs, int inputSamples, double playbackRate) {
|
||||||
tmpBuffer.resize(0);
|
tmpBuffer.resize(0);
|
||||||
@ -155,6 +162,59 @@ struct SignalsmithStretch {
|
|||||||
didSeek = true;
|
didSeek = true;
|
||||||
seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
|
seekTimeFactor = (playbackRate*stft.defaultInterval() > 1) ? 1/playbackRate : stft.defaultInterval();
|
||||||
}
|
}
|
||||||
|
int seekLength() const {
|
||||||
|
return int(stft.blockSamples() + stft.defaultInterval());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample.
|
||||||
|
// The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that.
|
||||||
|
template<class Inputs>
|
||||||
|
void outputSeek(Inputs &&inputs, int inputLength) {
|
||||||
|
// TODO: add fade-out parameter to avoid clicks, instead of doing a full reset
|
||||||
|
reset();
|
||||||
|
// Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll
|
||||||
|
int surplusInput = std::max<int>(inputLength - inputLatency(), 0);
|
||||||
|
Sample playbackRate = surplusInput/Sample(outputLatency());
|
||||||
|
|
||||||
|
// Move the input position to the start of the sound
|
||||||
|
int seekSamples = inputLength - surplusInput;
|
||||||
|
seek(inputs, seekSamples, playbackRate);
|
||||||
|
|
||||||
|
// Awkward proxy classes to avoid copying/allocating anything
|
||||||
|
struct OffsetInput {
|
||||||
|
Inputs &inputs;
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
struct Channel {
|
||||||
|
Inputs &inputs;
|
||||||
|
int channel;
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
Sample operator[](int i) {
|
||||||
|
return Sample(inputs[channel][i + offset]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Channel operator[](int c) {
|
||||||
|
return {inputs, c, offset};
|
||||||
|
}
|
||||||
|
} offsetInput{inputs, seekSamples};
|
||||||
|
tmpBuffer.resize(outputLatency()*channels);
|
||||||
|
struct PreRollOutput {
|
||||||
|
Sample *samples;
|
||||||
|
int length;
|
||||||
|
|
||||||
|
Sample * operator[](int c) {
|
||||||
|
return samples + c*length;
|
||||||
|
}
|
||||||
|
} preRollOutput{tmpBuffer.data(), outputLatency()};
|
||||||
|
|
||||||
|
// Use the surplus input to produce pre-roll output
|
||||||
|
process(offsetInput, surplusInput, preRollOutput, outputLatency());
|
||||||
|
// TODO: put the thing down, flip it and reverse it
|
||||||
|
}
|
||||||
|
int outputSeekLength(Sample playbackRate) const {
|
||||||
|
return inputLatency() + playbackRate*outputLatency();
|
||||||
|
}
|
||||||
|
|
||||||
template<class Inputs, class Outputs>
|
template<class Inputs, class Outputs>
|
||||||
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
|
void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) {
|
||||||
@ -383,7 +443,6 @@ struct SignalsmithStretch {
|
|||||||
stft.readOutput(c, plainOutput, tmpBuffer.data());
|
stft.readOutput(c, plainOutput, tmpBuffer.data());
|
||||||
auto &&outputChannel = outputs[c];
|
auto &&outputChannel = outputs[c];
|
||||||
for (int i = 0; i < plainOutput; ++i) {
|
for (int i = 0; i < plainOutput; ++i) {
|
||||||
// TODO: plain output should be gain-
|
|
||||||
outputChannel[i] = tmpBuffer[i];
|
outputChannel[i] = tmpBuffer[i];
|
||||||
}
|
}
|
||||||
tmpBuffer.resize(foldedBackOutput);
|
tmpBuffer.resize(foldedBackOutput);
|
||||||
@ -392,7 +451,7 @@ struct SignalsmithStretch {
|
|||||||
outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
|
outputChannel[outputSamples - 1 - i] -= tmpBuffer[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stft.reset(0.1);
|
stft.reset(0.1f);
|
||||||
|
|
||||||
// Reset the phase-vocoder stuff, so the next block gets a fresh start
|
// Reset the phase-vocoder stuff, so the next block gets a fresh start
|
||||||
for (int c = 0; c < channels; ++c) {
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user