From 2dc12fb1caccf5ff9145a468d01dd9d6bfbf1362 Mon Sep 17 00:00:00 2001 From: falkTX Date: Mon, 14 Mar 2022 23:19:16 +0000 Subject: [PATCH] Force 32bit alignment for vectorized operations, fixes 32bit build Signed-off-by: falkTX --- deps/PawPaw | 2 +- include/dsp/fir.hpp | 164 +++++++++++++++++ include/engine/Port.hpp | 2 +- include/simd/Vector.hpp | 374 +++++++++++++++++++++++++++++++++++++++ plugins/Makefile | 2 +- src/Makefile | 4 +- src/Makefile.cardinal.mk | 2 +- src/override/minblep.cpp | 111 ++++++++++++ 8 files changed, 656 insertions(+), 5 deletions(-) create mode 100644 include/dsp/fir.hpp create mode 100644 include/simd/Vector.hpp create mode 100644 src/override/minblep.cpp diff --git a/deps/PawPaw b/deps/PawPaw index b22a46c..9fa141a 160000 --- a/deps/PawPaw +++ b/deps/PawPaw @@ -1 +1 @@ -Subproject commit b22a46cfb0291d5523099daf2d9facb7af9836c3 +Subproject commit 9fa141a1050cfd81577f068135218723441b8ac5 diff --git a/include/dsp/fir.hpp b/include/dsp/fir.hpp new file mode 100644 index 0000000..7d1c737 --- /dev/null +++ b/include/dsp/fir.hpp @@ -0,0 +1,164 @@ +/* + * DISTRHO Cardinal Plugin + * Copyright (C) 2021-2022 Filipe Coelho + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For a full copy of the GNU General Public License see the LICENSE file. + */ + +/** + * This file is an edited version of VCVRack's dsp/fir.hpp + * Copyright (C) 2016-2021 VCV. + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. + */ + +#pragma once + +#include + +#include + + +namespace rack { +namespace dsp { + + +/** Performs a direct sum convolution */ +inline float convolveNaive(const float* in, const float* kernel, int len) { + float y = 0.f; + for (int i = 0; i < len; i++) { + y += in[len - 1 - i] * kernel[i]; + } + return y; +} + +/** Computes the impulse response of a boxcar lowpass filter */ +inline void boxcarLowpassIR(float* out, int len, float cutoff = 0.5f) { + for (int i = 0; i < len; i++) { + float t = i - (len - 1) / 2.f; + out[i] = 2 * cutoff * sinc(2 * cutoff * t); + } +} + + +struct RealTimeConvolver { + // `kernelBlocks` number of contiguous FFT blocks of size `blockSize` + // indexed by [i * blockSize*2 + j] + float* kernelFfts = NULL; + float* inputFfts = NULL; + float* outputTail = NULL; + float* tmpBlock = NULL; + size_t blockSize; + size_t kernelBlocks = 0; + size_t inputPos = 0; + PFFFT_Setup* pffft; + + /** `blockSize` is the size of each FFT block. It should be >=32 and a power of 2. */ + RealTimeConvolver(size_t blockSize) { + this->blockSize = blockSize; + pffft = pffft_new_setup(blockSize * 2, PFFFT_REAL); + outputTail = (float*) pffft_aligned_malloc(sizeof(float) * blockSize); + std::memset(outputTail, 0, blockSize * sizeof(float)); + tmpBlock = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2); + std::memset(tmpBlock, 0, blockSize * 2 * sizeof(float)); + } + + ~RealTimeConvolver() { + setKernel(NULL, 0); + pffft_aligned_free(outputTail); + pffft_aligned_free(tmpBlock); + pffft_destroy_setup(pffft); + } + + void setKernel(const float* kernel, size_t length) { + // Clear existing kernel + if (kernelFfts) { + pffft_aligned_free(kernelFfts); + kernelFfts = NULL; + } + if (inputFfts) { + pffft_aligned_free(inputFfts); + inputFfts = NULL; + } + kernelBlocks = 0; + inputPos = 0; + + if (kernel && length > 0) { + // Round up to the nearest factor of `blockSize` + kernelBlocks = (length - 1) / blockSize + 1; + + // Allocate blocks + kernelFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2 * kernelBlocks); + inputFfts = (float*) pffft_aligned_malloc(sizeof(float) * blockSize * 2 * kernelBlocks); + std::memset(inputFfts, 0, sizeof(float) * blockSize * 2 * kernelBlocks); + + for (size_t i = 0; i < kernelBlocks; i++) { + // Pad each block with zeros + std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2); + size_t len = std::min((int) blockSize, (int)(length - i * blockSize)); + std::memcpy(tmpBlock, &kernel[i * blockSize], sizeof(float)*len); + // Compute fft + pffft_transform(pffft, tmpBlock, &kernelFfts[blockSize * 2 * i], NULL, PFFFT_FORWARD); + } + } + } + + /** Applies reverb to input + input and output must be of size `blockSize` + */ + void processBlock(const float* input, float* output) { + if (kernelBlocks == 0) { + std::memset(output, 0, sizeof(float) * blockSize); + return; + } + + // Step input position + inputPos = (inputPos + 1) % kernelBlocks; + // Pad block with zeros + std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2); + std::memcpy(tmpBlock, input, sizeof(float) * blockSize); + // Compute input fft + pffft_transform(pffft, tmpBlock, &inputFfts[blockSize * 2 * inputPos], NULL, PFFFT_FORWARD); + // Create output fft + std::memset(tmpBlock, 0, sizeof(float) * blockSize * 2); + // convolve input fft by kernel fft + // Note: This is the CPU bottleneck loop + for (size_t i = 0; i < kernelBlocks; i++) { + size_t pos = (inputPos - i + kernelBlocks) % kernelBlocks; + pffft_zconvolve_accumulate(pffft, &kernelFfts[blockSize * 2 * i], &inputFfts[blockSize * 2 * pos], tmpBlock, 1.f); + } + // Compute output + pffft_transform(pffft, tmpBlock, tmpBlock, NULL, PFFFT_BACKWARD); + // Add block tail from last output block + for (size_t i = 0; i < blockSize; i++) { + tmpBlock[i] += outputTail[i]; + } + // Copy output block to output + float scale = 1.f / (blockSize * 2); + for (size_t i = 0; i < blockSize; i++) { + // Scale based on FFT + output[i] = tmpBlock[i] * scale; + } + // Set tail + for (size_t i = 0; i < blockSize; i++) { + outputTail[i] = tmpBlock[i + blockSize]; + } + } +}; + + +} // namespace dsp +} // namespace rack diff --git a/include/engine/Port.hpp b/include/engine/Port.hpp index 87c0e2a..b518870 100644 --- a/include/engine/Port.hpp +++ b/include/engine/Port.hpp @@ -48,7 +48,7 @@ struct Port { /** Voltage of the port. */ /** NOTE alignas is required in order to allow SSE usage. Consecutive data (like in a vector) would otherwise pack Ports in a way that breaks SSE. */ - union alignas(PORT_MAX_CHANNELS) { + union alignas(32) { /** Unstable API. Use getVoltage() and setVoltage() instead. */ float voltages[PORT_MAX_CHANNELS] = {}; /** DEPRECATED. Unstable API. Use getVoltage() and setVoltage() instead. */ diff --git a/include/simd/Vector.hpp b/include/simd/Vector.hpp new file mode 100644 index 0000000..b1640cd --- /dev/null +++ b/include/simd/Vector.hpp @@ -0,0 +1,374 @@ +/* + * DISTRHO Cardinal Plugin + * Copyright (C) 2021-2022 Filipe Coelho + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For a full copy of the GNU General Public License see the LICENSE file. + */ + +/** + * This file is an edited version of VCVRack's simd/Vector.hpp + * Copyright (C) 2016-2021 VCV. + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. + */ + +#pragma once + +#include +#include + + +namespace rack { + + +/** Abstraction of aligned types for SIMD computation +*/ +namespace simd { + + +/** Generic class for vector types. + +This class is designed to be used just like you use scalars, with extra features for handling bitwise logic, conditions, loading, and storing. + +Example: + + float a[4], b[4]; + float_4 a = float_4::load(in); + float_4 b = 2.f * a / (1 - a); + b *= sin(2 * M_PI * a); + b.store(out); +*/ +template +struct Vector; + + +/** Wrapper for `__m128` representing an aligned vector of 4 single-precision float values. +*/ +template <> +struct Vector { + using type = float; + constexpr static int size = 4; + + /** NOTE alignas is required in order to allow SSE usage. */ + union alignas(32) { + __m128 v; + /** Accessing this array of scalars is slow and defeats the purpose of vectorizing. + */ + float s[4]; + }; + + /** Constructs an uninitialized vector. */ + Vector() = default; + + /** Constructs a vector from a native `__m128` type. */ + Vector(__m128 v) : v(v) {} + + /** Constructs a vector with all elements set to `x`. */ + Vector(float x) { + v = _mm_set1_ps(x); + } + + /** Constructs a vector from four scalars. */ + Vector(float x1, float x2, float x3, float x4) { + v = _mm_setr_ps(x1, x2, x3, x4); + } + + /** Returns a vector with all 0 bits. */ + static Vector zero() { + return Vector(_mm_setzero_ps()); + } + + /** Returns a vector with all 1 bits. */ + static Vector mask() { + return Vector(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))); + } + + /** Reads an array of 4 values. + On little-endian machines (e.g. x86_64), the order is reversed, so `x[0]` corresponds to `vector.s[3]`. + */ + static Vector load(const float* x) { + /* + My benchmarks show that _mm_loadu_ps() performs equally as fast as _mm_load_ps() when data is actually aligned. + This post seems to agree. https://stackoverflow.com/a/20265193/272642 + I therefore use _mm_loadu_ps() for generality, so you can load unaligned arrays using the same function (although load aligned arrays if you can for best performance). + */ + return Vector(_mm_loadu_ps(x)); + } + + /** Writes an array of 4 values. + On little-endian machines (e.g. x86_64), the order is reversed, so `x[0]` corresponds to `vector.s[3]`. + */ + void store(float* x) { + _mm_storeu_ps(x, v); + } + + /** Accessing vector elements individually is slow and defeats the purpose of vectorizing. + However, this operator is convenient when writing simple serial code in a non-bottlenecked section. + */ + float& operator[](int i) { + return s[i]; + } + const float& operator[](int i) const { + return s[i]; + } + + // Conversions + Vector(Vector a); + // Casts + static Vector cast(Vector a); +}; + + +template <> +struct Vector { + using type = int32_t; + constexpr static int size = 4; + + /** NOTE alignas is required in order to allow SSE usage. */ + union alignas(32) { + __m128i v; + int32_t s[4]; + }; + + Vector() = default; + Vector(__m128i v) : v(v) {} + Vector(int32_t x) { + v = _mm_set1_epi32(x); + } + Vector(int32_t x1, int32_t x2, int32_t x3, int32_t x4) { + v = _mm_setr_epi32(x1, x2, x3, x4); + } + static Vector zero() { + return Vector(_mm_setzero_si128()); + } + static Vector mask() { + return Vector(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + } + static Vector load(const int32_t* x) { + // HACK + // Use _mm_loadu_si128() because GCC doesn't support _mm_loadu_si32() + return Vector(_mm_loadu_si128((const __m128i*) x)); + } + void store(int32_t* x) { + // HACK + // Use _mm_storeu_si128() because GCC doesn't support _mm_storeu_si32() + _mm_storeu_si128((__m128i*) x, v); + } + int32_t& operator[](int i) { + return s[i]; + } + const int32_t& operator[](int i) const { + return s[i]; + } + Vector(Vector a); + static Vector cast(Vector a); +}; + + +// Conversions and casts + + +inline Vector::Vector(Vector a) { + v = _mm_cvtepi32_ps(a.v); +} + +inline Vector::Vector(Vector a) { + v = _mm_cvttps_epi32(a.v); +} + +inline Vector Vector::cast(Vector a) { + return Vector(_mm_castsi128_ps(a.v)); +} + +inline Vector Vector::cast(Vector a) { + return Vector(_mm_castps_si128(a.v)); +} + + +// Operator overloads + + +/** `a @ b` */ +#define DECLARE_VECTOR_OPERATOR_INFIX(t, s, operator, func) \ + inline Vector operator(const Vector& a, const Vector& b) { \ + return Vector(func(a.v, b.v)); \ + } + +/** `a @= b` */ +#define DECLARE_VECTOR_OPERATOR_INCREMENT(t, s, operator, opfunc) \ + inline Vector& operator(Vector& a, const Vector& b) { \ + return a = opfunc(a, b); \ + } + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator+, _mm_add_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator+, _mm_add_epi32) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator-, _mm_sub_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator-, _mm_sub_epi32) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator*, _mm_mul_ps) +// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator*, NOT AVAILABLE IN SSE3) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator/, _mm_div_ps) +// DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator/, NOT AVAILABLE IN SSE3) + +/* Use these to apply logic, bit masks, and conditions to elements. +Boolean operators on vectors give 0x00000000 for false and 0xffffffff for true, for each vector element. + +Examples: + +Subtract 1 from value if greater than or equal to 1. + + x -= (x >= 1.f) & 1.f; +*/ +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator^, _mm_xor_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator^, _mm_xor_si128) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator&, _mm_and_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator&, _mm_and_si128) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator|, _mm_or_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator|, _mm_or_si128) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator+=, operator+) +DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator+=, operator+) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator-=, operator-) +DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator-=, operator-) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator*=, operator*) +// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator*=, NOT AVAILABLE IN SSE3) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator/=, operator/) +// DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator/=, NOT AVAILABLE IN SSE3) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator^=, operator^) +DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator^=, operator^) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator&=, operator&) +DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator&=, operator&) + +DECLARE_VECTOR_OPERATOR_INCREMENT(float, 4, operator|=, operator|) +DECLARE_VECTOR_OPERATOR_INCREMENT(int32_t, 4, operator|=, operator|) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator==, _mm_cmpeq_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator==, _mm_cmpeq_epi32) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>=, _mm_cmpge_ps) +inline Vector operator>=(const Vector& a, const Vector& b) { + return Vector(_mm_cmpgt_epi32(a.v, b.v)) ^ Vector::mask(); +} + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator>, _mm_cmpgt_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator>, _mm_cmpgt_epi32) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<=, _mm_cmple_ps) +inline Vector operator<=(const Vector& a, const Vector& b) { + return Vector(_mm_cmplt_epi32(a.v, b.v)) ^ Vector::mask(); +} + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator<, _mm_cmplt_ps) +DECLARE_VECTOR_OPERATOR_INFIX(int32_t, 4, operator<, _mm_cmplt_epi32) + +DECLARE_VECTOR_OPERATOR_INFIX(float, 4, operator!=, _mm_cmpneq_ps) +inline Vector operator!=(const Vector& a, const Vector& b) { + return Vector(_mm_cmpeq_epi32(a.v, b.v)) ^ Vector::mask(); +} + +/** `+a` */ +inline Vector operator+(const Vector& a) { + return a; +} +inline Vector operator+(const Vector& a) { + return a; +} + +/** `-a` */ +inline Vector operator-(const Vector& a) { + return 0.f - a; +} +inline Vector operator-(const Vector& a) { + return 0 - a; +} + +/** `++a` */ +inline Vector& operator++(Vector& a) { + return a += 1.f; +} +inline Vector& operator++(Vector& a) { + return a += 1; +} + +/** `--a` */ +inline Vector& operator--(Vector& a) { + return a -= 1.f; +} +inline Vector& operator--(Vector& a) { + return a -= 1; +} + +/** `a++` */ +inline Vector operator++(Vector& a, int) { + Vector b = a; + ++a; + return b; +} +inline Vector operator++(Vector& a, int) { + Vector b = a; + ++a; + return b; +} + +/** `a--` */ +inline Vector operator--(Vector& a, int) { + Vector b = a; + --a; + return b; +} +inline Vector operator--(Vector& a, int) { + Vector b = a; + --a; + return b; +} + +/** `~a` */ +inline Vector operator~(const Vector& a) { + return a ^ Vector::mask(); +} +inline Vector operator~(const Vector& a) { + return a ^ Vector::mask(); +} + +/** `a << b` */ +inline Vector operator<<(const Vector& a, const int& b) { + return Vector(_mm_slli_epi32(a.v, b)); +} + +/** `a >> b` */ +inline Vector operator>>(const Vector& a, const int& b) { + return Vector(_mm_srli_epi32(a.v, b)); +} + + +// Typedefs + + +using float_4 = Vector; +using int32_4 = Vector; + + +} // namespace simd +} // namespace rack diff --git a/plugins/Makefile b/plugins/Makefile index 94e6979..0bbbb5b 100644 --- a/plugins/Makefile +++ b/plugins/Makefile @@ -954,7 +954,7 @@ endif BUILD_C_FLAGS += -std=gnu11 BUILD_C_FLAGS += -fno-finite-math-only -fno-strict-aliasing -BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing +BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing -faligned-new # Rack code is not tested for this flag, unset it BUILD_CXX_FLAGS += -U_GLIBCXX_ASSERTIONS -Wp,-U_GLIBCXX_ASSERTIONS diff --git a/src/Makefile b/src/Makefile index 48b7d05..13d59ba 100644 --- a/src/Makefile +++ b/src/Makefile @@ -95,7 +95,7 @@ endif BUILD_C_FLAGS += -std=gnu11 BUILD_C_FLAGS += -fno-finite-math-only -fno-strict-aliasing -BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing +BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing -faligned-new # use our custom function to invert some colors BUILD_CXX_FLAGS += -DnsvgParseFromFile=nsvgParseFromFileCardinal @@ -115,6 +115,7 @@ RACK_FILES += custom/network.cpp RACK_FILES += custom/osdialog.cpp RACK_FILES += override/blendish.c RACK_FILES += override/context.cpp +RACK_FILES += override/minblep.cpp RACK_FILES += override/plugin.cpp RACK_FILES += override/Engine.cpp RACK_FILES += override/MenuBar.cpp @@ -144,6 +145,7 @@ IGNORED_FILES += Rack/src/app/MenuBar.cpp IGNORED_FILES += Rack/src/app/MidiDisplay.cpp IGNORED_FILES += Rack/src/app/Scene.cpp IGNORED_FILES += Rack/src/app/TipWindow.cpp +IGNORED_FILES += Rack/src/dsp/minblep.cpp IGNORED_FILES += Rack/src/engine/Engine.cpp IGNORED_FILES += Rack/src/plugin/Model.cpp IGNORED_FILES += Rack/src/window/Window.cpp diff --git a/src/Makefile.cardinal.mk b/src/Makefile.cardinal.mk index 8c704ca..7160b0b 100644 --- a/src/Makefile.cardinal.mk +++ b/src/Makefile.cardinal.mk @@ -170,7 +170,7 @@ endif BUILD_C_FLAGS += -std=gnu11 BUILD_C_FLAGS += -fno-finite-math-only -fno-strict-aliasing -BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing +BUILD_CXX_FLAGS += -fno-finite-math-only -fno-strict-aliasing -faligned-new # Rack code is not tested for this flag, unset it BUILD_CXX_FLAGS += -U_GLIBCXX_ASSERTIONS -Wp,-U_GLIBCXX_ASSERTIONS diff --git a/src/override/minblep.cpp b/src/override/minblep.cpp new file mode 100644 index 0000000..6242b0d --- /dev/null +++ b/src/override/minblep.cpp @@ -0,0 +1,111 @@ +/* + * DISTRHO Cardinal Plugin + * Copyright (C) 2021-2022 Filipe Coelho + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * For a full copy of the GNU General Public License see the LICENSE file. + */ + +/** + * This file is an edited version of VCVRack's dsp/minblep.cpp + * Copyright (C) 2016-2021 VCV. + * + * This program is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 3 of + * the License, or (at your option) any later version. + */ + +#include +#include +#include + + +namespace rack { +namespace dsp { + + +void minBlepImpulse(int z, int o, float* output) { + // Symmetric sinc array with `z` zero-crossings on each side + int n = 2 * z * o; + float* x = (float*) pffft_aligned_malloc(sizeof(float) * n); + for (int i = 0; i < n; i++) { + float p = math::rescale((float) i, 0.f, (float)(n - 1), (float) - z, (float) z); + x[i] = sinc(p); + } + + // Apply window + blackmanHarrisWindow(x, n); + + // Real cepstrum + float* fx = (float*) pffft_aligned_malloc(sizeof(float) * 2 * n); + // Valgrind complains that the array is uninitialized for some reason, unless we clear it. + std::memset(fx, 0, sizeof(float) * 2 * n); + RealFFT rfft(n); + rfft.rfft(x, fx); + // fx = log(abs(fx)) + fx[0] = std::log(std::fabs(fx[0])); + for (int i = 1; i < n; i++) { + fx[2 * i] = std::log(std::hypot(fx[2 * i], fx[2 * i + 1])); + fx[2 * i + 1] = 0.f; + } + fx[1] = std::log(std::fabs(fx[1])); + // Clamp values in case we have -inf + for (int i = 0; i < 2 * n; i++) { + fx[i] = std::fmax(-30.f, fx[i]); + } + rfft.irfft(fx, x); + rfft.scale(x); + + // Minimum-phase reconstruction + for (int i = 1; i < n / 2; i++) { + x[i] *= 2.f; + } + for (int i = (n + 1) / 2; i < n; i++) { + x[i] = 0.f; + } + rfft.rfft(x, fx); + // fx = exp(fx) + fx[0] = std::exp(fx[0]); + for (int i = 1; i < n; i++) { + float re = std::exp(fx[2 * i]); + float im = fx[2 * i + 1]; + fx[2 * i] = re * std::cos(im); + fx[2 * i + 1] = re * std::sin(im); + } + fx[1] = std::exp(fx[1]); + rfft.irfft(fx, x); + rfft.scale(x); + + // Integrate + float total = 0.f; + for (int i = 0; i < n; i++) { + total += x[i]; + x[i] = total; + } + + // Normalize + float norm = 1.f / x[n - 1]; + for (int i = 0; i < n; i++) { + x[i] *= norm; + } + + std::memcpy(output, x, n * sizeof(float)); + + // Cleanup + pffft_aligned_free(x); + pffft_aligned_free(fx); +} + + +} // namespace dsp +} // namespace rack