From 98eab6804414ddc74bc1ba9be239b4ce83e0c241 Mon Sep 17 00:00:00 2001 From: Ayan Shafqat Date: Mon, 29 Dec 2025 20:12:43 -0500 Subject: [PATCH] aarch64: fix unaligned count and peak computation Clamp misaligned prefix loops to the remaining frame count to avoid nframes underflow and potential out-of-bounds access for small buffers. Apply vabsq_f32() to all SIMD loads before peak reduction to ensure correct absolute peak calculation in unrolled NEON paths. --- libs/ardour/aarch64_neon_functions.cc | 32 ++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/libs/ardour/aarch64_neon_functions.cc b/libs/ardour/aarch64_neon_functions.cc index e4ffa78e5e..130a5a1032 100644 --- a/libs/ardour/aarch64_neon_functions.cc +++ b/libs/ardour/aarch64_neon_functions.cc @@ -24,6 +24,7 @@ #include #include +#include <algorithm> /** * @brief Aligns a pointer to the next 16-byte boundary @@ -78,12 +79,18 @@ arm_neon_compute_peak(const float* src, uint32_t nframes, float current) if (UNLIKELY(src_aligned != src)) { size_t unaligned_count = src_aligned - src; - for (size_t i = 0; i < unaligned_count; i++) + + // Handle small number of nframes + size_t count = std::min<size_t>(unaligned_count, nframes); + + for (size_t i = 0; i < count; i++) { float32x4_t x0 = vld1q_dup_f32(src + i); + x0 = vabsq_f32(x0); vmax = vmaxq_f32(vmax, x0); } - nframes -= unaligned_count; + + nframes -= count; } // Compute the number of SIMD frames @@ -106,6 +113,11 @@ arm_neon_compute_peak(const float* src, uint32_t nframes, float current) x2 = vld1q_f32(src_aligned + offset + (2 * 4)); x3 = vld1q_f32(src_aligned + offset + (3 * 4)); + x0 = vabsq_f32(x0); + x1 = vabsq_f32(x1); + x2 = vabsq_f32(x2); + x3 = vabsq_f32(x3); + max0 = vmaxq_f32(x0, x1); max1 = vmaxq_f32(x2, x3); max2 = vmaxq_f32(max0, max1); @@ -182,13 +194,18 @@ arm_neon_find_peaks(const float* src, uint32_t nframes, float* minf, float* maxf if (UNLIKELY(src_aligned != src)) { size_t unaligned_count = src_aligned - src; - for (size_t
i = 0; i < unaligned_count; i++) + + // Handle small number of nframes + size_t count = std::min<size_t>(unaligned_count, nframes); + + for (size_t i = 0; i < count; i++) { float32x4_t x0 = vld1q_dup_f32(src + i); vmax = vmaxq_f32(vmax, x0); vmin = vminq_f32(vmin, x0); } - nframes -= unaligned_count; + + nframes -= count; } // Compute the number of SIMD frames @@ -299,7 +316,9 @@ arm_neon_apply_gain_to_buffer(float* dst, uint32_t nframes, float gain) if (UNLIKELY(dst_aligned != dst)) { size_t unaligned_count = dst_aligned - dst; - for (size_t i = 0; i < unaligned_count; i++) + size_t count = std::min<size_t>(unaligned_count, nframes); + + for (size_t i = 0; i < count; i++) { float32_t x0, y0; @@ -307,7 +326,8 @@ arm_neon_apply_gain_to_buffer(float* dst, uint32_t nframes, float gain) y0 = x0 * gain; dst[i] = y0; } - nframes -= unaligned_count; + + nframes -= count; } // Compute the number of SIMD frames