mirror of
https://github.com/Ardour/ardour.git
synced 2026-01-05 05:05:43 +01:00
aarch64: fix unaligned count and peak computation
Clamp misaligned prefix loops to the remaining frame count to avoid nframes underflow and potential out-of-bounds access for small buffers. Apply vabsq_f32() to all SIMD loads before peak reduction to ensure correct absolute peak calculation in unrolled NEON paths.
This commit is contained in:
parent
b673989763
commit
98eab68044
1 changed file with 26 additions and 6 deletions
|
|
@ -24,6 +24,7 @@
|
|||
#include <arm_neon.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <algorithm>
|
||||
|
||||
/**
|
||||
* @brief Aligns a pointer to the next 16-byte boundary
|
||||
|
|
@ -78,12 +79,18 @@ arm_neon_compute_peak(const float* src, uint32_t nframes, float current)
|
|||
if (UNLIKELY(src_aligned != src))
|
||||
{
|
||||
size_t unaligned_count = src_aligned - src;
|
||||
for (size_t i = 0; i < unaligned_count; i++)
|
||||
|
||||
// Handle small number of nframes
|
||||
size_t count = std::min<size_t>(unaligned_count, nframes);
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
float32x4_t x0 = vld1q_dup_f32(src + i);
|
||||
x0 = vabsq_f32(x0);
|
||||
vmax = vmaxq_f32(vmax, x0);
|
||||
}
|
||||
nframes -= unaligned_count;
|
||||
|
||||
nframes -= count;
|
||||
}
|
||||
|
||||
// Compute the number of SIMD frames
|
||||
|
|
@ -106,6 +113,11 @@ arm_neon_compute_peak(const float* src, uint32_t nframes, float current)
|
|||
x2 = vld1q_f32(src_aligned + offset + (2 * 4));
|
||||
x3 = vld1q_f32(src_aligned + offset + (3 * 4));
|
||||
|
||||
x0 = vabsq_f32(x0);
|
||||
x1 = vabsq_f32(x1);
|
||||
x2 = vabsq_f32(x2);
|
||||
x3 = vabsq_f32(x3);
|
||||
|
||||
max0 = vmaxq_f32(x0, x1);
|
||||
max1 = vmaxq_f32(x2, x3);
|
||||
max2 = vmaxq_f32(max0, max1);
|
||||
|
|
@ -182,13 +194,18 @@ arm_neon_find_peaks(const float* src, uint32_t nframes, float* minf, float* maxf
|
|||
if (UNLIKELY(src_aligned != src))
|
||||
{
|
||||
size_t unaligned_count = src_aligned - src;
|
||||
for (size_t i = 0; i < unaligned_count; i++)
|
||||
|
||||
// Handle small number of nframes
|
||||
size_t count = std::min<size_t>(unaligned_count, nframes);
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
float32x4_t x0 = vld1q_dup_f32(src + i);
|
||||
vmax = vmaxq_f32(vmax, x0);
|
||||
vmin = vminq_f32(vmin, x0);
|
||||
}
|
||||
nframes -= unaligned_count;
|
||||
|
||||
nframes -= count;
|
||||
}
|
||||
|
||||
// Compute the number of SIMD frames
|
||||
|
|
@ -299,7 +316,9 @@ arm_neon_apply_gain_to_buffer(float* dst, uint32_t nframes, float gain)
|
|||
if (UNLIKELY(dst_aligned != dst))
|
||||
{
|
||||
size_t unaligned_count = dst_aligned - dst;
|
||||
for (size_t i = 0; i < unaligned_count; i++)
|
||||
size_t count = std::min<size_t>(unaligned_count, nframes);
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
float32_t x0, y0;
|
||||
|
||||
|
|
@ -307,7 +326,8 @@ arm_neon_apply_gain_to_buffer(float* dst, uint32_t nframes, float gain)
|
|||
y0 = x0 * gain;
|
||||
dst[i] = y0;
|
||||
}
|
||||
nframes -= unaligned_count;
|
||||
|
||||
nframes -= count;
|
||||
}
|
||||
|
||||
// Compute the number of SIMD frames
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue