mirror of
https://github.com/Ardour/ardour.git
synced 2026-01-06 05:35:47 +01:00
Added an xmmintrin.h-based SSE implementation of find_peaks(). Needs polishing, as
this commit breaks the build system for i386 builds with dynamic SSE enabled. git-svn-id: svn://localhost/ardour2/trunk@1586 d708f5d6-7413-0410-9779-e7cbd77b26cf
This commit is contained in:
parent
29f4d8b52c
commit
75d2f51193
7 changed files with 156 additions and 11 deletions
|
|
@ -287,10 +287,13 @@ env['BUILDERS']['SharedAsmObject'] = Builder (action = '$CXX -c -fPIC $SOURCE -o
|
|||
if env['FPU_OPTIMIZATION']:
|
||||
if env['DIST_TARGET'] == "i386":
|
||||
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
|
||||
ardour_files += ['sse_functions_xmm.cc']
|
||||
if env['DIST_TARGET'] == "i686":
|
||||
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
|
||||
ardour_files += ['sse_functions_xmm.cc']
|
||||
if env['DIST_TARGET'] == "x86_64":
|
||||
arch_specific_objects = env.SharedAsmObject('sse_functions_64bit.os', 'sse_functions_64bit.s')
|
||||
ardour_files += ['sse_functions_xmm.cc']
|
||||
|
||||
libardour = ardour.SharedLibrary('ardour', ardour_files + extra_sources + arch_specific_objects)
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
extern "C" {
|
||||
/* SSE functions */
|
||||
float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
|
||||
void x86_sse_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
|
||||
|
||||
|
|
@ -36,9 +36,11 @@ extern "C" {
|
|||
void x86_sse_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
|
||||
}
|
||||
|
||||
float x86_sse_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
|
||||
|
||||
/* debug wrappers for SSE functions */
|
||||
|
||||
float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
|
||||
void debug_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
|
||||
|
||||
|
|
@ -52,6 +54,8 @@ void debug_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nfra
|
|||
|
||||
float veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
|
||||
float veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
|
||||
|
||||
void veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
|
||||
|
||||
void veclib_mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
|
||||
|
|
@ -62,12 +66,14 @@ void veclib_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src
|
|||
|
||||
/* non-optimized functions */
|
||||
|
||||
float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
|
||||
|
||||
void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
|
||||
float find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
|
||||
|
||||
void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
|
||||
void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
|
||||
|
||||
void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
|
||||
void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
|
||||
|
||||
void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
|
||||
|
||||
#endif /* __ardour_mix_h__ */
|
||||
|
|
|
|||
|
|
@ -905,12 +905,14 @@ class Session : public PBD::StatefulDestructible
|
|||
void* ptr,
|
||||
float opt);
|
||||
|
||||
typedef float (*compute_peak_t) (Sample *, nframes_t, float);
|
||||
typedef float (*compute_peak_t) (Sample *, nframes_t, float);
|
||||
typedef float (*find_peaks_t) (Sample *, nframes_t, float *, float*);
|
||||
typedef void (*apply_gain_to_buffer_t) (Sample *, nframes_t, float);
|
||||
typedef void (*mix_buffers_with_gain_t) (Sample *, Sample *, nframes_t, float);
|
||||
typedef void (*mix_buffers_no_gain_t) (Sample *, Sample *, nframes_t);
|
||||
|
||||
static compute_peak_t compute_peak;
|
||||
static compute_peak_t compute_peak;
|
||||
static find_peaks_t find_peaks;
|
||||
static apply_gain_to_buffer_t apply_gain_to_buffer;
|
||||
static mix_buffers_with_gain_t mix_buffers_with_gain;
|
||||
static mix_buffers_no_gain_t mix_buffers_no_gain;
|
||||
|
|
|
|||
|
|
@ -233,6 +233,7 @@ setup_hardware_optimization (bool try_optimization)
|
|||
|
||||
// SSE SET
|
||||
Session::compute_peak = x86_sse_compute_peak;
|
||||
Session::find_peaks = x86_sse_find_peaks;
|
||||
Session::apply_gain_to_buffer = x86_sse_apply_gain_to_buffer;
|
||||
Session::mix_buffers_with_gain = x86_sse_mix_buffers_with_gain;
|
||||
Session::mix_buffers_no_gain = x86_sse_mix_buffers_no_gain;
|
||||
|
|
@ -249,6 +250,7 @@ setup_hardware_optimization (bool try_optimization)
|
|||
|
||||
if (sysVersion >= 0x00001040) { // Tiger at least
|
||||
Session::compute_peak = veclib_compute_peak;
|
||||
Session::find_peaks = veclib_find_peaks;
|
||||
Session::apply_gain_to_buffer = veclib_apply_gain_to_buffer;
|
||||
Session::mix_buffers_with_gain = veclib_mix_buffers_with_gain;
|
||||
Session::mix_buffers_no_gain = veclib_mix_buffers_no_gain;
|
||||
|
|
@ -262,7 +264,8 @@ setup_hardware_optimization (bool try_optimization)
|
|||
|
||||
if (generic_mix_functions) {
|
||||
|
||||
Session::compute_peak = compute_peak;
|
||||
Session::compute_peak = compute_peak;
|
||||
Session::find_peaks = find_peaks;
|
||||
Session::apply_gain_to_buffer = apply_gain_to_buffer;
|
||||
Session::mix_buffers_with_gain = mix_buffers_with_gain;
|
||||
Session::mix_buffers_no_gain = mix_buffers_no_gain;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS)
|
||||
|
||||
// Debug wrappers
|
||||
|
||||
float
|
||||
|
|
@ -90,6 +89,25 @@ compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
|
|||
return current;
|
||||
}
|
||||
|
||||
float
|
||||
find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
|
||||
{
|
||||
long i;
|
||||
float a, b;
|
||||
|
||||
a = *max;
|
||||
b = *min;
|
||||
|
||||
for (i = 0; i < nframes; i++)
|
||||
{
|
||||
a = fmax (buf[i], a);
|
||||
b = fmin (buf[i], b);
|
||||
}
|
||||
|
||||
*max = a;
|
||||
*min = b;
|
||||
}
|
||||
|
||||
void
|
||||
apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
|
||||
{
|
||||
|
|
@ -124,6 +142,25 @@ veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
|
|||
return f_max(current, tmpmax);
|
||||
}
|
||||
|
||||
float
|
||||
veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
|
||||
{
|
||||
// TODO: someone with veclib skills needs to write this one
|
||||
long i;
|
||||
float a, b;
|
||||
|
||||
a = *max;
|
||||
b = *min;
|
||||
|
||||
for (i = 0; i < nframes; i++)
|
||||
{
|
||||
a = fmax (buf[i], a);
|
||||
b = fmin (buf[i], b);
|
||||
}
|
||||
|
||||
*max = a;
|
||||
*min = b;
|
||||
}
|
||||
void
|
||||
veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -88,7 +88,8 @@ const char* Session::dead_sound_dir_name = X_("dead_sounds");
|
|||
const char* Session::interchange_dir_name = X_("interchange");
|
||||
const char* Session::export_dir_name = X_("export");
|
||||
|
||||
Session::compute_peak_t Session::compute_peak = 0;
|
||||
Session::compute_peak_t Session::compute_peak = 0;
|
||||
Session::find_peaks_t Session::find_peaks = 0;
|
||||
Session::apply_gain_to_buffer_t Session::apply_gain_to_buffer = 0;
|
||||
Session::mix_buffers_with_gain_t Session::mix_buffers_with_gain = 0;
|
||||
Session::mix_buffers_no_gain_t Session::mix_buffers_no_gain = 0;
|
||||
|
|
|
|||
93
libs/ardour/sse_functions_xmm.cc
Normal file
93
libs/ardour/sse_functions_xmm.cc
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
Copyright (C) 2007 Paul Davis
|
||||
Written by Sampo Savolainen
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
*/
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <ardour/types.h>
|
||||
|
||||
void
|
||||
x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max)
|
||||
{
|
||||
__m128 current_max, current_min, work;
|
||||
|
||||
// Load max and min values into all four slots of the XMM registers
|
||||
current_min = _mm_set1_ps(*min);
|
||||
current_max = _mm_set1_ps(*max);
|
||||
|
||||
// Work input until "buf" reaches 16 byte alignment
|
||||
while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) {
|
||||
|
||||
// Load the next float into the work buffer
|
||||
work = _mm_set1_ps(*buf);
|
||||
|
||||
current_min = _mm_min_ps(current_min, work);
|
||||
current_max = _mm_max_ps(current_max, work);
|
||||
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
// work through aligned buffers
|
||||
while (nframes >= 4) {
|
||||
|
||||
work = _mm_load_ps(buf);
|
||||
|
||||
current_min = _mm_min_ps(current_min, work);
|
||||
current_max = _mm_max_ps(current_max, work);
|
||||
|
||||
buf+=4;
|
||||
nframes-=4;
|
||||
}
|
||||
|
||||
// work through the rest < 4 samples
|
||||
while ( nframes > 0) {
|
||||
|
||||
// Load the next float into the work buffer
|
||||
work = _mm_set1_ps(*buf);
|
||||
|
||||
current_min = _mm_min_ps(current_min, work);
|
||||
current_max = _mm_max_ps(current_max, work);
|
||||
|
||||
buf++;
|
||||
nframes--;
|
||||
}
|
||||
|
||||
// Find min & max value in current_max through shuffle tricks
|
||||
|
||||
work = current_min;
|
||||
work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
work = _mm_min_ps (work, current_min);
|
||||
current_min = work;
|
||||
work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
work = _mm_min_ps (work, current_min);
|
||||
|
||||
_mm_store_ss(min, work);
|
||||
|
||||
work = current_max;
|
||||
work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
work = _mm_max_ps (work, current_max);
|
||||
current_max = work;
|
||||
work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
work = _mm_max_ps (work, current_max);
|
||||
|
||||
_mm_store_ss(max, work);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue