Added an xmmintrin.h-based SSE function, find_peaks(). Needs polishing, as

this commit breaks the build system for i386 builds with dynamic SSE
enabled.


git-svn-id: svn://localhost/ardour2/trunk@1586 d708f5d6-7413-0410-9779-e7cbd77b26cf
This commit is contained in:
Sampo Savolainen 2007-03-13 22:42:34 +00:00
parent 29f4d8b52c
commit 75d2f51193
7 changed files with 156 additions and 11 deletions

View file

@ -287,10 +287,13 @@ env['BUILDERS']['SharedAsmObject'] = Builder (action = '$CXX -c -fPIC $SOURCE -o
# When FPU optimization is requested, each x86 DIST_TARGET assembles its
# hand-written SSE source (32-bit or 64-bit flavour) into a shared object and
# also appends the intrinsics-based sse_functions_xmm.cc to ardour_files.
if env['FPU_OPTIMIZATION']:
if env['DIST_TARGET'] == "i386":
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
ardour_files += ['sse_functions_xmm.cc']
if env['DIST_TARGET'] == "i686":
arch_specific_objects = env.SharedAsmObject('sse_functions.os', 'sse_functions.s')
ardour_files += ['sse_functions_xmm.cc']
if env['DIST_TARGET'] == "x86_64":
arch_specific_objects = env.SharedAsmObject('sse_functions_64bit.os', 'sse_functions_64bit.s')
ardour_files += ['sse_functions_xmm.cc']
# The shared library is built from the C++ sources, any extra sources and the
# assembled architecture-specific objects.
libardour = ardour.SharedLibrary('ardour', ardour_files + extra_sources + arch_specific_objects)

View file

@ -27,7 +27,7 @@
extern "C" {
/* SSE functions */
float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
float x86_sse_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
void x86_sse_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
@ -36,9 +36,11 @@ extern "C" {
void x86_sse_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
}
/* SSE min/max scan: widens *min / *max over nsamples samples of buf.
   NOTE(review): as shown here this prototype follows the closing brace of the
   extern "C" block, so it would have C++ linkage - confirm that is intended. */
float x86_sse_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
/* debug wrappers for SSE functions */
float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
float debug_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
void debug_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
@ -52,6 +54,8 @@ void debug_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nfra
float veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
float veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
void veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
void veclib_mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
@ -62,12 +66,14 @@ void veclib_mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src
/* non-optimized functions */
float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
float compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current);
void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
float find_peaks (ARDOUR::Sample *buf, nframes_t nsamples, float *min, float *max);
void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
void apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain);
void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
void mix_buffers_with_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes, float gain);
void mix_buffers_no_gain (ARDOUR::Sample *dst, ARDOUR::Sample *src, nframes_t nframes);
#endif /* __ardour_mix_h__ */

View file

@ -905,12 +905,14 @@ class Session : public PBD::StatefulDestructible
void* ptr,
float opt);
typedef float (*compute_peak_t) (Sample *, nframes_t, float);
typedef float (*compute_peak_t) (Sample *, nframes_t, float);
typedef float (*find_peaks_t) (Sample *, nframes_t, float *, float*);
typedef void (*apply_gain_to_buffer_t) (Sample *, nframes_t, float);
typedef void (*mix_buffers_with_gain_t) (Sample *, Sample *, nframes_t, float);
typedef void (*mix_buffers_no_gain_t) (Sample *, Sample *, nframes_t);
static compute_peak_t compute_peak;
static compute_peak_t compute_peak;
static find_peaks_t find_peaks;
static apply_gain_to_buffer_t apply_gain_to_buffer;
static mix_buffers_with_gain_t mix_buffers_with_gain;
static mix_buffers_no_gain_t mix_buffers_no_gain;

View file

@ -233,6 +233,7 @@ setup_hardware_optimization (bool try_optimization)
// SSE SET
Session::compute_peak = x86_sse_compute_peak;
Session::find_peaks = x86_sse_find_peaks;
Session::apply_gain_to_buffer = x86_sse_apply_gain_to_buffer;
Session::mix_buffers_with_gain = x86_sse_mix_buffers_with_gain;
Session::mix_buffers_no_gain = x86_sse_mix_buffers_no_gain;
@ -249,6 +250,7 @@ setup_hardware_optimization (bool try_optimization)
if (sysVersion >= 0x00001040) { // Tiger at least
Session::compute_peak = veclib_compute_peak;
Session::find_peaks = veclib_find_peaks;
Session::apply_gain_to_buffer = veclib_apply_gain_to_buffer;
Session::mix_buffers_with_gain = veclib_mix_buffers_with_gain;
Session::mix_buffers_no_gain = veclib_mix_buffers_no_gain;
@ -262,7 +264,8 @@ setup_hardware_optimization (bool try_optimization)
if (generic_mix_functions) {
Session::compute_peak = compute_peak;
Session::compute_peak = compute_peak;
Session::find_peaks = find_peaks;
Session::apply_gain_to_buffer = apply_gain_to_buffer;
Session::mix_buffers_with_gain = mix_buffers_with_gain;
Session::mix_buffers_no_gain = mix_buffers_no_gain;

View file

@ -24,7 +24,6 @@
#include <stdint.h>
#if defined (ARCH_X86) && defined (BUILD_SSE_OPTIMIZATIONS)
// Debug wrappers
float
@ -90,6 +89,25 @@ compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
return current;
}
/*
 * Generic (non-optimized) peak scan.
 *
 * Walks nframes samples of buf and widens the running range held in
 * *min and *max so that it also covers every sample seen.
 *
 *   buf      samples to scan
 *   nframes  number of samples in buf
 *   min      in/out: running minimum, lowered when a smaller sample is found
 *   max      in/out: running maximum, raised when a larger sample is found
 *
 * Returns the updated maximum (the value also stored in *max). The function
 * is declared to return float; previously control fell off the end without
 * a return statement, which is undefined behaviour.
 */
float
find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
{
	float hi = *max;
	float lo = *min;

	// index with nframes_t so the loop counter and its bound share one type
	for (nframes_t i = 0; i < nframes; ++i) {
		// single-precision variants: no round trip through double per sample
		hi = fmaxf (buf[i], hi);
		lo = fminf (buf[i], lo);
	}

	*max = hi;
	*min = lo;

	return hi;
}
void
apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
{
@ -124,6 +142,25 @@ veclib_compute_peak (ARDOUR::Sample *buf, nframes_t nsamples, float current)
return f_max(current, tmpmax);
}
/*
 * Peak scan used when the veclib function set is selected.
 *
 * Currently a plain scalar loop: it walks nframes samples of buf and widens
 * the running range held in *min and *max to cover every sample seen.
 *
 *   buf      samples to scan
 *   nframes  number of samples in buf
 *   min      in/out: running minimum, lowered when a smaller sample is found
 *   max      in/out: running maximum, raised when a larger sample is found
 *
 * Returns the updated maximum (the value also stored in *max). The function
 * is declared to return float; previously control fell off the end without
 * a return statement, which is undefined behaviour.
 */
float
veclib_find_peaks (ARDOUR::Sample *buf, nframes_t nframes, float *min, float *max)
{
	// TODO: someone with veclib skills needs to write this one
	float hi = *max;
	float lo = *min;

	// index with nframes_t so the loop counter and its bound share one type
	for (nframes_t i = 0; i < nframes; ++i) {
		// single-precision variants: no round trip through double per sample
		hi = fmaxf (buf[i], hi);
		lo = fminf (buf[i], lo);
	}

	*max = hi;
	*min = lo;

	return hi;
}
void
veclib_apply_gain_to_buffer (ARDOUR::Sample *buf, nframes_t nframes, float gain)
{

View file

@ -88,7 +88,8 @@ const char* Session::dead_sound_dir_name = X_("dead_sounds");
const char* Session::interchange_dir_name = X_("interchange");
const char* Session::export_dir_name = X_("export");
Session::compute_peak_t Session::compute_peak = 0;
Session::compute_peak_t Session::compute_peak = 0;
Session::find_peaks_t Session::find_peaks = 0;
Session::apply_gain_to_buffer_t Session::apply_gain_to_buffer = 0;
Session::mix_buffers_with_gain_t Session::mix_buffers_with_gain = 0;
Session::mix_buffers_no_gain_t Session::mix_buffers_no_gain = 0;

View file

@ -0,0 +1,93 @@
/*
Copyright (C) 2007 Paul Davis
Written by Sampo Savolainen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <xmmintrin.h>
#include <ardour/types.h>
/*
 * SSE peak scan written with xmmintrin.h intrinsics.
 *
 * Walks nframes floats of buf and widens the running range held in *min and
 * *max so that it also covers every sample seen.
 *
 *   buf      samples to scan; any alignment is accepted
 *   nframes  number of samples in buf
 *   min      in/out: running minimum, lowered when a smaller sample is found
 *   max      in/out: running maximum, raised when a larger sample is found
 *
 * Returns the updated maximum (the value also stored in *max). The function
 * used to be defined as void while its prototype and the find_peaks_t
 * function-pointer type both promise a float, so callers going through the
 * pointer would read a return value that was never produced.
 */
float
x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max)
{
	__m128 current_max, current_min, work;

	// Broadcast the incoming min and max into all four lanes
	current_min = _mm_set1_ps(*min);
	current_max = _mm_set1_ps(*max);

	// Consume single samples until "buf" sits on a 16 byte boundary,
	// which the aligned load below requires
	while ((reinterpret_cast<unsigned long>(buf) % 16) != 0 && nframes > 0) {

		// Broadcasting one sample keeps all lanes valid candidates
		work = _mm_set1_ps(*buf);

		current_min = _mm_min_ps(current_min, work);
		current_max = _mm_max_ps(current_max, work);

		buf++;
		nframes--;
	}

	// Main loop: four aligned samples per iteration
	while (nframes >= 4) {

		work = _mm_load_ps(buf);

		current_min = _mm_min_ps(current_min, work);
		current_max = _mm_max_ps(current_max, work);

		buf += 4;
		nframes -= 4;
	}

	// Tail: fewer than four samples remain
	while (nframes > 0) {

		work = _mm_set1_ps(*buf);

		current_min = _mm_min_ps(current_min, work);
		current_max = _mm_max_ps(current_max, work);

		buf++;
		nframes--;
	}

	// Horizontal minimum: first swap neighbouring lanes and combine, then
	// swap the two halves and combine again; every lane then holds the
	// overall minimum and the lowest lane is stored.
	work = _mm_shuffle_ps(current_min, current_min, _MM_SHUFFLE(2, 3, 0, 1));
	work = _mm_min_ps(work, current_min);
	current_min = work;
	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
	work = _mm_min_ps(work, current_min);

	_mm_store_ss(min, work);

	// Horizontal maximum, same two-step reduction
	work = _mm_shuffle_ps(current_max, current_max, _MM_SHUFFLE(2, 3, 0, 1));
	work = _mm_max_ps(work, current_max);
	current_max = work;
	work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
	work = _mm_max_ps(work, current_max);

	_mm_store_ss(max, work);

	return *max;
}