mirror of
https://github.com/Ardour/ardour.git
synced 2025-12-07 23:35:03 +01:00
Integrate AVX512F support into Ardour
The current implementation is only a stub derived from the AVX code and does not yet make use of AVX512F instructions.
This commit is contained in:
parent
bf8fced073
commit
6b766e41f4
4 changed files with 80 additions and 1 deletions
|
|
@ -191,7 +191,24 @@ setup_hardware_optimization (bool try_optimization)
|
||||||
FPU* fpu = FPU::instance ();
|
FPU* fpu = FPU::instance ();
|
||||||
|
|
||||||
#if defined(ARCH_X86) && defined(BUILD_SSE_OPTIMIZATIONS)
|
#if defined(ARCH_X86) && defined(BUILD_SSE_OPTIMIZATIONS)
|
||||||
/* We have AVX-optimized code for Windows and Linux */
|
/* Utilize different optimization routines for various x86 extensions */
|
||||||
|
|
||||||
|
#ifdef FPU_AVX512F_SUPPORT
|
||||||
|
if (fpu->has_avx512f ()) {
|
||||||
|
info << "Using AVX512F optimized routines" << endmsg;
|
||||||
|
|
||||||
|
// AVX512F SET
|
||||||
|
compute_peak = x86_avx512f_compute_peak;
|
||||||
|
find_peaks = x86_avx512f_find_peaks;
|
||||||
|
apply_gain_to_buffer = x86_avx512f_apply_gain_to_buffer;
|
||||||
|
mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
|
||||||
|
mix_buffers_no_gain = x86_avx512f_mix_buffers_no_gain;
|
||||||
|
copy_vector = x86_avx512f_copy_vector;
|
||||||
|
|
||||||
|
generic_mix_functions = false;
|
||||||
|
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FPU_AVX_FMA_SUPPORT
|
#ifdef FPU_AVX_FMA_SUPPORT
|
||||||
if (fpu->has_fma ()) {
|
if (fpu->has_fma ()) {
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,33 @@ FPUTest::avxTest ()
|
||||||
run (align_max);
|
run (align_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
FPUTest::avx512fTest ()
|
||||||
|
{
|
||||||
|
PBD::FPU* fpu = PBD::FPU::instance ();
|
||||||
|
if (!fpu->has_avx512f ()) {
|
||||||
|
printf ("AVX512F is not available at run-time\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if ( defined(__x86_64__) || defined(_M_X64) )
|
||||||
|
size_t align_max = 64;
|
||||||
|
#else
|
||||||
|
size_t align_max = 16;
|
||||||
|
#endif
|
||||||
|
CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test1) % align_max) == 0);
|
||||||
|
CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test2) % align_max) == 0);
|
||||||
|
|
||||||
|
compute_peak = x86_avx512f_compute_peak;
|
||||||
|
find_peaks = x86_avx512f_find_peaks;
|
||||||
|
apply_gain_to_buffer = x86_avx512f_apply_gain_to_buffer;
|
||||||
|
mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
|
||||||
|
mix_buffers_no_gain = x86_avx512f_mix_buffers_no_gain;
|
||||||
|
copy_vector = x86_avx512f_copy_vector;
|
||||||
|
|
||||||
|
run (align_max, FLT_EPSILON);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
FPUTest::sseTest ()
|
FPUTest::sseTest ()
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -465,16 +465,19 @@ def build(bld):
|
||||||
|
|
||||||
avx_sources = []
|
avx_sources = []
|
||||||
fma_sources = []
|
fma_sources = []
|
||||||
|
avx512f_sources = []
|
||||||
|
|
||||||
if Options.options.fpu_optimization:
|
if Options.options.fpu_optimization:
|
||||||
if (bld.env['build_target'] == 'i386' or bld.env['build_target'] == 'i686'):
|
if (bld.env['build_target'] == 'i386' or bld.env['build_target'] == 'i686'):
|
||||||
obj.source += [ 'sse_functions_xmm.cc', 'sse_functions.s', ]
|
obj.source += [ 'sse_functions_xmm.cc', 'sse_functions.s', ]
|
||||||
avx_sources = [ 'sse_functions_avx_linux.cc' ]
|
avx_sources = [ 'sse_functions_avx_linux.cc' ]
|
||||||
fma_sources = [ 'x86_functions_fma.cc' ]
|
fma_sources = [ 'x86_functions_fma.cc' ]
|
||||||
|
avx512f_sources = [ 'x86_functions_avx512f.cc' ]
|
||||||
elif bld.env['build_target'] == 'x86_64':
|
elif bld.env['build_target'] == 'x86_64':
|
||||||
obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit.s', ]
|
obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit.s', ]
|
||||||
avx_sources = [ 'sse_functions_avx_linux.cc' ]
|
avx_sources = [ 'sse_functions_avx_linux.cc' ]
|
||||||
fma_sources = [ 'x86_functions_fma.cc' ]
|
fma_sources = [ 'x86_functions_fma.cc' ]
|
||||||
|
avx512f_sources = [ 'x86_functions_avx512f.cc' ]
|
||||||
elif bld.env['build_target'] == 'mingw':
|
elif bld.env['build_target'] == 'mingw':
|
||||||
# usability of the 64 bit windows assembler depends on the compiler target,
|
# usability of the 64 bit windows assembler depends on the compiler target,
|
||||||
# not the build host, which in turn can only be inferred from the name
|
# not the build host, which in turn can only be inferred from the name
|
||||||
|
|
@ -484,6 +487,7 @@ def build(bld):
|
||||||
obj.source += [ 'sse_functions_64bit_win.s', 'sse_avx_functions_64bit_win.s' ]
|
obj.source += [ 'sse_functions_64bit_win.s', 'sse_avx_functions_64bit_win.s' ]
|
||||||
avx_sources = [ 'sse_functions_avx.cc' ]
|
avx_sources = [ 'sse_functions_avx.cc' ]
|
||||||
fma_sources = [ 'x86_functions_fma.cc' ]
|
fma_sources = [ 'x86_functions_fma.cc' ]
|
||||||
|
avx512f_sources = [ 'x86_functions_avx512f.cc' ]
|
||||||
elif bld.env['build_target'] == 'aarch64':
|
elif bld.env['build_target'] == 'aarch64':
|
||||||
obj.source += ['arm_neon_functions.cc']
|
obj.source += ['arm_neon_functions.cc']
|
||||||
obj.defines += [ 'ARM_NEON_SUPPORT' ]
|
obj.defines += [ 'ARM_NEON_SUPPORT' ]
|
||||||
|
|
@ -537,6 +541,24 @@ def build(bld):
|
||||||
obj.use += ['sse_fma_functions' ]
|
obj.use += ['sse_fma_functions' ]
|
||||||
obj.defines += [ 'FPU_AVX_FMA_SUPPORT' ]
|
obj.defines += [ 'FPU_AVX_FMA_SUPPORT' ]
|
||||||
|
|
||||||
|
if bld.is_defined('FPU_AVX512F_SUPPORT') and avx512f_sources:
|
||||||
|
avx512f_cxxflags = list(bld.env['CXXFLAGS'])
|
||||||
|
avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx512f'])
|
||||||
|
avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx'])
|
||||||
|
avx512f_cxxflags.append (bld.env['compiler_flags_dict']['pic'])
|
||||||
|
avx512f_cxxflags.append (bld.env['compiler_flags_dict']['fma'])
|
||||||
|
|
||||||
|
bld(features = 'cxx cxxstlib asm',
|
||||||
|
source = avx512f_sources,
|
||||||
|
cxxflags = avx512f_cxxflags,
|
||||||
|
includes = [ '.' ],
|
||||||
|
use = [ 'libtemporal', 'libpbd', 'libevoral', 'liblua' ],
|
||||||
|
uselib = [ 'GLIBMM', 'XML' ],
|
||||||
|
target = 'avx512f_functions')
|
||||||
|
|
||||||
|
obj.use += ['avx512f_functions' ]
|
||||||
|
obj.defines += [ 'FPU_AVX512F_SUPPORT' ]
|
||||||
|
|
||||||
# i18n
|
# i18n
|
||||||
if bld.is_defined('ENABLE_NLS'):
|
if bld.is_defined('ENABLE_NLS'):
|
||||||
mo_files = bld.path.ant_glob('po/*.mo')
|
mo_files = bld.path.ant_glob('po/*.mo')
|
||||||
|
|
|
||||||
13
wscript
13
wscript
|
|
@ -89,6 +89,8 @@ compiler_flags_dictionaries= {
|
||||||
'attasm': '-masm=att',
|
'attasm': '-masm=att',
|
||||||
# Flags to make AVX instructions/intrinsics available
|
# Flags to make AVX instructions/intrinsics available
|
||||||
'avx': '-mavx',
|
'avx': '-mavx',
|
||||||
|
# Flags to make AVX512F instructions/intrinsics available
|
||||||
|
'avx512f': '-mavx512f',
|
||||||
# Flags to make FMA instructions/intrinsics available
|
# Flags to make FMA instructions/intrinsics available
|
||||||
'fma': '-mfma',
|
'fma': '-mfma',
|
||||||
# Flags to make ARM/NEON instructions/intrinsics available
|
# Flags to make ARM/NEON instructions/intrinsics available
|
||||||
|
|
@ -519,6 +521,16 @@ int main() { return 0; }''',
|
||||||
if re.search ('x86_64-w64', str(conf.env['CC'])) is not None:
|
if re.search ('x86_64-w64', str(conf.env['CC'])) is not None:
|
||||||
conf.define ('FPU_AVX_FMA_SUPPORT', 1)
|
conf.define ('FPU_AVX_FMA_SUPPORT', 1)
|
||||||
elif conf.env['build_target'] == 'i386' or conf.env['build_target'] == 'i686' or conf.env['build_target'] == 'x86_64':
|
elif conf.env['build_target'] == 'i386' or conf.env['build_target'] == 'i686' or conf.env['build_target'] == 'x86_64':
|
||||||
|
# Probe the compiler with genuine AVX512F intrinsics (__m512 / _mm512_*).
# An FMA-only fragment such as _mm_fmadd_ss on __m128 would merely verify
# that the '-mavx512f' flag is accepted, not that the AVX512F intrinsics
# themselves are usable.
# The check is optional (mandatory = False) and compile-only
# (execute = False); on success it defines FPU_AVX512F_SUPPORT.
conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m512 a = _mm512_setzero_ps(); a = _mm512_fmadd_ps(a, a, a); (void) a; return 0; }\n",
               features = ['cxx'],
               cxxflags = [ conf.env['compiler_flags_dict']['avx512f'], conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
               mandatory = False,
               execute = False,
               msg = 'Checking compiler for AVX512F intrinsics',
               okmsg = 'Found',
               errmsg = 'Not supported',
               define_name = 'FPU_AVX512F_SUPPORT')
|
||||||
|
|
||||||
conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m128 a; _mm_fmadd_ss(a, a, a); return 0; }\n",
|
conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m128 a; _mm_fmadd_ss(a, a, a); return 0; }\n",
|
||||||
features = ['cxx'],
|
features = ['cxx'],
|
||||||
cxxflags = [ conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
|
cxxflags = [ conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
|
||||||
|
|
@ -1528,6 +1540,7 @@ const char* const ardour_config_info = "\\n\\
|
||||||
write_config_text('Dr. Mingw', conf.is_defined('HAVE_DRMINGW'))
|
write_config_text('Dr. Mingw', conf.is_defined('HAVE_DRMINGW'))
|
||||||
write_config_text('FLAC', conf.is_defined('HAVE_FLAC'))
|
write_config_text('FLAC', conf.is_defined('HAVE_FLAC'))
|
||||||
write_config_text('FPU optimization', opts.fpu_optimization)
|
write_config_text('FPU optimization', opts.fpu_optimization)
|
||||||
|
write_config_text('FPU AVX512F support', conf.is_defined('FPU_AVX512F_SUPPORT'))
|
||||||
write_config_text('FPU AVX/FMA support', conf.is_defined('FPU_AVX_FMA_SUPPORT'))
|
write_config_text('FPU AVX/FMA support', conf.is_defined('FPU_AVX_FMA_SUPPORT'))
|
||||||
write_config_text('Futex Semaphore', conf.is_defined('USE_FUTEX_SEMAPHORE'))
|
write_config_text('Futex Semaphore', conf.is_defined('USE_FUTEX_SEMAPHORE'))
|
||||||
write_config_text('Freedesktop files', opts.freedesktop)
|
write_config_text('Freedesktop files', opts.freedesktop)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue