From 6b766e41f497eadc4467cf7e119781dda0a82321 Mon Sep 17 00:00:00 2001
From: Ayan Shafqat
Date: Thu, 2 Feb 2023 12:13:27 -0500
Subject: [PATCH] Integrate AVX512F support into Ardour

The current implementation is just a stub of AVX and is not utilizing AVX512F.

The configure-time probe compiles a real AVX512F intrinsic
(_mm512_fmadd_ps on __m512) so that FPU_AVX512F_SUPPORT is only defined
when the compiler actually provides the AVX512F intrinsics, not merely
the FMA ones.
---
 libs/ardour/globals.cc       | 19 ++++++++++++++++++-
 libs/ardour/test/fpu_test.cc | 27 +++++++++++++++++++++++++++
 libs/ardour/wscript          | 22 ++++++++++++++++++++++
 wscript                      | 13 +++++++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/libs/ardour/globals.cc b/libs/ardour/globals.cc
index f82a6bff75..7cd588bd36 100644
--- a/libs/ardour/globals.cc
+++ b/libs/ardour/globals.cc
@@ -191,7 +191,24 @@ setup_hardware_optimization (bool try_optimization)
 		FPU* fpu = FPU::instance ();
 
 #if defined(ARCH_X86) && defined(BUILD_SSE_OPTIMIZATIONS)
-		/* We have AVX-optimized code for Windows and Linux */
+		/* Utilize different optimization routines for various x86 extensions */
+
+#ifdef FPU_AVX512F_SUPPORT
+		if (fpu->has_avx512f ()) {
+			info << "Using AVX512F optimized routines" << endmsg;
+
+			// AVX512F SET
+			compute_peak          = x86_avx512f_compute_peak;
+			find_peaks            = x86_avx512f_find_peaks;
+			apply_gain_to_buffer  = x86_avx512f_apply_gain_to_buffer;
+			mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
+			mix_buffers_no_gain   = x86_avx512f_mix_buffers_no_gain;
+			copy_vector           = x86_avx512f_copy_vector;
+
+			generic_mix_functions = false;
+
+		} else
+#endif
 
 #ifdef FPU_AVX_FMA_SUPPORT
 		if (fpu->has_fma ()) {
diff --git a/libs/ardour/test/fpu_test.cc b/libs/ardour/test/fpu_test.cc
index a356b5da3e..93e9a2a48a 100644
--- a/libs/ardour/test/fpu_test.cc
+++ b/libs/ardour/test/fpu_test.cc
@@ -153,6 +153,33 @@ FPUTest::avxTest ()
 	run (align_max);
 }
 
+void
+FPUTest::avx512fTest ()
+{
+	PBD::FPU* fpu = PBD::FPU::instance ();
+	if (!fpu->has_avx512f ()) {
+		printf ("AVX512F is not available at run-time\n");
+		return;
+	}
+
+#if ( defined(__x86_64__) || defined(_M_X64) )
+	size_t align_max = 64;
+#else
+	size_t align_max = 16;
+#endif
+	CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test1) % align_max) == 0);
+	CPPUNIT_ASSERT_MESSAGE ("Aligned Malloc", (((intptr_t)_test2) % align_max) == 0);
+
+	compute_peak          = x86_avx512f_compute_peak;
+	find_peaks            = x86_avx512f_find_peaks;
+	apply_gain_to_buffer  = x86_avx512f_apply_gain_to_buffer;
+	mix_buffers_with_gain = x86_avx512f_mix_buffers_with_gain;
+	mix_buffers_no_gain   = x86_avx512f_mix_buffers_no_gain;
+	copy_vector           = x86_avx512f_copy_vector;
+
+	run (align_max, FLT_EPSILON);
+}
+
 void
 FPUTest::sseTest ()
 {
diff --git a/libs/ardour/wscript b/libs/ardour/wscript
index e3428f76ae..1092963b11 100644
--- a/libs/ardour/wscript
+++ b/libs/ardour/wscript
@@ -465,16 +465,19 @@ def build(bld):
 
     avx_sources = []
     fma_sources = []
+    avx512f_sources = []
 
     if Options.options.fpu_optimization:
         if (bld.env['build_target'] == 'i386' or bld.env['build_target'] == 'i686'):
             obj.source += [ 'sse_functions_xmm.cc', 'sse_functions.s', ]
             avx_sources = [ 'sse_functions_avx_linux.cc' ]
             fma_sources = [ 'x86_functions_fma.cc' ]
+            avx512f_sources = [ 'x86_functions_avx512f.cc' ]
         elif bld.env['build_target'] == 'x86_64':
             obj.source += [ 'sse_functions_xmm.cc', 'sse_functions_64bit.s', ]
             avx_sources = [ 'sse_functions_avx_linux.cc' ]
             fma_sources = [ 'x86_functions_fma.cc' ]
+            avx512f_sources = [ 'x86_functions_avx512f.cc' ]
         elif bld.env['build_target'] == 'mingw':
             # usability of the 64 bit windows assembler depends on the compiler target,
             # not the build host, which in turn can only be inferred from the name
@@ -484,6 +487,7 @@ def build(bld):
                 obj.source += [ 'sse_functions_64bit_win.s', 'sse_avx_functions_64bit_win.s' ]
                 avx_sources = [ 'sse_functions_avx.cc' ]
                 fma_sources = [ 'x86_functions_fma.cc' ]
+                avx512f_sources = [ 'x86_functions_avx512f.cc' ]
         elif bld.env['build_target'] == 'aarch64':
             obj.source += ['arm_neon_functions.cc']
             obj.defines += [ 'ARM_NEON_SUPPORT' ]
@@ -537,6 +541,24 @@ def build(bld):
         obj.use += ['sse_fma_functions' ]
         obj.defines += [ 'FPU_AVX_FMA_SUPPORT' ]
 
+    if bld.is_defined('FPU_AVX512F_SUPPORT') and avx512f_sources:
+        avx512f_cxxflags = list(bld.env['CXXFLAGS'])
+        avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx512f'])
+        avx512f_cxxflags.append (bld.env['compiler_flags_dict']['avx'])
+        avx512f_cxxflags.append (bld.env['compiler_flags_dict']['pic'])
+        avx512f_cxxflags.append (bld.env['compiler_flags_dict']['fma'])
+
+        bld(features = 'cxx cxxstlib asm',
+            source = avx512f_sources,
+            cxxflags = avx512f_cxxflags,
+            includes = [ '.' ],
+            use = [ 'libtemporal', 'libpbd', 'libevoral', 'liblua' ],
+            uselib = [ 'GLIBMM', 'XML' ],
+            target = 'avx512f_functions')
+
+        obj.use += ['avx512f_functions' ]
+        obj.defines += [ 'FPU_AVX512F_SUPPORT' ]
+
     # i18n
     if bld.is_defined('ENABLE_NLS'):
         mo_files = bld.path.ant_glob('po/*.mo')
diff --git a/wscript b/wscript
index 76aca988f0..e8ef1fcb3a 100644
--- a/wscript
+++ b/wscript
@@ -89,6 +89,8 @@ compiler_flags_dictionaries= {
         'attasm': '-masm=att',
         # Flags to make AVX instructions/intrinsics available
         'avx': '-mavx',
+        # Flags to make AVX512F instructions/intrinsics available
+        'avx512f': '-mavx512f',
         # Flags to make FMA instructions/intrinsics available
         'fma': '-mfma',
         # Flags to make ARM/NEON instructions/intrinsics available
@@ -519,6 +521,16 @@ int main() { return 0; }''',
             if re.search ('x86_64-w64', str(conf.env['CC'])) is not None:
                 conf.define ('FPU_AVX_FMA_SUPPORT', 1)
         elif conf.env['build_target'] == 'i386' or conf.env['build_target'] == 'i686' or conf.env['build_target'] == 'x86_64':
+            conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m512 a = _mm512_setzero_ps(); a = _mm512_fmadd_ps(a, a, a); (void) a; return 0; }\n",
+                           features = ['cxx'],
+                           cxxflags = [ conf.env['compiler_flags_dict']['avx512f'], conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
+                           mandatory = False,
+                           execute = False,
+                           msg = 'Checking compiler for AVX512F intrinsics',
+                           okmsg = 'Found',
+                           errmsg = 'Not supported',
+                           define_name = 'FPU_AVX512F_SUPPORT')
+
             conf.check_cxx(fragment = "#include <immintrin.h>\nint main(void) { __m128 a; _mm_fmadd_ss(a, a, a); return 0; }\n",
                            features = ['cxx'],
                            cxxflags = [ conf.env['compiler_flags_dict']['fma'], conf.env['compiler_flags_dict']['avx'] ],
@@ -1528,6 +1540,7 @@ const char* const ardour_config_info = "\\n\\
     write_config_text('Dr. Mingw', conf.is_defined('HAVE_DRMINGW'))
     write_config_text('FLAC', conf.is_defined('HAVE_FLAC'))
     write_config_text('FPU optimization', opts.fpu_optimization)
+    write_config_text('FPU AVX512F support', conf.is_defined('FPU_AVX512F_SUPPORT'))
     write_config_text('FPU AVX/FMA support', conf.is_defined('FPU_AVX_FMA_SUPPORT'))
     write_config_text('Futex Semaphore', conf.is_defined('USE_FUTEX_SEMAPHORE'))
     write_config_text('Freedesktop files', opts.freedesktop)