clean up FPU code with some ideas from Chromium and the web

This commit is contained in:
Paul Davis 2015-08-11 22:56:55 -04:00
parent 3a1a978541
commit 41ccfee7a4
2 changed files with 176 additions and 132 deletions

View file

@ -29,6 +29,7 @@
#include <intrin.h> #include <intrin.h>
#endif #endif
#include "pbd/compose.h"
#include "pbd/fpu.h" #include "pbd/fpu.h"
#include "pbd/error.h" #include "pbd/error.h"
@ -37,107 +38,124 @@
using namespace PBD; using namespace PBD;
using namespace std; using namespace std;
FPU::FPU () FPU* FPU::_instance (0);
{
unsigned long cpuflags = 0;
_flags = Flags (0); #ifndef COMPILER_MSVC
/* use __cpuid() as the name to match the MSVC intrinsic */
/**
 * Execute the CPUID instruction and hand back the four result registers.
 *
 * @param regs        receives EAX, EBX, ECX and EDX, in that order
 *                    (the same layout the MSVC intrinsic uses)
 * @param cpuid_leaf  value loaded into EAX before CPUID is executed
 *
 * ECX is always loaded with zero before the instruction is issued so that
 * leaves which take a sub-leaf index in ECX return a well-defined result
 * (sub-leaf 0) rather than depending on whatever happened to be in ECX.
 */
static void
__cpuid(int regs[4], int cpuid_leaf)
{
	int eax, ebx, ecx, edx;

#if defined(__i386__)
	/* EBX may be reserved (e.g. as the PIC register) on 32 bit builds,
	 * so it is not named as an output. Instead it is swapped with a
	 * scratch register around CPUID: the first xchgl parks the caller's
	 * EBX in the scratch register, the second one restores it and leaves
	 * CPUID's EBX result in the scratch register.
	 *
	 * No push/pop is used, so the stack pointer never moves inside the
	 * asm statement and all results are delivered in registers.
	 */
	asm volatile (
		"xchgl %%ebx, %1;\n\t"
		"cpuid;\n\t"
		"xchgl %%ebx, %1;\n\t"
		: "=a" (eax), "=&r" (ebx), "=c" (ecx), "=d" (edx)
		: "0" (cpuid_leaf), "2" (0));
#else
	/* EBX can be named directly as an output here */
	asm volatile (
		"cpuid;\n\t"
		: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
		: "0" (cpuid_leaf), "2" (0));
#endif

	regs[0] = eax;
	regs[1] = ebx;
	regs[2] = ecx;
	regs[3] = edx;
}
/**
 * Execute XGETBV for the extended control register selected by @a xcr.
 *
 * @param xcr  value placed in ECX before the instruction is issued
 * @return     the 64 bit result, with EDX as the upper 32 bits and
 *             EAX as the lower 32 bits
 */
static uint64_t
_xgetbv (uint32_t xcr)
{
	uint32_t low_half;
	uint32_t high_half;

	__asm__ volatile ("xgetbv" : "=a" (low_half), "=d" (high_half) : "c" (xcr));

	/* stitch EDX:EAX together into a single 64 bit value */
	uint64_t combined = static_cast<uint64_t>(high_half);
	combined <<= 32;
	combined |= low_half;
	return combined;
}
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif /* !COMPILER_MSVC */
/**
 * Return the FPU object held in the static _instance pointer, creating it
 * on the first call.
 *
 * @return the value of _instance, which is set here if it was still null
 */
FPU*
FPU::instance()
{
	if (_instance) {
		/* already created by an earlier call */
		return _instance;
	}

	_instance = new FPU;
	return _instance;
}
FPU::FPU ()
: _flags ((Flags) 0)
{
if (_instance) {
error << _("FPU object instantiated more than once") << endmsg;
}
#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86 #if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
/* Non-Intel architecture, nothing to do here */
return; return;
#else #else
#ifdef PLATFORM_WINDOWS /* Get the CPU vendor just for kicks */
// Get CPU flags using Microsoft function // __cpuid with an InfoType argument of 0 returns the number of
// It works for both 64 and 32 bit systems // valid Ids in CPUInfo[0] and the CPU identification string in
// no need to use assembler for getting info from register, this function does this for us // the other three array elements. The CPU identification string is
int cpuInfo[4]; // not in linear order. The code below arranges the information
__cpuid (cpuInfo, 1); // in a human readable form. The human readable order is CPUInfo[1] |
cpuflags = cpuInfo[3]; // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
// before using memcpy to copy these three array elements to cpu_string.
#else int cpu_info[4];
char cpu_string[48];
string cpu_vendor;
#ifndef _LP64 /* *nix; 32 bit version. This odd macro constant is required because we need something that identifies this as a 32 bit __cpuid (cpu_info, 0);
build on Linux and on OS X. Anything that serves this purpose will do, but this is the best thing we've identified
so far.
*/
asm volatile ( int num_ids = cpu_info[0];
"mov $1, %%eax\n" std::swap(cpu_info[2], cpu_info[3]);
"pushl %%ebx\n" memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
"cpuid\n" cpu_vendor.assign(cpu_string, 3 * sizeof(cpu_info[1]));
"movl %%edx, %0\n"
"popl %%ebx\n"
: "=r" (cpuflags)
:
: "%eax", "%ecx", "%edx"
);
#else /* *nix; 64 bit version */ info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
/* asm notes: although we explicitly save&restore rbx, we must tell if (num_ids > 0) {
gcc that ebx,rbx is clobbered so that it doesn't try to use it as an intermediate
register when storing rbx. gcc 4.3 didn't make this "mistake", but gcc 4.4
does, at least on x86_64.
*/
asm volatile ( /* Now get CPU/FPU flags */
"pushq %%rbx\n"
"movq $1, %%rax\n"
"cpuid\n"
"movq %%rdx, %0\n"
"popq %%rbx\n"
: "=r" (cpuflags)
:
: "%rax", "%rbx", "%rcx", "%rdx"
);
#endif /* _LP64 */ __cpuid (cpu_info, 1);
#endif /* PLATFORM_WINDOWS */
#ifndef __APPLE__ if ((cpu_info[2] & (1<<27)) /* AVX */ &&
/* must check for both AVX and OSXSAVE support in cpuflags before (cpu_info[2] & (1<<28) /* (OS)XSAVE */) &&
* attempting to use AVX related instructions. (_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6)) { /* OS really supports XSAVE */
*/ info << _("AVX-capable processor") << endmsg;
if ((cpuflags & (1<<27)) /* AVX */ && (cpuflags & (1<<28) /* (OS)XSAVE */)) {
std::cerr << "Looks like AVX\n";
/* now check if YMM resters state is saved: which means OS does
* know about new YMM registers and saves them during context
* switches it's true for most cases, but we must be sure
*
* giving 0 as the argument to _xgetbv() fetches the
* XCR_XFEATURE_ENABLED_MASK, which we need to check for
* the 2nd and 3rd bits, indicating correct register save/restore.
*/
uint64_t xcrFeatureMask = 0;
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4
unsigned int eax, edx, index = 0;
asm volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
xcrFeatureMask = ((unsigned long long)edx << 32) | eax;
#elif defined (COMPILER_MSVC)
xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
#endif
if (xcrFeatureMask & 0x6) {
std::cerr << "Definitely AVX\n";
_flags = Flags (_flags | (HasAVX) ); _flags = Flags (_flags | (HasAVX) );
} }
}
#endif /* !__APPLE__ */
if (cpuflags & (1<<25)) { if (cpu_info[3] & (1<<25)) {
_flags = Flags (_flags | (HasSSE|HasFlushToZero)); _flags = Flags (_flags | (HasSSE|HasFlushToZero));
} }
if (cpuflags & (1<<26)) { if (cpu_info[3] & (1<<26)) {
_flags = Flags (_flags | HasSSE2); _flags = Flags (_flags | HasSSE2);
} }
if (cpuflags & (1 << 24)) { /* Figure out CPU/FPU denormal handling capabilities */
if (cpu_info[3] & (1 << 24)) {
char** fxbuf = 0; char** fxbuf = 0;
@ -209,6 +227,27 @@ FPU::FPU ()
#endif #endif
} }
#endif #endif
/* finally get the CPU brand */
__cpuid (cpu_info, 0x80000000);
const int parameter_end = 0x80000004;
string cpu_brand;
if (cpu_info[0] >= parameter_end) {
char* cpu_string_ptr = cpu_string;
for (int parameter = 0x80000002; parameter <= parameter_end &&
cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
__cpuid(cpu_info, parameter);
memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
cpu_string_ptr += sizeof(cpu_info);
}
cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
}
}
} }
FPU::~FPU () FPU::~FPU ()

View file

@ -35,9 +35,10 @@ class LIBPBD_API FPU {
}; };
public: public:
FPU ();
~FPU (); ~FPU ();
static FPU* instance();
bool has_flush_to_zero () const { return _flags & HasFlushToZero; } bool has_flush_to_zero () const { return _flags & HasFlushToZero; }
bool has_denormals_are_zero () const { return _flags & HasDenormalsAreZero; } bool has_denormals_are_zero () const { return _flags & HasDenormalsAreZero; }
bool has_sse () const { return _flags & HasSSE; } bool has_sse () const { return _flags & HasSSE; }
@ -46,6 +47,10 @@ class LIBPBD_API FPU {
private: private:
Flags _flags; Flags _flags;
static FPU* _instance;
FPU ();
}; };
} }