// Copyright 2010 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Detects configuration and defines compiler-specific macros. // Also, sets user-defined CRUTIL_USE_* macros to default values. #ifndef CRCUTIL_PLATFORM_H_ #define CRCUTIL_PLATFORM_H_ // Permanently disable some annoying warnings generated // by Microsoft CL when compiling Microsoft's headers. #include "std_headers.h" // Use inline asm version of the code? #if !defined(CRCUTIL_USE_ASM) #define CRCUTIL_USE_ASM 1 #endif // !defined(CRCUTIL_USE_ASM) #if !defined(HAVE_I386) #if defined(__i386__) || defined(_M_IX86) #define HAVE_I386 1 #else #define HAVE_I386 0 #endif // defined(__i386__) || defined(_M_IX86) #endif // defined(HAVE_I386) #if !defined(HAVE_AMD64) #if defined(__amd64__) || defined(_M_AMD64) #define HAVE_AMD64 1 #else #define HAVE_AMD64 0 #endif // defined(__amd64__) || defined(_M_AMD64) #endif // defined(HAVE_AMD64) #if HAVE_AMD64 || HAVE_I386 #if defined(_MSC_VER) #pragma warning(push) // '_M_IX86' is not defined as a preprocessor macro #pragma warning(disable: 4668) #include #pragma warning(pop) #endif // defined(_MSC_VER) #if !defined(HAVE_MMX) #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) #define HAVE_MMX 1 #else #define HAVE_MMX 0 #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)) #endif // !defined(HAVE_MMX) #if !defined(HAVE_SSE) #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) #include #define HAVE_SSE 1 #else #define HAVE_SSE 0 #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)) #endif // !defined(HAVE_SSE) #if !defined(HAVE_SSE2) #if defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) #include #define HAVE_SSE2 1 #else #define HAVE_SSE2 0 #endif // defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)) #endif // !defined(HAVE_SSE2) #else #if !defined(HAVE_MMX) #define HAVE_MMX 0 #endif // !defined(HAVE_MMX) #if !defined(HAVE_SSE) #define HAVE_SSE 0 #endif // !defined(HAVE_SSE) #if !defined(HAVE_SSE2) #define HAVE_SSE2 0 #endif // !defined(HAVE_SSE2) #endif // HAVE_AMD64 || HAVE_I386 // Error checking #if HAVE_SSE && !HAVE_MMX #error SSE is available but not MMX? #endif // HAVE_SSE && !HAVE_MMX #if HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) #error SSE2 is available but not SSE or MMX? #endif // HAVE_SSE2 && (!HAVE_SSE || !HAVE_MMX) #if !defined(CRCUTIL_PREFETCH_WIDTH) // On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster without // prefetch for inputs smaller than 8MB and less than 1% slower for 8MB and // larger blocks. On older Q9650 CPU, the code is 2-3% faster for inputs // smaller than 8MB, 4-5% slower when length >= 8MB. // Tested with prefetch length 256, 512, and 4096. // // At this moment there is no compelling reason to use prefetching. // #define CRCUTIL_PREFETCH_WIDTH 0 #endif // !defined(CRCUTIL_PREFETCH_WIDTH) #if HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 #define PREFETCH(src) \ _mm_prefetch(reinterpret_cast(src) + CRCUTIL_PREFETCH_WIDTH, \ _MM_HINT_T0) #else #define PREFETCH(src) #endif // HAVE_SSE && CRCUTIL_PREFETCH_WIDTH > 0 // If block size exceeds CRCUTIL_MIN_ALIGN_SIZE, align the data // before accessing it at word boundary. See generic_crc.cc, // ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro. #if !defined(CRCUTIL_MIN_ALIGN_SIZE) #if HAVE_AMD64 || HAVE_I386 #define CRCUTIL_MIN_ALIGN_SIZE (1024) #else #define CRCUTIL_MIN_ALIGN_SIZE 0 #endif // HAVE_AMD64 || HAVE_I386 #endif // !defined(CRCUTIL_MIN_ALIGN_SIZE) // Use _mm_crc32_u64/32/8 intrinics? // If not, they will be implemented in software. #if !HAVE_I386 && !HAVE_AMD64 #undef CRCUTIL_USE_MM_CRC32 #define CRCUTIL_USE_MM_CRC32 0 #else #if !defined(CRCUTIL_USE_MM_CRC32) #if defined(_MSC_VER) || defined(__GNUC__) #define CRCUTIL_USE_MM_CRC32 1 #else #define CRCUTIL_USE_MM_CRC32 0 #endif // defined(_MSC_VER) || defined(__GNUC__) #endif // !defined(CRCUTIL_USE_MM_CRC32) #endif // !HAVE_I386 && !HAVE_AMD64 // Stringize -- always handy. #define TO_STRING_VALUE(arg) #arg #define TO_STRING(arg) TO_STRING_VALUE(arg) // Compilers give "right shift count >= width of type" warning even // though the shift happens only under appropriate "if". #define SHIFT_RIGHT_NO_WARNING(value, bits) \ ((value) >> (((bits) < (8 * sizeof(value))) ? (bits) : 0)) #define SHIFT_RIGHT_SAFE(value, bits) \ ((bits) < (8 * sizeof(value)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0) // The same for left shifts. #define SHIFT_LEFT_NO_WARNING(value, bits) \ ((value) << (((bits) < (8 * sizeof(value))) ? (bits) : 0)) #define SHIFT_LEFT_SAFE(value, bits) \ ((bits) < (8 * sizeof(value)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0) // GCC-specific macros. // #define GCC_VERSION_AVAILABLE(major, minor) \ (defined(__GNUC__) && \ (__GNUC__ > (major) || \ (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) #if defined(__GNUC__) // The GenericCrc tables must be properly aligned. // Penalty for misalignment? 50% performance degradation. // For 128-bit SSE2, the penalty is access violation. #define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n))) #if GCC_VERSION_AVAILABLE(4, 4) // If not marked as "omit frame pointer", // GCC won't be able to find enough registers. #define GCC_OMIT_FRAME_POINTER \ __attribute__((__optimize__(2, "omit-frame-pointer"))) #endif // GCC_VERSION_AVAILABLE(4, 4) #if !defined(__forceinline) #define __forceinline __attribute__((__always_inline__)) inline #endif // !defined(__forceinline) #if defined(__APPLE_CC__) // The version of GCC used by Max OS X xCode v 5664 does not understand // "movq xmm, r64" instruction and requires the use of "movd" (probably // because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm" // the same). // // Leaving common sense aside, let's peek into Intel's instruction // reference manual. That's what description of MOVD command says: // MOVD xmm, r/m32 (opcode 66 0F 6E /r) // MOVD r/m32, xmm (opcode 66 0F 7E /r) // MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r) // MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r) #define SSE2_MOVQ "movd" #else #define SSE2_MOVQ "movq" #endif // defined(__APPLE_CC__) #endif // defined(__GNUC__) // Define compiler-specific macros that were not set yet. #if !defined(_MSC_VER) && !defined(__forceinline) #define __forceinline inline #endif // !defined(_MSC_VER) && !defined(__forceinline) #if !defined(GCC_OMIT_FRAME_POINTER) #define GCC_OMIT_FRAME_POINTER #endif // !defined(GCC_OMIT_FRAME_POINTER) #if !defined(GCC_ALIGN_ATTRIBUTE) #define GCC_ALIGN_ATTRIBUTE(n) #endif // !defined(GCC_ALIGN_ATTRIBUTE) #endif // CRCUTIL_PLATFORM_H_