16 #ifndef IMPALA_UTIL_SSE_UTIL_H
17 #define IMPALA_UTIL_SSE_UTIL_H
19 #include <emmintrin.h>
// Abort compilation when either __SSE4_1__ or __POPCNT__ is defined, i.e. when
// the compiler has been told it may emit SSE 4.1 / POPCNT instructions itself.
// NOTE(review): presumably the wrappers in this header are meant to be the only
// place such instructions appear, so they can be guarded at runtime — confirm
// against the surrounding file, which is not visible in this excerpt.
78 #if defined(__SSE4_1__) || defined(__POPCNT__)
79 #error "Do not compile with -msse4.1 or higher."
87 #define SSE_ALWAYS_INLINE inline __attribute__ ((__always_inline__))
90 __m128i str1,
int len1, __m128i str2,
int len2,
const int mode) {
// Bind `result` to xmm0 through a GCC explicit-register variable.
// NOTE(review): presumably required because pcmpestrm delivers its mask in
// xmm0 implicitly rather than in a named operand — confirm against the
// Intel SDM entry for PCMPESTRM.
93 register __m128i result
asm(
"xmm0");
// Emit pcmpestrm by hand. Operand numbering in the template:
// %0 = result (output), %1 = str1, %2 = str2, %3 = len1, %4 = len2, %5 = mode.
94 __asm__(
"pcmpestrm %5, %2, %1"
95 :
// Output: result, held in an SSE register ("=x").
"=x"(result) :
// str1 must be in an SSE register; str2 may be an SSE register or memory.
"x"(str1),
"xm"(str2),
// The two lengths are pinned to the a and d registers respectively.
"a"(len1),
"d"(len2),
// "K" only accepts a compile-time signed 8-bit constant, so mode must be
// a constant expression at the point this statement is compiled.
"K"(mode) :
// Declare the condition flags as clobbered.
"cc");
100 __m128i str1,
int len1, __m128i str2,
int len2,
const int mode) {
// Emit pcmpestri by hand. Operand numbering in the template:
// %0 = result (output), %1 = str1, %2 = str2, %3 = len1, %4 = len2, %5 = mode.
// `result` is declared outside the lines visible in this excerpt.
102 __asm__(
"pcmpestri %5, %2, %1"
103 :
// Output: result is read back from the c register ("=c").
"=c"(result) :
// str1 must be in an SSE register; str2 may be an SSE register or memory.
"x"(str1),
"xm"(str2),
// The two lengths are pinned to the a and d registers respectively.
"a"(len1),
"d"(len2),
// "K" only accepts a compile-time signed 8-bit constant, so mode must be
// a constant expression at the point this statement is compiled.
"K"(mode) :
// Declare the condition flags as clobbered.
"cc");
// Emit the byte-sized crc32 instruction directly.
// "+r": crc is both read and written, and lives in a general-purpose register.
// "rm": v may be supplied from a register or from memory.
108 __asm__(
"crc32b %1, %0" :
"+r"(crc) :
"rm"(v));
// Emit the 32-bit ("l" suffix) crc32 instruction directly.
// "+r": crc is both read and written, and lives in a general-purpose register.
// "rm": v may be supplied from a register or from memory.
113 __asm__(
"crc32l %1, %0" :
"+r"(crc) :
"rm"(v));
// Emit the 64-bit ("q" suffix) popcnt instruction directly.
// "=r": result is write-only and placed in a general-purpose register.
// "mr": a may be supplied from memory or from a register.
// "cc": the condition flags are declared clobbered.
// `result` is declared outside the lines visible in this excerpt.
119 __asm__(
"popcntq %1, %0" :
"=r"(result) :
"mr"(a) :
"cc");
123 #undef SSE_ALWAYS_INLINE
125 #elif defined(__SSE4_2__) // IR_COMPILE for SSE 4.2.
131 #include <smmintrin.h>
// In this preprocessor branch (taken when __SSE4_2__ is defined) the wrapper
// names are plain aliases for the compiler's _mm_* intrinsics, so no
// hand-written assembly is involved; each use expands to the intrinsic itself.
133 #define SSE4_cmpestrm _mm_cmpestrm
134 #define SSE4_cmpestri _mm_cmpestri
135 #define SSE4_crc32_u8 _mm_crc32_u8
136 #define SSE4_crc32_u32 _mm_crc32_u32
137 #define POPCNT_popcnt_u64 _mm_popcnt_u64
139 #else // IR_COMPILE without SSE 4.2.
146 __m128i str1,
int len1, __m128i str2,
int len2,
const int mode) {
// Fallback used in the branch compiled without SSE 4.2: the check below is
// given a constant `false`, so reaching this statement is treated as an error.
// NOTE(review): DCHECK is defined outside this excerpt (presumably the
// glog-style debug-only check — confirm); if it compiles away in release
// builds, execution simply falls through to the return.
147 DCHECK(
false) <<
"CPU doesn't support SSE 4.2";
// Return an __m128i built from the initializer { 0 } as a placeholder value.
148 return (__m128i) { 0 };
152 __m128i str1,
int len1, __m128i str2,
int len2,
const int mode) {
153 DCHECK(
false) <<
"CPU doesn't support SSE 4.2";
157 static inline uint32_t
SSE4_crc32_u8(uint32_t crc, uint8_t v) {
158 DCHECK(
false) <<
"CPU doesn't support SSE 4.2";
163 DCHECK(
false) <<
"CPU doesn't support SSE 4.2";
168 DCHECK(
false) <<
"CPU doesn't support SSE 4.2";
static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER]
Precomputed mask values up to 16 bits.
static const int PCMPSTR_EQUAL_EACH
static SSE_ALWAYS_INLINE int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2, const int mode)
static const int STRCHR_MODE
static int64_t POPCNT_popcnt_u64(uint64_t a)
static const int PCMPSTR_UBYTE_OPS
static uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v)
static uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v)
static const int PCMPSTR_EQUAL_ANY
static const int STRCMP_MODE
static const int CHARS_PER_64_BIT_REGISTER
static SSE_ALWAYS_INLINE __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int len2, const int mode)
#define SSE_ALWAYS_INLINE
static const int PCMPSTR_NEG_POLARITY
static const int CHARS_PER_128_BIT_REGISTER