Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
sse-util.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_UTIL_SSE_UTIL_H
17 #define IMPALA_UTIL_SSE_UTIL_H
18 
19 #include <emmintrin.h>
20 
21 namespace impala {
22 
24 namespace SSEUtil {
// Number of 8-bit characters held by a 64-bit register (64 / 8) and by a
// 128-bit register (128 / 8).
static const int CHARS_PER_64_BIT_REGISTER = 8;
static const int CHARS_PER_128_BIT_REGISTER = 16;
29 
// Bit fields for the 8-bit immediate ("mode") operand of the SSE4.2
// PCMPxSTRx string-comparison instructions. Values are meant to be OR-ed
// together to form a complete mode.
static const int PCMPSTR_EQUAL_ANY = 0x00;    // strchr
static const int PCMPSTR_EQUAL_EACH = 0x08;   // strcmp
static const int PCMPSTR_UBYTE_OPS = 0x00;    // unsigned char (8-bits, rather than 16)
static const int PCMPSTR_NEG_POLARITY = 0x10; // see Intel SDM chapter 4.1.4.
37 
41 
46 
49  1 << 0,
50  1 << 1,
51  1 << 2,
52  1 << 3,
53  1 << 4,
54  1 << 5,
55  1 << 6,
56  1 << 7,
57  1 << 8,
58  1 << 9,
59  1 << 10,
60  1 << 11,
61  1 << 12,
62  1 << 13,
63  1 << 14,
64  1 << 15,
65  };
66 }
67 
71 #ifndef IR_COMPILE
72 
78 #if defined(__SSE4_1__) || defined(__POPCNT__)
79 #error "Do not compile with -msse4.1 or higher."
82 #endif
83 
87 #define SSE_ALWAYS_INLINE inline __attribute__ ((__always_inline__))
88 
90  __m128i str1, int len1, __m128i str2, int len2, const int mode) {
93  register __m128i result asm("xmm0");
94  __asm__("pcmpestrm %5, %2, %1"
95  : "=x"(result) : "x"(str1), "xm"(str2), "a"(len1), "d"(len2), "K"(mode) : "cc");
96  return result;
97 }
98 
100  __m128i str1, int len1, __m128i str2, int len2, const int mode) {
101  int result;
102  __asm__("pcmpestri %5, %2, %1"
103  : "=c"(result) : "x"(str1), "xm"(str2), "a"(len1), "d"(len2), "K"(mode) : "cc");
104  return result;
105 }
106 
// Folds one byte into a running checksum with the SSE4.2 'crc32b' instruction.
//
//   crc: checksum accumulated so far; updated in place by the instruction.
//   v:   the byte to fold in (may be taken from a register or from memory).
//
// Returns the updated checksum. No initial or final inversion is applied
// here; callers own that convention. The instruction is emitted through
// inline assembly, so the compiler does not need SSE4.2 code generation
// enabled for this to build; nothing in this function checks that the CPU
// actually supports the instruction.
static inline uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v) {
  __asm__("crc32b %[byte], %[acc]" : [acc] "+r"(crc) : [byte] "rm"(v));
  return crc;
}
111 
// Folds one 32-bit word into a running checksum with the SSE4.2 'crc32l'
// instruction.
//
//   crc: checksum accumulated so far; updated in place by the instruction.
//   v:   the 32-bit value to fold in (register or memory operand).
//
// Returns the updated checksum. No initial or final inversion is applied
// here. The instruction is emitted through inline assembly; nothing in this
// function checks that the CPU actually supports it.
static inline uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v) {
  __asm__("crc32l %[word], %[acc]" : [acc] "+r"(crc) : [word] "rm"(v));
  return crc;
}
116 
// Counts the bits set to 1 in 'a' using the 'popcntq' instruction.
//
//   a: the 64-bit value to inspect (register or memory operand).
//
// Returns the number of set bits, in the range [0, 64]. The instruction
// modifies the flags register, hence the "cc" clobber. It is emitted through
// inline assembly; nothing in this function checks that the CPU actually
// supports it.
static inline int64_t POPCNT_popcnt_u64(uint64_t a) {
  int64_t bits_set;
  __asm__("popcntq %[src], %[dst]" : [dst] "=r"(bits_set) : [src] "mr"(a) : "cc");
  return bits_set;
}
122 
123 #undef SSE_ALWAYS_INLINE
124 
125 #elif defined(__SSE4_2__) // IR_COMPILE for SSE 4.2.
126 
131 #include <smmintrin.h>
132 
// IR is being compiled with SSE 4.2 enabled, so the wrapper names can map
// straight onto the compiler-provided intrinsics.
#define SSE4_cmpestrm _mm_cmpestrm
#define SSE4_cmpestri _mm_cmpestri
#define SSE4_crc32_u8 _mm_crc32_u8
#define SSE4_crc32_u32 _mm_crc32_u32
#define POPCNT_popcnt_u64 _mm_popcnt_u64
138 
139 #else // IR_COMPILE without SSE 4.2.
140 
145 static inline __m128i SSE4_cmpestrm(
146  __m128i str1, int len1, __m128i str2, int len2, const int mode) {
147  DCHECK(false) << "CPU doesn't support SSE 4.2";
148  return (__m128i) { 0 };
149 }
150 
151 static inline int SSE4_cmpestri(
152  __m128i str1, int len1, __m128i str2, int len2, const int mode) {
153  DCHECK(false) << "CPU doesn't support SSE 4.2";
154  return 0;
155 }
156 
157 static inline uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v) {
158  DCHECK(false) << "CPU doesn't support SSE 4.2";
159  return 0;
160 }
161 
162 static inline uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v) {
163  DCHECK(false) << "CPU doesn't support SSE 4.2";
164  return 0;
165 }
166 
167 static inline int64_t POPCNT_popcnt_u64(uint64_t a) {
168  DCHECK(false) << "CPU doesn't support SSE 4.2";
169  return 0;
170 }
171 
172 #endif
173 
174 }
175 
176 #endif
static const int SSE_BITMASK[CHARS_PER_128_BIT_REGISTER]
Precomputed mask values up to 16 bits.
Definition: sse-util.h:48
static const int PCMPSTR_EQUAL_EACH
Definition: sse-util.h:34
static SSE_ALWAYS_INLINE int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2, const int mode)
Definition: sse-util.h:99
static const int STRCHR_MODE
Definition: sse-util.h:40
static int64_t POPCNT_popcnt_u64(uint64_t a)
Definition: sse-util.h:117
static const int PCMPSTR_UBYTE_OPS
Definition: sse-util.h:35
static uint32_t SSE4_crc32_u8(uint32_t crc, uint8_t v)
Definition: sse-util.h:107
static uint32_t SSE4_crc32_u32(uint32_t crc, uint32_t v)
Definition: sse-util.h:112
static const int PCMPSTR_EQUAL_ANY
Definition: sse-util.h:33
static const int STRCMP_MODE
Definition: sse-util.h:44
static const int CHARS_PER_64_BIT_REGISTER
Definition: sse-util.h:27
static SSE_ALWAYS_INLINE __m128i SSE4_cmpestrm(__m128i str1, int len1, __m128i str2, int len2, const int mode)
Definition: sse-util.h:89
#define SSE_ALWAYS_INLINE
Definition: sse-util.h:87
static const int PCMPSTR_NEG_POLARITY
Definition: sse-util.h:36
static const int CHARS_PER_128_BIT_REGISTER
Definition: sse-util.h:28