Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
string-value.inline.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_RUNTIME_STRING_VALUE_INLINE_H
17 #define IMPALA_RUNTIME_STRING_VALUE_INLINE_H
18 
19 #include "runtime/string-value.h"
20 
21 #include <cstring>
22 #include "util/cpu-info.h"
23 #include "util/sse-util.h"
24 
25 namespace impala {
26 
37 static inline int StringCompare(const char* s1, int n1, const char* s2, int n2, int len) {
38  DCHECK_EQ(len, std::min(n1, n2));
41  __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
42  __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
43  int chars_match = SSE4_cmpestri(xmm0, SSEUtil::CHARS_PER_128_BIT_REGISTER,
45  if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
46  return s1[chars_match] - s2[chars_match];
47  }
51  }
52  }
53  // TODO: for some reason memcmp is way slower than strncmp (2.5x) why?
54  int result = strncmp(s1, s2, len);
55  if (result != 0) return result;
56  return n1 - n2;
57 }
58 
59 inline int StringValue::Compare(const StringValue& other) const {
60  int l = std::min(len, other.len);
61  if (l == 0) {
62  if (len == other.len) {
63  return 0;
64  } else if (len == 0) {
65  return -1;
66  } else {
67  DCHECK_EQ(other.len, 0);
68  return 1;
69  }
70  }
71  return StringCompare(this->ptr, this->len, other.ptr, other.len, l);
72 }
73 
74 inline bool StringValue::Eq(const StringValue& other) const {
75  if (this->len != other.len) return false;
76  return StringCompare(this->ptr, this->len, other.ptr, other.len, this->len) == 0;
77 }
78 
79 inline StringValue StringValue::Substring(int start_pos) const {
80  return StringValue(ptr + start_pos, len - start_pos);
81 }
82 
83 inline StringValue StringValue::Substring(int start_pos, int new_len) const {
84  return StringValue(ptr + start_pos, (new_len < 0) ? (len - start_pos) : new_len);
85 }
86 
88  // Remove leading and trailing spaces.
89  int32_t begin = 0;
90  while (begin < len && ptr[begin] == ' ') {
91  ++begin;
92  }
93  int32_t end = len - 1;
94  while (end > begin && ptr[end] == ' ') {
95  --end;
96  }
97  return StringValue(ptr + begin, end - begin + 1);
98 }
99 
100 inline void StringValue::PadWithSpaces(char* cptr, int64_t cptr_len, int64_t num_chars) {
101  DCHECK(cptr != NULL);
102  DCHECK_GE(cptr_len, 1);
103  DCHECK_GE(cptr_len, num_chars);
104  memset(&cptr[num_chars], ' ', cptr_len - num_chars);
105 }
106 
107 inline int64_t StringValue::UnpaddedCharLength(const char* cptr, int64_t len) {
108  DCHECK(cptr != NULL);
109  DCHECK_GE(len, 0);
110  int64_t last = len - 1;
111  while (last >= 0 && cptr[last] == ' ') --last;
112  return last + 1;
113 }
114 
115 inline char* StringValue::CharSlotToPtr(void* slot, const ColumnType& type) {
116  DCHECK(type.type == TYPE_CHAR);
117  if (slot == NULL) return NULL;
118  if (type.IsVarLen()) {
119  StringValue* sv = reinterpret_cast<StringValue*>(slot);
120  DCHECK_EQ(sv->len, type.len);
121  return sv->ptr;
122  }
123  return reinterpret_cast<char*>(slot);
124 }
125 
126 inline const char* StringValue::CharSlotToPtr(const void* slot, const ColumnType& type) {
127  DCHECK(type.type == TYPE_CHAR);
128  if (slot == NULL) return NULL;
129  if (type.IsVarLen()) {
130  const StringValue* sv = reinterpret_cast<const StringValue*>(slot);
131  DCHECK_EQ(sv->len, type.len);
132  return sv->ptr;
133  }
134  return reinterpret_cast<const char*>(slot);
135 }
136 
137 }
138 #endif
bool Eq(const StringValue &other) const
==
bool IsVarLen() const
Definition: types.h:172
static SSE_ALWAYS_INLINE int SSE4_cmpestri(__m128i str1, int len1, __m128i str2, int len2, const int mode)
Definition: sse-util.h:99
static void PadWithSpaces(char *cptr, int64_t cptr_len, int64_t num_chars)
StringValue Trim() const
Trims leading and trailing spaces.
static int64_t UnpaddedCharLength(const char *cptr, int64_t len)
Returns number of characters in a char array (ignores trailing spaces)
PrimitiveType type
Definition: types.h:60
int Compare(const StringValue &other) const
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static const int64_t SSE4_2
Definition: cpu-info.h:34
StringValue Substring(int start_pos) const
Returns the substring starting at start_pos until the end of string.
static char * CharSlotToPtr(void *slot, const ColumnType &type)
static const int STRCMP_MODE
Definition: sse-util.h:44
static int StringCompare(const char *s1, int n1, const char *s2, int n2, int len)
static bool IsSupported(long flag)
Returns whether or not the cpu supports this flag.
Definition: cpu-info.h:58
static const int CHARS_PER_128_BIT_REGISTER
Definition: sse-util.h:28