Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
cast-functions.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "exprs/cast-functions.h"
16 
17 #include <boost/lexical_cast.hpp>
18 
19 #include "exprs/anyval-util.h"
22 #include "util/string-parser.h"
23 #include "string-functions.h"
24 
25 #include "common/names.h"
26 
27 using namespace impala;
28 using namespace impala_udf;
29 
30 // The maximum number of characters needed to represent a floating-point number (float or
31 // double) as a string. 24 = 17 (maximum significant digits) + 1 (decimal point) + 1 ('E')
32 // + 3 (exponent digits) + 2 (negative signs) (see http://stackoverflow.com/a/1701085)
33 const int MAX_FLOAT_CHARS = 24;
34 
// Defines CastFunctions::CastTo<to_type>() taking a 'from_type' argument.
// A NULL input produces to_type::null(); any other input produces a 'to_type'
// constructed directly from the input's 'val' member.
#define CAST_FUNCTION(from_type, to_type) \
  to_type CastFunctions::CastTo##to_type(FunctionContext* ctx, const from_type& val) { \
    return val.is_null ? to_type::null() : to_type(val.val); \
  }
40 
47 
// Numeric-to-numeric cast instantiations, grouped by target type. Each line defines
// CastFunctions::CastTo<target>() for one source type via CAST_FUNCTION.
// NOTE(review): the TinyIntVal group below lists only four source types (no BooleanVal
// or IntVal), whereas every other group lists six - confirm no instantiation was lost.
49 CAST_FUNCTION(SmallIntVal, TinyIntVal)
51 CAST_FUNCTION(BigIntVal, TinyIntVal)
52 CAST_FUNCTION(FloatVal, TinyIntVal)
53 CAST_FUNCTION(DoubleVal, TinyIntVal)
54 
55 CAST_FUNCTION(BooleanVal, SmallIntVal)
56 CAST_FUNCTION(TinyIntVal, SmallIntVal)
57 CAST_FUNCTION(IntVal, SmallIntVal)
58 CAST_FUNCTION(BigIntVal, SmallIntVal)
59 CAST_FUNCTION(FloatVal, SmallIntVal)
60 CAST_FUNCTION(DoubleVal, SmallIntVal)
61 
62 CAST_FUNCTION(BooleanVal, IntVal)
63 CAST_FUNCTION(TinyIntVal, IntVal)
64 CAST_FUNCTION(SmallIntVal, IntVal)
65 CAST_FUNCTION(BigIntVal, IntVal)
66 CAST_FUNCTION(FloatVal, IntVal)
67 CAST_FUNCTION(DoubleVal, IntVal)
68 
69 CAST_FUNCTION(BooleanVal, BigIntVal)
70 CAST_FUNCTION(TinyIntVal, BigIntVal)
71 CAST_FUNCTION(SmallIntVal, BigIntVal)
72 CAST_FUNCTION(IntVal, BigIntVal)
73 CAST_FUNCTION(FloatVal, BigIntVal)
74 CAST_FUNCTION(DoubleVal, BigIntVal)
75 
76 CAST_FUNCTION(BooleanVal, FloatVal)
77 CAST_FUNCTION(TinyIntVal, FloatVal)
78 CAST_FUNCTION(SmallIntVal, FloatVal)
79 CAST_FUNCTION(IntVal, FloatVal)
80 CAST_FUNCTION(BigIntVal, FloatVal)
81 CAST_FUNCTION(DoubleVal, FloatVal)
82 
83 CAST_FUNCTION(BooleanVal, DoubleVal)
84 CAST_FUNCTION(TinyIntVal, DoubleVal)
85 CAST_FUNCTION(SmallIntVal, DoubleVal)
86 CAST_FUNCTION(IntVal, DoubleVal)
87 CAST_FUNCTION(BigIntVal, DoubleVal)
88 CAST_FUNCTION(FloatVal, DoubleVal)
89 
// Defines CastFunctions::CastTo<num_type>() taking a StringVal argument.
// The string bytes are handed to StringParser::<string_parser_fn><native_type>().
// The result is num_type::null() when the input is NULL or when the parser reports
// anything other than StringParser::PARSE_SUCCESS; otherwise it holds the parsed value.
#define CAST_FROM_STRING(num_type, native_type, string_parser_fn) \
  num_type CastFunctions::CastTo##num_type(FunctionContext* ctx, const StringVal& val) { \
    if (val.is_null) return num_type::null(); \
    StringParser::ParseResult parse_status; \
    num_type parsed; \
    parsed.val = StringParser::string_parser_fn<native_type>( \
        reinterpret_cast<char*>(val.ptr), val.len, &parse_status); \
    if (UNLIKELY(parse_status != StringParser::PARSE_SUCCESS)) { \
      return num_type::null(); \
    } \
    return parsed; \
  }
100 
// String-to-number cast instantiations. The integer targets parse with
// StringParser::StringToInt and the floating-point targets with
// StringParser::StringToFloat, each templated on the listed native type.
101 CAST_FROM_STRING(TinyIntVal, int8_t, StringToInt)
102 CAST_FROM_STRING(SmallIntVal, int16_t, StringToInt)
103 CAST_FROM_STRING(IntVal, int32_t, StringToInt)
104 CAST_FROM_STRING(BigIntVal, int64_t, StringToInt)
105 CAST_FROM_STRING(FloatVal, float, StringToFloat)
106 CAST_FROM_STRING(DoubleVal, double, StringToFloat)
107 
// Defines CastFunctions::CastToStringVal() taking a 'num_type' argument.
// A NULL input produces StringVal::null(). Otherwise val.val is rendered with
// lexical_cast<string>, copied into a StringVal through AnyValUtil::FromString(), and
// passed to AnyValUtil::TruncateIfNecessary() together with the function's return type.
#define CAST_TO_STRING(num_type) \
  StringVal CastFunctions::CastToStringVal(FunctionContext* ctx, const num_type& val) { \
    if (val.is_null) return StringVal::null(); \
    ColumnType return_type = AnyValUtil::TypeDescToColumnType(ctx->GetReturnType()); \
    const string text = lexical_cast<string>(val.val); \
    StringVal result = AnyValUtil::FromString(ctx, text); \
    AnyValUtil::TruncateIfNecessary(return_type, &result); \
    return result; \
  }
116 
// Number-to-string cast instantiations that go through lexical_cast (see CAST_TO_STRING).
// TinyIntVal is not instantiated here: a comment further down in this file says it is
// special-cased because boost treats it as a char.
117 CAST_TO_STRING(BooleanVal);
118 CAST_TO_STRING(SmallIntVal);
119 CAST_TO_STRING(IntVal);
120 CAST_TO_STRING(BigIntVal);
121 
// Defines CastFunctions::CastToStringVal() taking a floating-point 'float_type' argument
// and formatting val.val with the printf-style 'format'.
//  - A NULL input produces StringVal::null().
//  - NaN is always rendered as "nan" (val.val may be -nan).
//  - Any other value is written with snprintf() into a buffer of MAX_FLOAT_CHARS + 1
//    bytes; the result length is the snprintf() return value.
// Every non-NULL result, including "nan", is passed to AnyValUtil::TruncateIfNecessary()
// with the function's return type. Previously the NaN branch returned early with a
// StringVal pointing at a string literal and skipped that step, so NaN was the only
// result not subject to the return-type length handling.
#define CAST_FLOAT_TO_STRING(float_type, format) \
  StringVal CastFunctions::CastToStringVal(FunctionContext* ctx, const float_type& val) { \
    if (val.is_null) return StringVal::null(); \
    ColumnType return_type = AnyValUtil::TypeDescToColumnType(ctx->GetReturnType()); \
    StringVal sv; \
    if (isnan(val.val)) { \
      /* val.val could be -nan, emit "nan" instead. Built via FromString() (as the */ \
      /* integer casts do) so the result is not backed by a string literal. */ \
      sv = AnyValUtil::FromString(ctx, "nan"); \
    } else { \
      /* Add 1 to MAX_FLOAT_CHARS since snprintf adds a trailing '\0' */ \
      sv = StringVal(ctx, MAX_FLOAT_CHARS + 1); \
      sv.len = snprintf(reinterpret_cast<char*>(sv.ptr), sv.len, format, val.val); \
      DCHECK_GT(sv.len, 0); \
      DCHECK_LE(sv.len, MAX_FLOAT_CHARS); \
    } \
    /* Apply the return-type length handling uniformly to both branches. */ \
    AnyValUtil::TruncateIfNecessary(return_type, &sv); \
    return sv; \
  }
136 
// Floating-point-to-string cast instantiations. The precision in each format string
// matches the significant-digit counts quoted in the note below.
137 // Floats have up to 9 significant digits, doubles up to 17
138 // (see http://en.wikipedia.org/wiki/Single-precision_floating-point_format
139 // and http://en.wikipedia.org/wiki/Double-precision_floating-point_format)
140 CAST_FLOAT_TO_STRING(FloatVal, "%.9g");
141 CAST_FLOAT_TO_STRING(DoubleVal, "%.17g");
142 
143 // Special-case tinyint because boost thinks it's a char and handles it differently.
144 // e.g. '0' is written as an empty string.
146  if (val.is_null) return StringVal::null();
147  int64_t tmp_val = val.val;
149  StringVal sv = AnyValUtil::FromString(ctx, lexical_cast<string>(tmp_val));
151  return sv;
152 }
153 
155  if (val.is_null) return StringVal::null();
158  StringVal sv = AnyValUtil::FromString(ctx, lexical_cast<string>(tv));
160  return sv;
161 }
162 
164  if (val.is_null) return StringVal::null();
165  StringVal sv;
167  sv.ptr = val.ptr;
168  sv.len = val.len;
170  return sv;
171 }
172 
174  if (val.is_null) return StringVal::null();
175 
177  DCHECK(type.type == TYPE_CHAR);
178  DCHECK_GE(type.len, 1);
179  char* cptr;
180  if (type.len > val.len) {
181  cptr = reinterpret_cast<char*>(ctx->impl()->AllocateLocal(type.len));
182  memcpy(cptr, val.ptr, min(type.len, val.len));
183  StringValue::PadWithSpaces(cptr, type.len, val.len);
184  } else {
185  cptr = reinterpret_cast<char*>(val.ptr);
186  }
187  StringVal sv;
188  sv.ptr = reinterpret_cast<uint8_t*>(cptr);
189  sv.len = type.len;
190  return sv;
191 }
192 
// Defines CastFunctions::CastTo<to_type>() taking a TimestampVal argument.
// The result is to_type::null() when the input is NULL or when the converted
// TimestampValue reports no date; otherwise it is built from ToUnixTime().
#define CAST_FROM_TIMESTAMP(to_type) \
  to_type CastFunctions::CastTo##to_type( \
      FunctionContext* ctx, const TimestampVal& val) { \
    if (val.is_null) return to_type::null(); \
    TimestampValue ts = TimestampValue::FromTimestampVal(val); \
    if (ts.HasDate()) return to_type(ts.ToUnixTime()); \
    return to_type::null(); \
  }
201 
// Timestamp-to-boolean/integer cast instantiations; each result is built from the
// timestamp's ToUnixTime() value (see CAST_FROM_TIMESTAMP).
202 CAST_FROM_TIMESTAMP(BooleanVal);
203 CAST_FROM_TIMESTAMP(TinyIntVal);
204 CAST_FROM_TIMESTAMP(SmallIntVal);
205 CAST_FROM_TIMESTAMP(IntVal);
206 CAST_FROM_TIMESTAMP(BigIntVal);
207 
// Defines CastFunctions::CastTo<to_type>() taking a TimestampVal argument, like
// CAST_FROM_TIMESTAMP but building the result from ToSubsecondUnixTime().
// The result is to_type::null() when the input is NULL or when the converted
// TimestampValue reports no date.
// NOTE(review): no instantiation of this macro is visible in this part of the file -
// confirm where it is used.
#define CAST_FROM_SUBSECOND_TIMESTAMP(to_type) \
  to_type CastFunctions::CastTo##to_type( \
      FunctionContext* ctx, const TimestampVal& val) { \
    if (val.is_null) return to_type::null(); \
    TimestampValue ts = TimestampValue::FromTimestampVal(val); \
    if (ts.HasDate()) return to_type(ts.ToSubsecondUnixTime()); \
    return to_type::null(); \
  }
216 
219 
// Defines CastFunctions::CastToTimestampVal() taking a 'from_type' argument.
// A TimestampValue is constructed from val.val. The result is TimestampVal::null() when
// the input is NULL or when that TimestampValue reports no date; otherwise the value is
// written out through ToTimestampVal().
#define CAST_TO_TIMESTAMP(from_type) \
  TimestampVal CastFunctions::CastToTimestampVal(FunctionContext* ctx, \
                                                 const from_type& val) { \
    if (val.is_null) return TimestampVal::null(); \
    TimestampValue converted(val.val); \
    if (converted.HasDate()) { \
      TimestampVal out; \
      converted.ToTimestampVal(&out); \
      return out; \
    } \
    return TimestampVal::null(); \
  }
230 
// Boolean/numeric-to-timestamp cast instantiations; each one constructs a TimestampValue
// directly from the input's 'val' member (see CAST_TO_TIMESTAMP).
231 CAST_TO_TIMESTAMP(BooleanVal);
232 CAST_TO_TIMESTAMP(TinyIntVal);
233 CAST_TO_TIMESTAMP(SmallIntVal);
234 CAST_TO_TIMESTAMP(IntVal);
235 CAST_TO_TIMESTAMP(BigIntVal);
236 CAST_TO_TIMESTAMP(FloatVal);
237 CAST_TO_TIMESTAMP(DoubleVal);
238 
240  const StringVal& val) {
241  if (val.is_null) return TimestampVal::null();
242  TimestampValue timestamp_value(reinterpret_cast<char*>(val.ptr), val.len);
243  // Return null if 'val' did not parse
244  if (!timestamp_value.HasDateOrTime()) return TimestampVal::null();
245  TimestampVal result;
246  timestamp_value.ToTimestampVal(&result);
247  return result;
248 }
static void TruncateIfNecessary(const ColumnType &type, StringVal *val)
Definition: anyval-util.h:188
impala::FunctionContextImpl * impl()
TODO: Add mechanism for UDAs to update stats similar to runtime profile counters. ...
Definition: udf.h:202
const TypeDesc & GetReturnType() const
Definition: udf-ir.cc:34
#define CAST_TO_STRING(num_type)
static ColumnType TypeDescToColumnType(const FunctionContext::TypeDesc &type)
Definition: anyval-util.cc:101
#define CAST_FUNCTION(from_type, to_type)
static TimestampVal CastToTimestampVal(FunctionContext *context, const BooleanVal &val)
#define CAST_FLOAT_TO_STRING(float_type, format)
bool HasDateOrTime() const
static void PadWithSpaces(char *cptr, int64_t cptr_len, int64_t num_chars)
#define CAST_TO_TIMESTAMP(from_type)
#define CAST_FROM_TIMESTAMP(to_type)
This object has a compatible storage format with boost::ptime.
Definition: udf.h:495
static StringVal CastToChar(FunctionContext *context, const StringVal &val)
uint8_t * ptr
Definition: udf.h:523
void ToTimestampVal(impala_udf::TimestampVal *tv) const
bool is_null
Definition: udf.h:359
PrimitiveType type
Definition: types.h:60
#define CAST_FROM_SUBSECOND_TIMESTAMP(to_type)
int len
Only set if type == TYPE_CHAR or type == TYPE_VARCHAR.
Definition: types.h:62
static StringVal FromString(FunctionContext *ctx, const std::string &s)
Definition: anyval-util.h:183
#define CAST_FROM_STRING(num_type, native_type, string_parser_fn)
static TimestampValue FromTimestampVal(const impala_udf::TimestampVal &udf_value)
uint8_t * AllocateLocal(int byte_size)
Definition: udf.cc:386
const int MAX_FLOAT_CHARS
static StringVal CastToStringVal(FunctionContext *context, const BooleanVal &val)