Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
decimal-value.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_RUNTIME_DECIMAL_VALUE_H
17 #define IMPALA_RUNTIME_DECIMAL_VALUE_H
18 
19 #include <math.h>
20 #include <iomanip>
21 #include <ostream>
22 #include <sstream>
23 
24 #include "common/logging.h"
26 #include "util/decimal-util.h"
27 #include "util/hash-util.h"
28 
29 namespace impala {
30 
39 template<typename T>
40 class DecimalValue {
41  public:
42  DecimalValue() : value_(0) { }
43  DecimalValue(const T& s) : value_(s) { }
44 
45  DecimalValue& operator=(const T& s) {
46  value_ = s;
47  return *this;
48  }
49 
52  static DecimalValue FromDouble(const ColumnType& t, double d, bool* overflow) {
53  // Check overflow.
54  T max_value = DecimalUtil::GetScaleMultiplier<T>(t.precision - t.scale);
55  if (abs(d) >= max_value) {
56  *overflow = true;
57  return DecimalValue();
58  }
59 
60  // Multiply the double by the scale.
61  d *= DecimalUtil::GetScaleMultiplier<double>(t.scale);
62  // Truncate and just take the integer part.
63  return DecimalValue(static_cast<T>(d));
64  }
65 
67  static DecimalValue FromInt(const ColumnType& t, int64_t d, bool* overflow) {
68  // Check overflow. For scale 3, the max value is 10^3 - 1 = 999.
69  T max_value = DecimalUtil::GetScaleMultiplier<T>(t.precision - t.scale);
70  if (abs(d) >= max_value) {
71  *overflow = true;
72  return DecimalValue();
73  }
74  return DecimalValue(DecimalUtil::MultiplyByScale<T>(d, t));
75  }
76 
81  bool operator==(const DecimalValue& other) const {
82  return value_ == other.value_;
83  }
84  bool operator!=(const DecimalValue& other) const {
85  return value_ != other.value_;
86  }
87  bool operator<=(const DecimalValue& other) const {
88  return value_ <= other.value_;
89  }
90  bool operator<(const DecimalValue& other) const {
91  return value_ < other.value_;
92  }
93  bool operator>=(const DecimalValue& other) const {
94  return value_ >= other.value_;
95  }
96  bool operator>(const DecimalValue& other) const {
97  return value_ > other.value_;
98  }
99 
101  return DecimalValue(-value_);
102  }
103 
104  bool is_negative() const { return value_ < 0; }
105 
108  int Compare(const DecimalValue& other) const {
109  T x = value();
110  T y = other.value();
111  if (x == y) return 0;
112  if (x < y) return -1;
113  return 1;
114  }
115 
119  DecimalValue ScaleTo(const ColumnType& src_type, const ColumnType& dst_type,
120  bool* overflow) const {
121  int delta_scale = src_type.scale - dst_type.scale;
122  T result = value();
123  T max_value = DecimalUtil::GetScaleMultiplier<T>(dst_type.precision);
124  if (delta_scale >= 0) {
125  if (delta_scale != 0) result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale);
126  // Even if we are decreasing the absolute unscaled value, we can still overflow.
127  // This path is also used to convert between precisions so for example, converting
128  // from 100 as decimal(3,0) to decimal(2,0) should be considered an overflow.
129  *overflow |= abs(result) >= max_value;
130  } else if (delta_scale < 0) {
131  T mult = DecimalUtil::GetScaleMultiplier<T>(-delta_scale);
132  *overflow |= abs(result) >= max_value / mult;
133  result *= mult;
134  }
135  return DecimalValue(result);
136  }
137 
146  template<typename RESULT_T>
147  DecimalValue<RESULT_T> Add(const ColumnType& this_type, const DecimalValue& other,
148  const ColumnType& other_type, int result_scale, bool* overflow) const {
149  DCHECK_EQ(result_scale, std::max(this_type.scale, other_type.scale));
150  RESULT_T x = 0;
151  RESULT_T y = 0;
152  *overflow |= AdjustToSameScale(*this, this_type, other, other_type, &x, &y);
153  if (sizeof(RESULT_T) == 16) {
154  // Check overflow.
155  if (!*overflow && is_negative() == other.is_negative()) {
156  // Can only overflow if the signs are the same
157  *overflow |= DecimalUtil::MAX_UNSCALED_DECIMAL - abs(x) < abs(y);
158  // TODO: faster to return here? We don't care at all about the perf on
159  // the overflow case but what makes the normal path faster?
160  }
161  } else {
162  DCHECK(!*overflow) << "Cannot overflow unless result is Decimal16Value";
163  }
164  return DecimalValue<RESULT_T>(x + y);
165  }
166 
167  template<typename RESULT_T>
168  DecimalValue<RESULT_T> Subtract(const ColumnType& this_type, const DecimalValue& other,
169  const ColumnType& other_type, int result_scale, bool* overflow) const {
170  return Add<RESULT_T>(this_type, -other, other_type, result_scale, overflow);
171  }
172 
173  template<typename RESULT_T>
174  DecimalValue<RESULT_T> Multiply(const ColumnType& this_type, const DecimalValue& other,
175  const ColumnType& other_type, int result_scale, bool* overflow) const {
176  // In the non-overflow case, we don't need to adjust by the scale since
177  // that is already handled by the FE when it computes the result decimal type.
178  // e.g. 1.23 * .2 (scale 2, scale 1 respectively) is identical to:
179  // 123 * 2 with a resulting scale 3. We can do the multiply on the unscaled values.
180  // The result scale in this case is the sum of the input scales.
181  RESULT_T x = value();
182  RESULT_T y = other.value();
183  if (x == 0 || y == 0) {
184  // Handle zero to avoid divide by zero in the overflow check below.
185  return DecimalValue<RESULT_T>(0);
186  }
187  if (sizeof(RESULT_T) == 16) {
188  // Check overflow
189  *overflow |= DecimalUtil::MAX_UNSCALED_DECIMAL / abs(y) < abs(x);
190  }
191  RESULT_T result = x * y;
192  int delta_scale = this_type.scale + other_type.scale - result_scale;
193  if (UNLIKELY(delta_scale != 0)) {
194  // In this case, the required resulting scale is larger than the max we support.
195  // We cap the resulting scale to the max supported scale (e.g. truncate) in the FE.
196  // TODO: we could also return NULL.
197  DCHECK_GT(delta_scale, 0);
198  result /= DecimalUtil::GetScaleMultiplier<T>(delta_scale);
199  }
200  return DecimalValue<RESULT_T>(result);
201  }
202 
204  template<typename RESULT_T>
205  DecimalValue<RESULT_T> Divide(const ColumnType& this_type, const DecimalValue& other,
206  const ColumnType& other_type, int result_scale, bool* is_nan, bool* overflow)
207  const {
208  DCHECK_GE(result_scale, this_type.scale);
209  if (other.value() == 0) {
210  // Divide by 0.
211  *is_nan = true;
212  return DecimalValue<RESULT_T>();
213  }
214  // We need to scale x up by the result precision and then do an integer divide.
215  // This truncates the result to the output precision.
216  // TODO: confirm with standard that truncate is okay.
217  int scale_by = result_scale + other_type.scale - this_type.scale;
218  // Use higher precision ints for intermediates to avoid overflows. Divides lead to
219  // large numbers very quickly (and get eliminated by the int divide).
220  if (sizeof(T) == 16) {
221  int256_t x = DecimalUtil::MultiplyByScale<int256_t>(
222  ConvertToInt256(value()), scale_by);
223  int256_t y = ConvertToInt256(other.value());
225  return DecimalValue<RESULT_T>(r);
226  } else {
227  int128_t x = DecimalUtil::MultiplyByScale<RESULT_T>(value(), scale_by);
228  int128_t y = other.value();
229  int128_t r = x / y;
230  return DecimalValue<RESULT_T>(static_cast<RESULT_T>(r));
231  }
232  }
233 
235  template<typename RESULT_T>
236  DecimalValue<RESULT_T> Mod(const ColumnType& this_type, const DecimalValue& other,
237  const ColumnType& other_type, int result_scale, bool* is_nan, bool* overflow)
238  const {
239  DCHECK_EQ(result_scale, std::max(this_type.scale, other_type.scale));
240  if (other.value() == 0) {
241  // Mod by 0.
242  *is_nan = true;
243  return DecimalValue<RESULT_T>();
244  }
245  *is_nan = false;
246  RESULT_T x = 0;
247  RESULT_T y = 1; // Initialize y to avoid mod by 0.
248  *overflow |= AdjustToSameScale(*this, this_type, other, other_type, &x, &y);
249  return DecimalValue<RESULT_T>(x % y);
250  }
251 
254  int Compare(const ColumnType& this_type, const DecimalValue& other,
255  const ColumnType& other_type) const;
256 
258  bool Eq(const ColumnType& this_type, const DecimalValue& other,
259  const ColumnType& other_type) const {
260  return Compare(this_type, other, other_type) == 0;
261  }
262  bool Ne(const ColumnType& this_type, const DecimalValue& other,
263  const ColumnType& other_type) const {
264  return Compare(this_type, other, other_type) != 0;
265  }
266  bool Ge(const ColumnType& this_type, const DecimalValue& other,
267  const ColumnType& other_type) const {
268  return Compare(this_type, other, other_type) >= 0;
269  }
270  bool Gt(const ColumnType& this_type, const DecimalValue& other,
271  const ColumnType& other_type) const {
272  return Compare(this_type, other, other_type) > 0;
273  }
274  bool Le(const ColumnType& this_type, const DecimalValue& other,
275  const ColumnType& other_type) const {
276  return Compare(this_type, other, other_type) <= 0;
277  }
278  bool Lt(const ColumnType& this_type, const DecimalValue& other,
279  const ColumnType& other_type) const {
280  return Compare(this_type, other, other_type) < 0;
281  }
282 
285  const T& value() const { return value_; }
286  T& value() { return value_; }
287 
289  const T whole_part(const ColumnType& t) const {
290  return value() / DecimalUtil::GetScaleMultiplier<T>(t.scale);
291  }
292 
294  const T fractional_part(const ColumnType& t) const {
295  return abs(value()) % DecimalUtil::GetScaleMultiplier<T>(t.scale);
296  }
297 
299  double ToDouble(const ColumnType& type) const {
300  return static_cast<double>(value_) / powf(10.0, type.scale);
301  }
302 
303  inline uint32_t Hash(int seed = 0) const {
304  return HashUtil::Hash(&value_, sizeof(value_), seed);
305  }
306 
307  std::string ToString(const ColumnType& type) const;
308 
310 
311  private:
313 
318  template <typename RESULT_T>
319  static bool AdjustToSameScale(const DecimalValue& x, const ColumnType& x_type,
320  const DecimalValue& y, const ColumnType& y_type,
321  RESULT_T* x_scaled, RESULT_T* y_scaled) {
322  int delta_scale = x_type.scale - y_type.scale;
323  RESULT_T scale_factor = DecimalUtil::GetScaleMultiplier<RESULT_T>(abs(delta_scale));
324  if (delta_scale == 0) {
325  *x_scaled = x.value();
326  *y_scaled = y.value();
327  } else if (delta_scale > 0) {
328  if (sizeof(RESULT_T) == 16 &&
329  DecimalUtil::MAX_UNSCALED_DECIMAL / scale_factor < abs(y.value())) {
330  return true;
331  }
332  *x_scaled = x.value();
333  *y_scaled = y.value() * scale_factor;
334  } else {
335  if (sizeof(RESULT_T) == 16 &&
336  DecimalUtil::MAX_UNSCALED_DECIMAL / scale_factor < abs(x.value())) {
337  return true;
338  }
339  *x_scaled = x.value() * scale_factor;
340  *y_scaled = y.value();
341  }
342  return false;
343  }
344 };
345 
351 
355 inline Decimal4Value ToDecimal4(const Decimal4Value& v, bool* overflow) {
356  return v;
357 }
358 inline Decimal8Value ToDecimal8(const Decimal4Value& v, bool* overflow) {
359  return Decimal8Value(static_cast<int64_t>(v.value()));
360 }
361 inline Decimal16Value ToDecimal16(const Decimal4Value& v, bool* overflow) {
362  return Decimal16Value(static_cast<int128_t>(v.value()));
363 }
364 inline Decimal4Value ToDecimal4(const Decimal8Value& v, bool* overflow) {
365  *overflow |= abs(v.value()) > std::numeric_limits<int32_t>::max();
366  return Decimal4Value(static_cast<int32_t>(v.value()));
367 }
368 inline Decimal8Value ToDecimal8(const Decimal8Value& v, bool* overflow) {
369  return v;
370 }
371 inline Decimal16Value ToDecimal16(const Decimal8Value& v, bool* overflow) {
372  return Decimal16Value(static_cast<int128_t>(v.value()));
373 }
374 inline Decimal4Value ToDecimal4(const Decimal16Value& v, bool* overflow) {
375  *overflow |= abs(v.value()) > std::numeric_limits<int32_t>::max();
376  return Decimal4Value(static_cast<int32_t>(v.value()));
377 }
378 inline Decimal8Value ToDecimal8(const Decimal16Value& v, bool* overflow) {
379  *overflow |= abs(v.value()) > std::numeric_limits<int64_t>::max();
380  return Decimal8Value(static_cast<int64_t>(v.value()));
381 }
382 inline Decimal16Value ToDecimal16(const Decimal16Value& v, bool* overflow) {
383  return v;
384 }
385 
386 inline std::ostream& operator<<(std::ostream& os, const Decimal4Value& d) {
387  return os << d.value();
388 }
389 inline std::ostream& operator<<(std::ostream& os, const Decimal8Value& d) {
390  return os << d.value();
391 }
392 inline std::ostream& operator<<(std::ostream& os, const Decimal16Value& d) {
393  return os << d.value();
394 }
395 
397 inline std::size_t hash_value(const Decimal4Value& v) {
398  return v.Hash();
399 }
400 inline std::size_t hash_value(const Decimal8Value& v) {
401  return v.Hash();
402 }
403 inline std::size_t hash_value(const Decimal16Value& v) {
404  return v.Hash();
405 }
406 
410 template <>
411 inline int Decimal4Value::Compare(const ColumnType& this_type,
412  const Decimal4Value& other, const ColumnType& other_type) const {
413  int64_t x, y;
414  bool overflow = AdjustToSameScale(*this, this_type, other, other_type, &x, &y);
415  DCHECK(!overflow) << "Overflow cannot happen with Decimal4Value";
416  if (x == y) return 0;
417  if (x < y) return -1;
418  return 1;
419 }
420 template <>
421 inline int Decimal8Value::Compare(const ColumnType& this_type,
422  const Decimal8Value& other, const ColumnType& other_type) const {
423  int128_t x = 0, y = 0;
424  bool overflow = AdjustToSameScale(*this, this_type, other, other_type, &x, &y);
425  DCHECK(!overflow) << "Overflow cannot happen with Decimal8Value";
426  if (x == y) return 0;
427  if (x < y) return -1;
428  return 1;
429 }
430 template <>
431 inline int Decimal16Value::Compare(const ColumnType& this_type,
432  const Decimal16Value& other, const ColumnType& other_type) const {
433  int256_t x = ConvertToInt256(this->value());
434  int256_t y = ConvertToInt256(other.value());
435  int delta_scale = this_type.scale - other_type.scale;
436  if (delta_scale > 0) {
437  y = DecimalUtil::MultiplyByScale<int256_t>(y, delta_scale);
438  } else if (delta_scale < 0) {
439  x = DecimalUtil::MultiplyByScale<int256_t>(x, -delta_scale);
440  }
441  if (x == y) return 0;
442  if (x < y) return -1;
443  return 1;
444 }
445 
448 template<typename T>
449 inline std::string DecimalValue<T>::ToString(const ColumnType& type) const {
450  // Decimal values are sent to clients as strings so in the interest of
451  // speed the string will be created without the using stringstream with the
452  // whole/fractional_part().
453  DCHECK_EQ(type.type, TYPE_DECIMAL);
454  int last_char_idx = type.precision
455  + (type.scale > 0) // Add a space for decimal place
456  + (type.scale == type.precision) // Add a space for leading 0
457  + (value_ < 0); // Add a space for negative sign
458  std::string str = std::string(last_char_idx, '0');
459  // Start filling in the values in reverse order by taking the last digit
460  // of the value. Use a positive value and worry about the sign later. At this
461  // point the last_char_idx points to the string terminator.
462  T remaining_value = value_;
463  int first_digit_idx = 0;
464  if (value_ < 0) {
465  remaining_value = -value_;
466  first_digit_idx = 1;
467  }
468  if (type.scale > 0) {
469  int remaining_scale = type.scale;
470  do {
471  str[--last_char_idx] = (remaining_value % 10) + '0'; // Ascii offset
472  remaining_value /= 10;
473  } while (--remaining_scale > 0);
474  str[--last_char_idx] = '.';
475  DCHECK_GT(last_char_idx, first_digit_idx) << "Not enough space remaining";
476  }
477  do {
478  str[--last_char_idx] = (remaining_value % 10) + '0'; // Ascii offset
479  remaining_value /= 10;
480  if (remaining_value == 0) {
481  // Trim any extra leading 0's.
482  if (last_char_idx > first_digit_idx) str.erase(0, last_char_idx - first_digit_idx);
483  break;
484  }
485  // For safety, enforce string length independent of remaining_value.
486  } while (last_char_idx > first_digit_idx);
487  if (value_ < 0) str[0] = '-';
488  return str;
489 }
490 
491 }
492 
493 #endif
const T & value() const
static DecimalValue FromDouble(const ColumnType &t, double d, bool *overflow)
Definition: decimal-value.h:52
bool Ge(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
bool operator<(const DecimalValue &other) const
Definition: decimal-value.h:90
bool operator<=(const DecimalValue &other) const
Definition: decimal-value.h:87
bool operator==(const DecimalValue &other) const
Definition: decimal-value.h:81
bool Eq(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
Comparison utilities.
DecimalValue ScaleTo(const ColumnType &src_type, const ColumnType &dst_type, bool *overflow) const
int128_t abs(const int128_t &x)
int128_t ConvertToInt128(int256_t x, int128_t max_value, bool *overflow)
int precision
Only set if type == TYPE_DECIMAL.
Definition: types.h:68
DecimalValue< int128_t > Decimal16Value
Decimal8Value ToDecimal8(const Decimal4Value &v, bool *overflow)
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 256, 256, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void > > int256_t
Define 256 bit int type.
bool Le(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
int256_t ConvertToInt256(const int128_t &x)
DecimalValue< RESULT_T > Subtract(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *overflow) const
Decimal16Value ToDecimal16(const Decimal4Value &v, bool *overflow)
std::size_t hash_value(const Decimal4Value &v)
This function must be called 'hash_value' to be picked up by boost.
DecimalValue operator-() const
static int128_t MAX_UNSCALED_DECIMAL
Maximum absolute value of int128_t that we use. This is 38 digits of 9's.
Definition: decimal-util.h:32
bool Lt(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
DecimalValue< int64_t > Decimal8Value
DecimalValue< T > Abs() const
PrimitiveType type
Definition: types.h:60
DecimalValue(const T &s)
Definition: decimal-value.h:43
const T fractional_part(const ColumnType &t) const
Returns the value of the decimal after the decimal point.
bool operator>(const DecimalValue &other) const
Definition: decimal-value.h:96
static uint32_t Hash(const void *data, int32_t bytes, uint32_t seed)
Definition: hash-util.h:135
bool Ne(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
DecimalValue< int32_t > Decimal4Value
static bool AdjustToSameScale(const DecimalValue &x, const ColumnType &x_type, const DecimalValue &y, const ColumnType &y_type, RESULT_T *x_scaled, RESULT_T *y_scaled)
DecimalValue< RESULT_T > Divide(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *is_nan, bool *overflow) const
is_nan is set to true if 'other' is 0. The value returned is undefined.
bool operator>=(const DecimalValue &other) const
Definition: decimal-value.h:93
static DecimalValue FromInt(const ColumnType &t, int64_t d, bool *overflow)
Assigns *result as a decimal.
Definition: decimal-value.h:67
std::string ToString(const ColumnType &type) const
bool operator!=(const DecimalValue &other) const
Definition: decimal-value.h:84
bool Gt(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type) const
Decimal4Value ToDecimal4(const Decimal4Value &v, bool *overflow)
#define UNLIKELY(expr)
Definition: compiler-util.h:33
DecimalValue< RESULT_T > Multiply(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *overflow) const
bool is_negative() const
DecimalValue< RESULT_T > Mod(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *is_nan, bool *overflow) const
is_nan is set to true if 'other' is 0. The value returned is undefined.
int Compare(const DecimalValue &other) const
ostream & operator<<(ostream &os, const map< TNetworkAddress, llama::TAllocatedResource > &resources)
DecimalValue< RESULT_T > Add(const ColumnType &this_type, const DecimalValue &other, const ColumnType &other_type, int result_scale, bool *overflow) const
double ToDouble(const ColumnType &type) const
Returns an approximate double for this decimal.
const T whole_part(const ColumnType &t) const
Returns the value of the decimal before the decimal point.
uint32_t Hash(int seed=0) const
DecimalValue & operator=(const T &s)
Definition: decimal-value.h:45
__int128_t int128_t
We use the c++ int128_t type. This is stored using 16 bytes and very performant.