55 using std::numeric_limits;
56 using namespace impala;
62 template<u
int32_t BASE>
67 memset(data, 0,
sizeof(data));
70 data[idx++] = v % BASE;
81 this->data[0] =
static_cast<uint32_t
>(r0);
82 if (r0 > numeric_limits<uint32_t>::max()) ++r1;
83 this->data[1] =
static_cast<uint32_t
>(r1);
84 if (r1 > numeric_limits<uint32_t>::max()) ++r2;
85 this->data[2] =
static_cast<uint32_t
>(r2);
86 if (r2 > numeric_limits<uint32_t>::max()) ++r3;
87 this->data[3] =
static_cast<uint32_t
>(r3);
93 bool print_padded =
false;
94 for (
int i = 3; i >= 0; --i) {
95 if (data[i] == 0 && !print_padded)
continue;
97 ss << setw(9) << data[i];
115 memcpy(data, reinterpret_cast<uint8_t*>(&t), 12);
121 memcpy(reinterpret_cast<uint8_t*>(&r) + 4, data, 12);
127 __int128_t x = to_int128();
129 __int128_t r = x + y;
135 __int128_t x = to_int128();
137 __int128_t r = x * y;
143 __int128_t x = to_int128();
145 __int128_t r = x / y;
150 return int96(-to_int128());
187 for (
int i = 0; i < n; ++i) {
197 data->
cpp_add_ints[i] *= numeric_limits<int64_t>::max();
212 data->
doubles.push_back(i + 1);
216 #define TEST_ADD(NAME, RESULT, VALS)\
217 void NAME(int batch_size, void* d) {\
218 TestData* data = reinterpret_cast<TestData*>(d);\
219 for (int i = 0; i < batch_size; ++i) {\
221 for (int j = 0; j < data->VALS.size(); ++j) {\
222 data->RESULT += data->VALS[j];\
227 #define TEST_MULTIPLY(NAME, RESULT, VALS)\
228 void NAME(int batch_size, void* d) {\
229 TestData* data = reinterpret_cast<TestData*>(d);\
230 for (int i = 0; i < batch_size; ++i) {\
232 for (int j = 0; j < data->VALS.size(); ++j) {\
233 data->RESULT *= data->VALS[j];\
238 #define TEST_DIVIDE(NAME, RESULT, VALS)\
239 void NAME(int batch_size, void* d) {\
240 TestData* data = reinterpret_cast<TestData*>(d);\
241 for (int i = 0; i < batch_size; ++i) {\
243 for (int j = 0; j < data->VALS.size() - 1; ++j) {\
244 data->RESULT += data->VALS[j + 1] / data->VALS[j];\
249 TEST_ADD(TestBoostAdd, boost_result, boost_add_ints);
250 TEST_ADD(TestCppAdd, cpp_result, cpp_add_ints);
251 TEST_ADD(TestCpp96Add, cpp96_result, cpp96_add_ints);
252 TEST_ADD(TestBaseBillionAdd, base1b_result, base1b_ints);
253 TEST_ADD(TestInt64Add, int64_result, int64_ints);
254 TEST_ADD(TestDoubleAdd, double_result, doubles);
256 TEST_MULTIPLY(TestBoostMultiply, boost_result, boost_mult_ints);
258 TEST_MULTIPLY(TestCpp96Multiply, cpp96_result, cpp96_mult_ints);
262 TEST_DIVIDE(TestBoostDivide, boost_result, boost_mult_ints);
263 TEST_DIVIDE(TestCppDivide, cpp_result, cpp_mult_ints);
264 TEST_DIVIDE(TestCpp96Divide, cpp96_result, cpp96_mult_ints);
265 TEST_DIVIDE(TestInt64Divide, int64_result, int64_ints);
266 TEST_DIVIDE(TestDoubleDivide, double_result, doubles);
268 int main(
int argc,
char** argv) {
276 add_suite.
AddBenchmark(
"int128_CPP", TestCppAdd, &data);
277 add_suite.
AddBenchmark(
"int128_Boost", TestBoostAdd, &data);
278 add_suite.
AddBenchmark(
"int96_CPP", TestCpp96Add, &data);
280 add_suite.
AddBenchmark(
"int128_Base1B", TestBaseBillionAdd, &data);
281 add_suite.
AddBenchmark(
"doubles", TestDoubleAdd, &data);
282 cout << add_suite.
Measure() << endl;
285 multiply_suite.
AddBenchmark(
"int128_CPP", TestCppMultiply, &data);
286 multiply_suite.
AddBenchmark(
"int128_Boost", TestBoostMultiply, &data);
287 multiply_suite.
AddBenchmark(
"int96_CPP", TestCpp96Multiply, &data);
288 multiply_suite.
AddBenchmark(
"int64", TestInt64Multiply, &data);
289 multiply_suite.
AddBenchmark(
"doubles", TestDoubleMultiply, &data);
290 cout << multiply_suite.
Measure() << endl;
293 divide_suite.
AddBenchmark(
"int128_CPP", TestCppDivide, &data);
294 divide_suite.
AddBenchmark(
"int128_Boost", TestBoostDivide, &data);
295 divide_suite.
AddBenchmark(
"int96_CPP", TestCpp96Divide, &data);
296 divide_suite.
AddBenchmark(
"int64", TestInt64Divide, &data);
297 divide_suite.
AddBenchmark(
"double", TestDoubleDivide, &data);
298 cout << divide_suite.
Measure() << endl;
int AddBenchmark(const std::string &name, BenchmarkFunction fn, void *args, int baseline_idx=0)
Base1BInt128 base1b_result
#define TEST_DIVIDE(NAME, RESULT, VALS)
vector< int64_t > int64_ints
vector< Base1BInt128 > base1b_ints
static std::string GetMachineInfo()
Output machine/build configuration as a string.
__int128_t to_int128() const
vector< __int128_t > cpp_add_ints
std::string Measure()
Runs all the benchmarks and returns the result in a formatted string.
vector< int96 > cpp96_add_ints
boost::multiprecision::int128_t boost_result
#define TEST_MULTIPLY(NAME, RESULT, VALS)
int main(int argc, char **argv)
void InitTestData(TestData *data, int n)
static void Init()
Initialize CpuInfo.
BaseInt128 & operator+=(const BaseInt128 &rhs)
vector< __int128_t > cpp_mult_ints
vector< boost::multiprecision::int128_t > boost_add_ints
vector< boost::multiprecision::int128_t > boost_mult_ints
vector< int96 > cpp96_mult_ints
int96 & operator*=(const int96 &v)
int96 & operator+=(const int96 &v)
int96 operator/(const int96 &v) const
BaseInt128< 1000000000 > Base1BInt128
#define TEST_ADD(NAME, RESULT, VALS)
__int128_t int128_t
We use the C++ int128_t type. It is stored using 16 bytes and is very performant.