// Include guard. The macro tested by #ifndef must be the same macro that is
// defined on the next line; otherwise the guard never takes effect and a second
// inclusion of this header re-declares everything in it.
3 #ifndef IMPALA_EXPERIMENT_DATAPROVIDER_H
4 #define IMPALA_EXPERIMENT_DATAPROVIDER_H
9 #include <boost/cstdint.hpp>
10 #include <boost/scoped_ptr.hpp>
11 #include <boost/random/uniform_int.hpp>
12 #include <boost/random/linear_congruential.hpp>
13 #include <boost/random/uniform_int.hpp>
14 #include <boost/random/uniform_real.hpp>
15 #include <boost/random/variate_generator.hpp>
16 #include <boost/generator_iterator.hpp>
// Default generator - used for int and float types.
// Produces a value of type T from either the real-valued input 'd' or the
// integer input 'i', using the supplied 'min'/'max' bounds.
// NOTE(review): the control flow that selects between the two return
// statements below is not visible in this listing; presumably it dispatches on
// the column's DataGen mode - confirm against the full header.
76 T
Generate(
double d,
int i, T min, T max)
const {
// Scales 'd' by the width of the range and offsets it by 'min'.
// assumes 'd' lies in [0, 1) so the result stays inside [min, max) - TODO confirm
79 return (T)(d * (max -
min) + min);
// Wraps 'i' into the range using a modulo by the range width, then offsets by 'min'.
// NOTE(review): the width is truncated to int64_t before the modulo, so if
// (int64_t)(max - min) is 0 (e.g. max == min, or a floating-point range
// narrower than 1) the divisor is zero.
81 return (T)(i % (int64_t)(max - min) +
min);
// Declaration only; the implementation is not part of this listing.
// Takes a row count, a batch size and the list of column descriptors.
// NOTE(review): presumably this (re)configures the provider for a new run of
// 'num_rows' rows produced in batches of 'batch_size' - confirm against the
// implementation.
100 void Reset(
int num_rows,
int batch_size,
const std::vector<ColDesc>& columns);
// Print the row data in csv format.
// Writes to the given output stream; 'data' points at the row data and
// 'num_rows' is the number of rows to print. Const: does not modify the provider.
118 void Print(std::ostream*,
char* data,
int num_rows)
const;
// Tail of a parameter list taking bool min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<bool>
// specialization - confirm against the full header.
136 const bool& min,
const bool &max,
DataGen gen) {
// Tail of a parameter list taking int8_t min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<int8_t>
// specialization - confirm against the full header.
145 const int8_t& min,
const int8_t& max,
DataGen gen) {
// Tail of a parameter list taking int16_t min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<int16_t>
// specialization - confirm against the full header.
154 const int16_t& min,
const int16_t& max,
DataGen gen) {
// Tail of a parameter list taking int32_t min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<int32_t>
// specialization - confirm against the full header.
163 const int32_t& min,
const int32_t& max,
DataGen gen) {
// Tail of a parameter list taking int64_t min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<int64_t>
// specialization - confirm against the full header.
172 const int64_t& min,
const int64_t& max,
DataGen gen) {
// Tail of a parameter list taking float min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<float>
// specialization - confirm against the full header.
181 const float& min,
const float& max,
DataGen gen) {
// Tail of a parameter list taking double min/max bounds and a DataGen mode.
// NOTE(review): the declarator preceding this line and the function body are
// not visible in this listing; presumably this is the ColDesc::Create<double>
// specialization - confirm against the full header.
190 const double& min,
const double& max,
DataGen gen) {
// Explicit specialization of ColDesc::Generate for bool columns. Unlike the
// other specializations visible in this listing, it does not forward to the
// range-based Generate overload.
// NOTE(review): the control flow that selects between the two return
// statements below is not visible in this listing; presumably it dispatches on
// the column's DataGen mode - confirm against the full header.
208 template<>
inline bool DataProvider::ColDesc::Generate<bool>(
double d,
int i)
const {
// Rounds (d * max.b - min.b) to the nearest integer, adds min.b, and returns
// the result converted to bool.
211 return (
int)(round(d * max.b - min.b)) + min.b;
// Returns true when 'i' is odd and false when it is even.
213 return (i % 2) ?
true :
false;
// Explicit specialization of ColDesc::Generate for int8_t columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the int8 members
// of this descriptor's min and max as the bounds.
217 template<>
inline int8_t DataProvider::ColDesc::Generate<int8_t>(
double d,
int i)
const {
218 return Generate<int8_t>(d, i, min.int8, max.int8);
// Explicit specialization of ColDesc::Generate for int16_t columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the int16 members
// of this descriptor's min and max as the bounds.
220 template<>
inline int16_t DataProvider::ColDesc::Generate<int16_t>(
double d,
int i)
const {
221 return Generate<int16_t>(d, i, min.int16, max.int16);
// Explicit specialization of ColDesc::Generate for int32_t columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the int32 members
// of this descriptor's min and max as the bounds.
223 template<>
inline int32_t DataProvider::ColDesc::Generate<int32_t>(
double d,
int i)
const {
224 return Generate<int32_t>(d, i, min.int32, max.int32);
// Explicit specialization of ColDesc::Generate for int64_t columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the int64 members
// of this descriptor's min and max as the bounds.
226 template<>
inline int64_t DataProvider::ColDesc::Generate<int64_t>(
double d,
int i)
const {
227 return Generate<int64_t>(d, i, min.int64, max.int64);
// Explicit specialization of ColDesc::Generate for float columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the 'f' members
// of this descriptor's min and max as the bounds.
229 template<>
inline float DataProvider::ColDesc::Generate<float>(
double d,
int i)
const {
230 return Generate<float>(d, i, min.f, max.f);
// Explicit specialization of ColDesc::Generate for double columns: forwards
// 'd' and 'i' to the range-based Generate overload, passing the 'd' members
// of this descriptor's min and max as the bounds.
232 template<>
inline double DataProvider::ColDesc::Generate<double>(
double d,
int i)
const {
233 return Generate<double>(d, i, min.d, max.d);
int total_rows() const
The total number of rows that will be generated.
void Reset(int num_rows, int batch_size, const std::vector< ColDesc > &columns)
T Generate(double d, int i, T min, T max) const
Default generator - used for int and float types.
static ColDesc Create(const T &min, const T &max, DataGen gen=UNIFORM_RANDOM)
Create a column desc with min/max range and the data gen type.
void * NextBatch(int *rows_returned)
impala::RuntimeProfile * profile_
See data-provider-test.cc on how to use this.
impala::RuntimeProfile::Counter * bytes_generated_
boost::minstd_rand rand_generator_
T Generate(double d, int i) const
std::vector< ColDesc > cols_
impala::PrimitiveType type
boost::scoped_ptr< char > data_
DataProvider(impala::MemPool *pool, impala::RuntimeProfile *profile)
int row_size() const
The size of a row (tuple size)
void Print(std::ostream *, char *data, int num_rows) const
Print the row data in csv format.
ColDesc(impala::PrimitiveType type, int bytes)
DataGen
How the data should be generated.