17 #include <boost/algorithm/string/join.hpp>
21 #include <llvm/ExecutionEngine/ExecutionEngine.h>
22 #include <llvm/IR/DataLayout.h>
26 #include "gen-cpp/Descriptors_types.h"
27 #include "gen-cpp/PlanNodes_types.h"
33 using boost::algorithm::join;
40 out <<
"(offset=" << byte_offset
41 <<
" mask=" << hex << static_cast<int>(bit_mask) << dec <<
")";
50 SlotDescriptor::SlotDescriptor(
const TSlotDescriptor& tdesc)
52 type_(tdesc.slotType),
53 parent_(tdesc.parent),
54 col_path_(tdesc.columnPath),
55 tuple_offset_(tdesc.byteOffset),
56 null_indicator_offset_(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit),
57 slot_idx_(tdesc.slotIdx),
58 slot_size_(type_.GetByteSize()),
60 is_materialized_(tdesc.isMaterialized),
62 set_not_null_fn_(NULL),
68 for (
int i = 0; i < common_levels; ++i) {
80 for (
int i = 1; i <
col_path_.size(); ++i) {
92 : name_(tdesc.tableName),
93 database_(tdesc.dbName),
95 num_cols_(tdesc.numCols),
96 num_clustering_cols_(tdesc.numClusteringCols),
97 col_names_(tdesc.colNames) {
103 out <<
" col_names=[";
111 : line_delim_(thrift_partition.lineDelim),
112 field_delim_(thrift_partition.fieldDelim),
113 collection_delim_(thrift_partition.collectionDelim),
114 escape_char_(thrift_partition.escapeChar),
115 block_size_(thrift_partition.blockSize),
116 location_(thrift_partition.location),
117 id_(thrift_partition.id),
118 exprs_prepared_(false),
119 exprs_opened_(false),
120 exprs_closed_(false),
121 file_format_(thrift_partition.fileFormat),
124 for (
int i = 0; i < thrift_partition.partitionKeyExprs.size(); ++i) {
128 thrift_partition.partitionKeyExprs[i], &ctx);
176 hdfs_base_dir_(tdesc.hdfsTable.hdfsBaseDir),
177 null_partition_key_value_(tdesc.hdfsTable.nullPartitionKeyValue),
178 null_column_value_(tdesc.hdfsTable.nullColumnValue),
180 map<int64_t, THdfsPartition>::const_iterator it;
181 for (it = tdesc.hdfsTable.partitions.begin(); it != tdesc.hdfsTable.partitions.end();
187 avro_schema_ = tdesc.hdfsTable.__isset.avroSchema ? tdesc.hdfsTable.avroSchema :
"";
194 out <<
" partitions=[";
195 vector<string> partition_strings;
196 map<int64_t, HdfsPartitionDescriptor*>::const_iterator it;
199 s <<
" (id: " << it->first <<
", partition: " << it->second->DebugString() <<
")";
200 partition_strings.push_back(s.str());
202 out << join(partition_strings,
",") <<
"]";
211 table_name_(tdesc.hbaseTable.tableName) {
212 for (
int i = 0; i < tdesc.hbaseTable.families.size(); ++i) {
213 bool is_binary_encoded = tdesc.hbaseTable.__isset.binary_encoded &&
214 tdesc.hbaseTable.binary_encoded[i];
216 tdesc.hbaseTable.families[i], tdesc.hbaseTable.qualifiers[i], is_binary_encoded));
224 for (
int i = 0; i <
cols_.size(); ++i) {
225 out << (i > 0 ?
" " :
"") <<
cols_[i].family <<
":" <<
cols_[i].qualifier <<
":"
226 <<
cols_[i].binary_encoded;
235 byte_size_(tdesc.byteSize),
236 num_null_bytes_(tdesc.numNullBytes),
237 num_materialized_slots_(0),
257 for (
size_t i = 0; i <
slots_.size(); ++i) {
258 if (i > 0) out <<
", ";
259 out <<
slots_[i]->DebugString();
267 const vector<TTupleId>& row_tuples,
268 const vector<bool>& nullable_tuples)
269 : tuple_idx_nullable_map_(nullable_tuples) {
270 DCHECK_EQ(nullable_tuples.size(), row_tuples.size());
271 for (
int i = 0; i < row_tuples.size(); ++i) {
273 DCHECK(tuple_desc_map_.back() != NULL);
294 const vector<bool>& nullable_tuples)
295 : tuple_desc_map_(tuple_descs),
296 tuple_idx_nullable_map_(nullable_tuples) {
297 DCHECK_EQ(nullable_tuples.size(), tuple_descs.size());
302 : tuple_desc_map_(1, tuple_desc),
303 tuple_idx_nullable_map_(1, is_nullable) {
346 row_tuple_ids->clear();
382 for (
size_t i = 0; i < thrift_tbl.tableDescriptors.size(); ++i) {
383 const TTableDescriptor& tdesc = thrift_tbl.tableDescriptors[i];
385 switch (tdesc.tableType) {
386 case TTableType::HDFS_TABLE:
389 case TTableType::HBASE_TABLE:
392 case TTableType::DATA_SOURCE_TABLE:
396 DCHECK(
false) <<
"invalid table type: " << tdesc.tableType;
398 (*tbl)->tbl_desc_map_[tdesc.id] = desc;
401 for (
size_t i = 0; i < thrift_tbl.tupleDescriptors.size(); ++i) {
402 const TTupleDescriptor& tdesc = thrift_tbl.tupleDescriptors[i];
405 if (tdesc.__isset.tableId) {
406 desc->
table_desc_ = (*tbl)->GetTableDescriptor(tdesc.tableId);
409 (*tbl)->tuple_desc_map_[tdesc.id] = desc;
412 for (
size_t i = 0; i < thrift_tbl.slotDescriptors.size(); ++i) {
413 const TSlotDescriptor& tdesc = thrift_tbl.slotDescriptors[i];
415 (*tbl)->slot_desc_map_[tdesc.id] = slot_d;
418 TupleDescriptorMap::iterator entry = (*tbl)->tuple_desc_map_.find(tdesc.parent);
419 if (entry == (*tbl)->tuple_desc_map_.end()) {
420 return Status(
"unknown tid in slot descriptor msg");
422 entry->second->AddSlot(slot_d);
429 TableDescriptorMap::const_iterator i =
tbl_desc_map_.find(
id);
462 descs->push_back(i->second);
478 PointerType* tuple_ptr_type = PointerType::get(tuple, 0);
488 Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
490 Value* null_byte_ptr = builder.CreateStructGEP(tuple_ptr, byte_offset,
"null_byte_ptr");
491 Value* null_byte = builder.CreateLoad(null_byte_ptr,
"null_byte");
492 Value* null_mask = builder.CreateAnd(null_byte, mask,
"null_mask");
493 Value* is_null = builder.CreateICmpNE(null_mask, zero,
"is_null");
494 builder.CreateRet(is_null);
511 StructType* tuple,
bool set_null) {
515 PointerType* tuple_ptr_type = PointerType::get(tuple, 0);
522 Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
524 Value* null_byte_ptr =
525 builder.CreateStructGEP(
527 Value* null_byte = builder.CreateLoad(null_byte_ptr,
"null_byte");
528 Value* result = NULL;
533 result = builder.CreateOr(null_byte, null_set);
535 Value* null_clear_val =
537 result = builder.CreateAnd(null_byte, null_clear_val);
540 builder.CreateStore(result, null_byte_ptr);
541 builder.CreateRetVoid();
561 vector<Type*> struct_fields;
568 for (
int i = 0; i <
slots().size(); ++i) {
573 DCHECK_LT(slot_desc->
field_idx(), struct_fields.size());
579 StructType* tuple_struct = StructType::get(codegen->
context(),
580 ArrayRef<Type*>(struct_fields));
586 const DataLayout* data_layout = codegen->
execution_engine()->getDataLayout();
587 const StructLayout* layout = data_layout->getStructLayout(tuple_struct);
588 if (layout->getSizeInBytes() !=
byte_size()) {
589 DCHECK_EQ(layout->getSizeInBytes(),
byte_size());
592 for (
int i = 0; i <
slots().size(); ++i) {
598 if (layout->getElementOffset(field_idx) != slot_desc->
tuple_offset()) {
599 DCHECK_EQ(layout->getElementOffset(field_idx), slot_desc->
tuple_offset());
613 out << i->second->DebugString() <<
'\n';
std::string null_partition_key_value_
virtual std::string DebugString() const
std::string DebugString() const
Status OpenExprs(RuntimeState *state)
std::string table_name_
Native name of the HBase table.
static bool ColPathLessThan(const SlotDescriptor *a, const SlotDescriptor *b)
SlotDescriptorMap slot_desc_map_
llvm::Function * set_null_fn_
std::vector< SlotDescriptor * > string_slots_
Status PrepareExprs(RuntimeState *state)
Utility struct that wraps a variable name and llvm type.
static Status Open(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for opening multiple expr trees.
virtual std::string DebugString() const
const NullIndicatorOffset null_indicator_offset_
TableDescriptor * GetTableDescriptor(TableId id) const
#define RETURN_IF_ERROR(stmt)
some generally useful macros
int num_materialized_slots_
std::vector< bool > tuple_idx_nullable_map_
tuple_idx_nullable_map_[i] is true if tuple i can be null
TableDescriptorMap tbl_desc_map_
std::vector< std::string > col_names_
const std::vector< int > & col_path() const
const std::vector< SlotDescriptor * > & slots() const
std::vector< int > tuple_idx_map_
Map from TupleId to the position of the tuple within the row.
HdfsPartitionDescriptor(const THdfsPartition &thrift_partition, ObjectPool *pool)
std::string null_column_value_
Special string to indicate NULL values in text-encoded columns.
llvm::Function * is_null_fn_
Cached codegen'd functions.
llvm::StructType * GenerateLlvmStruct(LlvmCodeGen *codegen)
TupleDescriptor * GetTupleDescriptor(TupleId id) const
bool TupleIsNullable(int tuple_idx) const
Return true if the Tuple of the given Tuple index is nullable.
static const int INVALID_IDX
ObjectPool * object_pool_
Owned by DescriptorTbl.
static void Close(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for closing multiple expr trees.
TableDescriptor(const TTableDescriptor &tdesc)
LLVM code generator. This is the top level object to generate jitted code.
std::string DebugString() const
HBaseTableDescriptor(const TTableDescriptor &tdesc)
llvm::StructType * llvm_struct_
ObjectPool * object_pool_
void AddArgument(const NamedVariable &var)
Add argument.
std::string hdfs_base_dir_
llvm::Function * set_not_null_fn_
bool IsPrefixOf(const RowDescriptor &other_desc) const
void CloseExprs(RuntimeState *state)
const ColumnType & type() const
llvm::Function * CodegenIsNull(LlvmCodeGen *, llvm::StructType *tuple)
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
virtual std::string DebugString() const
std::string DebugString(const T &val)
bool Equals(const RowDescriptor &other_desc) const
Return true if the tuple ids of this descriptor match tuple ids of other desc.
std::vector< SlotDescriptor * > slots_
void ToThrift(std::vector< TTupleId > *row_tuple_ids)
Populate row_tuple_ids with our ids.
std::string DebugString() const
std::vector< TupleDescriptor * > tuple_desc_map_
Map from the position of a tuple within the row to its descriptor.
llvm::Function * CodegenUpdateNull(LlvmCodeGen *, llvm::StructType *tuple, bool set_null)
std::string DebugString() const
Base class for table descriptors.
bool IsAnyTupleNullable() const
Return true if any Tuple of the row is nullable.
int GetTupleIdx(TupleId id) const
Returns INVALID_IDX if id not part of this row.
std::string DebugString() const
TupleDescriptor(const TTupleDescriptor &tdesc)
ostream & operator<<(ostream &os, const NullIndicatorOffset &null_indicator)
bool exprs_prepared_
True if PrepareExprs has been called, to prevent repeating expensive codegen.
std::vector< ExprContext * > partition_key_value_ctxs_
static Status CreateExprTree(ObjectPool *pool, const TExpr &texpr, ExprContext **ctx)
void InitTupleIdxMap()
Initializes tuple_idx_map_ during construction, using tuple_desc_map_.
MemTracker * instance_mem_tracker()
PartitionIdToDescriptorMap partition_descriptors_
std::string avro_schema_
Set to the table's Avro schema if this is an Avro table, empty string otherwise.
virtual std::string DebugString() const
SlotDescriptor * GetSlotDescriptor(SlotId id) const
void AddSlot(SlotDescriptor *slot)
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
std::string DebugString() const
int field_idx() const
Returns the field index in the generated llvm struct for this slot's tuple.
Metadata for a single partition inside an Hdfs table.
static Status Create(ObjectPool *pool, const TDescriptorTable &thrift_tbl, DescriptorTbl **tbl)
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
Descriptor for a DataSourceTable.
bool is_materialized() const
std::vector< HBaseColumnDescriptor > cols_
List of family/qualifier pairs.
static Status Prepare(const std::vector< ExprContext * > &ctxs, RuntimeState *state, const RowDescriptor &row_desc, MemTracker *tracker)
RowDescriptor()
Dummy descriptor, needed for the JNI EvalPredicate() function.
TableDescriptor * table_desc_
TupleDescriptorMap tuple_desc_map_
const std::vector< int > col_path_
HdfsTableDescriptor(const TTableDescriptor &tdesc, ObjectPool *pool)
THdfsFileFormat::type file_format_
The format (e.g. text, sequence file, etc.) of the data in the files in this partition.
llvm::LLVMContext & context()
std::string DebugString() const
void GetTupleDescs(std::vector< TupleDescriptor * > *descs) const
Returns all registered tuple descriptors.
const int num_null_bytes_