Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
descriptors.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "runtime/descriptors.h"
16 
17 #include <boost/algorithm/string/join.hpp>
18 #include <ios>
19 #include <sstream>
20 
21 #include <llvm/ExecutionEngine/ExecutionEngine.h>
22 #include <llvm/IR/DataLayout.h>
23 
24 #include "codegen/llvm-codegen.h"
25 #include "common/object-pool.h"
26 #include "gen-cpp/Descriptors_types.h"
27 #include "gen-cpp/PlanNodes_types.h"
28 #include "exprs/expr.h"
29 #include "runtime/runtime-state.h"
30 
31 #include "common/names.h"
32 
33 using boost::algorithm::join;
34 using namespace llvm;
35 
36 namespace impala {
37 
// NOTE(review): the opening signature (listing line 38) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:38
// (presumably NullIndicatorOffset::DebugString, which operator<< below calls).
// Renders the indicator as "(offset=<byte_offset> mask=<bit_mask>)". The mask is cast to
// int and printed in hex; the stream is switched back to decimal before the closing ')'.
39  stringstream out;
40  out << "(offset=" << byte_offset
41  << " mask=" << hex << static_cast<int>(bit_mask) << dec << ")";
42  return out.str();
43 }
44 
// Stream-insertion overload for NullIndicatorOffset: writes null_indicator.DebugString()
// to 'os' and returns 'os' so that insertions can be chained.
45 ostream& operator<<(ostream& os, const NullIndicatorOffset& null_indicator) {
46  os << null_indicator.DebugString();
47  return os;
48 }
49 
// Builds a slot descriptor from its thrift representation. Most members are copied
// straight from the corresponding TSlotDescriptor fields.
50 SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc)
51  : id_(tdesc.id),
52  type_(tdesc.slotType),
53  parent_(tdesc.parent),
54  col_path_(tdesc.columnPath),
55  tuple_offset_(tdesc.byteOffset),
56  null_indicator_offset_(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit),
57  slot_idx_(tdesc.slotIdx),
// The slot size is derived from the (already initialized) type_, not read from thrift.
58  slot_size_(type_.GetByteSize()),
// field_idx_ starts at -1; TupleDescriptor::GenerateLlvmStruct() assigns the real value.
59  field_idx_(-1),
60  is_materialized_(tdesc.isMaterialized),
// The codegen'd function pointers start out NULL; the Codegen*() methods below fill them
// in and return the cached value on later calls.
61  is_null_fn_(NULL),
62  set_not_null_fn_(NULL),
63  set_null_fn_(NULL) {
64 }
65 
// NOTE(review): the opening signature (listing line 66) is absent from this extract; the
// cross-reference index lists
// `static bool ColPathLessThan(const SlotDescriptor *a, const SlotDescriptor *b)`.
// Orders two slots by their column paths: the first position at which the paths differ
// decides; if one path is a prefix of the other, the shorter path sorts first.
67  int common_levels = min(a->col_path().size(), b->col_path().size());
68  for (int i = 0; i < common_levels; ++i) {
69  if (a->col_path()[i] == b->col_path()[i]) continue;
70  return a->col_path()[i] < b->col_path()[i];
71  }
// All shared levels are equal, so compare by path length.
72  return a->col_path().size() < b->col_path().size();
73 }
74 
// NOTE(review): the opening signature (listing line 75) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:75.
// Renders the slot as
// "Slot(id=.. type=.. col_path=[a,b,..] offset=.. null=.. slot_idx=.. field_idx=..)".
76  stringstream out;
77  out << "Slot(id=" << id_ << " type=" << type_.DebugString()
78  << " col_path=[";
// The first path element is written without a separator; every later one is preceded by ','.
79  if (col_path_.size() > 0) out << col_path_[0];
80  for (int i = 1; i < col_path_.size(); ++i) {
81  out << ",";
82  out << col_path_[i];
83  }
84  out << "]"
85  << " offset=" << tuple_offset_ << " null=" << null_indicator_offset_.DebugString()
86  << " slot_idx=" << slot_idx_ << " field_idx=" << field_idx_
87  << ")";
88  return out.str();
89 }
90 
// Base-class constructor for table descriptors: copies the table name, database name, id,
// column counts and column names out of the thrift TTableDescriptor.
91 TableDescriptor::TableDescriptor(const TTableDescriptor& tdesc)
92  : name_(tdesc.tableName),
93  database_(tdesc.dbName),
94  id_(tdesc.id),
95  num_cols_(tdesc.numCols),
96  num_clustering_cols_(tdesc.numClusteringCols),
97  col_names_(tdesc.colNames) {
98 }
99 
// NOTE(review): the opening signature (listing line 100) is absent from this extract; the
// cross-reference index lists `virtual std::string DebugString() const` at
// descriptors.cc:100.
// Renders "#cols=<n> #clustering_cols=<m> col_names=[c1:c2:...]"; the column names are
// joined with ':'. The table-specific subclasses embed this string in their own output.
101  stringstream out;
102  out << "#cols=" << num_cols_ << " #clustering_cols=" << num_clustering_cols_;
103  out << " col_names=[";
104  out << join(col_names_, ":");
105  out << "]";
106  return out.str();
107 }
108 
// Builds a partition descriptor from its thrift representation: copies the delimiters,
// escape character, block size, location, id and file format, clears the three
// exprs_* state flags, and creates one ExprContext per partition key expression.
// 'pool' is stored in object_pool_.
109 HdfsPartitionDescriptor::HdfsPartitionDescriptor(const THdfsPartition& thrift_partition,
110  ObjectPool* pool)
111  : line_delim_(thrift_partition.lineDelim),
112  field_delim_(thrift_partition.fieldDelim),
113  collection_delim_(thrift_partition.collectionDelim),
114  escape_char_(thrift_partition.escapeChar),
115  block_size_(thrift_partition.blockSize),
116  location_(thrift_partition.location),
117  id_(thrift_partition.id),
118  exprs_prepared_(false),
119  exprs_opened_(false),
120  exprs_closed_(false),
121  file_format_(thrift_partition.fileFormat),
122  object_pool_(pool) {
123 
124  for (int i = 0; i < thrift_partition.partitionKeyExprs.size(); ++i) {
125  ExprContext* ctx;
126  // TODO: Move to dedicated Init method and treat Status return correctly
// NOTE(review): listing line 127 is absent from this extract. It must declare 'status'
// and open the call continued on the next line -- presumably Expr::CreateExprTree(),
// which the cross-reference index lists with a matching (pool, texpr, &ctx) signature.
128  thrift_partition.partitionKeyExprs[i], &ctx);
// A failed expr creation is only asserted on here, not propagated (see the TODO above).
129  DCHECK(status.ok());
130  partition_key_value_ctxs_.push_back(ctx);
131  }
132 }
133 
// NOTE(review): the opening signature (listing line 134) is absent from this extract; the
// cross-reference index lists `Status PrepareExprs(RuntimeState *state)`.
// Prepares the partition key exprs at most once: exprs_prepared_ is set on the first call
// and later calls skip straight to returning Status::OK.
135  if (!exprs_prepared_) {
136  // TODO: RowDescriptor should arguably be optional in Prepare for known literals
// The flag is set before the prepare call is made.
137  exprs_prepared_ = true;
138  // Partition exprs are not used in the codegen case. Don't codegen them.
// NOTE(review): listing line 139 is absent; the next line is the tail of a call that
// receives state->instance_mem_tracker() as its last argument -- presumably
// RETURN_IF_ERROR(Expr::Prepare(...)), both of which appear in the cross-reference index.
140  state->instance_mem_tracker()));
141  }
142  return Status::OK;
143 }
144 
// NOTE(review): the opening signature (listing line 145) is absent from this extract; the
// cross-reference index lists `Status OpenExprs(RuntimeState *state)`.
// Opens the partition key exprs at most once: a repeated call returns Status::OK without
// calling Expr::Open() again. The flag is set before Expr::Open() runs, so it stays set
// even if that call returns an error.
146  if (exprs_opened_) return Status::OK;
147  exprs_opened_ = true;
148  return Expr::Open(partition_key_value_ctxs_, state);
149 }
150 
// NOTE(review): the opening signature (listing line 151) is absent from this extract; the
// cross-reference index lists `void CloseExprs(RuntimeState *state)`.
// Closes the partition key exprs at most once; repeated calls return immediately.
152  if (exprs_closed_) return;
153  exprs_closed_ = true;
// NOTE(review): listing line 154 is absent from this extract -- presumably the
// Expr::Close(partition_key_value_ctxs_, state) call; confirm against the real source.
155 }
156 
// NOTE(review): the opening signature (listing line 157) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:157.
// Renders the partition's file format, delimiters and escape character as
// space-separated key='value' pairs.
// Fix: the file_format value previously had a closing quote but no opening one, and the
// string ended with a ')' although this function never emits a '('. Both are now balanced.
158  stringstream out;
159  out << " file_format='" << file_format_ << "'"
160  << " line_delim='" << line_delim_ << "'"
161  << " field_delim='" << field_delim_ << "'"
162  << " coll_delim='" << collection_delim_ << "'"
163  << " escape_char='" << escape_char_ << "'";
164  return out.str();
165 }
166 
// NOTE(review): the opening signature (listing line 167) is absent from this extract; the
// cross-reference index lists `virtual std::string DebugString() const` at
// descriptors.cc:167.
// Wraps the base-class description as "DataSourceTable(<TableDescriptor::DebugString()>)".
168  stringstream out;
169  out << "DataSourceTable(" << TableDescriptor::DebugString() << ")";
170  return out.str();
171 }
172 
// Builds an HDFS table descriptor from thrift: initializes the TableDescriptor base,
// copies the base directory and the two NULL marker strings, and creates one
// HdfsPartitionDescriptor per entry of tdesc.hdfsTable.partitions.
173 HdfsTableDescriptor::HdfsTableDescriptor(const TTableDescriptor& tdesc,
174  ObjectPool* pool)
175  : TableDescriptor(tdesc),
176  hdfs_base_dir_(tdesc.hdfsTable.hdfsBaseDir),
177  null_partition_key_value_(tdesc.hdfsTable.nullPartitionKeyValue),
178  null_column_value_(tdesc.hdfsTable.nullColumnValue),
179  object_pool_(pool) {
180  map<int64_t, THdfsPartition>::const_iterator it;
181  for (it = tdesc.hdfsTable.partitions.begin(); it != tdesc.hdfsTable.partitions.end();
182  ++it) {
// Each partition descriptor is handed to the object pool and indexed by the thrift map key.
183  HdfsPartitionDescriptor* partition = new HdfsPartitionDescriptor(it->second, pool);
184  object_pool_->Add(partition);
185  partition_descriptors_[it->first] = partition;
186  }
// avro_schema_ is the empty string when the optional thrift field is not set.
187  avro_schema_ = tdesc.hdfsTable.__isset.avroSchema ? tdesc.hdfsTable.avroSchema : "";
188 }
189 
// NOTE(review): the opening signature (listing line 190) is absent from this extract; the
// cross-reference index lists `virtual std::string DebugString() const` at
// descriptors.cc:190.
// Renders "HdfsTable(<base description> hdfs_base_dir='..' partitions=[..]
// null_partition_key_value='..' null_column_value='..')".
// Fix: the "HdfsTable(" opened on the first line was never closed; the final line now
// appends ')' so the output is balanced, as the HBaseTable and DataSourceTable
// DebugString() outputs already are.
191  stringstream out;
192  out << "HdfsTable(" << TableDescriptor::DebugString()
193  << " hdfs_base_dir='" << hdfs_base_dir_ << "'";
194  out << " partitions=[";
// One " (id: <id>, partition: <desc>)" entry per partition, joined with ','.
195  vector<string> partition_strings;
196  map<int64_t, HdfsPartitionDescriptor*>::const_iterator it;
197  for (it = partition_descriptors_.begin(); it != partition_descriptors_.end(); ++it) {
198  stringstream s;
199  s << " (id: " << it->first << ", partition: " << it->second->DebugString() << ")";
200  partition_strings.push_back(s.str());
201  }
202  out << join(partition_strings, ",") << "]";
203 
204  out << " null_partition_key_value='" << null_partition_key_value_ << "'";
205  out << " null_column_value='" << null_column_value_ << "')";
206  return out.str();
207 }
208 
// Builds an HBase table descriptor from thrift: initializes the TableDescriptor base,
// copies the native HBase table name and processes one column per entry of
// tdesc.hbaseTable.families.
209 HBaseTableDescriptor::HBaseTableDescriptor(const TTableDescriptor& tdesc)
210  : TableDescriptor(tdesc),
211  table_name_(tdesc.hbaseTable.tableName) {
212  for (int i = 0; i < tdesc.hbaseTable.families.size(); ++i) {
// A column counts as binary encoded only if the optional binary_encoded list is set and
// its i-th entry is true.
// NOTE(review): qualifiers[i] and binary_encoded[i] are indexed with the families index;
// no size check is visible here, so the lists are assumed to be parallel -- confirm.
213  bool is_binary_encoded = tdesc.hbaseTable.__isset.binary_encoded &&
214  tdesc.hbaseTable.binary_encoded[i];
// NOTE(review): listing line 215 is absent from this extract; the next line is the tail of
// a call taking (family, qualifier, is_binary_encoded) -- presumably appending a column
// descriptor to cols_.
216  tdesc.hbaseTable.families[i], tdesc.hbaseTable.qualifiers[i], is_binary_encoded));
217  }
218 }
219 
// NOTE(review): the opening signature (listing line 220) is absent from this extract; the
// cross-reference index lists `virtual std::string DebugString() const` at
// descriptors.cc:220.
// Renders "HBaseTable(<base description> table=<name> cols=[fam:qual:enc fam:qual:enc ...])"
// with the columns separated by single spaces.
221  stringstream out;
222  out << "HBaseTable(" << TableDescriptor::DebugString() << " table=" << table_name_;
223  out << " cols=[";
224  for (int i = 0; i < cols_.size(); ++i) {
225  out << (i > 0 ? " " : "") << cols_[i].family << ":" << cols_[i].qualifier << ":"
226  << cols_[i].binary_encoded;
227  }
228  out << "])";
229  return out.str();
230 }
231 
// Builds a tuple descriptor from thrift: copies id, byte size and the number of null
// bytes. The table pointer starts out NULL (DescriptorTbl::Create() fixes it up when the
// thrift tuple carries a tableId), the slot list starts empty with a materialized-slot
// count of 0, and the cached llvm struct type starts out NULL.
232 TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc)
233  : id_(tdesc.id),
234  table_desc_(NULL),
235  byte_size_(tdesc.byteSize),
236  num_null_bytes_(tdesc.numNullBytes),
237  num_materialized_slots_(0),
238  slots_(),
239  llvm_struct_(NULL) {
240 }
241 
// NOTE(review): the opening signature (listing line 242) is absent from this extract; the
// cross-reference index lists `void AddSlot(SlotDescriptor *slot)`.
// Appends 'slot' to slots_; slots that are both variable-length and materialized are
// additionally recorded in string_slots_.
243  slots_.push_back(slot);
244  if (slot->type().IsVarLen() && slot->is_materialized()) {
245  string_slots_.push_back(slot);
246  }
// NOTE(review): listing line 247 is absent from this extract, so one more statement of
// this function is not visible here.
248 }
249 
// NOTE(review): the opening signature (listing line 250) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:250.
// Renders "Tuple(id=<id> size=<byte_size> slots=[<slot>, <slot>, ...])".
251  stringstream out;
252  out << "Tuple(id=" << id_ << " size=" << byte_size_;
// The table description is deliberately left out: the only statement in this branch is
// commented out, so the branch currently does nothing.
253  if (table_desc_ != NULL) {
254  //out << " " << table_desc_->DebugString();
255  }
256  out << " slots=[";
257  for (size_t i = 0; i < slots_.size(); ++i) {
258  if (i > 0) out << ", ";
259  out << slots_[i]->DebugString();
260  }
261  out << "]";
262  out << ")";
263  return out.str();
264 }
265 
// NOTE(review): the first line of this constructor (listing line 266) is absent from this
// extract; it must declare the 'desc_tbl' parameter used below.
// Builds a row descriptor from a list of tuple ids: each id is resolved through
// desc_tbl.GetTupleDescriptor(), and nullable_tuples[i] records whether tuple i may be
// NULL. Both vectors are expected to have the same length (checked with DCHECK_EQ).
267  const vector<TTupleId>& row_tuples,
268  const vector<bool>& nullable_tuples)
269  : tuple_idx_nullable_map_(nullable_tuples) {
270  DCHECK_EQ(nullable_tuples.size(), row_tuples.size());
271  for (int i = 0; i < row_tuples.size(); ++i) {
272  tuple_desc_map_.push_back(desc_tbl.GetTupleDescriptor(row_tuples[i]));
// An unknown tuple id yields a NULL descriptor; that is only asserted on here.
273  DCHECK(tuple_desc_map_.back() != NULL);
274  }
275  InitTupleIdxMap();
276 }
277 
// NOTE(review): the first line of this constructor (listing line 278) is absent from this
// extract; it must declare the 'lhs_row_desc' parameter used below.
// Builds a row descriptor that is the concatenation of two others: the tuple descriptors
// of lhs_row_desc come first, followed by those of rhs_row_desc.
279  const RowDescriptor& rhs_row_desc) {
280  tuple_desc_map_.insert(tuple_desc_map_.end(), lhs_row_desc.tuple_desc_map_.begin(),
281  lhs_row_desc.tuple_desc_map_.end());
282  tuple_desc_map_.insert(tuple_desc_map_.end(), rhs_row_desc.tuple_desc_map_.begin(),
283  rhs_row_desc.tuple_desc_map_.end());
// NOTE(review): listing lines 284 and 287 are absent from this extract; each opens the
// call whose iterator arguments follow -- presumably the matching insert() calls on
// tuple_idx_nullable_map_ for the lhs and rhs nullability flags.
285  lhs_row_desc.tuple_idx_nullable_map_.begin(),
286  lhs_row_desc.tuple_idx_nullable_map_.end());
288  rhs_row_desc.tuple_idx_nullable_map_.begin(),
289  rhs_row_desc.tuple_idx_nullable_map_.end());
290  InitTupleIdxMap();
291 }
292 
// Builds a row descriptor directly from tuple descriptors. nullable_tuples[i] records
// whether tuple i may be NULL; both vectors are expected to have the same length
// (checked with DCHECK_EQ).
293 RowDescriptor::RowDescriptor(const vector<TupleDescriptor*>& tuple_descs,
294  const vector<bool>& nullable_tuples)
295  : tuple_desc_map_(tuple_descs),
296  tuple_idx_nullable_map_(nullable_tuples) {
297  DCHECK_EQ(nullable_tuples.size(), tuple_descs.size());
298  InitTupleIdxMap();
299 }
300 
// Builds a row descriptor consisting of the single tuple 'tuple_desc', whose nullability
// is given by 'is_nullable'.
301 RowDescriptor::RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable)
302  : tuple_desc_map_(1, tuple_desc),
303  tuple_idx_nullable_map_(1, is_nullable) {
304  InitTupleIdxMap();
305 }
306 
// NOTE(review): the opening signature (listing line 307) is absent from this extract; the
// cross-reference index lists `void InitTupleIdxMap()`.
// Builds tuple_idx_map_, the lookup from TupleId to the tuple's position in the row. The
// map is sized to (largest id + 1); ids that are not part of this row keep INVALID_IDX.
308  // find max id
309  TupleId max_id = 0;
310  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
311  max_id = max(tuple_desc_map_[i]->id(), max_id);
312  }
313 
314  tuple_idx_map_.resize(max_id + 1, INVALID_IDX);
// If the same tuple id occurs more than once, the last position wins.
315  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
316  tuple_idx_map_[tuple_desc_map_[i]->id()] = i;
317  }
318 }
319 
// NOTE(review): the opening signature (listing line 320) is absent from this extract; the
// cross-reference index lists `int GetRowSize() const`.
// Returns the sum of byte_size() over all tuple descriptors of this row.
321  int size = 0;
322  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
323  size += tuple_desc_map_[i]->byte_size();
324  }
325  return size;
326 }
327 
// NOTE(review): the opening signature (listing line 328) is absent from this extract; the
// cross-reference index lists `int GetTupleIdx(TupleId id) const`.
// Returns the position of tuple 'id' within the row, as recorded in tuple_idx_map_
// (INVALID_IDX for ids inside the map's range that do not belong to this row).
// NOTE(review): an id >= tuple_idx_map_.size() is only guarded by the DCHECK_LT below; if
// that check is compiled out, the lookup indexes past the end of the vector -- confirm
// that callers never pass such ids.
329  DCHECK_LT(id, tuple_idx_map_.size()) << "RowDescriptor: " << DebugString();
330  return tuple_idx_map_[id];
331 }
332 
// Returns whether the tuple at position 'tuple_idx' of the row may be NULL. 'tuple_idx'
// is a position within the row (not a TupleId) and is expected to be in range
// (checked with DCHECK_LT).
333 bool RowDescriptor::TupleIsNullable(int tuple_idx) const {
334  DCHECK_LT(tuple_idx, tuple_idx_nullable_map_.size());
335  return tuple_idx_nullable_map_[tuple_idx];
336 }
337 
// NOTE(review): the opening signature (listing line 338) is absent from this extract; the
// cross-reference index lists `bool IsAnyTupleNullable() const`.
// Returns true as soon as one tuple of the row is marked nullable, false if none is.
339  for (int i = 0; i < tuple_idx_nullable_map_.size(); ++i) {
340  if (tuple_idx_nullable_map_[i]) return true;
341  }
342  return false;
343 }
344 
// Replaces the contents of *row_tuple_ids with the ids of this row's tuples, in row order.
// Any previous contents of the output vector are discarded.
345 void RowDescriptor::ToThrift(vector<TTupleId>* row_tuple_ids) {
346  row_tuple_ids->clear();
347  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
348  row_tuple_ids->push_back(tuple_desc_map_[i]->id());
349  }
350 }
351 
// Returns true if this row's tuple descriptors are exactly the leading tuple descriptors
// of 'other_desc', in the same order. A row with more tuples than 'other_desc' is never a
// prefix; an empty row is a prefix of any row. Nullability flags are not compared.
352 bool RowDescriptor::IsPrefixOf(const RowDescriptor& other_desc) const {
353  if (tuple_desc_map_.size() > other_desc.tuple_desc_map_.size()) return false;
354  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
355  // pointer comparison okay, descriptors are unique
356  if (tuple_desc_map_[i] != other_desc.tuple_desc_map_[i]) return false;
357  }
358  return true;
359 }
360 
// Returns true if both rows contain the same tuple descriptors in the same order.
// Descriptors are compared by pointer; nullability flags are not compared.
361 bool RowDescriptor::Equals(const RowDescriptor& other_desc) const {
362  if (tuple_desc_map_.size() != other_desc.tuple_desc_map_.size()) return false;
363  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
364  // pointer comparison okay, descriptors are unique
365  if (tuple_desc_map_[i] != other_desc.tuple_desc_map_[i]) return false;
366  }
367  return true;
368 }
369 
// NOTE(review): the opening signature (listing line 370) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:370.
// Returns the DebugString() of every tuple descriptor of the row, one per line.
371  stringstream ss;
372  for (int i = 0; i < tuple_desc_map_.size(); ++i) {
373  ss << tuple_desc_map_[i]->DebugString() << endl;
374  }
375  return ss.str();
376 }
377 
// Deserializes a thrift descriptor table into a new DescriptorTbl.
//   pool       - receives the new table and every descriptor created for it
//   thrift_tbl - the serialized table, tuple and slot descriptors
//   tbl        - out parameter; set to the new table before any descriptor is processed
// Returns Status::OK on success, or an error status if a slot names a parent tuple id
// that is not in the table. *tbl is already set (and partially populated) in that case.
378 Status DescriptorTbl::Create(ObjectPool* pool, const TDescriptorTable& thrift_tbl,
379  DescriptorTbl** tbl) {
380  *tbl = pool->Add(new DescriptorTbl());
381  // deserialize table descriptors first, they are being referenced by tuple descriptors
382  for (size_t i = 0; i < thrift_tbl.tableDescriptors.size(); ++i) {
383  const TTableDescriptor& tdesc = thrift_tbl.tableDescriptors[i];
384  TableDescriptor* desc = NULL;
385  switch (tdesc.tableType) {
386  case TTableType::HDFS_TABLE:
387  desc = pool->Add(new HdfsTableDescriptor(tdesc, pool));
388  break;
389  case TTableType::HBASE_TABLE:
390  desc = pool->Add(new HBaseTableDescriptor(tdesc));
391  break;
392  case TTableType::DATA_SOURCE_TABLE:
393  desc = pool->Add(new DataSourceTableDescriptor(tdesc));
394  break;
395  default:
// NOTE(review): an unrecognized table type is only asserted on; if the DCHECK is compiled
// out, 'desc' stays NULL and a NULL entry is stored in tbl_desc_map_ below -- confirm
// whether this should return an error Status instead.
396  DCHECK(false) << "invalid table type: " << tdesc.tableType;
397  }
398  (*tbl)->tbl_desc_map_[tdesc.id] = desc;
399  }
400 
// Second pass: tuple descriptors, which may point at the table descriptors created above.
401  for (size_t i = 0; i < thrift_tbl.tupleDescriptors.size(); ++i) {
402  const TTupleDescriptor& tdesc = thrift_tbl.tupleDescriptors[i];
403  TupleDescriptor* desc = pool->Add(new TupleDescriptor(tdesc));
404  // fix up table pointer
405  if (tdesc.__isset.tableId) {
406  desc->table_desc_ = (*tbl)->GetTableDescriptor(tdesc.tableId);
407  DCHECK(desc->table_desc_ != NULL);
408  }
409  (*tbl)->tuple_desc_map_[tdesc.id] = desc;
410  }
411 
// Third pass: slot descriptors, each registered by id and attached to its parent tuple.
412  for (size_t i = 0; i < thrift_tbl.slotDescriptors.size(); ++i) {
413  const TSlotDescriptor& tdesc = thrift_tbl.slotDescriptors[i];
414  SlotDescriptor* slot_d = pool->Add(new SlotDescriptor(tdesc));
415  (*tbl)->slot_desc_map_[tdesc.id] = slot_d;
416 
417  // link to parent
418  TupleDescriptorMap::iterator entry = (*tbl)->tuple_desc_map_.find(tdesc.parent);
419  if (entry == (*tbl)->tuple_desc_map_.end()) {
420  return Status("unknown tid in slot descriptor msg");
421  }
422  entry->second->AddSlot(slot_d);
423  }
424  return Status::OK;
425 }
426 
// NOTE(review): the opening signature (listing line 427) is absent from this extract; the
// cross-reference index lists `TableDescriptor * GetTableDescriptor(TableId id) const`.
// Looks up the table descriptor registered under 'id'; returns NULL if there is none.
428  // TODO: is there some boost function to do exactly this?
429  TableDescriptorMap::const_iterator i = tbl_desc_map_.find(id);
430  if (i == tbl_desc_map_.end()) {
431  return NULL;
432  } else {
433  return i->second;
434  }
435 }
436 
// NOTE(review): the opening signature (listing line 437) is absent from this extract; the
// cross-reference index lists `TupleDescriptor * GetTupleDescriptor(TupleId id) const`.
// Looks up the tuple descriptor registered under 'id'; returns NULL if there is none.
438  // TODO: is there some boost function to do exactly this?
439  TupleDescriptorMap::const_iterator i = tuple_desc_map_.find(id);
440  if (i == tuple_desc_map_.end()) {
441  return NULL;
442  } else {
443  return i->second;
444  }
445 }
446 
// NOTE(review): the opening signature (listing line 447) is absent from this extract; the
// cross-reference index lists `SlotDescriptor * GetSlotDescriptor(SlotId id) const`.
// Looks up the slot descriptor registered under 'id'; returns NULL if there is none.
448  // TODO: is there some boost function to do exactly this?
449  SlotDescriptorMap::const_iterator i = slot_desc_map_.find(id);
450  if (i == slot_desc_map_.end()) {
451  return NULL;
452  } else {
453  return i->second;
454  }
455 }
456 
// Replaces the contents of *descs with every tuple descriptor in tuple_desc_map_, in the
// map's iteration order. Any previous contents of the output vector are discarded.
457 // return all registered tuple descriptors
458 void DescriptorTbl::GetTupleDescs(vector<TupleDescriptor*>* descs) const {
459  descs->clear();
460  for (TupleDescriptorMap::const_iterator i = tuple_desc_map_.begin();
461  i != tuple_desc_map_.end(); ++i) {
462  descs->push_back(i->second);
463  }
464 }
465 
466 // Generate function to check if a slot is null. The resulting IR looks like:
467 // (in this case the tuple contains only a nullable double)
468 // define i1 @IsNull({ i8, double }* %tuple) {
469 // entry:
470 // %null_byte_ptr = getelementptr inbounds { i8, double }* %tuple, i32 0, i32 0
471 // %null_byte = load i8* %null_byte_ptr
472 // %null_mask = and i8 %null_byte, 1
473 // %is_null = icmp ne i8 %null_mask, 0
474 // ret i1 %is_null
475 // }
// 'tuple' is the llvm struct type of the enclosing tuple. The generated function is cached
// in is_null_fn_, so a repeated call returns the cached function without regenerating it.
476 Function* SlotDescriptor::CodegenIsNull(LlvmCodeGen* codegen, StructType* tuple) {
477  if (is_null_fn_ != NULL) return is_null_fn_;
478  PointerType* tuple_ptr_type = PointerType::get(tuple, 0);
479  LlvmCodeGen::FnPrototype prototype(codegen, "IsNull", codegen->GetType(TYPE_BOOLEAN));
480  prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type));
481 
// NOTE(review): listing line 482 is absent from this extract; it must define 'mask', which
// is used in the CreateAnd() call below -- presumably a constant built from the slot's
// null-indicator bit mask.
483  Value* zero = codegen->GetIntConstant(TYPE_TINYINT, 0);
484  int byte_offset = null_indicator_offset_.byte_offset;
485 
486  LlvmCodeGen::LlvmBuilder builder(codegen->context());
487  Value* tuple_ptr;
488  Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
489 
// The null byte's offset doubles as the struct field index of that byte.
490  Value* null_byte_ptr = builder.CreateStructGEP(tuple_ptr, byte_offset, "null_byte_ptr");
491  Value* null_byte = builder.CreateLoad(null_byte_ptr, "null_byte");
492  Value* null_mask = builder.CreateAnd(null_byte, mask, "null_mask");
493  Value* is_null = builder.CreateICmpNE(null_mask, zero, "is_null");
494  builder.CreateRet(is_null);
495 
// The finalized function is cached and returned in one step.
496  return is_null_fn_ = codegen->FinalizeFunction(fn);
497 }
498 
499 // Generate function to set a slot to be null or not-null. The resulting IR
500 // for SetNotNull looks like:
501 // (in this case the tuple contains only a nullable double)
502 // define void @SetNotNull({ i8, double }* %tuple) {
503 // entry:
504 // %null_byte_ptr = getelementptr inbounds { i8, double }* %tuple, i32 0, i32 0
505 // %null_byte = load i8* %null_byte_ptr
506 // %0 = and i8 %null_byte, -2
507 // store i8 %0, i8* %null_byte_ptr
508 // ret void
509 // }
// NOTE(review): the first line of the signature (listing line 510) is absent from this
// extract; the cross-reference index lists
// `llvm::Function * CodegenUpdateNull(LlvmCodeGen *, llvm::StructType *tuple, bool set_null)`.
// 'set_null' selects which variant is generated ("SetNull" or "SetNotNull"). Each variant
// is cached separately in set_null_fn_ / set_not_null_fn_ and reused on later calls.
511  StructType* tuple, bool set_null) {
512  if (set_null && set_null_fn_ != NULL) return set_null_fn_;
513  if (!set_null && set_not_null_fn_ != NULL) return set_not_null_fn_;
514 
515  PointerType* tuple_ptr_type = PointerType::get(tuple, 0);
516  LlvmCodeGen::FnPrototype prototype(codegen, (set_null) ? "SetNull" :"SetNotNull",
517  codegen->void_type());
518  prototype.AddArgument(LlvmCodeGen::NamedVariable("tuple", tuple_ptr_type));
519 
520  LlvmCodeGen::LlvmBuilder builder(codegen->context());
521  Value* tuple_ptr;
522  Function* fn = prototype.GeneratePrototype(&builder, &tuple_ptr);
523 
524  Value* null_byte_ptr =
525  builder.CreateStructGEP(
526  tuple_ptr, null_indicator_offset_.byte_offset, "null_byte_ptr");
527  Value* null_byte = builder.CreateLoad(null_byte_ptr, "null_byte");
528  Value* result = NULL;
529 
// Setting NULL ORs a constant into the null byte; clearing ANDs one into it.
// NOTE(review): listing lines 532 and 536 are absent from this extract; they carry the
// arguments / initializer of the two constants -- presumably the slot's bit mask and its
// complement, as the IR sample above suggests.
530  if (set_null) {
531  Value* null_set = codegen->GetIntConstant(
533  result = builder.CreateOr(null_byte, null_set);
534  } else {
535  Value* null_clear_val =
537  result = builder.CreateAnd(null_byte, null_clear_val);
538  }
539 
540  builder.CreateStore(result, null_byte_ptr);
541  builder.CreateRetVoid();
542 
// Cache the finalized function in the slot matching the requested variant.
543  fn = codegen->FinalizeFunction(fn);
544  if (set_null) {
545  set_null_fn_ = fn;
546  } else {
547  set_not_null_fn_ = fn;
548  }
549  return fn;
550 }
551 
552 // The default llvm packing is identical to what we do in the FE. Each field is aligned
553 // to begin on the size for that type.
554 // TODO: Understand llvm::SetTargetData which allows you to explicitly define the packing
555 // rules.
// NOTE(review): the opening signature (listing line 556) is absent from this extract; the
// cross-reference index lists `llvm::StructType * GenerateLlvmStruct(LlvmCodeGen *codegen)`.
// Builds (and caches in llvm_struct_) the llvm struct type that mirrors this tuple: one
// i8-sized field per null byte, followed by one field per materialized slot.
// Returns NULL if the tuple has a TYPE_CHAR slot or if the llvm layout does not match the
// tuple's byte size and slot offsets; nothing is cached in those cases.
// Side effect: sets field_idx_ on every materialized slot visited, including on the paths
// that later return NULL.
557  // If we already generated the llvm type, just return it.
558  if (llvm_struct_ != NULL) return llvm_struct_;
559 
560  // For each null byte, add a byte to the struct
561  vector<Type*> struct_fields;
562  struct_fields.resize(num_null_bytes_ + num_materialized_slots_);
563  for (int i = 0; i < num_null_bytes_; ++i) {
564  struct_fields[i] = codegen->GetType(TYPE_TINYINT);
565  }
566 
567  // Add the slot types to the struct description.
568  for (int i = 0; i < slots().size(); ++i) {
569  SlotDescriptor* slot_desc = slots()[i];
// Tuples containing a CHAR slot cannot be represented; give up on the whole struct.
570  if (slot_desc->type().type == TYPE_CHAR) return NULL;
571  if (slot_desc->is_materialized()) {
// Slot fields follow the null bytes, so the field index is the slot index shifted by
// the number of null bytes.
572  slot_desc->field_idx_ = slot_desc->slot_idx_ + num_null_bytes_;
573  DCHECK_LT(slot_desc->field_idx(), struct_fields.size());
574  struct_fields[slot_desc->field_idx()] = codegen->GetType(slot_desc->type());
575  }
576  }
577 
578  // Construct the struct type.
579  StructType* tuple_struct = StructType::get(codegen->context(),
580  ArrayRef<Type*>(struct_fields));
581 
582  // Verify the alignment is correct. It is essential that the layout matches
583  // identically. If the layout does not match, return NULL indicating the
584  // struct could not be codegen'd. This will trigger codegen for anything using
585  // the tuple to be disabled.
586  const DataLayout* data_layout = codegen->execution_engine()->getDataLayout();
587  const StructLayout* layout = data_layout->getStructLayout(tuple_struct);
// A mismatch fires the DCHECK_EQ and otherwise falls through to the NULL return.
588  if (layout->getSizeInBytes() != byte_size()) {
589  DCHECK_EQ(layout->getSizeInBytes(), byte_size());
590  return NULL;
591  }
592  for (int i = 0; i < slots().size(); ++i) {
593  SlotDescriptor* slot_desc = slots()[i];
594  if (slot_desc->is_materialized()) {
595  int field_idx = slot_desc->field_idx();
596  // Verify that the byte offset in the llvm struct matches the tuple offset
597  // computed in the FE
598  if (layout->getElementOffset(field_idx) != slot_desc->tuple_offset()) {
599  DCHECK_EQ(layout->getElementOffset(field_idx), slot_desc->tuple_offset());
600  return NULL;
601  }
602  }
603  }
// Only a fully verified struct is cached.
604  llvm_struct_ = tuple_struct;
605  return tuple_struct;
606 }
607 
// NOTE(review): the opening signature (listing line 608) is absent from this extract; the
// cross-reference index lists `std::string DebugString() const` at descriptors.cc:608.
// Returns "tuples:" followed by the DebugString() of every registered tuple descriptor,
// one per line. Table and slot descriptors are not listed separately.
609  stringstream out;
610  out << "tuples:\n";
611  for (TupleDescriptorMap::const_iterator i = tuple_desc_map_.begin();
612  i != tuple_desc_map_.end(); ++i) {
613  out << i->second->DebugString() << '\n';
614  }
615  return out.str();
616 }
617 
618 }
std::string null_partition_key_value_
Definition: descriptors.h:253
bool IsVarLen() const
Definition: types.h:172
virtual std::string DebugString() const
Definition: descriptors.cc:190
int GetRowSize() const
Definition: descriptors.cc:320
std::string DebugString() const
Definition: descriptors.cc:370
Status OpenExprs(RuntimeState *state)
Definition: descriptors.cc:145
std::string table_name_
native name of hbase table
Definition: descriptors.h:285
static bool ColPathLessThan(const SlotDescriptor *a, const SlotDescriptor *b)
Definition: descriptors.cc:66
SlotDescriptorMap slot_desc_map_
Definition: descriptors.h:361
llvm::Function * set_null_fn_
Definition: descriptors.h:142
std::vector< SlotDescriptor * > string_slots_
Definition: descriptors.h:331
Status PrepareExprs(RuntimeState *state)
Definition: descriptors.cc:134
Utility struct that wraps a variable name and llvm type.
Definition: llvm-codegen.h:149
static Status Open(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for opening multiple expr trees.
int TableId
Definition: global-types.h:25
virtual std::string DebugString() const
Definition: descriptors.cc:100
const NullIndicatorOffset null_indicator_offset_
Definition: descriptors.h:123
TableDescriptor * GetTableDescriptor(TableId id) const
Definition: descriptors.cc:427
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
std::vector< bool > tuple_idx_nullable_map_
tuple_idx_nullable_map_[i] is true if tuple i can be null
Definition: descriptors.h:436
TableDescriptorMap tbl_desc_map_
Definition: descriptors.h:359
std::vector< std::string > col_names_
Definition: descriptors.h:173
const std::vector< int > & col_path() const
Definition: descriptors.h:85
const std::vector< SlotDescriptor * > & slots() const
Definition: descriptors.h:302
std::vector< int > tuple_idx_map_
map from TupleId to position of tuple w/in row
Definition: descriptors.h:439
int byte_size() const
Definition: descriptors.h:300
HdfsPartitionDescriptor(const THdfsPartition &thrift_partition, ObjectPool *pool)
Definition: descriptors.cc:109
std::string null_column_value_
Special string to indicate NULL values in text-encoded columns.
Definition: descriptors.h:255
llvm::Function * is_null_fn_
Cached codegen'd functions.
Definition: descriptors.h:140
llvm::StructType * GenerateLlvmStruct(LlvmCodeGen *codegen)
Definition: descriptors.cc:556
int TupleId
Definition: global-types.h:23
TupleDescriptor * GetTupleDescriptor(TupleId id) const
Definition: descriptors.cc:437
bool TupleIsNullable(int tuple_idx) const
Return true if the Tuple of the given Tuple index is nullable.
Definition: descriptors.cc:333
static const int INVALID_IDX
Definition: descriptors.h:400
ObjectPool * object_pool_
Owned by DescriptorTbl.
Definition: descriptors.h:260
static void Close(const std::vector< ExprContext * > &ctxs, RuntimeState *state)
Convenience function for closing multiple expr trees.
TableDescriptor(const TTableDescriptor &tdesc)
Definition: descriptors.cc:91
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
std::string DebugString() const
Definition: types.cc:194
HBaseTableDescriptor(const TTableDescriptor &tdesc)
Definition: descriptors.cc:209
llvm::StructType * llvm_struct_
Definition: descriptors.h:332
PrimitiveType type
Definition: types.h:60
void AddArgument(const NamedVariable &var)
Add argument.
Definition: llvm-codegen.h:171
llvm::Function * set_not_null_fn_
Definition: descriptors.h:141
bool IsPrefixOf(const RowDescriptor &other_desc) const
Definition: descriptors.cc:352
void CloseExprs(RuntimeState *state)
Definition: descriptors.cc:151
const ColumnType type_
Definition: descriptors.h:119
const ColumnType & type() const
Definition: descriptors.h:78
llvm::Function * CodegenIsNull(LlvmCodeGen *, llvm::StructType *tuple)
Definition: descriptors.cc:476
llvm::ExecutionEngine * execution_engine()
Returns execution engine interface.
Definition: llvm-codegen.h:217
virtual std::string DebugString() const
Definition: descriptors.cc:167
ObjectPool pool
int SlotId
Definition: global-types.h:24
std::string DebugString(const T &val)
Definition: udf-debug.h:27
bool Equals(const RowDescriptor &other_desc) const
Return true if the tuple ids of this descriptor match tuple ids of other desc.
Definition: descriptors.cc:361
std::vector< SlotDescriptor * > slots_
Definition: descriptors.h:330
void ToThrift(std::vector< TTupleId > *row_tuple_ids)
Populate row_tuple_ids with our ids.
Definition: descriptors.cc:345
std::string DebugString() const
Definition: descriptors.cc:608
std::vector< TupleDescriptor * > tuple_desc_map_
map from position of tuple w/in row to its descriptor
Definition: descriptors.h:433
llvm::Function * CodegenUpdateNull(LlvmCodeGen *, llvm::StructType *tuple, bool set_null)
Definition: descriptors.cc:510
std::string DebugString() const
Definition: descriptors.cc:250
Base class for table descriptors.
Definition: descriptors.h:148
bool IsAnyTupleNullable() const
Return true if any Tuple of the row is nullable.
Definition: descriptors.cc:338
int GetTupleIdx(TupleId id) const
Returns INVALID_IDX if id not part of this row.
Definition: descriptors.cc:328
std::string DebugString() const
Definition: descriptors.cc:38
TupleDescriptor(const TTupleDescriptor &tdesc)
Definition: descriptors.cc:232
ostream & operator<<(ostream &os, const NullIndicatorOffset &null_indicator)
Definition: descriptors.cc:45
bool exprs_prepared_
True if PrepareExprs has been called, to prevent repeating expensive codegen.
Definition: descriptors.h:210
std::vector< ExprContext * > partition_key_value_ctxs_
Definition: descriptors.h:216
static Status CreateExprTree(ObjectPool *pool, const TExpr &texpr, ExprContext **ctx)
Definition: expr.cc:129
void InitTupleIdxMap()
Initializes tupleIdxMap during c'tor using the tuple_desc_map_.
Definition: descriptors.cc:307
MemTracker * instance_mem_tracker()
PartitionIdToDescriptorMap partition_descriptors_
Definition: descriptors.h:256
std::string avro_schema_
Set to the table's Avro schema if this is an Avro table, empty string otherwise.
Definition: descriptors.h:258
virtual std::string DebugString() const
Definition: descriptors.cc:220
SlotDescriptor * GetSlotDescriptor(SlotId id) const
Definition: descriptors.cc:447
static const Status OK
Definition: status.h:87
void AddSlot(SlotDescriptor *slot)
Definition: descriptors.cc:242
llvm::Type * GetType(const ColumnType &type)
Returns llvm type for the column type.
std::string DebugString() const
Definition: descriptors.cc:75
int tuple_offset() const
Definition: descriptors.h:88
int field_idx() const
Returns the field index in the generated llvm struct for this slot's tuple.
Definition: descriptors.h:87
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
static Status Create(ObjectPool *pool, const TDescriptorTable &thrift_tbl, DescriptorTbl **tbl)
Definition: descriptors.cc:378
llvm::Value * GetIntConstant(PrimitiveType type, int64_t val)
Returns the constant 'val' of 'type'.
llvm::Function * FinalizeFunction(llvm::Function *function)
Descriptor for a DataSourceTable.
Definition: descriptors.h:292
bool is_materialized() const
Definition: descriptors.h:92
std::vector< HBaseColumnDescriptor > cols_
List of family/qualifier pairs.
Definition: descriptors.h:288
static Status Prepare(const std::vector< ExprContext * > &ctxs, RuntimeState *state, const RowDescriptor &row_desc, MemTracker *tracker)
RowDescriptor()
dummy descriptor, needed for the JNI EvalPredicate() function
Definition: descriptors.h:393
TableDescriptor * table_desc_
Definition: descriptors.h:326
bool ok() const
Definition: status.h:172
TupleDescriptorMap tuple_desc_map_
Definition: descriptors.h:360
llvm::Type * void_type()
Definition: llvm-codegen.h:394
const std::vector< int > col_path_
Definition: descriptors.h:121
HdfsTableDescriptor(const TTableDescriptor &tdesc, ObjectPool *pool)
Definition: descriptors.cc:173
THdfsFileFormat::type file_format_
The format (e.g. text, sequence file etc.) of data in the files in this partition.
Definition: descriptors.h:219
llvm::LLVMContext & context()
Definition: llvm-codegen.h:214
std::string DebugString() const
Definition: descriptors.cc:157
void GetTupleDescs(std::vector< TupleDescriptor * > *descs) const
return all registered tuple descriptors
Definition: descriptors.cc:458