Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
descriptors.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 
16 #ifndef IMPALA_RUNTIME_DESCRIPTORS_H
17 #define IMPALA_RUNTIME_DESCRIPTORS_H
18 
19 #include <vector>
20 #include <tr1/unordered_map>
21 #include <vector>
22 #include <boost/scoped_ptr.hpp>
23 #include <ostream>
24 
25 #include "common/status.h"
26 #include "common/global-types.h"
27 #include "runtime/types.h"
28 
29 #include "gen-cpp/Descriptors_types.h" // for TTupleId
30 #include "gen-cpp/Types_types.h"
31 
// Forward declarations of the LLVM types this header mentions. They are only used
// through pointers in the declarations below (e.g. llvm::StructType*,
// llvm::PointerType*, llvm::Function*), so incomplete types are sufficient here.
32 namespace llvm {
33  class Function;
34  class PointerType;
35  class StructType;
36 };
37 
38 namespace impala {
39 
// Forward declarations of types that the declarations below take or return by pointer
// or reference (e.g. LlvmCodeGen*, ObjectPool*, RuntimeState*, const TSlotDescriptor&).
// NOTE(review): the T-prefixed names are presumably Thrift-generated types (cf. the
// gen-cpp includes above) — confirm. TTable and Expr are not referenced in the visible
// part of this listing.
40 class LlvmCodeGen;
41 class ObjectPool;
42 class TDescriptorTable;
43 class TSlotDescriptor;
44 class TTable;
45 class TTupleDescriptor;
46 class Expr;
47 class ExprContext;
48 class RuntimeState;
49 
51  llvm::StructType* tuple_struct;
52  llvm::PointerType* tuple_ptr;
53  std::vector<int> indices;
54 };
55 
63  uint8_t bit_mask; // to extract null indicator
64 
// Initializes the null-indicator location from a byte offset and a bit offset.
// A bit_offset of -1 is a sentinel that yields a zero bit_mask; any other value sets
// the single bit at position bit_offset.
// NOTE(review): bit_mask is a uint8_t, so a bit_offset above 7 would be truncated to a
// zero mask — presumably callers only pass -1 or 0..7; confirm.
65  NullIndicatorOffset(int byte_offset, int bit_offset)
66  : byte_offset(byte_offset),
67  bit_mask(bit_offset == -1 ? 0 : 1 << bit_offset) {
68  }
69 
70  std::string DebugString() const;
71 };
72 
73 std::ostream& operator<<(std::ostream& os, const NullIndicatorOffset& null_indicator);
74 
76  public:
77  SlotId id() const { return id_; }
78  const ColumnType& type() const { return type_; }
79  TupleId parent() const { return parent_; }
// Returns the first element of this slot's column path.
// NOTE(review): indexes col_path_[0] without a bounds check, so this assumes
// col_path_ is never empty — confirm against the constructor.
84  int col_pos() const { return col_path_[0]; }
85  const std::vector<int>& col_path() const { return col_path_; }
87  int field_idx() const { return field_idx_; }
88  int tuple_offset() const { return tuple_offset_; }
91  }
92  bool is_materialized() const { return is_materialized_; }
// A slot is nullable exactly when its null indicator has a non-zero bit mask
// (NullIndicatorOffset produces a zero mask when constructed with bit_offset == -1).
93  bool is_nullable() const { return null_indicator_offset_.bit_mask != 0; }
94  int slot_size() const { return slot_size_; }
95 
102  static bool ColPathLessThan(const SlotDescriptor* a, const SlotDescriptor* b);
103 
104  std::string DebugString() const;
105 
108  llvm::Function* CodegenIsNull(LlvmCodeGen*, llvm::StructType* tuple);
109 
112  llvm::Function* CodegenUpdateNull(LlvmCodeGen*, llvm::StructType* tuple, bool set_null);
113 
114  private:
115  friend class DescriptorTbl;
116  friend class TupleDescriptor;
117 
118  const SlotId id_;
121  const std::vector<int> col_path_;
122  const int tuple_offset_;
124 
127  const int slot_idx_;
128 
130  const int slot_size_;
131 
136 
137  const bool is_materialized_;
138 
140  llvm::Function* is_null_fn_;
141  llvm::Function* set_not_null_fn_;
142  llvm::Function* set_null_fn_;
143 
144  SlotDescriptor(const TSlotDescriptor& tdesc);
145 };
146 
149  public:
150  TableDescriptor(const TTableDescriptor& tdesc);
151  virtual ~TableDescriptor() {}
152  int num_cols() const { return num_cols_; }
154  virtual std::string DebugString() const;
155 
// Returns true if slot_desc refers to a clustering column: its column path has exactly
// one element and that element is less than num_clustering_cols_.
// slot_desc is dereferenced unconditionally, so it must not be NULL.
158  bool IsClusteringCol(const SlotDescriptor* slot_desc) const {
159  return slot_desc->col_path().size() == 1 &&
160  slot_desc->col_path()[0] < num_clustering_cols_;
161  }
162 
163  const std::string& name() const { return name_; }
164  const std::string& database() const { return database_; }
165  const std::vector<std::string>& col_names() const { return col_names_; }
166 
167  protected:
168  std::string name_;
169  std::string database_;
173  std::vector<std::string> col_names_;
174 };
175 
178  public:
179  HdfsPartitionDescriptor(const THdfsPartition& thrift_partition, ObjectPool* pool);
180  char line_delim() const { return line_delim_; }
181  char field_delim() const { return field_delim_; }
182  char collection_delim() const { return collection_delim_; }
183  char escape_char() const { return escape_char_; }
184  THdfsFileFormat::type file_format() const { return file_format_; }
185  const std::vector<ExprContext*>& partition_key_value_ctxs() const {
187  }
188  int block_size() const { return block_size_; }
189  const std::string& location() const { return location_; }
190  int64_t id() const { return id_; }
191 
195  Status OpenExprs(RuntimeState* state);
196  void CloseExprs(RuntimeState* state);
197 
198  std::string DebugString() const;
199 
200  private:
206  std::string location_;
207  int64_t id_;
208 
213 
216  std::vector<ExprContext*> partition_key_value_ctxs_;
217 
219  THdfsFileFormat::type file_format_;
220 
224 };
225 
227  public:
228  HdfsTableDescriptor(const TTableDescriptor& tdesc, ObjectPool* pool);
229  const std::string& hdfs_base_dir() const { return hdfs_base_dir_; }
230  const std::string& null_partition_key_value() const {
232  }
233  const std::string& null_column_value() const { return null_column_value_; }
234  const std::string& avro_schema() const { return avro_schema_; }
235 
236  typedef std::map<int64_t, HdfsPartitionDescriptor*> PartitionIdToDescriptorMap;
237 
// Looks up partition_id in partition_descriptors_ and returns the mapped partition
// descriptor, or NULL if the id is not present in the map.
238  HdfsPartitionDescriptor* GetPartition(int64_t partition_id) const {
239  PartitionIdToDescriptorMap::const_iterator it =
240  partition_descriptors_.find(partition_id);
241  if (it == partition_descriptors_.end()) return NULL;
242  return it->second;
243  }
244 
246  return partition_descriptors_;
247  }
248 
249  virtual std::string DebugString() const;
250 
251  protected:
252  std::string hdfs_base_dir_;
255  std::string null_column_value_;
258  std::string avro_schema_;
261 };
262 
264  public:
265  HBaseTableDescriptor(const TTableDescriptor& tdesc);
266  virtual std::string DebugString() const;
// Returns the native name of the HBase table. The name is returned by const reference,
// like name() and database() on TableDescriptor, so callers do not pay for a string
// copy on every call; the reference is valid only while this descriptor is alive.
267  const std::string& table_name() const { return table_name_; }
268 
270  std::string family;
271  std::string qualifier;
273 
// Copies the given column family, column qualifier and binary-encoding flag into the
// family, qualifier and binary_encoded fields respectively.
274  HBaseColumnDescriptor(const std::string& col_family, const std::string& col_qualifier,
275  bool col_binary_encoded)
276  : family(col_family),
277  qualifier(col_qualifier),
278  binary_encoded(col_binary_encoded){
279  }
280  };
281  const std::vector<HBaseColumnDescriptor>& cols() const { return cols_; }
282 
283  protected:
285  std::string table_name_;
286 
288  std::vector<HBaseColumnDescriptor> cols_;
289 };
290 
293  public:
// Forwards tdesc to the TableDescriptor base-class constructor; the body is empty, so
// no additional state is initialized here.
294  DataSourceTableDescriptor(const TTableDescriptor& tdesc) : TableDescriptor(tdesc) { }
295  virtual std::string DebugString() const;
296 };
297 
299  public:
300  int byte_size() const { return byte_size_; }
301  int num_null_bytes() const { return num_null_bytes_; }
302  const std::vector<SlotDescriptor*>& slots() const { return slots_; }
303  const std::vector<SlotDescriptor*>& string_slots() const { return string_slots_; }
304  const TableDescriptor* table_desc() const { return table_desc_; }
305 
306  TupleId id() const { return id_; }
307  std::string DebugString() const;
308 
320  llvm::StructType* GenerateLlvmStruct(LlvmCodeGen* codegen);
321 
322  protected:
323  friend class DescriptorTbl;
324 
325  const TupleId id_;
327  const int byte_size_;
328  const int num_null_bytes_;
330  std::vector<SlotDescriptor*> slots_; // contains all slots
331  std::vector<SlotDescriptor*> string_slots_; // contains only materialized string slots
332  llvm::StructType* llvm_struct_; // cache for the llvm struct type for this tuple desc
333 
334  TupleDescriptor(const TTupleDescriptor& tdesc);
335  void AddSlot(SlotDescriptor* slot);
336 };
337 
339  public:
342  static Status Create(ObjectPool* pool, const TDescriptorTable& thrift_tbl,
343  DescriptorTbl** tbl);
344 
348 
350  void GetTupleDescs(std::vector<TupleDescriptor*>* descs) const;
351 
352  std::string DebugString() const;
353 
354  private:
355  typedef std::tr1::unordered_map<TableId, TableDescriptor*> TableDescriptorMap;
356  typedef std::tr1::unordered_map<TupleId, TupleDescriptor*> TupleDescriptorMap;
357  typedef std::tr1::unordered_map<SlotId, SlotDescriptor*> SlotDescriptorMap;
358 
362 
364 };
365 
374  public:
375  RowDescriptor(const DescriptorTbl& desc_tbl, const std::vector<TTupleId>& row_tuples,
376  const std::vector<bool>& nullable_tuples);
377 
382  }
383 
385  RowDescriptor(const RowDescriptor& lhs_row_desc, const RowDescriptor& rhs_row_desc);
386 
387  RowDescriptor(const std::vector<TupleDescriptor*>& tuple_descs,
388  const std::vector<bool>& nullable_tuples);
389 
390  RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable);
391 
394 
398  int GetRowSize() const;
399 
400  static const int INVALID_IDX = -1;
401 
403  int GetTupleIdx(TupleId id) const;
404 
406  bool TupleIsNullable(int tuple_idx) const;
407 
409  bool IsAnyTupleNullable() const;
410 
// Returns the descriptors for all tuples in this row, in order of appearance.
// The result is a const reference to tuple_desc_map_, so no copy is made.
412  const std::vector<TupleDescriptor*>& tuple_descriptors() const {
413  return tuple_desc_map_;
414  }
415 
417  void ToThrift(std::vector<TTupleId>* row_tuple_ids);
418 
421  bool IsPrefixOf(const RowDescriptor& other_desc) const;
422 
424  bool Equals(const RowDescriptor& other_desc) const;
425 
426  std::string DebugString() const;
427 
428  private:
430  void InitTupleIdxMap();
431 
433  std::vector<TupleDescriptor*> tuple_desc_map_;
434 
436  std::vector<bool> tuple_idx_nullable_map_;
437 
439  std::vector<int> tuple_idx_map_;
440 };
441 
442 }
443 
444 #endif
const std::string & null_column_value() const
Definition: descriptors.h:233
std::string null_partition_key_value_
Definition: descriptors.h:253
const TableDescriptor * table_desc() const
Definition: descriptors.h:304
virtual std::string DebugString() const
Definition: descriptors.cc:190
int GetRowSize() const
Definition: descriptors.cc:320
std::string DebugString() const
Definition: descriptors.cc:370
llvm::PointerType * tuple_ptr
Definition: descriptors.h:52
THdfsFileFormat::type file_format() const
Definition: descriptors.h:184
HBaseColumnDescriptor(const std::string &col_family, const std::string &col_qualifier, bool col_binary_encoded)
Definition: descriptors.h:274
Status OpenExprs(RuntimeState *state)
Definition: descriptors.cc:145
std::string table_name_
native name of hbase table
Definition: descriptors.h:285
static bool ColPathLessThan(const SlotDescriptor *a, const SlotDescriptor *b)
Definition: descriptors.cc:66
SlotDescriptorMap slot_desc_map_
Definition: descriptors.h:361
llvm::Function * set_null_fn_
Definition: descriptors.h:142
DataSourceTableDescriptor(const TTableDescriptor &tdesc)
Definition: descriptors.h:294
const int slot_size_
the byte size of this slot.
Definition: descriptors.h:130
const std::string & database() const
Definition: descriptors.h:164
std::vector< SlotDescriptor * > string_slots_
Definition: descriptors.h:331
Status PrepareExprs(RuntimeState *state)
Definition: descriptors.cc:134
llvm::StructType * tuple_struct
Definition: descriptors.h:51
const std::string table_name() const
Definition: descriptors.h:267
int TableId
Definition: global-types.h:25
virtual std::string DebugString() const
Definition: descriptors.cc:100
const NullIndicatorOffset null_indicator_offset_
Definition: descriptors.h:123
TableDescriptor * GetTableDescriptor(TableId id) const
Definition: descriptors.cc:427
std::vector< bool > tuple_idx_nullable_map_
tuple_idx_nullable_map_[i] is true if tuple i can be null
Definition: descriptors.h:436
TableDescriptorMap tbl_desc_map_
Definition: descriptors.h:359
std::vector< std::string > col_names_
Definition: descriptors.h:173
const std::vector< int > & col_path() const
Definition: descriptors.h:85
const std::string & hdfs_base_dir() const
Definition: descriptors.h:229
int num_null_bytes() const
Definition: descriptors.h:301
const std::vector< SlotDescriptor * > & slots() const
Definition: descriptors.h:302
std::vector< int > tuple_idx_map_
map from TupleId to position of tuple w/in row
Definition: descriptors.h:439
int byte_size() const
Definition: descriptors.h:300
HdfsPartitionDescriptor(const THdfsPartition &thrift_partition, ObjectPool *pool)
Definition: descriptors.cc:109
std::string null_column_value_
Special string to indicate NULL values in text-encoded columns.
Definition: descriptors.h:255
llvm::Function * is_null_fn_
Cached codegen'd functions.
Definition: descriptors.h:140
llvm::StructType * GenerateLlvmStruct(LlvmCodeGen *codegen)
Definition: descriptors.cc:556
const NullIndicatorOffset & null_indicator_offset() const
Definition: descriptors.h:89
int TupleId
Definition: global-types.h:23
TupleDescriptor * GetTupleDescriptor(TupleId id) const
Definition: descriptors.cc:437
bool TupleIsNullable(int tuple_idx) const
Return true if the Tuple of the given Tuple index is nullable.
Definition: descriptors.cc:333
static const int INVALID_IDX
Definition: descriptors.h:400
std::tr1::unordered_map< SlotId, SlotDescriptor * > SlotDescriptorMap
Definition: descriptors.h:357
ObjectPool * object_pool_
Owned by DescriptorTbl.
Definition: descriptors.h:260
TableDescriptor(const TTableDescriptor &tdesc)
Definition: descriptors.cc:91
const std::vector< SlotDescriptor * > & string_slots() const
Definition: descriptors.h:303
SlotDescriptor(const TSlotDescriptor &tdesc)
Definition: descriptors.cc:50
LLVM code generator. This is the top level object to generate jitted code.
Definition: llvm-codegen.h:107
bool is_nullable() const
Definition: descriptors.h:93
std::vector< int > indices
Definition: descriptors.h:53
HBaseTableDescriptor(const TTableDescriptor &tdesc)
Definition: descriptors.cc:209
llvm::StructType * llvm_struct_
Definition: descriptors.h:332
RowDescriptor(const RowDescriptor &desc)
standard copy c'tor, made explicit here
Definition: descriptors.h:379
const std::vector< HBaseColumnDescriptor > & cols() const
Definition: descriptors.h:281
llvm::Function * set_not_null_fn_
Definition: descriptors.h:141
bool IsPrefixOf(const RowDescriptor &other_desc) const
Definition: descriptors.cc:352
void CloseExprs(RuntimeState *state)
Definition: descriptors.cc:151
const ColumnType type_
Definition: descriptors.h:119
const ColumnType & type() const
Definition: descriptors.h:78
const std::string & location() const
Definition: descriptors.h:189
llvm::Function * CodegenIsNull(LlvmCodeGen *, llvm::StructType *tuple)
Definition: descriptors.cc:476
virtual std::string DebugString() const
Definition: descriptors.cc:167
ObjectPool pool
int SlotId
Definition: global-types.h:24
int num_clustering_cols() const
Definition: descriptors.h:153
const PartitionIdToDescriptorMap & partition_descriptors() const
Definition: descriptors.h:245
bool Equals(const RowDescriptor &other_desc) const
Return true if the tuple ids of this descriptor match tuple ids of other desc.
Definition: descriptors.cc:361
std::vector< SlotDescriptor * > slots_
Definition: descriptors.h:330
const TupleId parent_
Definition: descriptors.h:120
void ToThrift(std::vector< TTupleId > *row_tuple_ids)
Populate row_tuple_ids with our ids.
Definition: descriptors.cc:345
std::string DebugString() const
Definition: descriptors.cc:608
std::tr1::unordered_map< TupleId, TupleDescriptor * > TupleDescriptorMap
Definition: descriptors.h:356
std::vector< TupleDescriptor * > tuple_desc_map_
map from position of tuple w/in row to its descriptor
Definition: descriptors.h:433
int col_pos() const
Definition: descriptors.h:84
llvm::Function * CodegenUpdateNull(LlvmCodeGen *, llvm::StructType *tuple, bool set_null)
Definition: descriptors.cc:510
std::string DebugString() const
Definition: descriptors.cc:250
Base class for table descriptors.
Definition: descriptors.h:148
bool IsAnyTupleNullable() const
Return true if any Tuple of the row is nullable.
Definition: descriptors.cc:338
int GetTupleIdx(TupleId id) const
Returns INVALID_IDX if id not part of this row.
Definition: descriptors.cc:328
std::string DebugString() const
Definition: descriptors.cc:38
SlotId id() const
Definition: descriptors.h:77
const std::vector< std::string > & col_names() const
Definition: descriptors.h:165
TupleDescriptor(const TTupleDescriptor &tdesc)
Definition: descriptors.cc:232
TupleId id() const
Definition: descriptors.h:306
bool exprs_prepared_
True if PrepareExprs has been called, to prevent repeating expensive codegen.
Definition: descriptors.h:210
std::vector< ExprContext * > partition_key_value_ctxs_
Definition: descriptors.h:216
std::tr1::unordered_map< TableId, TableDescriptor * > TableDescriptorMap
Definition: descriptors.h:355
void InitTupleIdxMap()
Initializes tupleIdxMap during c'tor using the tuple_desc_map_.
Definition: descriptors.cc:307
const std::string & name() const
Definition: descriptors.h:163
const std::vector< TupleDescriptor * > & tuple_descriptors() const
Return descriptors for all tuples in this row, in order of appearance.
Definition: descriptors.h:412
PartitionIdToDescriptorMap partition_descriptors_
Definition: descriptors.h:256
bool IsClusteringCol(const SlotDescriptor *slot_desc) const
Definition: descriptors.h:158
std::string avro_schema_
Set to the table's Avro schema if this is an Avro table, empty string otherwise.
Definition: descriptors.h:258
virtual std::string DebugString() const
Definition: descriptors.cc:220
SlotDescriptor * GetSlotDescriptor(SlotId id) const
Definition: descriptors.cc:447
int slot_size() const
Definition: descriptors.h:94
void AddSlot(SlotDescriptor *slot)
Definition: descriptors.cc:242
std::string DebugString() const
Definition: descriptors.cc:75
int tuple_offset() const
Definition: descriptors.h:88
int field_idx() const
Returns the field index in the generated llvm struct for this slot's tuple.
Definition: descriptors.h:87
Metadata for a single partition inside an Hdfs table.
Definition: descriptors.h:177
static Status Create(ObjectPool *pool, const TDescriptorTable &thrift_tbl, DescriptorTbl **tbl)
Definition: descriptors.cc:378
Descriptor for a DataSourceTable.
Definition: descriptors.h:292
bool is_materialized() const
Definition: descriptors.h:92
std::vector< HBaseColumnDescriptor > cols_
List of family/qualifier pairs.
Definition: descriptors.h:288
HdfsPartitionDescriptor * GetPartition(int64_t partition_id) const
Definition: descriptors.h:238
RowDescriptor()
dummy descriptor, needed for the JNI EvalPredicate() function
Definition: descriptors.h:393
TableDescriptor * table_desc_
Definition: descriptors.h:326
TupleDescriptorMap tuple_desc_map_
Definition: descriptors.h:360
const std::vector< int > col_path_
Definition: descriptors.h:121
ostream & operator<<(ostream &os, const map< TNetworkAddress, llama::TAllocatedResource > &resources)
NullIndicatorOffset(int byte_offset, int bit_offset)
Definition: descriptors.h:65
HdfsTableDescriptor(const TTableDescriptor &tdesc, ObjectPool *pool)
Definition: descriptors.cc:173
THdfsFileFormat::type file_format_
The format (e.g. text, sequence file etc.) of data in the files in this partition.
Definition: descriptors.h:219
const bool is_materialized_
Definition: descriptors.h:137
TupleId parent() const
Definition: descriptors.h:79
std::string DebugString() const
Definition: descriptors.cc:157
const std::string & null_partition_key_value() const
Definition: descriptors.h:230
const std::string & avro_schema() const
Definition: descriptors.h:234
void GetTupleDescs(std::vector< TupleDescriptor * > *descs) const
return all registered tuple descriptors
Definition: descriptors.cc:458
const std::vector< ExprContext * > & partition_key_value_ctxs() const
Definition: descriptors.h:185
std::map< int64_t, HdfsPartitionDescriptor * > PartitionIdToDescriptorMap
Definition: descriptors.h:236