Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hbase-table-scanner.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_EXEC_HBASE_TABLE_SCANNER_H
16 #define IMPALA_EXEC_HBASE_TABLE_SCANNER_H
17 
18 #include <boost/scoped_ptr.hpp>
19 #include <jni.h>
20 #include <string>
21 #include <sstream>
22 #include <vector>
23 #include "gen-cpp/PlanNodes_types.h"
24 #include "exec/scan-node.h"
26 #include "runtime/hbase-table.h"
27 
28 namespace impala {
29 
30 class TupleDescriptor;
31 class Tuple;
32 class RuntimeState;
33 class MemPool;
34 class Status;
35 class HBaseScanNode;
36 
40 //
47 //
54 //
60 //
69  public:
// NOTE(review): the `class HBaseTableScanner {` line that should precede the `public:`
// label above is absent from this listing; the constructor name and the closing `};`
// before the namespace end imply it.
// Constructor. NOTE(review): scan_node and htable_factory presumably initialize the
// scan_node_ ("The enclosing HBaseScanNode.") and htable_factory_ ("HBase Table factory
// from runtime state.") members listed in the member index -- confirm in the .cc file.
73  HBaseTableScanner(HBaseScanNode* scan_node, HBaseTableFactory* htable_factory,
74  RuntimeState* state);
75 
// Static, argument-free initializer that returns a Status.
// NOTE(review): presumably resolves the static JNI class/method handles declared in the
// private section -- confirm in the .cc file.
78  static Status Init();
79 
// HBase scan range; "" means unbounded.
// NOTE(review): the constructor declarator (presumably `ScanRange()`) is absent from this
// listing between `public:` and the member-initializer list below; restore it from the
// original header before compiling.
81  class ScanRange {
82  public:
// Both keys are default-constructed, i.e. start as empty strings.
84  : start_key_(),
85  stop_key_() {
86  }
87 
// Accessors: the getters return const references to the stored keys; the setters copy
// the given key into the corresponding member.
88  const std::string& start_key() const { return start_key_; }
89  const std::string& stop_key() const {return stop_key_; }
90  void set_start_key(const std::string& key) { start_key_ = key; }
91  void set_stop_key(const std::string& key) { stop_key_ = key; }
92 
// Write debug string of this ScanRange into out.
94  void DebugString(int indentation_level, std::stringstream* out);
95 
96  private:
// Key strings of the range, exposed through start_key() / stop_key().
97  std::string start_key_;
98  std::string stop_key_;
99  };
100 
101  typedef std::vector<ScanRange> ScanRangeVector;
102 
// Takes the tuple descriptor, the vector of scan ranges and the HBase filters for a scan
// and returns a Status.
// NOTE(review): the original doc comment was stripped from this listing; exact semantics
// (e.g. whether the first range is opened here) must be confirmed in the .cc file.
106  Status StartScan(JNIEnv* env, const TupleDescriptor* tuple_desc,
107  const ScanRangeVector& scan_range_vector,
108  const std::vector<THBaseFilter>& filters);
109 
// Returns a Status and reports through the has_next out-parameter.
// The member index states that cell_index_ is reset in Next().
// NOTE(review): presumably advances to the next row -- confirm.
113  Status Next(JNIEnv* env, bool* has_next);
114 
// Get the current HBase row key. The key is returned through the key / key_length
// out-parameters.
116  Status GetRowKey(JNIEnv* env, void** key, int* key_length);
117 
// Overload taking a slot descriptor and a tuple instead of raw out-parameters.
// NOTE(review): presumably writes the row key into that slot of the tuple -- confirm.
120  Status GetRowKey(JNIEnv* env, const SlotDescriptor* slot_desc, Tuple* tuple);
121 
// Looks up a value by column family and qualifier; the result is returned through the
// value / value_length out-parameters.
// NOTE(review): behavior when the requested cell is absent is not visible here -- confirm.
125  Status GetValue(JNIEnv* env, const std::string& family, const std::string& qualifier,
126  void** value, int* value_length);
127 
// Overload taking a slot descriptor and a tuple instead of raw out-parameters.
// NOTE(review): presumably writes the value into that slot of the tuple -- confirm.
132  Status GetValue(JNIEnv* env, const std::string& family, const std::string& qualifier,
133  const SlotDescriptor* slot_desc, Tuple* tuple);
134 
// Close HTable and ResultScanner.
136  void Close(JNIEnv* env);
137 
// Setter: stores the argument in num_requested_cells_.
// NOTE(review): the declaration of num_requested_cells_ is not present in this listing.
138  void set_num_requested_cells(int num_requested_cells) {
139  num_requested_cells_ = num_requested_cells;
140  }
141 
142  private:
143  static const int DEFAULT_ROWS_CACHED = 1024;
144 
148 
// Global class references created with JniUtil. Cleanup is done in JniUtil::Cleanup().
// NOTE(review): the member index also lists `static jclass single_column_value_filter_cl_`,
// whose declaration is absent from this listing (the embedded line numbers skip here).
150  static jclass scan_cl_;
151  static jclass resultscanner_cl_;
152  static jclass result_cl_;
// Cell or KeyValue class depending on HBase version (see class comment).
154  static jclass cell_cl_;
155  static jclass hconstants_cl_;
156  static jclass filter_list_cl_;
157  static jclass filter_list_op_cl_;
159  static jclass compare_op_cl_;
// Exception thrown when a ResultScanner times out.
161  static jclass scanner_timeout_ex_cl_;
162 
// Static JNI method IDs. NOTE(review): names appear to follow <java class>_<method>;
// confirm the exact Java signatures where they are looked up in the .cc file.
// NOTE(review): the member index also lists cell_get_qualifier_offset_id_,
// cell_get_qualifier_length_id_ and single_column_value_filter_ctor_, whose declarations
// are absent from this listing.
163  static jmethodID scan_ctor_;
164  static jmethodID scan_set_max_versions_id_;
165  static jmethodID scan_set_caching_id_;
166  static jmethodID scan_set_cache_blocks_id_;
167  static jmethodID scan_add_column_id_;
168  static jmethodID scan_set_filter_id_;
169  static jmethodID scan_set_start_row_id_;
170  static jmethodID scan_set_stop_row_id_;
171  static jmethodID resultscanner_next_id_;
172  static jmethodID resultscanner_close_id_;
173  static jmethodID result_isempty_id_;
174  static jmethodID result_raw_cells_id_;
175  static jmethodID cell_get_row_array_;
176  static jmethodID cell_get_family_array_;
177  static jmethodID cell_get_qualifier_array_;
178  static jmethodID cell_get_value_array_;
179  static jmethodID cell_get_family_offset_id_;
180  static jmethodID cell_get_family_length_id_;
183  static jmethodID cell_get_row_offset_id_;
184  static jmethodID cell_get_row_length_id_;
185  static jmethodID cell_get_value_offset_id_;
186  static jmethodID cell_get_value_length_id_;
187  static jmethodID filter_list_ctor_;
188  static jmethodID filter_list_add_filter_id_;
190 
// Static Java object handles.
// NOTE(review): their meaning is not documented in this listing; confirm in the .cc file.
191  static jobject empty_row_;
192  static jobject must_pass_all_op_;
193  static jobjectArray compare_ops_;
194 
197 
// NOTE(review): several instance-member declarations are absent from this listing (the
// numbered blank lines below mark where they were). The member index documents:
//   scan_node_         - The enclosing HBaseScanNode.
//   htable_factory_    - HBase Table factory from runtime state.
//   scan_range_vector_ - Vector of ScanRange.
//   cell_index_        - Current position in cells_. Incremented in NextValue(). Reset in Next().
//   num_cells_         - Number of cells returned from last result_.raw().
//   cache_blocks_      - True if the scanner should set Scan.setCacheBlocks to true.
//   scan_setup_timer_  - HBase specific counters.
200  int current_scan_range_idx_; // the index of the current scan range
201 
// C++ wrapper for HTable.
203  boost::scoped_ptr<HBaseTable> htable_;
204 
207  jobject scan_; // Java type Scan
208  jobject resultscanner_; // Java type ResultScanner
209 
// Java object array indexed by cell_index_ (per the member index).
// NOTE(review): presumably holds the cells of the current row -- confirm.
213  jobjectArray cells_;
214 
217 
221 
225 
228 
232 
// Scoped pointer to a MemPool.
// NOTE(review): presumably backs the value buffers handed out by GetValue() -- confirm.
235  boost::scoped_ptr<MemPool> value_pool_;
236 
240 
243 
246 
// Returns a Status and reports through the timeout out-parameter.
// NOTE(review): presumably detects a ResultScanner timeout (see scanner_timeout_ex_cl_)
// and recovers from it -- confirm in the .cc file.
253  Status HandleResultScannerTimeout(JNIEnv* env, bool* timeout);
254 
// Compares the string s against the buffer data of the given length and returns an int.
// NOTE(review): sign convention of the result is not visible here -- confirm.
258  int CompareStrings(const std::string& s, void* data, int length);
259 
// Turn strings into Java byte array. The array is returned through bytes.
261  Status CreateByteArray(JNIEnv* env, const std::string& s, jbyteArray* bytes);
262 
// First time scanning the table, do some setup.
264  Status ScanSetup(JNIEnv* env, const TupleDescriptor* tuple_desc,
265  const std::vector<THBaseFilter>& filters);
266 
// Initialize the scan to the given range.
268  Status InitScanRange(JNIEnv* env, const ScanRange& scan_range);
// Overload taking the start/end keys as Java byte arrays.
271  Status InitScanRange(JNIEnv* env, jbyteArray start_bytes, jbyteArray end_bytes);
272 
// Per-cell accessors: each takes a cell object and returns a buffer through the
// data / length out-parameters.
// NOTE(review): ownership and lifetime of the returned buffers are not visible in this
// listing -- confirm in the .cc / inline header.
274  inline void GetRowKey(JNIEnv* env, jobject cell, void** data, int* length);
275 
278  inline void GetFamily(JNIEnv* env, jobject cell, void** data, int* length);
279 
282  inline void GetQualifier(JNIEnv* env, jobject cell, void** data, int* length);
283 
285  inline void GetValue(JNIEnv* env, jobject cell, void** data, int* length);
286 
// Looks up a value by family and qualifier; results are returned through data, length
// and is_null.
// NOTE(review): presumably reads cells_ at cell_index_ -- confirm.
290  inline Status GetCurrentValue(JNIEnv* env, const std::string& family,
291  const std::string& qualifier, void** data, int* length, bool* is_null);
292 
// Takes a slot descriptor, a tuple and a data pointer.
// NOTE(review): presumably writes data into the tuple slot described by slot_desc -- confirm.
297  inline void WriteTupleSlot(const SlotDescriptor* slot_desc, Tuple* tuple, void* data);
298 };
299 
300 } // namespace impala
301 
302 #endif
static jmethodID cell_get_row_length_id_
const std::string & start_key() const
static jmethodID resultscanner_next_id_
static jmethodID cell_get_qualifier_array_
Status StartScan(JNIEnv *env, const TupleDescriptor *tuple_desc, const ScanRangeVector &scan_range_vector, const std::vector< THBaseFilter > &filters)
HBaseTableFactory * htable_factory_
HBase Table factory from runtime state.
static jmethodID scan_set_cache_blocks_id_
Status ScanSetup(JNIEnv *env, const TupleDescriptor *tuple_desc, const std::vector< THBaseFilter > &filters)
First time scanning the table, do some setup.
static jmethodID scan_set_filter_id_
void WriteTupleSlot(const SlotDescriptor *slot_desc, Tuple *tuple, void *data)
static jclass single_column_value_filter_cl_
static jmethodID cell_get_row_offset_id_
A tuple with 0 materialised slots is represented as NULL.
Definition: tuple.h:48
Status Next(JNIEnv *env, bool *has_next)
static jmethodID cell_get_value_array_
Status GetCurrentValue(JNIEnv *env, const std::string &family, const std::string &qualifier, void **data, int *length, bool *is_null)
void set_stop_key(const std::string &key)
static jmethodID cell_get_row_array_
int CompareStrings(const std::string &s, void *data, int length)
static jmethodID scan_set_start_row_id_
static jmethodID single_column_value_filter_ctor_
static jmethodID scan_add_column_id_
Status GetRowKey(JNIEnv *env, void **key, int *key_length)
Get the current HBase row key.
static jmethodID cell_get_value_length_id_
void Close(JNIEnv *env)
Close HTable and ResultScanner.
static jclass scan_cl_
Global class references created with JniUtil. Cleanup is done in JniUtil::Cleanup().
const ScanRangeVector * scan_range_vector_
Vector of ScanRange.
boost::scoped_ptr< HBaseTable > htable_
C++ wrapper for HTable.
static jmethodID result_raw_cells_id_
HBaseTableScanner(HBaseScanNode *scan_node, HBaseTableFactory *htable_factory, RuntimeState *state)
static jmethodID scan_set_caching_id_
static jmethodID scan_set_stop_row_id_
void set_num_requested_cells(int num_requested_cells)
Status HandleResultScannerTimeout(JNIEnv *env, bool *timeout)
Status InitScanRange(JNIEnv *env, const ScanRange &scan_range)
Initialize the scan to the given range.
static jmethodID cell_get_family_offset_id_
static jmethodID filter_list_add_filter_id_
Status GetValue(JNIEnv *env, const std::string &family, const std::string &qualifier, void **value, int *value_length)
HBaseScanNode * scan_node_
The enclosing HBaseScanNode.
static jmethodID result_isempty_id_
static jmethodID cell_get_value_offset_id_
static jmethodID cell_get_qualifier_offset_id_
void set_start_key(const std::string &key)
static jmethodID filter_list_ctor_
const std::string & stop_key() const
static jmethodID cell_get_qualifier_length_id_
static jmethodID cell_get_family_length_id_
int cell_index_
Current position in cells_. Incremented in NextValue(). Reset in Next().
int num_cells_
Number of cells returned from last result_.raw().
std::vector< ScanRange > ScanRangeVector
static jmethodID scan_set_max_versions_id_
static jmethodID resultscanner_close_id_
Status CreateByteArray(JNIEnv *env, const std::string &s, jbyteArray *bytes)
Turn strings into Java byte array.
static const int DEFAULT_ROWS_CACHED
bool cache_blocks_
True if the scanner should set Scan.setCacheBlocks to true.
void GetFamily(JNIEnv *env, jobject cell, void **data, int *length)
static jobjectArray compare_ops_
static jmethodID cell_get_family_array_
HBase scan range; "" means unbounded.
void DebugString(int indentation_level, std::stringstream *out)
Write debug string of this ScanRange into out.
boost::scoped_ptr< MemPool > value_pool_
void GetQualifier(JNIEnv *env, jobject cell, void **data, int *length)
RuntimeProfile::Counter * scan_setup_timer_
HBase specific counters.
static jclass cell_cl_
Cell or KeyValue class depending on HBase version (see class comment).
static jclass scanner_timeout_ex_cl_
Exception thrown when a ResultScanner times out.