Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
com.cloudera.impala.planner.HBaseScanNode Class Reference
Inheritance diagram for com.cloudera.impala.planner.HBaseScanNode:
Collaboration diagram for com.cloudera.impala.planner.HBaseScanNode:

Public Member Functions

 HBaseScanNode (PlanNodeId id, TupleDescriptor desc)
 
void setKeyRanges (List< ValueRange > keyRanges)
 
void init (Analyzer analyzer) throws InternalException
 
void computeStats (Analyzer analyzer)
 
void computeCosts (TQueryOptions queryOptions)
 
TupleDescriptor getTupleDesc ()
 
List< TScanRangeLocations > getScanRangeLocations ()
 
boolean isTableMissingStats ()
 
boolean isTableMissingTableStats ()
 
boolean isTableMissingColumnStats ()
 
long getInputCardinality ()
 
PlanNodeId getId ()
 
void setId (PlanNodeId id)
 
long getLimit ()
 
boolean hasLimit ()
 
long getPerHostMemCost ()
 
long getCardinality ()
 
int getNumNodes ()
 
float getAvgRowSize ()
 
void setFragment (PlanFragment fragment)
 
PlanFragment getFragment ()
 
List< Expr > getConjuncts ()
 
ExprSubstitutionMap getOutputSmap ()
 
void setOutputSmap (ExprSubstitutionMap smap)
 
Set< ExprId > getAssignedConjuncts ()
 
void setAssignedConjuncts (Set< ExprId > conjuncts)
 
void setLimit (long limit)
 
void unsetLimit ()
 
ArrayList< TupleId > getTupleIds ()
 
ArrayList< TupleId > getTblRefIds ()
 
void setTblRefIds (ArrayList< TupleId > ids)
 
Set< TupleId > getNullableTupleIds ()
 
void addConjuncts (List< Expr > conjuncts)
 
void transferConjuncts (PlanNode recipient)
 
String getExplainString ()
 
TPlan treeToThrift ()
 
boolean isBlockingNode ()
 

Static Public Member Functions

static String printKey (byte[] key)
 
static long getPerHostMemUpperBound ()
 
static long addCardinalities (long a, long b)
 
static long multiplyCardinalities (long a, long b)
 

Protected Member Functions

String debugString ()
 
void toThrift (TPlanNode msg)
 
String getNodeExplainString (String prefix, String detailPrefix, TExplainLevel detailLevel)
 
String getStatsExplainString (String prefix, TExplainLevel detailLevel)
 
final String getExplainString (String rootPrefix, String prefix, TExplainLevel detailLevel)
 
String getExplainString (List< ? extends Expr > exprs)
 
void setDisplayName (String s)
 
final String getDisplayLabel ()
 
String getDisplayLabelDetail ()
 
String getOffsetExplainString (String prefix)
 
void assignConjuncts (Analyzer analyzer)
 
ExprSubstitutionMap getCombinedChildSmap ()
 
void createDefaultSmap (Analyzer analyzer)
 
long capAtLimit (long cardinality)
 
void markSlotsMaterialized (Analyzer analyzer, List< Expr > exprs)
 
void computeMemLayout (Analyzer analyzer)
 
double computeSelectivity ()
 
boolean hasValidStats ()
 

Static Protected Member Functions

static TNetworkAddress addressToTNetworkAddress (String address)
 

Protected Attributes

long inputCardinality_ = -1
 
int numPartitionsMissingStats_ = 0
 
List< TScanRangeLocations > scanRanges_
 
String displayName_
 
PlanNodeId id_
 
long limit_
 
ArrayList< TupleId > tupleIds_
 
ArrayList< TupleId > tblRefIds_
 
Set< TupleId > nullableTupleIds_ = Sets.newHashSet()
 
List< Expr > conjuncts_ = Lists.newArrayList()
 
PlanFragment fragment_
 
ExprSubstitutionMap outputSmap_
 
Set< ExprId > assignedConjuncts_
 
long cardinality_
 
int numNodes_
 
float avgRowSize_
 
long perHostMemCost_ = -1
 

Static Protected Attributes

static final int DEFAULT_BATCH_SIZE = 1024
 

Private Member Functions

void setStartStopKey (Analyzer analyzer) throws InternalException
 
void createHBaseFilters (Analyzer analyzer)
 
void computeScanRangeLocations (Analyzer analyzer)
 
void setKeyRangeStart (THBaseKeyRange keyRange, byte[] rangeStartKey)
 
void setKeyRangeEnd (THBaseKeyRange keyRange, byte[] rangeEndKey)
 
byte[] convertToBytes (String rowKey, boolean nextKey)
 

Static Private Member Functions

static CompareFilter.CompareOp impalaOpToHBaseOp (BinaryPredicate.Operator impalaOp)
 

Private Attributes

final TupleDescriptor desc_
 
List< ValueRange > keyRanges_
 
byte[] startKey_ = HConstants.EMPTY_START_ROW
 
byte[] stopKey_ = HConstants.EMPTY_END_ROW
 
boolean isEmpty_ = false
 
final List< THBaseFilter > filters_ = new ArrayList<THBaseFilter>()
 
int suggestedCaching_ = DEFAULT_SUGGESTED_CACHING
 

Static Private Attributes

static final Logger LOG = LoggerFactory.getLogger(HBaseScanNode.class)
 
static final int MAX_HBASE_FETCH_BATCH_SIZE = 500 * 1024 * 1024
 
static final int DEFAULT_SUGGESTED_CACHING = 1024
 
static Configuration hbaseConf_ = HBaseConfiguration.create()
 

Detailed Description

Full scan of an HBase table. Only families/qualifiers specified in TupleDescriptor will be retrieved in the backend.

Definition at line 68 of file HBaseScanNode.java.

Constructor & Destructor Documentation

com.cloudera.impala.planner.HBaseScanNode.HBaseScanNode ( PlanNodeId  id,
TupleDescriptor  desc 
)
inline

Member Function Documentation

static long com.cloudera.impala.planner.PlanNode.addCardinalities ( long  a,
long  b 
)
inlinestaticinherited
void com.cloudera.impala.planner.PlanNode.addConjuncts ( List< Expr > conjuncts)
inlineinherited

Definition at line 209 of file PlanNode.java.

static TNetworkAddress com.cloudera.impala.planner.ScanNode.addressToTNetworkAddress ( String  address)
inlinestaticprotectedinherited

Helper function to parse a "host:port" address string into a TNetworkAddress. This is called with ipaddress:port when doing scan range assignment.

Definition at line 132 of file ScanNode.java.

Referenced by com.cloudera.impala.planner.HBaseScanNode.computeScanRangeLocations(), and com.cloudera.impala.planner.DataSourceScanNode.computeScanRangeLocations().

void com.cloudera.impala.planner.HBaseScanNode.computeCosts ( TQueryOptions  queryOptions)
inline
byte [] com.cloudera.impala.planner.HBaseScanNode.convertToBytes ( String  rowKey,
boolean  nextKey 
)
inlineprivate

Convert key into byte array and append a '\0' if 'nextKey' is true.

Definition at line 447 of file HBaseScanNode.java.

Referenced by com.cloudera.impala.planner.HBaseScanNode.setStartStopKey().

void com.cloudera.impala.planner.PlanNode.createDefaultSmap ( Analyzer  analyzer)
inlineprotectedinherited
Set<ExprId> com.cloudera.impala.planner.PlanNode.getAssignedConjuncts ( )
inlineinherited
ExprSubstitutionMap com.cloudera.impala.planner.PlanNode.getCombinedChildSmap ( )
inlineprotectedinherited
List<Expr> com.cloudera.impala.planner.PlanNode.getConjuncts ( )
inlineinherited
String com.cloudera.impala.planner.PlanNode.getDisplayLabelDetail ( )
inlineprotectedinherited

Subclasses can override to provide a node specific detail string that is displayed to the user. e.g. scan can return the table name.

Definition at line 234 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.PlanNode.treeToThriftHelper().

final String com.cloudera.impala.planner.PlanNode.getExplainString ( String  rootPrefix,
String  prefix,
TExplainLevel  detailLevel 
)
inlineprotectedinherited
String com.cloudera.impala.planner.PlanNode.getExplainString ( List< ? extends Expr > exprs)
inlineprotectedinherited

Definition at line 506 of file PlanNode.java.

PlanFragment com.cloudera.impala.planner.PlanNode.getFragment ( )
inlineinherited
long com.cloudera.impala.planner.PlanNode.getLimit ( )
inlineinherited
Set<TupleId> com.cloudera.impala.planner.PlanNode.getNullableTupleIds ( )
inlineinherited
String com.cloudera.impala.planner.PlanNode.getOffsetExplainString ( String  prefix)
inlineprotectedinherited

Return the offset_ details, if applicable. This is available separately from 'getNodeExplainString' because we want to output 'limit: ...' (which can be printed from PlanNode) before 'offset: ...', which is only printed from SortNodes right now.

Definition at line 336 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.PlanNode.getExplainString().

ExprSubstitutionMap com.cloudera.impala.planner.PlanNode.getOutputSmap ( )
inlineinherited

Definition at line 178 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.outputSmap_.

long com.cloudera.impala.planner.PlanNode.getPerHostMemCost ( )
inlineinherited
static long com.cloudera.impala.planner.HBaseScanNode.getPerHostMemUpperBound ( )
inlinestatic

Returns the per-host upper bound of memory that any number of concurrent scan nodes will use. Used for estimating the per-host memory requirement of queries.

Definition at line 499 of file HBaseScanNode.java.

List<TScanRangeLocations> com.cloudera.impala.planner.ScanNode.getScanRangeLocations ( )
inlineinherited

Returns all scan ranges plus their locations.

Definition at line 54 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.scanRanges_.

String com.cloudera.impala.planner.ScanNode.getStatsExplainString ( String  prefix,
TExplainLevel  detailLevel 
)
inlineprotectedinherited

Returns the explain string for table and column stats to be included in a ScanNode's explain string. The given prefix is prepended to each of the lines. The prefix is used for proper formatting when the string returned by this method is embedded in a query's explain plan.

Definition at line 75 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.desc_, com.cloudera.impala.analysis.TupleDescriptor.getSlots(), com.cloudera.impala.analysis.TupleDescriptor.getTable(), and com.cloudera.impala.planner.ScanNode.numPartitionsMissingStats_.

Referenced by com.cloudera.impala.planner.DataSourceScanNode.getNodeExplainString(), com.cloudera.impala.planner.HBaseScanNode.getNodeExplainString(), and com.cloudera.impala.planner.HdfsScanNode.getNodeExplainString().

ArrayList<TupleId> com.cloudera.impala.planner.PlanNode.getTblRefIds ( )
inlineinherited
TupleDescriptor com.cloudera.impala.planner.ScanNode.getTupleDesc ( )
inlineinherited

Definition at line 49 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.desc_.

boolean com.cloudera.impala.planner.PlanNode.hasValidStats ( )
inlineprotectedinherited
static CompareFilter.CompareOp com.cloudera.impala.planner.HBaseScanNode.impalaOpToHBaseOp ( BinaryPredicate.Operator  impalaOp)
inlinestaticprivate
boolean com.cloudera.impala.planner.PlanNode.isBlockingNode ( )
inlineinherited

Returns true if this plan node can output its first row only after consuming all rows of all its children. This method is used to group plan nodes into pipelined units for resource estimation.

Definition at line 555 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.PipelinedPlanNodeSet.computePlanNodeSets().

boolean com.cloudera.impala.planner.ScanNode.isTableMissingColumnStats ( )
inlineinherited
boolean com.cloudera.impala.planner.ScanNode.isTableMissingStats ( )
inlineinherited

Returns true if the table underlying this scan is missing table stats or column stats relevant to this scan node.

Definition at line 111 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.isTableMissingColumnStats(), and com.cloudera.impala.planner.ScanNode.isTableMissingTableStats().

boolean com.cloudera.impala.planner.ScanNode.isTableMissingTableStats ( )
inlineinherited
void com.cloudera.impala.planner.PlanNode.markSlotsMaterialized ( Analyzer  analyzer,
List< Expr > exprs 
)
inlineprotectedinherited

Marks all slots referenced in exprs as materialized.

Definition at line 464 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.HdfsScanNode.init().

static long com.cloudera.impala.planner.PlanNode.multiplyCardinalities ( long  a,
long  b 
)
inlinestaticinherited

Computes and returns the product of two cardinalities. If an overflow occurs, the maximum Long value is returned (Long.MAX_VALUE).

Definition at line 541 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.CrossJoinNode.computeStats(), and com.cloudera.impala.planner.HashJoinNode.getJoinCardinality().

static String com.cloudera.impala.planner.HBaseScanNode.printKey ( byte[]  key)
inlinestatic

Prints non-printable characters in escaped octal, otherwise outputs the characters.

Definition at line 461 of file HBaseScanNode.java.

Referenced by com.cloudera.impala.planner.HBaseScanNode.getNodeExplainString().

void com.cloudera.impala.planner.PlanNode.setAssignedConjuncts ( Set< ExprId > conjuncts)
inlineinherited
void com.cloudera.impala.planner.PlanNode.setDisplayName ( String  s)
inlineprotectedinherited

Definition at line 223 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.displayName_.

void com.cloudera.impala.planner.PlanNode.setFragment ( PlanFragment  fragment)
inlineinherited

Definition at line 175 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.fragment_.

void com.cloudera.impala.planner.PlanNode.setId ( PlanNodeId  id)
inlineinherited

Definition at line 165 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.id_.

void com.cloudera.impala.planner.HBaseScanNode.setKeyRangeEnd ( THBaseKeyRange  keyRange,
byte[]  rangeEndKey 
)
inlineprivate

Set the end key of keyRange using the provided key, bounded by stopKey_

Parameters
keyRange: the keyRange to be updated
rangeEndKey: the end key value to be set to

Definition at line 375 of file HBaseScanNode.java.

References com.cloudera.impala.planner.HBaseScanNode.stopKey_.

Referenced by com.cloudera.impala.planner.HBaseScanNode.computeScanRangeLocations().

void com.cloudera.impala.planner.HBaseScanNode.setKeyRanges ( List< ValueRange > keyRanges)
inline
void com.cloudera.impala.planner.HBaseScanNode.setKeyRangeStart ( THBaseKeyRange  keyRange,
byte[]  rangeStartKey 
)
inlineprivate

Set the start key of keyRange using the provided key, bounded by startKey_

Parameters
keyRange: the keyRange to be updated
rangeStartKey: the start key value to be set to

Definition at line 359 of file HBaseScanNode.java.

References com.cloudera.impala.planner.HBaseScanNode.startKey_.

Referenced by com.cloudera.impala.planner.HBaseScanNode.computeScanRangeLocations().

void com.cloudera.impala.planner.PlanNode.setLimit ( long  limit)
inlineinherited

Set limit_ to the given limit only if limit_ hasn't been set, or the new limit is lower.

Parameters
limit_

Definition at line 190 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.limit_.

void com.cloudera.impala.planner.PlanNode.setOutputSmap ( ExprSubstitutionMap  smap)
inlineinherited

Definition at line 179 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.outputSmap_.

void com.cloudera.impala.planner.HBaseScanNode.setStartStopKey ( Analyzer  analyzer) throws InternalException
inlineprivate

Convert keyRanges_ to startKey_ and stopKey_. If ValueRange is not null, transform it into start/stopKey_ by evaluating the expression. Analysis has checked that the expression is string type. If the expression evaluates to null, then there's nothing to scan because an HBase row key cannot be null. At present, we only do row key filtering for string-mapped keys. String-mapped keys are always encoded as ASCII. ValueRange is null if there is no predicate on the row-key.

Definition at line 141 of file HBaseScanNode.java.

References com.cloudera.impala.planner.HBaseScanNode.convertToBytes(), com.cloudera.impala.planner.ValueRange.getLowerBound(), com.cloudera.impala.planner.ValueRange.getUpperBound(), com.cloudera.impala.planner.HBaseScanNode.isEmpty_, com.cloudera.impala.planner.HBaseScanNode.keyRanges_, com.cloudera.impala.planner.HBaseScanNode.startKey_, com.cloudera.impala.planner.HBaseScanNode.stopKey_, and com.cloudera.impala.catalog.Type.STRING.

Referenced by com.cloudera.impala.planner.HBaseScanNode.init().

void com.cloudera.impala.planner.PlanNode.setTblRefIds ( ArrayList< TupleId > ids)
inlineinherited

Definition at line 202 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.tblRefIds_.

void com.cloudera.impala.planner.PlanNode.transferConjuncts ( PlanNode  recipient)
inlineinherited

Definition at line 214 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.conjuncts_.

TPlan com.cloudera.impala.planner.PlanNode.treeToThrift ( )
inlineinherited
void com.cloudera.impala.planner.PlanNode.unsetLimit ( )
inlineinherited

Definition at line 194 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.limit_.

Member Data Documentation

final int com.cloudera.impala.planner.PlanNode.DEFAULT_BATCH_SIZE = 1024
staticprotectedinherited

Definition at line 63 of file PlanNode.java.

final int com.cloudera.impala.planner.HBaseScanNode.DEFAULT_SUGGESTED_CACHING = 1024
staticprivate

Definition at line 99 of file HBaseScanNode.java.

final List<THBaseFilter> com.cloudera.impala.planner.HBaseScanNode.filters_ = new ArrayList<THBaseFilter>()
private
Configuration com.cloudera.impala.planner.HBaseScanNode.hbaseConf_ = HBaseConfiguration.create()
staticprivate
PlanNodeId com.cloudera.impala.planner.PlanNode.id_
protectedinherited
List<ValueRange> com.cloudera.impala.planner.HBaseScanNode.keyRanges_
private
final Logger com.cloudera.impala.planner.HBaseScanNode.LOG = LoggerFactory.getLogger(HBaseScanNode.class)
staticprivate

Definition at line 69 of file HBaseScanNode.java.

final int com.cloudera.impala.planner.HBaseScanNode.MAX_HBASE_FETCH_BATCH_SIZE = 500 * 1024 * 1024
staticprivate
Set<TupleId> com.cloudera.impala.planner.PlanNode.nullableTupleIds_ = Sets.newHashSet()
protectedinherited
int com.cloudera.impala.planner.ScanNode.numPartitionsMissingStats_ = 0
protectedinherited
int com.cloudera.impala.planner.HBaseScanNode.suggestedCaching_ = DEFAULT_SUGGESTED_CACHING
private
ArrayList<TupleId> com.cloudera.impala.planner.PlanNode.tblRefIds_
protectedinherited

The documentation for this class was generated from the following file: