Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
com.cloudera.impala.planner.HdfsScanNode Class Reference
Inheritance diagram for com.cloudera.impala.planner.HdfsScanNode:
Collaboration diagram for com.cloudera.impala.planner.HdfsScanNode:

Public Member Functions

 HdfsScanNode (PlanNodeId id, TupleDescriptor desc, HdfsTable tbl)
 
void init (Analyzer analyzer) throws InternalException
 
void computeStats (Analyzer analyzer)
 
void computeCosts (TQueryOptions queryOptions)
 
TupleDescriptor getTupleDesc ()
 
List< TScanRangeLocations > getScanRangeLocations ()
 
boolean isTableMissingStats ()
 
boolean isTableMissingTableStats ()
 
boolean isTableMissingColumnStats ()
 
long getInputCardinality ()
 
PlanNodeId getId ()
 
void setId (PlanNodeId id)
 
long getLimit ()
 
boolean hasLimit ()
 
long getPerHostMemCost ()
 
long getCardinality ()
 
int getNumNodes ()
 
float getAvgRowSize ()
 
void setFragment (PlanFragment fragment)
 
PlanFragment getFragment ()
 
List< Expr > getConjuncts ()
 
ExprSubstitutionMap getOutputSmap ()
 
void setOutputSmap (ExprSubstitutionMap smap)
 
Set< ExprId > getAssignedConjuncts ()
 
void setAssignedConjuncts (Set< ExprId > conjuncts)
 
void setLimit (long limit)
 
void unsetLimit ()
 
ArrayList< TupleId > getTupleIds ()
 
ArrayList< TupleId > getTblRefIds ()
 
void setTblRefIds (ArrayList< TupleId > ids)
 
Set< TupleId > getNullableTupleIds ()
 
void addConjuncts (List< Expr > conjuncts)
 
void transferConjuncts (PlanNode recipient)
 
String getExplainString ()
 
TPlan treeToThrift ()
 
boolean isBlockingNode ()
 

Static Public Member Functions

static long getPerHostMemUpperBound ()
 
static long addCardinalities (long a, long b)
 
static long multiplyCardinalities (long a, long b)
 

Protected Member Functions

String debugString ()
 
void toThrift (TPlanNode msg)
 
String getDisplayLabelDetail ()
 
String getNodeExplainString (String prefix, String detailPrefix, TExplainLevel detailLevel)
 
String getStatsExplainString (String prefix, TExplainLevel detailLevel)
 
final String getExplainString (String rootPrefix, String prefix, TExplainLevel detailLevel)
 
String getExplainString (List<? extends Expr > exprs)
 
void setDisplayName (String s)
 
final String getDisplayLabel ()
 
String getOffsetExplainString (String prefix)
 
void assignConjuncts (Analyzer analyzer)
 
ExprSubstitutionMap getCombinedChildSmap ()
 
void createDefaultSmap (Analyzer analyzer)
 
long capAtLimit (long cardinality)
 
void markSlotsMaterialized (Analyzer analyzer, List< Expr > exprs)
 
void computeMemLayout (Analyzer analyzer)
 
double computeSelectivity ()
 
boolean hasValidStats ()
 

Static Protected Member Functions

static TNetworkAddress addressToTNetworkAddress (String address)
 

Protected Attributes

final TupleDescriptor desc_
 
long inputCardinality_ = -1
 
int numPartitionsMissingStats_ = 0
 
List< TScanRangeLocations > scanRanges_
 
String displayName_
 
PlanNodeId id_
 
long limit_
 
ArrayList< TupleId > tupleIds_
 
ArrayList< TupleId > tblRefIds_
 
Set< TupleId > nullableTupleIds_ = Sets.newHashSet()
 
List< Expr > conjuncts_ = Lists.newArrayList()
 
PlanFragment fragment_
 
ExprSubstitutionMap outputSmap_
 
Set< ExprId > assignedConjuncts_
 
long cardinality_
 
int numNodes_
 
float avgRowSize_
 
long perHostMemCost_ = -1
 

Static Protected Attributes

static final int DEFAULT_BATCH_SIZE = 1024
 

Private Member Functions

void computeScanRangeLocations (Analyzer analyzer)
 
boolean canEvalUsingPartitionMd (Expr expr, Analyzer analyzer)
 
HashSet< Long > evalBinaryPredicate (Expr expr)
 
HashSet< Long > evalInPredicate (Expr expr)
 
HashSet< Long > evalIsNullPredicate (Expr expr)
 
HashSet< Long > evalSlotBindingFilter (Expr expr)
 
void prunePartitions (Analyzer analyzer) throws InternalException
 
void evalPartitionFiltersInBe (List< HdfsPartitionFilter > filters, HashSet< Long > matchingPartitionIds, Analyzer analyzer) throws InternalException
 

Private Attributes

final HdfsTable tbl_
 
final ArrayList< HdfsPartition > partitions_ = Lists.newArrayList()
 
long totalFiles_ = 0
 
long totalBytes_ = 0
 

Static Private Attributes

static final Logger LOG = LoggerFactory.getLogger(HdfsScanNode.class)
 
static final long IO_MGR_BUFFER_SIZE = 8L * 1024L * 1024L
 
static final long MAX_IO_BUFFERS_PER_THREAD = 10
 
static final int THREADS_PER_CORE = 3
 
static final double SCAN_RANGE_SKEW_FACTOR = 1.2
 
static final int PARTITION_PRUNING_BATCH_SIZE = 1024
 

Detailed Description

Scan of a single table. Currently limited to full-table scans. TODO: pass in range restrictions.

Definition at line 72 of file HdfsScanNode.java.

Constructor & Destructor Documentation

com.cloudera.impala.planner.HdfsScanNode.HdfsScanNode ( PlanNodeId  id,
TupleDescriptor  desc,
HdfsTable  tbl 
)
inline

Constructs node to scan given data files of table 'tbl_'.

Definition at line 106 of file HdfsScanNode.java.

References com.cloudera.impala.planner.HdfsScanNode.tbl_.

Member Function Documentation

static long com.cloudera.impala.planner.PlanNode.addCardinalities ( long  a,
long  b 
)
inlinestaticinherited
void com.cloudera.impala.planner.PlanNode.addConjuncts ( List< Expr >  conjuncts)
inlineinherited

Definition at line 209 of file PlanNode.java.

static TNetworkAddress com.cloudera.impala.planner.ScanNode.addressToTNetworkAddress ( String  address)
inlinestaticprotectedinherited

Helper function to parse a "host:port" address string into TNetworkAddress. This is called with ipaddress:port when doing scan range assignment.

Definition at line 132 of file ScanNode.java.

Referenced by com.cloudera.impala.planner.HBaseScanNode.computeScanRangeLocations(), and com.cloudera.impala.planner.DataSourceScanNode.computeScanRangeLocations().

boolean com.cloudera.impala.planner.HdfsScanNode.canEvalUsingPartitionMd ( Expr  expr,
Analyzer  analyzer 
)
inlineprivate

Recursive function that checks if a given partition expr can be evaluated directly from the partition key values. If 'expr' contains any constant expressions, they are evaluated in the BE and are replaced by their corresponding results, as LiteralExprs.

Definition at line 217 of file HdfsScanNode.java.

References com.cloudera.impala.analysis.Expr.isLiteral().

Referenced by com.cloudera.impala.planner.HdfsScanNode.prunePartitions().

void com.cloudera.impala.planner.HdfsScanNode.computeScanRangeLocations ( Analyzer  analyzer)
inlineprivate

Computes scan ranges (hdfs splits) plus their storage locations, including volume ids, based on the given maximum number of bytes each scan range should scan.

Definition at line 155 of file HdfsScanNode.java.

References com.cloudera.impala.catalog.HdfsPartition.getFileDescriptors(), com.cloudera.impala.planner.HdfsScanNode.partitions_, and com.cloudera.impala.planner.ScanNode.scanRanges_.

Referenced by com.cloudera.impala.planner.HdfsScanNode.init().

void com.cloudera.impala.planner.PlanNode.createDefaultSmap ( Analyzer  analyzer)
inlineprotectedinherited
String com.cloudera.impala.planner.HdfsScanNode.debugString ( )
inlineprotected
HashSet<Long> com.cloudera.impala.planner.HdfsScanNode.evalBinaryPredicate ( Expr  expr)
inlineprivate
HashSet<Long> com.cloudera.impala.planner.HdfsScanNode.evalInPredicate ( Expr  expr)
inlineprivate

Evaluate an InPredicate filter on a partition column and return the ids of the matching partitions.

Definition at line 362 of file HdfsScanNode.java.

Referenced by com.cloudera.impala.planner.HdfsScanNode.evalSlotBindingFilter().

HashSet<Long> com.cloudera.impala.planner.HdfsScanNode.evalIsNullPredicate ( Expr  expr)
inlineprivate

Evaluate an IsNullPredicate on a partition column and return the ids of the matching partitions.

Definition at line 403 of file HdfsScanNode.java.

Referenced by com.cloudera.impala.planner.HdfsScanNode.evalSlotBindingFilter().

void com.cloudera.impala.planner.HdfsScanNode.evalPartitionFiltersInBe ( List< HdfsPartitionFilter >  filters,
HashSet< Long >  matchingPartitionIds,
Analyzer  analyzer 
) throws InternalException
inlineprivate

Evaluate a list of HdfsPartitionFilters in the BE. These are 'complex' filters that could not be evaluated from the partition key values.

Definition at line 537 of file HdfsScanNode.java.

References com.cloudera.impala.planner.HdfsScanNode.PARTITION_PRUNING_BATCH_SIZE.

Referenced by com.cloudera.impala.planner.HdfsScanNode.prunePartitions().

HashSet<Long> com.cloudera.impala.planner.HdfsScanNode.evalSlotBindingFilter ( Expr  expr)
inlineprivate

Evaluate a slot binding predicate on a partition key using the partition key values; return the matching partition ids. An empty set is returned if there are no matching partitions. This function can evaluate the following types of predicates: BinaryPredicate, CompoundPredicate, IsNullPredicate, and InPredicate.

Definition at line 429 of file HdfsScanNode.java.

References com.cloudera.impala.planner.HdfsScanNode.evalBinaryPredicate(), com.cloudera.impala.planner.HdfsScanNode.evalInPredicate(), and com.cloudera.impala.planner.HdfsScanNode.evalIsNullPredicate().

Referenced by com.cloudera.impala.planner.HdfsScanNode.prunePartitions().

Set<ExprId> com.cloudera.impala.planner.PlanNode.getAssignedConjuncts ( )
inlineinherited
ExprSubstitutionMap com.cloudera.impala.planner.PlanNode.getCombinedChildSmap ( )
inlineprotectedinherited
List<Expr> com.cloudera.impala.planner.PlanNode.getConjuncts ( )
inlineinherited
final String com.cloudera.impala.planner.PlanNode.getExplainString ( String  rootPrefix,
String  prefix,
TExplainLevel  detailLevel 
)
inlineprotectedinherited
String com.cloudera.impala.planner.PlanNode.getExplainString ( List<? extends Expr >  exprs)
inlineprotectedinherited

Definition at line 506 of file PlanNode.java.

PlanFragment com.cloudera.impala.planner.PlanNode.getFragment ( )
inlineinherited
long com.cloudera.impala.planner.PlanNode.getLimit ( )
inlineinherited
Set<TupleId> com.cloudera.impala.planner.PlanNode.getNullableTupleIds ( )
inlineinherited
String com.cloudera.impala.planner.PlanNode.getOffsetExplainString ( String  prefix)
inlineprotectedinherited

Return the offset_ details, if applicable. This is available separately from 'getNodeExplainString' because we want to output 'limit: ...' (which can be printed from PlanNode) before 'offset: ...', which is only printed from SortNodes right now.

Definition at line 336 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.PlanNode.getExplainString().

ExprSubstitutionMap com.cloudera.impala.planner.PlanNode.getOutputSmap ( )
inlineinherited

Definition at line 178 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.outputSmap_.

long com.cloudera.impala.planner.PlanNode.getPerHostMemCost ( )
inlineinherited
static long com.cloudera.impala.planner.HdfsScanNode.getPerHostMemUpperBound ( )
inlinestatic

Hdfs scans use a shared pool of buffers managed by the I/O manager. Intuitively, the maximum number of I/O buffers is limited by the total disk bandwidth of a node. Therefore, this upper bound is independent of the number of concurrent scans and queries and helps to derive a tighter per-host memory estimate for queries with multiple concurrent scans.

Definition at line 749 of file HdfsScanNode.java.

References com.cloudera.impala.planner.HdfsScanNode.IO_MGR_BUFFER_SIZE, com.cloudera.impala.planner.HdfsScanNode.MAX_IO_BUFFERS_PER_THREAD, and com.cloudera.impala.planner.HdfsScanNode.THREADS_PER_CORE.

Referenced by com.cloudera.impala.planner.HdfsScanNode.computeCosts().

List<TScanRangeLocations> com.cloudera.impala.planner.ScanNode.getScanRangeLocations ( )
inlineinherited

Returns all scan ranges plus their locations.

Definition at line 54 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.scanRanges_.

String com.cloudera.impala.planner.ScanNode.getStatsExplainString ( String  prefix,
TExplainLevel  detailLevel 
)
inlineprotectedinherited

Returns the explain string for table and columns stats to be included into a ScanNode's explain string. The given prefix is prepended to each of the lines. The prefix is used for proper formatting when the string returned by this method is embedded in a query's explain plan.

Definition at line 75 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.desc_, com.cloudera.impala.analysis.TupleDescriptor.getSlots(), com.cloudera.impala.analysis.TupleDescriptor.getTable(), and com.cloudera.impala.planner.ScanNode.numPartitionsMissingStats_.

Referenced by com.cloudera.impala.planner.DataSourceScanNode.getNodeExplainString(), com.cloudera.impala.planner.HBaseScanNode.getNodeExplainString(), and com.cloudera.impala.planner.HdfsScanNode.getNodeExplainString().

ArrayList<TupleId> com.cloudera.impala.planner.PlanNode.getTblRefIds ( )
inlineinherited
TupleDescriptor com.cloudera.impala.planner.ScanNode.getTupleDesc ( )
inlineinherited

Definition at line 49 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.desc_.

boolean com.cloudera.impala.planner.PlanNode.hasValidStats ( )
inlineprotectedinherited
boolean com.cloudera.impala.planner.PlanNode.isBlockingNode ( )
inlineinherited

Returns true if this plan node can output its first row only after consuming all rows of all its children. This method is used to group plan nodes into pipelined units for resource estimation.

Definition at line 555 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.PipelinedPlanNodeSet.computePlanNodeSets().

boolean com.cloudera.impala.planner.ScanNode.isTableMissingColumnStats ( )
inlineinherited
boolean com.cloudera.impala.planner.ScanNode.isTableMissingStats ( )
inlineinherited

Returns true if the table underlying this scan is missing table stats or column stats relevant to this scan node.

Definition at line 111 of file ScanNode.java.

References com.cloudera.impala.planner.ScanNode.isTableMissingColumnStats(), and com.cloudera.impala.planner.ScanNode.isTableMissingTableStats().

boolean com.cloudera.impala.planner.ScanNode.isTableMissingTableStats ( )
inlineinherited
void com.cloudera.impala.planner.PlanNode.markSlotsMaterialized ( Analyzer  analyzer,
List< Expr >  exprs 
)
inlineprotectedinherited

Marks all slots referenced in exprs as materialized.

Definition at line 464 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.HdfsScanNode.init().

static long com.cloudera.impala.planner.PlanNode.multiplyCardinalities ( long  a,
long  b 
)
inlinestaticinherited

Computes and returns the product of two cardinalities. If an overflow occurs, the maximum Long value is returned (Long.MAX_VALUE).

Definition at line 541 of file PlanNode.java.

Referenced by com.cloudera.impala.planner.CrossJoinNode.computeStats(), and com.cloudera.impala.planner.HashJoinNode.getJoinCardinality().

void com.cloudera.impala.planner.PlanNode.setAssignedConjuncts ( Set< ExprId >  conjuncts)
inlineinherited
void com.cloudera.impala.planner.PlanNode.setDisplayName ( String  s)
inlineprotectedinherited

Definition at line 223 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.displayName_.

void com.cloudera.impala.planner.PlanNode.setFragment ( PlanFragment  fragment)
inlineinherited

Definition at line 175 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.fragment_.

void com.cloudera.impala.planner.PlanNode.setId ( PlanNodeId  id)
inlineinherited

Definition at line 165 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.id_.

void com.cloudera.impala.planner.PlanNode.setLimit ( long  limit)
inlineinherited

Sets limit_ to the given limit only if limit_ hasn't been set yet, or the new limit is lower.

Parameters
limit_

Definition at line 190 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.limit_.

void com.cloudera.impala.planner.PlanNode.setOutputSmap ( ExprSubstitutionMap  smap)
inlineinherited

Definition at line 179 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.outputSmap_.

void com.cloudera.impala.planner.PlanNode.setTblRefIds ( ArrayList< TupleId >  ids)
inlineinherited

Definition at line 202 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.tblRefIds_.

void com.cloudera.impala.planner.HdfsScanNode.toThrift ( TPlanNode  msg)
inlineprotected
void com.cloudera.impala.planner.PlanNode.transferConjuncts ( PlanNode  recipient)
inlineinherited

Definition at line 214 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.conjuncts_.

TPlan com.cloudera.impala.planner.PlanNode.treeToThrift ( )
inlineinherited
void com.cloudera.impala.planner.PlanNode.unsetLimit ( )
inlineinherited

Definition at line 194 of file PlanNode.java.

References com.cloudera.impala.planner.PlanNode.limit_.

Member Data Documentation

final int com.cloudera.impala.planner.PlanNode.DEFAULT_BATCH_SIZE = 1024
staticprotectedinherited

Definition at line 63 of file PlanNode.java.

PlanNodeId com.cloudera.impala.planner.PlanNode.id_
protectedinherited
final long com.cloudera.impala.planner.HdfsScanNode.IO_MGR_BUFFER_SIZE = 8L * 1024L * 1024L
staticprivate
final Logger com.cloudera.impala.planner.HdfsScanNode.LOG = LoggerFactory.getLogger(HdfsScanNode.class)
staticprivate

Definition at line 73 of file HdfsScanNode.java.

final long com.cloudera.impala.planner.HdfsScanNode.MAX_IO_BUFFERS_PER_THREAD = 10
staticprivate
Set<TupleId> com.cloudera.impala.planner.PlanNode.nullableTupleIds_ = Sets.newHashSet()
protectedinherited
int com.cloudera.impala.planner.ScanNode.numPartitionsMissingStats_ = 0
protectedinherited
final int com.cloudera.impala.planner.HdfsScanNode.PARTITION_PRUNING_BATCH_SIZE = 1024
staticprivate
final ArrayList<HdfsPartition> com.cloudera.impala.planner.HdfsScanNode.partitions_ = Lists.newArrayList()
private
final double com.cloudera.impala.planner.HdfsScanNode.SCAN_RANGE_SKEW_FACTOR = 1.2
staticprivate
ArrayList<TupleId> com.cloudera.impala.planner.PlanNode.tblRefIds_
protectedinherited
final int com.cloudera.impala.planner.HdfsScanNode.THREADS_PER_CORE = 3
staticprivate
long com.cloudera.impala.planner.HdfsScanNode.totalFiles_ = 0
private

The documentation for this class was generated from the following file: