Impala
Impala is the open source, native analytic database for Apache Hadoop.
PlanFragment.java
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.planner;

import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.impala.analysis.Analyzer;
import com.cloudera.impala.analysis.BinaryPredicate;
import com.cloudera.impala.analysis.Expr;
import com.cloudera.impala.analysis.JoinOperator;
import com.cloudera.impala.analysis.SlotRef;
import com.cloudera.impala.catalog.HdfsFileFormat;
import com.cloudera.impala.catalog.HdfsTable;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.planner.HashJoinNode.DistributionMode;
import com.cloudera.impala.thrift.TExplainLevel;
import com.cloudera.impala.thrift.TPartitionType;
import com.cloudera.impala.thrift.TPlanFragment;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Lists;

/**
 * A PlanFragment is part of a tree of fragments that together make up a complete
 * execution plan. Each fragment is executed by one or more instances, and its output
 * is either streamed to a destination exchange node or written out via a data sink.
 */
public class PlanFragment {
  private final static Logger LOG = LoggerFactory.getLogger(PlanFragment.class);

  private final PlanFragmentId fragmentId_;
  // root of plan tree executed by this fragment
  private PlanNode planRoot_;

  // exchange node to which this fragment sends its output
  private ExchangeNode destNode_;

  // if null, outputs the entire row produced by planRoot_
  private List<Expr> outputExprs_;

  // created in finalize() or set in setSink()
  private DataSink sink_;

  // specification of the partition of the input of this fragment;
  // an UNPARTITIONED fragment is executed on only a single node
  // TODO: improve this comment, "input" is a bit misleading
  private DataPartition dataPartition_;

  // specification of how the output of this fragment is partitioned (i.e., how
  // it's sent to its destination);
  // if the output is UNPARTITIONED, it is being broadcast
  private DataPartition outputPartition_;

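  // Illustrative examples of these partitions (not from the original source): a scan
  // fragment is typically RANDOM-partitioned across data nodes, a merge aggregation
  // fragment is HASH-partitioned on the grouping exprs, and the coordinator fragment
  // is UNPARTITIONED (a single instance).
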
  public PlanFragment(PlanFragmentId id, PlanNode root, DataPartition partition) {
    fragmentId_ = id;
    planRoot_ = root;
    dataPartition_ = partition;
    outputPartition_ = DataPartition.UNPARTITIONED;
    setFragmentInPlanTree(planRoot_);
  }

  /**
   * Sets this fragment as the fragment of all plan nodes in the tree rooted at node.
   * Does not traverse the children of ExchangeNodes, because those belong to a
   * different (sender) fragment.
   */
  private void setFragmentInPlanTree(PlanNode node) {
    if (node == null) return;
    node.setFragment(this);
    if (!(node instanceof ExchangeNode)) {
      for (PlanNode child : node.getChildren()) {
        setFragmentInPlanTree(child);
      }
    }
  }

  public void setOutputExprs(List<Expr> outputExprs) {
    outputExprs_ = Expr.cloneList(outputExprs);
  }
  public List<Expr> getOutputExprs() { return outputExprs_; }

  /**
   * Finalizes the plan tree and creates a stream sink if needed. If this fragment
   * is hash partitioned, ensures that all hash-partitioning senders hash on
   * partition exprs of compatible types.
   */
  public void finalize(Analyzer analyzer) {
    if (destNode_ != null) {
      Preconditions.checkState(sink_ == null);
      // we're streaming to an exchange node
      DataStreamSink streamSink = new DataStreamSink(destNode_, outputPartition_);
      streamSink.setFragment(this);
      sink_ = streamSink;
    }

    if (!dataPartition_.isHashPartitioned()) return;

    // This fragment is hash partitioned. Gather all exchange nodes and ensure
    // that all hash-partitioning senders hash on partition exprs of the same type.
    List<ExchangeNode> exchNodes = Lists.newArrayList();
    planRoot_.collect(Predicates.instanceOf(ExchangeNode.class), exchNodes);

    // Contains partition-expr lists of all hash-partitioning sender fragments.
    List<List<Expr>> senderPartitionExprs = Lists.newArrayList();
    for (ExchangeNode exchNode: exchNodes) {
      Preconditions.checkState(!exchNode.getChildren().isEmpty());
      PlanFragment senderFragment = exchNode.getChild(0).getFragment();
      Preconditions.checkNotNull(senderFragment);
      if (!senderFragment.getOutputPartition().isHashPartitioned()) continue;
      List<Expr> partExprs = senderFragment.getOutputPartition().getPartitionExprs();
      // All hash-partitioning senders must have the same number of partition exprs
      // as this fragment's data partition; otherwise this fragment's data partition
      // should not have been marked as hash partitioned.
      Preconditions.checkState(
          partExprs.size() == dataPartition_.getPartitionExprs().size());
      senderPartitionExprs.add(partExprs);
    }

    // Cast all corresponding hash partition exprs of all hash-partitioning senders
    // to their compatible types. Also cast the data partition's exprs for consistency,
    // although that is not strictly necessary: they should already be type-identical
    // to the exprs of one of the senders, and they are not directly used for hashing
    // in the backend.
    senderPartitionExprs.add(dataPartition_.getPartitionExprs());
    try {
      analyzer.castToUnionCompatibleTypes(senderPartitionExprs);
    } catch (AnalysisException e) {
      // Should never happen. Analysis should have ensured type compatibility already.
      throw new IllegalStateException(e);
    }
  }
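  // Why the casts matter (illustrative, not from the original source): if one sender
  // partitions on a SMALLINT expr and another on an INT expr feeding the same
  // hash-partitioned fragment, hashing raw slot bytes of different widths could route
  // rows with identical logical values to different instances. See the standalone
  // sketch after this class for a concrete byte-width analogy.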

  /**
   * Returns the number of nodes on which the plan tree rooted at planRoot_ executes.
   * An UNPARTITIONED fragment executes on a single node.
   */
  public int getNumNodes() {
    return dataPartition_ == DataPartition.UNPARTITIONED ? 1 : planRoot_.getNumNodes();
  }

  /**
   * Returns true if the plan tree rooted at node can apply the slot-filter
   * optimization, i.e., probe-side filters generated from hash-join build sides.
   * Marks qualifying hash joins via setAddProbeFilters(). Filters are only generated
   * for broadcast joins over Parquet-majority scans, and are not pushed through the
   * probe side of outer or anti joins.
   */
  private boolean computeCanAddSlotFilters(PlanNode node) {
    if (node instanceof HashJoinNode) {
      HashJoinNode hashJoinNode = (HashJoinNode) node;
      boolean childResult = computeCanAddSlotFilters(node.getChild(0));
      if (!childResult) return false;
      if (hashJoinNode.getJoinOp().equals(JoinOperator.FULL_OUTER_JOIN) ||
          hashJoinNode.getJoinOp().equals(JoinOperator.LEFT_OUTER_JOIN) ||
          hashJoinNode.getJoinOp().equals(JoinOperator.LEFT_ANTI_JOIN) ||
          hashJoinNode.getJoinOp().equals(JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN)) {
        // It is not correct to push through an outer or anti join on the probe side:
        // we cannot filter those rows out.
        return false;
      }
      // We can't push down predicates for partitioned joins yet.
      // TODO: this can be hugely helpful to avoid network traffic. Implement this.
      if (hashJoinNode.getDistributionMode() == DistributionMode.PARTITIONED) {
        return false;
      }

      List<BinaryPredicate> joinConjuncts = hashJoinNode.getEqJoinConjuncts();
      // We can only add these filters for conjuncts of the form
      // <probe_slot> = *. If the hash join has any equi-join conjuncts in this form,
      // mark the hash join node.
      for (Expr c: joinConjuncts) {
        if (c.getChild(0) instanceof SlotRef) {
          hashJoinNode.setAddProbeFilters(true);
          break;
        }
      }
      // Even if this join cannot add predicates, return true so the parent node can.
      return true;
    } else if (node instanceof HdfsScanNode) {
      // Since currently only the Parquet scanner employs the slot filter optimization,
      // we enable it only if the majority format is Parquet. Otherwise we would add
      // the overhead of creating the SlotFilters on the build side in queries not on
      // Parquet data.
      // TODO: Modify the other scanners to exploit the slot filter optimization.
      HdfsScanNode scanNode = (HdfsScanNode) node;
      Preconditions.checkNotNull(scanNode.desc_);
      Preconditions.checkState(scanNode.desc_.getTable() instanceof HdfsTable);
      HdfsTable table = (HdfsTable) scanNode.desc_.getTable();
      return table.getMajorityFormat() == HdfsFileFormat.PARQUET;
    } else {
      for (PlanNode child : node.getChildren()) {
        computeCanAddSlotFilters(child);
      }
      return false;
    }
  }
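  // Illustrative example (hypothetical query, not from the original source): for
  // "SELECT ... FROM probe JOIN build ON probe.id = build.id", the eq-join conjunct
  // has a SlotRef (probe.id) as child 0, so the join can publish the build-side
  // values of build.id as a filter applied while scanning probe.id.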

  /**
   * Estimates the number of distinct values of exprs per instance of this fragment,
   * based on the fragment's data partition and its number of nodes. Returns -1 if
   * the estimate is unavailable for one of the exprs, and 0 for empty tables.
   */
  public long getNumDistinctValues(List<Expr> exprs) {
    Preconditions.checkNotNull(dataPartition_);
    long result = 1;
    int numNodes = getNumNodes();
    Preconditions.checkState(numNodes >= 0);
    // The number of nodes is zero for empty tables.
    if (numNodes == 0) return 0;
    for (Expr expr: exprs) {
      long numDistinct = expr.getNumDistinctValues();
      if (numDistinct == -1) {
        result = -1;
        break;
      }
      if (dataPartition_.getPartitionExprs().contains(expr)) {
        // A partition expr's distinct values are spread across the fragment's nodes.
        numDistinct = (long) Math.max((double) numDistinct / (double) numNodes, 1L);
      }
      result = PlanNode.multiplyCardinalities(result, numDistinct);
    }
    return result;
  }
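  // Worked example (illustrative numbers): with 10 nodes and exprs {a, b}, where a is
  // a hash partition expr with 1000 distinct values and b has 50, each node sees about
  // max(1000 / 10, 1) = 100 distinct values of a, so the per-instance estimate is
  // 100 * 50 = 5000.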

  public TPlanFragment toThrift() {
    TPlanFragment result = new TPlanFragment();
    result.setDisplay_name(fragmentId_.toString());
    if (planRoot_ != null) result.setPlan(planRoot_.treeToThrift());
    if (outputExprs_ != null) {
      result.setOutput_exprs(Expr.treesToThrift(outputExprs_));
    }
    if (sink_ != null) result.setOutput_sink(sink_.toThrift());
    result.setPartition(dataPartition_.toThrift());
    return result;
  }

  public String getExplainString(TExplainLevel explainLevel) {
    StringBuilder str = new StringBuilder();
    Preconditions.checkState(dataPartition_ != null);
    String rootPrefix = "";
    String prefix = "";
    String detailPrefix = "|  ";
    if (explainLevel == TExplainLevel.VERBOSE) {
      prefix = "  ";
      rootPrefix = "  ";
      detailPrefix = prefix + "|  ";
      str.append(String.format("%s:PLAN FRAGMENT [%s]\n", fragmentId_.toString(),
          dataPartition_.getExplainString()));
      if (sink_ != null && sink_ instanceof DataStreamSink) {
        str.append(sink_.getExplainString(prefix, detailPrefix, explainLevel) + "\n");
      }
    }
    // Always print table sinks.
    if (sink_ != null && sink_ instanceof TableSink) {
      str.append(sink_.getExplainString(prefix, detailPrefix, explainLevel));
      if (explainLevel.ordinal() >= TExplainLevel.STANDARD.ordinal()) {
        str.append(prefix + "|\n");
      }
    }
    if (planRoot_ != null) {
      str.append(planRoot_.getExplainString(rootPrefix, prefix, explainLevel));
    }
    return str.toString();
  }

  /** Returns true if this fragment's data partition is not UNPARTITIONED. */
  public boolean isPartitioned() {
    return (dataPartition_.getType() != TPartitionType.UNPARTITIONED);
  }

  public PlanFragmentId getId() { return fragmentId_; }
  public PlanFragment getDestFragment() {
    if (destNode_ == null) return null;
    return destNode_.getFragment();
  }
  public ExchangeNode getDestNode() { return destNode_; }
  public DataPartition getDataPartition() { return dataPartition_; }
  public void setDataPartition(DataPartition dataPartition) {
    this.dataPartition_ = dataPartition;
  }
  public DataPartition getOutputPartition() { return outputPartition_; }
  public void setOutputPartition(DataPartition outputPartition) {
    this.outputPartition_ = outputPartition;
  }
  public PlanNode getPlanRoot() { return planRoot_; }
  public void setPlanRoot(PlanNode root) {
    planRoot_ = root;
    setFragmentInPlanTree(planRoot_);
  }

  public void setDestination(ExchangeNode destNode) { destNode_ = destNode; }
  public boolean hasSink() { return sink_ != null; }
  public DataSink getSink() { return sink_; }
  public void setSink(DataSink sink) {
    Preconditions.checkState(this.sink_ == null);
    Preconditions.checkNotNull(sink);
    sink.setFragment(this);
    this.sink_ = sink;
  }

  /**
   * Adds a node as the new root of the plan tree. Connects the existing root as the
   * child of newRoot.
   */
  public void addPlanRoot(PlanNode newRoot) {
    Preconditions.checkState(newRoot.getChildren().size() == 1);
    newRoot.setChild(0, planRoot_);
    planRoot_ = newRoot;
    planRoot_.setFragment(this);
  }
}
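
To make the type-compatibility concern in finalize() concrete, here is a minimal, self-contained sketch. It is not Impala code: it merely shows, by analogy, that hashing the raw bytes of the same logical value stored in slots of different widths yields different hashes, which is why all hash-partitioning senders must cast their partition exprs to one common type.

import java.nio.ByteBuffer;
import java.util.Arrays;

// Standalone analogy (hypothetical, not part of Impala): if a backend hashes the raw
// bytes of each partition-expr slot, the slot's physical width affects the hash.
public class SlotWidthHashDemo {
  public static void main(String[] args) {
    // The same logical value, 42, materialized as a 4-byte INT slot by one sender
    // and as an 8-byte BIGINT slot by another.
    byte[] intSlot = ByteBuffer.allocate(4).putInt(42).array();
    byte[] bigintSlot = ByteBuffer.allocate(8).putLong(42L).array();

    // Hashing the raw bytes gives different results for the two widths, so rows
    // with identical values could be routed to different fragment instances.
    System.out.println(Arrays.hashCode(intSlot));     // 923563
    System.out.println(Arrays.hashCode(bigintSlot));  // a different value

    // Casting both senders' partition exprs to a common type (what finalize() does)
    // makes the materialized bytes, and hence the hashes, agree.
    byte[] castSlot = ByteBuffer.allocate(8).putLong(42L).array();
    System.out.println(Arrays.hashCode(castSlot) == Arrays.hashCode(bigintSlot)); // true
  }
}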