15 package com.cloudera.impala.planner;
17 import java.util.List;
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
35 import com.cloudera.impala.thrift.TEqJoinCondition;
36 import com.cloudera.impala.thrift.TExplainLevel;
37 import com.cloudera.impala.thrift.THashJoinNode;
38 import com.cloudera.impala.thrift.TPlanNode;
39 import com.cloudera.impala.thrift.TPlanNodeType;
40 import com.cloudera.impala.thrift.TQueryOptions;
41 import com.google.common.base.Objects;
42 import com.google.common.base.Preconditions;
43 import com.google.common.collect.Lists;
51 private final static Logger
LOG = LoggerFactory.getLogger(HashJoinNode.class);
70 this.description = descr;
74 public String
toString() {
return description; }
92 List<BinaryPredicate> eqJoinConjuncts, List<Expr> otherJoinConjuncts) {
94 Preconditions.checkArgument(eqJoinConjuncts != null);
95 Preconditions.checkArgument(otherJoinConjuncts != null);
103 case NULL_AWARE_LEFT_ANTI_JOIN: {
104 tupleIds_.addAll(outer.getTupleIds());
107 case RIGHT_ANTI_JOIN:
108 case RIGHT_SEMI_JOIN: {
109 tupleIds_.addAll(inner.getTupleIds());
113 tupleIds_.addAll(outer.getTupleIds());
114 tupleIds_.addAll(inner.getTupleIds());
118 tblRefIds_.addAll(outer.getTblRefIds());
119 tblRefIds_.addAll(inner.getTblRefIds());
124 children_.add(outer);
125 children_.add(inner);
129 nullableTupleIds_.addAll(inner.getNullableTupleIds());
130 nullableTupleIds_.addAll(outer.getNullableTupleIds());
132 nullableTupleIds_.addAll(outer.getTupleIds());
133 nullableTupleIds_.addAll(inner.getTupleIds());
135 nullableTupleIds_.addAll(inner.getTupleIds());
137 nullableTupleIds_.addAll(outer.getTupleIds());
162 List<BinaryPredicate> newEqJoinConjuncts = Lists.newArrayList();
166 Type t0 = eqPred.getChild(0).getType();
167 Type t1 = eqPred.getChild(1).getType();
172 boolean bothDecimal = t0.isDecimal() && t1.
isDecimal();
173 boolean bothString = t0.isStringType() && t1.
isStringType();
174 if (!bothDecimal && !bothString) {
176 t0.
toSql() +
" to " + t1.toSql() +
" in join predicate.");
178 Type compatibleType = Type.getAssignmentCompatibleType(t0, t1);
179 Preconditions.checkState(compatibleType.isDecimal() ||
182 if (!t0.equals(compatibleType)) {
183 eqPred.setChild(0, eqPred.getChild(0).castTo(compatibleType));
185 if (!t1.equals(compatibleType)) {
186 eqPred.setChild(1, eqPred.getChild(1).castTo(compatibleType));
192 Preconditions.checkState(
193 eqPred.getChild(0).getType().matchesType(eqPred.getChild(1).getType()));
195 eqPred.getChild(0), eqPred.getChild(1)));
197 eqJoinConjuncts_ = newEqJoinConjuncts;
223 Preconditions.checkState(
224 joinOp_ == JoinOperator.INNER_JOIN || joinOp_.isOuterJoin());
225 long maxNumDistinct = 0;
227 if (eqJoinPredicate.getChild(0).unwrapSlotRef(
false) == null)
continue;
228 SlotRef rhsSlotRef = eqJoinPredicate.getChild(1).unwrapSlotRef(
false);
229 if (rhsSlotRef == null)
continue;
231 if (slotDesc == null)
continue;
234 long numDistinct = stats.getNumDistinctValues();
235 Table rhsTbl = slotDesc.getParent().getTable();
236 if (rhsTbl != null && rhsTbl.getNumRows() != -1) {
239 LOG.debug(
"#distinct=" + numDistinct +
" #rows="
240 + Long.toString(rhsTbl.getNumRows()));
241 numDistinct = Math.min(numDistinct, rhsTbl.getNumRows());
243 if (getChild(1).
cardinality_ != -1 && numDistinct != -1) {
246 numDistinct = Math.min(numDistinct, getChild(1).cardinality_);
248 maxNumDistinct = Math.max(maxNumDistinct, numDistinct);
249 LOG.debug(
"min slotref=" + rhsSlotRef.toSql() +
" #distinct="
250 + Long.toString(numDistinct));
254 if (maxNumDistinct == 0) {
258 result = getChild(0).cardinality_;
262 result = Math.round((double)result / (
double) maxNumDistinct);
295 Preconditions.checkState(joinOp_.isSemiJoin());
302 cardinality = getChild(1).cardinality_;
305 cardinality = getChild(0).cardinality_;
307 double minSelectivity = 1.0;
309 long lhsNdv =
getNdv(eqJoinPredicate.getChild(0));
310 lhsNdv = Math.min(lhsNdv, getChild(0).cardinality_);
311 long rhsNdv =
getNdv(eqJoinPredicate.getChild(1));
312 rhsNdv = Math.min(rhsNdv, getChild(1).cardinality_);
315 if (lhsNdv == -1 || rhsNdv == -1)
continue;
317 double selectivity = 1.0;
319 case LEFT_SEMI_JOIN: {
320 selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (lhsNdv);
323 case RIGHT_SEMI_JOIN: {
324 selectivity = (double) Math.min(lhsNdv, rhsNdv) / (double) (rhsNdv);
328 case NULL_AWARE_LEFT_ANTI_JOIN: {
329 selectivity = (double) Math.max(lhsNdv - rhsNdv, lhsNdv) / (double) lhsNdv;
332 case RIGHT_ANTI_JOIN: {
333 selectivity = (double) Math.max(rhsNdv - lhsNdv, rhsNdv) / (double) rhsNdv;
336 default: Preconditions.checkState(
false);
338 minSelectivity = Math.min(minSelectivity, selectivity);
341 Preconditions.checkState(cardinality != -1);
342 return Math.round(cardinality * minSelectivity);
350 SlotRef slotRef = expr.unwrapSlotRef(
false);
351 if (slotRef == null)
return -1;
353 if (slotDesc == null)
return -1;
356 return stats.getNumDistinctValues();
361 super.computeStats(analyzer);
369 long leftCard = getChild(0).cardinality_;
370 long rightCard = getChild(1).cardinality_;
372 case LEFT_SEMI_JOIN: {
373 if (leftCard != -1) {
378 case RIGHT_SEMI_JOIN: {
379 if (rightCard != -1) {
384 case LEFT_OUTER_JOIN: {
385 if (leftCard != -1) {
390 case RIGHT_OUTER_JOIN: {
391 if (rightCard != -1) {
396 case FULL_OUTER_JOIN: {
397 if (leftCard != -1 && rightCard != -1) {
404 case NULL_AWARE_LEFT_ANTI_JOIN: {
405 if (leftCard != -1) {
410 case RIGHT_ANTI_JOIN: {
411 if (rightCard != -1) {
419 LOG.debug(
"stats HashJoin: cardinality=" + Long.toString(
cardinality_));
424 return Objects.toStringHelper(
this)
426 .addValue(super.debugString())
431 Objects.ToStringHelper helper = Objects.toStringHelper(
this);
433 helper.add(
"lhs" , entry.getChild(0)).add(
"rhs", entry.getChild(1));
435 return helper.toString();
440 msg.node_type = TPlanNodeType.HASH_JOIN_NODE;
441 msg.hash_join_node =
new THashJoinNode();
442 msg.hash_join_node.join_op = joinOp_.toThrift();
444 TEqJoinCondition eqJoinCondition =
445 new TEqJoinCondition(entry.getChild(0).treeToThrift(),
446 entry.getChild(1).treeToThrift());
447 msg.hash_join_node.addToEq_join_conjuncts(eqJoinCondition);
450 msg.hash_join_node.addToOther_join_conjuncts(e.treeToThrift());
459 return output.toString();
464 TExplainLevel detailLevel) {
465 StringBuilder output =
new StringBuilder();
469 if (detailLevel.ordinal() > TExplainLevel.MINIMAL.ordinal()) {
470 output.append(detailPrefix +
"hash predicates: ");
471 for (
int i = 0; i < eqJoinConjuncts_.size(); ++i) {
472 Expr eqConjunct = eqJoinConjuncts_.get(i);
473 output.append(eqConjunct.toSql());
478 output.append(detailPrefix +
"other join predicates: ")
482 output.append(detailPrefix +
"other predicates: ")
486 return output.toString();
String getDisplayLabelDetail()
DistributionMode(String descr)
void computeStats(Analyzer analyzer)
void assignConjuncts(Analyzer analyzer)
ExprSubstitutionMap getCombinedChildSmap()
long getJoinCardinality(Analyzer analyzer)
void init(Analyzer analyzer)
String getExplainString()
final JoinOperator joinOp_
long getSemiJoinCardinality()
boolean matchesType(Type t)
DistributionMode distrMode_
String getNodeExplainString(String prefix, String detailPrefix, TExplainLevel detailLevel)
String eqJoinConjunctsDebugString()
void setDistributionMode(DistributionMode distrMode)
List< Expr > otherJoinConjuncts_
HashJoinNode(PlanNode outer, PlanNode inner, TableRef tblRef, List< BinaryPredicate > eqJoinConjuncts, List< Expr > otherJoinConjuncts)
static long multiplyCardinalities(long a, long b)
void computeCosts(TQueryOptions queryOptions)
static final long DEFAULT_PER_HOST_MEM
List< BinaryPredicate > eqJoinConjuncts_
Expr substitute(ExprSubstitutionMap smap, Analyzer analyzer, boolean preserveRootType)
void setAddProbeFilters(boolean b)
List< BinaryPredicate > getEqJoinConjuncts()
void createDefaultSmap(Analyzer analyzer)
static final double HASH_TBL_SPACE_OVERHEAD
boolean hasNumDistinctValues()
void toThrift(TPlanNode msg)
final String getDisplayLabel()
Set< ExprId > assignedConjuncts_
static long addCardinalities(long a, long b)
DistributionMode getDistributionMode()