Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
TupleDescriptor.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.Collections;
19 import java.util.HashMap;
20 import java.util.List;
21 import java.util.Map;
22 
27 import com.cloudera.impala.thrift.TTupleDescriptor;
28 import com.google.common.base.Joiner;
29 import com.google.common.base.Objects;
30 import com.google.common.base.Preconditions;
31 import com.google.common.collect.Lists;
32 
54 public class TupleDescriptor {
55  private final TupleId id_;
56  private final String debugName_; // debug-only
57  private final ArrayList<SlotDescriptor> slots_ = Lists.newArrayList();
58 
59  // Resolved path to the collection corresponding to this tuple descriptor, if any,
60  // Only set for materialized tuples.
61  private Path path_;
62 
63  // Type of this tuple descriptor. Used for slot/table resolution in analysis.
64  private StructType type_;
65 
66  // All legal aliases of this tuple.
67  private String[] aliases_;
68 
69  // If true, requires that aliases_.length() == 1. However, aliases_.length() == 1
70  // does not imply an explicit alias because nested collection refs have only a
71  // single implicit alias.
72  private boolean hasExplicitAlias_;
73 
74  // If false, this tuple doesn't need to be materialized.
75  private boolean isMaterialized_ = true;
76 
77  // If true, computeMemLayout() has been called and we can't add any additional slots.
78  private boolean hasMemLayout_ = false;
79 
80  private int byteSize_; // of all slots plus null indicators
81  private int numNullBytes_;
82  private float avgSerializedSize_; // in bytes; includes serialization overhead
83 
84  public TupleDescriptor(TupleId id, String debugName) {
85  id_ = id;
86  path_ = null;
87  debugName_ = debugName;
88  }
89 
90  public void addSlot(SlotDescriptor desc) {
91  Preconditions.checkState(!hasMemLayout_);
92  slots_.add(desc);
93  }
94 
95  public TupleId getId() { return id_; }
96  public ArrayList<SlotDescriptor> getSlots() { return slots_; }
97  public Table getTable() {
98  if (path_ == null) return null;
99  return path_.getRootTable();
100  }
102  Table t = getTable();
103  return (t == null) ? null : t.getTableName();
104  }
105  public void setPath(Path p) {
106  Preconditions.checkNotNull(p);
107  Preconditions.checkState(p.isResolved());
108  Preconditions.checkState(p.destType().isCollectionType());
109  path_ = p;
110  if (p.destTable() != null) {
111  // Do not use Path.getTypeAsStruct() to only allow implicit path resolutions,
112  // because this tuple desc belongs to a base table ref.
113  type_ = (StructType) p.destTable().getType().getItemType();
114  } else {
115  // Also allow explicit path resolutions.
116  type_ = Path.getTypeAsStruct(p.destType());
117  }
118  }
119  public Path getPath() { return path_; }
120  public void setType(StructType type) { type_ = type; }
121  public StructType getType() { return type_; }
122  public int getByteSize() { return byteSize_; }
123  public float getAvgSerializedSize() { return avgSerializedSize_; }
124  public boolean isMaterialized() { return isMaterialized_; }
125  public void setIsMaterialized(boolean value) { isMaterialized_ = value; }
126  public void setAliases(String[] aliases, boolean hasExplicitAlias) {
127  aliases_ = aliases;
129  }
130  public boolean hasExplicitAlias() { return hasExplicitAlias_; }
131  public String getAlias() { return (aliases_ != null) ? aliases_[0] : null; }
133  return (aliases_ != null) ? new TableName(null, aliases_[0]) : null;
134  }
135 
136  public String debugString() {
137  String tblStr = (getTable() == null ? "null" : getTable().getFullName());
138  List<String> slotStrings = Lists.newArrayList();
139  for (SlotDescriptor slot : slots_) {
140  slotStrings.add(slot.debugString());
141  }
142  return Objects.toStringHelper(this)
143  .add("id", id_.asInt())
144  .add("name", debugName_)
145  .add("tbl", tblStr)
146  .add("byte_size", byteSize_)
147  .add("is_materialized", isMaterialized_)
148  .add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]")
149  .toString();
150  }
151 
155  public void materializeSlots() {
156  for (SlotDescriptor slot: slots_) {
157  slot.setIsMaterialized(true);
158  }
159  }
160 
161  public TTupleDescriptor toThrift() {
162  TTupleDescriptor ttupleDesc =
163  new TTupleDescriptor(id_.asInt(), byteSize_, numNullBytes_);
164  // do not set the table id or tuple path for views
165  if (getTable() != null && !(getTable() instanceof View)) {
166  ttupleDesc.setTableId(getTable().getId().asInt());
167  Preconditions.checkNotNull(path_);
168  ttupleDesc.setTuplePath(path_.getAbsolutePath());
169  }
170  return ttupleDesc;
171  }
172 
173  public void computeMemLayout() {
174  if (hasMemLayout_) return;
175  hasMemLayout_ = true;
176 
177  // sort slots by size
178  Map<Integer, List<SlotDescriptor>> slotsBySize =
179  new HashMap<Integer, List<SlotDescriptor>>();
180 
181  // populate slotsBySize; also compute avgSerializedSize
182  int numNullableSlots = 0;
183  for (SlotDescriptor d: slots_) {
184  if (!d.isMaterialized()) continue;
185  ColumnStats stats = d.getStats();
186  if (stats.hasAvgSerializedSize()) {
187  avgSerializedSize_ += d.getStats().getAvgSerializedSize();
188  } else {
189  // TODO: for computed slots, try to come up with stats estimates
190  avgSerializedSize_ += d.getType().getSlotSize();
191  }
192  if (!slotsBySize.containsKey(d.getType().getSlotSize())) {
193  slotsBySize.put(d.getType().getSlotSize(), new ArrayList<SlotDescriptor>());
194  }
195  slotsBySize.get(d.getType().getSlotSize()).add(d);
196  if (d.getIsNullable()) ++numNullableSlots;
197  }
198  // we shouldn't have anything of size <= 0
199  Preconditions.checkState(!slotsBySize.containsKey(0));
200  Preconditions.checkState(!slotsBySize.containsKey(-1));
201 
202  // assign offsets to slots in order of ascending size
203  numNullBytes_ = (numNullableSlots + 7) / 8;
204  int offset = numNullBytes_;
205  int nullIndicatorByte = 0;
206  int nullIndicatorBit = 0;
207  // slotIdx is the index into the resulting tuple struct. The first (smallest) field
208  // is 0, next is 1, etc.
209  int slotIdx = 0;
210  List<Integer> sortedSizes = new ArrayList<Integer>(slotsBySize.keySet());
211  Collections.sort(sortedSizes);
212  for (int slotSize: sortedSizes) {
213  if (slotsBySize.get(slotSize).isEmpty()) continue;
214  if (slotSize > 1) {
215  // insert padding
216  int alignTo = Math.min(slotSize, 8);
217  offset = (offset + alignTo - 1) / alignTo * alignTo;
218  }
219 
220  for (SlotDescriptor d: slotsBySize.get(slotSize)) {
221  Preconditions.checkState(d.isMaterialized());
222  d.setByteSize(slotSize);
223  d.setByteOffset(offset);
224  d.setSlotIdx(slotIdx++);
225  offset += slotSize;
226 
227  // assign null indicator
228  if (d.getIsNullable()) {
229  d.setNullIndicatorByte(nullIndicatorByte);
230  d.setNullIndicatorBit(nullIndicatorBit);
231  nullIndicatorBit = (nullIndicatorBit + 1) % 8;
232  if (nullIndicatorBit == 0) ++nullIndicatorByte;
233  } else {
234  // Non-nullable slots will have 0 for the byte offset and -1 for the bit mask
235  d.setNullIndicatorBit(-1);
236  d.setNullIndicatorByte(0);
237  }
238  }
239  }
240 
241  this.byteSize_ = offset;
242  }
243 
248  public boolean isCompatible(TupleDescriptor desc) {
249  if (slots_.size() != desc.slots_.size()) return false;
250  for (int i = 0; i < slots_.size(); ++i) {
251  if (!slots_.get(i).getType().equals(desc.slots_.get(i).getType())) return false;
252  }
253  return true;
254  }
255 }
TupleDescriptor(TupleId id, String debugName)
final ArrayList< SlotDescriptor > slots_
boolean isCompatible(TupleDescriptor desc)
void setAliases(String[] aliases, boolean hasExplicitAlias)
uint8_t offset[7 *64-sizeof(uint64_t)]