Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Table.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.catalog;
16 
17 import java.util.ArrayList;
18 import java.util.EnumSet;
19 import java.util.List;
20 import java.util.Map;
21 import java.util.Set;
22 
23 import org.apache.hadoop.hive.common.StatsSetupConst;
24 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
25 import org.apache.hadoop.hive.metastore.TableType;
26 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
27 import org.apache.hadoop.hive.metastore.api.FieldSchema;
28 import org.apache.log4j.Logger;
29 
31 import com.cloudera.impala.thrift.TAccessLevel;
32 import com.cloudera.impala.thrift.TCatalogObject;
33 import com.cloudera.impala.thrift.TCatalogObjectType;
34 import com.cloudera.impala.thrift.TColumn;
35 import com.cloudera.impala.thrift.TTable;
36 import com.cloudera.impala.thrift.TTableDescriptor;
37 import com.cloudera.impala.thrift.TTableStats;
38 import com.google.common.base.Preconditions;
39 import com.google.common.collect.Lists;
40 import com.google.common.collect.Maps;
41 
51 public abstract class Table implements CatalogObject {
52  private static final Logger LOG = Logger.getLogger(Table.class);
53 
54  // Lock used to serialize calls to the Hive MetaStore to work around MetaStore
55  // concurrency bugs. Currently used to serialize calls to "getTable()" due to HIVE-5457.
56  private static final Object metastoreAccessLock_ = new Object();
58  protected final org.apache.hadoop.hive.metastore.api.Table msTable_;
59 
60  protected final TableId id_;
61  protected final Db db_;
62  protected final String name_;
63  protected final String owner_;
64  protected TTableDescriptor tableDesc_;
65  protected List<FieldSchema> fields_;
66  protected TAccessLevel accessLevel_ = TAccessLevel.READ_WRITE;
67 
68  // Number of clustering columns.
69  protected int numClusteringCols_;
70 
71  // estimated number of rows in table; -1: unknown.
72  protected long numRows_ = -1;
73 
74  // colsByPos[i] refers to the ith column in the table. The first numClusteringCols are
75  // the clustering columns.
76  private final ArrayList<Column> colsByPos_ = Lists.newArrayList();
77 
78  // map from lowercase column name to Column object.
79  private final Map<String, Column> colsByName_ = Maps.newHashMap();
80 
81  // Type of this table (array of struct) that mirrors the columns. Useful for analysis.
82  protected final ArrayType type_ = new ArrayType(new StructType());
83 
84  // The lastDdlTime for this table; -1 if not set
85  protected long lastDdlTime_;
86 
87  // Set of supported table types.
88  protected static EnumSet<TableType> SUPPORTED_TABLE_TYPES = EnumSet.of(
89  TableType.EXTERNAL_TABLE, TableType.MANAGED_TABLE, TableType.VIRTUAL_VIEW);
90 
91  protected Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db,
92  String name, String owner) {
93  id_ = id;
94  msTable_ = msTable;
95  db_ = db;
96  name_ = name.toLowerCase();
97  owner_ = owner;
98  lastDdlTime_ = (msTable_ != null) ?
100  }
101 
  // Number of nodes that contain data for this table; -1: unknown.
  public abstract int getNumNodes();

  // Serializes this table to a TTableDescriptor; presumably only the partitions in
  // 'referencedPartitions' are included — confirm against subclass implementations.
  public abstract TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions);

  // The kind of catalog object this table represents (used by toTCatalogObject()).
  public abstract TCatalogObjectType getCatalogObjectType();

  // Populates this table's metadata from the metastore table 'msTbl'; 'oldValue' is
  // presumably the previously-loaded version whose state may be reused — confirm
  // against subclass implementations.
  public abstract void load(Table oldValue, HiveMetaStoreClient client,
      org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException;
113 
114  public void addColumn(Column col) {
115  colsByPos_.add(col);
116  colsByName_.put(col.getName().toLowerCase(), col);
117  ((StructType) type_.getItemType()).addField(
118  new StructField(col.getName(), col.getType(), col.getComment()));
119  }
120 
121  public void clearColumns() {
122  colsByPos_.clear();
123  colsByName_.clear();
124  ((StructType) type_.getItemType()).clearFields();
125  }
126 
132  public void updateLastDdlTime(long ddlTime) {
133  // Ensure the lastDdlTime never goes backwards.
134  if (ddlTime > lastDdlTime_) lastDdlTime_ = ddlTime;
135  }
136 
137  // Returns a list of all column names for this table which we expect to have column
138  // stats in the HMS. This exists because, when we request the column stats from HMS,
139  // including a column name that does not have stats causes the
140  // getTableColumnStatistics() to return nothing. For Hdfs tables, partition columns do
141  // not have column stats in the HMS, but HBase table clustering columns do have column
142  // stats. This method allows each table type to volunteer the set of columns we should
143  // ask the metastore for in loadAllColumnStats().
144  protected List<String> getColumnNamesWithHmsStats() {
145  List<String> ret = Lists.newArrayList();
146  for (String name: colsByName_.keySet()) ret.add(name);
147  return ret;
148  }
149 
155  protected void loadAllColumnStats(HiveMetaStoreClient client) {
156  LOG.debug("Loading column stats for table: " + name_);
157  List<ColumnStatisticsObj> colStats;
158 
159  // We need to only query those columns which may have stats; asking HMS for other
160  // columns causes loadAllColumnStats() to return nothing.
161  List<String> colNames = getColumnNamesWithHmsStats();
162 
163  try {
164  colStats = client.getTableColumnStatistics(db_.getName(), name_, colNames);
165  } catch (Exception e) {
166  LOG.warn("Could not load column statistics for: " + getFullName(), e);
167  return;
168  }
169 
170  for (ColumnStatisticsObj stats: colStats) {
171  Column col = getColumn(stats.getColName());
172  Preconditions.checkNotNull(col);
173  if (!ColumnStats.isSupportedColType(col.getType())) {
174  LOG.warn(String.format("Statistics for %s, column %s are not supported as " +
175  "column has type %s", getFullName(), col.getName(), col.getType()));
176  continue;
177  }
178 
179  if (!col.updateStats(stats.getStatsData())) {
180  LOG.warn(String.format("Failed to load column stats for %s, column %s. Stats " +
181  "may be incompatible with column type %s. Consider regenerating statistics " +
182  "for %s.", getFullName(), col.getName(), col.getType(), getFullName()));
183  continue;
184  }
185  }
186  }
187 
191  protected static long getRowCount(Map<String, String> parameters) {
192  if (parameters == null) return -1;
193  String numRowsStr = parameters.get(StatsSetupConst.ROW_COUNT);
194  if (numRowsStr == null) return -1;
195  try {
196  return Long.valueOf(numRowsStr);
197  } catch (NumberFormatException exc) {
198  // ignore
199  }
200  return -1;
201  }
202 
207  public static Table fromMetastoreTable(TableId id, Db db,
208  org.apache.hadoop.hive.metastore.api.Table msTbl) {
209  // Create a table of appropriate type
210  Table table = null;
211  if (TableType.valueOf(msTbl.getTableType()) == TableType.VIRTUAL_VIEW) {
212  table = new View(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
213  } else if (HBaseTable.isHBaseTable(msTbl)) {
214  table = new HBaseTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
215  } else if (DataSourceTable.isDataSourceTable(msTbl)) {
216  // It's important to check if this is a DataSourceTable before HdfsTable because
217  // DataSourceTables are still represented by HDFS tables in the metastore but
218  // have a special table property to indicate that Impala should use an external
219  // data source.
220  table = new DataSourceTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
221  } else if (HdfsFileFormat.isHdfsFormatClass(msTbl.getSd().getInputFormat())) {
222  table = new HdfsTable(id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
223  }
224  return table;
225  }
226 
231  public static Table fromThrift(Db parentDb, TTable thriftTable)
232  throws TableLoadingException {
233  Table newTable;
234  if (!thriftTable.isSetLoad_status() && thriftTable.isSetMetastore_table()) {
235  newTable = Table.fromMetastoreTable(new TableId(thriftTable.getId()),
236  parentDb, thriftTable.getMetastore_table());
237  } else {
238  newTable = IncompleteTable.createUninitializedTable(
239  TableId.createInvalidId(), parentDb, thriftTable.getTbl_name());
240  }
241  newTable.loadFromThrift(thriftTable);
242  newTable.validate();
243  return newTable;
244  }
245 
246  protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
247  List<TColumn> columns = new ArrayList<TColumn>();
248  columns.addAll(thriftTable.getClustering_columns());
249  columns.addAll(thriftTable.getColumns());
250 
251  fields_ = new ArrayList<FieldSchema>();
252  colsByPos_.clear();
253  colsByPos_.ensureCapacity(columns.size());
254  for (int i = 0; i < columns.size(); ++i) {
255  Column col = Column.fromThrift(columns.get(i));
256  colsByPos_.add(col.getPosition(), col);
257  colsByName_.put(col.getName().toLowerCase(), col);
258  ((StructType) type_.getItemType()).addField(
259  new StructField(col.getName(), col.getType(), col.getComment()));
260  fields_.add(new FieldSchema(col.getName(),
261  col.getType().toString().toLowerCase(), col.getComment()));
262  }
263 
264  numClusteringCols_ = thriftTable.getClustering_columns().size();
265 
266  // Estimated number of rows
267  numRows_ = thriftTable.isSetTable_stats() ?
268  thriftTable.getTable_stats().getNum_rows() : -1;
269 
270  // Default to READ_WRITE access if the field is not set.
271  accessLevel_ = thriftTable.isSetAccess_level() ? thriftTable.getAccess_level() :
272  TAccessLevel.READ_WRITE;
273  }
274 
279  public void validate() throws TableLoadingException {
280  for (String colName: colsByName_.keySet()) {
281  if (!colName.equals(colName.toLowerCase())) {
282  throw new TableLoadingException(
283  "Expected lower case column name but found: " + colName);
284  }
285  }
286  }
287 
288  public TTable toThrift() {
289  TTable table = new TTable(db_.getName(), name_);
290  table.setId(id_.asInt());
291  table.setAccess_level(accessLevel_);
292 
293  // Populate both regular columns and clustering columns (if there are any).
294  table.setColumns(new ArrayList<TColumn>());
295  table.setClustering_columns(new ArrayList<TColumn>());
296  for (int i = 0; i < colsByPos_.size(); ++i) {
297  TColumn colDesc = colsByPos_.get(i).toThrift();
298  // Clustering columns come first.
299  if (i < numClusteringCols_) {
300  table.addToClustering_columns(colDesc);
301  } else {
302  table.addToColumns(colDesc);
303  }
304  }
305 
306  table.setMetastore_table(getMetaStoreTable());
307  if (numRows_ != -1) {
308  table.setTable_stats(new TTableStats());
309  table.getTable_stats().setNum_rows(numRows_);
310  }
311  return table;
312  }
313 
314  public TCatalogObject toTCatalogObject() {
315  TCatalogObject catalogObject = new TCatalogObject();
316  catalogObject.setType(getCatalogObjectType());
317  catalogObject.setCatalog_version(getCatalogVersion());
318  catalogObject.setTable(toThrift());
319  return catalogObject;
320  }
321 
331  protected Type parseColumnType(FieldSchema fs) throws TableLoadingException {
332  Type type = Type.parseColumnType(fs);
333  if (type == null) {
334  throw new TableLoadingException(String.format(
335  "Unsupported type '%s' in column '%s' of table '%s'",
336  fs.getType(), fs.getName(), getName()));
337  }
338  return type;
339  }
340 
  // Simple accessors. getFullName() returns "db.table", or just the table name when
  // db_ is null.
  public Db getDb() { return db_; }
  public String getName() { return name_; }
  public String getFullName() { return (db_ != null ? db_.getName() + "." : "") + name_; }
345  return new TableName(db_ != null ? db_.getName() : null, name_);
346  }
347 
  public String getOwner() { return owner_; }
  // NOTE(review): returns the internal list directly; callers can mutate table state
  // through it — confirm whether a defensive copy is wanted.
  public ArrayList<Column> getColumns() { return colsByPos_; }
350 
354  public List<String> getColumnNames() {
355  List<String> colNames = Lists.<String>newArrayList();
356  for (Column col: colsByPos_) {
357  colNames.add(col.getName());
358  }
359  return colNames;
360  }
361 
  // Storage-handler class for this table; the base implementation has none and
  // returns null (subclasses may override).
  public String getStorageHandlerClassName() { return null; }
367 
373  public ArrayList<Column> getColumnsInHiveOrder() {
374  ArrayList<Column> columns = Lists.newArrayList(getNonClusteringColumns());
375 
376  for (Column column: colsByPos_.subList(0, numClusteringCols_)) {
377  columns.add(column);
378  }
379  return columns;
380  }
381 
  // Returns the non-clustering columns. NOTE(review): this is a live subList view of
  // colsByPos_, not a copy — structural changes to the table invalidate it.
  public List<Column> getNonClusteringColumns() {
    return colsByPos_.subList(numClusteringCols_, colsByPos_.size());
  }

  // Case-insensitive column lookup (keys in colsByName_ are lower case).
  public Column getColumn(String name) { return colsByName_.get(name.toLowerCase()); }

  // The raw metastore representation of this table; may be null.
  public org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable() {
    return msTable_;
  }

  public int getNumClusteringCols() { return numClusteringCols_; }
  public TableId getId() { return id_; }
  public long getNumRows() { return numRows_; }
  public ArrayType getType() { return type_; }

  // NOTE(review): catalogVersion_'s declaration is not visible in this chunk (its
  // line appears to have been lost in extraction); these accessors assume a
  // per-instance long field — confirm against upstream.
  @Override
  public long getCatalogVersion() { return catalogVersion_; }

  @Override
  public void setCatalogVersion(long catalogVersion) {
    catalogVersion_ = catalogVersion;
  }

  // A base Table is always considered loaded; IncompleteTable presumably overrides
  // this — confirm.
  @Override
  public boolean isLoaded() { return true; }
417 }
abstract TCatalogObjectType getCatalogObjectType()
ArrayList< Column > getColumnsInHiveOrder()
Definition: Table.java:373
List< String > getColumnNames()
Definition: Table.java:354
static long getRowCount(Map< String, String > parameters)
Definition: Table.java:191
static boolean isSupportedColType(Type colType)
int TableId
Definition: global-types.h:25
void addColumn(Column col)
Definition: Table.java:114
TCatalogObject toTCatalogObject()
Definition: Table.java:314
static boolean isHBaseTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
void setCatalogVersion(long catalogVersion)
Definition: Table.java:411
static boolean isDataSourceTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
static Table fromThrift(Db parentDb, TTable thriftTable)
Definition: Table.java:231
void loadFromThrift(TTable thriftTable)
Definition: Table.java:246
Type parseColumnType(FieldSchema fs)
Definition: Table.java:331
static final long INITIAL_CATALOG_VERSION
Definition: Catalog.java:57
ArrayList< Column > getColumns()
Definition: Table.java:349
abstract void load(Table oldValue, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl)
void loadAllColumnStats(HiveMetaStoreClient client)
Definition: Table.java:155
void updateLastDdlTime(long ddlTime)
Definition: Table.java:132
final ArrayList< Column > colsByPos_
Definition: Table.java:76
List< Column > getNonClusteringColumns()
Definition: Table.java:385
final org.apache.hadoop.hive.metastore.api.Table msTable_
Definition: Table.java:58
static boolean isHdfsFormatClass(String formatClass)
static long getLastDdlTime(org.apache.hadoop.hive.metastore.api.Table msTbl)
final Map< String, Column > colsByName_
Definition: Table.java:79
abstract TTableDescriptor toThriftDescriptor(Set< Long > referencedPartitions)
List< String > getColumnNamesWithHmsStats()
Definition: Table.java:144
static EnumSet< TableType > SUPPORTED_TABLE_TYPES
Definition: Table.java:88
org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable()
Definition: Table.java:398
static final Object metastoreAccessLock_
Definition: Table.java:56
static Table fromMetastoreTable(TableId id, Db db, org.apache.hadoop.hive.metastore.api.Table msTbl)
Definition: Table.java:207
Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner)
Definition: Table.java:91
boolean updateStats(ColumnStatisticsData statsData)
Definition: Column.java:59
Column getColumn(String name)
Definition: Table.java:392
TTableDescriptor tableDesc_
Definition: Table.java:64
string name
Definition: cpu-info.cc:50
static final Logger LOG
Definition: Table.java:52
List< FieldSchema > fields_
Definition: Table.java:65