15 package com.cloudera.impala.catalog;
17 import java.util.ArrayList;
18 import java.util.EnumSet;
19 import java.util.List;
23 import org.apache.hadoop.hive.common.StatsSetupConst;
24 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
25 import org.apache.hadoop.hive.metastore.TableType;
26 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
27 import org.apache.hadoop.hive.metastore.api.FieldSchema;
28 import org.apache.log4j.Logger;
31 import com.cloudera.impala.thrift.TAccessLevel;
32 import com.cloudera.impala.thrift.TCatalogObject;
33 import com.cloudera.impala.thrift.TCatalogObjectType;
34 import com.cloudera.impala.thrift.TColumn;
35 import com.cloudera.impala.thrift.TTable;
36 import com.cloudera.impala.thrift.TTableDescriptor;
37 import com.cloudera.impala.thrift.TTableStats;
38 import com.google.common.base.Preconditions;
39 import com.google.common.collect.Lists;
40 import com.google.common.collect.Maps;
// Class-wide Log4j logger, created for Table.class. Used in this file for the
// debug and warning messages emitted while loading column statistics.
52 private static final Logger
LOG = Logger.getLogger(Table.class);
// Hive Metastore representation of this table. Declared final, so it is assigned
// exactly once per instance.
// NOTE(review): presumably set from the constructor's msTable argument and returned
// by getMetaStoreTable() -- confirm against the full constructor body.
58 protected final org.apache.hadoop.hive.metastore.api.Table
msTable_;
// Columns of this table held in a list indexed by column position:
// loadFromThrift() inserts each column at the index given by col.getPosition().
// Declared as ArrayList (not List) because loadFromThrift() calls ensureCapacity().
76 private final ArrayList<Column>
colsByPos_ = Lists.newArrayList();
// Lookup from column name to Column. Keys are stored lower-cased: every insertion
// visible in this file uses col.getName().toLowerCase() as the key.
// NOTE(review): callers looking up a column presumably must lower-case the name
// first -- confirm in getColumn().
79 private final Map<String, Column>
colsByName_ = Maps.newHashMap();
89 TableType.EXTERNAL_TABLE, TableType.MANAGED_TABLE, TableType.VIRTUAL_VIEW);
91 protected Table(
TableId id, org.apache.hadoop.hive.metastore.api.Table msTable,
Db db,
92 String
name, String owner) {
96 name_ = name.toLowerCase();
111 public abstract void load(
Table oldValue, HiveMetaStoreClient client,
116 colsByName_.put(col.getName().toLowerCase(), col);
118 new StructField(col.getName(), col.getType(), col.getComment()));
145 List<String> ret = Lists.newArrayList();
156 LOG.debug(
"Loading column stats for table: " +
name_);
157 List<ColumnStatisticsObj> colStats;
164 colStats = client.getTableColumnStatistics(db_.getName(),
name_, colNames);
165 }
catch (Exception e) {
166 LOG.warn(
"Could not load column statistics for: " +
getFullName(), e);
170 for (ColumnStatisticsObj stats: colStats) {
172 Preconditions.checkNotNull(col);
174 LOG.warn(String.format(
"Statistics for %s, column %s are not supported as " +
180 LOG.warn(String.format(
"Failed to load column stats for %s, column %s. Stats " +
181 "may be incompatible with column type %s. Consider regenerating statistics " +
191 protected static long getRowCount(Map<String, String> parameters) {
192 if (parameters == null)
return -1;
193 String numRowsStr = parameters.get(StatsSetupConst.ROW_COUNT);
194 if (numRowsStr == null)
return -1;
196 return Long.valueOf(numRowsStr);
197 }
catch (NumberFormatException exc) {
208 org.apache.hadoop.hive.metastore.api.Table msTbl) {
211 if (TableType.valueOf(msTbl.getTableType()) == TableType.VIRTUAL_VIEW) {
212 table =
new View(
id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
214 table =
new HBaseTable(
id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
220 table =
new DataSourceTable(
id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
222 table =
new HdfsTable(
id, msTbl, db, msTbl.getTableName(), msTbl.getOwner());
234 if (!thriftTable.isSetLoad_status() && thriftTable.isSetMetastore_table()) {
235 newTable = Table.fromMetastoreTable(
new TableId(thriftTable.getId()),
236 parentDb, thriftTable.getMetastore_table());
238 newTable = IncompleteTable.createUninitializedTable(
239 TableId.createInvalidId(), parentDb, thriftTable.getTbl_name());
241 newTable.loadFromThrift(thriftTable);
247 List<TColumn> columns =
new ArrayList<TColumn>();
248 columns.addAll(thriftTable.getClustering_columns());
249 columns.addAll(thriftTable.getColumns());
251 fields_ =
new ArrayList<FieldSchema>();
253 colsByPos_.ensureCapacity(columns.size());
254 for (
int i = 0; i < columns.size(); ++i) {
255 Column col = Column.fromThrift(columns.get(i));
256 colsByPos_.add(col.getPosition(), col);
257 colsByName_.put(col.getName().toLowerCase(), col);
260 fields_.add(
new FieldSchema(col.
getName(),
261 col.getType().toString().toLowerCase(), col.
getComment()));
267 numRows_ = thriftTable.isSetTable_stats() ?
268 thriftTable.getTable_stats().getNum_rows() : -1;
271 accessLevel_ = thriftTable.isSetAccess_level() ? thriftTable.getAccess_level() :
272 TAccessLevel.READ_WRITE;
281 if (!colName.equals(colName.toLowerCase())) {
282 throw new TableLoadingException(
283 "Expected lower case column name but found: " + colName);
290 table.setId(id_.asInt());
294 table.setColumns(
new ArrayList<TColumn>());
295 table.setClustering_columns(
new ArrayList<TColumn>());
296 for (
int i = 0; i < colsByPos_.size(); ++i) {
297 TColumn colDesc = colsByPos_.get(i).
toThrift();
300 table.addToClustering_columns(colDesc);
302 table.addToColumns(colDesc);
308 table.setTable_stats(
new TTableStats());
309 table.getTable_stats().setNum_rows(
numRows_);
315 TCatalogObject catalogObject =
new TCatalogObject();
319 return catalogObject;
332 Type type = Type.parseColumnType(fs);
335 "Unsupported type '%s' in column '%s' of table '%s'",
336 fs.getType(), fs.getName(),
getName()));
355 List<String> colNames = Lists.<String>newArrayList();
357 colNames.add(col.getName());
abstract TCatalogObjectType getCatalogObjectType()
ArrayList< Column > getColumnsInHiveOrder()
List< String > getColumnNames()
static long getRowCount(Map< String, String > parameters)
static boolean isSupportedColType(Type colType)
void addColumn(Column col)
TCatalogObject toTCatalogObject()
static boolean isHBaseTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
void setCatalogVersion(long catalogVersion)
static boolean isDataSourceTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
static Table fromThrift(Db parentDb, TTable thriftTable)
void loadFromThrift(TTable thriftTable)
Type parseColumnType(FieldSchema fs)
String getStorageHandlerClassName()
static final long INITIAL_CATALOG_VERSION
ArrayList< Column > getColumns()
abstract void load(Table oldValue, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl)
void loadAllColumnStats(HiveMetaStoreClient client)
void updateLastDdlTime(long ddlTime)
final ArrayList< Column > colsByPos_
List< Column > getNonClusteringColumns()
abstract int getNumNodes()
final org.apache.hadoop.hive.metastore.api.Table msTable_
static long getLastDdlTime(org.apache.hadoop.hive.metastore.api.Table msTbl)
final Map< String, Column > colsByName_
abstract TTableDescriptor toThriftDescriptor(Set< Long > referencedPartitions)
List< String > getColumnNamesWithHmsStats()
static EnumSet< TableType > SUPPORTED_TABLE_TYPES
org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable()
static final Object metastoreAccessLock_
static Table fromMetastoreTable(TableId id, Db db, org.apache.hadoop.hive.metastore.api.Table msTbl)
Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner)
boolean updateStats(ColumnStatisticsData statsData)
Column getColumn(String name)
TTableDescriptor tableDesc_
TAccessLevel accessLevel_
int getNumClusteringCols()
List< FieldSchema > fields_