Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
DataSourceTable.java
Go to the documentation of this file.
1 // Copyright 2014 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.catalog;
16 
import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.impala.extdatasource.v1.ExternalDataSource;
import com.cloudera.impala.thrift.TCatalogObjectType;
import com.cloudera.impala.thrift.TColumn;
import com.cloudera.impala.thrift.TDataSource;
import com.cloudera.impala.thrift.TDataSourceTable;
import com.cloudera.impala.thrift.TResultSet;
import com.cloudera.impala.thrift.TResultSetMetadata;
import com.cloudera.impala.thrift.TTable;
import com.cloudera.impala.thrift.TTableDescriptor;
import com.cloudera.impala.thrift.TTableType;
import com.cloudera.impala.util.TResultRowBuilder;
import com.google.common.base.Preconditions;
46 public class DataSourceTable extends Table {
47  private final static Logger LOG = LoggerFactory.getLogger(DataSourceTable.class);
48 
52  public static final String TBL_PROP_DATA_SRC_NAME = "__IMPALA_DATA_SOURCE_NAME";
53 
57  public static final String TBL_PROP_INIT_STRING = "__IMPALA_DATA_SOURCE_INIT_STRING";
58 
62  public static final String TBL_PROP_LOCATION = "__IMPALA_DATA_SOURCE_LOCATION";
63 
67  public static final String TBL_PROP_CLASS = "__IMPALA_DATA_SOURCE_CLASS";
68 
72  public static final String TBL_PROP_API_VER = "__IMPALA_DATA_SOURCE_API_VERSION";
73 
74  private String initString_;
75  private TDataSource dataSource_;
76 
77  protected DataSourceTable(
78  TableId id, org.apache.hadoop.hive.metastore.api.Table msTable,
79  Db db, String name, String owner) {
80  super(id, msTable, db, name, owner);
81  }
82 
86  public TDataSource getDataSource() { return dataSource_; }
87 
91  public String getInitString() { return initString_; }
92 
93  @Override
94  public int getNumNodes() { return 1; }
95 
96  @Override
97  public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.TABLE; }
98 
102  public static boolean isSupportedColumnType(Type colType) {
103  Preconditions.checkNotNull(colType);
104  return isSupportedPrimitiveType(colType.getPrimitiveType());
105  }
106 
110  public static boolean isSupportedPrimitiveType(PrimitiveType primitiveType) {
111  Preconditions.checkNotNull(primitiveType);
112  switch (primitiveType) {
113  case BIGINT:
114  case INT:
115  case SMALLINT:
116  case TINYINT:
117  case DOUBLE:
118  case FLOAT:
119  case BOOLEAN:
120  case STRING:
121  case TIMESTAMP:
122  case DECIMAL:
123  return true;
124  case BINARY:
125  case CHAR:
126  case DATE:
127  case DATETIME:
128  case INVALID_TYPE:
129  case NULL_TYPE:
130  default:
131  return false;
132  }
133  }
134 
140  private void loadColumns(List<FieldSchema> fieldSchemas, HiveMetaStoreClient client)
141  throws TableLoadingException {
142  int pos = 0;
143  for (FieldSchema s: fieldSchemas) {
144  Column col = new Column(s.getName(), parseColumnType(s), s.getComment(), pos);
145  Preconditions.checkArgument(isSupportedColumnType(col.getType()));
146  addColumn(col);
147  ++pos;
148  }
149  }
150 
151  @Override
152  protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
153  super.loadFromThrift(thriftTable);
154  TDataSourceTable dataSourceTable = thriftTable.getData_source_table();
155  initString_ = dataSourceTable.getInit_string();
156  dataSource_ = dataSourceTable.getData_source();
157  }
158 
159  @Override
160  public void load(Table oldValue, HiveMetaStoreClient client,
161  org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException {
162  Preconditions.checkNotNull(msTbl);
163  LOG.debug("load table: " + db_.getName() + "." + name_);
164  String dataSourceName = getRequiredTableProperty(msTbl, TBL_PROP_DATA_SRC_NAME, null);
165  String location = getRequiredTableProperty(msTbl, TBL_PROP_LOCATION, dataSourceName);
166  String className = getRequiredTableProperty(msTbl, TBL_PROP_CLASS, dataSourceName);
167  String apiVersionString = getRequiredTableProperty(msTbl, TBL_PROP_API_VER,
168  dataSourceName);
169  dataSource_ = new TDataSource(dataSourceName, location, className, apiVersionString);
171 
172  if (msTbl.getPartitionKeysSize() > 0) {
173  throw new TableLoadingException("Data source table cannot contain clustering " +
174  "columns: " + name_);
175  }
176  numClusteringCols_ = 0;
177 
178  try {
179  // Create column objects.
180  List<FieldSchema> fieldSchemas = getMetaStoreTable().getSd().getCols();
181  loadColumns(fieldSchemas, client);
182 
183  // Set table stats.
184  numRows_ = getRowCount(super.getMetaStoreTable().getParameters());
185  } catch (Exception e) {
186  throw new TableLoadingException("Failed to load metadata for data source table: " +
187  name_, e);
188  }
189  }
190 
191  private String getRequiredTableProperty(
192  org.apache.hadoop.hive.metastore.api.Table msTbl, String key, String dataSourceName)
193  throws TableLoadingException {
194  String val = msTbl.getParameters().get(key);
195  if (val == null) {
196  throw new TableLoadingException(String.format("Failed to load table %s produced " +
197  "by external data source %s. Missing required metadata: %s", name_,
198  dataSourceName == null ? "<unknown>" : dataSourceName, key));
199  }
200  return val;
201  }
202 
208  public TResultSet getTableStats() {
209  TResultSet result = new TResultSet();
210  TResultSetMetadata resultSchema = new TResultSetMetadata();
211  resultSchema.addToColumns(new TColumn("#Rows", Type.BIGINT.toThrift()));
212  result.setSchema(resultSchema);
213  TResultRowBuilder rowBuilder = new TResultRowBuilder();
214  rowBuilder.add(numRows_);
215  result.addToRows(rowBuilder.get());
216  return result;
217  }
218 
219  @Override
220  public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) {
221  TTableDescriptor tableDesc = new TTableDescriptor(id_.asInt(),
222  TTableType.DATA_SOURCE_TABLE, getColumns().size(), numClusteringCols_, name_,
223  db_.getName());
224  tableDesc.setDataSourceTable(getDataSourceTable());
225  tableDesc.setColNames(getColumnNames());
226  return tableDesc;
227  }
228 
232  @Override
233  public TTable toThrift() {
234  TTable table = super.toThrift();
235  table.setTable_type(TTableType.DATA_SOURCE_TABLE);
236  table.setData_source_table(getDataSourceTable());
237  return table;
238  }
239 
243  private TDataSourceTable getDataSourceTable() {
244  return new TDataSourceTable(dataSource_, initString_);
245  }
246 
252  public static boolean isDataSourceTable(
253  org.apache.hadoop.hive.metastore.api.Table msTbl) {
254  return msTbl.getParameters().containsKey(TBL_PROP_DATA_SRC_NAME);
255  }
256 }
List< String > getColumnNames()
Definition: Table.java:354
static long getRowCount(Map< String, String > parameters)
Definition: Table.java:191
static final ScalarType BIGINT
Definition: Type.java:50
static boolean isSupportedPrimitiveType(PrimitiveType primitiveType)
static boolean isSupportedColumnType(Type colType)
void addColumn(Column col)
Definition: Table.java:114
PrimitiveType getPrimitiveType()
Definition: Type.java:188
DataSourceTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner)
static boolean isDataSourceTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
Type parseColumnType(FieldSchema fs)
Definition: Table.java:331
ArrayList< Column > getColumns()
Definition: Table.java:349
void loadColumns(List< FieldSchema > fieldSchemas, HiveMetaStoreClient client)
String getRequiredTableProperty(org.apache.hadoop.hive.metastore.api.Table msTbl, String key, String dataSourceName)
void load(Table oldValue, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl)
org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable()
Definition: Table.java:398
void toThrift(TColumnType container)
Table(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner)
Definition: Table.java:91
TTableDescriptor toThriftDescriptor(Set< Long > referencedPartitions)
string name
Definition: cpu-info.cc:50