Impala
Impala is the open source, native analytic database for Apache Hadoop.
CatalogOpExecutor.java
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.service;
16 
17 import java.util.ArrayList;
18 import java.util.HashMap;
19 import java.util.Iterator;
20 import java.util.LinkedHashMap;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.concurrent.ExecutorService;
25 import java.util.concurrent.Executors;
26 import java.util.concurrent.atomic.AtomicBoolean;
27 import java.util.concurrent.atomic.AtomicInteger;
28 
29 import org.apache.hadoop.hive.common.StatsSetupConst;
30 import org.apache.hadoop.hive.conf.HiveConf;
31 import org.apache.hadoop.hive.metastore.TableType;
32 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
33 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
34 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
35 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
36 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
37 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
38 import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
39 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
40 import org.apache.hadoop.hive.metastore.api.FieldSchema;
41 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
42 import org.apache.hadoop.hive.metastore.api.MetaException;
43 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
44 import org.apache.hadoop.hive.metastore.api.Partition;
45 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
46 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
47 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
48 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
49 import org.apache.hadoop.hive.metastore.Warehouse;
50 import org.apache.log4j.Logger;
51 import org.apache.thrift.TException;
52 
83 import com.cloudera.impala.common.Pair;
84 import com.cloudera.impala.thrift.ImpalaInternalServiceConstants;
85 import com.cloudera.impala.thrift.JniCatalogConstants;
86 import com.cloudera.impala.thrift.TAlterTableAddPartitionParams;
87 import com.cloudera.impala.thrift.TAlterTableAddReplaceColsParams;
88 import com.cloudera.impala.thrift.TAlterTableChangeColParams;
89 import com.cloudera.impala.thrift.TAlterTableDropColParams;
90 import com.cloudera.impala.thrift.TAlterTableDropPartitionParams;
91 import com.cloudera.impala.thrift.TAlterTableOrViewRenameParams;
92 import com.cloudera.impala.thrift.TAlterTableParams;
93 import com.cloudera.impala.thrift.TAlterTableSetCachedParams;
94 import com.cloudera.impala.thrift.TAlterTableSetFileFormatParams;
95 import com.cloudera.impala.thrift.TAlterTableSetLocationParams;
96 import com.cloudera.impala.thrift.TAlterTableSetTblPropertiesParams;
97 import com.cloudera.impala.thrift.TAlterTableUpdateStatsParams;
98 import com.cloudera.impala.thrift.TCatalogObject;
99 import com.cloudera.impala.thrift.TCatalogObjectType;
100 import com.cloudera.impala.thrift.TCatalogUpdateResult;
101 import com.cloudera.impala.thrift.TColumn;
102 import com.cloudera.impala.thrift.TColumnStats;
103 import com.cloudera.impala.thrift.TColumnType;
104 import com.cloudera.impala.thrift.TColumnValue;
105 import com.cloudera.impala.thrift.TCreateDataSourceParams;
106 import com.cloudera.impala.thrift.TCreateDbParams;
107 import com.cloudera.impala.thrift.TCreateDropRoleParams;
108 import com.cloudera.impala.thrift.TCreateFunctionParams;
109 import com.cloudera.impala.thrift.TCreateOrAlterViewParams;
110 import com.cloudera.impala.thrift.TCreateTableLikeParams;
111 import com.cloudera.impala.thrift.TCreateTableParams;
112 import com.cloudera.impala.thrift.TDatabase;
113 import com.cloudera.impala.thrift.TDdlExecRequest;
114 import com.cloudera.impala.thrift.TDdlExecResponse;
115 import com.cloudera.impala.thrift.TDropDataSourceParams;
116 import com.cloudera.impala.thrift.TDropDbParams;
117 import com.cloudera.impala.thrift.TDropFunctionParams;
118 import com.cloudera.impala.thrift.TDropStatsParams;
119 import com.cloudera.impala.thrift.TDropTableOrViewParams;
120 import com.cloudera.impala.thrift.TErrorCode;
121 import com.cloudera.impala.thrift.TGrantRevokePrivParams;
122 import com.cloudera.impala.thrift.TGrantRevokeRoleParams;
123 import com.cloudera.impala.thrift.THdfsCachingOp;
124 import com.cloudera.impala.thrift.THdfsFileFormat;
125 import com.cloudera.impala.thrift.TPartitionKeyValue;
126 import com.cloudera.impala.thrift.TPartitionStats;
127 import com.cloudera.impala.thrift.TPrivilege;
128 import com.cloudera.impala.thrift.TResetMetadataRequest;
129 import com.cloudera.impala.thrift.TResetMetadataResponse;
130 import com.cloudera.impala.thrift.TResultRow;
131 import com.cloudera.impala.thrift.TResultSet;
132 import com.cloudera.impala.thrift.TResultSetMetadata;
133 import com.cloudera.impala.thrift.TStatus;
134 import com.cloudera.impala.thrift.TTable;
135 import com.cloudera.impala.thrift.TTableName;
136 import com.cloudera.impala.thrift.TTableStats;
137 import com.cloudera.impala.thrift.TUpdateCatalogRequest;
138 import com.cloudera.impala.thrift.TUpdateCatalogResponse;
140 import com.google.common.base.Joiner;
141 import com.google.common.base.Preconditions;
142 import com.google.common.collect.Lists;
143 import com.google.common.collect.Maps;
144 import com.google.common.collect.Sets;
145 import com.google.common.util.concurrent.SettableFuture;
146 
154 public class CatalogOpExecutor {
155  // Format string for exceptions returned by Hive Metastore RPCs.
156  private final static String HMS_RPC_ERROR_FORMAT_STR =
157  "Error making '%s' RPC to Hive Metastore: ";
158 
159  private final CatalogServiceCatalog catalog_;
160 
161  // Lock used to ensure that in-place modifications to cached table/db objects in
162  // catalog_ and the corresponding RPC to apply the change in the HMS are atomic.
163  // Such modifications are done for CREATE/DROP/ALTER TABLE/DATABASE requests.
164  private final Object metastoreDdlLock_ = new Object();
165  private static final Logger LOG = Logger.getLogger(CatalogOpExecutor.class);
166 
167  // The maximum number of partitions to update in one Hive Metastore RPC.
168  // Used when persisting the results of COMPUTE STATS statements.
169  private final static short MAX_PARTITION_UPDATES_PER_RPC = 500;
170 
171  public CatalogOpExecutor(CatalogServiceCatalog catalog) {
172  catalog_ = catalog;
173  }
174 
175  public TDdlExecResponse execDdlRequest(TDdlExecRequest ddlRequest)
176  throws ImpalaException {
177  TDdlExecResponse response = new TDdlExecResponse();
178  response.setResult(new TCatalogUpdateResult());
179  response.getResult().setCatalog_service_id(JniCatalog.getServiceId());
180  User requestingUser = null;
181  if (ddlRequest.isSetHeader()) {
182  requestingUser = new User(ddlRequest.getHeader().getRequesting_user());
183  }
184 
185  switch (ddlRequest.ddl_type) {
186  case ALTER_TABLE:
187  alterTable(ddlRequest.getAlter_table_params(), response);
188  break;
189  case ALTER_VIEW:
190  alterView(ddlRequest.getAlter_view_params(), response);
191  break;
192  case CREATE_DATABASE:
193  createDatabase(ddlRequest.getCreate_db_params(), response);
194  break;
195  case CREATE_TABLE_AS_SELECT:
196  response.setNew_table_created(
197  createTable(ddlRequest.getCreate_table_params(), response));
198  break;
199  case CREATE_TABLE:
200  createTable(ddlRequest.getCreate_table_params(), response);
201  break;
202  case CREATE_TABLE_LIKE:
203  createTableLike(ddlRequest.getCreate_table_like_params(), response);
204  break;
205  case CREATE_VIEW:
206  createView(ddlRequest.getCreate_view_params(), response);
207  break;
208  case CREATE_FUNCTION:
209  createFunction(ddlRequest.getCreate_fn_params(), response);
210  break;
211  case CREATE_DATA_SOURCE:
212  createDataSource(ddlRequest.getCreate_data_source_params(), response);
213  break;
214  case COMPUTE_STATS:
215  Preconditions.checkState(false, "Compute stats should trigger an ALTER TABLE.");
216  break;
217  case DROP_STATS:
218  dropStats(ddlRequest.getDrop_stats_params(), response);
219  break;
220  case DROP_DATABASE:
221  dropDatabase(ddlRequest.getDrop_db_params(), response);
222  break;
223  case DROP_TABLE:
224  case DROP_VIEW:
225  dropTableOrView(ddlRequest.getDrop_table_or_view_params(), response);
226  break;
227  case DROP_FUNCTION:
228  dropFunction(ddlRequest.getDrop_fn_params(), response);
229  break;
230  case DROP_DATA_SOURCE:
231  dropDataSource(ddlRequest.getDrop_data_source_params(), response);
232  break;
233  case CREATE_ROLE:
234  case DROP_ROLE:
235  createDropRole(requestingUser, ddlRequest.getCreate_drop_role_params(),
236  response);
237  break;
238  case GRANT_ROLE:
239  case REVOKE_ROLE:
240  grantRevokeRoleGroup(requestingUser, ddlRequest.getGrant_revoke_role_params(),
241  response);
242  break;
243  case GRANT_PRIVILEGE:
244  case REVOKE_PRIVILEGE:
245  grantRevokeRolePrivilege(requestingUser,
246  ddlRequest.getGrant_revoke_priv_params(), response);
247  break;
248  default: throw new IllegalStateException("Unexpected DDL exec request type: " +
249  ddlRequest.ddl_type);
250  }
251  // At this point, the operation is considered successful. If any errors occurred
252  // during execution, this function will throw an exception and the CatalogServer
253  // will handle setting a bad status code.
254  response.getResult().setStatus(new TStatus(TErrorCode.OK, new ArrayList<String>()));
255  return response;
256  }
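
  // A minimal sketch of how a caller might build and dispatch a DDL request to
  // execDdlRequest(). The setter names (setDdl_type, setCreate_db_params, setDb,
  // setIf_not_exists) and the TDdlType enum are assumed to mirror the thrift
  // getters and the ddl_type field used in the switch above; they are not taken
  // from this file.
  private TDdlExecResponse exampleCreateDatabaseRequest(String dbName)
      throws ImpalaException {
    TCreateDbParams dbParams = new TCreateDbParams();
    dbParams.setDb(dbName);            // database to create
    dbParams.setIf_not_exists(true);   // make the request idempotent
    TDdlExecRequest request = new TDdlExecRequest();
    request.setDdl_type(com.cloudera.impala.thrift.TDdlType.CREATE_DATABASE);
    request.setCreate_db_params(dbParams);
    // Dispatches through the switch above; on success the response carries an
    // OK status and the updated catalog version.
    return execDdlRequest(request);
  }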
257 
262  private void alterTable(TAlterTableParams params, TDdlExecResponse response)
263  throws ImpalaException {
264  switch (params.getAlter_type()) {
265  case ADD_REPLACE_COLUMNS:
266  TAlterTableAddReplaceColsParams addReplaceColParams =
267  params.getAdd_replace_cols_params();
268  alterTableAddReplaceCols(TableName.fromThrift(params.getTable_name()),
269  addReplaceColParams.getColumns(),
270  addReplaceColParams.isReplace_existing_cols());
271  break;
272  case ADD_PARTITION:
273  TAlterTableAddPartitionParams addPartParams = params.getAdd_partition_params();
274  // Create and add HdfsPartition object to the corresponding HdfsTable and load
275  // its block metadata. Get the new table object with an updated catalog version.
276  // If the partition already exists in Hive and "IfNotExists" is true, then null
277  // is returned.
278  Table refreshedTable = alterTableAddPartition(TableName.fromThrift(
279  params.getTable_name()), addPartParams.getPartition_spec(),
280  addPartParams.isIf_not_exists(), addPartParams.getLocation(),
281  addPartParams.getCache_op());
282  response.result.setUpdated_catalog_object(TableToTCatalogObject(refreshedTable));
283  response.result.setVersion(
284  response.result.getUpdated_catalog_object().getCatalog_version());
285  return;
286  case DROP_COLUMN:
287  TAlterTableDropColParams dropColParams = params.getDrop_col_params();
288  alterTableDropCol(TableName.fromThrift(params.getTable_name()),
289  dropColParams.getCol_name());
290  break;
291  case CHANGE_COLUMN:
292  TAlterTableChangeColParams changeColParams = params.getChange_col_params();
293  alterTableChangeCol(TableName.fromThrift(params.getTable_name()),
294  changeColParams.getCol_name(), changeColParams.getNew_col_def());
295  break;
296  case DROP_PARTITION:
297  TAlterTableDropPartitionParams dropPartParams = params.getDrop_partition_params();
298  // Drop the partition from the corresponding HdfsTable. Get the table object
299  // with an updated catalog version. If the partition does not exist and
300  // "IfExists" is true, null is returned.
301  Table refreshedTable = alterTableDropPartition(TableName.fromThrift(
302  params.getTable_name()), dropPartParams.getPartition_spec(),
303  dropPartParams.isIf_exists());
304  response.result.setUpdated_catalog_object(TableToTCatalogObject(refreshedTable));
305  response.result.setVersion(
306  response.result.getUpdated_catalog_object().getCatalog_version());
307  return;
308  case RENAME_TABLE:
309  case RENAME_VIEW:
310  TAlterTableOrViewRenameParams renameParams = params.getRename_params();
311  alterTableOrViewRename(TableName.fromThrift(params.getTable_name()),
312  TableName.fromThrift(renameParams.getNew_table_name()),
313  response);
314  // Renamed table can't be fast refreshed anyway. Return now.
315  return;
316  case SET_FILE_FORMAT:
317  TAlterTableSetFileFormatParams fileFormatParams =
318  params.getSet_file_format_params();
319  List<TPartitionKeyValue> fileFormatPartitionSpec = null;
320  if (fileFormatParams.isSetPartition_spec()) {
321  fileFormatPartitionSpec = fileFormatParams.getPartition_spec();
322  }
323  alterTableSetFileFormat(TableName.fromThrift(params.getTable_name()),
324  fileFormatPartitionSpec, fileFormatParams.getFile_format());
325  break;
326  case SET_LOCATION:
327  TAlterTableSetLocationParams setLocationParams = params.getSet_location_params();
328  List<TPartitionKeyValue> partitionSpec = null;
329  if (setLocationParams.isSetPartition_spec()) {
330  partitionSpec = setLocationParams.getPartition_spec();
331  }
332  alterTableSetLocation(TableName.fromThrift(params.getTable_name()),
333  partitionSpec, setLocationParams.getLocation());
334  break;
335  case SET_TBL_PROPERTIES:
336  alterTableSetTblProperties(TableName.fromThrift(params.getTable_name()),
337  params.getSet_tbl_properties_params());
338  break;
339  case UPDATE_STATS:
340  Preconditions.checkState(params.isSetUpdate_stats_params());
341  alterTableUpdateStats(params.getUpdate_stats_params(), response);
342  break;
343  case SET_CACHED:
344  Preconditions.checkState(params.isSetSet_cached_params());
345  if (params.getSet_cached_params().getPartition_spec() == null) {
346  alterTableSetCached(TableName.fromThrift(params.getTable_name()),
347  params.getSet_cached_params());
348  } else {
349  alterPartitionSetCached(TableName.fromThrift(params.getTable_name()),
350  params.getSet_cached_params());
351  }
352  break;
353  default:
354  throw new UnsupportedOperationException(
355  "Unknown ALTER TABLE operation type: " + params.getAlter_type());
356  }
357 
358  Table refreshedTable = catalog_.reloadTable(params.getTable_name());
359  response.result.setUpdated_catalog_object(TableToTCatalogObject(refreshedTable));
360  response.result.setVersion(
361  response.result.getUpdated_catalog_object().getCatalog_version());
362  }
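
  // A small sketch of the partition spec consumed by the ADD_PARTITION,
  // DROP_PARTITION, SET_FILE_FORMAT and SET_LOCATION branches above: one
  // TPartitionKeyValue per partition column, in the order the columns are
  // declared in the table. The setName()/setValue() setters are assumed from
  // the getName()/getValue() getters used later in this file.
  private static List<TPartitionKeyValue> examplePartitionSpec() {
    TPartitionKeyValue year = new TPartitionKeyValue();
    year.setName("year");
    year.setValue("2014");
    TPartitionKeyValue month = new TPartitionKeyValue();
    month.setName("month");
    month.setValue("1");
    // Equivalent to the spec produced by e.g. PARTITION (year=2014, month=1).
    return Lists.newArrayList(year, month);
  }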
363 
368  private Table addHdfsPartition(TableName tableName, Partition partition)
369  throws CatalogException {
370  Preconditions.checkNotNull(partition);
371  Table tbl = getExistingTable(tableName.getDb(), tableName.getTbl());
372  if (!(tbl instanceof HdfsTable)) {
373  throw new CatalogException("Table " + tbl.getFullName() + " is not an HDFS table");
374  }
375  HdfsTable hdfsTable = (HdfsTable) tbl;
376  HdfsPartition hdfsPartition = hdfsTable.createPartition(partition.getSd(), partition);
377  return catalog_.addPartition(hdfsPartition);
378  }
379 
385  private void alterView(TCreateOrAlterViewParams params, TDdlExecResponse resp)
386  throws ImpalaException {
387  TableName tableName = TableName.fromThrift(params.getView_name());
388  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
389  Preconditions.checkState(params.getColumns() != null &&
390  params.getColumns().size() > 0,
391  "Null or empty column list given as argument to DdlExecutor.alterView");
392 
393  synchronized (metastoreDdlLock_) {
394  // Operate on a copy of the metastore table to avoid prematurely applying the
395  // alteration to our cached table in case the actual alteration fails.
396  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
397  if (!msTbl.getTableType().equalsIgnoreCase((TableType.VIRTUAL_VIEW.toString()))) {
398  throw new ImpalaRuntimeException(
399  String.format("ALTER VIEW not allowed on a table: %s",
400  tableName.toString()));
401  }
402 
403  // Set the altered view attributes and update the metastore.
404  setViewAttributes(params, msTbl);
405  LOG.debug(String.format("Altering view %s", tableName));
406  applyAlterTable(msTbl);
407  }
408 
409  Table refreshedTbl = catalog_.reloadTable(tableName.toThrift());
410  resp.result.setUpdated_catalog_object(TableToTCatalogObject(refreshedTbl));
411  resp.result.setVersion(resp.result.getUpdated_catalog_object().getCatalog_version());
412  }
413 
418  private void alterTableUpdateStats(TAlterTableUpdateStatsParams params,
419  TDdlExecResponse resp) throws ImpalaException {
420  Preconditions.checkState(params.isSetPartition_stats() && params.isSetTable_stats());
421 
422  TableName tableName = TableName.fromThrift(params.getTable_name());
423  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
424  LOG.info(String.format("Updating table stats for: %s", tableName));
425 
426  Table table = getExistingTable(tableName.getDb(), tableName.getTbl());
427  // Deep copy the msTbl to avoid updating our cache before successfully persisting
428  // the results to the metastore.
429  org.apache.hadoop.hive.metastore.api.Table msTbl =
430  table.getMetaStoreTable().deepCopy();
431  List<HdfsPartition> partitions = Lists.newArrayList();
432  if (table instanceof HdfsTable) {
433  // Build a list of non-default partitions to update.
434  HdfsTable hdfsTable = (HdfsTable) table;
435  for (HdfsPartition p: hdfsTable.getPartitions()) {
436  if (!p.isDefaultPartition()) partitions.add(p);
437  }
438  }
439 
440  MetaStoreClient msClient = catalog_.getMetaStoreClient();
441  int numTargetedPartitions;
442  int numUpdatedColumns = 0;
443  try {
444  // Update the table and partition row counts based on the query results.
445  List<HdfsPartition> modifiedParts = Lists.newArrayList();
446  numTargetedPartitions = updateTableStats(table, params, msTbl, partitions,
447  modifiedParts);
448 
449  ColumnStatistics colStats = null;
450  if (params.isSetColumn_stats()) {
451  // Create Hive column stats from the query results.
452  colStats = createHiveColStats(params.getColumn_stats(), table);
453  numUpdatedColumns = colStats.getStatsObjSize();
454  }
455 
456  // Update all partitions.
457  bulkAlterPartitions(table.getDb().getName(), table.getName(), modifiedParts);
458 
459  synchronized (metastoreDdlLock_) {
460  if (numUpdatedColumns > 0) {
461  Preconditions.checkNotNull(colStats);
462  // Update column stats.
463  try {
464  msClient.getHiveClient().updateTableColumnStatistics(colStats);
465  } catch (Exception e) {
466  throw new ImpalaRuntimeException(String.format(HMS_RPC_ERROR_FORMAT_STR,
467  "updateTableColumnStatistics"), e);
468  }
469  }
470  // Update the table stats. Apply the table alteration last to ensure the
471  // lastDdlTime is as accurate as possible.
472  applyAlterTable(msTbl);
473  }
474  } finally {
475  msClient.release();
476  }
477 
478  // Set the results to be reported to the client.
479  TResultSet resultSet = new TResultSet();
480  resultSet.setSchema(new TResultSetMetadata(Lists.newArrayList(
481  new TColumn("summary", Type.STRING.toThrift()))));
482  TColumnValue resultColVal = new TColumnValue();
483  resultColVal.setString_val("Updated " + numTargetedPartitions + " partition(s) and " +
484  numUpdatedColumns + " column(s).");
485  TResultRow resultRow = new TResultRow();
486  resultRow.setColVals(Lists.newArrayList(resultColVal));
487  resultSet.setRows(Lists.newArrayList(resultRow));
488  resp.setResult_set(resultSet);
489  }
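
  // A sketch of how the modified partitions collected above could be split into
  // chunks of MAX_PARTITION_UPDATES_PER_RPC before being sent to the Hive
  // Metastore; bulkAlterPartitions() (not shown in this listing) is assumed to
  // do something along these lines so a single COMPUTE STATS does not issue one
  // oversized alter_partitions RPC.
  private static List<List<HdfsPartition>> batchPartitionUpdates(
      List<HdfsPartition> modifiedParts) {
    List<List<HdfsPartition>> batches = Lists.newArrayList();
    for (int i = 0; i < modifiedParts.size(); i += MAX_PARTITION_UPDATES_PER_RPC) {
      int end = Math.min(modifiedParts.size(), i + MAX_PARTITION_UPDATES_PER_RPC);
      // Each sub-list maps to one metastore RPC.
      batches.add(modifiedParts.subList(i, end));
    }
    return batches;
  }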
490 
501  private int updateTableStats(Table table, TAlterTableUpdateStatsParams params,
502  org.apache.hadoop.hive.metastore.api.Table msTbl,
503  List<HdfsPartition> partitions, List<HdfsPartition> modifiedParts)
504  throws ImpalaException {
505  Preconditions.checkState(params.isSetPartition_stats());
506  Preconditions.checkState(params.isSetTable_stats());
507  // Update the partitions' ROW_COUNT parameter.
508  int numTargetedPartitions = 0;
509  for (HdfsPartition partition: partitions) {
510  // NULL keys are returned as 'NULL' in the partition_stats map, so don't substitute
511  // this partition's keys with Hive's replacement value.
512  List<String> partitionValues = partition.getPartitionValuesAsStrings(false);
513  TPartitionStats partitionStats = params.partition_stats.get(partitionValues);
514  TPartitionStats existingPartStats =
515  PartitionStatsUtil.partStatsFromParameters(partition.getParameters());
516 
517  if (partitionStats == null) {
518  // No stats were collected for this partition. This means that it was not included
519  // in the original computation statements. If the backend does not find any rows
520  // for a partition that should be included, it will generate an empty
521  // TPartitionStats object.
522  if (params.expect_all_partitions == false) continue;
523 
524  // If all partitions are expected, fill in any missing stats with an empty entry.
525  partitionStats = new TPartitionStats();
526  if (params.is_incremental) {
527  partitionStats.intermediate_col_stats = Maps.newHashMap();
528  }
529  partitionStats.stats = new TTableStats();
530  partitionStats.stats.setNum_rows(0L);
531  }
532 
533  long numRows = partitionStats.stats.num_rows;
534  LOG.debug(String.format("Updating stats for partition %s: numRows=%s",
535  partition.getValuesAsString(), numRows));
536 
537  boolean updatedPartition = false;
538  // Update the partition stats if: either there are no existing stats for this
539  // partition, or they're different.
540  if (existingPartStats == null || !existingPartStats.equals(partitionStats)) {
541  PartitionStatsUtil.partStatsToParameters(partitionStats, partition);
542  updatedPartition = true;
543  }
544 
545  String existingRowCount =
546  partition.getParameters().get(StatsSetupConst.ROW_COUNT);
547  String newRowCount = String.valueOf(numRows);
548  // Update table stats
549  if (existingRowCount == null || !existingRowCount.equals(newRowCount)) {
550  // The existing row count value wasn't set or has changed.
551  partition.putToParameters(StatsSetupConst.ROW_COUNT, newRowCount);
552  partition.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK,
553  StatsSetupConst.TRUE);
554  updatedPartition = true;
555  }
556  if (updatedPartition) {
557  ++numTargetedPartitions;
558  modifiedParts.add(partition);
559  }
560  }
561 
562  // For unpartitioned tables and HBase tables report a single updated partition.
563  if (table.getNumClusteringCols() == 0 || table instanceof HBaseTable) {
564  numTargetedPartitions = 1;
565  }
566 
567  // Update the table's ROW_COUNT parameter.
568  msTbl.putToParameters(StatsSetupConst.ROW_COUNT,
569  String.valueOf(params.getTable_stats().num_rows));
570  msTbl.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK,
571  StatsSetupConst.TRUE);
572  return numTargetedPartitions;
573  }
574 
580  private static ColumnStatistics createHiveColStats(
581  Map<String, TColumnStats> columnStats, Table table) {
582  // Collection of column statistics objects to be returned.
583  ColumnStatistics colStats = new ColumnStatistics();
584  colStats.setStatsDesc(
585  new ColumnStatisticsDesc(true, table.getDb().getName(), table.getName()));
586  // Generate Hive column stats objects from the update stats params.
587  for (Map.Entry<String, TColumnStats> entry: columnStats.entrySet()) {
588  String colName = entry.getKey();
589  Column tableCol = table.getColumn(entry.getKey());
590  // Ignore columns that were dropped in the meantime.
591  if (tableCol == null) continue;
592  ColumnStatisticsData colStatsData =
593  createHiveColStatsData(entry.getValue(), tableCol.getType());
594  if (colStatsData == null) continue;
595  LOG.debug(String.format("Updating column stats for %s: numDVs=%s numNulls=%s " +
596  "maxSize=%s avgSize=%s", colName, entry.getValue().getNum_distinct_values(),
597  entry.getValue().getNum_nulls(), entry.getValue().getMax_size(),
598  entry.getValue().getAvg_size()));
599  ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(colName,
600  tableCol.getType().toString(), colStatsData);
601  colStats.addToStatsObj(colStatsObj);
602  }
603  return colStats;
604  }
605 
606  private static ColumnStatisticsData createHiveColStatsData(TColumnStats colStats,
607  Type colType) {
608  ColumnStatisticsData colStatsData = new ColumnStatisticsData();
609  long ndvs = colStats.getNum_distinct_values();
610  long numNulls = colStats.getNum_nulls();
611  switch(colType.getPrimitiveType()) {
612  case BOOLEAN:
613  // TODO: Gather and set the numTrues and numFalse stats as well. The planner
614  // currently does not rely on them.
615  colStatsData.setBooleanStats(new BooleanColumnStatsData(1, -1, numNulls));
616  break;
617  case TINYINT:
618  case SMALLINT:
619  case INT:
620  case BIGINT:
621  case TIMESTAMP: // Hive and Impala use LongColumnStatsData for timestamps.
622  // TODO: Gather and set the min/max values stats as well. The planner
623  // currently does not rely on them.
624  colStatsData.setLongStats(new LongColumnStatsData(numNulls, ndvs));
625  break;
626  case FLOAT:
627  case DOUBLE:
628  // TODO: Gather and set the min/max values stats as well. The planner
629  // currently does not rely on them.
630  colStatsData.setDoubleStats(new DoubleColumnStatsData(numNulls, ndvs));
631  break;
632  case CHAR:
633  case VARCHAR:
634  case STRING:
635  long maxStrLen = colStats.getMax_size();
636  double avgStrLen = colStats.getAvg_size();
637  colStatsData.setStringStats(
638  new StringColumnStatsData(maxStrLen, avgStrLen, numNulls, ndvs));
639  break;
640  case DECIMAL:
641  // TODO: Gather and set the min/max values stats as well. The planner
642  // currently does not rely on them.
643  colStatsData.setDecimalStats(
644  new DecimalColumnStatsData(numNulls, ndvs));
645  break;
646  default:
647  return null;
648  }
649  return colStatsData;
650  }
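
  // A hand-built example of the Hive Metastore stats object that
  // createHiveColStats()/createHiveColStatsData() assemble for a single STRING
  // column, using only constructors already invoked above. The column name and
  // the stat values are made up for illustration.
  private static ColumnStatistics exampleStringColumnStats(Table table) {
    ColumnStatisticsData data = new ColumnStatisticsData();
    // maxSize=64, avgSize=12.5, numNulls=10, ndv=1000
    data.setStringStats(new StringColumnStatsData(64L, 12.5, 10L, 1000L));
    ColumnStatisticsObj statsObj =
        new ColumnStatisticsObj("name", "string", data);
    ColumnStatistics colStats = new ColumnStatistics();
    // true -> table-level (as opposed to partition-level) statistics.
    colStats.setStatsDesc(
        new ColumnStatisticsDesc(true, table.getDb().getName(), table.getName()));
    colStats.addToStatsObj(statsObj);
    return colStats;
  }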
651 
658  private void createDatabase(TCreateDbParams params, TDdlExecResponse resp)
659  throws ImpalaException {
660  Preconditions.checkNotNull(params);
661  String dbName = params.getDb();
662  Preconditions.checkState(dbName != null && !dbName.isEmpty(),
663  "Null or empty database name passed as argument to Catalog.createDatabase");
664  if (params.if_not_exists && catalog_.getDb(dbName) != null) {
665  LOG.debug("Skipping database creation because " + dbName + " already exists and " +
666  "IF NOT EXISTS was specified.");
667  resp.getResult().setVersion(catalog_.getCatalogVersion());
668  return;
669  }
670  org.apache.hadoop.hive.metastore.api.Database db =
671  new org.apache.hadoop.hive.metastore.api.Database();
672  db.setName(dbName);
673  if (params.getComment() != null) {
674  db.setDescription(params.getComment());
675  }
676  if (params.getLocation() != null) {
677  db.setLocationUri(params.getLocation());
678  }
679  LOG.debug("Creating database " + dbName);
680  Db newDb = null;
681  synchronized (metastoreDdlLock_) {
682  MetaStoreClient msClient = catalog_.getMetaStoreClient();
683  try {
684  msClient.getHiveClient().createDatabase(db);
685  newDb = catalog_.addDb(dbName);
686  } catch (AlreadyExistsException e) {
687  if (!params.if_not_exists) {
688  throw new ImpalaRuntimeException(
689  String.format(HMS_RPC_ERROR_FORMAT_STR, "createDatabase"), e);
690  }
691  LOG.debug(String.format("Ignoring '%s' when creating database %s because " +
692  "IF NOT EXISTS was specified.", e, dbName));
693  newDb = catalog_.getDb(dbName);
694  } catch (TException e) {
695  throw new ImpalaRuntimeException(
696  String.format(HMS_RPC_ERROR_FORMAT_STR, "createDatabase"), e);
697  } finally {
698  msClient.release();
699  }
700 
701  Preconditions.checkNotNull(newDb);
702  TCatalogObject thriftDb = new TCatalogObject(TCatalogObjectType.DATABASE,
703  catalog_.getCatalogVersion());
704  thriftDb.setDb(newDb.toThrift());
705  thriftDb.setCatalog_version(newDb.getCatalogVersion());
706  resp.result.setUpdated_catalog_object(thriftDb);
707  }
708  resp.result.setVersion(resp.result.getUpdated_catalog_object().getCatalog_version());
709  }
710 
711  private void createFunction(TCreateFunctionParams params, TDdlExecResponse resp)
712  throws ImpalaException {
713  Function fn = Function.fromThrift(params.getFn());
714  LOG.debug(String.format("Adding %s: %s",
715  fn.getClass().getSimpleName(), fn.signatureString()));
716  Function existingFn =
717  catalog_.getFunction(fn, Function.CompareMode.IS_INDISTINGUISHABLE);
718  if (existingFn != null && !params.if_not_exists) {
719  throw new CatalogException("Function " + fn.signatureString() +
720  " already exists.");
721  }
722  catalog_.addFunction(fn);
723  TCatalogObject addedObject = new TCatalogObject();
724  addedObject.setType(TCatalogObjectType.FUNCTION);
725  addedObject.setFn(fn.toThrift());
726  addedObject.setCatalog_version(fn.getCatalogVersion());
727  resp.result.setUpdated_catalog_object(addedObject);
728  resp.result.setVersion(fn.getCatalogVersion());
729  }
730 
731  private void createDataSource(TCreateDataSourceParams params, TDdlExecResponse resp)
732  throws ImpalaException {
733  if (LOG.isDebugEnabled()) { LOG.debug("Adding DATA SOURCE: " + params.toString()); }
734  DataSource dataSource = DataSource.fromThrift(params.getData_source());
735  if (catalog_.getDataSource(dataSource.getName()) != null) {
736  if (!params.if_not_exists) {
737  throw new ImpalaRuntimeException("Data source " + dataSource.getName() +
738  " already exists.");
739  }
740  // The user specified IF NOT EXISTS and the data source exists, just
741  // return the current catalog version.
742  resp.result.setVersion(catalog_.getCatalogVersion());
743  return;
744  }
745  catalog_.addDataSource(dataSource);
746  TCatalogObject addedObject = new TCatalogObject();
747  addedObject.setType(TCatalogObjectType.DATA_SOURCE);
748  addedObject.setData_source(dataSource.toThrift());
749  addedObject.setCatalog_version(dataSource.getCatalogVersion());
750  resp.result.setUpdated_catalog_object(addedObject);
751  resp.result.setVersion(dataSource.getCatalogVersion());
752  }
753 
754  private void dropDataSource(TDropDataSourceParams params, TDdlExecResponse resp)
755  throws ImpalaException {
756  if (LOG.isDebugEnabled()) { LOG.debug("Drop DATA SOURCE: " + params.toString()); }
757  DataSource dataSource = catalog_.getDataSource(params.getData_source());
758  if (dataSource == null) {
759  if (!params.if_exists) {
760  throw new ImpalaRuntimeException("Data source " + params.getData_source() +
761  " does not exist.");
762  }
763  // The user specified IF EXISTS and the data source didn't exist, just
764  // return the current catalog version.
765  resp.result.setVersion(catalog_.getCatalogVersion());
766  return;
767  }
768  catalog_.removeDataSource(params.getData_source());
769  TCatalogObject removedObject = new TCatalogObject();
770  removedObject.setType(TCatalogObjectType.DATA_SOURCE);
771  removedObject.setData_source(dataSource.toThrift());
772  removedObject.setCatalog_version(dataSource.getCatalogVersion());
773  resp.result.setRemoved_catalog_object(removedObject);
774  resp.result.setVersion(dataSource.getCatalogVersion());
775  }
776 
782  private void dropStats(TDropStatsParams params, TDdlExecResponse resp)
783  throws ImpalaException {
784  Table table = getExistingTable(params.getTable_name().getDb_name(),
785  params.getTable_name().getTable_name());
786  Preconditions.checkNotNull(table);
787 
788  if (params.getPartition_spec() == null) {
789  // TODO: Report the number of updated partitions/columns to the user?
790  dropColumnStats(table);
791  dropTableStats(table);
792  } else {
793  HdfsPartition partition =
794  ((HdfsTable)table).getPartitionFromThriftPartitionSpec(
795  params.getPartition_spec());
796  if (partition == null) {
797  List<String> partitionDescription = Lists.newArrayList();
798  for (TPartitionKeyValue v: params.getPartition_spec()) {
799  partitionDescription.add(v.name + " = " + v.value);
800  }
801  throw new ImpalaRuntimeException("Could not find partition: " +
802  Joiner.on("/").join(partitionDescription));
803  }
804 
805  if (partition.getPartitionStats() != null) {
806  synchronized (metastoreDdlLock_) {
807  PartitionStatsUtil.deletePartStats(partition);
808  try {
809  applyAlterPartition(table.getTableName(), partition);
810  } finally {
811  partition.markDirty();
812  }
813  }
814  }
815  }
816 
817  Table refreshedTable = catalog_.reloadTable(params.getTable_name());
818  resp.result.setUpdated_catalog_object(TableToTCatalogObject(refreshedTable));
819  resp.result.setVersion(
820  resp.result.getUpdated_catalog_object().getCatalog_version());
821  }
822 
827  private int dropColumnStats(Table table) throws ImpalaRuntimeException {
828  MetaStoreClient msClient = null;
829  int numColsUpdated = 0;
830  try {
831  msClient = catalog_.getMetaStoreClient();
832 
833  for (Column col: table.getColumns()) {
834  // Skip columns that don't have stats.
835  if (!col.getStats().hasStats()) continue;
836 
837  try {
838  synchronized (metastoreDdlLock_) {
839  msClient.getHiveClient().deleteTableColumnStatistics(
840  table.getDb().getName(), table.getName(), col.getName());
841  ++numColsUpdated;
842  }
843  } catch (NoSuchObjectException e) {
844  // We don't care if the column stats do not exist, just ignore the exception.
845  // We would only expect to make it here if the Impala and HMS metadata
846  // diverged.
847  } catch (TException e) {
848  throw new ImpalaRuntimeException(
849  String.format(HMS_RPC_ERROR_FORMAT_STR,
850  "delete_table_column_statistics"), e);
851  }
852  }
853  } finally {
854  if (msClient != null) msClient.release();
855  }
856  return numColsUpdated;
857  }
858 
865  private int dropTableStats(Table table) throws ImpalaRuntimeException {
866  // Delete the ROW_COUNT from the table (if it was set).
867  org.apache.hadoop.hive.metastore.api.Table msTbl = table.getMetaStoreTable();
868  int numTargetedPartitions = 0;
869  if (msTbl.getParameters().remove(StatsSetupConst.ROW_COUNT) != null) {
870  applyAlterTable(msTbl);
871  ++numTargetedPartitions;
872  }
873 
874  if (!(table instanceof HdfsTable) || table.getNumClusteringCols() == 0) {
875  // If this is not an HdfsTable or if the table is not partitioned, there
876  // is no more work to be done so just return.
877  return numTargetedPartitions;
878  }
879 
880  // Now clear the stats for all partitions in the table.
881  HdfsTable hdfsTable = (HdfsTable) table;
882  Preconditions.checkNotNull(hdfsTable);
883 
884  // List of partitions that were modified as part of this operation.
885  List<HdfsPartition> modifiedParts = Lists.newArrayList();
886  for (HdfsPartition part: hdfsTable.getPartitions()) {
887  boolean isModified = false;
888  // The default partition is an Impala-internal abstraction and is not
889  // represented in the Hive Metastore.
890  if (part.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
891  continue;
892  }
893  if (part.getPartitionStats() != null) {
894  PartitionStatsUtil.deletePartStats(part);
895  isModified = true;
896  }
897 
898  // Remove the ROW_COUNT parameter if it has been set.
899  if (part.getParameters().remove(StatsSetupConst.ROW_COUNT) != null) {
900  isModified = true;
901  }
902 
903  if (isModified) modifiedParts.add(part);
904  }
905 
906  bulkAlterPartitions(table.getDb().getName(), table.getName(), modifiedParts);
907  return modifiedParts.size();
908  }
909 
916  private void dropDatabase(TDropDbParams params, TDdlExecResponse resp)
917  throws ImpalaException {
918  Preconditions.checkNotNull(params);
919  Preconditions.checkState(params.getDb() != null && !params.getDb().isEmpty(),
920  "Null or empty database name passed as argument to Catalog.dropDatabase");
921 
922  LOG.debug("Dropping database " + params.getDb());
923  Db db = catalog_.getDb(params.db);
924  if (db != null && db.numFunctions() > 0) {
925  throw new CatalogException("Database " + db.getName() + " is not empty");
926  }
927 
928  TCatalogObject removedObject = new TCatalogObject();
929  MetaStoreClient msClient = catalog_.getMetaStoreClient();
930  synchronized (metastoreDdlLock_) {
931  try {
932  msClient.getHiveClient().dropDatabase(params.getDb(), true, params.if_exists);
933  } catch (TException e) {
934  throw new ImpalaRuntimeException(
935  String.format(HMS_RPC_ERROR_FORMAT_STR, "dropDatabase"), e);
936  } finally {
937  msClient.release();
938  }
939  Db removedDb = catalog_.removeDb(params.getDb());
940  // If no db was removed as part of this operation just return the current catalog
941  // version.
942  if (removedDb == null) {
943  removedObject.setCatalog_version(catalog_.getCatalogVersion());
944  } else {
945  removedObject.setCatalog_version(removedDb.getCatalogVersion());
946  }
947  }
948  removedObject.setType(TCatalogObjectType.DATABASE);
949  removedObject.setDb(new TDatabase());
950  removedObject.getDb().setDb_name(params.getDb());
951  resp.result.setVersion(removedObject.getCatalog_version());
952  resp.result.setRemoved_catalog_object(removedObject);
953  }
954 
960  private void dropTableOrView(TDropTableOrViewParams params, TDdlExecResponse resp)
961  throws ImpalaException {
962  TableName tableName = TableName.fromThrift(params.getTable_name());
963  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
964  LOG.debug(String.format("Dropping table/view %s", tableName));
965 
966  TCatalogObject removedObject = new TCatalogObject();
967  synchronized (metastoreDdlLock_) {
968  MetaStoreClient msClient = catalog_.getMetaStoreClient();
969  try {
970  msClient.getHiveClient().dropTable(
971  tableName.getDb(), tableName.getTbl(), true, params.if_exists);
972  } catch (TException e) {
973  throw new ImpalaRuntimeException(
974  String.format(HMS_RPC_ERROR_FORMAT_STR, "dropTable"), e);
975  } finally {
976  msClient.release();
977  }
978 
979  Table table = catalog_.removeTable(params.getTable_name().db_name,
980  params.getTable_name().table_name);
981  if (table != null) {
982  resp.result.setVersion(table.getCatalogVersion());
983  if (table instanceof HdfsTable) {
984  HdfsTable hdfsTable = (HdfsTable) table;
985  if (hdfsTable.isMarkedCached()) {
986  try {
987  HdfsCachingUtil.uncacheTbl(table.getMetaStoreTable());
988  } catch (Exception e) {
989  LOG.error("Unable to uncache table: " + table.getFullName(), e);
990  }
991  }
992  if (table.getNumClusteringCols() > 0) {
993  for (HdfsPartition partition: hdfsTable.getPartitions()) {
994  if (partition.isMarkedCached()) {
995  try {
996  HdfsCachingUtil.uncachePartition(partition);
997  } catch (Exception e) {
998  LOG.error("Unable to uncache partition: " +
999  partition.getPartitionName(), e);
1000  }
1001  }
1002  }
1003  }
1004  }
1005  } else {
1006  resp.result.setVersion(catalog_.getCatalogVersion());
1007  }
1008  }
1009  removedObject.setType(TCatalogObjectType.TABLE);
1010  removedObject.setTable(new TTable());
1011  removedObject.getTable().setTbl_name(tableName.getTbl());
1012  removedObject.getTable().setDb_name(tableName.getDb());
1013  removedObject.setCatalog_version(resp.result.getVersion());
1014  resp.result.setRemoved_catalog_object(removedObject);
1015  }
1016 
1017  private void dropFunction(TDropFunctionParams params, TDdlExecResponse resp)
1018  throws ImpalaException {
1019  ArrayList<Type> argTypes = Lists.newArrayList();
1020  for (TColumnType t: params.arg_types) {
1021  argTypes.add(Type.fromThrift(t));
1022  }
1023  Function desc = new Function(FunctionName.fromThrift(params.fn_name),
1024  argTypes, Type.INVALID, false);
1025  LOG.debug(String.format("Dropping Function %s", desc.signatureString()));
1026  Function fn = catalog_.removeFunction(desc);
1027  if (fn == null) {
1028  if (!params.if_exists) {
1029  throw new CatalogException(
1030  "Function: " + desc.signatureString() + " does not exist.");
1031  }
1032  // The user specified IF EXISTS and the function didn't exist, just
1033  // return the current catalog version.
1034  resp.result.setVersion(catalog_.getCatalogVersion());
1035  } else {
1036  TCatalogObject removedObject = new TCatalogObject();
1037  removedObject.setType(TCatalogObjectType.FUNCTION);
1038  removedObject.setFn(fn.toThrift());
1039  removedObject.setCatalog_version(fn.getCatalogVersion());
1040  resp.result.setRemoved_catalog_object(removedObject);
1041  resp.result.setVersion(fn.getCatalogVersion());
1042  }
1043  }
1044 
1050  private boolean createTable(TCreateTableParams params, TDdlExecResponse response)
1051  throws ImpalaException {
1052  Preconditions.checkNotNull(params);
1053  TableName tableName = TableName.fromThrift(params.getTable_name());
1054  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
1055  Preconditions.checkState(params.getColumns() != null &&
1056  params.getColumns().size() > 0,
1057  "Null or empty column list given as argument to Catalog.createTable");
1058 
1059  if (params.if_not_exists &&
1060  catalog_.containsTable(tableName.getDb(), tableName.getTbl())) {
1061  LOG.debug(String.format("Skipping table creation because %s already exists and " +
1062  "IF NOT EXISTS was specified.", tableName));
1063  response.getResult().setVersion(catalog_.getCatalogVersion());
1064  return false;
1065  }
1066  org.apache.hadoop.hive.metastore.api.Table tbl =
1067  createMetaStoreTable(params);
1068  LOG.debug(String.format("Creating table %s", tableName));
1069  return createTable(tbl, params.if_not_exists, params.getCache_op(), response);
1070  }
1071 
1077  private void createView(TCreateOrAlterViewParams params, TDdlExecResponse response)
1078  throws ImpalaException {
1079  TableName tableName = TableName.fromThrift(params.getView_name());
1080  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
1081  Preconditions.checkState(params.getColumns() != null &&
1082  params.getColumns().size() > 0,
1083  "Null or empty column list given as argument to DdlExecutor.createView");
1084  if (params.if_not_exists &&
1085  catalog_.containsTable(tableName.getDb(), tableName.getTbl())) {
1086  LOG.debug(String.format("Skipping view creation because %s already exists and " +
1087  "ifNotExists is true.", tableName));
1088  }
1089 
1090  // Create new view.
1091  org.apache.hadoop.hive.metastore.api.Table view =
1092  new org.apache.hadoop.hive.metastore.api.Table();
1093  setViewAttributes(params, view);
1094  LOG.debug(String.format("Creating view %s", tableName));
1095  createTable(view, params.if_not_exists, null, response);
1096  }
1097 
1104  private void createTableLike(TCreateTableLikeParams params, TDdlExecResponse response)
1105  throws ImpalaException {
1106  Preconditions.checkNotNull(params);
1107 
1108  THdfsFileFormat fileFormat =
1109  params.isSetFile_format() ? params.getFile_format() : null;
1110  String comment = params.isSetComment() ? params.getComment() : null;
1111  TableName tblName = TableName.fromThrift(params.getTable_name());
1112  TableName srcTblName = TableName.fromThrift(params.getSrc_table_name());
1113  Preconditions.checkState(tblName != null && tblName.isFullyQualified());
1114  Preconditions.checkState(srcTblName != null && srcTblName.isFullyQualified());
1115 
1116  if (params.if_not_exists &&
1117  catalog_.containsTable(tblName.getDb(), tblName.getTbl())) {
1118  LOG.debug(String.format("Skipping table creation because %s already exists and " +
1119  "IF NOT EXISTS was specified.", tblName));
1120  response.getResult().setVersion(catalog_.getCatalogVersion());
1121  return;
1122  }
1123  Table srcTable = getExistingTable(srcTblName.getDb(), srcTblName.getTbl());
1124  org.apache.hadoop.hive.metastore.api.Table tbl =
1125  srcTable.getMetaStoreTable().deepCopy();
1126  tbl.setDbName(tblName.getDb());
1127  tbl.setTableName(tblName.getTbl());
1128  tbl.setOwner(params.getOwner());
1129  if (tbl.getParameters() == null) {
1130  tbl.setParameters(new HashMap<String, String>());
1131  }
1132  if (comment != null) {
1133  tbl.getParameters().put("comment", comment);
1134  }
1135  // The EXTERNAL table property should not be copied from the old table.
1136  if (params.is_external) {
1137  tbl.setTableType(TableType.EXTERNAL_TABLE.toString());
1138  tbl.putToParameters("EXTERNAL", "TRUE");
1139  } else {
1140  tbl.setTableType(TableType.MANAGED_TABLE.toString());
1141  if (tbl.getParameters().containsKey("EXTERNAL")) {
1142  tbl.getParameters().remove("EXTERNAL");
1143  }
1144  }
1145  // The LOCATION property should not be copied from the old table. If the location
1146  // is null (the caller didn't specify a custom location) this will clear the value
1147  // and the table will use the default table location from the parent database.
1148  tbl.getSd().setLocation(params.getLocation());
1149  if (fileFormat != null) {
1150  setStorageDescriptorFileFormat(tbl.getSd(), fileFormat);
1151  } else if (fileFormat == null && srcTable instanceof View) {
1152  // Here, source table is a view which has no input format. So to be
1153  // consistent with CREATE TABLE, default input format is assumed to be
1154  // TEXT unless otherwise specified.
1155  setStorageDescriptorFileFormat(tbl.getSd(), THdfsFileFormat.TEXT);
1156  }
1157  // Set the row count of this table to unknown.
1158  tbl.putToParameters(StatsSetupConst.ROW_COUNT, "-1");
1159  LOG.debug(String.format("Creating table %s LIKE %s", tblName, srcTblName));
1160  createTable(tbl, params.if_not_exists, null, response);
1161  }
1162 
1172  private boolean createTable(org.apache.hadoop.hive.metastore.api.Table newTable,
1173  boolean ifNotExists, THdfsCachingOp cacheOp, TDdlExecResponse response)
1174  throws ImpalaException {
1175  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1176  synchronized (metastoreDdlLock_) {
1177  try {
1178  msClient.getHiveClient().createTable(newTable);
1179  // If this table should be cached, and the table location was not specified by
1180  // the user, an extra step is needed to read the table to find the location.
1181  if (cacheOp != null && cacheOp.isSet_cached() &&
1182  newTable.getSd().getLocation() == null) {
1183  newTable = msClient.getHiveClient().getTable(newTable.getDbName(),
1184  newTable.getTableName());
1185  }
1186  } catch (AlreadyExistsException e) {
1187  if (!ifNotExists) {
1188  throw new ImpalaRuntimeException(
1189  String.format(HMS_RPC_ERROR_FORMAT_STR, "createTable"), e);
1190  }
1191  LOG.debug(String.format("Ignoring '%s' when creating table %s.%s because " +
1192  "IF NOT EXISTS was specified.", e,
1193  newTable.getDbName(), newTable.getTableName()));
1194  return false;
1195  } catch (TException e) {
1196  throw new ImpalaRuntimeException(
1197  String.format(HMS_RPC_ERROR_FORMAT_STR, "createTable"), e);
1198  }
1199  finally {
1200  msClient.release();
1201  }
1202  }
1203 
1204  // Submit the cache request and update the table metadata.
1205  if (cacheOp != null && cacheOp.isSet_cached()) {
1206  short replication = cacheOp.isSetReplication() ? cacheOp.getReplication() :
1207  JniCatalogConstants.HDFS_DEFAULT_CACHE_REPLICATION_FACTOR;
1208  long id = HdfsCachingUtil.submitCacheTblDirective(newTable,
1209  cacheOp.getCache_pool_name(), replication);
1210  catalog_.watchCacheDirs(Lists.<Long>newArrayList(id),
1211  new TTableName(newTable.getDbName(), newTable.getTableName()));
1212  applyAlterTable(newTable);
1213  }
1214 
1215  Table newTbl = catalog_.addTable(newTable.getDbName(), newTable.getTableName());
1216  response.result.setUpdated_catalog_object(TableToTCatalogObject(newTbl));
1217  response.result.setVersion(
1218  response.result.getUpdated_catalog_object().getCatalog_version());
1219  return true;
1220  }
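
  // A sketch of the THdfsCachingOp that a statement such as
  // CREATE TABLE ... CACHED IN 'testPool' WITH REPLICATION = 3 would hand to
  // createTable() above. The setter names are assumed from the isSet_cached(),
  // getCache_pool_name() and getReplication() getters used in this file.
  private static THdfsCachingOp exampleCacheOp() {
    THdfsCachingOp cacheOp = new THdfsCachingOp();
    cacheOp.setSet_cached(true);            // request caching of the table
    cacheOp.setCache_pool_name("testPool"); // HDFS cache pool to use
    cacheOp.setReplication((short) 3);      // cache replication factor
    return cacheOp;
  }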
1221 
1225  private void setViewAttributes(TCreateOrAlterViewParams params,
1226  org.apache.hadoop.hive.metastore.api.Table view) {
1227  view.setTableType(TableType.VIRTUAL_VIEW.toString());
1228  view.setViewOriginalText(params.getOriginal_view_def());
1229  view.setViewExpandedText(params.getExpanded_view_def());
1230  view.setDbName(params.getView_name().getDb_name());
1231  view.setTableName(params.getView_name().getTable_name());
1232  view.setOwner(params.getOwner());
1233  if (view.getParameters() == null) view.setParameters(new HashMap<String, String>());
1234  if (params.isSetComment() && params.getComment() != null) {
1235  view.getParameters().put("comment", params.getComment());
1236  }
1237 
1238  // Add all the columns to a new storage descriptor.
1239  StorageDescriptor sd = new StorageDescriptor();
1240  sd.setCols(buildFieldSchemaList(params.getColumns()));
1241  // Set a dummy SerdeInfo for Hive.
1242  sd.setSerdeInfo(new SerDeInfo());
1243  view.setSd(sd);
1244  }
1245 
1250  private void alterTableAddReplaceCols(TableName tableName, List<TColumn> columns,
1251  boolean replaceExistingCols) throws ImpalaException {
1252  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1253 
1254  List<FieldSchema> newColumns = buildFieldSchemaList(columns);
1255  if (replaceExistingCols) {
1256  msTbl.getSd().setCols(newColumns);
1257  } else {
1258  // Append the new column to the existing list of columns.
1259  for (FieldSchema fs: buildFieldSchemaList(columns)) {
1260  msTbl.getSd().addToCols(fs);
1261  }
1262  }
1263  applyAlterTable(msTbl);
1264  }
1265 
1270  private void alterTableChangeCol(TableName tableName, String colName,
1271  TColumn newCol) throws ImpalaException {
1272  synchronized (metastoreDdlLock_) {
1273  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1274  // Find the matching column name and change it.
1275  Iterator<FieldSchema> iterator = msTbl.getSd().getColsIterator();
1276  while (iterator.hasNext()) {
1277  FieldSchema fs = iterator.next();
1278  if (fs.getName().toLowerCase().equals(colName.toLowerCase())) {
1279  fs.setName(newCol.getColumnName());
1280  Type type = Type.fromThrift(newCol.getColumnType());
1281  fs.setType(type.toString().toLowerCase());
1282  // Don't overwrite the existing comment unless a new comment is given
1283  if (newCol.getComment() != null) {
1284  fs.setComment(newCol.getComment());
1285  }
1286  break;
1287  }
1288  if (!iterator.hasNext()) {
1289  throw new ColumnNotFoundException(String.format(
1290  "Column name %s not found in table %s.", colName, tableName));
1291  }
1292  }
1293  applyAlterTable(msTbl);
1294  }
1295  }
1296 
1306  private Table alterTableAddPartition(TableName tableName,
1307  List<TPartitionKeyValue> partitionSpec, boolean ifNotExists, String location,
1308  THdfsCachingOp cacheOp) throws ImpalaException {
1309  org.apache.hadoop.hive.metastore.api.Partition partition =
1310  new org.apache.hadoop.hive.metastore.api.Partition();
1311  if (ifNotExists && catalog_.containsHdfsPartition(tableName.getDb(),
1312  tableName.getTbl(), partitionSpec)) {
1313  LOG.debug(String.format("Skipping partition creation because (%s) already exists" +
1314  " and ifNotExists is true.", Joiner.on(", ").join(partitionSpec)));
1315  return null;
1316  }
1317 
1318  Table result = null;
1319  List<Long> cacheIds = null;
1320  synchronized (metastoreDdlLock_) {
1321  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1322  partition.setDbName(tableName.getDb());
1323  partition.setTableName(tableName.getTbl());
1324 
1325  Long parentTblCacheDirId =
1326  HdfsCachingUtil.getCacheDirectiveId(msTbl.getParameters());
1327 
1328  List<String> values = Lists.newArrayList();
1329  // Need to add in the values in the same order they are defined in the table.
1330  for (FieldSchema fs: msTbl.getPartitionKeys()) {
1331  for (TPartitionKeyValue kv: partitionSpec) {
1332  if (fs.getName().toLowerCase().equals(kv.getName().toLowerCase())) {
1333  values.add(kv.getValue());
1334  }
1335  }
1336  }
1337  partition.setValues(values);
1338  StorageDescriptor sd = msTbl.getSd().deepCopy();
1339  sd.setLocation(location);
1340  partition.setSd(sd);
1341  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1342  try {
1343  // Add the new partition.
1344  partition = msClient.getHiveClient().add_partition(partition);
1345  String cachePoolName = null;
1346  Short replication = null;
1347  if (cacheOp == null && parentTblCacheDirId != null) {
1348  // The user didn't specify an explicit caching operation, inherit the value
1349  // from the parent table.
1350  cachePoolName = HdfsCachingUtil.getCachePool(parentTblCacheDirId);
1351  Preconditions.checkNotNull(cachePoolName);
1352  replication = HdfsCachingUtil.getCacheReplication(parentTblCacheDirId);
1353  Preconditions.checkNotNull(replication);
1354  } else if (cacheOp != null && cacheOp.isSet_cached()) {
1355  // The user explicitly stated that this partition should be cached.
1356  cachePoolName = cacheOp.getCache_pool_name();
1357 
1358  // When the new partition should be cached and no replication factor
1359  // was specified, inherit the replication factor from the parent table if
1360  // it is cached. If the parent is not cached and no replication factor is
1361  // explicitly set, use the default value.
1362  if (!cacheOp.isSetReplication() && parentTblCacheDirId != null) {
1363  replication = HdfsCachingUtil.getCacheReplication(parentTblCacheDirId);
1364  } else {
1365  replication = HdfsCachingUtil.getReplicationOrDefault(cacheOp);
1366  }
1367  }
1368  // If cache pool name is not null, it indicates this partition should be cached.
1369  if (cachePoolName != null) {
1370  long id = HdfsCachingUtil.submitCachePartitionDirective(partition,
1371  cachePoolName, replication);
1372  cacheIds = Lists.<Long>newArrayList(id);
1373  // Update the partition metadata to include the cache directive id.
1374  msClient.getHiveClient().alter_partition(partition.getDbName(),
1375  partition.getTableName(), partition);
1376  }
1377  updateLastDdlTime(msTbl, msClient);
1378  } catch (AlreadyExistsException e) {
1379  if (!ifNotExists) {
1380  throw new ImpalaRuntimeException(
1381  String.format(HMS_RPC_ERROR_FORMAT_STR, "add_partition"), e);
1382  }
1383  LOG.debug(String.format("Ignoring '%s' when adding partition to %s because" +
1384  " ifNotExists is true.", e, tableName));
1385  } catch (TException e) {
1386  throw new ImpalaRuntimeException(
1387  String.format(HMS_RPC_ERROR_FORMAT_STR, "add_partition"), e);
1388  } finally {
1389  msClient.release();
1390  }
1391  }
1392  if (cacheIds != null) catalog_.watchCacheDirs(cacheIds, tableName.toThrift());
1393  // Return the table object with an updated catalog version after creating the
1394  // partition.
1395  result = addHdfsPartition(tableName, partition);
1396  return result;
1397  }
1398 
1406  private Table alterTableDropPartition(TableName tableName,
1407  List<TPartitionKeyValue> partitionSpec, boolean ifExists)
1408  throws ImpalaException {
1409  if (ifExists && !catalog_.containsHdfsPartition(tableName.getDb(), tableName.getTbl(),
1410  partitionSpec)) {
1411  LOG.debug(String.format("Skipping partition drop because (%s) does not exist " +
1412  "and ifExists is true.", Joiner.on(", ").join(partitionSpec)));
1413  return null;
1414  }
1415 
1416  HdfsPartition part = catalog_.getHdfsPartition(tableName.getDb(),
1417  tableName.getTbl(), partitionSpec);
1418  synchronized (metastoreDdlLock_) {
1419  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1420  List<String> values = Lists.newArrayList();
1421  // Need to add in the values in the same order they are defined in the table.
1422  for (FieldSchema fs: msTbl.getPartitionKeys()) {
1423  for (TPartitionKeyValue kv: partitionSpec) {
1424  if (fs.getName().toLowerCase().equals(kv.getName().toLowerCase())) {
1425  values.add(kv.getValue());
1426  }
1427  }
1428  }
1429  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1430  try {
1431  msClient.getHiveClient().dropPartition(tableName.getDb(),
1432  tableName.getTbl(), values);
1433  updateLastDdlTime(msTbl, msClient);
1434  if (part.isMarkedCached()) {
1435  HdfsCachingUtil.uncachePartition(part);
1436  }
1437  } catch (NoSuchObjectException e) {
1438  if (!ifExists) {
1439  throw new ImpalaRuntimeException(
1440  String.format(HMS_RPC_ERROR_FORMAT_STR, "dropPartition"), e);
1441  }
1442  LOG.debug(String.format("Ignoring '%s' when dropping partition from %s because" +
1443  " ifExists is true.", e, tableName));
1444  } catch (TException e) {
1445  throw new ImpalaRuntimeException(
1446  String.format(HMS_RPC_ERROR_FORMAT_STR, "dropPartition"), e);
1447  } finally {
1448  msClient.release();
1449  }
1450  }
1451  return catalog_.dropPartition(tableName, partitionSpec);
1452  }
1453 
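// Drops the given column from the table's Metastore schema. Throws a
// ColumnNotFoundException if no column named 'colName' exists in the table.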
1457  private void alterTableDropCol(TableName tableName, String colName)
1458  throws ImpalaException {
1459  synchronized (metastoreDdlLock_) {
1460  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1461 
1462  // Find the matching column name and remove it.
1463  Iterator<FieldSchema> iterator = msTbl.getSd().getColsIterator();
1464  while (iterator.hasNext()) {
1465  FieldSchema fs = iterator.next();
1466  if (fs.getName().toLowerCase().equals(colName.toLowerCase())) {
1467  iterator.remove();
1468  break;
1469  }
1470  if (!iterator.hasNext()) {
1471  throw new ColumnNotFoundException(String.format(
1472  "Column name %s not found in table %s.", colName, tableName));
1473  }
1474  }
1475  applyAlterTable(msTbl);
1476  }
1477  }
1478 
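// Renames a table or view, updating both the Hive Metastore and the catalog cache.
// The result is reflected in the response as a removal of the old object plus an
// addition of the object under its new name.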
1485  private void alterTableOrViewRename(TableName tableName, TableName newTableName,
1486  TDdlExecResponse response)
1487  throws ImpalaException {
1488  synchronized (metastoreDdlLock_) {
1489  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1490  msTbl.setDbName(newTableName.getDb());
1491  msTbl.setTableName(newTableName.getTbl());
1492 
1493  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1494  try {
1495  // Workaround for HIVE-9720/IMPALA-1711: When renaming a table with column stats
1496  // across databases, we save, drop and restore the column stats because the HMS
1497  // does not properly move them to the new table via alteration. The following
1498  // block needs to be protected by the metastoreDdlLock_ to avoid conflicts with
1499  // concurrent DDL on this same table (e.g., drop+add table with same db/name).
1500  ColumnStatistics hmsColStats = null;
1501  if (!msTbl.getTableType().equalsIgnoreCase(TableType.VIRTUAL_VIEW.toString()) &&
1502  !tableName.getDb().equalsIgnoreCase(newTableName.getDb())) {
1503  Table oldTbl = getExistingTable(tableName.getDb(), tableName.getTbl());
1504  Map<String, TColumnStats> colStats = Maps.newHashMap();
1505  for (Column c: oldTbl.getColumns()) {
1506  colStats.put(c.getName(), c.getStats().toThrift());
1507  }
1508  hmsColStats = createHiveColStats(colStats, oldTbl);
1509  // Set the new db/table.
1510  hmsColStats.setStatsDesc(
1511  new ColumnStatisticsDesc(true, newTableName.getDb(), newTableName.getTbl()));
1512 
1513  LOG.trace(String.format("Dropping column stats for table %s being " +
1514  "renamed to %s to workaround HIVE-9720.",
1515  tableName.toString(), newTableName.toString()));
1516  // Delete all column stats of the original table from the HMS.
1517  msClient.getHiveClient().deleteTableColumnStatistics(
1518  tableName.getDb(), tableName.getTbl(), null);
1519  }
1520 
1521  // Perform the table rename in any case.
1522  msClient.getHiveClient().alter_table(
1523  tableName.getDb(), tableName.getTbl(), msTbl);
1524 
1525  if (hmsColStats != null) {
1526  LOG.trace(String.format("Restoring column stats for table %s being " +
1527  "renamed to %s to workaround HIVE-9720.",
1528  tableName.toString(), newTableName.toString()));
1529  msClient.getHiveClient().updateTableColumnStatistics(hmsColStats);
1530  }
1531  } catch (TException e) {
1532  throw new ImpalaRuntimeException(
1533  String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_table"), e);
1534  } finally {
1535  msClient.release();
1536  }
1537  }
1538 
1539  // Rename the table in the Catalog and get the resulting catalog object.
1540  // ALTER TABLE/VIEW RENAME is implemented as an ADD + DROP.
1541  TCatalogObject newTable = TableToTCatalogObject(
1542  catalog_.renameTable(tableName.toThrift(), newTableName.toThrift()));
1543  TCatalogObject removedObject = new TCatalogObject();
1544  removedObject.setType(TCatalogObjectType.TABLE);
1545  removedObject.setTable(new TTable());
1546  removedObject.getTable().setTbl_name(tableName.getTbl());
1547  removedObject.getTable().setDb_name(tableName.getDb());
1548  removedObject.setCatalog_version(newTable.getCatalog_version());
1549  response.result.setRemoved_catalog_object(removedObject);
1550  response.result.setUpdated_catalog_object(newTable);
1551  response.result.setVersion(newTable.getCatalog_version());
1552  }
1553 
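// Sets the file format of the table, or of a single partition when a partition
// spec is given, by rewriting the storage descriptor in the Hive Metastore.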
1560  private void alterTableSetFileFormat(TableName tableName,
1561  List<TPartitionKeyValue> partitionSpec, THdfsFileFormat fileFormat)
1562  throws ImpalaException {
1563  Preconditions.checkState(partitionSpec == null || !partitionSpec.isEmpty());
1564  if (partitionSpec == null) {
1565  synchronized (metastoreDdlLock_) {
1566  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1567  setStorageDescriptorFileFormat(msTbl.getSd(), fileFormat);
1568  applyAlterTable(msTbl);
1569  }
1570  } else {
1571  synchronized (metastoreDdlLock_) {
1572  HdfsPartition partition = catalog_.getHdfsPartition(
1573  tableName.getDb(), tableName.getTbl(), partitionSpec);
1574  Preconditions.checkNotNull(partition);
1575  partition.setFileFormat(HdfsFileFormat.fromThrift(fileFormat));
1576  try {
1577  applyAlterPartition(tableName, partition);
1578  } finally {
1579  partition.markDirty();
1580  }
1581  }
1582  }
1583  }
1584 
1588  private static void setStorageDescriptorFileFormat(StorageDescriptor sd,
1589  THdfsFileFormat fileFormat) {
1590  StorageDescriptor tempSd =
1591  HiveStorageDescriptorFactory.createSd(fileFormat, RowFormat.DEFAULT_ROW_FORMAT);
1592  sd.setInputFormat(tempSd.getInputFormat());
1593  sd.setOutputFormat(tempSd.getOutputFormat());
1594  sd.getSerdeInfo().setSerializationLib(tempSd.getSerdeInfo().getSerializationLib());
1595  }
1596 
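// Sets the HDFS location of the table, or of a single partition when a partition
// spec is given, and applies the change to the Hive Metastore.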
1601  private void alterTableSetLocation(TableName tableName,
1602  List<TPartitionKeyValue> partitionSpec, String location) throws ImpalaException {
1603  Preconditions.checkState(partitionSpec == null || !partitionSpec.isEmpty());
1604  if (partitionSpec == null) {
1605  synchronized (metastoreDdlLock_) {
1606  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1607  msTbl.getSd().setLocation(location);
1608  applyAlterTable(msTbl);
1609  }
1610  } else {
1611  synchronized (metastoreDdlLock_) {
1612  HdfsPartition partition = catalog_.getHdfsPartition(
1613  tableName.getDb(), tableName.getTbl(), partitionSpec);
1614  partition.setLocation(location);
1615  try {
1616  applyAlterPartition(tableName, partition);
1617  } finally {
1618  partition.markDirty();
1619  }
1620  }
1621  }
1622  }
1623 
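// Applies table property or serde property changes to the table, or to a single
// partition when a partition spec is given in the request.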
1628  private void alterTableSetTblProperties(TableName tableName,
1629  TAlterTableSetTblPropertiesParams params) throws ImpalaException {
1630  Map<String, String> properties = params.getProperties();
1631  Preconditions.checkNotNull(properties);
1632  synchronized (metastoreDdlLock_) {
1633  if (params.isSetPartition_spec()) {
1634  // Alter partition params.
1635  HdfsPartition partition = catalog_.getHdfsPartition(
1636  tableName.getDb(), tableName.getTbl(), params.getPartition_spec());
1637  switch (params.getTarget()) {
1638  case TBL_PROPERTY:
1639  partition.getParameters().putAll(properties);
1640  break;
1641  case SERDE_PROPERTY:
1642  partition.getSerdeInfo().getParameters().putAll(properties);
1643  break;
1644  default:
1645  throw new UnsupportedOperationException(
1646  "Unknown target TTablePropertyType: " + params.getTarget());
1647  }
1648  try {
1649  applyAlterPartition(tableName, partition);
1650  } finally {
1651  partition.markDirty();
1652  }
1653  } else {
1654  // Alter table params.
1655  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1656  switch (params.getTarget()) {
1657  case TBL_PROPERTY:
1658  msTbl.getParameters().putAll(properties);
1659  break;
1660  case SERDE_PROPERTY:
1661  msTbl.getSd().getSerdeInfo().getParameters().putAll(properties);
1662  break;
1663  default:
1664  throw new UnsupportedOperationException(
1665  "Unknown target TTablePropertyType: " + params.getTarget());
1666  }
1667  applyAlterTable(msTbl);
1668  }
1669  }
1670  }
1671 
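// Handles ALTER TABLE SET CACHED/UNCACHED for an HDFS table. Caching submits or
// updates HDFS cache directives for the table and all of its partitions; uncaching
// removes any existing directives. Only valid for HdfsTable targets.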
1680  private void alterTableSetCached(TableName tableName,
1681  TAlterTableSetCachedParams params) throws ImpalaException {
1682  THdfsCachingOp cacheOp = params.getCache_op();
1683  Preconditions.checkNotNull(cacheOp);
1684  // Alter table params.
1685  Table table = getExistingTable(tableName.getDb(), tableName.getTbl());
1686  if (!(table instanceof HdfsTable)) {
1687  throw new ImpalaRuntimeException("ALTER TABLE SET CACHED/UNCACHED must target " +
1688  "an HDFS table.");
1689  }
1690  HdfsTable hdfsTable = (HdfsTable) table;
1691  org.apache.hadoop.hive.metastore.api.Table msTbl = table.getMetaStoreTable();
1692  Long cacheDirId = HdfsCachingUtil.getCacheDirectiveId(msTbl.getParameters());
1693  if (cacheOp.isSet_cached()) {
1694  // List of cache directive IDs that were submitted as part of this
1695  // ALTER TABLE operation.
1696  List<Long> cacheDirIds = Lists.newArrayList();
1697  short cacheReplication = HdfsCachingUtil.getReplicationOrDefault(cacheOp);
1698  // If the table was not previously cached (cacheDirId == null) we issue a new
1699  // directive for this table. If the table was already cached, we validate
1700  // the pool name and update the cache replication factor if necessary
1701  if (cacheDirId == null) {
1702  cacheDirIds.add(HdfsCachingUtil.submitCacheTblDirective(msTbl,
1703  cacheOp.getCache_pool_name(), cacheReplication));
1704  } else {
1705  // Check if the cache directive needs to be changed
1706  if (HdfsCachingUtil.isUpdateOp(cacheOp, msTbl.getParameters())) {
1707  HdfsCachingUtil.validateCachePool(cacheOp, cacheDirId, tableName);
1708  cacheDirIds.add(HdfsCachingUtil.modifyCacheDirective(cacheDirId, msTbl,
1709  cacheOp.getCache_pool_name(), cacheReplication));
1710  }
1711  }
1712 
1713  if (table.getNumClusteringCols() > 0) {
1714  // If this is a partitioned table, submit cache directives for all uncached
1715  // partitions.
1716  for (HdfsPartition partition: hdfsTable.getPartitions()) {
1717  // No need to cache the default partition because it contains no files and is
1718  // not referred to by scan nodes.
1719  if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
1720  continue;
1721  }
1722  // Only issue cache directives if the data is uncached or the cache directive
1723  // needs to be updated
1724  if (!partition.isMarkedCached() ||
1725  HdfsCachingUtil.isUpdateOp(cacheOp, partition.getParameters())) {
1726  try {
1727  // If the partition was already cached, update the directive; otherwise
1728  // issue a new cache directive.
1729  if (!partition.isMarkedCached()) {
1730  cacheDirIds.add(HdfsCachingUtil.submitCachePartitionDirective(
1731  partition, cacheOp.getCache_pool_name(), cacheReplication));
1732  } else {
1733  Long directiveId =
1734  HdfsCachingUtil.getCacheDirectiveId(partition.getParameters());
1735  cacheDirIds.add(HdfsCachingUtil.modifyCacheDirective(directiveId,
1736  partition, cacheOp.getCache_pool_name(), cacheReplication));
1737  }
1738  } catch (ImpalaRuntimeException e) {
1739  if (partition.isMarkedCached()) {
1740  LOG.error("Unable to modify cache partition: " +
1741  partition.getPartitionName(), e);
1742  } else {
1743  LOG.error("Unable to cache partition: " +
1744  partition.getPartitionName(), e);
1745  }
1746  }
1747 
1748  // Update the partition metadata.
1749  try {
1750  applyAlterPartition(tableName, partition);
1751  } finally {
1752  partition.markDirty();
1753  }
1754  }
1755  }
1756  }
1757 
1758  // Nothing to do.
1759  if (cacheDirIds.isEmpty()) return;
1760 
1761  // Submit a request to watch these cache directives. The TableLoadingMgr will
1762  // asynchronously refresh the table metadata once the directives complete.
1763  catalog_.watchCacheDirs(cacheDirIds, tableName.toThrift());
1764  } else {
1765  // Uncache the table.
1766  if (cacheDirId != null) HdfsCachingUtil.uncacheTbl(msTbl);
1767  // Uncache all table partitions.
1768  if (table.getNumClusteringCols() > 0) {
1769  for (HdfsPartition partition: ((HdfsTable) table).getPartitions()) {
1770  if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
1771  continue;
1772  }
1773  if (partition.isMarkedCached()) {
1774  HdfsCachingUtil.uncachePartition(partition);
1775  try {
1776  applyAlterPartition(tableName, partition);
1777  } finally {
1778  partition.markDirty();
1779  }
1780  }
1781  }
1782  }
1783  }
1784 
1785  // Update the table metadata.
1786  applyAlterTable(msTbl);
1787  }
1788 
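// Handles ALTER TABLE ... PARTITION SET CACHED/UNCACHED for a single partition,
// submitting, updating, or removing the partition's HDFS cache directive.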
1796  private void alterPartitionSetCached(TableName tableName,
1797  TAlterTableSetCachedParams params) throws ImpalaException {
1798  THdfsCachingOp cacheOp = params.getCache_op();
1799  Preconditions.checkNotNull(cacheOp);
1800  Preconditions.checkNotNull(params.getPartition_spec());
1801  synchronized (metastoreDdlLock_) {
1802  // Alter partition params.
1803  HdfsPartition partition = catalog_.getHdfsPartition(
1804  tableName.getDb(), tableName.getTbl(), params.getPartition_spec());
1805  if (cacheOp.isSet_cached()) {
1806 
1807  // The directive is null if the partition is not cached
1808  Long directiveId = HdfsCachingUtil.getCacheDirectiveId(
1809  partition.getParameters());
1810  short replication = HdfsCachingUtil.getReplicationOrDefault(cacheOp);
1811  List<Long> cacheDirs = Lists.newArrayList();
1812 
1813  if (directiveId == null) {
1814  cacheDirs.add(HdfsCachingUtil.submitCachePartitionDirective(partition,
1815  cacheOp.getCache_pool_name(), replication));
1816  } else {
1817  if (HdfsCachingUtil.isUpdateOp(cacheOp, partition.getParameters())) {
1818  HdfsCachingUtil.validateCachePool(cacheOp, directiveId, tableName, partition);
1819  cacheDirs.add(HdfsCachingUtil.modifyCacheDirective(directiveId, partition,
1820  cacheOp.getCache_pool_name(), replication));
1821  }
1822  }
1823 
1824  // Once the cache directives are submitted, observe the status of the caching
1825  // until no more progress is made -- either fully cached or out of cache memory
1826  if (!cacheDirs.isEmpty()) {
1827  catalog_.watchCacheDirs(cacheDirs, tableName.toThrift());
1828  }
1829 
1830  } else {
1831  // Partition is not cached, just return.
1832  if (!partition.isMarkedCached()) return;
1833  HdfsCachingUtil.uncachePartition(partition);
1834  }
1835  try {
1836  applyAlterPartition(tableName, partition);
1837  } finally {
1838  partition.markDirty();
1839  }
1840  }
1841  }
1842 
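// Pushes the given Metastore table object to the Hive Metastore via alter_table and
// bumps the table's transient_lastDdlTime parameter as part of the same update.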
1853  private void applyAlterTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
1854  throws ImpalaRuntimeException {
1855  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1856  long lastDdlTime = -1;
1857  try {
1858  lastDdlTime = calculateDdlTime(msTbl);
1859  msTbl.putToParameters("transient_lastDdlTime", Long.toString(lastDdlTime));
1860  msClient.getHiveClient().alter_table(
1861  msTbl.getDbName(), msTbl.getTableName(), msTbl);
1862  } catch (TException e) {
1863  throw new ImpalaRuntimeException(
1864  String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_table"), e);
1865  } finally {
1866  msClient.release();
1867  catalog_.updateLastDdlTime(
1868  new TTableName(msTbl.getDbName(), msTbl.getTableName()), lastDdlTime);
1869  }
1870  }
1871 
1872  private void applyAlterPartition(TableName tableName, HdfsPartition partition)
1873  throws ImpalaException {
1874  MetaStoreClient msClient = catalog_.getMetaStoreClient();
1875  try {
1876  msClient.getHiveClient().alter_partition(
1877  tableName.getDb(), tableName.getTbl(), partition.toHmsPartition());
1878  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tableName);
1879  updateLastDdlTime(msTbl, msClient);
1880  } catch (TException e) {
1881  throw new ImpalaRuntimeException(
1882  String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_partition"), e);
1883  } finally {
1884  msClient.release();
1885  }
1886  }
1887 
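// Creates or drops a Sentry role, depending on the is_drop flag in the request, and
// returns the affected role in the DDL response.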
1891  private void createDropRole(User requestingUser,
1892  TCreateDropRoleParams createDropRoleParams, TDdlExecResponse resp)
1893  throws ImpalaException {
1894  Preconditions.checkNotNull(requestingUser);
1896 
1897  Role role;
1898  if (createDropRoleParams.isIs_drop()) {
1899  role = catalog_.getSentryProxy().dropRole(requestingUser,
1900  createDropRoleParams.getRole_name());
1901  if (role == null) {
1902  role = new Role(createDropRoleParams.getRole_name(), Sets.<String>newHashSet());
1903  role.setCatalogVersion(catalog_.getCatalogVersion());
1904  }
1905  } else {
1906  role = catalog_.getSentryProxy().createRole(requestingUser,
1907  createDropRoleParams.getRole_name());
1908  }
1909  Preconditions.checkNotNull(role);
1910 
1911  TCatalogObject catalogObject = new TCatalogObject();
1912  catalogObject.setType(role.getCatalogObjectType());
1913  catalogObject.setRole(role.toThrift());
1914  catalogObject.setCatalog_version(role.getCatalogVersion());
1915  if (createDropRoleParams.isIs_drop()) {
1916  resp.result.setRemoved_catalog_object(catalogObject);
1917  } else {
1918  resp.result.setUpdated_catalog_object(catalogObject);
1919  }
1920  resp.result.setVersion(role.getCatalogVersion());
1921  }
1922 
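// Grants or revokes a single role to/from a single group via the Sentry proxy. Only
// the first role name and group name in the request are used.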
1927  private void grantRevokeRoleGroup(User requestingUser,
1928  TGrantRevokeRoleParams grantRevokeRoleParams, TDdlExecResponse resp)
1929  throws ImpalaException {
1930  Preconditions.checkNotNull(requestingUser);
1932 
1933  String roleName = grantRevokeRoleParams.getRole_names().get(0);
1934  String groupName = grantRevokeRoleParams.getGroup_names().get(0);
1935  Role role = null;
1936  if (grantRevokeRoleParams.isIs_grant()) {
1937  role = catalog_.getSentryProxy().grantRoleGroup(requestingUser, roleName,
1938  groupName);
1939  } else {
1940  role = catalog_.getSentryProxy().revokeRoleGroup(requestingUser, roleName,
1941  groupName);
1942  }
1943  Preconditions.checkNotNull(role);
1944  TCatalogObject catalogObject = new TCatalogObject();
1945  catalogObject.setType(role.getCatalogObjectType());
1946  catalogObject.setRole(role.toThrift());
1947  catalogObject.setCatalog_version(role.getCatalogVersion());
1948  resp.result.setUpdated_catalog_object(catalogObject);
1949  resp.result.setVersion(role.getCatalogVersion());
1950  }
1951 
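// Grants or revokes a set of privileges to/from a role via the Sentry proxy,
// reporting the result in the DDL response.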
1956  private void grantRevokeRolePrivilege(User requestingUser,
1957  TGrantRevokePrivParams grantRevokePrivParams, TDdlExecResponse resp)
1958  throws ImpalaException {
1959  Preconditions.checkNotNull(requestingUser);
1961  String roleName = grantRevokePrivParams.getRole_name();
1962  List<TCatalogObject> updatedPrivs = Lists.newArrayList();
1963  for (TPrivilege privilege: grantRevokePrivParams.getPrivileges()) {
1964  RolePrivilege rolePriv;
1965  if (grantRevokePrivParams.isIs_grant()) {
1966  rolePriv = catalog_.getSentryProxy().grantRolePrivilege(requestingUser,
1967  roleName, privilege);
1968  } else {
1969  rolePriv = catalog_.getSentryProxy().revokeRolePrivilege(requestingUser,
1970  roleName, privilege);
1971  if (rolePriv == null) {
1972  rolePriv = RolePrivilege.fromThrift(privilege);
1973  rolePriv.setCatalogVersion(catalog_.getCatalogVersion());
1974  }
1975  }
1976  Preconditions.checkNotNull(rolePriv);
1977  TCatalogObject catalogObject = new TCatalogObject();
1978  catalogObject.setType(rolePriv.getCatalogObjectType());
1979  catalogObject.setPrivilege(rolePriv.toThrift());
1980  catalogObject.setCatalog_version(rolePriv.getCatalogVersion());
1981  updatedPrivs.add(catalogObject);
1982  }
1983 
1984  // TODO: Currently we only support sending back 1 catalog object in a "direct DDL"
1985  // response. If multiple privileges have been updated, just send back the
1986  // catalog version so subscribers can wait for the statestore heartbeat that contains
1987  // all updates.
1988  if (updatedPrivs.size() == 1) {
1989  // If this is a REVOKE statement with hasGrantOpt, only the GRANT OPTION is revoked
1990  // from the privilege.
1991  if (grantRevokePrivParams.isIs_grant() ||
1992  grantRevokePrivParams.getPrivileges().get(0).isHas_grant_opt()) {
1993  resp.result.setUpdated_catalog_object(updatedPrivs.get(0));
1994  } else {
1995  resp.result.setRemoved_catalog_object(updatedPrivs.get(0));
1996  }
1997  resp.result.setVersion(updatedPrivs.get(0).getCatalog_version());
1998  } else if (updatedPrivs.size() > 1) {
1999  resp.result.setVersion(
2000  updatedPrivs.get(updatedPrivs.size() - 1).getCatalog_version());
2001  }
2002  }
2003 
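// Throws a CatalogException if the Sentry Service is not enabled on this catalogd.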
2007  private void verifySentryServiceEnabled() throws CatalogException {
2008  if (catalog_.getSentryProxy() == null) {
2009  throw new CatalogException("Sentry Service is not enabled on the " +
2010  "CatalogServer.");
2011  }
2012  }
2013 
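// Applies alterations to a list of partitions in the Hive Metastore, batching the
// alter_partitions RPCs in groups of MAX_PARTITION_UPDATES_PER_RPC partitions.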
2019  private void bulkAlterPartitions(String dbName, String tableName,
2020  List<HdfsPartition> modifiedParts) throws ImpalaRuntimeException {
2021  MetaStoreClient msClient = null;
2022  List<org.apache.hadoop.hive.metastore.api.Partition> hmsPartitions =
2023  Lists.newArrayList();
2024  for (HdfsPartition p: modifiedParts) {
2025  org.apache.hadoop.hive.metastore.api.Partition msPart = p.toHmsPartition();
2026  if (msPart != null) hmsPartitions.add(msPart);
2027  }
2028  if (hmsPartitions.size() == 0) return;
2029  try {
2030  msClient = catalog_.getMetaStoreClient();
2031 
2032  // Apply the updates in batches of 'MAX_PARTITION_UPDATES_PER_RPC'.
2033  for (int i = 0; i < hmsPartitions.size(); i += MAX_PARTITION_UPDATES_PER_RPC) {
2034  int numPartitionsToUpdate =
2035  Math.min(i + MAX_PARTITION_UPDATES_PER_RPC, hmsPartitions.size());
2036  synchronized (metastoreDdlLock_) {
2037  try {
2038  // Alter partitions in bulk.
2039  msClient.getHiveClient().alter_partitions(dbName, tableName,
2040  hmsPartitions.subList(i, numPartitionsToUpdate));
2041  } catch (TException e) {
2042  throw new ImpalaRuntimeException(
2043  String.format(HMS_RPC_ERROR_FORMAT_STR, "alter_partitions"), e);
2044  }
2045  }
2046  }
2047  } finally {
2048  if (msClient != null) msClient.release();
2049  }
2050  }
2051 
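// Returns a deep copy of the Metastore table object for the given fully-qualified
// table name, so callers can modify it without affecting the cached catalog entry.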
2055  private org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable(
2056  TableName tableName) throws CatalogException {
2057  Preconditions.checkState(tableName != null && tableName.isFullyQualified());
2058  return getExistingTable(tableName.getDb(), tableName.getTbl())
2059  .getMetaStoreTable().deepCopy();
2060  }
2061 
2062  public static List<FieldSchema> buildFieldSchemaList(List<TColumn> columns) {
2063  List<FieldSchema> fsList = Lists.newArrayList();
2064  // Add in all the columns
2065  for (TColumn col: columns) {
2066  Type type = Type.fromThrift(col.getColumnType());
2067  // The type string must be lowercase for Hive to read the column metadata properly.
2068  String typeSql = type.toSql().toLowerCase();
2069  FieldSchema fs = new FieldSchema(col.getColumnName(), typeSql, col.getComment());
2070  fsList.add(fs);
2071  }
2072  return fsList;
2073  }
2074 
2075  private static TCatalogObject TableToTCatalogObject(Table table) {
2076  if (table != null) return table.toTCatalogObject();
2077  return new TCatalogObject(TCatalogObjectType.TABLE,
2078  Catalog.INITIAL_CATALOG_VERSION);
2079  }
2080 
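// Sets a new transient_lastDdlTime on the given Metastore table, pushes the change
// via alter_table when a Metastore client is supplied, and records the new value in
// the catalog. Returns the new lastDdlTime.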
2087  public long updateLastDdlTime(org.apache.hadoop.hive.metastore.api.Table msTbl,
2088  MetaStoreClient msClient) throws MetaException, NoSuchObjectException, TException {
2089  Preconditions.checkNotNull(msTbl);
2090  LOG.debug("Updating lastDdlTime for table: " + msTbl.getTableName());
2091  Map<String, String> params = msTbl.getParameters();
2092  long lastDdlTime = calculateDdlTime(msTbl);
2093  params.put("transient_lastDdlTime", Long.toString(lastDdlTime));
2094  msTbl.setParameters(params);
2095  if (msClient != null) {
2096  msClient.getHiveClient().alter_table(
2097  msTbl.getDbName(), msTbl.getTableName(), msTbl);
2098  }
2099  catalog_.updateLastDdlTime(
2100  new TTableName(msTbl.getDbName(), msTbl.getTableName()), lastDdlTime);
2101  return lastDdlTime;
2102  }
2103 
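// Computes a new lastDdlTime in seconds; if the current time equals the table's
// existing lastDdlTime, it is bumped by one second so the value always increases.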
2107  private static long calculateDdlTime(
2108  org.apache.hadoop.hive.metastore.api.Table msTbl) {
2109  long existingLastDdlTime = CatalogServiceCatalog.getLastDdlTime(msTbl);
2110  long currentTime = System.currentTimeMillis() / 1000;
2111  if (existingLastDdlTime == currentTime) ++currentTime;
2112  return currentTime;
2113  }
2114 
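// Builds a Hive Metastore Table object from the CREATE TABLE request parameters,
// including table properties, storage descriptor, columns, and partition keys.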
2121  public static org.apache.hadoop.hive.metastore.api.Table
2122  createMetaStoreTable(TCreateTableParams params) {
2123  Preconditions.checkNotNull(params);
2124  TableName tableName = TableName.fromThrift(params.getTable_name());
2125  org.apache.hadoop.hive.metastore.api.Table tbl =
2126  new org.apache.hadoop.hive.metastore.api.Table();
2127  tbl.setDbName(tableName.getDb());
2128  tbl.setTableName(tableName.getTbl());
2129  tbl.setOwner(params.getOwner());
2130  if (params.isSetTable_properties()) {
2131  tbl.setParameters(params.getTable_properties());
2132  } else {
2133  tbl.setParameters(new HashMap<String, String>());
2134  }
2135 
2136  if (params.getComment() != null) {
2137  tbl.getParameters().put("comment", params.getComment());
2138  }
2139  if (params.is_external) {
2140  tbl.setTableType(TableType.EXTERNAL_TABLE.toString());
2141  tbl.putToParameters("EXTERNAL", "TRUE");
2142  } else {
2143  tbl.setTableType(TableType.MANAGED_TABLE.toString());
2144  }
2145 
2146  StorageDescriptor sd = HiveStorageDescriptorFactory.createSd(
2147  params.getFile_format(), RowFormat.fromThrift(params.getRow_format()));
2148 
2149  if (params.isSetSerde_properties()) {
2150  if (sd.getSerdeInfo().getParameters() == null) {
2151  sd.getSerdeInfo().setParameters(params.getSerde_properties());
2152  } else {
2153  sd.getSerdeInfo().getParameters().putAll(params.getSerde_properties());
2154  }
2155  }
2156 
2157  if (params.getLocation() != null) {
2158  sd.setLocation(params.getLocation());
2159  }
2160  // Add in all the columns
2161  sd.setCols(buildFieldSchemaList(params.getColumns()));
2162  tbl.setSd(sd);
2163  if (params.getPartition_columns() != null) {
2164  // Add in any partition keys that were specified
2165  tbl.setPartitionKeys(buildFieldSchemaList(params.getPartition_columns()));
2166  } else {
2167  tbl.setPartitionKeys(new ArrayList<FieldSchema>());
2168  }
2169  return tbl;
2170  }
2171 
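// Executes INVALIDATE METADATA or REFRESH. With a table name, only that table is
// refreshed or invalidated; without one, the entire catalog is reset.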
2185  public TResetMetadataResponse execResetMetadata(TResetMetadataRequest req)
2186  throws CatalogException {
2187  TResetMetadataResponse resp = new TResetMetadataResponse();
2188  resp.setResult(new TCatalogUpdateResult());
2189  resp.getResult().setCatalog_service_id(JniCatalog.getServiceId());
2190 
2191  if (req.isSetTable_name()) {
2192  // Tracks any CatalogObjects updated/added/removed as a result of
2193  // the invalidate metadata or refresh call. For refresh() it is only expected
2194  // that a table be modified, but for invalidateTable() the table's parent database
2195  // may have also been added if it did not previously exist in the catalog.
2196  Pair<Db, Table> modifiedObjects = new Pair<Db, Table>(null, null);
2197 
2198  boolean wasRemoved = false;
2199  if (req.isIs_refresh()) {
2200  modifiedObjects.second = catalog_.reloadTable(req.getTable_name());
2201  } else {
2202  wasRemoved = catalog_.invalidateTable(req.getTable_name(), modifiedObjects);
2203  }
2204 
2205  if (modifiedObjects.first == null) {
2206  TCatalogObject thriftTable = TableToTCatalogObject(modifiedObjects.second);
2207  if (modifiedObjects.second != null) {
2208  // Return the TCatalogObject in the result to indicate this request can be
2209  // processed as a direct DDL operation.
2210  if (wasRemoved) {
2211  resp.getResult().setRemoved_catalog_object(thriftTable);
2212  } else {
2213  resp.getResult().setUpdated_catalog_object(thriftTable);
2214  }
2215  } else {
2216  // Table does not exist in the Metastore or the Impala catalog; throw an error.
2217  throw new TableNotFoundException("Table not found: " +
2218  req.getTable_name().getDb_name() + "."
2219  + req.getTable_name().getTable_name());
2220  }
2221  resp.getResult().setVersion(thriftTable.getCatalog_version());
2222  } else {
2223  // If there were two catalog objects modified it indicates there was an
2224  // "invalidateTable()" call that added a new table AND database to the catalog.
2225  Preconditions.checkState(!req.isIs_refresh());
2226  Preconditions.checkNotNull(modifiedObjects.first);
2227  Preconditions.checkNotNull(modifiedObjects.second);
2228 
2229  // The database should always have a lower catalog version than the table because
2230  // it needs to be created before the table can be added.
2231  Preconditions.checkState(modifiedObjects.first.getCatalogVersion() <
2232  modifiedObjects.second.getCatalogVersion());
2233 
2234  // Since multiple catalog objects were modified, don't treat this as a direct DDL
2235  // operation. Just set the overall catalog version and the impalad will wait for
2236  // a statestore heartbeat that contains the update.
2237  resp.getResult().setVersion(modifiedObjects.second.getCatalogVersion());
2238  }
2239  } else {
2240  // Invalidate the entire catalog if no table name is provided.
2241  Preconditions.checkArgument(!req.isIs_refresh());
2242  catalog_.reset();
2243  resp.result.setVersion(catalog_.getCatalogVersion());
2244  }
2245  resp.getResult().setStatus(
2246  new TStatus(TErrorCode.OK, new ArrayList<String>()));
2247  return resp;
2248  }
2249 
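// Updates the Metastore and catalog after an INSERT: creates any new partitions in
// the Hive Metastore, re-submits cache directives for cached targets, and triggers
// an incremental refresh of the table. Only HDFS tables are supported.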
2261  public TUpdateCatalogResponse updateCatalog(TUpdateCatalogRequest update)
2262  throws ImpalaException {
2263  TUpdateCatalogResponse response = new TUpdateCatalogResponse();
2264  // Only update metastore for Hdfs tables.
2265  Table table = getExistingTable(update.getDb_name(), update.getTarget_table());
2266  if (!(table instanceof HdfsTable)) {
2267  throw new InternalException("Unexpected table type: " +
2268  update.getTarget_table());
2269  }
2270 
2271  // Collects the cache directive IDs of any cached table/partitions that were
2272  // targeted. A watch on these cache directives is submitted to the TableLoadingMgr
2273  // and the table will be refreshed asynchronously after all cache directives
2274  // complete.
2275  List<Long> cacheDirIds = Lists.<Long>newArrayList();
2276 
2277  // If the table is cached, get its cache pool name and replication factor. New
2278  // partitions will inherit this property.
2279  String cachePoolName = null;
2280  Short cacheReplication = 0;
2281  Long cacheDirId = HdfsCachingUtil.getCacheDirectiveId(
2282  table.getMetaStoreTable().getParameters());
2283  if (cacheDirId != null) {
2284  try {
2285  cachePoolName = HdfsCachingUtil.getCachePool(cacheDirId);
2286  cacheReplication = HdfsCachingUtil.getCacheReplication(cacheDirId);
2287  Preconditions.checkNotNull(cacheReplication);
2288  if (table.getNumClusteringCols() == 0) cacheDirIds.add(cacheDirId);
2289  } catch (ImpalaRuntimeException e) {
2290  // Catch the error so that the actual update to the catalog can still proceed;
2291  // note that this effectively resets caching for the table.
2292  LOG.error(
2293  String.format("Cache directive %d was not found, uncache the table %s.%s" +
2294  " to remove this message.", cacheDirId, update.getDb_name(),
2295  update.getTarget_table()));
2296  cacheDirId = null;
2297  }
2298 
2299  }
2300 
2301  TableName tblName = new TableName(table.getDb().getName(), table.getName());
2302  List<String> errorMessages = Lists.newArrayList();
2303  if (table.getNumClusteringCols() > 0) {
2304  // Set of all partition names targeted by the insert that need to be created
2305  // in the Metastore (partitions that do not currently exist in the catalog).
2306  // In the BE, we don't currently distinguish between which targeted partitions are
2307  // new and which already exist, so initialize the set with all targeted partition
2308  // names and remove the ones that are found to exist.
2309  Set<String> partsToCreate = Sets.newHashSet(update.getCreated_partitions());
2310  for (HdfsPartition partition: ((HdfsTable) table).getPartitions()) {
2311  // Skip dummy default partition.
2312  if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
2313  continue;
2314  }
2315  // TODO: In the BE we build partition names without a trailing char. In the FE we
2316  // build partition names with a trailing char. We should make this consistent.
2317  String partName = partition.getPartitionName() + "/";
2318 
2319  // Attempt to remove this partition name from partsToCreate. If remove
2320  // returns true, it indicates the partition already exists.
2321  if (partsToCreate.remove(partName) && partition.isMarkedCached()) {
2322  // The partition was targeted by the insert and is also cached. Since data
2323  // was written to the partition, a watch needs to be placed on the cache
2324  // directive so the TableLoadingMgr can perform an async refresh once
2325  // all data becomes cached.
2326  cacheDirIds.add(HdfsCachingUtil.getCacheDirectiveId(
2327  partition.getParameters()));
2328  }
2329  if (partsToCreate.size() == 0) break;
2330  }
2331 
2332  if (!partsToCreate.isEmpty()) {
2333  MetaStoreClient msClient = catalog_.getMetaStoreClient();
2334  try {
2335  org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(tblName);
2336  List<org.apache.hadoop.hive.metastore.api.Partition> hmsParts =
2337  Lists.newArrayList();
2338  HiveConf hiveConf = new HiveConf(this.getClass());
2339  Warehouse warehouse = new Warehouse(hiveConf);
2340  for (String partName: partsToCreate) {
2341  org.apache.hadoop.hive.metastore.api.Partition partition =
2342  new org.apache.hadoop.hive.metastore.api.Partition();
2343  hmsParts.add(partition);
2344 
2345  partition.setDbName(tblName.getDb());
2346  partition.setTableName(tblName.getTbl());
2347  partition.setValues(getPartValsFromName(msTbl, partName));
2348  partition.setParameters(new HashMap<String, String>());
2349  partition.setSd(msTbl.getSd().deepCopy());
2350  partition.getSd().setSerdeInfo(msTbl.getSd().getSerdeInfo().deepCopy());
2351  partition.getSd().setLocation(msTbl.getSd().getLocation() + "/" +
2352  partName.substring(0, partName.length() - 1));
2353  MetaStoreUtils.updatePartitionStatsFast(partition, warehouse);
2354  }
2355 
2356  // First add_partitions and then alter_partitions the successful ones with
2357  // caching directives. The reason is that some partitions could have been
2358  // added concurrently, and we want to avoid caching a partition twice and
2359  // leaking a caching directive.
2360  List<org.apache.hadoop.hive.metastore.api.Partition> addedHmsParts =
2361  msClient.getHiveClient().add_partitions(hmsParts, true, true);
2362 
2363  if (addedHmsParts.size() > 0) {
2364  if (cachePoolName != null) {
2365  List<org.apache.hadoop.hive.metastore.api.Partition> cachedHmsParts =
2366  Lists.newArrayList();
2367  // Submit a new cache directive and update the partition metadata with
2368  // the directive id.
2369  for (org.apache.hadoop.hive.metastore.api.Partition part: addedHmsParts) {
2370  try {
2371  cacheDirIds.add(HdfsCachingUtil.submitCachePartitionDirective(
2372  part, cachePoolName, cacheReplication));
2373  cachedHmsParts.add(part);
2374  } catch (ImpalaRuntimeException e) {
2375  String msg = String.format("Partition %s.%s(%s): State: Not cached." +
2376  " Action: Cache manually via 'ALTER TABLE'.",
2377  part.getDbName(), part.getTableName(), part.getValues());
2378  LOG.error(msg, e);
2379  errorMessages.add(msg);
2380  }
2381  }
2382  try {
2383  msClient.getHiveClient().alter_partitions(tblName.getDb(),
2384  tblName.getTbl(), cachedHmsParts);
2385  } catch (Exception e) {
2386  LOG.error("Failed in alter_partitions: ", e);
2387  // Try to uncache the partitions when the alteration in the HMS failed.
2388  for (org.apache.hadoop.hive.metastore.api.Partition part:
2389  cachedHmsParts) {
2390  try {
2391  HdfsCachingUtil.uncachePartition(part);
2392  } catch (ImpalaException e1) {
2393  String msg = String.format(
2394  "Partition %s.%s(%s): State: Leaked caching directive. " +
2395  "Action: Manually uncache directory %s via hdfs cacheAdmin.",
2396  part.getDbName(), part.getTableName(), part.getValues(),
2397  part.getSd().getLocation());
2398  LOG.error(msg, e);
2399  errorMessages.add(msg);
2400  }
2401  }
2402  }
2403  }
2404  updateLastDdlTime(msTbl, msClient);
2405  }
2406  } catch (AlreadyExistsException e) {
2407  throw new InternalException(
2408  "AlreadyExistsException thrown although ifNotExists given", e);
2409  } catch (Exception e) {
2410  throw new InternalException("Error adding partitions", e);
2411  } finally {
2412  msClient.release();
2413  }
2414  }
2415  }
2416 
2417  // Submit the watch request for the given cache directives.
2418  if (!cacheDirIds.isEmpty()) catalog_.watchCacheDirs(cacheDirIds, tblName.toThrift());
2419 
2420  response.setResult(new TCatalogUpdateResult());
2421  response.getResult().setCatalog_service_id(JniCatalog.getServiceId());
2422  if (errorMessages.size() > 0) {
2423  errorMessages.add("Please refer to the catalogd error log for details " +
2424  "regarding the failed un/caching operations.");
2425  response.getResult().setStatus(
2426  new TStatus(TErrorCode.INTERNAL_ERROR, errorMessages));
2427  } else {
2428  response.getResult().setStatus(
2429  new TStatus(TErrorCode.OK, new ArrayList<String>()));
2430  }
2431  // Perform an incremental refresh to load new/modified partitions and files.
2432  Table refreshedTbl = catalog_.reloadTable(tblName.toThrift());
2433  response.getResult().setUpdated_catalog_object(TableToTCatalogObject(refreshedTbl));
2434  response.getResult().setVersion(
2435  response.getResult().getUpdated_catalog_object().getCatalog_version());
2436  return response;
2437  }
2438 
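// Extracts the partition key values from an HMS partition name in the order of the
// table's partition keys, e.g. a name "p1=v1/p2=v2" yields [v1, v2] when the
// partition keys are (p1, p2). Throws if any partition key is missing from the name.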
2439  private List<String> getPartValsFromName(org.apache.hadoop.hive.metastore.api.Table
2440  msTbl, String partName) throws MetaException, CatalogException {
2441  Preconditions.checkNotNull(msTbl);
2442  LinkedHashMap<String, String> hm =
2443  org.apache.hadoop.hive.metastore.Warehouse.makeSpecFromName(partName);
2444  List<String> partVals = Lists.newArrayList();
2445  for (FieldSchema field: msTbl.getPartitionKeys()) {
2446  String key = field.getName();
2447  String val = hm.get(key);
2448  if (val == null) {
2449  throw new CatalogException("Incomplete partition name - missing " + key);
2450  }
2451  partVals.add(val);
2452  }
2453  return partVals;
2454  }
2455 
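// Fetches a table from the catalog, loading it if necessary. Throws a
// TableNotFoundException if it does not exist and a TableLoadingException (or other
// CatalogException) if the table could not be loaded completely.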
2468  public Table getExistingTable(String dbName, String tblName) throws CatalogException {
2469  Table tbl = catalog_.getOrLoadTable(dbName, tblName);
2470  if (tbl == null) {
2471  throw new TableNotFoundException("Table not found: " + dbName + "." + tblName);
2472  }
2473 
2474  if (!tbl.isLoaded()) {
2475  throw new CatalogException(String.format("Table '%s.%s' was modified while " +
2476  "operation was in progress, aborting execution.", dbName, tblName));
2477  }
2478 
2479  if (tbl instanceof IncompleteTable && tbl.isLoaded()) {
2480  // The table loading failed. Throw an exception.
2481  ImpalaException e = ((IncompleteTable) tbl).getCause();
2482  if (e instanceof TableLoadingException) {
2483  throw (TableLoadingException) e;
2484  }
2485  throw new TableLoadingException(e.getMessage(), e);
2486  }
2487  Preconditions.checkNotNull(tbl);
2488  Preconditions.checkState(tbl.isLoaded());
2489  return tbl;
2490  }
2491 }