Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
MetaStoreUtil.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.util;
16 
17 import java.util.List;
18 
19 import org.apache.hadoop.hive.conf.HiveConf;
20 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
21 import org.apache.hadoop.hive.metastore.api.MetaException;
22 import org.apache.hadoop.hive.metastore.api.Partition;
23 import org.apache.log4j.Logger;
24 import org.apache.thrift.TException;
25 
27 import com.google.common.base.Preconditions;
28 import com.google.common.collect.Lists;
29 
33 public class MetaStoreUtil {
34  private static final Logger LOG = Logger.getLogger(MetaStoreUtil.class);
35 
36  // The default maximum number of partitions to fetch from the Hive metastore in one
37  // RPC.
38  private final static short DEFAULT_MAX_PARTITIONS_PER_RPC = 1000;
39 
40  // The maximum number of partitions to fetch from the metastore in one RPC.
41  // Read from the 'hive.metastore.batch.retrieve.table.partition.max' Hive configuration
42  // and defaults to DEFAULT_MAX_PARTITION_BATCH_SIZE if the value is not present in the
43  // Hive configuration.
45 
46  static {
47  // Get the value from the Hive configuration, if present.
48  HiveConf hiveConf = new HiveConf(HdfsTable.class);
49  String strValue = hiveConf.get(
50  HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_TABLE_PARTITION_MAX.toString());
51  if (strValue != null) {
52  try {
53  maxPartitionsPerRpc_ = Short.parseShort(strValue);
54  } catch (NumberFormatException e) {
55  LOG.error("Error parsing max partition batch size from HiveConfig: ", e);
56  }
57  }
58  if (maxPartitionsPerRpc_ <= 0) {
59  LOG.error(String.format("Invalid value for max partition batch size: %d. Using " +
61  maxPartitionsPerRpc_ = DEFAULT_MAX_PARTITIONS_PER_RPC;
62  }
63  }
64 
75  public static List<org.apache.hadoop.hive.metastore.api.Partition> fetchAllPartitions(
76  HiveMetaStoreClient client, String dbName, String tblName, int numRetries)
77  throws MetaException, TException {
78  Preconditions.checkArgument(numRetries >= 0);
79  int retryAttempt = 0;
80  while (true) {
81  try {
82  // First, get all partition names that currently exist.
83  List<String> partNames = client.listPartitionNames(dbName, tblName, (short) -1);
84  return MetaStoreUtil.fetchPartitionsByName(client, partNames, dbName, tblName);
85  } catch (MetaException e) {
86  // Only retry for MetaExceptions, since TExceptions could indicate a broken
87  // connection which we can't recover from by retrying.
88  if (retryAttempt < numRetries) {
89  LOG.error(String.format("Error fetching partitions for table: %s.%s. " +
90  "Retry attempt: %d/%d", dbName, tblName, retryAttempt, numRetries), e);
91  ++retryAttempt;
92  // TODO: Sleep for a bit?
93  } else {
94  throw e;
95  }
96  }
97  }
98  }
99 
106  public static List<Partition> fetchPartitionsByName(
107  HiveMetaStoreClient client, List<String> partNames, String dbName, String tblName)
108  throws MetaException, TException {
109  LOG.trace(String.format("Fetching %d partitions for: %s.%s using partition " +
110  "batch size: %d", partNames.size(), dbName, tblName, maxPartitionsPerRpc_));
111 
112  List<org.apache.hadoop.hive.metastore.api.Partition> fetchedPartitions =
113  Lists.newArrayList();
114  // Fetch the partitions in batches.
115  for (int i = 0; i < partNames.size(); i += maxPartitionsPerRpc_) {
116  // Get a subset of partition names to fetch.
117  List<String> partsToFetch =
118  partNames.subList(i, Math.min(i + maxPartitionsPerRpc_, partNames.size()));
119  // Fetch these partitions from the metastore.
120  fetchedPartitions.addAll(
121  client.getPartitionsByNames(dbName, tblName, partsToFetch));
122  }
123  return fetchedPartitions;
124  }
125 }
static List< Partition > fetchPartitionsByName(HiveMetaStoreClient client, List< String > partNames, String dbName, String tblName)
static final short DEFAULT_MAX_PARTITIONS_PER_RPC
static List< org.apache.hadoop.hive.metastore.api.Partition > fetchAllPartitions(HiveMetaStoreClient client, String dbName, String tblName, int numRetries)