Impala
Impala is the open source, native analytic database for Apache Hadoop.
HdfsTable.java
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.catalog;
16 
17 import static com.cloudera.impala.thrift.ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID;
18 
19 import java.io.IOException;
20 import java.io.InputStream;
21 import java.net.URI;
22 import java.net.URL;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.TreeMap;
32 
33 import org.apache.commons.io.IOUtils;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.fs.BlockLocation;
36 import org.apache.hadoop.fs.BlockStorageLocation;
37 import org.apache.hadoop.fs.FileStatus;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.fs.VolumeId;
41 import org.apache.hadoop.hbase.util.Bytes;
42 import org.apache.hadoop.hdfs.DFSConfigKeys;
43 import org.apache.hadoop.hdfs.DistributedFileSystem;
44 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
45 import org.apache.hadoop.hive.metastore.api.FieldSchema;
46 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
47 import org.apache.hadoop.hive.serde.serdeConstants;
48 import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
49 import org.apache.hadoop.util.StringUtils;
50 import org.slf4j.Logger;
51 import org.slf4j.LoggerFactory;
52 
63 import com.cloudera.impala.thrift.ImpalaInternalServiceConstants;
64 import com.cloudera.impala.thrift.TAccessLevel;
65 import com.cloudera.impala.thrift.TCatalogObjectType;
66 import com.cloudera.impala.thrift.TColumn;
67 import com.cloudera.impala.thrift.THdfsFileBlock;
68 import com.cloudera.impala.thrift.THdfsPartition;
69 import com.cloudera.impala.thrift.THdfsTable;
70 import com.cloudera.impala.thrift.TNetworkAddress;
71 import com.cloudera.impala.thrift.TPartitionKeyValue;
72 import com.cloudera.impala.thrift.TResultRow;
73 import com.cloudera.impala.thrift.TResultSet;
74 import com.cloudera.impala.thrift.TResultSetMetadata;
75 import com.cloudera.impala.thrift.TTable;
76 import com.cloudera.impala.thrift.TTableDescriptor;
77 import com.cloudera.impala.thrift.TTableType;
81 import com.cloudera.impala.util.ListMap;
85 import com.google.common.base.Joiner;
86 import com.google.common.base.Preconditions;
87 import com.google.common.collect.Lists;
88 import com.google.common.collect.Maps;
89 import com.google.common.collect.Sets;
90 
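// Summary: an HdfsTable is the catalog's in-memory representation of an HDFS-backed
// table. It loads column definitions and partitions from the Hive Metastore, file and
// block metadata from the filesystem, and serializes the result to Thrift for the
// planner and backends.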
104 public class HdfsTable extends Table {
105  // hive's default value for table property 'serialization.null.format'
106  private static final String DEFAULT_NULL_COLUMN_VALUE = "\\N";
107 
108  // Number of times to retry fetching the partitions from the HMS should an error occur.
109  private final static int NUM_PARTITION_FETCH_RETRIES = 5;
110 
111  // An invalid network address, which will always be treated as remote.
112  private final static TNetworkAddress REMOTE_NETWORK_ADDRESS =
113  new TNetworkAddress("remote*addr", 0);
114 
115  // Minimum block size in bytes allowed for synthetic file blocks (other than the last
116  // block, which may be shorter).
117  private final static long MIN_SYNTHETIC_BLOCK_SIZE = 1024 * 1024;
118 
119  // string to indicate NULL. set in load() from table properties
120  private String nullColumnValue_;
121 
122  // hive uses this string for NULL partition keys. Set in load().
123  private String nullPartitionKeyValue_;
124 
125  // Avro schema of this table if this is an Avro table, otherwise null. Set in load().
126  private String avroSchema_ = null;
127 
128  // True if this table's metadata is marked as cached. Does not necessarily mean the
129  // data is cached or that all/any partitions are cached.
130  private boolean isMarkedCached_ = false;
131 
132  private static boolean hasLoggedDiskIdFormatWarning_ = false;
133 
134  private final List<HdfsPartition> partitions_; // these are only non-empty partitions
135 
136  // Array of sorted maps storing the association between partition values and
137  // partition ids. There is one sorted map per partition key.
138  private final ArrayList<TreeMap<LiteralExpr, HashSet<Long>>> partitionValuesMap_ =
139  Lists.newArrayList();
140 
141  // Array of partition id sets that correspond to partitions with null values
142  // in the partition keys; one set per partition key.
143  private final ArrayList<HashSet<Long>> nullPartitionIds_ = Lists.newArrayList();
144 
145  // Map of partition ids to HdfsPartitions. Used for speeding up partition
146  // pruning.
147  private final HashMap<Long, HdfsPartition> partitionMap_ = Maps.newHashMap();
148 
149  // Store all the partition ids of an HdfsTable.
150  private final HashSet<Long> partitionIds_ = Sets.newHashSet();
151 
152  // Flag to indicate if the HdfsTable has the partition metadata populated.
153  private boolean hasPartitionMd_ = false;
154 
155  // Bi-directional map between an integer index and a unique datanode
156  // TNetworkAddresses, each of which contains blocks of 1 or more
157  // files in this table. The network addresses are stored using IP
158  // address as the host name. Each FileBlock specifies a list of
159  // indices within this hostIndex_ to specify which nodes contain
160  // replicas of the block.
161  private final ListMap<TNetworkAddress> hostIndex_ = new ListMap<TNetworkAddress>();
162 
163  // Map of parent directory (partition location) to list of files (FileDescriptors)
164  // under that directory. Used to look up/index all files in the table.
165  private final Map<String, List<FileDescriptor>> fileDescMap_ = Maps.newHashMap();
166 
167  // Total number of Hdfs files in this table. Set in load().
168  private long numHdfsFiles_;
169 
170  // Sum of sizes of all Hdfs files in this table. Set in load().
171  private long totalHdfsBytes_;
172 
173  // True iff the table's partitions are located on more than one filesystem.
174  private boolean multipleFileSystems_ = false;
175 
176  // Base Hdfs directory where files of this table are stored.
177  // For unpartitioned tables it is simply the path where all files live.
178  // For partitioned tables it is the root directory
179  // under which partition dirs are placed.
180  protected String hdfsBaseDir_;
181 
182  private final static Logger LOG = LoggerFactory.getLogger(HdfsTable.class);
183 
184  // Caching this configuration object makes calls to getFileSystem much quicker
185  // (saves ~50ms on a standard plan)
186  // TODO(henry): confirm that this is thread safe - cursory inspection of the class
187  // and its usage in getFileSystem suggests it should be.
188  private static final Configuration CONF = new Configuration();
189 
190  private static final boolean SUPPORTS_VOLUME_ID;
191 
192  // Wrapper around a FileSystem object to hash based on the underlying FileSystem's
193  // scheme and authority.
194  private static class FsKey {
195  FileSystem filesystem;
196 
197  public FsKey(FileSystem fs) { filesystem = fs; }
198 
199  @Override
200  public int hashCode() { return filesystem.getUri().hashCode(); }
201 
202  @Override
203  public boolean equals(Object o) {
204  if (o == this) return true;
205  if (o != null && o instanceof FsKey) {
206  URI uri = filesystem.getUri();
207  URI otherUri = ((FsKey)o).filesystem.getUri();
208  return uri.equals(otherUri);
209  }
210  return false;
211  }
212 
213  @Override
214  public String toString() { return filesystem.getUri().toString(); }
215  }
216 
217  // Keeps track of newly added THdfsFileBlock metadata and its corresponding
218  // BlockLocation. For each i, blocks.get(i) corresponds to locations.get(i). Once
219  // all the new file blocks are collected, the disk volume IDs are retrieved in one
220  // batched DFS call.
221  private static class FileBlocksInfo {
222  final List<THdfsFileBlock> blocks = Lists.newArrayList();
223  final List<BlockLocation> locations = Lists.newArrayList();
224 
225  public void addBlocks(List<THdfsFileBlock> b, List<BlockLocation> l) {
226  Preconditions.checkState(b.size() == l.size());
227  blocks.addAll(b);
228  locations.addAll(l);
229  }
230  }
231 
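// Initializes SUPPORTS_VOLUME_ID from DFS_HDFS_BLOCKS_METADATA_ENABLED: when the
// datanodes do not expose block-volume metadata, loadDiskIds() is skipped and all
// disk IDs remain unknown.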
232  static {
233  SUPPORTS_VOLUME_ID =
234  CONF.getBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
235  DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);
236  }
237 
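// Decodes an HDFS VolumeId into a 0-based disk index by parsing its hex toString()
// form as a 4-byte big-endian integer. For example, assuming toString() yields
// "00000002", the decoded disk id is 2. Returns -1 if the id cannot be decoded.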
243  private static int getDiskId(VolumeId hdfsVolumeId) {
244  // Initialize the diskId as -1 to indicate it is unknown
245  int diskId = -1;
246 
247  if (hdfsVolumeId != null) {
248  // TODO: this is a hack and we'll have to address this by getting the
249  // public API. Also, we need to be very mindful of this when we change
250  // the version of HDFS.
251  String volumeIdString = hdfsVolumeId.toString();
252  // This is the hacky part. The toString is currently the underlying id
253  // encoded as hex.
254  byte[] volumeIdBytes = StringUtils.hexStringToByte(volumeIdString);
255  if (volumeIdBytes != null && volumeIdBytes.length == 4) {
256  diskId = Bytes.toInt(volumeIdBytes);
257  } else if (!hasLoggedDiskIdFormatWarning_) {
258  LOG.warn("wrong disk id format: " + volumeIdString);
259  hasLoggedDiskIdFormatWarning_ = true;
260  }
261  }
262  return diskId;
263  }
264 
265  public Map<String, List<FileDescriptor>> getFileDescMap() { return fileDescMap_; }
266 
267  public boolean spansMultipleFileSystems() { return multipleFileSystems_; }
268 
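// Queries the filesystem for the block locations of a single file and records, per
// block, its offset, length, and replica hosts (as indices into hostIndex_), plus
// which replicas are cached. Falls back to synthesizeBlockMetadata() for filesystems
// that do not expose block locations.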
275  private void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
276  HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks) {
277  Preconditions.checkNotNull(fd);
278  Preconditions.checkNotNull(perFsFileBlocks);
279  Preconditions.checkArgument(!file.isDirectory());
280  LOG.debug("load block md for " + name_ + " file " + fd.getFileName());
281 
282  if (!FileSystemUtil.hasGetFileBlockLocations(fs)) {
283  synthesizeBlockMetadata(fs, fd, fileFormat);
284  return;
285  }
286  try {
287  BlockLocation[] locations = fs.getFileBlockLocations(file, 0, file.getLen());
288  Preconditions.checkNotNull(locations);
289 
290  // Loop over all blocks in the file.
291  for (BlockLocation loc: locations) {
292  Preconditions.checkNotNull(loc);
293  // Get the location of all block replicas in ip:port format.
294  String[] blockHostPorts = loc.getNames();
295  // Get the hostnames for all block replicas. Used to resolve which hosts
296  // contain cached data. The results are returned in the same order as
297  // block.getNames() so it allows us to match a host specified as ip:port to
298  // corresponding hostname using the same array index.
299  String[] blockHostNames = loc.getHosts();
300  Preconditions.checkState(blockHostNames.length == blockHostPorts.length);
301  // Get the hostnames that contain cached replicas of this block.
302  Set<String> cachedHosts =
303  Sets.newHashSet(Arrays.asList(loc.getCachedHosts()));
304  Preconditions.checkState(cachedHosts.size() <= blockHostNames.length);
305 
306  // Now enumerate all replicas of the block, adding any unknown hosts
307  // to hostMap_/hostList_. The host ID (index in to the hostList_) for each
308  // replica is stored in replicaHostIdxs.
309  List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(
310  blockHostPorts.length);
311  for (int i = 0; i < blockHostPorts.length; ++i) {
312  TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]);
313  Preconditions.checkState(networkAddress != null);
314  replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
315  cachedHosts.contains(blockHostNames[i])));
316  }
317  fd.addFileBlock(new FileBlock(loc.getOffset(), loc.getLength(), replicas));
318  }
319  // Remember the THdfsFileBlocks and corresponding BlockLocations. Once all the
320  // blocks are collected, the disk IDs will be queried in one batch per filesystem.
321  addPerFsFileBlocks(perFsFileBlocks, fs, fd.getFileBlocks(),
322  Arrays.asList(locations));
323  } catch (IOException e) {
324  throw new RuntimeException("couldn't determine block locations for path '" +
325  file.getPath() + "':\n" + e.getMessage(), e);
326  }
327  }
328 
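// Fabricates block metadata for filesystems that cannot report real block locations:
// the file is split into blockSize-sized synthetic blocks (a single block for
// non-splittable formats), each with one 'remote' replica.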
335  private void synthesizeBlockMetadata(FileSystem fs, FileDescriptor fd,
336  HdfsFileFormat fileFormat) {
337  long start = 0;
338  long remaining = fd.getFileLength();
339  // Workaround HADOOP-11584 by using the filesystem default block size rather than
340  // the block size from the FileStatus.
341  // TODO: after HADOOP-11584 is resolved, get the block size from the FileStatus.
342  long blockSize = fs.getDefaultBlockSize();
343  if (blockSize < MIN_SYNTHETIC_BLOCK_SIZE) blockSize = MIN_SYNTHETIC_BLOCK_SIZE;
344  if (!fileFormat.isSplittable(HdfsCompression.fromFileName(fd.getFileName()))) {
345  blockSize = remaining;
346  }
347  while (remaining > 0) {
348  long len = Math.min(remaining, blockSize);
349  List<BlockReplica> replicas = Lists.newArrayList(
350  new BlockReplica(hostIndex_.getIndex(REMOTE_NETWORK_ADDRESS), false));
351  fd.addFileBlock(new FileBlock(start, len, replicas));
352  remaining -= len;
353  start += len;
354  }
355  }
356 
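// For each DistributedFileSystem key, issues one batched getFileBlockStorageLocations()
// call for all collected blocks and attaches the decoded 0-based disk IDs to the
// corresponding THdfsFileBlocks. Skipped entirely if SUPPORTS_VOLUME_ID is false.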
363  private void loadDiskIds(Map<FsKey, FileBlocksInfo> perFsFileBlocks) {
364  if (!SUPPORTS_VOLUME_ID) return;
365  // Loop over each filesystem. If the filesystem is DFS, retrieve the volume IDs
366  // for all the blocks.
367  for (FsKey fsKey: perFsFileBlocks.keySet()) {
368  FileSystem fs = fsKey.filesystem;
369  // Only DistributedFileSystem has getFileBlockStorageLocations(). It's not even
370  // part of the FileSystem interface, so we'll need to downcast.
371  if (!(fs instanceof DistributedFileSystem)) continue;
372 
373  LOG.trace("Loading disk ids for: " + getFullName() + ". nodes: " + getNumNodes() +
374  ". filesystem: " + fsKey);
375  DistributedFileSystem dfs = (DistributedFileSystem)fs;
376  FileBlocksInfo blockLists = perFsFileBlocks.get(fsKey);
377  Preconditions.checkNotNull(blockLists);
378  BlockStorageLocation[] storageLocs = null;
379  try {
380  // Get the BlockStorageLocations for all the blocks
381  storageLocs = dfs.getFileBlockStorageLocations(blockLists.locations);
382  } catch (IOException e) {
383  LOG.error("Couldn't determine block storage locations for filesystem " +
384  fs + ":\n" + e.getMessage());
385  continue;
386  }
387  if (storageLocs == null || storageLocs.length == 0) {
388  LOG.warn("Attempted to get block locations for filesystem " + fs +
389  " but the call returned no results");
390  continue;
391  }
392  if (storageLocs.length != blockLists.locations.size()) {
393  // Block locations and storage locations didn't match up.
394  LOG.error("Number of block storage locations not equal to number of blocks: "
395  + "#storage locations=" + Long.toString(storageLocs.length)
396  + " #blocks=" + Long.toString(blockLists.locations.size()));
397  continue;
398  }
399  long unknownDiskIdCount = 0;
400  // Attach volume IDs given by the storage location to the corresponding
401  // THdfsFileBlocks.
402  for (int locIdx = 0; locIdx < storageLocs.length; ++locIdx) {
403  VolumeId[] volumeIds = storageLocs[locIdx].getVolumeIds();
404  THdfsFileBlock block = blockLists.blocks.get(locIdx);
405  // Convert opaque VolumeId to 0 based ids.
406  // TODO: the diskId should be eventually retrievable from Hdfs when the
407  // community agrees this API is useful.
408  int[] diskIds = new int[volumeIds.length];
409  for (int i = 0; i < volumeIds.length; ++i) {
410  diskIds[i] = getDiskId(volumeIds[i]);
411  if (diskIds[i] < 0) ++unknownDiskIdCount;
412  }
413  FileBlock.setDiskIds(diskIds, block);
414  }
415  if (unknownDiskIdCount > 0) {
416  LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
417  }
418  }
419  }
420 
421  public HdfsTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl,
422  Db db, String name, String owner) {
423  super(id, msTbl, db, name, owner);
424  this.partitions_ = Lists.newArrayList();
425  }
426 
427  @Override
428  public TCatalogObjectType getCatalogObjectType() { return TCatalogObjectType.TABLE; }
429  public List<HdfsPartition> getPartitions() {
430  return new ArrayList<HdfsPartition>(partitions_);
431  }
432  public boolean isMarkedCached() { return isMarkedCached_; }
433 
434  public HashMap<Long, HdfsPartition> getPartitionMap() { return partitionMap_; }
435  public HashSet<Long> getNullPartitionIds(int i) { return nullPartitionIds_.get(i); }
436  public HashSet<Long> getPartitionIds() { return partitionIds_; }
437  public TreeMap<LiteralExpr, HashSet<Long>> getPartitionValueMap(int i) {
438  return partitionValuesMap_.get(i);
439  }
440 
446  public String getNullColumnValue() { return nullColumnValue_; }
447 
448  /*
449  * Returns the storage location (HDFS path) of this table.
450  */
451  public String getLocation() { return super.getMetaStoreTable().getSd().getLocation(); }
452 
453  public List<FieldSchema> getFieldSchemas() { return fields_; }
454  public List<FieldSchema> getNonPartitionFieldSchemas() {
455  return fields_.subList(getNumClusteringCols(), fields_.size());
456  }
457 
458  // True if Impala has HDFS write permissions on the hdfsBaseDir (for an unpartitioned
459  // table) or if Impala has write permissions on all partition directories (for
460  // a partitioned table).
461  public boolean hasWriteAccess() {
462  return TAccessLevelUtil.impliesWriteAccess(accessLevel_);
463  }
464 
470  public String getFirstLocationWithoutWriteAccess() {
471  if (getMetaStoreTable() == null) return null;
472 
473  if (getMetaStoreTable().getPartitionKeysSize() == 0) {
474  if (!TAccessLevelUtil.impliesWriteAccess(accessLevel_)) {
475  return hdfsBaseDir_;
476  }
477  } else {
478  for (HdfsPartition partition: partitions_) {
479  if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
480  return partition.getLocation();
481  }
482  }
483  }
484  return null;
485  }
486 
491  public HdfsPartition getPartition(List<PartitionKeyValue> partitionSpec) {
492  List<TPartitionKeyValue> partitionKeyValues = Lists.newArrayList();
493  for (PartitionKeyValue kv: partitionSpec) {
494  String value = PartitionKeyValue.getPartitionKeyValueString(
495  kv.getLiteralValue(), getNullPartitionKeyValue());
496  partitionKeyValues.add(new TPartitionKeyValue(kv.getColName(), value));
497  }
498  return getPartitionFromThriftPartitionSpec(partitionKeyValues);
499  }
500 
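// Looks up the partition whose key values match the given Thrift partition spec
// (case-insensitively); returns null if the spec is incomplete, repeats a key, or
// no partition matches.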
505  public HdfsPartition getPartitionFromThriftPartitionSpec(
506  List<TPartitionKeyValue> partitionSpec) {
507  // First, build a list of the partition values to search for in the same order they
508  // are defined in the table.
509  List<String> targetValues = Lists.newArrayList();
510  Set<String> keys = Sets.newHashSet();
511  for (FieldSchema fs: getMetaStoreTable().getPartitionKeys()) {
512  for (TPartitionKeyValue kv: partitionSpec) {
513  if (fs.getName().toLowerCase().equals(kv.getName().toLowerCase())) {
514  targetValues.add(kv.getValue().toLowerCase());
515  // Same key was specified twice
516  if (!keys.add(kv.getName().toLowerCase())) {
517  return null;
518  }
519  }
520  }
521  }
522 
523  // Make sure the number of values match up and that some values were found.
524  if (targetValues.size() == 0 ||
525  (targetValues.size() != getMetaStoreTable().getPartitionKeysSize())) {
526  return null;
527  }
528 
529  // Now search through all the partitions and check if their partition key values match
530  // the values being searched for.
531  for (HdfsPartition partition: getPartitions()) {
532  if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
533  continue;
534  }
535  List<LiteralExpr> partitionValues = partition.getPartitionValues();
536  Preconditions.checkState(partitionValues.size() == targetValues.size());
537  boolean matchFound = true;
538  for (int i = 0; i < targetValues.size(); ++i) {
539  String value;
540  if (partitionValues.get(i) instanceof NullLiteral) {
541  value = getNullPartitionKeyValue();
542  } else {
543  value = partitionValues.get(i).getStringValue();
544  Preconditions.checkNotNull(value);
545  // See IMPALA-252: we deliberately map empty strings on to
546  // NULL when they're in partition columns. This is for
547  // backwards compatibility with Hive, and is clearly broken.
548  if (value.isEmpty()) value = getNullPartitionKeyValue();
549  }
550  if (!targetValues.get(i).equals(value.toLowerCase())) {
551  matchFound = false;
552  break;
553  }
554  }
555  if (matchFound) {
556  return partition;
557  }
558  }
559  return null;
560  }
561 
567  private void loadColumns(List<FieldSchema> fieldSchemas, HiveMetaStoreClient client)
568  throws TableLoadingException {
569  int pos = 0;
570  for (FieldSchema s: fieldSchemas) {
571  Type type = parseColumnType(s);
572  // Check if we support partitioning on columns of such a type.
573  if (pos < numClusteringCols_ && !type.supportsTablePartitioning()) {
574  throw new TableLoadingException(
575  String.format("Failed to load metadata for table '%s' because of " +
576  "unsupported partition-column type '%s' in partition column '%s'",
577  getFullName(), type.toString(), s.getName()));
578  }
579 
580  Column col = new Column(s.getName(), type, s.getComment(), pos);
581  addColumn(col);
582  ++pos;
583  }
584  fields_ = fieldSchemas == null ? new ArrayList<FieldSchema>() : fieldSchemas;
585  loadAllColumnStats(client);
586  }
587 
591  private void populatePartitionMd() {
592  if (hasPartitionMd_) return;
593  for (HdfsPartition partition: partitions_) {
594  updatePartitionMdAndColStats(partition);
595  }
596  hasPartitionMd_ = true;
597  }
598 
602  private void resetPartitionMd() {
603  partitionIds_.clear();
604  partitionMap_.clear();
605  partitionValuesMap_.clear();
606  nullPartitionIds_.clear();
607  // Initialize partitionValuesMap_ and nullPartitionIds_. Also reset column stats.
608  for (int i = 0; i < numClusteringCols_; ++i) {
609  getColumns().get(i).getStats().setNumNulls(0);
610  getColumns().get(i).getStats().setNumDistinctValues(0);
611  partitionValuesMap_.add(Maps.<LiteralExpr, HashSet<Long>>newTreeMap());
612  nullPartitionIds_.add(Sets.<Long>newHashSet());
613  }
614  hasPartitionMd_ = false;
615  }
616 
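// Creates HdfsPartition objects for every metastore partition (or a single implicit
// partition for unpartitioned tables), reusing FileDescriptors from oldFileDescMap
// when file length and modification time are unchanged, then loads disk IDs for any
// new or modified blocks in one pass.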
629  private void loadPartitions(
630  List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
631  org.apache.hadoop.hive.metastore.api.Table msTbl,
632  Map<String, List<FileDescriptor>> oldFileDescMap) throws IOException,
633  CatalogException {
635  partitions_.clear();
636  hdfsBaseDir_ = msTbl.getSd().getLocation();
637 
638  // Map of filesystem to the file blocks for new/modified FileDescriptors. Blocks in
639  // this map will have their disk volume IDs information (re)loaded. This is used to
640  // speed up the incremental refresh of a table's metadata by skipping unmodified,
641  // previously loaded blocks.
642  Map<FsKey, FileBlocksInfo> blocksToLoad = Maps.newHashMap();
643 
644  // INSERT statements need to refer to this if they try to write to new partitions
645  // Scans don't refer to this because by definition all partitions they refer to
646  // exist.
647  addDefaultPartition(msTbl.getSd());
648 
649  // We silently ignore cache directives that no longer exist in HDFS, and remove
650  // non-existing cache directives from the parameters.
651  isMarkedCached_ = HdfsCachingUtil.validateCacheParams(msTbl.getParameters());
652 
653  if (msTbl.getPartitionKeysSize() == 0) {
654  Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
655  // This table has no partition key, which means it has no declared partitions.
656  // We model partitions slightly differently to Hive - every file must exist in a
657  // partition, so add a single partition with no keys which will get all the
658  // files in the table's root directory.
659  HdfsPartition part = createPartition(msTbl.getSd(), null, oldFileDescMap,
660  blocksToLoad);
661  addPartition(part);
662  if (isMarkedCached_) part.markCached();
663  Path location = new Path(hdfsBaseDir_);
664  FileSystem fs = location.getFileSystem(CONF);
665  if (fs.exists(location)) {
666  accessLevel_ = getAvailableAccessLevel(fs, location);
667  }
668  } else {
669  for (org.apache.hadoop.hive.metastore.api.Partition msPartition: msPartitions) {
670  HdfsPartition partition = createPartition(msPartition.getSd(), msPartition,
671  oldFileDescMap, blocksToLoad);
672  addPartition(partition);
673  // If the partition is null, its HDFS path does not exist, and it was not added to
674  // this table's partition list. Skip the partition.
675  if (partition == null) continue;
676  if (msPartition.getParameters() != null) {
677  partition.setNumRows(getRowCount(msPartition.getParameters()));
678  }
679  if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
680  // TODO: READ_ONLY isn't exactly correct because it's possible the
681  // partition does not have READ permissions either. When we start checking
682  // whether we can READ from a table, this should be updated to set the
683  // table's access level to the "lowest" effective level across all
684  // partitions. That is, if one partition has READ_ONLY and another has
685  // WRITE_ONLY the table's access level should be NONE.
686  accessLevel_ = TAccessLevel.READ_ONLY;
687  }
688  }
689  }
690  loadDiskIds(blocksToLoad);
691  }
692 
699  private TAccessLevel getAvailableAccessLevel(FileSystem fs, Path location)
700  throws IOException {
701  FsPermissionChecker permissionChecker = FsPermissionChecker.getInstance();
702  while (location != null) {
703  if (fs.exists(location)) {
704  FsPermissionChecker.Permissions perms =
705  permissionChecker.getPermissions(fs, location);
706  if (perms.canReadAndWrite()) {
707  return TAccessLevel.READ_WRITE;
708  } else if (perms.canRead()) {
709  return TAccessLevel.READ_ONLY;
710  } else if (perms.canWrite()) {
711  return TAccessLevel.WRITE_ONLY;
712  }
713  return TAccessLevel.NONE;
714  }
715  location = location.getParent();
716  }
717  // Should never get here.
718  Preconditions.checkNotNull(location, "Error: no path ancestor exists");
719  return TAccessLevel.NONE;
720  }
721 
735  public HdfsPartition createPartition(StorageDescriptor storageDescriptor,
736  org.apache.hadoop.hive.metastore.api.Partition msPartition)
737  throws CatalogException {
738  Map<FsKey, FileBlocksInfo> blocksToLoad = Maps.newHashMap();
739  HdfsPartition hdfsPartition = createPartition(storageDescriptor, msPartition,
740  fileDescMap_, blocksToLoad);
741  loadDiskIds(blocksToLoad);
742  return hdfsPartition;
743  }
744 
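// Builds one HdfsPartition from a metastore StorageDescriptor/Partition: parses the
// partition key literals, lists the partition directory, reuses unchanged
// FileDescriptors from oldFileDescMap, and collects new blocks into perFsFileBlocks
// so their disk IDs can be loaded later.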
764  private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
765  org.apache.hadoop.hive.metastore.api.Partition msPartition,
766  Map<String, List<FileDescriptor>> oldFileDescMap,
767  Map<FsKey, FileBlocksInfo> perFsFileBlocks)
768  throws CatalogException {
769  HdfsStorageDescriptor fileFormatDescriptor =
770  HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
771  Path partDirPath = new Path(storageDescriptor.getLocation());
772  List<FileDescriptor> fileDescriptors = Lists.newArrayList();
773  // If the partition is marked as cached, the block location metadata must be
774  // reloaded, even if the file times have not changed.
775  boolean isMarkedCached = isMarkedCached_;
776  List<LiteralExpr> keyValues = Lists.newArrayList();
777  if (msPartition != null) {
778  isMarkedCached = HdfsCachingUtil.validateCacheParams(msPartition.getParameters());
779  // Load key values
780  for (String partitionKey: msPartition.getValues()) {
781  Type type = getColumns().get(keyValues.size()).getType();
782  // Deal with Hive's special NULL partition key.
783  if (partitionKey.equals(nullPartitionKeyValue_)) {
784  keyValues.add(NullLiteral.create(type));
785  } else {
786  try {
787  keyValues.add(LiteralExpr.create(partitionKey, type));
788  } catch (Exception ex) {
789  LOG.warn("Failed to create literal expression of type: " + type, ex);
790  throw new CatalogException("Invalid partition key value of type: " + type,
791  ex);
792  }
793  }
794  }
795  try {
796  Expr.analyze(keyValues, null);
797  } catch (AnalysisException e) {
798  // should never happen
799  throw new IllegalStateException(e);
800  }
801  }
802  try {
803  // Each partition could reside on a different filesystem.
804  FileSystem fs = partDirPath.getFileSystem(CONF);
805  multipleFileSystems_ = multipleFileSystems_ ||
806  !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
807  if (fs.exists(partDirPath)) {
808  // FileSystem does not have an API that takes in a timestamp and returns a list
809  // of files that have been added/changed since. Therefore, we are calling
810  // fs.listStatus() to list all the files.
811  for (FileStatus fileStatus: fs.listStatus(partDirPath)) {
812  String fileName = fileStatus.getPath().getName().toString();
813  if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName) ||
814  HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
815  // Ignore directory, hidden file starting with . or _, and LZO index files
816  // If a directory is erroneously created as a subdirectory of a partition dir
817  // we should ignore it and move on. Hive will not recurse into directories.
818  // Skip index files, these are read by the LZO scanner directly.
819  continue;
820  }
821 
822  String partitionDir = fileStatus.getPath().getParent().toString();
823  FileDescriptor fd = null;
824  // Search for a FileDescriptor with the same partition dir and file name. If one
825  // is found, it will be chosen as a candidate to reuse.
826  if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
827  for (FileDescriptor oldFileDesc: oldFileDescMap.get(partitionDir)) {
828  if (oldFileDesc.getFileName().equals(fileName)) {
829  fd = oldFileDesc;
830  break;
831  }
832  }
833  }
834 
835  // Check if this FileDescriptor has been modified since last loading its block
836  // location information. If it has not been changed, the previously loaded
837  // value can be reused.
838  if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
839  || fd.getModificationTime() != fileStatus.getModificationTime()) {
840  // Create a new file descriptor and load the file block metadata,
841  // collecting the block metadata into perFsFileBlocks. The disk IDs for
842  // all the blocks of each filesystem will be loaded by loadDiskIds().
843  fd = new FileDescriptor(fileName, fileStatus.getLen(),
844  fileStatus.getModificationTime());
845  loadBlockMetadata(fs, fileStatus, fd, fileFormatDescriptor.getFileFormat(),
846  perFsFileBlocks);
847  }
848 
849  List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
850  if (fds == null) {
851  fds = Lists.newArrayList();
852  fileDescMap_.put(partitionDir, fds);
853  }
854  fds.add(fd);
855 
856  // Add to the list of FileDescriptors for this partition.
857  fileDescriptors.add(fd);
858  }
859  numHdfsFiles_ += fileDescriptors.size();
860  }
861  HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues,
862  fileFormatDescriptor, fileDescriptors,
863  getAvailableAccessLevel(fs, partDirPath));
864  partition.checkWellFormed();
865  return partition;
866  } catch (Exception e) {
867  throw new CatalogException("Failed to create partition: ", e);
868  }
869  }
870 
875  private void addPerFsFileBlocks(Map<FsKey, FileBlocksInfo> fsToBlocks, FileSystem fs,
876  List<THdfsFileBlock> blocks, List<BlockLocation> locations) {
877  FsKey fsKey = new FsKey(fs);
878  FileBlocksInfo infos = fsToBlocks.get(fsKey);
879  if (infos == null) {
880  infos = new FileBlocksInfo();
881  fsToBlocks.put(fsKey, infos);
882  }
883  infos.addBlocks(blocks, locations);
884  }
885 
892  public void addPartition(HdfsPartition partition) {
893  if (partitions_.contains(partition)) return;
894  partitions_.add(partition);
895  totalHdfsBytes_ += partition.getSize();
896  updatePartitionMdAndColStats(partition);
897  }
898 
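// Registers a partition in the pruning structures (partitionIds_, partitionMap_,
// partitionValuesMap_, nullPartitionIds_) and updates the per-key-column null and
// distinct-value counts. Partitions without a full set of key values (e.g. the
// default partition) are ignored.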
904  private void updatePartitionMdAndColStats(HdfsPartition partition) {
905  if (partition.getPartitionValues().size() != numClusteringCols_) return;
906 
907  partitionIds_.add(partition.getId());
908  partitionMap_.put(partition.getId(), partition);
909  for (int i = 0; i < partition.getPartitionValues().size(); ++i) {
910  ColumnStats stats = getColumns().get(i).getStats();
911  LiteralExpr literal = partition.getPartitionValues().get(i);
912  // Store partitions with null partition values separately
913  if (literal instanceof NullLiteral) {
914  stats.setNumNulls(stats.getNumNulls() + 1);
915  if (nullPartitionIds_.get(i).isEmpty()) {
916  stats.setNumDistinctValues(stats.getNumDistinctValues() + 1);
917  }
918  nullPartitionIds_.get(i).add(partition.getId());
919  continue;
920  }
921  HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal);
922  if (partitionIds == null) {
923  partitionIds = Sets.newHashSet();
924  partitionValuesMap_.get(i).put(literal, partitionIds);
925  stats.setNumDistinctValues(stats.getNumDistinctValues() + 1);
926  }
927  partitionIds.add(partition.getId());
928  }
929  }
930 
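// Removes the partition matching the given spec from the in-memory state and
// reverses the bookkeeping done by updatePartitionMdAndColStats(); returns the
// dropped partition, or null if no such partition exists.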
942  public HdfsPartition dropPartition(List<TPartitionKeyValue> partitionSpec) {
943  HdfsPartition partition = getPartitionFromThriftPartitionSpec(partitionSpec);
944  // Check if the partition does not exist.
945  if (partition == null || !partitions_.remove(partition)) return null;
946  totalHdfsBytes_ -= partition.getSize();
947  Preconditions.checkArgument(partition.getPartitionValues().size() ==
948  numClusteringCols_);
949  Long partitionId = partition.getId();
950  // Remove the partition id from the list of partition ids and other mappings.
951  partitionIds_.remove(partitionId);
952  partitionMap_.remove(partitionId);
953  for (int i = 0; i < partition.getPartitionValues().size(); ++i) {
954  ColumnStats stats = getColumns().get(i).getStats();
955  LiteralExpr literal = partition.getPartitionValues().get(i);
956  // Check if this is a null literal.
957  if (literal instanceof NullLiteral) {
958  nullPartitionIds_.get(i).remove(partitionId);
959  stats.setNumNulls(stats.getNumNulls() - 1);
960  if (nullPartitionIds_.get(i).isEmpty()) {
961  stats.setNumDistinctValues(stats.getNumDistinctValues() - 1);
962  }
963  continue;
964  }
965  HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal);
966  // If there are multiple partition ids corresponding to a literal, remove
967  // only this id. Otherwise, remove the <literal, id> pair.
968  if (partitionIds.size() > 1) partitionIds.remove(partitionId);
969  else {
970  partitionValuesMap_.get(i).remove(literal);
971  stats.setNumDistinctValues(stats.getNumDistinctValues() - 1);
972  }
973  }
974  return partition;
975  }
976 
977  private void addDefaultPartition(StorageDescriptor storageDescriptor)
978  throws CatalogException {
979  // Default partition has no files and is not referred to by scan nodes. Data sinks
980  // refer to this to understand how to create new partitions.
981  HdfsStorageDescriptor hdfsStorageDescriptor =
982  HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
983  HdfsPartition partition = HdfsPartition.defaultPartition(this, hdfsStorageDescriptor);
984  partitions_.add(partition);
985  }
986 
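// Loads the table from the Hive Metastore: null-format and null-partition-key
// defaults, the (possibly Avro-derived) column schema, partitions (reusing
// unmodified ones from cachedEntry when possible), and table-level row-count stats.
// All failures are wrapped in TableLoadingException.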
987  @Override
1003  public void load(Table cachedEntry, HiveMetaStoreClient client,
1004  org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException {
1005  numHdfsFiles_ = 0;
1006  totalHdfsBytes_ = 0;
1007  LOG.debug("load table: " + db_.getName() + "." + name_);
1008 
1009  // turn all exceptions into TableLoadingException
1010  try {
1011  // set nullPartitionKeyValue from the hive conf.
1012  nullPartitionKeyValue_ = client.getConfigValue(
1013  "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
1014 
1015  // set NULL indicator string from table properties
1016  nullColumnValue_ =
1017  msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
1018  if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE;
1019 
1020  // populate with both partition keys and regular columns
1021  List<FieldSchema> partKeys = msTbl.getPartitionKeys();
1022  List<FieldSchema> tblFields = Lists.newArrayList();
1023  String inputFormat = msTbl.getSd().getInputFormat();
1024  if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
1025  // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter
1026  // taking precedence.
1027  List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
1028  schemaSearchLocations.add(
1029  getMetaStoreTable().getSd().getSerdeInfo().getParameters());
1030  schemaSearchLocations.add(getMetaStoreTable().getParameters());
1031 
1032  avroSchema_ =
1033  HdfsTable.getAvroSchema(schemaSearchLocations, getFullName());
1034  String serdeLib = msTbl.getSd().getSerdeInfo().getSerializationLib();
1035  if (serdeLib == null ||
1036  serdeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
1037  // If the SerDe library is null or set to LazySimpleSerDe, it
1038  // indicates there is an issue with the table metadata since Avro tables need a
1039  // non-native serde. Instead of failing to load the table, fall back to
1040  // using the fields from the storage descriptor (same as Hive).
1041  tblFields.addAll(msTbl.getSd().getCols());
1042  } else {
1043  // Load the fields from the Avro schema.
1044  // Since Avro does not include meta-data for CHAR or VARCHAR, an Avro type of
1045  // "string" is used for CHAR, VARCHAR and STRING. Default back to the storage
1046  // descriptor to determine the type for "string".
1047  List<FieldSchema> sdTypes = msTbl.getSd().getCols();
1048  int i = 0;
1049  List<Column> avroTypeList = AvroSchemaParser.parse(avroSchema_);
1050  boolean canFallBack = sdTypes.size() == avroTypeList.size();
1051  for (Column parsedCol: avroTypeList) {
1052  FieldSchema fs = new FieldSchema();
1053  fs.setName(parsedCol.getName());
1054  String avroType = parsedCol.getType().toSql();
1055  if (avroType.toLowerCase().equals("string") && canFallBack) {
1056  fs.setType(sdTypes.get(i).getType());
1057  } else {
1058  fs.setType(avroType);
1059  }
1060  fs.setComment("from deserializer");
1061  tblFields.add(fs);
1062  i++;
1063  }
1064  }
1065  } else {
1066  tblFields.addAll(msTbl.getSd().getCols());
1067  }
1068  List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(
1069  partKeys.size() + tblFields.size());
1070  fieldSchemas.addAll(partKeys);
1071  fieldSchemas.addAll(tblFields);
1072  // The number of clustering columns is the number of partition keys.
1073  numClusteringCols_ = partKeys.size();
1074  loadColumns(fieldSchemas, client);
1075 
1076  // Collect the list of partitions to use for the table. Partitions may be reused
1077  // from the existing cached table entry (if one exists), read from the metastore,
1078  // or a mix of both. Whether or not a partition is reused depends on whether
1079  // the table or partition has been modified.
1080  List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions =
1081  Lists.newArrayList();
1082  if (cachedEntry == null || !(cachedEntry instanceof HdfsTable) ||
1083  cachedEntry.lastDdlTime_ != lastDdlTime_) {
1084  msPartitions.addAll(MetaStoreUtil.fetchAllPartitions(
1085  client, db_.getName(), name_, NUM_PARTITION_FETCH_RETRIES));
1086  } else {
1087  // The table was already in the metadata cache and it has not been modified.
1088  Preconditions.checkArgument(cachedEntry instanceof HdfsTable);
1089  HdfsTable cachedHdfsTableEntry = (HdfsTable) cachedEntry;
1090  // Set of partition names that have been modified. Partitions in this Set need to
1091  // be reloaded from the metastore.
1092  Set<String> modifiedPartitionNames = Sets.newHashSet();
1093 
1094  // If these are not the exact same object, look up the set of partition names in
1095  // the metastore. This is to support the special case of CTAS which creates a
1096  // "temp" table that doesn't actually exist in the metastore.
1097  if (cachedEntry != this) {
1098  // Since the table has not been modified, we might be able to reuse some of the
1099  // old partition metadata if the individual partitions have not been modified.
1100  // First get a list of all the partition names for this table from the
1101  // metastore, this is much faster than listing all the Partition objects.
1102  modifiedPartitionNames.addAll(
1103  client.listPartitionNames(db_.getName(), name_, (short) -1));
1104  }
1105 
1106  int totalPartitions = modifiedPartitionNames.size();
1107  // Get all the partitions from the cached entry that have not been modified.
1108  for (HdfsPartition cachedPart: cachedHdfsTableEntry.getPartitions()) {
1109  // Skip the default partition and any partitions that have been modified.
1110  if (cachedPart.isDirty() || cachedPart.isDefaultPartition()) {
1111  continue;
1112  }
1113 
1114  org.apache.hadoop.hive.metastore.api.Partition cachedMsPart =
1115  cachedPart.toHmsPartition();
1116  if (cachedMsPart == null) continue;
1117 
1118  // This is a partition we already know about and it hasn't been modified.
1119  // No need to reload the metadata.
1120  String cachedPartName = cachedPart.getPartitionName();
1121  if (modifiedPartitionNames.contains(cachedPartName)) {
1122  msPartitions.add(cachedMsPart);
1123  modifiedPartitionNames.remove(cachedPartName);
1124  }
1125  }
1126  LOG.info(String.format("Incrementally refreshing %d/%d partitions.",
1127  modifiedPartitionNames.size(), totalPartitions));
1128 
1129  // No need to make the metastore call if no partitions are to be updated.
1130  if (modifiedPartitionNames.size() > 0) {
1131  // Now reload the remaining partitions.
1132  msPartitions.addAll(MetaStoreUtil.fetchPartitionsByName(client,
1133  Lists.newArrayList(modifiedPartitionNames), db_.getName(), name_));
1134  }
1135  }
1136 
1137  Map<String, List<FileDescriptor>> oldFileDescMap = null;
1138  if (cachedEntry != null && cachedEntry instanceof HdfsTable) {
1139  HdfsTable cachedHdfsTable = (HdfsTable) cachedEntry;
1140  oldFileDescMap = cachedHdfsTable.fileDescMap_;
1141  hostIndex_.populate(cachedHdfsTable.hostIndex_.getList());
1142  }
1143  loadPartitions(msPartitions, msTbl, oldFileDescMap);
1144 
1145  // load table stats
1146  numRows_ = getRowCount(msTbl.getParameters());
1147  LOG.debug("table #rows=" + Long.toString(numRows_));
1148 
1149  // For unpartitioned tables set the numRows in its partitions
1150  // to the table's numRows.
1151  if (numClusteringCols_ == 0 && !partitions_.isEmpty()) {
1152  // Unpartitioned tables have a 'dummy' partition and a default partition.
1153  // Temp tables used in CTAS statements have one partition.
1154  Preconditions.checkState(partitions_.size() == 2 || partitions_.size() == 1);
1155  for (HdfsPartition p: partitions_) {
1156  p.setNumRows(numRows_);
1157  }
1158  }
1159  } catch (TableLoadingException e) {
1160  throw e;
1161  } catch (Exception e) {
1162  throw new TableLoadingException(
1163  "Failed to load metadata for table: " + getFullName(), e);
1164  }
1165  }
1166 
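// Returns the table's Avro schema: each search location is checked in order for an
// avro.schema.literal (returned directly) or an avro.schema.url (fetched over HTTP
// or from the filesystem). The caller passes SERDEPROPERTIES before TBLPROPERTIES,
// so serde-level settings win.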
1177  public static String getAvroSchema(List<Map<String, String>> schemaSearchLocations,
1178  String tableName) throws TableLoadingException {
1179  String url = null;
1180  // Search all locations and break out on the first valid schema found.
1181  for (Map<String, String> schemaLocation: schemaSearchLocations) {
1182  if (schemaLocation == null) continue;
1183 
1184  String literal = schemaLocation.get(AvroSerdeUtils.SCHEMA_LITERAL);
1185  if (literal != null && !literal.equals(AvroSerdeUtils.SCHEMA_NONE)) return literal;
1186 
1187  url = schemaLocation.get(AvroSerdeUtils.SCHEMA_URL);
1188  if (url != null) {
1189  url = url.trim();
1190  break;
1191  }
1192  }
1193  if (url == null || url.equals(AvroSerdeUtils.SCHEMA_NONE)) {
1194  throw new TableLoadingException(String.format("No Avro schema provided in " +
1195  "SERDEPROPERTIES or TBLPROPERTIES for table: %s ", tableName));
1196  }
1197  String schema = null;
1198  if (url.toLowerCase().startsWith("http://")) {
1199  InputStream urlStream = null;
1200  try {
1201  urlStream = new URL(url).openStream();
1202  schema = IOUtils.toString(urlStream);
1203  } catch (IOException e) {
1204  throw new TableLoadingException("Problem reading Avro schema from: " + url, e);
1205  } finally {
1206  IOUtils.closeQuietly(urlStream);
1207  }
1208  } else {
1209  Path path = new Path(url);
1210  FileSystem fs = null;
1211  try {
1212  fs = path.getFileSystem(FileSystemUtil.getConfiguration());
1213  } catch (Exception e) {
1214  throw new TableLoadingException(String.format(
1215  "Invalid avro.schema.url: %s. %s", path, e.getMessage()));
1216  }
1217  StringBuilder errorMsg = new StringBuilder();
1218  if (!FileSystemUtil.isPathReachable(path, fs, errorMsg)) {
1219  throw new TableLoadingException(String.format(
1220  "Invalid avro.schema.url: %s. %s", path, errorMsg));
1221  }
1222  try {
1223  schema = FileSystemUtil.readFile(path);
1224  } catch (IOException e) {
1225  throw new TableLoadingException(
1226  "Problem reading Avro schema at: " + url, e);
1227  }
1228  }
1229  return schema;
1230  }
1231 
1232  @Override
1233  protected List<String> getColumnNamesWithHmsStats() {
1234  List<String> ret = Lists.newArrayList();
1235  // Only non-partition columns have column stats in the HMS.
1236  for (Column column: getColumns().subList(numClusteringCols_, getColumns().size())) {
1237  ret.add(column.getName().toLowerCase());
1238  }
1239  return ret;
1240  }
1241 
1242  @Override
1243  protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
1244  super.loadFromThrift(thriftTable);
1245  THdfsTable hdfsTable = thriftTable.getHdfs_table();
1246  hdfsBaseDir_ = hdfsTable.getHdfsBaseDir();
1247  nullColumnValue_ = hdfsTable.nullColumnValue;
1248  nullPartitionKeyValue_ = hdfsTable.nullPartitionKeyValue;
1249  multipleFileSystems_ = hdfsTable.multiple_filesystems;
1250  hostIndex_.populate(hdfsTable.getNetwork_addresses());
1251  resetPartitionMd();
1252 
1253  numHdfsFiles_ = 0;
1254  totalHdfsBytes_ = 0;
1255  for (Map.Entry<Long, THdfsPartition> part: hdfsTable.getPartitions().entrySet()) {
1256  HdfsPartition hdfsPart =
1257  HdfsPartition.fromThrift(this, part.getKey(), part.getValue());
1258  numHdfsFiles_ += hdfsPart.getFileDescriptors().size();
1259  totalHdfsBytes_ += hdfsPart.getSize();
1260  partitions_.add(hdfsPart);
1261  }
1262  avroSchema_ = hdfsTable.isSetAvroSchema() ? hdfsTable.getAvroSchema() : null;
1263  isMarkedCached_ = HdfsCachingUtil.getCacheDirectiveId(
1264  getMetaStoreTable().getParameters()) != null;
1265  populatePartitionMd();
1266  }
1267 
1268  @Override
1269  public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) {
1270  // Create thrift descriptors to send to the BE. The BE does not
1271  // need any information below the THdfsPartition level.
1272  TTableDescriptor tableDesc = new TTableDescriptor(id_.asInt(), TTableType.HDFS_TABLE,
1273  getColumns().size(), numClusteringCols_, name_, db_.getName());
1274  tableDesc.setHdfsTable(getTHdfsTable(false, referencedPartitions));
1275  tableDesc.setColNames(getColumnNames());
1276  return tableDesc;
1277  }
1278 
1279  @Override
1280  public TTable toThrift() {
1281  // Send all metadata between the catalog service and the FE.
1282  TTable table = super.toThrift();
1283  table.setTable_type(TTableType.HDFS_TABLE);
1284  table.setHdfs_table(getTHdfsTable(true, null));
1285  return table;
1286  }
1287 
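// Serializes this table to a THdfsTable. When includeFileDesc is true (full catalog
// updates) all partitions, their file descriptors, and the host index are sent;
// otherwise only the partitions in refPartitions are included, without file
// descriptors, which is all the backend needs for planning.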
1295  private THdfsTable getTHdfsTable(boolean includeFileDesc, Set<Long> refPartitions) {
1296  // includeFileDesc implies all partitions should be included (refPartitions == null).
1297  Preconditions.checkState(!includeFileDesc || refPartitions == null);
1298  Map<Long, THdfsPartition> idToPartition = Maps.newHashMap();
1299  for (HdfsPartition partition: partitions_) {
1300  long id = partition.getId();
1301  if (refPartitions == null || refPartitions.contains(id)) {
1302  idToPartition.put(id, partition.toThrift(includeFileDesc));
1303  }
1304  }
1305  THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(),
1306  nullPartitionKeyValue_, nullColumnValue_, idToPartition);
1307  hdfsTable.setAvroSchema(avroSchema_);
1308  hdfsTable.setMultiple_filesystems(multipleFileSystems_);
1309  if (includeFileDesc) {
1310  // Network addresses are used only by THdfsFileBlocks which are inside
1311  // THdfsFileDesc, so include network addresses only when including THdfsFileDesc.
1312  hdfsTable.setNetwork_addresses(hostIndex_.getList());
1313  }
1314  return hdfsTable;
1315  }
1316 
1317  public long getNumHdfsFiles() { return numHdfsFiles_; }
1318  public long getTotalHdfsBytes() { return totalHdfsBytes_; }
1319  public String getHdfsBaseDir() { return hdfsBaseDir_; }
1320  public boolean isAvroTable() { return avroSchema_ != null; }
1321 
1322  @Override
1323  public int getNumNodes() { return hostIndex_.size(); }
1324 
1328  public ListMap<TNetworkAddress> getHostIndex() { return hostIndex_; }
1329 
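// Returns the file format used by the largest number of this table's partitions.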
1333  public HdfsFileFormat getMajorityFormat() {
1334  Map<HdfsFileFormat, Integer> numPartitionsByFormat = Maps.newHashMap();
1335  for (HdfsPartition partition: partitions_) {
1336  HdfsFileFormat format = partition.getInputFormatDescriptor().getFileFormat();
1337  Integer numPartitions = numPartitionsByFormat.get(format);
1338  if (numPartitions == null) {
1339  numPartitions = Integer.valueOf(1);
1340  } else {
1341  numPartitions = Integer.valueOf(numPartitions.intValue() + 1);
1342  }
1343  numPartitionsByFormat.put(format, numPartitions);
1344  }
1345 
1346  int maxNumPartitions = Integer.MIN_VALUE;
1347  HdfsFileFormat majorityFormat = null;
1348  for (Map.Entry<HdfsFileFormat, Integer> entry: numPartitionsByFormat.entrySet()) {
1349  if (entry.getValue().intValue() > maxNumPartitions) {
1350  majorityFormat = entry.getKey();
1351  maxNumPartitions = entry.getValue().intValue();
1352  }
1353  }
1354  Preconditions.checkNotNull(majorityFormat);
1355  return majorityFormat;
1356  }
1357 
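// Builds a result set of per-partition statistics (e.g. backing SHOW TABLE STATS):
// one row per partition with row count, file count, size, cache status, format,
// incremental-stats flag and location, plus a 'Total' summary row for partitioned
// tables.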
1363  public TResultSet getTableStats() {
1364  TResultSet result = new TResultSet();
1365  TResultSetMetadata resultSchema = new TResultSetMetadata();
1366  result.setSchema(resultSchema);
1367 
1368  for (int i = 0; i < numClusteringCols_; ++i) {
1369  // Add the partition-key values as strings for simplicity.
1370  Column partCol = getColumns().get(i);
1371  TColumn colDesc = new TColumn(partCol.getName(), Type.STRING.toThrift());
1372  resultSchema.addToColumns(colDesc);
1373  }
1374 
1375  resultSchema.addToColumns(new TColumn("#Rows", Type.BIGINT.toThrift()));
1376  resultSchema.addToColumns(new TColumn("#Files", Type.BIGINT.toThrift()));
1377  resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift()));
1378  resultSchema.addToColumns(new TColumn("Bytes Cached", Type.STRING.toThrift()));
1379  resultSchema.addToColumns(new TColumn("Cache Replication", Type.STRING.toThrift()));
1380  resultSchema.addToColumns(new TColumn("Format", Type.STRING.toThrift()));
1381  resultSchema.addToColumns(new TColumn("Incremental stats", Type.STRING.toThrift()));
1382  resultSchema.addToColumns(new TColumn("Location", Type.STRING.toThrift()));
1383 
1384  // Pretty print partitions and their stats.
1385  ArrayList<HdfsPartition> orderedPartitions = Lists.newArrayList(partitions_);
1386  Collections.sort(orderedPartitions);
1387 
1388  long totalCachedBytes = 0L;
1389  for (HdfsPartition p: orderedPartitions) {
1390  // Ignore dummy default partition.
1391  if (p.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) continue;
1392  TResultRowBuilder rowBuilder = new TResultRowBuilder();
1393 
1394  // Add the partition-key values (as strings for simplicity).
1395  for (LiteralExpr expr: p.getPartitionValues()) {
1396  rowBuilder.add(expr.getStringValue());
1397  }
1398 
1399  // Add number of rows, files, bytes, cache stats, and file format.
1400  rowBuilder.add(p.getNumRows()).add(p.getFileDescriptors().size())
1401  .addBytes(p.getSize());
1402  if (!p.isMarkedCached()) {
1403  // Helps to differentiate partitions that have 0B cached versus partitions
1404  // that are not marked as cached.
1405  rowBuilder.add("NOT CACHED");
1406  rowBuilder.add("NOT CACHED");
1407  } else {
1408  // Calculate the number of bytes that are cached.
1409  long cachedBytes = 0L;
1410  for (FileDescriptor fd: p.getFileDescriptors()) {
1411  for (THdfsFileBlock fb: fd.getFileBlocks()) {
1412  if (fb.getIs_replica_cached().contains(true)) {
1413  cachedBytes += fb.getLength();
1414  }
1415  }
1416  }
1417  totalCachedBytes += cachedBytes;
1418  rowBuilder.addBytes(cachedBytes);
1419 
1420  // Extract cache replication factor from the parameters of the table
1421  // if the table is not partitioned or directly from the partition.
1422  Short rep = HdfsCachingUtil.getCachedCacheReplication(
1423  numClusteringCols_ == 0 ?
1424  p.getTable().getMetaStoreTable().getParameters() :
1425  p.getParameters());
1426  rowBuilder.add(rep.toString());
1427  }
1428  rowBuilder.add(p.getInputFormatDescriptor().getFileFormat().toString());
1429 
1430  rowBuilder.add(String.valueOf(p.hasIncrementalStats()));
1431  rowBuilder.add(p.getLocation());
1432  result.addToRows(rowBuilder.get());
1433  }
1434 
1435  // For partitioned tables add a summary row at the bottom.
1436  if (numClusteringCols_ > 0) {
1437  TResultRowBuilder rowBuilder = new TResultRowBuilder();
1438  int numEmptyCells = numClusteringCols_ - 1;
1439  rowBuilder.add("Total");
1440  for (int i = 0; i < numEmptyCells; ++i) {
1441  rowBuilder.add("");
1442  }
1443 
1444  // Total num rows, files, and bytes (leave format empty).
1445  rowBuilder.add(numRows_).add(numHdfsFiles_).addBytes(totalHdfsBytes_)
1446  .addBytes(totalCachedBytes).add("").add("").add("").add("");
1447  result.addToRows(rowBuilder.get());
1448  }
1449  return result;
1450  }
1451 
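// Returns one row per data file (path, size, owning partition), either for the whole
// table or, when a partition spec is given, for that single partition (e.g. for
// SHOW FILES output).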
1456  public TResultSet getFiles(List<TPartitionKeyValue> partitionSpec) throws CatalogException {
1457  TResultSet result = new TResultSet();
1458  TResultSetMetadata resultSchema = new TResultSetMetadata();
1459  result.setSchema(resultSchema);
1460  resultSchema.addToColumns(new TColumn("path", Type.STRING.toThrift()));
1461  resultSchema.addToColumns(new TColumn("size", Type.STRING.toThrift()));
1462  resultSchema.addToColumns(new TColumn("partition", Type.STRING.toThrift()));
1463  result.setRows(Lists.<TResultRow>newArrayList());
1464 
1465  List<HdfsPartition> partitions = null;
1466  if (partitionSpec == null) {
1467  partitions = partitions_;
1468  } else {
1469  // Get the HdfsPartition object for the given partition spec.
1470  HdfsPartition partition = getPartitionFromThriftPartitionSpec(partitionSpec);
1471  Preconditions.checkState(partition != null);
1472  partitions = Lists.newArrayList(partition);
1473  }
1474 
1475  for (HdfsPartition p: partitions) {
1476  for (FileDescriptor fd: p.getFileDescriptors()) {
1477  TResultRowBuilder rowBuilder = new TResultRowBuilder();
1478  rowBuilder.add(p.getLocation() + "/" + fd.getFileName());
1479  rowBuilder.add(PrintUtils.printBytes(fd.getFileLength()));
1480  rowBuilder.add(p.getPartitionName());
1481  result.addToRows(rowBuilder.get());
1482  }
1483  }
1484  return result;
1485  }
1486 }