package com.cloudera.impala.catalog;

import static com.cloudera.impala.thrift.ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// ...

import com.cloudera.impala.thrift.ImpalaInternalServiceConstants;
import com.cloudera.impala.thrift.TAccessLevel;
import com.cloudera.impala.thrift.TCatalogObjectType;
import com.cloudera.impala.thrift.TColumn;
import com.cloudera.impala.thrift.THdfsFileBlock;
import com.cloudera.impala.thrift.THdfsPartition;
import com.cloudera.impala.thrift.THdfsTable;
import com.cloudera.impala.thrift.TNetworkAddress;
import com.cloudera.impala.thrift.TPartitionKeyValue;
import com.cloudera.impala.thrift.TResultRow;
import com.cloudera.impala.thrift.TResultSet;
import com.cloudera.impala.thrift.TResultSetMetadata;
import com.cloudera.impala.thrift.TTable;
import com.cloudera.impala.thrift.TTableDescriptor;
import com.cloudera.impala.thrift.TTableType;
import com.cloudera.impala.util.ListMap;

// ...

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
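
// HdfsTable is the catalog representation of an HDFS-backed (Hive Metastore) table.
// It loads the table's columns, partitions, file descriptors and block locations from
// the Metastore and the underlying filesystems, maintains per-partition-key index
// structures, and serializes itself to the Thrift structures (THdfsTable,
// THdfsPartition, TTableDescriptor) that are shipped to the Impala backend.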
public class HdfsTable extends Table {
  // An invalid network address, which will always be treated as remote.
  private static final TNetworkAddress REMOTE_NETWORK_ADDRESS =
      new TNetworkAddress("remote*addr", 0);

  // ...

  // Map from partition id to HdfsPartition.
  private final HashMap<Long, HdfsPartition> partitionMap_ = Maps.newHashMap();

  // ...

  // Index of the network addresses of the hosts that hold block replicas of this
  // table's files; shipped to the backend together with the file descriptors.
  private final ListMap<TNetworkAddress> hostIndex_ = new ListMap<TNetworkAddress>();

  // Map from partition directory path to the file descriptors of the files in that
  // directory.
  private final Map<String, List<FileDescriptor>> fileDescMap_ = Maps.newHashMap();

  // ...

  private final static Logger LOG = LoggerFactory.getLogger(HdfsTable.class);

  // Shared Hadoop Configuration used for FileSystem access.
  private static final Configuration CONF = new Configuration();

  // Wraps a FileSystem so that instances can be compared and hashed by their URI.
  private static class FsKey {
    FileSystem filesystem;

    public FsKey(FileSystem fs) { filesystem = fs; }

    @Override
    public int hashCode() { return filesystem.getUri().hashCode(); }

    @Override
    public boolean equals(Object o) {
      if (o == this) return true;
      if (o != null && o instanceof FsKey) {
        URI uri = filesystem.getUri();
        URI otherUri = ((FsKey)o).filesystem.getUri();
        return uri.equals(otherUri);
      }
      return false;
    }
  }

  // The THdfsFileBlocks of a set of files and their corresponding BlockLocations,
  // collected for one filesystem so that disk ids can be fetched in a single batch.
  private static class FileBlocksInfo {
    final List<THdfsFileBlock> blocks = Lists.newArrayList();
    final List<BlockLocation> locations = Lists.newArrayList();

    public void addBlocks(List<THdfsFileBlock> b, List<BlockLocation> l) {
      Preconditions.checkState(b.size() == l.size());
      blocks.addAll(b);
      locations.addAll(l);
    }
  }

  // True if the HDFS block-metadata (volume id) API is enabled in the configuration.
  private static final boolean SUPPORTS_VOLUME_ID =
      CONF.getBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED,
          DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT);

  // Decodes a VolumeId into a 0-based disk index, or -1 if it cannot be interpreted.
  private static int getDiskId(VolumeId hdfsVolumeId) {
    int diskId = -1;
    if (hdfsVolumeId != null) {
      // The VolumeId is only exposed via its toString(); decode the hex string.
      String volumeIdString = hdfsVolumeId.toString();
      byte[] volumeIdBytes = StringUtils.hexStringToByte(volumeIdString);
      if (volumeIdBytes != null && volumeIdBytes.length == 4) {
        diskId = Bytes.toInt(volumeIdBytes);
      } else {
        LOG.warn("wrong disk id format: " + volumeIdString);
      }
    }
    return diskId;
  }
  private void loadBlockMetadata(FileSystem fs, FileStatus file, FileDescriptor fd,
      HdfsFileFormat fileFormat, Map<FsKey, FileBlocksInfo> perFsFileBlocks) {
    Preconditions.checkNotNull(fd);
    Preconditions.checkNotNull(perFsFileBlocks);
    Preconditions.checkArgument(!file.isDirectory());
    LOG.debug("load block md for " + name_ + " file " + fd.getFileName());

    // ...
    try {
      BlockLocation[] locations = fs.getFileBlockLocations(file, 0, file.getLen());
      Preconditions.checkNotNull(locations);
      // Loop over all blocks in the file.
      for (BlockLocation loc: locations) {
        Preconditions.checkNotNull(loc);
        // Block replica locations in ip:port format.
        String[] blockHostPorts = loc.getNames();
        // Replica hostnames, in the same order as getNames(); used to match replicas
        // against the set of hosts that hold cached data.
        String[] blockHostNames = loc.getHosts();
        Preconditions.checkState(blockHostNames.length == blockHostPorts.length);
        Set<String> cachedHosts =
            Sets.newHashSet(Arrays.asList(loc.getCachedHosts()));
        Preconditions.checkState(cachedHosts.size() <= blockHostNames.length);
        List<BlockReplica> replicas = Lists.newArrayListWithExpectedSize(
            blockHostPorts.length);
        for (int i = 0; i < blockHostPorts.length; ++i) {
          TNetworkAddress networkAddress = BlockReplica.parseLocation(blockHostPorts[i]);
          Preconditions.checkState(networkAddress != null);
          replicas.add(new BlockReplica(hostIndex_.getIndex(networkAddress),
              cachedHosts.contains(blockHostNames[i])));
        }
        fd.addFileBlock(new FileBlock(loc.getOffset(), loc.getLength(), replicas));
      }
      // Collect the BlockLocations per filesystem for the later batched disk-id lookup.
      addPerFsFileBlocks(perFsFileBlocks, fs, fd.getFileBlocks(),
          Arrays.asList(locations));
    } catch (IOException e) {
      throw new RuntimeException("couldn't determine block locations for path '" +
          file.getPath() + "':\n" + e.getMessage(), e);
    }
  }
  private void synthesizeBlockMetadata(FileSystem fs, FileDescriptor fd,
      HdfsFileFormat fileFormat) {
    long start = 0;
    long remaining = fd.getFileLength();
    long blockSize = fs.getDefaultBlockSize();
    // Non-splittable files are modeled as a single block.
    if (!fileFormat.isSplittable(HdfsCompression.fromFileName(fd.getFileName()))) {
      blockSize = remaining;
    }
    while (remaining > 0) {
      long len = Math.min(remaining, blockSize);
      List<BlockReplica> replicas = Lists.newArrayList(
          new BlockReplica(hostIndex_.getIndex(REMOTE_NETWORK_ADDRESS), false));
      fd.addFileBlock(new FileBlock(start, len, replicas));
      remaining -= len;
      start += len;
    }
  }
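
  // For every distributed filesystem in 'perFsFileBlocks', fetches the block storage
  // locations in one batch and attaches the resulting volume/disk ids to the
  // corresponding THdfsFileBlocks; blocks whose volume cannot be resolved keep a
  // negative (unknown) disk id.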
  private void loadDiskIds(Map<FsKey, FileBlocksInfo> perFsFileBlocks) {
    if (!SUPPORTS_VOLUME_ID) return;
    // Loop over each filesystem; only DistributedFileSystem exposes volume ids.
    for (FsKey fsKey: perFsFileBlocks.keySet()) {
      FileSystem fs = fsKey.filesystem;
      if (!(fs instanceof DistributedFileSystem)) continue;
      LOG.trace("Loading disk ids for: " + getFullName() +
          ". filesystem: " + fsKey);
      DistributedFileSystem dfs = (DistributedFileSystem)fs;
      FileBlocksInfo blockLists = perFsFileBlocks.get(fsKey);
      Preconditions.checkNotNull(blockLists);
      BlockStorageLocation[] storageLocs = null;
      try {
        // Get the BlockStorageLocations for all the blocks.
        storageLocs = dfs.getFileBlockStorageLocations(blockLists.locations);
      } catch (IOException e) {
        LOG.error("Couldn't determine block storage locations for filesystem " +
            fs + ":\n" + e.getMessage());
        continue;
      }
      if (storageLocs == null || storageLocs.length == 0) {
        LOG.warn("Attempted to get block locations for filesystem " + fs +
            " but the call returned no results");
        continue;
      }
      if (storageLocs.length != blockLists.locations.size()) {
        // Block locations and storage locations didn't match up.
        LOG.error("Number of block storage locations not equal to number of blocks: "
            + "#storage locations=" + Long.toString(storageLocs.length)
            + " #blocks=" + Long.toString(blockLists.locations.size()));
        continue;
      }
      long unknownDiskIdCount = 0;
      // Attach the disk ids to the corresponding THdfsFileBlocks.
      for (int locIdx = 0; locIdx < storageLocs.length; ++locIdx) {
        VolumeId[] volumeIds = storageLocs[locIdx].getVolumeIds();
        THdfsFileBlock block = blockLists.blocks.get(locIdx);
        int[] diskIds = new int[volumeIds.length];
        for (int i = 0; i < volumeIds.length; ++i) {
          diskIds[i] = getDiskId(volumeIds[i]);
          if (diskIds[i] < 0) ++unknownDiskIdCount;
        }
        FileBlock.setDiskIds(diskIds, block);
      }
      if (unknownDiskIdCount > 0) {
        LOG.warn("Unknown disk id count for filesystem " + fs + ":" + unknownDiskIdCount);
      }
    }
  }

  public HdfsTable(TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl,
      Db db, String name, String owner) {
    super(id, msTbl, db, name, owner);
    this.partitions_ = Lists.newArrayList();
  }

  // ...

  public TreeMap<LiteralExpr, HashSet<Long>> getPartitionValueMap(int i) {
    return partitionValuesMap_.get(i);
  }

  public String getLocation() { return super.getMetaStoreTable().getSd().getLocation(); }

  public boolean hasWriteAccess() {
    return TAccessLevelUtil.impliesWriteAccess(accessLevel_);
  }

  // Returns the location of the first partition to which Impala has no write access,
  // or null if there is no such partition.
  public String getFirstLocationWithoutWriteAccess() {
    // ...
    for (HdfsPartition partition: partitions_) {
      if (!TAccessLevelUtil.impliesWriteAccess(partition.getAccessLevel())) {
        return partition.getLocation();
      }
    }
    return null;
  }

  public HdfsPartition getPartition(List<PartitionKeyValue> partitionSpec) {
    List<TPartitionKeyValue> partitionKeyValues = Lists.newArrayList();
    for (PartitionKeyValue kv: partitionSpec) {
      String value = PartitionKeyValue.getPartitionKeyValueString(
          kv, getNullPartitionKeyValue());
      partitionKeyValues.add(new TPartitionKeyValue(kv.getColName(), value));
    }
    return getPartitionFromThriftPartitionSpec(partitionKeyValues);
  }
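
  // Returns the HdfsPartition whose partition-key values match the given Thrift
  // partition spec (compared case-insensitively), or null if the spec names unknown or
  // duplicate keys, does not cover every partition key, or matches no partition.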
  public HdfsPartition getPartitionFromThriftPartitionSpec(
      List<TPartitionKeyValue> partitionSpec) {
    // Build the list of target partition-key values (lower-cased), in partition-column
    // order, rejecting specs that set the same key twice.
    List<String> targetValues = Lists.newArrayList();
    Set<String> keys = Sets.newHashSet();
    for (FieldSchema fs: getMetaStoreTable().getPartitionKeys()) {
      for (TPartitionKeyValue kv: partitionSpec) {
        if (fs.getName().toLowerCase().equals(kv.getName().toLowerCase())) {
          targetValues.add(kv.getValue().toLowerCase());
          if (!keys.add(kv.getName().toLowerCase())) return null;
        }
      }
    }
    if (targetValues.size() == 0 ||
        targetValues.size() != getMetaStoreTable().getPartitionKeysSize()) {
      return null;
    }
    // Search for a partition whose key values all match the targets.
    for (HdfsPartition partition: getPartitions()) {
      if (partition.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
        continue;
      }
      List<LiteralExpr> partitionValues = partition.getPartitionValues();
      Preconditions.checkState(partitionValues.size() == targetValues.size());
      boolean matchFound = true;
      for (int i = 0; i < targetValues.size(); ++i) {
        String value;
        if (partitionValues.get(i) instanceof NullLiteral) {
          value = getNullPartitionKeyValue();
        } else {
          value = partitionValues.get(i).getStringValue();
          Preconditions.checkNotNull(value);
        }
        if (!targetValues.get(i).equals(value.toLowerCase())) {
          matchFound = false;
          break;
        }
      }
      if (matchFound) return partition;
    }
    return null;
  }
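
  // Creates Column objects from the Hive Metastore field schemas (partition keys
  // first), failing if a partition column has a type that does not support
  // partitioning, and then loads the per-column statistics from the Metastore.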
  private void loadColumns(List<FieldSchema> fieldSchemas, HiveMetaStoreClient client)
      throws TableLoadingException {
    int pos = 0;
    for (FieldSchema s: fieldSchemas) {
      Type type = parseColumnType(s);
      // Only a subset of types may be used as a partition (clustering) column.
      if (pos < numClusteringCols_ && !type.supportsTablePartitioning()) {
        throw new TableLoadingException(
            String.format("Failed to load metadata for table '%s' because of " +
                "unsupported partition-column type '%s' in partition column '%s'",
                getFullName(), type.toSql(), s.getName()));
      }
      Column col = new Column(s.getName(), type, s.getComment(), pos);
      addColumn(col);
      ++pos;
    }
    fields_ = fieldSchemas == null ? new ArrayList<FieldSchema>() : fieldSchemas;
    loadAllColumnStats(client);
  }
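
  // Clears and re-initializes the per-partition-key index structures (partition ids,
  // value-to-partition-id maps, null-partition-id sets) and resets the stats of the
  // clustering columns.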
  private void populatePartitionMd() {
    partitionIds_.clear();
    partitionMap_.clear();
    partitionValuesMap_.clear();
    nullPartitionIds_.clear();
    for (int i = 0; i < numClusteringCols_; ++i) {
      getColumns().get(i).getStats().setNumNulls(0);
      getColumns().get(i).getStats().setNumDistinctValues(0);
      partitionValuesMap_.add(Maps.<LiteralExpr, HashSet<Long>>newTreeMap());
      nullPartitionIds_.add(Sets.<Long>newHashSet());
    }
  }
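
  // Builds the HdfsPartition objects (and their file and block metadata) from the Hive
  // Metastore partitions. Unpartitioned tables get a single partition created from the
  // table's storage descriptor. File descriptors from 'oldFileDescMap' are reused for
  // unchanged files, and newly loaded blocks are queued in 'blocksToLoad' for the
  // batched disk-id lookup.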
  private void loadPartitions(
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions,
      org.apache.hadoop.hive.metastore.api.Table msTbl,
      Map<String, List<FileDescriptor>> oldFileDescMap) throws IOException,
      CatalogException {
    // ...
    Map<FsKey, FileBlocksInfo> blocksToLoad = Maps.newHashMap();
    // ...
    isMarkedCached_ = HdfsCachingUtil.validateCacheParams(msTbl.getParameters());

    if (msTbl.getPartitionKeysSize() == 0) {
      Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty());
      // Unpartitioned table: create a single partition over the table's base directory.
      HdfsPartition part = createPartition(msTbl.getSd(), null, oldFileDescMap,
          blocksToLoad);
      addPartition(part);
      if (isMarkedCached_) part.markCached();
      Path location = new Path(hdfsBaseDir_);
      FileSystem fs = location.getFileSystem(CONF);
      if (fs.exists(location)) {
        accessLevel_ = getAvailableAccessLevel(fs, location);
      }
    } else {
      for (org.apache.hadoop.hive.metastore.api.Partition msPartition: msPartitions) {
        HdfsPartition partition = createPartition(msPartition.getSd(), msPartition,
            oldFileDescMap, blocksToLoad);
        // ...
        if (partition == null) continue;
        if (msPartition.getParameters() != null) {
          partition.setNumRows(getRowCount(msPartition.getParameters()));
        }
        // ...
      }
    }
    // ...
  }
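
  // Returns the access level (READ_WRITE, READ_ONLY, WRITE_ONLY or NONE) Impala has on
  // 'location', determined from the permissions of the first existing ancestor of the
  // path.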
  private TAccessLevel getAvailableAccessLevel(FileSystem fs, Path location)
      throws IOException {
    FsPermissionChecker permissionChecker = FsPermissionChecker.getInstance();
    while (location != null) {
      if (fs.exists(location)) {
        FsPermissionChecker.Permissions perms =
            permissionChecker.getPermissions(fs, location);
        if (perms.canReadAndWrite()) {
          return TAccessLevel.READ_WRITE;
        } else if (perms.canRead()) {
          return TAccessLevel.READ_ONLY;
        } else if (perms.canWrite()) {
          return TAccessLevel.WRITE_ONLY;
        }
        return TAccessLevel.NONE;
      }
      location = location.getParent();
    }
    // Should never get here: some ancestor of every path must exist.
    Preconditions.checkNotNull(location, "Error: no path ancestor exists");
    return TAccessLevel.NONE;
  }
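
  // Creates an HdfsPartition from a storage descriptor and an optional Hive Metastore
  // partition. The two-argument overload resolves block disk ids immediately; the
  // four-argument overload reuses file descriptors from 'oldFileDescMap' for unchanged
  // files and collects new blocks in 'perFsFileBlocks' for a later batched disk-id
  // lookup.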
  public HdfsPartition createPartition(StorageDescriptor storageDescriptor,
      org.apache.hadoop.hive.metastore.api.Partition msPartition)
      throws CatalogException {
    Map<FsKey, FileBlocksInfo> blocksToLoad = Maps.newHashMap();
    HdfsPartition hdfsPartition = createPartition(storageDescriptor, msPartition,
        fileDescMap_, blocksToLoad);
    loadDiskIds(blocksToLoad);
    return hdfsPartition;
  }

  private HdfsPartition createPartition(StorageDescriptor storageDescriptor,
      org.apache.hadoop.hive.metastore.api.Partition msPartition,
      Map<String, List<FileDescriptor>> oldFileDescMap,
      Map<FsKey, FileBlocksInfo> perFsFileBlocks)
      throws CatalogException {
    HdfsStorageDescriptor fileFormatDescriptor =
        HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
    Path partDirPath = new Path(storageDescriptor.getLocation());
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();
    boolean isMarkedCached = isMarkedCached_;
    List<LiteralExpr> keyValues = Lists.newArrayList();
    if (msPartition != null) {
      isMarkedCached = HdfsCachingUtil.validateCacheParams(msPartition.getParameters());
      // Convert the Hive partition-key strings into LiteralExprs of the key types.
      for (String partitionKey: msPartition.getValues()) {
        Type type = getColumns().get(keyValues.size()).getType();
        if (partitionKey.equals(getNullPartitionKeyValue())) {
          keyValues.add(NullLiteral.create(type));
        } else {
          try {
            keyValues.add(LiteralExpr.create(partitionKey, type));
          } catch (Exception ex) {
            LOG.warn("Failed to create literal expression of type: " + type, ex);
            // ...
          }
        }
      }
      try {
        Expr.analyze(keyValues, null);
      } catch (AnalysisException e) {
        // Should never happen for constant partition-key literals.
        throw new IllegalStateException(e);
      }
    }
    try {
      FileSystem fs = partDirPath.getFileSystem(CONF);
      multipleFileSystems_ = multipleFileSystems_ ||
          !FileSystemUtil.isPathOnFileSystem(new Path(getLocation()), fs);
      if (fs.exists(partDirPath)) {
        // There is no API to list only files changed since a timestamp, so list all
        // files in the partition directory.
        for (FileStatus fileStatus: fs.listStatus(partDirPath)) {
          String fileName = fileStatus.getPath().getName().toString();
          if (fileStatus.isDirectory() || FileSystemUtil.isHiddenFile(fileName) ||
              HdfsCompression.fromFileName(fileName) == HdfsCompression.LZO_INDEX) {
            // Ignore directories, hidden files and LZO index files.
            continue;
          }
          String partitionDir = fileStatus.getPath().getParent().toString();
          FileDescriptor fd = null;
          // Look for a cached FileDescriptor with the same directory and file name.
          if (oldFileDescMap != null && oldFileDescMap.get(partitionDir) != null) {
            for (FileDescriptor oldFileDesc: oldFileDescMap.get(partitionDir)) {
              if (oldFileDesc.getFileName().equals(fileName)) {
                fd = oldFileDesc;
                break;
              }
            }
          }
          // Reuse the old descriptor only if the file is unchanged; otherwise create a
          // new one and reload its block metadata.
          if (fd == null || isMarkedCached || fd.getFileLength() != fileStatus.getLen()
              || fd.getModificationTime() != fileStatus.getModificationTime()) {
            fd = new FileDescriptor(fileName, fileStatus.getLen(),
                fileStatus.getModificationTime());
            loadBlockMetadata(fs, fileStatus, fd, fileFormatDescriptor.getFileFormat(),
                perFsFileBlocks);
          }
          List<FileDescriptor> fds = fileDescMap_.get(partitionDir);
          if (fds == null) {
            fds = Lists.newArrayList();
            fileDescMap_.put(partitionDir, fds);
          }
          fds.add(fd);
          fileDescriptors.add(fd);
        }
        // ...
      }
      HdfsPartition partition = new HdfsPartition(this, msPartition, keyValues,
          fileFormatDescriptor, fileDescriptors,
          getAvailableAccessLevel(fs, partDirPath));
      partition.checkWellFormed();
      return partition;
    } catch (Exception e) {
      throw new CatalogException("Failed to create partition: ", e);
    }
  }

  // Adds the blocks and their BlockLocations to the FileBlocksInfo of the file's
  // filesystem, creating the entry if needed.
  private void addPerFsFileBlocks(Map<FsKey, FileBlocksInfo> fsToBlocks, FileSystem fs,
      List<THdfsFileBlock> blocks, List<BlockLocation> locations) {
    FsKey fsKey = new FsKey(fs);
    FileBlocksInfo infos = fsToBlocks.get(fsKey);
    if (infos == null) {
      infos = new FileBlocksInfo();
      fsToBlocks.put(fsKey, infos);
    }
    infos.addBlocks(blocks, locations);
  }
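
  // Adds a partition to the table and updates the partition indexes and the
  // partition-key column stats (distinct-value and null counts).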
  public void addPartition(HdfsPartition partition) {
    // ...
    partitions_.add(partition);
    // ...
    updatePartitionMdAndColStats(partition);
  }

  private void updatePartitionMdAndColStats(HdfsPartition partition) {
    // ...
    partitionIds_.add(partition.getId());
    partitionMap_.put(partition.getId(), partition);
    for (int i = 0; i < partition.getPartitionValues().size(); ++i) {
      ColumnStats stats = getColumns().get(i).getStats();
      LiteralExpr literal = partition.getPartitionValues().get(i);
      // Partitions with NULL key values are tracked separately in nullPartitionIds_.
      if (literal instanceof NullLiteral) {
        stats.setNumNulls(stats.getNumNulls() + 1);
        if (nullPartitionIds_.get(i).isEmpty()) {
          stats.setNumDistinctValues(stats.getNumDistinctValues() + 1);
        }
        nullPartitionIds_.get(i).add(partition.getId());
        continue;
      }
      HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal);
      if (partitionIds == null) {
        partitionIds = Sets.newHashSet();
        partitionValuesMap_.get(i).put(literal, partitionIds);
        stats.setNumDistinctValues(stats.getNumDistinctValues() + 1);
      }
      partitionIds.add(partition.getId());
    }
  }
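
  // Drops the partition matching the given Thrift partition spec, updating the
  // partition indexes and partition-key column stats; returns the dropped partition,
  // or null if no partition matches.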
  public HdfsPartition dropPartition(List<TPartitionKeyValue> partitionSpec) {
    HdfsPartition partition = getPartitionFromThriftPartitionSpec(partitionSpec);
    // Check that the partition exists and remove it from the partition list.
    if (partition == null || !partitions_.remove(partition)) return null;
    // ...
    Preconditions.checkArgument(partition.getPartitionValues().size() ==
        numClusteringCols_);
    Long partitionId = partition.getId();
    // Remove the partition id from the various indexes.
    partitionIds_.remove(partitionId);
    partitionMap_.remove(partitionId);
    for (int i = 0; i < partition.getPartitionValues().size(); ++i) {
      ColumnStats stats = getColumns().get(i).getStats();
      LiteralExpr literal = partition.getPartitionValues().get(i);
      if (literal instanceof NullLiteral) {
        nullPartitionIds_.get(i).remove(partitionId);
        stats.setNumNulls(stats.getNumNulls() - 1);
        if (nullPartitionIds_.get(i).isEmpty()) {
          stats.setNumDistinctValues(stats.getNumDistinctValues() - 1);
        }
        continue;
      }
      HashSet<Long> partitionIds = partitionValuesMap_.get(i).get(literal);
      // If multiple partitions share this value, only remove this partition's id;
      // otherwise drop the value entirely.
      if (partitionIds.size() > 1) partitionIds.remove(partitionId);
      else {
        partitionValuesMap_.get(i).remove(literal);
        stats.setNumDistinctValues(stats.getNumDistinctValues() - 1);
      }
    }
    return partition;
  }

  private void addDefaultPartition(StorageDescriptor storageDescriptor)
      throws CatalogException {
    HdfsStorageDescriptor hdfsStorageDescriptor =
        HdfsStorageDescriptor.fromStorageDescriptor(this.name_, storageDescriptor);
    HdfsPartition partition = HdfsPartition.defaultPartition(this, hdfsStorageDescriptor);
    partitions_.add(partition);
  }
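
  // Loads the table: null-value formats, Avro schema handling, the column schema, the
  // table row count and the partition list. When 'cachedEntry' is a previously loaded
  // HdfsTable, its unchanged partitions and file descriptors are reused and only new
  // or modified partitions are fetched from the Hive Metastore.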
  public void load(Table cachedEntry, HiveMetaStoreClient client,
      org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException {
    LOG.debug("load table: " + db_.getName() + "." + name_);
    // Turn all exceptions into TableLoadingException.
    try {
      // Set the null partition key value from the Hive config.
      nullPartitionKeyValue_ = client.getConfigValue(
          "hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__");
      // Set the NULL indicator string from the table properties.
      nullColumnValue_ =
          msTbl.getParameters().get(serdeConstants.SERIALIZATION_NULL_FORMAT);
      if (nullColumnValue_ == null) nullColumnValue_ = DEFAULT_NULL_COLUMN_VALUE;

      // Populate both the partition keys and the regular columns.
      List<FieldSchema> partKeys = msTbl.getPartitionKeys();
      List<FieldSchema> tblFields = Lists.newArrayList();
      String inputFormat = msTbl.getSd().getInputFormat();
      if (HdfsFileFormat.fromJavaClassName(inputFormat) == HdfsFileFormat.AVRO) {
        // Look for the Avro schema in the serde properties and the table properties.
        List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
        schemaSearchLocations.add(
            msTbl.getSd().getSerdeInfo().getParameters());
        schemaSearchLocations.add(msTbl.getParameters());
        avroSchema_ =
            HdfsTable.getAvroSchema(schemaSearchLocations, getFullName());
        String serdeLib = msTbl.getSd().getSerdeInfo().getSerializationLib();
        if (serdeLib == null ||
            serdeLib.equals("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) {
          // An Avro table without an Avro serde: fall back to the columns from the
          // storage descriptor.
          tblFields.addAll(msTbl.getSd().getCols());
        } else {
          // Load the fields from the parsed Avro schema. Avro reports CHAR/VARCHAR as
          // "string", so fall back to the storage-descriptor type in that case.
          List<FieldSchema> sdTypes = msTbl.getSd().getCols();
          int i = 0;
          List<Column> avroTypeList = AvroSchemaParser.parse(avroSchema_);
          boolean canFallBack = sdTypes.size() == avroTypeList.size();
          for (Column parsedCol: avroTypeList) {
            FieldSchema fs = new FieldSchema();
            fs.setName(parsedCol.getName());
            String avroType = parsedCol.getType().toSql();
            if (avroType.toLowerCase().equals("string") && canFallBack) {
              fs.setType(sdTypes.get(i).getType());
            } else {
              fs.setType(avroType);
            }
            fs.setComment("from deserializer");
            tblFields.add(fs);
            ++i;
          }
        }
      } else {
        tblFields.addAll(msTbl.getSd().getCols());
      }
      List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>(
          partKeys.size() + tblFields.size());
      fieldSchemas.addAll(partKeys);
      fieldSchemas.addAll(tblFields);
      // The number of clustering columns equals the number of partition keys.
      numClusteringCols_ = partKeys.size();
      loadColumns(fieldSchemas, client);

      // Collect the partitions to load, reusing cached partitions where possible.
      List<org.apache.hadoop.hive.metastore.api.Partition> msPartitions =
          Lists.newArrayList();
      if (cachedEntry == null || !(cachedEntry instanceof HdfsTable) /* ... */) {
        msPartitions.addAll(MetaStoreUtil.fetchAllPartitions(
            client, db_.getName(), name_, NUM_PARTITION_FETCH_RETRIES));
      } else {
        Preconditions.checkArgument(cachedEntry instanceof HdfsTable);
        HdfsTable cachedHdfsTableEntry = (HdfsTable) cachedEntry;
        // Partition names from the Metastore that have not yet been matched to a
        // reusable cached partition.
        Set<String> modifiedPartitionNames = Sets.newHashSet();
        // cachedEntry == this happens for CTAS "temp" tables that do not exist in the
        // Metastore yet; only query the Metastore for real tables.
        if (cachedEntry != this) {
          modifiedPartitionNames.addAll(
              client.listPartitionNames(db_.getName(), name_, (short) -1));
        }
        int totalPartitions = modifiedPartitionNames.size();
        // Reuse the HMS partition objects of cached, non-dirty partitions; whatever
        // names remain in the set afterwards must be fetched from the Metastore.
        for (HdfsPartition cachedPart: cachedHdfsTableEntry.getPartitions()) {
          if (cachedPart.isDirty() || cachedPart.isDefaultPartition()) {
            continue;
          }
          org.apache.hadoop.hive.metastore.api.Partition cachedMsPart =
              cachedPart.toHmsPartition();
          if (cachedMsPart == null) continue;
          String cachedPartName = cachedPart.getPartitionName();
          if (modifiedPartitionNames.contains(cachedPartName)) {
            msPartitions.add(cachedMsPart);
            modifiedPartitionNames.remove(cachedPartName);
          }
        }
        LOG.info(String.format("Incrementally refreshing %d/%d partitions.",
            modifiedPartitionNames.size(), totalPartitions));
        if (modifiedPartitionNames.size() > 0) {
          msPartitions.addAll(MetaStoreUtil.fetchPartitionsByName(client,
              Lists.newArrayList(modifiedPartitionNames), db_.getName(), name_));
        }
      }
      Map<String, List<FileDescriptor>> oldFileDescMap = null;
      if (cachedEntry != null && cachedEntry instanceof HdfsTable) {
        HdfsTable cachedHdfsTable = (HdfsTable) cachedEntry;
        oldFileDescMap = cachedHdfsTable.fileDescMap_;
        hostIndex_.populate(cachedHdfsTable.hostIndex_.getList());
      }
      loadPartitions(msPartitions, msTbl, oldFileDescMap);

      // Load the table-level row count from the table parameters.
      numRows_ = getRowCount(msTbl.getParameters());
      LOG.debug("table #rows=" + Long.toString(numRows_));
      if (numClusteringCols_ == 0 && !partitions_.isEmpty()) {
        // Unpartitioned tables have at most the default partition plus one real one.
        Preconditions.checkState(partitions_.size() == 2 ||
            partitions_.size() == 1);
        // ...
      }
    } catch (TableLoadingException e) {
      throw e;
    } catch (Exception e) {
      throw new TableLoadingException(
          "Failed to load metadata for table: " + getFullName(), e);
    }
  }
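
  // Looks up the table's Avro schema in the given property maps: an inline
  // 'avro.schema.literal' takes precedence, otherwise the schema is read from the
  // 'avro.schema.url' location (an http:// URL or a filesystem path). Fails if neither
  // property is set or the schema cannot be read.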
  public static String getAvroSchema(List<Map<String, String>> schemaSearchLocations,
      String tableName) throws TableLoadingException {
    String url = null;
    // Search all locations and return the first valid schema found.
    for (Map<String, String> schemaLocation: schemaSearchLocations) {
      if (schemaLocation == null) continue;

      String literal = schemaLocation.get(AvroSerdeUtils.SCHEMA_LITERAL);
      if (literal != null && !literal.equals(AvroSerdeUtils.SCHEMA_NONE)) return literal;

      url = schemaLocation.get(AvroSerdeUtils.SCHEMA_URL);
      if (url != null) break;
    }
    if (url == null || url.equals(AvroSerdeUtils.SCHEMA_NONE)) {
      throw new TableLoadingException(String.format("No Avro schema provided in " +
          "SERDEPROPERTIES or TBLPROPERTIES for table: %s ", tableName));
    }
    String schema = null;
    if (url.toLowerCase().startsWith("http://")) {
      // Read the schema from an HTTP URL.
      InputStream urlStream = null;
      try {
        urlStream = new URL(url).openStream();
        schema = IOUtils.toString(urlStream);
      } catch (IOException e) {
        throw new TableLoadingException("Problem reading Avro schema at: " + url, e);
      } finally {
        IOUtils.closeQuietly(urlStream);
      }
    } else {
      // Read the schema from a filesystem path.
      Path path = new Path(url);
      FileSystem fs = null;
      try {
        fs = path.getFileSystem(FileSystemUtil.getConfiguration());
      } catch (Exception e) {
        throw new TableLoadingException(String.format(
            "Invalid avro.schema.url: %s. %s", path, e.getMessage()));
      }
      StringBuilder errorMsg = new StringBuilder();
      if (!FileSystemUtil.isPathReachable(path, fs, errorMsg)) {
        throw new TableLoadingException(String.format(
            "Invalid avro.schema.url: %s. %s", path, errorMsg));
      }
      try {
        schema = FileSystemUtil.readFile(path);
      } catch (IOException e) {
        throw new TableLoadingException(
            "Problem reading Avro schema at: " + url, e);
      }
    }
    return schema;
  }

  // Lower-cased names of the columns that have stats stored in the HMS (the HMS only
  // keeps column stats for non-partition columns).
  private List<String> getColumnNamesWithHmsStats() {
    List<String> ret = Lists.newArrayList();
    for (Column column: getColumns().subList(numClusteringCols_, getColumns().size())) {
      ret.add(column.getName().toLowerCase());
    }
    return ret;
  }
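
  // Rebuilds the in-memory table state (host index, partitions, Avro schema) from the
  // THdfsTable inside a TTable produced by toThrift()/getTHdfsTable().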
  @Override
  protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
    super.loadFromThrift(thriftTable);
    THdfsTable hdfsTable = thriftTable.getHdfs_table();
    // ...
    hostIndex_.populate(hdfsTable.getNetwork_addresses());
    // ...
    for (Map.Entry<Long, THdfsPartition> part: hdfsTable.getPartitions().entrySet()) {
      HdfsPartition hdfsPart =
          HdfsPartition.fromThrift(this, part.getKey(), part.getValue());
      // ...
      partitions_.add(hdfsPart);
    }
    avroSchema_ = hdfsTable.isSetAvroSchema() ? hdfsTable.getAvroSchema() : null;
  }

  public TTableDescriptor toThriftDescriptor(Set<Long> referencedPartitions) {
    // Descriptor sent to the backend: file descriptors are omitted and only the
    // referenced partitions are serialized.
    TTableDescriptor tableDesc = new TTableDescriptor(id_.asInt(), TTableType.HDFS_TABLE,
        getColumns().size(), numClusteringCols_, name_, db_.getName());
    tableDesc.setHdfsTable(getTHdfsTable(false, referencedPartitions));
    return tableDesc;
  }

  public TTable toThrift() {
    TTable table = super.toThrift();
    table.setTable_type(TTableType.HDFS_TABLE);
    table.setHdfs_table(getTHdfsTable(true, null));
    return table;
  }
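
  // Builds the THdfsTable. 'includeFileDesc' controls whether file descriptors (and
  // the host index needed to decode their block replicas) are serialized; when
  // 'refPartitions' is non-null only those partition ids are included. The two options
  // are mutually exclusive.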
  private THdfsTable getTHdfsTable(boolean includeFileDesc, Set<Long> refPartitions) {
    // includeFileDesc implies all partitions are included (refPartitions == null).
    Preconditions.checkState(!includeFileDesc || refPartitions == null);
    Map<Long, THdfsPartition> idToPartition = Maps.newHashMap();
    for (HdfsPartition partition: partitions_) {
      long id = partition.getId();
      if (refPartitions == null || refPartitions.contains(id)) {
        idToPartition.put(id, partition.toThrift(includeFileDesc));
      }
    }
    THdfsTable hdfsTable = new THdfsTable(hdfsBaseDir_, getColumnNames(),
        nullPartitionKeyValue_, nullColumnValue_, idToPartition);
    if (includeFileDesc) {
      // The host index is only needed to resolve the block replicas stored inside the
      // serialized file descriptors.
      hdfsTable.setNetwork_addresses(hostIndex_.getList());
    }
    return hdfsTable;
  }
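
  // Returns the file format used by the largest number of this table's partitions.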
  public HdfsFileFormat getMajorityFormat() {
    Map<HdfsFileFormat, Integer> numPartitionsByFormat = Maps.newHashMap();
    for (HdfsPartition partition: partitions_) {
      HdfsFileFormat format = partition.getInputFormatDescriptor().getFileFormat();
      Integer numPartitions = numPartitionsByFormat.get(format);
      if (numPartitions == null) {
        numPartitions = Integer.valueOf(1);
      } else {
        numPartitions = Integer.valueOf(numPartitions.intValue() + 1);
      }
      numPartitionsByFormat.put(format, numPartitions);
    }

    HdfsFileFormat majorityFormat = null;
    int maxNumPartitions = Integer.MIN_VALUE;
    for (Map.Entry<HdfsFileFormat, Integer> entry: numPartitionsByFormat.entrySet()) {
      if (entry.getValue().intValue() > maxNumPartitions) {
        majorityFormat = entry.getKey();
        maxNumPartitions = entry.getValue().intValue();
      }
    }
    Preconditions.checkNotNull(majorityFormat);
    return majorityFormat;
  }
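
  // Returns a result set with one row per partition: the partition-key values, row
  // count, file count, size, cached bytes, cache replication, file format, incremental
  // stats flag and location, plus a final "Total" row when the table is partitioned.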
  public TResultSet getTableStats() {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);

    // One result column per partition-key column, followed by the stats columns.
    for (int i = 0; i < numClusteringCols_; ++i) {
      Column partCol = getColumns().get(i);
      TColumn colDesc = new TColumn(partCol.getName(), Type.STRING.toThrift());
      resultSchema.addToColumns(colDesc);
    }
    // ...

    ArrayList<HdfsPartition> orderedPartitions = Lists.newArrayList(partitions_);
    Collections.sort(orderedPartitions);

    long totalCachedBytes = 0L;
    for (HdfsPartition p: orderedPartitions) {
      // Skip the default partition.
      if (p.getId() == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) continue;

      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      // Add the partition-key values as strings.
      for (LiteralExpr expr: p.getPartitionValues()) {
        rowBuilder.add(expr.getStringValue());
      }
      rowBuilder.add(p.getNumRows()).add(p.getFileDescriptors().size())
          .addBytes(p.getSize());
      if (!p.isMarkedCached()) {
        // Distinguishes partitions with 0B cached from partitions not marked as cached.
        rowBuilder.add("NOT CACHED");
        rowBuilder.add("NOT CACHED");
      } else {
        // Sum the lengths of the blocks that have at least one cached replica.
        long cachedBytes = 0L;
        for (FileDescriptor fd: p.getFileDescriptors()) {
          for (THdfsFileBlock fb: fd.getFileBlocks()) {
            if (fb.getIs_replica_cached().contains(true)) {
              cachedBytes += fb.getLength();
            }
          }
        }
        totalCachedBytes += cachedBytes;
        rowBuilder.addBytes(cachedBytes);

        // The cache replication factor comes from the table parameters for
        // unpartitioned tables and from the partition otherwise.
        Short rep = HdfsCachingUtil.getCachedCacheReplication(
            numClusteringCols_ == 0 ?
                getMetaStoreTable().getParameters() : p.getParameters());
        rowBuilder.add(rep.toString());
      }
      rowBuilder.add(p.getInputFormatDescriptor().getFileFormat().toString());
      rowBuilder.add(String.valueOf(p.hasIncrementalStats()));
      rowBuilder.add(p.getLocation());
      result.addToRows(rowBuilder.get());
    }

    // Add the summary "Total" row for partitioned tables.
    if (numClusteringCols_ > 0) {
      TResultRowBuilder rowBuilder = new TResultRowBuilder();
      int numEmptyCells = numClusteringCols_ - 1;
      rowBuilder.add("Total");
      for (int i = 0; i < numEmptyCells; ++i) {
        rowBuilder.add("");
      }
      // ...
      rowBuilder
          .addBytes(totalCachedBytes).add("").add("").add("").add("");
      result.addToRows(rowBuilder.get());
    }
    return result;
  }
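
  // Returns a result set listing the files of the partition matching 'partitionSpec'
  // (or of all partitions when the spec is null): one row per file with its path, size
  // and owning partition name.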
  public TResultSet getFiles(List<TPartitionKeyValue> partitionSpec) {
    TResultSet result = new TResultSet();
    TResultSetMetadata resultSchema = new TResultSetMetadata();
    result.setSchema(resultSchema);
    // ...
    result.setRows(Lists.<TResultRow>newArrayList());

    List<HdfsPartition> partitions = null;
    if (partitionSpec == null) {
      partitions = partitions_;
    } else {
      // Get the partition matching the given partition spec.
      HdfsPartition partition = getPartitionFromThriftPartitionSpec(partitionSpec);
      Preconditions.checkState(partition != null);
      partitions = Lists.newArrayList(partition);
    }
    for (HdfsPartition p: partitions) {
      for (FileDescriptor fd: p.getFileDescriptors()) {
        TResultRowBuilder rowBuilder = new TResultRowBuilder();
        rowBuilder.add(p.getLocation() + "/" + fd.getFileName());
        rowBuilder.add(PrintUtils.printBytes(fd.getFileLength()));
        rowBuilder.add(p.getPartitionName());
        result.addToRows(rowBuilder.get());
      }
    }
    return result;
  }

  // ...
}