15 package com.cloudera.impala.catalog;
17 import java.util.ArrayList;
18 import java.util.Arrays;
19 import java.util.List;
21 import java.util.concurrent.atomic.AtomicLong;
23 import org.apache.commons.lang.ArrayUtils;
24 import org.slf4j.Logger;
25 import org.slf4j.LoggerFactory;
33 import com.cloudera.impala.thrift.ImpalaInternalServiceConstants;
34 import com.cloudera.impala.thrift.TAccessLevel;
35 import com.cloudera.impala.thrift.TExpr;
36 import com.cloudera.impala.thrift.TExprNode;
37 import com.cloudera.impala.thrift.THdfsCompression;
38 import com.cloudera.impala.thrift.THdfsFileBlock;
39 import com.cloudera.impala.thrift.THdfsFileDesc;
40 import com.cloudera.impala.thrift.THdfsPartition;
41 import com.cloudera.impala.thrift.TNetworkAddress;
42 import com.cloudera.impala.thrift.TPartitionStats;
43 import com.cloudera.impala.thrift.TTableStats;
45 import com.google.common.base.Joiner;
46 import com.google.common.base.Objects;
47 import com.google.common.base.Preconditions;
48 import com.google.common.collect.ImmutableList;
49 import com.google.common.collect.ImmutableMap;
50 import com.google.common.collect.Lists;
51 import com.google.common.collect.Maps;
52 import com.google.common.annotations.VisibleForTesting;
68 public String
getFileName() {
return fileDescriptor_.getFile_name(); }
71 return fileDescriptor_.getCompression();
74 return fileDescriptor_.getLast_modification_time();
77 return fileDescriptor_.getFile_blocks();
82 public FileDescriptor(String fileName,
long fileLength,
long modificationTime) {
83 Preconditions.checkNotNull(fileName);
84 Preconditions.checkArgument(fileLength >= 0);
86 fileDescriptor_.setFile_name(fileName);
87 fileDescriptor_.setLength(fileLength);
88 fileDescriptor_.setLast_modification_time(modificationTime);
89 fileDescriptor_.setCompression(
90 HdfsCompression.fromFileName(fileName).
toThrift());
91 List<THdfsFileBlock> emptyFileBlockList = Lists.newArrayList();
92 fileDescriptor_.setFile_blocks(emptyFileBlockList);
96 this(fileDesc.getFile_name(), fileDesc.length, fileDesc.last_modification_time);
97 for (THdfsFileBlock block: fileDesc.getFile_blocks()) {
98 fileDescriptor_.addToFile_blocks(block);
103 fileDescriptor_.addToFile_blocks(blockMd.toThrift());
112 return Objects.toStringHelper(
this)
140 Preconditions.checkNotNull(location);
141 String[] ip_port = location.split(
":");
142 if (ip_port.length != 2)
return null;
144 return new TNetworkAddress(ip_port[0], Integer.parseInt(ip_port[1]));
145 }
catch (NumberFormatException e) {
164 for (
boolean isCached: fileBlock.getIs_replica_cached()) {
175 List<BlockReplica> replicaHostIdxs) {
176 Preconditions.checkNotNull(replicaHostIdxs);
178 fileBlock_.setOffset(
offset);
179 fileBlock_.setLength(blockLength);
181 fileBlock_.setReplica_host_idxs(
new ArrayList<Integer>(replicaHostIdxs.size()));
182 fileBlock_.setIs_replica_cached(
new ArrayList<Boolean>(replicaHostIdxs.size()));
185 fileBlock_.addToReplica_host_idxs(replica.getHostIdx());
186 fileBlock_.addToIs_replica_cached(replica.isCached());
187 isCached_ |= replica.isCached();
191 public long getOffset() {
return fileBlock_.getOffset(); }
192 public long getLength() {
return fileBlock_.getLength(); }
196 return fileBlock_.getReplica_host_idxs();
204 public static void setDiskIds(
int[] diskIds, THdfsFileBlock fileBlock) {
205 Preconditions.checkArgument(
206 diskIds.length == fileBlock.getReplica_host_idxs().size());
207 fileBlock.setDisk_ids(Arrays.asList(ArrayUtils.toObject(diskIds)));
216 return fileBlock_.getDisk_ids().
get(hostIndex);
220 return fileBlock_.getIs_replica_cached().
get(hostIndex);
231 return Objects.toStringHelper(
this)
233 .add(
"length", fileBlock_.length)
259 private final static Logger
LOG = LoggerFactory.getLogger(HdfsPartition.class);
275 return id_ == ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID;
288 List<String> partitionCols = Lists.newArrayList();
289 List<String> partitionValues = Lists.newArrayList();
290 for (
int i = 0; i <
getTable().getNumClusteringCols(); ++i) {
291 partitionCols.add(
getTable().getColumns().
get(i).getName());
294 return org.apache.hadoop.hive.common.FileUtils.makePartName(
304 List<String> ret = Lists.newArrayList();
306 if (mapNullsToHiveKey) {
307 ret.add(PartitionKeyValue.getPartitionKeyValueString(
308 partValue,
getTable().getNullPartitionKeyValue()));
310 ret.add(partValue.getStringValue());
323 List<String> partitionCols = Lists.newArrayList();
324 List<String> partitionValues = Lists.newArrayList();
325 for (
int i = 0; i <
getTable().getNumClusteringCols(); ++i) {
326 partitionCols.add(ToSqlUtils.getIdentSql(
getTable().getColumns().
get(i).getName()));
329 List<String> conjuncts = Lists.newArrayList();
330 for (
int i = 0; i < partitionCols.size(); ++i) {
332 String sql = expr.toSql();
333 if (expr instanceof
NullLiteral || sql.isEmpty()) {
334 conjuncts.add(ToSqlUtils.getIdentSql(partitionCols.get(i))
337 conjuncts.add(ToSqlUtils.getIdentSql(partitionCols.get(i))
341 return "(" + Joiner.on(
" AND " ).join(conjuncts) +
")";
348 StringBuilder partDescription =
new StringBuilder();
349 for (
int i = 0; i <
getTable().getNumClusteringCols(); ++i) {
350 String columnName =
getTable().getColumns().
get(i).getName();
351 String value = PartitionKeyValue.getPartitionKeyValueString(
353 getTable().getNullPartitionKeyValue());
354 partDescription.append(columnName +
"=" + value);
355 if (i !=
getTable().getNumClusteringCols() - 1) partDescription.append(
"/");
357 return partDescription.toString();
373 fileFormatDescriptor_.setFileFormat(fileFormat);
379 return cachedMsPartitionDescriptor_.sdSerdeInfo;
386 return PartitionStatsUtil.partStatsFromParameters(
hmsParameters_);
388 LOG.warn(
"Could not deserialise incremental stats state for " +
getPartitionName() +
389 ", consider DROP INCREMENTAL STATS ... PARTITION ... and recomputing " +
390 "incremental stats for this table.");
397 return partStats != null && partStats.intermediate_col_stats != null;
410 return hmsParameters_.get(key);
448 public final org.apache.hadoop.hive.metastore.api.SerDeInfo
sdSerdeInfo;
450 public final List<org.apache.hadoop.hive.metastore.api.Order>
sdSortCols;
456 org.apache.hadoop.hive.metastore.api.Partition msPartition) {
457 org.apache.hadoop.hive.metastore.api.StorageDescriptor sd = null;
458 if (msPartition != null) {
459 sd = msPartition.getSd();
470 sdBucketCols = ImmutableList.copyOf(sd.getBucketCols());
471 sdSortCols = ImmutableList.copyOf(sd.getSortCols());
493 Preconditions.checkNotNull(table_.getFieldSchemas());
494 org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor =
495 new org.apache.hadoop.hive.metastore.api.StorageDescriptor(
496 table_.getNonPartitionFieldSchemas(),
location_,
505 org.apache.hadoop.hive.metastore.api.Partition partition =
506 new org.apache.hadoop.hive.metastore.api.Partition(
515 org.apache.hadoop.hive.metastore.api.Partition msPartition,
516 List<LiteralExpr> partitionKeyValues,
518 List<HdfsPartition.FileDescriptor> fileDescriptors,
long id,
519 String location, TAccessLevel accessLevel) {
521 if (msPartition == null) {
532 if (msPartition != null && msPartition.getParameters() != null) {
534 msPartition.getParameters()) != null;
544 StringBuilder errorMsg =
new StringBuilder();
546 fileDescriptor.getFileName(), errorMsg)) {
547 throw new RuntimeException(errorMsg.toString());
553 org.apache.hadoop.hive.metastore.api.Partition msPartition,
554 List<LiteralExpr> partitionKeyValues,
556 List<HdfsPartition.FileDescriptor> fileDescriptors, TAccessLevel accessLevel) {
557 this(table, msPartition, partitionKeyValues, fileFormatDescriptor, fileDescriptors,
558 partitionIdCounter_.getAndIncrement(), msPartition != null ?
559 msPartition.getSd().getLocation() : table.
getLocation(), accessLevel);
564 List<LiteralExpr> emptyExprList = Lists.newArrayList();
565 List<FileDescriptor> emptyFileDescriptorList = Lists.newArrayList();
567 storageDescriptor, emptyFileDescriptorList,
568 ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID, null,
569 TAccessLevel.READ_WRITE);
578 result += fileDescriptor.getFileLength();
585 return Objects.toStringHelper(
this)
591 long id, THdfsPartition thriftPartition) {
593 HdfsFileFormat.fromThrift(thriftPartition.getFileFormat()),
594 thriftPartition.lineDelim,
595 thriftPartition.fieldDelim,
596 thriftPartition.collectionDelim,
597 thriftPartition.mapKeyDelim,
598 thriftPartition.escapeChar,
600 thriftPartition.blockSize);
602 List<LiteralExpr> literalExpr = Lists.newArrayList();
603 if (
id != ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
604 List<Column> clusterCols = Lists.newArrayList();
605 for (
int i = 0; i < table.getNumClusteringCols(); ++i) {
606 clusterCols.add(table.getColumns().
get(i));
609 List<TExprNode> exprNodes = Lists.newArrayList();
610 for (TExpr expr: thriftPartition.getPartitionKeyExprs()) {
611 for (TExprNode node: expr.getNodes()) {
615 Preconditions.checkState(clusterCols.size() == exprNodes.size(),
616 String.format(
"Number of partition columns (%d) does not match number " +
617 "of partition key expressions (%d)",
618 clusterCols.size(), exprNodes.size()));
620 for (
int i = 0; i < exprNodes.size(); ++i) {
621 literalExpr.add(LiteralExpr.fromThrift(
622 exprNodes.get(i), clusterCols.get(i).getType()));
626 List<HdfsPartition.FileDescriptor> fileDescriptors = Lists.newArrayList();
627 if (thriftPartition.isSetFile_desc()) {
628 for (THdfsFileDesc desc: thriftPartition.getFile_desc()) {
629 fileDescriptors.add(HdfsPartition.FileDescriptor.fromThrift(desc));
633 TAccessLevel accessLevel = thriftPartition.isSetAccess_level() ?
634 thriftPartition.getAccess_level() : TAccessLevel.READ_WRITE;
636 fileDescriptors,
id, thriftPartition.getLocation(), accessLevel);
637 if (thriftPartition.isSetStats()) {
638 partition.setNumRows(thriftPartition.getStats().getNum_rows());
640 if (thriftPartition.isSetIs_marked_cached()) {
641 partition.isMarkedCached_ = thriftPartition.isIs_marked_cached();
644 if (thriftPartition.isSetHms_parameters()) {
645 partition.hmsParameters_ = thriftPartition.getHms_parameters();
647 partition.hmsParameters_ = Maps.newHashMap();
662 }
catch (Exception e) {
664 ") has invalid partition column values: ", e);
668 public THdfsPartition
toThrift(
boolean includeFileDesc) {
671 THdfsPartition thriftHdfsPart =
new THdfsPartition(
673 fileFormatDescriptor_.getFieldDelim(),
675 fileFormatDescriptor_.getMapKeyDelim(),
677 fileFormatDescriptor_.getFileFormat().
toThrift(), thriftExprs,
678 fileFormatDescriptor_.getBlockSize());
680 thriftHdfsPart.setStats(
new TTableStats(
numRows_));
683 thriftHdfsPart.setId(
getId());
685 if (includeFileDesc) {
688 thriftHdfsPart.addToFile_desc(fd.toThrift());
692 return thriftHdfsPart;
705 List<LiteralExpr> rhs) {
706 int sizeDiff = lhs.size() - rhs.size();
707 if (sizeDiff != 0)
return sizeDiff;
708 for(
int i = 0; i < lhs.size(); ++i) {
709 int cmp = lhs.get(i).
compareTo(rhs.get(i));
710 if (cmp != 0)
return cmp;
TPartitionStats getPartitionStats()
boolean hasIncrementalStats()
void putToParameters(String k, String v)
final List< LiteralExpr > partitionKeyValues_
FileDescriptor(THdfsFileDesc fileDesc)
List< HdfsPartition.FileDescriptor > getFileDescriptors()
FileDescriptor(String fileName, long fileLength, long modificationTime)
THdfsCompression getFileCompression()
List< String > getPartitionValuesAsStrings(boolean mapNullsToHiveKey)
Map< String, String > hmsParameters_
static void setDiskIds(int[] diskIds, THdfsFileBlock fileBlock)
HdfsFileFormat getFileFormat()
long getModificationTime()
final String sdOutputFormat
final List< String > sdBucketCols
int getDiskId(int hostIndex)
final HdfsStorageDescriptor fileFormatDescriptor_
Map< String, String > getParameters()
org.apache.hadoop.hive.metastore.api.SerDeInfo getSerdeInfo()
THdfsFileBlock toThrift()
HdfsPartition(HdfsTable table, org.apache.hadoop.hive.metastore.api.Partition msPartition, List< LiteralExpr > partitionKeyValues, HdfsStorageDescriptor fileFormatDescriptor, List< HdfsPartition.FileDescriptor > fileDescriptors, TAccessLevel accessLevel)
static AtomicLong partitionIdCounter_
final THdfsFileBlock fileBlock_
final List< org.apache.hadoop.hive.metastore.api.Order > sdSortCols
org.apache.hadoop.hive.metastore.api.Partition toHmsPartition()
String getPartitionName()
final org.apache.hadoop.hive.metastore.api.SerDeInfo sdSerdeInfo
List< Integer > getReplicaHostIdxs()
static FileBlock fromThrift(THdfsFileBlock thriftFileBlock)
BlockReplica(int hostIdx, boolean isCached)
static TNetworkAddress parseLocation(String location)
byte getCollectionDelim()
boolean isCached(int hostIndex)
CachedHmsPartitionDescriptor(org.apache.hadoop.hive.metastore.api.Partition msPartition)
boolean hasFileDescriptors()
HdfsPartition(HdfsTable table, org.apache.hadoop.hive.metastore.api.Partition msPartition, List< LiteralExpr > partitionKeyValues, HdfsStorageDescriptor fileFormatDescriptor, List< HdfsPartition.FileDescriptor > fileDescriptors, long id, String location, TAccessLevel accessLevel)
List< LiteralExpr > getPartitionValues()
FileBlock(THdfsFileBlock fileBlock)
THdfsPartition toThrift(boolean includeFileDesc)
HdfsStorageDescriptor getInputFormatDescriptor()
final TAccessLevel accessLevel_
static int comparePartitionKeyValues(List< LiteralExpr > lhs, List< LiteralExpr > rhs)
static HdfsPartition defaultPartition(HdfsTable table, HdfsStorageDescriptor storageDescriptor)
final boolean sdCompressed
void addFileBlock(FileBlock blockMd)
final int msLastAccessTime
final THdfsFileDesc fileDescriptor_
String getValuesAsString()
final Map< String, String > sdParameters
uint8_t offset[7 *64-sizeof(uint64_t)]
FileBlock(long offset, long blockLength, List< BlockReplica > replicaHostIdxs)
List< THdfsFileBlock > getFileBlocks()
void setFileFormat(HdfsFileFormat fileFormat)
boolean isDefaultPartition()
final CachedHmsPartitionDescriptor cachedMsPartitionDescriptor_
void setNumRows(long numRows)
final List< FileDescriptor > fileDescriptors_
static HdfsPartition fromThrift(HdfsTable table, long id, THdfsPartition thriftPartition)
String getParameter(String key)
int compareTo(HdfsPartition o)
void setLocation(String location)
TAccessLevel getAccessLevel()
static FileDescriptor fromThrift(THdfsFileDesc desc)