Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
com.cloudera.impala.catalog.HdfsTable Class Reference
Inheritance diagram for com.cloudera.impala.catalog.HdfsTable:
Collaboration diagram for com.cloudera.impala.catalog.HdfsTable:

Classes

class  FileBlocksInfo
 
class  FsKey
 

Public Member Functions

Map< String, List
< FileDescriptor > > 
getFileDescMap ()
 
boolean spansMultipleFileSystems ()
 
 HdfsTable (TableId id, org.apache.hadoop.hive.metastore.api.Table msTbl, Db db, String name, String owner)
 
TCatalogObjectType getCatalogObjectType ()
 
List< HdfsPartition > getPartitions ()
 
boolean isMarkedCached ()
 
HashMap< Long, HdfsPartition > getPartitionMap ()
 
HashSet< Long > getNullPartitionIds (int i)
 
HashSet< Long > getPartitionIds ()
 
TreeMap< LiteralExpr, HashSet
< Long > > 
getPartitionValueMap (int i)
 
String getNullPartitionKeyValue ()
 
String getNullColumnValue ()
 
String getLocation ()
 
List< FieldSchema > getFieldSchemas ()
 
List< FieldSchema > getNonPartitionFieldSchemas ()
 
boolean hasWriteAccess ()
 
String getFirstLocationWithoutWriteAccess ()
 
HdfsPartition getPartition (List< PartitionKeyValue > partitionSpec)
 
HdfsPartition getPartitionFromThriftPartitionSpec (List< TPartitionKeyValue > partitionSpec)
 
HdfsPartition createPartition (StorageDescriptor storageDescriptor, org.apache.hadoop.hive.metastore.api.Partition msPartition) throws CatalogException
 
void addPartition (HdfsPartition partition)
 
HdfsPartition dropPartition (List< TPartitionKeyValue > partitionSpec)
 
void load (Table cachedEntry, HiveMetaStoreClient client, org.apache.hadoop.hive.metastore.api.Table msTbl) throws TableLoadingException
 
TTableDescriptor toThriftDescriptor (Set< Long > referencedPartitions)
 
TTable toThrift ()
 
long getNumHdfsFiles ()
 
long getTotalHdfsBytes ()
 
String getHdfsBaseDir ()
 
boolean isAvroTable ()
 
int getNumNodes ()
 
ListMap< TNetworkAddress > getHostIndex ()
 
HdfsFileFormat getMajorityFormat ()
 
TResultSet getTableStats ()
 
TResultSet getFiles (List< TPartitionKeyValue > partitionSpec) throws CatalogException
 
void addColumn (Column col)
 
void clearColumns ()
 
void updateLastDdlTime (long ddlTime)
 
void validate () throws TableLoadingException
 
TCatalogObject toTCatalogObject ()
 
Db getDb ()
 
String getName ()
 
String getFullName ()
 
TableName getTableName ()
 
String getOwner ()
 
ArrayList< Column > getColumns ()
 
List< String > getColumnNames ()
 
String getStorageHandlerClassName ()
 
ArrayList< Column > getColumnsInHiveOrder ()
 
List< Column > getNonClusteringColumns ()
 
Column getColumn (String name)
 
org.apache.hadoop.hive.metastore.api.Table getMetaStoreTable ()
 
int getNumClusteringCols ()
 
TableId getId ()
 
long getNumRows ()
 
ArrayType getType ()
 
long getCatalogVersion ()
 
void setCatalogVersion (long catalogVersion)
 
boolean isLoaded ()
 

Static Public Member Functions

static String getAvroSchema (List< Map< String, String >> schemaSearchLocations, String tableName) throws TableLoadingException
 
static Table fromMetastoreTable (TableId id, Db db, org.apache.hadoop.hive.metastore.api.Table msTbl)
 
static Table fromThrift (Db parentDb, TTable thriftTable) throws TableLoadingException
 

Protected Member Functions

List< String > getColumnNamesWithHmsStats ()
 
void loadFromThrift (TTable thriftTable) throws TableLoadingException
 
void loadAllColumnStats (HiveMetaStoreClient client)
 
Type parseColumnType (FieldSchema fs) throws TableLoadingException
 

Static Protected Member Functions

static long getRowCount (Map< String, String > parameters)
 

Protected Attributes

String hdfsBaseDir_
 
final
org.apache.hadoop.hive.metastore.api.Table 
msTable_
 
final TableId id_
 
final Db db_
 
final String name_
 
final String owner_
 
TTableDescriptor tableDesc_
 
List< FieldSchema > fields_
 
TAccessLevel accessLevel_ = TAccessLevel.READ_WRITE
 
int numClusteringCols_
 
long numRows_ = -1
 
final ArrayType type_ = new ArrayType(new StructType())
 
long lastDdlTime_
 

Static Protected Attributes

static EnumSet< TableType > SUPPORTED_TABLE_TYPES
 

Static Package Functions

 [static initializer]
 

Private Member Functions

void loadBlockMetadata (FileSystem fs, FileStatus file, FileDescriptor fd, HdfsFileFormat fileFormat, Map< FsKey, FileBlocksInfo > perFsFileBlocks)
 
void synthesizeBlockMetadata (FileSystem fs, FileDescriptor fd, HdfsFileFormat fileFormat)
 
void loadDiskIds (Map< FsKey, FileBlocksInfo > perFsFileBlocks)
 
void loadColumns (List< FieldSchema > fieldSchemas, HiveMetaStoreClient client) throws TableLoadingException
 
void populatePartitionMd ()
 
void resetPartitionMd ()
 
void loadPartitions (List< org.apache.hadoop.hive.metastore.api.Partition > msPartitions, org.apache.hadoop.hive.metastore.api.Table msTbl, Map< String, List< FileDescriptor >> oldFileDescMap) throws IOException, CatalogException
 
TAccessLevel getAvailableAccessLevel (FileSystem fs, Path location) throws IOException
 
HdfsPartition createPartition (StorageDescriptor storageDescriptor, org.apache.hadoop.hive.metastore.api.Partition msPartition, Map< String, List< FileDescriptor >> oldFileDescMap, Map< FsKey, FileBlocksInfo > perFsFileBlocks) throws CatalogException
 
void addPerFsFileBlocks (Map< FsKey, FileBlocksInfo > fsToBlocks, FileSystem fs, List< THdfsFileBlock > blocks, List< BlockLocation > locations)
 
void updatePartitionMdAndColStats (HdfsPartition partition)
 
void addDefaultPartition (StorageDescriptor storageDescriptor) throws CatalogException
 
THdfsTable getTHdfsTable (boolean includeFileDesc, Set< Long > refPartitions)
 

Static Private Member Functions

static int getDiskId (VolumeId hdfsVolumeId)
 

Private Attributes

String nullColumnValue_
 
String nullPartitionKeyValue_
 
String avroSchema_ = null
 
boolean isMarkedCached_ = false
 
final List< HdfsPartition > partitions_
 
final ArrayList< TreeMap
< LiteralExpr, HashSet< Long > > > 
partitionValuesMap_
 
final ArrayList< HashSet< Long > > nullPartitionIds_ = Lists.newArrayList()
 
final HashMap< Long,
HdfsPartition > 
partitionMap_ = Maps.newHashMap()
 
final HashSet< Long > partitionIds_ = Sets.newHashSet()
 
boolean hasPartitionMd_ = false
 
final ListMap< TNetworkAddress > hostIndex_ = new ListMap<TNetworkAddress>()
 
final Map< String, List
< FileDescriptor > > 
fileDescMap_ = Maps.newHashMap()
 
long numHdfsFiles_
 
long totalHdfsBytes_
 
boolean multipleFileSystems_ = false
 

Static Private Attributes

static final String DEFAULT_NULL_COLUMN_VALUE = "\\N"
 
static final int NUM_PARTITION_FETCH_RETRIES = 5
 
static final TNetworkAddress REMOTE_NETWORK_ADDRESS
 
static final long MIN_SYNTHETIC_BLOCK_SIZE = 1024 * 1024
 
static boolean hasLoggedDiskIdFormatWarning_ = false
 
static final Logger LOG = LoggerFactory.getLogger(HdfsTable.class)
 
static final Configuration CONF = new Configuration()
 
static final boolean SUPPORTS_VOLUME_ID
 

Detailed Description

Internal representation of table-related metadata of a file-resident table on a Hadoop filesystem. The table data can be accessed through libHDFS (which is more of an abstraction over Hadoop's FileSystem class rather than DFS specifically). A partitioned table can even span multiple filesystems.

Owned by Catalog instance. The partition keys constitute the clustering columns.

This class is not thread-safe due to the static counter variable inside HdfsPartition. Also not thread safe because of possibility of concurrent modifications to the list of partitions in methods addPartition and dropPartition.

Definition at line 104 of file HdfsTable.java.

Constructor & Destructor Documentation

com.cloudera.impala.catalog.HdfsTable.HdfsTable ( TableId  id,
org.apache.hadoop.hive.metastore.api.Table  msTbl,
Db  db,
String  name,
String  owner 
)
inline

Definition at line 421 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.load().

Member Function Documentation

com.cloudera.impala.catalog.HdfsTable.[static initializer] ( )
inlinestaticpackage
void com.cloudera.impala.catalog.HdfsTable.addDefaultPartition ( StorageDescriptor  storageDescriptor) throws CatalogException
inlineprivate
void com.cloudera.impala.catalog.HdfsTable.addPartition ( HdfsPartition  partition)
inline

Adds the partition to the HdfsTable.

Note: This method is not thread safe because it modifies the list of partitions and the HdfsTable's partition metadata.

Definition at line 892 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.partitions_, com.cloudera.impala.catalog.HdfsTable.totalHdfsBytes_, and com.cloudera.impala.catalog.HdfsTable.updatePartitionMdAndColStats().

Referenced by com.cloudera.impala.catalog.HdfsTable.loadPartitions().

void com.cloudera.impala.catalog.HdfsTable.addPerFsFileBlocks ( Map< FsKey, FileBlocksInfo >  fsToBlocks,
FileSystem  fs,
List< THdfsFileBlock >  blocks,
List< BlockLocation >  locations 
)
inlineprivate

Add the given THdfsFileBlocks and BlockLocations to the FileBlocksInfo for the given filesystem.

Definition at line 875 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.loadBlockMetadata().

void com.cloudera.impala.catalog.Table.clearColumns ( )
inlineinherited
HdfsPartition com.cloudera.impala.catalog.HdfsTable.createPartition ( StorageDescriptor  storageDescriptor,
org.apache.hadoop.hive.metastore.api.Partition  msPartition 
) throws CatalogException
inline

Creates a new HdfsPartition object to be added to HdfsTable's partition list. Partitions may be empty, or may not even exist in the filesystem (a partition's location may have been changed to a new path that is about to be created by an INSERT). Also loads the block metadata for this partition. Returns new partition if successful or null if none was added. Separated from addPartition to reduce the number of operations done while holding the lock on HdfsTable.

Exceptions
CatalogException if the supplied storage descriptor contains metadata that Impala can't understand.

Definition at line 735 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.fileDescMap_, and com.cloudera.impala.catalog.HdfsTable.loadDiskIds().

Referenced by com.cloudera.impala.catalog.HdfsTable.loadPartitions().

HdfsPartition com.cloudera.impala.catalog.HdfsTable.createPartition ( StorageDescriptor  storageDescriptor,
org.apache.hadoop.hive.metastore.api.Partition  msPartition,
Map< String, List< FileDescriptor >>  oldFileDescMap,
Map< FsKey, FileBlocksInfo >  perFsFileBlocks 
) throws CatalogException
inlineprivate

Creates a new HdfsPartition object to be added to the internal partition list. Populates with file format information and file locations. Partitions may be empty, or may not even exist on the filesystem (a partition's location may have been changed to a new path that is about to be created by an INSERT). For unchanged files (indicated by unchanged mtime), reuses the FileDescriptor from the oldFileDescMap. The one exception is if the partition is marked as cached in which case the block metadata cannot be reused. Otherwise, creates a new FileDescriptor for each modified or new file and adds it to newFileDescMap. Both old and newFileDescMap are Maps of parent directory (partition location) to list of files (FileDescriptors) under that directory. Returns new partition if successful or null if none was added. Separated from addPartition to reduce the number of operations done while holding the lock on the hdfs table.

Exceptions
CatalogException if the supplied storage descriptor contains metadata that Impala can't understand.

Definition at line 764 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.CONF, com.cloudera.impala.catalog.HdfsCompression.fromFileName(), com.cloudera.impala.catalog.HdfsTable.getAvailableAccessLevel(), com.cloudera.impala.catalog.Table.getColumns(), com.cloudera.impala.catalog.HdfsStorageDescriptor.getFileFormat(), com.cloudera.impala.catalog.HdfsTable.getLocation(), com.cloudera.impala.catalog.Table.getName(), com.cloudera.impala.catalog.HdfsTable.isMarkedCached(), com.cloudera.impala.catalog.HdfsTable.isMarkedCached_, com.cloudera.impala.catalog.HdfsTable.loadBlockMetadata(), com.cloudera.impala.catalog.HdfsCompression.LZO_INDEX, com.cloudera.impala.catalog.HdfsTable.multipleFileSystems_, com.cloudera.impala.catalog.HdfsTable.nullPartitionKeyValue_, and com.cloudera.impala.catalog.HdfsTable.numHdfsFiles_.

HdfsPartition com.cloudera.impala.catalog.HdfsTable.dropPartition ( List< TPartitionKeyValue >  partitionSpec)
inline

Drops the partition having the given partition spec from HdfsTable. Cleans up its metadata from all the mappings used to speed up partition pruning/lookup. Also updates partition column statistics. Given partitionSpec must match exactly one partition. Returns the HdfsPartition that was dropped. If the partition does not exist, returns null.

Note: This method is not thread safe because it modifies the list of partitions and the HdfsTable's partition metadata.

Definition at line 942 of file HdfsTable.java.

References com.cloudera.impala.catalog.Table.getColumns(), com.cloudera.impala.catalog.HdfsTable.getPartitionFromThriftPartitionSpec(), com.cloudera.impala.catalog.HdfsTable.nullPartitionIds_, com.cloudera.impala.catalog.Table.numClusteringCols_, com.cloudera.impala.catalog.HdfsTable.partitions_, and com.cloudera.impala.catalog.HdfsTable.totalHdfsBytes_.

static Table com.cloudera.impala.catalog.Table.fromMetastoreTable ( TableId  id,
Db  db,
org.apache.hadoop.hive.metastore.api.Table  msTbl 
)
inlinestaticinherited

Creates a table of the appropriate type based on the given hive.metastore.api.Table object.

Definition at line 207 of file Table.java.

References com.cloudera.impala.catalog.DataSourceTable.isDataSourceTable(), com.cloudera.impala.catalog.HBaseTable.isHBaseTable(), and com.cloudera.impala.catalog.HdfsFileFormat.isHdfsFormatClass().

static Table com.cloudera.impala.catalog.Table.fromThrift ( Db  parentDb,
TTable  thriftTable 
) throws TableLoadingException
inlinestaticinherited

Factory method that creates a new Table from its Thrift representation. Determines the type of table to create based on the Thrift table provided.

Definition at line 231 of file Table.java.

TAccessLevel com.cloudera.impala.catalog.HdfsTable.getAvailableAccessLevel ( FileSystem  fs,
Path  location 
) throws IOException
inlineprivate

Gets the AccessLevel that is available for Impala for this table based on the permissions Impala has on the given path. If the path does not exist, recurses up the path until an existing parent directory is found, and inherits access permissions from that.

Definition at line 699 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.createPartition(), and com.cloudera.impala.catalog.HdfsTable.loadPartitions().

static String com.cloudera.impala.catalog.HdfsTable.getAvroSchema ( List< Map< String, String >>  schemaSearchLocations,
String  tableName 
) throws TableLoadingException
inlinestatic

Gets an Avro table's JSON schema from the list of given table property search locations. The schema may be specified as a string literal or provided as a Hadoop FileSystem or http URL that points to the schema. This function does not perform any validation on the returned string (e.g., it may not be a valid schema). If the schema was found to be specified as a SCHEMA_URL, this function will attempt to download the schema from the given URL. Throws a TableLoadingException if no schema is found or if there was any error extracting the schema.

Definition at line 1177 of file HdfsTable.java.

References com.cloudera.impala.common.FileSystemUtil.isPathReachable(), and path().

TCatalogObjectType com.cloudera.impala.catalog.HdfsTable.getCatalogObjectType ( )
inline

Implements com.cloudera.impala.catalog.CatalogObject.

Definition at line 428 of file HdfsTable.java.

Column com.cloudera.impala.catalog.Table.getColumn ( String  name)
inlineinherited
List<String> com.cloudera.impala.catalog.HdfsTable.getColumnNamesWithHmsStats ( )
inlineprotected
ArrayList<Column> com.cloudera.impala.catalog.Table.getColumnsInHiveOrder ( )
inlineinherited

Returns the list of all columns, but with partition columns at the end of the list rather than the beginning. This is equivalent to the order in which Hive enumerates columns.

Definition at line 373 of file Table.java.

References com.cloudera.impala.catalog.Table.colsByPos_, com.cloudera.impala.catalog.Table.getNonClusteringColumns(), and com.cloudera.impala.catalog.Table.numClusteringCols_.

Referenced by com.cloudera.impala.service.DescribeResultFactory.describeTableMinimal(), com.cloudera.impala.service.Frontend.getColumnStats(), and com.cloudera.impala.analysis.InsertStmt.prepareExpressions().

static int com.cloudera.impala.catalog.HdfsTable.getDiskId ( VolumeId  hdfsVolumeId)
inlinestaticprivate

Returns a disk id (0-based) index from the Hdfs VolumeId object. There is currently no public API to get at the volume id. We'll have to get it by accessing the internals.

Definition at line 243 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.hasLoggedDiskIdFormatWarning_.

Referenced by com.cloudera.impala.catalog.HdfsTable.loadDiskIds().

List<FieldSchema> com.cloudera.impala.catalog.HdfsTable.getFieldSchemas ( )
inline

Definition at line 453 of file HdfsTable.java.

References com.cloudera.impala.catalog.Table.fields_.

Map<String, List<FileDescriptor> > com.cloudera.impala.catalog.HdfsTable.getFileDescMap ( )
inline
TResultSet com.cloudera.impala.catalog.HdfsTable.getFiles ( List< TPartitionKeyValue >  partitionSpec) throws CatalogException
inline

Returns files info for the given dbname/tableName and partition spec. Returns files info for all partitions if partition spec is null.

Definition at line 1456 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.getPartitionFromThriftPartitionSpec(), com.cloudera.impala.catalog.HdfsTable.partitions_, com.cloudera.impala.catalog.Type.STRING, and com.cloudera.impala.catalog.ScalarType.toThrift().

String com.cloudera.impala.catalog.HdfsTable.getFirstLocationWithoutWriteAccess ( )
inline

Returns the first location (HDFS path) that Impala does not have WRITE access to, or null if none is found. For an unpartitioned table, this just checks the hdfsBaseDir. For a partitioned table it checks all partition directories.

Definition at line 470 of file HdfsTable.java.

References com.cloudera.impala.catalog.Table.accessLevel_, com.cloudera.impala.catalog.Table.getMetaStoreTable(), com.cloudera.impala.catalog.HdfsTable.hdfsBaseDir_, com.cloudera.impala.util.TAccessLevelUtil.impliesWriteAccess(), and com.cloudera.impala.catalog.HdfsTable.partitions_.

Referenced by com.cloudera.impala.analysis.InsertStmt.setTargetTable().

String com.cloudera.impala.catalog.HdfsTable.getHdfsBaseDir ( )
inline
ListMap<TNetworkAddress> com.cloudera.impala.catalog.HdfsTable.getHostIndex ( )
inline

Get the index of hosts that store replicas of blocks of this table.

Definition at line 1328 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.hostIndex_.

TableId com.cloudera.impala.catalog.Table.getId ( )
inlineinherited
HdfsFileFormat com.cloudera.impala.catalog.HdfsTable.getMajorityFormat ( )
inline

Returns the file format that the majority of partitions are stored in.

Definition at line 1333 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.partitions_.

Referenced by com.cloudera.impala.planner.PlanFragment.computeCanAddSlotFilters(), and com.cloudera.impala.planner.HdfsScanNode.computeCosts().

List<Column> com.cloudera.impala.catalog.Table.getNonClusteringColumns ( )
inlineinherited

Returns the list of all columns excluding any partition columns.

Definition at line 385 of file Table.java.

References com.cloudera.impala.catalog.Table.numClusteringCols_.

Referenced by com.cloudera.impala.analysis.ComputeStatsStmt.analyze(), and com.cloudera.impala.catalog.Table.getColumnsInHiveOrder().

List<FieldSchema> com.cloudera.impala.catalog.HdfsTable.getNonPartitionFieldSchemas ( )
inline
String com.cloudera.impala.catalog.HdfsTable.getNullColumnValue ( )
inline
HashSet<Long> com.cloudera.impala.catalog.HdfsTable.getNullPartitionIds ( int  i)
inline

Definition at line 435 of file HdfsTable.java.

String com.cloudera.impala.catalog.HdfsTable.getNullPartitionKeyValue ( )
inline

Returns the value Hive is configured to use for NULL partition key values. Set during load.

Definition at line 445 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.nullPartitionKeyValue_.

Referenced by com.cloudera.impala.catalog.HdfsTable.getPartition(), and com.cloudera.impala.catalog.HdfsTable.getPartitionFromThriftPartitionSpec().

long com.cloudera.impala.catalog.HdfsTable.getNumHdfsFiles ( )
inline
int com.cloudera.impala.catalog.HdfsTable.getNumNodes ( )
inline

Definition at line 1323 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.loadDiskIds().

long com.cloudera.impala.catalog.Table.getNumRows ( )
inlineinherited
String com.cloudera.impala.catalog.Table.getOwner ( )
inlineinherited

Definition at line 348 of file Table.java.

References com.cloudera.impala.catalog.Table.owner_.

HdfsPartition com.cloudera.impala.catalog.HdfsTable.getPartition ( List< PartitionKeyValue >  partitionSpec)
inline

Gets the HdfsPartition matching the given partition spec. Returns null if no match was found.

Definition at line 491 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.getNullPartitionKeyValue(), and com.cloudera.impala.catalog.HdfsTable.getPartitionFromThriftPartitionSpec().

HdfsPartition com.cloudera.impala.catalog.HdfsTable.getPartitionFromThriftPartitionSpec ( List< TPartitionKeyValue >  partitionSpec)
inline
HashSet<Long> com.cloudera.impala.catalog.HdfsTable.getPartitionIds ( )
inline
HashMap<Long, HdfsPartition> com.cloudera.impala.catalog.HdfsTable.getPartitionMap ( )
inline
TreeMap<LiteralExpr, HashSet<Long> > com.cloudera.impala.catalog.HdfsTable.getPartitionValueMap ( int  i)
inline

Definition at line 437 of file HdfsTable.java.

static long com.cloudera.impala.catalog.Table.getRowCount ( Map< String, String >  parameters)
inlinestaticprotectedinherited
String com.cloudera.impala.catalog.Table.getStorageHandlerClassName ( )
inlineinherited

Subclasses should override this if they provide a storage handler class. Currently only HBase tables need to provide a storage handler.

Definition at line 366 of file Table.java.

THdfsTable com.cloudera.impala.catalog.HdfsTable.getTHdfsTable ( boolean  includeFileDesc,
Set< Long >  refPartitions 
)
inlineprivate

Create a THdfsTable corresponding to this HdfsTable. If includeFileDesc is true, then all partitions and THdfsFileDescs of each partition should be included. Otherwise, don't include any THdfsFileDescs, and include only those partitions in the refPartitions set (the backend doesn't need metadata for unreferenced partitions).

Definition at line 1295 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.avroSchema_, com.cloudera.impala.catalog.Table.getColumnNames(), com.cloudera.impala.catalog.HdfsTable.hdfsBaseDir_, com.cloudera.impala.catalog.HdfsTable.multipleFileSystems_, com.cloudera.impala.catalog.HdfsTable.nullColumnValue_, com.cloudera.impala.catalog.HdfsTable.nullPartitionKeyValue_, and com.cloudera.impala.catalog.HdfsTable.partitions_.

Referenced by com.cloudera.impala.catalog.HdfsTable.toThrift(), and com.cloudera.impala.catalog.HdfsTable.toThriftDescriptor().

long com.cloudera.impala.catalog.HdfsTable.getTotalHdfsBytes ( )
inline
ArrayType com.cloudera.impala.catalog.Table.getType ( )
inlineinherited
boolean com.cloudera.impala.catalog.HdfsTable.hasWriteAccess ( )
inline
boolean com.cloudera.impala.catalog.HdfsTable.isAvroTable ( )
inline
void com.cloudera.impala.catalog.HdfsTable.load ( Table  cachedEntry,
HiveMetaStoreClient  client,
org.apache.hadoop.hive.metastore.api.Table  msTbl 
) throws TableLoadingException
inline

Load the table metadata and reuse metadata to speed up metadata loading. If the lastDdlTime has not been changed, that means the Hive metastore metadata has not been changed. Reuses the old Hive partition metadata from cachedEntry. To speed up Hdfs metadata loading, if a file's mtime has not been changed, reuses the old file block metadata from old value.

There are several cases where the cachedEntry might be reused incorrectly:

  1. an ALTER TABLE ADD PARTITION or dynamic partition insert is executed through Hive. This does not update the lastDdlTime.
  2. Hdfs rebalancer is executed. This changes the block locations but won't update the mtime (file modification time). If any of these occurs, the user has to execute "invalidate metadata" to invalidate the metadata cache of the table to trigger a fresh load.

Definition at line 1003 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsFileFormat.AVRO, com.cloudera.impala.catalog.HdfsTable.avroSchema_, com.cloudera.impala.catalog.Table.db_, com.cloudera.impala.catalog.HdfsTable.DEFAULT_NULL_COLUMN_VALUE, com.cloudera.impala.catalog.HdfsFileFormat.fromJavaClassName(), com.cloudera.impala.catalog.Table.getFullName(), com.cloudera.impala.catalog.Table.getMetaStoreTable(), com.cloudera.impala.catalog.Db.getName(), com.cloudera.impala.catalog.Table.getRowCount(), com.cloudera.impala.catalog.Table.getType(), com.cloudera.impala.catalog.HdfsTable.HdfsTable(), com.cloudera.impala.catalog.Table.lastDdlTime_, com.cloudera.impala.catalog.HdfsTable.loadColumns(), com.cloudera.impala.catalog.HdfsTable.loadPartitions(), com.cloudera.impala.catalog.Table.name_, com.cloudera.impala.catalog.HdfsTable.nullColumnValue_, com.cloudera.impala.catalog.HdfsTable.nullPartitionKeyValue_, com.cloudera.impala.catalog.HdfsTable.NUM_PARTITION_FETCH_RETRIES, com.cloudera.impala.catalog.Table.numClusteringCols_, com.cloudera.impala.catalog.HdfsTable.numHdfsFiles_, com.cloudera.impala.catalog.Table.numRows_, com.cloudera.impala.catalog.HdfsTable.partitions_, and com.cloudera.impala.catalog.HdfsTable.totalHdfsBytes_.

void com.cloudera.impala.catalog.Table.loadAllColumnStats ( HiveMetaStoreClient  client)
inlineprotectedinherited
void com.cloudera.impala.catalog.HdfsTable.loadBlockMetadata ( FileSystem  fs,
FileStatus  file,
FileDescriptor  fd,
HdfsFileFormat  fileFormat,
Map< FsKey, FileBlocksInfo >  perFsFileBlocks 
)
inlineprivate

Queries the filesystem to load the file block metadata (e.g. DFS blocks) for the given file. Adds the newly created block metadata and block location to the perFsFileBlocks, so that the disk IDs for each block can be retrieved with one call to DFS.

Definition at line 275 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.addPerFsFileBlocks(), com.cloudera.impala.catalog.HdfsPartition.FileDescriptor.getFileBlocks(), com.cloudera.impala.common.FileSystemUtil.hasGetFileBlockLocations(), com.cloudera.impala.catalog.HdfsTable.hostIndex_, com.cloudera.impala.catalog.Table.name_, and com.cloudera.impala.catalog.HdfsTable.synthesizeBlockMetadata().

Referenced by com.cloudera.impala.catalog.HdfsTable.createPartition().

void com.cloudera.impala.catalog.HdfsTable.loadColumns ( List< FieldSchema >  fieldSchemas,
HiveMetaStoreClient  client 
) throws TableLoadingException
inlineprivate
void com.cloudera.impala.catalog.HdfsTable.loadDiskIds ( Map< FsKey, FileBlocksInfo >  perFsFileBlocks)
inlineprivate

Populates disk/volume ID metadata inside the newly created THdfsFileBlocks. perFsFileBlocks maps from each filesystem to a FileBlocksInfo. The first list contains the newly created THdfsFileBlocks and the second contains the corresponding BlockLocations.

Definition at line 363 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsTable.getDiskId(), com.cloudera.impala.catalog.Table.getFullName(), com.cloudera.impala.catalog.HdfsTable.getNumNodes(), com.cloudera.impala.catalog.HdfsTable.FileBlocksInfo.locations, and com.cloudera.impala.catalog.HdfsTable.SUPPORTS_VOLUME_ID.

Referenced by com.cloudera.impala.catalog.HdfsTable.createPartition(), and com.cloudera.impala.catalog.HdfsTable.loadPartitions().

void com.cloudera.impala.catalog.HdfsTable.loadPartitions ( List< org.apache.hadoop.hive.metastore.api.Partition >  msPartitions,
org.apache.hadoop.hive.metastore.api.Table  msTbl,
Map< String, List< FileDescriptor >>  oldFileDescMap 
) throws IOException, CatalogException
inlineprivate
Type com.cloudera.impala.catalog.Table.parseColumnType ( FieldSchema  fs) throws TableLoadingException
inlineprotectedinherited

Gets the ColumnType from the given FieldSchema by using Impala's SqlParser. Throws a TableLoadingException if the FieldSchema could not be parsed. The type can either be:

  • Supported by Impala, in which case the type is returned.
  • A type Impala understands but is not yet implemented (e.g. date), the type is returned but type.IsSupported() returns false.
  • A type Impala can't understand at all, and a TableLoadingException is thrown.

Definition at line 331 of file Table.java.

References com.cloudera.impala.catalog.Table.getName().

Referenced by com.cloudera.impala.catalog.View.load(), com.cloudera.impala.catalog.HBaseTable.load(), com.cloudera.impala.catalog.DataSourceTable.loadColumns(), com.cloudera.impala.catalog.HdfsTable.loadColumns(), and com.cloudera.impala.catalog.HBaseTable.supportsBinaryEncoding().

void com.cloudera.impala.catalog.HdfsTable.populatePartitionMd ( )
inlineprivate
void com.cloudera.impala.catalog.HdfsTable.resetPartitionMd ( )
inlineprivate
void com.cloudera.impala.catalog.Table.setCatalogVersion ( long  catalogVersion)
inlineinherited
boolean com.cloudera.impala.catalog.HdfsTable.spansMultipleFileSystems ( )
inline
void com.cloudera.impala.catalog.HdfsTable.synthesizeBlockMetadata ( FileSystem  fs,
FileDescriptor  fd,
HdfsFileFormat  fileFormat 
)
inlineprivate

For filesystems that don't override getFileBlockLocations, synthesize file blocks by manually splitting the file range into fixed-size blocks. That way, scan ranges can be derived from file blocks as usual. All synthesized blocks are given an invalid network address so that the scheduler will treat them as remote.

Definition at line 335 of file HdfsTable.java.

References com.cloudera.impala.catalog.HdfsCompression.fromFileName(), com.cloudera.impala.catalog.HdfsPartition.FileDescriptor.getFileName(), com.cloudera.impala.catalog.HdfsTable.hostIndex_, com.cloudera.impala.catalog.HdfsFileFormat.isSplittable(), com.cloudera.impala.catalog.HdfsTable.MIN_SYNTHETIC_BLOCK_SIZE, and com.cloudera.impala.catalog.HdfsTable.REMOTE_NETWORK_ADDRESS.

Referenced by com.cloudera.impala.catalog.HdfsTable.loadBlockMetadata().

TCatalogObject com.cloudera.impala.catalog.Table.toTCatalogObject ( )
inlineinherited
TTable com.cloudera.impala.catalog.HdfsTable.toThrift ( )
inline
void com.cloudera.impala.catalog.Table.updateLastDdlTime ( long  ddlTime)
inlineinherited

Updates the lastDdlTime for this Table, if the new value is greater than the existing value. Does nothing if the new value is less than or equal to the existing value.

Definition at line 132 of file Table.java.

References com.cloudera.impala.catalog.Table.lastDdlTime_.

void com.cloudera.impala.catalog.HdfsTable.updatePartitionMdAndColStats ( HdfsPartition  partition)
inlineprivate
void com.cloudera.impala.catalog.Table.validate ( ) throws TableLoadingException
inlineinherited

Checks preconditions for this table to function as expected. Currently only checks that all entries in colsByName_ use lower case keys.

Definition at line 279 of file Table.java.

References com.cloudera.impala.catalog.Table.colsByName_.

Member Data Documentation

final Configuration com.cloudera.impala.catalog.HdfsTable.CONF = new Configuration()
staticprivate
final String com.cloudera.impala.catalog.HdfsTable.DEFAULT_NULL_COLUMN_VALUE = "\\N"
staticprivate

Definition at line 106 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.load().

List<FieldSchema> com.cloudera.impala.catalog.Table.fields_
protectedinherited
final Map<String, List<FileDescriptor> > com.cloudera.impala.catalog.HdfsTable.fileDescMap_ = Maps.newHashMap()
private
boolean com.cloudera.impala.catalog.HdfsTable.hasLoggedDiskIdFormatWarning_ = false
staticprivate

Definition at line 132 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.getDiskId().

boolean com.cloudera.impala.catalog.HdfsTable.hasPartitionMd_ = false
private
final ListMap<TNetworkAddress> com.cloudera.impala.catalog.HdfsTable.hostIndex_ = new ListMap<TNetworkAddress>()
private
long com.cloudera.impala.catalog.Table.lastDdlTime_
protectedinherited
final Logger com.cloudera.impala.catalog.HdfsTable.LOG = LoggerFactory.getLogger(HdfsTable.class)
staticprivate

Definition at line 182 of file HdfsTable.java.

final long com.cloudera.impala.catalog.HdfsTable.MIN_SYNTHETIC_BLOCK_SIZE = 1024 * 1024
staticprivate
final org.apache.hadoop.hive.metastore.api.Table com.cloudera.impala.catalog.Table.msTable_
protectedinherited
final ArrayList<HashSet<Long> > com.cloudera.impala.catalog.HdfsTable.nullPartitionIds_ = Lists.newArrayList()
private
final int com.cloudera.impala.catalog.HdfsTable.NUM_PARTITION_FETCH_RETRIES = 5
staticprivate

Definition at line 109 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.load().

final String com.cloudera.impala.catalog.Table.owner_
protectedinherited
final HashSet<Long> com.cloudera.impala.catalog.HdfsTable.partitionIds_ = Sets.newHashSet()
private
final HashMap<Long, HdfsPartition> com.cloudera.impala.catalog.HdfsTable.partitionMap_ = Maps.newHashMap()
private
final ArrayList<TreeMap<LiteralExpr, HashSet<Long> > > com.cloudera.impala.catalog.HdfsTable.partitionValuesMap_
private
Initial value:
=
Lists.newArrayList()

Definition at line 138 of file HdfsTable.java.

final TNetworkAddress com.cloudera.impala.catalog.HdfsTable.REMOTE_NETWORK_ADDRESS
staticprivate
Initial value:
=
new TNetworkAddress("remote*addr", 0)

Definition at line 112 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.synthesizeBlockMetadata().

EnumSet<TableType> com.cloudera.impala.catalog.Table.SUPPORTED_TABLE_TYPES
staticprotectedinherited
Initial value:
= EnumSet.of(
TableType.EXTERNAL_TABLE, TableType.MANAGED_TABLE, TableType.VIRTUAL_VIEW)

Definition at line 88 of file Table.java.

final boolean com.cloudera.impala.catalog.HdfsTable.SUPPORTS_VOLUME_ID
staticprivate

Definition at line 190 of file HdfsTable.java.

Referenced by com.cloudera.impala.catalog.HdfsTable.loadDiskIds().

TTableDescriptor com.cloudera.impala.catalog.Table.tableDesc_
protectedinherited

Definition at line 64 of file Table.java.


The documentation for this class was generated from the following file: