16 #ifndef IMPALA_EXEC_HDFS_TABLE_SINK_H
17 #define IMPALA_EXEC_HDFS_TABLE_SINK_H
20 #include <boost/unordered_map.hpp>
21 #include <boost/scoped_ptr.hpp>
32 class TupleDescriptor;
35 class HdfsTableWriter;
83 boost::scoped_ptr<HdfsTableWriter>
writer;
125 const std::vector<TExpr>& select_list_texprs,
const TDataSink& tsink);
183 void GetHashTblKey(
const std::vector<ExprContext*>& ctxs, std::string* key);
265 typedef boost::unordered_map<std::string, HdfsPartitionDescriptor*>
static Status GetFileBlockSize(OutputPartition *output_partition, int64_t *size)
TableId table_id_
Table id resolved in Prepare() to set tuple_desc_.
std::vector< ExprContext * > output_expr_ctxs_
Exprs that materialize output values.
std::string final_hdfs_file_name_prefix
virtual RuntimeProfile * profile()
Returns the runtime profile for the sink.
void BuildHdfsFileNames(const HdfsPartitionDescriptor &partition_descriptor, OutputPartition *output)
Status PrepareExprs(RuntimeState *state)
Initialise and prepare select and partition key expressions.
std::string tmp_hdfs_dir_name
std::string current_file_name
PartitionDescriptorMap partition_descriptor_map_
virtual void Close(RuntimeState *state)
HdfsTableSink(const RowDescriptor &row_desc, const std::vector< TExpr > &select_list_texprs, const TDataSink &tsink)
RuntimeProfile::Counter * partitions_created_counter_
Status GetOutputPartition(RuntimeState *state, const std::string &key, PartitionPair **partition_pair)
int32_t num_files
Number of files created in this partition.
MemTracker * mem_tracker()
std::string partition_name
Partition name of the form key1=val1/key2=val2/ etc. Used to identify partitions to the metastore.
std::string DebugString() const
const HdfsPartitionDescriptor * partition_descriptor
The descriptor for this partition.
boost::unordered_map< std::string, PartitionPair > PartitionMap
Superclass of all data sinks.
RuntimeProfile::Counter * rows_inserted_counter_
const HdfsPartitionDescriptor * default_partition_
Currently this is the default partition since we don't support multi-format sinks.
hdfsFS hdfs_connection
Connection to hdfs.
hdfsFS hdfs_connection_
Connection to hdfs, established in Open() and closed in Close().
const HdfsTableDescriptor * table_desc_
Descriptor of target table. Set in Prepare().
void ClosePartitionFile(RuntimeState *state, OutputPartition *partition)
Closes the hdfs file for this partition as well as the writer.
Status CreateNewTmpFile(RuntimeState *state, OutputPartition *output_partition)
std::vector< ExprContext * > partition_key_expr_ctxs_
Exprs of partition keys.
RuntimeProfile::Counter * encode_timer_
Time spent converting tuple to on disk format.
virtual Status Prepare(RuntimeState *state)
Prepares output_exprs and partition_key_exprs, and connects to HDFS.
RuntimeProfile::Counter * bytes_written_counter()
hdfsFile tmp_hdfs_file
Hdfs file at tmp_hdfs_file_name.
bool overwrite_
Indicates whether the existing partitions should be overwritten.
RuntimeProfile::Counter * files_created_counter_
RuntimeProfile * runtime_profile_
Allocated from runtime state's pool.
std::pair< OutputPartition *, std::vector< int32_t > > PartitionPair
This class is thread-safe.
void GetHashTblKey(const std::vector< ExprContext * > &ctxs, std::string *key)
int64_t num_rows
Records number of rows appended to the current file in this partition.
boost::scoped_ptr< HdfsTableWriter > writer
Table format specific writer functions.
TupleRow * current_row_
Current row from the current RowBatch to output.
const RowDescriptor & row_desc() const
const std::vector< TExpr > & select_list_texprs_
RuntimeProfile::Counter * compress_timer_
Time spent compressing data.
bool has_empty_input_batch_
RuntimeProfile::Counter * hdfs_write_timer_
Time spent writing to hdfs.
RuntimeProfile::Counter * hdfs_write_timer()
virtual Status Send(RuntimeState *state, RowBatch *batch, bool eos)
Append all rows in batch to the temporary Hdfs files corresponding to partitions. ...
RuntimeProfile::Counter * rows_inserted_counter()
const HdfsTableDescriptor & TableDesc()
RuntimeProfile::Counter * encode_timer()
Status FinalizePartitionFile(RuntimeState *state, OutputPartition *partition)
std::vector< ExprContext * > dynamic_partition_key_expr_ctxs_
Metadata for a single partition inside an Hdfs table.
std::string unique_id_str_
boost::scoped_ptr< MemTracker > mem_tracker_
boost::unordered_map< std::string, HdfsPartitionDescriptor * > PartitionDescriptorMap
Status InitOutputPartition(RuntimeState *state, const HdfsPartitionDescriptor &partition_descriptor, OutputPartition *output_partition)
Initialises the filenames of a given output partition, and opens the temporary file.
RuntimeProfile::Counter * compress_timer()
RuntimeProfile::Counter * bytes_written_counter_
std::string tmp_hdfs_file_name_prefix
virtual Status Open(RuntimeState *state)
PartitionMap partition_keys_to_output_partitions_
const std::vector< TExpr > & partition_key_texprs_
const RowDescriptor & row_desc_
Row descriptor of row batches passed in Send(). Set in c'tor.