15 #ifndef IMPALA_RUNTIME_BUFFERED_TUPLE_STREAM_H
16 #define IMPALA_RUNTIME_BUFFERED_TUPLE_STREAM_H
23 class BufferedBlockMgr;
143 <<
"Cannot have more than 2^16 = 64K blocks in a tuple stream.";
145 <<
"Cannot have blocks larger than 2^24 = 16MB";
147 <<
"Cannot have more than 2^24 = 16M rows in a block.";
167 bool use_initial_small_buffers =
true,
168 bool delete_on_read =
false,
bool read_write =
false);
221 Status GetRows(boost::scoped_ptr<RowBatch>* batch,
bool* got_rows);
356 template <
bool HasNullableTuple>
379 template <
bool HasNullableTuple>
The underlying memory management is done by the BufferedBlockMgr.
uint8_t * read_ptr_
Current ptr offset in read_block_'s buffer.
uint32_t null_indicators_read_block_
static const uint64_t OFFSET_SHIFT
int64_t total_byte_size_
Total size of blocks_, including small blocks.
bool has_write_block() const
void Close()
Must be called once at the end to clean up all resources. Idempotent.
std::vector< uint8_t * > block_start_idx_
std::list< BufferedBlockMgr::Block * >::iterator read_block_
int blocks_pinned() const
std::string DebugString() const
int64_t rows_returned() const
Number of rows returned via GetNext().
Status PinStream(bool already_reserved, bool *pinned)
Status GetNextInternal(RowBatch *batch, bool *eos, std::vector< RowIdx > *indices)
Templated GetNext implementation.
static const uint64_t BLOCK_SHIFT
RuntimeProfile::Counter * unpin_timer_
BufferedTupleStream(RuntimeState *state, const RowDescriptor &row_desc, BufferedBlockMgr *block_mgr, BufferedBlockMgr::Client *client, bool use_initial_small_buffers=true, bool delete_on_read=false, bool read_write=false)
bool DeepCopy(TupleRow *row, uint8_t **dst)
Wrapper of the templated DeepCopyInternal() function.
int fixed_tuple_row_size_
Sum of the fixed length portion of all the tuples in desc_.
uint64_t set(uint64_t block, uint64_t offset, uint64_t idx)
Status NextBlockForRead()
std::vector< std::pair< int, std::vector< SlotDescriptor * > > > string_slots_
Vector of all the string slots grouped by tuple_idx.
const RowDescriptor & desc_
Description of rows stored in the stream.
bool use_small_buffers_
If true, this stream is still using small buffers.
static const uint64_t BLOCK_MASK
int64_t byte_size() const
Returns the byte size necessary to store the entire stream in memory.
static const uint64_t IDX_MASK
static const uint64_t IDX_SHIFT
int read_block_idx_
The block index of the current read block.
int blocks_unpinned() const
bool DeepCopyInternal(TupleRow *row, uint8_t **dst)
uint32_t write_tuple_idx_
Current idx of the tuple written at the write_block_ buffer.
int ComputeNumNullIndicatorBytes(int block_size) const
Computes the number of bytes needed for null indicators for a block of 'block_size'.
BufferedBlockMgr::Client * block_mgr_client_
uint32_t read_tuple_idx_
Current idx of the tuple read from the read_block_ buffer.
static const uint64_t OFFSET_MASK
bool has_read_block() const
const bool delete_on_read_
If true, blocks are deleted after they are read.
const bool nullable_tuple_
Whether any tuple in the rows is nullable.
BufferedBlockMgr::Block * write_block_
The current block for writing. NULL if there is no available block to write to.
bool using_small_buffers() const
Status GetRows(boost::scoped_ptr< RowBatch > *batch, bool *got_rows)
const RowDescriptor & row_desc() const
Status UnpinBlock(BufferedBlockMgr::Block *block)
Unpins the block if it is an I/O-sized block and updates tracking stats.
BufferedBlockMgr * block_mgr_
Block manager and client used to allocate, pin and release blocks. Not owned.
RuntimeProfile::Counter * pin_timer_
Counters added by this object to the parent runtime profile.
uint32_t null_indicators_write_block_
RuntimeState *const state_
Runtime state instance used to check for cancellation. Not owned.
Status Init(RuntimeProfile *profile=NULL, bool pinned=true)
bool AddRow(TupleRow *row, uint8_t **dst=NULL)
uint8_t * AllocateRow(int size)
std::string DebugString() const
Status PrepareForRead(bool *got_buffer=NULL)
int64_t read_bytes_
Bytes read in read_block_.
std::list< BufferedBlockMgr::Block * > blocks_
List of blocks in the stream.
int64_t num_rows() const
Number of rows in the stream.
Status NewBlockForWrite(int min_size, bool *got_block)
RuntimeProfile::Counter * get_new_block_timer_
Status UnpinStream(bool all=false)
Status SwitchToIoBuffers(bool *got_buffer)
int num_small_blocks_
The total number of small blocks in blocks_.
int64_t num_rows_
Number of rows stored in the stream.
Status GetNext(RowBatch *batch, bool *eos, std::vector< RowIdx > *indices=NULL)
int ComputeRowSize(TupleRow *row) const
Returns the byte size of this row when encoded in a block.
int64_t rows_returned_
Number of rows returned to the caller from GetNext().
int64_t bytes_in_mem(bool ignore_current) const
void GetTupleRow(const RowIdx &idx, TupleRow *row) const