Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
hdfs-bulk-ops.h
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef IMPALA_UTIL_HDFS_BULK_OPS_H
16 #define IMPALA_UTIL_HDFS_BULK_OPS_H
17 
18 #include <string>
19 #include <boost/thread.hpp>
20 
21 #include "common/hdfs.h"
22 #include "common/atomic.h"
23 #include "common/status.h"
24 #include "util/thread-pool.h"
25 
26 namespace impala {
27 
// Identifies the kind of operation an HdfsOp executes.
// NOTE(review): the enumerator list (original lines 29-33) is absent from this listing.
// RENAME and CHMOD are the only kinds named elsewhere on this page; confirm the full
// set against the real header.
28 enum HdfsOpType {
34 };
35 
36 class HdfsOperationSet;
37 
// A single HDFS operation: an HdfsOpType plus its operands, together with a pointer to
// the HdfsOperationSet that contains it.
40 class HdfsOp {
41  public:
// Constructs an operation that takes one string operand (src).
42  HdfsOp(HdfsOpType op, const std::string& src, HdfsOperationSet* op_set);
43 
// Constructs an operation that takes two string operands. The second operand (dst) is
// ignored except for RENAME.
44  HdfsOp(HdfsOpType op, const std::string& src, const std::string& dst,
45  HdfsOperationSet* op_set);
46 
// Constructs an operation that takes a string operand and a permission operand. The
// permission operand is ignored except for CHMOD.
47  HdfsOp(HdfsOpType op, const std::string& src, short permissions,
48  HdfsOperationSet* op_set);
49 
// Default constructor. Required for ThreadPool.
51  HdfsOp();
52 
// Read-only accessors for the operation kind and the two string operands.
53  HdfsOpType op() const { return op_; }
54  const std::string& src() const { return src_; }
55  const std::string& dst() const { return dst_; }
56 
// Declared here, defined elsewhere.
// NOTE(review): presumably performs the operation and reports the outcome to the
// containing operation set -- confirm against the definition.
59  void Execute() const;
60 
61  private:
// NOTE(review): the declaration of op_ (original line 63, "The kind of operation to
// execute") is absent from this listing, although op() above returns it.
64 
// First operand.
66  std::string src_;
67 
// Second string operand, ignored except for RENAME.
69  std::string dst_;
70 
// Permission operand, ignored except for CHMOD.
72  short permissions_;
73 
// NOTE(review): the declaration of op_set_ (original line 75, "Containing operation set,
// used to record errors and to signal completion") is absent from this listing.
76 };
77 
79 
// Creates a new HdfsOp-processing thread pool. HdfsOpThreadPool is ThreadPool<HdfsOp>;
// that typedef (original line 78) is absent from this listing.
// NOTE(review): ownership of the returned pointer is not shown here -- confirm with the
// definition before deciding who deletes it.
81 HdfsOpThreadPool* CreateHdfsOpThreadPool(const std::string& name, uint32_t num_threads,
82  uint32_t max_queue_length);
83 
// Members of class HdfsOperationSet.
// NOTE(review): the class head and constructor (original lines 87-92) are absent from
// this listing; the member index on this page lists the constructor as
// HdfsOperationSet(hdfsFS* hdfs_connection).
88  public:
93 
// The three Add overloads take the same operand combinations as the three HdfsOp
// constructors (src only; src and dst; src and permissions).
// NOTE(review): presumably each appends an HdfsOp to ops_ -- definitions not shown.
96  void Add(HdfsOpType op, const std::string& src);
97 
99  void Add(HdfsOpType op, const std::string& src, const std::string& dst);
100 
102  void Add(HdfsOpType op, const std::string& src, short permissions);
103 
// Declared here, defined elsewhere. Takes the thread pool to use and the abort_on_error
// setting; abort_on_error_ is documented as "true if a single error should cause any
// subsequent operations to become no-ops".
// NOTE(review): the meaning of the bool return value is not visible in this listing.
107  bool Execute(HdfsOpThreadPool* pool, bool abort_on_error);
108 
// An Error pairs a pointer to an HdfsOp with a string; Errors is a vector of them.
109  typedef std::pair<const HdfsOp*, std::string> Error;
110  typedef std::vector<Error> Errors;
111 
// Returns errors_, documented as all errors produced during Execute.
// NOTE(review): unlike hdfs_connection() below, this accessor is not const-qualified.
114  const Errors& errors() { return errors_; }
115 
// Returns the HDFS connection shared between all operations. Not owned by this class.
116  hdfsFS* hdfs_connection() const { return hdfs_connection_; }
117 
118  private:
// The set of operations to be submitted to HDFS.
120  std::vector<HdfsOp> ops_;
121 
// NOTE(review): several member declarations in this region are absent from this listing.
// The member index on this page lists num_ops_ (AtomicInt<int64_t>), promise_
// (Promise<bool>), abort_on_error_ (bool), errors_ (Errors) and hdfs_connection_
// (hdfsFS*); their exact positions cannot be determined from here.
125 
129 
132 
// Protects errors_ and abort_on_error_ during Execute.
134  boost::mutex errors_lock_;
135 
138 
141 
// Grants HdfsOp access to the private members of this class; AddError below is
// documented as being called by HdfsOp.
142  friend class HdfsOp;
143 
// NOTE(review): undocumented in this listing; presumably called once per completed
// operation -- confirm against the definition.
146  void MarkOneOpDone();
147 
// Called by HdfsOp to record an error.
149  void AddError(const std::string& err, const HdfsOp* op);
150 
// NOTE(review): undocumented in this listing; presumably tells an operation whether to
// skip its work after an earlier error when abort_on_error_ is set -- confirm.
153  bool ShouldAbort();
154 };
155 
156 }
157 
158 #endif // IMPALA_UTIL_HDFS_BULK_OPS_H
const std::string & dst() const
Definition: hdfs-bulk-ops.h:55
ThreadPool< HdfsOp > HdfsOpThreadPool
Definition: hdfs-bulk-ops.h:78
std::string src_
First operand.
Definition: hdfs-bulk-ops.h:66
HdfsOpType op() const
Definition: hdfs-bulk-ops.h:53
void Execute() const
bool abort_on_error_
True if a single error should cause any subsequent operations to become no-ops.
std::string dst_
Second string operand, ignored except for RENAME.
Definition: hdfs-bulk-ops.h:69
std::vector< HdfsOp > ops_
The set of operations to be submitted to HDFS.
const Errors & errors()
const std::string & src() const
Definition: hdfs-bulk-ops.h:54
HdfsOp()
Required for ThreadPool.
void Add(HdfsOpType op, const std::string &src)
boost::mutex errors_lock_
Protects errors_ and abort_on_error_ during Execute.
void AddError(const std::string &err, const HdfsOp *op)
Called by HdfsOp to record an error.
Errors errors_
All errors produced during Execute.
ObjectPool pool
HdfsOpType op_
The kind of operation to execute.
Definition: hdfs-bulk-ops.h:63
AtomicInt< int64_t > num_ops_
hdfsFS * hdfs_connection_
HDFS connection shared between all operations. Not owned by this class.
HdfsOperationSet(hdfsFS *hdfs_connection)
bool Execute(HdfsOpThreadPool *pool, bool abort_on_error)
short permissions_
Permission operand, ignored except for CHMOD.
Definition: hdfs-bulk-ops.h:72
std::vector< Error > Errors
Promise< bool > promise_
hdfsFS * hdfs_connection() const
HdfsOpThreadPool * CreateHdfsOpThreadPool(const std::string &name, uint32_t num_threads, uint32_t max_queue_length)
Creates a new HdfsOp-processing thread pool.
string name
Definition: cpu-info.cc:50
std::pair< const HdfsOp *, std::string > Error
HdfsOperationSet * op_set_
Containing operation set, used to record errors and to signal completion.
Definition: hdfs-bulk-ops.h:75