Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
tmp-file-mgr.cc
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <boost/algorithm/string.hpp>
16 #include <boost/foreach.hpp>
17 #include <boost/lexical_cast.hpp>
18 #include <boost/uuid/uuid_io.hpp>
19 #include <boost/uuid/random_generator.hpp>
20 #include <boost/filesystem.hpp>
21 #include <gutil/strings/substitute.h>
22 #include <gutil/strings/join.h>
23 
24 #include "runtime/tmp-file-mgr.h"
25 #include "util/debug-util.h"
26 #include "util/disk-info.h"
27 #include "util/filesystem-util.h"
28 
// Flag naming the scratch directories (default "/tmp"). TmpFileMgr::Init() reads it
// through FLAGS_scratch_dirs and splits the value on ',', so several directories may
// be listed.
29 DEFINE_string(scratch_dirs, "/tmp", "Writable scratch directories");
30 
31 #include "common/names.h"
32 
33 using boost::algorithm::is_any_of;
34 using boost::algorithm::split;
35 using boost::algorithm::token_compress_on;
37 using boost::uuids::random_generator;
38 using namespace strings;
39 
40 namespace impala {
41 
// Name of the sub-directory that Init() appends to every accepted scratch directory;
// temporary files are placed inside that sub-directory.
42 const string TMP_SUB_DIR_NAME = "impala-scratch";
// Set to true by Init(); GetFile() DCHECKs it before handing out files.
44 bool TmpFileMgr::initialized_;
// Full paths of the per-directory scratch sub-directories chosen by Init().
// GetFile() indexes this vector with its tmp_device_id argument.
45 vector<string> TmpFileMgr::tmp_dirs_;
46 
47 Status TmpFileMgr::Init() {
48  DCHECK(!initialized_);
49  string tmp_dirs_spec = FLAGS_scratch_dirs;
50  vector<string> all_tmp_dirs;
51  split(all_tmp_dirs, tmp_dirs_spec, is_any_of(","), token_compress_on);
52  vector<bool> is_tmp_dir_on_disk(DiskInfo::num_disks(), false);
53 
54  // For each tmp directory, find the disk it is on,
55  // so additional tmp directories on the same disk can be skipped.
56  for (int i = 0; i < all_tmp_dirs.size(); ++i) {
57  path tmp_path(trim_right_copy_if(all_tmp_dirs[i], is_any_of("/")));
58  // tmp_path must be a writable directory.
59  RETURN_IF_ERROR(FileSystemUtil::VerifyIsDirectory(tmp_path.string()));
60  // Find the disk id of tmp_path. Add the scratch directory if there isn't another
61  // directory on the same disk (or if we don't know which disk it is on).
62  int disk_id = DiskInfo::disk_id(tmp_path.c_str());
63  if (disk_id < 0 || !is_tmp_dir_on_disk[disk_id]) {
64  uint64_t available_space;
65  RETURN_IF_ERROR(FileSystemUtil::GetSpaceAvailable(tmp_path.string(),
66  &available_space));
67  if (available_space < AVAILABLE_SPACE_THRESHOLD_MB * 1024 * 1024) {
68  LOG(WARNING) << "Filesystem containing scratch directory " << tmp_path
69  << " has less than " << AVAILABLE_SPACE_THRESHOLD_MB
70  << "MB available.";
71  }
72  if (disk_id >= 0) is_tmp_dir_on_disk[disk_id] = true;
73  path create_dir_path(tmp_path / TMP_SUB_DIR_NAME);
74  tmp_dirs_.push_back(create_dir_path.string());
75  }
76  }
77  initialized_ = true;
78  Status status = FileSystemUtil::CreateDirectories(tmp_dirs_);
79  if (status.ok()) {
80  LOG (INFO) << "Created the following scratch dirs:" << JoinStrings(tmp_dirs_, " ");
81  } else {
82  // Attempt to remove the directories created. Ignore any errors.
83  FileSystemUtil::RemovePaths(tmp_dirs_);
84  }
85  return status;
86 }
87 
88 Status TmpFileMgr::GetFile(int tmp_device_id, const TUniqueId& query_id,
89  File** new_file) {
90  DCHECK(initialized_);
91  DCHECK_LT(tmp_device_id, tmp_dirs_.size());
92 
93  // Generate the full file path.
94  string unique_name = lexical_cast<string>(random_generator()());
95  stringstream file_name;
96  file_name << PrintId(query_id) << "_" << unique_name;
97  path new_file_path(tmp_dirs_[tmp_device_id]);
98  new_file_path /= file_name.str();
99 
100  *new_file = new File(new_file_path.string());
101  return Status::OK;
102 }
103 
104 TmpFileMgr::File::File(const string& path)
105  : path_(path),
106  current_offset_(0),
107  current_size_(0) {
108 }
109 
110 Status TmpFileMgr::File::AllocateSpace(int64_t write_size, int64_t* offset) {
111  DCHECK_GT(write_size, 0);
112  DCHECK_GE(current_size_, current_offset_);
113  *offset = current_offset_;
114 
115  if (current_size_ == 0) {
116  // First call to AllocateSpace. Create the file.
118  disk_id_ = DiskInfo::disk_id(path_.c_str());
119  }
120 
121  current_offset_ += write_size;
122  if (current_offset_ > current_size_) {
123  int64_t trunc_len = current_offset_ + write_size;
124  RETURN_IF_ERROR(FileSystemUtil::ResizeFile(path_, trunc_len));
125  current_size_ = trunc_len;
126  }
127 
128  DCHECK_GE(current_size_, current_offset_);
129  return Status::OK;
130 }
131 
133  if (current_size_ > 0) FileSystemUtil::RemovePaths(vector<string>(1, path_));
134  return Status::OK;
135 }
136 
137 } //namespace impala
static Status CreateFile(const std::string &file_path)
Create a file at the specified path.
string path("/usr/lib/sasl2:/usr/lib64/sasl2:/usr/local/lib/sasl2:/usr/lib/x86_64-linux-gnu/sasl2")
const TUniqueId & query_id() const
Definition: coordinator.h:152
Status AllocateSpace(int64_t write_size, int64_t *offset)
#define RETURN_IF_ERROR(stmt)
some generally useful macros
Definition: status.h:242
string PrintId(const TUniqueId &id, const string &separator)
Definition: debug-util.cc:97
static Status RemovePaths(const std::vector< std::string > &directories)
Remove the specified paths and their enclosing files/directories.
DEFINE_string(scratch_dirs,"/tmp","Writable scratch directories")
const uint64_t AVAILABLE_SPACE_THRESHOLD_MB
Definition: tmp-file-mgr.cc:43
static const Status OK
Definition: status.h:87
uint8_t offset[7 *64-sizeof(uint64_t)]
static Status ResizeFile(const std::string &file_path, int64_t trunc_len)
Resize a file to a specified length - uses unistd truncate().
bool ok() const
Definition: status.h:172
const string TMP_SUB_DIR_NAME
Definition: tmp-file-mgr.cc:42
static int disk_id(const char *path)
Definition: disk-info.cc:119