Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
FileSystemUtil.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.common;
16 
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.util.Locale;
import java.util.UUID;
22 
23 import org.apache.commons.io.IOUtils;
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.fs.FileStatus;
26 import org.apache.hadoop.fs.FileSystem;
27 import org.apache.hadoop.fs.FileUtil;
28 import org.apache.hadoop.fs.LocalFileSystem;
29 import org.apache.hadoop.fs.Path;
30 import org.apache.hadoop.fs.s3.S3FileSystem;
31 import org.apache.hadoop.fs.s3a.S3AFileSystem;
32 import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
33 import org.apache.hadoop.hdfs.DistributedFileSystem;
34 import org.apache.hadoop.hdfs.client.HdfsAdmin;
35 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
36 import org.apache.log4j.Logger;
37 
38 import com.google.common.base.Preconditions;
39 
43 public class FileSystemUtil {
44  private static final Configuration CONF = new Configuration();
45  private static final Logger LOG = Logger.getLogger(FileSystemUtil.class);
46 
51  public static int deleteAllVisibleFiles(Path directory)
52  throws IOException {
53  FileSystem fs = directory.getFileSystem(CONF);
54  Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
55  int numFilesDeleted = 0;
56  for (FileStatus fStatus: fs.listStatus(directory)) {
57  // Only delete files that are not hidden.
58  if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
59  LOG.debug("Removing: " + fStatus.getPath());
60  fs.delete(fStatus.getPath(), false);
61  ++numFilesDeleted;
62  }
63  }
64  return numFilesDeleted;
65  }
66 
70  public static int getTotalNumVisibleFiles(Path directory) throws IOException {
71  FileSystem fs = directory.getFileSystem(CONF);
72  Preconditions.checkState(fs.getFileStatus(directory).isDirectory());
73  int numFiles = 0;
74  for (FileStatus fStatus: fs.listStatus(directory)) {
75  // Only delete files that are not hidden.
76  if (fStatus.isFile() && !isHiddenFile(fStatus.getPath().getName())) {
77  ++numFiles;
78  }
79  }
80  return numFiles;
81  }
82 
86  private static boolean arePathsInSameEncryptionZone(FileSystem fs, Path p1,
87  Path p2) throws IOException {
88  HdfsAdmin hdfsAdmin = new HdfsAdmin(fs.getUri(), CONF);
89  EncryptionZone z1 = hdfsAdmin.getEncryptionZoneForPath(p1);
90  EncryptionZone z2 = hdfsAdmin.getEncryptionZoneForPath(p2);
91  if (z1 == null && z2 == null) return true;
92  if (z1 == null || z2 == null) return false;
93  return z1.equals(z2);
94  }
95 
105  public static int relocateAllVisibleFiles(Path sourceDir, Path destDir)
106  throws IOException {
107  FileSystem fs = destDir.getFileSystem(CONF);
108  Preconditions.checkState(fs.isDirectory(destDir));
109  Preconditions.checkState(fs.isDirectory(sourceDir));
110 
111  // Use the same UUID to resolve all file name conflicts. This helps mitigate problems
112  // that might happen if there is a conflict moving a set of files that have
113  // dependent file names. For example, foo.lzo and foo.lzo_index.
114  UUID uuid = UUID.randomUUID();
115 
116  // Enumerate all the files in the source
117  int numFilesMoved = 0;
118  for (FileStatus fStatus: fs.listStatus(sourceDir)) {
119  if (fStatus.isDirectory()) {
120  LOG.debug("Skipping copy of directory: " + fStatus.getPath());
121  continue;
122  } else if (isHiddenFile(fStatus.getPath().getName())) {
123  continue;
124  }
125 
126  Path destFile = new Path(destDir, fStatus.getPath().getName());
127  if (fs.exists(destFile)) {
128  destFile = new Path(destDir,
129  appendToBaseFileName(destFile.getName(), uuid.toString()));
130  }
131  FileSystemUtil.relocateFile(fStatus.getPath(), destFile, false);
132  ++numFilesMoved;
133  }
134  return numFilesMoved;
135  }
136 
147  public static void relocateFile(Path sourceFile, Path dest,
148  boolean renameIfAlreadyExists) throws IOException {
149  FileSystem fs = dest.getFileSystem(CONF);
150  // TODO: Handle moving between file systems
151  Preconditions.checkArgument(isPathOnFileSystem(sourceFile, fs));
152 
153  Path destFile = fs.isDirectory(dest) ? new Path(dest, sourceFile.getName()) : dest;
154  // If a file with the same name does not already exist in the destination location
155  // then use the same file name. Otherwise, generate a unique file name.
156  if (renameIfAlreadyExists && fs.exists(destFile)) {
157  Path destDir = fs.isDirectory(dest) ? dest : dest.getParent();
158  destFile = new Path(destDir,
159  appendToBaseFileName(destFile.getName(), UUID.randomUUID().toString()));
160  }
161 
162  if (arePathsInSameEncryptionZone(fs, sourceFile, destFile)) {
163  LOG.debug(String.format(
164  "Moving '%s' to '%s'", sourceFile.toString(), destFile.toString()));
165  // Move (rename) the file.
166  fs.rename(sourceFile, destFile);
167  } else {
168  // We must copy rather than move if the source and dest are in different encryption
169  // zones. A move would return an error from the NN because a move is a metadata-only
170  // operation and the files would not be encrypted/decrypted properly on the DNs.
171  LOG.info(String.format(
172  "Copying source '%s' to '%s' because HDFS encryption zones are different",
173  sourceFile, destFile));
174  FileUtil.copy(sourceFile.getFileSystem(CONF), sourceFile, fs, destFile,
175  true, true, CONF);
176  }
177  }
178 
182  public static String readFile(Path file) throws IOException {
183  FileSystem fs = file.getFileSystem(CONF);
184  InputStream fileStream = fs.open(file);
185  try {
186  return IOUtils.toString(fileStream);
187  } finally {
188  IOUtils.closeQuietly(fileStream);
189  }
190  }
191 
200  private static String appendToBaseFileName(String baseFileName, String appendStr) {
201  StringBuilder sb = new StringBuilder(baseFileName);
202  // Insert the string to append, preserving the file extension.
203  int extensionIdx = baseFileName.lastIndexOf('.');
204  if (extensionIdx != -1) {
205  sb.replace(extensionIdx, extensionIdx + 1, "_" + appendStr + ".");
206  } else {
207  sb.append("_" + appendStr);
208  }
209  return sb.toString();
210  }
211 
215  public static boolean containsSubdirectory(Path directory)
216  throws FileNotFoundException, IOException {
217  FileSystem fs = directory.getFileSystem(CONF);
218  // Enumerate all the files in the source
219  for (FileStatus fStatus: fs.listStatus(directory)) {
220  if (fStatus.isDirectory()) {
221  return true;
222  }
223  }
224  return false;
225  }
226 
230  public static Path makeTmpSubdirectory(Path directory) throws IOException {
231  FileSystem fs = directory.getFileSystem(CONF);
232  Path tmpDir = new Path(directory, ".tmp_" + UUID.randomUUID().toString());
233  fs.mkdirs(tmpDir);
234  return tmpDir;
235  }
236 
237  public static boolean isHiddenFile(String fileName) {
238  // Hidden files start with '.' or '_'. The '.copying' suffix is used by some
239  // filesystem utilities (e.g. hdfs put) as a temporary destination when copying
240  // files. The '.tmp' suffix is Flume's default for temporary files.
241  String lcFileName = fileName.toLowerCase();
242  return lcFileName.startsWith(".") || lcFileName.startsWith("_") ||
243  lcFileName.endsWith(".copying") || lcFileName.endsWith(".tmp");
244  }
245 
249  public static boolean hasGetFileBlockLocations(FileSystem fs) {
250  // Common case.
251  if (isDistributedFileSystem(fs)) return true;
252  // Blacklist FileSystems that are known to not implement getFileBlockLocations().
253  return !(fs instanceof S3AFileSystem || fs instanceof NativeS3FileSystem ||
254  fs instanceof S3FileSystem || fs instanceof LocalFileSystem);
255  }
256 
260  public static boolean isDistributedFileSystem(FileSystem fs) {
261  return fs instanceof DistributedFileSystem;
262  }
263 
267  public static boolean isDistributedFileSystem(Path path) throws IOException {
268  return isDistributedFileSystem(path.getFileSystem(CONF));
269  }
270 
271  public static DistributedFileSystem getDistributedFileSystem() throws IOException {
272  Path path = new Path(FileSystem.getDefaultUri(CONF));
273  FileSystem fs = path.getFileSystem(CONF);
274  Preconditions.checkState(fs instanceof DistributedFileSystem);
275  return (DistributedFileSystem) fs;
276  }
277 
281  public static Path createFullyQualifiedPath(Path location) {
282  URI defaultUri = FileSystem.getDefaultUri(CONF);
283  URI locationUri = location.toUri();
284  // Use the default URI only if location has no scheme or it has the same scheme as
285  // the default URI. Otherwise, Path.makeQualified() will incorrectly use the
286  // authority from the default URI even though the schemes don't match. See HDFS-7031.
287  if (locationUri.getScheme() == null ||
288  locationUri.getScheme().equalsIgnoreCase(defaultUri.getScheme())) {
289  return location.makeQualified(defaultUri, location);
290  }
291  // Already qualified (has scheme).
292  return location;
293  }
294 
298  public static Boolean isPathOnFileSystem(Path path, FileSystem fs) {
299  try {
300  // Call makeQualified() for the side-effect of FileSystem.checkPath() which will
301  // throw an exception if path is not on fs.
302  fs.makeQualified(path);
303  return true;
304  } catch (IllegalArgumentException e) {
305  // Path is not on fs.
306  return false;
307  }
308  }
309 
314  public static Boolean isPathReachable(Path path, FileSystem fs, StringBuilder error_msg) {
315  try {
316  if (fs.exists(path)) {
317  return true;
318  } else {
319  error_msg.append("Path does not exist.");
320  }
321  } catch (Exception e) {
322  error_msg.append(e.getMessage());
323  }
324  return false;
325  }
326 
  /**
   * Returns the Configuration instance shared by all methods of this class. The same
   * object is handed out on every call (it is not copied).
   */
  public static Configuration getConfiguration() {
    return CONF;
  }
333 }
static boolean arePathsInSameEncryptionZone(FileSystem fs, Path p1, Path p2)
static boolean containsSubdirectory(Path directory)
string path("/usr/lib/sasl2:/usr/lib64/sasl2:/usr/local/lib/sasl2:/usr/lib/x86_64-linux-gnu/sasl2")
static boolean hasGetFileBlockLocations(FileSystem fs)
static Path createFullyQualifiedPath(Path location)
static String appendToBaseFileName(String baseFileName, String appendStr)
static int deleteAllVisibleFiles(Path directory)
static boolean isDistributedFileSystem(Path path)
static DistributedFileSystem getDistributedFileSystem()
static boolean isDistributedFileSystem(FileSystem fs)
static int relocateAllVisibleFiles(Path sourceDir, Path destDir)
static boolean isHiddenFile(String fileName)
static Boolean isPathReachable(Path path, FileSystem fs, StringBuilder error_msg)
static Path makeTmpSubdirectory(Path directory)
static void relocateFile(Path sourceFile, Path dest, boolean renameIfAlreadyExists)
static int getTotalNumVisibleFiles(Path directory)
static Boolean isPathOnFileSystem(Path path, FileSystem fs)