Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
BlockIdGenerator.java
Go to the documentation of this file.
1 // Copyright (c) 2012 Cloudera, Inc. All rights reserved.
2 package com.cloudera.impala.testutil;
3 
4 import java.io.File;
5 import java.io.FileWriter;
6 import java.util.List;
7 
8 import org.apache.hadoop.fs.Path;
9 import org.apache.hadoop.hdfs.DistributedFileSystem;
10 import org.apache.hadoop.hdfs.HdfsConfiguration;
11 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
12 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
13 
20 import com.cloudera.impala.thrift.ImpalaInternalServiceConstants;
21 
28 public class BlockIdGenerator {
29 
30  @SuppressWarnings("deprecation")
31  public static void main(String[] args)
32  throws Exception {
33 
34  if (args.length != 1) {
35  throw new Exception("Invalid args: BlockIdGenerator <output_file>");
36  }
37 
38  HdfsConfiguration hdfsConfig = new HdfsConfiguration();
39  File output = new File(args[0]);
40  FileWriter writer = null;
41 
42  try {
43  writer = new FileWriter(output);
44 
45  // Load all tables in the catalog
46  Catalog catalog = CatalogServiceTestCatalog.create();
47  for (String dbName: catalog.getDbNames(null)) {
48  Db database = catalog.getDb(dbName);
49  for (String tableName: database.getAllTableNames()) {
50  Table table = database.getTable(tableName);
51  // Only do this for hdfs tables
52  if (table == null || !(table instanceof HdfsTable)) {
53  continue;
54  }
55  HdfsTable hdfsTable = (HdfsTable)table;
56 
57  // Write the output as <tablename>: <blockid1> <blockid2> <etc>
58  writer.write(tableName + ":");
59  for (HdfsPartition partition: hdfsTable.getPartitions()) {
60  // Ignore the default partition.
61  if (partition.getId() ==
62  ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID) {
63  continue;
64  }
65  List<FileDescriptor> fileDescriptors = partition.getFileDescriptors();
66  for (FileDescriptor fd : fileDescriptors) {
67  Path p = new Path(partition.getLocation(), fd.getFileName());
68 
69  // Use a deprecated API to get block ids
70  DistributedFileSystem dfs =
71  (DistributedFileSystem)p.getFileSystem(hdfsConfig);
72  LocatedBlocks locations = dfs.getClient().getNamenode().getBlockLocations(
73  p.toUri().getPath(), 0, fd.getFileLength());
74 
75  for (LocatedBlock lb : locations.getLocatedBlocks()) {
76  long id = lb.getBlock().getBlockId();
77  writer.write(" " + id);
78  }
79  }
80  }
81  writer.write("\n");
82  }
83  }
84  } finally {
85  if (writer != null) writer.close();
86  }
87  }
88 }
List< String > getAllTableNames()
Definition: Db.java:96
List< HdfsPartition > getPartitions()
Definition: HdfsTable.java:429
List< String > getDbNames(String dbPattern)
Definition: Catalog.java:136