15 package com.cloudera.impala.catalog;
 
   19 import com.cloudera.impala.thrift.THdfsFileFormat;
 
   20 import com.google.common.base.Preconditions;
 
   21 import com.google.common.collect.ImmutableMap;
 
   // Fully qualified class name of Hive's RCFile input format. This is the key
   // under which VALID_FORMATS registers RC_FILE.
   34   private static final String RCFILE_INPUT_FORMAT =
 
   35       "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
 
   // Fully qualified class name of Hadoop's (mapred) plain-text input format.
   // Registered in VALID_FORMATS as TEXT.
   38   private static final String TEXT_INPUT_FORMAT =
 
   39       "org.apache.hadoop.mapred.TextInputFormat";
 
   // Input format class name for LZO text. Public, unlike the other input
   // format constants visible here. VALID_FORMATS also maps it to TEXT, and it
   // is substituted into the "INPUTFORMAT '%s' OUTPUTFORMAT '%s'" string built
   // further down in this file.
   42   public static final String LZO_TEXT_INPUT_FORMAT =
 
   43       "com.hadoop.mapred.DeprecatedLzoTextInputFormat";
 
   // Output format class name paired with LZO_TEXT_INPUT_FORMAT in the
   // "INPUTFORMAT '%s' OUTPUTFORMAT '%s'" string built further down in this
   // file. It is not used as a key in any of the visible VALID_FORMATS puts.
   46   public static final String LZO_TEXT_OUTPUT_FORMAT =
 
   47       "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
 
   // Fully qualified class name of Hadoop's (mapred) sequence file input
   // format. Registered in VALID_FORMATS as SEQUENCE_FILE.
   50   private static final String SEQUENCE_INPUT_FORMAT =
 
   51       "org.apache.hadoop.mapred.SequenceFileInputFormat";
 
   // Input format class names that all denote Parquet. VALID_FORMATS registers
   // each element individually by index (0 through 3), so adding an entry here
   // has no effect unless a matching put() is added there as well.
   // NOTE(review): the closing of this array initializer is not shown in this
   // listing.
   57   private static final String[] PARQUET_INPUT_FORMATS = {
 
   58       "com.cloudera.impala.hive.serde.ParquetInputFormat",
 
   59       "parquet.hive.DeprecatedParquetInputFormat",
 
   60       "parquet.hive.MapredParquetInputFormat",
 
   61       "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
 
   // Fully qualified class name of Hive's Avro container input format.
   // Registered in VALID_FORMATS as AVRO.
   65   private static final String AVRO_INPUT_FORMAT =
 
   66       "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
 
   // Lookup table from a fully qualified input format class name to the
   // HdfsFileFormat value it stands for. Several keys share one value: both
   // text input formats map to TEXT, and all four PARQUET_INPUT_FORMATS
   // elements map to PARQUET.
   // NOTE(review): the expression that opens and closes this chain of put()
   // calls is not shown in this listing; presumably an ImmutableMap builder,
   // given the ImmutableMap import - confirm.
   68   private static final Map<String, HdfsFileFormat> VALID_FORMATS =
 
   70           .put(RCFILE_INPUT_FORMAT, RC_FILE)
 
   71           .put(TEXT_INPUT_FORMAT, TEXT)
 
   72           .put(LZO_TEXT_INPUT_FORMAT, TEXT)
 
   73           .put(SEQUENCE_INPUT_FORMAT, SEQUENCE_FILE)
 
   74           .put(AVRO_INPUT_FORMAT, AVRO)
 
   75           .put(PARQUET_INPUT_FORMATS[0], PARQUET)
 
   76           .put(PARQUET_INPUT_FORMATS[1], PARQUET)
 
   77           .put(PARQUET_INPUT_FORMATS[2], PARQUET)
 
   78           .put(PARQUET_INPUT_FORMATS[3], PARQUET)
 
   // Reports whether formatClass is one of the keys registered in
   // VALID_FORMATS.
   // NOTE(review): the enclosing method header is not shown in this listing,
   // and no null check is visible on this line - confirm how callers and the
   // underlying map treat a null argument.
   84     return VALID_FORMATS.containsKey(formatClass);
 
   // Rejects a null class name up front (Preconditions.checkNotNull throws),
   // then returns whatever VALID_FORMATS holds for that name. No "unknown
   // class" check is made here, so an unregistered name produces the map's
   // get() result (null under the java.util.Map contract), not an exception.
   // NOTE(review): the enclosing method header is not shown in this listing.
   92     Preconditions.checkNotNull(inputFormatClass);
 
   93     return VALID_FORMATS.get(inputFormatClass);
 
  // Strict lookup by class name: a null className is rejected by
  // Preconditions.checkNotNull, and a registered name returns its
  // VALID_FORMATS entry. The IllegalArgumentException below carries the class
  // name itself as its message.
  // NOTE(review): the method header and the closing brace of the if are not
  // shown in this listing; the throw is presumably the fall-through for
  // unregistered names - confirm.
  101     Preconditions.checkNotNull(className);
 
  102     if (isHdfsFormatClass(className)) {
 
  103       return VALID_FORMATS.get(className);
 
  105     throw new IllegalArgumentException(className);
 
  // Reverse lookup: walks the VALID_FORMATS entries and returns the key of the
  // first entry whose value equals this format. TEXT and PARQUET are
  // registered under more than one class name, so for those the result
  // depends on the map's iteration order.
  // NOTE(review): presumably insertion order if the map comes from an
  // ImmutableMap builder - confirm.
  // NOTE(review): the braces closing the if and the loop are not shown in this
  // listing; the throw is presumably reached only after the loop finds no
  // match, with this.toString() as the exception message.
  109     for (Map.Entry<String, 
HdfsFileFormat> e: VALID_FORMATS.entrySet()) {
 
  110       if (e.getValue().equals(
this)) 
return e.getKey();
 
  113     throw new IllegalArgumentException(this.toString());
 
  // Dispatches on thriftFormat. The visible throw raises a RuntimeException
  // whose message embeds the offending value:
  // "Unknown THdfsFileFormat: <value> - should never happen!".
  // NOTE(review): the method header and the individual case labels are not
  // shown in this listing; the throw is presumably the default branch -
  // confirm.
  117     switch (thriftFormat) {
 
  124         throw new RuntimeException(
"Unknown THdfsFileFormat: " 
  125             + thriftFormat + 
" - should never happen!");
 
  // Each visible case returns the THdfsFileFormat constant that has the same
  // name as the case label (RC_FILE, TEXT, SEQUENCE_FILE, AVRO, PARQUET).
  // The trailing throw raises a RuntimeException whose message embeds this.
  // NOTE(review): the method header and the switch header are not shown in
  // this listing; the message text suggests the switch is on this and the
  // throw is its default branch - confirm.
  131       case RC_FILE: 
return THdfsFileFormat.RC_FILE;
 
  132       case TEXT: 
return THdfsFileFormat.TEXT;
 
  133       case SEQUENCE_FILE: 
return THdfsFileFormat.SEQUENCE_FILE;
 
  134       case AVRO: 
return THdfsFileFormat.AVRO;
 
  135       case PARQUET: 
return THdfsFileFormat.PARQUET;
 
  137         throw new RuntimeException(
"Unknown HdfsFormat: " 
  138             + 
this + 
" - should never happen!");
 
  // Visible cases return a fixed keyword per format: "RCFILE", "SEQUENCEFILE",
  // "AVRO" and "PARQUET". One further branch returns an explicit
  // "INPUTFORMAT '<class>' OUTPUTFORMAT '<class>'" clause instead, filled in
  // with LZO_TEXT_INPUT_FORMAT and LZO_TEXT_OUTPUT_FORMAT.
  // The trailing throw raises a RuntimeException whose message embeds this.
  // NOTE(review): the method header, the switch header, and the case label and
  // condition guarding the String.format branch are not shown in this listing;
  // presumably that branch belongs to the TEXT case under an LZO-specific
  // condition, and the throw is the default branch - confirm.
  144       case RC_FILE: 
return "RCFILE";
 
  151           return String.format(
"INPUTFORMAT '%s' OUTPUTFORMAT '%s'",
 
  152               LZO_TEXT_INPUT_FORMAT,
 
  153               LZO_TEXT_OUTPUT_FORMAT);
 
  156       case SEQUENCE_FILE: 
return "SEQUENCEFILE";
 
  157       case AVRO: 
return "AVRO";
 
  158       case PARQUET: 
return "PARQUET";
 
  160         throw new RuntimeException(
"Unknown HdfsFormat: " 
  161             + 
this + 
" - should never happen!");
 
  176       StringBuilder errorMsg) {
  // Derives the compression type from fileName via
  // HdfsCompression.fromFileName and switches on it. The two visible branches
  // append a human-readable reason to the caller-supplied errorMsg; both
  // messages end with the file name.
  // NOTE(review): the method name, the case labels and the return statements
  // are not shown in this listing, so which compression types reach each
  // append cannot be read off here.
  // NOTE(review): the suffix list in the first message is hand-written;
  // confirm it matches the suffixes HdfsCompression.fromFileName actually
  // recognizes (".gzip" in particular).
 
  179     HdfsCompression compressionType = HdfsCompression.fromFileName(fileName);
 
  180     switch (compressionType) {
 
  192           errorMsg.append(
"Expected compressed text file with {.lzo,.gzip,.snappy,.bz2} " 
  193               + 
"suffix: " + fileName);
 
  199         errorMsg.append(
"Unknown compression suffix: " + fileName);
 
  // Failure path: raises a RuntimeException whose message embeds this, in the
  // form "Unknown HdfsFormat: <this> - should never happen!".
  // NOTE(review): the enclosing method and the construct this throw belongs to
  // are not shown in this listing; presumably the default branch of a switch
  // on this, like the identically worded throws earlier in the file - confirm.
  218         throw new RuntimeException(
"Unknown HdfsFormat: " 
  219             + 
this + 
" - should never happen!");