15 package com.cloudera.impala.catalog;
19 import com.cloudera.impala.thrift.THdfsFileFormat;
20 import com.google.common.base.Preconditions;
21 import com.google.common.collect.ImmutableMap;
// Fully qualified class name of the input format that VALID_FORMATS
// registers as RC_FILE.
34 private static final String RCFILE_INPUT_FORMAT =
35 "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
// Fully qualified class name of the input format that VALID_FORMATS
// registers as TEXT.
38 private static final String TEXT_INPUT_FORMAT =
39 "org.apache.hadoop.mapred.TextInputFormat";
// Input format class name for LZO text. VALID_FORMATS registers it as TEXT
// (a second key for TEXT, next to TEXT_INPUT_FORMAT), and it is also passed
// as the INPUTFORMAT argument of the String.format call further down.
// Public, so it is visible outside this type.
42 public static final String LZO_TEXT_INPUT_FORMAT =
43 "com.hadoop.mapred.DeprecatedLzoTextInputFormat";
// Output format class name paired with LZO_TEXT_INPUT_FORMAT: it is passed as
// the OUTPUTFORMAT argument of the String.format call further down.
// It is not used as a key in the visible VALID_FORMATS entries.
46 public static final String LZO_TEXT_OUTPUT_FORMAT =
47 "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat";
// Fully qualified class name of the input format that VALID_FORMATS
// registers as SEQUENCE_FILE.
50 private static final String SEQUENCE_INPUT_FORMAT =
51 "org.apache.hadoop.mapred.SequenceFileInputFormat";
// Input format class names that are all treated as PARQUET. VALID_FORMATS
// registers each element by explicit index (0 through 3), so adding or
// removing an entry here requires a matching change to those put() calls.
// NOTE(review): the closing brace of this initializer is not visible in this
// excerpt; four elements are shown.
57 private static final String[] PARQUET_INPUT_FORMATS = {
58 "com.cloudera.impala.hive.serde.ParquetInputFormat",
59 "parquet.hive.DeprecatedParquetInputFormat",
60 "parquet.hive.MapredParquetInputFormat",
61 "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
// Fully qualified class name of the input format that VALID_FORMATS
// registers as AVRO.
65 private static final String AVRO_INPUT_FORMAT =
66 "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
// Lookup table from an input format class name to the HdfsFileFormat value it
// represents. The mapping is many-to-one: TEXT has two keys and PARQUET has
// four, so a value-to-key lookup is ambiguous for those formats.
// NOTE(review): the expression that starts the put() chain and the call that
// finishes it are not visible in this excerpt; presumably an ImmutableMap
// builder, given the ImmutableMap import -- confirm.
68 private static final Map<String, HdfsFileFormat> VALID_FORMATS =
70 .put(RCFILE_INPUT_FORMAT, RC_FILE)
71 .put(TEXT_INPUT_FORMAT, TEXT)
// LZO-compressed text is registered under the same TEXT value.
72 .put(LZO_TEXT_INPUT_FORMAT, TEXT)
73 .put(SEQUENCE_INPUT_FORMAT, SEQUENCE_FILE)
74 .put(AVRO_INPUT_FORMAT, AVRO)
// Every known Parquet input format class name maps to PARQUET.
75 .put(PARQUET_INPUT_FORMATS[0], PARQUET)
76 .put(PARQUET_INPUT_FORMATS[1], PARQUET)
77 .put(PARQUET_INPUT_FORMATS[2], PARQUET)
78 .put(PARQUET_INPUT_FORMATS[3], PARQUET)
// True exactly when formatClass is one of the keys registered in VALID_FORMATS.
// NOTE(review): the enclosing method header is not visible in this excerpt.
84 return VALID_FORMATS.containsKey(formatClass);
// Lenient lookup: rejects a null argument, then returns whatever VALID_FORMATS
// holds for the class name. An unregistered name is not treated as an error
// here; the result of Map.get is returned as-is.
// NOTE(review): the enclosing method header is not visible in this excerpt.
92 Preconditions.checkNotNull(inputFormatClass);
93 return VALID_FORMATS.get(inputFormatClass);
// Strict lookup: rejects a null argument, returns the registered format when
// className is a known key, and otherwise raises IllegalArgumentException.
// NOTE(review): the enclosing method header and the closing brace of the if
// are not visible in this excerpt.
101 Preconditions.checkNotNull(className);
102 if (isHdfsFormatClass(className)) {
103 return VALID_FORMATS.get(className);
// The exception message is the unrecognized class name itself.
105 throw new IllegalArgumentException(className);
// Reverse lookup: scans VALID_FORMATS and returns the key of the first entry
// whose value equals this format. Several keys share one value (TEXT and
// PARQUET), so the key returned for those depends on the map's iteration
// order. NOTE(review): confirm the map implementation iterates in insertion
// order if a specific class name is expected.
// NOTE(review): the enclosing method header and closing braces are not visible
// in this excerpt.
109 for (Map.Entry<String,
HdfsFileFormat> e: VALID_FORMATS.entrySet()) {
110 if (e.getValue().equals(
this))
return e.getKey();
// No entry maps to this value; the message is this value's toString().
113 throw new IllegalArgumentException(this.toString());
// Dispatches on the thriftFormat value.
// NOTE(review): the case labels, the enclosing method header and the closing
// braces are not visible in this excerpt.
117 switch (thriftFormat) {
// Raised for a thriftFormat value the switch does not handle; the message
// embeds the offending value.
124 throw new RuntimeException(
"Unknown THdfsFileFormat: "
125 + thriftFormat +
" - should never happen!");
// Maps each format constant to the THdfsFileFormat constant of the same name.
// NOTE(review): the switch header, the enclosing method header and the closing
// braces are not visible in this excerpt.
131 case RC_FILE:
return THdfsFileFormat.RC_FILE;
132 case TEXT:
return THdfsFileFormat.TEXT;
133 case SEQUENCE_FILE:
return THdfsFileFormat.SEQUENCE_FILE;
134 case AVRO:
return THdfsFileFormat.AVRO;
135 case PARQUET:
return THdfsFileFormat.PARQUET;
// Raised when none of the cases above matched; the message embeds this value.
137 throw new RuntimeException(
"Unknown HdfsFormat: "
138 +
this +
" - should never happen!");
// Produces the SQL text for a format: a single keyword for most formats, or an
// explicit INPUTFORMAT/OUTPUTFORMAT clause for LZO text.
// NOTE(review): the switch header, the case label(s) and condition guarding
// the String.format branch, the enclosing method header and the closing braces
// are not visible in this excerpt.
144 case RC_FILE:
return "RCFILE";
// Spells out both class names, each wrapped in single quotes.
151 return String.format(
"INPUTFORMAT '%s' OUTPUTFORMAT '%s'",
152 LZO_TEXT_INPUT_FORMAT,
153 LZO_TEXT_OUTPUT_FORMAT);
156 case SEQUENCE_FILE:
return "SEQUENCEFILE";
157 case AVRO:
return "AVRO";
158 case PARQUET:
return "PARQUET";
// Raised when none of the cases above matched; the message embeds this value.
160 throw new RuntimeException(
"Unknown HdfsFormat: "
161 +
this +
" - should never happen!");
// Tail of a parameter list: errorMsg is an out-parameter that the code below
// appends a human-readable explanation to.
// NOTE(review): the start of the method header, the case labels, the return
// statements and the closing braces are not visible in this excerpt.
176 StringBuilder errorMsg) {
// The compression type is derived from the file name alone.
179 HdfsCompression compressionType = HdfsCompression.fromFileName(fileName);
180 switch (compressionType) {
// Reports a text file whose suffix is not one of the listed ones.
// NOTE(review): the message lists .gzip -- confirm it matches the suffixes
// HdfsCompression.fromFileName actually recognizes.
192 errorMsg.append(
"Expected compressed text file with {.lzo,.gzip,.snappy,.bz2} "
193 +
"suffix: " + fileName);
// Reports a file name whose compression suffix was not recognized.
199 errorMsg.append(
"Unknown compression suffix: " + fileName);
218 throw new RuntimeException(
"Unknown HdfsFormat: "
219 +
this +
" - should never happen!");