Impala
Impala is the open source, native analytic database for Apache Hadoop.
HiveStorageDescriptorFactory.java
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala.catalog;

import java.util.HashMap;

import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

import com.cloudera.impala.thrift.THdfsFileFormat;
import com.google.common.base.Preconditions;

/**
 * Utility class for creating StorageDescriptor objects for the HDFS file
 * formats supported by Impala.
 */
public class HiveStorageDescriptorFactory {
  /**
   * Creates and returns a StorageDescriptor for the given file format and
   * row format.
   */
  public static StorageDescriptor createSd(THdfsFileFormat fileFormat, RowFormat rowFormat) {
    Preconditions.checkNotNull(fileFormat);
    Preconditions.checkNotNull(rowFormat);

    StorageDescriptor sd = null;
    switch (fileFormat) {
      case PARQUET: sd = createParquetFileSd(); break;
      case RC_FILE: sd = createRcFileSd(); break;
      case SEQUENCE_FILE: sd = createSequenceFileSd(); break;
      case TEXT: sd = createTextSd(); break;
      case AVRO: sd = createAvroSd(); break;
      default: throw new UnsupportedOperationException(
          "Unsupported file format: " + fileFormat);
    }

    // Copy the custom row format delimiters, if specified, into the SerDe
    // parameter map.
    if (rowFormat.getFieldDelimiter() != null) {
      sd.getSerdeInfo().putToParameters(
          "serialization.format", rowFormat.getFieldDelimiter());
      sd.getSerdeInfo().putToParameters("field.delim", rowFormat.getFieldDelimiter());
    }
    if (rowFormat.getEscapeChar() != null) {
      sd.getSerdeInfo().putToParameters("escape.delim", rowFormat.getEscapeChar());
    }
    if (rowFormat.getLineDelimiter() != null) {
      sd.getSerdeInfo().putToParameters("line.delim", rowFormat.getLineDelimiter());
    }
    return sd;
  }

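  // Illustration (not from the original source): for a TEXT table whose
  // RowFormat has field delimiter "," and line delimiter "\n", the code above
  // leaves the SerDe parameter map as:
  //   serialization.format = ","
  //   field.delim = ","
  //   line.delim = "\n"
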
  private static StorageDescriptor createParquetFileSd() {
    StorageDescriptor sd = createGenericSd();
    sd.setInputFormat("parquet.hive.DeprecatedParquetInputFormat");
    sd.setOutputFormat("parquet.hive.DeprecatedParquetOutputFormat");
    // TODO: Should we use "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"?
    sd.getSerdeInfo().setSerializationLib("parquet.hive.serde.ParquetHiveSerDe");
    return sd;
  }

  private static StorageDescriptor createTextSd() {
    StorageDescriptor sd = createGenericSd();
    sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
    sd.setOutputFormat(
        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());
    sd.getSerdeInfo().setSerializationLib(
        org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    return sd;
  }

  private static StorageDescriptor createSequenceFileSd() {
    StorageDescriptor sd = createGenericSd();
    sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
    sd.setOutputFormat(
        org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());
    sd.getSerdeInfo().setSerializationLib(
        org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
    return sd;
  }

  private static StorageDescriptor createRcFileSd() {
    StorageDescriptor sd = createGenericSd();
    sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
    sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
    sd.getSerdeInfo().setSerializationLib(
        org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
    return sd;
  }

  private static StorageDescriptor createAvroSd() {
    StorageDescriptor sd = createGenericSd();
    sd.setInputFormat(
        org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat.class.getName());
    sd.setOutputFormat(
        org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat.class.getName());
    sd.getSerdeInfo().setSerializationLib(
        org.apache.hadoop.hive.serde2.avro.AvroSerDe.class.getName());
    // Writing compressed Avro tables is done using a session-level
    // configuration setting; it is not specified as part of the table
    // metadata. The compression property of the StorageDescriptor has a
    // different purpose.
    return sd;
  }

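  // Illustration (not from the original source): in Hive, compressed Avro
  // output is typically enabled per session rather than in table metadata,
  // e.g.:
  //   SET hive.exec.compress.output=true;
  //   SET avro.output.codec=snappy;
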
  /**
   * Creates a StorageDescriptor with the defaults shared by all file formats:
   * an initialized SerDeInfo with an empty parameter map and the compressed
   * flag unset.
   */
  private static StorageDescriptor createGenericSd() {
    StorageDescriptor sd = new StorageDescriptor();
    sd.setSerdeInfo(new org.apache.hadoop.hive.metastore.api.SerDeInfo());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    // The compressed flag is not used to determine whether the table is
    // compressed or not. Instead, we use the input format or the filename.
    sd.setCompressed(false);
    return sd;
  }
}
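A minimal usage sketch (not part of the file above). The RowFormat constructor
shown here is an assumption of the form RowFormat(fieldDelim, lineDelim,
escapeChar); only the getters appear in this listing:

    // Build a StorageDescriptor for a delimited text table.
    StorageDescriptor sd = HiveStorageDescriptorFactory.createSd(
        THdfsFileFormat.TEXT, new RowFormat(",", "\n", "\\"));
    // The SerDe parameters now carry the delimiters:
    //   field.delim = ","  line.delim = "\n"  escape.delim = "\\"

The factory keeps the mapping from THdfsFileFormat values to Hive input/output
format and SerDe class names in one place, so callers only pass the requested
format and row format.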