15 package com.cloudera.impala.catalog;
17 import static org.junit.Assert.assertEquals;
18 import static org.junit.Assert.assertNotNull;
19 import static org.junit.Assert.fail;
21 import java.util.HashMap;
22 import java.util.List;
24 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
25 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
26 import org.apache.hadoop.hive.serde.serdeConstants;
30 import com.cloudera.impala.thrift.THdfsFileFormat;
31 import com.google.common.collect.ImmutableList;
// Immutable list of Hive serde parameter keys, in this order: line delimiter,
// field delimiter, collection delimiter, map-key delimiter, escape character,
// quote character.
36 final List<String> DELIMITER_KEYS =
37 ImmutableList.of(serdeConstants.LINE_DELIM, serdeConstants.FIELD_DELIM,
38 serdeConstants.COLLECTION_DELIM, serdeConstants.MAPKEY_DELIM,
39 serdeConstants.ESCAPE_CHAR, serdeConstants.QUOTE_CHAR);
// Parquet serialization library class names; the loop below sets each one on a
// fresh SerDeInfo via setSerializationLib.
51 String[] parquetSerDe =
new String[] {
52 "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
53 "parquet.hive.serde.ParquetHiveSerDe"};
// Parquet input format class names; the loops below apply each one to a
// StorageDescriptor via setInputFormat.
54 String [] inputFormats =
new String [] {
55 "com.cloudera.impala.hive.serde.ParquetInputFormat",
56 "parquet.hive.DeprecatedParquetInputFormat",
57 "parquet.hive.MapredParquetInputFormat",
58 "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"};
// Parquet output format class names; the loops below apply each one to a
// StorageDescriptor via setOutputFormat.
59 String [] outputFormats =
new String [] {
60 "com.cloudera.impala.hive.serde.ParquetOutputFormat",
61 "parquet.hive.DeprecatedParquetOutputFormat",
62 "parquet.hive.MapredParquetOutputFormat",
63 "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"};
65 for (String serDe: parquetSerDe) {
66 SerDeInfo serDeInfo =
new SerDeInfo();
67 serDeInfo.setSerializationLib(serDe);
68 serDeInfo.setParameters(
new HashMap<String, String>());
69 for (String inputFormat: inputFormats) {
70 for (String outputFormat: outputFormats) {
71 StorageDescriptor sd =
new StorageDescriptor();
72 sd.setSerdeInfo(serDeInfo);
73 sd.setInputFormat(inputFormat);
74 sd.setOutputFormat(outputFormat);
87 StorageDescriptor sd = HiveStorageDescriptorFactory.createSd(THdfsFileFormat.TEXT,
89 sd.setParameters(
new HashMap<String, String>());
90 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
91 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"-2");
94 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
95 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"-128");
98 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
99 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"127");
102 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
103 sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM,
"\001");
106 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
107 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"|");
110 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
111 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"\t");
114 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
115 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"ab");
117 HdfsStorageDescriptor.fromStorageDescriptor(
"fake", sd);
120 assertEquals(
"Invalid delimiter: 'ab'. Delimiter must be specified as a " +
121 "single character or as a decimal value in the range [-128:127]",
125 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
126 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"128");
128 HdfsStorageDescriptor.fromStorageDescriptor(
"fake", sd);
131 assertEquals(
"Invalid delimiter: '128'. Delimiter must be specified as a " +
132 "single character or as a decimal value in the range [-128:127]",
136 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
137 sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
"\128");
139 HdfsStorageDescriptor.fromStorageDescriptor(
"fake", sd);
142 assertEquals(
"Invalid delimiter: '\128'. Delimiter must be specified as a " +
143 "single character or as a decimal value in the range [-128:127]",
147 sd.getSerdeInfo().setParameters(
new HashMap<String,String>());
148 sd.getSerdeInfo().putToParameters(serdeConstants.LINE_DELIM,
"-129");
150 HdfsStorageDescriptor.fromStorageDescriptor(
"fake", sd);
153 assertEquals(
"Invalid delimiter: '-129'. Delimiter must be specified as a " +
154 "single character or as a decimal value in the range [-128:127]",
static final List< String > DELIMITER_KEYS
void delimitersInCorrectOrder()
uint64_t Test(T *ht, const ProbeTuple *input, uint64_t num_tuples)
static HdfsStorageDescriptor fromStorageDescriptor(String tblName, StorageDescriptor sd)
void testParquetFileFormat()