package com.cloudera.impala.analysis;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;

import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.ParquetMetadata;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;

import com.cloudera.impala.authorization.Privilege;
import com.cloudera.impala.catalog.HdfsCompression;
import com.cloudera.impala.catalog.HdfsFileFormat;
import com.cloudera.impala.catalog.ScalarType;
import com.cloudera.impala.catalog.Type;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.FileSystemUtil;
import com.cloudera.impala.thrift.THdfsFileFormat;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**
 * Represents a "CREATE TABLE <table> LIKE <fileformat> '<path>'" statement, where
 * the new table's column definitions are inferred from a data file rather than
 * listed explicitly.
 */
public class CreateTableLikeFileStmt extends CreateTableStmt {
  private final HdfsUri schemaLocation_;
  private final THdfsFileFormat schemaFileFormat_;

  public CreateTableLikeFileStmt(TableName tableName, THdfsFileFormat schemaFileFormat,
      HdfsUri schemaLocation, List<ColumnDef> partitionColumnDescs,
      boolean isExternal, String comment, RowFormat rowFormat,
      THdfsFileFormat fileFormat, HdfsUri location, HdfsCachingOp cachingOp,
      boolean ifNotExists, Map<String, String> tblProperties,
      Map<String, String> serdeProperties) {
    // Pass an empty column list to the parent statement; the real columns are
    // inferred from the schema file during analyze().
    super(tableName, new ArrayList<ColumnDef>(), partitionColumnDescs,
        isExternal, comment, rowFormat,
        fileFormat, location, cachingOp, ifNotExists, tblProperties, serdeProperties);
    schemaLocation_ = schemaLocation;
    schemaFileFormat_ = schemaFileFormat;
  }
  /**
   * Reads the metadata footer of the Parquet file at the given path and returns
   * its file-level schema.
   */
  private static parquet.schema.MessageType loadParquetSchema(Path pathToFile)
      throws AnalysisException {
    try {
      FileSystem fs = pathToFile.getFileSystem(FileSystemUtil.getConfiguration());
      if (!fs.isFile(pathToFile)) {
        throw new AnalysisException("Cannot infer schema, path is not a file: " +
            pathToFile);
      }
    } catch (IOException e) {
      throw new AnalysisException("Failed to connect to filesystem: " + e);
    } catch (IllegalArgumentException e) {
      throw new AnalysisException(e.getMessage());
    }
    ParquetMetadata readFooter = null;
    try {
      readFooter = ParquetFileReader.readFooter(FileSystemUtil.getConfiguration(),
          pathToFile);
    } catch (FileNotFoundException e) {
      throw new AnalysisException("File not found: " + e);
    } catch (IOException e) {
      throw new AnalysisException("Error reading file: " + e);
    } catch (RuntimeException e) {
      // Parquet reports a non-Parquet file via a generic RuntimeException.
      if (e.toString().contains("is not a Parquet file")) {
        throw new AnalysisException("File is not a parquet file: " + pathToFile);
      }
      // Anything else is unexpected; rethrow it.
      throw e;
    }
    return readFooter.getFileMetaData().getSchema();
  }
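  // Illustrative note (not from the original source): for a file written with a
  // two-column schema, the MessageType returned above prints roughly as
  //   message schema {
  //     optional binary name (UTF8);
  //     optional int32 id;
  //   }
  // The field names and types here are made up for the example.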
  /**
   * Converts a primitive Parquet type to an Impala type.
   */
  private static Type convertPrimitiveParquetType(parquet.schema.Type parquetType)
      throws AnalysisException {
    Preconditions.checkState(parquetType.isPrimitive());
    PrimitiveType prim = parquetType.asPrimitiveType();
    switch (prim.getPrimitiveTypeName()) {
      case BINARY: return Type.STRING;
      case BOOLEAN: return Type.BOOLEAN;
      case DOUBLE: return Type.DOUBLE;
      case FLOAT: return Type.FLOAT;
      case INT32: return Type.INT;
      case INT64: return Type.BIGINT;
      case INT96: return Type.TIMESTAMP;
      case FIXED_LEN_BYTE_ARRAY:
        throw new AnalysisException(
            "Unsupported parquet type FIXED_LEN_BYTE_ARRAY for field " +
            parquetType.getName());
      default:
        Preconditions.checkState(false,
            "Unexpected parquet primitive type: " + prim.getPrimitiveTypeName());
        return null;
    }
  }
  /**
   * Converts a Parquet type annotated with a logical OriginalType (e.g. UTF8 or
   * DECIMAL) to an Impala type.
   */
  private static Type convertLogicalParquetType(parquet.schema.Type parquetType)
      throws AnalysisException {
    OriginalType orig = parquetType.getOriginalType();
    PrimitiveType prim = parquetType.asPrimitiveType();
    if (prim.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY &&
        orig == OriginalType.UTF8) {
      // UTF8 is Parquet's annotation for string data stored as BINARY.
      return Type.STRING;
    }
    if (orig == OriginalType.DECIMAL) {
      return ScalarType.createDecimalType(prim.getDecimalMetadata().getPrecision(),
          prim.getDecimalMetadata().getScale());
    }
    throw new AnalysisException(
        "Unsupported logical parquet type " + orig + " (primitive type is " +
        prim.getPrimitiveTypeName().name() + ") for field " + parquetType.getName());
  }
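  // Illustrative note (not from the original source): a Parquet column declared
  // as "optional binary price (DECIMAL(9,2))" carries OriginalType.DECIMAL, so
  // the branch above maps it to Impala DECIMAL(9,2) via
  // ScalarType.createDecimalType(9, 2).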
  /**
   * Reads the Parquet file at the given location and returns its schema as a list
   * of Impala column definitions.
   */
  private static List<ColumnDef> extractParquetSchema(HdfsUri location)
      throws AnalysisException {
    parquet.schema.MessageType parquetSchema = loadParquetSchema(location.getPath());
    List<parquet.schema.Type> fields = parquetSchema.getFields();
    List<ColumnDef> schema = new ArrayList<ColumnDef>();
    for (parquet.schema.Type field: fields) {
      Type type = null;
      if (field.getOriginalType() != null) {
        type = convertLogicalParquetType(field);
      } else if (field.isPrimitive()) {
        type = convertPrimitiveParquetType(field);
      } else {
        throw new AnalysisException("Unsupported parquet type for field " +
            field.getName());
      }
      String colName = field.getName();
      schema.add(new ColumnDef(colName, new TypeDef(type),
          "inferred from: " + field.toString()));
    }
    return schema;
  }
  @Override
  public String toSql() {
    ArrayList<String> colsSql = Lists.newArrayList();
    ArrayList<String> partitionColsSql = Lists.newArrayList();
    HdfsCompression compression = HdfsCompression.fromFileName(
        schemaLocation_.toString());
    String s = ToSqlUtils.getCreateTableSql(getDb(),
        getTbl() + " __LIKE_FILEFORMAT__ ", getComment(), colsSql, partitionColsSql,
        getTblProperties(), getSerdeProperties(), isExternal(), getIfNotExists(),
        getRowFormat(), HdfsFileFormat.fromThrift(getFileFormat()),
        compression, null, getLocation());
    // Splice the "LIKE <fileformat> '<path>'" clause into the generated SQL.
    return s.replace("__LIKE_FILEFORMAT__", String.format("LIKE %s '%s'",
        schemaFileFormat_, schemaLocation_.toString()));
  }
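  // Illustrative note (not from the original source): for a statement like
  //   CREATE TABLE t LIKE PARQUET '/tmp/f.parq' STORED AS PARQUET
  // toSql() emits the generated CREATE TABLE text with the placeholder replaced
  // by "LIKE PARQUET '/tmp/f.parq'"; the table name and path are made up.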
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    schemaLocation_.analyze(analyzer, Privilege.ALL, FsAction.READ_WRITE);
    switch (schemaFileFormat_) {
      case PARQUET:
        // Infer the column definitions from the schema file before the parent
        // statement is analyzed.
        getColumnDefs().addAll(extractParquetSchema(schemaLocation_));
        break;
      default:
        throw new AnalysisException("Unsupported file type for schema inference: " +
            schemaFileFormat_);
    }
    super.analyze(analyzer);
  }
}
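// A minimal standalone sketch (not part of the original file) of the parquet-mr
// footer API that loadParquetSchema() builds on. The class name SchemaDump and
// the command-line argument are assumptions for illustration.
class SchemaDump {
  public static void main(String[] args) throws IOException {
    org.apache.hadoop.conf.Configuration conf =
        new org.apache.hadoop.conf.Configuration();
    Path path = new Path(args[0]);  // e.g. a local or HDFS Parquet file path
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, path);
    parquet.schema.MessageType schema = footer.getFileMetaData().getSchema();
    // Walk the top-level fields exactly as extractParquetSchema() does, printing
    // each field's name and full Parquet type description.
    for (parquet.schema.Type field : schema.getFields()) {
      System.out.println(field.getName() + " -> " + field.toString());
    }
  }
}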