Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
AvroSchemaParser.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.util;
16 
17 import static org.apache.avro.Schema.Type.BOOLEAN;
18 import static org.apache.avro.Schema.Type.DOUBLE;
19 import static org.apache.avro.Schema.Type.FLOAT;
20 import static org.apache.avro.Schema.Type.INT;
21 import static org.apache.avro.Schema.Type.LONG;
22 import static org.apache.avro.Schema.Type.STRING;
23 
24 import java.util.Collections;
25 import java.util.Hashtable;
26 import java.util.List;
27 import java.util.Map;
28 
29 import org.apache.avro.Schema;
30 import org.apache.avro.SchemaParseException;
31 import org.codehaus.jackson.JsonNode;
32 
36 import com.google.common.base.Preconditions;
37 import com.google.common.collect.Lists;
38 
44 public class AvroSchemaParser {
45  // Map of Avro to Impala primitive types.
46  private static final Map<Schema.Type, Type> avroToImpalaPrimitiveTypeMap_;
47  static {
48  Map<Schema.Type, Type> typeMap = new Hashtable<Schema.Type, Type>();
49  typeMap.put(STRING, Type.STRING);
50  typeMap.put(INT, Type.INT);
51  typeMap.put(BOOLEAN, Type.BOOLEAN);
52  typeMap.put(LONG, Type.BIGINT);
53  typeMap.put(FLOAT, Type.FLOAT);
54  typeMap.put(DOUBLE, Type.DOUBLE);
55  avroToImpalaPrimitiveTypeMap_ = Collections.unmodifiableMap(typeMap);
56  }
57 
65  public static List<Column> parse(String schemaStr) throws SchemaParseException {
66  Schema.Parser avroSchemaParser = new Schema.Parser();
67  Schema schema = avroSchemaParser.parse(schemaStr);
68  if (!schema.getType().equals(Schema.Type.RECORD)) {
69  throw new UnsupportedOperationException("Schema for table must be of type " +
70  "RECORD. Received type: " + schema.getType());
71  }
72  List<Column> cols = Lists.newArrayList();
73  for (int i = 0; i < schema.getFields().size(); ++i) {
74  Schema.Field field = schema.getFields().get(i);
75  cols.add(new Column(field.name(), getTypeInfo(field.schema(), field.name()), i));
76  }
77  return cols;
78  }
79 
84  private static Type getTypeInfo(Schema schema, String colName) {
85  // Avro requires NULLable types to be defined as unions of some type T
86  // and NULL. This is annoying and we're going to hide it from the user.
87  if (isNullableType(schema)) {
88  return getTypeInfo(getColumnType(schema), colName);
89  }
90 
91  Schema.Type type = schema.getType();
92  if (avroToImpalaPrimitiveTypeMap_.containsKey(type)) {
93  return avroToImpalaPrimitiveTypeMap_.get(type);
94  }
95 
96  switch(type) {
97  case BYTES:
98  // Decimal is stored in Avro as a BYTE.
99  Type decimalType = getDecimalType(schema);
100  if (decimalType != null) return decimalType;
101  case RECORD:
102  case MAP:
103  case ARRAY:
104  case UNION:
105  case ENUM:
106  case FIXED:
107  case NULL:
108  default: {
109  throw new UnsupportedOperationException(String.format(
110  "Unsupported type '%s' of column '%s'", type.getName(), colName));
111  }
112  }
113  }
114 
118  private static boolean isNullableType(Schema schema) {
119  // [null, null] not allowed, so this check is ok.
120  return schema.getType().equals(Schema.Type.UNION) && schema.getTypes().size() == 2 &&
121  (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
122  schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
123  }
124 
129  private static Schema getColumnType(Schema schema) {
130  List<Schema> types = schema.getTypes();
131  return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
132  }
133 
143  private static Type getDecimalType(Schema schema) {
144  Preconditions.checkState(schema.getType() == Schema.Type.BYTES);
145  String logicalType = schema.getProp("logicalType");
146  if (logicalType != null && logicalType.equalsIgnoreCase("decimal")) {
147  // Parse the scale/precision of the decimal type.
148  Integer scale = getDecimalProp(schema, "scale");
149  // The Avro spec states that scale should default to zero if not set.
150  if (scale == null) scale = 0;
151 
152  // Precision is a required property according to the Avro spec.
153  Integer precision = getDecimalProp(schema, "precision");
154  if (precision == null) {
155  throw new SchemaParseException(
156  "No 'precision' property specified for 'decimal' logicalType");
157  }
158  return ScalarType.createDecimalType(precision, scale);
159  }
160  return null;
161  }
162 
169  private static Integer getDecimalProp(Schema schema, String propName)
170  throws SchemaParseException {
171  JsonNode node = schema.getJsonProp(propName);
172  if (node == null) return null;
173  int propValue = node.getValueAsInt(-1);
174  if (propValue < 0) {
175  throw new SchemaParseException(String.format("Invalid decimal '%s' " +
176  "property value: %s", propName, node.getValueAsText()));
177  }
178  return propValue;
179  }
180 }
static final ScalarType BIGINT
Definition: Type.java:50
static final ScalarType STRING
Definition: Type.java:53
static final Map< Schema.Type, Type > avroToImpalaPrimitiveTypeMap_
static final ScalarType BOOLEAN
Definition: Type.java:46
static Schema getColumnType(Schema schema)
static final ScalarType FLOAT
Definition: Type.java:51
static final ScalarType DOUBLE
Definition: Type.java:52
static final ScalarType INT
Definition: Type.java:49
static Type getTypeInfo(Schema schema, String colName)
static Integer getDecimalProp(Schema schema, String propName)
static boolean isNullableType(Schema schema)
static List< Column > parse(String schemaStr)