Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CreateTableStmt.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import java.util.ArrayList;
18 import java.util.List;
19 import java.util.Map;
20 import java.util.Set;
21 
22 import org.apache.hadoop.fs.permission.FsAction;
23 
31 import com.cloudera.impala.thrift.TAccessEvent;
32 import com.cloudera.impala.thrift.TCatalogObjectType;
33 import com.cloudera.impala.thrift.TCreateTableParams;
34 import com.cloudera.impala.thrift.THdfsFileFormat;
35 import com.cloudera.impala.thrift.TTableName;
37 import com.google.common.base.Preconditions;
38 import com.google.common.base.Strings;
39 import com.google.common.collect.Lists;
40 import com.google.common.collect.Sets;
41 
/**
 * Represents a CREATE TABLE statement.
 */
public class CreateTableStmt extends StatementBase {
  // Column definitions as given in the statement (may be replaced during
  // analysis for AVRO tables; see analyze()/analyzeAvroSchema()).
  private final ArrayList<ColumnDef> columnDefs_;
  // Optional table comment; null if none was given.
  private final String comment_;
  // True for CREATE EXTERNAL TABLE.
  private final boolean isExternal_;
  // True if IF NOT EXISTS was specified.
  private final boolean ifNotExists_;
  // Storage format of the new table (e.g. TEXT, PARQUET, AVRO).
  private final THdfsFileFormat fileFormat_;
  // Partition column definitions; empty if the table is unpartitioned.
  private final ArrayList<ColumnDef> partitionColDefs_;
  private final RowFormat rowFormat_;
  private final TableName tableName_;
  // Raw TBLPROPERTIES / SERDEPROPERTIES maps; may be null if not specified.
  private final Map<String, String> tblProperties_;
  private final Map<String, String> serdeProperties_;
  // Optional CACHED IN / UNCACHED clause; null if not specified.
  private final HdfsCachingOp cachingOp_;
  // Table location; mutable because it may be set/overridden after parsing.
  private HdfsUri location_;

  // Set during analysis
  private String dbName_;
  private String owner_;
  /**
   * Builds a CREATE TABLE statement from its parsed components.
   * columnDefs, partitionColumnDefs, fileFormat, rowFormat and tableName are
   * required (checked below); the remaining arguments may be null/absent.
   */
  public CreateTableStmt(TableName tableName, List<ColumnDef> columnDefs,
      List<ColumnDef> partitionColumnDefs, boolean isExternal, String comment,
      RowFormat rowFormat, THdfsFileFormat fileFormat, HdfsUri location,
      HdfsCachingOp cachingOp, boolean ifNotExists, Map<String, String> tblProperties,
      Map<String, String> serdeProperties) {
    // Fail fast on the required arguments.
    Preconditions.checkNotNull(columnDefs);
    Preconditions.checkNotNull(partitionColumnDefs);
    Preconditions.checkNotNull(fileFormat);
    Preconditions.checkNotNull(rowFormat);
    Preconditions.checkNotNull(tableName);

    // Defensive copies of the column-def lists; the maps are stored as given.
    this.columnDefs_ = Lists.newArrayList(columnDefs);
    this.comment_ = comment;
    this.isExternal_ = isExternal;
    this.ifNotExists_ = ifNotExists;
    this.fileFormat_ = fileFormat;
    this.location_ = location;
    this.cachingOp_ = cachingOp;
    this.partitionColDefs_ = Lists.newArrayList(partitionColumnDefs);
    this.rowFormat_ = rowFormat;
    this.tableName_ = tableName;
    this.tblProperties_ = tblProperties;
    this.serdeProperties_ = serdeProperties;
    // NOTE(review): this extraction elides original lines 103-104 here
    // (presumably property unescaping via unescapeProperties()); confirm
    // against the repository before modifying this constructor.
  }
106 
    // NOTE(review): the signature of this copy constructor (original lines
    // 107-110) is elided in this extraction; the body below performs a
    // field-by-field copy from 'other', with fresh copies of the column-def
    // lists. Confirm the signature against the repository.
    columnDefs_ = Lists.newArrayList(other.columnDefs_);
    comment_ = other.comment_;
    isExternal_ = other.isExternal_;
    ifNotExists_ = other.ifNotExists_;
    fileFormat_ = other.fileFormat_;
    location_ = other.location_;
    cachingOp_ = other.cachingOp_;
    partitionColDefs_ = Lists.newArrayList(other.partitionColDefs_);
    rowFormat_ = other.rowFormat_;
    tableName_ = other.tableName_;
    tblProperties_ = other.tblProperties_;
    serdeProperties_ = other.serdeProperties_;
  }
124 
125  @Override
126  public CreateTableStmt clone() { return new CreateTableStmt(this); }
127 
  // Simple accessors for the statement's components. Note that the list
  // returned by getColumnDefs() reflects any replacement performed during
  // analysis of AVRO tables.
  public String getTbl() { return tableName_.getTbl(); }
  public TableName getTblName() { return tableName_; }
  public List<ColumnDef> getColumnDefs() { return columnDefs_; }
  public List<ColumnDef> getPartitionColumnDefs() { return partitionColDefs_; }
  public String getComment() { return comment_; }
  public boolean isExternal() { return isExternal_; }
  public boolean getIfNotExists() { return ifNotExists_; }
  public HdfsUri getLocation() { return location_; }
  public void setLocation(HdfsUri location) { this.location_ = location; }
  public THdfsFileFormat getFileFormat() { return fileFormat_; }
  public RowFormat getRowFormat() { return rowFormat_; }
  public Map<String, String> getTblProperties() { return tblProperties_; }
  public Map<String, String> getSerdeProperties() { return serdeProperties_; }
141 
146  public String getOwner() {
147  Preconditions.checkNotNull(owner_);
148  return owner_;
149  }
150 
155  public String getDb() {
156  Preconditions.checkNotNull(dbName_);
157  return dbName_;
158  }
159 
  @Override
  public String toSql() {
    // Delegates to the shared DDL SQL generator so printing stays consistent
    // with other statements.
    return ToSqlUtils.getCreateTableSql(this);
  }
164 
165  public TCreateTableParams toThrift() {
166  TCreateTableParams params = new TCreateTableParams();
167  params.setTable_name(new TTableName(getDb(), getTbl()));
168  for (ColumnDef col: getColumnDefs()) {
169  params.addToColumns(col.toThrift());
170  }
171  for (ColumnDef col: getPartitionColumnDefs()) {
172  params.addToPartition_columns(col.toThrift());
173  }
174  params.setOwner(getOwner());
175  params.setIs_external(isExternal());
176  params.setComment(comment_);
177  params.setLocation(location_ == null ? null : location_.toString());
178  if (cachingOp_ != null) params.setCache_op(cachingOp_.toThrift());
179  params.setRow_format(rowFormat_.toThrift());
180  params.setFile_format(fileFormat_);
181  params.setIf_not_exists(getIfNotExists());
182  if (tblProperties_ != null) params.setTable_properties(tblProperties_);
183  if (serdeProperties_ != null) params.setSerde_properties(serdeProperties_);
184  return params;
185  }
186 
  /**
   * Performs semantic analysis: resolves the target database, records the
   * owner, registers the CREATE access event, validates columns, partition
   * columns, the location URI and the caching op, and (for AVRO tables)
   * reconciles the declared columns against the Avro schema.
   */
  @Override
  public void analyze(Analyzer analyzer) throws AnalysisException {
    Preconditions.checkState(tableName_ != null && !tableName_.isEmpty());
    tableName_.analyze();
    dbName_ = analyzer.getTargetDbName(tableName_);
    owner_ = analyzer.getUser().getName();

    // Creating an existing table is an error unless IF NOT EXISTS was given.
    if (analyzer.dbContainsTable(dbName_, tableName_.getTbl(), Privilege.CREATE) &&
        !ifNotExists_) {
      // NOTE(review): this extraction elides original line 196 (the beginning
      // of the throw statement for the "table already exists" error; the
      // trailing index mentions Analyzer.TBL_ALREADY_EXISTS_ERROR_MSG).
      // Confirm against the repository before modifying this branch.
      String.format("%s.%s", dbName_, getTbl()));
    }

    analyzer.addAccessEvent(new TAccessEvent(dbName_ + "." + tableName_.getTbl(),
        TCatalogObjectType.TABLE, Privilege.CREATE.toString()));

    // Only Avro tables can have empty column defs because they can infer them from
    // the Avro schema.
    if (columnDefs_.isEmpty() && fileFormat_ != THdfsFileFormat.AVRO) {
      throw new AnalysisException("Table requires at least 1 column");
    }

    if (location_ != null) {
      location_.analyze(analyzer, Privilege.ALL, FsAction.READ_WRITE);
    }

    // NOTE(review): original lines 212-215 are elided in this extraction
    // (presumably row-format validation via analyzeRowFormatValue()); confirm
    // against the repository.

    // Check that all the column names are valid and unique.
    analyzeColumnDefs(analyzer);

    if (fileFormat_ == THdfsFileFormat.AVRO) {
      List<ColumnDef> newColumnDefs = analyzeAvroSchema(analyzer);
      if (newColumnDefs != columnDefs_) {
        // Replace the old column defs with the new ones and analyze them.
        columnDefs_.clear();
        columnDefs_.addAll(newColumnDefs);
        analyzeColumnDefs(analyzer);
      }
    }

    if (cachingOp_ != null) cachingOp_.analyze(analyzer);
  }
232 
237  private void analyzeColumnDefs(Analyzer analyzer) throws AnalysisException {
238  Set<String> colNames = Sets.newHashSet();
239  for (ColumnDef colDef: columnDefs_) {
240  colDef.analyze();
241  if (!colNames.add(colDef.getColName().toLowerCase())) {
242  throw new AnalysisException("Duplicate column name: " + colDef.getColName());
243  }
244  }
245  for (ColumnDef colDef: partitionColDefs_) {
246  colDef.analyze();
247  if (!colDef.getType().supportsTablePartitioning()) {
248  throw new AnalysisException(
249  String.format("Type '%s' is not supported as partition-column type " +
250  "in column: %s", colDef.getType().toSql(), colDef.getColName()));
251  }
252  if (!colNames.add(colDef.getColName().toLowerCase())) {
253  throw new AnalysisException("Duplicate column name: " + colDef.getColName());
254  }
255  }
256  }
257 
263  private List<ColumnDef> analyzeAvroSchema(Analyzer analyzer)
264  throws AnalysisException {
265  Preconditions.checkState(fileFormat_ == THdfsFileFormat.AVRO);
266  // Look for the schema in TBLPROPERTIES and in SERDEPROPERTIES, with the latter
267  // taking precedence.
268  List<Map<String, String>> schemaSearchLocations = Lists.newArrayList();
269  String fullTblName = dbName_ + "." + tableName_.getTbl();
270  schemaSearchLocations.add(serdeProperties_);
271  schemaSearchLocations.add(tblProperties_);
272  String avroSchema = null;
273  try {
274  avroSchema = HdfsTable.getAvroSchema(schemaSearchLocations,
275  dbName_ + "." + tableName_.getTbl());
276  } catch (TableLoadingException e) {
277  throw new AnalysisException(e.getMessage(), e);
278  }
279 
280  if (Strings.isNullOrEmpty(avroSchema)) {
281  throw new AnalysisException("Avro schema is null or empty: " + fullTblName);
282  }
283 
284  // List of columns parsed from the Avro schema.
285  List<Column> avroColumns = null;
286  try {
287  avroColumns = AvroSchemaParser.parse(avroSchema);
288  } catch (Exception e) {
289  throw new AnalysisException(String.format(
290  "Error parsing Avro schema for table '%s': %s", fullTblName,
291  e.getMessage()));
292  }
293  Preconditions.checkNotNull(avroColumns);
294 
295  // Analyze the Avro schema to detect inconsistencies with the columnDefs_.
296  // In case of inconsistencies, the column defs are ignored in favor of the Avro
297  // schema for simplicity and, in particular, to enable COMPUTE STATS (IMPALA-1104).
298  String warnStr = null; // set if inconsistency detected
299  if (avroColumns.size() != columnDefs_.size() && !columnDefs_.isEmpty()) {
300  warnStr = String.format(
301  "Ignoring column definitions in favor of Avro schema.\n" +
302  "The Avro schema has %s column(s) but %s column definition(s) were given.",
303  avroColumns.size(), columnDefs_.size());
304  } else {
305  // Determine whether the column names and the types match.
306  for (int i = 0; i < columnDefs_.size(); ++i) {
307  ColumnDef colDesc = columnDefs_.get(i);
308  Column avroCol = avroColumns.get(i);
309  String warnDetail = null;
310  if (!colDesc.getColName().equalsIgnoreCase(avroCol.getName())) {
311  warnDetail = "name";
312  }
313  if (colDesc.getType().isStringType() &&
314  avroCol.getType().isStringType()) {
315  // This is OK -- avro types for CHAR, VARCHAR, and STRING are "string"
316  } else if (!colDesc.getType().equals(avroCol.getType())) {
317  warnDetail = "type";
318  }
319  if (warnDetail != null) {
320  warnStr = String.format(
321  "Ignoring column definitions in favor of Avro schema due to a " +
322  "mismatched column %s at position %s.\n" +
323  "Column definition: %s\n" +
324  "Avro schema column: %s", warnDetail, i + 1,
325  colDesc.getColName() + " " + colDesc.getType().toSql(),
326  avroCol.getName() + " " + avroCol.getType().toSql());
327  break;
328  }
329  }
330  }
331 
332  if (warnStr != null || columnDefs_.isEmpty()) {
333  analyzer.addWarning(warnStr);
334  // Create new columnDefs_ based on the Avro schema and return them.
335  List<ColumnDef> avroSchemaColDefs =
336  Lists.newArrayListWithCapacity(avroColumns.size());
337  for (Column avroCol: avroColumns) {
338  ColumnDef colDef =
339  new ColumnDef(avroCol.getName(), null, avroCol.getComment());
340  colDef.setType(avroCol.getType());
341  avroSchemaColDefs.add(colDef);
342  }
343  return avroSchemaColDefs;
344  }
345  // The existing col defs are consistent with the Avro schema.
346  return columnDefs_;
347  }
348 
349  private void analyzeRowFormatValue(String value) throws AnalysisException {
350  if (value == null) return;
351  if (HdfsStorageDescriptor.parseDelim(value) == null) {
352  throw new AnalysisException("ESCAPED BY values and LINE/FIELD " +
353  "terminators must be specified as a single character or as a decimal " +
354  "value in the range [-128:127]: " + value);
355  }
356  }
357 
361  public static void unescapeProperties(Map<String, String> propertyMap) {
362  if (propertyMap == null) return;
363  for (Map.Entry<String, String> kv : propertyMap.entrySet()) {
364  propertyMap.put(kv.getKey(),
365  new StringLiteral(kv.getValue()).getUnescapedValue());
366  }
367  }
368 }
CreateTableStmt(TableName tableName, List< ColumnDef > columnDefs, List< ColumnDef > partitionColumnDefs, boolean isExternal, String comment, RowFormat rowFormat, THdfsFileFormat fileFormat, HdfsUri location, HdfsCachingOp cachingOp, boolean ifNotExists, Map< String, String > tblProperties, Map< String, String > serdeProperties)
static void unescapeProperties(Map< String, String > propertyMap)
final Map< String, String > serdeProperties_
List< ColumnDef > analyzeAvroSchema(Analyzer analyzer)
static final String TBL_ALREADY_EXISTS_ERROR_MSG
Definition: Analyzer.java:110
final ArrayList< ColumnDef > partitionColDefs_