Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
CreateTableAsSelectStmt.java
Go to the documentation of this file.
1 // Copyright 2012 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
import java.util.EnumSet;

import com.cloudera.impala.authorization.Privilege;
import com.cloudera.impala.catalog.Db;
import com.cloudera.impala.catalog.HdfsTable;
import com.cloudera.impala.catalog.MetaStoreClient;
import com.cloudera.impala.catalog.Table;
import com.cloudera.impala.catalog.TableId;
import com.cloudera.impala.catalog.TableLoadingException;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.service.CatalogOpExecutor;
import com.cloudera.impala.thrift.THdfsFileFormat;
import com.google.common.base.Preconditions;

34 public class CreateTableAsSelectStmt extends StatementBase {
36  private final InsertStmt insertStmt_;
37  private final static EnumSet<THdfsFileFormat> SUPPORTED_INSERT_FORMATS =
38  EnumSet.of(THdfsFileFormat.PARQUET, THdfsFileFormat.TEXT);
39 
43  public CreateTableAsSelectStmt(CreateTableStmt createStmt, QueryStmt queryStmt) {
44  Preconditions.checkNotNull(queryStmt);
45  Preconditions.checkNotNull(createStmt);
46  this.createStmt_ = createStmt;
47  this.insertStmt_ = new InsertStmt(null, createStmt.getTblName(), false,
48  null, null, queryStmt, null);
49  }
50 
51  public QueryStmt getQueryStmt() { return insertStmt_.getQueryStmt(); }
52  public InsertStmt getInsertStmt() { return insertStmt_; }
54  @Override
55  public String toSql() { return createStmt_.toSql() + " AS " + getQueryStmt().toSql(); }
56 
57  @Override
58  public void analyze(Analyzer analyzer) throws AnalysisException {
59  super.analyze(analyzer);
60 
61  // The analysis for CTAS happens in two phases - the first phase happens before
62  // the target table exists and we want to validate the CREATE statement and the
63  // query portion of the insert statement. If this passes, analysis will be run
64  // over the full INSERT statement. To avoid duplicate registrations of table/colRefs,
65  // create a new root analyzer and clone the query statement for this initial pass.
66  Analyzer dummyRootAnalyzer = new Analyzer(analyzer.getCatalog(),
67  analyzer.getQueryCtx(), analyzer.getAuthzConfig());
68  QueryStmt tmpQueryStmt = insertStmt_.getQueryStmt().clone();
69  try {
70  Analyzer tmpAnalyzer = new Analyzer(dummyRootAnalyzer);
71  tmpAnalyzer.setUseHiveColLabels(true);
72  tmpQueryStmt.analyze(tmpAnalyzer);
73  if (analyzer.containsSubquery()) {
74  // The select statement of this CTAS is nested. Rewrite the
75  // statement to unnest all subqueries and re-analyze using a new analyzer.
76  StmtRewriter.rewriteQueryStatement(tmpQueryStmt, tmpAnalyzer);
77  // Update the insert statement with the unanalyzed rewritten select stmt.
78  insertStmt_.setQueryStmt(tmpQueryStmt.clone());
79 
80  // Re-analyze the select statement of the CTAS.
81  tmpQueryStmt = insertStmt_.getQueryStmt().clone();
82  tmpAnalyzer = new Analyzer(dummyRootAnalyzer);
83  tmpAnalyzer.setUseHiveColLabels(true);
84  tmpQueryStmt.analyze(tmpAnalyzer);
85  }
86  } finally {
87  // Record missing tables in the original analyzer.
88  analyzer.getMissingTbls().addAll(dummyRootAnalyzer.getMissingTbls());
89  }
90 
91  // Add the columns from the select statement to the create statement.
92  int colCnt = tmpQueryStmt.getColLabels().size();
93  for (int i = 0; i < colCnt; ++i) {
94  ColumnDef colDef = new ColumnDef(
95  tmpQueryStmt.getColLabels().get(i), null, null);
96  colDef.setType(tmpQueryStmt.getBaseTblResultExprs().get(i).getType());
97  createStmt_.getColumnDefs().add(colDef);
98  }
99  createStmt_.analyze(analyzer);
100 
102  throw new AnalysisException(String.format("CREATE TABLE AS SELECT " +
103  "does not support (%s) file format. Supported formats are: (%s)",
104  createStmt_.getFileFormat().toString().replace("_", ""),
105  "PARQUET, TEXTFILE"));
106  }
107 
108  // The full privilege check for the database will be done as part of the INSERT
109  // analysis.
110  Db db = analyzer.getDb(createStmt_.getDb(), Privilege.ANY);
111  if (db == null) {
112  throw new AnalysisException(
114  }
115 
116  // Running analysis on the INSERT portion of the CTAS requires the target INSERT
117  // table to "exist". For CTAS the table does not exist yet, so create a "temp"
118  // table to run analysis against. The schema of this temp table should exactly
119  // match the schema of the table that will be created by running the CREATE
120  // statement.
121  org.apache.hadoop.hive.metastore.api.Table msTbl =
122  CatalogOpExecutor.createMetaStoreTable(createStmt_.toThrift());
123 
124  MetaStoreClient client = analyzer.getCatalog().getMetaStoreClient();
125  try {
126  // Set a valid location of this table using the same rules as the metastore. If the
127  // user specified a location for the table this will be a no-op.
128  msTbl.getSd().setLocation(analyzer.getCatalog().getTablePath(msTbl).toString());
129 
130  // If the user didn't specify a table location for the CREATE statement, inject the
131  // location that was calculated in the getTablePath() call. Since this will be the
132  // target location for the INSERT statement, it is important the two match.
133  if (createStmt_.getLocation() == null) {
134  createStmt_.setLocation(new HdfsUri(msTbl.getSd().getLocation()));
135  }
136 
137  // Create a "temp" table based off the given metastore.api.Table object. Normally,
138  // the CatalogService assigns all table IDs, but in this case we need to assign the
139  // "temp" table an ID locally. This table ID cannot conflict with any table in the
140  // SelectStmt (or the BE will be very confused). To ensure the ID is unique within
141  // this query, just assign it the invalid table ID. The CatalogServer will assign
142  // this table a proper ID once it is created there as part of the CTAS execution.
143  Table table = Table.fromMetastoreTable(TableId.createInvalidId(), db, msTbl);
144  Preconditions.checkState(table != null && table instanceof HdfsTable);
145 
146  HdfsTable hdfsTable = (HdfsTable) table;
147  hdfsTable.load(hdfsTable, client.getHiveClient(), msTbl);
148  insertStmt_.setTargetTable(table);
149  } catch (TableLoadingException e) {
150  throw new AnalysisException(e.getMessage(), e);
151  } catch (Exception e) {
152  throw new AnalysisException(e.getMessage(), e);
153  } finally {
154  client.release();
155  }
156 
157  // Finally, run analysis on the insert statement.
158  insertStmt_.analyze(analyzer);
159  }
160 }
int TableId
Definition: global-types.h:25
static final EnumSet< THdfsFileFormat > SUPPORTED_INSERT_FORMATS
static final String DB_DOES_NOT_EXIST_ERROR_MSG
Definition: Analyzer.java:107
CreateTableAsSelectStmt(CreateTableStmt createStmt, QueryStmt queryStmt)