Impala
Impalaistheopensource,nativeanalyticdatabaseforApacheHadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
AnalyzeSubqueriesTest.java
Go to the documentation of this file.
1 // Copyright (c) 2014 Cloudera, Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 package com.cloudera.impala.analysis;
16 
17 import org.junit.Test;
18 
21 
22 public class AnalyzeSubqueriesTest extends AnalyzerTest {
23  private static String cmpOperators[] = {"=", "!=", "<=", ">=", ">", "<"};
24  @Test
25  public void TestInSubqueries() throws AnalysisException {
26  String colNames[] = {"bool_col", "tinyint_col", "smallint_col", "int_col",
27  "bigint_col", "float_col", "double_col", "string_col", "date_string_col",
28  "timestamp_col"};
29  String joinOperators[] = {"inner join", "left outer join", "right outer join",
30  "left semi join", "left anti join"};
31 
32  // [NOT] IN subquery predicates
33  String operators[] = {"in", "not in"};
34  for (String op: operators) {
35  AnalyzesOk(String.format("select * from functional.alltypes where id %s " +
36  "(select id from functional.alltypestiny)", op));
37  // Using column and table aliases similar to the ones produced by the
38  // column/table alias generators during a rewrite.
39  AnalyzesOk(String.format("select id `$c$1` from functional.alltypestiny `$a$1` " +
40  "where id %s (select id from functional.alltypessmall)", op));
41  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
42  "(select id from functional.alltypestiny)", op));
43  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
44  "(select a.id from functional.alltypestiny a)", op));
45  AnalyzesOk(String.format("select count(*) from functional.alltypes t where " +
46  "t.id %s (select id from functional.alltypestiny)", op));
47  AnalyzesOk(String.format("select t.id, max(t.int_col) from " +
48  "functional.alltypes t where t.int_col %s (select int_col from " +
49  "functional.alltypesagg) group by t.id having count(*) < 10", op));
50  AnalyzesOk(String.format("select t.bigint_col, t.string_col from " +
51  "functional.alltypes t where t.id %s (select id from " +
52  "functional.alltypesagg where int_col < 10) order by bigint_col", op));
53  AnalyzesOk(String.format("select * from functional.alltypes a where a.id %s " +
54  "(select id from functional.alltypes b where a.id = b.id)", op));
55 
56  // Complex expressions
57  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
58  "(select id + int_col from functional.alltypestiny)", op));
59  AnalyzesOk(String.format("select 1 from functional.alltypes t where " +
60  "t.int_col + 1 %s (select int_col - 1 from functional.alltypestiny)", op));
61  AnalyzesOk(String.format("select * from functional.alltypes t where " +
62  "abs(t.double_col) %s (select int_col from functional.alltypestiny)", op));
63  AnalyzesOk(String.format("select NULL from functional.alltypes t where " +
64  "cast(t.double_col as int) %s (select int_col from " +
65  "functional.alltypestiny)", op));
66  AnalyzesOk(String.format("select count(*) from functional.alltypes where id %s " +
67  "(select 1 from functional.alltypestiny)", op));
68  AnalyzesOk(String.format("select * from functional.alltypes where id %s " +
69  "(select 1 + 1 from functional.alltypestiny group by int_col)", op));
70  AnalyzesOk(String.format("select max(id) from functional.alltypes where id %s " +
71  "(select max(id) from functional.alltypesagg a where a.int_col < 10) " +
72  "and bool_col = false", op));
73 
74  // Subquery returns multiple columns
75  AnalysisError(String.format("select * from functional.alltypestiny t where id %s " +
76  "(select id, int_col from functional.alltypessmall)", op),
77  "Subquery must return a single column: (SELECT id, int_col " +
78  "FROM functional.alltypessmall)");
79  // Subquery returns an incompatible column type
80  AnalysisError(String.format("select * from functional.alltypestiny t where id %s " +
81  "(select timestamp_col from functional.alltypessmall)", op),
82  "Incompatible return types 'INT' and 'TIMESTAMP' of exprs 'id' and " +
83  "'timestamp_col'.");
84 
85  // Different column types in the subquery predicate
86  for (String col: colNames) {
87  AnalyzesOk(String.format("select * from functional.alltypes t where t.%s %s " +
88  "(select a.%s from functional.alltypestiny a)", col, op, col));
89  }
90  // Decimal in the subquery predicate
91  AnalyzesOk(String.format("select * from functional.alltypes t where " +
92  "t.double_col %s (select d3 from functional.decimal_tbl a)", op));
93  // Varchar in the subquery predicate
94  AnalyzesOk(String.format("select * from functional.alltypes t where " +
95  "t.string_col %s (select cast(a.string_col as varchar(1)) from " +
96  "functional.alltypestiny a)", op));
97 
98  // Subqueries with multiple predicates in the WHERE clause
99  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
100  "(select a.id from functional.alltypestiny a where a.int_col < 10)", op));
101  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
102  "(select a.id from functional.alltypestiny a where a.int_col > 10 and " +
103  "a.tinyint_col < 5)", op));
104 
105  // Subqueries with a GROUP BY clause
106  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
107  "(select a.id from functional.alltypestiny a where a.double_col < 10.1 " +
108  "group by a.id)", op));
109 
110  // Subqueries with GROUP BY and HAVING clauses
111  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
112  "(select a.id from functional.alltypestiny a where a.bool_col = true and " +
113  "int_col < 10 group by id having count(*) < 10)", op));
114 
115  // Subqueries with a LIMIT clause
116  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
117  "(select a.id from functional.alltypestiny a where id < 100 limit 10)", op));
118 
119  // Subqueries with multiple tables in the FROM clause
120  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
121  "(select a.id from functional.alltypestiny a, functional.alltypessmall s " +
122  "where a.int_col = s.int_col and s.bigint_col < 100 and a.tinyint_col < 10)",
123  op));
124 
125  // Different join operators between the tables in subquery's FROM clause
126  for (String joinOp: joinOperators) {
127  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
128  "(select a.id from functional.alltypestiny a %s functional.alltypessmall " +
129  "s on a.int_col = s.int_col where a.bool_col = false)", op, joinOp));
130  }
131 
132  // Correlated predicates in the subquery's ON clause
133  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
134  "(select a.id from functional.alltypesagg a left outer join " +
135  "functional.alltypessmall s on s.int_col = t.int_col)", op));
136  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
137  "(select a.id from functional.alltypesagg a left outer join " +
138  "functional.alltypessmall s on s.bigint_col = a.bigint_col and " +
139  "s.int_col = t.int_col)", op));
140  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
141  "(select a.id from functional.alltypesagg a left outer join " +
142  "functional.alltypessmall s on a.bool_col = s.bool_col and t.int_col = 1)",
143  op));
144  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
145  "(select a.id from functional.alltypesagg a left outer join " +
146  "functional.alltypessmall s on ifnull(s.int_col, s.int_col + 20) = " +
147  "t.int_col + t.bigint_col)", op));
148 
149  // Subqueries with inline views
150  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
151  "(select a.id from functional.alltypestiny a, " +
152  "(select * from functional.alltypessmall) s where s.int_col = a.int_col " +
153  "and s.bool_col = false)", op));
154 
155  // Subqueries with inline views that contain subqueries
156  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
157  "(select a.id from (select id from functional.alltypesagg g where " +
158  "g.int_col in (select int_col from functional.alltypestiny)) a)", op));
159  AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
160  "(select a.id from (select g.* from functional.alltypesagg g where " +
161  "g.int_col in (select int_col from functional.alltypestiny)) a where " +
162  "a.bigint_col = 100)", op));
163 
164  // Multiple tables in the FROM clause of the outer query block
165  for (String joinOp: joinOperators) {
166  AnalyzesOk(String.format("select * from functional.alltypes t %s " +
167  "functional.alltypessmall s on t.int_col = s.int_col where " +
168  "t.tinyint_col %s (select tinyint_col from functional.alltypesagg) " +
169  "and t.bool_col = false and t.bigint_col = 10", joinOp, op));
170  }
171 
172  // Subqueries in WITH clause
173  AnalyzesOk(String.format("with t as (select a.* from functional.alltypes a where " +
174  "id %s (select id from functional.alltypestiny)) select * from t where " +
175  "t.bool_col = false and t.int_col = 10", op));
176 
177  // Subqueries in WITH and WHERE clauses
178  AnalyzesOk(String.format("with t as (select a.* from functional.alltypes a " +
179  "where id %s (select id from functional.alltypestiny s)) select * from t " +
180  "where t.int_col in (select int_col from functional.alltypessmall) and " +
181  "t.bool_col = false", op));
182 
183  // Subqueries in WITH, FROM and WHERE clauses
184  AnalyzesOk(String.format("with t as (select a.* from functional.alltypes a " +
185  "where id %s (select id from functional.alltypestiny)) select t.* from t, " +
186  "(select * from functional.alltypesagg g where g.id in " +
187  "(select id from functional.alltypes)) s where s.string_col = t.string_col " +
188  "and t.int_col in (select int_col from functional.alltypessmall) and " +
189  "s.bool_col = false", op));
190 
191  // Correlated subqueries
192  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
193  "(select id from functional.alltypesagg a where t.int_col = a.int_col) " +
194  "and t.bool_col = false", op));
195  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
196  "(select id from functional.alltypesagg a where t.int_col + 1 = a.int_col)",
197  op));
198  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
199  "(select id from functional.alltypesagg a where t.int_col + 1 = a.int_col + 1)",
200  op));
201  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
202  "(select id from functional.alltypesagg a where t.int_col + a.int_col = " +
203  "a.bigint_col and a.bool_col = true)", op));
204  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
205  "(select id from functional.alltypesagg a where t.bool_col = false and " +
206  "a.int_col < 10)", op));
207  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
208  "(select id from functional.alltypesagg a where t.bool_col)", op));
209 
210  // Multiple nesting levels (uncorrelated queries)
211  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
212  "(select id from functional.alltypesagg where int_col %s " +
213  "(select int_col from functional.alltypestiny) and bool_col = false) " +
214  "and bigint_col < 1000", op, op));
215 
216  // Multiple nesting levels (correlated queries)
217  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
218  "(select id from functional.alltypesagg a where a.int_col = t.int_col " +
219  "and a.tinyint_col %s (select tinyint_col from functional.alltypestiny s " +
220  "where s.bigint_col = a.bigint_col))", op, op));
221 
222  // Multiple nesting levels (correlated and uncorrelated queries)
223  AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
224  "(select id from functional.alltypesagg a where t.int_col = a.int_col " +
225  "and a.int_col %s (select int_col from functional.alltypestiny s))",
226  op, op));
227 
228  // NOT ([NOT] IN predicate)
229  AnalyzesOk(String.format("select * from functional.alltypes t where not (id %s " +
230  "(select id from functional.alltypesagg))", op));
231 
232  // Different cmp operators in the correlation predicate
233  for (String cmpOp: cmpOperators) {
234  AnalyzesOk(String.format("select * from functional.alltypes t " +
235  "where t.id %s (select a.id from functional.alltypesagg a where " +
236  "t.int_col %s a.int_col)", op, cmpOp));
237  }
238 
239  // Uncorrelated IN subquery with analytic function
240  AnalyzesOk(String.format("select id, int_col, bool_col from " +
241  "functional.alltypestiny t1 where int_col %s (select min(bigint_col) " +
242  "over (partition by bool_col) from functional.alltypessmall t2 where " +
243  "int_col < 10)", op));
244  }
245 
246  // Constant on the left hand side
247  AnalyzesOk("select * from functional.alltypes a where 1 in " +
248  "(select id from functional.alltypesagg s where s.int_col = a.int_col)");
249  AnalysisError("select * from functional.alltypes a where 1 not in " +
250  "(select id from functional.alltypesagg s where s.int_col = a.int_col)",
251  "Unsupported NOT IN predicate with subquery: 1 NOT IN (SELECT id FROM " +
252  "functional.alltypesagg s WHERE s.int_col = a.int_col)");
253 
254  // IN subquery that is equivalent to an uncorrelated EXISTS subquery
255  AnalysisError("select * from functional.alltypes t where 1 in " +
256  "(select int_col from functional.alltypesagg)", "Unsupported " +
257  "predicate with subquery: 1 IN (SELECT int_col FROM functional.alltypesagg)");
258  // Different non-equi comparison operators in the correlated predicate
259  String nonEquiCmpOperators[] = {"!=", "<=", ">=", ">", "<"};
260  for (String cmpOp: nonEquiCmpOperators) {
261  AnalysisError(String.format("select 1 from functional.alltypes t where 1 in " +
262  "(select int_col from functional.alltypesagg g where g.id %s t.id)",
263  cmpOp), String.format("Unsupported predicate with subquery: 1 " +
264  "IN (SELECT int_col FROM functional.alltypesagg g WHERE g.id %s t.id)",
265  cmpOp));
266  }
267 
268  // NOT IN subquery with a correlated predicate that can't be used in an equi
269  // join
270  AnalysisError("select 1 from functional.alltypes t where 1 not in " +
271  "(select id from functional.alltypestiny g where g.id < t.id)",
272  "Unsupported predicate with subquery: 1 NOT IN (SELECT id FROM " +
273  "functional.alltypestiny g WHERE g.id < t.id)");
274 
275  // Statement with a GROUP BY and a correlated IN subquery that has
276  // correlated predicate that cannot be transformed into an equi-join.
277  AnalysisError("select id, count(*) from functional.alltypes t " +
278  "where 1 IN (select id from functional.alltypesagg g where t.int_col < " +
279  "g.int_col) group by id", "Unsupported predicate with subquery: 1 IN " +
280  "(SELECT id FROM functional.alltypesagg g WHERE t.int_col < g.int_col)");
281 
282  // Reference a non-existing table in the subquery
283  AnalysisError("select * from functional.alltypestiny t where id in " +
284  "(select id from functional.alltypessmall s left outer join p on " +
285  "(s.int_col = p.int_col))",
286  "Could not resolve table reference: 'p'");
287  // Reference a non-existing column from a table in the outer scope
288  AnalysisError("select * from functional.alltypestiny t where id in " +
289  "(select id from functional.alltypessmall s where s.int_col = t.bad_col)",
290  "Could not resolve column/field reference: 't.bad_col'");
291 
292  // Referencing the same table in the inner and the outer query block
293  // No explicit alias
294  AnalyzesOk("select id from functional.alltypestiny where int_col in " +
295  "(select int_col from functional.alltypestiny)");
296  // Different alias between inner and outer block referencing the same table
297  AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
298  "(select int_col from functional.alltypestiny p)");
299  // Alias only in the outer block
300  AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
301  "(select int_col from functional.alltypestiny)");
302  // Same alias in both inner and outer block
303  AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
304  "(select int_col from functional.alltypestiny t)");
305  // Binary predicate with non-comparable operands
306  AnalysisError("select * from functional.alltypes t where " +
307  "(id in (select id from functional.alltypestiny)) = 'string_val'",
308  "operands of type BOOLEAN and STRING are not comparable: " +
309  "(id IN (SELECT id FROM functional.alltypestiny)) = 'string_val'");
310 
311  // OR with subquery predicates
312  AnalysisError("select * from functional.alltypes t where t.id in " +
313  "(select id from functional.alltypesagg) or t.bool_col = false",
314  "Subqueries in OR predicates are not supported: t.id IN " +
315  "(SELECT id FROM functional.alltypesagg) OR t.bool_col = FALSE");
316  AnalysisError("select * from functional.alltypes t where not (t.id in " +
317  "(select id from functional.alltypesagg) and t.int_col = 10)",
318  "Subqueries in OR predicates are not supported: t.id NOT IN " +
319  "(SELECT id FROM functional.alltypesagg) OR t.int_col != 10");
320  AnalysisError("select * from functional.alltypes t where exists " +
321  "(select * from functional.alltypesagg g where g.bool_col = false) " +
322  "or t.bool_col = true", "Subqueries in OR predicates are not " +
323  "supported: EXISTS (SELECT * FROM functional.alltypesagg g WHERE " +
324  "g.bool_col = FALSE) OR t.bool_col = TRUE");
325  AnalysisError("select * from functional.alltypes t where t.id = " +
326  "(select min(id) from functional.alltypesagg g) or t.id = 10",
327  "Subqueries in OR predicates are not supported: t.id = " +
328  "(SELECT min(id) FROM functional.alltypesagg g) OR t.id = 10");
329 
330  // Correlated subquery with OR predicate
331  AnalysisError("select * from functional.alltypes t where id in " +
332  "(select id from functional.alltypesagg a where " +
333  "a.int_col = t.int_col or a.bool_col = false)", "Disjunctions " +
334  "with correlated predicates are not supported: a.int_col = " +
335  "t.int_col OR a.bool_col = FALSE");
336 
337  AnalyzesOk("select * from functional.alltypes t where id in " +
338  "(select id from functional.alltypestiny) and (bool_col = false or " +
339  "int_col = 10)");
340 
341  // Correlated subqueries with GROUP BY, AGG functions or DISTINCT
342  AnalysisError("select * from functional.alltypes t where t.id in " +
343  "(select max(a.id) from functional.alltypesagg a where " +
344  "t.int_col = a.int_col)", "Unsupported correlated subquery with grouping " +
345  "and/or aggregation: SELECT max(a.id) FROM functional.alltypesagg a " +
346  "WHERE t.int_col = a.int_col");
347  AnalysisError("select * from functional.alltypes t where t.id in " +
348  "(select a.id from functional.alltypesagg a where " +
349  "t.int_col = a.int_col group by a.id)", "Unsupported correlated " +
350  "subquery with grouping and/or aggregation: SELECT a.id FROM " +
351  "functional.alltypesagg a WHERE t.int_col = a.int_col GROUP BY a.id");
352  AnalysisError("select * from functional.alltypes t where t.id in " +
353  "(select distinct a.id from functional.alltypesagg a where " +
354  "a.bigint_col = t.bigint_col)", "Unsupported correlated subquery with " +
355  "grouping and/or aggregation: SELECT DISTINCT a.id FROM " +
356  "functional.alltypesagg a WHERE a.bigint_col = t.bigint_col");
357 
358  // NOT compound predicates with OR
359  AnalyzesOk("select * from functional.alltypes t where not (" +
360  "id in (select id from functional.alltypesagg) or int_col < 10)");
361  AnalyzesOk("select * from functional.alltypes t where not (" +
362  "t.id < 10 or not (t.int_col in (select int_col from " +
363  "functional.alltypesagg) and t.bool_col = false))");
364 
365  // Multiple subquery predicates
366  AnalyzesOk("select * from functional.alltypes t where id in " +
367  "(select id from functional.alltypestiny where int_col = 10) and int_col in " +
368  "(select int_col from functional.alltypessmall where bigint_col = 1000) and " +
369  "string_col not in (select string_col from functional.alltypesagg where " +
370  "tinyint_col > 10) and bool_col = false");
371 
372  // Correlated subquery with a LIMIT clause
373  AnalysisError("select * from functional.alltypes t where id in " +
374  "(select s.id from functional.alltypesagg s where s.int_col = t.int_col " +
375  "limit 1)", "Unsupported correlated subquery with a LIMIT clause: " +
376  "SELECT s.id FROM functional.alltypesagg s WHERE s.int_col = t.int_col " +
377  "LIMIT 1");
378 
379  // Correlated IN with an analytic function
380  AnalysisError("select id, int_col, bool_col from functional.alltypestiny t1 " +
381  "where int_col in (select min(bigint_col) over (partition by bool_col) " +
382  "from functional.alltypessmall t2 where t1.id < t2.id)", "Unsupported " +
383  "correlated subquery with grouping and/or aggregation: SELECT " +
384  "min(bigint_col) OVER (PARTITION BY bool_col) FROM " +
385  "functional.alltypessmall t2 WHERE t1.id < t2.id");
386 
387  // IN subquery in binary predicate
388  AnalysisError("select * from functional.alltypestiny where " +
389  "(tinyint_col in (1,2)) = (bool_col in (select bool_col from " +
390  "functional.alltypes))", "IN subquery predicates are not supported " +
391  "in binary predicates: (tinyint_col IN (1, 2)) = (bool_col IN (SELECT " +
392  "bool_col FROM functional.alltypes))");
393 
394  // Column labels may conflict after the rewrite as an inline view
395  AnalyzesOk("select int_col from functional.alltypestiny where " +
396  "int_col in (select 1 as int_col from functional.alltypesagg)");
397  AnalyzesOk("select int_col from functional.alltypestiny a where " +
398  "int_col not in (select 1 as int_col from functional.alltypesagg b " +
399  "where a.int_col = b.int_col)");
400  }
401 
402  @Test
404  String existsOperators[] = {"exists", "not exists"};
405 
406  for (String op: existsOperators) {
407  // [NOT] EXISTS predicate (correlated)
408  AnalyzesOk(String.format("select * from functional.alltypes t " +
409  "where %s (select * from functional.alltypestiny p where " +
410  "p.id = t.id)", op));
411  AnalyzesOk(String.format("select count(*) from functional.alltypes t " +
412  "where %s (select * from functional.alltypestiny p where " +
413  "p.int_col = t.int_col and p.bool_col = false)", op));
414  AnalyzesOk(String.format("select count(*) from functional.alltypes t, " +
415  "functional.alltypessmall s where s.id = t.id and %s (select * from " +
416  "functional.alltypestiny a where a.int_col = t.int_col)", op));
417  AnalyzesOk(String.format("select count(*) from functional.alltypes t, " +
418  "functional.alltypessmall s where s.id = t.id and %s (select * from " +
419  "functional.alltypestiny a where a.int_col = t.int_col and a.bool_col = " +
420  "t.bool_col)", op));
421  // Multiple [NOT] EXISTS predicates
422  AnalyzesOk(String.format("select 1 from functional.alltypestiny t where " +
423  "%s (select * from functional.alltypessmall s where s.id = t.id) and " +
424  "%s (select NULL from functional.alltypesagg g where t.int_col = g.int_col)",
425  op, op));
426  // OR between two subqueries
427  AnalysisError(String.format("select * from functional.alltypestiny t where " +
428  "%s (select * from functional.alltypesagg a where a.id = t.id) or %s " +
429  "(select * from functional.alltypessmall s where s.int_col = t.int_col)", op,
430  op), String.format("Subqueries in OR predicates are not supported: %s " +
431  "(SELECT * FROM functional.alltypesagg a WHERE a.id = t.id) OR %s (SELECT " +
432  "* FROM functional.alltypessmall s WHERE s.int_col = t.int_col)",
433  op.toUpperCase(), op.toUpperCase()));
434  // Complex correlation predicates
435  AnalyzesOk(String.format("select 1 from functional.alltypestiny t where " +
436  "%s (select * from functional.alltypesagg a where a.id = t.id + 1) and " +
437  "%s (select 1 from functional.alltypes s where s.int_col + s.bigint_col = " +
438  "t.bigint_col + 1)", op, op));
439  // Correlated predicates
440  AnalyzesOk(String.format("select * from functional.alltypestiny t where " +
441  "%s (select * from functional.alltypesagg g where t.int_col = g.int_col " +
442  "and t.bool_col = false)", op));
443  AnalyzesOk(String.format("select * from functional.alltypestiny t where " +
444  "%s (select id from functional.alltypessmall s where t.tinyint_col = " +
445  "s.tinyint_col and t.bool_col)", op));
446  // Multiple nesting levels
447  AnalyzesOk(String.format("select * from functional.alltypes t where %s " +
448  "(select * from functional.alltypessmall s where t.id = s.id and %s " +
449  "(select * from functional.alltypestiny g where g.int_col = s.int_col))",
450  op, op));
451  AnalyzesOk(String.format("select * from functional.alltypes t where %s " +
452  "(select * from functional.alltypessmall s where t.id = s.id and %s " +
453  "(select * from functional.alltypestiny g where g.bool_col = " +
454  "s.bool_col))", op, op));
455  // Correlated EXISTS subquery with a group by and aggregation
456  AnalyzesOk(String.format("select 1 from functional.alltypestiny t " +
457  "where %s (select id, count(*) from functional.alltypesagg g where " +
458  "t.id = g.id group by id)", op));
459  // Correlated EXISTS subquery with an analytic function
460  AnalyzesOk(String.format("select id, int_col, bool_col from " +
461  "functional.alltypestiny t1 where %s (select min(bigint_col) over " +
462  "(partition by bool_col) from functional.alltypessmall t2 where " +
463  "t1.id = t2.id)", op));
464  // Correlated EXISTS subquery with an analytic function and a group by
465  // clause
466  AnalyzesOk(String.format("select id, int_col, bool_col from " +
467  "functional.alltypestiny t1 where exists (select min(bigint_col) " +
468  "over (partition by bool_col) from functional.alltypessmall t2 " +
469  "where t1.id = t2.id group by bigint_col, bool_col)", op));
470 
471  String nullOps[] = {"is null", "is not null"};
472  for (String nullOp: nullOps) {
473  // Uncorrelated EXISTS subquery in an IS [NOT] NULL predicate
474  AnalyzesOk(String.format("select * from functional.alltypes where %s " +
475  "(select * from functional.alltypestiny) %s and id < 5", op, nullOp));
476  // Correlated EXISTS subquery in an IS [NOT] NULL predicate
477  AnalyzesOk(String.format("select * from functional.alltypes t where " +
478  "%s (select 1 from functional.alltypestiny s where t.id = s.id) " +
479  "%s and t.bool_col = false", op, nullOp));
480  }
481  }
482 
483  // Uncorrelated EXISTS subquery with an analytic function
484  AnalyzesOk("select * from functional.alltypestiny t " +
485  "where EXISTS (select id, min(int_col) over (partition by bool_col) " +
486  "from functional.alltypesagg a where bigint_col < 10)");
487 
488  // Different non-equi comparison operators in the correlated predicate
489  String nonEquiCmpOperators[] = {"!=", "<=", ">=", ">", "<"};
490  for (String cmpOp: nonEquiCmpOperators) {
491  AnalysisError(String.format("select * from functional.alltypes t where exists " +
492  "(select * from functional.alltypesagg a where t.id %s a.id)", cmpOp),
493  String.format("Unsupported predicate with subquery: EXISTS (SELECT * FROM " +
494  "functional.alltypesagg a WHERE t.id %s a.id)", cmpOp));
495  }
496  // Uncorrelated EXISTS in a query with GROUP BY
497  AnalyzesOk("select id, count(*) from functional.alltypes t " +
498  "where exists (select 1 from functional.alltypestiny where id < 5) group by id");
499  // Subquery with a correlated predicate that cannot be transformed into an
500  // equi-join
501  AnalysisError("select * from functional.alltypestiny t where " +
502  "exists (select int_col + 1 from functional.alltypessmall s where " +
503  "t.int_col = 10)", "Unsupported predicate with subquery: EXISTS " +
504  "(SELECT int_col + 1 FROM functional.alltypessmall s WHERE t.int_col = 10)");
505  // Uncorrelated EXISTS subquery
506  AnalyzesOk("select * from functional.alltypestiny where exists " +
507  "(select * from functional.alltypesagg where id < 10)");
508  AnalyzesOk("select id from functional.alltypestiny where exists " +
509  "(select id from functional.alltypessmall where bool_col = false)");
510  AnalyzesOk("select 1 from functional.alltypestiny t where exists " +
511  "(select 1 from functional.alltypessmall where id < 5)");
512  AnalyzesOk("select 1 + 1 from functional.alltypestiny where exists " +
513  "(select null from functional.alltypessmall where id != 5)");
514  // Multiple nesting levels with uncorrelated EXISTS
515  AnalyzesOk("select id from functional.alltypes where exists " +
516  "(select id from functional.alltypestiny where int_col < 10 and exists (" +
517  "select id from functional.alltypessmall where bool_col = true))");
518  // Uncorrelated NOT EXISTS subquery
519  AnalysisError("select * from functional.alltypestiny where not exists " +
520  "(select 1 from functional.alltypessmall where bool_col = false)",
521  "Unsupported uncorrelated NOT EXISTS subquery: SELECT 1 FROM " +
522  "functional.alltypessmall WHERE bool_col = FALSE");
523 
524  // Subquery references an explicit alias from the outer block in the FROM
525  // clause
526  AnalysisError("select * from functional.alltypestiny t where " +
527  "exists (select * from t)", "Could not resolve table reference: 't'");
528  // Uncorrelated subquery with no FROM clause
529  AnalyzesOk("select * from functional.alltypes where exists (select 1,2)");
530  // EXISTS subquery in a binary predicate
531  AnalysisError("select * from functional.alltypes where " +
532  "if(exists(select * from functional.alltypesagg), 1, 0) = 1",
533  "EXISTS subquery predicates are not supported in binary predicates: " +
534  "if(EXISTS (SELECT * FROM functional.alltypesagg), 1, 0) = 1");
535  // Correlated subquery with a LIMIT clause
536  AnalyzesOk("select count(*) from functional.alltypes t where exists " +
537  "(select 1 from functional.alltypesagg g where t.id = g.id limit 1)");
538 
539  // Column labels may conflict after the rewrite as an inline view
540  AnalyzesOk("select int_col from functional.alltypestiny where " +
541  "exists (select int_col from functional.alltypesagg)");
542  AnalyzesOk("select int_col from functional.alltypestiny a where " +
543  "not exists (select 1 as int_col from functional.alltypesagg b " +
544  "where a.int_col = b.int_col)");
545  }
546 
547  @Test
549  String aggFns[] = {"count(id)", "max(id)", "min(id)", "avg(id)", "sum(id)"};
550  for (String aggFn: aggFns) {
551  for (String cmpOp: cmpOperators) {
552  // Uncorrelated
553  AnalyzesOk(String.format("select * from functional.alltypes where id %s " +
554  "(select %s from functional.alltypestiny)", cmpOp, aggFn));
555  AnalyzesOk(String.format("select * from functional.alltypes where " +
556  "(select %s from functional.alltypestiny) %s id", aggFn, cmpOp));
557  // Uncorrelated with constant expr
558  AnalyzesOk(String.format("select * from functional.alltypes where 10 %s " +
559  "(select %s from functional.alltypestiny)", cmpOp, aggFn));
560  // Uncorrelated with complex cmp expr
561  AnalyzesOk(String.format("select * from functional.alltypes where id + 10 %s " +
562  "(select %s from functional.alltypestiny)", cmpOp, aggFn));
563  AnalyzesOk(String.format("select * from functional.alltypes where id + 10 %s " +
564  "(select %s + 1 from functional.alltypestiny)", cmpOp, aggFn));
565  AnalyzesOk(String.format("select * from functional.alltypes where " +
566  "(select %s + 1 from functional.alltypestiny) %s id + 10", aggFn, cmpOp));
567  AnalyzesOk(String.format("select 1 from functional.alltypes where " +
568  "1 + (select %s - 1 from functional.alltypestiny where bool_col = false) " +
569  "%s id - 10", aggFn, cmpOp));
570  AnalyzesOk(String.format("select 1 from functional.alltypestiny t1 where " +
571  "(select %s from functional.alltypes) - t1.id %s " +
572  "t1.tinyint_col", aggFn, cmpOp));
573  AnalyzesOk(String.format("select 1 from functional.alltypestiny t1 where " +
574  "(select %s from functional.alltypes) + t1.id %s " +
575  "t1.tinyint_col + t1.bigint_col + 1", aggFn, cmpOp));
576  AnalyzesOk(String.format("select 1 from functional.alltypestiny t1 inner " +
577  "join functional.alltypessmall t2 on t1.id = t2.id where " +
578  "(select %s from functional.alltypes) + 1 %s t1.int_col + t2.int_col",
579  aggFn, cmpOp));
580 
581  // Correlated
582  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
583  "id %s (select %s from functional.alltypestiny t where t.bool_col = false " +
584  "and a.int_col = t.int_col) and a.bigint_col < 10", cmpOp, aggFn));
585  // Correlated with constant expr
586  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
587  "10 %s (select %s from functional.alltypestiny t where t.bool_col = false " +
588  "and a.int_col = t.int_col) and a.bigint_col < 10", cmpOp, aggFn));
589 
590  // Correlated with complex expr
591  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
592  "id - 10 %s (select %s from functional.alltypestiny t where t.bool_col = " +
593  "false and a.int_col = t.int_col) and a.bigint_col < 10", cmpOp, aggFn));
594 
595  // count is not supported in select list expressions of a correlated subquery
596  if (aggFn.equals("count(id)")) {
597  AnalysisError(String.format("select count(*) from functional.alltypes a where " +
598  "id - 10 %s (select 1 + %s from functional.alltypestiny t where " +
599  "t.bool_col = false and a.int_col = t.int_col) and a.bigint_col < 10",
600  cmpOp, aggFn), String.format("Aggregate function that returns non-null " +
601  "on an empty input cannot be used in an expression in a " +
602  "correlated subquery's select list: (SELECT 1 + %s FROM " +
603  "functional.alltypestiny t WHERE t.bool_col = FALSE AND a.int_col = " +
604  "t.int_col)", aggFn));
605  } else {
606  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
607  "id - 10 %s (select 1 + %s from functional.alltypestiny t where " +
608  "t.bool_col = false and a.int_col = t.int_col) and a.bigint_col < 10",
609  cmpOp, aggFn));
610  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
611  "(select 1 + %s from functional.alltypestiny t where t.bool_col = false " +
612  "and a.int_col = t.int_col) %s id - 10 and a.bigint_col < 10", aggFn, cmpOp));
613  AnalyzesOk(String.format("select count(*) from functional.alltypes a where " +
614  "1 + (select 1 + %s from functional.alltypestiny t where t.id = a.id " +
615  "and t.int_col < 10) %s a.id + 10", aggFn, cmpOp));
616  }
617  }
618  }
619  // Column labels may conflict after the rewrite as an inline view
620  AnalyzesOk("select 1 from functional.alltypestiny where " +
621  "int_col = (select count(int_col) as int_col from functional.alltypesagg)");
622  AnalyzesOk("select 1 from functional.alltypestiny where " +
623  "int_col in (select sum(int_col) as int_col from functional.alltypesagg)");
624 
625  for (String cmpOp: cmpOperators) {
626  // Multiple tables in parent and subquery query blocks
627  AnalyzesOk(String.format("select * from functional.alltypes t, " +
628  "functional.alltypesagg a where a.id = t.id and t.int_col %s (" +
629  "select max(g.int_col) from functional.alltypestiny g left outer join " +
630  "functional.alltypessmall s on s.bigint_col = g.bigint_col where " +
631  "g.bool_col = false) and t.bool_col = true", cmpOp));
632  // Group by in the parent query block
633  AnalyzesOk(String.format("select t.int_col, count(*) from " +
634  "functional.alltypes t left outer join functional.alltypesagg g " +
635  "on t.id = g.id where t.bigint_col %s (select count(*) from " +
636  "functional.alltypestiny a where a.int_col < 10) and g.bool_col = false " +
637  "group by t.int_col having count(*) < 100", cmpOp));
638  // Multiple binary predicates
639  AnalyzesOk(String.format("select * from functional.alltypes a where " +
640  "int_col %s (select min(int_col) from functional.alltypesagg g where " +
641  "g.bool_col = false) and int_col %s (select max(int_col) from " +
642  "functional.alltypesagg g where g.bool_col = true) and a.tinyint_col = 10",
643  cmpOp, cmpOp));
644  // Multiple nesting levels
645  AnalyzesOk(String.format("select * from functional.alltypes a where " +
646  "tinyint_col %s (select count(*) from functional.alltypesagg g where " +
647  "g.int_col %s (select max(int_col) from functional.alltypestiny t where " +
648  "t.id = g.id) and g.id = a.id and g.bool_col = false) and a.int_col < 10",
649  cmpOp, cmpOp));
650  // NOT with a binary subquery predicate
651  AnalyzesOk(String.format("select * from functional.alltypes a where " +
652  "not (int_col %s (select max(int_col) from functional.alltypesagg g where " +
653  "a.id = g.id and g.bool_col = false))", cmpOp));
654  // Subquery returns a scalar (no FORM clause)
655  AnalyzesOk(String.format("select id from functional.alltypestiny where id %s " +
656  "(select 1)", cmpOp));
657  // Incompatible comparison types
658  AnalysisError(String.format("select id from functional.alltypestiny where " +
659  "int_col %s (select max(timestamp_col) from functional.alltypessmall)", cmpOp),
660  String.format("operands of type INT and TIMESTAMP are not comparable: " +
661  "int_col %s (SELECT max(timestamp_col) FROM functional.alltypessmall)", cmpOp));
662  // Distinct in the outer select block
663  AnalyzesOk(String.format("select distinct id from functional.alltypes a " +
664  "where 100 %s (select count(*) from functional.alltypesagg g where " +
665  "a.int_col %s g.int_col) and a.bool_col = false", cmpOp, cmpOp));
666  }
667 
668  // Subquery returns multiple rows
669  AnalysisError("select * from functional.alltypestiny where " +
670  "(select max(id) from functional.alltypes) = " +
671  "(select id from functional.alltypestiny)",
672  "Subquery must return a single row: " +
673  "(SELECT id FROM functional.alltypestiny)");
674  AnalysisError("select id from functional.alltypestiny t where int_col = " +
675  "(select int_col from functional.alltypessmall limit 2)",
676  "Subquery must return a single row: " +
677  "(SELECT int_col FROM functional.alltypessmall LIMIT 2)");
678  AnalysisError("select id from functional.alltypestiny where int_col = " +
679  "(select id from functional.alltypessmall)",
680  "Subquery must return a single row: " +
681  "(SELECT id FROM functional.alltypessmall)");
682 
683  // Subquery returns multiple columns
684  AnalysisError("select id from functional.alltypestiny where int_col = " +
685  "(select id, int_col from functional.alltypessmall)",
686  "Subquery must return a single row: " +
687  "(SELECT id, int_col FROM functional.alltypessmall)");
688  AnalysisError("select * from functional.alltypestiny where id in " +
689  "(select * from (values(1,2)) as t)",
690  "Subquery must return a single column: (SELECT * FROM (VALUES(1, 2)) t)");
691 
692  // Subquery returns multiple columns due to a group by clause
693  AnalysisError("select id from functional.alltypestiny where int_col = " +
694  "(select int_col, count(*) from functional.alltypessmall group by int_col)",
695  "Subquery must return a single row: " +
696  "(SELECT int_col, count(*) FROM functional.alltypessmall " +
697  "GROUP BY int_col)");
698 
699  // Outer join with a table from the outer block using an explicit alias
700  AnalysisError("select id from functional.alltypestiny t where int_col = " +
701  "(select count(*) from functional.alltypessmall s left outer join t " +
702  "on (t.id = s.id))", "Could not resolve table reference: 't'");
703  AnalysisError("select id from functional.alltypestiny t where int_col = " +
704  "(select count(*) from functional.alltypessmall s right outer join t " +
705  "on (t.id = s.id))", "Could not resolve table reference: 't'");
706  AnalysisError("select id from functional.alltypestiny t where int_col = " +
707  "(select count(*) from functional.alltypessmall s full outer join t " +
708  "on (t.id = s.id))", "Could not resolve table reference: 't'");
709 
710  // Multiple subqueries in a binary predicate
711  AnalysisError("select * from functional.alltypestiny t where " +
712  "(select count(*) from functional.alltypessmall) = " +
713  "(select count(*) from functional.alltypesagg)", "Multiple subqueries are not " +
714  "supported in binary predicates: (SELECT count(*) FROM " +
715  "functional.alltypessmall) = (SELECT count(*) FROM functional.alltypesagg)");
716  AnalysisError("select * from functional.alltypestiny t where " +
717  "(select max(id) from functional.alltypessmall) + " +
718  "(select min(id) from functional.alltypessmall) - " +
719  "(select count(id) from functional.alltypessmall) < 1000",
720  "Multiple subqueries are not supported in binary predicates: (SELECT max(id) " +
721  "FROM functional.alltypessmall) + (SELECT min(id) FROM " +
722  "functional.alltypessmall) - (SELECT count(id) FROM functional.alltypessmall) " +
723  "< 1000");
724 
725  // Comparison between invalid types
726  AnalysisError("select * from functional.alltypes where " +
727  "(select max(string_col) from functional.alltypesagg) = 1",
728  "operands of type STRING and TINYINT are not comparable: (SELECT " +
729  "max(string_col) FROM functional.alltypesagg) = 1");
730 
731  // Aggregate subquery with a LIMIT 1 clause
732  AnalyzesOk("select id from functional.alltypestiny t where int_col = " +
733  "(select int_col from functional.alltypessmall limit 1)");
734  // Correlated aggregate suquery with correlated predicate that can't be
735  // transformed into an equi-join
736  AnalyzesOk("select id from functional.alltypestiny t where " +
737  "1 < (select sum(int_col) from functional.alltypessmall s where " +
738  "t.id < 10)");
739  // Aggregate subqueries in an IS [NOT] NULL predicate
740  String nullOps[] = {"is null", "is not null"};
741  for (String aggFn: aggFns) {
742  for (String nullOp: nullOps) {
743  // Uncorrelated aggregate subquery
744  AnalyzesOk(String.format("select * from functional.alltypestiny where " +
745  "(select %s from functional.alltypessmall where bool_col = false) " +
746  "%s and int_col < 10", aggFn, nullOp));
747  // Correlated aggregate subquery
748  AnalyzesOk(String.format("select * from functional.alltypestiny t where " +
749  "(select %s from functional.alltypessmall s where s.id = t.id " +
750  "and s.bool_col = false) %s and bool_col = true", aggFn, nullOp));
751  }
752  }
753  // Aggregate subquery with a correlated predicate that can't be transformed
754  // into an equi-join in an IS NULL predicate
755  AnalyzesOk("select 1 from functional.alltypestiny t where " +
756  "(select max(id) from functional.alltypessmall s where t.id < 10) " +
757  "is null");
758 
759  // Mathematical functions with scalar subqueries
760  String mathFns[] = {"abs", "cos", "ceil", "floor"};
761  for (String mathFn: mathFns) {
762  for (String aggFn: aggFns) {
763  String expr = aggFn.equals("count(id)") ? "" : "1 + ";
764  for (String cmpOp: cmpOperators) {
765  // Uncorrelated scalar subquery
766  AnalyzesOk(String.format("select count(*) from functional.alltypes t where " +
767  "%s((select %s %s from functional.alltypessmall where bool_col = " +
768  "false)) %s 100 - t.int_col and t.bigint_col < 100", mathFn, expr, aggFn,
769  cmpOp));
770  // Correlated scalar subquery
771  AnalyzesOk(String.format("select count(*) from functional.alltypes t where " +
772  "%s((select %s %s from functional.alltypessmall s where bool_col = false " +
773  "and t.id = s.id)) %s 100 - t.int_col and t.bigint_col < 100", mathFn, expr,
774  aggFn, cmpOp));
775  }
776  }
777  }
778 
779  // Conditional functions with scalar subqueries
780  for (String aggFn: aggFns) {
781  AnalyzesOk(String.format("select * from functional.alltypestiny t where " +
782  "nullifzero((select %s from functional.alltypessmall s where " +
783  "s.bool_col = false)) is null", aggFn));
784  AnalyzesOk(String.format("select count(*) from functional.alltypes t where " +
785  "zeroifnull((select %s from functional.alltypessmall s where t.id = s.id)) " +
786  "= 0 and t.int_col < 10", aggFn));
787  AnalyzesOk(String.format("select 1 from functional.alltypes t where " +
788  "isnull((select %s from functional.alltypestiny s where s.bool_col = false " +
789  "), 10) < 5", aggFn));
790  }
791 
792  // Correlated aggregate subquery with a GROUP BY
793  AnalysisError("select min(t.id) as min_id from functional.alltypestiny t " +
794  "where t.int_col < (select max(s.int_col) from functional.alltypessmall s " +
795  "where s.id = t.id group by s.bigint_col order by 1 limit 1)",
796  "Unsupported correlated subquery with grouping and/or aggregation: " +
797  "SELECT max(s.int_col) FROM functional.alltypessmall s WHERE " +
798  "s.id = t.id GROUP BY s.bigint_col ORDER BY 1 ASC LIMIT 1");
799  // Correlated aggregate subquery with a LIMIT clause
800  AnalyzesOk("select count(*) from functional.alltypes t where " +
801  "t.id = (select count(*) from functional.alltypesagg g where " +
802  "g.int_col = t.int_col limit 1)");
803 
804  // Aggregate subquery with analytic function
805  AnalysisError("select id, int_col, bool_col from " +
806  "functional.alltypestiny t1 where int_col = (select min(bigint_col) " +
807  "over (partition by bool_col) from functional.alltypessmall t2 where " +
808  "int_col < 10)", "Subquery must return a single row: (SELECT " +
809  "min(bigint_col) OVER (PARTITION BY bool_col) FROM " +
810  "functional.alltypessmall t2 WHERE int_col < 10)");
811 
812  // Aggregate subquery with analytic function and limit 1 clause
813  AnalysisError("select id, int_col, bool_col from " +
814  "functional.alltypestiny t1 where int_col = (select min(bigint_col) " +
815  "over (partition by bool_col) from functional.alltypessmall t2 where " +
816  "t1.id = t2.id and int_col < 10 limit 1)", "Unsupported correlated " +
817  "subquery with grouping and/or aggregation: SELECT min(bigint_col) " +
818  "OVER (PARTITION BY bool_col) FROM functional.alltypessmall t2 WHERE " +
819  "t1.id = t2.id AND int_col < 10 LIMIT 1");
820 
821  // Uncorrelated aggregate subquery with analytic function and limit 1 clause
822  AnalyzesOk("select id, int_col, bool_col from " +
823  "functional.alltypestiny t1 where int_col = (select min(bigint_col) " +
824  "over (partition by bool_col) from functional.alltypessmall t2 where " +
825  "int_col < 10 limit 1)");
826 
827  // Subquery with distinct in binary predicate
828  AnalysisError("select * from functional.alltypes where int_col = " +
829  "(select distinct int_col from functional.alltypesagg)", "Subquery " +
830  "must return a single row: (SELECT DISTINCT int_col FROM " +
831  "functional.alltypesagg)");
832  AnalyzesOk("select * from functional.alltypes where int_col = " +
833  "(select count(distinct int_col) from functional.alltypesagg)");
834  // Multiple count aggregate functions in a correlated subquery's select list
835  AnalysisError("select * from functional.alltypes t where " +
836  "int_col = (select count(id) + count(int_col) - 1 from " +
837  "functional.alltypesagg g where g.int_col = t.int_col)",
838  "Aggregate function that returns non-null on an empty input " +
839  "cannot be used in an expression in a correlated " +
840  "subquery's select list: (SELECT count(id) + count(int_col) - 1 " +
841  "FROM functional.alltypesagg g WHERE g.int_col = t.int_col)");
842 
843  // UDAs in aggregate subqqueries
844  addTestUda("AggFn", Type.BIGINT, Type.BIGINT);
845  AnalysisError("select * from functional.alltypesagg g where " +
846  "(select aggfn(int_col) from functional.alltypes s where " +
847  "s.id = g.id) = 10", "UDAs are not supported in the select list of " +
848  "correlated subqueries: (SELECT default.aggfn(int_col) FROM " +
849  "functional.alltypes s WHERE s.id = g.id)");
850  AnalyzesOk("select * from functional.alltypesagg g where " +
851  "(select aggfn(int_col) from functional.alltypes s where " +
852  "s.bool_col = false) < 10");
853 
854  // sample, histogram in scalar subqueries
855  String aggFnsReturningStringOnEmpty[] = {"sample(int_col)", "histogram(int_col)"};
856  for (String aggFn: aggFnsReturningStringOnEmpty) {
857  AnalyzesOk(String.format("select * from functional.alltypestiny t where " +
858  "t.string_col = (select %s from functional.alltypesagg g where t.id = " +
859  "g.id)", aggFn));
860  }
861  // Complex correlated predicate in which columns from the subquery appear in
862  // both sides of a correlated binary predicate
863  AnalysisError("select 1 from functional.alltypestiny t where " +
864  "(select sum(t1.id) from functional.alltypesagg t1 inner join " +
865  "functional.alltypes t2 on t1.id = t2.id where " +
866  "t1.id + t2.id = t.int_col + t1.int_col) = t.int_col",
867  "All subquery columns that participate in a predicate " +
868  "must be on the same side of that predicate: t1.id + t2.id = t.int_col " +
869  "+ t1.int_col");
870  }
871 
872  @Test
873  public void TestSubqueries() throws AnalysisException {
874  // Test resolution of column references inside subqueries.
875  // Correlated column references can be qualified or unqualified.
876  AnalyzesOk("select * from functional.jointbl t where exists " +
877  "(select id from functional.alltypes where id = test_id and id = t.test_id)");
878  // Correlated column references are invalid outside of WHERE and ON clauses.
879  AnalysisError("select * from functional.jointbl t where exists " +
880  "(select t.test_id = id from functional.alltypes)",
881  "Could not resolve column/field reference: 't.test_id'");
882  AnalysisError("select * from functional.jointbl t where test_zip in " +
883  "(select count(*) from functional.alltypes group by t.test_id)",
884  "Could not resolve column/field reference: 't.test_id'");
885  AnalysisError("select * from functional.jointbl t where exists " +
886  "(select 1 from functional.alltypes order by t.test_id limit 1)",
887  "Could not resolve column/field reference: 't.test_id'");
888  // Star exprs cannot reference an alias from a parent block.
889  AnalysisError("select * from functional.jointbl t where exists " +
890  "(select t.* from functional.alltypes)",
891  "Could not resolve star expression: 't.*'");
892 
893  // EXISTS, IN and aggregate subqueries
894  AnalyzesOk("select * from functional.alltypes t where exists " +
895  "(select * from functional.alltypesagg a where a.int_col = " +
896  "t.int_col) and t.bigint_col in (select bigint_col from " +
897  "functional.alltypestiny s) and t.bool_col = false and " +
898  "t.int_col = (select min(int_col) from functional.alltypesagg)");
899  // Nested IN with an EXISTS subquery that contains an aggregate subquery
900  AnalyzesOk("select count(*) from functional.alltypes t where t.id " +
901  "in (select id from functional.alltypesagg a where a.int_col = " +
902  "t.int_col and exists (select * from functional.alltypestiny s " +
903  "where s.bool_col = a.bool_col and s.int_col = (select min(int_col) " +
904  "from functional.alltypessmall where bigint_col = 10)))");
905  // Nested EXISTS with an IN subquery that has a nested aggregate subquery
906  AnalyzesOk("select count(*) from functional.alltypes t where exists " +
907  "(select * from functional.alltypesagg a where a.id in (select id " +
908  "from functional.alltypestiny s where bool_col = false and " +
909  "s.int_col < (select max(int_col) from functional.alltypessmall where " +
910  "bigint_col < 100)) and a.int_col = t.int_col)");
911  // Nested aggregate subqueries with EXISTS and IN subqueries
912  AnalyzesOk("select count(*) from functional.alltypes t where t.int_col = " +
913  "(select avg(g.int_col) * 2 from functional.alltypesagg g where g.id in " +
914  "(select id from functional.alltypessmall s where exists (select " +
915  "* from functional.alltypestiny a where a.int_col = s.int_col and " +
916  "a.bigint_col < 10)))");
917 
918  // INSERT SELECT
919  AnalyzesOk("insert into functional.alltypessmall partition (year, month) " +
920  "select * from functional.alltypes where id in (select id from " +
921  "functional.alltypesagg a where a.bool_col = false)");
922  AnalyzesOk("insert into functional.alltypessmall partition (year, month) " +
923  "select * from functional.alltypes t where int_col in (select int_col " +
924  "from functional.alltypesagg a where a.id = t.id) and exists " +
925  "(select * from functional.alltypestiny s where s.bigint_col = " +
926  "t.bigint_col) and int_col < (select min(int_col) from functional.alltypes)");
927  AnalyzesOk("insert into functional.alltypessmall partition (year, month) " +
928  "select * from functional.alltypestiny where id = (select 1) " +
929  "union select * from functional.alltypestiny where id = (select 2)");
930 
931  // CTAS with correlated subqueries
932  AnalyzesOk("create table functional.test_tbl as select * from " +
933  "functional.alltypes t where t.id in (select id from functional.alltypesagg " +
934  "a where a.int_col = t.int_col and a.bool_col = false) and not exists " +
935  "(select * from functional.alltypestiny s where s.int_col = t.int_col) " +
936  "and t.bigint_col = (select count(*) from functional.alltypessmall)");
937  AnalyzesOk("create table functional.test_tbl as " +
938  "select * from functional.alltypestiny where id = (select 1) " +
939  "union select * from functional.alltypestiny where id = (select 2)");
940 
941  // Predicate with a child subquery in the HAVING clause
942  AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
943  "id having count(*) > (select count(*) from functional.alltypesagg)",
944  "Subqueries are not supported in the HAVING clause.");
945  AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
946  "id having (select count(*) from functional.alltypesagg) > 10",
947  "Subqueries are not supported in the HAVING clause.");
948 
949  // Subquery in the select list
950  AnalysisError("select id, (select int_col from functional.alltypestiny) " +
951  "from functional.alltypestiny",
952  "Subqueries are not supported in the select list.");
953 
954  // Subquery in the GROUP BY clause
955  AnalysisError("select id, count(*) from functional.alltypestiny " +
956  "group by (select int_col from functional.alltypestiny)",
957  "Subqueries are not supported in the GROUP BY clause.");
958 
959  // Subquery in the ORDER BY clause
960  AnalysisError("select id from functional.alltypestiny " +
961  "order by (select int_col from functional.alltypestiny)",
962  "Subqueries are not supported in the ORDER BY clause.");
963 
964  // Subquery with an inline view
965  AnalyzesOk("select id from functional.alltypestiny t where exists " +
966  "(select * from (select id, int_col from functional.alltypesagg) a where " +
967  "a.id < 10 and a.int_col = t.int_col)");
968 
969  // Inner block references an inline view in the outer block
970  AnalysisError("select id from (select * from functional.alltypestiny) t " +
971  "where t.int_col = (select count(*) from t)",
972  "Could not resolve table reference: 't'");
973  AnalysisError("select id from (select * from functional.alltypestiny) t " +
974  "where t.int_col = (select count(*) from t) and " +
975  "t.string_col in (select string_col from t)",
976  "Could not resolve table reference: 't'");
977  AnalysisError("select id from (select * from functional.alltypestiny) t " +
978  "where exists (select * from t, functional.alltypesagg p where " +
979  "t.id = p.id)", "Could not resolve table reference: 't'");
980 
981  // Subquery referencing a view
982  AnalyzesOk("select * from functional.alltypes a where exists " +
983  "(select * from functional.alltypes_view b where b.id = a.id)");
984  // Same view referenced in both the inner and outer block
985  AnalyzesOk("select * from functional.alltypes_view a where exists " +
986  "(select * from functional.alltypes_view b where a.id = b.id)");
987 
988  // Union query with subqueries
989  AnalyzesOk("select * from functional.alltypes where id = " +
990  "(select max(id) from functional.alltypestiny) union " +
991  "select * from functional.alltypes where id = " +
992  "(select min(id) from functional.alltypessmall)");
993  AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
994  "union all select * from functional.alltypes where id in " +
995  "(select int_col from functional.alltypestiny)");
996  AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
997  "union select * from (select * from functional.alltypes where id in " +
998  "(select int_col from functional.alltypestiny)) t");
999 
1000  // Union in the subquery
1001  AnalysisError("select * from functional.alltypes where exists " +
1002  "(select id from functional.alltypestiny union " +
1003  "select id from functional.alltypesagg)",
1004  "A subquery must contain a single select block: " +
1005  "(SELECT id FROM functional.alltypestiny UNION " +
1006  "SELECT id FROM functional.alltypesagg)");
1007  AnalysisError("select * from functional.alltypes where exists (values(1))",
1008  "A subquery must contain a single select block: (VALUES(1))");
1009 
1010  // Subquery in LIMIT
1011  AnalysisError("select * from functional.alltypes limit " +
1012  "(select count(*) from functional.alltypesagg)",
1013  "LIMIT expression must be a constant expression: " +
1014  "(SELECT count(*) FROM functional.alltypesagg)");
1015 
1016  // NOT predicates in conjunction with subqueries
1017  AnalyzesOk("select * from functional.alltypes t where t.id not in " +
1018  "(select id from functional.alltypesagg g where g.bool_col = false) " +
1019  "and t.string_col not like '%1%' and not (t.int_col < 5) " +
1020  "and not (t.int_col is null) and not (t.int_col between 5 and 10)");
1021  // IS NULL with an InPredicate that contains a subquery
1022  AnalysisError("select * from functional.alltypestiny t where (id in " +
1023  "(select id from functional.alltypes)) is null", "Unsupported IS NULL " +
1024  "predicate that contains a subquery: (id IN (SELECT id FROM " +
1025  "functional.alltypes)) IS NULL");
1026  // IS NULL with a BinaryPredicate that contains a subquery
1027  AnalyzesOk("select * from functional.alltypestiny where (id = " +
1028  "(select max(id) from functional.alltypessmall)) is null");
1029 
1030  // between predicates with subqueries
1031  AnalyzesOk("select * from functional.alltypestiny where " +
1032  "(select avg(id) from functional.alltypesagg where bool_col = true) " +
1033  "between 1 and 100 and int_col < 10");
1034  AnalyzesOk("select count(*) from functional.alltypestiny t where " +
1035  "(select count(id) from functional.alltypesagg g where t.id = g.id " +
1036  "and g.bigint_col < 10) between 1 and 1000");
1037  AnalyzesOk("select id from functional.alltypestiny where " +
1038  "int_col between (select min(int_col) from functional.alltypesagg where " +
1039  "id < 10) and 100 and bool_col = false");
1040  AnalyzesOk("select * from functional.alltypessmall s where " +
1041  "int_col between (select count(t.id) from functional.alltypestiny t where " +
1042  "t.int_col = s.int_col) and (select max(int_col) from " +
1043  "functional.alltypes a where a.id = s.id and a.bool_col = false)");
1044  AnalyzesOk("select * from functional.alltypessmall where " +
1045  "int_col between (select min(int_col) from functional.alltypestiny) and " +
1046  "(select max(int_col) from functional.alltypestiny) and bigint_col between " +
1047  "(select min(bigint_col) from functional.alltypesagg) and (select " +
1048  "max(bigint_col) from functional.alltypesagg)");
1049  AnalysisError("select * from functional.alltypestiny where (select min(id) " +
1050  "from functional.alltypes) between 1 and (select max(id) from " +
1051  "functional.alltypes)", "Comparison between subqueries is not supported " +
1052  "in a between predicate: (SELECT min(id) FROM functional.alltypes) BETWEEN " +
1053  "1 AND (SELECT max(id) FROM functional.alltypes)");
1054  AnalyzesOk("select * from functional.alltypestiny where " +
1055  "int_col between 0 and 10 and exists (select 1)");
1056  AnalyzesOk("select * from functional.alltypestiny a where " +
1057  "double_col between cast(1 as double) and cast(10 as double) and " +
1058  "exists (select 1 from functional.alltypessmall b where a.id = b.id)");
1059  }
1060 }
static final ScalarType BIGINT
Definition: Type.java:50
void addTestUda(String name, Type retType, Type...argTypes)
uint64_t Test(T *ht, const ProbeTuple *input, uint64_t num_tuples)