3 package com.cloudera.impala.catalog;
5 import static com.cloudera.impala.thrift.ImpalaInternalServiceConstants.DEFAULT_PARTITION_ID;
6 import static org.junit.Assert.assertEquals;
7 import static org.junit.Assert.assertNotNull;
8 import static org.junit.Assert.assertNull;
9 import static org.junit.Assert.assertTrue;
11 import java.util.ArrayList;
12 import java.util.Iterator;
13 import java.util.List;
16 import org.apache.hadoop.hive.metastore.TableType;
17 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
26 import com.google.common.collect.Lists;
27 import com.google.common.collect.Sets;
31 CatalogServiceTestCatalog.create();
35 Table tbl = db.getTable(tblName);
36 assertEquals(tbl.
getName(), tblName);
38 List<Column> cols = tbl.getColumns();
39 assertEquals(colNames.length, colTypes.length);
40 assertEquals(cols.size(), colNames.length);
41 Iterator<Column> it = cols.iterator();
43 while (it.hasNext()) {
45 assertEquals(col.
getName(), colNames[i]);
46 assertTrue(col.
getType().equals(colTypes[i]));
52 String[] hiveColNames, String[] colFamilies, String[] colQualifiers,
57 List<Column> cols = tbl.getColumns();
58 assertEquals(colFamilies.length, colTypes.length);
59 assertEquals(colQualifiers.length, colTypes.length);
60 Iterator<Column> it = cols.iterator();
62 while (it.hasNext()) {
72 Db functionalDb = catalog_.getDb(
"functional");
73 assertNotNull(functionalDb);
74 assertEquals(functionalDb.
getName(),
"functional");
100 Db testDb = catalog_.getDb(
"functional_seq");
101 assertNotNull(testDb);
102 assertEquals(testDb.getName(),
"functional_seq");
106 Db hbaseDb = catalog_.getDb(
"functional_hbase");
107 assertNotNull(hbaseDb);
108 assertEquals(hbaseDb.getName(),
"functional_hbase");
115 "hbasealltypeserrornonulls"));
121 {
"year",
"month",
"id",
"bool_col",
"tinyint_col",
"smallint_col",
122 "int_col",
"bigint_col",
"float_col",
"double_col",
"date_string_col",
123 "string_col",
"timestamp_col"},
131 new String[] {
"id",
"name",
"zip"},
135 new String[] {
"id",
"name",
"zip"},
140 "str_col",
"match_like_col",
"no_match_like_col",
"match_regex_col",
141 "no_match_regex_col"},
146 new String[] {
"id",
"name",
"zip"},
150 new String[] {
"test_id",
"test_name",
"test_zip",
"alltypes_id"},
157 {
"id",
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
158 "int_col",
"month",
"smallint_col",
"string_col",
"timestamp_col",
159 "tinyint_col",
"year"},
161 {
":key",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d"},
163 {null,
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
164 "int_col",
"month",
"smallint_col",
"string_col",
"timestamp_col",
165 "tinyint_col",
"year"},
174 "functional_hbase.hbasealltypeserror",
176 {
"id",
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
177 "int_col",
"smallint_col",
"string_col",
"timestamp_col",
"tinyint_col"},
179 {
":key",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d"},
181 {null,
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
182 "int_col",
"smallint_col",
"string_col",
"timestamp_col",
"tinyint_col"},
190 "functional_hbase.hbasealltypeserrornonulls",
192 {
"id",
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
193 "int_col",
"smallint_col",
"string_col",
"timestamp_col",
"tinyint_col"},
195 {
":key",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d"},
197 {null,
"bigint_col",
"bool_col",
"date_string_col",
"double_col",
"float_col",
198 "int_col",
"smallint_col",
"string_col",
"timestamp_col",
"tinyint_col"},
207 {
"id",
"bigint_col",
"bool_col",
"date_string_col",
"day",
"double_col",
208 "float_col",
"int_col",
"month",
"smallint_col",
"string_col",
209 "timestamp_col",
"tinyint_col",
"year"},
211 {
":key",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d"},
213 {null,
"bigint_col",
"bool_col",
"date_string_col",
"day",
"double_col",
214 "float_col",
"int_col",
"month",
"smallint_col",
"string_col",
215 "timestamp_col",
"tinyint_col",
"year"},
225 {
"id",
"bigint_col",
"bool_col",
"date_string_col",
"day",
"double_col",
226 "float_col",
"int_col",
"month",
"smallint_col",
"string_col",
227 "timestamp_col",
"tinyint_col",
"year"},
229 {
":key",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d",
"d"},
231 {null,
"bigint_col",
"bool_col",
"date_string_col",
"day",
"double_col",
232 "float_col",
"int_col",
"month",
"smallint_col",
"string_col",
233 "timestamp_col",
"tinyint_col",
"year"},
249 {
"pagerank",
"pageurl",
"avgduration"},
255 {
"sourceip",
"desturl",
"visitdate",
"adrevenue",
"useragent",
256 "ccode",
"lcode",
"skeyword",
"avgtimeonsite"},
264 catalog_.getOrLoadTable(
"functional",
"AllTypes"));
271 List<HdfsPartition> partitions = table.getPartitions();
275 assertEquals(25, partitions.size());
276 Set<Long> months = Sets.newHashSet();
278 if (p.getId() == DEFAULT_PARTITION_ID) {
282 assertEquals(2, p.getPartitionValues().size());
284 LiteralExpr key1Expr = p.getPartitionValues().
get(0);
286 long key1 = ((NumericLiteral) key1Expr).getLongValue();
287 assertTrue(key1 == 2009 || key1 == 2010);
289 LiteralExpr key2Expr = p.getPartitionValues().
get(1);
290 assertTrue(key2Expr instanceof NumericLiteral);
291 long key2 = ((NumericLiteral) key2Expr).getLongValue();
292 assertTrue(key2 >= 1 && key2 <= 12);
294 months.add(key1 * 100 + key2);
296 assertEquals(p.getFileDescriptors().size(), 1);
298 assertEquals(months.size(), 24);
310 Column idCol = table.getColumn(
"id");
311 assertEquals(idCol.getStats().getAvgSerializedSize() -
313 PrimitiveType.INT.getSlotSize(), 0.0001);
314 assertEquals(idCol.getStats().getMaxSize(),
PrimitiveType.
INT.getSlotSize());
315 assertTrue(!idCol.getStats().hasNulls());
317 Column boolCol = table.getColumn(
"bool_col");
318 assertEquals(boolCol.
getStats().getAvgSerializedSize() -
320 PrimitiveType.BOOLEAN.getSlotSize(), 0.0001);
322 assertTrue(!boolCol.
getStats().hasNulls());
324 Column tinyintCol = table.getColumn(
"tinyint_col");
325 assertEquals(tinyintCol.
getStats().getAvgSerializedSize() -
327 PrimitiveType.TINYINT.getSlotSize(), 0.0001);
328 assertEquals(tinyintCol.
getStats().getMaxSize(),
330 assertTrue(tinyintCol.
getStats().hasNulls());
332 Column smallintCol = table.getColumn(
"smallint_col");
333 assertEquals(smallintCol.
getStats().getAvgSerializedSize() -
335 PrimitiveType.SMALLINT.getSlotSize(), 0.0001);
336 assertEquals(smallintCol.
getStats().getMaxSize(),
338 assertTrue(smallintCol.
getStats().hasNulls());
340 Column intCol = table.getColumn(
"int_col");
341 assertEquals(intCol.
getStats().getAvgSerializedSize() -
343 PrimitiveType.INT.getSlotSize(), 0.0001);
345 assertTrue(intCol.
getStats().hasNulls());
347 Column bigintCol = table.getColumn(
"bigint_col");
348 assertEquals(bigintCol.
getStats().getAvgSerializedSize() -
350 PrimitiveType.BIGINT.getSlotSize(), 0.0001);
352 assertTrue(bigintCol.
getStats().hasNulls());
354 Column floatCol = table.getColumn(
"float_col");
355 assertEquals(floatCol.
getStats().getAvgSerializedSize() -
357 PrimitiveType.FLOAT.getSlotSize(), 0.0001);
359 assertTrue(floatCol.
getStats().hasNulls());
361 Column doubleCol = table.getColumn(
"double_col");
362 assertEquals(doubleCol.
getStats().getAvgSerializedSize() -
364 PrimitiveType.DOUBLE.getSlotSize(), 0.0001);
366 assertTrue(doubleCol.
getStats().hasNulls());
368 Column timestampCol = table.getColumn(
"timestamp_col");
369 assertEquals(timestampCol.
getStats().getAvgSerializedSize() -
371 PrimitiveType.TIMESTAMP.getSlotSize(), 0.0001);
372 assertEquals(timestampCol.
getStats().getMaxSize(),
378 Column stringCol = table.getColumn(
"string_col");
379 assertTrue(stringCol.getStats().getAvgSerializedSize() >=
381 assertTrue(stringCol.getStats().getAvgSerializedSize() > 0);
382 assertTrue(stringCol.getStats().getMaxSize() > 0);
383 assertTrue(!stringCol.getStats().hasNulls());
406 ColumnStatisticsData stringColStatsData = client.getHiveClient()
407 .getTableColumnStatistics(
"functional",
"alltypesagg",
408 Lists.newArrayList(
"string_col")).
get(0).getStatsData();
410 assertTrue(!table.
getColumn(
"int_col").updateStats(stringColStatsData));
413 assertTrue(!table.
getColumn(
"double_col").updateStats(stringColStatsData));
416 assertTrue(!table.
getColumn(
"bool_col").updateStats(stringColStatsData));
420 ColumnStatisticsData bigIntCol = client.getHiveClient()
421 .getTableColumnStatistics(
"functional",
"alltypes",
422 Lists.newArrayList(
"bigint_col")).
get(0).getStatsData();
423 assertTrue(!table.
getColumn(
"string_col").updateStats(bigIntCol));
427 assertTrue(table.
getColumn(
"string_col").updateStats(stringColStatsData));
428 assertEquals(1178, table.
getColumn(
"string_col").getStats().getNumDistinctValues());
437 assertEquals(-1, column.
getStats().getNumDistinctValues());
438 assertEquals(-1, column.
getStats().getNumNulls());
439 double expectedSize = column.getType().isFixedLengthType() ?
440 column.getType().getSlotSize() : -1;
442 assertEquals(expectedSize, column.
getStats().getAvgSerializedSize(), 0.0001);
443 assertEquals(expectedSize, column.
getStats().getMaxSize(), 0.0001);
451 assertNotNull(
"functional_hbase.internal_hbase_table was not found", table);
456 Db nonExistentDb = catalog_.getDb(
"doesnotexist");
457 assertNull(nonExistentDb);
462 Table table = catalog_.getOrLoadTable(
"functional",
"alltypes");
465 assertEquals(System.getProperty(
"user.name"), table.getMetaStoreTable().getOwner());
466 assertEquals(TableType.EXTERNAL_TABLE.toString(),
467 table.getMetaStoreTable().getTableType());
469 table = catalog_.getOrLoadTable(
"functional",
"alltypesinsert");
470 assertEquals(System.getProperty(
"user.name"), table.getMetaStoreTable().getOwner());
471 assertEquals(TableType.MANAGED_TABLE.toString(),
472 table.getMetaStoreTable().getTableType());
477 Table table = catalog_.getOrLoadTable(
"functional",
"hive_index_tbl");
479 IncompleteTable incompleteTable = (IncompleteTable) table;
481 assertEquals(
"Unsupported table type 'INDEX_TABLE' for: functional.hive_index_tbl",
482 incompleteTable.
getCause().getMessage());
485 table = catalog_.getOrLoadTable(
"functional",
"bad_serde");
486 assertTrue(table instanceof IncompleteTable);
487 incompleteTable = (IncompleteTable) table;
489 assertEquals(
"Impala does not support tables of this type. REASON: SerDe" +
490 " library 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe' " +
491 "is not supported.", incompleteTable.
getCause().getCause().getMessage());
495 table = catalog_.getOrLoadTable(
"functional_rc",
"rcfile_lazy_binary_serde");
496 assertTrue(table instanceof IncompleteTable);
497 incompleteTable = (IncompleteTable) table;
499 assertEquals(
"Impala does not support tables of this type. REASON: SerDe" +
500 " library 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' " +
501 "is not supported.", incompleteTable.
getCause().getCause().getMessage());
510 List<HdfsPartition> partitions = table.getPartitions();
518 List<Function> fns = catalog_.getFunctions(db);
519 List<String> names = Lists.newArrayList();
521 names.add(fn.signatureString());
529 assertEquals(fnNames.size(), 0);
531 ArrayList<Type> args1 = Lists.newArrayList();
532 ArrayList<Type> args2 = Lists.<
Type>newArrayList(
Type.
INT);
535 catalog_.removeFunction(
539 assertEquals(fnNames.size(), 0);
543 catalog_.addFunction(udf1);
545 assertEquals(fnNames.size(), 1);
546 assertTrue(fnNames.contains(
"foo()"));
551 catalog_.addFunction(udf2);
553 assertEquals(fnNames.size(), 2);
554 assertTrue(fnNames.contains(
"foo()"));
555 assertTrue(fnNames.contains(
"foo(INT)"));
560 catalog_.addFunction(udf3);
562 assertEquals(fnNames.size(), 3);
563 assertTrue(fnNames.contains(
"foo()"));
564 assertTrue(fnNames.contains(
"foo(INT)"));
565 assertTrue(fnNames.contains(
"bar(INT)"));
568 catalog_.removeFunction(
new Function(
571 assertEquals(fnNames.size(), 2);
572 assertTrue(fnNames.contains(
"foo(INT)"));
573 assertTrue(fnNames.contains(
"bar(INT)"));
576 catalog_.removeFunction(
new Function(
579 assertEquals(fnNames.size(), 2);
580 assertTrue(fnNames.contains(
"foo(INT)"));
581 assertTrue(fnNames.contains(
"bar(INT)"));
584 catalog_.removeFunction(
new Function(
587 assertEquals(fnNames.size(), 2);
588 assertTrue(fnNames.contains(
"foo(INT)"));
589 assertTrue(fnNames.contains(
"bar(INT)"));
592 catalog_.removeFunction(
new Function(
595 assertEquals(fnNames.size(), 2);
596 assertTrue(fnNames.contains(
"foo(INT)"));
597 assertTrue(fnNames.contains(
"bar(INT)"));
600 catalog_.removeFunction(
new Function(
603 assertEquals(fnNames.size(), 1);
604 assertTrue(fnNames.contains(
"foo(INT)"));
607 catalog_.removeFunction(
new Function(
610 assertEquals(fnNames.size(), 0);
void checkHBaseTableCols(Db db, String hiveTableName, String hbaseTableName, String[] hiveColNames, String[] colFamilies, String[] colQualifiers, Type[] colTypes)
static final ScalarType BIGINT
void TestTableWithBadEscapeChar()
static final char DEFAULT_ESCAPE_CHAR
static final ScalarType STRING
void testColStatsColTypeMismatch()
void checkTableCols(Db db, String tblName, int numClusteringCols, String[] colNames, Type[] colTypes)
static final ScalarType BOOLEAN
void testLoadingUnsupportedTableTypes()
static final ScalarType SMALLINT
static final ScalarType FLOAT
String getHBaseTableName(org.apache.hadoop.hive.metastore.api.Table tbl)
static final ScalarType DOUBLE
void testInternalHBaseTable()
static final ScalarType TINYINT
List< String > getFunctionSignatures(String db)
static final ScalarType INT
uint64_t Test(T *ht, const ProbeTuple *input, uint64_t num_tuples)
void assertStatsUnknown(Column column)
void testDatabaseDoesNotExist()
void testCreateTableMetadata()
String getColumnQualifier()
static CatalogServiceCatalog catalog_
Column getColumn(String name)
ImpalaException getCause()
int getNumClusteringCols()
static final ScalarType INVALID
Table getOrLoadTable(String dbName, String tblName)
static final ScalarType TIMESTAMP