Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gen_ir_descriptions.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # Copyright 2012 Cloudera Inc.
3 #
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
7 #
8 # http://www.apache.org/licenses/LICENSE-2.0
9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 
16 from string import Template
17 import os
18 import shutil
19 import filecmp
20 import tempfile
21 from optparse import OptionParser
22 
# Command-line interface. The only switch is --noclean; its value is read
# later as options.noclean when the generated headers are installed.
parser = OptionParser()
parser.add_option(
    "--noclean",
    action="store_true",
    default=False,
    help=("If specified, does not remove existing files and only replaces "
          "them with freshly generated ones if they have changed."))
(options, args) = parser.parse_args()
28 
29 # This script generates two headers that describe all of the clang cross-compiled
30 # functions.
31 # The script outputs (run: 'be/src/codegen/gen_ir_descriptions.py')
32 # - be/generated-sources/impala-ir/impala-ir-functions.h
33 # This file contains enums for all of the cross-compiled functions
34 # - be/generated-sources/impala-ir/impala-ir-names.h
35 # This file contains a mapping of <string, enum>
36 
# Table of [enum name, compiled function name] pairs, one per cross-compiled
# function. The position of a pair in this list becomes the numeric value of
# its enumerator in the generated header, so the order matters.
#
# The compiled function name only needs to be a substring of the actual,
# compiler-generated (mangled) symbol. Several entries carry digit-prefixed
# fragments (e.g. "4Expr13GetBooleanVal"); these look like pieces of the
# mangled name, presumably used to tell similarly named symbols apart.
# TODO: should we work out the mangling rules?
ir_functions = [
    # AGG_NODE_* / PART_AGG_NODE_*
    ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING", "ProcessRowBatchWithGrouping"],
    ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING", "ProcessRowBatchNoGrouping"],
    ["PART_AGG_NODE_PROCESS_BATCH_TRUE", "ProcessBatch_true"],
    ["PART_AGG_NODE_PROCESS_BATCH_FALSE", "ProcessBatch_false"],
    ["PART_AGG_NODE_PROCESS_BATCH_NO_GROUPING", "ProcessBatchNoGrouping"],

    # AVG_*
    ["AVG_UPDATE_BIGINT", "9AvgUpdateIN10impala_udf9BigIntVal"],
    ["AVG_UPDATE_DOUBLE", "9AvgUpdateIN10impala_udf9DoubleVal"],
    ["AVG_UPDATE_TIMESTAMP", "TimestampAvgUpdate"],
    ["AVG_UPDATE_DECIMAL", "DecimalAvgUpdate"],
    ["AVG_MERGE", "8AvgMerge"],
    ["AVG_MERGE_DECIMAL", "DecimalAvgMerge"],

    # CODEGEN_ANYVAL_*
    ["CODEGEN_ANYVAL_STRING_VAL_EQ", "StringValEq"],
    ["CODEGEN_ANYVAL_STRING_VALUE_EQ", "StringValueEq"],
    ["CODEGEN_ANYVAL_TIMESTAMP_VAL_EQ", "TimestampValEq"],
    ["CODEGEN_ANYVAL_TIMESTAMP_VALUE_EQ", "TimestampValueEq"],

    # EXPR_GET_*_VAL
    ["EXPR_GET_BOOLEAN_VAL", "4Expr13GetBooleanVal"],
    ["EXPR_GET_TINYINT_VAL", "4Expr13GetTinyIntVal"],
    ["EXPR_GET_SMALLINT_VAL", "4Expr14GetSmallIntVal"],
    ["EXPR_GET_INT_VAL", "4Expr9GetIntVal"],
    ["EXPR_GET_BIGINT_VAL", "4Expr12GetBigIntVal"],
    ["EXPR_GET_FLOAT_VAL", "4Expr11GetFloatVal"],
    ["EXPR_GET_DOUBLE_VAL", "4Expr12GetDoubleVal"],
    ["EXPR_GET_STRING_VAL", "4Expr12GetStringVal"],
    ["EXPR_GET_TIMESTAMP_VAL", "4Expr15GetTimestampVal"],
    ["EXPR_GET_DECIMAL_VAL", "4Expr13GetDecimalVal"],

    # HASH_*
    ["HASH_CRC", "IrCrcHash"],
    ["HASH_FNV", "IrFnvHash"],
    ["HASH_MURMUR", "IrMurmurHash"],

    # HASH_JOIN_* / PHJ_*
    ["HASH_JOIN_PROCESS_BUILD_BATCH", "12HashJoinNode17ProcessBuildBatch"],
    ["HASH_JOIN_PROCESS_PROBE_BATCH", "12HashJoinNode17ProcessProbeBatch"],
    ["PHJ_PROCESS_BUILD_BATCH", "23PartitionedHashJoinNode17ProcessBuildBatch"],
    ["PHJ_PROCESS_PROBE_BATCH_INNER_JOIN", "ProcessProbeBatchILi0"],
    ["PHJ_PROCESS_PROBE_BATCH_LEFT_OUTER_JOIN", "ProcessProbeBatchILi1"],
    ["PHJ_PROCESS_PROBE_BATCH_LEFT_SEMI_JOIN", "ProcessProbeBatchILi2"],
    ["PHJ_PROCESS_PROBE_BATCH_LEFT_ANTI_JOIN", "ProcessProbeBatchILi3"],
    ["PHJ_PROCESS_PROBE_BATCH_NULL_AWARE_LEFT_ANTI_JOIN", "ProcessProbeBatchILi4"],
    ["PHJ_PROCESS_PROBE_BATCH_RIGHT_OUTER_JOIN", "ProcessProbeBatchILi5"],
    ["PHJ_PROCESS_PROBE_BATCH_RIGHT_SEMI_JOIN", "ProcessProbeBatchILi6"],
    ["PHJ_PROCESS_PROBE_BATCH_RIGHT_ANTI_JOIN", "ProcessProbeBatchILi7"],
    ["PHJ_PROCESS_PROBE_BATCH_FULL_OUTER_JOIN", "ProcessProbeBatchILi8"],

    # HASH_TABLE_*
    ["HASH_TABLE_GET_HASH_SEED", "GetHashSeed"],

    # HLL_*
    ["HLL_UPDATE_BOOLEAN", "HllUpdateIN10impala_udf10BooleanVal"],
    ["HLL_UPDATE_TINYINT", "HllUpdateIN10impala_udf10TinyIntVal"],
    ["HLL_UPDATE_SMALLINT", "HllUpdateIN10impala_udf11SmallIntVal"],
    ["HLL_UPDATE_INT", "HllUpdateIN10impala_udf6IntVal"],
    ["HLL_UPDATE_BIGINT", "HllUpdateIN10impala_udf9BigIntVal"],
    ["HLL_UPDATE_FLOAT", "HllUpdateIN10impala_udf8FloatVal"],
    ["HLL_UPDATE_DOUBLE", "HllUpdateIN10impala_udf9DoubleVal"],
    ["HLL_UPDATE_STRING", "HllUpdateIN10impala_udf9StringVal"],
    ["HLL_UPDATE_TIMESTAMP", "HllUpdateIN10impala_udf12TimestampVal"],
    ["HLL_UPDATE_DECIMAL", "HllUpdateIN10impala_udf10DecimalVal"],
    ["HLL_MERGE", "HllMerge"],

    # DECODE_AVRO_* / READ_UNION_TYPE / READ_AVRO_*
    ["DECODE_AVRO_DATA", "DecodeAvroData"],
    ["READ_UNION_TYPE", "ReadUnionType"],
    ["READ_AVRO_BOOLEAN", "ReadAvroBoolean"],
    ["READ_AVRO_INT32", "ReadAvroInt32"],
    ["READ_AVRO_INT64", "ReadAvroInt64"],
    ["READ_AVRO_FLOAT", "ReadAvroFloat"],
    ["READ_AVRO_DOUBLE", "ReadAvroDouble"],
    ["READ_AVRO_STRING", "ReadAvroString"],
    ["READ_AVRO_VARCHAR", "ReadAvroVarchar"],
    ["READ_AVRO_CHAR", "ReadAvroChar"],

    # HDFS_SCANNER_*
    ["HDFS_SCANNER_WRITE_ALIGNED_TUPLES", "WriteAlignedTuples"],
    ["HDFS_SCANNER_GET_CONJUNCT_CTX", "GetConjunctCtx"],

    # STRING_TO_* / *IS_NULL_STRING
    ["STRING_TO_BOOL", "IrStringToBool"],
    ["STRING_TO_INT8", "IrStringToInt8"],
    ["STRING_TO_INT16", "IrStringToInt16"],
    ["STRING_TO_INT32", "IrStringToInt32"],
    ["STRING_TO_INT64", "IrStringToInt64"],
    ["STRING_TO_FLOAT", "IrStringToFloat"],
    ["STRING_TO_DOUBLE", "IrStringToDouble"],
    ["IS_NULL_STRING", "IrIsNullString"],
    ["GENERIC_IS_NULL_STRING", "IrGenericIsNullString"],
]
115 
# Text written at the top of the generated enum header: license banner,
# "generated file" notice, include guard, and the opening of
# impala::IRFunction::Type. The enumerators themselves are appended by the
# generation loop, and enums_epilogue closes everything opened here.
enums_preamble = (
    '// Copyright 2012 Cloudera Inc.\n'
    '//\n'
    '// Licensed under the Apache License, Version 2.0 (the "License");\n'
    '// you may not use this file except in compliance with the License.\n'
    '// You may obtain a copy of the License at\n'
    '//\n'
    '// http://www.apache.org/licenses/LICENSE-2.0\n'
    '//\n'
    '// Unless required by applicable law or agreed to in writing, software\n'
    '// distributed under the License is distributed on an "AS IS" BASIS,\n'
    '// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n'
    '// See the License for the specific language governing permissions and\n'
    '// limitations under the License.\n'
    '\n'
    '// This is a generated file, DO NOT EDIT IT.\n'
    '// To add new functions, see be/src/codegen/gen_ir_descriptions.py.\n'
    '\n'
    '#ifndef IMPALA_IR_FUNCTIONS_H\n'
    '#define IMPALA_IR_FUNCTIONS_H\n'
    '\n'
    'namespace impala {\n'
    '\n'
    'class IRFunction {\n'
    ' public:\n'
    ' enum Type {\n'
)
142 
# Text written after the last enumerator of the generated enum header: closes
# the enum, the IRFunction class, the impala namespace and the include guard
# opened by enums_preamble.
enums_epilogue = (
    ' };\n'
    '};\n'
    '\n'
    '}\n'
    '\n'
    '#endif\n'
)
150 
# Text written at the top of the generated names header: license banner,
# "generated file" notice, include guard, includes, and the opening of the
# FN_MAPPINGS table of { compiled-function-name, IRFunction::Type } rows.
# The rows themselves are appended by the generation loop, and names_epilogue
# closes everything opened here.
#
# The struct below declares a std::string member, so the header includes
# <string> itself instead of relying on whichever file includes it having
# pulled <string> in first.
names_preamble = (
    '// Copyright 2012 Cloudera Inc.\n'
    '//\n'
    '// Licensed under the Apache License, Version 2.0 (the "License");\n'
    '// you may not use this file except in compliance with the License.\n'
    '// You may obtain a copy of the License at\n'
    '//\n'
    '// http://www.apache.org/licenses/LICENSE-2.0\n'
    '//\n'
    '// Unless required by applicable law or agreed to in writing, software\n'
    '// distributed under the License is distributed on an "AS IS" BASIS,\n'
    '// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n'
    '// See the License for the specific language governing permissions and\n'
    '// limitations under the License.\n'
    '\n'
    '// This is a generated file, DO NOT EDIT IT.\n'
    '// To add new functions, see be/src/codegen/gen_ir_descriptions.py.\n'
    '\n'
    '#ifndef IMPALA_IR_FUNCTION_NAMES_H\n'
    '#define IMPALA_IR_FUNCTION_NAMES_H\n'
    '\n'
    '#include <string>\n'
    '\n'
    '#include "impala-ir/impala-ir-functions.h"\n'
    '\n'
    'namespace impala {\n'
    '\n'
    'static struct {\n'
    ' std::string fn_name; \n'
    ' IRFunction::Type fn; \n'
    '} FN_MAPPINGS[] = {\n'
)
180 
# Text written after the last row of the generated names header: closes the
# FN_MAPPINGS initializer, the impala namespace and the include guard opened
# by names_preamble.
names_epilogue = (
    '};\n'
    '\n'
    '}\n'
    '\n'
    '#endif\n'
)
187 
def move_if_different(src_file, dest_file):
    """Move src_file to dest_file unless dest_file already has the same contents.

    The move happens when dest_file is not an existing regular file, or when
    the two files differ byte-for-byte. Otherwise dest_file is left untouched,
    a 'Retaining existing file' message is printed, and src_file stays where
    it is.

    Assumes that src_file exists.
    """
    # shallow=False forces a real content comparison. The default (shallow=True)
    # declares two files equal as soon as their type, size and mtime match,
    # which could keep a stale dest_file whose contents actually differ.
    dest_is_current = (os.path.isfile(dest_file) and
                       filecmp.cmp(src_file, dest_file, shallow=False))
    if dest_is_current:
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print('Retaining existing file: %s' % (dest_file))
    else:
        shutil.move(src_file, dest_file)
195 
# Where the generated headers are installed, relative to $IMPALA_HOME.
# Reading os.environ['IMPALA_HOME'] raises KeyError when the variable is unset.
BE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'be/generated-sources/impala-ir/')
IR_FUNCTIONS_FILE = 'impala-ir-functions.h'
IR_NAMES_FILE = 'impala-ir-names.h'
IR_FUNCTIONS_PATH = os.path.join(BE_PATH, IR_FUNCTIONS_FILE)
IR_NAMES_PATH = os.path.join(BE_PATH, IR_NAMES_FILE)

# Each header is first written under the system temp directory, using the same
# file name, and only afterwards moved to its final location under BE_PATH.
TMP_IR_FUNCTIONS_PATH = os.path.join(tempfile.gettempdir(), IR_FUNCTIONS_FILE)
TMP_IR_NAMES_PATH = os.path.join(tempfile.gettempdir(), IR_NAMES_FILE)

# Create the output directory if needed. Attempting the creation and tolerating
# "already exists" avoids the window between an exists() check and makedirs()
# in which another process could create the directory and make makedirs() fail.
try:
    os.makedirs(BE_PATH)
except OSError:
    # Only swallow the error when the directory is really there; anything else
    # (permissions, a regular file in the way, ...) is re-raised.
    if not os.path.isdir(BE_PATH):
        raise
206 
if __name__ == "__main__":
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Generating IR description files")

    # Write both headers to their temporary paths. The 'with' blocks guarantee
    # that both files are flushed and closed before they are moved, and that
    # the handles are released even if one of the writes raises.
    with open(TMP_IR_FUNCTIONS_PATH, 'w') as enums_file:
        with open(TMP_IR_NAMES_PATH, 'w') as names_file:
            enums_file.write(enums_preamble)
            names_file.write(names_preamble)

            # FN_START shares the value 0 with the first real enumerator, and
            # FN_END is one past the last one (i.e. the number of functions).
            enums_file.write(" FN_START = " + str(0) + ",\n")
            for idx, entry in enumerate(ir_functions):
                enum = entry[0]
                fn_name = entry[1]
                enums_file.write(" " + enum + " = " + str(idx) + ",\n")
                names_file.write(" { \"" + fn_name + "\", IRFunction::" + enum + " },\n")
            enums_file.write(" FN_END = " + str(len(ir_functions)) + "\n")

            enums_file.write(enums_epilogue)
            names_file.write(names_epilogue)

    # Install the generated files. With --noclean an existing, identical header
    # is left untouched; otherwise the destination is always replaced.
    if options.noclean:
        move_if_different(TMP_IR_FUNCTIONS_PATH, IR_FUNCTIONS_PATH)
        move_if_different(TMP_IR_NAMES_PATH, IR_NAMES_PATH)
    else:
        shutil.move(TMP_IR_FUNCTIONS_PATH, IR_FUNCTIONS_PATH)
        shutil.move(TMP_IR_NAMES_PATH, IR_NAMES_PATH)