19 #include <re2/stringpiece.h>
22 #include "gutil/strings/substitute.h"
26 using namespace impala_udf;
32 "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
37 "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
42 "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
// Recognizes a regular expression that is nothing more than an anchored
// literal: a leading '^', then a captured run of zero or more characters, none
// of which is a regex metacharacter (dot, caret, braces, brackets, parentheses,
// pipe, plus, star, question mark, dollar or backslash), then a trailing '$'.
// Elsewhere in this file it is handed to RE2::FullMatch together with a
// std::string out-argument, which receives the captured literal on a match.
45 static const RE2
EQUALS_RE(
"\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
47 LikePredicate::LikePredicate(
const TExprNode& node)
56 if (scope != FunctionContext::THREAD_LOCAL)
return;
62 if (pattern_val.
is_null)
return;
64 re2::RE2 substring_re(
"(?:%+)([^%_]*)(?:%+)");
65 re2::RE2 ends_with_re(
"(?:%+)([^%_]*)");
66 re2::RE2 starts_with_re(
"([^%_]*)(?:%+)");
67 re2::RE2 equals_re(
"([^%_]*)");
68 string pattern_str(pattern.
ptr, pattern.
len);
70 if (RE2::FullMatch(pattern_str, substring_re, &search_string)) {
73 }
else if (RE2::FullMatch(pattern_str, starts_with_re, &search_string)) {
76 }
else if (RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
79 }
else if (RE2::FullMatch(pattern_str, equals_re, &search_string)) {
85 *reinterpret_cast<StringVal*>(context->
GetConstantArg(1)), &re_pattern);
86 state->
regex_.reset(
new RE2(re_pattern));
87 if (!state->
regex_->ok()) {
89 strings::Substitute(
"Invalid regex: $0", pattern_val.
ptr).c_str());
99 return (state->
function_)(context, val, pattern);
104 if (scope == FunctionContext::THREAD_LOCAL) {
113 if (scope != FunctionContext::THREAD_LOCAL)
return;
122 string pattern_str(reinterpret_cast<const char*>(pattern->
ptr), pattern->
len);
123 string search_string;
130 if (RE2::FullMatch(pattern_str,
EQUALS_RE, &search_string)) {
133 }
else if (RE2::FullMatch(pattern_str,
STARTS_WITH_RE, &search_string)) {
136 }
else if (RE2::FullMatch(pattern_str,
ENDS_WITH_RE, &search_string)) {
139 }
else if (RE2::FullMatch(pattern_str,
SUBSTRING_RE, &search_string)) {
143 state->
regex_.reset(
new RE2(pattern_str));
145 if (!state->
regex_->ok()) {
147 error <<
"Invalid regex expression" << pattern->
ptr;
148 context->
SetError(error.str().c_str());
159 return (state->
function_)(context, val, pattern);
164 if (scope == FunctionContext::THREAD_LOCAL) {
173 return RegexMatch(context, val, pattern,
false);
178 return RegexMatch(context, val, pattern,
true);
183 if (val.
is_null)
return BooleanVal::null();
193 if (val.
is_null)
return BooleanVal::null();
207 if (val.
is_null)
return BooleanVal::null();
223 if (val.
is_null)
return BooleanVal::null();
231 if (val.
is_null)
return BooleanVal::null();
234 re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.
ptr), val.
len);
235 return RE2::PartialMatch(operand_sp, *state->
regex_);
240 if (val.
is_null)
return BooleanVal::null();
243 re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.
ptr), val.
len);
244 return RE2::FullMatch(operand_sp, *state->
regex_);
249 bool is_like_pattern) {
250 if (operand_value.
is_null || pattern_value.
is_null)
return BooleanVal::null();
254 if (is_like_pattern) {
255 return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
256 operand_value.
ptr), operand_value.
len), *state->
regex_.get());
258 return RE2::PartialMatch(re2::StringPiece(reinterpret_cast<const char*>(
259 operand_value.
ptr), operand_value.
len), *state->
regex_.get());
263 if (is_like_pattern) {
267 string(reinterpret_cast<const char*>(pattern_value.
ptr), pattern_value.
len);
269 re2::RE2 re(re_pattern);
271 if (is_like_pattern) {
272 return RE2::FullMatch(re2::StringPiece(
273 reinterpret_cast<const char*>(operand_value.
ptr), operand_value.
len), re);
275 return RE2::PartialMatch(re2::StringPiece(
276 reinterpret_cast<const char*>(operand_value.
ptr), operand_value.
len), re);
280 strings::Substitute(
"Invalid regex: $0", pattern_value.
ptr).c_str());
287 string* re_pattern) {
291 bool is_escaped =
false;
292 for (
int i = 0; i < pattern.
len; ++i) {
293 if (!is_escaped && pattern.
ptr[i] ==
'%') {
294 re_pattern->append(
".*");
295 }
else if (!is_escaped && pattern.
ptr[i] ==
'_') {
296 re_pattern->append(
".");
301 pattern.
ptr[i] ==
'.'
302 || pattern.
ptr[i] ==
'['
303 || pattern.
ptr[i] ==
']'
304 || pattern.
ptr[i] ==
'{'
305 || pattern.
ptr[i] ==
'}'
306 || pattern.
ptr[i] ==
'('
307 || pattern.
ptr[i] ==
')'
308 || pattern.
ptr[i] ==
'\\'
309 || pattern.
ptr[i] ==
'*'
310 || pattern.
ptr[i] ==
'+'
311 || pattern.
ptr[i] ==
'?'
312 || pattern.
ptr[i] ==
'|'
313 || pattern.
ptr[i] ==
'^'
314 || pattern.
ptr[i] ==
'$'
318 re_pattern->append(
"\\");
319 re_pattern->append(1, pattern.
ptr[i]);
323 re_pattern->append(1, pattern.
ptr[i]);
bool Eq(const StringValue &other) const
==
static impala_udf::BooleanVal ConstantStartsWithFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
Handling of like predicates that can be implemented using strncmp.
static const RE2 STARTS_WITH_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*")
static void LikePrepare(impala_udf::FunctionContext *context, impala_udf::FunctionContext::FunctionStateScope scope)
static impala_udf::BooleanVal ConstantEndsWithFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
Handling of like predicates that can be implemented using strncmp.
static impala_udf::BooleanVal ConstantSubstringFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
Handling of like predicates that map to strstr.
static void RegexClose(impala_udf::FunctionContext *, impala_udf::FunctionContext::FunctionStateScope scope)
static impala_udf::BooleanVal ConstantRegexFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
boost::scoped_ptr< re2::RE2 > regex_
Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
int Search(const StringValue *str) const
static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$")
static impala_udf::BooleanVal Like(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
static impala_udf::BooleanVal ConstantEqualsFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
Handling of like predicates that can be implemented using strcmp.
static void RegexPrepare(impala_udf::FunctionContext *context, impala_udf::FunctionContext::FunctionStateScope scope)
LikePredicateFunction function_
static impala_udf::BooleanVal RegexMatch(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern, bool is_like_pattern)
static void ConvertLikePattern(impala_udf::FunctionContext *context, const impala_udf::StringVal &pattern, std::string *re_pattern)
static impala_udf::BooleanVal Regex(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
void * GetFunctionState(FunctionStateScope scope) const
bool IsArgConstant(int arg_idx) const
void SetFunctionState(FunctionStateScope scope, void *ptr)
StringSearch substring_pattern_
StringValue search_string_sv_
static const RE2 ENDS_WITH_RE("(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$")
static impala_udf::BooleanVal ConstantRegexFnPartial(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
static StringValue FromStringVal(const impala_udf::StringVal &sv)
void SetSearchString(const std::string &search_string)
static impala_udf::BooleanVal LikeFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)
void SetError(const char *error_msg)
AnyVal * GetConstantArg(int arg_idx) const
static void LikeClose(impala_udf::FunctionContext *context, impala_udf::FunctionContext::FunctionStateScope scope)
static const RE2 SUBSTRING_RE("(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*")
static impala_udf::BooleanVal RegexFn(impala_udf::FunctionContext *context, const impala_udf::StringVal &val, const impala_udf::StringVal &pattern)