20 #include <re2/stringpiece.h>
30 using namespace impala_udf;
42 int fixed_pos = pos.
val;
43 if (fixed_pos < 0) fixed_pos = str.
len + fixed_pos + 1;
44 int max_len = str.
len - fixed_pos + 1;
45 int fixed_len = ::min(static_cast<int>(len.
val), max_len);
46 if (fixed_pos > 0 && fixed_pos <= str.len && fixed_len > 0) {
56 return Substring(context, str, pos,
BigIntVal(INT32_MAX));
62 return Substring(context, str, 1, len);
69 int64_t pos = ::max(-len.
val, static_cast<int64_t>(-str.
len));
70 return Substring(context, str,
BigIntVal(pos), len);
77 memset(result.
ptr,
' ', len.
val);
86 uint8_t* ptr = result.
ptr;
87 for (int64_t i = 0; i < n.
val; ++i) {
88 memcpy(ptr, str.
ptr, str.
len);
103 int padded_prefix_len = len.
val - str.
len;
105 int result_index = 0;
106 uint8_t* ptr = result.
ptr;
109 while (result_index < padded_prefix_len) {
110 ptr[result_index++] = pad.
ptr[pad_index++];
111 pad_index = pad_index % pad.
len;
115 memcpy(ptr + result_index, str.
ptr, str.
len);
125 if (len.
val <= str.
len || pad.
len == 0) {
133 uint8_t* ptr = result.
ptr;
135 int result_len = str.
len;
136 while (result_len < len.
val) {
137 ptr[result_len++] = pad.
ptr[pad_index++];
138 pad_index = pad_index % pad.
len;
152 return StringValue::UnpaddedCharLength(reinterpret_cast<char*>(str.
ptr), t->
len);
158 for (
int i = 0; i < str.
len; ++i) {
159 result.
ptr[i] = ::tolower(str.
ptr[i]);
167 for (
int i = 0; i < str.
len; ++i) {
168 result.
ptr[i] = ::toupper(str.
ptr[i]);
180 uint8_t* result_ptr = result.
ptr;
181 bool word_start =
true;
182 for (
int i = 0; i < str.
len; ++i) {
183 if (isspace(str.
ptr[i])) {
184 result_ptr[i] = str.
ptr[i];
187 result_ptr[i] = (word_start ? toupper(str.
ptr[i]) : tolower(str.
ptr[i]));
197 std::reverse_copy(str.
ptr, str.
ptr + str.
len, result.
ptr);
209 for (
int i = 0; i < str.
len; ++i) {
210 bool matched_src =
false;
211 for (
int j = 0; j < src.
len; ++j) {
212 if (str.
ptr[i] == src.
ptr[j]) {
214 result.
ptr[result_len++] = dst.
ptr[j];
222 if (!matched_src) result.
ptr[result_len++] = str.
ptr[i];
224 result.
len = result_len;
232 while (begin < str.
len && str.
ptr[begin] ==
' ') {
236 int32_t end = str.
len - 1;
237 while (end > begin && str.
ptr[end] ==
' ') {
247 while (begin < str.
len && str.
ptr[begin] ==
' ') {
255 if (str.
len == 0)
return str;
257 int32_t end = str.
len - 1;
258 while (end > 0 && str.
ptr[end] ==
' ') {
268 return IntVal((str.
len == 0) ? 0 : static_cast<int32_t>(str.
ptr[0]));
274 StringValue str_sv = StringValue::FromStringVal(str);
275 StringValue substr_sv = StringValue::FromStringVal(substr);
283 return Instr(context, str, substr);
293 StringValue substr_sv = StringValue::FromStringVal(substr);
297 str.
len - start_pos.
val + 1);
298 int32_t match_pos = search.
Search(&adjusted_str);
299 if (match_pos >= 0) {
301 return IntVal(start_pos.
val + match_pos);
309 re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.
ptr), pattern.
len);
310 re2::RE2::Options options;
312 options.set_log_errors(
false);
314 options.set_longest_match(
true);
315 re2::RE2* re =
new re2::RE2(pattern_sp, options);
318 ss <<
"Could not compile regexp pattern: " << AnyValUtil::ToString(pattern) << endl
319 <<
"Error: " << re->error();
320 *error_str = ss.str();
327 void StringFunctions::RegexpPrepare(
333 if (pattern->is_null)
return;
338 context->
SetError(error_str.c_str());
344 void StringFunctions::RegexpClose(
347 re2::RE2* re =
reinterpret_cast<re2::RE2*
>(context->
GetFunctionState(scope));
356 re2::RE2* re =
reinterpret_cast<re2::RE2*
>(
358 scoped_ptr<re2::RE2> scoped_re;
370 re2::StringPiece str_sp(reinterpret_cast<char*>(str.
ptr), str.
len);
371 int max_matches = 1 + re->NumberOfCapturingGroups();
375 vector<re2::StringPiece> matches(max_matches);
377 re->Match(str_sp, 0, str.
len, re2::RE2::UNANCHORED, &matches[0], max_matches);
380 const re2::StringPiece& match = matches[index.
val];
381 return AnyValUtil::FromBuffer(context, match.data(), match.size());
388 re2::RE2* re =
reinterpret_cast<re2::RE2*
>(
390 scoped_ptr<re2::RE2> scoped_re;
402 re2::StringPiece replace_str =
403 re2::StringPiece(reinterpret_cast<char*>(replace.
ptr), replace.
len);
404 string result_str = AnyValUtil::ToString(str);
405 re2::RE2::GlobalReplace(&result_str, *re, replace_str);
406 return AnyValUtil::FromString(context, result_str);
411 return ConcatWs(context,
StringVal(), num_children, strs);
415 int num_children,
const StringVal* strs) {
416 DCHECK_GE(num_children, 1);
420 if (num_children == 1)
return strs[0];
423 int32_t total_size = strs[0].
len;
426 for (int32_t i = 1; i < num_children; ++i) {
428 total_size += sep.
len + strs[i].
len;
431 uint8_t* ptr = result.
ptr;
434 memcpy(ptr, strs[0].ptr, strs[0].len);
436 for (int32_t i = 1; i < num_children; ++i) {
437 memcpy(ptr, sep.
ptr, sep.
len);
439 memcpy(ptr, strs[i].ptr, strs[i].len);
449 for (
int i = 0; i < str.
len; ++i) {
453 int32_t token_index = 1;
456 StringValue str_sv = StringValue::FromStringVal(str);
460 while(str_set.
ptr[end] !=
',' && end < str_set.
len) ++end;
461 StringValue token(reinterpret_cast<char*>(str_set.
ptr) + start, end - start);
462 if (str_sv.
Eq(token))
return IntVal(token_index);
467 }
while (start < str_set.
len);
471 void StringFunctions::ParseUrlPrepare(
477 if (part->is_null)
return;
479 *url_part = UrlParser::GetUrlPart(StringValue::FromStringVal(*part));
480 if (*url_part == UrlParser::INVALID) {
482 ss <<
"Invalid URL part: " << AnyValUtil::ToString(*part) << endl
483 <<
"(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', 'FILE', "
484 <<
"'USERINFO', and 'QUERY')";
500 url_part = UrlParser::GetUrlPart(StringValue::FromStringVal(part));
504 if (!UrlParser::ParseUrl(StringValue::FromStringVal(url), url_part, &result)) {
506 if (url_part == UrlParser::INVALID) {
508 ss <<
"Invalid URL part: " << AnyValUtil::ToString(part);
512 ss <<
"Could not parse URL: " << AnyValUtil::ToString(url);
522 void StringFunctions::ParseUrlClose(
527 if (url_part == NULL)
return;
540 url_part = UrlParser::GetUrlPart(StringValue::FromStringVal(part));
544 if (!UrlParser::ParseUrlKey(StringValue::FromStringVal(url), url_part,
545 StringValue::FromStringVal(key), &result)) {
547 if (url_part == UrlParser::INVALID) {
549 ss <<
"Invalid URL part: " << AnyValUtil::ToString(part);
553 ss <<
"Could not parse URL: " << AnyValUtil::ToString(url);
bool Eq(const StringValue &other) const
==
int Search(const StringValue *str) const
re2::RE2 * CompileRegex(const StringVal &pattern, string *error_str)
bool AddWarning(const char *warning_msg)
const TypeDesc * GetArgType(int arg_idx) const
void * GetFunctionState(FunctionStateScope scope) const
bool IsArgConstant(int arg_idx) const
void SetFunctionState(FunctionStateScope scope, void *ptr)
int len
Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR.
void SetError(const char *error_msg)
UrlPart
Parts of a URL that can be requested.
void ToStringVal(impala_udf::StringVal *sv) const
AnyVal * GetConstantArg(int arg_idx) const
StringVal Concat(FunctionContext *context, int n, const StringVal *args)