Impala
Impala is the open source, native analytic database for Apache Hadoop.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
redactor-test.cc
Go to the documentation of this file.
1 // Copyright 2015 Cloudera Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "redactor.cc" // access g_rules

#include <cstdlib> // rand
#include <cstdio> // file stuff
#include <vector> // scratch buffers and worker ids
#include <pthread.h>
#include <unistd.h> // cpu info

#include <gtest/gtest.h>

#include "redactor-test-utils.h"
25 
26 namespace impala {
27 
28 using std::string;
29 
30 void* MultiThreadWorkload(void* unused) {
31  unsigned int rand_seed = RandSeed();
32  int buffer_size = 10000 + rand_r(&rand_seed) % 1000;
33  char buffer[buffer_size];
34  string message;
35  for (int i = 0; i < 100; ++i) {
36  RandomlyFillString(buffer, buffer_size);
37  message = buffer;
38  Redact(&message);
39  if ((buffer_size - 1) != message.length()) {
40  ADD_FAILURE() << "Message length changed; new size is " << message.length();
41  return NULL;
42  }
43  for (int c = 0; c < buffer_size - 1; ++c) {
44  if ('0' <= message[c] && message[c] <= '9') {
45  ADD_FAILURE() << "Number " << message[c] << " should be replaced with #";
46  return NULL;
47  }
48  if (message[c] < ' ' || '~' < message[c]) {
49  ADD_FAILURE() << "Unexpected char " << message[c];
50  return NULL;
51  }
52  }
53  if (message[buffer_size - 1] != '\0') {
54  ADD_FAILURE() << "Missing string terminator";
55  return NULL;
56  }
57  }
58  return NULL;
59 }
60 
61 TEST(RedactorTest, NoTrigger) {
62  TempRulesFile rules_file(
63  "{"
64  " \"version\": 1,"
65  " \"rules\": ["
66  " {\"search\": \"foo\", \"replace\": \"bar\"}"
67  " ]"
68  "}");
69  string error = SetRedactionRulesFromFile(rules_file.name());
70  ASSERT_EQ("", error);
71  ASSERT_EQ(1, g_rules->size());
72  ASSERT_EQ("", g_rules->begin()->trigger);
73  ASSERT_EQ("foo", g_rules->begin()->search_pattern.pattern());
74  ASSERT_EQ("bar", g_rules->begin()->replacement);
75  ASSERT_UNREDACTED("baz");
76  ASSERT_REDACTED_EQ("foo", "bar");
77  ASSERT_REDACTED_EQ("foo bar foo baz", "bar bar bar baz");
78  ASSERT_REDACTED_EQ("foo\nbar\nfoo baz", "bar\nbar\nbar baz");
79 }
80 
81 TEST(RedactorTest, Trigger) {
82  TempRulesFile rules_file(
83  "{"
84  " \"version\": 1,"
85  " \"rules\": ["
86  " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
87  " ]"
88  "}");
89  string error = SetRedactionRulesFromFile(rules_file.name());
90  ASSERT_EQ("", error);
91  ASSERT_EQ(1, g_rules->size());
92  ASSERT_EQ("baz", g_rules->begin()->trigger);
93  ASSERT_UNREDACTED("foo");
94  ASSERT_REDACTED_EQ("foo bar foo baz", "bar bar bar baz");
95 }
96 
97 TEST(RedactorTest, MultiTrigger) {
98  TempRulesFile rules_file(
99  "{"
100  " \"version\": 1,"
101  " \"rules\": ["
102  " {\"search\": \"\\\\d+\", \"replace\": \"#\"},"
103  " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
104  " ]"
105  "}");
106  string error = SetRedactionRulesFromFile(rules_file.name());
107  ASSERT_EQ("", error);
108  ASSERT_EQ(2, g_rules->size());
109  ASSERT_REDACTED_EQ("foo33", "foo#");
110  ASSERT_REDACTED_EQ("foo foo baz!3", "bar bar baz!#");
111 }
112 
113 TEST(RedactorTest, CaseSensitivityProperty) {
114  TempRulesFile rules_file(
115  "{"
116  " \"version\": 1,"
117  " \"rules\": ["
118  " {\"search\": \"(C|d)+\", \"replace\": \"_\", \"caseSensitive\": false}"
119  " ]"
120  "}");
121  string error = SetRedactionRulesFromFile(rules_file.name());
122  ASSERT_EQ("", error);
123  ASSERT_UNREDACTED("123");
124  ASSERT_REDACTED_EQ("abcD Cd c D d C", "ab_ _ _ _ _ _");
125 
126  rules_file.OverwriteContents(
127  "{"
128  " \"version\": 1,"
129  " \"rules\": ["
130  " {"
131  " \"trigger\": \"BaZ\","
132  " \"caseSensitive\": false,"
133  " \"search\": \"bAz\","
134  " \"replace\": \"bar\""
135  " }"
136  " ]"
137  "}");
138  error = SetRedactionRulesFromFile(rules_file.name());
139  ASSERT_EQ("", error);
140  ASSERT_REDACTED_EQ("bAz bar", "bar bar");
141  ASSERT_REDACTED_EQ("BAz bar", "bar bar");
142 
143  rules_file.OverwriteContents(
144  "{"
145  " \"version\": 1,"
146  " \"rules\": ["
147  " {"
148  " \"trigger\": \"FOO\","
149  " \"caseSensitive\": false,"
150  " \"search\": \"foO\","
151  " \"replace\": \"BAR\""
152  " }"
153  " ]"
154  "}");
155  error = SetRedactionRulesFromFile(rules_file.name());
156  ASSERT_EQ("", error);
157  ASSERT_REDACTED_EQ("fOO bar", "BAR bar");
158 
159  rules_file.OverwriteContents(
160  "{"
161  " \"version\": 1,"
162  " \"rules\": ["
163  " {\"search\": \"(Xy)+\", \"replace\": \"$\", \"caseSensitive\": true}"
164  " ]"
165  "}");
166  error = SetRedactionRulesFromFile(rules_file.name());
167  ASSERT_EQ("", error);
168  ASSERT_UNREDACTED("xY");
169  ASSERT_REDACTED_EQ("Xy", "$");
170 
171  rules_file.OverwriteContents(
172  "{"
173  " \"version\": 1,"
174  " \"rules\": ["
175  " {"
176  " \"trigger\": \"Sensitive\","
177  " \"caseSensitive\": true,"
178  " \"search\": \"SsS\","
179  " \"replace\": \"sss\""
180  " }"
181  " ]"
182  "}");
183  error = SetRedactionRulesFromFile(rules_file.name());
184  ASSERT_EQ("", error);
185  ASSERT_UNREDACTED("SsS");
186  ASSERT_UNREDACTED("sensitive SsS");
187  ASSERT_UNREDACTED("Sensitive sss");
188  ASSERT_REDACTED_EQ("Sensitive SsS", "Sensitive sss");
189 
190  rules_file.OverwriteContents(
191  "{"
192  " \"version\": 1,"
193  " \"rules\": ["
194  " {"
195  " \"trigger\": \"QQQ\","
196  " \"search\": \"qQq\","
197  " \"replace\": \"QqQ\""
198  " }"
199  " ]"
200  "}");
201  error = SetRedactionRulesFromFile(rules_file.name());
202  ASSERT_EQ("", error);
203  ASSERT_UNREDACTED("qQq");
204  ASSERT_UNREDACTED("QQQ");
205  ASSERT_UNREDACTED("QQq qQq");
206  ASSERT_REDACTED_EQ("QQQ qQq", "QQQ QqQ");
207 }
208 
209 TEST(RedactorTest, SingleTriggerMultiRule) {
210  TempRulesFile rules_file(
211  "{"
212  " \"version\": 1,"
213  " \"rules\": ["
214  " {\"trigger\": \"baz\", \"search\": \"\\\\d+\", \"replace\": \"#\"},"
215  " {\"trigger\": \"baz\", \"search\": \"foo\", \"replace\": \"bar\"}"
216  " ]"
217  "}");
218  string error = SetRedactionRulesFromFile(rules_file.name());
219  ASSERT_EQ("", error);
220  ASSERT_EQ(2, g_rules->size());
221  ASSERT_UNREDACTED("foo33");
222  ASSERT_REDACTED_EQ("foo foo baz!3", "bar bar baz!#");
223 }
224 
225 TEST(RedactorTest, RuleOrder) {
226  TempRulesFile rules_file(
227  "{"
228  " \"version\": 1,"
229  " \"rules\": ["
230  " {\"trigger\": \"barC\", \"search\": \".*\", \"replace\": \"Z\"},"
231  " {\"search\": \"1\", \"replace\": \"2\"},"
232  " {\"search\": \"1\", \"replace\": \"3\"},"
233  " {\"trigger\": \"foo\", \"search\": \"2\", \"replace\": \"A\"},"
234  " {\"trigger\": \"bar\", \"search\": \"2\", \"replace\": \"1\"},"
235  " {\"search\": \"1\", \"replace\": \"4\"},"
236  " {\"search\": \"1\", \"replace\": \"5\"},"
237  " {\"trigger\": \"foo\", \"search\": \"A\", \"replace\": \"C\"},"
238  " {\"trigger\": \"bar\", \"search\": \"5\", \"replace\": \"1\"},"
239  " {\"trigger\": \"barC\", \"search\": \".*\", \"replace\": \"D\"}"
240  " ]"
241  "}");
242  string error = SetRedactionRulesFromFile(rules_file.name());
243  ASSERT_EQ("", error);
244  ASSERT_EQ(10, g_rules->size());
245  ASSERT_UNREDACTED("foo");
246  ASSERT_REDACTED_EQ("1", "2");
247  ASSERT_REDACTED_EQ("foo1", "fooC");
248  ASSERT_REDACTED_EQ("bar1", "bar4");
249  ASSERT_REDACTED_EQ("bar5", "bar1");
250  ASSERT_REDACTED_EQ("foobar1", "D");
251 }
252 
253 TEST(RedactorTest, InputSize) {
254  TempRulesFile rules_file(
255  "{"
256  " \"version\": 1,"
257  " \"rules\": ["
258  " {\"search\": \"[0-9]\", \"replace\": \"#\"}"
259  " ]"
260  "}");
261  string error = SetRedactionRulesFromFile(rules_file.name());
262  ASSERT_EQ("", error);
263  ASSERT_UNREDACTED("");
264  int buffer_size = 10000;
265  char buffer[buffer_size];
266  RandomlyFillString(buffer, buffer_size);
267  string message(buffer);
268  Redact(&message);
269  ASSERT_EQ(buffer_size - 1, message.length());
270  for (int i = 0; i < buffer_size; ++i) {
271  ASSERT_TRUE(message[i] < '0' || '9' < message[i])
272  << "Number " << message[i] << " should be replaced with #";
273  }
274 }
275 
276 TEST(RedactorTest, ChangeInputSize) {
277  TempRulesFile rules_file(
278  "{"
279  " \"version\": 1,"
280  " \"rules\": ["
281  " {\"search\": \"[A-Z]\", \"replace\": \"\"},"
282  " {\"trigger\": \"reduce\", \"search\": \"[0-9]+\", \"replace\": \"#\"},"
283  " {\"trigger\": \"add\", \"search\": \"[0-9]\", \"replace\": \"####\"}"
284  " ]"
285  "}");
286  string error = SetRedactionRulesFromFile(rules_file.name());
287  ASSERT_EQ("", error);
288  ASSERT_REDACTED_EQ("AAAAAAA", "");
289  ASSERT_REDACTED_EQ("reduce1234", "reduce#");
290  ASSERT_REDACTED_EQ("add1234", "add################");
291 }
292 
293 TEST(RedactorTest, MultiThreaded) {
294  TempRulesFile rules_file(
295  "{"
296  " \"version\": 1,"
297  " \"rules\": ["
298  " {\"search\": \"0\", \"replace\": \"#\"},"
299  " {\"search\": \"1\", \"replace\": \"#\"},"
300  " {\"search\": \"2\", \"replace\": \"#\"},"
301  " {\"search\": \"3\", \"replace\": \"#\"},"
302  " {\"search\": \"4\", \"replace\": \"#\"},"
303  " {\"search\": \"5\", \"replace\": \"#\"},"
304  " {\"search\": \"6\", \"replace\": \"#\"},"
305  " {\"search\": \"7\", \"replace\": \"#\"},"
306  " {\"trigger\": \"8\", \"search\": \"8\", \"replace\": \"#\"},"
307  " {\"trigger\": \"9\", \"search\": \"9\", \"replace\": \"#\"}"
308  " ]"
309  "}");
310  string error = SetRedactionRulesFromFile(rules_file.name());
311  ASSERT_EQ("", error);
312 
313  int processor_count = sysconf(_SC_NPROCESSORS_ONLN);
314  int worker_count = 2 * processor_count;
315  pthread_t worker_ids[worker_count];
316  for (int i = 0; i < worker_count; ++i) {
317  int status = pthread_create(worker_ids + i, NULL, MultiThreadWorkload, NULL);
318  ASSERT_EQ(0, status);
319  }
320  for (int i = 0; i < worker_count; ++i) {
321  int status = pthread_join(worker_ids[i], NULL);
322  ASSERT_EQ(0, status);
323  }
324 }
325 
326 }
327 
328 int main(int argc, char **argv) {
329  // Disabled under ASAN, see IMPALA-1918
330 #ifndef ADDRESS_SANITIZER
331  ::testing::InitGoogleTest(&argc, argv);
332  return RUN_ALL_TESTS();
333 #endif
334 }
void * MultiThreadWorkload(void *unused)
int main(int argc, char **argv)
const char * name() const
Returns the absolute path to the file.
void Redact(string *value, bool *changed)
Definition: redactor.cc:309
TEST(AtomicTest, Basic)
Definition: atomic-test.cc:28
static Rules * g_rules
Definition: redactor.cc:93
string SetRedactionRulesFromFile(const string &rules_file_path)
Definition: redactor.cc:260
unsigned int RandSeed()
#define ASSERT_UNREDACTED(string)
void OverwriteContents(const std::string &contents)
#define ASSERT_REDACTED_EQ(actual, expected)
void RandomlyFillString(char *string, const int length)
Randomly fills the contents of 'string' up to the given length.