1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include "arrow/memory_pool.h"
20 #include "arrow/status.h"
21
22 #include "gandiva/projector.h"
23 #include "gandiva/tests/test_util.h"
24 #include "gandiva/tree_expr_builder.h"
25
26 namespace gandiva {
27
28 using arrow::boolean;
29 using arrow::date64;
30 using arrow::int32;
31 using arrow::int64;
32 using arrow::utf8;
33
34 class TestUtf8 : public ::testing::Test {
35 public:
SetUp()36 void SetUp() { pool_ = arrow::default_memory_pool(); }
37
38 protected:
39 arrow::MemoryPool* pool_;
40 };
41
// Checks octet_length(), bit_length() and length() on a utf8 column that contains
// ASCII values, a null slot and a multi-byte string.
TEST_F(TestUtf8, TestSimple) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int32());
  auto res_2 = field("res2", boolean());
  auto res_3 = field("res3", int32());

  // build expressions.
  //   res1 = octet_length(a)
  //   res2 = octet_length(a) == bit_length(a) / 8
  //   res3 = length(a)
  auto expr_a = TreeExprBuilder::MakeExpression("octet_length", {field_a}, res_1);

  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto octet_length = TreeExprBuilder::MakeFunction("octet_length", {node_a}, int32());
  auto literal_8 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(8));
  auto bit_length = TreeExprBuilder::MakeFunction("bit_length", {node_a}, int32());
  auto div_8 = TreeExprBuilder::MakeFunction("divide", {bit_length, literal_8}, int32());
  auto is_equal =
      TreeExprBuilder::MakeFunction("equal", {octet_length, div_8}, boolean());
  auto expr_b = TreeExprBuilder::MakeExpression(is_equal, res_2);
  auto expr_c = TreeExprBuilder::MakeExpression("length", {field_a}, res_3);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(schema, {expr_a, expr_b, expr_c}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third slot is null)
  const int num_records = 5;
  auto array_a = MakeArrowArrayUtf8({"foo", "hello", "bye", "hi", "मदन"},
                                    {true, true, false, true, true});

  // expected output: the last value is 9 bytes long but only 3 characters long
  auto exp_1 = MakeArrowArrayInt32({3, 5, 0, 2, 9}, {true, true, false, true, true});
  auto exp_2 = MakeArrowArrayBool({true, true, false, true, true},
                                  {true, true, false, true, true});
  auto exp_3 = MakeArrowArrayInt32({3, 5, 0, 2, 3}, {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(exp_3, outputs.at(2));
}
98
// Compares a utf8 column against a string literal with equal().
TEST_F(TestUtf8, TestLiteral) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = (a == "hello")
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("hello");
  auto is_equal = TreeExprBuilder::MakeFunction("equal", {node_a, literal_s}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_equal, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (last slot is null)
  const int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});

  // expected output: only the second row matches; the null input stays null
  auto exp = MakeArrowArrayBool({false, true, false, false}, {true, true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
139
// Compares a utf8 column against a null utf8 literal; every result slot is
// expected to be null.
TEST_F(TestUtf8, TestNullLiteral) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = (a == null)
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_null = TreeExprBuilder::MakeNull(arrow::utf8());
  auto is_equal =
      TreeExprBuilder::MakeFunction("equal", {node_a, literal_null}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_equal, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (last slot is null)
  const int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});

  // expected output: all slots null, regardless of the input validity
  auto exp =
      MakeArrowArrayBool({false, false, false, false}, {false, false, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
182
// Matches a utf8 column against the pattern "%spark%" using like().
TEST_F(TestUtf8, TestLike) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = like(a, "%spark%")   -- the column comes first, the pattern second
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("%spark%");
  auto is_like = TreeExprBuilder::MakeFunction("like", {node_a, literal_s}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "spark"},
                                    {true, true, true, true});

  // expected output: every value containing "spark" matches
  auto exp = MakeArrowArrayBool({false, true, true, true}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
223
// Matches a utf8 column with the three-argument like(), where a backslash escapes
// the '%' wildcard so that it has to match a literal percent sign.
TEST_F(TestUtf8, TestLikeWithEscape) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = like(a, pattern, escape_char)
  // The pattern text is: %pa, backslash, %rk% ; the escape character is a backslash.
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("%pa\\%rk%");
  auto escape_char = TreeExprBuilder::MakeStringLiteral("\\");
  auto is_like =
      TreeExprBuilder::MakeFunction("like", {node_a, literal_s, escape_char}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "spa%rkle", "bright spa%rk and fire", "spark"}, {true, true, true, true});

  // expected output: only the values containing the literal text "pa%rk" match
  auto exp = MakeArrowArrayBool({false, true, true, false}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
266
// Exercises like() with a prefix pattern ("spark%") and a suffix pattern ("%spark").
TEST_F(TestUtf8, TestBeginsEnds) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res1 = field("res1", boolean());
  auto res2 = field("res2", boolean());

  // build expressions.
  //   res1 = like(a, "spark%")
  //   res2 = like(a, "%spark")
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_begin = TreeExprBuilder::MakeStringLiteral("spark%");
  auto is_like1 =
      TreeExprBuilder::MakeFunction("like", {node_a, literal_begin}, boolean());
  auto expr1 = TreeExprBuilder::MakeExpression(is_like1, res1);

  auto literal_end = TreeExprBuilder::MakeStringLiteral("%spark");
  auto is_like2 = TreeExprBuilder::MakeFunction("like", {node_a, literal_end}, boolean());
  auto expr2 = TreeExprBuilder::MakeExpression(is_like2, res2);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr1, expr2}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "fiery spark"},
                         {true, true, true, true});

  // expected output: exp1 marks values starting with "spark", exp2 those ending in it
  auto exp1 = MakeArrowArrayBool({false, true, false, false}, {true, true, true, true});
  auto exp2 = MakeArrowArrayBool({false, false, false, true}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp1, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp2, outputs.at(1));
}
317
// Feeds the result of upper(a) into like(), so the pattern is matched against an
// intermediate string value rather than directly against the input column.
TEST_F(TestUtf8, TestInternalAllocs) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = like(upper(a), "%SPARK%")
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto upper_a = TreeExprBuilder::MakeFunction("upper", {node_a}, utf8());
  auto literal_spark = TreeExprBuilder::MakeStringLiteral("%SPARK%");
  auto is_like =
      TreeExprBuilder::MakeFunction("like", {upper_a, literal_spark}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third slot is null)
  const int num_records = 5;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
      {true, true, false, true, true});

  // expected output: matching ignores the original case; the null slot stays null
  auto exp = MakeArrowArrayBool({false, true, false, true, false},
                                {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
362
// Casts utf8 values to date64 with castDATE() and extracts the year. The first batch
// contains an invalid date and must fail; the second batch must evaluate cleanly.
TEST_F(TestUtf8, TestCastDate) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  //   res1 = extractYear(castDATE(a))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto cast_function = TreeExprBuilder::MakeFunction("castDATE", {node_a}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate calls below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // First batch: the last valid value ("67-45-11") is not a legal date.
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluation is expected to fail with an execution error, so no expected output
  // array is built for this batch.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "Not a valid date value ";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos)
      << status.message();

  // Second batch: every value is a parsable date.
  auto array_a_2 = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "67-1-1", "91-1-1"},
                                      {true, true, true, true});
  auto exp_2 = MakeArrowArrayInt64({1967, 2067, 2067, 1991}, {true, true, true, true});
  auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
  arrow::ArrayVector outputs2;
  status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
413
// Parses utf8 values with to_date(a, "YYYY-MM-DD", 1) and extracts the year. With the
// third argument set to 1 an unparsable value is expected to produce a null result
// instead of failing the evaluation.
TEST_F(TestUtf8, TestToDateNoError) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  //   res1 = extractYear(to_date(a, "YYYY-MM-DD", 1))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
  auto node_c = TreeExprBuilder::MakeLiteral(1);

  auto cast_function =
      TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate calls below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // First batch: contains a null slot and an unparsable value ("67-45-11").
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // expected output: the null input and the unparsable value both come back null
  auto exp_1 = MakeArrowArrayInt64({1967, 67, 0, 0}, {true, true, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();
  EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));

  // Second batch: every value is a parsable date.
  auto array_a_2 = MakeArrowArrayUtf8(
      {"1967-12-1", "1967-12-01", "1967-11-11", "1991-11-11"}, {true, true, true, true});
  auto exp_2 = MakeArrowArrayInt64({1967, 1967, 1967, 1991}, {true, true, true, true});
  auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
  arrow::ArrayVector outputs2;
  status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
468
// Parses utf8 values with to_date(a, "YYYY-MM-DD", 0) and checks that an unparsable
// value makes the evaluation fail with an execution error.
TEST_F(TestUtf8, TestToDateError) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  //   res1 = extractYear(to_date(a, "YYYY-MM-DD", 0))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
  auto node_c = TreeExprBuilder::MakeLiteral(0);

  auto cast_function =
      TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch whose last value ("67-45-11") cannot be parsed.
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluation is expected to fail, so no expected output array is built.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "Error parsing value 67-45-11 for given format";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos)
      << status.message();
}
513
// Checks isnull() and isnotnull() on a utf8 column that has one null slot.
TEST_F(TestUtf8, TestIsNull) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // build expressions
  auto exprs = std::vector<ExpressionPtr>{
      TreeExprBuilder::MakeExpression("isnull", {field_a}, field("is_null", boolean())),
      TreeExprBuilder::MakeExpression("isnotnull", {field_a},
                                      field("is_not_null", boolean())),
  };

  // Build a projector for the expressions. A test assertion is used instead of a
  // debug check so that a failure is reported through the test framework.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
  ASSERT_OK(status);

  // Create a row-batch with some sample data (third slot is null)
  const int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"hello", "world", "incorrect", "universe"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs[0] and outputs[1] are only read on success.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_OK(status);

  // validate results
  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, false, true, false}),
                            outputs[0]);  // isnull
  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, true, false, true}),
                            outputs[1]);  // isnotnull
}
549
// Projects an if/else over a boolean column where both branches are string
// literals, so the projector produces a variable-length (utf8) output column.
TEST_F(TestUtf8, TestVarlenOutput) {
  // single boolean input column
  auto field_a = field("a", boolean());
  auto schema = arrow::schema({field_a});

  // expression: if (a) "hi" else "bye"
  auto condition = TreeExprBuilder::MakeField(field_a);
  auto then_literal = TreeExprBuilder::MakeStringLiteral("hi");
  auto else_literal = TreeExprBuilder::MakeStringLiteral("bye");
  auto if_node = TreeExprBuilder::MakeIf(condition, then_literal, else_literal, utf8());
  auto expr = TreeExprBuilder::MakeExpression(if_node, field("res", utf8()));

  // Building the projector must succeed.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector));

  // Input batch: four conditions, the last one null.
  int num_records = 4;
  auto array_in =
      MakeArrowArrayBool({true, false, false, false}, {true, true, true, false});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_in});

  // Run the projection.
  arrow::ArrayVector outputs;
  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));

  // Every output slot is expected to be valid; the null condition yields "bye".
  auto exp = MakeArrowArrayUtf8({"hi", "bye", "bye", "bye"}, {true, true, true, true});

  // Compare against the projected column.
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
584
// Runs convert_replaceUTF8(a, "z") over values containing invalid UTF-8 bytes and
// compares the result with a column holding the expected replaced strings.
TEST_F(TestUtf8, TestConvertUtf8) {
  // schema for input fields: "a" is the raw input, "c" the expected converted text
  auto field_a = field("a", arrow::binary());
  auto field_c = field("c", utf8());
  auto schema = arrow::schema({field_a, field_c});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = (convert_replaceUTF8(a, "z") == c)
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_c = TreeExprBuilder::MakeField(field_c);

  // define char to replace
  auto node_b = TreeExprBuilder::MakeStringLiteral("z");

  auto convert_replace_utf8 =
      TreeExprBuilder::MakeFunction("convert_replaceUTF8", {node_a, node_b}, utf8());
  auto equals =
      TreeExprBuilder::MakeFunction("equal", {convert_replace_utf8, node_c}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(equals, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data. The first and third values contain
  // bytes that are not valid UTF-8.
  // NOTE(review): field "a" is declared as binary but this column is built with
  // MakeArrowArrayUtf8 - confirm the type mismatch is intentional.
  const int num_records = 3;
  auto array_a = MakeArrowArrayUtf8({"ok-\xf8\x28"
                                     "-a",
                                     "all-valid", "ok-\xa0\xa1-valid"},
                                    {true, true, true});

  // column "c": the same values with each invalid byte replaced by "z"
  auto array_c =
      MakeArrowArrayUtf8({"ok-z(-a", "all-valid", "ok-zz-valid"}, {true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_c});

  // Evaluate expression; outputs[0] is only read if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  auto exp = MakeArrowArrayBool({true, true, true}, {true, true, true});
  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
}
634
// Runs castVARCHAR(a, 10) and compares the result with a column holding the
// expected, possibly truncated, strings.
TEST_F(TestUtf8, TestCastVarChar) {
  // schema for input fields: "a" is the input, "c" the expected cast result
  auto field_a = field("a", utf8());
  auto field_c = field("c", utf8());
  auto schema = arrow::schema({field_a, field_c});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  //   res = (castVARCHAR(a, 10) == c)
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_c = TreeExprBuilder::MakeField(field_c);
  // truncates the string to input length
  auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
  auto cast_varchar =
      TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8());
  auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(equals, res);

  // Build a projector for the expressions. This check is fatal: without a projector
  // the Evaluate call below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third slot of "a" is null)
  const int num_records = 5;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
      {true, true, false, true, true});

  // column "c": "fiery SPARK" is expected to be cut down to "fiery SPAR"
  auto array_c =
      MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
                         {true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_c});

  // Evaluate expression; outputs[0] is only read if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // The null input row produces a null comparison result.
  auto exp = MakeArrowArrayBool({true, true, false, true, true},
                                {true, true, false, true, true});
  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
}
682
// Checks ascii() on a utf8 column: the expected value is the code of the first
// character, and 0 for an empty string.
TEST_F(TestUtf8, TestAscii) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_asc = field("ascii", arrow::int32());

  // Build expression
  auto asc_expr = TreeExprBuilder::MakeExpression("ascii", {field0}, field_asc);

  // Build a projector. This check is fatal: without a projector the Evaluate call
  // below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {asc_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 6;
  auto array0 = MakeArrowArrayUtf8({"ABC", "", "abc", "Hello World", "123", "999"},
                                   {true, true, true, true, true, true});
  // expected output
  auto exp_asc =
      MakeArrowArrayInt32({65, 0, 97, 72, 49, 57}, {true, true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_asc, outputs.at(0));
}
717
// Checks space(n) on an int64 column: the expected result is a string of n spaces,
// and an empty string for zero or negative n.
TEST_F(TestUtf8, TestSpace) {
  // schema for input fields
  auto field0 = field("f0", arrow::int64());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_space = field("space", arrow::utf8());

  // Build expression
  auto space_expr = TreeExprBuilder::MakeExpression("space", {field0}, field_space);

  // Build a projector. This check is fatal: without a projector the Evaluate call
  // below cannot run.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {space_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  const int num_records = 4;
  auto array0 = MakeArrowArrayInt64({1, 0, -5, 2}, {true, true, true, true});
  // expected output: one space for 1, empty for 0 and -5, two spaces for 2
  auto exp_space = MakeArrowArrayUtf8({" ", "", "", "  "}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression; the outputs are only inspected if evaluation succeeded.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_space, outputs.at(0));
}
750
751 } // namespace gandiva
752