1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <gtest/gtest.h>
19 #include "arrow/memory_pool.h"
20 #include "arrow/status.h"
21 
22 #include "gandiva/projector.h"
23 #include "gandiva/tests/test_util.h"
24 #include "gandiva/tree_expr_builder.h"
25 
26 namespace gandiva {
27 
28 using arrow::boolean;
29 using arrow::date64;
30 using arrow::int32;
31 using arrow::int64;
32 using arrow::utf8;
33 
34 class TestUtf8 : public ::testing::Test {
35  public:
SetUp()36   void SetUp() { pool_ = arrow::default_memory_pool(); }
37 
38  protected:
39   arrow::MemoryPool* pool_;
40 };
41 
// Projects octet_length(a), (octet_length(a) == bit_length(a) / 8) and
// length(a) over a utf8 column containing a null and a multi-byte string.
TEST_F(TestUtf8, TestSimple) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int32());
  auto res_2 = field("res2", boolean());
  auto res_3 = field("res3", int32());

  // build expressions.
  // expr_a: octet_length(a)
  // expr_b: octet_length(a) == bit_length(a) / 8
  // expr_c: length(a)
  auto expr_a = TreeExprBuilder::MakeExpression("octet_length", {field_a}, res_1);

  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto octet_length = TreeExprBuilder::MakeFunction("octet_length", {node_a}, int32());
  auto literal_8 = TreeExprBuilder::MakeLiteral(static_cast<int32_t>(8));
  auto bit_length = TreeExprBuilder::MakeFunction("bit_length", {node_a}, int32());
  auto div_8 = TreeExprBuilder::MakeFunction("divide", {bit_length, literal_8}, int32());
  auto is_equal =
      TreeExprBuilder::MakeFunction("equal", {octet_length, div_8}, boolean());
  auto expr_b = TreeExprBuilder::MakeExpression(is_equal, res_2);
  auto expr_c = TreeExprBuilder::MakeExpression("length", {field_a}, res_3);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below, so stop here if
  // construction failed instead of crashing on a null pointer.
  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(schema, {expr_a, expr_b, expr_c}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third row is null)
  int num_records = 5;
  auto array_a = MakeArrowArrayUtf8({"foo", "hello", "bye", "hi", "मदन"},
                                    {true, true, false, true, true});

  // expected output; the last string is 9 bytes long but 3 characters long
  auto exp_1 = MakeArrowArrayInt32({3, 5, 0, 2, 9}, {true, true, false, true, true});
  auto exp_2 = MakeArrowArrayBool({true, true, false, true, true},
                                  {true, true, false, true, true});
  auto exp_3 = MakeArrowArrayInt32({3, 5, 0, 2, 3}, {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(exp_3, outputs.at(2));
}
98 
// Compares a utf8 column against a string literal with "equal".
TEST_F(TestUtf8, TestLiteral) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // a == literal("hello")
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("hello");
  auto is_equal = TreeExprBuilder::MakeFunction("equal", {node_a, literal_s}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_equal, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (last row is null)
  int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});

  // expected output: only "hello" matches; the null input row stays null
  auto exp = MakeArrowArrayBool({false, true, false, false}, {true, true, true, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
139 
// Compares a utf8 column against a null utf8 literal; every output row is
// expected to be null.
TEST_F(TestUtf8, TestNullLiteral) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // a == literal(null)
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_null = TreeExprBuilder::MakeNull(arrow::utf8());
  auto is_equal =
      TreeExprBuilder::MakeFunction("equal", {node_a, literal_null}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_equal, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});

  // expected output: all rows null
  auto exp =
      MakeArrowArrayBool({false, false, false, false}, {false, false, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
182 
// Matches a utf8 column against the pattern "%spark%" with "like".
TEST_F(TestUtf8, TestLike) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // like(a, literal("%spark%"))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("%spark%");
  auto is_like = TreeExprBuilder::MakeFunction("like", {node_a, literal_s}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "spark"},
                                    {true, true, true, true});

  // expected output: every row containing "spark" matches
  auto exp = MakeArrowArrayBool({false, true, true, true}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
223 
// Matches a utf8 column with the three-argument "like", where a backslash
// escape character makes the middle '%' of the pattern a literal percent sign.
TEST_F(TestUtf8, TestLikeWithEscape) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // like(a, literal(pattern), literal(escape_char))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_s = TreeExprBuilder::MakeStringLiteral("%pa\\%rk%");
  auto escape_char = TreeExprBuilder::MakeStringLiteral("\\");
  auto is_like =
      TreeExprBuilder::MakeFunction("like", {node_a, literal_s, escape_char}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "spa%rkle", "bright spa%rk and fire", "spark"}, {true, true, true, true});

  // expected output: only the rows containing the literal text "pa%rk" match
  auto exp = MakeArrowArrayBool({false, true, true, false}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
266 
// Exercises "like" with a prefix pattern ("spark%") and a suffix pattern
// ("%spark") in the same projector.
TEST_F(TestUtf8, TestBeginsEnds) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res1 = field("res1", boolean());
  auto res2 = field("res2", boolean());

  // build expressions.
  // like(a, literal("spark%"))
  // like(a, literal("%spark"))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto literal_begin = TreeExprBuilder::MakeStringLiteral("spark%");
  auto is_like1 =
      TreeExprBuilder::MakeFunction("like", {node_a, literal_begin}, boolean());
  auto expr1 = TreeExprBuilder::MakeExpression(is_like1, res1);

  auto literal_end = TreeExprBuilder::MakeStringLiteral("%spark");
  auto is_like2 = TreeExprBuilder::MakeFunction("like", {node_a, literal_end}, boolean());
  auto expr2 = TreeExprBuilder::MakeExpression(is_like2, res2);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr1, expr2}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a =
      MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "fiery spark"},
                         {true, true, true, true});

  // expected output: exp1 for the prefix pattern, exp2 for the suffix pattern
  auto exp1 = MakeArrowArrayBool({false, true, false, false}, {true, true, true, true});
  auto exp2 = MakeArrowArrayBool({false, false, false, true}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp1, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp2, outputs.at(1));
}
317 
// Feeds the string result of upper(a) into "like", so an intermediate
// string value is produced inside the expression.
TEST_F(TestUtf8, TestInternalAllocs) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // like(upper(a), literal("%SPARK%"))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto upper_a = TreeExprBuilder::MakeFunction("upper", {node_a}, utf8());
  auto literal_spark = TreeExprBuilder::MakeStringLiteral("%SPARK%");
  auto is_like =
      TreeExprBuilder::MakeFunction("like", {upper_a, literal_spark}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(is_like, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third row is null)
  int num_records = 5;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
      {true, true, false, true, true});

  // expected output
  auto exp = MakeArrowArrayBool({false, true, false, true, false},
                                {true, true, false, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
362 
// Casts strings to dates with castDATE and extracts the year. The first batch
// contains an invalid date and must fail with an execution error; the second
// batch holds only valid dates and must succeed.
TEST_F(TestUtf8, TestCastDate) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  // extractYear(castDATE(a))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto cast_function = TreeExprBuilder::MakeFunction("castDATE", {node_a}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch whose last (non-null) row is not a valid date
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression: expected to fail, so no output array is compared.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "Not a valid date value ";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos)
      << status.message();

  // Second batch: all rows valid; the same projector must now succeed.
  auto array_a_2 = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "67-1-1", "91-1-1"},
                                      {true, true, true, true});
  auto exp_2 = MakeArrowArrayInt64({1967, 2067, 2067, 1991}, {true, true, true, true});
  auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
  arrow::ArrayVector outputs2;
  status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
413 
// Parses strings with to_date(a, "YYYY-MM-DD", 1) and extracts the year.
// With the third argument set to 1, the unparsable row is expected to come
// back as null instead of failing the evaluation.
TEST_F(TestUtf8, TestToDateNoError) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  // extractYear(to_date(a, "YYYY-MM-DD", 1))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
  auto node_c = TreeExprBuilder::MakeLiteral(1);

  auto cast_function =
      TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // expected output: the null input and the invalid "67-45-11" are both null
  auto exp_1 = MakeArrowArrayInt64({1967, 67, 0, 0}, {true, true, false, false});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();
  EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));

  // Second batch: every row is a valid date
  auto array_a_2 = MakeArrowArrayUtf8(
      {"1967-12-1", "1967-12-01", "1967-11-11", "1991-11-11"}, {true, true, true, true});
  auto exp_2 = MakeArrowArrayInt64({1967, 1967, 1967, 1991}, {true, true, true, true});
  auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
  arrow::ArrayVector outputs2;
  status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
468 
// Parses strings with to_date(a, "YYYY-MM-DD", 0). With the third argument
// set to 0, the unparsable row is expected to fail the evaluation with an
// execution error that names the offending value.
TEST_F(TestUtf8, TestToDateError) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // output fields
  auto res_1 = field("res1", int64());

  // build expressions.
  // extractYear(to_date(a, "YYYY-MM-DD", 0))
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
  auto node_c = TreeExprBuilder::MakeLiteral(0);

  auto cast_function =
      TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
  auto extract_year =
      TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
  auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch whose last (non-null) row is not a valid date
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression: expected to fail, so no output array is compared.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  EXPECT_EQ(status.code(), StatusCode::ExecutionError);
  std::string expected_error = "Error parsing value 67-45-11 for given format";
  EXPECT_TRUE(status.message().find(expected_error) != std::string::npos)
      << status.message();
}
513 
// Projects isnull(a) and isnotnull(a) over a utf8 column with one null row.
TEST_F(TestUtf8, TestIsNull) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto schema = arrow::schema({field_a});

  // build expressions
  auto exprs = std::vector<ExpressionPtr>{
      TreeExprBuilder::MakeExpression("isnull", {field_a}, field("is_null", boolean())),
      TreeExprBuilder::MakeExpression("isnotnull", {field_a},
                                      field("is_not_null", boolean())),
  };

  // Build a projector for the expressions.
  // Use a fatal gtest assertion so a failure is reported as a test failure
  // and the projector is never dereferenced when construction failed.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third row is null)
  int num_records = 4;
  auto array_a = MakeArrowArrayUtf8({"hello", "world", "incorrect", "universe"},
                                    {true, true, false, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

  // Evaluate expression. The status must be checked before indexing outputs:
  // outputs[0] / outputs[1] are unchecked accesses.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();
  ASSERT_EQ(outputs.size(), exprs.size());

  // validate results
  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, false, true, false}),
                            outputs[0]);  // isnull
  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, true, false, true}),
                            outputs[1]);  // isnotnull
}
549 
// Checks that an if/else whose branches are string literals can be projected
// into a utf8 output column.
TEST_F(TestUtf8, TestVarlenOutput) {
  // Input: a single boolean column that drives the condition.
  auto cond_field = field("a", boolean());
  auto input_schema = arrow::schema({cond_field});

  // Expression under test: if (a) "hi" else "bye"
  auto cond_node = TreeExprBuilder::MakeField(cond_field);
  auto then_node = TreeExprBuilder::MakeStringLiteral("hi");
  auto else_node = TreeExprBuilder::MakeStringLiteral("bye");
  auto if_else = TreeExprBuilder::MakeIf(cond_node, then_node, else_node, utf8());
  auto projection = TreeExprBuilder::MakeExpression(if_else, field("res", utf8()));

  // Building the projector must succeed.
  std::shared_ptr<Projector> projector;
  ASSERT_OK(
      Projector::Make(input_schema, {projection}, TestConfiguration(), &projector));

  // Sample batch: the last condition value is null.
  int row_count = 4;
  auto conditions =
      MakeArrowArrayBool({true, false, false, false}, {true, true, true, false});
  auto batch = arrow::RecordBatch::Make(input_schema, row_count, {conditions});

  // Run the projection.
  arrow::ArrayVector results;
  ASSERT_OK(projector->Evaluate(*batch, pool_, &results));

  // Every row is expected to be non-null; only the first selects "hi".
  auto expected =
      MakeArrowArrayUtf8({"hi", "bye", "bye", "bye"}, {true, true, true, true});

  // Compare against the first (and only) output column.
  EXPECT_ARROW_ARRAY_EQUALS(expected, results.at(0));
}
584 
// Runs convert_replaceUTF8(a, "z") over byte strings containing invalid UTF-8
// sequences and compares the result with a column of expected strings.
TEST_F(TestUtf8, TestConvertUtf8) {
  // schema for input fields
  auto field_a = field("a", arrow::binary());
  auto field_c = field("c", utf8());
  auto schema = arrow::schema({field_a, field_c});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // convert_replaceUTF8(a, "z") == c
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_c = TreeExprBuilder::MakeField(field_c);

  // define char to replace
  auto node_b = TreeExprBuilder::MakeStringLiteral("z");

  auto convert_replace_utf8 =
      TreeExprBuilder::MakeFunction("convert_replaceUTF8", {node_a, node_b}, utf8());
  auto equals =
      TreeExprBuilder::MakeFunction("equal", {convert_replace_utf8, node_c}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(equals, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  // NOTE(review): field "a" is declared as binary but the array is built with
  // the utf8 helper - confirm this mismatch is intentional.
  int num_records = 3;
  auto array_a = MakeArrowArrayUtf8({"ok-\xf8\x28"
                                     "-a",
                                     "all-valid", "ok-\xa0\xa1-valid"},
                                    {true, true, true});

  // values for column "c": the expected strings after replacement
  auto array_b =
      MakeArrowArrayUtf8({"ok-z(-a", "all-valid", "ok-zz-valid"}, {true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b});

  // Evaluate expression. The status must be fatal-checked because outputs[0]
  // below is an unchecked access.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  auto exp = MakeArrowArrayBool({true, true, true}, {true, true, true});
  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
634 
// Runs castVARCHAR(a, 10) and compares the result with a column of expected
// strings.
TEST_F(TestUtf8, TestCastVarChar) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto field_c = field("c", utf8());
  auto schema = arrow::schema({field_a, field_c});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  // castVARCHAR(a, 10) == c
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_c = TreeExprBuilder::MakeField(field_c);
  // truncates the string to input length
  auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
  auto cast_varchar =
      TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8());
  auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(equals, res);

  // Build a projector for the expressions.
  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data (third row of "a" is null)
  int num_records = 5;
  auto array_a = MakeArrowArrayUtf8(
      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
      {true, true, false, true, true});

  // values for column "c": the strings the cast is expected to produce
  auto array_b =
      MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
                         {true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b});

  // Evaluate expression. The status must be fatal-checked because outputs[0]
  // below is an unchecked access.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // the null input row yields a null comparison result
  auto exp = MakeArrowArrayBool({true, true, false, true, true},
                                {true, true, false, true, true});
  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
682 
// Projects ascii(f0): the expected values are the code of each string's first
// character, and 0 for the empty string.
TEST_F(TestUtf8, TestAscii) {
  // schema for input fields
  auto field0 = field("f0", arrow::utf8());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_asc = field("ascii", arrow::int32());

  // Build expression
  auto asc_expr = TreeExprBuilder::MakeExpression("ascii", {field0}, field_asc);

  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {asc_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 6;
  auto array0 = MakeArrowArrayUtf8({"ABC", "", "abc", "Hello World", "123", "999"},
                                   {true, true, true, true, true, true});
  // expected output
  auto exp_asc =
      MakeArrowArrayInt32({65, 0, 97, 72, 49, 57}, {true, true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_asc, outputs.at(0));
}
717 
// Projects space(f0): the expected output is a string of f0 spaces, and an
// empty string for zero or negative counts.
TEST_F(TestUtf8, TestSpace) {
  // schema for input fields
  auto field0 = field("f0", arrow::int64());
  auto schema = arrow::schema({field0});

  // output fields
  auto field_space = field("space", arrow::utf8());

  // Build expression
  auto space_expr = TreeExprBuilder::MakeExpression("space", {field0}, field_space);

  // Fatal assertion: the projector is dereferenced below.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {space_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto array0 = MakeArrowArrayInt64({1, 0, -5, 2}, {true, true, true, true});
  // expected output
  auto exp_space = MakeArrowArrayUtf8({" ", "", "", "  "}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Evaluate expression; outputs are only meaningful if this succeeds.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_space, outputs.at(0));
}
750 
751 }  // namespace gandiva
752