1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include <gtest/gtest.h>
19 #include <math.h>
20 #include <time.h>
21 #include "arrow/memory_pool.h"
22 #include "gandiva/precompiled/time_constants.h"
23 #include "gandiva/projector.h"
24 #include "gandiva/tests/test_util.h"
25 #include "gandiva/tree_expr_builder.h"
26 
27 namespace gandiva {
28 
29 using arrow::boolean;
30 using arrow::date32;
31 using arrow::date64;
32 using arrow::float32;
33 using arrow::int32;
34 using arrow::int64;
35 using arrow::timestamp;
36 
37 class TestProjector : public ::testing::Test {
38  public:
SetUp()39   void SetUp() { pool_ = arrow::default_memory_pool(); }
40 
41  protected:
42   arrow::MemoryPool* pool_;
43 };
44 
Epoch()45 time_t Epoch() {
46   // HACK: MSVC mktime() fails on UTC times before 1970-01-01 00:00:00.
47   // But it first converts its argument from local time to UTC time,
48   // so we ask for 1970-01-02 to avoid failing in timezones ahead of UTC.
49   struct tm y1970;
50   memset(&y1970, 0, sizeof(struct tm));
51   y1970.tm_year = 70;
52   y1970.tm_mon = 0;
53   y1970.tm_mday = 2;
54   y1970.tm_hour = 0;
55   y1970.tm_min = 0;
56   y1970.tm_sec = 0;
57   time_t epoch = mktime(&y1970);
58   if (epoch == static_cast<time_t>(-1)) {
59     ARROW_LOG(FATAL) << "mktime() failed";
60   }
61   // Adjust for the 24h offset above.
62   return epoch - 24 * 3600;
63 }
64 
// Converts a time of day given as hours, minutes, seconds and milliseconds
// into the number of milliseconds elapsed since midnight.
int32_t MillisInDay(int32_t hh, int32_t mm, int32_t ss, int32_t millis) {
  // hours -> minutes -> seconds -> milliseconds, folded into one expression.
  return ((hh * 60 + mm) * 60 + ss) * 1000 + millis;
}
71 
MillisSince(time_t base_line,int32_t yy,int32_t mm,int32_t dd,int32_t hr,int32_t min,int32_t sec,int32_t millis)72 int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
73                     int32_t min, int32_t sec, int32_t millis) {
74   struct tm given_ts;
75   memset(&given_ts, 0, sizeof(struct tm));
76   given_ts.tm_year = (yy - 1900);
77   given_ts.tm_mon = (mm - 1);
78   given_ts.tm_mday = dd;
79   given_ts.tm_hour = hr;
80   given_ts.tm_min = min;
81   given_ts.tm_sec = sec;
82 
83   time_t ts = mktime(&given_ts);
84   if (ts == static_cast<time_t>(-1)) {
85     ARROW_LOG(FATAL) << "mktime() failed";
86   }
87   // time_t is an arithmetic type on both POSIX and Windows, we can simply
88   // subtract to get a duration in seconds.
89   return static_cast<int64_t>(ts - base_line) * 1000 + millis;
90 }
91 
DaysSince(time_t base_line,int32_t yy,int32_t mm,int32_t dd,int32_t hr,int32_t min,int32_t sec,int32_t millis)92 int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
93                   int32_t min, int32_t sec, int32_t millis) {
94   struct tm given_ts;
95   memset(&given_ts, 0, sizeof(struct tm));
96   given_ts.tm_year = (yy - 1900);
97   given_ts.tm_mon = (mm - 1);
98   given_ts.tm_mday = dd;
99   given_ts.tm_hour = hr;
100   given_ts.tm_min = min;
101   given_ts.tm_sec = sec;
102 
103   time_t ts = mktime(&given_ts);
104   if (ts == static_cast<time_t>(-1)) {
105     ARROW_LOG(FATAL) << "mktime() failed";
106   }
107   // time_t is an arithmetic type on both POSIX and Windows, we can simply
108   // subtract to get a duration in seconds.
109   return static_cast<int32_t>(((ts - base_line) * 1000 + millis) / MILLIS_IN_DAY);
110 }
111 
// Checks isnull() on a date64 column and isnotnull() on a time32[ms] column.
TEST_F(TestProjector, TestIsNull) {
  auto d0 = field("d0", date64());
  auto t0 = field("t0", time32(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({d0, t0});

  // output fields
  auto b0 = field("isnull", boolean());

  // isnull on the date64 column, isnotnull on the time32 column
  auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0);
  auto isnotnull_expr = TreeExprBuilder::MakeExpression("isnotnull", {t0}, b0);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {isnull_expr, isnotnull_expr},
                                TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // input data: rows 0 and 2 are null in both columns
  int num_records = 4;
  std::vector<int64_t> d0_data = {0, 100, 0, 1000};
  std::vector<int32_t> t0_data = {0, 100, 0, 1000};
  std::vector<bool> validity = {false, true, false, true};
  auto d0_array =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), d0_data, validity);
  auto t0_array = MakeArrowTypeArray<arrow::Time32Type, int32_t>(
      time32(arrow::TimeUnit::MILLI), t0_data, validity);

  // expected output: the results themselves are never null
  auto exp_isnull =
      MakeArrowArrayBool({true, false, true, false}, {true, true, true, true});
  auto exp_isnotnull = MakeArrowArrayBool(validity, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array, t0_array});

  // Evaluate expression. Abort on failure: `outputs` would be unusable below.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnotnull, outputs.at(1));
}
155 
// Checks isnull() on a date32 column.
TEST_F(TestProjector, TestDate32IsNull) {
  auto d0 = field("d0", date32());
  auto schema = arrow::schema({d0});

  // output fields
  auto b0 = field("isnull", boolean());

  // isnull only; this test builds no isnotnull expression
  auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {isnull_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // input data: rows 0 and 2 are null
  int num_records = 4;
  std::vector<int32_t> d0_data = {0, 100, 0, 1000};
  std::vector<bool> validity = {false, true, false, true};
  auto d0_array =
      MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), d0_data, validity);

  // expected output: the results themselves are never null
  auto exp_isnull =
      MakeArrowArrayBool({true, false, true, false}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array});

  // Evaluate expression. Abort on failure: `outputs` would be unusable below.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0));
}
191 
TEST_F(TestProjector,TestDateTime)192 TEST_F(TestProjector, TestDateTime) {
193   auto field0 = field("f0", date64());
194   auto field1 = field("f1", date32());
195   auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
196   auto schema = arrow::schema({field0, field1, field2});
197 
198   // output fields
199   auto field_year = field("yy", int64());
200   auto field_month = field("mm", int64());
201   auto field_day = field("dd", int64());
202   auto field_hour = field("hh", int64());
203   auto field_date64 = field("date64", date64());
204 
205   // extract year and month from date
206   auto date2year_expr =
207       TreeExprBuilder::MakeExpression("extractYear", {field0}, field_year);
208   auto date2month_expr =
209       TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);
210 
211   // extract year and month from date32, cast to date64 first
212   auto node_f1 = TreeExprBuilder::MakeField(field1);
213   auto date32_to_date64_func =
214       TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64());
215 
216   auto date64_2year_func =
217       TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64());
218   auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year);
219 
220   auto date64_2month_func =
221       TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64());
222   auto date64_2month_expr =
223       TreeExprBuilder::MakeExpression(date64_2month_func, field_month);
224 
225   // extract month and day from timestamp
226   auto ts2month_expr =
227       TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
228   auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);
229 
230   std::shared_ptr<Projector> projector;
231   auto status = Projector::Make(schema,
232                                 {date2year_expr, date2month_expr, date64_2year_expr,
233                                  date64_2month_expr, ts2month_expr, ts2day_expr},
234                                 TestConfiguration(), &projector);
235   ASSERT_TRUE(status.ok());
236 
237   // Create a row-batch with some sample data
238   time_t epoch = Epoch();
239   int num_records = 4;
240   auto validity = {true, true, true, true};
241   std::vector<int64_t> field0_data = {MillisSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
242                                       MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
243                                       MillisSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
244                                       MillisSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
245   auto array0 =
246       MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity);
247 
248   std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
249                                       DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
250                                       DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
251                                       DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
252   auto array1 =
253       MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity);
254 
255   std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
256                                       MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0),
257                                       MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0),
258                                       MillisSince(epoch, 2015, 6, 29, 23, 0, 0, 0)};
259 
260   auto array2 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
261       arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity);
262 
263   // expected output
264   // date 2 year and date 2 month for date64
265   auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
266   auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
267 
268   // date 2 year and date 2 month for date32
269   auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
270   auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);
271 
272   // ts 2 month and ts 2 day
273   auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
274   auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);
275 
276   // prepare input record batch
277   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});
278 
279   // Evaluate expression
280   arrow::ArrayVector outputs;
281   status = projector->Evaluate(*in_batch, pool_, &outputs);
282   EXPECT_TRUE(status.ok());
283 
284   // Validate results
285   EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0));
286   EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1));
287   EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2));
288   EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3));
289   EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4));
290   EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5));
291 }
292 
// Checks extractMinute() and extractHour() on a time32[ms] column.
TEST_F(TestProjector, TestTime) {
  auto field0 = field("f0", time32(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({field0});

  // output fields
  auto field_min = field("mm", int64());
  auto field_hour = field("hh", int64());

  // extract minute and hour from time32
  auto time2min_expr =
      TreeExprBuilder::MakeExpression("extractMinute", {field0}, field_min);
  auto time2hour_expr =
      TreeExprBuilder::MakeExpression("extractHour", {field0}, field_hour);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {time2min_expr, time2hour_expr},
                                TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // create input data
  int num_records = 4;
  std::vector<bool> validity = {true, true, true, true};
  std::vector<int32_t> field_data = {
      MillisInDay(5, 35, 25, 0),  // 5:35:25
      MillisInDay(0, 59, 0, 0),   // 0:59:0
      MillisInDay(12, 30, 0, 0),  // 12:30:0
      MillisInDay(23, 0, 0, 0)    // 23:0:0
  };
  auto array = MakeArrowTypeArray<arrow::Time32Type, int32_t>(
      time32(arrow::TimeUnit::MILLI), field_data, validity);

  // expected output
  auto exp_min = MakeArrowArrayInt64({35, 59, 30, 0}, validity);
  auto exp_hour = MakeArrowArrayInt64({5, 0, 12, 23}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array});

  // Evaluate expression. Abort on failure: `outputs` would be unusable below.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_min, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_hour, outputs.at(1));
}
339 
TEST_F(TestProjector,TestTimestampDiff)340 TEST_F(TestProjector, TestTimestampDiff) {
341   auto f0 = field("f0", timestamp(arrow::TimeUnit::MILLI));
342   auto f1 = field("f1", timestamp(arrow::TimeUnit::MILLI));
343   auto schema = arrow::schema({f0, f1});
344 
345   // output fields
346   auto diff_seconds = field("ss", int32());
347 
348   // get diff
349   auto diff_secs_expr =
350       TreeExprBuilder::MakeExpression("timestampdiffSecond", {f0, f1}, diff_seconds);
351 
352   auto diff_mins_expr =
353       TreeExprBuilder::MakeExpression("timestampdiffMinute", {f0, f1}, diff_seconds);
354 
355   auto diff_hours_expr =
356       TreeExprBuilder::MakeExpression("timestampdiffHour", {f0, f1}, diff_seconds);
357 
358   auto diff_days_expr =
359       TreeExprBuilder::MakeExpression("timestampdiffDay", {f0, f1}, diff_seconds);
360 
361   auto diff_weeks_expr =
362       TreeExprBuilder::MakeExpression("timestampdiffWeek", {f0, f1}, diff_seconds);
363 
364   auto diff_months_expr =
365       TreeExprBuilder::MakeExpression("timestampdiffMonth", {f0, f1}, diff_seconds);
366 
367   auto diff_quarters_expr =
368       TreeExprBuilder::MakeExpression("timestampdiffQuarter", {f0, f1}, diff_seconds);
369 
370   auto diff_years_expr =
371       TreeExprBuilder::MakeExpression("timestampdiffYear", {f0, f1}, diff_seconds);
372 
373   std::shared_ptr<Projector> projector;
374   auto exprs = {diff_secs_expr,  diff_mins_expr,   diff_hours_expr,    diff_days_expr,
375                 diff_weeks_expr, diff_months_expr, diff_quarters_expr, diff_years_expr};
376   auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
377   ASSERT_TRUE(status.ok());
378 
379   time_t epoch = Epoch();
380 
381   // 2015-09-10T20:49:42.000
382   auto start_millis = MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 0);
383   // 2017-03-30T22:50:59.050
384   auto end_millis = MillisSince(epoch, 2017, 3, 30, 22, 50, 59, 50);
385   std::vector<int64_t> f0_data = {start_millis, end_millis,
386                                   // 2015-09-10T20:49:42.999
387                                   start_millis + 999,
388                                   // 2015-09-10T20:49:42.999
389                                   MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 999)};
390   std::vector<int64_t> f1_data = {end_millis, start_millis,
391                                   // 2015-09-10T20:49:42.999
392                                   start_millis + 999,
393                                   // 2015-09-9T21:49:42.999 (23 hours behind)
394                                   MillisSince(epoch, 2015, 9, 9, 21, 49, 42, 999)};
395 
396   int64_t num_records = f0_data.size();
397   std::vector<bool> validity(num_records, true);
398   auto array0 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
399       arrow::timestamp(arrow::TimeUnit::MILLI), f0_data, validity);
400   auto array1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
401       arrow::timestamp(arrow::TimeUnit::MILLI), f1_data, validity);
402 
403   // expected output
404   std::vector<ArrayPtr> exp_output;
405   exp_output.push_back(
406       MakeArrowArrayInt32({48996077, -48996077, 0, -23 * 3600}, validity));
407   exp_output.push_back(MakeArrowArrayInt32({816601, -816601, 0, -23 * 60}, validity));
408   exp_output.push_back(MakeArrowArrayInt32({13610, -13610, 0, -23}, validity));
409   exp_output.push_back(MakeArrowArrayInt32({567, -567, 0, 0}, validity));
410   exp_output.push_back(MakeArrowArrayInt32({81, -81, 0, 0}, validity));
411   exp_output.push_back(MakeArrowArrayInt32({18, -18, 0, 0}, validity));
412   exp_output.push_back(MakeArrowArrayInt32({6, -6, 0, 0}, validity));
413   exp_output.push_back(MakeArrowArrayInt32({1, -1, 0, 0}, validity));
414 
415   // prepare input record batch
416   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
417 
418   // Evaluate expression
419   arrow::ArrayVector outputs;
420   status = projector->Evaluate(*in_batch, pool_, &outputs);
421   EXPECT_TRUE(status.ok());
422 
423   // Validate results
424   for (uint32_t i = 0; i < exp_output.size(); i++) {
425     EXPECT_ARROW_ARRAY_EQUALS(exp_output.at(i), outputs.at(i));
426   }
427 }
428 
// Checks months_between() on pairs of date64 values.
TEST_F(TestProjector, TestMonthsBetween) {
  auto f0 = field("f0", arrow::date64());
  auto f1 = field("f1", arrow::date64());
  auto schema = arrow::schema({f0, f1});

  // output fields
  auto output = field("out", arrow::float64());

  auto months_between_expr =
      TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output);

  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(schema, {months_between_expr}, TestConfiguration(), &projector);
  // Attach the status text to the assertion instead of printing it
  // unconditionally to stdout.
  ASSERT_TRUE(status.ok()) << status.message();

  time_t epoch = Epoch();

  // Create a row-batch with some sample data
  int num_records = 4;
  std::vector<bool> validity = {true, true, true, true};
  std::vector<int64_t> f0_data = {MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 3, 31, 0, 0, 0, 0),
                                  MillisSince(epoch, 1996, 3, 31, 0, 0, 0, 0)};

  auto array0 =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity);

  std::vector<int64_t> f1_data = {MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 2, 28, 0, 0, 0, 0),
                                  MillisSince(epoch, 1996, 2, 29, 0, 0, 0, 0)};

  auto array1 =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f1_data, validity);

  // expected output
  auto exp_output = MakeArrowArrayFloat64({1.0, -1.0, 1.0, 1.0}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression. Abort on failure: `outputs` would be unusable below.
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
}
481 
482 }  // namespace gandiva
483