1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <gtest/gtest.h>
19 #include <math.h>
20 #include <time.h>
21 #include "arrow/memory_pool.h"
22 #include "gandiva/precompiled/time_constants.h"
23 #include "gandiva/projector.h"
24 #include "gandiva/tests/test_util.h"
25 #include "gandiva/tree_expr_builder.h"
26
27 namespace gandiva {
28
29 using arrow::boolean;
30 using arrow::date32;
31 using arrow::date64;
32 using arrow::float32;
33 using arrow::int32;
34 using arrow::int64;
35 using arrow::timestamp;
36
37 class TestProjector : public ::testing::Test {
38 public:
SetUp()39 void SetUp() { pool_ = arrow::default_memory_pool(); }
40
41 protected:
42 arrow::MemoryPool* pool_;
43 };
44
Epoch()45 time_t Epoch() {
46 // HACK: MSVC mktime() fails on UTC times before 1970-01-01 00:00:00.
47 // But it first converts its argument from local time to UTC time,
48 // so we ask for 1970-01-02 to avoid failing in timezones ahead of UTC.
49 struct tm y1970;
50 memset(&y1970, 0, sizeof(struct tm));
51 y1970.tm_year = 70;
52 y1970.tm_mon = 0;
53 y1970.tm_mday = 2;
54 y1970.tm_hour = 0;
55 y1970.tm_min = 0;
56 y1970.tm_sec = 0;
57 time_t epoch = mktime(&y1970);
58 if (epoch == static_cast<time_t>(-1)) {
59 ARROW_LOG(FATAL) << "mktime() failed";
60 }
61 // Adjust for the 24h offset above.
62 return epoch - 24 * 3600;
63 }
64
// Converts a wall-clock time (hh:mm:ss.millis) into the number of
// milliseconds elapsed since the start of the day.
int32_t MillisInDay(int32_t hh, int32_t mm, int32_t ss, int32_t millis) {
  const int32_t total_seconds = (hh * 60 + mm) * 60 + ss;
  return total_seconds * 1000 + millis;
}
71
MillisSince(time_t base_line,int32_t yy,int32_t mm,int32_t dd,int32_t hr,int32_t min,int32_t sec,int32_t millis)72 int64_t MillisSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
73 int32_t min, int32_t sec, int32_t millis) {
74 struct tm given_ts;
75 memset(&given_ts, 0, sizeof(struct tm));
76 given_ts.tm_year = (yy - 1900);
77 given_ts.tm_mon = (mm - 1);
78 given_ts.tm_mday = dd;
79 given_ts.tm_hour = hr;
80 given_ts.tm_min = min;
81 given_ts.tm_sec = sec;
82
83 time_t ts = mktime(&given_ts);
84 if (ts == static_cast<time_t>(-1)) {
85 ARROW_LOG(FATAL) << "mktime() failed";
86 }
87 // time_t is an arithmetic type on both POSIX and Windows, we can simply
88 // subtract to get a duration in seconds.
89 return static_cast<int64_t>(ts - base_line) * 1000 + millis;
90 }
91
DaysSince(time_t base_line,int32_t yy,int32_t mm,int32_t dd,int32_t hr,int32_t min,int32_t sec,int32_t millis)92 int32_t DaysSince(time_t base_line, int32_t yy, int32_t mm, int32_t dd, int32_t hr,
93 int32_t min, int32_t sec, int32_t millis) {
94 struct tm given_ts;
95 memset(&given_ts, 0, sizeof(struct tm));
96 given_ts.tm_year = (yy - 1900);
97 given_ts.tm_mon = (mm - 1);
98 given_ts.tm_mday = dd;
99 given_ts.tm_hour = hr;
100 given_ts.tm_min = min;
101 given_ts.tm_sec = sec;
102
103 time_t ts = mktime(&given_ts);
104 if (ts == static_cast<time_t>(-1)) {
105 ARROW_LOG(FATAL) << "mktime() failed";
106 }
107 // time_t is an arithmetic type on both POSIX and Windows, we can simply
108 // subtract to get a duration in seconds.
109 return static_cast<int32_t>(((ts - base_line) * 1000 + millis) / MILLIS_IN_DAY);
110 }
111
// Evaluates `isnull` on a date64 column and `isnotnull` on a time32[ms]
// column, using the same validity pattern for both inputs.
TEST_F(TestProjector, TestIsNull) {
  // input schema
  auto d0 = field("d0", date64());
  auto t0 = field("t0", time32(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({d0, t0});

  // output field (reused by both expressions)
  auto b0 = field("isnull", boolean());

  // isnull on the date column, isnotnull on the time column
  auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0);
  auto isnotnull_expr = TreeExprBuilder::MakeExpression("isnotnull", {t0}, b0);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {isnull_expr, isnotnull_expr},
                                TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // input data: rows 0 and 2 are null in both columns
  int num_records = 4;
  std::vector<int64_t> d0_data = {0, 100, 0, 1000};
  auto t0_data = {0, 100, 0, 1000};
  auto validity = {false, true, false, true};
  auto d0_array =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), d0_data, validity);
  auto t0_array = MakeArrowTypeArray<arrow::Time32Type, int32_t>(
      time32(arrow::TimeUnit::MILLI), t0_data, validity);

  // expected output: the results themselves are never null
  auto exp_isnull =
      MakeArrowArrayBool({true, false, true, false}, {true, true, true, true});
  auto exp_isnotnull = MakeArrowArrayBool(validity, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array, t0_array});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnotnull, outputs.at(1));
}
155
// Evaluates `isnull` on a date32 column.
TEST_F(TestProjector, TestDate32IsNull) {
  // input schema
  auto d0 = field("d0", date32());
  auto schema = arrow::schema({d0});

  // output field
  auto b0 = field("isnull", boolean());

  // isnull only; no isnotnull expression is built in this test
  auto isnull_expr = TreeExprBuilder::MakeExpression("isnull", {d0}, b0);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {isnull_expr}, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // input data: rows 0 and 2 are null
  int num_records = 4;
  std::vector<int32_t> d0_data = {0, 100, 0, 1000};
  auto validity = {false, true, false, true};
  auto d0_array =
      MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), d0_data, validity);

  // expected output: the results themselves are never null
  auto exp_isnull =
      MakeArrowArrayBool({true, false, true, false}, {true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {d0_array});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_isnull, outputs.at(0));
}
191
// Extracts year/month from a date64 column, year/month from a date32 column
// (cast to date64 via castDATE first), and month/day from a millisecond
// timestamp column.
TEST_F(TestProjector, TestDateTime) {
  // input schema
  auto field0 = field("f0", date64());
  auto field1 = field("f1", date32());
  auto field2 = field("f2", timestamp(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({field0, field1, field2});

  // output fields
  auto field_year = field("yy", int64());
  auto field_month = field("mm", int64());
  auto field_day = field("dd", int64());

  // extract year and month from date64
  auto date2year_expr =
      TreeExprBuilder::MakeExpression("extractYear", {field0}, field_year);
  auto date2month_expr =
      TreeExprBuilder::MakeExpression("extractMonth", {field0}, field_month);

  // extract year and month from date32, cast to date64 first
  auto node_f1 = TreeExprBuilder::MakeField(field1);
  auto date32_to_date64_func =
      TreeExprBuilder::MakeFunction("castDATE", {node_f1}, date64());

  auto date64_2year_func =
      TreeExprBuilder::MakeFunction("extractYear", {date32_to_date64_func}, int64());
  auto date64_2year_expr = TreeExprBuilder::MakeExpression(date64_2year_func, field_year);

  auto date64_2month_func =
      TreeExprBuilder::MakeFunction("extractMonth", {date32_to_date64_func}, int64());
  auto date64_2month_expr =
      TreeExprBuilder::MakeExpression(date64_2month_func, field_month);

  // extract month and day from timestamp
  auto ts2month_expr =
      TreeExprBuilder::MakeExpression("extractMonth", {field2}, field_month);
  auto ts2day_expr = TreeExprBuilder::MakeExpression("extractDay", {field2}, field_day);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema,
                                {date2year_expr, date2month_expr, date64_2year_expr,
                                 date64_2month_expr, ts2month_expr, ts2day_expr},
                                TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  time_t epoch = Epoch();
  int num_records = 4;
  auto validity = {true, true, true, true};

  // date64 column: milliseconds since the epoch base line
  std::vector<int64_t> field0_data = {MillisSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
                                      MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
                                      MillisSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
                                      MillisSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
  auto array0 =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), field0_data, validity);

  // date32 column: the same four dates, expressed in days
  std::vector<int32_t> field1_data = {DaysSince(epoch, 2000, 1, 1, 5, 0, 0, 0),
                                      DaysSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
                                      DaysSince(epoch, 2015, 6, 30, 20, 0, 0, 0),
                                      DaysSince(epoch, 2015, 7, 1, 20, 0, 0, 0)};
  auto array1 =
      MakeArrowTypeArray<arrow::Date32Type, int32_t>(date32(), field1_data, validity);

  // timestamp column: a different set of four instants
  std::vector<int64_t> field2_data = {MillisSince(epoch, 1999, 12, 31, 5, 0, 0, 0),
                                      MillisSince(epoch, 2000, 1, 2, 5, 0, 0, 0),
                                      MillisSince(epoch, 2015, 7, 1, 1, 0, 0, 0),
                                      MillisSince(epoch, 2015, 6, 29, 23, 0, 0, 0)};

  auto array2 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
      arrow::timestamp(arrow::TimeUnit::MILLI), field2_data, validity);

  // expected output
  // year and month for date64
  auto exp_yy_from_date64 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
  auto exp_mm_from_date64 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);

  // year and month for date32
  auto exp_yy_from_date32 = MakeArrowArrayInt64({2000, 1999, 2015, 2015}, validity);
  auto exp_mm_from_date32 = MakeArrowArrayInt64({1, 12, 6, 7}, validity);

  // month and day for timestamp
  auto exp_mm_from_ts = MakeArrowArrayInt64({12, 1, 7, 6}, validity);
  auto exp_dd_from_ts = MakeArrowArrayInt64({31, 2, 1, 29}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results (same order as the expressions given to Projector::Make)
  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date64, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date64, outputs.at(1));
  EXPECT_ARROW_ARRAY_EQUALS(exp_yy_from_date32, outputs.at(2));
  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_date32, outputs.at(3));
  EXPECT_ARROW_ARRAY_EQUALS(exp_mm_from_ts, outputs.at(4));
  EXPECT_ARROW_ARRAY_EQUALS(exp_dd_from_ts, outputs.at(5));
}
292
// Extracts the minute and the hour from a time32[ms] column.
TEST_F(TestProjector, TestTime) {
  // input schema
  auto field0 = field("f0", time32(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({field0});

  // output fields
  auto field_min = field("mm", int64());
  auto field_hour = field("hh", int64());

  // extract minute and hour from time32
  auto time2min_expr =
      TreeExprBuilder::MakeExpression("extractMinute", {field0}, field_min);
  auto time2hour_expr =
      TreeExprBuilder::MakeExpression("extractHour", {field0}, field_hour);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {time2min_expr, time2hour_expr},
                                TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  // create input data
  int num_records = 4;
  auto validity = {true, true, true, true};
  std::vector<int32_t> field_data = {
      MillisInDay(5, 35, 25, 0),  // 5:35:25
      MillisInDay(0, 59, 0, 0),   // 0:59:00
      MillisInDay(12, 30, 0, 0),  // 12:30:00
      MillisInDay(23, 0, 0, 0)    // 23:00:00
  };
  auto array = MakeArrowTypeArray<arrow::Time32Type, int32_t>(
      time32(arrow::TimeUnit::MILLI), field_data, validity);

  // expected output
  auto exp_min = MakeArrowArrayInt64({35, 59, 30, 0}, validity);
  auto exp_hour = MakeArrowArrayInt64({5, 0, 12, 23}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_min, outputs.at(0));
  EXPECT_ARROW_ARRAY_EQUALS(exp_hour, outputs.at(1));
}
339
// Evaluates the timestampdiff* family (second, minute, hour, day, week,
// month, quarter, year) on pairs of millisecond timestamps.
TEST_F(TestProjector, TestTimestampDiff) {
  // input schema
  auto f0 = field("f0", timestamp(arrow::TimeUnit::MILLI));
  auto f1 = field("f1", timestamp(arrow::TimeUnit::MILLI));
  auto schema = arrow::schema({f0, f1});

  // A single int32 output field is reused for every unit, not just seconds.
  auto diff_field = field("ss", int32());

  // one expression per unit
  auto diff_secs_expr =
      TreeExprBuilder::MakeExpression("timestampdiffSecond", {f0, f1}, diff_field);

  auto diff_mins_expr =
      TreeExprBuilder::MakeExpression("timestampdiffMinute", {f0, f1}, diff_field);

  auto diff_hours_expr =
      TreeExprBuilder::MakeExpression("timestampdiffHour", {f0, f1}, diff_field);

  auto diff_days_expr =
      TreeExprBuilder::MakeExpression("timestampdiffDay", {f0, f1}, diff_field);

  auto diff_weeks_expr =
      TreeExprBuilder::MakeExpression("timestampdiffWeek", {f0, f1}, diff_field);

  auto diff_months_expr =
      TreeExprBuilder::MakeExpression("timestampdiffMonth", {f0, f1}, diff_field);

  auto diff_quarters_expr =
      TreeExprBuilder::MakeExpression("timestampdiffQuarter", {f0, f1}, diff_field);

  auto diff_years_expr =
      TreeExprBuilder::MakeExpression("timestampdiffYear", {f0, f1}, diff_field);

  std::shared_ptr<Projector> projector;
  auto exprs = {diff_secs_expr,  diff_mins_expr,   diff_hours_expr,    diff_days_expr,
                diff_weeks_expr, diff_months_expr, diff_quarters_expr, diff_years_expr};
  auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
  ASSERT_TRUE(status.ok()) << status.message();

  time_t epoch = Epoch();

  // 2015-09-10T20:49:42.000
  auto start_millis = MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 0);
  // 2017-03-30T22:50:59.050
  auto end_millis = MillisSince(epoch, 2017, 3, 30, 22, 50, 59, 50);
  std::vector<int64_t> f0_data = {start_millis, end_millis,
                                  // 2015-09-10T20:49:42.999
                                  start_millis + 999,
                                  // 2015-09-10T20:49:42.999
                                  MillisSince(epoch, 2015, 9, 10, 20, 49, 42, 999)};
  std::vector<int64_t> f1_data = {end_millis, start_millis,
                                  // 2015-09-10T20:49:42.999 (same instant as f0)
                                  start_millis + 999,
                                  // 2015-09-09T21:49:42.999 (23 hours behind f0)
                                  MillisSince(epoch, 2015, 9, 9, 21, 49, 42, 999)};

  int64_t num_records = f0_data.size();
  std::vector<bool> validity(num_records, true);
  auto array0 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
      arrow::timestamp(arrow::TimeUnit::MILLI), f0_data, validity);
  auto array1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
      arrow::timestamp(arrow::TimeUnit::MILLI), f1_data, validity);

  // expected output, one array per expression in the order of `exprs`
  std::vector<ArrayPtr> exp_output;
  // seconds
  exp_output.push_back(
      MakeArrowArrayInt32({48996077, -48996077, 0, -23 * 3600}, validity));
  // minutes
  exp_output.push_back(MakeArrowArrayInt32({816601, -816601, 0, -23 * 60}, validity));
  // hours
  exp_output.push_back(MakeArrowArrayInt32({13610, -13610, 0, -23}, validity));
  // days
  exp_output.push_back(MakeArrowArrayInt32({567, -567, 0, 0}, validity));
  // weeks
  exp_output.push_back(MakeArrowArrayInt32({81, -81, 0, 0}, validity));
  // months
  exp_output.push_back(MakeArrowArrayInt32({18, -18, 0, 0}, validity));
  // quarters
  exp_output.push_back(MakeArrowArrayInt32({6, -6, 0, 0}, validity));
  // years
  exp_output.push_back(MakeArrowArrayInt32({1, -1, 0, 0}, validity));

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  for (size_t i = 0; i < exp_output.size(); i++) {
    EXPECT_ARROW_ARRAY_EQUALS(exp_output.at(i), outputs.at(i));
  }
}
428
// Evaluates `months_between` on pairs of date64 values.
TEST_F(TestProjector, TestMonthsBetween) {
  // input schema
  auto f0 = field("f0", arrow::date64());
  auto f1 = field("f1", arrow::date64());
  auto schema = arrow::schema({f0, f1});

  // output field
  auto output = field("out", arrow::float64());

  auto months_between_expr =
      TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output);

  std::shared_ptr<Projector> projector;
  auto status =
      Projector::Make(schema, {months_between_expr}, TestConfiguration(), &projector);
  // The status message is attached to the assertion, so it is printed only
  // when building the projector actually fails.
  ASSERT_TRUE(status.ok()) << status.message();

  time_t epoch = Epoch();

  // Create a row-batch with some sample data
  int num_records = 4;
  auto validity = {true, true, true, true};
  std::vector<int64_t> f0_data = {MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 3, 31, 0, 0, 0, 0),
                                  MillisSince(epoch, 1996, 3, 31, 0, 0, 0, 0)};

  auto array0 =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity);

  std::vector<int64_t> f1_data = {MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
                                  MillisSince(epoch, 1995, 2, 28, 0, 0, 0, 0),
                                  MillisSince(epoch, 1996, 2, 29, 0, 0, 0, 0)};

  auto array1 =
      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f1_data, validity);

  // expected output
  // rows 0/1: same day of month, one month apart in either direction
  // rows 2/3: both dates are the last day of their respective months
  auto exp_output = MakeArrowArrayFloat64({1.0, -1.0, 1.0, 1.0}, validity);

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Stop on failure: there is nothing meaningful in `outputs` to index into.
  ASSERT_TRUE(status.ok()) << status.message();

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
}
481
482 } // namespace gandiva
483