1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
//! Defines physical expressions that can be evaluated at runtime during query execution
19 
20 use std::sync::Arc;
21 
22 use super::ColumnarValue;
23 use crate::error::{DataFusionError, Result};
24 use crate::physical_plan::PhysicalExpr;
25 use arrow::compute::kernels::sort::{SortColumn, SortOptions};
26 use arrow::record_batch::RecordBatch;
27 
28 mod average;
29 #[macro_use]
30 mod binary;
31 mod case;
32 mod cast;
33 mod coercion;
34 mod column;
35 mod count;
36 mod in_list;
37 mod is_not_null;
38 mod is_null;
39 mod literal;
40 mod min_max;
41 mod negative;
42 mod not;
43 mod nullif;
44 mod sum;
45 
46 pub use average::{avg_return_type, Avg, AvgAccumulator};
47 pub use binary::{binary, binary_operator_data_type, BinaryExpr};
48 pub use case::{case, CaseExpr};
49 pub use cast::{cast, CastExpr};
50 pub use column::{col, Column};
51 pub use count::Count;
52 pub use in_list::{in_list, InListExpr};
53 pub use is_not_null::{is_not_null, IsNotNullExpr};
54 pub use is_null::{is_null, IsNullExpr};
55 pub use literal::{lit, Literal};
56 pub use min_max::{Max, Min};
57 pub use negative::{negative, NegativeExpr};
58 pub use not::{not, NotExpr};
59 pub use nullif::{nullif_func, SUPPORTED_NULLIF_TYPES};
60 pub use sum::{sum_return_type, Sum};
/// Builds the name of an intermediate state field.
///
/// The result has the form `name[state_name]`: `name` is followed by
/// `state_name` wrapped in square brackets.
pub fn format_state_name(name: &str, state_name: &str) -> String {
    // Two extra bytes account for the surrounding `[` and `]`.
    let mut qualified = String::with_capacity(name.len() + state_name.len() + 2);
    qualified.push_str(name);
    qualified.push('[');
    qualified.push_str(state_name);
    qualified.push(']');
    qualified
}
65 
66 /// Represents Sort operation for a column in a RecordBatch
67 #[derive(Clone, Debug)]
68 pub struct PhysicalSortExpr {
69     /// Physical expression representing the column to sort
70     pub expr: Arc<dyn PhysicalExpr>,
71     /// Option to specify how the given column should be sorted
72     pub options: SortOptions,
73 }
74 
75 impl PhysicalSortExpr {
76     /// evaluate the sort expression into SortColumn that can be passed into arrow sort kernel
evaluate_to_sort_column(&self, batch: &RecordBatch) -> Result<SortColumn>77     pub fn evaluate_to_sort_column(&self, batch: &RecordBatch) -> Result<SortColumn> {
78         let value_to_sort = self.expr.evaluate(batch)?;
79         let array_to_sort = match value_to_sort {
80             ColumnarValue::Array(array) => array,
81             ColumnarValue::Scalar(scalar) => {
82                 return Err(DataFusionError::Internal(format!(
83                     "Sort operation is not applicable to scalar value {}",
84                     scalar
85                 )));
86             }
87         };
88         Ok(SortColumn {
89             values: array_to_sort,
90             options: Some(self.options),
91         })
92     }
93 }
94 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{error::Result, physical_plan::AggregateExpr, scalar::ScalarValue};

    /// Runs the aggregate `$OP` over a single-column batch and checks the outcome.
    ///
    /// The batch has one non-nullable column named `"a"` of type `$DATATYPE`
    /// holding `$ARRAY`. The aggregate is built with `$EXPECTED_DATATYPE`, and
    /// its result is compared against `ScalarValue::from($EXPECTED)`.
    /// The expansion evaluates to `Ok(())` and uses `?`, so it must be invoked
    /// inside a function returning `Result`.
    #[macro_export]
    macro_rules! generic_test_op {
        ($ARRAY:expr, $DATATYPE:expr, $OP:ident, $EXPECTED:expr, $EXPECTED_DATATYPE:expr) => {{
            let input_schema = Schema::new(vec![Field::new("a", $DATATYPE, false)]);

            let input_batch =
                RecordBatch::try_new(Arc::new(input_schema.clone()), vec![$ARRAY])?;

            let aggregate_expr =
                Arc::new(<$OP>::new(col("a"), "bla".to_string(), $EXPECTED_DATATYPE));
            let got = aggregate(&input_batch, aggregate_expr)?;
            let want = ScalarValue::from($EXPECTED);

            assert_eq!(want, got);

            Ok(())
        }};
    }

    /// Evaluates `agg` over one `batch` and returns the final aggregated value.
    ///
    /// Each input expression of the aggregate is evaluated against the batch
    /// and converted to an array with one entry per row. The arrays are fed to
    /// a freshly created accumulator in a single update, after which the
    /// accumulator is evaluated.
    pub fn aggregate(
        batch: &RecordBatch,
        agg: Arc<dyn AggregateExpr>,
    ) -> Result<ScalarValue> {
        // The accumulator is created before any expression is evaluated.
        let mut accumulator = agg.create_accumulator()?;
        let input_exprs = agg.expressions();

        let mut columns = Vec::with_capacity(input_exprs.len());
        for input in input_exprs.iter() {
            let evaluated = input.evaluate(batch)?;
            columns.push(evaluated.into_array(batch.num_rows()));
        }

        accumulator.update_batch(&columns)?;
        accumulator.evaluate()
    }
}
134