1 /* Copyright (C) 2017 MariaDB Corporation
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 /***********************************************************************
19 *   $Id$
20 *
21 *   mcsv1_UDAF.h
22 ***********************************************************************/
23 
/**
 * Columnstore interface for writing a User Defined Aggregate
 * Function (UDAF), a User Defined Analytic Function (UDAnF),
 * or a function that can act as either - a UDA(n)F.
 *
 * The basic steps are:
 *
 * 1. Create the UDA(n)F function interface in some .h file.
 * 2. Create the UDA(n)F function implementation in some .cpp file.
 * 3. Create the connector stub (MariaDB UDAF definition) for
 *    this UDA(n)F function.
 * 4. Build the dynamic library using all of the source.
 * 5. Put the library in $COLUMNSTORE_INSTALL/lib of
 *    all modules.
 * 6. Restart the Columnstore system.
 * 7. Notify mysqld about the new functions with commands like:
 *
 *    CREATE AGGREGATE FUNCTION distinct_count returns INT
 *    soname 'libudf_mysql.so';
 *
 */
45 #ifndef HEADER_distinct_count
46 #define HEADER_distinct_count
47 
48 #include <cstdlib>
49 #include <string>
50 #include <boost/any.hpp>
51 
52 #include "mcsv1_udaf.h"
53 #include "calpontsystemcatalog.h"
54 #include "windowfunctioncolumn.h"
55 
56 #if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
57 #define EXPORT __declspec(dllexport)
58 #else
59 #define EXPORT
60 #endif
61 
62 namespace mcsv1sdk
63 {
64 
// Override mcsv1_UDAF to build your User Defined Aggregate Function (UDAF)
// and/or User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance-specific
// data in here. All instance data is stored in the mcsv1Context
// passed to each user function and is retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true, and the
// message set in context->setErrorMessage() is returned to MariaDB.
74 class distinct_count : public  mcsv1_UDAF
75 {
76 public:
77 	// Defaults OK
distinct_count()78 	distinct_count() : mcsv1_UDAF(){};
~distinct_count()79 	virtual ~distinct_count(){};
80 
81 	/**
82 	 * init()
83 	 *
84 	 * Mandatory. Implement this to initialize flags and instance
85 	 * data. Called once per SQL statement. You can do any sanity
86 	 * checks here.
87 	 *
88 	 * colTypes (in) - A vector of ColDataType defining the
89 	 * parameters of the UDA(n)F call. These can be used to decide
90 	 * to override the default return type. If desired, the new
91 	 * return type can be set by context->setReturnType() and
92 	 * decimal precision can be set in context->
93 	 * setResultDecimalCharacteristics.
94 	 *
95 	 * Return mcsv1_UDAF::ERROR on any error, such as non-compatible
96 	 * colTypes or wrong number of arguments. Else return
97 	 * mcsv1_UDAF::SUCCESS.
98 	 */
99     virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes);
100 
101 	/**
102 	 * reset()
103 	 *
104 	 * Mandatory. Reset the UDA(n)F for a new group, partition or,
105 	 * in some cases, new Window Frame. Do not free any memory
106 	 * allocated by context->setUserDataSize(). The SDK Framework owns
107 	 * that memory and will handle that. Use this opportunity to
108 	 * reset any variables in context->getUserData() needed for the
109 	 * next aggregation. May be called multiple times if running in
110 	 * a ditributed fashion.
111 	 *
112 	 * Use this opportunity to initialize the userData.
113 	 */
114 	virtual ReturnCode reset(mcsv1Context* context);
115 
116     /**
117      * nextValue()
118      *
119      * Mandatory. Handle a single row.
120      *
121      * colsIn - A vector of data structure describing the input
122      * data.
123      *
124      * This function is called once for every row in the filtered
125      * result set (before aggregation). It is very important that
126      * this function is efficient.
127      *
128      * If the UDAF is running in a distributed fashion, nextValue
129      * cannot depend on order, as it will only be called for each
130      * row found on the specific PM.
131      *
132      * valsIn (in) - a vector of the parameters from the row.
133      */
134     virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
135 
136 	 /**
137 	  * subEvaluate()
138 	  *
139 	  * Mandatory -- Called if the UDAF is running in a distributed
140 	  * fashion. Columnstore tries to run all aggregate functions
141 	  * distributed, depending on context.
142 	  *
143 	  * Perform an aggregation on rows partially aggregated by
144 	  * nextValue. Columnstore calls nextValue for each row on a
145 	  * given PM for a group (GROUP BY). subEvaluate is called on the
146 	  * UM to consolodate those values into a single instance of
147 	  * userData. Keep your aggregated totals in context's userData.
148 	  * The first time this is called for a group, reset() would have
149 	  * been called with this version of userData.
150 	  *
151 	  * Called for every partial data set in each group in GROUP BY.
152 	  *
153 	  * When subEvaluate has been called for all subAggregated data
154 	  * sets, Evaluate will be called with the same context as here.
155 	  *
156 	  * valIn (In) - This is a pointer to a memory block of the size
157 	  * set in setUserDataSize. It will contain the value of userData
158 	  * as seen in the last call to NextValue for a given PM.
159 	  *
160 	  */
161 	 virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn);
162 
163 	/**
164 	 * evaluate()
165 	 *
166 	 * Mandatory. Get the aggregated value.
167 	 *
168 	 * Called for every new group if UDAF GROUP BY, UDAnF partition
169 	 * or, in some cases, new Window Frame.
170 	 *
171 	 * Set the aggregated value into valOut. The datatype is assumed
172 	 * to be the same as that set in the init() function;
173 	 *
174 	 * If the UDAF is running in a distributed fashion, evaluate is
175 	 * called after a series of subEvaluate calls.
176 	 *
177 	 * valOut (out) - Set the aggregated value here. The datatype is
178 	 * assumed to be the same as that set in the init() function;
179 	 *
180 	 * To return a NULL value, don't assign to valOut.
181 	 */
182 	virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
183 
184     /**
185      * dropValue()
186      *
187      * Optional -- If defined, the server will call this instead of
188      * reset for UDAnF.
189      *
190      * Don't implement if a UDAnF has one or more of the following:
191      * The UDAnF can't be used with a Window Frame
192      * The UDAnF is not reversable in some way
193      * The UDAnF is not interested in optimal performance
194      *
195      * If not implemented, reset() followed by a series of
196      * nextValue() will be called for each movement of the Window
197      * Frame.
198      *
199      * If implemented, then each movement of the Window Frame will
200      * result in dropValue() being called for each row falling out
201      * of the Frame and nextValue() being called for each new row
202      * coming into the Frame.
203      *
204      * valsDropped (in) - a vector of the parameters from the row
205      * leaving the Frame
206      *
207      * dropValue() will not be called for unbounded/current row type
208      * frames, as those are already optimized.
209      */
210     virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
211 
212 protected:
213 
214 };
215 
216 };  // namespace
217 
218 #undef EXPORT
219 
220 #endif // HEADER_distinct_count.h
221 
222