1 /* Copyright (C) 2017 MariaDB Corporation 2 3 This program is free software; you can redistribute it and/or 4 modify it under the terms of the GNU General Public License 5 as published by the Free Software Foundation; version 2 of 6 the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 16 MA 02110-1301, USA. */ 17 18 /*********************************************************************** 19 * $Id$ 20 * 21 * mcsv1_UDAF.h 22 ***********************************************************************/ 23 24 /** 25 * Columnstore interface for writing a User Defined Aggregate 26 * Functions (UDAF) and User Defined Analytic Functions (UDAnF) 27 * or a function that can act as either - UDA(n)F 28 * 29 * The basic steps are: 30 * 31 * 1. Create a the UDA(n)F function interface in some .h file. 32 * 2. Create the UDF function implementation in some .cpp file 33 * 3. Create the connector stub (MariaDB UDAF definition) for 34 * this UDF function. 35 * 4. build the dynamic library using all of the source. 36 * 5 Put the library in $COLUMNSTORE_INSTALL/lib of 37 * all modules 38 * 6. restart the Columnstore system. 39 * 7. notify mysqld about the new functions with commands like: 40 * 41 * CREATE AGGREGATE FUNCTION distinct_count returns INT 42 * soname 'libudf_mysql.so'; 43 * 44 */ 45 #ifndef HEADER_distinct_count 46 #define HEADER_distinct_count 47 48 #include <cstdlib> 49 #include <string> 50 #include <boost/any.hpp> 51 52 #include "mcsv1_udaf.h" 53 #include "calpontsystemcatalog.h" 54 #include "windowfunctioncolumn.h" 55 56 #if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) 57 #define EXPORT __declspec(dllexport) 58 #else 59 #define EXPORT 60 #endif 61 62 namespace mcsv1sdk 63 { 64 65 // Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or 66 // User Defined Analytic Function (UDAnF). 67 // These will be singleton classes, so don't put any instance 68 // specific data in here. All instance data is stored in mcsv1Context 69 // passed to each user function and retrieved by the getUserData() method. 70 // 71 // Each API function returns a ReturnCode. If ERROR is returned at any time, 72 // the query is aborted, getInterrupted() will begin to return true and the 73 // message set in config->setErrorMessage() is returned to MariaDB. 74 class distinct_count : public mcsv1_UDAF 75 { 76 public: 77 // Defaults OK distinct_count()78 distinct_count() : mcsv1_UDAF(){}; ~distinct_count()79 virtual ~distinct_count(){}; 80 81 /** 82 * init() 83 * 84 * Mandatory. Implement this to initialize flags and instance 85 * data. Called once per SQL statement. You can do any sanity 86 * checks here. 87 * 88 * colTypes (in) - A vector of ColDataType defining the 89 * parameters of the UDA(n)F call. These can be used to decide 90 * to override the default return type. If desired, the new 91 * return type can be set by context->setReturnType() and 92 * decimal precision can be set in context-> 93 * setResultDecimalCharacteristics. 94 * 95 * Return mcsv1_UDAF::ERROR on any error, such as non-compatible 96 * colTypes or wrong number of arguments. Else return 97 * mcsv1_UDAF::SUCCESS. 98 */ 99 virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes); 100 101 /** 102 * reset() 103 * 104 * Mandatory. Reset the UDA(n)F for a new group, partition or, 105 * in some cases, new Window Frame. Do not free any memory 106 * allocated by context->setUserDataSize(). The SDK Framework owns 107 * that memory and will handle that. Use this opportunity to 108 * reset any variables in context->getUserData() needed for the 109 * next aggregation. May be called multiple times if running in 110 * a ditributed fashion. 111 * 112 * Use this opportunity to initialize the userData. 113 */ 114 virtual ReturnCode reset(mcsv1Context* context); 115 116 /** 117 * nextValue() 118 * 119 * Mandatory. Handle a single row. 120 * 121 * colsIn - A vector of data structure describing the input 122 * data. 123 * 124 * This function is called once for every row in the filtered 125 * result set (before aggregation). It is very important that 126 * this function is efficient. 127 * 128 * If the UDAF is running in a distributed fashion, nextValue 129 * cannot depend on order, as it will only be called for each 130 * row found on the specific PM. 131 * 132 * valsIn (in) - a vector of the parameters from the row. 133 */ 134 virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); 135 136 /** 137 * subEvaluate() 138 * 139 * Mandatory -- Called if the UDAF is running in a distributed 140 * fashion. Columnstore tries to run all aggregate functions 141 * distributed, depending on context. 142 * 143 * Perform an aggregation on rows partially aggregated by 144 * nextValue. Columnstore calls nextValue for each row on a 145 * given PM for a group (GROUP BY). subEvaluate is called on the 146 * UM to consolodate those values into a single instance of 147 * userData. Keep your aggregated totals in context's userData. 148 * The first time this is called for a group, reset() would have 149 * been called with this version of userData. 150 * 151 * Called for every partial data set in each group in GROUP BY. 152 * 153 * When subEvaluate has been called for all subAggregated data 154 * sets, Evaluate will be called with the same context as here. 155 * 156 * valIn (In) - This is a pointer to a memory block of the size 157 * set in setUserDataSize. It will contain the value of userData 158 * as seen in the last call to NextValue for a given PM. 159 * 160 */ 161 virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn); 162 163 /** 164 * evaluate() 165 * 166 * Mandatory. Get the aggregated value. 167 * 168 * Called for every new group if UDAF GROUP BY, UDAnF partition 169 * or, in some cases, new Window Frame. 170 * 171 * Set the aggregated value into valOut. The datatype is assumed 172 * to be the same as that set in the init() function; 173 * 174 * If the UDAF is running in a distributed fashion, evaluate is 175 * called after a series of subEvaluate calls. 176 * 177 * valOut (out) - Set the aggregated value here. The datatype is 178 * assumed to be the same as that set in the init() function; 179 * 180 * To return a NULL value, don't assign to valOut. 181 */ 182 virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); 183 184 /** 185 * dropValue() 186 * 187 * Optional -- If defined, the server will call this instead of 188 * reset for UDAnF. 189 * 190 * Don't implement if a UDAnF has one or more of the following: 191 * The UDAnF can't be used with a Window Frame 192 * The UDAnF is not reversable in some way 193 * The UDAnF is not interested in optimal performance 194 * 195 * If not implemented, reset() followed by a series of 196 * nextValue() will be called for each movement of the Window 197 * Frame. 198 * 199 * If implemented, then each movement of the Window Frame will 200 * result in dropValue() being called for each row falling out 201 * of the Frame and nextValue() being called for each new row 202 * coming into the Frame. 203 * 204 * valsDropped (in) - a vector of the parameters from the row 205 * leaving the Frame 206 * 207 * dropValue() will not be called for unbounded/current row type 208 * frames, as those are already optimized. 209 */ 210 virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); 211 212 protected: 213 214 }; 215 216 }; // namespace 217 218 #undef EXPORT 219 220 #endif // HEADER_distinct_count.h 221 222