1 /*
2    Copyright (c) 2016, 2020, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
16 
17 #ifndef ITEM_WINDOWFUNC_INCLUDED
18 #define ITEM_WINDOWFUNC_INCLUDED
19 
20 #include "item.h"
21 
22 class Window_spec;
23 
24 
25 int test_if_group_changed(List<Cached_item> &list);
26 
27 
28 /* A wrapper around test_if_group_changed */
29 class Group_bound_tracker
30 {
31 public:
32 
Group_bound_tracker(THD * thd,SQL_I_List<ORDER> * list)33   Group_bound_tracker(THD *thd, SQL_I_List<ORDER> *list)
34   {
35     for (ORDER *curr = list->first; curr; curr=curr->next)
36     {
37         Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
38         group_fields.push_back(tmp);
39     }
40   }
41 
init()42   void init()
43   {
44     first_check= true;
45   }
46 
47   /*
48     Check if the current row is in a different group than the previous row
49     this function was called for.
50     XXX: Side-effect: The new row's group becomes the current row's group.
51 
52     Returns true if there is a change between the current_group and the cached
53     value, or if it is the first check after a call to init.
54   */
check_if_next_group()55   bool check_if_next_group()
56   {
57     if (test_if_group_changed(group_fields) > -1 || first_check)
58     {
59       first_check= false;
60       return true;
61     }
62     return false;
63   }
64 
65   /*
66     Check if the current row is in a different group than the previous row
67     check_if_next_group was called for.
68 
69     Compares the groups without the additional side effect of updating the
70     current cached values.
71   */
compare_with_cache()72   int compare_with_cache()
73   {
74     List_iterator<Cached_item> li(group_fields);
75     Cached_item *ptr;
76     int res;
77     while ((ptr= li++))
78     {
79       if ((res= ptr->cmp_read_only()))
80         return res;
81     }
82     return 0;
83   }
~Group_bound_tracker()84   ~Group_bound_tracker()
85   {
86     group_fields.delete_elements();
87   }
88 
89 private:
90   List<Cached_item> group_fields;
91   /*
92     During the first check_if_next_group, the list of cached_items is not
93     initialized. The compare function will return that the items match if
94     the field's value is the same as the Cached_item's default value (0).
95     This flag makes sure that we always return true during the first check.
96 
97     XXX This is better to be implemented within test_if_group_changed, but
98     since it is used in other parts of the codebase, we keep it here for now.
99   */
100    bool first_check;
101 };
102 
103 /*
104   ROW_NUMBER() OVER (...)
105 
106   @detail
107   - This is a Window function (not just an aggregate)
108   - It can be computed by doing one pass over select output, provided
109     the output is sorted according to the window definition.
110 */
111 
112 class Item_sum_row_number: public Item_sum_int
113 {
114   longlong count;
115 
116 public:
117 
Item_sum_row_number(THD * thd)118   Item_sum_row_number(THD *thd)
119     : Item_sum_int(thd),  count(0) {}
120 
clear()121   void clear()
122   {
123     count= 0;
124   }
125 
add()126   bool add()
127   {
128     count++;
129     return false;
130   }
131 
reset_field()132   void reset_field() { DBUG_ASSERT(0); }
update_field()133   void update_field() {}
134 
sum_func()135   enum Sumfunctype sum_func() const
136   {
137     return ROW_NUMBER_FUNC;
138   }
139 
val_int()140   longlong val_int()
141   {
142     return count;
143   }
func_name()144   const char*func_name() const
145   {
146     return "row_number";
147   }
148 
get_copy(THD * thd)149   Item *get_copy(THD *thd)
150   { return get_item_copy<Item_sum_row_number>(thd, this); }
151 };
152 
153 
154 /*
155   RANK() OVER (...) Windowing function
156 
157   @detail
158   - This is a Window function (not just an aggregate)
159   - It can be computed by doing one pass over select output, provided
160     the output is sorted according to the window definition.
161 
162   The function is defined as:
163 
164   "The rank of row R is defined as 1 (one) plus the number of rows that
165   precede R and are not peers of R"
166 
167   "This implies that if two or more rows are not distinct with respect to
168   the window ordering, then there will be one or more"
169 */
170 
171 class Item_sum_rank: public Item_sum_int
172 {
173 protected:
174   longlong row_number; // just ROW_NUMBER()
175   longlong cur_rank;   // current value
176 
177   Group_bound_tracker *peer_tracker;
178 public:
179 
Item_sum_rank(THD * thd)180   Item_sum_rank(THD *thd) : Item_sum_int(thd), peer_tracker(NULL) {}
181 
clear()182   void clear()
183   {
184     /* This is called on partition start */
185     cur_rank= 1;
186     row_number= 0;
187   }
188 
189   bool add();
190 
val_int()191   longlong val_int()
192   {
193     return cur_rank;
194   }
195 
reset_field()196   void reset_field() { DBUG_ASSERT(0); }
update_field()197   void update_field() {}
198 
sum_func()199   enum Sumfunctype sum_func () const
200   {
201     return RANK_FUNC;
202   }
203 
func_name()204   const char*func_name() const
205   {
206     return "rank";
207   }
208 
209   void setup_window_func(THD *thd, Window_spec *window_spec);
210 
cleanup()211   void cleanup()
212   {
213     if (peer_tracker)
214     {
215       delete peer_tracker;
216       peer_tracker= NULL;
217     }
218     Item_sum_int::cleanup();
219   }
get_copy(THD * thd)220   Item *get_copy(THD *thd)
221   { return get_item_copy<Item_sum_rank>(thd, this); }
222 };
223 
224 
225 /*
226   DENSE_RANK() OVER (...) Windowing function
227 
228   @detail
229   - This is a Window function (not just an aggregate)
230   - It can be computed by doing one pass over select output, provided
231     the output is sorted according to the window definition.
232 
233   The function is defined as:
234 
235   "If DENSE_RANK is specified, then the rank of row R is defined as the
236   number of rows preceding and including R that are distinct with respect
237   to the window ordering"
238 
239   "This implies that there are no gaps in the sequential rank numbering of
240   rows in each window partition."
241 */
242 
243 
244 class Item_sum_dense_rank: public Item_sum_int
245 {
246   longlong dense_rank;
247   bool first_add;
248   Group_bound_tracker *peer_tracker;
249  public:
250   /*
251      XXX(cvicentiu) This class could potentially be implemented in the rank
252      class, with a switch for the DENSE case.
253   */
clear()254   void clear()
255   {
256     dense_rank= 0;
257     first_add= true;
258   }
259   bool add();
reset_field()260   void reset_field() { DBUG_ASSERT(0); }
update_field()261   void update_field() {}
val_int()262   longlong val_int()
263   {
264     return dense_rank;
265   }
266 
Item_sum_dense_rank(THD * thd)267   Item_sum_dense_rank(THD *thd)
268     : Item_sum_int(thd), dense_rank(0), first_add(true), peer_tracker(NULL) {}
sum_func()269   enum Sumfunctype sum_func () const
270   {
271     return DENSE_RANK_FUNC;
272   }
273 
func_name()274   const char*func_name() const
275   {
276     return "dense_rank";
277   }
278 
279   void setup_window_func(THD *thd, Window_spec *window_spec);
280 
cleanup()281   void cleanup()
282   {
283     if (peer_tracker)
284     {
285       delete peer_tracker;
286       peer_tracker= NULL;
287     }
288     Item_sum_int::cleanup();
289   }
get_copy(THD * thd)290   Item *get_copy(THD *thd)
291   { return get_item_copy<Item_sum_dense_rank>(thd, this); }
292 };
293 
294 class Item_sum_hybrid_simple : public Item_sum_hybrid
295 {
296  public:
Item_sum_hybrid_simple(THD * thd,Item * arg)297   Item_sum_hybrid_simple(THD *thd, Item *arg):
298    Item_sum_hybrid(thd, arg),
299    value(NULL)
300   { }
301 
Item_sum_hybrid_simple(THD * thd,Item * arg1,Item * arg2)302   Item_sum_hybrid_simple(THD *thd, Item *arg1, Item *arg2):
303    Item_sum_hybrid(thd, arg1, arg2),
304    value(NULL)
305   { }
306 
307   bool add();
308   bool fix_fields(THD *, Item **);
309   bool fix_length_and_dec();
310   void setup_hybrid(THD *thd, Item *item);
311   double val_real();
312   longlong val_int();
313   my_decimal *val_decimal(my_decimal *);
314   void reset_field();
315   String *val_str(String *);
316   bool val_native(THD *thd, Native *to);
317   bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate);
type_handler()318   const Type_handler *type_handler() const
319   { return Type_handler_hybrid_field_type::type_handler(); }
320   void update_field();
321   Field *create_tmp_field(bool group, TABLE *table);
clear()322   void clear()
323   {
324     value->clear();
325     null_value= 1;
326   }
327 
328  private:
329   Item_cache *value;
330 };
331 
332 /*
333    This item will remember the first value added to it. It will not update
334    the value unless it is cleared.
335 */
336 class Item_sum_first_value : public Item_sum_hybrid_simple
337 {
338  public:
Item_sum_first_value(THD * thd,Item * arg_expr)339   Item_sum_first_value(THD* thd, Item* arg_expr) :
340     Item_sum_hybrid_simple(thd, arg_expr) {}
341 
342 
sum_func()343   enum Sumfunctype sum_func () const
344   {
345     return FIRST_VALUE_FUNC;
346   }
347 
func_name()348   const char*func_name() const
349   {
350     return "first_value";
351   }
352 
get_copy(THD * thd)353   Item *get_copy(THD *thd)
354   { return get_item_copy<Item_sum_first_value>(thd, this); }
355 };
356 
357 /*
358    This item will remember the last value added to it.
359 
360    This item does not support removal, and can be cleared only by calling
361    clear().
362 */
363 class Item_sum_last_value : public Item_sum_hybrid_simple
364 {
365  public:
Item_sum_last_value(THD * thd,Item * arg_expr)366   Item_sum_last_value(THD* thd, Item* arg_expr) :
367     Item_sum_hybrid_simple(thd, arg_expr) {}
368 
sum_func()369   enum Sumfunctype sum_func() const
370   {
371     return LAST_VALUE_FUNC;
372   }
373 
func_name()374   const char*func_name() const
375   {
376     return "last_value";
377   }
378 
get_copy(THD * thd)379   Item *get_copy(THD *thd)
380   { return get_item_copy<Item_sum_last_value>(thd, this); }
381 };
382 
383 class Item_sum_nth_value : public Item_sum_hybrid_simple
384 {
385  public:
Item_sum_nth_value(THD * thd,Item * arg_expr,Item * offset_expr)386   Item_sum_nth_value(THD *thd, Item *arg_expr, Item* offset_expr) :
387     Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {}
388 
sum_func()389   enum Sumfunctype sum_func() const
390   {
391     return NTH_VALUE_FUNC;
392   }
393 
func_name()394   const char*func_name() const
395   {
396     return "nth_value";
397   }
398 
get_copy(THD * thd)399   Item *get_copy(THD *thd)
400   { return get_item_copy<Item_sum_nth_value>(thd, this); }
401 };
402 
403 class Item_sum_lead : public Item_sum_hybrid_simple
404 {
405  public:
Item_sum_lead(THD * thd,Item * arg_expr,Item * offset_expr)406   Item_sum_lead(THD *thd, Item *arg_expr, Item* offset_expr) :
407     Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {}
408 
sum_func()409   enum Sumfunctype sum_func() const
410   {
411     return LEAD_FUNC;
412   }
413 
func_name()414   const char*func_name() const
415   {
416     return "lead";
417   }
418 
get_copy(THD * thd)419   Item *get_copy(THD *thd)
420   { return get_item_copy<Item_sum_lead>(thd, this); }
421 };
422 
423 class Item_sum_lag : public Item_sum_hybrid_simple
424 {
425  public:
Item_sum_lag(THD * thd,Item * arg_expr,Item * offset_expr)426   Item_sum_lag(THD *thd, Item *arg_expr, Item* offset_expr) :
427     Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {}
428 
sum_func()429   enum Sumfunctype sum_func() const
430   {
431     return LAG_FUNC;
432   }
433 
func_name()434   const char*func_name() const
435   {
436     return "lag";
437   }
438 
get_copy(THD * thd)439   Item *get_copy(THD *thd)
440   { return get_item_copy<Item_sum_lag>(thd, this); }
441 };
442 
443 
444 class Partition_row_count
445 {
446 public:
Partition_row_count()447   Partition_row_count() :partition_row_count_(0) { }
set_partition_row_count(ulonglong count)448   void set_partition_row_count(ulonglong count)
449   {
450     partition_row_count_ = count;
451   }
calc_val_real(bool * null_value,ulonglong current_row_count)452   double calc_val_real(bool *null_value,
453                        ulonglong current_row_count)
454   {
455     if ((*null_value= (partition_row_count_ == 0)))
456       return 0;
457     return static_cast<double>(current_row_count) / partition_row_count_;
458   }
459 protected:
get_row_count()460   longlong get_row_count() { return partition_row_count_; }
461   ulonglong partition_row_count_;
462 };
463 
464 
465 class Current_row_count
466 {
467 public:
Current_row_count()468   Current_row_count() :current_row_count_(0) { }
469 protected:
get_row_number()470   ulonglong get_row_number() { return current_row_count_ ; }
471   ulonglong current_row_count_;
472 };
473 
474 
475 /*
476   @detail
477   "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is
478   defined to be the RANK of R and NR is defined to be the number of rows in
479   the window partition of R."
480 
481   Computation of this function requires two passes:
482   - First pass to find #rows in the partition
483     This is held within the row_count context.
484   - Second pass to compute rank of current row and the value of the function
485 */
486 class Item_sum_percent_rank: public Item_sum_double,
487                              public Partition_row_count
488 {
489  public:
Item_sum_percent_rank(THD * thd)490   Item_sum_percent_rank(THD *thd)
491     : Item_sum_double(thd), cur_rank(1), peer_tracker(NULL) {}
492 
val_int()493   longlong val_int()
494   {
495    /*
496       Percent rank is a real value so calling the integer value should never
497       happen. It makes no sense as it gets truncated to either 0 or 1.
498    */
499     DBUG_ASSERT(0);
500     return 0;
501   }
502 
val_real()503   double val_real()
504   {
505    /*
506      We can not get the real value without knowing the number of rows
507      in the partition. Don't divide by 0.
508    */
509    ulonglong partition_rows = get_row_count();
510    null_value= partition_rows > 0 ? false : true;
511 
512    return partition_rows > 1 ?
513              static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
514   }
515 
sum_func()516   enum Sumfunctype sum_func () const
517   {
518     return PERCENT_RANK_FUNC;
519   }
520 
func_name()521   const char*func_name() const
522   {
523     return "percent_rank";
524   }
525 
update_field()526   void update_field() {}
527 
clear()528   void clear()
529   {
530     cur_rank= 1;
531     row_number= 0;
532   }
533   bool add();
type_handler()534   const Type_handler *type_handler() const { return &type_handler_double; }
535 
fix_length_and_dec()536   bool fix_length_and_dec()
537   {
538     decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
539                     // requires.
540     return FALSE;
541   }
542 
543   void setup_window_func(THD *thd, Window_spec *window_spec);
544 
reset_field()545   void reset_field() { DBUG_ASSERT(0); }
546 
set_partition_row_count(ulonglong count)547   void set_partition_row_count(ulonglong count)
548   {
549     Partition_row_count::set_partition_row_count(count);
550   }
551 
get_copy(THD * thd)552   Item *get_copy(THD *thd)
553   { return get_item_copy<Item_sum_percent_rank>(thd, this); }
554 
555  private:
556   longlong cur_rank;   // Current rank of the current row.
557   longlong row_number; // Value if this were ROW_NUMBER() function.
558 
559   Group_bound_tracker *peer_tracker;
560 
cleanup()561   void cleanup()
562   {
563     if (peer_tracker)
564     {
565       delete peer_tracker;
566       peer_tracker= NULL;
567     }
568     Item_sum_num::cleanup();
569   }
570 };
571 
572 
573 
574 
575 /*
576   @detail
577   "The relative rank of a row R is defined as NP/NR, where
578   - NP is defined to be the number of rows preceding or peer with R in the
579     window ordering of the window partition of R
580   - NR is defined to be the number of rows in the window partition of R.
581 
582   Just like with Item_sum_percent_rank, computation of this function requires
583   two passes.
584 */
585 
586 class Item_sum_cume_dist: public Item_sum_double,
587                           public Partition_row_count,
588                           public Current_row_count
589 {
590  public:
Item_sum_cume_dist(THD * thd)591   Item_sum_cume_dist(THD *thd) :Item_sum_double(thd) { }
Item_sum_cume_dist(THD * thd,Item * arg)592   Item_sum_cume_dist(THD *thd, Item *arg) :Item_sum_double(thd, arg) { }
593 
val_real()594   double val_real()
595   {
596     return calc_val_real(&null_value, current_row_count_);
597   }
598 
add()599   bool add()
600   {
601     current_row_count_++;
602     return false;
603   }
604 
sum_func()605   enum Sumfunctype sum_func() const
606   {
607     return CUME_DIST_FUNC;
608   }
609 
clear()610   void clear()
611   {
612     current_row_count_= 0;
613     partition_row_count_= 0;
614   }
615 
func_name()616   const char*func_name() const
617   {
618     return "cume_dist";
619   }
620 
update_field()621   void update_field() {}
type_handler()622   const Type_handler *type_handler() const { return &type_handler_double; }
623 
fix_length_and_dec()624   bool fix_length_and_dec()
625   {
626     decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
627                     // requires.
628     return FALSE;
629   }
630 
reset_field()631   void reset_field() { DBUG_ASSERT(0); }
632 
set_partition_row_count(ulonglong count)633   void set_partition_row_count(ulonglong count)
634   {
635     Partition_row_count::set_partition_row_count(count);
636   }
637 
get_copy(THD * thd)638   Item *get_copy(THD *thd)
639   { return get_item_copy<Item_sum_cume_dist>(thd, this); }
640 
641 };
642 
643 class Item_sum_ntile : public Item_sum_int,
644                        public Partition_row_count,
645                        public Current_row_count
646 {
647  public:
Item_sum_ntile(THD * thd,Item * num_quantiles_expr)648   Item_sum_ntile(THD* thd, Item* num_quantiles_expr) :
649     Item_sum_int(thd, num_quantiles_expr), n_old_val_(0)
650   { }
651 
val_int()652   longlong val_int()
653   {
654     if (get_row_count() == 0)
655     {
656       null_value= true;
657       return 0;
658     }
659 
660     longlong num_quantiles= get_num_quantiles();
661 
662     if (num_quantiles <= 0 ||
663       (static_cast<ulonglong>(num_quantiles) != n_old_val_ && n_old_val_ > 0))
664     {
665       my_error(ER_INVALID_NTILE_ARGUMENT, MYF(0));
666       return true;
667     }
668     n_old_val_= static_cast<ulonglong>(num_quantiles);
669     null_value= false;
670     ulonglong quantile_size = get_row_count() / num_quantiles;
671     ulonglong extra_rows = get_row_count() - quantile_size * num_quantiles;
672 
673     if (current_row_count_ <= extra_rows * (quantile_size + 1))
674       return (current_row_count_ - 1) / (quantile_size + 1) + 1;
675 
676     return (current_row_count_ - 1 - extra_rows) / quantile_size + 1;
677   }
678 
add()679   bool add()
680   {
681     current_row_count_++;
682     return false;
683   }
684 
sum_func()685   enum Sumfunctype sum_func() const
686   {
687     return NTILE_FUNC;
688   }
689 
clear()690   void clear()
691   {
692     current_row_count_= 0;
693     partition_row_count_= 0;
694     n_old_val_= 0;
695   }
696 
func_name()697   const char*func_name() const
698   {
699     return "ntile";
700   }
701 
update_field()702   void update_field() {}
703 
reset_field()704   void reset_field() { DBUG_ASSERT(0); }
705 
set_partition_row_count(ulonglong count)706   void set_partition_row_count(ulonglong count)
707   {
708     Partition_row_count::set_partition_row_count(count);
709   }
710 
get_copy(THD * thd)711   Item *get_copy(THD *thd)
712   { return get_item_copy<Item_sum_ntile>(thd, this); }
713 
714  private:
get_num_quantiles()715   longlong get_num_quantiles() { return args[0]->val_int(); }
716   ulonglong n_old_val_;
717 };
718 
719 class Item_sum_percentile_disc : public Item_sum_num,
720                                  public Type_handler_hybrid_field_type,
721                                  public Partition_row_count,
722                                  public Current_row_count
723 {
724 public:
Item_sum_percentile_disc(THD * thd,Item * arg)725   Item_sum_percentile_disc(THD *thd, Item* arg) : Item_sum_num(thd, arg),
726                            Type_handler_hybrid_field_type(&type_handler_longlong),
727                            value(NULL), val_calculated(FALSE), first_call(TRUE),
728                            prev_value(0), order_item(NULL){}
729 
val_real()730   double val_real()
731   {
732     if (get_row_count() == 0 || get_arg(0)->is_null())
733     {
734       null_value= true;
735       return 0;
736     }
737     null_value= false;
738     return value->val_real();
739   }
740 
val_int()741   longlong val_int()
742   {
743     if (get_row_count() == 0 || get_arg(0)->is_null())
744     {
745       null_value= true;
746       return 0;
747     }
748     null_value= false;
749     return value->val_int();
750   }
751 
val_decimal(my_decimal * dec)752   my_decimal* val_decimal(my_decimal* dec)
753   {
754     if (get_row_count() == 0 || get_arg(0)->is_null())
755     {
756       null_value= true;
757       return 0;
758     }
759     null_value= false;
760     return value->val_decimal(dec);
761   }
762 
val_str(String * str)763   String* val_str(String *str)
764   {
765     if (get_row_count() == 0 || get_arg(0)->is_null())
766     {
767       null_value= true;
768       return 0;
769     }
770     null_value= false;
771     return value->val_str(str);
772   }
773 
get_date(THD * thd,MYSQL_TIME * ltime,date_mode_t fuzzydate)774   bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate)
775   {
776     if (get_row_count() == 0 || get_arg(0)->is_null())
777     {
778       null_value= true;
779       return 0;
780     }
781     null_value= false;
782     return value->get_date(thd, ltime, fuzzydate);
783   }
784 
add()785   bool add()
786   {
787     Item *arg= get_arg(0);
788     if (arg->is_null())
789       return false;
790 
791     if (first_call)
792     {
793       prev_value= arg->val_real();
794       if (prev_value > 1 || prev_value < 0)
795       {
796         my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name());
797         return true;
798       }
799       first_call= false;
800     }
801 
802     double arg_val= arg->val_real();
803 
804     if (prev_value != arg_val)
805     {
806       my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name());
807       return true;
808     }
809 
810     if (val_calculated)
811       return false;
812 
813     value->store(order_item);
814     value->cache_value();
815     if (value->null_value)
816       return false;
817 
818     current_row_count_++;
819     double val= calc_val_real(&null_value, current_row_count_);
820 
821     if (val >= prev_value && !val_calculated)
822       val_calculated= true;
823     return false;
824   }
825 
sum_func()826   enum Sumfunctype sum_func() const
827   {
828     return PERCENTILE_DISC_FUNC;
829   }
830 
clear()831   void clear()
832   {
833     val_calculated= false;
834     first_call= true;
835     value->clear();
836     partition_row_count_= 0;
837     current_row_count_= 0;
838   }
839 
func_name()840   const char*func_name() const
841   {
842     return "percentile_disc";
843   }
844 
update_field()845   void update_field() {}
type_handler()846   const Type_handler *type_handler() const
847   {return Type_handler_hybrid_field_type::type_handler();}
848 
fix_length_and_dec()849   bool fix_length_and_dec()
850   {
851     decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
852                     // requires.
853     return FALSE;
854   }
855 
reset_field()856   void reset_field() { DBUG_ASSERT(0); }
857 
set_partition_row_count(ulonglong count)858   void set_partition_row_count(ulonglong count)
859   {
860     Partition_row_count::set_partition_row_count(count);
861   }
862 
get_copy(THD * thd)863   Item *get_copy(THD *thd)
864   { return get_item_copy<Item_sum_percentile_disc>(thd, this); }
865   void setup_window_func(THD *thd, Window_spec *window_spec);
866   void setup_hybrid(THD *thd, Item *item);
867   bool fix_fields(THD *thd, Item **ref);
868 
869 private:
870   Item_cache *value;
871   bool val_calculated;
872   bool first_call;
873   double prev_value;
874   Item *order_item;
875 };
876 
877 class Item_sum_percentile_cont : public Item_sum_double,
878                                  public Partition_row_count,
879                                  public Current_row_count
880 {
881 public:
Item_sum_percentile_cont(THD * thd,Item * arg)882   Item_sum_percentile_cont(THD *thd, Item* arg) : Item_sum_double(thd, arg),
883                            floor_value(NULL), ceil_value(NULL), first_call(TRUE),prev_value(0),
884                            ceil_val_calculated(FALSE), floor_val_calculated(FALSE), order_item(NULL){}
885 
val_real()886   double val_real()
887   {
888     if (get_row_count() == 0 || get_arg(0)->is_null())
889     {
890       null_value= true;
891       return 0;
892     }
893     null_value= false;
894     double val= 1 + prev_value * (get_row_count()-1);
895 
896     /*
897       Applying the formula to get the value
898       If (CRN = FRN = RN) then the result is (value of expression from row at RN)
899       Otherwise the result is
900       (CRN - RN) * (value of expression for row at FRN) +
901       (RN - FRN) * (value of expression for row at CRN)
902     */
903 
904     if(ceil(val) == floor(val))
905       return floor_value->val_real();
906 
907     double ret_val=  ((val - floor(val)) * ceil_value->val_real()) +
908                   ((ceil(val) - val) * floor_value->val_real());
909 
910     return ret_val;
911   }
912 
add()913   bool add()
914   {
915     Item *arg= get_arg(0);
916     if (arg->is_null())
917       return false;
918 
919     if (first_call)
920     {
921       first_call= false;
922       prev_value= arg->val_real();
923       if (prev_value > 1 || prev_value < 0)
924       {
925         my_error(ER_ARGUMENT_OUT_OF_RANGE, MYF(0), func_name());
926         return true;
927       }
928     }
929 
930     double arg_val= arg->val_real();
931     if (prev_value != arg_val)
932     {
933       my_error(ER_ARGUMENT_NOT_CONSTANT, MYF(0), func_name());
934       return true;
935     }
936 
937     if (!floor_val_calculated)
938     {
939       floor_value->store(order_item);
940       floor_value->cache_value();
941       if (floor_value->null_value)
942         return false;
943     }
944     if (floor_val_calculated && !ceil_val_calculated)
945     {
946       ceil_value->store(order_item);
947       ceil_value->cache_value();
948       if (ceil_value->null_value)
949         return false;
950     }
951 
952     current_row_count_++;
953     double val= 1 + prev_value * (get_row_count()-1);
954 
955     if (!floor_val_calculated && get_row_number() == floor(val))
956       floor_val_calculated= true;
957 
958     if (!ceil_val_calculated && get_row_number() == ceil(val))
959       ceil_val_calculated= true;
960     return false;
961   }
962 
sum_func()963   enum Sumfunctype sum_func() const
964   {
965     return PERCENTILE_CONT_FUNC;
966   }
967 
clear()968   void clear()
969   {
970     first_call= true;
971     floor_value->clear();
972     ceil_value->clear();
973     floor_val_calculated= false;
974     ceil_val_calculated= false;
975     partition_row_count_= 0;
976     current_row_count_= 0;
977   }
978 
func_name()979   const char*func_name() const
980   {
981     return "percentile_cont";
982   }
update_field()983   void update_field() {}
984 
fix_length_and_dec()985   bool fix_length_and_dec()
986   {
987     decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
988                     // requires.
989     return FALSE;
990   }
991 
reset_field()992   void reset_field() { DBUG_ASSERT(0); }
993 
set_partition_row_count(ulonglong count)994   void set_partition_row_count(ulonglong count)
995   {
996     Partition_row_count::set_partition_row_count(count);
997   }
998 
get_copy(THD * thd)999   Item *get_copy(THD *thd)
1000   { return get_item_copy<Item_sum_percentile_cont>(thd, this); }
1001   void setup_window_func(THD *thd, Window_spec *window_spec);
1002   void setup_hybrid(THD *thd, Item *item);
1003   bool fix_fields(THD *thd, Item **ref);
1004 
1005 private:
1006   Item_cache *floor_value;
1007   Item_cache *ceil_value;
1008   bool first_call;
1009   double prev_value;
1010   bool ceil_val_calculated;
1011   bool floor_val_calculated;
1012   Item *order_item;
1013 };
1014 
1015 
1016 
1017 
1018 class Item_window_func : public Item_func_or_sum
1019 {
1020   /* Window function parameters as we've got them from the parser */
1021 public:
1022   LEX_CSTRING *window_name;
1023 public:
1024   Window_spec *window_spec;
1025 
1026 public:
Item_window_func(THD * thd,Item_sum * win_func,LEX_CSTRING * win_name)1027   Item_window_func(THD *thd, Item_sum *win_func, LEX_CSTRING *win_name)
1028     : Item_func_or_sum(thd, (Item *) win_func),
1029       window_name(win_name), window_spec(NULL),
1030       force_return_blank(true),
1031       read_value_from_result_field(false) {}
1032 
Item_window_func(THD * thd,Item_sum * win_func,Window_spec * win_spec)1033   Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec)
1034     : Item_func_or_sum(thd, (Item *) win_func),
1035       window_name(NULL), window_spec(win_spec),
1036       force_return_blank(true),
1037       read_value_from_result_field(false) {}
1038 
window_func()1039   Item_sum *window_func() const { return (Item_sum *) args[0]; }
1040 
1041   void update_used_tables();
1042 
1043   /*
1044     This is used by filesort to mark the columns it needs to read (because they
1045     participate in the sort criteria and/or row retrieval. Window functions can
1046     only be used in sort criteria).
1047 
1048     Sorting by window function value is only done after the window functions
1049     have been computed. In that case, window function will need to read its
1050     temp.table field. In order to allow that, mark that field in the read_set.
1051   */
register_field_in_read_map(void * arg)1052   bool register_field_in_read_map(void *arg)
1053   {
1054     TABLE *table= (TABLE*) arg;
1055     if (result_field && (result_field->table == table || !table))
1056     {
1057       bitmap_set_bit(result_field->table->read_set, result_field->field_index);
1058     }
1059     return 0;
1060   }
1061 
is_frame_prohibited()1062   bool is_frame_prohibited() const
1063   {
1064     switch (window_func()->sum_func()) {
1065     case Item_sum::ROW_NUMBER_FUNC:
1066     case Item_sum::RANK_FUNC:
1067     case Item_sum::DENSE_RANK_FUNC:
1068     case Item_sum::PERCENT_RANK_FUNC:
1069     case Item_sum::CUME_DIST_FUNC:
1070     case Item_sum::NTILE_FUNC:
1071     case Item_sum::PERCENTILE_CONT_FUNC:
1072     case Item_sum::PERCENTILE_DISC_FUNC:
1073       return true;
1074     default:
1075       return false;
1076     }
1077   }
1078 
requires_special_cursors()1079   bool requires_special_cursors() const
1080   {
1081     switch (window_func()->sum_func()) {
1082     case Item_sum::FIRST_VALUE_FUNC:
1083     case Item_sum::LAST_VALUE_FUNC:
1084     case Item_sum::NTH_VALUE_FUNC:
1085     case Item_sum::LAG_FUNC:
1086     case Item_sum::LEAD_FUNC:
1087       return true;
1088     default:
1089       return false;
1090     }
1091   }
1092 
requires_partition_size()1093   bool requires_partition_size() const
1094   {
1095     switch (window_func()->sum_func()) {
1096     case Item_sum::PERCENT_RANK_FUNC:
1097     case Item_sum::CUME_DIST_FUNC:
1098     case Item_sum::NTILE_FUNC:
1099     case Item_sum::PERCENTILE_CONT_FUNC:
1100     case Item_sum::PERCENTILE_DISC_FUNC:
1101       return true;
1102     default:
1103       return false;
1104     }
1105   }
1106 
requires_peer_size()1107   bool requires_peer_size() const
1108   {
1109     switch (window_func()->sum_func()) {
1110     case Item_sum::CUME_DIST_FUNC:
1111       return true;
1112     default:
1113       return false;
1114     }
1115   }
1116 
is_order_list_mandatory()1117   bool is_order_list_mandatory() const
1118   {
1119     switch (window_func()->sum_func()) {
1120     case Item_sum::RANK_FUNC:
1121     case Item_sum::DENSE_RANK_FUNC:
1122     case Item_sum::PERCENT_RANK_FUNC:
1123     case Item_sum::CUME_DIST_FUNC:
1124     case Item_sum::LAG_FUNC:
1125     case Item_sum::LEAD_FUNC:
1126     case Item_sum::PERCENTILE_CONT_FUNC:
1127     case Item_sum::PERCENTILE_DISC_FUNC:
1128       return true;
1129     default:
1130       return false;
1131     }
1132   }
1133 
only_single_element_order_list()1134   bool only_single_element_order_list() const
1135   {
1136     switch (window_func()->sum_func()){
1137     case Item_sum::PERCENTILE_CONT_FUNC:
1138     case Item_sum::PERCENTILE_DISC_FUNC:
1139       return true;
1140     default:
1141       return false;
1142     }
1143   }
1144 
1145   bool check_result_type_of_order_item();
1146 
1147 
1148 
1149   /*
1150     Computation functions.
1151     TODO: consoder merging these with class Group_bound_tracker.
1152   */
1153   void setup_partition_border_check(THD *thd);
1154 
type_handler()1155   const Type_handler *type_handler() const
1156   {
1157     return ((Item_sum *) args[0])->type_handler();
1158   }
type()1159   enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; }
1160 
1161 private:
1162   /*
1163     Window functions are very special functions, so val_() methods have
1164     special meaning for them:
1165 
1166     - Phase#1, "Initial" we run the join and put its result into temporary
1167       table. For window functions, we write the default value (NULL?) as
1168       a placeholder.
1169 
1170     - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY}
1171       order of this window function. It calls appropriate methods to inform
1172       the window function about rows entering/leaving the window.
1173       It calls window_func()->val_int() so that current window function value
1174       can be saved and stored in the temp.table.
1175 
1176     - Phase#3: "Retrieval" the temporary table is read and passed to query
1177       output. However, Item_window_func still remains in the select list,
1178       so item_windowfunc->val_int() will be called.
1179       During Phase#3, read_value_from_result_field= true.
1180   */
1181   bool force_return_blank;
1182   bool read_value_from_result_field;
1183   void print_for_percentile_functions(String *str, enum_query_type query_type);
1184 
1185 public:
set_phase_to_initial()1186   void set_phase_to_initial()
1187   {
1188     force_return_blank= true;
1189     read_value_from_result_field= false;
1190   }
set_phase_to_computation()1191   void set_phase_to_computation()
1192   {
1193     force_return_blank= false;
1194     read_value_from_result_field= false;
1195   }
set_phase_to_retrieval()1196   void set_phase_to_retrieval()
1197   {
1198     force_return_blank= false;
1199     read_value_from_result_field= true;
1200   }
1201 
is_null()1202   bool is_null()
1203   {
1204     if (force_return_blank)
1205       return true;
1206 
1207     if (read_value_from_result_field)
1208       return result_field->is_null();
1209 
1210     return window_func()->is_null();
1211   }
1212 
val_real()1213   double val_real()
1214   {
1215     double res;
1216     if (force_return_blank)
1217     {
1218       res= 0.0;
1219       null_value= true;
1220     }
1221     else if (read_value_from_result_field)
1222     {
1223       res= result_field->val_real();
1224       null_value= result_field->is_null();
1225     }
1226     else
1227     {
1228       res= window_func()->val_real();
1229       null_value= window_func()->null_value;
1230     }
1231     return res;
1232   }
1233 
val_int()1234   longlong val_int()
1235   {
1236     longlong res;
1237     if (force_return_blank)
1238     {
1239       res= 0;
1240       null_value= true;
1241     }
1242     else if (read_value_from_result_field)
1243     {
1244       res= result_field->val_int();
1245       null_value= result_field->is_null();
1246     }
1247     else
1248     {
1249       res= window_func()->val_int();
1250       null_value= window_func()->null_value;
1251     }
1252     return res;
1253   }
1254 
val_str(String * str)1255   String* val_str(String* str)
1256   {
1257     String *res;
1258     if (force_return_blank)
1259     {
1260       null_value= true;
1261       res= NULL;
1262     }
1263     else if (read_value_from_result_field)
1264     {
1265       if ((null_value= result_field->is_null()))
1266         res= NULL;
1267       else
1268         res= result_field->val_str(str);
1269     }
1270     else
1271     {
1272       res= window_func()->val_str(str);
1273       null_value= window_func()->null_value;
1274     }
1275     return res;
1276   }
1277 
val_native(THD * thd,Native * to)1278   bool val_native(THD *thd, Native *to)
1279   {
1280     if (force_return_blank)
1281       return null_value= true;
1282     if (read_value_from_result_field)
1283       return val_native_from_field(result_field, to);
1284     return val_native_from_item(thd, window_func(), to);
1285   }
1286 
val_decimal(my_decimal * dec)1287   my_decimal* val_decimal(my_decimal* dec)
1288   {
1289     my_decimal *res;
1290     if (force_return_blank)
1291     {
1292       null_value= true;
1293       res= NULL;
1294     }
1295     else if (read_value_from_result_field)
1296     {
1297       if ((null_value= result_field->is_null()))
1298         res= NULL;
1299       else
1300         res= result_field->val_decimal(dec);
1301     }
1302     else
1303     {
1304       res= window_func()->val_decimal(dec);
1305       null_value= window_func()->null_value;
1306     }
1307     return res;
1308   }
1309 
get_date(THD * thd,MYSQL_TIME * ltime,date_mode_t fuzzydate)1310   bool get_date(THD *thd, MYSQL_TIME *ltime, date_mode_t fuzzydate)
1311   {
1312     bool res;
1313     if (force_return_blank)
1314     {
1315       null_value= true;
1316       res= true;
1317     }
1318     else if (read_value_from_result_field)
1319     {
1320       if ((null_value= result_field->is_null()))
1321         res= true;
1322       else
1323         res= result_field->get_date(ltime, fuzzydate);
1324     }
1325     else
1326     {
1327       res= window_func()->get_date(thd, ltime, fuzzydate);
1328       null_value= window_func()->null_value;
1329     }
1330     return res;
1331   }
1332 
1333   void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
1334                               List<Item> &fields, uint flags);
1335 
fix_length_and_dec()1336   bool fix_length_and_dec()
1337   {
1338     Type_std_attributes::set(window_func());
1339     return FALSE;
1340   }
1341 
func_name()1342   const char* func_name() const { return "WF"; }
1343 
1344   bool fix_fields(THD *thd, Item **ref);
1345 
1346   bool resolve_window_name(THD *thd);
1347 
1348   void print(String *str, enum_query_type query_type);
1349 
get_copy(THD * thd)1350  Item *get_copy(THD *thd) { return 0; }
1351 
1352 };
1353 
1354 #endif /* ITEM_WINDOWFUNC_INCLUDED */
1355