1 /* Copyright (c) 2016 MariaDB corporation
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
15 
16 #ifndef UNIQUE_INCLUDED
17 #define UNIQUE_INCLUDED
18 
19 #include "filesort.h"
20 
/*
   Unique -- a class for collecting unique values (i.e. removing duplicates).
   All values are put into a TREE. If the tree becomes too big, it is
   dumped to a file. The user can either request the values sorted, or
   just iterate through them. In the latter case tree merging is performed
   in memory simultaneously with the iteration, so it should be ~2-3x faster.
 */
28 
29 class Unique :public Sql_alloc
30 {
31   DYNAMIC_ARRAY file_ptrs;
32   ulong max_elements;   /* Total number of elements that will be stored in-memory */
33   size_t max_in_memory_size;
34   IO_CACHE file;
35   TREE tree;
36  /* Number of elements filtered out due to min_dupl_count when storing results
37     to table. See Unique::get */
38   ulong filtered_out_elems;
39   uint size;
40 
41   uint full_size;   /* Size of element + space needed to store the number of
42                        duplicates found for the element. */
43   uint min_dupl_count;   /* Minimum number of occurences of element required for
44                             it to be written to record_pointers.
45                             always 0 for unions, > 0 for intersections */
46   bool with_counters;
47 
48   bool merge(TABLE *table, uchar *buff, bool without_last_merge);
49   bool flush();
50 
51 public:
52   ulong elements;
53   SORT_INFO sort;
54   Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
55 	 uint size_arg, size_t max_in_memory_size_arg,
56          uint min_dupl_count_arg= 0);
57   ~Unique();
elements_in_tree()58   ulong elements_in_tree() { return tree.elements_in_tree; }
unique_add(void * ptr)59   inline bool unique_add(void *ptr)
60   {
61     DBUG_ENTER("unique_add");
62     DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
63     if (!(tree.flag & TREE_ONLY_DUPS) &&
64         tree.elements_in_tree >= max_elements && flush())
65       DBUG_RETURN(1);
66     DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
67   }
68 
is_in_memory()69   bool is_in_memory() { return (my_b_tell(&file) == 0); }
close_for_expansion()70   void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
71 
72   bool get(TABLE *table);
73 
74   /* Cost of searching for an element in the tree */
get_search_cost(ulonglong tree_elems,uint compare_factor)75   inline static double get_search_cost(ulonglong tree_elems, uint compare_factor)
76   {
77     return log((double) tree_elems) / (compare_factor * M_LN2);
78   }
79 
80   static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
81                              size_t max_in_memory_size, uint compare_factor,
82                              bool intersect_fl, bool *in_memory);
get_cost_calc_buff_size(size_t nkeys,uint key_size,size_t max_in_memory_size)83   inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
84                                             size_t max_in_memory_size)
85   {
86     size_t max_elems_in_tree=
87       max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
88 
89     if (max_elems_in_tree == 0)
90       max_elems_in_tree= 1;
91     return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
92   }
93 
94   void reset();
95   bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
96 
get_size()97   uint get_size() const { return size; }
get_max_in_memory_size()98   size_t get_max_in_memory_size() const { return max_in_memory_size; }
99 
100   friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
101   friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
102 
103   friend int unique_write_to_file_with_count(uchar* key, element_count count,
104                                              Unique *unique);
105   friend int unique_intersect_write_to_ptrs(uchar* key, element_count count,
106 				            Unique *unique);
107 };
108 
109 #endif /* UNIQUE_INCLUDED */
110