1 /* Copyright (c) 2001, 2010, Oracle and/or its affiliates.
2    Copyright (c) 2010, 2015, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 /*
18   Function to handle quick removal of duplicates
  This code is used when doing multi-table deletes to find the rows in
  reference tables that need to be deleted.
21 
22   The basic idea is as follows:
23 
24   Store first all strings in a binary tree, ignoring duplicates.
25   When the tree uses more memory than 'max_heap_table_size',
26   write the tree (in sorted order) out to disk and start with a new tree.
27   When all data has been generated, merge the trees (removing any found
28   duplicates).
29 
30   The unique entries will be returned in sort order, to ensure that we do the
31   deletes in disk order.
32 */
33 
34 #include "mariadb.h"
35 #include "sql_priv.h"
36 #include "unireg.h"
37 #include "sql_sort.h"
38 #include "queues.h"                             // QUEUE
39 #include "my_tree.h"                            // element_count
40 #include "uniques.h"	                        // Unique
41 #include "sql_sort.h"
42 #include "myisamchk.h"                          // BUFFPEK
43 
44 int unique_write_to_file(uchar* key, element_count count, Unique *unique)
45 {
46   /*
47     Use unique->size (size of element stored in the tree) and not
48     unique->tree.size_of_element. The latter is different from unique->size
49     when tree implementation chooses to store pointer to key in TREE_ELEMENT
50     (instead of storing the element itself there)
51   */
52   return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
53 }
54 
55 int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
56 {
57   return my_b_write(&unique->file, key, unique->size) ||
58          my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0;
59 }
60 
61 int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
62 {
63   memcpy(unique->sort.record_pointers, key, unique->size);
64   unique->sort.record_pointers+=unique->size;
65   return 0;
66 }
67 
68 int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique)
69 {
70   if (count >= unique->min_dupl_count)
71   {
72     memcpy(unique->sort.record_pointers, key, unique->size);
73     unique->sort.record_pointers+=unique->size;
74   }
75   else
76     unique->filtered_out_elems++;
77   return 0;
78 }
79 
80 
/*
  Construct a Unique object.

  comp_func / comp_func_fixed_arg  comparison function (and its fixed
                                   argument) used to order tree elements
  size_arg                         size in bytes of one element
  max_in_memory_size_arg           memory budget for the in-memory tree;
                                   when exceeded the tree is flushed to disk
  min_dupl_count_arg               if non-zero, elements are stored with an
                                   occurrence counter and only elements seen
                                   at least this many times are returned
*/
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
	       uint size_arg, size_t max_in_memory_size_arg,
               uint min_dupl_count_arg)
  :max_in_memory_size(max_in_memory_size_arg),
   size(size_arg),
   elements(0)
{
  my_b_clear(&file);
  min_dupl_count= min_dupl_count_arg;
  full_size= size;
  /* On-disk elements carry a counter after the key when counting is on */
  if (min_dupl_count_arg)
    full_size+= sizeof(element_count);
  with_counters= MY_TEST(min_dupl_count_arg);
  init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func,
            NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
  /* If the following fails the next add will also fail */
  my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16,
                        MYF(MY_THREAD_SPECIFIC));
  /*
    If you change the following, change it in get_max_elements function, too.
  */
  max_elements= (ulong) (max_in_memory_size /
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
  if (!max_elements)
    max_elements= 1;

  /* Failure is detected later, on first use of the cache */
  (void) open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
                          MYF(MY_WME));
}
110 
111 
112 /*
113   Calculate log2(n!)
114 
115   NOTES
116     Stirling's approximate formula is used:
117 
118       n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
119 
120     Derivation of formula used for calculations is as follows:
121 
122     log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
123 
124       = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
125 */
126 
127 inline double log2_n_fact(double x)
128 {
129   return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
130 }
131 
132 
133 /*
134   Calculate cost of merge_buffers function call for given sequence of
135   input stream lengths and store the number of rows in result stream in *last.
136 
137   SYNOPSIS
138     get_merge_buffers_cost()
139       buff_elems  Array of #s of elements in buffers
140       elem_size   Size of element stored in buffer
141       first       Pointer to first merged element size
142       last        Pointer to last merged element size
143 
144   RETURN
145     Cost of merge_buffers operation in disk seeks.
146 
147   NOTES
148     It is assumed that no rows are eliminated during merge.
149     The cost is calculated as
150 
151       cost(read_and_write) + cost(merge_comparisons).
152 
153     All bytes in the sequences is read and written back during merge so cost
154     of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
155 
156     For comparisons cost calculations we assume that all merged sequences have
157     the same length, so each of total_buf_size elements will be added to a sort
158     heap with (n_buffers-1) elements. This gives the comparison cost:
159 
160       total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
161 */
162 
163 static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
164                                      uint *first, uint *last,
165                                      uint compare_factor)
166 {
167   uint total_buf_elems= 0;
168   for (uint *pbuf= first; pbuf <= last; pbuf++)
169     total_buf_elems+= *pbuf;
170   *last= total_buf_elems;
171 
172   size_t n_buffers= last - first + 1;
173 
174   /* Using log2(n)=log(n)/log(2) formula */
175   return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
176      total_buf_elems*log((double) n_buffers) / (compare_factor * M_LN2);
177 }
178 
179 
180 /*
181   Calculate cost of merging buffers into one in Unique::get, i.e. calculate
182   how long (in terms of disk seeks) the two calls
183     merge_many_buffs(...);
184     merge_buffers(...);
185   will take.
186 
187   SYNOPSIS
188     get_merge_many_buffs_cost()
189       buffer        buffer space for temporary data, at least
190                     Unique::get_cost_calc_buff_size bytes
191       maxbuffer     # of full buffers
192       max_n_elems   # of elements in first maxbuffer buffers
193       last_n_elems  # of elements in last buffer
194       elem_size     size of buffer element
195 
196   NOTES
197     maxbuffer+1 buffers are merged, where first maxbuffer buffers contain
198     max_n_elems elements each and last buffer contains last_n_elems elements.
199 
200     The current implementation does a dumb simulation of merge_many_buffs
201     function actions.
202 
203   RETURN
204     Cost of merge in disk seeks.
205 */
206 
static double get_merge_many_buffs_cost(uint *buffer,
                                        uint maxbuffer, uint max_n_elems,
                                        uint last_n_elems, int elem_size,
                                        uint compare_factor)
{
  int i;
  double total_cost= 0.0;
  uint *buff_elems= buffer; /* #s of elements in each of merged sequences */

  /*
    Set initial state: first maxbuffer sequences contain max_n_elems elements
    each, last sequence contains last_n_elems elements.
  */
  for (i = 0; i < (int)maxbuffer; i++)
    buff_elems[i]= max_n_elems;
  buff_elems[maxbuffer]= last_n_elems;

  /*
    Do it exactly as merge_many_buff function does, calling
    get_merge_buffers_cost to get cost of merge_buffers.
  */
  if (maxbuffer >= MERGEBUFF2)
  {
    /* Each outer iteration simulates one pass of multi-way merging */
    while (maxbuffer >= MERGEBUFF2)
    {
      uint lastbuff= 0;
      /* Merge full groups of MERGEBUFF sequences */
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
      {
        total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                           buff_elems + i,
                                           buff_elems + i + MERGEBUFF-1,
                                           compare_factor);
	lastbuff++;
      }
      /* Merge the remaining tail of sequences in one call */
      total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                         buff_elems + i,
                                         buff_elems + maxbuffer,
                                         compare_factor);
      /* After this pass only lastbuff+1 sequences remain */
      maxbuffer= lastbuff;
    }
  }

  /* Simulate final merge_buff call. */
  total_cost += get_merge_buffers_cost(buff_elems, elem_size,
                                       buff_elems, buff_elems + maxbuffer,
                                       compare_factor);
  return total_cost;
}
255 
256 
257 /*
258   Calculate cost of using Unique for processing nkeys elements of size
259   key_size using max_in_memory_size memory.
260 
261   SYNOPSIS
262     Unique::get_use_cost()
263       buffer    space for temporary data, use Unique::get_cost_calc_buff_size
264                 to get # bytes needed.
265       nkeys     #of elements in Unique
266       key_size  size of each elements in bytes
267       max_in_memory_size   amount of memory Unique will be allowed to use
268       compare_factor   used to calculate cost of one comparison
269       write_fl  if the result must be saved written to disk
270       in_memory_elems  OUT estimate of the number of elements in memory
271                            if disk is not used
272 
273   RETURN
274     Cost in disk seeks.
275 
276   NOTES
    cost(using_unique) =
278       cost(create_trees) +  (see #1)
279       cost(merge) +         (see #2)
280       cost(read_result)     (see #3)
281 
282     1. Cost of trees creation
283       For each Unique::put operation there will be 2*log2(n+1) elements
      comparisons, where n runs from 1 to tree_size (we assume that all added
285       elements are different). Together this gives:
286 
287       n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
288 
289       then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
290 
291       Total cost of creating trees:
292       (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
293 
294       Approximate value of log2(N!) is calculated by log2_n_fact function.
295 
296     2. Cost of merging.
297       If only one tree is created by Unique no merging will be necessary.
298       Otherwise, we model execution of merge_many_buff function and count
299       #of merges. (The reason behind this is that number of buffers is small,
      while size of buffers is big and we don't want to lose precision with
301       O(x)-style formula)
302 
303     3. If only one tree is created by Unique no disk io will happen.
304       Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
305       these will be random seeks.
306 */
307 
double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size,
                            size_t max_in_memory_size,
                            uint compare_factor,
                            bool intersect_fl, bool *in_memory)
{
  size_t max_elements_in_tree;
  size_t last_tree_elems;
  size_t   n_full_trees; /* number of trees in unique - 1 */
  double result;

  /*
    How many elements fit in one in-memory tree; same formula as the
    max_elements computation in the constructor.
  */
  max_elements_in_tree= ((size_t) max_in_memory_size /
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));

  if (max_elements_in_tree == 0)
    max_elements_in_tree= 1;

  n_full_trees=    nkeys / max_elements_in_tree;
  last_tree_elems= nkeys % max_elements_in_tree;

  /* Calculate cost of creating trees */
  result= 2*log2_n_fact(last_tree_elems + 1.0);
  if (n_full_trees)
    result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
  result /= compare_factor;

  DBUG_PRINT("info",("unique trees sizes: %u=%u*%u + %u", (uint)nkeys,
                     (uint)n_full_trees,
                     (uint)(n_full_trees?max_elements_in_tree:0),
                     (uint)last_tree_elems));

  /* Tell the caller whether everything fits into a single in-memory tree */
  if (in_memory)
    *in_memory= !n_full_trees;

  if (!n_full_trees)
    return result;

  /*
    There is more than one tree and merging is necessary.
    First, add cost of writing all trees to disk, assuming that all disk
    writes are sequential.
  */
  result += DISK_SEEK_BASE_COST * n_full_trees *
              ceil(((double) key_size)*max_elements_in_tree / IO_SIZE);
  result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);

  /* Cost of merge */
  if (intersect_fl)
    key_size+= sizeof(element_count); /* on-disk elements carry a counter */
  double merge_cost= get_merge_many_buffs_cost(buffer, (uint)n_full_trees,
                                               (uint)max_elements_in_tree,
                                               (uint)last_tree_elems, key_size,
                                               compare_factor);
  result += merge_cost;
  /*
    Add cost of reading the resulting sequence, assuming there were no
    duplicate elements.
  */
  result += ceil((double)key_size*nkeys/IO_SIZE);

  return result;
}
369 
Unique::~Unique()
{
  /* Release the temporary file, the in-memory tree and the BUFFPEK array */
  close_cached_file(&file);
  delete_tree(&tree, 0);
  delete_dynamic(&file_ptrs);
}
376 
377 
/*
  Write the current in-memory tree to disk (in sorted order) and clear it.
  A BUFFPEK describing the flushed chunk (element count and file position)
  is appended to file_ptrs so the chunk can be merged later.

  RETURN  0 ok, 1 error
*/
bool Unique::flush()
{
  BUFFPEK file_ptr;
  elements+= tree.elements_in_tree;
  file_ptr.count=tree.elements_in_tree;
  file_ptr.file_pos=my_b_tell(&file);

  /* With counters each element is written together with its count */
  tree_walk_action action= min_dupl_count ?
		           (tree_walk_action) unique_write_to_file_with_count :
		           (tree_walk_action) unique_write_to_file;
  if (tree_walk(&tree, action,
		(void*) this, left_root_right) ||
      insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
    return 1;
  delete_tree(&tree, 0);
  return 0;
}
396 
397 
398 /*
399   Clear the tree and the file.
400   You must call reset() if you want to reuse Unique after walk().
401 */
402 
void
Unique::reset()
{
  reset_tree(&tree);
  /*
    If elements != 0, some trees were stored in the file (see how
    flush() works). Note, that we can not count on my_b_tell(&file) == 0
    here, because it can return 0 right after walk(), and walk() does not
    reset any Unique member.
  */
  if (elements)
  {
    reset_dynamic(&file_ptrs);
    /* Truncate the file and make it writable again */
    reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
  }
  /* my_free(NULL) is a no-op, so this is safe even if get() was not called */
  my_free(sort.record_pointers);
  elements= 0;
  tree.flag= 0;
  sort.record_pointers= 0;
}
423 
424 /*
425   The comparison function, passed to queue_init() in merge_walk() and in
  merge_buffers() when the latter is called from Unique::get() must
  use the comparison function of Unique::tree, but compare members of struct
428   BUFFPEK.
429 */
430 
431 C_MODE_START
432 
433 static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2)
434 {
435   BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
436   return ctx->key_compare(ctx->key_compare_arg,
437                           *((uchar **) key_ptr1), *((uchar **)key_ptr2));
438 }
439 
440 C_MODE_END
441 
442 
443 inline
444 element_count get_counter_from_merged_element(void *ptr, uint ofs)
445 {
446   element_count cnt;
447   memcpy((uchar *) &cnt, (uchar *) ptr + ofs, sizeof(element_count));
448   return cnt;
449 }
450 
451 
452 inline
453 void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt)
454 {
455   memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count));
456 }
457 
458 
459 /*
460   DESCRIPTION
461 
462     Function is very similar to merge_buffers, but instead of writing sorted
463     unique keys to the output file, it invokes walk_action for each key.
464     This saves I/O if you need to pass through all unique keys only once.
465 
466   SYNOPSIS
467     merge_walk()
468   All params are 'IN' (but see comment for begin, end):
469     merge_buffer       buffer to perform cached piece-by-piece loading
470                        of trees; initially the buffer is empty
471     merge_buffer_size  size of merge_buffer. Must be aligned with
472                        key_length
473     key_length         size of tree element; key_length * (end - begin)
474                        must be less or equal than merge_buffer_size.
475     begin              pointer to BUFFPEK struct for the first tree.
476     end                pointer to BUFFPEK struct for the last tree;
477                        end > begin and [begin, end) form a consecutive
478                        range. BUFFPEKs structs in that range are used and
479                        overwritten in merge_walk().
480     walk_action        element visitor. Action is called for each unique
481                        key.
482     walk_action_arg    argument to walk action. Passed to it on each call.
483     compare            elements comparison function
484     compare_arg        comparison function argument
485     file               file with all trees dumped. Trees in the file
486                        must contain sorted unique values. Cache must be
487                        initialized in read mode.
488     with counters      take into account counters for equal merged
489                        elements
490   RETURN VALUE
491     0     ok
492     <> 0  error
493 */
494 
static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
                       uint key_length, BUFFPEK *begin, BUFFPEK *end,
                       tree_walk_action walk_action, void *walk_action_arg,
                       qsort_cmp2 compare, void *compare_arg,
                       IO_CACHE *file, bool with_counters)
{
  BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
  QUEUE queue;
  /* Sanity checks: need at least one tree and room for one key per tree
     plus the spare save-key slot */
  if (end <= begin ||
      merge_buffer_size < (size_t) (key_length * (end - begin + 1)) ||
      init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
                 buffpek_compare, &compare_context, 0, 0))
    return 1;
  /* we need space for one key when a piece of merge buffer is re-read */
  merge_buffer_size-= key_length;
  uchar *save_key_buff= merge_buffer + merge_buffer_size;
  /* Divide the remaining buffer evenly between the trees */
  uint max_key_count_per_piece= (uint) (merge_buffer_size/(end-begin) /
                                        key_length);
  /* if piece_size is aligned reuse_freed_buffer will always hit */
  uint piece_size= max_key_count_per_piece * key_length;
  ulong bytes_read;               /* to hold return value of read_to_buffer */
  BUFFPEK *top;
  int res= 1;
  /* Offset of the counter within a merged element; equals key_length
     (i.e. past the end, unused) when counters are off */
  uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
  element_count cnt;
  /*
    Invariant: queue must contain top element from each tree, until a tree
    is not completely walked through.
    Here we're forcing the invariant, inserting one element from each tree
    to the queue.
  */
  for (top= begin; top != end; ++top)
  {
    top->base= merge_buffer + (top - begin) * piece_size;
    top->max_keys= max_key_count_per_piece;
    bytes_read= read_to_buffer(file, top, key_length);
    if (unlikely(bytes_read == (ulong) -1))
      goto end;
    DBUG_ASSERT(bytes_read);
    queue_insert(&queue, (uchar *) top);
  }
  top= (BUFFPEK *) queue_top(&queue);
  while (queue.elements > 1)
  {
    /*
      Every iteration one element is removed from the queue, and one is
      inserted by the rules of the invariant. If two adjacent elements on
      the top of the queue are not equal, biggest one is unique, because all
      elements in each tree are unique. Action is applied only to unique
      elements.
    */
    void *old_key= top->key;
    /*
      read next key from the cache or from the file and push it to the
      queue; this gives new top.
    */
    top->key+= key_length;
    if (--top->mem_count)
      queue_replace_top(&queue);
    else /* next piece should be read */
    {
      /* save old_key not to overwrite it in read_to_buffer */
      memcpy(save_key_buff, old_key, key_length);
      old_key= save_key_buff;
      bytes_read= read_to_buffer(file, top, key_length);
      if (unlikely(bytes_read == (ulong) -1))
        goto end;
      else if (bytes_read)      /* top->key, top->mem_count are reset */
        queue_replace_top(&queue);             /* in read_to_buffer */
      else
      {
        /*
          Tree for old 'top' element is empty: remove it from the queue and
          give all its memory to the nearest tree.
        */
        queue_remove_top(&queue);
        reuse_freed_buff(&queue, top, key_length);
      }
    }
    top= (BUFFPEK *) queue_top(&queue);
    /* new top has been obtained; if old top is unique, apply the action */
    if (compare(compare_arg, old_key, top->key))
    {
      cnt= with_counters ?
           get_counter_from_merged_element(old_key, cnt_ofs) : 1;
      if (walk_action(old_key, cnt, walk_action_arg))
        goto end;
    }
    else if (with_counters)
    {
      /* Equal keys: accumulate old key's counter into the new top element */
      cnt= get_counter_from_merged_element(top->key, cnt_ofs);
      cnt+= get_counter_from_merged_element(old_key, cnt_ofs);
      put_counter_into_merged_element(top->key, cnt_ofs, cnt);
    }
  }
  /*
    Applying walk_action to the tail of the last tree: this is safe because
    either we had only one tree in the beginning, or we work with the
    last tree in the queue.
  */
  do
  {
    do
    {

      cnt= with_counters ?
           get_counter_from_merged_element(top->key, cnt_ofs) : 1;
      if (walk_action(top->key, cnt, walk_action_arg))
        goto end;
      top->key+= key_length;
    }
    while (--top->mem_count);
    /* Refill the buffer with the next piece of the last tree, if any */
    bytes_read= read_to_buffer(file, top, key_length);
    if (unlikely(bytes_read == (ulong) -1))
      goto end;
  }
  while (bytes_read);
  res= 0;
end:
  delete_queue(&queue);
  return res;
}
617 
618 
619 /*
620   DESCRIPTION
621     Walks consecutively through all unique elements:
622     if all elements are in memory, then it simply invokes 'tree_walk', else
623     all flushed trees are loaded to memory piece-by-piece, pieces are
624     sorted, and action is called for each unique value.
625     Note: so as merging resets file_ptrs state, this method can change
626     internal Unique state to undefined: if you want to reuse Unique after
627     walk() you must call reset() first!
628   SYNOPSIS
629     Unique:walk()
630   All params are 'IN':
631     table   parameter for the call of the merge method
632     action  function-visitor, typed in include/my_tree.h
633             function is called for each unique element
634     arg     argument for visitor, which is passed to it on each call
635   RETURN VALUE
636     0    OK
637     <> 0 error
638  */
639 
bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
{
  int res= 0;
  uchar *merge_buffer;

  if (elements == 0)                       /* the whole tree is in memory */
    return tree_walk(&tree, action, walk_action_arg, left_root_right);

  /* Elements already flushed plus those still in the in-memory tree */
  sort.return_rows= elements+tree.elements_in_tree;
  /* flush current tree to the file to have some memory for merge buffer */
  if (flush())
    return 1;
  if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
    return 1;
  /*
    merge_buffer must fit at least MERGEBUFF2 + 1 keys, because
    merge_index() can merge that many BUFFPEKs at once. The extra space for one key
    is needed when a piece of merge buffer is re-read, see merge_walk()
  */
  size_t buff_sz= MY_MAX(MERGEBUFF2+1, max_in_memory_size/full_size+1) * full_size;
  /* NOTE(review): unlike Unique::get(), this allocation does not pass
     MY_THREAD_SPECIFIC -- confirm whether that is intentional */
  if (!(merge_buffer = (uchar *)my_malloc(buff_sz, MYF(MY_WME))))
    return 1;
  /*
    If the buffer cannot hold one piece per flushed tree, do a preliminary
    merge pass first to reduce the number of BUFFPEKs.
  */
  if (buff_sz < full_size * (file_ptrs.elements + 1UL))
    res= merge(table, merge_buffer, buff_sz >= full_size * MERGEBUFF2) ;

  if (!res)
  {
    res= merge_walk(merge_buffer, buff_sz, full_size,
                    (BUFFPEK *) file_ptrs.buffer,
                    (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
                    action, walk_action_arg,
                    tree.compare, tree.custom_arg, &file, with_counters);
  }
  my_free(merge_buffer);
  return res;
}
676 
677 
678 /*
679   DESCRIPTION
680 
681   Perform multi-pass sort merge of the elements using the buffer buff as
682   the merge buffer. The last pass is not performed if without_last_merge is
683   TRUE.
684 
685   SYNOPSIS
686     Unique:merge()
687   All params are 'IN':
688     table               the parameter to access sort context
689     buff                merge buffer
690     without_last_merge  TRUE <=> do not perform the last merge
691   RETURN VALUE
692     0    OK
693     <> 0 error
694  */
695 
bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge)
{
  IO_CACHE *outfile= &sort.io_cache;
  BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
  uint maxbuffer= file_ptrs.elements - 1;
  my_off_t save_pos;
  bool error= 1;
  Sort_param sort_param;

  /* Open cached file for table records if it isn't open */
  if (! my_b_inited(outfile) &&
      open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
                       MYF(MY_WME)))
    return 1;

  /* Set up a Sort_param describing our fixed-size keys for merge_many_buff */
  bzero((char*) &sort_param,sizeof(sort_param));
  sort_param.max_rows= elements;
  sort_param.sort_form= table;
  sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
   full_size;
  sort_param.min_dupl_count= min_dupl_count;
  sort_param.res_length= 0;
  sort_param.max_keys_per_buffer=
    (uint) MY_MAX((max_in_memory_size / sort_param.sort_length), MERGEBUFF2);
  sort_param.not_killable= 1;

  /* Space for one key, placed right after the merge buffer proper */
  sort_param.unique_buff= buff +(sort_param.max_keys_per_buffer *
				       sort_param.sort_length);

  /* Reuse the tree's comparison function for merging */
  sort_param.compare= (qsort2_cmp) buffpek_compare;
  sort_param.cmp_context.key_compare= tree.compare;
  sort_param.cmp_context.key_compare_arg= tree.custom_arg;

  /* Merge the buffers to one file, removing duplicates */
  if (merge_many_buff(&sort_param,buff,file_ptr,&maxbuffer,&file))
    goto err;
  if (flush_io_cache(&file) ||
      reinit_io_cache(&file,READ_CACHE,0L,0,0))
    goto err;
  /* The counter (if any) is stripped from the final result records */
  sort_param.res_length= sort_param.rec_length-
                         (min_dupl_count ? sizeof(min_dupl_count) : 0);
  if (without_last_merge)
  {
    /* Leave the remaining BUFFPEKs for the caller (see Unique::walk()) */
    file_ptrs.elements= maxbuffer+1;
    return 0;
  }
  if (merge_index(&sort_param, buff, file_ptr, maxbuffer, &file, outfile))
    goto err;
  error= 0;
err:
  if (flush_io_cache(outfile))
    error= 1;

  /* Setup io_cache for reading */
  save_pos= outfile->pos_in_file;
  if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
    error= 1;
  outfile->end_of_file=save_pos;
  return error;
}
756 
757 
758 /*
759   Allocate memory that can be used with init_records() so that
760   rows will be read in priority order.
761 */
762 
bool Unique::get(TABLE *table)
{
  bool rc= 1;
  uchar *sort_buffer= NULL;
  sort.return_rows= elements+tree.elements_in_tree;
  DBUG_ENTER("Unique::get");

  if (my_b_tell(&file) == 0)
  {
    /* Whole tree is in memory;  Don't use disk if you don't need to */
    if ((sort.record_pointers= (uchar*)
	 my_malloc(size * tree.elements_in_tree, MYF(MY_THREAD_SPECIFIC))))
    {
      uchar *save_record_pointers= sort.record_pointers;
      /* With min_dupl_count, elements below the threshold are filtered out */
      tree_walk_action action= min_dupl_count ?
		         (tree_walk_action) unique_intersect_write_to_ptrs :
		         (tree_walk_action) unique_write_to_ptrs;
      filtered_out_elems= 0;
      (void) tree_walk(&tree, action,
		       this, left_root_right);
      /* Restore record_pointers that was changed in by 'action' above */
      sort.record_pointers= save_record_pointers;
      sort.return_rows-= filtered_out_elems;
      DBUG_RETURN(0);
    }
  }
  /* Not enough memory; Save the result to file && free memory used by tree */
  if (flush())
    DBUG_RETURN(1);
  /*
    merge_buffer must fit at least MERGEBUFF2 + 1 keys, because
    merge_index() can merge that many BUFFPEKs at once. The extra space for
    one key for Sort_param::unique_buff
  */
  size_t buff_sz= MY_MAX(MERGEBUFF2+1, max_in_memory_size/full_size+1) * full_size;
  if (!(sort_buffer= (uchar*) my_malloc(buff_sz,
                                        MYF(MY_THREAD_SPECIFIC|MY_WME))))
    DBUG_RETURN(1);

  /* Full merge: result goes to sort.io_cache for init_records() */
  if (merge(table, sort_buffer, FALSE))
    goto err;
  rc= 0;

err:
  my_free(sort_buffer);
  DBUG_RETURN(rc);
}
810