1 /* Copyright (C) 2010, 2011 Monty Program Ab
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 #include "mariadb.h"
17 #include "sql_parse.h"
18 #include <my_bit.h>
19 #include "sql_select.h"
20 #include "key.h"
21 #include "sql_statistics.h"
22 
23 /****************************************************************************
24  * Default MRR implementation (MRR to non-MRR converter)
25  ***************************************************************************/
26 
27 /**
28   Get cost and other information about MRR scan over a known list of ranges
29 
30   Calculate estimated cost and other information about an MRR scan for given
31   sequence of ranges.
32 
33   @param keyno           Index number
34   @param seq             Range sequence to be traversed
35   @param seq_init_param  First parameter for seq->init()
36   @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
37                          can't efficiently determine it
38   @param bufsz    INOUT  IN:  Size of the buffer available for use
39                          OUT: Size of the buffer that is expected to be actually
40                               used, or 0 if buffer is not needed.
41   @param flags    INOUT  A combination of HA_MRR_* flags
42   @param cost     OUT    Estimated cost of MRR access
43 
44   @note
45     This method (or an overriding one in a derived class) must check for
46     thd->killed and return HA_POS_ERROR if it is not zero. This is required
47     for a user to be able to interrupt the calculation by killing the
48     connection/query.
49 
50   @retval
51     HA_POS_ERROR  Error or the engine is unable to perform the requested
52                   scan. Values of OUT parameters are undefined.
53   @retval
54     other         OK, *cost contains cost of the scan, *bufsz and *flags
55                   contain scan parameters.
56 */
57 
58 ha_rows
multi_range_read_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges_arg,uint * bufsz,uint * flags,Cost_estimate * cost)59 handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
60                                      void *seq_init_param, uint n_ranges_arg,
61                                      uint *bufsz, uint *flags, Cost_estimate *cost)
62 {
63   KEY_MULTI_RANGE range;
64   range_seq_t seq_it;
65   ha_rows rows, total_rows= 0;
66   uint n_ranges=0;
67   THD *thd= table->in_use;
68   uint limit= thd->variables.eq_range_index_dive_limit;
69 
70   bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
71                                                             seq_init_param,
72                                                             limit);
73 
74   /* Default MRR implementation doesn't need buffer */
75   *bufsz= 0;
76 
77   seq_it= seq->init(seq_init_param, n_ranges, *flags);
78   while (!seq->next(seq_it, &range))
79   {
80     if (unlikely(thd->killed != 0))
81       return HA_POS_ERROR;
82 
83     n_ranges++;
84     key_range *min_endp, *max_endp;
85     if (range.range_flag & GEOM_FLAG)
86     {
87       /* In this case tmp_min_flag contains the handler-read-function */
88       range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
89       min_endp= &range.start_key;
90       max_endp= NULL;
91     }
92     else
93     {
94       min_endp= range.start_key.length? &range.start_key : NULL;
95       max_endp= range.end_key.length? &range.end_key : NULL;
96     }
97     int keyparts_used= my_count_bits(range.start_key.keypart_map);
98     if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
99       rows= 1; /* there can be at most one row */
100     else if (use_statistics_for_eq_range &&
101              !(range.range_flag & NULL_RANGE) &&
102              (range.range_flag & EQ_RANGE) &&
103              table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
104       rows=
105         (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1);
106     else
107     {
108       if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
109                                                         max_endp)))
110       {
111         /* Can't scan one range => can't do MRR scan at all */
112         total_rows= HA_POS_ERROR;
113         break;
114       }
115     }
116     total_rows += rows;
117   }
118 
119   if (total_rows != HA_POS_ERROR)
120   {
121     /* The following calculation is the same as in multi_range_read_info(): */
122     *flags |= HA_MRR_USE_DEFAULT_IMPL;
123     cost->reset();
124     cost->avg_io_cost= 1; /* assume random seeks */
125     if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
126       cost->io_count= keyread_time(keyno, n_ranges, (uint)total_rows);
127     else
128       cost->io_count= read_time(keyno, n_ranges, total_rows);
129     cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
130   }
131   return total_rows;
132 }
133 
134 
135 /**
136   Get cost and other information about MRR scan over some sequence of ranges
137 
138   Calculate estimated cost and other information about an MRR scan for some
139   sequence of ranges.
140 
141   The ranges themselves will be known only at execution phase. When this
142   function is called we only know number of ranges and a (rough) E(#records)
143   within those ranges.
144 
145   Currently this function is only called for "n-keypart singlepoint" ranges,
146   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
147 
148   The flags parameter is a combination of those flags: HA_MRR_SORTED,
149   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
150 
151   @param keyno           Index number
152   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
153                          range sequence.
154   @param n_rows          Estimated total number of records contained within all
155                          of the ranges
156   @param bufsz    INOUT  IN:  Size of the buffer available for use
157                          OUT: Size of the buffer that will be actually used, or
158                               0 if buffer is not needed.
159   @param flags    INOUT  A combination of HA_MRR_* flags
160   @param cost     OUT    Estimated cost of MRR access
161 
162   @retval
163     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
164           parameters.
165   @retval
166     other Error or can't perform the requested scan
167 */
168 
multi_range_read_info(uint keyno,uint n_ranges,uint n_rows,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)169 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
170                                        uint key_parts, uint *bufsz,
171                                        uint *flags, Cost_estimate *cost)
172 {
173   /*
174     Currently we expect this function to be called only in preparation of scan
175     with HA_MRR_SINGLE_POINT property.
176   */
177   DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
178 
179   *bufsz= 0; /* Default implementation doesn't need a buffer */
180   *flags |= HA_MRR_USE_DEFAULT_IMPL;
181 
182   cost->reset();
183   cost->avg_io_cost= 1; /* assume random seeks */
184 
185   /* Produce the same cost as non-MRR code does */
186   if (*flags & HA_MRR_INDEX_ONLY)
187     cost->io_count= keyread_time(keyno, n_ranges, n_rows);
188   else
189     cost->io_count= read_time(keyno, n_ranges, n_rows);
190   return 0;
191 }
192 
193 
194 /**
195   Initialize the MRR scan
196 
197   Initialize the MRR scan. This function may do heavyweight scan
198   initialization like row prefetching/sorting/etc (NOTE: but better not do
199   it here as we may not need it, e.g. if we never satisfy WHERE clause on
200   previous tables. For many implementations it would be natural to do such
201   initializations in the first multi_read_range_next() call)
202 
203   mode is a combination of the following flags: HA_MRR_SORTED,
204   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
205 
206   @param seq             Range sequence to be traversed
207   @param seq_init_param  First parameter for seq->init()
208   @param n_ranges        Number of ranges in the sequence
209   @param mode            Flags, see the description section for the details
210   @param buf             INOUT: memory buffer to be used
211 
212   @note
213     One must have called index_init() before calling this function. Several
214     multi_range_read_init() calls may be made in course of one query.
215 
216     Buffer memory management is done according to the following scenario:
217     The caller allocates the buffer and provides it to the callee by filling
218     the members of HANDLER_BUFFER structure.
219     The callee consumes all or some fraction of the provided buffer space, and
220     sets the HANDLER_BUFFER members accordingly.
221     The callee may use the buffer memory until the next multi_range_read_init()
222     call is made, all records have been read, or until index_end() call is
223     made, whichever comes first.
224 
225   @retval 0  OK
226   @retval 1  Error
227 */
228 
229 int
multi_range_read_init(RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)230 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
231                                uint n_ranges, uint mode, HANDLER_BUFFER *buf)
232 {
233   DBUG_ENTER("handler::multi_range_read_init");
234   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
235   mrr_funcs= *seq_funcs;
236   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
237   mrr_have_range= FALSE;
238   DBUG_RETURN(0);
239 }
240 
241 /**
242   Get next record in MRR scan
243 
244   Default MRR implementation: read the next record
245 
246   @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
247                           Otherwise, the opaque value associated with the range
248                           that contains the returned record.
249 
250   @retval 0      OK
251   @retval other  Error code
252 */
253 
multi_range_read_next(range_id_t * range_info)254 int handler::multi_range_read_next(range_id_t *range_info)
255 {
256   int result= HA_ERR_END_OF_FILE;
257   bool range_res;
258   DBUG_ENTER("handler::multi_range_read_next");
259 
260   if (!mrr_have_range)
261   {
262     mrr_have_range= TRUE;
263     goto start;
264   }
265 
266   do
267   {
268     /* Save a call if there can be only one row in range. */
269     if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
270     {
271       result= read_range_next();
272       /* On success or non-EOF errors jump to the end. */
273       if (result != HA_ERR_END_OF_FILE)
274         break;
275     }
276     else
277     {
278       if (ha_was_semi_consistent_read())
279       {
280         /*
281           The following assignment is redundant, but for extra safety and to
282           remove the compiler warning:
283         */
284         range_res= FALSE;
285         goto scan_it_again;
286       }
287       /*
288         We need to set this for the last range only, but checking this
289         condition is more expensive than just setting the result code.
290       */
291       result= HA_ERR_END_OF_FILE;
292     }
293 
294 start:
295     /* Try the next range(s) until one matches a record. */
296     while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
297     {
298 scan_it_again:
299       result= read_range_first(mrr_cur_range.start_key.keypart_map ?
300                                  &mrr_cur_range.start_key : 0,
301                                mrr_cur_range.end_key.keypart_map ?
302                                  &mrr_cur_range.end_key : 0,
303                                MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
304                                mrr_is_output_sorted);
305       if (result != HA_ERR_END_OF_FILE)
306         break;
307     }
308   }
309   while ((result == HA_ERR_END_OF_FILE) && !range_res);
310 
311   *range_info= mrr_cur_range.ptr;
312   DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
313   DBUG_RETURN(result);
314 }
315 
316 /****************************************************************************
317  * Mrr_*_reader classes (building blocks for DS-MRR)
318  ***************************************************************************/
319 
init(handler * h_arg,RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,Key_parameters * key_par_arg,Lifo_buffer * key_buffer_arg,Buffer_manager * buf_manager_arg)320 int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
321                                   void *seq_init_param, uint n_ranges,
322                                   uint mode,  Key_parameters *key_par_arg,
323                                   Lifo_buffer *key_buffer_arg,
324                                   Buffer_manager *buf_manager_arg)
325 {
326   HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
327   file= h_arg;
328   return file->handler::multi_range_read_init(seq_funcs, seq_init_param,
329                                               n_ranges, mode, &no_buffer);
330 }
331 
332 
get_next(range_id_t * range_info)333 int Mrr_simple_index_reader::get_next(range_id_t *range_info)
334 {
335   int res;
336   while (!(res= file->handler::multi_range_read_next(range_info)))
337   {
338     KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range;
339     if (!file->mrr_funcs.skip_index_tuple ||
340         !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr))
341       break;
342   }
343   if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
344     file->print_error(res, MYF(0));             // Fatal error
345   return res;
346 }
347 
348 
349 /**
350   @brief Get next index record
351 
352   @param range_info  OUT identifier of range that the returned record belongs to
353 
354   @note
355     We actually iterate over nested sequences:
356     - an ordered sequence of groups of identical keys
357       - each key group has key value, which has multiple matching records
358         - thus, each record matches all members of the key group
359 
360   @retval 0                   OK, next record was successfully read
361   @retval HA_ERR_END_OF_FILE  End of records
362   @retval Other               Some other error; Error is printed
363 */
364 
get_next(range_id_t * range_info)365 int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
366 {
367   int res;
368   DBUG_ENTER("Mrr_ordered_index_reader::get_next");
369 
370   for(;;)
371   {
372     if (!scanning_key_val_iter)
373     {
374       while ((res= kv_it.init(this)))
375       {
376         if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
377           DBUG_RETURN(res); /* Some fatal error */
378 
379         if (key_buffer->is_empty())
380         {
381           DBUG_RETURN(HA_ERR_END_OF_FILE);
382         }
383       }
384       scanning_key_val_iter= TRUE;
385     }
386 
387     if ((res= kv_it.get_next(range_info)))
388     {
389       scanning_key_val_iter= FALSE;
390       if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
391         DBUG_RETURN(res);
392       kv_it.move_to_next_key_value();
393       continue;
394     }
395     if (!skip_index_tuple(*range_info) &&
396         !skip_record(*range_info, NULL))
397     {
398       break;
399     }
400     /* Go get another (record, range_id) combination */
401   } /* while */
402 
403   DBUG_RETURN(0);
404 }
405 
406 
407 /*
408   Supply index reader with the O(1)space it needs for scan interrupt/restore
409   operation
410 */
411 
set_interruption_temp_buffer(uint rowid_length,uint key_len,uint saved_pk_len,uchar ** space_start,uchar * space_end)412 bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
413                                                             uint key_len,
414                                                             uint saved_pk_len,
415                                                             uchar **space_start,
416                                                             uchar *space_end)
417 {
418   if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len))
419     return TRUE;
420   support_scan_interruptions= TRUE;
421 
422   saved_rowid= *space_start;
423   *space_start += rowid_length;
424 
425   if (saved_pk_len)
426   {
427     saved_primary_key= *space_start;
428     *space_start += saved_pk_len;
429   }
430   else
431     saved_primary_key= NULL;
432 
433   saved_key_tuple= *space_start;
434   *space_start += key_len;
435 
436   have_saved_rowid= FALSE;
437   read_was_interrupted= FALSE;
438   return FALSE;
439 }
440 
set_no_interruption_temp_buffer()441 void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
442 {
443   support_scan_interruptions= FALSE;
444   saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */
445   have_saved_rowid= FALSE;
446   read_was_interrupted= FALSE;
447 }
448 
interrupt_read()449 void Mrr_ordered_index_reader::interrupt_read()
450 {
451   DBUG_ASSERT(support_scan_interruptions);
452   TABLE *table= file->get_table();
453   KEY *used_index= &table->key_info[file->active_index];
454   /* Save the current key value */
455   key_copy(saved_key_tuple, table->record[0],
456            used_index, used_index->key_length);
457 
458   if (saved_primary_key)
459   {
460     key_copy(saved_primary_key, table->record[0],
461              &table->key_info[table->s->primary_key],
462              table->key_info[table->s->primary_key].key_length);
463   }
464   read_was_interrupted= TRUE;
465 
466   /* Save the last rowid */
467   memcpy(saved_rowid, file->ref, file->ref_length);
468   have_saved_rowid= TRUE;
469 }
470 
position()471 void Mrr_ordered_index_reader::position()
472 {
473   if (have_saved_rowid)
474     memcpy(file->ref, saved_rowid, file->ref_length);
475   else
476     Mrr_index_reader::position();
477 }
478 
resume_read()479 void Mrr_ordered_index_reader::resume_read()
480 {
481   TABLE *table= file->get_table();
482 
483   if (!read_was_interrupted)
484     return;
485 
486   KEY *used_index= &table->key_info[file->active_index];
487   key_restore(table->record[0], saved_key_tuple,
488               used_index, used_index->key_length);
489   if (saved_primary_key)
490   {
491     key_restore(table->record[0], saved_primary_key,
492                 &table->key_info[table->s->primary_key],
493                 table->key_info[table->s->primary_key].key_length);
494   }
495 }
496 
497 
498 /**
499   Fill the buffer with (lookup_tuple, range_id) pairs and sort
500 
501   @return
502     0                   OK, the buffer is non-empty and sorted
503     HA_ERR_END_OF_FILE  Source exhausted, the buffer is empty.
504 */
505 
refill_buffer(bool initial)506 int Mrr_ordered_index_reader::refill_buffer(bool initial)
507 {
508   KEY_MULTI_RANGE cur_range;
509   DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
510 
511   DBUG_ASSERT(key_buffer->is_empty());
512 
513   if (source_exhausted)
514     DBUG_RETURN(HA_ERR_END_OF_FILE);
515 
516   buf_manager->reset_buffer_sizes(buf_manager->arg);
517   key_buffer->reset();
518   key_buffer->setup_writing(keypar.key_size_in_keybuf,
519                             is_mrr_assoc? sizeof(range_id_t) : 0);
520 
521   while (key_buffer->can_write() &&
522          !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
523   {
524     DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
525 
526     /* Put key, or {key, range_id} pair into the buffer */
527     key_buffer->write_ptr1= keypar.use_key_pointers ?
528                               (uchar*)&cur_range.start_key.key :
529                               (uchar*)cur_range.start_key.key;
530     key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
531     key_buffer->write();
532   }
533 
534   /* Force get_next() to start with kv_it.init() call: */
535   scanning_key_val_iter= FALSE;
536 
537   if (source_exhausted && key_buffer->is_empty())
538     DBUG_RETURN(HA_ERR_END_OF_FILE);
539 
540   if (!initial)
541   {
542     /* This is a non-initial buffer fill and we've got a non-empty buffer */
543     THD *thd= current_thd;
544     status_var_increment(thd->status_var.ha_mrr_key_refills_count);
545   }
546 
547   key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)?
548                      (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse :
549                      (qsort2_cmp)Mrr_ordered_index_reader::compare_keys,
550                    this);
551   DBUG_RETURN(0);
552 }
553 
554 
init(handler * h_arg,RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,Key_parameters * key_par_arg,Lifo_buffer * key_buffer_arg,Buffer_manager * buf_manager_arg)555 int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
556                                    void *seq_init_param, uint n_ranges,
557                                    uint mode, Key_parameters *key_par_arg,
558                                    Lifo_buffer *key_buffer_arg,
559                                    Buffer_manager *buf_manager_arg)
560 {
561   file= h_arg;
562   key_buffer= key_buffer_arg;
563   buf_manager= buf_manager_arg;
564   keypar= *key_par_arg;
565 
566   KEY *key_info= &file->get_table()->key_info[file->active_index];
567   keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME &&
568                                       key_info->user_defined_key_parts ==
569                                       my_count_bits(keypar.key_tuple_map));
570 
571   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
572   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
573   mrr_funcs= *seq_funcs;
574   source_exhausted= FALSE;
575   read_was_interrupted= false;
576   have_saved_rowid= FALSE;
577   return 0;
578 }
579 
580 
rowid_cmp_reverse(void * file,uchar * a,uchar * b)581 static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
582 {
583   return - ((handler*)file)->cmp_ref(a, b);
584 }
585 
586 
init(handler * h_arg,Mrr_index_reader * index_reader_arg,uint mode,Lifo_buffer * buf)587 int Mrr_ordered_rndpos_reader::init(handler *h_arg,
588                                     Mrr_index_reader *index_reader_arg,
589                                     uint mode,
590                                     Lifo_buffer *buf)
591 {
592   file= h_arg;
593   index_reader= index_reader_arg;
594   rowid_buffer= buf;
595   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
596   index_reader_exhausted= FALSE;
597   index_reader_needs_refill= TRUE;
598   return 0;
599 }
600 
601 
602 /**
603   DS-MRR: Fill and sort the rowid buffer
604 
605   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
606   buffer. When the buffer is full or scan is completed, sort the buffer by
607   rowid and return.
608 
609   When this function returns, either rowid buffer is not empty, or the source
610   of lookup keys (i.e. ranges) is exhaused.
611 
612   @retval 0      OK, the next portion of rowids is in the buffer,
613                  properly ordered
614   @retval other  Error
615 */
616 
refill_buffer(bool initial)617 int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
618 {
619   int res;
620   bool first_call= initial;
621   DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
622 
623   if (index_reader_exhausted)
624     DBUG_RETURN(HA_ERR_END_OF_FILE);
625 
626   while (initial || index_reader_needs_refill ||
627          (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
628   {
629     if ((res= index_reader->refill_buffer(initial)))
630     {
631       if (res == HA_ERR_END_OF_FILE)
632         index_reader_exhausted= TRUE;
633       break;
634     }
635     initial= FALSE;
636     index_reader_needs_refill= FALSE;
637   }
638 
639   if (!first_call && !index_reader_exhausted)
640   {
641     /* Ok, this was a successful buffer refill operation */
642     THD *thd= current_thd;
643     status_var_increment(thd->status_var.ha_mrr_rowid_refills_count);
644   }
645 
646   DBUG_RETURN(res);
647 }
648 
649 
position()650 void Mrr_index_reader::position()
651 {
652   file->position(file->get_table()->record[0]);
653 }
654 
655 
656 /*
657   @brief Try to refill the rowid buffer without calling
658   index_reader->refill_buffer().
659 */
660 
refill_from_index_reader()661 int Mrr_ordered_rndpos_reader::refill_from_index_reader()
662 {
663   range_id_t range_info;
664   int res;
665   DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");
666 
667   DBUG_ASSERT(rowid_buffer->is_empty());
668   index_rowid= index_reader->get_rowid_ptr();
669   rowid_buffer->reset();
670   rowid_buffer->setup_writing(file->ref_length,
671                               is_mrr_assoc? sizeof(range_id_t) : 0);
672 
673   last_identical_rowid= NULL;
674 
675   index_reader->resume_read();
676   while (rowid_buffer->can_write())
677   {
678     res= index_reader->get_next(&range_info);
679 
680     if (res)
681     {
682       if (res != HA_ERR_END_OF_FILE)
683         DBUG_RETURN(res);
684       index_reader_needs_refill=TRUE;
685       break;
686     }
687 
688     index_reader->position();
689 
690     /* Put rowid, or {rowid, range_id} pair into the buffer */
691     rowid_buffer->write_ptr1= index_rowid;
692     rowid_buffer->write_ptr2= (uchar*)&range_info;
693     rowid_buffer->write();
694   }
695 
696   /*
697     When index_reader_needs_refill=TRUE, this means we've got all of index
698     tuples for lookups keys that index_reader had. We are not in the middle
699     of an index read, so there is no need to call interrupt_read.
700 
701     Actually, we must not call interrupt_read(), because it could be that we
702     haven't read a single row (because all index lookups returned
703     HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless]
704     valgrind warnings when trying to save garbage from table->record[0].
705   */
706   if (!index_reader_needs_refill)
707     index_reader->interrupt_read();
708   /* Sort the buffer contents by rowid */
709   rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);
710 
711   rowid_buffer->setup_reading(file->ref_length,
712                               is_mrr_assoc ? sizeof(range_id_t) : 0);
713   DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
714 }
715 
716 
717 /*
718   Get the next {record, range_id} using ordered array of rowid+range_id pairs
719 
720   @note
721     Since we have sorted rowids, we try not to make multiple rnd_pos() calls
722     with the same rowid value.
723 */
724 
get_next(range_id_t * range_info)725 int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
726 {
727   int res;
728 
729   /*
730     First, check if rowid buffer has elements with the same rowid value as
731     the previous.
732   */
733   while (last_identical_rowid)
734   {
735     /*
736       Current record (the one we've returned in previous call) was obtained
737       from a rowid that matched multiple range_ids. Return this record again,
738       with next matching range_id.
739     */
740     (void)rowid_buffer->read();
741 
742     if (rowid_buffer->read_ptr1 == last_identical_rowid)
743       last_identical_rowid= NULL; /* reached the last of identical rowids */
744 
745     if (!is_mrr_assoc)
746       return 0;
747 
748     memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
749     if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
750       return 0;
751   }
752 
753   /*
754      Ok, last_identical_rowid==NULL, it's time to read next different rowid
755      value and get record for it.
756   */
757   for(;;)
758   {
759     /* Return eof if there are no rowids in the buffer after re-fill attempt */
760     if (rowid_buffer->read())
761       return HA_ERR_END_OF_FILE;
762 
763     if (is_mrr_assoc)
764     {
765       memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
766       if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
767         continue;
768     }
769 
770     res= file->ha_rnd_pos(file->get_table()->record[0],
771                           rowid_buffer->read_ptr1);
772 
773     if (res)
774       return res; /* Some fatal error */
775 
776     break; /* Got another record */
777   }
778 
779   /*
780     Check if subsequent buffer elements have the same rowid value as this
781     one. If yes, remember this fact so that we don't make any more rnd_pos()
782     calls with this value.
783 
784     Note: this implies that SQL layer doesn't touch table->record[0]
785     between calls.
786   */
787   Lifo_buffer_iterator it;
788   it.init(rowid_buffer);
789   while (!it.read())
790   {
791     if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
792       break;
793     last_identical_rowid= it.read_ptr1;
794   }
795   return 0;
796 }
797 
798 
799 /****************************************************************************
800  * Top-level DS-MRR implementation functions (the ones called by storage engine)
801  ***************************************************************************/
802 
803 /**
804   DS-MRR: Initialize and start MRR scan
805 
806   Initialize and start the MRR scan. Depending on the mode parameter, this
807   may use default or DS-MRR implementation.
808 
809   @param h_arg           Table handler to be used
810   @param key             Index to be used
811   @param seq_funcs       Interval sequence enumeration functions
812   @param seq_init_param  Interval sequence enumeration parameter
813   @param n_ranges        Number of ranges in the sequence.
814   @param mode            HA_MRR_* modes to use
815   @param buf             INOUT Buffer to use
816 
817   @retval 0     Ok, Scan started.
818   @retval other Error
819 */
820 
dsmrr_init(handler * h_arg,RANGE_SEQ_IF * seq_funcs,void * seq_init_param,uint n_ranges,uint mode,HANDLER_BUFFER * buf)821 int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
822                            void *seq_init_param, uint n_ranges, uint mode,
823                            HANDLER_BUFFER *buf)
824 {
825   THD *thd= h_arg->get_table()->in_use;
826   int res;
827   Key_parameters keypar;
828   uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
829   handler *h_idx;
830   Mrr_ordered_rndpos_reader *disk_strategy= NULL;
831   bool do_sort_keys= FALSE;
832   DBUG_ENTER("DsMrr_impl::dsmrr_init");
833   /*
834     index_merge may invoke a scan on an object for which dsmrr_info[_const]
835     has not been called, so set the owner handler here as well.
836   */
837   primary_file= h_arg;
838   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
839 
840   strategy_exhausted= FALSE;
841 
842   /* By default, have do-nothing buffer manager */
843   buf_manager.arg= this;
844   buf_manager.reset_buffer_sizes= do_nothing;
845   buf_manager.redistribute_buffer_space= do_nothing;
846 
847   if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
848     goto use_default_impl;
849 
850   /*
851     Determine whether we'll need to do key sorting and/or rnd_pos() scan
852   */
853   index_strategy= NULL;
854   if ((mode & HA_MRR_SINGLE_POINT) &&
855       optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
856   {
857     do_sort_keys= TRUE;
858     index_strategy= &reader_factory.ordered_index_reader;
859   }
860   else
861     index_strategy= &reader_factory.simple_index_reader;
862 
863   strategy= index_strategy;
864   /*
865     We don't need a rowid-to-rndpos step if
866      - We're doing a scan on clustered primary key
867      - [In the future] We're doing an index_only read
868   */
869   DBUG_ASSERT(primary_file->inited == handler::INDEX ||
870               (primary_file->inited == handler::RND &&
871                secondary_file &&
872                secondary_file->inited == handler::INDEX));
873 
874   h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
875   keyno= h_idx->active_index;
876 
877   if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
878   {
879     strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
880   }
881 
882   full_buf= buf->buffer;
883   full_buf_end= buf->buffer_end;
884 
885   if (do_sort_keys)
886   {
887     /* Pre-calculate some parameters of key sorting */
888     keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS);
889     seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length,
890                             &keypar.key_tuple_map);
891     keypar.key_size_in_keybuf= keypar.use_key_pointers?
892                                  sizeof(char*) : keypar.key_tuple_length;
893     key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
894 
895     /* Ordered index reader needs some space to store an index tuple */
896     if (strategy != index_strategy)
897     {
898       uint saved_pk_length=0;
899       if (h_idx->primary_key_is_clustered())
900       {
901         uint pk= h_idx->get_table()->s->primary_key;
902         if (pk != MAX_KEY)
903           saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
904       }
905 
906       KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index];
907       if (reader_factory.ordered_index_reader.
908             set_interruption_temp_buffer(primary_file->ref_length,
909                                          used_index->key_length,
910                                          saved_pk_length,
911                                          &full_buf, full_buf_end))
912         goto use_default_impl;
913     }
914     else
915       reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
916   }
917 
918   if (strategy == index_strategy)
919   {
920     /*
921       Index strategy alone handles the record retrieval. Give all buffer space
922       to it. Key buffer should have forward orientation so we can return the
923       end of it.
924     */
925     key_buffer= &forward_key_buf;
926     key_buffer->set_buffer_space(full_buf, full_buf_end);
927 
928     /* Safety: specify that rowid buffer has zero size: */
929     rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
930 
931     if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
932       goto use_default_impl;
933 
934     if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
935                                    mode, &keypar, key_buffer, &buf_manager)))
936       goto error;
937   }
938   else
939   {
940     /* We'll have both index and rndpos strategies working together */
941     if (do_sort_keys)
942     {
943       /* Both strategies will need buffer space, share the buffer */
944       if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
945         goto use_default_impl;
946 
947       buf_manager.reset_buffer_sizes= reset_buffer_sizes;
948       buf_manager.redistribute_buffer_space= redistribute_buffer_space;
949     }
950     else
951     {
952       /* index strategy doesn't need buffer, give all space to rowids*/
953       rowid_buffer.set_buffer_space(full_buf, full_buf_end);
954       if (!rowid_buffer.have_space_for(primary_file->ref_length +
955                                        (int)is_mrr_assoc * sizeof(range_id_t)))
956         goto use_default_impl;
957     }
958 
959     if ((res= setup_two_handlers()))
960       goto error;
961 
962     if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
963                                    n_ranges, mode, &keypar, key_buffer,
964                                    &buf_manager)) ||
965         (res= disk_strategy->init(primary_file, index_strategy, mode,
966                                   &rowid_buffer)))
967     {
968       goto error;
969     }
970   }
971 
972   /*
973     At this point, we're sure that we're running a native MRR scan (i.e. we
974     didnt fall back to default implementation for some reason).
975   */
976   status_var_increment(thd->status_var.ha_mrr_init_count);
977 
978   res= strategy->refill_buffer(TRUE);
979   if (res)
980   {
981     if (res != HA_ERR_END_OF_FILE)
982       goto error;
983     strategy_exhausted= TRUE;
984   }
985 
986   /*
987     If we have scanned through all intervals in *seq, then adjust *buf to
988     indicate that the remaining buffer space will not be used.
989   */
990 //  if (dsmrr_eof)
991 //    buf->end_of_used_area= rowid_buffer.end_of_space();
992 
993 
994   DBUG_RETURN(0);
995 error:
996   close_second_handler();
997    /* Safety, not really needed but: */
998   strategy= NULL;
999   DBUG_RETURN(res);
1000 
1001 use_default_impl:
1002   if (primary_file->inited != handler::INDEX)
1003   {
1004     /* We can get here when
1005        - we've previously successfully done a DS-MRR scan (and so have
1006          secondary_file!= NULL, secondary_file->inited= INDEX,
1007          primary_file->inited=RND)
1008        - for this invocation, we haven't got enough buffer space, and so we
1009          have to use the default MRR implementation.
1010 
1011       note: primary_file->ha_index_end() will call dsmrr_close() which will
1012       close/destroy the secondary_file, this is intentional.
1013       (Yes this is slow, but one can't expect performance with join buffer
1014        so small that it can accomodate one rowid and one index tuple)
1015     */
1016     if ((res= primary_file->ha_rnd_end()) ||
1017         (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED))))
1018     {
1019       DBUG_RETURN(res);
1020     }
1021   }
1022   /* Call correct init function and assign to top level object */
1023   Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
1024   res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL,
1025                NULL, NULL);
1026   strategy= s;
1027   DBUG_RETURN(res);
1028 }
1029 
1030 
1031 /*
1032   Whatever the current state is, make it so that we have two handler objects:
1033   - primary_file       -  initialized for rnd_pos() scan
1034   - secondary_file     -  initialized for scanning the index specified in
1035                           this->keyno
1036   RETURN
1037     0        OK
1038     HA_XXX   Error code
1039 */
1040 
setup_two_handlers()1041 int DsMrr_impl::setup_two_handlers()
1042 {
1043   int res;
1044   THD *thd= primary_file->get_table()->in_use;
1045   DBUG_ENTER("DsMrr_impl::setup_two_handlers");
1046   if (!secondary_file)
1047   {
1048     handler *new_h2;
1049     Item *pushed_cond= NULL;
1050     DBUG_ASSERT(primary_file->inited == handler::INDEX);
1051     /* Create a separate handler object to do rnd_pos() calls. */
1052     /*
1053       ::clone() takes up a lot of stack, especially on 64 bit platforms.
1054       The constant 5 is an empiric result.
1055     */
1056     if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
1057       DBUG_RETURN(1);
1058 
1059     /* Create a separate handler object to do rnd_pos() calls. */
1060     if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
1061                                       normalized_path.str,
1062                                       thd->mem_root)) ||
1063         new_h2->ha_external_lock(thd, F_RDLCK))
1064     {
1065       delete new_h2;
1066       DBUG_RETURN(1);
1067     }
1068 
1069     if (keyno == primary_file->pushed_idx_cond_keyno)
1070       pushed_cond= primary_file->pushed_idx_cond;
1071 
1072     Mrr_reader *save_strategy= strategy;
1073     strategy= NULL;
1074     /*
1075       Caution: this call will invoke this->dsmrr_close(). Do not put the
1076       created secondary table handler new_h2 into this->secondary_file or it
1077       will delete it. Also, save the picked strategy
1078     */
1079     res= primary_file->ha_index_end();
1080 
1081     strategy= save_strategy;
1082     secondary_file= new_h2;
1083 
1084     if (res || (res= (primary_file->ha_rnd_init(FALSE))))
1085       goto error;
1086 
1087     table->prepare_for_position();
1088     secondary_file->extra(HA_EXTRA_KEYREAD);
1089     secondary_file->mrr_iter= primary_file->mrr_iter;
1090 
1091     if ((res= secondary_file->ha_index_init(keyno, FALSE)))
1092       goto error;
1093 
1094     if (pushed_cond)
1095       secondary_file->idx_cond_push(keyno, pushed_cond);
1096   }
1097   else
1098   {
1099     DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
1100     /*
1101       We get here when the access alternates betwen MRR scan(s) and non-MRR
1102       scans.
1103 
1104       Calling primary_file->index_end() will invoke dsmrr_close() for this object,
1105       which will delete secondary_file. We need to keep it, so put it away and dont
1106       let it be deleted:
1107     */
1108     if (primary_file->inited == handler::INDEX)
1109     {
1110       handler *save_h2= secondary_file;
1111       Mrr_reader *save_strategy= strategy;
1112       secondary_file= NULL;
1113       strategy= NULL;
1114       res= primary_file->ha_index_end();
1115       secondary_file= save_h2;
1116       strategy= save_strategy;
1117       if (res)
1118         goto error;
1119     }
1120     if ((primary_file->inited != handler::RND) &&
1121         (res= primary_file->ha_rnd_init(FALSE)))
1122       goto error;
1123   }
1124   DBUG_RETURN(0);
1125 
1126 error:
1127   DBUG_RETURN(res);
1128 }
1129 
1130 
close_second_handler()1131 void DsMrr_impl::close_second_handler()
1132 {
1133   if (secondary_file)
1134   {
1135     secondary_file->extra(HA_EXTRA_NO_KEYREAD);
1136     secondary_file->ha_index_or_rnd_end();
1137     secondary_file->ha_external_lock(current_thd, F_UNLCK);
1138     secondary_file->ha_close();
1139     delete secondary_file;
1140     secondary_file= NULL;
1141   }
1142 }
1143 
1144 
dsmrr_close()1145 void DsMrr_impl::dsmrr_close()
1146 {
1147   DBUG_ENTER("DsMrr_impl::dsmrr_close");
1148   close_second_handler();
1149   strategy= NULL;
1150   DBUG_VOID_RETURN;
1151 }
1152 
1153 
1154 /*
1155   my_qsort2-compatible static member function to compare key tuples
1156 */
1157 
compare_keys(void * arg,uchar * key1_arg,uchar * key2_arg)1158 int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg,
1159                                            uchar* key2_arg)
1160 {
1161   Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg;
1162   TABLE *table= reader->file->get_table();
1163   KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part;
1164   uchar *key1, *key2;
1165 
1166   if (reader->keypar.use_key_pointers)
1167   {
1168     /* the buffer stores pointers to keys, get to the keys */
1169     memcpy(&key1, key1_arg, sizeof(char*));
1170     memcpy(&key2, key2_arg, sizeof(char*));
1171   }
1172   else
1173   {
1174     key1= key1_arg;
1175     key2= key2_arg;
1176   }
1177 
1178   return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length);
1179 }
1180 
1181 
compare_keys_reverse(void * arg,uchar * key1,uchar * key2)1182 int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1,
1183                                                    uchar* key2)
1184 {
1185   return -compare_keys(arg, key1, key2);
1186 }
1187 
1188 
1189 /**
1190   Set the buffer space to be shared between rowid and key buffer
1191 
1192   @return FALSE  ok
1193   @return TRUE   There is so little buffer space that we won't be able to use
1194                  the strategy.
1195                  This happens when we don't have enough space for one rowid
1196                  element and one key element so this is mainly targeted at
1197                  testing.
1198 */
1199 
setup_buffer_sharing(uint key_size_in_keybuf,key_part_map key_tuple_map)1200 bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
1201                                       key_part_map key_tuple_map)
1202 {
1203   long key_buff_elem_size= key_size_in_keybuf +
1204                            (int)is_mrr_assoc * sizeof(range_id_t);
1205 
1206   KEY *key_info= &primary_file->get_table()->key_info[keyno];
1207   /*
1208     Ok if we got here we need to allocate one part of the buffer
1209     for keys and another part for rowids.
1210   */
1211   ulonglong rowid_buf_elem_size= primary_file->ref_length +
1212                                  (int)is_mrr_assoc * sizeof(range_id_t);
1213 
1214   /*
1215     Use rec_per_key statistics as a basis to find out how many rowids
1216     we'll get for each key value.
1217      TODO: what should be the default value to use when there is no
1218            statistics?
1219   */
1220   uint parts= my_count_bits(key_tuple_map);
1221   ha_rows rpc;
1222   ulonglong rowids_size= rowid_buf_elem_size;
1223   if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1)))
1224     rowids_size= rowid_buf_elem_size * rpc;
1225 
1226   double fraction_for_rowids=
1227     (ulonglong2double(rowids_size) /
1228      (ulonglong2double(rowids_size) + key_buff_elem_size));
1229 
1230   ptrdiff_t bytes_for_rowids=
1231     (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));
1232 
1233   ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
1234 
1235   if (bytes_for_keys < key_buff_elem_size + 1 ||
1236       bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
1237     return TRUE; /* Failed to provide minimum space for one of the buffers */
1238 
1239   rowid_buffer_end= full_buf + bytes_for_rowids;
1240   rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
1241   key_buffer= &backward_key_buf;
1242   key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
1243 
1244   /* The above code guarantees that the buffers are big enough */
1245   DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) &&
1246               rowid_buffer.have_space_for((size_t)rowid_buf_elem_size));
1247 
1248   return FALSE;
1249 }
1250 
1251 
do_nothing(void * dsmrr_arg)1252 void DsMrr_impl::do_nothing(void *dsmrr_arg)
1253 {
1254   /* Do nothing */
1255 }
1256 
1257 
reset_buffer_sizes(void * dsmrr_arg)1258 void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
1259 {
1260   DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1261   dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf,
1262                                        dsmrr->rowid_buffer_end);
1263   dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end,
1264                                       dsmrr->full_buf_end);
1265 }
1266 
1267 
1268 /*
1269   Take unused space from the key buffer and give it to the rowid buffer
1270 */
1271 
redistribute_buffer_space(void * dsmrr_arg)1272 void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
1273 {
1274   DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1275   uchar *unused_start, *unused_end;
1276   dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end);
1277   dsmrr->rowid_buffer.grow(unused_start, unused_end);
1278 }
1279 
1280 
1281 /*
1282   @brief Initialize the iterator
1283 
1284   @note
1285   Initialize the iterator to produce matches for the key of the first element
1286   in owner_arg->key_buffer
1287 
1288   @retval  0                    OK
1289   @retval  HA_ERR_END_OF_FILE   Either the owner->key_buffer is empty or
1290                                 no matches for the key we've tried (check
1291                                 key_buffer->is_empty() to tell these apart)
1292   @retval  other code           Fatal error
1293 */
1294 
init(Mrr_ordered_index_reader * owner_arg)1295 int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
1296 {
1297   int res;
1298   owner= owner_arg;
1299 
1300   identical_key_it.init(owner->key_buffer);
1301   owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
1302                                    owner->is_mrr_assoc ? sizeof(void*) : 0);
1303 
1304   if (identical_key_it.read())
1305     return HA_ERR_END_OF_FILE;
1306 
1307   uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;
1308 
1309   uchar *index_tuple= key_in_buf;
1310   if (owner->keypar.use_key_pointers)
1311     memcpy(&index_tuple, key_in_buf, sizeof(char*));
1312 
1313   /* Check out how many more identical keys are following */
1314   while (!identical_key_it.read())
1315   {
1316     if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf,
1317                                                identical_key_it.read_ptr1))
1318       break;
1319     last_identical_key_ptr= identical_key_it.read_ptr1;
1320   }
1321   identical_key_it.init(owner->key_buffer);
1322   res= owner->file->ha_index_read_map(owner->file->get_table()->record[0],
1323                                       index_tuple,
1324                                       owner->keypar.key_tuple_map,
1325                                       HA_READ_KEY_EXACT);
1326 
1327   if (res)
1328   {
1329     /* Failed to find any matching records */
1330     move_to_next_key_value();
1331     return res;
1332   }
1333   owner->have_saved_rowid= FALSE;
1334   get_next_row= FALSE;
1335   return 0;
1336 }
1337 
1338 
get_next(range_id_t * range_info)1339 int Key_value_records_iterator::get_next(range_id_t *range_info)
1340 {
1341   int res;
1342 
1343   if (get_next_row)
1344   {
1345     if (owner->keypar.index_ranges_unique)
1346     {
1347       /* We're using a full unique key, no point to call index_next_same */
1348       return HA_ERR_END_OF_FILE;
1349     }
1350 
1351     handler *h= owner->file;
1352     uchar *lookup_key;
1353     if (owner->keypar.use_key_pointers)
1354       memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*));
1355     else
1356       lookup_key= identical_key_it.read_ptr1;
1357 
1358     if ((res= h->ha_index_next_same(h->get_table()->record[0],
1359                                     lookup_key,
1360                                     owner->keypar.key_tuple_length)))
1361     {
1362       /* It's either HA_ERR_END_OF_FILE or some other error */
1363       return res;
1364     }
1365     identical_key_it.init(owner->key_buffer);
1366     owner->have_saved_rowid= FALSE;
1367     get_next_row= FALSE;
1368   }
1369 
1370   identical_key_it.read(); /* This gets us next range_id */
1371   memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));
1372 
1373   if (!last_identical_key_ptr ||
1374       (identical_key_it.read_ptr1 == last_identical_key_ptr))
1375   {
1376     /*
1377       We've reached the last of the identical keys that current record is a
1378       match for.  Set get_next_row=TRUE so that we read the next index record
1379       on the next call to this function.
1380     */
1381     get_next_row= TRUE;
1382   }
1383   return 0;
1384 }
1385 
1386 
move_to_next_key_value()1387 void Key_value_records_iterator::move_to_next_key_value()
1388 {
1389   while (!owner->key_buffer->read() &&
1390          (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
1391 }
1392 
1393 
1394 /**
1395   DS-MRR implementation: multi_range_read_next() function.
1396 
1397   Calling convention is like multi_range_read_next() has.
1398 */
1399 
dsmrr_next(range_id_t * range_info)1400 int DsMrr_impl::dsmrr_next(range_id_t *range_info)
1401 {
1402   int res;
1403   if (strategy_exhausted)
1404     return HA_ERR_END_OF_FILE;
1405 
1406   while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
1407   {
1408     if ((res= strategy->refill_buffer(FALSE)))
1409       break; /* EOF or error */
1410   }
1411   return res;
1412 }
1413 
1414 
1415 /**
1416   DS-MRR implementation: multi_range_read_info() function
1417 */
dsmrr_info(uint keyno,uint n_ranges,uint rows,uint key_parts,uint * bufsz,uint * flags,Cost_estimate * cost)1418 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
1419                                uint key_parts,
1420                                uint *bufsz, uint *flags, Cost_estimate *cost)
1421 {
1422   ha_rows res __attribute__((unused));
1423   uint def_flags= *flags;
1424   uint def_bufsz= *bufsz;
1425 
1426   /* Get cost/flags/mem_usage of default MRR implementation */
1427   res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
1428                                                     key_parts, &def_bufsz,
1429                                                     &def_flags, cost);
1430   DBUG_ASSERT(!res);
1431 
1432   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1433       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1434   {
1435     /* Default implementation is choosen */
1436     DBUG_PRINT("info", ("Default MRR implementation choosen"));
1437     *flags= def_flags;
1438     *bufsz= def_bufsz;
1439   }
1440   else
1441   {
1442     /* *flags and *bufsz were set by choose_mrr_impl */
1443     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1444   }
1445   return 0;
1446 }
1447 
1448 
1449 /**
1450   DS-MRR Implementation: multi_range_read_info_const() function
1451 */
1452 
dsmrr_info_const(uint keyno,RANGE_SEQ_IF * seq,void * seq_init_param,uint n_ranges,uint * bufsz,uint * flags,Cost_estimate * cost)1453 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
1454                                  void *seq_init_param, uint n_ranges,
1455                                  uint *bufsz, uint *flags, Cost_estimate *cost)
1456 {
1457   ha_rows rows;
1458   uint def_flags= *flags;
1459   uint def_bufsz= *bufsz;
1460   /* Get cost/flags/mem_usage of default MRR implementation */
1461   rows= primary_file->handler::multi_range_read_info_const(keyno, seq,
1462                                                            seq_init_param,
1463                                                            n_ranges,
1464                                                            &def_bufsz,
1465                                                            &def_flags, cost);
1466   if (rows == HA_POS_ERROR)
1467   {
1468     /* Default implementation can't perform MRR scan => we can't either */
1469     return rows;
1470   }
1471 
1472   /*
1473     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
1474     use the default MRR implementation (we need it for UPDATE/DELETE).
1475     Otherwise, make a choice based on cost and @@optimizer_switch settings
1476   */
1477   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1478       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1479   {
1480     DBUG_PRINT("info", ("Default MRR implementation choosen"));
1481     *flags= def_flags;
1482     *bufsz= def_bufsz;
1483   }
1484   else
1485   {
1486     /* *flags and *bufsz were set by choose_mrr_impl */
1487     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1488   }
1489   return rows;
1490 }
1491 
1492 
1493 /**
1494   Check if key has partially-covered columns
1495 
1496   We can't use DS-MRR to perform range scans when the ranges are over
1497   partially-covered keys, because we'll not have full key part values
1498   (we'll have their prefixes from the index) and will not be able to check
1499   if we've reached the end the range.
1500 
1501   @param keyno  Key to check
1502 
1503   @todo
1504     Allow use of DS-MRR in cases where the index has partially-covered
1505     components but they are not used for scanning.
1506 
1507   @retval TRUE   Yes
1508   @retval FALSE  No
1509 */
1510 
key_uses_partial_cols(TABLE_SHARE * share,uint keyno)1511 bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno)
1512 {
1513   KEY_PART_INFO *kp= share->key_info[keyno].key_part;
1514   KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts;
1515   for (; kp != kp_end; kp++)
1516   {
1517     if (!kp->field->part_of_key.is_set(keyno))
1518       return TRUE;
1519   }
1520   return FALSE;
1521 }
1522 
1523 
1524 /*
1525   Check if key/flags allow DS-MRR/CPK strategy to be used
1526 
1527   @param thd
1528   @param keyno      Index that will be used
1529   @param  mrr_flags
1530 
1531   @retval TRUE   DS-MRR/CPK should be used
1532   @retval FALSE  Otherwise
1533 */
1534 
check_cpk_scan(THD * thd,TABLE_SHARE * share,uint keyno,uint mrr_flags)1535 bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno,
1536                                 uint mrr_flags)
1537 {
1538   return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) &&
1539                  keyno == share->primary_key &&
1540                  primary_file->primary_key_is_clustered() &&
1541                  optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS));
1542 }
1543 
1544 
1545 /*
1546   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
1547 
1548   Make the choice between using Default MRR implementation and DS-MRR.
1549   This function contains common functionality factored out of dsmrr_info()
1550   and dsmrr_info_const(). The function assumes that the default MRR
1551   implementation's applicability requirements are satisfied.
1552 
1553   @param keyno       Index number
1554   @param rows        E(full rows to be retrieved)
1555   @param flags  IN   MRR flags provided by the MRR user
1556                 OUT  If DS-MRR is choosen, flags of DS-MRR implementation
1557                      else the value is not modified
1558   @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
1559                      else the value is not modified
1560   @param cost   IN   Cost of default MRR implementation
1561                 OUT  If DS-MRR is choosen, cost of DS-MRR scan
1562                      else the value is not modified
1563 
1564   @retval TRUE   Default MRR implementation should be used
1565   @retval FALSE  DS-MRR implementation should be used
1566 */
1567 
1568 
choose_mrr_impl(uint keyno,ha_rows rows,uint * flags,uint * bufsz,Cost_estimate * cost)1569 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
1570                                  uint *bufsz, Cost_estimate *cost)
1571 {
1572   Cost_estimate dsmrr_cost;
1573   bool res;
1574   THD *thd= primary_file->get_table()->in_use;
1575   TABLE_SHARE *share= primary_file->get_table_share();
1576 
1577   bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags);
1578   bool using_cpk= MY_TEST(keyno == share->primary_key &&
1579                           primary_file->primary_key_is_clustered());
1580   *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1581   if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
1582       *flags & HA_MRR_INDEX_ONLY ||
1583       (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno))
1584   {
1585     /* Use the default implementation */
1586     *flags |= HA_MRR_USE_DEFAULT_IMPL;
1587     *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1588     return TRUE;
1589   }
1590 
1591   uint add_len= share->key_info[keyno].key_length + primary_file->ref_length;
1592   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, add_len,
1593                               &dsmrr_cost))
1594     return TRUE;
1595 
1596   bool force_dsmrr;
1597   /*
1598     If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
1599     DS-MRR and Default implementations cost. This allows one to force use of
1600     DS-MRR whenever it is applicable without affecting other cost-based
1601     choices.
1602   */
1603   if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
1604       dsmrr_cost.total_cost() > cost->total_cost())
1605     dsmrr_cost= *cost;
1606 
1607   if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
1608   {
1609     *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
1610     *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
1611     *cost= dsmrr_cost;
1612     res= FALSE;
1613 
1614 
1615     if ((using_cpk && doing_cpk_scan) ||
1616         (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
1617          *flags & HA_MRR_SINGLE_POINT))
1618     {
1619       *flags |= DSMRR_IMPL_SORT_KEYS;
1620     }
1621 
1622     if (!(using_cpk && doing_cpk_scan) &&
1623         !(*flags & HA_MRR_INDEX_ONLY))
1624     {
1625       *flags |= DSMRR_IMPL_SORT_ROWIDS;
1626     }
1627     /*
1628     if ((*flags & HA_MRR_SINGLE_POINT) &&
1629          optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
1630       *flags |= HA_MRR_MATERIALIZED_KEYS;
1631     */
1632   }
1633   else
1634   {
1635     /* Use the default MRR implementation */
1636     res= TRUE;
1637   }
1638   return res;
1639 }
1640 
1641 /*
1642   Take the flags we've returned previously and print one of
1643   - Key-ordered scan
1644   - Rowid-ordered scan
1645   - Key-ordered Rowid-ordered scan
1646 */
1647 
dsmrr_explain_info(uint mrr_mode,char * str,size_t size)1648 int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
1649 {
1650   const char *key_ordered=   "Key-ordered scan";
1651   const char *rowid_ordered= "Rowid-ordered scan";
1652   const char *both_ordered=  "Key-ordered Rowid-ordered scan";
1653   const char *used_str="";
1654   const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);
1655 
1656   if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
1657   {
1658     if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
1659       used_str= both_ordered;
1660     else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
1661       used_str= key_ordered;
1662     else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
1663       used_str= rowid_ordered;
1664 
1665     size_t used_str_len= strlen(used_str);
1666     size_t copy_len= MY_MIN(used_str_len, size);
1667     memcpy(str, used_str, copy_len);
1668     return (int)copy_len;
1669   }
1670   return 0;
1671 }
1672 
1673 
1674 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost);
1675 
1676 
1677 /**
1678   Get cost of DS-MRR scan
1679 
1680   @param keynr              Index to be used
1681   @param rows               E(Number of rows to be scanned)
1682   @param flags              Scan parameters (HA_MRR_* flags)
1683   @param buffer_size INOUT  Buffer size
1684                             IN: Buffer of size 0 means the function
1685                             will determine the best size and return it.
1686   @param extra_mem_overhead Extra memory overhead of the MRR implementation
1687                             (the function assumes this many bytes of buffer
1688                              space will not be usable by DS-MRR)
1689   @param cost        OUT    The cost
1690 
1691   @retval FALSE  OK
1692   @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
1693                  for even 1 rowid)
1694 */
1695 
get_disk_sweep_mrr_cost(uint keynr,ha_rows rows,uint flags,uint * buffer_size,uint extra_mem_overhead,Cost_estimate * cost)1696 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
1697                                          uint *buffer_size,
1698                                          uint extra_mem_overhead,
1699                                          Cost_estimate *cost)
1700 {
1701   ulong max_buff_entries, elem_size;
1702   ha_rows rows_in_full_step;
1703   ha_rows rows_in_last_step;
1704   uint n_full_steps;
1705   double index_read_cost;
1706 
1707   elem_size= primary_file->ref_length +
1708              sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
1709 
1710   if (!*buffer_size)
1711   {
1712     /*
1713       We are requested to determine how much memory we need.
1714       Request memory to finish the scan in one pass but do not request
1715       more than @@mrr_buff_size.
1716     */
1717     *buffer_size= (uint) MY_MIN(extra_mem_overhead + elem_size*(ulong)rows,
1718                                 MY_MAX(table->in_use->variables.mrr_buff_size,
1719                                        extra_mem_overhead));
1720   }
1721 
1722   if (elem_size + extra_mem_overhead > *buffer_size)
1723     return TRUE; /* Buffer has not enough space for even 1 rowid */
1724 
1725   max_buff_entries = (*buffer_size - extra_mem_overhead) / elem_size;
1726 
1727   /* Number of iterations we'll make with full buffer */
1728   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
1729 
1730   /*
1731     Get numbers of rows we'll be processing in
1732      - non-last sweep, with full buffer
1733      - last iteration, with non-full buffer
1734   */
1735   rows_in_full_step= max_buff_entries;
1736   rows_in_last_step= rows % max_buff_entries;
1737 
1738   /* Adjust buffer size if we expect to use only part of the buffer */
1739   if (n_full_steps)
1740   {
1741     get_sort_and_sweep_cost(table, rows_in_full_step, cost);
1742     cost->multiply(n_full_steps);
1743   }
1744   else
1745   {
1746     cost->reset();
1747     *buffer_size= (uint)MY_MAX(*buffer_size,
1748                       (size_t)(1.2*rows_in_last_step) * elem_size +
1749                       primary_file->ref_length + table->key_info[keynr].key_length);
1750   }
1751 
1752   Cost_estimate last_step_cost;
1753   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
1754   cost->add(&last_step_cost);
1755 
1756   if (n_full_steps != 0)
1757     cost->mem_cost= *buffer_size;
1758   else
1759     cost->mem_cost= (double)rows_in_last_step * elem_size;
1760 
1761   /* Total cost of all index accesses */
1762   index_read_cost= primary_file->keyread_time(keynr, 1, rows);
1763   cost->add_io(index_read_cost, 1 /* Random seeks */);
1764   return FALSE;
1765 }
1766 
1767 
1768 /*
1769   Get cost of one sort-and-sweep step
1770 
1771   It consists of two parts:
1772    - sort an array of #nrows ROWIDs using qsort
1773    - read #nrows records from table in a sweep.
1774 
1775   @param table       Table being accessed
1776   @param nrows       Number of rows to be sorted and retrieved
1777   @param cost   OUT  The cost of scan
1778 */
1779 
1780 static
get_sort_and_sweep_cost(TABLE * table,ha_rows nrows,Cost_estimate * cost)1781 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
1782 {
1783   if (nrows)
1784   {
1785     get_sweep_read_cost(table, nrows, FALSE, cost);
1786     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
1787     double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
1788     if (cmp_op < 3)
1789       cmp_op= 3;
1790     cost->cpu_cost += cmp_op * log2(cmp_op);
1791   }
1792   else
1793     cost->reset();
1794 }
1795 
1796 
1797 /**
1798   Get cost of reading nrows table records in a "disk sweep"
1799 
1800   A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
1801   for an ordered sequence of rowids.
1802 
1803   We assume hard disk IO. The read is performed as follows:
1804 
1805    1. The disk head is moved to the needed cylinder
1806    2. The controller waits for the plate to rotate
1807    3. The data is transferred
1808 
1809   Time to do #3 is insignificant compared to #2+#1.
1810 
1811   Time to move the disk head is proportional to head travel distance.
1812 
1813   Time to wait for the plate to rotate depends on whether the disk head
1814   was moved or not.
1815 
1816   If disk head wasn't moved, the wait time is proportional to distance
1817   between the previous block and the block we're reading.
1818 
1819   If the head was moved, we don't know how much we'll need to wait for the
1820   plate to rotate. We assume the wait time to be a variate with a mean of
1821   0.5 of full rotation time.
1822 
1823   Our cost units are "random disk seeks". The cost of random disk seek is
1824   actually not a constant, it depends one range of cylinders we're going
1825   to access. We make it constant by introducing a fuzzy concept of "typical
1826   datafile length" (it's fuzzy as it's hard to tell whether it should
1827   include index file, temp.tables etc). Then random seek cost is:
1828 
1829     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
1830 
1831   We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
1832 
1833   @param table             Table to be accessed
1834   @param nrows             Number of rows to retrieve
1835   @param interrupted       TRUE <=> Assume that the disk sweep will be
1836                            interrupted by other disk IO. FALSE - otherwise.
1837   @param cost         OUT  The cost.
1838 */
1839 
get_sweep_read_cost(TABLE * table,ha_rows nrows,bool interrupted,Cost_estimate * cost)1840 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
1841                          Cost_estimate *cost)
1842 {
1843   DBUG_ENTER("get_sweep_read_cost");
1844 
1845   cost->reset();
1846   if (table->file->primary_key_is_clustered())
1847   {
1848     cost->io_count= table->file->read_time(table->s->primary_key,
1849                                            (uint) nrows, nrows);
1850   }
1851   else
1852   {
1853     double n_blocks=
1854       ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
1855     double busy_blocks=
1856       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
1857     if (busy_blocks < 1.0)
1858       busy_blocks= 1.0;
1859 
1860     DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
1861                        busy_blocks));
1862     cost->io_count= busy_blocks;
1863 
1864     if (!interrupted)
1865     {
1866       /* Assume reading is done in one 'sweep' */
1867       cost->avg_io_cost= (DISK_SEEK_BASE_COST +
1868                           DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
1869     }
1870   }
1871   DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
1872   DBUG_VOID_RETURN;
1873 }
1874 
1875 
1876 /* **************************************************************************
1877  * DS-MRR implementation ends
1878  ***************************************************************************/
1879 
1880 
1881