1 /* Copyright (C) 2010, 2011 Monty Program Ab
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 #include "mariadb.h"
17 #include "sql_parse.h"
18 #include <my_bit.h>
19 #include "sql_select.h"
20 #include "key.h"
21 #include "sql_statistics.h"
22 #include "rowid_filter.h"
23 
24 /****************************************************************************
25  * Default MRR implementation (MRR to non-MRR converter)
26  ***************************************************************************/
27 
28 /**
29   Get cost and other information about MRR scan over a known list of ranges
30 
31   Calculate estimated cost and other information about an MRR scan for given
32   sequence of ranges.
33 
34   @param keyno           Index number
35   @param seq             Range sequence to be traversed
36   @param seq_init_param  First parameter for seq->init()
37   @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
38                          can't efficiently determine it
39   @param bufsz    INOUT  IN:  Size of the buffer available for use
40                          OUT: Size of the buffer that is expected to be actually
41                               used, or 0 if buffer is not needed.
42   @param flags    INOUT  A combination of HA_MRR_* flags
43   @param cost     OUT    Estimated cost of MRR access
44 
45   @note
46     This method (or an overriding one in a derived class) must check for
47     thd->killed and return HA_POS_ERROR if it is not zero. This is required
48     for a user to be able to interrupt the calculation by killing the
49     connection/query.
50 
51   @retval
52     HA_POS_ERROR  Error or the engine is unable to perform the requested
53                   scan. Values of OUT parameters are undefined.
54   @retval
55     other         OK, *cost contains cost of the scan, *bufsz and *flags
56                   contain scan parameters.
57 */
58 
59 ha_rows
60 handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
61                                      void *seq_init_param, uint n_ranges_arg,
62                                      uint *bufsz, uint *flags, Cost_estimate *cost)
63 {
64   KEY_MULTI_RANGE range;
65   range_seq_t seq_it;
66   ha_rows total_rows= 0;
67   uint n_ranges=0;
68   uint n_eq_ranges= 0;
69   ulonglong total_touched_blocks= 0;
70   ha_rows max_rows= stats.records;
71   THD *thd= table->in_use;
72   uint limit= thd->variables.eq_range_index_dive_limit;
73   bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq,
74                                                             seq_init_param,
75                                                             limit);
76   uint len= table->key_info[keyno].key_length + table->file->ref_length;
77   if (keyno == table->s->primary_key && table->file->primary_key_is_clustered())
78     len= table->s->stored_rec_length;
79   /* Assume block is 75 % full */
80   uint avg_block_records= ((uint) (table->file->stats.block_size*3/4))/len + 1;
81   DBUG_ENTER("multi_range_read_info_const");
82 
83   /* Default MRR implementation doesn't need buffer */
84   *bufsz= 0;
85 
86   seq_it= seq->init(seq_init_param, n_ranges, *flags);
87   while (!seq->next(seq_it, &range))
88   {
89     ha_rows rows;
90 
91     if (unlikely(thd->killed != 0))
92       DBUG_RETURN(HA_POS_ERROR);
93 
94     n_ranges++;
95     if (range.range_flag & EQ_RANGE)
96       n_eq_ranges++;
97     key_range *min_endp, *max_endp;
98     if (range.range_flag & GEOM_FLAG)
99     {
100       /* In this case tmp_min_flag contains the handler-read-function */
101       range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG);
102       min_endp= &range.start_key;
103       max_endp= NULL;
104     }
105     else
106     {
107       min_endp= range.start_key.length? &range.start_key : NULL;
108       max_endp= range.end_key.length? &range.end_key : NULL;
109     }
110     int keyparts_used= my_count_bits(range.start_key.keypart_map);
111     if (use_statistics_for_eq_range &&
112         !(range.range_flag & NULL_RANGE) &&
113         (range.range_flag & EQ_RANGE) &&
114         table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5)
115     {
116       if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
117         rows= 1; /* there can be at most one row */
118       else
119         rows=
120           (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used-1);
121     }
122     else
123     {
124       if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
125         rows= 1; /* there can be at most one row */
126       else if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
127                                                         max_endp)))
128       {
129         /* Can't scan one range => can't do MRR scan at all */
130         total_rows= HA_POS_ERROR;
131         break;
132       }
133     }
134     total_rows += rows;
135     total_touched_blocks+= (rows / avg_block_records +1);
136   }
137 
138   if (total_rows != HA_POS_ERROR)
139   {
140     set_if_smaller(total_rows, max_rows);
141     /* The following calculation is the same as in multi_range_read_info(): */
142     *flags |= HA_MRR_USE_DEFAULT_IMPL;
143     cost->reset();
144     cost->avg_io_cost= 1; /* assume random seeks */
145     cost->idx_avg_io_cost= 1;
146     if (!((keyno == table->s->primary_key && primary_key_is_clustered()) ||
147 	   is_clustering_key(keyno)))
148     {
149       cost->idx_io_count= total_touched_blocks +
150 	                  keyread_time(keyno, 0, total_rows);
151       cost->cpu_cost= cost->idx_cpu_cost=
152         (double) total_rows / TIME_FOR_COMPARE_IDX +
153         (2 * n_ranges - n_eq_ranges) * IDX_LOOKUP_COST;
154       if (!(*flags & HA_MRR_INDEX_ONLY))
155       {
156         cost->io_count= read_time(keyno, 0, total_rows);
157         cost->cpu_cost+= (double) total_rows / TIME_FOR_COMPARE;
158       }
159     }
160     else
161     {
162       cost->io_count= read_time(keyno, n_ranges, (uint) total_rows);
163       cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
164     }
165   }
166   DBUG_PRINT("statistics",
167              ("key: %s  rows: %llu  total_cost: %.3f  io_blocks: %llu  "
168               "idx_io_count: %.3f  cpu_cost: %.3f  io_count: %.3f",
169               table->s->keynames.type_names[keyno],
170               (ulonglong) total_rows, cost->total_cost(),
171               (ulonglong) total_touched_blocks,
172               cost->idx_io_count, cost->cpu_cost, cost->io_count));
173   DBUG_RETURN(total_rows);
174 }
175 
176 
177 /**
178   Get cost and other information about MRR scan over some sequence of ranges
179 
180   Calculate estimated cost and other information about an MRR scan for some
181   sequence of ranges.
182 
183   The ranges themselves will be known only at execution phase. When this
184   function is called we only know number of ranges and a (rough) E(#records)
185   within those ranges.
186 
187   Currently this function is only called for "n-keypart singlepoint" ranges,
188   i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
189 
190   The flags parameter is a combination of those flags: HA_MRR_SORTED,
191   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
192 
193   @param keyno           Index number
194   @param n_ranges        Estimated number of ranges (i.e. intervals) in the
195                          range sequence.
196   @param n_rows          Estimated total number of records contained within all
197                          of the ranges
198   @param bufsz    INOUT  IN:  Size of the buffer available for use
199                          OUT: Size of the buffer that will be actually used, or
200                               0 if buffer is not needed.
201   @param flags    INOUT  A combination of HA_MRR_* flags
202   @param cost     OUT    Estimated cost of MRR access
203 
204   @retval
205     0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
206           parameters.
207   @retval
208     other Error or can't perform the requested scan
209 */
210 
211 ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
212                                        uint key_parts, uint *bufsz,
213                                        uint *flags, Cost_estimate *cost)
214 {
215   /*
216     Currently we expect this function to be called only in preparation of scan
217     with HA_MRR_SINGLE_POINT property.
218   */
219   DBUG_ASSERT(*flags | HA_MRR_SINGLE_POINT);
220 
221   *bufsz= 0; /* Default implementation doesn't need a buffer */
222   *flags |= HA_MRR_USE_DEFAULT_IMPL;
223 
224   cost->reset();
225   cost->avg_io_cost= 1; /* assume random seeks */
226 
227   /* Produce the same cost as non-MRR code does */
228   if (!(keyno == table->s->primary_key && primary_key_is_clustered()))
229   {
230     cost->idx_io_count=  n_ranges + keyread_time(keyno, 0, n_rows);
231     cost->cpu_cost= cost->idx_cpu_cost=
232       (double) n_rows / TIME_FOR_COMPARE_IDX + n_ranges * IDX_LOOKUP_COST;
233     if (!(*flags & HA_MRR_INDEX_ONLY))
234     {
235       cost->io_count= read_time(keyno, 0, n_rows);
236       cost->cpu_cost+= (double) n_rows / TIME_FOR_COMPARE;
237     }
238   }
239   else
240   {
241     cost->io_count= read_time(keyno, n_ranges, (uint)n_rows);
242     cost->cpu_cost= (double) n_rows / TIME_FOR_COMPARE + 0.01;
243   }
244   return 0;
245 }
246 
247 
248 /**
249   Initialize the MRR scan
250 
251   Initialize the MRR scan. This function may do heavyweight scan
252   initialization like row prefetching/sorting/etc (NOTE: but better not do
253   it here as we may not need it, e.g. if we never satisfy WHERE clause on
254   previous tables. For many implementations it would be natural to do such
255   initializations in the first multi_read_range_next() call)
256 
257   mode is a combination of the following flags: HA_MRR_SORTED,
258   HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION
259 
260   @param seq             Range sequence to be traversed
261   @param seq_init_param  First parameter for seq->init()
262   @param n_ranges        Number of ranges in the sequence
263   @param mode            Flags, see the description section for the details
264   @param buf             INOUT: memory buffer to be used
265 
266   @note
267     One must have called index_init() before calling this function. Several
268     multi_range_read_init() calls may be made in course of one query.
269 
270     Buffer memory management is done according to the following scenario:
271     The caller allocates the buffer and provides it to the callee by filling
272     the members of HANDLER_BUFFER structure.
273     The callee consumes all or some fraction of the provided buffer space, and
274     sets the HANDLER_BUFFER members accordingly.
275     The callee may use the buffer memory until the next multi_range_read_init()
276     call is made, all records have been read, or until index_end() call is
277     made, whichever comes first.
278 
279   @retval 0  OK
280   @retval 1  Error
281 */
282 
283 int
284 handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
285                                uint n_ranges, uint mode, HANDLER_BUFFER *buf)
286 {
287   DBUG_ENTER("handler::multi_range_read_init");
288   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
289   mrr_funcs= *seq_funcs;
290   mrr_is_output_sorted= MY_TEST(mode & HA_MRR_SORTED);
291   mrr_have_range= FALSE;
292   DBUG_RETURN(0);
293 }
294 
295 /**
296   Get next record in MRR scan
297 
298   Default MRR implementation: read the next record
299 
300   @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
301                           Otherwise, the opaque value associated with the range
302                           that contains the returned record.
303 
304   @retval 0      OK
305   @retval other  Error code
306 */
307 
308 int handler::multi_range_read_next(range_id_t *range_info)
309 {
310   int result= HA_ERR_END_OF_FILE;
311   bool range_res;
312   DBUG_ENTER("handler::multi_range_read_next");
313 
314   if (!mrr_have_range)
315   {
316     mrr_have_range= TRUE;
317     goto start;
318   }
319 
320   do
321   {
322     /* Save a call if there can be only one row in range. */
323     if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
324     {
325       result= read_range_next();
326       /* On success or non-EOF errors jump to the end. */
327       if (result != HA_ERR_END_OF_FILE)
328         break;
329     }
330     else
331     {
332       if (ha_was_semi_consistent_read())
333       {
334         /*
335           The following assignment is redundant, but for extra safety and to
336           remove the compiler warning:
337         */
338         range_res= FALSE;
339         goto scan_it_again;
340       }
341       /*
342         We need to set this for the last range only, but checking this
343         condition is more expensive than just setting the result code.
344       */
345       result= HA_ERR_END_OF_FILE;
346     }
347 
348 start:
349     /* Try the next range(s) until one matches a record. */
350     while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
351     {
352 scan_it_again:
353       result= read_range_first(mrr_cur_range.start_key.keypart_map ?
354                                  &mrr_cur_range.start_key : 0,
355                                mrr_cur_range.end_key.keypart_map ?
356                                  &mrr_cur_range.end_key : 0,
357                                MY_TEST(mrr_cur_range.range_flag & EQ_RANGE),
358                                mrr_is_output_sorted);
359       if (result != HA_ERR_END_OF_FILE)
360         break;
361     }
362   }
363   while ((result == HA_ERR_END_OF_FILE) && !range_res);
364 
365   *range_info= mrr_cur_range.ptr;
366   DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
367   DBUG_RETURN(result);
368 }
369 
370 /****************************************************************************
371  * Mrr_*_reader classes (building blocks for DS-MRR)
372  ***************************************************************************/
373 
374 int Mrr_simple_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
375                                   void *seq_init_param, uint n_ranges,
376                                   uint mode,  Key_parameters *key_par_arg,
377                                   Lifo_buffer *key_buffer_arg,
378                                   Buffer_manager *buf_manager_arg)
379 {
380   HANDLER_BUFFER no_buffer = {NULL, NULL, NULL};
381   file= h_arg;
382   return file->handler::multi_range_read_init(seq_funcs, seq_init_param,
383                                               n_ranges, mode, &no_buffer);
384 }
385 
386 
387 int Mrr_simple_index_reader::get_next(range_id_t *range_info)
388 {
389   int res;
390   while (!(res= file->handler::multi_range_read_next(range_info)))
391   {
392     KEY_MULTI_RANGE *curr_range= &file->handler::mrr_cur_range;
393     if (!file->mrr_funcs.skip_index_tuple ||
394         !file->mrr_funcs.skip_index_tuple(file->mrr_iter, curr_range->ptr))
395       break;
396   }
397   if (res && res != HA_ERR_END_OF_FILE && res != HA_ERR_KEY_NOT_FOUND)
398     file->print_error(res, MYF(0));             // Fatal error
399   return res;
400 }
401 
402 
403 /**
404   @brief Get next index record
405 
406   @param range_info  OUT identifier of range that the returned record belongs to
407 
408   @note
409     We actually iterate over nested sequences:
410     - an ordered sequence of groups of identical keys
411       - each key group has key value, which has multiple matching records
412         - thus, each record matches all members of the key group
413 
414   @retval 0                   OK, next record was successfully read
415   @retval HA_ERR_END_OF_FILE  End of records
416   @retval Other               Some other error; Error is printed
417 */
418 
419 int Mrr_ordered_index_reader::get_next(range_id_t *range_info)
420 {
421   int res;
422   DBUG_ENTER("Mrr_ordered_index_reader::get_next");
423 
424   for(;;)
425   {
426     if (!scanning_key_val_iter)
427     {
428       while ((res= kv_it.init(this)))
429       {
430         if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
431           DBUG_RETURN(res); /* Some fatal error */
432 
433         if (key_buffer->is_empty())
434         {
435           DBUG_RETURN(HA_ERR_END_OF_FILE);
436         }
437       }
438       scanning_key_val_iter= TRUE;
439     }
440 
441     if ((res= kv_it.get_next(range_info)))
442     {
443       scanning_key_val_iter= FALSE;
444       if ((res != HA_ERR_KEY_NOT_FOUND && res != HA_ERR_END_OF_FILE))
445         DBUG_RETURN(res);
446       kv_it.move_to_next_key_value();
447       continue;
448     }
449     if (!skip_index_tuple(*range_info) &&
450         !skip_record(*range_info, NULL))
451     {
452       break;
453     }
454     /* Go get another (record, range_id) combination */
455   } /* while */
456 
457   DBUG_RETURN(0);
458 }
459 
460 
461 /*
462   Supply index reader with the O(1)space it needs for scan interrupt/restore
463   operation
464 */
465 
466 bool Mrr_ordered_index_reader::set_interruption_temp_buffer(uint rowid_length,
467                                                             uint key_len,
468                                                             uint saved_pk_len,
469                                                             uchar **space_start,
470                                                             uchar *space_end)
471 {
472   if (space_end - *space_start <= (ptrdiff_t)(rowid_length + key_len + saved_pk_len))
473     return TRUE;
474   support_scan_interruptions= TRUE;
475 
476   saved_rowid= *space_start;
477   *space_start += rowid_length;
478 
479   if (saved_pk_len)
480   {
481     saved_primary_key= *space_start;
482     *space_start += saved_pk_len;
483   }
484   else
485     saved_primary_key= NULL;
486 
487   saved_key_tuple= *space_start;
488   *space_start += key_len;
489 
490   have_saved_rowid= FALSE;
491   read_was_interrupted= FALSE;
492   return FALSE;
493 }
494 
495 void Mrr_ordered_index_reader::set_no_interruption_temp_buffer()
496 {
497   support_scan_interruptions= FALSE;
498   saved_key_tuple= saved_rowid= saved_primary_key= NULL; /* safety */
499   have_saved_rowid= FALSE;
500   read_was_interrupted= FALSE;
501 }
502 
503 void Mrr_ordered_index_reader::interrupt_read()
504 {
505   DBUG_ASSERT(support_scan_interruptions);
506   TABLE *table= file->get_table();
507   KEY *used_index= &table->key_info[file->active_index];
508   /* Save the current key value */
509   key_copy(saved_key_tuple, table->record[0],
510            used_index, used_index->key_length);
511 
512   if (saved_primary_key)
513   {
514     key_copy(saved_primary_key, table->record[0],
515              &table->key_info[table->s->primary_key],
516              table->key_info[table->s->primary_key].key_length);
517   }
518   read_was_interrupted= TRUE;
519 
520   /* Save the last rowid */
521   memcpy(saved_rowid, file->ref, file->ref_length);
522   have_saved_rowid= TRUE;
523 }
524 
525 void Mrr_ordered_index_reader::position()
526 {
527   if (have_saved_rowid)
528     memcpy(file->ref, saved_rowid, file->ref_length);
529   else
530     Mrr_index_reader::position();
531 }
532 
533 void Mrr_ordered_index_reader::resume_read()
534 {
535   TABLE *table= file->get_table();
536 
537   if (!read_was_interrupted)
538     return;
539 
540   KEY *used_index= &table->key_info[file->active_index];
541   key_restore(table->record[0], saved_key_tuple,
542               used_index, used_index->key_length);
543   if (saved_primary_key)
544   {
545     key_restore(table->record[0], saved_primary_key,
546                 &table->key_info[table->s->primary_key],
547                 table->key_info[table->s->primary_key].key_length);
548   }
549 }
550 
551 
552 /**
553   Fill the buffer with (lookup_tuple, range_id) pairs and sort
554 
555   @return
556     0                   OK, the buffer is non-empty and sorted
557     HA_ERR_END_OF_FILE  Source exhausted, the buffer is empty.
558 */
559 
560 int Mrr_ordered_index_reader::refill_buffer(bool initial)
561 {
562   KEY_MULTI_RANGE cur_range;
563   DBUG_ENTER("Mrr_ordered_index_reader::refill_buffer");
564 
565   DBUG_ASSERT(key_buffer->is_empty());
566 
567   if (source_exhausted)
568     DBUG_RETURN(HA_ERR_END_OF_FILE);
569 
570   buf_manager->reset_buffer_sizes(buf_manager->arg);
571   key_buffer->reset();
572   key_buffer->setup_writing(keypar.key_size_in_keybuf,
573                             is_mrr_assoc? sizeof(range_id_t) : 0);
574 
575   while (key_buffer->can_write() &&
576          !(source_exhausted= mrr_funcs.next(mrr_iter, &cur_range)))
577   {
578     DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
579 
580     /* Put key, or {key, range_id} pair into the buffer */
581     key_buffer->write_ptr1= keypar.use_key_pointers ?
582                               (uchar*)&cur_range.start_key.key :
583                               (uchar*)cur_range.start_key.key;
584     key_buffer->write_ptr2= (uchar*)&cur_range.ptr;
585     key_buffer->write();
586   }
587 
588   /* Force get_next() to start with kv_it.init() call: */
589   scanning_key_val_iter= FALSE;
590 
591   if (source_exhausted && key_buffer->is_empty())
592     DBUG_RETURN(HA_ERR_END_OF_FILE);
593 
594   if (!initial)
595   {
596     /* This is a non-initial buffer fill and we've got a non-empty buffer */
597     THD *thd= current_thd;
598     status_var_increment(thd->status_var.ha_mrr_key_refills_count);
599   }
600 
601   key_buffer->sort((key_buffer->type() == Lifo_buffer::FORWARD)?
602                      (qsort2_cmp)Mrr_ordered_index_reader::compare_keys_reverse :
603                      (qsort2_cmp)Mrr_ordered_index_reader::compare_keys,
604                    this);
605   DBUG_RETURN(0);
606 }
607 
608 
609 int Mrr_ordered_index_reader::init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
610                                    void *seq_init_param, uint n_ranges,
611                                    uint mode, Key_parameters *key_par_arg,
612                                    Lifo_buffer *key_buffer_arg,
613                                    Buffer_manager *buf_manager_arg)
614 {
615   file= h_arg;
616   key_buffer= key_buffer_arg;
617   buf_manager= buf_manager_arg;
618   keypar= *key_par_arg;
619 
620   KEY *key_info= &file->get_table()->key_info[file->active_index];
621   keypar.index_ranges_unique= MY_TEST(key_info->flags & HA_NOSAME &&
622                                       key_info->user_defined_key_parts ==
623                                       my_count_bits(keypar.key_tuple_map));
624 
625   mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
626   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
627   mrr_funcs= *seq_funcs;
628   source_exhausted= FALSE;
629   read_was_interrupted= false;
630   have_saved_rowid= FALSE;
631   return 0;
632 }
633 
634 
635 static int rowid_cmp_reverse(void *file, uchar *a, uchar *b)
636 {
637   return - ((handler*)file)->cmp_ref(a, b);
638 }
639 
640 
641 int Mrr_ordered_rndpos_reader::init(handler *h_arg,
642                                     Mrr_index_reader *index_reader_arg,
643                                     uint mode,
644                                     Lifo_buffer *buf,
645                                     Rowid_filter *filter)
646 {
647   file= h_arg;
648   index_reader= index_reader_arg;
649   rowid_buffer= buf;
650   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
651   index_reader_exhausted= FALSE;
652   index_reader_needs_refill= TRUE;
653   rowid_filter= filter;
654 
655   return 0;
656 }
657 
658 
659 /**
660   DS-MRR: Fill and sort the rowid buffer
661 
662   Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
663   buffer. When the buffer is full or scan is completed, sort the buffer by
664   rowid and return.
665 
666   When this function returns, either rowid buffer is not empty, or the source
667   of lookup keys (i.e. ranges) is exhaused.
668 
669   @retval 0      OK, the next portion of rowids is in the buffer,
670                  properly ordered
671   @retval other  Error
672 */
673 
674 int Mrr_ordered_rndpos_reader::refill_buffer(bool initial)
675 {
676   int res;
677   bool first_call= initial;
678   DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_buffer");
679 
680   if (index_reader_exhausted)
681     DBUG_RETURN(HA_ERR_END_OF_FILE);
682 
683   while (initial || index_reader_needs_refill ||
684          (res= refill_from_index_reader()) == HA_ERR_END_OF_FILE)
685   {
686     if ((res= index_reader->refill_buffer(initial)))
687     {
688       if (res == HA_ERR_END_OF_FILE)
689         index_reader_exhausted= TRUE;
690       break;
691     }
692     initial= FALSE;
693     index_reader_needs_refill= FALSE;
694   }
695 
696   if (!first_call && !index_reader_exhausted)
697   {
698     /* Ok, this was a successful buffer refill operation */
699     THD *thd= current_thd;
700     status_var_increment(thd->status_var.ha_mrr_rowid_refills_count);
701   }
702 
703   DBUG_RETURN(res);
704 }
705 
706 
707 void Mrr_index_reader::position()
708 {
709   file->position(file->get_table()->record[0]);
710 }
711 
712 
713 /*
714   @brief Try to refill the rowid buffer without calling
715   index_reader->refill_buffer().
716 */
717 
718 int Mrr_ordered_rndpos_reader::refill_from_index_reader()
719 {
720   range_id_t range_info;
721   int res;
722   DBUG_ENTER("Mrr_ordered_rndpos_reader::refill_from_index_reader");
723 
724   DBUG_ASSERT(rowid_buffer->is_empty());
725   index_rowid= index_reader->get_rowid_ptr();
726   rowid_buffer->reset();
727   rowid_buffer->setup_writing(file->ref_length,
728                               is_mrr_assoc? sizeof(range_id_t) : 0);
729 
730   last_identical_rowid= NULL;
731 
732   index_reader->resume_read();
733   while (rowid_buffer->can_write())
734   {
735     res= index_reader->get_next(&range_info);
736 
737     if (res)
738     {
739       if (res != HA_ERR_END_OF_FILE)
740         DBUG_RETURN(res);
741       index_reader_needs_refill=TRUE;
742       break;
743     }
744 
745     index_reader->position();
746 
747     /*
748       If the built rowid filter cannot be used at the engine level, use it here.
749     */
750     if (rowid_filter && !file->pushed_rowid_filter &&
751         !rowid_filter->check((char *)index_rowid))
752       continue;
753 
754     /* Put rowid, or {rowid, range_id} pair into the buffer */
755     rowid_buffer->write_ptr1= index_rowid;
756     rowid_buffer->write_ptr2= (uchar*)&range_info;
757     rowid_buffer->write();
758   }
759 
760   /*
761     When index_reader_needs_refill=TRUE, this means we've got all of index
762     tuples for lookups keys that index_reader had. We are not in the middle
763     of an index read, so there is no need to call interrupt_read.
764 
765     Actually, we must not call interrupt_read(), because it could be that we
766     haven't read a single row (because all index lookups returned
767     HA_ERR_KEY_NOT_FOUND). In this case, interrupt_read() will cause [harmless]
768     valgrind warnings when trying to save garbage from table->record[0].
769   */
770   if (!index_reader_needs_refill)
771     index_reader->interrupt_read();
772   /* Sort the buffer contents by rowid */
773   rowid_buffer->sort((qsort2_cmp)rowid_cmp_reverse, (void*)file);
774 
775   rowid_buffer->setup_reading(file->ref_length,
776                               is_mrr_assoc ? sizeof(range_id_t) : 0);
777   DBUG_RETURN(rowid_buffer->is_empty()? HA_ERR_END_OF_FILE : 0);
778 }
779 
780 
781 /*
782   Get the next {record, range_id} using ordered array of rowid+range_id pairs
783 
784   @note
785     Since we have sorted rowids, we try not to make multiple rnd_pos() calls
786     with the same rowid value.
787 */
788 
789 int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
790 {
791   int res;
792 
793   /*
794     First, check if rowid buffer has elements with the same rowid value as
795     the previous.
796   */
797   while (last_identical_rowid)
798   {
799     /*
800       Current record (the one we've returned in previous call) was obtained
801       from a rowid that matched multiple range_ids. Return this record again,
802       with next matching range_id.
803     */
804     (void)rowid_buffer->read();
805 
806     if (rowid_buffer->read_ptr1 == last_identical_rowid)
807       last_identical_rowid= NULL; /* reached the last of identical rowids */
808 
809     if (!is_mrr_assoc)
810       return 0;
811 
812     memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
813     if (!index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
814       return 0;
815   }
816 
817   /*
818      Ok, last_identical_rowid==NULL, it's time to read next different rowid
819      value and get record for it.
820   */
821   for(;;)
822   {
823     /* Return eof if there are no rowids in the buffer after re-fill attempt */
824     if (rowid_buffer->read())
825       return HA_ERR_END_OF_FILE;
826 
827     if (is_mrr_assoc)
828     {
829       memcpy(range_info, rowid_buffer->read_ptr2, sizeof(range_id_t));
830       if (index_reader->skip_record(*range_info, rowid_buffer->read_ptr1))
831         continue;
832     }
833 
834     res= file->ha_rnd_pos(file->get_table()->record[0],
835                           rowid_buffer->read_ptr1);
836 
837     if (res)
838       return res; /* Some fatal error */
839 
840     break; /* Got another record */
841   }
842 
843   /*
844     Check if subsequent buffer elements have the same rowid value as this
845     one. If yes, remember this fact so that we don't make any more rnd_pos()
846     calls with this value.
847 
848     Note: this implies that SQL layer doesn't touch table->record[0]
849     between calls.
850   */
851   Lifo_buffer_iterator it;
852   it.init(rowid_buffer);
853   while (!it.read())
854   {
855     if (file->cmp_ref(it.read_ptr1, rowid_buffer->read_ptr1))
856       break;
857     last_identical_rowid= it.read_ptr1;
858   }
859   return 0;
860 }
861 
862 
863 /****************************************************************************
864  * Top-level DS-MRR implementation functions (the ones called by storage engine)
865  ***************************************************************************/
866 
867 /**
868   DS-MRR: Initialize and start MRR scan
869 
870   Initialize and start the MRR scan. Depending on the mode parameter, this
871   may use default or DS-MRR implementation.
872 
873   @param h_arg           Table handler to be used
874   @param key             Index to be used
875   @param seq_funcs       Interval sequence enumeration functions
876   @param seq_init_param  Interval sequence enumeration parameter
877   @param n_ranges        Number of ranges in the sequence.
878   @param mode            HA_MRR_* modes to use
879   @param buf             INOUT Buffer to use
880 
881   @retval 0     Ok, Scan started.
882   @retval other Error
883 */
884 
885 int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
886                            void *seq_init_param, uint n_ranges, uint mode,
887                            HANDLER_BUFFER *buf)
888 {
889   TABLE *table= h_arg->get_table();
890   THD *thd= table->in_use;
891   int res;
892   Key_parameters keypar;
893   uint UNINIT_VAR(key_buff_elem_size); /* set/used when do_sort_keys==TRUE */
894   handler *h_idx;
895   Mrr_ordered_rndpos_reader *disk_strategy= NULL;
896   bool do_sort_keys= FALSE;
897   DBUG_ENTER("DsMrr_impl::dsmrr_init");
898   /*
899     index_merge may invoke a scan on an object for which dsmrr_info[_const]
900     has not been called, so set the owner handler here as well.
901   */
902   primary_file= h_arg;
903   is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);
904 
905   strategy_exhausted= FALSE;
906 
907   /* By default, have do-nothing buffer manager */
908   buf_manager.arg= this;
909   buf_manager.reset_buffer_sizes= do_nothing;
910   buf_manager.redistribute_buffer_space= do_nothing;
911 
912   if (mode & (HA_MRR_USE_DEFAULT_IMPL | HA_MRR_SORTED))
913     goto use_default_impl;
914 
915   /*
916     Determine whether we'll need to do key sorting and/or rnd_pos() scan
917   */
918   index_strategy= NULL;
919   if ((mode & HA_MRR_SINGLE_POINT) &&
920       optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
921   {
922     do_sort_keys= TRUE;
923     index_strategy= &reader_factory.ordered_index_reader;
924   }
925   else
926     index_strategy= &reader_factory.simple_index_reader;
927 
928   strategy= index_strategy;
929   /*
930     We don't need a rowid-to-rndpos step if
931      - We're doing a scan on clustered primary key
932      - [In the future] We're doing an index_only read
933   */
934   DBUG_ASSERT(primary_file->inited == handler::INDEX ||
935               (primary_file->inited == handler::RND &&
936                secondary_file &&
937                secondary_file->inited == handler::INDEX));
938 
939   h_idx= (primary_file->inited == handler::INDEX)? primary_file: secondary_file;
940   keyno= h_idx->active_index;
941 
942   if (!(keyno == table->s->primary_key && h_idx->primary_key_is_clustered()))
943   {
944     strategy= disk_strategy= &reader_factory.ordered_rndpos_reader;
945     if (h_arg->pushed_rowid_filter)
946     {
947       /*
948         Currently usage of a rowid filter within InnoDB engine is not supported
949         if the table is accessed by the primary key.
950         With optimizer switches ''mrr' and 'mrr_sort_keys' are both enabled
951         any access by a secondary index is converted to the rndpos access. In
952         InnoDB the rndpos access is always uses the primary key.
953         Do not use pushed rowid filter if the table is accessed actually by the
954         primary key. Use the rowid filter outside the engine code (see
955         Mrr_ordered_rndpos_reader::refill_from_index_reader).
956       */
957       rowid_filter= h_arg->pushed_rowid_filter;
958       h_arg->cancel_pushed_rowid_filter();
959     }
960   }
961 
962   full_buf= buf->buffer;
963   full_buf_end= buf->buffer_end;
964 
965   if (do_sort_keys)
966   {
967     /* Pre-calculate some parameters of key sorting */
968     keypar.use_key_pointers= MY_TEST(mode & HA_MRR_MATERIALIZED_KEYS);
969     seq_funcs->get_key_info(seq_init_param, &keypar.key_tuple_length,
970                             &keypar.key_tuple_map);
971     keypar.key_size_in_keybuf= keypar.use_key_pointers?
972                                  sizeof(char*) : keypar.key_tuple_length;
973     key_buff_elem_size= keypar.key_size_in_keybuf + (int)is_mrr_assoc * sizeof(void*);
974 
975     /* Ordered index reader needs some space to store an index tuple */
976     if (strategy != index_strategy)
977     {
978       uint saved_pk_length=0;
979       if (h_idx->primary_key_is_clustered())
980       {
981         uint pk= h_idx->get_table()->s->primary_key;
982         if (pk != MAX_KEY)
983           saved_pk_length= h_idx->get_table()->key_info[pk].key_length;
984       }
985 
986       KEY *used_index= &h_idx->get_table()->key_info[h_idx->active_index];
987       if (reader_factory.ordered_index_reader.
988             set_interruption_temp_buffer(primary_file->ref_length,
989                                          used_index->key_length,
990                                          saved_pk_length,
991                                          &full_buf, full_buf_end))
992         goto use_default_impl;
993     }
994     else
995       reader_factory.ordered_index_reader.set_no_interruption_temp_buffer();
996   }
997 
998   if (strategy == index_strategy)
999   {
1000     /*
1001       Index strategy alone handles the record retrieval. Give all buffer space
1002       to it. Key buffer should have forward orientation so we can return the
1003       end of it.
1004     */
1005     key_buffer= &forward_key_buf;
1006     key_buffer->set_buffer_space(full_buf, full_buf_end);
1007 
1008     /* Safety: specify that rowid buffer has zero size: */
1009     rowid_buffer.set_buffer_space(full_buf_end, full_buf_end);
1010 
1011     if (do_sort_keys && !key_buffer->have_space_for(key_buff_elem_size))
1012       goto use_default_impl;
1013 
1014     if ((res= index_strategy->init(primary_file, seq_funcs, seq_init_param, n_ranges,
1015                                    mode, &keypar, key_buffer, &buf_manager)))
1016       goto error;
1017   }
1018   else
1019   {
1020     /* We'll have both index and rndpos strategies working together */
1021     if (do_sort_keys)
1022     {
1023       /* Both strategies will need buffer space, share the buffer */
1024       if (setup_buffer_sharing(keypar.key_size_in_keybuf, keypar.key_tuple_map))
1025         goto use_default_impl;
1026 
1027       buf_manager.reset_buffer_sizes= reset_buffer_sizes;
1028       buf_manager.redistribute_buffer_space= redistribute_buffer_space;
1029     }
1030     else
1031     {
1032       /* index strategy doesn't need buffer, give all space to rowids*/
1033       rowid_buffer.set_buffer_space(full_buf, full_buf_end);
1034       if (!rowid_buffer.have_space_for(primary_file->ref_length +
1035                                        (int)is_mrr_assoc * sizeof(range_id_t)))
1036         goto use_default_impl;
1037     }
1038 
1039     // setup_two_handlers() will call dsmrr_close() will clears the filter.
1040     // Save its value and restore afterwards.
1041     Rowid_filter *tmp = rowid_filter;
1042     if ((res= setup_two_handlers()))
1043       goto error;
1044     rowid_filter= tmp;
1045 
1046     if ((res= index_strategy->init(secondary_file, seq_funcs, seq_init_param,
1047                                    n_ranges, mode, &keypar, key_buffer,
1048                                    &buf_manager)) ||
1049         (res= disk_strategy->init(primary_file, index_strategy, mode,
1050                                   &rowid_buffer, rowid_filter)))
1051     {
1052       goto error;
1053     }
1054   }
1055 
1056   /*
1057     At this point, we're sure that we're running a native MRR scan (i.e. we
1058     didnt fall back to default implementation for some reason).
1059   */
1060   status_var_increment(thd->status_var.ha_mrr_init_count);
1061 
1062   res= strategy->refill_buffer(TRUE);
1063   if (res)
1064   {
1065     if (res != HA_ERR_END_OF_FILE)
1066       goto error;
1067     strategy_exhausted= TRUE;
1068   }
1069 
1070   /*
1071     If we have scanned through all intervals in *seq, then adjust *buf to
1072     indicate that the remaining buffer space will not be used.
1073   */
1074 //  if (dsmrr_eof)
1075 //    buf->end_of_used_area= rowid_buffer.end_of_space();
1076 
1077 
1078   DBUG_RETURN(0);
1079 error:
1080   close_second_handler();
1081    /* Safety, not really needed but: */
1082   strategy= NULL;
1083   DBUG_RETURN(res);
1084 
1085 use_default_impl:
1086   if (primary_file->inited != handler::INDEX)
1087   {
1088     /* We can get here when
1089        - we've previously successfully done a DS-MRR scan (and so have
1090          secondary_file!= NULL, secondary_file->inited= INDEX,
1091          primary_file->inited=RND)
1092        - for this invocation, we haven't got enough buffer space, and so we
1093          have to use the default MRR implementation.
1094 
1095       note: primary_file->ha_index_end() will call dsmrr_close() which will
1096       close/destroy the secondary_file, this is intentional.
1097       (Yes this is slow, but one can't expect performance with join buffer
1098        so small that it can accomodate one rowid and one index tuple)
1099     */
1100     if ((res= primary_file->ha_rnd_end()) ||
1101         (res= primary_file->ha_index_init(keyno, MY_TEST(mode & HA_MRR_SORTED))))
1102     {
1103       DBUG_RETURN(res);
1104     }
1105   }
1106   /* Call correct init function and assign to top level object */
1107   Mrr_simple_index_reader *s= &reader_factory.simple_index_reader;
1108   res= s->init(primary_file, seq_funcs, seq_init_param, n_ranges, mode, NULL,
1109                NULL, NULL);
1110   strategy= s;
1111   DBUG_RETURN(res);
1112 }
1113 
1114 
1115 /*
1116   Whatever the current state is, make it so that we have two handler objects:
1117   - primary_file       -  initialized for rnd_pos() scan
1118   - secondary_file     -  initialized for scanning the index specified in
1119                           this->keyno
1120   RETURN
1121     0        OK
1122     HA_XXX   Error code
1123 */
1124 
1125 int DsMrr_impl::setup_two_handlers()
1126 {
1127   int res;
1128   THD *thd= primary_file->get_table()->in_use;
1129   DBUG_ENTER("DsMrr_impl::setup_two_handlers");
1130   if (!secondary_file)
1131   {
1132     handler *new_h2;
1133     Item *pushed_cond= NULL;
1134     DBUG_ASSERT(primary_file->inited == handler::INDEX);
1135     /* Create a separate handler object to do rnd_pos() calls. */
1136     /*
1137       ::clone() takes up a lot of stack, especially on 64 bit platforms.
1138       The constant 5 is an empiric result.
1139     */
1140     if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
1141       DBUG_RETURN(1);
1142 
1143     /* Create a separate handler object to do rnd_pos() calls. */
1144     if (!(new_h2= primary_file->clone(primary_file->get_table()->s->
1145                                       normalized_path.str,
1146                                       thd->mem_root)) ||
1147         new_h2->ha_external_lock(thd, F_RDLCK))
1148     {
1149       delete new_h2;
1150       DBUG_RETURN(1);
1151     }
1152 
1153     if (keyno == primary_file->pushed_idx_cond_keyno)
1154       pushed_cond= primary_file->pushed_idx_cond;
1155 
1156     Mrr_reader *save_strategy= strategy;
1157     strategy= NULL;
1158     /*
1159       Caution: this call will invoke this->dsmrr_close(). Do not put the
1160       created secondary table handler new_h2 into this->secondary_file or it
1161       will delete it. Also, save the picked strategy
1162     */
1163     res= primary_file->ha_index_end();
1164 
1165     strategy= save_strategy;
1166     secondary_file= new_h2;
1167 
1168     if (res || (res= (primary_file->ha_rnd_init(FALSE))))
1169       goto error;
1170 
1171     table->prepare_for_position();
1172     secondary_file->extra(HA_EXTRA_KEYREAD);
1173     secondary_file->mrr_iter= primary_file->mrr_iter;
1174 
1175     if ((res= secondary_file->ha_index_init(keyno, FALSE)))
1176       goto error;
1177 
1178     if (pushed_cond)
1179       secondary_file->idx_cond_push(keyno, pushed_cond);
1180   }
1181   else
1182   {
1183     DBUG_ASSERT(secondary_file && secondary_file->inited==handler::INDEX);
1184     /*
1185       We get here when the access alternates betwen MRR scan(s) and non-MRR
1186       scans.
1187 
1188       Calling primary_file->index_end() will invoke dsmrr_close() for this object,
1189       which will delete secondary_file. We need to keep it, so put it away and dont
1190       let it be deleted:
1191     */
1192     if (primary_file->inited == handler::INDEX)
1193     {
1194       handler *save_h2= secondary_file;
1195       Mrr_reader *save_strategy= strategy;
1196       secondary_file= NULL;
1197       strategy= NULL;
1198       res= primary_file->ha_index_end();
1199       secondary_file= save_h2;
1200       strategy= save_strategy;
1201       if (res)
1202         goto error;
1203     }
1204     if ((primary_file->inited != handler::RND) &&
1205         (res= primary_file->ha_rnd_init(FALSE)))
1206       goto error;
1207   }
1208   DBUG_RETURN(0);
1209 
1210 error:
1211   DBUG_RETURN(res);
1212 }
1213 
1214 
1215 void DsMrr_impl::close_second_handler()
1216 {
1217   if (secondary_file)
1218   {
1219     secondary_file->extra(HA_EXTRA_NO_KEYREAD);
1220     secondary_file->ha_index_or_rnd_end();
1221     secondary_file->ha_external_lock(current_thd, F_UNLCK);
1222     secondary_file->ha_close();
1223     delete secondary_file;
1224     secondary_file= NULL;
1225   }
1226 }
1227 
1228 
1229 void DsMrr_impl::dsmrr_close()
1230 {
1231   DBUG_ENTER("DsMrr_impl::dsmrr_close");
1232   rowid_filter= NULL;
1233   close_second_handler();
1234   strategy= NULL;
1235   DBUG_VOID_RETURN;
1236 }
1237 
1238 
1239 /*
1240   my_qsort2-compatible static member function to compare key tuples
1241 */
1242 
1243 int Mrr_ordered_index_reader::compare_keys(void* arg, uchar* key1_arg,
1244                                            uchar* key2_arg)
1245 {
1246   Mrr_ordered_index_reader *reader= (Mrr_ordered_index_reader*)arg;
1247   TABLE *table= reader->file->get_table();
1248   KEY_PART_INFO *part= table->key_info[reader->file->active_index].key_part;
1249   uchar *key1, *key2;
1250 
1251   if (reader->keypar.use_key_pointers)
1252   {
1253     /* the buffer stores pointers to keys, get to the keys */
1254     memcpy(&key1, key1_arg, sizeof(char*));
1255     memcpy(&key2, key2_arg, sizeof(char*));
1256   }
1257   else
1258   {
1259     key1= key1_arg;
1260     key2= key2_arg;
1261   }
1262 
1263   return key_tuple_cmp(part, key1, key2, reader->keypar.key_tuple_length);
1264 }
1265 
1266 
1267 int Mrr_ordered_index_reader::compare_keys_reverse(void* arg, uchar* key1,
1268                                                    uchar* key2)
1269 {
1270   return -compare_keys(arg, key1, key2);
1271 }
1272 
1273 
1274 /**
1275   Set the buffer space to be shared between rowid and key buffer
1276 
1277   @return FALSE  ok
1278   @return TRUE   There is so little buffer space that we won't be able to use
1279                  the strategy.
1280                  This happens when we don't have enough space for one rowid
1281                  element and one key element so this is mainly targeted at
1282                  testing.
1283 */
1284 
1285 bool DsMrr_impl::setup_buffer_sharing(uint key_size_in_keybuf,
1286                                       key_part_map key_tuple_map)
1287 {
1288   long key_buff_elem_size= key_size_in_keybuf +
1289                            (int)is_mrr_assoc * sizeof(range_id_t);
1290 
1291   KEY *key_info= &primary_file->get_table()->key_info[keyno];
1292   /*
1293     Ok if we got here we need to allocate one part of the buffer
1294     for keys and another part for rowids.
1295   */
1296   ulonglong rowid_buf_elem_size= primary_file->ref_length +
1297                                  (int)is_mrr_assoc * sizeof(range_id_t);
1298 
1299   /*
1300     Use rec_per_key statistics as a basis to find out how many rowids
1301     we'll get for each key value.
1302      TODO: what should be the default value to use when there is no
1303            statistics?
1304   */
1305   uint parts= my_count_bits(key_tuple_map);
1306   ha_rows rpc;
1307   ulonglong rowids_size= rowid_buf_elem_size;
1308   if ((rpc= (ha_rows) key_info->actual_rec_per_key(parts - 1)))
1309     rowids_size= rowid_buf_elem_size * rpc;
1310 
1311   double fraction_for_rowids=
1312     (ulonglong2double(rowids_size) /
1313      (ulonglong2double(rowids_size) + key_buff_elem_size));
1314 
1315   ptrdiff_t bytes_for_rowids=
1316     (ptrdiff_t)floor(0.5 + fraction_for_rowids * (full_buf_end - full_buf));
1317 
1318   ptrdiff_t bytes_for_keys= (full_buf_end - full_buf) - bytes_for_rowids;
1319 
1320   if (bytes_for_keys < key_buff_elem_size + 1 ||
1321       bytes_for_rowids < (ptrdiff_t)rowid_buf_elem_size + 1)
1322     return TRUE; /* Failed to provide minimum space for one of the buffers */
1323 
1324   rowid_buffer_end= full_buf + bytes_for_rowids;
1325   rowid_buffer.set_buffer_space(full_buf, rowid_buffer_end);
1326   key_buffer= &backward_key_buf;
1327   key_buffer->set_buffer_space(rowid_buffer_end, full_buf_end);
1328 
1329   /* The above code guarantees that the buffers are big enough */
1330   DBUG_ASSERT(key_buffer->have_space_for(key_buff_elem_size) &&
1331               rowid_buffer.have_space_for((size_t)rowid_buf_elem_size));
1332 
1333   return FALSE;
1334 }
1335 
1336 
1337 void DsMrr_impl::do_nothing(void *dsmrr_arg)
1338 {
1339   /* Do nothing */
1340 }
1341 
1342 
1343 void DsMrr_impl::reset_buffer_sizes(void *dsmrr_arg)
1344 {
1345   DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1346   dsmrr->rowid_buffer.set_buffer_space(dsmrr->full_buf,
1347                                        dsmrr->rowid_buffer_end);
1348   dsmrr->key_buffer->set_buffer_space(dsmrr->rowid_buffer_end,
1349                                       dsmrr->full_buf_end);
1350 }
1351 
1352 
1353 /*
1354   Take unused space from the key buffer and give it to the rowid buffer
1355 */
1356 
1357 void DsMrr_impl::redistribute_buffer_space(void *dsmrr_arg)
1358 {
1359   DsMrr_impl *dsmrr= (DsMrr_impl*)dsmrr_arg;
1360   uchar *unused_start, *unused_end;
1361   dsmrr->key_buffer->remove_unused_space(&unused_start, &unused_end);
1362   dsmrr->rowid_buffer.grow(unused_start, unused_end);
1363 }
1364 
1365 
1366 /*
1367   @brief Initialize the iterator
1368 
1369   @note
1370   Initialize the iterator to produce matches for the key of the first element
1371   in owner_arg->key_buffer
1372 
1373   @retval  0                    OK
1374   @retval  HA_ERR_END_OF_FILE   Either the owner->key_buffer is empty or
1375                                 no matches for the key we've tried (check
1376                                 key_buffer->is_empty() to tell these apart)
1377   @retval  other code           Fatal error
1378 */
1379 
1380 int Key_value_records_iterator::init(Mrr_ordered_index_reader *owner_arg)
1381 {
1382   int res;
1383   owner= owner_arg;
1384 
1385   identical_key_it.init(owner->key_buffer);
1386   owner->key_buffer->setup_reading(owner->keypar.key_size_in_keybuf,
1387                                    owner->is_mrr_assoc ? sizeof(void*) : 0);
1388 
1389   if (identical_key_it.read())
1390     return HA_ERR_END_OF_FILE;
1391 
1392   uchar *key_in_buf= last_identical_key_ptr= identical_key_it.read_ptr1;
1393 
1394   uchar *index_tuple= key_in_buf;
1395   if (owner->keypar.use_key_pointers)
1396     memcpy(&index_tuple, key_in_buf, sizeof(char*));
1397 
1398   /* Check out how many more identical keys are following */
1399   while (!identical_key_it.read())
1400   {
1401     if (Mrr_ordered_index_reader::compare_keys(owner, key_in_buf,
1402                                                identical_key_it.read_ptr1))
1403       break;
1404     last_identical_key_ptr= identical_key_it.read_ptr1;
1405   }
1406   identical_key_it.init(owner->key_buffer);
1407   res= owner->file->ha_index_read_map(owner->file->get_table()->record[0],
1408                                       index_tuple,
1409                                       owner->keypar.key_tuple_map,
1410                                       HA_READ_KEY_EXACT);
1411 
1412   if (res)
1413   {
1414     /* Failed to find any matching records */
1415     move_to_next_key_value();
1416     return res;
1417   }
1418   owner->have_saved_rowid= FALSE;
1419   get_next_row= FALSE;
1420   return 0;
1421 }
1422 
1423 
1424 int Key_value_records_iterator::get_next(range_id_t *range_info)
1425 {
1426   int res;
1427 
1428   if (get_next_row)
1429   {
1430     if (owner->keypar.index_ranges_unique)
1431     {
1432       /* We're using a full unique key, no point to call index_next_same */
1433       return HA_ERR_END_OF_FILE;
1434     }
1435 
1436     handler *h= owner->file;
1437     uchar *lookup_key;
1438     if (owner->keypar.use_key_pointers)
1439       memcpy(&lookup_key, identical_key_it.read_ptr1, sizeof(void*));
1440     else
1441       lookup_key= identical_key_it.read_ptr1;
1442 
1443     if ((res= h->ha_index_next_same(h->get_table()->record[0],
1444                                     lookup_key,
1445                                     owner->keypar.key_tuple_length)))
1446     {
1447       /* It's either HA_ERR_END_OF_FILE or some other error */
1448       return res;
1449     }
1450     identical_key_it.init(owner->key_buffer);
1451     owner->have_saved_rowid= FALSE;
1452     get_next_row= FALSE;
1453   }
1454 
1455   identical_key_it.read(); /* This gets us next range_id */
1456   memcpy(range_info, identical_key_it.read_ptr2, sizeof(range_id_t));
1457 
1458   if (!last_identical_key_ptr ||
1459       (identical_key_it.read_ptr1 == last_identical_key_ptr))
1460   {
1461     /*
1462       We've reached the last of the identical keys that current record is a
1463       match for.  Set get_next_row=TRUE so that we read the next index record
1464       on the next call to this function.
1465     */
1466     get_next_row= TRUE;
1467   }
1468   return 0;
1469 }
1470 
1471 
1472 void Key_value_records_iterator::move_to_next_key_value()
1473 {
1474   while (!owner->key_buffer->read() &&
1475          (owner->key_buffer->read_ptr1 != last_identical_key_ptr)) {}
1476 }
1477 
1478 
1479 /**
1480   DS-MRR implementation: multi_range_read_next() function.
1481 
1482   Calling convention is like multi_range_read_next() has.
1483 */
1484 
1485 int DsMrr_impl::dsmrr_next(range_id_t *range_info)
1486 {
1487   int res;
1488   if (strategy_exhausted)
1489     return HA_ERR_END_OF_FILE;
1490 
1491   while ((res= strategy->get_next(range_info)) == HA_ERR_END_OF_FILE)
1492   {
1493     if ((res= strategy->refill_buffer(FALSE)))
1494       break; /* EOF or error */
1495   }
1496   return res;
1497 }
1498 
1499 
1500 /**
1501   DS-MRR implementation: multi_range_read_info() function
1502 */
1503 ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows,
1504                                uint key_parts,
1505                                uint *bufsz, uint *flags, Cost_estimate *cost)
1506 {
1507   ha_rows res __attribute__((unused));
1508   uint def_flags= *flags;
1509   uint def_bufsz= *bufsz;
1510 
1511   /* Get cost/flags/mem_usage of default MRR implementation */
1512   res= primary_file->handler::multi_range_read_info(keyno, n_ranges, rows,
1513                                                     key_parts, &def_bufsz,
1514                                                     &def_flags, cost);
1515   DBUG_ASSERT(!res);
1516 
1517   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1518       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1519   {
1520     /* Default implementation is choosen */
1521     DBUG_PRINT("info", ("Default MRR implementation choosen"));
1522     *flags= def_flags;
1523     *bufsz= def_bufsz;
1524   }
1525   else
1526   {
1527     /* *flags and *bufsz were set by choose_mrr_impl */
1528     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1529   }
1530   return 0;
1531 }
1532 
1533 
1534 /**
1535   DS-MRR Implementation: multi_range_read_info_const() function
1536 */
1537 
1538 ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
1539                                  void *seq_init_param, uint n_ranges,
1540                                  uint *bufsz, uint *flags, Cost_estimate *cost)
1541 {
1542   ha_rows rows;
1543   uint def_flags= *flags;
1544   uint def_bufsz= *bufsz;
1545   /* Get cost/flags/mem_usage of default MRR implementation */
1546   rows= primary_file->handler::multi_range_read_info_const(keyno, seq,
1547                                                            seq_init_param,
1548                                                            n_ranges,
1549                                                            &def_bufsz,
1550                                                            &def_flags, cost);
1551   if (rows == HA_POS_ERROR)
1552   {
1553     /* Default implementation can't perform MRR scan => we can't either */
1554     return rows;
1555   }
1556 
1557   /*
1558     If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
1559     use the default MRR implementation (we need it for UPDATE/DELETE).
1560     Otherwise, make a choice based on cost and @@optimizer_switch settings
1561   */
1562   if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
1563       choose_mrr_impl(keyno, rows, flags, bufsz, cost))
1564   {
1565     DBUG_PRINT("info", ("Default MRR implementation choosen"));
1566     *flags= def_flags;
1567     *bufsz= def_bufsz;
1568   }
1569   else
1570   {
1571     /* *flags and *bufsz were set by choose_mrr_impl */
1572     DBUG_PRINT("info", ("DS-MRR implementation choosen"));
1573   }
1574   return rows;
1575 }
1576 
1577 
1578 /**
1579   Check if key has partially-covered columns
1580 
1581   We can't use DS-MRR to perform range scans when the ranges are over
1582   partially-covered keys, because we'll not have full key part values
1583   (we'll have their prefixes from the index) and will not be able to check
1584   if we've reached the end the range.
1585 
1586   @param keyno  Key to check
1587 
1588   @todo
1589     Allow use of DS-MRR in cases where the index has partially-covered
1590     components but they are not used for scanning.
1591 
1592   @retval TRUE   Yes
1593   @retval FALSE  No
1594 */
1595 
1596 bool key_uses_partial_cols(TABLE_SHARE *share, uint keyno)
1597 {
1598   KEY_PART_INFO *kp= share->key_info[keyno].key_part;
1599   KEY_PART_INFO *kp_end= kp + share->key_info[keyno].user_defined_key_parts;
1600   for (; kp != kp_end; kp++)
1601   {
1602     if (!kp->field->part_of_key.is_set(keyno))
1603       return TRUE;
1604   }
1605   return FALSE;
1606 }
1607 
1608 
1609 /*
1610   Check if key/flags allow DS-MRR/CPK strategy to be used
1611 
1612   @param thd
1613   @param keyno      Index that will be used
1614   @param  mrr_flags
1615 
1616   @retval TRUE   DS-MRR/CPK should be used
1617   @retval FALSE  Otherwise
1618 */
1619 
1620 bool DsMrr_impl::check_cpk_scan(THD *thd, TABLE_SHARE *share, uint keyno,
1621                                 uint mrr_flags)
1622 {
1623   return MY_TEST((mrr_flags & HA_MRR_SINGLE_POINT) &&
1624                  keyno == share->primary_key &&
1625                  primary_file->primary_key_is_clustered() &&
1626                  optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS));
1627 }
1628 
1629 
1630 /*
1631   DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
1632 
1633   Make the choice between using Default MRR implementation and DS-MRR.
1634   This function contains common functionality factored out of dsmrr_info()
1635   and dsmrr_info_const(). The function assumes that the default MRR
1636   implementation's applicability requirements are satisfied.
1637 
1638   @param keyno       Index number
1639   @param rows        E(full rows to be retrieved)
1640   @param flags  IN   MRR flags provided by the MRR user
1641                 OUT  If DS-MRR is choosen, flags of DS-MRR implementation
1642                      else the value is not modified
1643   @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
1644                      else the value is not modified
1645   @param cost   IN   Cost of default MRR implementation
1646                 OUT  If DS-MRR is choosen, cost of DS-MRR scan
1647                      else the value is not modified
1648 
1649   @retval TRUE   Default MRR implementation should be used
1650   @retval FALSE  DS-MRR implementation should be used
1651 */
1652 
1653 
1654 bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
1655                                  uint *bufsz, Cost_estimate *cost)
1656 {
1657   Cost_estimate dsmrr_cost;
1658   bool res;
1659   THD *thd= primary_file->get_table()->in_use;
1660   TABLE_SHARE *share= primary_file->get_table_share();
1661 
1662   bool doing_cpk_scan= check_cpk_scan(thd, share, keyno, *flags);
1663   bool using_cpk= MY_TEST(keyno == share->primary_key &&
1664                           primary_file->primary_key_is_clustered());
1665   *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1666   if (!optimizer_flag(thd, OPTIMIZER_SWITCH_MRR) ||
1667       *flags & HA_MRR_INDEX_ONLY ||
1668       (using_cpk && !doing_cpk_scan) || key_uses_partial_cols(share, keyno))
1669   {
1670     /* Use the default implementation */
1671     *flags |= HA_MRR_USE_DEFAULT_IMPL;
1672     *flags &= ~HA_MRR_IMPLEMENTATION_FLAGS;
1673     return TRUE;
1674   }
1675 
1676   uint add_len= share->key_info[keyno].key_length + primary_file->ref_length;
1677   if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, add_len,
1678                               &dsmrr_cost))
1679     return TRUE;
1680 
1681   bool force_dsmrr;
1682   /*
1683     If mrr_cost_based flag is not set, then set cost of DS-MRR to be minimum of
1684     DS-MRR and Default implementations cost. This allows one to force use of
1685     DS-MRR whenever it is applicable without affecting other cost-based
1686     choices.
1687   */
1688   if ((force_dsmrr= !optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_COST_BASED)) &&
1689       dsmrr_cost.total_cost() > cost->total_cost())
1690     dsmrr_cost= *cost;
1691 
1692   if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
1693   {
1694     *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
1695     *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
1696     *cost= dsmrr_cost;
1697     res= FALSE;
1698 
1699 
1700     if ((using_cpk && doing_cpk_scan) ||
1701         (optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS) &&
1702          *flags & HA_MRR_SINGLE_POINT))
1703     {
1704       *flags |= DSMRR_IMPL_SORT_KEYS;
1705     }
1706 
1707     if (!(using_cpk && doing_cpk_scan) &&
1708         !(*flags & HA_MRR_INDEX_ONLY))
1709     {
1710       *flags |= DSMRR_IMPL_SORT_ROWIDS;
1711     }
1712     /*
1713     if ((*flags & HA_MRR_SINGLE_POINT) &&
1714          optimizer_flag(thd, OPTIMIZER_SWITCH_MRR_SORT_KEYS))
1715       *flags |= HA_MRR_MATERIALIZED_KEYS;
1716     */
1717   }
1718   else
1719   {
1720     /* Use the default MRR implementation */
1721     res= TRUE;
1722   }
1723   return res;
1724 }
1725 
1726 /*
1727   Take the flags we've returned previously and print one of
1728   - Key-ordered scan
1729   - Rowid-ordered scan
1730   - Key-ordered Rowid-ordered scan
1731 */
1732 
1733 int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
1734 {
1735   const char *key_ordered=   "Key-ordered scan";
1736   const char *rowid_ordered= "Rowid-ordered scan";
1737   const char *both_ordered=  "Key-ordered Rowid-ordered scan";
1738   const char *used_str="";
1739   const uint BOTH_FLAGS= (DSMRR_IMPL_SORT_KEYS | DSMRR_IMPL_SORT_ROWIDS);
1740 
1741   if (!(mrr_mode & HA_MRR_USE_DEFAULT_IMPL))
1742   {
1743     if ((mrr_mode & BOTH_FLAGS) == BOTH_FLAGS)
1744       used_str= both_ordered;
1745     else if (mrr_mode & DSMRR_IMPL_SORT_KEYS)
1746       used_str= key_ordered;
1747     else if (mrr_mode & DSMRR_IMPL_SORT_ROWIDS)
1748       used_str= rowid_ordered;
1749 
1750     size_t used_str_len= strlen(used_str);
1751     size_t copy_len= MY_MIN(used_str_len, size);
1752     memcpy(str, used_str, copy_len);
1753     return (int)copy_len;
1754   }
1755   return 0;
1756 }
1757 
1758 
1759 static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost);
1760 
1761 
1762 /**
1763   Get cost of DS-MRR scan
1764 
1765   @param keynr              Index to be used
1766   @param rows               E(Number of rows to be scanned)
1767   @param flags              Scan parameters (HA_MRR_* flags)
1768   @param buffer_size INOUT  Buffer size
1769                             IN: Buffer of size 0 means the function
1770                             will determine the best size and return it.
1771   @param extra_mem_overhead Extra memory overhead of the MRR implementation
1772                             (the function assumes this many bytes of buffer
1773                              space will not be usable by DS-MRR)
1774   @param cost        OUT    The cost
1775 
1776   @retval FALSE  OK
1777   @retval TRUE   Error, DS-MRR cannot be used (the buffer is too small
1778                  for even 1 rowid)
1779 */
1780 
1781 bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
1782                                          uint *buffer_size,
1783                                          uint extra_mem_overhead,
1784                                          Cost_estimate *cost)
1785 {
1786   ulong max_buff_entries, elem_size;
1787   ha_rows rows_in_full_step;
1788   ha_rows rows_in_last_step;
1789   uint n_full_steps;
1790   double index_read_cost;
1791 
1792   elem_size= primary_file->ref_length +
1793              sizeof(void*) * (!MY_TEST(flags & HA_MRR_NO_ASSOCIATION));
1794 
1795   if (!*buffer_size)
1796   {
1797     /*
1798       We are requested to determine how much memory we need.
1799       Request memory to finish the scan in one pass but do not request
1800       more than @@mrr_buff_size.
1801     */
1802     *buffer_size= (uint) MY_MIN(extra_mem_overhead + elem_size*(ulong)rows,
1803                                 MY_MAX(table->in_use->variables.mrr_buff_size,
1804                                        extra_mem_overhead));
1805   }
1806 
1807   if (elem_size + extra_mem_overhead > *buffer_size)
1808     return TRUE; /* Buffer has not enough space for even 1 rowid */
1809 
1810   max_buff_entries = (*buffer_size - extra_mem_overhead) / elem_size;
1811 
1812   /* Number of iterations we'll make with full buffer */
1813   n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
1814 
1815   /*
1816     Get numbers of rows we'll be processing in
1817      - non-last sweep, with full buffer
1818      - last iteration, with non-full buffer
1819   */
1820   rows_in_full_step= max_buff_entries;
1821   rows_in_last_step= rows % max_buff_entries;
1822 
1823   /* Adjust buffer size if we expect to use only part of the buffer */
1824   if (n_full_steps)
1825   {
1826     get_sort_and_sweep_cost(table, rows_in_full_step, cost);
1827     cost->multiply(n_full_steps);
1828   }
1829   else
1830   {
1831     cost->reset();
1832     *buffer_size= (uint)MY_MAX(*buffer_size,
1833                       (size_t)(1.2*rows_in_last_step) * elem_size +
1834                       primary_file->ref_length + table->key_info[keynr].key_length);
1835   }
1836 
1837   Cost_estimate last_step_cost;
1838   get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
1839   cost->add(&last_step_cost);
1840 
1841   if (n_full_steps != 0)
1842     cost->mem_cost= *buffer_size;
1843   else
1844     cost->mem_cost= (double)rows_in_last_step * elem_size;
1845 
1846   /* Total cost of all index accesses */
1847   index_read_cost= primary_file->keyread_time(keynr, 1, rows);
1848   cost->add_io(index_read_cost, 1 /* Random seeks */);
1849   return FALSE;
1850 }
1851 
1852 
1853 /*
1854   Get cost of one sort-and-sweep step
1855 
1856   It consists of two parts:
1857    - sort an array of #nrows ROWIDs using qsort
1858    - read #nrows records from table in a sweep.
1859 
1860   @param table       Table being accessed
1861   @param nrows       Number of rows to be sorted and retrieved
1862   @param cost   OUT  The cost of scan
1863 */
1864 
1865 static
1866 void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, Cost_estimate *cost)
1867 {
1868   if (nrows)
1869   {
1870     get_sweep_read_cost(table, nrows, FALSE, cost);
1871     /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
1872     double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
1873     if (cmp_op < 3)
1874       cmp_op= 3;
1875     cost->cpu_cost += cmp_op * log2(cmp_op);
1876   }
1877   else
1878     cost->reset();
1879 }
1880 
1881 
1882 /**
1883   Get cost of reading nrows table records in a "disk sweep"
1884 
1885   A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
1886   for an ordered sequence of rowids.
1887 
1888   We assume hard disk IO. The read is performed as follows:
1889 
1890    1. The disk head is moved to the needed cylinder
1891    2. The controller waits for the plate to rotate
1892    3. The data is transferred
1893 
1894   Time to do #3 is insignificant compared to #2+#1.
1895 
1896   Time to move the disk head is proportional to head travel distance.
1897 
1898   Time to wait for the plate to rotate depends on whether the disk head
1899   was moved or not.
1900 
1901   If disk head wasn't moved, the wait time is proportional to distance
1902   between the previous block and the block we're reading.
1903 
1904   If the head was moved, we don't know how much we'll need to wait for the
1905   plate to rotate. We assume the wait time to be a variate with a mean of
1906   0.5 of full rotation time.
1907 
1908   Our cost units are "random disk seeks". The cost of random disk seek is
1909   actually not a constant, it depends one range of cylinders we're going
1910   to access. We make it constant by introducing a fuzzy concept of "typical
1911   datafile length" (it's fuzzy as it's hard to tell whether it should
1912   include index file, temp.tables etc). Then random seek cost is:
1913 
1914     1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
1915 
1916   We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
1917 
1918   @param table             Table to be accessed
1919   @param nrows             Number of rows to retrieve
1920   @param interrupted       TRUE <=> Assume that the disk sweep will be
1921                            interrupted by other disk IO. FALSE - otherwise.
1922   @param cost         OUT  The cost.
1923 */
1924 
1925 void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
1926                          Cost_estimate *cost)
1927 {
1928   DBUG_ENTER("get_sweep_read_cost");
1929 
1930   cost->reset();
1931   if (table->file->primary_key_is_clustered())
1932   {
1933     cost->io_count= table->file->read_time(table->s->primary_key,
1934                                            (uint) nrows, nrows);
1935   }
1936   else
1937   {
1938     double n_blocks=
1939       ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
1940     double busy_blocks=
1941       n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
1942     if (busy_blocks < 1.0)
1943       busy_blocks= 1.0;
1944 
1945     DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
1946                        busy_blocks));
1947     cost->io_count= busy_blocks;
1948 
1949     if (!interrupted)
1950     {
1951       /* Assume reading is done in one 'sweep' */
1952       cost->avg_io_cost= (DISK_SEEK_BASE_COST +
1953                           DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
1954     }
1955   }
1956   DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
1957   DBUG_VOID_RETURN;
1958 }
1959 
1960 
1961 /* **************************************************************************
1962  * DS-MRR implementation ends
1963  ***************************************************************************/
1964 
1965 
1966