1 /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file
25 
26   @brief
27   join cache optimizations
28 
29   @defgroup Query_Optimizer  Query Optimizer
30   @{
31 */
32 
33 #include "sql_priv.h"
34 #include "sql_select.h"
35 #include "key.h"
36 #include "sql_optimizer.h"  // JOIN
37 #include "sql_join_buffer.h"
38 #include "sql_tmp_table.h"  // instantiate_tmp_table()
39 
40 #include <algorithm>
41 using std::max;
42 using std::min;
43 
44 
45 /*****************************************************************************
46  *  Join cache module
47 ******************************************************************************/
48 
49 /*
50   Fill in the descriptor of a flag field associated with a join cache
51 
52   SYNOPSIS
53     add_field_flag_to_join_cache()
54       str           position in a record buffer to copy the field from/to
55       length        length of the field
56       field  IN/OUT pointer to the field descriptor to fill in
57 
58   DESCRIPTION
    The function fills in the descriptor of a cache flag field to which
    the parameter 'field' points. The function uses the first two
    parameters to set the position in the record buffer from/to which
    the field value is to be copied and the length of the copied fragment.
    Before returning the result the function increments the value of
    *field by 1.
    The function ignores the fields 'blob_length' and 'offset' of the
    descriptor.
67 
68   RETURN
69     the length of the field
70 */
71 
72 static
add_flag_field_to_join_cache(uchar * str,uint length,CACHE_FIELD ** field)73 uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
74 {
75   CACHE_FIELD *copy= *field;
76   copy->str= str;
77   copy->length= length;
78   copy->type= 0;
79   copy->field= 0;
80   copy->referenced_field_no= 0;
81   copy->next_copy_rowid= NULL;
82   (*field)++;
83   return length;
84 }
85 
86 
87 /*
88   Fill in the descriptors of table data fields associated with a join cache
89 
90   SYNOPSIS
91     add_table_data_fields_to_join_cache()
92       tab              descriptors of fields from this table are to be filled
93       field_set        descriptors for only these fields are to be created
94       field_cnt IN/OUT     counter of data fields
95       descr  IN/OUT        pointer to the first descriptor to be filled
96       field_ptr_cnt IN/OUT counter of pointers to the data fields
97       descr_ptr IN/OUT     pointer to the first pointer to blob descriptors
98 
99   DESCRIPTION
100     The function fills in the descriptors of cache data fields from the table
101     'tab'. The descriptors are filled only for the fields marked in the
102     bitmap 'field_set'.
103     The function fills the descriptors starting from the position pointed
104     by 'descr'. If an added field is of a BLOB type then a pointer to the
105     its descriptor is added to the array descr_ptr.
106     At the return 'descr' points to the position after the last added
107     descriptor  while 'descr_ptr' points to the position right after the
108     last added pointer.
109 
110   RETURN
111     the total length of the added fields
112 */
113 
114 static
add_table_data_fields_to_join_cache(JOIN_TAB * tab,MY_BITMAP * field_set,uint * field_cnt,CACHE_FIELD ** descr,uint * field_ptr_cnt,CACHE_FIELD *** descr_ptr)115 uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
116                                          MY_BITMAP *field_set,
117                                          uint *field_cnt,
118                                          CACHE_FIELD **descr,
119                                          uint *field_ptr_cnt,
120                                          CACHE_FIELD ***descr_ptr)
121 {
122   Field **fld_ptr;
123   uint len= 0;
124   CACHE_FIELD *copy= *descr;
125   CACHE_FIELD **copy_ptr= *descr_ptr;
126   uint used_fields= bitmap_bits_set(field_set);
127   for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
128   {
129     if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
130     {
131       len+= (*fld_ptr)->fill_cache_field(copy);
132       if (copy->type == CACHE_BLOB)
133       {
134         *copy_ptr= copy;
135         copy_ptr++;
136         (*field_ptr_cnt)++;
137       }
138       copy->field= *fld_ptr;
139       copy->referenced_field_no= 0;
140       copy->next_copy_rowid= NULL;
141       copy++;
142       (*field_cnt)++;
143       used_fields--;
144     }
145   }
146   *descr= copy;
147   *descr_ptr= copy_ptr;
148   return len;
149 }
150 
151 
152 /*
153   Determine various counters of fields associated with a record in the cache
154 
155   SYNOPSIS
156     calc_record_fields()
157 
158   DESCRIPTION
159     The function counts the number of total fields stored in a record
160     of the cache and saves this number in the 'fields' member. It also
161     determines the number of flag fields and the number of blobs.
162     The function sets 'with_match_flag' on if 'join_tab' needs a match flag
163     i.e. if it is the first inner table of an outer join, or of a semi-join
164     with FirstMatch strategy.
165 
166   RETURN
167     none
168 */
169 
void JOIN_CACHE::calc_record_fields()
{
  /*
    If there is a previous cache, start with the corresponding table, otherwise:
    - if in a regular execution, start with the first non-const table.
    - if in a materialized subquery, start with the first table of the subquery.
  */
  JOIN_TAB *tab = prev_cache ?
                    prev_cache->join_tab :
                    sj_is_materialize_strategy(join_tab->get_sj_strategy()) ?
                      join_tab->first_sj_inner_tab :
                      join->join_tab+join->const_tables;
  /* Number of tables whose fields are stored in this cache */
  tables= join_tab-tab;

  /* Reset all per-record counters before accumulating them below */
  fields= 0;
  blobs= 0;
  flag_fields= 0;
  data_field_count= 0;
  data_field_ptr_count= 0;
  referenced_fields= 0;

  for ( ; tab < join_tab ; tab++)
  {
    calc_used_field_length(join->thd, tab);
    /* One flag field for the null bitmap if the table has used nullable
       fields or BIT fields with bits stored among null bits */
    flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
    /* One flag field for the null row flag if the table may be null-complemented */
    flag_fields+= MY_TEST(tab->table->maybe_null);
    fields+= tab->used_fields;
    blobs+= tab->used_blobs;

    /* Count one extra field if the table's rowid has to be kept as well */
    fields+= tab->check_rowid_field();
  }
  /* A match flag is needed for the first inner table of an outer join or of
     a semi-join handled with the FirstMatch strategy */
  if ((with_match_flag= (join_tab->is_first_inner_for_outer_join() ||
                         (join_tab->first_sj_inner_tab == join_tab &&
                          join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH))))
    flag_fields++;
  /* Flag fields are part of the stored record, so count them in 'fields' */
  fields+= flag_fields;
}
207 
208 /*
209   Allocate memory for descriptors and pointers to them associated with the cache
210 
211   SYNOPSIS
212     alloc_fields()
213 
214   DESCRIPTION
215     The function allocates memory for the array of fields descriptors
216     and the array of pointers to the field descriptors used to copy
217     join record data from record buffers into the join buffer and
218     backward. Some pointers refer to the field descriptor associated
219     with previous caches. They are placed at the beginning of the
220     array of pointers and its total number is specified by the parameter
221     'external fields'.
222     The pointer of the first array is assigned to field_descr and the
223     number of elements is precalculated by the function calc_record_fields.
224     The allocated arrays are adjacent.
225 
226   NOTES
227     The memory is allocated in join->thd->memroot
228 
  RETURN
    0 if the allocation succeeded, 1 otherwise
231 */
232 
alloc_fields(uint external_fields)233 int JOIN_CACHE::alloc_fields(uint external_fields)
234 {
235   uint ptr_cnt= external_fields+blobs+1;
236   uint fields_size= sizeof(CACHE_FIELD)*fields;
237   field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
238                                         sizeof(CACHE_FIELD*)*ptr_cnt);
239   blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
240   return (field_descr == NULL);
241 }
242 
243 /*
244   Create descriptors of the record flag fields stored in the join buffer
245 
246   SYNOPSIS
247     create_flag_fields()
248 
249   DESCRIPTION
250     The function creates descriptors of the record flag fields stored
251     in the join buffer. These are descriptors for:
252     - an optional match flag field,
253     - table null bitmap fields,
254     - table null row fields.
255     The match flag field is created when 'join_tab' is the first inner
    table of an outer join or a semi-join. A null bitmap field is
257     created for any table whose fields are to be stored in the join
258     buffer if at least one of these fields is nullable or is a BIT field
259     whose bits are partially stored with null bits. A null row flag
260     is created for any table assigned to the cache if it is an inner
261     table of an outer join.
262     The descriptor for flag fields are placed one after another at the
263     beginning of the array of field descriptors 'field_descr' that
264     contains 'fields' elements. If there is a match flag field the
265     descriptor for it is always first in the sequence of flag fields.
266     The descriptors for other flag fields can follow in an arbitrary
267     order.
268     The flag field values follow in a record stored in the join buffer
269     in the same order as field descriptors, with the match flag always
270     following first.
271     The function sets the value of 'flag_fields' to the total number
272     of the descriptors created for the flag fields.
273     The function sets the value of 'length' to the total length of the
274     flag fields.
275 
276   RETURN
277     none
278 */
279 
void JOIN_CACHE::create_flag_fields()
{
  CACHE_FIELD *copy;
  JOIN_TAB *tab;

  /* Flag field descriptors occupy the front of the descriptor array */
  copy= field_descr;

  /* Running total of flag field lengths; data fields are added later */
  length=0;

  /* If there is a match flag the first field is always used for this flag */
  if (with_match_flag)
    length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
                                          sizeof(join_tab->found),
                                          &copy);

  /* Create fields for all null bitmaps and null row flags that are needed */
  for (tab= join_tab-tables; tab < join_tab; tab++)
  {
    TABLE *table= tab->table;

    /* Create a field for the null bitmap from table if needed */
    if (tab->used_null_fields || tab->used_uneven_bit_fields)
      length+= add_flag_field_to_join_cache(table->null_flags,
                                            table->s->null_bytes,
                                            &copy);

    /* Create a field for the null row flag if needed */
    if (table->maybe_null)
      length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
                                            sizeof(table->null_row),
                                            &copy);
  }

  /* Theoretically the new value of flag_fields can be less than the old one */
  flag_fields= copy-field_descr;
}
316 
317 
318 /*
319   Create descriptors of all remaining data fields stored in the join buffer
320 
321   SYNOPSIS
322     create_remaining_fields()
323       all_read_fields   indicates that descriptors for all read data fields
324                         are to be created
325 
326   DESCRIPTION
327     The function creates descriptors for all remaining data fields of a
328     record from the join buffer. If the parameter 'all_read_fields' is
329     true the function creates fields for all read record fields that
330     comprise the partial join record joined with join_tab. Otherwise,
331     for each table tab, the set of the read fields for which the descriptors
332     have to be added is determined as the difference between all read fields
    and those for which the descriptors have already been created.
334     The latter are supposed to be marked in the bitmap tab->table->tmp_set.
    The function increases the value of 'length' by the total length of
336     the added fields.
337 
338   NOTES
339     If 'all_read_fields' is false the function modifies the value of
340     tab->table->tmp_set for a each table whose fields are stored in the cache.
341     The function calls the method Field::fill_cache_field to figure out
342     the type of the cache field and the maximal length of its representation
343     in the join buffer. If this is a blob field then additionally a pointer
344     to this field is added as an element of the array blob_ptr. For a blob
345     field only the size of the length of the blob data is taken into account.
346     It is assumed that 'data_field_count' contains the number of descriptors
347     for data fields that have been already created and 'data_field_ptr_count'
348     contains the number of the pointers to such descriptors having been
349     stored up to the moment.
350 
351   RETURN
352     none
353 */
354 
void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
{
  JOIN_TAB *tab;
  /* Continue right after the descriptors created so far */
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= join_tab-tables; tab < join_tab; tab++)
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /*
        tmp_set marks fields whose descriptors already exist; turn it into
        the set of read fields that still lack descriptors:
        rem = read_set & ~tmp_set.
      */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      /* A hand-made descriptor: the rowid is copied from the handler's
         'ref' buffer and is not backed by any Field object */
      copy->str= table->file->ref;
      copy->length= table->file->ref_length;
      copy->type= 0;
      copy->field= 0;
      copy->referenced_field_no= 0;
      copy->next_copy_rowid= NULL;
      // Chain rowid copy objects belonging to same join_tab
      if (tab->copy_current_rowid != NULL)
        copy->next_copy_rowid= tab->copy_current_rowid;
      tab->copy_current_rowid= copy;
      length+= copy->length;
      data_field_count++;
      copy++;
    }
  }
}
399 
400 
401 /*
402   Calculate and set all cache constants
403 
404   SYNOPSIS
405     set_constants()
406 
407   DESCRIPTION
408     The function calculates and set all precomputed constants that are used
409     when writing records into the join buffer and reading them from it.
410     It calculates the size of offsets of a record within the join buffer
411     and of a field within a record. It also calculates the number of bytes
412     used to store record lengths.
413     The function also calculates the maximal length of the representation
414     of record in the cache excluding blob_data. This value is used when
    making a decision whether more records should be added into the join
416     buffer or not.
417 
418   RETURN
419     none
420 */
421 
void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() || with_match_flag;
  /*
     At this moment we don't know yet the value of 'referenced_fields',
     but in any case it can't be greater than the value of 'fields'.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong) + aux_buffer_min_size();
  /* Make the buffer large enough to hold at least two maximal-size records */
  buff_size= max<size_t>(join->thd->variables.join_buff_size, 2*len);
  /* Offsets into the whole buffer need to address up to buff_size bytes */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);

  check_only_first_match= calc_check_only_first_match(join_tab);
}
463 
464 
465 /**
466   Allocate memory for a join buffer.
467 
468   The function allocates a lump of memory for the join buffer. The
469   size of the allocated memory is 'buff_size' bytes.
470 
471   @returns false if success, otherwise true.
472 */
bool JOIN_CACHE::alloc_buffer()
{
  /*
    Debug hook: simulate a one-shot allocation failure when the
    'jb_alloc_fail' debug keyword is set, then clear the keyword so
    subsequent allocations succeed.
  */
  DBUG_EXECUTE_IF("jb_alloc_fail",
                   buff= NULL;
                   DBUG_SET("-d,jb_alloc_fail");
                   return true;
                  );

  buff= (uchar*) my_malloc(buff_size, MYF(0));
  return buff == NULL;   /* true means allocation failure */
}
484 
485 
486 /*
487   Initialize a BNL cache
488 
489   SYNOPSIS
490     init()
491 
492   DESCRIPTION
    The function initializes the cache structure. It is supposed to be called
494     right after a constructor for the JOIN_CACHE_BNL.
495     The function allocates memory for the join buffer and for descriptors of
496     the record fields stored in the buffer.
497 
498   NOTES
499     The code of this function should have been included into the constructor
500     code itself. However the new operator for the class JOIN_CACHE_BNL would
501     never fail while memory allocation for the join buffer is not absolutely
502     unlikely to fail. That's why this memory allocation has to be placed in a
503     separate function that is called in a couple with a cache constructor.
504     It is quite natural to put almost all other constructor actions into
505     this function.
506 
507   RETURN
508     0   initialization with buffer allocations has been succeeded
509     1   otherwise
510 */
511 
init()512 int JOIN_CACHE_BNL::init()
513 {
514   DBUG_ENTER("JOIN_CACHE::init");
515 
516   calc_record_fields();
517 
518   if (alloc_fields(0))
519     DBUG_RETURN(1);
520 
521   create_flag_fields();
522 
523   create_remaining_fields(TRUE);
524 
525   set_constants();
526 
527   if (alloc_buffer())
528     DBUG_RETURN(1);
529 
530   reset_cache(true);
531 
532   DBUG_RETURN(0);
533 }
534 
535 
536 /*
537   Initialize a BKA cache
538 
539   SYNOPSIS
540     init()
541 
542   DESCRIPTION
    The function initializes the cache structure. It is supposed to be called
544     right after a constructor for the JOIN_CACHE_BKA.
545     The function allocates memory for the join buffer and for descriptors of
546     the record fields stored in the buffer.
547 
548   NOTES
549     The code of this function should have been included into the constructor
550     code itself. However the new operator for the class JOIN_CACHE_BKA would
551     never fail while memory allocation for the join buffer is not absolutely
552     unlikely to fail. That's why this memory allocation has to be placed in a
553     separate function that is called in a couple with a cache constructor.
554     It is quite natural to put almost all other constructor actions into
555     this function.
556 
557   RETURN
558     0   initialization with buffer allocations has been succeeded
559     1   otherwise
560 */
561 
int JOIN_CACHE_BKA::init()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  /* Counters of key argument fields stored locally / in previous caches */
  local_key_arg_fields= 0;
  external_key_arg_fields= 0;
  DBUG_ENTER("JOIN_CACHE_BKA::init");

  calc_record_fields();

  /* Mark all fields that can be used as arguments for this key access */
  TABLE_REF *ref= &join_tab->ref;
  cache= this;
  do
  {
    /*
      Traverse the ref expressions and find the occurrences of fields in them for
      each table 'tab' whose fields are to be stored in the 'cache' join buffer.
      Mark these fields in the bitmap tab->table->tmp_set.
      For these fields count the number of them stored in this cache and the
      total number of them stored in the previous caches. Save the result
      of the counting 'in local_key_arg_fields' and 'external_key_arg_fields'
      respectively.
    */
    for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
    {
      uint key_args;
      bitmap_clear_all(&tab->table->tmp_set);
      for (uint i= 0; i < ref->key_parts; i++)
      {
        Item *ref_item= ref->items[i];
        /* Skip ref items that do not use any field of this table */
        if (!(tab->table->map & ref_item->used_tables()))
          continue;
        ref_item->walk(&Item::add_field_to_set_processor, 1,
                        (uchar *) tab->table);
      }
      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
      {
        if (cache == this)
          local_key_arg_fields+= key_args;
        else
          external_key_arg_fields+= key_args;
      }
    }
    cache= cache->prev_cache;
  }
  while (cache);

  if (alloc_fields(external_key_arg_fields))
    DBUG_RETURN(1);

  create_flag_fields();

  /*
    Save pointers to the cache fields in previous caches
    that  are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  while (ext_key_arg_cnt)
  {
    /* Walk backwards through the chain of previous caches */
    cache= cache->prev_cache;
    for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
                copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
              - set the offset number in copy->referenced_field_no,
              - adjust the value of the flag 'with_length',
              - adjust the values of 'pack_length' and
                of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            cache->with_length= TRUE;
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /* Now create local fields that are used to build ref for this key access */
  copy= field_descr+flag_fields;
  for (tab= join_tab-tables; tab < join_tab ; tab++)
  {
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  /* Decide whether key values can be read directly from the join buffer */
  use_emb_key= check_emb_key_usage();

  create_remaining_fields(FALSE);

  set_constants();

  if (alloc_buffer())
    DBUG_RETURN(1);

  reset_cache(true);

  DBUG_RETURN(0);
}
690 
691 
692 /*
693   Check the possibility to read the access keys directly from the join buffer
694 
695   SYNOPSIS
696     check_emb_key_usage()
697 
698   DESCRIPTION
699     The function checks some conditions at which the key values can be read
700     directly from the join buffer. This is possible when the key values can be
701     composed by concatenation of the record fields stored in the join buffer.
702     Sometimes when the access key is multi-component the function has to re-order
703     the fields written into the join buffer to make keys embedded. If key
704     values for the key access are detected as embedded then 'use_emb_key'
705     is set to TRUE.
706 
707   EXAMPLE
708     Let table t2 has an index defined on the columns a,b . Let's assume also
709     that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
710     of the integer type. Then if the query
711       SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
712     is executed with a join cache in such a way that t1 is the driving
713     table then the key values to access table t2 can be read directly
714     from the join buffer.
715 
716   NOTES
717     In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded. In the future we'll expand
    the class of keys which we identify as embedded.
720 
721   RETURN
722     TRUE  - key values will be considered as embedded,
723     FALSE - otherwise.
724 */
725 
check_emb_key_usage()726 bool JOIN_CACHE_BKA::check_emb_key_usage()
727 {
728   uint i;
729   Item *item;
730   KEY_PART_INFO *key_part;
731   CACHE_FIELD *copy;
732   CACHE_FIELD *copy_end;
733   uint len= 0;
734   TABLE *table= join_tab->table;
735   TABLE_REF *ref= &join_tab->ref;
736   KEY *keyinfo= table->key_info+ref->key;
737 
738   /*
739     If some of the key arguments are not from the local cache the key
740     is not considered as embedded.
741     TODO:
742     Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
743   */
744   if (external_key_arg_fields != 0)
745     return FALSE;
746   /*
747     If the number of the local key arguments is not equal to the number
748     of key parts the key value cannot be read directly from the join buffer.
749   */
750   if (local_key_arg_fields != ref->key_parts)
751     return FALSE;
752 
753   /*
754     A key is not considered embedded if one of the following is true:
755     - one of its key parts is not equal to a field
756     - it is a partial key
757     - definition of the argument field does not coincide with the
758       definition of the corresponding key component
759     - some of the key components are nullable
760   */
761   for (i=0; i < ref->key_parts; i++)
762   {
763     item= ref->items[i]->real_item();
764     if (item->type() != Item::FIELD_ITEM)
765       return FALSE;
766     key_part= keyinfo->key_part+i;
767     if (key_part->key_part_flag & HA_PART_KEY_SEG)
768       return FALSE;
769     if (!key_part->field->eq_def(((Item_field *) item)->field))
770       return FALSE;
771     if (key_part->field->maybe_null())
772     {
773       return FALSE;
774       /*
775         If this is changed so that embedded keys may contain nullable
776         components, get_next_key() and put_record() will have to test
777         ref->null_rejecting in the "embedded keys" case too.
778       */
779     }
780   }
781 
782   copy= field_descr+flag_fields;
783   copy_end= copy+local_key_arg_fields;
784   for ( ; copy < copy_end; copy++)
785   {
786     /*
787       If some of the key arguments are of variable length the key
788       is not considered as embedded.
789     */
790     if (copy->type != 0)
791       return FALSE;
792     /*
793       If some of the key arguments are bit fields whose bits are partially
794       stored with null bits the key is not considered as embedded.
795     */
796     if (copy->field->type() == MYSQL_TYPE_BIT &&
797 	 ((Field_bit*) (copy->field))->bit_len)
798       return FALSE;
799     len+= copy->length;
800   }
801 
802   emb_key_length= len;
803 
804   /*
805     Make sure that key fields follow the order of the corresponding
806     key components these fields are equal to. For this the descriptors
807     of the fields that comprise the key might be re-ordered.
808   */
809   for (i= 0; i < ref->key_parts; i++)
810   {
811     uint j;
812     Item *item= ref->items[i]->real_item();
813     Field *fld= ((Item_field *) item)->field;
814     CACHE_FIELD *init_copy= field_descr+flag_fields+i;
815     for (j= i, copy= init_copy; i < local_key_arg_fields;  i++, copy++)
816     {
817       if (fld->eq(copy->field))
818       {
819         if (j != i)
820         {
821           CACHE_FIELD key_part_copy= *copy;
822           *copy= *init_copy;
823           *init_copy= key_part_copy;
824         }
825         break;
826       }
827     }
828   }
829 
830   return TRUE;
831 }
832 
833 
834 /*
835   Calculate the increment of the MRR buffer for a record write
836 
837   SYNOPSIS
838     aux_buffer_incr()
839 
840   DESCRIPTION
841     This implementation of the virtual function aux_buffer_incr determines
842     for how much the size of the MRR buffer should be increased when another
843     record is added to the cache.
844 
845   RETURN
846     the increment of the size of the MRR buffer for the next record
847 */
848 
aux_buffer_incr()849 uint JOIN_CACHE_BKA::aux_buffer_incr()
850 {
851   uint incr= 0;
852   TABLE_REF *ref= &join_tab->ref;
853   TABLE *tab= join_tab->table;
854   uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
855   set_if_bigger(rec_per_key, 1);
856   if (records == 1)
857     incr=  ref->key_length + tab->file->ref_length;
858   /*
859     When adding a new record to the join buffer this can match
860     multiple keys in this table. We use rec_per_key as estimate for
861     the number of records that will match and reserve space in the
862     DS-MRR sort buffer for this many record references.
863   */
864   incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
865   return incr;
866 }
867 
868 
869 /**
  Calculate the minimum size for the MRR buffer.

  @return The minimum size that must be allocated for the MRR buffer
873 */
874 
aux_buffer_min_size() const875 uint JOIN_CACHE_BKA::aux_buffer_min_size() const
876 {
877   /*
878     For DS-MRR to work, the sort buffer must have space to store the
879     reference (or primary key) for at least one record.
880   */
881   return join_tab->table->file->stats.mrr_length_per_rec;
882 }
883 
884 
885 /*
886   Check if the record combination matches the index condition
887 
888   SYNOPSIS
889     JOIN_CACHE_BKA::skip_index_tuple()
890       rseq             Value returned by bka_range_seq_init()
891       range_info       MRR range association data
892 
893   DESCRIPTION
894     This function is invoked from MRR implementation to check if an index
895     tuple matches the index condition. It is used in the case where the index
896     condition actually depends on both columns of the used index and columns
897     from previous tables.
898 
899     Accessing columns of the previous tables requires special handling with
900     BKA. The idea of BKA is to collect record combinations in a buffer and
901     then do a batch of ref access lookups, i.e. by the time we're doing a
902     lookup its previous-records-combination is not in prev_table->record[0]
903     but somewhere in the join buffer.
904 
905     We need to get it from there back into prev_table(s)->record[0] before we
906     can evaluate the index condition, and that's why we need this function
907     instead of regular IndexConditionPushdown.
908 
909   NOTE
910     Possible optimization:
911     Before we unpack the record from a previous table
912     check if this table is used in the condition.
913     If so then unpack the record otherwise skip the unpacking.
914     This should be done by a special virtual method
915     get_partial_record_by_pos().
916 
917   RETURN
918     0    The record combination satisfies the index condition
919     1    Otherwise
920 */
921 
skip_index_tuple(range_seq_t rseq,char * range_info)922 bool JOIN_CACHE_BKA::skip_index_tuple(range_seq_t rseq, char *range_info)
923 {
924   DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
925   JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
926   cache->get_record_by_pos((uchar*)range_info);
927   DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
928 }
929 
930 
931 /*
932   Check if the record combination matches the index condition
933 
934   SYNOPSIS
935     bka_skip_index_tuple()
936       rseq             Value returned by bka_range_seq_init()
937       range_info       MRR range association data
938 
939   DESCRIPTION
940     This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
941     see comments there.
942 
943   NOTE
944     This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
945 
946   RETURN
947     0    The record combination satisfies the index condition
948     1    Otherwise
949 */
950 
951 static
bka_skip_index_tuple(range_seq_t rseq,char * range_info)952 bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
953 {
954   DBUG_ENTER("bka_skip_index_tuple");
955   JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
956   DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
957 }
958 
959 
960 /*
961   Write record fields and their required offsets into the join cache buffer
962 
963   SYNOPSIS
964     write_record_data()
965       link        a reference to the associated info in the previous cache
966       is_full OUT true if it has been decided that no more records will be
967                   added to the join buffer
968 
969   DESCRIPTION
970     This function put into the cache buffer the following info that it reads
971     from the join record buffers or computes somehow:
972     (1) the length of all fields written for the record (optional)
973     (2) an offset to the associated info in the previous cache (if there is any)
974         determined by the link parameter
975     (3) all flag fields of the tables whose data field are put into the cache:
976         - match flag (optional),
977         - null bitmaps for all tables,
978         - null row flags for all tables
979     (4) values of all data fields including
        - full images of those fixed length data fields that cannot have
          trailing spaces
        - significant part of fixed length fields that can have trailing spaces
          with the prepended length
        - data of non-blob variable length fields with the prepended data length
        - blob data from blob fields with the prepended data length
986     (5) record offset values for the data fields that are referred to from
987         other caches
988 
989     The record is written at the current position stored in the field 'pos'.
990     At the end of the function 'pos' points at the position right after the
991     written record data.
992     The function increments the number of records in the cache that is stored
993     in the 'records' field by 1. The function also modifies the values of
994     'curr_rec_pos' and 'last_rec_pos' to point to the written record.
995     The 'end_pos' cursor is modified accordingly.
996     The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
997     remains in the record buffers and not copied to the join buffer. It may
998     happen only to the blob data from the last record added into the cache.
999 
1000 
1001   RETURN
1002     length of the written record data
1003 */
1004 
uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
{
  uint len;
  bool last_record;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uchar *cp= pos;           /* write cursor into the join buffer */
  uchar *init_pos= cp;      /* start position, used to compute written length */
  uchar *rec_len_ptr= 0;    /* where the total record length will be stored */

  records++;  /* Increment the counter of records in the cache */

  len= pack_length;

  /* Make an adjustment for the size of the auxiliary buffer if there is any */
  uint incr= aux_buffer_incr();
  ulong rem= rem_space();
  /* Never let the auxiliary buffer grow beyond the remaining free space */
  aux_buff_size+= len+incr < rem ? incr : rem;

  /*
    For each blob to be put into cache save its length and a pointer
    to the value in the corresponding element of the blob_ptr array.
    Blobs with null values are skipped.
    Increment 'len' by the total length of all these blobs.
  */
  if (blobs)
  {
    CACHE_FIELD **copy_ptr= blob_ptr;
    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
    {
      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
      if (!blob_field->is_null())
      {
        uint blob_len= blob_field->get_length();
        (*copy_ptr)->blob_length= blob_len;
        len+= blob_len;
        blob_field->get_ptr(&(*copy_ptr)->str);
      }
    }
  }

  /*
    Check whether we won't be able to add any new record into the cache after
    this one because the cache will be full. Set last_record to TRUE if it's so.
    We assume that the cache will be full after the record has been written
    into it if either the remaining space of the cache is not big enough for the
    record's blob values or if there is a chance that not all non-blob fields
    of the next record can be placed there.
    This function is called only in the case when there is enough space left in
    the cache to store at least non-blob parts of the current record.
  */
  last_record= (len+pack_length_with_blob_ptrs) > rem_space();

  /*
    Save the position for the length of the record in the cache if it's needed.
    The length of the record will be inserted here when all fields of the record
    are put into the cache.
  */
  if (with_length)
  {
    rec_len_ptr= cp;
    cp+= size_of_rec_len;
  }

  /*
    Put a reference to the fields of the record that are stored in the previous
    cache if there is any. This reference is passed by the 'link' parameter.
  */
  if (prev_cache)
  {
    cp+= prev_cache->get_size_of_rec_offset();
    prev_cache->store_rec_ref(cp, link);
  }

  curr_rec_pos= cp;

  /* If there is a match flag, set its value to 0 (no match found yet) */
  copy= field_descr;
  if (with_match_flag)
    *copy[0].str= 0;

  /* First put into the cache the values of all flag fields */
  copy_end= field_descr+flag_fields;
  for ( ; copy < copy_end; copy++)
  {
    memcpy(cp, copy->str, copy->length);
    cp+= copy->length;
  }

  /* Now put the values of the remaining fields unless they are nulls */
  copy_end= field_descr+fields;
  for ( ; copy < copy_end; copy++)
  {
    Field *field= copy->field;
    if (field && field->maybe_null() && field->is_null())
    {
      /* Do not copy a field if its value is null; offset 0 marks it null */
      if (copy->referenced_field_no)
        copy->offset= 0;
      continue;
    }
    /* Save the offset of the field to put it later at the end of the record */
    if (copy->referenced_field_no)
      copy->offset= cp-curr_rec_pos;

    if (copy->type == CACHE_BLOB)
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      if (last_record)
      {
        /* Blob data of the last record is left in the record buffer */
        last_rec_blob_data_is_in_rec_buff= 1;
        /* Put down the length of the blob and the pointer to the data */
	blob_field->get_image(cp, copy->length+sizeof(char*),
                              blob_field->charset());
	cp+= copy->length+sizeof(char*);
      }
      else
      {
        /* First put down the length of the blob and then copy the data */
	blob_field->get_image(cp, copy->length,
			      blob_field->charset());
	memcpy(cp+copy->length, copy->str, copy->blob_length);
	cp+= copy->length+copy->blob_length;
      }
    }
    else
    {
      switch (copy->type) {
      case CACHE_VARSTR1:
        /* Copy the significant part of the short varstring field */
        len= (uint) copy->str[0] + 1;
        memcpy(cp, copy->str, len);
        cp+= len;
        break;
      case CACHE_VARSTR2:
        /* Copy the significant part of the long varstring field */
        len= uint2korr(copy->str) + 2;
        memcpy(cp, copy->str, len);
        cp+= len;
        break;
      case CACHE_STRIPPED:
      {
        /*
          Put down the field value stripping all trailing spaces off.
          After this insert the length of the written sequence of bytes.
        */
	uchar *str, *end;
	for (str= copy->str, end= str+copy->length;
	     end > str && end[-1] == ' ';
	     end--) ;
	len=(uint) (end-str);
        int2store(cp, len);
	memcpy(cp+2, str, len);
	cp+= len+2;
        break;
      }
      default:
        /* Copy the entire image of the field from the record buffer */
	memcpy(cp, copy->str, copy->length);
	cp+= copy->length;
      }
    }
  }

  /* Add the offsets of the fields that are referenced from other caches */
  if (referenced_fields)
  {
    uint cnt= 0;
    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
    {
      if (copy->referenced_field_no)
      {
        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
                         copy->offset);
        cnt++;
      }
    }
    cp+= size_of_fld_ofs*cnt;
  }

  /* Back-patch the total record length in front of the record, if kept */
  if (rec_len_ptr)
    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
  last_rec_pos= curr_rec_pos;
  end_pos= pos= cp;
  *is_full= last_record;
  return (uint) (cp-init_pos);
}
1193 
1194 
1195 /**
1196   @brief Reset the join buffer for reading/writing: default implementation
1197 
1198   @param for_writing  if it's TRUE the function reset the buffer for writing
1199 
1200   @details
1201     This default implementation of the virtual function reset_cache() resets
1202     the join buffer for reading or writing.
1203     If the buffer is reset for reading only the 'pos' value is reset
1204     to point to the very beginning of the join buffer. If the buffer is
1205     reset for writing additionally:
1206     - the counter of the records in the buffer is set to 0,
    - the value of 'last_rec_pos' is set to point at the position just
1208       before the buffer,
1209     - 'end_pos' is set to point to the beginning of the join buffer,
1210     - the size of the auxiliary buffer is reset to 0,
1211     - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
1212 */
1213 
reset_cache(bool for_writing)1214 void JOIN_CACHE::reset_cache(bool for_writing)
1215 {
1216   pos= buff;
1217   curr_rec_link= 0;
1218   if (for_writing)
1219   {
1220     records= 0;
1221     last_rec_pos= buff;
1222     aux_buff_size= 0;
1223     end_pos= pos;
1224     last_rec_blob_data_is_in_rec_buff= 0;
1225   }
1226 }
1227 
1228 /*
1229   Add a record into the join buffer: the default implementation
1230 
1231   SYNOPSIS
1232     put_record_in_cache()
1233 
1234   DESCRIPTION
1235     This default implementation of the virtual function put_record writes
1236     the next matching record into the join buffer.
1237     It also links the record having been written into the join buffer with
1238     the matched record in the previous cache if there is any.
1239     The implementation assumes that the function get_curr_link()
1240     will return exactly the pointer to this matched record.
1241 
1242   RETURN
1243     TRUE    if it has been decided that it should be the last record
1244             in the join buffer,
1245     FALSE   otherwise
1246 */
1247 
put_record_in_cache()1248 bool JOIN_CACHE::put_record_in_cache()
1249 {
1250   bool is_full;
1251   uchar *link= 0;
1252   if (prev_cache)
1253     link= prev_cache->get_curr_rec_link();
1254   write_record_data(link, &is_full);
1255   return (is_full);
1256 }
1257 
1258 
1259 /*
1260   Read the next record from the join buffer: the default implementation
1261 
1262   SYNOPSIS
1263     get_record()
1264 
1265   DESCRIPTION
1266     This default implementation of the virtual function get_record
1267     reads fields of the next record from the join buffer of this cache.
1268     The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1270     into the corresponding record buffers.
1271     It is supposed that 'pos' points to the position in the buffer
1272     right after the previous record when the function is called.
1273     When the function returns the 'pos' values is updated to point
1274     to the position after the read record.
1275     The value of 'curr_rec_pos' is also updated by the function to
1276     point to the beginning of the first field of the record in the
1277     join buffer.
1278 
1279   RETURN
1280     TRUE  - there are no more records to read from the join buffer
1281     FALSE - otherwise
1282 */
1283 
get_record()1284 bool JOIN_CACHE::get_record()
1285 {
1286   bool res;
1287   uchar *prev_rec_ptr= 0;
1288   if (with_length)
1289     pos+= size_of_rec_len;
1290   if (prev_cache)
1291   {
1292     pos+= prev_cache->get_size_of_rec_offset();
1293     prev_rec_ptr= prev_cache->get_rec_ref(pos);
1294   }
1295   curr_rec_pos= pos;
1296   res= (read_some_record_fields() == -1);
1297   if (!res)
1298   { // There are more records to read
1299     pos+= referenced_fields*size_of_fld_ofs;
1300     if (prev_cache)
1301     {
1302       /*
1303         read_some_record_fields() didn't read fields stored in previous
1304         buffers, read them now:
1305       */
1306       prev_cache->get_record_by_pos(prev_rec_ptr);
1307     }
1308   }
1309   return res;
1310 }
1311 
1312 
1313 /*
1314   Read a positioned record from the join buffer: the default implementation
1315 
1316   SYNOPSIS
1317     get_record_by_pos()
1318       rec_ptr  position of the first field of the record in the join buffer
1319 
1320   DESCRIPTION
1321     This default implementation of the virtual function get_record_pos
1322     reads the fields of the record positioned at 'rec_ptr' from the join buffer.
1323     The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1325     into the corresponding record buffers.
1326 
1327   RETURN
1328     none
1329 */
1330 
get_record_by_pos(uchar * rec_ptr)1331 void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
1332 {
1333   uchar *save_pos= pos;
1334   pos= rec_ptr;
1335   read_some_record_fields();
1336   pos= save_pos;
1337   if (prev_cache)
1338   {
1339     uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1340     prev_cache->get_record_by_pos(prev_rec_ptr);
1341   }
1342 }
1343 
1344 
1345 /*
1346   Test the match flag from the referenced record: the default implementation
1347 
1348   SYNOPSIS
1349     get_match_flag_by_pos()
1350       rec_ptr  position of the first field of the record in the join buffer
1351 
1352   DESCRIPTION
1353     This default implementation of the virtual function get_match_flag_by_pos
    tests the match flag for the record pointed by the reference at the position
    rec_ptr. If the match flag is placed in one of the previous buffers, the
    function first reaches the linked record fields in this buffer.
1357 
1358   RETURN
1359     TRUE    if the match flag is set on
1360     FALSE   otherwise
1361 */
1362 
get_match_flag_by_pos(uchar * rec_ptr)1363 bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
1364 {
1365   if (with_match_flag)
1366     return MY_TEST(*rec_ptr);
1367   if (prev_cache)
1368   {
1369     uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1370     return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
1371   }
1372   DBUG_ASSERT(1);
1373   return FALSE;
1374 }
1375 
1376 
1377 /**
1378   Read some flag and data fields of a record from the join buffer.
1379 
1380   Reads all fields (flag and data fields) stored in this join buffer, for the
1381   current record (at 'pos'). If the buffer is incremental, fields of this
1382   record which are stored in previous join buffers are _not_ read so remain
1383   unknown: caller must then make sure to call this function on previous
1384   buffers too.
1385 
1386   The fields are read starting from the position 'pos' which is
1387   supposed to point to the beginning of the first record field.
1388   The function increments the value of 'pos' by the length of the
1389   read data.
1390 
1391   Flag fields are copied back to their source; data fields are copied to the
1392   record's buffer.
1393 
1394   @retval (-1)   if there are no more records in the join buffer
1395   @retval <>(-1) length of the data read from the join buffer
1396 */
1397 
read_some_record_fields()1398 int JOIN_CACHE::read_some_record_fields()
1399 {
1400   uchar *init_pos= pos;
1401 
1402   if (pos > last_rec_pos || !records)
1403     return -1;
1404 
1405   // First match flag, read null bitmaps and null_row flag
1406   read_some_flag_fields();
1407 
1408   /* Now read the remaining table fields if needed */
1409   CACHE_FIELD *copy= field_descr+flag_fields;
1410   CACHE_FIELD *copy_end= field_descr+fields;
1411   bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
1412   for ( ; copy < copy_end; copy++)
1413     read_record_field(copy, blob_in_rec_buff);
1414 
1415   return (uint) (pos-init_pos);
1416 }
1417 
1418 
1419 /**
1420   Read some flag fields of a record from the join buffer.
1421 
1422   Reads all flag fields stored in this join buffer, for the current record (at
1423   'pos'). If the buffer is incremental, flag fields of this record which are
1424   stored in previous join buffers are _not_ read so remain unknown: caller
1425   must then make sure to call this function on previous buffers too.
1426 
1427   The flag fields are read starting from the position 'pos'.
1428   The function increments the value of 'pos' by the length of the
1429   read data.
1430 
1431   Flag fields are copied back to their source.
1432 */
read_some_flag_fields()1433 void JOIN_CACHE::read_some_flag_fields()
1434 {
1435   CACHE_FIELD *copy= field_descr;
1436   CACHE_FIELD *copy_end= copy+flag_fields;
1437   for ( ; copy < copy_end; copy++)
1438   {
1439     memcpy(copy->str, pos, copy->length);
1440     pos+= copy->length;
1441   }
1442 }
1443 
1444 
1445 /*
1446   Read a data record field from the join buffer
1447 
1448   SYNOPSIS
1449     read_record_field()
1450       copy             the descriptor of the data field to be read
1451       blob_in_rec_buff indicates whether this is the field from the record
1452                        whose blob data are in record buffers
1453 
1454   DESCRIPTION
1455     The function reads the data field specified by the parameter copy
1456     from the join buffer into the corresponding record buffer.
1457     The field is read starting from the position 'pos'.
1458     The data of blob values is not copied from the join buffer.
1459     The function increments the value of 'pos' by the length of the
1460     read data.
1461 
1462   RETURN
1463     length of the data read from the join buffer
1464 */
1465 
read_record_field(CACHE_FIELD * copy,bool blob_in_rec_buff)1466 uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
1467 {
1468   uint len;
1469   /* Do not copy the field if its value is null */
1470   if (copy->field && copy->field->maybe_null() && copy->field->is_null())
1471     return 0;
1472   if (copy->type == CACHE_BLOB)
1473   {
1474     Field_blob *blob_field= (Field_blob *) copy->field;
1475     /*
1476       Copy the length and the pointer to data but not the blob data
1477       itself to the record buffer
1478     */
1479     if (blob_in_rec_buff)
1480     {
1481       blob_field->set_image(pos, copy->length+sizeof(char*),
1482 			    blob_field->charset());
1483       len= copy->length+sizeof(char*);
1484     }
1485     else
1486     {
1487       blob_field->set_ptr(pos, pos+copy->length);
1488       len= copy->length+blob_field->get_length();
1489     }
1490   }
1491   else
1492   {
1493     switch (copy->type) {
1494     case CACHE_VARSTR1:
1495       /* Copy the significant part of the short varstring field */
1496       len= (uint) pos[0] + 1;
1497       memcpy(copy->str, pos, len);
1498       break;
1499     case CACHE_VARSTR2:
1500       /* Copy the significant part of the long varstring field */
1501       len= uint2korr(pos) + 2;
1502       memcpy(copy->str, pos, len);
1503       break;
1504     case CACHE_STRIPPED:
1505       /* Pad the value by spaces that has been stripped off */
1506       len= uint2korr(pos);
1507       memcpy(copy->str, pos+2, len);
1508       memset(copy->str+len, ' ', copy->length-len);
1509       len+= 2;
1510       break;
1511     default:
1512       /* Copy the entire image of the field from the record buffer */
1513       len= copy->length;
1514       memcpy(copy->str, pos, len);
1515     }
1516   }
1517   pos+= len;
1518   return len;
1519 }
1520 
1521 
1522 /*
1523   Read a referenced field from the join buffer
1524 
1525   SYNOPSIS
1526     read_referenced_field()
1527       copy         pointer to the descriptor of the referenced field
1528       rec_ptr      pointer to the record that may contain this field
1529       len  IN/OUT  total length of the record fields
1530 
1531   DESCRIPTION
1532     The function checks whether copy points to a data field descriptor
1533     for this cache object. If it does not then the function returns
1534     FALSE. Otherwise the function reads the field of the record in
1535     the join buffer pointed by 'rec_ptr' into the corresponding record
1536     buffer and returns TRUE.
1537     If the value of *len is 0 then the function sets it to the total
1538     length of the record fields including possible trailing offset
1539     values. Otherwise *len is supposed to provide this value that
1540     has been obtained earlier.
1541 
1542   RETURN
1543     TRUE   'copy' points to a data descriptor of this join cache
1544     FALSE  otherwise
1545 */
1546 
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* Bail out if 'copy' does not describe a data field of this cache */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    Offsets of referenced fields are stored at the tail of the record
    (see write_record_data()); pick the one matching this field's
    reference number.
  */
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  /* A zero offset marks a null value (written so by write_record_data()) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
    copy->field->set_null();
  else
  {
    /* Temporarily reposition 'pos' on the field, read it, then restore */
    uchar *save_pos= pos;
    copy->field->set_notnull();
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
1583 
1584 
1585 /*
1586   Skip record from join buffer if its match flag is on: default implementation
1587 
1588   SYNOPSIS
1589     skip_record_if_match()
1590 
1591   DESCRIPTION
1592     This default implementation of the virtual function skip_record_if_match
1593     skips the next record from the join buffer if its  match flag is set on.
    If the record is skipped the value of 'pos' is set to point to the position
1595     right after the record.
1596 
1597   RETURN
1598     TRUE  - the match flag is on and the record has been skipped
1599     FALSE - the match flag is off
1600 */
1601 
skip_record_if_match()1602 bool JOIN_CACHE::skip_record_if_match()
1603 {
1604   DBUG_ASSERT(with_match_flag && with_length);
1605   uint offset= size_of_rec_len;
1606   if (prev_cache)
1607     offset+= prev_cache->get_size_of_rec_offset();
1608   /* Check whether the match flag is on */
1609   if (MY_TEST(*(pos+offset)))
1610   {
1611     pos+= size_of_rec_len + get_rec_length(pos);
1612     return TRUE;
1613   }
1614   return FALSE;
1615 }
1616 
1617 
1618 /*
1619   Restore the fields of the last record from the join buffer
1620 
1621   SYNOPSIS
1622     restore_last_record()
1623 
1624   DESCRIPTION
    This function restores the values of the fields of the last record put
1626     into join buffer in record buffers. The values most probably have been
1627     overwritten by the field values from other records when they were read
1628     from the join buffer into the record buffer in order to check pushdown
1629     predicates.
1630 
1631   RETURN
1632     none
1633 */
1634 
restore_last_record()1635 void JOIN_CACHE::restore_last_record()
1636 {
1637   if (records)
1638     get_record_by_pos(last_rec_pos);
1639 }
1640 
1641 
1642 /*
1643   Join records from the join buffer with records from the next join table
1644 
1645   SYNOPSIS
1646     join_records()
1647       skip_last    do not find matches for the last record from the buffer
1648 
1649   DESCRIPTION
1650     The functions extends all records from the join buffer by the matched
1651     records from join_tab. In the case of outer join operation it also
1652     adds null complementing extensions for the records from the join buffer
1653     that have no match.
1654     No extensions are generated for the last record from the buffer if
1655     skip_last is true.
1656 
1657   NOTES
1658     The function must make sure that if linked join buffers are used then
1659     a join buffer cannot be refilled again until all extensions in the
1660     buffers chained to this one are generated.
1661     Currently an outer join operation with several inner tables always uses
1662     at least two linked buffers with the match join flags placed in the
1663     first buffer. Any record composed of rows of the inner tables that
1664     matches a record in this buffer must refer to the position of the
1665     corresponding match flag.
1666 
1667   IMPLEMENTATION
1668     When generating extensions for outer tables of an outer join operation
1669     first we generate all extensions for those records from the join buffer
1670     that have matches, after which null complementing extension for all
1671     unmatched records from the join buffer are generated.
1672 
1673   RETURN
1674     return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
1675 */
1676 
join_records(bool skip_last)1677 enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
1678 {
1679   enum_nested_loop_state rc= NESTED_LOOP_OK;
1680   DBUG_ENTER("JOIN_CACHE::join_records");
1681 
1682   table_map saved_status_bits[3]= {0, 0, 0};
1683   for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
1684   {
1685     /*
1686       We may have hit EOF on previous tables; this has set
1687       STATUS_NOT_FOUND in their status. However, now we are going to load
1688       table->record[0] from the join buffer so have to declare that there is a
1689       record. @See convert_constant_item().
1690       We first need to save bits of table->status; STATUS_DELETED and
1691       STATUS_UPDATED cannot be on as multi-table DELETE/UPDATE never use join
1692       buffering. So we only have three bits to save.
1693     */
1694     TABLE * const table= join_tab[- cnt].table;
1695     const uint8 status= table->status;
1696     const table_map map= table->map;
1697     DBUG_ASSERT((status & (STATUS_DELETED | STATUS_UPDATED)) == 0);
1698     if (status & STATUS_GARBAGE)
1699       saved_status_bits[0]|= map;
1700     if (status & STATUS_NOT_FOUND)
1701       saved_status_bits[1]|= map;
1702     if (status & STATUS_NULL_ROW)
1703       saved_status_bits[2]|= map;
1704     table->status= 0;                           // Record exists.
1705   }
1706 
1707   const bool outer_join_first_inner=
1708     join_tab->is_first_inner_for_outer_join();
1709   if (outer_join_first_inner && !join_tab->first_unmatched)
1710     join_tab->not_null_compl= TRUE;
1711 
1712   if (!join_tab->first_unmatched)
1713   {
1714     /* Find all records from join_tab that match records from join buffer */
1715     rc= join_matching_records(skip_last);
1716     if (rc != NESTED_LOOP_OK)
1717       goto finish;
1718     if (outer_join_first_inner)
1719     {
1720       if (next_cache)
1721       {
1722         /*
1723           Ensure that all matches for outer records from join buffer are to be
1724           found. Now we ensure that all full records are found for records from
1725           join buffer. Generally this is an overkill.
1726           TODO: Ensure that only matches of the inner table records have to be
1727           found for the records from join buffer.
1728 	*/
1729         rc= next_cache->join_records(skip_last);
1730         if (rc != NESTED_LOOP_OK)
1731           goto finish;
1732       }
1733       join_tab->not_null_compl= FALSE;
1734       /* Prepare for generation of null complementing extensions */
1735       for (JOIN_TAB *tab= join_tab->first_inner;
1736            tab <= join_tab->last_inner; tab++)
1737         tab->first_unmatched= join_tab->first_inner;
1738     }
1739   }
1740   if (join_tab->first_unmatched)
1741   {
1742     if (is_key_access())
1743       restore_last_record();
1744 
1745     /*
1746       Generate all null complementing extensions for the records from
1747       join buffer that don't have any matching rows from the inner tables.
1748     */
1749     reset_cache(false);
1750     rc= join_null_complements(skip_last);
1751     if (rc != NESTED_LOOP_OK)
1752       goto finish;
1753   }
1754   if(next_cache)
1755   {
1756     /*
1757       When using linked caches we must ensure the records in the next caches
1758       that refer to the records in the join buffer are fully extended.
1759       Otherwise we could have references to the records that have been
1760       already erased from the join buffer and replaced for new records.
1761     */
1762     rc= next_cache->join_records(skip_last);
1763     if (rc != NESTED_LOOP_OK)
1764       goto finish;
1765   }
1766 
1767   if (skip_last)
1768   {
1769     DBUG_ASSERT(!is_key_access());
1770     /*
1771        Restore the last record from the join buffer to generate
1772        all extensions for it.
1773     */
1774     get_record();
1775   }
1776 
1777 finish:
1778   if (outer_join_first_inner)
1779   {
1780     /*
1781       All null complemented rows have been already generated for all
1782       outer records from join buffer. Restore the state of the
1783       first_unmatched values to 0 to avoid another null complementing.
1784     */
1785     for (JOIN_TAB *tab= join_tab->first_inner;
1786          tab <= join_tab->last_inner; tab++)
1787       tab->first_unmatched= NULL;
1788   }
1789   for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
1790   {
1791     /*
1792       We must restore the status of outer tables as it was before entering
1793       this function.
1794     */
1795     TABLE * const table= join_tab[- cnt].table;
1796     const table_map map= table->map;
1797     uint8 status= 0;
1798     if (saved_status_bits[0] & map)
1799       status|= STATUS_GARBAGE;
1800     if (saved_status_bits[1] & map)
1801       status|= STATUS_NOT_FOUND;
1802     if (saved_status_bits[2] & map)
1803       status|= STATUS_NULL_ROW;
1804     table->status= status;
1805   }
1806   restore_last_record();
1807   reset_cache(true);
1808   DBUG_RETURN(rc);
1809 }
1810 
1811 
1812 /*
1813   Using BNL find matches from the next table for records from the join buffer
1814 
1815   SYNOPSIS
1816     join_matching_records()
1817       skip_last    do not look for matches for the last partial join record
1818 
1819   DESCRIPTION
1820     The function retrieves all rows of the join_tab table and check whether
1821     they match partial join records from the join buffer. If a match is found
1822     the function will call the sub_select function trying to look for matches
1823     for the remaining join operations.
1824     This function currently is called only from the function join_records.
1825     If the value of skip_last is true the function writes the partial join
1826     record from the record buffer into the join buffer to save its value for
1827     the future processing in the caller function.
1828 
1829   NOTES
1830     The function produces all matching extensions for the records in the
1831     join buffer following the path of the Blocked Nested Loops algorithm.
1832     When an outer join operation is performed all unmatched records from
1833     the join buffer must be extended by null values. The function
1834     'join_null_complements' serves this purpose.
1835 
1836   RETURN
1837     return one of enum_nested_loop_state.
1838 */
1839 
enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
{
  uint cnt;
  int error;
  READ_RECORD *info;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  SQL_SELECT *select= join_tab->cache_select;

  /* Real rows of the joined table are about to be read: clear NULL-row flag */
  join_tab->table->null_row= 0;

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record_in_cache();

  if (join_tab->use_quick == QS_DYNAMIC_RANGE && join_tab->select->quick)
    /* A dynamic range access was used last. Clean up after it */
    join_tab->select->set_quick(NULL);

  /*
    Start retrieving all records of the joined table.
    A negative error is mapped to NESTED_LOOP_OK: it means "no rows",
    which simply leaves all buffered records unmatched, not a failure.
  */
  if ((error= (*join_tab->read_first_record)(join_tab)))
    return error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR;

  info= &join_tab->read_record;
  do
  {
    /* Remember the rowid if a later weedout/positioning step needs it */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    if (join->thd->killed)
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      return NESTED_LOOP_KILLED;
    }

    /*
      Do not look for matches if the last read record of the joined table
      does not meet the conditions that have been pushed to this table
    */
    if (rc == NESTED_LOOP_OK)
    {
      bool skip_record;
      bool consider_record= (!select ||
                             (!select->skip_record(join->thd, &skip_record) &&
                              !skip_record));
      /* skip_record() may fail with an error (e.g. in condition evaluation) */
      if (select && join->thd->is_error())
        return NESTED_LOOP_ERROR;
      if (consider_record)
      {
        /* Prepare to read records from the join buffer */
        reset_cache(false);

        /*
          Read each record from the join buffer and look for matches.
          With skip_last, the final (saved) record is excluded from the scan.
        */
        for (cnt= records - MY_TEST(skip_last) ; cnt; cnt--)
        {
          /*
            If only the first match is needed and it has been already found for
            the next record read from the join buffer then the record is skipped.
          */
          if (!check_only_first_match || !skip_record_if_match())
          {
            get_record();
            rc= generate_full_extensions(get_curr_rec());
            if (rc != NESTED_LOOP_OK)
              return rc;
          }
        }
      }
    }
  } while (!(error= info->read_record(info)));

  if (error > 0)				// Fatal error
    rc= NESTED_LOOP_ERROR;
  return rc;
}
1924 
1925 
1926 /*
1927   Set match flag for a record in join buffer if it has not been set yet
1928 
1929   SYNOPSIS
1930     set_match_flag_if_none()
1931       first_inner     the join table to which this flag is attached to
1932       rec_ptr         pointer to the record in the join buffer
1933 
1934   DESCRIPTION
1935     If the records of the table are accumulated in a join buffer the function
1936     sets the match flag for the record in the buffer that is referred to by
1937     the record from this cache positioned at 'rec_ptr'.
1938     The function also sets the match flag 'found' of the table first inner
1939     if it has not been set before.
1940 
1941   NOTES
1942     The function assumes that the match flag for any record in any cache
1943     is placed in the first byte occupied by the record fields.
1944 
1945   RETURN
1946     TRUE   the match flag is set by this call for the first time
1947     FALSE  the match flag has been set before this call
1948 */
1949 
set_match_flag_if_none(JOIN_TAB * first_inner,uchar * rec_ptr)1950 bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
1951                                         uchar *rec_ptr)
1952 {
1953   if (!first_inner->op)
1954   {
1955     /*
1956       Records of the first inner table to which the flag is attached to
1957       are not accumulated in a join buffer.
1958     */
1959     if (first_inner->found)
1960       return FALSE;
1961     else
1962     {
1963       first_inner->found= 1;
1964       return TRUE;
1965     }
1966   }
1967   JOIN_CACHE *cache= this;
1968   while (cache->join_tab != first_inner)
1969   {
1970     cache= cache->prev_cache;
1971     DBUG_ASSERT(cache);
1972     rec_ptr= cache->get_rec_ref(rec_ptr);
1973   }
1974   if (rec_ptr[0] == 0)
1975   {
1976     rec_ptr[0]= 1;
1977     first_inner->found= 1;
1978     return TRUE;
1979   }
1980   return FALSE;
1981 }
1982 
1983 
1984 /*
1985   Generate all full extensions for a partial join record in the buffer
1986 
1987   SYNOPSIS
1988     generate_full_extensions()
1989       rec_ptr     pointer to the record from join buffer to generate extensions
1990 
1991   DESCRIPTION
1992     The function first checks whether the current record of 'join_tab' matches
1993     the partial join record from join buffer located at 'rec_ptr'. If it is the
1994     case the function calls the join_tab->next_select method to generate
1995     all full extension for this partial join match.
1996 
1997   RETURN
1998     return one of enum_nested_loop_state.
1999 */
2000 
generate_full_extensions(uchar * rec_ptr)2001 enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
2002 {
2003   enum_nested_loop_state rc= NESTED_LOOP_OK;
2004 
2005   /*
2006     Check whether the extended partial join record meets
2007     the pushdown conditions.
2008   */
2009   if (check_match(rec_ptr))
2010   {
2011     int res= 0;
2012     if (!join_tab->check_weed_out_table ||
2013         !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table)))
2014     {
2015       set_curr_rec_link(rec_ptr);
2016       rc= (join_tab->next_select)(join, join_tab+1, 0);
2017       if (rc != NESTED_LOOP_OK)
2018       {
2019         reset_cache(true);
2020         return rc;
2021       }
2022     }
2023     if (res == -1)
2024     {
2025       rc= NESTED_LOOP_ERROR;
2026       return rc;
2027     }
2028   }
2029   return rc;
2030 }
2031 
2032 
2033 /*
2034   Check matching to a partial join record from the join buffer
2035 
2036   SYNOPSIS
2037     check_match()
2038       rec_ptr     pointer to the record from join buffer to check matching to
2039 
2040   DESCRIPTION
2041     The function checks whether the current record of 'join_tab' matches
2042     the partial join record from join buffer located at 'rec_ptr'. If this is
2043     the case and 'join_tab' is the last inner table of a semi-join or an outer
2044     join the function turns on the match flag for the 'rec_ptr' record unless
2045     it has been already set.
2046 
2047   NOTES
2048     Setting the match flag on can trigger re-evaluation of pushdown conditions
2049     for the record when join_tab is the last inner table of an outer join.
2050 
2051   RETURN
2052     TRUE   there is a match
2053     FALSE  there is no match
2054 */
2055 
bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  bool skip_record;
  /* Check whether pushdown conditions are satisfied */
  if (join_tab->select &&
      (join_tab->select->skip_record(join->thd, &skip_record) || skip_record))
    return FALSE;

  /*
    A match flag only has to be maintained when join_tab is the last inner
    table of an outer join, or the last inner table of a semi-join executed
    with the FirstMatch strategy.
  */
  if (!((join_tab->first_inner &&
         join_tab->first_inner->last_inner == join_tab) ||
        (join_tab->last_sj_inner_tab == join_tab &&
         join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)))
    return TRUE; // not the last inner table

  /*
     This is the last inner table of an outer join,
     and maybe of other embedding outer joins, or
     this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->first_inner ?
    join_tab->first_inner :
    ((join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH) ?
     join_tab->first_sj_inner_tab : NULL);

  /*
    Set the match flag at every nesting level, climbing the chain of
    embedding outer joins via first_upper as long as join_tab is also
    the last inner table of the embedding join.
  */
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (calc_check_only_first_match(first_inner) &&
        !join_tab->first_inner)
      return TRUE;
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can  be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select &&
          (tab->select->skip_record(join->thd, &skip_record) || skip_record))
        return FALSE;
    }
  }
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);

  return TRUE;
}
2108 
2109 
2110 /*
2111   Add null complements for unmatched outer records from join buffer
2112 
2113   SYNOPSIS
2114     join_null_complements()
2115       skip_last    do not add null complements for the last record
2116 
2117   DESCRIPTION
2118     This function is called only for inner tables of outer joins.
2119     The function retrieves all rows from the join buffer and adds null
2120     complements for those of them that do not have matches for outer
2121     table records.
2122     If the 'join_tab' is the last inner table of the embedding outer
2123     join and the null complemented record satisfies the outer join
    condition then the corresponding match flag is turned on
2125     unless it has been set earlier. This setting may trigger
2126     re-evaluation of pushdown conditions for the record.
2127 
2128   NOTES
2129     The same implementation of the virtual method join_null_complements
2130     is used for JOIN_CACHE_BNL and JOIN_CACHE_BKA.
2131 
2132   RETURN
2133     return one of enum_nested_loop_state.
2134 */
2135 
join_null_complements(bool skip_last)2136 enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
2137 {
2138   uint cnt;
2139   enum_nested_loop_state rc= NESTED_LOOP_OK;
2140   bool is_first_inner= join_tab == join_tab->first_unmatched;
2141   DBUG_ENTER("JOIN_CACHE::join_null_complements");
2142 
2143   /* Return at once if there are no records in the join buffer */
2144   if (!records)
2145     DBUG_RETURN(NESTED_LOOP_OK);
2146 
2147   cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));
2148 
2149   /* This function may be called only for inner tables of outer joins */
2150   DBUG_ASSERT(join_tab->first_inner);
2151 
2152   // Make sure that the rowid buffer is bound, duplicates weedout needs it
2153   if (join_tab->copy_current_rowid &&
2154       !join_tab->copy_current_rowid->buffer_is_bound())
2155     join_tab->copy_current_rowid->bind_buffer(join_tab->table->file->ref);
2156 
2157   for ( ; cnt; cnt--)
2158   {
2159     if (join->thd->killed)
2160     {
2161       /* The user has aborted the execution of the query */
2162       join->thd->send_kill_message();
2163       rc= NESTED_LOOP_KILLED;
2164       goto finish;
2165     }
2166     /* Just skip the whole record if a match for it has been already found */
2167     if (!is_first_inner || !skip_record_if_match())
2168     {
2169       get_record();
2170       /* The outer row is complemented by nulls for each inner table */
2171       restore_record(join_tab->table, s->default_values);
2172       mark_as_null_row(join_tab->table);
2173       rc= generate_full_extensions(get_curr_rec());
2174       if (rc != NESTED_LOOP_OK)
2175         goto finish;
2176     }
2177   }
2178 
2179 finish:
2180   DBUG_RETURN(rc);
2181 }
2182 
2183 
2184 /*
2185   Initialize retrieval of range sequence for BKA algorithm
2186 
2187   SYNOPSIS
2188     bka_range_seq_init()
2189      init_params   pointer to the BKA join cache object
2190      n_ranges      the number of ranges obtained
2191      flags         combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
2192 
2193   DESCRIPTION
2194     The function interprets init_param as a pointer to a JOIN_CACHE_BKA
2195     object. The function prepares for an iteration over the join keys
2196     built for all records from the cache join buffer.
2197 
2198   NOTE
2199     This function are used only as a callback function.
2200 
2201   RETURN
2202     init_param value that is to be used as a parameter of bka_range_seq_next()
2203 */
2204 
2205 static
bka_range_seq_init(void * init_param,uint n_ranges,uint flags)2206 range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
2207 {
2208   DBUG_ENTER("bka_range_seq_init");
2209   JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
2210   cache->reset_cache(false);
2211   DBUG_RETURN((range_seq_t) init_param);
2212 }
2213 
2214 
2215 /*
2216   Get the key over the next record from the join buffer used by BKA
2217 
2218   SYNOPSIS
2219     bka_range_seq_next()
2220       seq    the value returned by  bka_range_seq_init
2221       range  OUT reference to the next range
2222 
2223   DESCRIPTION
2224     The function interprets seq as a pointer to a JOIN_CACHE_BKA
2225     object. The function returns a pointer to the range descriptor
2226     for the key built over the next record from the join buffer.
2227 
2228   NOTE
2229     This function are used only as a callback function.
2230 
2231   RETURN
2232     0   ok, the range structure filled with info about the next key
2233     1   no more ranges
2234 */
2235 
2236 static
bka_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)2237 uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
2238 {
2239   DBUG_ENTER("bka_range_seq_next");
2240   JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
2241   TABLE_REF *ref= &cache->join_tab->ref;
2242   key_range *start_key= &range->start_key;
2243   if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
2244   {
2245     start_key->keypart_map= (1 << ref->key_parts) - 1;
2246     start_key->flag= HA_READ_KEY_EXACT;
2247     range->end_key= *start_key;
2248     range->end_key.flag= HA_READ_AFTER_KEY;
2249     range->ptr= (char *) cache->get_curr_rec();
2250     range->range_flag= EQ_RANGE;
2251     DBUG_RETURN(0);
2252   }
2253   DBUG_RETURN(1);
2254 }
2255 
2256 
2257 /*
2258   Check whether range_info orders to skip the next record from BKA buffer
2259 
2260   SYNOPSIS
2261     bka_range_seq_skip_record()
2262       seq              value returned by bka_range_seq_init()
2263       range_info       information about the next range
2264       rowid [NOT USED] rowid of the record to be checked
2265 
2266 
2267   DESCRIPTION
2268     The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
2269     The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
2270     object. The function returns TRUE if the record with this range_info
2271     is to be filtered out from the stream of records returned by
2272     multi_range_read_next().
2273 
2274   NOTE
2275     This function are used only as a callback function.
2276 
2277   RETURN
2278     1    record with this range_info is to be filtered out from the stream
2279          of records returned by multi_range_read_next()
2280     0    the record is to be left in the stream
2281 */
2282 
2283 static
bka_range_seq_skip_record(range_seq_t rseq,char * range_info,uchar * rowid)2284 bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
2285 {
2286   DBUG_ENTER("bka_range_seq_skip_record");
2287   JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
2288   bool res= cache->get_match_flag_by_pos((uchar *) range_info);
2289   DBUG_RETURN(res);
2290 }
2291 
2292 /*
2293   Using BKA find matches from the next table for records from the join buffer
2294 
2295   SYNOPSIS
2296     join_matching_records()
2297       skip_last    do not look for matches for the last partial join record
2298 
2299   DESCRIPTION
2300     This function can be used only when the table join_tab can be accessed
2301     by keys built over the fields of previous join tables.
2302     The function retrieves all partial join records from the join buffer and
2303     for each of them builds the key value to access join_tab, performs index
2304     look-up with this key and selects matching records yielded by this look-up
2305     If a match is found the function will call the sub_select function trying
2306     to look for matches for the remaining join operations.
2307     This function currently is called only from the function join_records.
2308     It's assumed that this function is always called with the skip_last
2309     parameter equal to false.
2310 
2311   NOTES
2312     The function produces all matching extensions for the records in the
2313     join buffer following the path of the Batched Key Access algorithm.
2314     When an outer join operation is performed all unmatched records from
2315     the join buffer must be extended by null values. The function
2316     join_null_complements serves this purpose.
2317     The Batched Key Access algorithm assumes that key accesses are batched.
2318     In other words it assumes that, first, either keys themselves or the
2319     corresponding rowids (primary keys) are accumulated in a buffer, then
2320     data rows from  join_tab are fetched for all of them. When a row is
2321     fetched it is always returned with a reference to the key by which it
2322     has been accessed.
2323     When key values are batched we can save on the number of the server
2324     requests for index lookups. For the remote engines, like NDB cluster, it
2325     essentially reduces the number of round trips between the server and
2326     the engine when performing a join operation.
2327     When the rowids for the keys are batched we can optimize the order
2328     in what we fetch the data for this rowids. The performance benefits of
2329     this optimization can be significant for such engines as MyISAM, InnoDB.
2330     What is exactly batched are hidden behind implementations of
2331     MRR handler interface that is supposed to be appropriately chosen
2332     for each engine. If for a engine no specific implementation of the MRR
    interface is supplied then the default implementation is used. This
2334     implementation actually follows the path of Nested Loops Join algorithm.
2335     In this case BKA join surely will demonstrate a worse performance than
2336     NL join.
2337 
2338   RETURN
2339     return one of enum_nested_loop_state
2340 */
2341 
enum_nested_loop_state JOIN_CACHE_BKA::join_matching_records(bool skip_last)
{
  /* The value of skip_last must be always FALSE when this function is called */
  DBUG_ASSERT(!skip_last);

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  /*
    Set functions to iterate over keys in the join buffer.
    The skip-record callback is only installed when first-match semantics
    applies; the skip-index-tuple callback only when there is a pushed
    index condition.
  */
  RANGE_SEQ_IF seq_funcs= { bka_range_seq_init,
                            bka_range_seq_next,
                            check_only_first_match ?
                              bka_range_seq_skip_record : 0,
                            join_tab->cache_idx_cond ?
                              bka_skip_index_tuple : 0 };

  /* Start the MRR scan over the keys built from the buffered records */
  if (init_join_matching_records(&seq_funcs, records))
    return NESTED_LOOP_ERROR;

  int error;
  handler *file= join_tab->table->file;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  uchar *rec_ptr= NULL;

  /*
    Fetch candidate rows one by one; each comes back with rec_ptr set to
    the join buffer record whose key produced it (set in range->ptr by
    bka_range_seq_next).
  */
  while (!(error= file->multi_range_read_next((char **) &rec_ptr)))
  {
    if (join->thd->killed)
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      return NESTED_LOOP_KILLED;
    }
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);
    /*
      If only the first match is needed and it has been already found
      for the associated partial join record then the returned candidate
      is discarded.
    */
    if (rc == NESTED_LOOP_OK &&
        (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
    {
      get_record_by_pos(rec_ptr);
      rc= generate_full_extensions(rec_ptr);
      if (rc != NESTED_LOOP_OK)
        return rc;
    }
  }

  /* HA_ERR_END_OF_FILE just terminates the scan; other codes are fatal */
  if (error > 0 && error != HA_ERR_END_OF_FILE)
    return NESTED_LOOP_ERROR;
  return rc;
}
2396 
2397 
2398 
2399 /*
2400   Prepare to search for records that match records from the join buffer
2401 
2402   SYNOPSIS
2403     init_join_matching_records()
2404       seq_funcs    structure of range sequence interface
2405       ranges       number of keys/ranges in the sequence
2406 
2407   DESCRIPTION
2408     This function calls the multi_range_read_init function to set up
2409     the BKA process of generating the keys from the records in the join
2410     buffer and looking for matching records from the table to be joined.
2411     The function passes as a parameter a structure of functions that
2412     implement the range sequence interface. This interface is used to
2413     enumerate all generated keys and optionally to filter the matching
2414     records returned by the multi_range_read_next calls from the
2415     intended invocation of the join_matching_records method. The
2416     multi_range_read_init function also receives the parameters for
2417     MRR buffer to be used and flags specifying the mode in which
2418     this buffer will be functioning.
2419     The number of keys in the sequence expected by multi_range_read_init
2420     is passed through the parameter ranges.
2421 
2422   RETURN
2423     False if ok, True otherwise.
2424 */
2425 
2426 bool
init_join_matching_records(RANGE_SEQ_IF * seq_funcs,uint ranges)2427 JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
2428 {
2429   handler *file= join_tab->table->file;
2430 
2431   join_tab->table->null_row= 0;
2432 
2433   /* Dynamic range access is never used with BKA */
2434   DBUG_ASSERT(join_tab->use_quick != QS_DYNAMIC_RANGE);
2435 
2436   init_mrr_buff();
2437 
2438   /*
2439     Prepare to iterate over keys from the join buffer and to get
2440     matching candidates obtained with MMR handler functions.
2441   */
2442   if (!file->inited)
2443   {
2444     const int error= file->ha_index_init(join_tab->ref.key, 1);
2445     if (error)
2446     {
2447       file->print_error(error, MYF(0));
2448       return error;
2449     }
2450   }
2451   return
2452     file->multi_range_read_init(seq_funcs, (void*) this, ranges,
2453                                 mrr_mode, &mrr_buff);
2454 }
2455 
2456 
2457 /**
2458   Reads all flag fields of a positioned record from the join buffer.
2459   Including all flag fields (of this record) stored in the previous join
2460   buffers.
2461 
2462   @param rec_ptr  position of the first field of the record in the join buffer
2463  */
read_all_flag_fields_by_pos(uchar * rec_ptr)2464 void JOIN_CACHE::read_all_flag_fields_by_pos(uchar *rec_ptr)
2465 {
2466   uchar * const save_pos= pos;
2467   pos= rec_ptr;
2468   read_some_flag_fields();                      // moves 'pos'...
2469   pos= save_pos;                                // ... so we restore it.
2470   if (prev_cache)
2471   {
2472     // position of this record in previous join buffer:
2473     rec_ptr= prev_cache->get_rec_ref(rec_ptr);
2474     // recurse into previous buffer to read missing flag fields
2475     prev_cache->read_all_flag_fields_by_pos(rec_ptr);
2476   }
2477 }
2478 
2479 
2480 /*
2481   Get the key built over the next record from BKA join buffer
2482 
2483   SYNOPSIS
2484     get_next_key()
2485       key    pointer to the buffer where the key value is to be placed
2486 
2487   DESCRIPTION
2488     The function reads key fields from the current record in the join buffer.
2489     and builds the key value out of these fields that will be used to access
2490     the 'join_tab' table. Some of key fields may belong to previous caches.
2491     They are accessed via record references to the record parts stored in the
2492     previous join buffers. The other key fields always are placed right after
2493     the flag fields of the record.
2494     If the key is embedded, which means that its value can be read directly
2495     from the join buffer, then *key is set to the beginning of the key in
2496     this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
    The function returns the length of the key if it succeeds to read it.
    It is assumed that the function starts reading at the position of
2499     the record length which is provided for each records in a BKA cache.
2500     After the key is built the 'pos' value points to the first position after
2501     the current record.
2502     The function returns 0 if the initial position is after the beginning
2503     of the record fields for last record from the join buffer.
2504 
2505   RETURN
2506     length of the key value - if the starting value of 'pos' points to
2507     the position before the fields for the last record,
2508     0 - otherwise.
2509 */
2510 
uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
{
  uint len;
  uint32 rec_len;
  uchar *init_pos;
  JOIN_CACHE *cache;

  /* Nothing was buffered: there are no keys to return */
  if (records == 0)
    return 0;

  /* Any record in a BKA cache is prepended with its length, which we need */
  DBUG_ASSERT(with_length);

  /*
    Read keys until find non-ignorable one or EOF.
    Unlike in JOIN_CACHE::read_some_record_fields()), pos>=last_rec_pos means
    EOF, because we are not at fields' start, and previous record's fields
    might be empty.
  */
  for(len= 0 ; (len == 0) && pos < last_rec_pos ; pos= init_pos + rec_len)
  {
    /* Read the length of the record */
    rec_len= get_rec_length(pos);
    pos+= size_of_rec_len;
    init_pos= pos;   /* start of the record proper; loop update skips to next */

    /* Read a reference to the previous cache if any */
    uchar *prev_rec_ptr= NULL;
    if (prev_cache)
    {
      pos+= prev_cache->get_size_of_rec_offset();
      // position of this record in previous buffer:
      prev_rec_ptr= prev_cache->get_rec_ref(pos);
    }

    curr_rec_pos= pos;

    // Read all flag fields of the record, in two steps:
    read_some_flag_fields();      // 1) flag fields stored in this buffer
    if (prev_cache)               // 2) flag fields stored in previous buffers
      prev_cache->read_all_flag_fields_by_pos(prev_rec_ptr);

    if (use_emb_key)
    {
      /* An embedded key is taken directly from the join buffer */
      *key= pos;
      len= emb_key_length;
      DBUG_ASSERT(len != 0);
    }
    else
    {
      /*
        Read key arguments from previous caches if there are any such
        fields
      */
      if (external_key_arg_fields)
      {
        uchar *rec_ptr= curr_rec_pos;
        uint key_arg_count= external_key_arg_fields;
        CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
        for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
        {
          uint len2= 0;
          DBUG_ASSERT(cache);
          rec_ptr= cache->get_rec_ref(rec_ptr);
          /* Skip over caches in the chain that carry no referenced fields */
          while (!cache->referenced_fields)
          {
            cache= cache->prev_cache;
            DBUG_ASSERT(cache);
            rec_ptr= cache->get_rec_ref(rec_ptr);
          }
          /* Copy as many key-argument fields as this cache provides */
          while (key_arg_count &&
                 cache->read_referenced_field(*copy_ptr, rec_ptr, &len2))
          {
            copy_ptr++;
            --key_arg_count;
          }
        }
      }

      /*
         Read the other key arguments from the current record. The fields for
         these arguments are always first in the sequence of the record's
         fields.
      */
      CACHE_FIELD *copy= field_descr+flag_fields;
      CACHE_FIELD *copy_end= copy+local_key_arg_fields;
      bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
      for ( ; copy < copy_end; copy++)
        read_record_field(copy, blob_in_rec_buff);

      TABLE_REF *ref= &join_tab->ref;
      if (ref->impossible_null_ref())
      {
        DBUG_PRINT("info", ("JOIN_CACHE_BKA::get_next_key null_rejected"));
        /* this key cannot give a match, don't collect it, go read next key */
        len= 0;
      }
      else
      {
        /* Build the key over the fields read into the record buffers */
        cp_buffer_from_ref(join->thd, join_tab->table, ref);
        *key= ref->key_buff;
        len= ref->key_length;
        DBUG_ASSERT(len != 0);
      }
    }
  }
  return len;
}
2621 
2622 
2623 /*
2624   Initialize a BKA_UNIQUE cache
2625 
2626   SYNOPSIS
2627     init()
2628 
2629   DESCRIPTION
    The function initializes the cache structure. It is supposed to be called
    right after the constructor for the JOIN_CACHE_BKA_UNIQUE.
2632     The function allocates memory for the join buffer and for descriptors of
2633     the record fields stored in the buffer.
2634     The function also estimates the number of hash table entries in the hash
2635     table to be used and initializes this hash table.
2636 
2637   NOTES
    The code of this function should have been included into the constructor
    code itself. However the new operator for the class JOIN_CACHE_BKA_UNIQUE
    would never fail, while the memory allocation for the join buffer may
    well fail. That's why this memory allocation has to be placed in a
    separate function that is called in tandem with the cache
    constructor.
2644     It is quite natural to put almost all other constructor actions into
2645     this function.
2646 
2647   RETURN
2648     0   initialization with buffer allocations has been succeeded
2649     1   otherwise
2650 */
2651 
int JOIN_CACHE_BKA_UNIQUE::init()
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::init");

  hash_table= 0;
  key_entries= 0;

  /* Let the BKA base class allocate the buffer and the field descriptors */
  if ((rc= JOIN_CACHE_BKA::init()))
    DBUG_RETURN (rc);

  key_length= ref->key_length;

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= max(2U, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() + // key chain header
                      size_of_key_ofs +          // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    /* Number of records that fit together with their key entries */
    uint n= buff_size / (pack_length+key_entry_length+size_of_key_ofs);

    /*
      TODO: Make a better estimate for this upper bound of
            the number of records in the join buffer.
    */
    uint max_n= buff_size / (pack_length-length+
                             key_entry_length+size_of_key_ofs);

    /* Size the hash table for a load factor of roughly 0.7 */
    hash_entries= (uint) (n / 0.7);

    /* Stop once size_of_key_ofs is wide enough to address all key entries */
    if (offset_size(max_n*key_entry_length) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  pack_length+= key_entry_length;
  pack_length_with_blob_ptrs+= get_size_of_rec_offset() + key_entry_length;

  /* Offset from a record's chain-link element to its stored fields */
  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With embedded keys the data fields start right after all flag fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(rc);
}
2718 
2719 
2720 /*
2721   Reset the JOIN_CACHE_BKA_UNIQUE  buffer for reading/writing
2722 
2723   SYNOPSIS
2724     reset_cache()
2725       for_writing  if it's TRUE the function reset the buffer for writing
2726 
2727   DESCRIPTION
2728     This implementation of the virtual function reset_cache() resets the join
2729     buffer of the JOIN_CACHE_BKA_UNIQUE class for reading or writing.
2730     Additionally to what the default implementation does this function
2731     cleans up the hash table allocated within the buffer.
2732 
2733   RETURN
2734     none
2735 */
2736 
reset_cache(bool for_writing)2737 void JOIN_CACHE_BKA_UNIQUE::reset_cache(bool for_writing)
2738 {
2739   this->JOIN_CACHE::reset_cache(for_writing);
2740   if (for_writing && hash_table)
2741     cleanup_hash_table();
2742   curr_key_entry= hash_table;
2743 }
2744 
2745 /*
2746   Add a record into the JOIN_CACHE_BKA_UNIQUE buffer
2747 
2748   SYNOPSIS
2749     put_record()
2750 
2751   DESCRIPTION
2752     This implementation of the virtual function put_record writes the next
2753     matching record into the join buffer of the JOIN_CACHE_BKA_UNIQUE class.
2754     Additionally to what the default implementation does this function
2755     performs the following.
2756     It extracts from the record the key value used in lookups for matching
2757     records and searches for this key in the hash tables from the join cache.
2758     If it finds the key in the hash table it joins the record to the chain
2759     of records with this key. If the key is not found in the hash table the
2760     key is placed into it and a chain containing only the newly added record
2761     is attached to the key entry. The key value is either placed in the hash
2762     element added for the key or, if the use_emb_key flag is set, remains in
2763     the record from the partial join.
2764 
2765   RETURN
2766     TRUE    if it has been decided that it should be the last record
2767             in the join buffer,
2768     FALSE   otherwise
2769 */
2770 
2771 bool
bool
JOIN_CACHE_BKA_UNIQUE::put_record_in_cache()
{
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  TABLE_REF *ref= &join_tab->ref;
  /* Reserve space for the chain-link element that precedes every record */
  uchar *next_ref_ptr= pos;
  pos+= get_size_of_rec_offset();

  // Write record to join buffer
  bool is_full= JOIN_CACHE::put_record_in_cache();

  if (use_emb_key)
  {
     key= get_curr_emb_key();
    // Embedded is not used if one of the key columns is nullable
  }
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
    if (ref->impossible_null_ref())
    {
      /*
        The row just put into the buffer has a NULL-value for one of
        the ref-columns and the ref access is NULL-rejecting, this key cannot
        give a match. So we don't insert it into the hash table.
        We still stored the record into the buffer (put_record() call above),
        or we would later miss NULL-complementing of this record.
      */
      DBUG_PRINT("info", ("JOIN_CACHE_BKA_UNIQUE::put_record null_rejected"));
      return is_full;
    }
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flatten representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
    */
    /* Key entries grow downwards from last_key_entry (end of the buffer) */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* One-element circular chain: the record references itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key embedded in the record itself */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Store a full copy of the key value */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
2857 
2858 
2859 /*
2860   Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer
2861 
2862   SYNOPSIS
2863     get_record()
2864 
2865   DESCRIPTION
2866     Additionally to what the default implementation of the virtual
2867     function get_record does this implementation skips the link element
2868     used to connect the records with the same key into a chain.
2869 
2870   RETURN
2871     TRUE  - there are no more records to read from the join buffer
2872     FALSE - otherwise
2873 */
2874 
get_record()2875 bool JOIN_CACHE_BKA_UNIQUE::get_record()
2876 {
2877   pos+= get_size_of_rec_offset();
2878   return this->JOIN_CACHE::get_record();
2879 }
2880 
2881 
2882 /*
2883   Skip record from the JOIN_CACHE_BKA_UNIQUE join buffer if its match flag is on
2884 
2885   SYNOPSIS
2886     skip_record_if_match()
2887 
2888   DESCRIPTION
2889     This implementation of the virtual function skip_record_if_match does
2890     the same as the default implementation does, but it takes into account
2891     the link element used to connect the records with the same key into a chain.
2892 
2893   RETURN
2894     TRUE  - the match flag is on and the record has been skipped
2895     FALSE - the match flag is off
2896 */
2897 
skip_record_if_match()2898 bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
2899 {
2900   uchar *save_pos= pos;
2901   pos+= get_size_of_rec_offset();
2902   if (!this->JOIN_CACHE::skip_record_if_match())
2903   {
2904     pos= save_pos;
2905     return FALSE;
2906   }
2907   return TRUE;
2908 }
2909 
2910 
2911 /*
2912   Search for a key in the hash table of the join buffer
2913 
2914   SYNOPSIS
2915     key_search()
2916       key             pointer to the key value
2917       key_len         key value length
2918       key_ref_ptr OUT position of the reference to the next key from
                      the hash element for the found key, or
                      a position where the reference to the hash
2921                       element for the key is to be added in the
2922                       case when the key has not been found
2923 
2924   DESCRIPTION
2925     The function looks for a key in the hash table of the join buffer.
    If the key is found the function returns the position of the reference
    to the next key in the hash element for the given key.
2928     Otherwise the function returns the position where the reference to the
2929     newly created hash element for the given key is to be added.
2930 
2931   RETURN
2932     TRUE  - the key is found in the hash table
2933     FALSE - otherwise
2934 */
2935 
key_search(uchar * key,uint key_len,uchar ** key_ref_ptr)2936 bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
2937                                        uchar **key_ref_ptr)
2938 {
2939   bool is_found= FALSE;
2940   uint idx= get_hash_idx(key, key_length);
2941   uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
2942   while (!is_null_key_ref(ref_ptr))
2943   {
2944     uchar *next_key;
2945     ref_ptr= get_next_key_ref(ref_ptr);
2946     next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
2947                             ref_ptr-key_length;
2948 
2949     if (memcmp(next_key, key, key_len) == 0)
2950     {
2951       is_found= TRUE;
2952       break;
2953     }
2954   }
2955   *key_ref_ptr= ref_ptr;
2956   return is_found;
2957 }
2958 
2959 
2960 /*
  Calculate hash value for a key in the hash table of the join buffer
2962 
2963   SYNOPSIS
2964     get_hash_idx()
2965       key             pointer to the key value
2966       key_len         key value length
2967 
2968   DESCRIPTION
2969     The function calculates an index of the hash entry in the hash table
2970     of the join buffer for the given key
2971 
2972   RETURN
2973     the calculated index of the hash entry for the given key.
2974 */
2975 
get_hash_idx(uchar * key,uint key_len)2976 uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
2977 {
2978   ulong nr= 1;
2979   ulong nr2= 4;
2980   uchar *pos= key;
2981   uchar *end= key+key_len;
2982   for (; pos < end ; pos++)
2983   {
2984     nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
2985     nr2+= 3;
2986   }
2987   return nr % hash_entries;
2988 }
2989 
2990 
2991 /*
2992   Clean up the hash table of the join buffer
2993 
2994   SYNOPSIS
    cleanup_hash_table()
2998 
2999   DESCRIPTION
3000     The function cleans up the hash table in the join buffer removing all
3001     hash elements from the table.
3002 
3003   RETURN
3004     none
3005 */
3006 
cleanup_hash_table()3007 void JOIN_CACHE_BKA_UNIQUE:: cleanup_hash_table()
3008 {
3009   last_key_entry= hash_table;
3010   memset(hash_table, 0, (buff+buff_size)-hash_table);
3011   key_entries= 0;
3012 }
3013 
3014 
3015 /*
3016   Initialize retrieval of range sequence for BKA_UNIQUE algorithm
3017 
3018   SYNOPSIS
3019     bka_range_seq_init()
      init_params   pointer to the BKA_UNIQUE join cache object
3021       n_ranges      the number of ranges obtained
3022       flags         combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
3023 
3024   DESCRIPTION
3025     The function interprets init_param as a pointer to a JOIN_CACHE_BKA_UNIQUE
3026     object. The function prepares for an iteration over the unique join keys
3027     built over the records from the cache join buffer.
3028 
3029   NOTE
    This function is used only as a callback function.
3031 
3032   RETURN
3033     init_param    value that is to be used as a parameter of
3034                   bka_unique_range_seq_next()
3035 */
3036 
3037 static
bka_unique_range_seq_init(void * init_param,uint n_ranges,uint flags)3038 range_seq_t bka_unique_range_seq_init(void *init_param, uint n_ranges,
3039                                       uint flags)
3040 {
3041   DBUG_ENTER("bka_unique_range_seq_init");
3042   JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) init_param;
3043   cache->reset_cache(false);
3044   DBUG_RETURN((range_seq_t) init_param);
3045 }
3046 
3047 
3048 /*
3049   Get the key over the next record from the join buffer used by BKA_UNIQUE
3050 
3051   SYNOPSIS
3052     bka_unique_range_seq_next()
3053       seq        value returned by  bka_unique_range_seq_init()
3054       range  OUT reference to the next range
3055 
3056   DESCRIPTION
3057     The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3058     object. The function returns a pointer to the range descriptor
3059     for the next unique key built over records from the join buffer.
3060 
3061   NOTE
    This function is used only as a callback function.
3063 
3064   RETURN
3065     0    ok, the range structure filled with info about the next key
3066     1    no more ranges
3067 */
3068 
3069 static
bka_unique_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)3070 uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
3071 {
3072   DBUG_ENTER("bka_unique_range_seq_next");
3073   JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3074   TABLE_REF *ref= &cache->join_tab->ref;
3075   key_range *start_key= &range->start_key;
3076   if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
3077   {
3078     start_key->keypart_map= (1 << ref->key_parts) - 1;
3079     start_key->flag= HA_READ_KEY_EXACT;
3080     range->end_key= *start_key;
3081     range->end_key.flag= HA_READ_AFTER_KEY;
3082     range->ptr= (char *) cache->get_curr_key_chain();
3083     range->range_flag= EQ_RANGE;
3084     DBUG_RETURN(0);
3085   }
3086   DBUG_RETURN(1);
3087 }
3088 
3089 
3090 /*
3091   Check whether range_info orders to skip the next record from BKA_UNIQUE buffer
3092 
3093   SYNOPSIS
3094     bka_unique_range_seq_skip_record()
3095       seq              value returned by bka_unique_range_seq_init()
3096       range_info       information about the next range
3097       rowid [NOT USED] rowid of the record to be checked (not used)
3098 
3099   DESCRIPTION
3100     The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3101     object. The function returns TRUE if the record with this range_info
3102     is to be filtered out from the stream of records returned by
3103     multi_range_read_next().
3104 
3105   NOTE
    This function is used only as a callback function.
3107 
3108   RETURN
3109     1    record with this range_info is to be filtered out from the stream
3110          of records returned by multi_range_read_next()
3111     0    the record is to be left in the stream
3112 */
3113 
3114 static
bka_unique_range_seq_skip_record(range_seq_t rseq,char * range_info,uchar * rowid)3115 bool bka_unique_range_seq_skip_record(range_seq_t rseq, char *range_info,
3116                                       uchar *rowid)
3117 {
3118   DBUG_ENTER("bka_unique_range_seq_skip_record");
3119   JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3120   bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
3121   DBUG_RETURN(res);
3122 }
3123 
3124 
3125 /**
3126   Check if the record combination matches the index condition
3127 
3128   @param  rseq             Value returned by bka_range_seq_init()
3129   @param  range_info       MRR range association data
3130 
3131   @sa JOIN_CACHE_BKA::skip_index_tuple().
3132   This function is the variant for use with
3133   JOIN_CACHE_BKA_UNIQUE. The difference from JOIN_CACHE_BKA case is that
3134   there may be multiple previous table record combinations that share the
3135   same key, i.e. they map to the same MRR range. And for all of those
3136   records, we have just done one single key lookup in the current table,
3137   found an index tuple. If in this function we discard this index tuple, all
3138   those records will be eliminated from the result. Thus, in this function
3139   we can discard the index tuple only if _all_ those cached records and the
3140   index tuple don't match the pushed index condition. It's a "group-wide
3141   decision".
3142   Thus we must here loop through all previous table records combinations
3143   that match the given MRR range key range_info, searching for a single one
3144   matching the index condition.
3145   If we find none, we can safely discard the index tuple here, which avoids
3146   retrieving the record from the current table.
3147   If we instead find one, we cannot discard the index tuple here; later in
3148   execution, in join_matching_records(), we can finally take one
3149   "case-by-case decision" per cached record, by checking again the index
3150   condition (@sa JOIN_CACHE_BKA_UNIQUE::check_match).
3151 
3152   @note
3153   Possible optimization:
3154   Before we unpack the record from a previous table
3155   check if this table is used in the condition.
3156   If so then unpack the record otherwise skip the unpacking.
3157   This should be done by a special virtual method
3158   get_partial_record_by_pos().
3159 
3160   @retval false  The record combination satisfies the index condition
3161   @retval true   Otherwise
3162 
3163 
3164 */
3165 
bool JOIN_CACHE_BKA_UNIQUE::skip_index_tuple(range_seq_t rseq, char *range_info)
{
  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::skip_index_tuple");
  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
  /*
    range_info points into the key entry; it holds a reference to the last
    record of the circular chain of records sharing this key.
  */
  uchar *last_rec_ref_ptr=  cache->get_next_rec_ref((uchar*) range_info);
  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
  do
  {
    /* Advance to the next record reference of the circular chain */
    next_rec_ref_ptr= cache->get_next_rec_ref(next_rec_ref_ptr);
    uchar *rec_ptr= next_rec_ref_ptr + cache->rec_fields_offset;
    /* Unpack the cached partial-join record so the condition can read it */
    cache->get_record_by_pos(rec_ptr);
    /*
      One matching record combination is enough: the index tuple must then
      be kept for the whole group (see function header comment).
    */
    if (join_tab->cache_idx_cond->val_int())
      DBUG_RETURN(FALSE);
  } while(next_rec_ref_ptr != last_rec_ref_ptr);
  /* No cached combination satisfied the condition: discard the tuple */
  DBUG_RETURN(TRUE);
}
3182 
3183 
3184 /*
3185   Check if the record combination matches the index condition
3186 
3187   SYNOPSIS
3188     bka_unique_skip_index_tuple()
3189       rseq             Value returned by bka_range_seq_init()
3190       range_info       MRR range association data
3191 
3192   DESCRIPTION
3193     This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
3194     see comments there.
3195 
3196   NOTE
3197     This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
3198 
3199   RETURN
3200     0    The record combination satisfies the index condition
3201     1    Otherwise
3202 */
3203 
3204 static
bka_unique_skip_index_tuple(range_seq_t rseq,char * range_info)3205 bool bka_unique_skip_index_tuple(range_seq_t rseq, char *range_info)
3206 {
3207   DBUG_ENTER("bka_unique_skip_index_tuple");
3208   JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3209   DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
3210 }
3211 
3212 
3213 /*
3214   Using BKA_UNIQUE find matches from the next table for records from join buffer
3215 
3216   SYNOPSIS
3217     join_matching_records()
3218       skip_last    do not look for matches for the last partial join record
3219 
3220   DESCRIPTION
3221     This function can be used only when the table join_tab can be accessed
3222     by keys built over the fields of previous join tables.
3223     The function retrieves all keys from the hash table of the join buffer
3224     built for partial join records from the buffer. For each of these keys
3225     the function performs an index lookup and tries to match records yielded
3226     by this lookup with records from the join buffer attached to the key.
3227     If a match is found the function will call the sub_select function trying
3228     to look for matches for the remaining join operations.
3229     This function does not assume that matching records are necessarily
3230     returned with references to the keys by which they were found. If the call
3231     of the function multi_range_read_init returns flags with
3232     HA_MRR_NO_ASSOCIATION then a search for the key built from the returned
    record is carried on. The search is performed by probing in the hash
3234     table of the join buffer.
3235     This function currently is called only from the function join_records.
3236     It's assumed that this function is always called with the skip_last
3237     parameter equal to false.
3238 
3239   RETURN
3240     return one of enum_nested_loop_state
3241 */
3242 
enum_nested_loop_state
JOIN_CACHE_BKA_UNIQUE::join_matching_records(bool skip_last)
{
  /* The value of skip_last must be always FALSE when this function is called */
  DBUG_ASSERT(!skip_last);

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  const bool no_association= mrr_mode & HA_MRR_NO_ASSOCIATION;
  /* Set functions to iterate over keys in the join buffer */
  RANGE_SEQ_IF seq_funcs= { bka_unique_range_seq_init,
                            bka_unique_range_seq_next,
                            check_only_first_match && !no_association ?
                              bka_unique_range_seq_skip_record : 0,
                            join_tab->cache_idx_cond ?
                              bka_unique_skip_index_tuple : 0  };

  /* Start the MRR scan over the unique keys stored in the hash table */
  if (init_join_matching_records(&seq_funcs, key_entries))
    return NESTED_LOOP_ERROR;

  int error;
  uchar *key_chain_ptr;
  handler *file= join_tab->table->file;
  enum_nested_loop_state rc= NESTED_LOOP_OK;

  while (!(error= file->multi_range_read_next((char **) &key_chain_ptr)))
  {
    if (no_association)
    {
      /*
        The handler returned no range association data, so the record chain
        has to be located by rebuilding the key from the fetched row and
        probing the hash table with it.
      */
      uchar *key_ref_ptr;
      TABLE *table= join_tab->table;
      TABLE_REF *ref= &join_tab->ref;
      KEY *keyinfo= table->key_info+ref->key;
      /*
        Build the key value out of  the record returned by the call of
        multi_range_read_next in the record buffer
      */
      key_copy(ref->key_buff, table->record[0], keyinfo, ref->key_length);
      /* Look for this key in the join buffer */
      if (!key_search(ref->key_buff, ref->key_length, &key_ref_ptr))
        continue;
      key_chain_ptr= key_ref_ptr+get_size_of_key_offset();
    }

    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Walk the circular chain of cached records attached to this key */
    uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
    uchar *next_rec_ref_ptr= last_rec_ref_ptr;
    do
    {
      next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
      uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;

      if (join->thd->killed)
      {
        /* The user has aborted the execution of the query */
        join->thd->send_kill_message();
        return NESTED_LOOP_KILLED;
      }
      /*
        If only the first match is needed and it has been already found
        for the associated partial join record then the returned candidate
        is discarded.
      */
      if (rc == NESTED_LOOP_OK &&
          (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
      {
        get_record_by_pos(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK)
          return rc;
      }
    }
    while (next_rec_ref_ptr != last_rec_ref_ptr);
  }

  /* HA_ERR_END_OF_FILE is the normal termination of the MRR scan */
  if (error > 0 && error != HA_ERR_END_OF_FILE)
    return NESTED_LOOP_ERROR;
  return rc;
}
3326 
3327 
3328 /*
3329   Check whether all records in a key chain have their match flags set on
3330 
3331   SYNOPSIS
3332     check_all_match_flags_for_key()
3333       key_chain_ptr
3334 
3335   DESCRIPTION
3336     This function retrieves records in the given circular chain and checks
3337     whether their match flags are set on. The parameter key_chain_ptr shall
3338     point to the position in the join buffer storing the reference to the
3339     last element of this chain.
3340 
3341   RETURN
3342     TRUE   if each retrieved record has its match flag set on
3343     FALSE  otherwise
3344 */
3345 
check_all_match_flags_for_key(uchar * key_chain_ptr)3346 bool JOIN_CACHE_BKA_UNIQUE::check_all_match_flags_for_key(uchar *key_chain_ptr)
3347 {
3348   uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3349   uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3350   do
3351   {
3352     next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3353     uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3354     if (!get_match_flag_by_pos(rec_ptr))
3355       return FALSE;
3356   }
3357   while (next_rec_ref_ptr != last_rec_ref_ptr);
3358   return TRUE;
3359 }
3360 
3361 
3362 /*
3363   Get the next key built for the records from BKA_UNIQUE join buffer
3364 
3365   SYNOPSIS
3366     get_next_key()
3367       key    pointer to the buffer where the key value is to be placed
3368 
3369   DESCRIPTION
3370     The function reads the next key value stored in the hash table of the
3371     join buffer. Depending on the value of the use_emb_key flag of the
3372     join cache the value is read either from the table itself or from
3373     the record field where it occurs.
3374 
3375   RETURN
3376     length of the key value - if the starting value of 'cur_key_entry' refers
3377     to the position after that referred by the the value of 'last_key_entry'
3378     0 - otherwise.
3379 */
3380 
get_next_key(uchar ** key)3381 uint JOIN_CACHE_BKA_UNIQUE::get_next_key(uchar ** key)
3382 {
3383   if (curr_key_entry == last_key_entry)
3384     return 0;
3385 
3386   curr_key_entry-= key_entry_length;
3387 
3388   *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
3389 
3390   DBUG_ASSERT(*key >= buff && *key < hash_table);
3391 
3392   return key_length;
3393 }
3394 
3395 /**
3396   Check matching to a partial join record from the join buffer, an
3397   implementation specialized for JOIN_CACHE_BKA_UNIQUE.
3398   Only JOIN_CACHE_BKA_UNIQUE needs that, because it's the only cache using
3399   distinct keys.
3400   JOIN_CACHE_BKA, on the other hand, does one key lookup per cached
3401   record, so can take a per-record individualized decision for the pushed
3402   index condition as soon as it has the index tuple.
3403   @sa JOIN_CACHE_BKA_UNIQUE::skip_index_tuple
3404   @sa JOIN_CACHE::check_match
3405  */
check_match(uchar * rec_ptr)3406 bool JOIN_CACHE_BKA_UNIQUE::check_match(uchar *rec_ptr)
3407 {
3408   /* recheck pushed down index condition */
3409   if (join_tab->cache_idx_cond != NULL &&
3410       !join_tab->cache_idx_cond->val_int())
3411       return FALSE;
3412   /* continue with generic tests */
3413   return JOIN_CACHE_BKA::check_match(rec_ptr);
3414 }
3415 
3416 
3417 /****************************************************************************
3418  * Join cache module end
3419  ****************************************************************************/
3420