1 /* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file
25
26 @brief
27 join cache optimizations
28
29 @defgroup Query_Optimizer Query Optimizer
30 @{
31 */
32
33 #include "sql_priv.h"
34 #include "sql_select.h"
35 #include "key.h"
36 #include "sql_optimizer.h" // JOIN
37 #include "sql_join_buffer.h"
38 #include "sql_tmp_table.h" // instantiate_tmp_table()
39
40 #include <algorithm>
41 using std::max;
42 using std::min;
43
44
45 /*****************************************************************************
46 * Join cache module
47 ******************************************************************************/
48
49 /*
50 Fill in the descriptor of a flag field associated with a join cache
51
52 SYNOPSIS
53 add_field_flag_to_join_cache()
54 str position in a record buffer to copy the field from/to
55 length length of the field
56 field IN/OUT pointer to the field descriptor to fill in
57
58 DESCRIPTION
    The function fills in the descriptor of a cache flag field to which
60 the parameter 'field' points to. The function uses the first two
61 parameters to set the position in the record buffer from/to which
62 the field value is to be copied and the length of the copied fragment.
63 Before returning the result the function increments the value of
64 *field by 1.
    The function ignores the fields 'blob_length' and 'offset' of the
66 descriptor.
67
68 RETURN
69 the length of the field
70 */
71
72 static
add_flag_field_to_join_cache(uchar * str,uint length,CACHE_FIELD ** field)73 uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
74 {
75 CACHE_FIELD *copy= *field;
76 copy->str= str;
77 copy->length= length;
78 copy->type= 0;
79 copy->field= 0;
80 copy->referenced_field_no= 0;
81 copy->next_copy_rowid= NULL;
82 (*field)++;
83 return length;
84 }
85
86
87 /*
88 Fill in the descriptors of table data fields associated with a join cache
89
90 SYNOPSIS
91 add_table_data_fields_to_join_cache()
92 tab descriptors of fields from this table are to be filled
93 field_set descriptors for only these fields are to be created
94 field_cnt IN/OUT counter of data fields
95 descr IN/OUT pointer to the first descriptor to be filled
96 field_ptr_cnt IN/OUT counter of pointers to the data fields
97 descr_ptr IN/OUT pointer to the first pointer to blob descriptors
98
99 DESCRIPTION
100 The function fills in the descriptors of cache data fields from the table
101 'tab'. The descriptors are filled only for the fields marked in the
102 bitmap 'field_set'.
103 The function fills the descriptors starting from the position pointed
104 by 'descr'. If an added field is of a BLOB type then a pointer to the
105 its descriptor is added to the array descr_ptr.
106 At the return 'descr' points to the position after the last added
107 descriptor while 'descr_ptr' points to the position right after the
108 last added pointer.
109
110 RETURN
111 the total length of the added fields
112 */
113
114 static
add_table_data_fields_to_join_cache(JOIN_TAB * tab,MY_BITMAP * field_set,uint * field_cnt,CACHE_FIELD ** descr,uint * field_ptr_cnt,CACHE_FIELD *** descr_ptr)115 uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
116 MY_BITMAP *field_set,
117 uint *field_cnt,
118 CACHE_FIELD **descr,
119 uint *field_ptr_cnt,
120 CACHE_FIELD ***descr_ptr)
121 {
122 Field **fld_ptr;
123 uint len= 0;
124 CACHE_FIELD *copy= *descr;
125 CACHE_FIELD **copy_ptr= *descr_ptr;
126 uint used_fields= bitmap_bits_set(field_set);
127 for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
128 {
129 if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
130 {
131 len+= (*fld_ptr)->fill_cache_field(copy);
132 if (copy->type == CACHE_BLOB)
133 {
134 *copy_ptr= copy;
135 copy_ptr++;
136 (*field_ptr_cnt)++;
137 }
138 copy->field= *fld_ptr;
139 copy->referenced_field_no= 0;
140 copy->next_copy_rowid= NULL;
141 copy++;
142 (*field_cnt)++;
143 used_fields--;
144 }
145 }
146 *descr= copy;
147 *descr_ptr= copy_ptr;
148 return len;
149 }
150
151
152 /*
153 Determine various counters of fields associated with a record in the cache
154
155 SYNOPSIS
156 calc_record_fields()
157
158 DESCRIPTION
159 The function counts the number of total fields stored in a record
160 of the cache and saves this number in the 'fields' member. It also
161 determines the number of flag fields and the number of blobs.
162 The function sets 'with_match_flag' on if 'join_tab' needs a match flag
163 i.e. if it is the first inner table of an outer join, or of a semi-join
164 with FirstMatch strategy.
165
166 RETURN
167 none
168 */
169
calc_record_fields()170 void JOIN_CACHE::calc_record_fields()
171 {
172 /*
173 If there is a previous cache, start with the corresponding table, otherwise:
174 - if in a regular execution, start with the first non-const table.
175 - if in a materialized subquery, start with the first table of the subquery.
176 */
177 JOIN_TAB *tab = prev_cache ?
178 prev_cache->join_tab :
179 sj_is_materialize_strategy(join_tab->get_sj_strategy()) ?
180 join_tab->first_sj_inner_tab :
181 join->join_tab+join->const_tables;
182 tables= join_tab-tab;
183
184 fields= 0;
185 blobs= 0;
186 flag_fields= 0;
187 data_field_count= 0;
188 data_field_ptr_count= 0;
189 referenced_fields= 0;
190
191 for ( ; tab < join_tab ; tab++)
192 {
193 calc_used_field_length(join->thd, tab);
194 flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
195 flag_fields+= MY_TEST(tab->table->maybe_null);
196 fields+= tab->used_fields;
197 blobs+= tab->used_blobs;
198
199 fields+= tab->check_rowid_field();
200 }
201 if ((with_match_flag= (join_tab->is_first_inner_for_outer_join() ||
202 (join_tab->first_sj_inner_tab == join_tab &&
203 join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH))))
204 flag_fields++;
205 fields+= flag_fields;
206 }
207
208 /*
209 Allocate memory for descriptors and pointers to them associated with the cache
210
211 SYNOPSIS
212 alloc_fields()
213
214 DESCRIPTION
215 The function allocates memory for the array of fields descriptors
216 and the array of pointers to the field descriptors used to copy
217 join record data from record buffers into the join buffer and
218 backward. Some pointers refer to the field descriptor associated
219 with previous caches. They are placed at the beginning of the
220 array of pointers and its total number is specified by the parameter
221 'external fields'.
222 The pointer of the first array is assigned to field_descr and the
223 number of elements is precalculated by the function calc_record_fields.
224 The allocated arrays are adjacent.
225
226 NOTES
227 The memory is allocated in join->thd->memroot
228
229 RETURN
    0 if the arrays have been successfully allocated, 1 otherwise
231 */
232
alloc_fields(uint external_fields)233 int JOIN_CACHE::alloc_fields(uint external_fields)
234 {
235 uint ptr_cnt= external_fields+blobs+1;
236 uint fields_size= sizeof(CACHE_FIELD)*fields;
237 field_descr= (CACHE_FIELD*) sql_alloc(fields_size +
238 sizeof(CACHE_FIELD*)*ptr_cnt);
239 blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
240 return (field_descr == NULL);
241 }
242
243 /*
244 Create descriptors of the record flag fields stored in the join buffer
245
246 SYNOPSIS
247 create_flag_fields()
248
249 DESCRIPTION
250 The function creates descriptors of the record flag fields stored
251 in the join buffer. These are descriptors for:
252 - an optional match flag field,
253 - table null bitmap fields,
254 - table null row fields.
255 The match flag field is created when 'join_tab' is the first inner
    table of an outer join or a semi-join. A null bitmap field is
257 created for any table whose fields are to be stored in the join
258 buffer if at least one of these fields is nullable or is a BIT field
259 whose bits are partially stored with null bits. A null row flag
260 is created for any table assigned to the cache if it is an inner
261 table of an outer join.
262 The descriptor for flag fields are placed one after another at the
263 beginning of the array of field descriptors 'field_descr' that
264 contains 'fields' elements. If there is a match flag field the
265 descriptor for it is always first in the sequence of flag fields.
266 The descriptors for other flag fields can follow in an arbitrary
267 order.
268 The flag field values follow in a record stored in the join buffer
269 in the same order as field descriptors, with the match flag always
270 following first.
271 The function sets the value of 'flag_fields' to the total number
272 of the descriptors created for the flag fields.
273 The function sets the value of 'length' to the total length of the
274 flag fields.
275
276 RETURN
277 none
278 */
279
create_flag_fields()280 void JOIN_CACHE::create_flag_fields()
281 {
282 CACHE_FIELD *copy;
283 JOIN_TAB *tab;
284
285 copy= field_descr;
286
287 length=0;
288
289 /* If there is a match flag the first field is always used for this flag */
290 if (with_match_flag)
291 length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
292 sizeof(join_tab->found),
293 ©);
294
295 /* Create fields for all null bitmaps and null row flags that are needed */
296 for (tab= join_tab-tables; tab < join_tab; tab++)
297 {
298 TABLE *table= tab->table;
299
300 /* Create a field for the null bitmap from table if needed */
301 if (tab->used_null_fields || tab->used_uneven_bit_fields)
302 length+= add_flag_field_to_join_cache(table->null_flags,
303 table->s->null_bytes,
304 ©);
305
306 /* Create table for the null row flag if needed */
307 if (table->maybe_null)
308 length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
309 sizeof(table->null_row),
310 ©);
311 }
312
313 /* Theoretically the new value of flag_fields can be less than the old one */
314 flag_fields= copy-field_descr;
315 }
316
317
318 /*
319 Create descriptors of all remaining data fields stored in the join buffer
320
321 SYNOPSIS
322 create_remaining_fields()
323 all_read_fields indicates that descriptors for all read data fields
324 are to be created
325
326 DESCRIPTION
327 The function creates descriptors for all remaining data fields of a
328 record from the join buffer. If the parameter 'all_read_fields' is
329 true the function creates fields for all read record fields that
330 comprise the partial join record joined with join_tab. Otherwise,
331 for each table tab, the set of the read fields for which the descriptors
332 have to be added is determined as the difference between all read fields
    and those for which the descriptors have been already created.
334 The latter are supposed to be marked in the bitmap tab->table->tmp_set.
    The function increases the value of 'length' to the total length of
336 the added fields.
337
338 NOTES
339 If 'all_read_fields' is false the function modifies the value of
    tab->table->tmp_set for each table whose fields are stored in the cache.
341 The function calls the method Field::fill_cache_field to figure out
342 the type of the cache field and the maximal length of its representation
343 in the join buffer. If this is a blob field then additionally a pointer
344 to this field is added as an element of the array blob_ptr. For a blob
345 field only the size of the length of the blob data is taken into account.
346 It is assumed that 'data_field_count' contains the number of descriptors
347 for data fields that have been already created and 'data_field_ptr_count'
348 contains the number of the pointers to such descriptors having been
349 stored up to the moment.
350
351 RETURN
352 none
353 */
354
create_remaining_fields(bool all_read_fields)355 void JOIN_CACHE:: create_remaining_fields(bool all_read_fields)
356 {
357 JOIN_TAB *tab;
358 CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
359 CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
360
361 for (tab= join_tab-tables; tab < join_tab; tab++)
362 {
363 MY_BITMAP *rem_field_set;
364 TABLE *table= tab->table;
365
366 if (all_read_fields)
367 rem_field_set= table->read_set;
368 else
369 {
370 bitmap_invert(&table->tmp_set);
371 bitmap_intersect(&table->tmp_set, table->read_set);
372 rem_field_set= &table->tmp_set;
373 }
374
375 length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
376 &data_field_count, ©,
377 &data_field_ptr_count,
378 ©_ptr);
379
380 /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
381 if (tab->keep_current_rowid)
382 {
383 copy->str= table->file->ref;
384 copy->length= table->file->ref_length;
385 copy->type= 0;
386 copy->field= 0;
387 copy->referenced_field_no= 0;
388 copy->next_copy_rowid= NULL;
389 // Chain rowid copy objects belonging to same join_tab
390 if (tab->copy_current_rowid != NULL)
391 copy->next_copy_rowid= tab->copy_current_rowid;
392 tab->copy_current_rowid= copy;
393 length+= copy->length;
394 data_field_count++;
395 copy++;
396 }
397 }
398 }
399
400
401 /*
402 Calculate and set all cache constants
403
404 SYNOPSIS
405 set_constants()
406
407 DESCRIPTION
408 The function calculates and set all precomputed constants that are used
409 when writing records into the join buffer and reading them from it.
410 It calculates the size of offsets of a record within the join buffer
411 and of a field within a record. It also calculates the number of bytes
412 used to store record lengths.
413 The function also calculates the maximal length of the representation
414 of record in the cache excluding blob_data. This value is used when
    making a decision whether more records should be added into the join
416 buffer or not.
417
418 RETURN
419 none
420 */
421
void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() || with_match_flag;
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong) + aux_buffer_min_size();
  /* The join buffer must hold at least two records of maximal length */
  buff_size= max<size_t>(join->thd->variables.join_buff_size, 2*len);
  /* Pick the smallest offset widths able to address the buffer / a record */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);

  /* Cache whether a single match per record combination is sufficient */
  check_only_first_match= calc_check_only_first_match(join_tab);
}
463
464
465 /**
466 Allocate memory for a join buffer.
467
468 The function allocates a lump of memory for the join buffer. The
469 size of the allocated memory is 'buff_size' bytes.
470
471 @returns false if success, otherwise true.
472 */
bool JOIN_CACHE::alloc_buffer()
{
  /*
    Debug hook: the 'jb_alloc_fail' debug keyword simulates a failed
    allocation exactly once (the keyword is cleared before returning).
  */
  DBUG_EXECUTE_IF("jb_alloc_fail",
                  buff= NULL;
                  DBUG_SET("-d,jb_alloc_fail");
                  return true;
                 );

  /* Allocate 'buff_size' bytes; true (failure) if allocation failed */
  buff= (uchar*) my_malloc(buff_size, MYF(0));
  return buff == NULL;
}
484
485
486 /*
487 Initialize a BNL cache
488
489 SYNOPSIS
490 init()
491
492 DESCRIPTION
    The function initializes the cache structure. It is supposed to be called
494 right after a constructor for the JOIN_CACHE_BNL.
495 The function allocates memory for the join buffer and for descriptors of
496 the record fields stored in the buffer.
497
498 NOTES
499 The code of this function should have been included into the constructor
500 code itself. However the new operator for the class JOIN_CACHE_BNL would
501 never fail while memory allocation for the join buffer is not absolutely
502 unlikely to fail. That's why this memory allocation has to be placed in a
503 separate function that is called in a couple with a cache constructor.
504 It is quite natural to put almost all other constructor actions into
505 this function.
506
507 RETURN
508 0 initialization with buffer allocations has been succeeded
509 1 otherwise
510 */
511
init()512 int JOIN_CACHE_BNL::init()
513 {
514 DBUG_ENTER("JOIN_CACHE::init");
515
516 calc_record_fields();
517
518 if (alloc_fields(0))
519 DBUG_RETURN(1);
520
521 create_flag_fields();
522
523 create_remaining_fields(TRUE);
524
525 set_constants();
526
527 if (alloc_buffer())
528 DBUG_RETURN(1);
529
530 reset_cache(true);
531
532 DBUG_RETURN(0);
533 }
534
535
536 /*
537 Initialize a BKA cache
538
539 SYNOPSIS
540 init()
541
542 DESCRIPTION
    The function initializes the cache structure. It is supposed to be called
544 right after a constructor for the JOIN_CACHE_BKA.
545 The function allocates memory for the join buffer and for descriptors of
546 the record fields stored in the buffer.
547
548 NOTES
549 The code of this function should have been included into the constructor
550 code itself. However the new operator for the class JOIN_CACHE_BKA would
551 never fail while memory allocation for the join buffer is not absolutely
552 unlikely to fail. That's why this memory allocation has to be placed in a
553 separate function that is called in a couple with a cache constructor.
554 It is quite natural to put almost all other constructor actions into
555 this function.
556
557 RETURN
558 0 initialization with buffer allocations has been succeeded
559 1 otherwise
560 */
561
init()562 int JOIN_CACHE_BKA::init()
563 {
564 JOIN_TAB *tab;
565 JOIN_CACHE *cache;
566 local_key_arg_fields= 0;
567 external_key_arg_fields= 0;
568 DBUG_ENTER("JOIN_CACHE_BKA::init");
569
570 calc_record_fields();
571
572 /* Mark all fields that can be used as arguments for this key access */
573 TABLE_REF *ref= &join_tab->ref;
574 cache= this;
575 do
576 {
577 /*
578 Traverse the ref expressions and find the occurrences of fields in them for
579 each table 'tab' whose fields are to be stored in the 'cache' join buffer.
580 Mark these fields in the bitmap tab->table->tmp_set.
581 For these fields count the number of them stored in this cache and the
582 total number of them stored in the previous caches. Save the result
583 of the counting 'in local_key_arg_fields' and 'external_key_arg_fields'
584 respectively.
585 */
586 for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
587 {
588 uint key_args;
589 bitmap_clear_all(&tab->table->tmp_set);
590 for (uint i= 0; i < ref->key_parts; i++)
591 {
592 Item *ref_item= ref->items[i];
593 if (!(tab->table->map & ref_item->used_tables()))
594 continue;
595 ref_item->walk(&Item::add_field_to_set_processor, 1,
596 (uchar *) tab->table);
597 }
598 if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
599 {
600 if (cache == this)
601 local_key_arg_fields+= key_args;
602 else
603 external_key_arg_fields+= key_args;
604 }
605 }
606 cache= cache->prev_cache;
607 }
608 while (cache);
609
610 if (alloc_fields(external_key_arg_fields))
611 DBUG_RETURN(1);
612
613 create_flag_fields();
614
615 /*
616 Save pointers to the cache fields in previous caches
617 that are used to build keys for this key access.
618 */
619 cache= this;
620 uint ext_key_arg_cnt= external_key_arg_fields;
621 CACHE_FIELD *copy;
622 CACHE_FIELD **copy_ptr= blob_ptr;
623 while (ext_key_arg_cnt)
624 {
625 cache= cache->prev_cache;
626 for (tab= cache->join_tab-cache->tables; tab < cache->join_tab ; tab++)
627 {
628 CACHE_FIELD *copy_end;
629 MY_BITMAP *key_read_set= &tab->table->tmp_set;
630 /* key_read_set contains the bitmap of tab's fields referenced by ref */
631 if (bitmap_is_clear_all(key_read_set))
632 continue;
633 copy_end= cache->field_descr+cache->fields;
634 for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
635 {
636 /*
637 (1) - when we store rowids for DuplicateWeedout, they have
638 copy->field==NULL
639 */
640 if (copy->field && // (1)
641 copy->field->table == tab->table &&
642 bitmap_is_set(key_read_set, copy->field->field_index))
643 {
644 *copy_ptr++= copy;
645 ext_key_arg_cnt--;
646 if (!copy->referenced_field_no)
647 {
648 /*
649 Register the referenced field 'copy':
650 - set the offset number in copy->referenced_field_no,
651 - adjust the value of the flag 'with_length',
652 - adjust the values of 'pack_length' and
653 of 'pack_length_with_blob_ptrs'.
654 */
655 copy->referenced_field_no= ++cache->referenced_fields;
656 cache->with_length= TRUE;
657 cache->pack_length+= cache->get_size_of_fld_offset();
658 cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
659 }
660 }
661 }
662 }
663 }
664 /* After this 'blob_ptr' shall not be be changed */
665 blob_ptr= copy_ptr;
666
667 /* Now create local fields that are used to build ref for this key access */
668 copy= field_descr+flag_fields;
669 for (tab= join_tab-tables; tab < join_tab ; tab++)
670 {
671 length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
672 &data_field_count, ©,
673 &data_field_ptr_count,
674 ©_ptr);
675 }
676
677 use_emb_key= check_emb_key_usage();
678
679 create_remaining_fields(FALSE);
680
681 set_constants();
682
683 if (alloc_buffer())
684 DBUG_RETURN(1);
685
686 reset_cache(true);
687
688 DBUG_RETURN(0);
689 }
690
691
692 /*
693 Check the possibility to read the access keys directly from the join buffer
694
695 SYNOPSIS
696 check_emb_key_usage()
697
698 DESCRIPTION
699 The function checks some conditions at which the key values can be read
700 directly from the join buffer. This is possible when the key values can be
701 composed by concatenation of the record fields stored in the join buffer.
702 Sometimes when the access key is multi-component the function has to re-order
703 the fields written into the join buffer to make keys embedded. If key
704 values for the key access are detected as embedded then 'use_emb_key'
705 is set to TRUE.
706
707 EXAMPLE
708 Let table t2 has an index defined on the columns a,b . Let's assume also
709 that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
710 of the integer type. Then if the query
711 SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
712 is executed with a join cache in such a way that t1 is the driving
713 table then the key values to access table t2 can be read directly
714 from the join buffer.
715
716 NOTES
717 In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded. In the future we'll expand
    the class of keys which we identify as embedded.
720
721 RETURN
722 TRUE - key values will be considered as embedded,
723 FALSE - otherwise.
724 */
725
bool JOIN_CACHE_BKA::check_emb_key_usage()
{
  uint i;
  Item *item;
  KEY_PART_INFO *key_part;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uint len= 0;                          /* accumulated length of the key parts */
  TABLE *table= join_tab->table;
  TABLE_REF *ref= &join_tab->ref;
  KEY *keyinfo= table->key_info+ref->key;

  /*
    If some of the key arguments are not from the local cache the key
    is not considered as embedded.
    TODO:
    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
  */
  if (external_key_arg_fields != 0)
    return FALSE;
  /*
    If the number of the local key arguments is not equal to the number
    of key parts the key value cannot be read directly from the join buffer.
  */
  if (local_key_arg_fields != ref->key_parts)
    return FALSE;

  /*
    A key is not considered embedded if one of the following is true:
    - one of its key parts is not equal to a field
    - it is a partial key
    - definition of the argument field does not coincide with the
      definition of the corresponding key component
    - some of the key components are nullable
  */
  for (i=0; i < ref->key_parts; i++)
  {
    item= ref->items[i]->real_item();
    if (item->type() != Item::FIELD_ITEM)
      return FALSE;
    key_part= keyinfo->key_part+i;
    if (key_part->key_part_flag & HA_PART_KEY_SEG)
      return FALSE;
    if (!key_part->field->eq_def(((Item_field *) item)->field))
      return FALSE;
    if (key_part->field->maybe_null())
    {
      return FALSE;
      /*
        If this is changed so that embedded keys may contain nullable
        components, get_next_key() and put_record() will have to test
        ref->null_rejecting in the "embedded keys" case too.
      */
    }
  }

  /* The local key arguments directly follow the flag fields */
  copy= field_descr+flag_fields;
  copy_end= copy+local_key_arg_fields;
  for ( ; copy < copy_end; copy++)
  {
    /*
      If some of the key arguments are of variable length the key
      is not considered as embedded.
    */
    if (copy->type != 0)
      return FALSE;
    /*
      If some of the key arguments are bit fields whose bits are partially
      stored with null bits the key is not considered as embedded.
    */
    if (copy->field->type() == MYSQL_TYPE_BIT &&
	((Field_bit*) (copy->field))->bit_len)
      return FALSE;
    len+= copy->length;
  }

  emb_key_length= len;

  /*
    Make sure that key fields follow the order of the corresponding
    key components these fields are equal to. For this the descriptors
    of the fields that comprise the key might be re-ordered.
  */
  for (i= 0; i < ref->key_parts; i++)
  {
    uint j;
    Item *item= ref->items[i]->real_item();
    Field *fld= ((Item_field *) item)->field;
    CACHE_FIELD *init_copy= field_descr+flag_fields+i;
    /*
      Scan forward for the descriptor of 'fld' and swap it into position i.
      NOTE(review): the inner loop's condition and increment use the OUTER
      index 'i' (not 'j'); 'j' only remembers the starting position for the
      swap test below. This matches the code as found, but advancing 'i'
      here makes the outer loop skip key parts after a late match -- confirm
      the intended loop variable against upstream history.
    */
    for (j= i, copy= init_copy; i < local_key_arg_fields; i++, copy++)
    {
      if (fld->eq(copy->field))
      {
        if (j != i)
        {
          /* Swap the found descriptor into the i-th key-part slot */
          CACHE_FIELD key_part_copy= *copy;
          *copy= *init_copy;
          *init_copy= key_part_copy;
        }
        break;
      }
    }
  }

  return TRUE;
}
832
833
834 /*
835 Calculate the increment of the MRR buffer for a record write
836
837 SYNOPSIS
838 aux_buffer_incr()
839
840 DESCRIPTION
841 This implementation of the virtual function aux_buffer_incr determines
842 for how much the size of the MRR buffer should be increased when another
843 record is added to the cache.
844
845 RETURN
846 the increment of the size of the MRR buffer for the next record
847 */
848
aux_buffer_incr()849 uint JOIN_CACHE_BKA::aux_buffer_incr()
850 {
851 uint incr= 0;
852 TABLE_REF *ref= &join_tab->ref;
853 TABLE *tab= join_tab->table;
854 uint rec_per_key= tab->key_info[ref->key].rec_per_key[ref->key_parts-1];
855 set_if_bigger(rec_per_key, 1);
856 if (records == 1)
857 incr= ref->key_length + tab->file->ref_length;
858 /*
859 When adding a new record to the join buffer this can match
860 multiple keys in this table. We use rec_per_key as estimate for
861 the number of records that will match and reserve space in the
862 DS-MRR sort buffer for this many record references.
863 */
864 incr+= tab->file->stats.mrr_length_per_rec * rec_per_key;
865 return incr;
866 }
867
868
869 /**
  Calculate the minimum size for the MRR buffer.
871
  @return The minimum size that must be allocated for the MRR buffer
873 */
874
aux_buffer_min_size() const875 uint JOIN_CACHE_BKA::aux_buffer_min_size() const
876 {
877 /*
878 For DS-MRR to work, the sort buffer must have space to store the
879 reference (or primary key) for at least one record.
880 */
881 return join_tab->table->file->stats.mrr_length_per_rec;
882 }
883
884
885 /*
886 Check if the record combination matches the index condition
887
888 SYNOPSIS
889 JOIN_CACHE_BKA::skip_index_tuple()
890 rseq Value returned by bka_range_seq_init()
891 range_info MRR range association data
892
893 DESCRIPTION
894 This function is invoked from MRR implementation to check if an index
895 tuple matches the index condition. It is used in the case where the index
896 condition actually depends on both columns of the used index and columns
897 from previous tables.
898
899 Accessing columns of the previous tables requires special handling with
900 BKA. The idea of BKA is to collect record combinations in a buffer and
901 then do a batch of ref access lookups, i.e. by the time we're doing a
902 lookup its previous-records-combination is not in prev_table->record[0]
903 but somewhere in the join buffer.
904
905 We need to get it from there back into prev_table(s)->record[0] before we
906 can evaluate the index condition, and that's why we need this function
907 instead of regular IndexConditionPushdown.
908
909 NOTE
910 Possible optimization:
911 Before we unpack the record from a previous table
912 check if this table is used in the condition.
913 If so then unpack the record otherwise skip the unpacking.
914 This should be done by a special virtual method
915 get_partial_record_by_pos().
916
917 RETURN
918 0 The record combination satisfies the index condition
919 1 Otherwise
920 */
921
bool JOIN_CACHE_BKA::skip_index_tuple(range_seq_t rseq, char *range_info)
{
  DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
  /* 'rseq' is in fact the cache object itself (set up by the MRR init) */
  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
  /*
    Restore the previous-tables record combination from the join buffer
    into the tables' record[0] areas so the pushed index condition can
    reference columns of previous tables.
  */
  cache->get_record_by_pos((uchar*)range_info);
  /* Skip (return true) when the index condition evaluates to false */
  DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
}
929
930
931 /*
932 Check if the record combination matches the index condition
933
934 SYNOPSIS
935 bka_skip_index_tuple()
936 rseq Value returned by bka_range_seq_init()
937 range_info MRR range association data
938
939 DESCRIPTION
940 This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
941 see comments there.
942
943 NOTE
944 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
945
946 RETURN
947 0 The record combination satisfies the index condition
948 1 Otherwise
949 */
950
static
bool bka_skip_index_tuple(range_seq_t rseq, char *range_info)
{
  /* C-style RANGE_SEQ_IF::skip_index_tuple callback: forward to the cache */
  DBUG_ENTER("bka_skip_index_tuple");
  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
  DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
}
958
959
960 /*
961 Write record fields and their required offsets into the join cache buffer
962
963 SYNOPSIS
964 write_record_data()
965 link a reference to the associated info in the previous cache
966 is_full OUT true if it has been decided that no more records will be
967 added to the join buffer
968
969 DESCRIPTION
970 This function put into the cache buffer the following info that it reads
971 from the join record buffers or computes somehow:
972 (1) the length of all fields written for the record (optional)
973 (2) an offset to the associated info in the previous cache (if there is any)
974 determined by the link parameter
975 (3) all flag fields of the tables whose data field are put into the cache:
976 - match flag (optional),
977 - null bitmaps for all tables,
978 - null row flags for all tables
    (4) values of all data fields including
        - full images of those fixed length data fields that cannot have
          trailing spaces
        - significant part of fixed length fields that can have trailing spaces
          with the prepended length
        - data of non-blob variable length fields with the prepended data length
        - blob data from blob fields with the prepended data length
986 (5) record offset values for the data fields that are referred to from
987 other caches
988
989 The record is written at the current position stored in the field 'pos'.
990 At the end of the function 'pos' points at the position right after the
991 written record data.
992 The function increments the number of records in the cache that is stored
993 in the 'records' field by 1. The function also modifies the values of
994 'curr_rec_pos' and 'last_rec_pos' to point to the written record.
995 The 'end_pos' cursor is modified accordingly.
996 The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
997 remains in the record buffers and not copied to the join buffer. It may
998 happen only to the blob data from the last record added into the cache.
999
1000
1001 RETURN
1002 length of the written record data
1003 */
1004
uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
{
  uint len;
  bool last_record;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uchar *cp= pos;               /* write cursor into the join buffer */
  uchar *init_pos= cp;          /* remembered start, to compute written size */
  uchar *rec_len_ptr= 0;        /* where the total record length is stored */

  records++; /* Increment the counter of records in the cache */

  len= pack_length;

  /* Make an adjustment for the size of the auxiliary buffer if there is any */
  uint incr= aux_buffer_incr();
  ulong rem= rem_space();
  aux_buff_size+= len+incr < rem ? incr : rem;

  /*
    For each blob to be put into cache save its length and a pointer
    to the value in the corresponding element of the blob_ptr array.
    Blobs with null values are skipped.
    Increment 'len' by the total length of all these blobs.
  */
  if (blobs)
  {
    CACHE_FIELD **copy_ptr= blob_ptr;
    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
    {
      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
      if (!blob_field->is_null())
      {
        uint blob_len= blob_field->get_length();
        (*copy_ptr)->blob_length= blob_len;
        len+= blob_len;
        blob_field->get_ptr(&(*copy_ptr)->str);
      }
    }
  }

  /*
    Check whether we won't be able to add any new record into the cache after
    this one because the cache will be full. Set last_record to TRUE if it's so.
    We assume that the cache will be full after the record has been written
    into it if either the remaining space of the cache is not big enough for the
    record's blob values or if there is a chance that not all non-blob fields
    of the next record can be placed there.
    This function is called only in the case when there is enough space left in
    the cache to store at least non-blob parts of the current record.
  */
  last_record= (len+pack_length_with_blob_ptrs) > rem_space();

  /*
    Save the position for the length of the record in the cache if it's needed.
    The length of the record will be inserted here when all fields of the
    record are put into the cache.
  */
  if (with_length)
  {
    rec_len_ptr= cp;
    cp+= size_of_rec_len;
  }

  /*
    Put a reference to the fields of the record that are stored in the previous
    cache if there is any. This reference is passed by the 'link' parameter.
  */
  if (prev_cache)
  {
    cp+= prev_cache->get_size_of_rec_offset();
    prev_cache->store_rec_ref(cp, link);
  }

  curr_rec_pos= cp;

  /* If there is a match flag, initialize its value to 0 (no match yet) */
  copy= field_descr;
  if (with_match_flag)
    *copy[0].str= 0;

  /* First put into the cache the values of all flag fields */
  copy_end= field_descr+flag_fields;
  for ( ; copy < copy_end; copy++)
  {
    memcpy(cp, copy->str, copy->length);
    cp+= copy->length;
  }

  /* Now put the values of the remaining fields as long as they are not null */
  copy_end= field_descr+fields;
  for ( ; copy < copy_end; copy++)
  {
    Field *field= copy->field;
    if (field && field->maybe_null() && field->is_null())
    {
      /* Do not copy a field if its value is null */
      if (copy->referenced_field_no)
        copy->offset= 0;        /* offset 0 marks a NULL referenced field */
      continue;
    }
    /* Save the offset of the field to put it later at the end of the record */
    if (copy->referenced_field_no)
      copy->offset= cp-curr_rec_pos;

    if (copy->type == CACHE_BLOB)
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      if (last_record)
      {
        /*
          For the last record of the buffer the blob data stays in the
          record buffer; only the length and the pointer are stored.
        */
        last_rec_blob_data_is_in_rec_buff= 1;
        /* Put down the length of the blob and the pointer to the data */
        blob_field->get_image(cp, copy->length+sizeof(char*),
                              blob_field->charset());
        cp+= copy->length+sizeof(char*);
      }
      else
      {
        /* First put down the length of the blob and then copy the data */
        blob_field->get_image(cp, copy->length,
                              blob_field->charset());
        memcpy(cp+copy->length, copy->str, copy->blob_length);
        cp+= copy->length+copy->blob_length;
      }
    }
    else
    {
      switch (copy->type) {
      case CACHE_VARSTR1:
        /* Copy the significant part of the short varstring field */
        len= (uint) copy->str[0] + 1;   /* 1-byte length prefix + data */
        memcpy(cp, copy->str, len);
        cp+= len;
        break;
      case CACHE_VARSTR2:
        /* Copy the significant part of the long varstring field */
        len= uint2korr(copy->str) + 2;  /* 2-byte length prefix + data */
        memcpy(cp, copy->str, len);
        cp+= len;
        break;
      case CACHE_STRIPPED:
      {
        /*
          Put down the field value stripping all trailing spaces off.
          After this insert the length of the written sequence of bytes.
        */
        uchar *str, *end;
        for (str= copy->str, end= str+copy->length;
             end > str && end[-1] == ' ';
             end--) ;
        len=(uint) (end-str);
        int2store(cp, len);
        memcpy(cp+2, str, len);
        cp+= len+2;
        break;
      }
      default:
        /* Copy the entire image of the field from the record buffer */
        memcpy(cp, copy->str, copy->length);
        cp+= copy->length;
      }
    }
  }

  /* Add the offsets of the fields that are referenced from other caches */
  if (referenced_fields)
  {
    uint cnt= 0;
    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
    {
      if (copy->referenced_field_no)
      {
        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
                         copy->offset);
        cnt++;
      }
    }
    cp+= size_of_fld_ofs*cnt;
  }

  /* Backfill the total record length saved for at the start, if requested */
  if (rec_len_ptr)
    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
  last_rec_pos= curr_rec_pos;
  end_pos= pos= cp;
  *is_full= last_record;
  return (uint) (cp-init_pos);
}
1193
1194
1195 /**
1196 @brief Reset the join buffer for reading/writing: default implementation
1197
1198 @param for_writing if it's TRUE the function reset the buffer for writing
1199
1200 @details
1201 This default implementation of the virtual function reset_cache() resets
1202 the join buffer for reading or writing.
1203 If the buffer is reset for reading only the 'pos' value is reset
1204 to point to the very beginning of the join buffer. If the buffer is
1205 reset for writing additionally:
1206 - the counter of the records in the buffer is set to 0,
1207 - the the value of 'last_rec_pos' gets pointing at the position just
1208 before the buffer,
1209 - 'end_pos' is set to point to the beginning of the join buffer,
1210 - the size of the auxiliary buffer is reset to 0,
1211 - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
1212 */
1213
reset_cache(bool for_writing)1214 void JOIN_CACHE::reset_cache(bool for_writing)
1215 {
1216 pos= buff;
1217 curr_rec_link= 0;
1218 if (for_writing)
1219 {
1220 records= 0;
1221 last_rec_pos= buff;
1222 aux_buff_size= 0;
1223 end_pos= pos;
1224 last_rec_blob_data_is_in_rec_buff= 0;
1225 }
1226 }
1227
1228 /*
1229 Add a record into the join buffer: the default implementation
1230
1231 SYNOPSIS
1232 put_record_in_cache()
1233
1234 DESCRIPTION
1235 This default implementation of the virtual function put_record writes
1236 the next matching record into the join buffer.
1237 It also links the record having been written into the join buffer with
1238 the matched record in the previous cache if there is any.
1239 The implementation assumes that the function get_curr_link()
1240 will return exactly the pointer to this matched record.
1241
1242 RETURN
1243 TRUE if it has been decided that it should be the last record
1244 in the join buffer,
1245 FALSE otherwise
1246 */
1247
put_record_in_cache()1248 bool JOIN_CACHE::put_record_in_cache()
1249 {
1250 bool is_full;
1251 uchar *link= 0;
1252 if (prev_cache)
1253 link= prev_cache->get_curr_rec_link();
1254 write_record_data(link, &is_full);
1255 return (is_full);
1256 }
1257
1258
1259 /*
1260 Read the next record from the join buffer: the default implementation
1261
1262 SYNOPSIS
1263 get_record()
1264
1265 DESCRIPTION
1266 This default implementation of the virtual function get_record
1267 reads fields of the next record from the join buffer of this cache.
1268 The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1270 into the corresponding record buffers.
1271 It is supposed that 'pos' points to the position in the buffer
1272 right after the previous record when the function is called.
1273 When the function returns the 'pos' values is updated to point
1274 to the position after the read record.
1275 The value of 'curr_rec_pos' is also updated by the function to
1276 point to the beginning of the first field of the record in the
1277 join buffer.
1278
1279 RETURN
1280 TRUE - there are no more records to read from the join buffer
1281 FALSE - otherwise
1282 */
1283
bool JOIN_CACHE::get_record()
{
  bool res;
  uchar *prev_rec_ptr= 0;
  /* Skip the stored record length, if any, to reach the record fields */
  if (with_length)
    pos+= size_of_rec_len;
  if (prev_cache)
  {
    /* Skip (but remember) the back-reference into the previous buffer */
    pos+= prev_cache->get_size_of_rec_offset();
    prev_rec_ptr= prev_cache->get_rec_ref(pos);
  }
  curr_rec_pos= pos;
  /* read_some_record_fields() returns -1 when the buffer is exhausted */
  res= (read_some_record_fields() == -1);
  if (!res)
  { // There are more records to read
    /* Step over the trailing table of referenced-field offsets */
    pos+= referenced_fields*size_of_fld_ofs;
    if (prev_cache)
    {
      /*
        read_some_record_fields() didn't read fields stored in previous
        buffers, read them now:
      */
      prev_cache->get_record_by_pos(prev_rec_ptr);
    }
  }
  return res;
}
1311
1312
1313 /*
1314 Read a positioned record from the join buffer: the default implementation
1315
1316 SYNOPSIS
1317 get_record_by_pos()
1318 rec_ptr position of the first field of the record in the join buffer
1319
1320 DESCRIPTION
1321 This default implementation of the virtual function get_record_pos
1322 reads the fields of the record positioned at 'rec_ptr' from the join buffer.
1323 The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1325 into the corresponding record buffers.
1326
1327 RETURN
1328 none
1329 */
1330
get_record_by_pos(uchar * rec_ptr)1331 void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
1332 {
1333 uchar *save_pos= pos;
1334 pos= rec_ptr;
1335 read_some_record_fields();
1336 pos= save_pos;
1337 if (prev_cache)
1338 {
1339 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1340 prev_cache->get_record_by_pos(prev_rec_ptr);
1341 }
1342 }
1343
1344
1345 /*
1346 Test the match flag from the referenced record: the default implementation
1347
1348 SYNOPSIS
1349 get_match_flag_by_pos()
1350 rec_ptr position of the first field of the record in the join buffer
1351
1352 DESCRIPTION
    This default implementation of the virtual function get_match_flag_by_pos
    tests the match flag for the record pointed to by the reference at the
    position rec_ptr. If the match flag is placed in one of the previous
    buffers the function first reaches the linked record fields in that buffer.
1357
1358 RETURN
1359 TRUE if the match flag is set on
1360 FALSE otherwise
1361 */
1362
get_match_flag_by_pos(uchar * rec_ptr)1363 bool JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
1364 {
1365 if (with_match_flag)
1366 return MY_TEST(*rec_ptr);
1367 if (prev_cache)
1368 {
1369 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1370 return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
1371 }
1372 DBUG_ASSERT(1);
1373 return FALSE;
1374 }
1375
1376
1377 /**
1378 Read some flag and data fields of a record from the join buffer.
1379
1380 Reads all fields (flag and data fields) stored in this join buffer, for the
1381 current record (at 'pos'). If the buffer is incremental, fields of this
1382 record which are stored in previous join buffers are _not_ read so remain
1383 unknown: caller must then make sure to call this function on previous
1384 buffers too.
1385
1386 The fields are read starting from the position 'pos' which is
1387 supposed to point to the beginning of the first record field.
1388 The function increments the value of 'pos' by the length of the
1389 read data.
1390
1391 Flag fields are copied back to their source; data fields are copied to the
1392 record's buffer.
1393
1394 @retval (-1) if there are no more records in the join buffer
1395 @retval <>(-1) length of the data read from the join buffer
1396 */
1397
read_some_record_fields()1398 int JOIN_CACHE::read_some_record_fields()
1399 {
1400 uchar *init_pos= pos;
1401
1402 if (pos > last_rec_pos || !records)
1403 return -1;
1404
1405 // First match flag, read null bitmaps and null_row flag
1406 read_some_flag_fields();
1407
1408 /* Now read the remaining table fields if needed */
1409 CACHE_FIELD *copy= field_descr+flag_fields;
1410 CACHE_FIELD *copy_end= field_descr+fields;
1411 bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
1412 for ( ; copy < copy_end; copy++)
1413 read_record_field(copy, blob_in_rec_buff);
1414
1415 return (uint) (pos-init_pos);
1416 }
1417
1418
1419 /**
1420 Read some flag fields of a record from the join buffer.
1421
1422 Reads all flag fields stored in this join buffer, for the current record (at
1423 'pos'). If the buffer is incremental, flag fields of this record which are
1424 stored in previous join buffers are _not_ read so remain unknown: caller
1425 must then make sure to call this function on previous buffers too.
1426
1427 The flag fields are read starting from the position 'pos'.
1428 The function increments the value of 'pos' by the length of the
1429 read data.
1430
1431 Flag fields are copied back to their source.
1432 */
read_some_flag_fields()1433 void JOIN_CACHE::read_some_flag_fields()
1434 {
1435 CACHE_FIELD *copy= field_descr;
1436 CACHE_FIELD *copy_end= copy+flag_fields;
1437 for ( ; copy < copy_end; copy++)
1438 {
1439 memcpy(copy->str, pos, copy->length);
1440 pos+= copy->length;
1441 }
1442 }
1443
1444
1445 /*
1446 Read a data record field from the join buffer
1447
1448 SYNOPSIS
1449 read_record_field()
1450 copy the descriptor of the data field to be read
1451 blob_in_rec_buff indicates whether this is the field from the record
1452 whose blob data are in record buffers
1453
1454 DESCRIPTION
1455 The function reads the data field specified by the parameter copy
1456 from the join buffer into the corresponding record buffer.
1457 The field is read starting from the position 'pos'.
1458 The data of blob values is not copied from the join buffer.
1459 The function increments the value of 'pos' by the length of the
1460 read data.
1461
1462 RETURN
1463 length of the data read from the join buffer
1464 */
1465
uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
{
  uint len;
  /* Do not copy the field if its value is null */
  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
    return 0;
  if (copy->type == CACHE_BLOB)
  {
    Field_blob *blob_field= (Field_blob *) copy->field;
    /*
      Copy the length and the pointer to data but not the blob data
      itself to the record buffer
    */
    if (blob_in_rec_buff)
    {
      /* The blob bytes never left the record buffer: restore len+ptr image */
      blob_field->set_image(pos, copy->length+sizeof(char*),
                            blob_field->charset());
      len= copy->length+sizeof(char*);
    }
    else
    {
      /* Blob data follows its length prefix inside the join buffer */
      blob_field->set_ptr(pos, pos+copy->length);
      len= copy->length+blob_field->get_length();
    }
  }
  else
  {
    switch (copy->type) {
    case CACHE_VARSTR1:
      /* Copy the significant part of the short varstring field */
      len= (uint) pos[0] + 1;           /* 1-byte length prefix + data */
      memcpy(copy->str, pos, len);
      break;
    case CACHE_VARSTR2:
      /* Copy the significant part of the long varstring field */
      len= uint2korr(pos) + 2;          /* 2-byte length prefix + data */
      memcpy(copy->str, pos, len);
      break;
    case CACHE_STRIPPED:
      /* Pad the value by spaces that had been stripped off when cached */
      len= uint2korr(pos);
      memcpy(copy->str, pos+2, len);
      memset(copy->str+len, ' ', copy->length-len);
      len+= 2;                          /* account for the length prefix */
      break;
    default:
      /* Copy the entire image of the field from the record buffer */
      len= copy->length;
      memcpy(copy->str, pos, len);
    }
  }
  /* Advance the read cursor past the bytes consumed from the join buffer */
  pos+= len;
  return len;
}
1520
1521
1522 /*
1523 Read a referenced field from the join buffer
1524
1525 SYNOPSIS
1526 read_referenced_field()
1527 copy pointer to the descriptor of the referenced field
1528 rec_ptr pointer to the record that may contain this field
1529 len IN/OUT total length of the record fields
1530
1531 DESCRIPTION
1532 The function checks whether copy points to a data field descriptor
1533 for this cache object. If it does not then the function returns
1534 FALSE. Otherwise the function reads the field of the record in
1535 the join buffer pointed by 'rec_ptr' into the corresponding record
1536 buffer and returns TRUE.
1537 If the value of *len is 0 then the function sets it to the total
1538 length of the record fields including possible trailing offset
1539 values. Otherwise *len is supposed to provide this value that
1540 has been obtained earlier.
1541
1542 RETURN
1543 TRUE 'copy' points to a data descriptor of this join cache
1544 FALSE otherwise
1545 */
1546
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* 'copy' must be one of this cache's own data field descriptors */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /*
    The offsets of referenced fields are stored at the very end of the
    record; index them backwards from ptr + *len.
  */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  /* Offset 0 is the NULL marker written by write_record_data() */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
    copy->field->set_null();
  else
  {
    /* Temporarily move the read cursor to the field and read it back */
    uchar *save_pos= pos;
    copy->field->set_notnull();
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
1583
1584
1585 /*
1586 Skip record from join buffer if its match flag is on: default implementation
1587
1588 SYNOPSIS
1589 skip_record_if_match()
1590
1591 DESCRIPTION
1592 This default implementation of the virtual function skip_record_if_match
1593 skips the next record from the join buffer if its match flag is set on.
1594 If the record is skipped the value of 'pos' is set to points to the position
1595 right after the record.
1596
1597 RETURN
1598 TRUE - the match flag is on and the record has been skipped
1599 FALSE - the match flag is off
1600 */
1601
skip_record_if_match()1602 bool JOIN_CACHE::skip_record_if_match()
1603 {
1604 DBUG_ASSERT(with_match_flag && with_length);
1605 uint offset= size_of_rec_len;
1606 if (prev_cache)
1607 offset+= prev_cache->get_size_of_rec_offset();
1608 /* Check whether the match flag is on */
1609 if (MY_TEST(*(pos+offset)))
1610 {
1611 pos+= size_of_rec_len + get_rec_length(pos);
1612 return TRUE;
1613 }
1614 return FALSE;
1615 }
1616
1617
1618 /*
1619 Restore the fields of the last record from the join buffer
1620
1621 SYNOPSIS
1622 restore_last_record()
1623
1624 DESCRIPTION
1625 This function restore the values of the fields of the last record put
1626 into join buffer in record buffers. The values most probably have been
1627 overwritten by the field values from other records when they were read
1628 from the join buffer into the record buffer in order to check pushdown
1629 predicates.
1630
1631 RETURN
1632 none
1633 */
1634
restore_last_record()1635 void JOIN_CACHE::restore_last_record()
1636 {
1637 if (records)
1638 get_record_by_pos(last_rec_pos);
1639 }
1640
1641
1642 /*
1643 Join records from the join buffer with records from the next join table
1644
1645 SYNOPSIS
1646 join_records()
1647 skip_last do not find matches for the last record from the buffer
1648
1649 DESCRIPTION
1650 The functions extends all records from the join buffer by the matched
1651 records from join_tab. In the case of outer join operation it also
1652 adds null complementing extensions for the records from the join buffer
1653 that have no match.
1654 No extensions are generated for the last record from the buffer if
1655 skip_last is true.
1656
1657 NOTES
1658 The function must make sure that if linked join buffers are used then
1659 a join buffer cannot be refilled again until all extensions in the
1660 buffers chained to this one are generated.
1661 Currently an outer join operation with several inner tables always uses
1662 at least two linked buffers with the match join flags placed in the
1663 first buffer. Any record composed of rows of the inner tables that
1664 matches a record in this buffer must refer to the position of the
1665 corresponding match flag.
1666
1667 IMPLEMENTATION
1668 When generating extensions for outer tables of an outer join operation
1669 first we generate all extensions for those records from the join buffer
1670 that have matches, after which null complementing extension for all
1671 unmatched records from the join buffer are generated.
1672
1673 RETURN
1674 return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
1675 */
1676
enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  DBUG_ENTER("JOIN_CACHE::join_records");

  /*
    Save the GARBAGE / NOT_FOUND / NULL_ROW status bits of every table whose
    rows are buffered, and clear table->status so that rows loaded back from
    the join buffer count as existing records.
  */
  table_map saved_status_bits[3]= {0, 0, 0};
  for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
  {
    /*
      We may have hit EOF on previous tables; this has set
      STATUS_NOT_FOUND in their status. However, now we are going to load
      table->record[0] from the join buffer so have to declare that there is a
      record. @See convert_constant_item().
      We first need to save bits of table->status; STATUS_DELETED and
      STATUS_UPDATED cannot be on as multi-table DELETE/UPDATE never use join
      buffering. So we only have three bits to save.
    */
    TABLE * const table= join_tab[- cnt].table;
    const uint8 status= table->status;
    const table_map map= table->map;
    DBUG_ASSERT((status & (STATUS_DELETED | STATUS_UPDATED)) == 0);
    if (status & STATUS_GARBAGE)
      saved_status_bits[0]|= map;
    if (status & STATUS_NOT_FOUND)
      saved_status_bits[1]|= map;
    if (status & STATUS_NULL_ROW)
      saved_status_bits[2]|= map;
    table->status= 0;                           // Record exists.
  }

  const bool outer_join_first_inner=
    join_tab->is_first_inner_for_outer_join();
  /* Not yet in the null-complementing phase of an outer join */
  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (rc != NESTED_LOOP_OK)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /* Prepare for generation of null complementing extensions */
      for (JOIN_TAB *tab= join_tab->first_inner;
           tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    /*
      Key-based access clobbers the record buffers while probing; put the
      last buffered record back before scanning for unmatched records.
    */
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset_cache(false);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK)
      goto finish;
  }
  if(next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (JOIN_TAB *tab= join_tab->first_inner;
         tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= NULL;
  }
  for (int cnt= 1; cnt <= static_cast<int>(tables); cnt++)
  {
    /*
      We must restore the status of outer tables as it was before entering
      this function.
    */
    TABLE * const table= join_tab[- cnt].table;
    const table_map map= table->map;
    uint8 status= 0;
    if (saved_status_bits[0] & map)
      status|= STATUS_GARBAGE;
    if (saved_status_bits[1] & map)
      status|= STATUS_NOT_FOUND;
    if (saved_status_bits[2] & map)
      status|= STATUS_NULL_ROW;
    table->status= status;
  }
  restore_last_record();
  reset_cache(true);
  DBUG_RETURN(rc);
}
1810
1811
1812 /*
1813 Using BNL find matches from the next table for records from the join buffer
1814
1815 SYNOPSIS
1816 join_matching_records()
1817 skip_last do not look for matches for the last partial join record
1818
1819 DESCRIPTION
1820 The function retrieves all rows of the join_tab table and check whether
1821 they match partial join records from the join buffer. If a match is found
1822 the function will call the sub_select function trying to look for matches
1823 for the remaining join operations.
1824 This function currently is called only from the function join_records.
1825 If the value of skip_last is true the function writes the partial join
1826 record from the record buffer into the join buffer to save its value for
1827 the future processing in the caller function.
1828
1829 NOTES
1830 The function produces all matching extensions for the records in the
1831 join buffer following the path of the Blocked Nested Loops algorithm.
1832 When an outer join operation is performed all unmatched records from
1833 the join buffer must be extended by null values. The function
1834 'join_null_complements' serves this purpose.
1835
1836 RETURN
1837 return one of enum_nested_loop_state.
1838 */
1839
enum_nested_loop_state JOIN_CACHE_BNL::join_matching_records(bool skip_last)
{
  uint cnt;
  int error;
  READ_RECORD *info;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  SQL_SELECT *select= join_tab->cache_select;

  join_tab->table->null_row= 0;

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record_in_cache();

  if (join_tab->use_quick == QS_DYNAMIC_RANGE && join_tab->select->quick)
    /* A dynamic range access was used last. Clean up after it */
    join_tab->select->set_quick(NULL);

  /* Start retrieving all records of the joined table */
  if ((error= (*join_tab->read_first_record)(join_tab)))
    return error < 0 ? NESTED_LOOP_OK : NESTED_LOOP_ERROR; /* <0 means EOF */

  info= &join_tab->read_record;
  do
  {
    /* Remember the row position if a later operation needs the rowid */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    if (join->thd->killed)
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      return NESTED_LOOP_KILLED;
    }

    /*
      Do not look for matches if the last read record of the joined table
      does not meet the conditions that have been pushed to this table
    */
    if (rc == NESTED_LOOP_OK)
    {
      /* skip_record is only assigned when 'select' is non-NULL (short-circuit) */
      bool skip_record;
      bool consider_record= (!select ||
                             (!select->skip_record(join->thd, &skip_record) &&
                              !skip_record));
      if (select && join->thd->is_error())
        return NESTED_LOOP_ERROR;
      if (consider_record)
      {
        /* Prepare to read records from the join buffer */
        reset_cache(false);

        /* Read each record from the join buffer and look for matches */
        for (cnt= records - MY_TEST(skip_last) ; cnt; cnt--)
        {
          /*
            If only the first match is needed and it has been already found for
            the next record read from the join buffer then the record is
            skipped.
          */
          if (!check_only_first_match || !skip_record_if_match())
          {
            get_record();
            rc= generate_full_extensions(get_curr_rec());
            if (rc != NESTED_LOOP_OK)
              return rc;
          }
        }
      }
    }
  } while (!(error= info->read_record(info)));

  if (error > 0)                                // Fatal error
    rc= NESTED_LOOP_ERROR;
  return rc;
}
1924
1925
1926 /*
1927 Set match flag for a record in join buffer if it has not been set yet
1928
1929 SYNOPSIS
1930 set_match_flag_if_none()
1931 first_inner the join table to which this flag is attached to
1932 rec_ptr pointer to the record in the join buffer
1933
1934 DESCRIPTION
1935 If the records of the table are accumulated in a join buffer the function
1936 sets the match flag for the record in the buffer that is referred to by
1937 the record from this cache positioned at 'rec_ptr'.
1938 The function also sets the match flag 'found' of the table first inner
1939 if it has not been set before.
1940
1941 NOTES
1942 The function assumes that the match flag for any record in any cache
1943 is placed in the first byte occupied by the record fields.
1944
1945 RETURN
1946 TRUE the match flag is set by this call for the first time
1947 FALSE the match flag has been set before this call
1948 */
1949
bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
                                        uchar *rec_ptr)
{
  if (!first_inner->op)
  {
    /*
      Records of the first inner table to which the flag is attached to
      are not accumulated in a join buffer.
    */
    if (first_inner->found)
      return FALSE;
    else
    {
      first_inner->found= 1;
      return TRUE;
    }
  }
  JOIN_CACHE *cache= this;
  /*
    Walk backwards through the chain of linked caches until we reach the
    cache attached to first_inner, translating rec_ptr into the position
    of this record's part in each previous join buffer along the way.
  */
  while (cache->join_tab != first_inner)
  {
    cache= cache->prev_cache;
    DBUG_ASSERT(cache);
    rec_ptr= cache->get_rec_ref(rec_ptr);
  }
  /*
    The match flag occupies the first byte of the record fields
    (see NOTES in the function header comment).
  */
  if (rec_ptr[0] == 0)
  {
    rec_ptr[0]= 1;
    first_inner->found= 1;
    return TRUE;
  }
  return FALSE;
}
1982
1983
1984 /*
1985 Generate all full extensions for a partial join record in the buffer
1986
1987 SYNOPSIS
1988 generate_full_extensions()
1989 rec_ptr pointer to the record from join buffer to generate extensions
1990
1991 DESCRIPTION
1992 The function first checks whether the current record of 'join_tab' matches
1993 the partial join record from join buffer located at 'rec_ptr'. If it is the
1994 case the function calls the join_tab->next_select method to generate
    all full extensions for this partial join match.
1996
1997 RETURN
1998 return one of enum_nested_loop_state.
1999 */
2000
enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;

  /*
    Check whether the extended partial join record meets
    the pushdown conditions.
  */
  if (check_match(rec_ptr))
  {
    int res= 0;
    /*
      For semi-join duplicate weedout: a non-zero result from
      do_sj_dups_weedout() suppresses the extension; -1 signals an error
      (handled below), any other non-zero value means a duplicate.
    */
    if (!join_tab->check_weed_out_table ||
        !(res= do_sj_dups_weedout(join->thd, join_tab->check_weed_out_table)))
    {
      /* Remember which buffered record the current extension belongs to */
      set_curr_rec_link(rec_ptr);
      /* Continue the join with the tables following join_tab */
      rc= (join_tab->next_select)(join, join_tab+1, 0);
      if (rc != NESTED_LOOP_OK)
      {
        /* Execution is being cut short: prepare the buffer for re-reading */
        reset_cache(true);
        return rc;
      }
    }
    if (res == -1)
    {
      rc= NESTED_LOOP_ERROR;
      return rc;
    }
  }
  return rc;
}
2031
2032
2033 /*
2034 Check matching to a partial join record from the join buffer
2035
2036 SYNOPSIS
2037 check_match()
2038 rec_ptr pointer to the record from join buffer to check matching to
2039
2040 DESCRIPTION
2041 The function checks whether the current record of 'join_tab' matches
2042 the partial join record from join buffer located at 'rec_ptr'. If this is
2043 the case and 'join_tab' is the last inner table of a semi-join or an outer
2044 join the function turns on the match flag for the 'rec_ptr' record unless
2045 it has been already set.
2046
2047 NOTES
2048 Setting the match flag on can trigger re-evaluation of pushdown conditions
2049 for the record when join_tab is the last inner table of an outer join.
2050
2051 RETURN
2052 TRUE there is a match
2053 FALSE there is no match
2054 */
2055
bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  bool skip_record;
  /* Check whether pushdown conditions are satisfied */
  if (join_tab->select &&
      (join_tab->select->skip_record(join->thd, &skip_record) || skip_record))
    return FALSE;

  /*
    Match-flag bookkeeping is only needed when join_tab is the last inner
    table of an outer join, or of a semi-join executed with FirstMatch.
  */
  if (!((join_tab->first_inner &&
         join_tab->first_inner->last_inner == join_tab) ||
        (join_tab->last_sj_inner_tab == join_tab &&
         join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH)))
    return TRUE; // not the last inner table

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->first_inner ?
    join_tab->first_inner :
    ((join_tab->get_sj_strategy() == SJ_OPT_FIRST_MATCH) ?
     join_tab->first_sj_inner_tab : NULL);

  /* Iterate over the chain of embedding outer joins ending at join_tab */
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    /*
      A pure FirstMatch semi-join (join_tab->first_inner is NULL, so no
      enclosing outer join): one match is enough, nothing to re-evaluate.
    */
    if (calc_check_only_first_match(first_inner) &&
        !join_tab->first_inner)
      return TRUE;
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select &&
          (tab->select->skip_record(join->thd, &skip_record) || skip_record))
        return FALSE;
    }
  }
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);

  return TRUE;
}
2108
2109
2110 /*
2111 Add null complements for unmatched outer records from join buffer
2112
2113 SYNOPSIS
2114 join_null_complements()
2115 skip_last do not add null complements for the last record
2116
2117 DESCRIPTION
2118 This function is called only for inner tables of outer joins.
2119 The function retrieves all rows from the join buffer and adds null
2120 complements for those of them that do not have matches for outer
2121 table records.
2122 If the 'join_tab' is the last inner table of the embedding outer
2123 join and the null complemented record satisfies the outer join
    condition then the corresponding match flag is turned on
2125 unless it has been set earlier. This setting may trigger
2126 re-evaluation of pushdown conditions for the record.
2127
2128 NOTES
2129 The same implementation of the virtual method join_null_complements
2130 is used for JOIN_CACHE_BNL and JOIN_CACHE_BKA.
2131
2132 RETURN
2133 return one of enum_nested_loop_state.
2134 */
2135
enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
{
  uint cnt;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  /* When join_tab is the first unmatched inner, match flags live here */
  bool is_first_inner= join_tab == join_tab->first_unmatched;
  DBUG_ENTER("JOIN_CACHE::join_null_complements");

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /* skip_last is honoured only for non-key access (BNL); not for BKA */
  cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));

  /* This function may be called only for inner tables of outer joins */
  DBUG_ASSERT(join_tab->first_inner);

  // Make sure that the rowid buffer is bound, duplicates weedout needs it
  if (join_tab->copy_current_rowid &&
      !join_tab->copy_current_rowid->buffer_is_bound())
    join_tab->copy_current_rowid->bind_buffer(join_tab->table->file->ref);

  for ( ; cnt; cnt--)
  {
    if (join->thd->killed)
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }
    /* Just skip the whole record if a match for it has been already found */
    if (!is_first_inner || !skip_record_if_match())
    {
      get_record();
      /* The outer row is complemented by nulls for each inner table */
      restore_record(join_tab->table, s->default_values);
      mark_as_null_row(join_tab->table);
      rc= generate_full_extensions(get_curr_rec());
      if (rc != NESTED_LOOP_OK)
        goto finish;
    }
  }

finish:
  DBUG_RETURN(rc);
}
2182
2183
2184 /*
2185 Initialize retrieval of range sequence for BKA algorithm
2186
2187 SYNOPSIS
2188 bka_range_seq_init()
2189 init_params pointer to the BKA join cache object
2190 n_ranges the number of ranges obtained
2191 flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
2192
2193 DESCRIPTION
2194 The function interprets init_param as a pointer to a JOIN_CACHE_BKA
2195 object. The function prepares for an iteration over the join keys
2196 built for all records from the cache join buffer.
2197
2198 NOTE
    This function is used only as a callback function.
2200
2201 RETURN
2202 init_param value that is to be used as a parameter of bka_range_seq_next()
2203 */
2204
2205 static
bka_range_seq_init(void * init_param,uint n_ranges,uint flags)2206 range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
2207 {
2208 DBUG_ENTER("bka_range_seq_init");
2209 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
2210 cache->reset_cache(false);
2211 DBUG_RETURN((range_seq_t) init_param);
2212 }
2213
2214
2215 /*
2216 Get the key over the next record from the join buffer used by BKA
2217
2218 SYNOPSIS
2219 bka_range_seq_next()
2220 seq the value returned by bka_range_seq_init
2221 range OUT reference to the next range
2222
2223 DESCRIPTION
2224 The function interprets seq as a pointer to a JOIN_CACHE_BKA
2225 object. The function returns a pointer to the range descriptor
2226 for the key built over the next record from the join buffer.
2227
2228 NOTE
    This function is used only as a callback function.
2230
2231 RETURN
2232 0 ok, the range structure filled with info about the next key
2233 1 no more ranges
2234 */
2235
static
uint bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
{
  DBUG_ENTER("bka_range_seq_next");
  JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
  TABLE_REF *ref= &cache->join_tab->ref;
  key_range *start_key= &range->start_key;
  /* get_next_key() returns 0 when there are no more keys in the buffer */
  if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
  {
    /* All ref->key_parts key parts participate in an exact-match lookup */
    start_key->keypart_map= (1 << ref->key_parts) - 1;
    start_key->flag= HA_READ_KEY_EXACT;
    range->end_key= *start_key;
    range->end_key.flag= HA_READ_AFTER_KEY;
    /* Associate the range with the originating record in the join buffer */
    range->ptr= (char *) cache->get_curr_rec();
    range->range_flag= EQ_RANGE;
    DBUG_RETURN(0);
  }
  DBUG_RETURN(1);
}
2255
2256
2257 /*
2258 Check whether range_info orders to skip the next record from BKA buffer
2259
2260 SYNOPSIS
2261 bka_range_seq_skip_record()
2262 seq value returned by bka_range_seq_init()
2263 range_info information about the next range
2264 rowid [NOT USED] rowid of the record to be checked
2265
2266
2267 DESCRIPTION
    The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
    The function returns TRUE if the record with this range_info
    is to be filtered out from the stream of records returned by
    multi_range_read_next().
2273
2274 NOTE
    This function is used only as a callback function.
2276
2277 RETURN
2278 1 record with this range_info is to be filtered out from the stream
2279 of records returned by multi_range_read_next()
2280 0 the record is to be left in the stream
2281 */
2282
2283 static
bka_range_seq_skip_record(range_seq_t rseq,char * range_info,uchar * rowid)2284 bool bka_range_seq_skip_record(range_seq_t rseq, char *range_info, uchar *rowid)
2285 {
2286 DBUG_ENTER("bka_range_seq_skip_record");
2287 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
2288 bool res= cache->get_match_flag_by_pos((uchar *) range_info);
2289 DBUG_RETURN(res);
2290 }
2291
2292 /*
2293 Using BKA find matches from the next table for records from the join buffer
2294
2295 SYNOPSIS
2296 join_matching_records()
2297 skip_last do not look for matches for the last partial join record
2298
2299 DESCRIPTION
2300 This function can be used only when the table join_tab can be accessed
2301 by keys built over the fields of previous join tables.
2302 The function retrieves all partial join records from the join buffer and
2303 for each of them builds the key value to access join_tab, performs index
2304 look-up with this key and selects matching records yielded by this look-up
2305 If a match is found the function will call the sub_select function trying
2306 to look for matches for the remaining join operations.
2307 This function currently is called only from the function join_records.
2308 It's assumed that this function is always called with the skip_last
2309 parameter equal to false.
2310
2311 NOTES
2312 The function produces all matching extensions for the records in the
2313 join buffer following the path of the Batched Key Access algorithm.
2314 When an outer join operation is performed all unmatched records from
2315 the join buffer must be extended by null values. The function
2316 join_null_complements serves this purpose.
2317 The Batched Key Access algorithm assumes that key accesses are batched.
2318 In other words it assumes that, first, either keys themselves or the
2319 corresponding rowids (primary keys) are accumulated in a buffer, then
2320 data rows from join_tab are fetched for all of them. When a row is
2321 fetched it is always returned with a reference to the key by which it
2322 has been accessed.
2323 When key values are batched we can save on the number of the server
2324 requests for index lookups. For the remote engines, like NDB cluster, it
2325 essentially reduces the number of round trips between the server and
2326 the engine when performing a join operation.
2327 When the rowids for the keys are batched we can optimize the order
2328 in what we fetch the data for this rowids. The performance benefits of
2329 this optimization can be significant for such engines as MyISAM, InnoDB.
2330 What is exactly batched are hidden behind implementations of
2331 MRR handler interface that is supposed to be appropriately chosen
    for each engine. If for an engine no specific implementation of the MRR
    interface is supplied then the default implementation is used. This
2334 implementation actually follows the path of Nested Loops Join algorithm.
2335 In this case BKA join surely will demonstrate a worse performance than
2336 NL join.
2337
2338 RETURN
2339 return one of enum_nested_loop_state
2340 */
2341
enum_nested_loop_state JOIN_CACHE_BKA::join_matching_records(bool skip_last)
{
  /* The value of skip_last must be always FALSE when this function is called */
  DBUG_ASSERT(!skip_last);

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  /*
    Set functions to iterate over keys in the join buffer. The two
    optional callbacks are installed only when they are useful:
    skip-record only for FirstMatch-style joins, skip-index-tuple only
    when an index condition has been pushed to this table.
  */
  RANGE_SEQ_IF seq_funcs= { bka_range_seq_init,
                            bka_range_seq_next,
                            check_only_first_match ?
                              bka_range_seq_skip_record : 0,
                            join_tab->cache_idx_cond ?
                              bka_skip_index_tuple : 0 };

  if (init_join_matching_records(&seq_funcs, records))
    return NESTED_LOOP_ERROR;

  int error;
  handler *file= join_tab->table->file;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  uchar *rec_ptr= NULL;

  /* rec_ptr receives range->ptr: the matching record's buffer position */
  while (!(error= file->multi_range_read_next((char **) &rec_ptr)))
  {
    if (join->thd->killed)
    {
      /* The user has aborted the execution of the query */
      join->thd->send_kill_message();
      return NESTED_LOOP_KILLED;
    }
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);
    /*
      If only the first match is needed and it has been already found
      for the associated partial join record then the returned candidate
      is discarded.
    */
    if (rc == NESTED_LOOP_OK &&
        (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
    {
      get_record_by_pos(rec_ptr);
      rc= generate_full_extensions(rec_ptr);
      if (rc != NESTED_LOOP_OK)
        return rc;
    }
  }

  /* EOF is the normal termination; any other positive error is fatal */
  if (error > 0 && error != HA_ERR_END_OF_FILE)
    return NESTED_LOOP_ERROR;
  return rc;
}
2396
2397
2398
2399 /*
2400 Prepare to search for records that match records from the join buffer
2401
2402 SYNOPSIS
2403 init_join_matching_records()
2404 seq_funcs structure of range sequence interface
2405 ranges number of keys/ranges in the sequence
2406
2407 DESCRIPTION
2408 This function calls the multi_range_read_init function to set up
2409 the BKA process of generating the keys from the records in the join
2410 buffer and looking for matching records from the table to be joined.
2411 The function passes as a parameter a structure of functions that
2412 implement the range sequence interface. This interface is used to
2413 enumerate all generated keys and optionally to filter the matching
2414 records returned by the multi_range_read_next calls from the
2415 intended invocation of the join_matching_records method. The
2416 multi_range_read_init function also receives the parameters for
2417 MRR buffer to be used and flags specifying the mode in which
2418 this buffer will be functioning.
2419 The number of keys in the sequence expected by multi_range_read_init
2420 is passed through the parameter ranges.
2421
2422 RETURN
2423 False if ok, True otherwise.
2424 */
2425
2426 bool
init_join_matching_records(RANGE_SEQ_IF * seq_funcs,uint ranges)2427 JOIN_CACHE_BKA::init_join_matching_records(RANGE_SEQ_IF *seq_funcs, uint ranges)
2428 {
2429 handler *file= join_tab->table->file;
2430
2431 join_tab->table->null_row= 0;
2432
2433 /* Dynamic range access is never used with BKA */
2434 DBUG_ASSERT(join_tab->use_quick != QS_DYNAMIC_RANGE);
2435
2436 init_mrr_buff();
2437
2438 /*
2439 Prepare to iterate over keys from the join buffer and to get
2440 matching candidates obtained with MMR handler functions.
2441 */
2442 if (!file->inited)
2443 {
2444 const int error= file->ha_index_init(join_tab->ref.key, 1);
2445 if (error)
2446 {
2447 file->print_error(error, MYF(0));
2448 return error;
2449 }
2450 }
2451 return
2452 file->multi_range_read_init(seq_funcs, (void*) this, ranges,
2453 mrr_mode, &mrr_buff);
2454 }
2455
2456
2457 /**
2458 Reads all flag fields of a positioned record from the join buffer.
2459 Including all flag fields (of this record) stored in the previous join
2460 buffers.
2461
2462 @param rec_ptr position of the first field of the record in the join buffer
2463 */
read_all_flag_fields_by_pos(uchar * rec_ptr)2464 void JOIN_CACHE::read_all_flag_fields_by_pos(uchar *rec_ptr)
2465 {
2466 uchar * const save_pos= pos;
2467 pos= rec_ptr;
2468 read_some_flag_fields(); // moves 'pos'...
2469 pos= save_pos; // ... so we restore it.
2470 if (prev_cache)
2471 {
2472 // position of this record in previous join buffer:
2473 rec_ptr= prev_cache->get_rec_ref(rec_ptr);
2474 // recurse into previous buffer to read missing flag fields
2475 prev_cache->read_all_flag_fields_by_pos(rec_ptr);
2476 }
2477 }
2478
2479
2480 /*
2481 Get the key built over the next record from BKA join buffer
2482
2483 SYNOPSIS
2484 get_next_key()
2485 key pointer to the buffer where the key value is to be placed
2486
2487 DESCRIPTION
2488 The function reads key fields from the current record in the join buffer.
2489 and builds the key value out of these fields that will be used to access
2490 the 'join_tab' table. Some of key fields may belong to previous caches.
2491 They are accessed via record references to the record parts stored in the
2492 previous join buffers. The other key fields always are placed right after
2493 the flag fields of the record.
2494 If the key is embedded, which means that its value can be read directly
2495 from the join buffer, then *key is set to the beginning of the key in
2496 this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
    The function returns the length of the key if it succeeds to read it.
    It is assumed that the function starts reading at the position of
2499 the record length which is provided for each records in a BKA cache.
2500 After the key is built the 'pos' value points to the first position after
2501 the current record.
2502 The function returns 0 if the initial position is after the beginning
2503 of the record fields for last record from the join buffer.
2504
2505 RETURN
2506 length of the key value - if the starting value of 'pos' points to
2507 the position before the fields for the last record,
2508 0 - otherwise.
2509 */
2510
uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
{
  uint len;
  uint32 rec_len;
  uchar *init_pos;
  JOIN_CACHE *cache;

  if (records == 0)
    return 0;

  /* Any record in a BKA cache is prepended with its length, which we need */
  DBUG_ASSERT(with_length);

  /*
    Read keys until find non-ignorable one or EOF.
    Unlike in JOIN_CACHE::read_some_record_fields()), pos>=last_rec_pos means
    EOF, because we are not at fields' start, and previous record's fields
    might be empty.
  */
  for(len= 0 ; (len == 0) && pos < last_rec_pos ; pos= init_pos + rec_len)
  {
    /* Read the length of the record */
    rec_len= get_rec_length(pos);
    pos+= size_of_rec_len;
    init_pos= pos;

    /* Read a reference to the previous cache if any */
    uchar *prev_rec_ptr= NULL;
    if (prev_cache)
    {
      pos+= prev_cache->get_size_of_rec_offset();
      // position of this record in previous buffer:
      prev_rec_ptr= prev_cache->get_rec_ref(pos);
    }

    curr_rec_pos= pos;

    // Read all flag fields of the record, in two steps:
    read_some_flag_fields();    // 1) flag fields stored in this buffer
    if (prev_cache)             // 2) flag fields stored in previous buffers
      prev_cache->read_all_flag_fields_by_pos(prev_rec_ptr);

    if (use_emb_key)
    {
      /* An embedded key is taken directly from the join buffer */
      *key= pos;
      len= emb_key_length;
      DBUG_ASSERT(len != 0);
    }
    else
    {
      /*
        Read key arguments from previous caches if there are any such
        fields
      */
      if (external_key_arg_fields)
      {
        uchar *rec_ptr= curr_rec_pos;
        uint key_arg_count= external_key_arg_fields;
        /* Descriptors for external key arguments sit just before blob_ptr */
        CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
        /*
          Walk backwards through the chain of caches, copying referenced
          key-argument fields until all external arguments are read.
        */
        for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
        {
          uint len2= 0;
          DBUG_ASSERT(cache);
          rec_ptr= cache->get_rec_ref(rec_ptr);
          /* Skip over caches that store no referenced fields */
          while (!cache->referenced_fields)
          {
            cache= cache->prev_cache;
            DBUG_ASSERT(cache);
            rec_ptr= cache->get_rec_ref(rec_ptr);
          }
          while (key_arg_count &&
                 cache->read_referenced_field(*copy_ptr, rec_ptr, &len2))
          {
            copy_ptr++;
            --key_arg_count;
          }
        }
      }

      /*
        Read the other key arguments from the current record. The fields for
        these arguments are always first in the sequence of the record's
        fields.
      */
      CACHE_FIELD *copy= field_descr+flag_fields;
      CACHE_FIELD *copy_end= copy+local_key_arg_fields;
      bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
      for ( ; copy < copy_end; copy++)
        read_record_field(copy, blob_in_rec_buff);

      TABLE_REF *ref= &join_tab->ref;
      if (ref->impossible_null_ref())
      {
        DBUG_PRINT("info", ("JOIN_CACHE_BKA::get_next_key null_rejected"));
        /* this key cannot give a match, don't collect it, go read next key */
        len= 0;
      }
      else
      {
        /* Build the key over the fields read into the record buffers */
        cp_buffer_from_ref(join->thd, join_tab->table, ref);
        *key= ref->key_buff;
        len= ref->key_length;
        DBUG_ASSERT(len != 0);
      }
    }
  }
  return len;
}
2621
2622
2623 /*
2624 Initialize a BKA_UNIQUE cache
2625
2626 SYNOPSIS
2627 init()
2628
2629 DESCRIPTION
2630 The function initializes the cache structure. It supposed to be called
2631 right after a constructor for the JOIN_CACHE_BKA_UNIQUE.
2632 The function allocates memory for the join buffer and for descriptors of
2633 the record fields stored in the buffer.
2634 The function also estimates the number of hash table entries in the hash
2635 table to be used and initializes this hash table.
2636
2637 NOTES
2638 The code of this function should have been included into the constructor
2639 code itself. However the new operator for the class JOIN_CACHE_BKA_UNIQUE
2640 would never fail while memory allocation for the join buffer is not
2641 absolutely unlikely to fail. That's why this memory allocation has to be
2642 placed in a separate function that is called in a couple with a cache
2643 constructor.
2644 It is quite natural to put almost all other constructor actions into
2645 this function.
2646
2647 RETURN
2648 0 initialization with buffer allocations has been succeeded
2649 1 otherwise
2650 */
2651
int JOIN_CACHE_BKA_UNIQUE::init()
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::init");

  hash_table= 0;
  key_entries= 0;

  /* Base-class init allocates the join buffer and field descriptors */
  if ((rc= JOIN_CACHE_BKA::init()))
    DBUG_RETURN (rc);

  key_length= ref->key_length;

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= max(2U, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() +  // key chain header
                      size_of_key_ofs +           // reference to the next key
                      (use_emb_key ? get_size_of_rec_offset() : key_length);

    /* Records that fit assuming every record needs its own key entry */
    uint n= buff_size / (pack_length+key_entry_length+size_of_key_ofs);

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in the join buffer.
    */
    uint max_n= buff_size / (pack_length-length+
                             key_entry_length+size_of_key_ofs);

    /* Size the hash table for a load factor of about 0.7 */
    hash_entries= (uint) (n / 0.7);

    /* Stop once size_of_key_ofs can address the whole key-entry area */
    if (offset_size(max_n*key_entry_length) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table, placed at the end of the join buffer */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  pack_length+= key_entry_length;
  pack_length_with_blob_ptrs+= get_size_of_rec_offset() + key_entry_length;

  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With embedded keys the data fields start after the flag fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(rc);
}
2718
2719
2720 /*
2721 Reset the JOIN_CACHE_BKA_UNIQUE buffer for reading/writing
2722
2723 SYNOPSIS
2724 reset_cache()
2725 for_writing if it's TRUE the function reset the buffer for writing
2726
2727 DESCRIPTION
2728 This implementation of the virtual function reset_cache() resets the join
2729 buffer of the JOIN_CACHE_BKA_UNIQUE class for reading or writing.
2730 Additionally to what the default implementation does this function
2731 cleans up the hash table allocated within the buffer.
2732
2733 RETURN
2734 none
2735 */
2736
reset_cache(bool for_writing)2737 void JOIN_CACHE_BKA_UNIQUE::reset_cache(bool for_writing)
2738 {
2739 this->JOIN_CACHE::reset_cache(for_writing);
2740 if (for_writing && hash_table)
2741 cleanup_hash_table();
2742 curr_key_entry= hash_table;
2743 }
2744
2745 /*
2746 Add a record into the JOIN_CACHE_BKA_UNIQUE buffer
2747
2748 SYNOPSIS
2749 put_record()
2750
2751 DESCRIPTION
2752 This implementation of the virtual function put_record writes the next
2753 matching record into the join buffer of the JOIN_CACHE_BKA_UNIQUE class.
2754 Additionally to what the default implementation does this function
2755 performs the following.
2756 It extracts from the record the key value used in lookups for matching
2757 records and searches for this key in the hash tables from the join cache.
2758 If it finds the key in the hash table it joins the record to the chain
2759 of records with this key. If the key is not found in the hash table the
2760 key is placed into it and a chain containing only the newly added record
2761 is attached to the key entry. The key value is either placed in the hash
2762 element added for the key or, if the use_emb_key flag is set, remains in
2763 the record from the partial join.
2764
2765 RETURN
2766 TRUE if it has been decided that it should be the last record
2767 in the join buffer,
2768 FALSE otherwise
2769 */
2770
bool
JOIN_CACHE_BKA_UNIQUE::put_record_in_cache()
{
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  TABLE_REF *ref= &join_tab->ref;
  /* The record is prefixed with a link to the next record in its chain */
  uchar *next_ref_ptr= pos;
  pos+= get_size_of_rec_offset();

  // Write record to join buffer
  bool is_full= JOIN_CACHE::put_record_in_cache();

  if (use_emb_key)
  {
    key= get_curr_emb_key();
    // Embedded is not used if one of the key columns is nullable
  }
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
    if (ref->impossible_null_ref())
    {
      /*
        The row just put into the buffer has a NULL-value for one of
        the ref-columns and the ref access is NULL-rejecting, this key cannot
        give a match. So we don't insert it into the hash table.
        We still stored the record into the buffer (put_record() call above),
        or we would later miss NULL-complementing of this record.
      */
      DBUG_PRINT("info", ("JOIN_CACHE_BKA_UNIQUE::put_record null_rejected"));
      return is_full;
    }
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flatten representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
    */
    /* Key entries grow downwards from the end of the buffer */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* The one-element record chain points to itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key kept inside the record */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Store a copy of the key value itself */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
2857
2858
2859 /*
2860 Read the next record from the JOIN_CACHE_BKA_UNIQUE buffer
2861
2862 SYNOPSIS
2863 get_record()
2864
2865 DESCRIPTION
2866 Additionally to what the default implementation of the virtual
2867 function get_record does this implementation skips the link element
2868 used to connect the records with the same key into a chain.
2869
2870 RETURN
2871 TRUE - there are no more records to read from the join buffer
2872 FALSE - otherwise
2873 */
2874
get_record()2875 bool JOIN_CACHE_BKA_UNIQUE::get_record()
2876 {
2877 pos+= get_size_of_rec_offset();
2878 return this->JOIN_CACHE::get_record();
2879 }
2880
2881
2882 /*
2883 Skip record from the JOIN_CACHE_BKA_UNIQUE join buffer if its match flag is on
2884
2885 SYNOPSIS
2886 skip_record_if_match()
2887
2888 DESCRIPTION
2889 This implementation of the virtual function skip_record_if_match does
2890 the same as the default implementation does, but it takes into account
2891 the link element used to connect the records with the same key into a chain.
2892
2893 RETURN
2894 TRUE - the match flag is on and the record has been skipped
2895 FALSE - the match flag is off
2896 */
2897
skip_record_if_match()2898 bool JOIN_CACHE_BKA_UNIQUE::skip_record_if_match()
2899 {
2900 uchar *save_pos= pos;
2901 pos+= get_size_of_rec_offset();
2902 if (!this->JOIN_CACHE::skip_record_if_match())
2903 {
2904 pos= save_pos;
2905 return FALSE;
2906 }
2907 return TRUE;
2908 }
2909
2910
2911 /*
2912 Search for a key in the hash table of the join buffer
2913
2914 SYNOPSIS
2915 key_search()
2916 key pointer to the key value
2917 key_len key value length
2918 key_ref_ptr OUT position of the reference to the next key from
2919 the hash element for the found key , or
2920 a position where the reference to the the hash
2921 element for the key is to be added in the
2922 case when the key has not been found
2923
2924 DESCRIPTION
2925 The function looks for a key in the hash table of the join buffer.
2926 If the key is found the functionreturns the position of the reference
2927 to the next key from to the hash element for the given key.
2928 Otherwise the function returns the position where the reference to the
2929 newly created hash element for the given key is to be added.
2930
2931 RETURN
2932 TRUE - the key is found in the hash table
2933 FALSE - otherwise
2934 */
2935
key_search(uchar * key,uint key_len,uchar ** key_ref_ptr)2936 bool JOIN_CACHE_BKA_UNIQUE::key_search(uchar *key, uint key_len,
2937 uchar **key_ref_ptr)
2938 {
2939 bool is_found= FALSE;
2940 uint idx= get_hash_idx(key, key_length);
2941 uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
2942 while (!is_null_key_ref(ref_ptr))
2943 {
2944 uchar *next_key;
2945 ref_ptr= get_next_key_ref(ref_ptr);
2946 next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
2947 ref_ptr-key_length;
2948
2949 if (memcmp(next_key, key, key_len) == 0)
2950 {
2951 is_found= TRUE;
2952 break;
2953 }
2954 }
2955 *key_ref_ptr= ref_ptr;
2956 return is_found;
2957 }
2958
2959
2960 /*
2961 Calclulate hash value for a key in the hash table of the join buffer
2962
2963 SYNOPSIS
2964 get_hash_idx()
2965 key pointer to the key value
2966 key_len key value length
2967
2968 DESCRIPTION
2969 The function calculates an index of the hash entry in the hash table
2970 of the join buffer for the given key
2971
2972 RETURN
2973 the calculated index of the hash entry for the given key.
2974 */
2975
get_hash_idx(uchar * key,uint key_len)2976 uint JOIN_CACHE_BKA_UNIQUE::get_hash_idx(uchar* key, uint key_len)
2977 {
2978 ulong nr= 1;
2979 ulong nr2= 4;
2980 uchar *pos= key;
2981 uchar *end= key+key_len;
2982 for (; pos < end ; pos++)
2983 {
2984 nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
2985 nr2+= 3;
2986 }
2987 return nr % hash_entries;
2988 }
2989
2990
2991 /*
2992 Clean up the hash table of the join buffer
2993
2994 SYNOPSIS
2995 cleanup_hash_table()
2996 key pointer to the key value
2997 key_len key value length
2998
2999 DESCRIPTION
3000 The function cleans up the hash table in the join buffer removing all
3001 hash elements from the table.
3002
3003 RETURN
3004 none
3005 */
3006
cleanup_hash_table()3007 void JOIN_CACHE_BKA_UNIQUE:: cleanup_hash_table()
3008 {
3009 last_key_entry= hash_table;
3010 memset(hash_table, 0, (buff+buff_size)-hash_table);
3011 key_entries= 0;
3012 }
3013
3014
3015 /*
3016 Initialize retrieval of range sequence for BKA_UNIQUE algorithm
3017
3018 SYNOPSIS
3019 bka_range_seq_init()
3020 init_params pointer to the BKA_INIQUE join cache object
3021 n_ranges the number of ranges obtained
3022 flags combination of HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY
3023
3024 DESCRIPTION
3025 The function interprets init_param as a pointer to a JOIN_CACHE_BKA_UNIQUE
3026 object. The function prepares for an iteration over the unique join keys
3027 built over the records from the cache join buffer.
3028
3029 NOTE
3030 This function are used only as a callback function.
3031
3032 RETURN
3033 init_param value that is to be used as a parameter of
3034 bka_unique_range_seq_next()
3035 */
3036
3037 static
bka_unique_range_seq_init(void * init_param,uint n_ranges,uint flags)3038 range_seq_t bka_unique_range_seq_init(void *init_param, uint n_ranges,
3039 uint flags)
3040 {
3041 DBUG_ENTER("bka_unique_range_seq_init");
3042 JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) init_param;
3043 cache->reset_cache(false);
3044 DBUG_RETURN((range_seq_t) init_param);
3045 }
3046
3047
3048 /*
3049 Get the key over the next record from the join buffer used by BKA_UNIQUE
3050
3051 SYNOPSIS
3052 bka_unique_range_seq_next()
3053 seq value returned by bka_unique_range_seq_init()
3054 range OUT reference to the next range
3055
3056 DESCRIPTION
3057 The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3058 object. The function returns a pointer to the range descriptor
3059 for the next unique key built over records from the join buffer.
3060
3061 NOTE
3062 This function are used only as a callback function.
3063
3064 RETURN
3065 0 ok, the range structure filled with info about the next key
3066 1 no more ranges
3067 */
3068
3069 static
bka_unique_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)3070 uint bka_unique_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
3071 {
3072 DBUG_ENTER("bka_unique_range_seq_next");
3073 JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3074 TABLE_REF *ref= &cache->join_tab->ref;
3075 key_range *start_key= &range->start_key;
3076 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
3077 {
3078 start_key->keypart_map= (1 << ref->key_parts) - 1;
3079 start_key->flag= HA_READ_KEY_EXACT;
3080 range->end_key= *start_key;
3081 range->end_key.flag= HA_READ_AFTER_KEY;
3082 range->ptr= (char *) cache->get_curr_key_chain();
3083 range->range_flag= EQ_RANGE;
3084 DBUG_RETURN(0);
3085 }
3086 DBUG_RETURN(1);
3087 }
3088
3089
3090 /*
3091 Check whether range_info orders to skip the next record from BKA_UNIQUE buffer
3092
3093 SYNOPSIS
3094 bka_unique_range_seq_skip_record()
3095 seq value returned by bka_unique_range_seq_init()
3096 range_info information about the next range
3097 rowid [NOT USED] rowid of the record to be checked (not used)
3098
3099 DESCRIPTION
3100 The function interprets seq as a pointer to the JOIN_CACHE_BKA_UNIQUE
3101 object. The function returns TRUE if the record with this range_info
3102 is to be filtered out from the stream of records returned by
3103 multi_range_read_next().
3104
3105 NOTE
3106 This function are used only as a callback function.
3107
3108 RETURN
3109 1 record with this range_info is to be filtered out from the stream
3110 of records returned by multi_range_read_next()
3111 0 the record is to be left in the stream
3112 */
3113
3114 static
bka_unique_range_seq_skip_record(range_seq_t rseq,char * range_info,uchar * rowid)3115 bool bka_unique_range_seq_skip_record(range_seq_t rseq, char *range_info,
3116 uchar *rowid)
3117 {
3118 DBUG_ENTER("bka_unique_range_seq_skip_record");
3119 JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3120 bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
3121 DBUG_RETURN(res);
3122 }
3123
3124
/**
  Check if the record combination matches the index condition

  @param rseq        Value returned by bka_range_seq_init()
  @param range_info  MRR range association data

  @sa JOIN_CACHE_BKA::skip_index_tuple().
  This function is the variant for use with
  JOIN_CACHE_BKA_UNIQUE. The difference from the JOIN_CACHE_BKA case is that
  there may be multiple previous table record combinations that share the
  same key, i.e. they map to the same MRR range. And for all of those
  records, we have just done one single key lookup in the current table,
  found an index tuple. If in this function we discard this index tuple, all
  those records will be eliminated from the result. Thus, in this function
  we can discard the index tuple only if _all_ those cached records and the
  index tuple don't match the pushed index condition. It's a "group-wide
  decision".
  Thus we must here loop through all previous table records combinations
  that match the given MRR range key range_info, searching for a single one
  matching the index condition.
  If we find none, we can safely discard the index tuple here, which avoids
  retrieving the record from the current table.
  If we instead find one, we cannot discard the index tuple here; later in
  execution, in join_matching_records(), we can finally take one
  "case-by-case decision" per cached record, by checking again the index
  condition (@sa JOIN_CACHE_BKA_UNIQUE::check_match).

  @note
  Possible optimization:
  Before we unpack the record from a previous table
  check if this table is used in the condition.
  If so then unpack the record otherwise skip the unpacking.
  This should be done by a special virtual method
  get_partial_record_by_pos().

  @retval false  The record combination satisfies the index condition
  @retval true   Otherwise
*/

bool JOIN_CACHE_BKA_UNIQUE::skip_index_tuple(range_seq_t rseq, char *range_info)
{
  DBUG_ENTER("JOIN_CACHE_BKA_UNIQUE::skip_index_tuple");
  JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
  /*
    range_info identifies the key entry; the entry stores a reference to
    the last element of the circular chain of records sharing that key.
  */
  uchar *last_rec_ref_ptr= cache->get_next_rec_ref((uchar*) range_info);
  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
  do
  {
    /* Advance to the next record of the circular chain */
    next_rec_ref_ptr= cache->get_next_rec_ref(next_rec_ref_ptr);
    uchar *rec_ptr= next_rec_ref_ptr + cache->rec_fields_offset;
    /* Unpack the cached partial join record so the condition can read it */
    cache->get_record_by_pos(rec_ptr);
    if (join_tab->cache_idx_cond->val_int())
      DBUG_RETURN(FALSE);   /* one matching record keeps the index tuple */
  } while(next_rec_ref_ptr != last_rec_ref_ptr);
  /* No cached record of this key chain satisfied the pushed condition */
  DBUG_RETURN(TRUE);
}
3182
3183
3184 /*
3185 Check if the record combination matches the index condition
3186
3187 SYNOPSIS
3188 bka_unique_skip_index_tuple()
3189 rseq Value returned by bka_range_seq_init()
3190 range_info MRR range association data
3191
3192 DESCRIPTION
3193 This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
3194 see comments there.
3195
3196 NOTE
3197 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
3198
3199 RETURN
3200 0 The record combination satisfies the index condition
3201 1 Otherwise
3202 */
3203
3204 static
bka_unique_skip_index_tuple(range_seq_t rseq,char * range_info)3205 bool bka_unique_skip_index_tuple(range_seq_t rseq, char *range_info)
3206 {
3207 DBUG_ENTER("bka_unique_skip_index_tuple");
3208 JOIN_CACHE_BKA_UNIQUE *cache= (JOIN_CACHE_BKA_UNIQUE *) rseq;
3209 DBUG_RETURN(cache->skip_index_tuple(rseq, range_info));
3210 }
3211
3212
/*
  Using BKA_UNIQUE find matches from the next table for records from join buffer

  SYNOPSIS
    join_matching_records()
      skip_last    do not look for matches for the last partial join record

  DESCRIPTION
    This function can be used only when the table join_tab can be accessed
    by keys built over the fields of previous join tables.
    The function retrieves all keys from the hash table of the join buffer
    built for partial join records from the buffer. For each of these keys
    the function performs an index lookup and tries to match records yielded
    by this lookup with records from the join buffer attached to the key.
    If a match is found the function will call the sub_select function trying
    to look for matches for the remaining join operations.
    This function does not assume that matching records are necessarily
    returned with references to the keys by which they were found. If the call
    of the function multi_range_read_init returns flags with
    HA_MRR_NO_ASSOCIATION then a search for the key built from the returned
    record is carried on. The search is performed by probing in the hash
    table of the join buffer.
    This function currently is called only from the function join_records.
    It's assumed that this function is always called with the skip_last
    parameter equal to false.

  RETURN
    return one of enum_nested_loop_state
*/

enum_nested_loop_state
JOIN_CACHE_BKA_UNIQUE::join_matching_records(bool skip_last)
{
  /* The value of skip_last must be always FALSE when this function is called */
  DBUG_ASSERT(!skip_last);

  /* Return at once if there are no records in the join buffer */
  if (!records)
    return NESTED_LOOP_OK;

  const bool no_association= mrr_mode & HA_MRR_NO_ASSOCIATION;
  /*
    Set functions to iterate over keys in the join buffer.
    skip_record is installed only if first-match semantics apply AND the
    handler returns range associations (otherwise the key chain cannot be
    identified before the record is fetched).
  */
  RANGE_SEQ_IF seq_funcs= { bka_unique_range_seq_init,
                            bka_unique_range_seq_next,
                            check_only_first_match && !no_association ?
                              bka_unique_range_seq_skip_record : 0,
                            join_tab->cache_idx_cond ?
                              bka_unique_skip_index_tuple : 0  };

  if (init_join_matching_records(&seq_funcs, key_entries))
    return NESTED_LOOP_ERROR;

  int error;
  uchar *key_chain_ptr;
  handler *file= join_tab->table->file;
  enum_nested_loop_state rc= NESTED_LOOP_OK;

  /* Each MRR result row comes with (or leads us to) a chain of cached
     records that share the key by which the row was found */
  while (!(error= file->multi_range_read_next((char **) &key_chain_ptr)))
  {
    if (no_association)
    {
      uchar *key_ref_ptr;
      TABLE *table= join_tab->table;
      TABLE_REF *ref= &join_tab->ref;
      KEY *keyinfo= table->key_info+ref->key;
      /*
        Build the key value out of the record returned by the call of
        multi_range_read_next in the record buffer
      */
      key_copy(ref->key_buff, table->record[0], keyinfo, ref->key_length);
      /* Look for this key in the join buffer */
      if (!key_search(ref->key_buff, ref->key_length, &key_ref_ptr))
        continue;
      key_chain_ptr= key_ref_ptr+get_size_of_key_offset();
    }

    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Walk the circular chain of cached records attached to this key */
    uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
    uchar *next_rec_ref_ptr= last_rec_ref_ptr;
    do
    {
      next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
      uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;

      if (join->thd->killed)
      {
        /* The user has aborted the execution of the query */
        join->thd->send_kill_message();
        return NESTED_LOOP_KILLED;
      }
      /*
        If only the first match is needed and it has been already found
        for the associated partial join record then the returned candidate
        is discarded.
      */
      if (rc == NESTED_LOOP_OK &&
          (!check_only_first_match || !get_match_flag_by_pos(rec_ptr)))
      {
        get_record_by_pos(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK)
          return rc;
      }
    }
    while (next_rec_ref_ptr != last_rec_ref_ptr);
  }

  /* HA_ERR_END_OF_FILE is the normal termination of the MRR scan */
  if (error > 0 && error != HA_ERR_END_OF_FILE)
    return NESTED_LOOP_ERROR;
  return rc;
}
3326
3327
3328 /*
3329 Check whether all records in a key chain have their match flags set on
3330
3331 SYNOPSIS
3332 check_all_match_flags_for_key()
3333 key_chain_ptr
3334
3335 DESCRIPTION
3336 This function retrieves records in the given circular chain and checks
3337 whether their match flags are set on. The parameter key_chain_ptr shall
3338 point to the position in the join buffer storing the reference to the
3339 last element of this chain.
3340
3341 RETURN
3342 TRUE if each retrieved record has its match flag set on
3343 FALSE otherwise
3344 */
3345
check_all_match_flags_for_key(uchar * key_chain_ptr)3346 bool JOIN_CACHE_BKA_UNIQUE::check_all_match_flags_for_key(uchar *key_chain_ptr)
3347 {
3348 uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3349 uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3350 do
3351 {
3352 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3353 uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3354 if (!get_match_flag_by_pos(rec_ptr))
3355 return FALSE;
3356 }
3357 while (next_rec_ref_ptr != last_rec_ref_ptr);
3358 return TRUE;
3359 }
3360
3361
3362 /*
3363 Get the next key built for the records from BKA_UNIQUE join buffer
3364
3365 SYNOPSIS
3366 get_next_key()
3367 key pointer to the buffer where the key value is to be placed
3368
3369 DESCRIPTION
3370 The function reads the next key value stored in the hash table of the
3371 join buffer. Depending on the value of the use_emb_key flag of the
3372 join cache the value is read either from the table itself or from
3373 the record field where it occurs.
3374
3375 RETURN
3376 length of the key value - if the starting value of 'cur_key_entry' refers
3377 to the position after that referred by the the value of 'last_key_entry'
3378 0 - otherwise.
3379 */
3380
get_next_key(uchar ** key)3381 uint JOIN_CACHE_BKA_UNIQUE::get_next_key(uchar ** key)
3382 {
3383 if (curr_key_entry == last_key_entry)
3384 return 0;
3385
3386 curr_key_entry-= key_entry_length;
3387
3388 *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
3389
3390 DBUG_ASSERT(*key >= buff && *key < hash_table);
3391
3392 return key_length;
3393 }
3394
3395 /**
3396 Check matching to a partial join record from the join buffer, an
3397 implementation specialized for JOIN_CACHE_BKA_UNIQUE.
3398 Only JOIN_CACHE_BKA_UNIQUE needs that, because it's the only cache using
3399 distinct keys.
3400 JOIN_CACHE_BKA, on the other hand, does one key lookup per cached
3401 record, so can take a per-record individualized decision for the pushed
3402 index condition as soon as it has the index tuple.
3403 @sa JOIN_CACHE_BKA_UNIQUE::skip_index_tuple
3404 @sa JOIN_CACHE::check_match
3405 */
check_match(uchar * rec_ptr)3406 bool JOIN_CACHE_BKA_UNIQUE::check_match(uchar *rec_ptr)
3407 {
3408 /* recheck pushed down index condition */
3409 if (join_tab->cache_idx_cond != NULL &&
3410 !join_tab->cache_idx_cond->val_int())
3411 return FALSE;
3412 /* continue with generic tests */
3413 return JOIN_CACHE_BKA::check_match(rec_ptr);
3414 }
3415
3416
3417 /****************************************************************************
3418 * Join cache module end
3419 ****************************************************************************/
3420