/* Copyright (C) 2000-2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335  USA */

/**
  @file

  @brief
  join cache optimizations

  @defgroup Query_Optimizer  Query Optimizer
  @{
*/
25
26 #ifdef USE_PRAGMA_IMPLEMENTATION
27 #pragma implementation // gcc: Class implementation
28 #endif
29
30 #include "mariadb.h"
31 #include "key.h"
32 #include "sql_base.h"
33 #include "sql_select.h"
34 #include "opt_subselect.h"
35
36 #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1)
37
38 static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save);
39
/*****************************************************************************
 *  Join cache module
 ******************************************************************************/
43
44 /*
45 Fill in the descriptor of a flag field associated with a join cache
46
47 SYNOPSIS
48 add_field_flag_to_join_cache()
49 str position in a record buffer to copy the field from/to
50 length length of the field
51 field IN/OUT pointer to the field descriptor to fill in
52
53 DESCRIPTION
54 The function fill in the descriptor of a cache flag field to which
55 the parameter 'field' points to. The function uses the first two
56 parameters to set the position in the record buffer from/to which
57 the field value is to be copied and the length of the copied fragment.
58 Before returning the result the function increments the value of
59 *field by 1.
60 The function ignores the fields 'blob_length' and 'ofset' of the
61 descriptor.
62
63 RETURN VALUE
64 the length of the field
65 */
66
67 static
add_flag_field_to_join_cache(uchar * str,uint length,CACHE_FIELD ** field)68 uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field)
69 {
70 CACHE_FIELD *copy= *field;
71 copy->str= str;
72 copy->length= length;
73 copy->type= 0;
74 copy->field= 0;
75 copy->referenced_field_no= 0;
76 (*field)++;
77 return length;
78 }
79
80
81 /*
82 Fill in the descriptors of table data fields associated with a join cache
83
84 SYNOPSIS
85 add_table_data_fields_to_join_cache()
86 tab descriptors of fields from this table are to be filled
87 field_set descriptors for only these fields are to be created
88 field_cnt IN/OUT counter of data fields
89 descr IN/OUT pointer to the first descriptor to be filled
90 field_ptr_cnt IN/OUT counter of pointers to the data fields
91 descr_ptr IN/OUT pointer to the first pointer to blob descriptors
92
93 DESCRIPTION
94 The function fills in the descriptors of cache data fields from the table
95 'tab'. The descriptors are filled only for the fields marked in the
96 bitmap 'field_set'.
97 The function fills the descriptors starting from the position pointed
98 by 'descr'. If an added field is of a BLOB type then a pointer to the
99 its descriptor is added to the array descr_ptr.
100 At the return 'descr' points to the position after the last added
101 descriptor while 'descr_ptr' points to the position right after the
102 last added pointer.
103
104 RETURN VALUE
105 the total length of the added fields
106 */
107
108 static
add_table_data_fields_to_join_cache(JOIN_TAB * tab,MY_BITMAP * field_set,uint * field_cnt,CACHE_FIELD ** descr,uint * field_ptr_cnt,CACHE_FIELD *** descr_ptr)109 uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
110 MY_BITMAP *field_set,
111 uint *field_cnt,
112 CACHE_FIELD **descr,
113 uint *field_ptr_cnt,
114 CACHE_FIELD ***descr_ptr)
115 {
116 Field **fld_ptr;
117 uint len= 0;
118 CACHE_FIELD *copy= *descr;
119 CACHE_FIELD **copy_ptr= *descr_ptr;
120 uint used_fields= bitmap_bits_set(field_set);
121 for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
122 {
123 if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
124 {
125 len+= (*fld_ptr)->fill_cache_field(copy);
126 if (copy->type == CACHE_BLOB)
127 {
128 *copy_ptr= copy;
129 copy_ptr++;
130 (*field_ptr_cnt)++;
131 }
132 copy->field= *fld_ptr;
133 copy->referenced_field_no= 0;
134 copy++;
135 (*field_cnt)++;
136 used_fields--;
137 }
138 }
139 *descr= copy;
140 *descr_ptr= copy_ptr;
141 return len;
142 }
143
144 /*
145 Determine different counters of fields associated with a record in the cache
146
147 SYNOPSIS
148 calc_record_fields()
149
150 DESCRIPTION
151 The function counts the number of total fields stored in a record
152 of the cache and saves this number in the 'fields' member. It also
153 determines the number of flag fields and the number of blobs.
154 The function sets 'with_match_flag' on if 'join_tab' needs a match flag
155 i.e. if it is the first inner table of an outer join or a semi-join.
156
157 RETURN VALUE
158 none
159 */
160
calc_record_fields()161 void JOIN_CACHE::calc_record_fields()
162 {
163 JOIN_TAB *tab;
164
165 if (prev_cache)
166 tab= prev_cache->join_tab;
167 else
168 {
169 if (join_tab->bush_root_tab)
170 {
171 /*
172 --ot1--SJM1--------------ot2--...
173 |
174 |
175 +-it1--...--itN
176 ^____________ this->join_tab is somewhere here,
177 inside an sjm nest.
178
179 The join buffer should store the values of it1.*, it2.*, ..
180 It should not store values of ot1.*.
181 */
182 tab= join_tab->bush_root_tab->bush_children->start;
183 }
184 else
185 {
186 /*
187 -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
188 | | ^
189 | +-it21--...--it2N |
190 | \-- we're somewhere here,
191 +-it11--...--it1N at the top level
192
193 The join buffer should store the values of
194
195 ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...
196
197 that is, we should start from the first non-const top-level table.
198
199 We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
200 not interested in storing the columns of materialization tables
201 themselves. Beause of that, if the first non-const top-level table is a
202 materialized table, we move to its bush_children:
203 */
204 tab= join->join_tab + join->const_tables;
205 if (tab->bush_children)
206 tab= tab->bush_children->start;
207 }
208 }
209 DBUG_ASSERT(!tab->bush_children);
210
211 start_tab= tab;
212 fields= 0;
213 blobs= 0;
214 flag_fields= 0;
215 data_field_count= 0;
216 data_field_ptr_count= 0;
217 referenced_fields= 0;
218
219 /*
220 The following loop will get inside SJM nests, because data may be unpacked
221 to sjm-inner tables.
222 */
223 for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
224 {
225 tab->calc_used_field_length(FALSE);
226 flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
227 flag_fields+= MY_TEST(tab->table->maybe_null);
228 fields+= tab->used_fields;
229 blobs+= tab->used_blobs;
230 }
231 if ((with_match_flag= join_tab->use_match_flag()))
232 flag_fields++;
233 fields+= flag_fields;
234 }
235
236
237 /*
238 Collect information on join key arguments
239
240 SYNOPSIS
241 collect_info_on_key_args()
242
243 DESCRIPTION
244 The function traverses the ref expressions that are used to access the
245 joined table join_tab. For each table 'tab' whose fields are to be stored
246 in the join buffer of the cache the function finds the fields from 'tab'
247 that occur in the ref expressions and marks these fields in the bitmap
248 tab->table->tmp_set. The function counts the number of them stored
249 in this cache and the total number of them stored in the previous caches
250 and saves the results of the counting in 'local_key_arg_fields' and
251 'external_key_arg_fields' respectively.
252
253 NOTES
254 The function does not do anything if no key is used to join the records
255 from join_tab.
256
257 RETURN VALUE
258 none
259 */
260
collect_info_on_key_args()261 void JOIN_CACHE::collect_info_on_key_args()
262 {
263 JOIN_TAB *tab;
264 JOIN_CACHE *cache;
265 local_key_arg_fields= 0;
266 external_key_arg_fields= 0;
267
268 if (!is_key_access())
269 return;
270
271 TABLE_REF *ref= &join_tab->ref;
272 cache= this;
273 do
274 {
275 for (tab= cache->start_tab; tab != cache->join_tab;
276 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
277 {
278 uint key_args;
279 bitmap_clear_all(&tab->table->tmp_set);
280 for (uint i= 0; i < ref->key_parts; i++)
281 {
282 Item *ref_item= ref->items[i];
283 if (!(tab->table->map & ref_item->used_tables()))
284 continue;
285 ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table);
286 }
287 if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
288 {
289 if (cache == this)
290 local_key_arg_fields+= key_args;
291 else
292 external_key_arg_fields+= key_args;
293 }
294 }
295 cache= cache->prev_cache;
296 }
297 while (cache);
298
299 return;
300 }
301
302
303 /*
304 Allocate memory for descriptors and pointers to them associated with the cache
305
306 SYNOPSIS
307 alloc_fields()
308
309 DESCRIPTION
310 The function allocates memory for the array of fields descriptors
311 and the array of pointers to the field descriptors used to copy
312 join record data from record buffers into the join buffer and
313 backward. Some pointers refer to the field descriptor associated
314 with previous caches. They are placed at the beginning of the array
315 of pointers and its total number is stored in external_key_arg_fields.
316 The pointer of the first array is assigned to field_descr and the number
317 of the elements in it is precalculated by the function calc_record_fields.
318 The allocated arrays are adjacent.
319
320 NOTES
321 The memory is allocated in join->thd->mem_root
322
323 RETURN VALUE
324 pointer to the first array
325 */
326
alloc_fields()327 int JOIN_CACHE::alloc_fields()
328 {
329 uint ptr_cnt= external_key_arg_fields+blobs+1;
330 uint fields_size= sizeof(CACHE_FIELD)*fields;
331 field_descr= (CACHE_FIELD*) join->thd->alloc(fields_size +
332 sizeof(CACHE_FIELD*)*ptr_cnt);
333 blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + fields_size);
334 return (field_descr == NULL);
335 }
336
337
338 /*
339 Create descriptors of the record flag fields stored in the join buffer
340
341 SYNOPSIS
342 create_flag_fields()
343
344 DESCRIPTION
345 The function creates descriptors of the record flag fields stored
346 in the join buffer. These are descriptors for:
347 - an optional match flag field,
348 - table null bitmap fields,
349 - table null row fields.
350 The match flag field is created when 'join_tab' is the first inner
351 table of an outer join our a semi-join. A null bitmap field is
352 created for any table whose fields are to be stored in the join
353 buffer if at least one of these fields is nullable or is a BIT field
354 whose bits are partially stored with null bits. A null row flag
355 is created for any table assigned to the cache if it is an inner
356 table of an outer join.
357 The descriptor for flag fields are placed one after another at the
358 beginning of the array of field descriptors 'field_descr' that
359 contains 'fields' elements. If there is a match flag field the
360 descriptor for it is always first in the sequence of flag fields.
361 The descriptors for other flag fields can follow in an arbitrary
362 order.
363 The flag field values follow in a record stored in the join buffer
364 in the same order as field descriptors, with the match flag always
365 following first.
366 The function sets the value of 'flag_fields' to the total number
367 of the descriptors created for the flag fields.
368 The function sets the value of 'length' to the total length of the
369 flag fields.
370
371 RETURN VALUE
372 none
373 */
374
create_flag_fields()375 void JOIN_CACHE::create_flag_fields()
376 {
377 CACHE_FIELD *copy;
378 JOIN_TAB *tab;
379
380 copy= field_descr;
381
382 length=0;
383
384 /* If there is a match flag the first field is always used for this flag */
385 if (with_match_flag)
386 length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
387 sizeof(join_tab->found),
388 ©);
389
390 /* Create fields for all null bitmaps and null row flags that are needed */
391 for (tab= start_tab; tab != join_tab;
392 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
393 {
394 TABLE *table= tab->table;
395
396 /* Create a field for the null bitmap from table if needed */
397 if (tab->used_null_fields || tab->used_uneven_bit_fields)
398 length+= add_flag_field_to_join_cache(table->null_flags,
399 table->s->null_bytes,
400 ©);
401
402 /* Create table for the null row flag if needed */
403 if (table->maybe_null)
404 length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
405 sizeof(table->null_row),
406 ©);
407 }
408
409 /* Theoretically the new value of flag_fields can be less than the old one */
410 flag_fields= (uint)(copy-field_descr);
411 }
412
413
414 /*
415 Create descriptors of the fields used to build access keys to the joined table
416
417 SYNOPSIS
418 create_key_arg_fields()
419
420 DESCRIPTION
421 The function creates descriptors of the record fields stored in the join
422 buffer that are used to build access keys to the joined table. These
423 fields are put into the buffer ahead of other records fields stored in
424 the buffer. Such placement helps to optimize construction of access keys.
425 For each field that is used to build access keys to the joined table but
426 is stored in some other join cache buffer the function saves a pointer
427 to the the field descriptor. The array of such pointers are placed in the
428 the join cache structure just before the array of pointers to the
429 blob fields blob_ptr.
430 Any field stored in a join cache buffer that is used to construct keys
431 to access tables associated with other join caches is called a referenced
432 field. It receives a unique number that is saved by the function in the
433 member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
434 This number is used as index to the array of offsets to the referenced
435 fields that are saved and put in the join cache buffer after all record
436 fields.
437 The function also finds out whether that the keys to access join_tab
438 can be considered as embedded and, if so, sets the flag 'use_emb_key' in
439 this join cache appropriately.
440
441 NOTES.
442 When a key to access the joined table 'join_tab' is constructed the array
443 of pointers to the field descriptors for the external fields is looked
444 through. For each of this pointers we find out in what previous key cache
445 the referenced field is stored. The value of 'referenced_field_no'
446 provides us with the index into the array of offsets for referenced
447 fields stored in the join cache. The offset read by the the index allows
448 us to read the field without reading all other fields of the record
449 stored the join cache buffer. This optimizes the construction of keys
450 to access 'join_tab' when some key arguments are stored in the previous
451 join caches.
452
453 NOTES
454 The function does not do anything if no key is used to join the records
455 from join_tab.
456
457 RETURN VALUE
458 none
459 */
create_key_arg_fields()460 void JOIN_CACHE::create_key_arg_fields()
461 {
462 JOIN_TAB *tab;
463 JOIN_CACHE *cache;
464
465 if (!is_key_access())
466 return;
467
468 /*
469 Save pointers to the cache fields in previous caches
470 that are used to build keys for this key access.
471 */
472 cache= this;
473 uint ext_key_arg_cnt= external_key_arg_fields;
474 CACHE_FIELD *copy;
475 CACHE_FIELD **copy_ptr= blob_ptr;
476 while (ext_key_arg_cnt)
477 {
478 cache= cache->prev_cache;
479 for (tab= cache->start_tab; tab != cache->join_tab;
480 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
481 {
482 CACHE_FIELD *copy_end;
483 MY_BITMAP *key_read_set= &tab->table->tmp_set;
484 /* key_read_set contains the bitmap of tab's fields referenced by ref */
485 if (bitmap_is_clear_all(key_read_set))
486 continue;
487 copy_end= cache->field_descr+cache->fields;
488 for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
489 {
490 /*
491 (1) - when we store rowids for DuplicateWeedout, they have
492 copy->field==NULL
493 */
494 if (copy->field && // (1)
495 copy->field->table == tab->table &&
496 bitmap_is_set(key_read_set, copy->field->field_index))
497 {
498 *copy_ptr++= copy;
499 ext_key_arg_cnt--;
500 if (!copy->referenced_field_no)
501 {
502 /*
503 Register the referenced field 'copy':
504 - set the offset number in copy->referenced_field_no,
505 - adjust the value of the flag 'with_length',
506 - adjust the values of 'pack_length' and
507 of 'pack_length_with_blob_ptrs'.
508 */
509 copy->referenced_field_no= ++cache->referenced_fields;
510 if (!cache->with_length)
511 {
512 cache->with_length= TRUE;
513 uint sz= cache->get_size_of_rec_length();
514 cache->base_prefix_length+= sz;
515 cache->pack_length+= sz;
516 cache->pack_length_with_blob_ptrs+= sz;
517 }
518 cache->pack_length+= cache->get_size_of_fld_offset();
519 cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
520 }
521 }
522 }
523 }
524 }
525 /* After this 'blob_ptr' shall not be be changed */
526 blob_ptr= copy_ptr;
527
528 /* Now create local fields that are used to build ref for this key access */
529 copy= field_descr+flag_fields;
530 for (tab= start_tab; tab != join_tab;
531 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
532 {
533 length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
534 &data_field_count, ©,
535 &data_field_ptr_count,
536 ©_ptr);
537 }
538
539 use_emb_key= check_emb_key_usage();
540
541 return;
542 }
543
544
545 /*
546 Create descriptors of all remaining data fields stored in the join buffer
547
548 SYNOPSIS
549 create_remaining_fields()
550
551 DESCRIPTION
552 The function creates descriptors for all remaining data fields of a
553 record from the join buffer. If the value returned by is_key_access() is
554 false the function creates fields for all read record fields that
555 comprise the partial join record joined with join_tab. Otherwise,
556 for each table tab, the set of the read fields for which the descriptors
557 have to be added is determined as the difference between all read fields
558 and and those for which the descriptors have been already created.
559 The latter are supposed to be marked in the bitmap tab->table->tmp_set.
560 The function increases the value of 'length' to the the total length of
561 the added fields.
562
563 NOTES
564 If is_key_access() returns true the function modifies the value of
565 tab->table->tmp_set for a each table whose fields are stored in the cache.
566 The function calls the method Field::fill_cache_field to figure out
567 the type of the cache field and the maximal length of its representation
568 in the join buffer. If this is a blob field then additionally a pointer
569 to this field is added as an element of the array blob_ptr. For a blob
570 field only the size of the length of the blob data is taken into account.
571 It is assumed that 'data_field_count' contains the number of descriptors
572 for data fields that have been already created and 'data_field_ptr_count'
573 contains the number of the pointers to such descriptors having been
574 stored up to the moment.
575
576 RETURN VALUE
577 none
578 */
579
create_remaining_fields()580 void JOIN_CACHE::create_remaining_fields()
581 {
582 JOIN_TAB *tab;
583 bool all_read_fields= !is_key_access();
584 CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
585 CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;
586
587 for (tab= start_tab; tab != join_tab;
588 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
589 {
590 MY_BITMAP *rem_field_set;
591 TABLE *table= tab->table;
592
593 if (all_read_fields)
594 rem_field_set= table->read_set;
595 else
596 {
597 bitmap_invert(&table->tmp_set);
598 bitmap_intersect(&table->tmp_set, table->read_set);
599 rem_field_set= &table->tmp_set;
600 }
601
602 length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
603 &data_field_count, ©,
604 &data_field_ptr_count,
605 ©_ptr);
606
607 /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
608 if (tab->keep_current_rowid)
609 {
610 copy->str= table->file->ref;
611 if (copy->str)
612 copy->length= table->file->ref_length;
613 else
614 {
615 /* This may happen only for materialized derived tables and views */
616 copy->length= 0;
617 copy->str= (uchar *) table;
618 }
619 copy->type= CACHE_ROWID;
620 copy->field= 0;
621 copy->referenced_field_no= 0;
622 /*
623 Note: this may seem odd, but at this point we have
624 table->file->ref==NULL while table->file->ref_length is already set
625 to correct value.
626 */
627 length += table->file->ref_length;
628 data_field_count++;
629 copy++;
630 }
631 }
632 }
633
634
635
636 /*
637 Calculate and set all cache constants
638
639 SYNOPSIS
640 set_constants()
641
642 DESCRIPTION
643 The function calculates and set all precomputed constants that are used
644 when writing records into the join buffer and reading them from it.
645 It calculates the size of offsets of a record within the join buffer
646 and of a field within a record. It also calculates the number of bytes
647 used to store record lengths.
648 The function also calculates the maximal length of the representation
649 of record in the cache excluding blob_data. This value is used when
650 making a dicision whether more records should be added into the join
651 buffer or not.
652
653 RETURN VALUE
654 none
655 */
656
set_constants()657 void JOIN_CACHE::set_constants()
658 {
659 /*
660 Any record from a BKA cache is prepended with the record length.
661 We use the record length when reading the buffer and building key values
662 for each record. The length allows us not to read the fields that are
663 not needed for keys.
664 If a record has match flag it also may be skipped when the match flag
665 is on. It happens if the cache is used for a semi-join operation or
666 for outer join when the 'not exist' optimization can be applied.
667 If some of the fields are referenced from other caches then
668 the record length allows us to easily reach the saved offsets for
669 these fields since the offsets are stored at the very end of the record.
670 However at this moment we don't know whether we have referenced fields for
671 the cache or not. Later when a referenced field is registered for the cache
672 we adjust the value of the flag 'with_length'.
673 */
674 with_length= is_key_access() ||
675 join_tab->is_inner_table_of_semi_join_with_first_match() ||
676 join_tab->is_inner_table_of_outer_join();
677 /*
678 At this moment we don't know yet the value of 'referenced_fields',
679 but in any case it can't be greater than the value of 'fields'.
680 */
681 uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
682 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
683 sizeof(ulong);
684 /*
685 The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
686 base_prefix_length, pack_length, pack_length_with_blob_ptrs
687 will be recalculated later in this function when we get the estimate
688 for the actual value of the join buffer size.
689 */
690 size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
691 base_prefix_length= (with_length ? size_of_rec_len : 0) +
692 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
693 pack_length= (with_length ? size_of_rec_len : 0) +
694 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
695 length + fields*sizeof(uint);
696 pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
697 min_buff_size= 0;
698 min_records= 1;
699 buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
700 get_min_join_buffer_size());
701 size_of_rec_ofs= offset_size(buff_size);
702 size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
703 size_of_fld_ofs= size_of_rec_len;
704 base_prefix_length= (with_length ? size_of_rec_len : 0) +
705 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
706 /*
707 The size of the offsets for referenced fields will be added later.
708 The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
709 every time when the first reference to the referenced field is registered.
710 */
711 pack_length= (with_length ? size_of_rec_len : 0) +
712 (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
713 length;
714 pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
715 }
716
717
718 /*
719 Get maximum total length of all affixes of a record in the join cache buffer
720
721 SYNOPSIS
722 get_record_max_affix_length()
723
724 DESCRIPTION
725 The function calculates the maximum possible total length of all affixes
726 of a record in the join cache buffer, that is made of:
727 - the length of all prefixes used in this cache,
728 - the length of the match flag if it's needed
729 - the total length of the maximum possible offsets to the fields of
730 a record in the buffer.
731
732 RETURN VALUE
733 The maximum total length of all affixes of a record in the join buffer
734 */
735
get_record_max_affix_length()736 uint JOIN_CACHE::get_record_max_affix_length()
737 {
738 uint len= get_prefix_length() +
739 MY_TEST(with_match_flag) +
740 size_of_fld_ofs * data_field_count;
741 return len;
742 }
743
744
745 /*
746 Get the minimum possible size of the cache join buffer
747
748 SYNOPSIS
749 get_min_join_buffer_size()
750
751 DESCRIPTION
752 At the first its invocation for the cache the function calculates the
753 minimum possible size of the join buffer of the cache. This value depends
754 on the minimal number of records 'min_records' to be stored in the join
755 buffer. The number is supposed to be determined by the procedure that
756 chooses the best access path to the joined table join_tab in the execution
757 plan. After the calculation of the interesting size the function saves it
758 in the field 'min_buff_size' in order to use it directly at the next
759 invocations of the function.
760
761 NOTES
762 Currently the number of minimal records is just set to 1.
763
764 RETURN VALUE
765 The minimal possible size of the join buffer of this cache
766 */
767
get_min_join_buffer_size()768 size_t JOIN_CACHE::get_min_join_buffer_size()
769 {
770 if (!min_buff_size)
771 {
772 size_t len= 0;
773 size_t len_last= 0;
774 for (JOIN_TAB *tab= start_tab; tab != join_tab;
775 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
776 {
777 len+= tab->get_max_used_fieldlength();
778 len_last+= tab->get_used_fieldlength();
779 }
780 size_t len_addon= get_record_max_affix_length() +
781 get_max_key_addon_space_per_record();
782 len+= len_addon;
783 len_last+= len_addon;
784 size_t min_sz= len*(min_records-1) + len_last;
785 min_sz+= pack_length_with_blob_ptrs;
786 size_t add_sz= 0;
787 for (uint i=0; i < min_records; i++)
788 add_sz+= join_tab_scan->aux_buffer_incr(i+1);
789 avg_aux_buffer_incr= add_sz/min_records;
790 min_sz+= add_sz;
791 set_if_bigger(min_sz, 1);
792 min_buff_size= min_sz;
793 }
794 return min_buff_size;
795 }
796
797
798 /*
799 Get the maximum possible size of the cache join buffer
800
801 SYNOPSIS
802 get_max_join_buffer_size()
803
804 optimize_buff_size FALSE <-> do not take more memory than needed for
805 the estimated number of records in the partial join
806
807 DESCRIPTION
808 At the first its invocation for the cache the function calculates the
809 maximum possible size of join buffer for the cache. If the parameter
810 optimize_buff_size true then this value does not exceed the size of the
811 space needed for the estimated number of records 'max_records' in the
812 partial join that joins tables from the first one through join_tab. This
813 value is also capped off by the value of join_tab->join_buffer_size_limit,
814 if it has been set a to non-zero value, and by the value of the system
815 parameter join_buffer_size - otherwise. After the calculation of the
816 interesting size the function saves the value in the field 'max_buff_size'
817 in order to use it directly at the next invocations of the function.
818
819 NOTES
820 Currently the value of join_tab->join_buffer_size_limit is initialized
821 to 0 and is never reset.
822
823 RETURN VALUE
824 The maximum possible size of the join buffer of this cache
825 */
826
get_max_join_buffer_size(bool optimize_buff_size)827 size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
828 {
829 if (!max_buff_size)
830 {
831 size_t max_sz;
832 size_t min_sz= get_min_join_buffer_size();
833 size_t len= 0;
834 for (JOIN_TAB *tab= start_tab; tab != join_tab;
835 tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
836 {
837 len+= tab->get_used_fieldlength();
838 }
839 len+= get_record_max_affix_length();
840 avg_record_length= len;
841 len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
842 space_per_record= len;
843
844 size_t limit_sz= (size_t)join->thd->variables.join_buff_size;
845 if (join_tab->join_buffer_size_limit)
846 set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
847 if (!optimize_buff_size)
848 max_sz= limit_sz;
849 else
850 {
851 if (limit_sz / max_records > space_per_record)
852 max_sz= space_per_record * max_records;
853 else
854 max_sz= limit_sz;
855 max_sz+= pack_length_with_blob_ptrs;
856 set_if_smaller(max_sz, limit_sz);
857 }
858 set_if_bigger(max_sz, min_sz);
859 max_buff_size= max_sz;
860 }
861 return max_buff_size;
862 }
863
864
865 /*
866 Allocate memory for a join buffer
867
868 SYNOPSIS
869 alloc_buffer()
870
871 DESCRIPTION
872 The function allocates a lump of memory for the cache join buffer.
873 Initially the function sets the size of the buffer buff_size equal to
874 the value returned by get_max_join_buffer_size(). If the total size of
875 the space intended to be used for the join buffers employed by the
876 tables from the first one through join_tab exceeds the value of the
877 system parameter join_buff_space_limit, then the function first tries
878 to shrink the used buffers to make the occupied space fit the maximum
879 memory allowed to be used for all join buffers in total. After
880 this the function tries to allocate a join buffer for join_tab.
881 If it fails to do so, it decrements the requested size of the join
882 buffer, shrinks proportionally the join buffers used for the previous
883 tables and tries to allocate a buffer for join_tab. In the case of a
884 failure the function repeats its attempts with smaller and smaller
885 requested sizes of the buffer, but not more than 4 times.
886
887 RETURN VALUE
888 0 if the memory has been successfully allocated
889 1 otherwise
890 */
891
int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;      /* total size of all join buffers   */
  ulonglong curr_min_buff_space_sz= 0;  /* total of their minimum sizes     */
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
  buff= NULL;
  min_buff_size= 0;
  max_buff_size= 0;
  min_records= 1;
  /*
    Cap the expected number of records in the buffer by the space limit,
    but never plan for fewer than 10 records.
  */
  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
  set_if_bigger(max_records, 10);
  min_buff_size= get_min_join_buffer_size();
  buff_size= get_max_join_buffer_size(optimize_buff_size);

  /*
    Sum up the minimum and the currently set buffer sizes of all join
    caches used for the tables preceding join_tab, then add this cache's
    own requirements.
  */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  /*
    Fail outright if even the minimum sizes exceed the limit; otherwise,
    when the current sizes exceed the limit, try to shrink all the buffers
    proportionally (only if the optimizer switch allows it).
  */
  if (curr_min_buff_space_sz > join_buff_space_limit ||
      (curr_buff_space_sz > join_buff_space_limit &&
       (!optimize_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit))))
    goto fail;

  /* For EXPLAIN only the size calculations matter: no real allocation */
  if (for_explain_only)
    return 0;

  /*
    Try to allocate the buffer; on failure shrink the requested size by a
    fixed decrement (a quarter of the shrinkable range) and shrink the
    previous buffers accordingly, repeating until allocation succeeds or
    the size would drop below the minimum.
  */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size,
                                  MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    /* Recompute the total space occupied after the shrink */
    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}
965
966
967 /*
  Shrink the size of the cache join buffer in a given ratio

  SYNOPSIS
    shrink_join_buffer_in_ratio()
      n           numerator of the ratio to shrink the buffer in
      d           denominator of the ratio

  DESCRIPTION
    The function first deallocates the join buffer of the cache. Then
    it allocates a buffer that is (n/d) times smaller.
978
979 RETURN VALUE
980 FALSE on success with allocation of the smaller join buffer
981 TRUE otherwise
982 */
983
shrink_join_buffer_in_ratio(ulonglong n,ulonglong d)984 bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
985 {
986 size_t next_buff_size;
987 if (n < d)
988 return FALSE;
989 next_buff_size= (size_t) ((double) buff_size / n * d);
990 set_if_bigger(next_buff_size, min_buff_size);
991 buff_size= next_buff_size;
992 return realloc_buffer();
993 }
994
995
996 /*
997 Reallocate the join buffer of a join cache
998
999 SYNOPSIS
1000 realloc_buffer()
1001
  DESCRIPTION
1003 The function reallocates the join buffer of the join cache. After this
1004 it resets the buffer for writing.
1005
1006 NOTES
1007 The function assumes that buff_size contains the new value for the join
1008 buffer size.
1009
1010 RETURN VALUE
1011 0 if the buffer has been successfully reallocated
1012 1 otherwise
1013 */
1014
realloc_buffer()1015 int JOIN_CACHE::realloc_buffer()
1016 {
1017 free();
1018 buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size,
1019 MYF(MY_THREAD_SPECIFIC));
1020 reset(TRUE);
1021 return buff == NULL;
1022 }
1023
1024
1025 /*
1026 Initialize a join cache
1027
1028 SYNOPSIS
1029 init()
1030 for_explain join buffer is initialized for explain only
1031
1032 DESCRIPTION
1033 The function initializes the join cache structure. It supposed to be called
1034 by init methods for classes derived from the JOIN_CACHE.
1035 The function allocates memory for the join buffer and for descriptors of
1036 the record fields stored in the buffer.
1037
1038 NOTES
1039 The code of this function should have been included into the constructor
1040 code itself. However the new operator for the class JOIN_CACHE would
1041 never fail while memory allocation for the join buffer is not absolutely
1042 unlikely to fail. That's why this memory allocation has to be placed in a
1043 separate function that is called in a couple with a cache constructor.
1044 It is quite natural to put almost all other constructor actions into
1045 this function.
1046
1047 RETURN VALUE
1048 0 initialization with buffer allocations has been succeeded
1049 1 otherwise
1050 */
1051
int JOIN_CACHE::init(bool for_explain)
{
  DBUG_ENTER("JOIN_CACHE::init");

  for_explain_only= for_explain;

  calc_record_fields();

  collect_info_on_key_args();

  /* Allocate memory for the field descriptors; fail early if that fails */
  if (alloc_fields())
    DBUG_RETURN(1);

  /* Descriptors are created in a fixed order: flag fields first, then
     key-argument fields, then all remaining data fields */
  create_flag_fields();

  create_key_arg_fields();

  create_remaining_fields();

  set_constants();

  /* Allocate the join buffer itself (alloc_buffer() skips the actual
     allocation when the cache is initialized for EXPLAIN only) */
  if (alloc_buffer())
    DBUG_RETURN(1);

  /* Prepare the buffer for writing */
  reset(TRUE);

  DBUG_RETURN(0);
}
1080
1081
1082 /*
1083 Check the possibility to read the access keys directly from the join buffer
1084 SYNOPSIS
1085 check_emb_key_usage()
1086
1087 DESCRIPTION
1088 The function checks some conditions at which the key values can be read
1089 directly from the join buffer. This is possible when the key values can be
1090 composed by concatenation of the record fields stored in the join buffer.
1091 Sometimes when the access key is multi-component the function has to re-order
1092 the fields written into the join buffer to make keys embedded. If key
1093 values for the key access are detected as embedded then 'use_emb_key'
1094 is set to TRUE.
1095
1096 EXAMPLE
1097 Let table t2 has an index defined on the columns a,b . Let's assume also
1098 that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
1099 of the integer type. Then if the query
1100 SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
1101 is executed with a join cache in such a way that t1 is the driving
1102 table then the key values to access table t2 can be read directly
1103 from the join buffer.
1104
1105 NOTES
1106 In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded. In the future we'll expand
    the class of keys which we identify as embedded.
1109
1110 NOTES
1111 The function returns FALSE if no key is used to join the records
1112 from join_tab.
1113
1114 RETURN VALUE
1115 TRUE key values will be considered as embedded,
1116 FALSE otherwise.
1117 */
1118
bool JOIN_CACHE::check_emb_key_usage()
{

  /* Embedded keys make sense only when join_tab is accessed by key */
  if (!is_key_access())
    return FALSE;

  uint i;
  Item *item;
  KEY_PART_INFO *key_part;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uint len= 0;                          /* accumulated embedded key length */
  TABLE_REF *ref= &join_tab->ref;
  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);

  /*
    If some of the key arguments are not from the local cache the key
    is not considered as embedded.
    TODO:
    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
  */
  if (external_key_arg_fields != 0)
    return FALSE;
  /*
    If the number of the local key arguments is not equal to the number
    of key parts the key value cannot be read directly from the join buffer.
  */
  if (local_key_arg_fields != ref->key_parts)
    return FALSE;

  /*
    A key is not considered embedded if one of the following is true:
    - one of its key parts is not equal to a field
    - it is a partial key
    - definition of the argument field does not coincide with the
      definition of the corresponding key component
    - some of the key components are nullable
  */
  for (i=0; i < ref->key_parts; i++)
  {
    item= ref->items[i]->real_item();
    if (item->type() != Item::FIELD_ITEM)
      return FALSE;
    key_part= keyinfo->key_part+i;
    if (key_part->key_part_flag & HA_PART_KEY_SEG)
      return FALSE;
    if (!key_part->field->eq_def(((Item_field *) item)->field))
      return FALSE;
    if (key_part->field->maybe_null())
      return FALSE;
  }

  /* Walk the descriptors of the local key argument fields */
  copy= field_descr+flag_fields;
  copy_end= copy+local_key_arg_fields;
  for ( ; copy < copy_end; copy++)
  {
    /*
      If some of the key arguments are of variable length the key
      is not considered as embedded.
    */
    if (copy->type != 0)
      return FALSE;
    /*
      If some of the key arguments are bit fields whose bits are partially
      stored with null bits the key is not considered as embedded.
    */
    if (copy->field->type() == MYSQL_TYPE_BIT &&
	((Field_bit*) (copy->field))->bit_len)
      return FALSE;
    len+= copy->length;
  }

  emb_key_length= len;

  /*
    Make sure that key fields follow the order of the corresponding
    key components these fields are equal to. For this the descriptors
    of the fields that comprise the key might be re-ordered.
  */
  for (i= 0; i < ref->key_parts; i++)
  {
    uint j;
    Item *item= ref->items[i]->real_item();
    Field *fld= ((Item_field *) item)->field;
    CACHE_FIELD *init_copy= field_descr+flag_fields+i;
    /* Find the descriptor matching the i-th key part among slots i.. */
    for (j= i, copy= init_copy; j < local_key_arg_fields; j++, copy++)
    {
      if (fld->eq(copy->field))
      {
        /* Swap the descriptors so the matching field lands in slot i */
        if (j != i)
        {
          CACHE_FIELD key_part_copy= *copy;
          *copy= *init_copy;
          *init_copy= key_part_copy;
        }
        break;
      }
    }
  }

  return TRUE;
}
1221
1222
1223 /*
1224 Write record fields and their required offsets into the join cache buffer
1225
1226 SYNOPSIS
1227 write_record_data()
1228 link a reference to the associated info in the previous cache
1229 is_full OUT true if it has been decided that no more records will be
1230 added to the join buffer
1231
1232 DESCRIPTION
1233 This function put into the cache buffer the following info that it reads
1234 from the join record buffers or computes somehow:
1235 (1) the length of all fields written for the record (optional)
1236 (2) an offset to the associated info in the previous cache (if there is any)
1237 determined by the link parameter
1238 (3) all flag fields of the tables whose data field are put into the cache:
1239 - match flag (optional),
1240 - null bitmaps for all tables,
1241 - null row flags for all tables
    (4) values of all data fields including
        - full images of those fixed length data fields that cannot have
          trailing spaces
        - significant part of fixed length fields that can have trailing spaces
          with the prepended length
        - data of non-blob variable length fields with the prepended data length
        - blob data from blob fields with the prepended data length
1249 (5) record offset values for the data fields that are referred to from
1250 other caches
1251
1252 The record is written at the current position stored in the field 'pos'.
1253 At the end of the function 'pos' points at the position right after the
1254 written record data.
1255 The function increments the number of records in the cache that is stored
1256 in the 'records' field by 1. The function also modifies the values of
1257 'curr_rec_pos' and 'last_rec_pos' to point to the written record.
1258 The 'end_pos' cursor is modified accordingly.
1259 The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
1260 remains in the record buffers and not copied to the join buffer. It may
1261 happen only to the blob data from the last record added into the cache.
1262 If on_precond is attached to join_tab and it is not evaluated to TRUE
1263 then MATCH_IMPOSSIBLE is placed in the match flag field of the record
1264 written into the join buffer.
1265
1266 RETURN VALUE
1267 length of the written record data
1268 */
1269
uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
{
  uint len;
  bool last_record;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uchar *flags_pos;                 /* start of the flag fields in the buffer */
  uchar *cp= pos;                   /* write cursor into the join buffer      */
  uchar *init_pos= cp;              /* where this record's data starts        */
  uchar *rec_len_ptr= 0;            /* where the record length will be stored */
  uint key_extra= extra_key_length();

  records++;  /* Increment the counter of records in the cache */

  len= pack_length + key_extra;

  /* Make an adjustment for the size of the auxiliary buffer if there is any */
  uint incr= aux_buffer_incr(records);
  size_t rem= rem_space();
  aux_buff_size+= len+incr < rem ? incr : rem;

  /*
    For each blob to be put into cache save its length and a pointer
    to the value in the corresponding element of the blob_ptr array.
    Blobs with null values are skipped.
    Increment 'len' by the total length of all these blobs.
  */
  if (blobs)
  {
    CACHE_FIELD **copy_ptr= blob_ptr;
    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
    {
      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
      if (!blob_field->is_null())
      {
        uint blob_len= blob_field->get_length();
        (*copy_ptr)->blob_length= blob_len;
        len+= blob_len;
        (*copy_ptr)->str= blob_field->get_ptr();
      }
    }
  }

  /*
    Check whether we won't be able to add any new record into the cache after
    this one because the cache will be full. Set last_record to TRUE if it's so.
    We assume that the cache will be full after the record has been written
    into it if either the remaining space of the cache is not big enough for the
    record's blob values or if there is a chance that not all non-blob fields
    of the next record can be placed there.
    This function is called only in the case when there is enough space left in
    the cache to store at least non-blob parts of the current record.
  */
  last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();

  /*
    Save the position for the length of the record in the cache if it's needed.
    The length of the record will be inserted here when all fields of the record
    are put into the cache.
  */
  if (with_length)
  {
    rec_len_ptr= cp;
    DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size);
    cp+= size_of_rec_len;
  }

  /*
    Put a reference to the fields of the record that are stored in the previous
    cache if there is any. This reference is passed by the 'link' parameter.
  */
  if (prev_cache)
  {
    DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size);
    cp+= prev_cache->get_size_of_rec_offset();
    prev_cache->store_rec_ref(cp, link);
  }

  curr_rec_pos= cp;

  /* If there is a match flag set its value to 0 */
  copy= field_descr;
  if (with_match_flag)
    *copy[0].str= 0;

  /* First put into the cache the values of all flag fields */
  copy_end= field_descr+flag_fields;
  flags_pos= cp;
  for ( ; copy < copy_end; copy++)
  {
    DBUG_ASSERT(cp + copy->length <= buff + buff_size);
    memcpy(cp, copy->str, copy->length);
    cp+= copy->length;
  }

  /* Now put the values of the remaining fields, as long as they are not null */
  copy_end= field_descr+fields;
  for ( ; copy < copy_end; copy++)
  {
    Field *field= copy->field;
    if (field && field->maybe_null() && field->is_null())
    {
      /* A null value is stored as offset 0 for referenced fields */
      if (copy->referenced_field_no)
        copy->offset= 0;
      continue;
    }
    /* Save the offset of the field to put it later at the end of the record */
    if (copy->referenced_field_no)
      copy->offset= (uint)(cp-curr_rec_pos);

    switch (copy->type) {
    case CACHE_BLOB:
      {
        Field_blob *blob_field= (Field_blob *) copy->field;
        if (last_record)
        {
          /* The blob data itself stays in the record buffer */
          last_rec_blob_data_is_in_rec_buff= 1;
          /* Put down the length of the blob and the pointer to the data */
          DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size);
          blob_field->get_image(cp, copy->length+sizeof(char*),
                                blob_field->charset());
          cp+= copy->length+sizeof(char*);
        }
        else
        {
          /* First put down the length of the blob and then copy the data */
          blob_field->get_image(cp, copy->length,
                                blob_field->charset());
          DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size);
          if (copy->blob_length)
            memcpy(cp+copy->length, copy->str, copy->blob_length);
          cp+= copy->length+copy->blob_length;
        }
        break;
      }
    case CACHE_VARSTR1:
      /* Copy the significant part of the short varstring field */
      len= (uint) copy->str[0] + 1;
      DBUG_ASSERT(cp + len <= buff + buff_size);
      memcpy(cp, copy->str, len);
      cp+= len;
      break;
    case CACHE_VARSTR2:
      /* Copy the significant part of the long varstring field */
      len= uint2korr(copy->str) + 2;
      DBUG_ASSERT(cp + len <= buff + buff_size);
      memcpy(cp, copy->str, len);
      cp+= len;
      break;
    case CACHE_STRIPPED:
      {
        /*
          Put down the field value stripping all trailing spaces off.
          After this insert the length of the written sequence of bytes.
        */
        uchar *str, *end;
        for (str= copy->str, end= str+copy->length;
             end > str && end[-1] == ' ';
             end--) ;
        len=(uint) (end-str);
        DBUG_ASSERT(cp + len + 2 <= buff + buff_size);
        int2store(cp, len);
        memcpy(cp+2, str, len);
        cp+= len+2;
        break;
      }
    case CACHE_ROWID:
      if (!copy->length)
      {
        /*
          This may happen only for ROWID fields of materialized
          derived tables and views.
        */
        TABLE *table= (TABLE *) copy->str;
        copy->str= table->file->ref;
        copy->length= table->file->ref_length;
        if (!copy->str)
        {
          /*
            If table is an empty inner table of an outer join and it is
            a materialized derived table then table->file->ref == NULL.
          */
          cp+= copy->length;
          break;
        }
      }
      /* fall through */
    default:
      /* Copy the entire image of the field from the record buffer */
      DBUG_ASSERT(cp + copy->length <= buff + buff_size);
      if (copy->str)
        memcpy(cp, copy->str, copy->length);
      cp+= copy->length;
    }
  }

  /* Add the offsets of the fields that are referenced from other caches */
  if (referenced_fields)
  {
    uint cnt= 0;
    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
    {
      if (copy->referenced_field_no)
      {
        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
                         copy->offset);
        cnt++;
      }
    }
    DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size);
    cp+= size_of_fld_ofs*cnt;
  }

  /* Now the full record length is known: store it where reserved earlier */
  if (rec_len_ptr)
    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
  last_rec_pos= curr_rec_pos;
  end_pos= pos= cp;
  *is_full= last_record;

  last_written_is_null_compl= 0;
  if (!join_tab->first_unmatched && join_tab->on_precond)
  {
    join_tab->found= 0;
    join_tab->not_null_compl= 1;
    /* A failed ON precondition means this record can never match */
    if (!join_tab->on_precond->val_int())
    {
      flags_pos[0]= MATCH_IMPOSSIBLE;
      last_written_is_null_compl= 1;
    }
  }

  return (uint) (cp-init_pos);
}
1504
1505
1506 /*
1507 Reset the join buffer for reading/writing: default implementation
1508
1509 SYNOPSIS
1510 reset()
1511 for_writing if it's TRUE the function reset the buffer for writing
1512
1513 DESCRIPTION
1514 This default implementation of the virtual function reset() resets
1515 the join buffer for reading or writing.
1516 If the buffer is reset for reading only the 'pos' value is reset
1517 to point to the very beginning of the join buffer. If the buffer is
1518 reset for writing additionally:
    - the counter of the records in the buffer is set to 0,
    - the value of 'last_rec_pos' is set to point at the position just
      before the buffer,
1522 - 'end_pos' is set to point to the beginning of the join buffer,
1523 - the size of the auxiliary buffer is reset to 0,
1524 - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0.
1525
1526 RETURN VALUE
1527 none
1528 */
reset(bool for_writing)1529 void JOIN_CACHE::reset(bool for_writing)
1530 {
1531 pos= buff;
1532 curr_rec_link= 0;
1533 if (for_writing)
1534 {
1535 records= 0;
1536 last_rec_pos= buff;
1537 aux_buff_size= 0;
1538 end_pos= pos;
1539 last_rec_blob_data_is_in_rec_buff= 0;
1540 }
1541 }
1542
1543
1544 /*
1545 Add a record into the join buffer: the default implementation
1546
1547 SYNOPSIS
1548 put_record()
1549
1550 DESCRIPTION
1551 This default implementation of the virtual function put_record writes
1552 the next matching record into the join buffer.
1553 It also links the record having been written into the join buffer with
1554 the matched record in the previous cache if there is any.
1555 The implementation assumes that the function get_curr_link()
1556 will return exactly the pointer to this matched record.
1557
1558 RETURN VALUE
1559 TRUE if it has been decided that it should be the last record
1560 in the join buffer,
1561 FALSE otherwise
1562 */
1563
put_record()1564 bool JOIN_CACHE::put_record()
1565 {
1566 bool is_full;
1567 uchar *link= 0;
1568 if (prev_cache)
1569 link= prev_cache->get_curr_rec_link();
1570 write_record_data(link, &is_full);
1571 return is_full;
1572 }
1573
1574
1575 /*
1576 Read the next record from the join buffer: the default implementation
1577
1578 SYNOPSIS
1579 get_record()
1580
1581 DESCRIPTION
1582 This default implementation of the virtual function get_record
1583 reads fields of the next record from the join buffer of this cache.
1584 The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1586 into the corresponding record buffers.
1587 It is supposed that 'pos' points to the position in the buffer
1588 right after the previous record when the function is called.
1589 When the function returns the 'pos' values is updated to point
1590 to the position after the read record.
1591 The value of 'curr_rec_pos' is also updated by the function to
1592 point to the beginning of the first field of the record in the
1593 join buffer.
1594
1595 RETURN VALUE
1596 TRUE there are no more records to read from the join buffer
1597 FALSE otherwise
1598 */
1599
bool JOIN_CACHE::get_record()
{
  bool res;
  uchar *prev_rec_ptr= 0;
  /* Skip over the stored record length, if records carry one */
  if (with_length)
    pos+= size_of_rec_len;
  if (prev_cache)
  {
    /* Skip the reference to the matched record in the previous cache,
       remembering where that record's fields are */
    pos+= prev_cache->get_size_of_rec_offset();
    prev_rec_ptr= prev_cache->get_rec_ref(pos);
  }
  curr_rec_pos= pos;
  /* Read this cache's fields; NO_MORE_RECORDS_IN_BUFFER means we are done */
  if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER))
  {
    /* Step over the trailing referenced-field offsets */
    pos+= referenced_fields*size_of_fld_ofs;
    /* Pull the linked record's fields from the previous cache chain */
    if (prev_cache)
      prev_cache->get_record_by_pos(prev_rec_ptr);
  }
  return res;
}
1620
1621
1622 /*
1623 Read a positioned record from the join buffer: the default implementation
1624
1625 SYNOPSIS
1626 get_record_by_pos()
1627 rec_ptr position of the first field of the record in the join buffer
1628
1629 DESCRIPTION
1630 This default implementation of the virtual function get_record_pos
1631 reads the fields of the record positioned at 'rec_ptr' from the join buffer.
1632 The function also reads all other fields associated with this record
    from the join buffers of the previous caches. The fields are read
1634 into the corresponding record buffers.
1635
1636 RETURN VALUE
1637 none
1638 */
1639
get_record_by_pos(uchar * rec_ptr)1640 void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
1641 {
1642 uchar *save_pos= pos;
1643 pos= rec_ptr;
1644 read_all_record_fields();
1645 pos= save_pos;
1646 if (prev_cache)
1647 {
1648 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1649 prev_cache->get_record_by_pos(prev_rec_ptr);
1650 }
1651 }
1652
1653
1654 /*
1655 Get the match flag from the referenced record: the default implementation
1656
1657 SYNOPSIS
1658 get_match_flag_by_pos()
1659 rec_ptr position of the first field of the record in the join buffer
1660
1661 DESCRIPTION
1662 This default implementation of the virtual function get_match_flag_by_pos
1663 get the match flag for the record pointed by the reference at the position
1664 rec_ptr. If the match flag is placed in one of the previous buffers the
1665 function first reaches the linked record fields in this buffer.
1666 The function returns the value of the first encountered match flag.
1667
1668 RETURN VALUE
1669 match flag for the record at the position rec_ptr
1670 */
1671
get_match_flag_by_pos(uchar * rec_ptr)1672 enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
1673 {
1674 Match_flag match_fl= MATCH_NOT_FOUND;
1675 if (with_match_flag)
1676 {
1677 match_fl= (enum Match_flag) rec_ptr[0];
1678 return match_fl;
1679 }
1680 if (prev_cache)
1681 {
1682 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr);
1683 return prev_cache->get_match_flag_by_pos(prev_rec_ptr);
1684 }
1685 DBUG_ASSERT(0);
1686 return match_fl;
1687 }
1688
1689
1690 /*
1691 Get the match flag for the referenced record from specified join buffer
1692
1693 SYNOPSIS
1694 get_match_flag_by_pos_from_join_buffer()
1695 rec_ptr position of the first field of the record in the join buffer
1696 tab join table with join buffer where to look for the match flag
1697
1698 DESCRIPTION
1699 This default implementation of the get_match_flag_by_pos_from_join_buffer
1700 method gets the match flag for the record pointed by the reference at the
1701 position rec_ptr from the join buffer attached to the join table tab.
1702
1703 RETURN VALUE
1704 match flag for the record at the position rec_ptr from the join
1705 buffer attached to the table tab.
1706 */
1707
1708 enum JOIN_CACHE::Match_flag
get_match_flag_by_pos_from_join_buffer(uchar * rec_ptr,JOIN_TAB * tab)1709 JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr,
1710 JOIN_TAB *tab)
1711 {
1712 DBUG_ASSERT(tab->cache && tab->cache->with_match_flag);
1713 for (JOIN_CACHE *cache= this; ; )
1714 {
1715 if (cache->join_tab == tab)
1716 return (enum Match_flag) rec_ptr[0];
1717 cache= cache->prev_cache;
1718 rec_ptr= cache->get_rec_ref(rec_ptr);
1719 }
1720 }
1721
1722
1723 /*
1724 Calculate the increment of the auxiliary buffer for a record write
1725
1726 SYNOPSIS
1727 aux_buffer_incr()
1728 recno the number of the record the increment to be calculated for
1729
1730 DESCRIPTION
1731 This function calls the aux_buffer_incr the method of the
1732 companion member join_tab_scan to calculate the growth of the
1733 auxiliary buffer when the recno-th record is added to the
1734 join_buffer of this cache.
1735
1736 RETURN VALUE
1737 the number of bytes in the increment
1738 */
1739
uint JOIN_CACHE::aux_buffer_incr(size_t recno)
{
  /*
    Delegate the calculation to the companion join_tab_scan object,
    which determines the auxiliary buffer growth for the recno-th record.
  */
  return join_tab_scan->aux_buffer_incr(recno);
}
1744
1745 /*
1746 Read all flag and data fields of a record from the join buffer
1747
1748 SYNOPSIS
1749 read_all_record_fields()
1750
1751 DESCRIPTION
1752 The function reads all flag and data fields of a record from the join
1753 buffer into the corresponding record buffers.
1754 The fields are read starting from the position 'pos' which is
1755 supposed to point to the beginning of the first record field.
1756 The function increments the value of 'pos' by the length of the
1757 read data.
1758
1759 RETURN VALUE
1760 (-1) if there is no more records in the join buffer
1761 length of the data read from the join buffer - otherwise
1762 */
1763
read_all_record_fields()1764 uint JOIN_CACHE::read_all_record_fields()
1765 {
1766 uchar *init_pos= pos;
1767
1768 if (pos > last_rec_pos || !records)
1769 return NO_MORE_RECORDS_IN_BUFFER;
1770
1771 /* First match flag, read null bitmaps and null_row flag for each table */
1772 read_flag_fields();
1773
1774 /* Now read the remaining table fields if needed */
1775 CACHE_FIELD *copy= field_descr+flag_fields;
1776 CACHE_FIELD *copy_end= field_descr+fields;
1777 bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos);
1778 for ( ; copy < copy_end; copy++)
1779 read_record_field(copy, blob_in_rec_buff);
1780
1781 return (uint) (pos-init_pos);
1782 }
1783
1784
1785 /*
1786 Read all flag fields of a record from the join buffer
1787
1788 SYNOPSIS
1789 read_flag_fields()
1790
1791 DESCRIPTION
1792 The function reads all flag fields of a record from the join
1793 buffer into the corresponding record buffers.
1794 The fields are read starting from the position 'pos'.
1795 The function increments the value of 'pos' by the length of the
1796 read data.
1797
1798 RETURN VALUE
1799 length of the data read from the join buffer
1800 */
1801
read_flag_fields()1802 uint JOIN_CACHE::read_flag_fields()
1803 {
1804 uchar *init_pos= pos;
1805 CACHE_FIELD *copy= field_descr;
1806 CACHE_FIELD *copy_end= copy+flag_fields;
1807 if (with_match_flag)
1808 {
1809 copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND);
1810 pos+= copy->length;
1811 copy++;
1812 }
1813 for ( ; copy < copy_end; copy++)
1814 {
1815 memcpy(copy->str, pos, copy->length);
1816 pos+= copy->length;
1817 }
1818 return (uint)(pos-init_pos);
1819 }
1820
1821
1822 /*
1823 Read a data record field from the join buffer
1824
1825 SYNOPSIS
1826 read_record_field()
1827 copy the descriptor of the data field to be read
1828 blob_in_rec_buff indicates whether this is the field from the record
1829 whose blob data are in record buffers
1830
1831 DESCRIPTION
1832 The function reads the data field specified by the parameter copy
1833 from the join buffer into the corresponding record buffer.
1834 The field is read starting from the position 'pos'.
1835 The data of blob values is not copied from the join buffer.
1836 The function increments the value of 'pos' by the length of the
1837 read data.
1838
1839 RETURN VALUE
1840 length of the data read from the join buffer
1841 */
1842
uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
{
  uint len;
  /* Do not copy the field if its value is null */
  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
    return 0;
  switch (copy->type) {
  case CACHE_BLOB:
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      /*
        Copy the length and the pointer to data but not the blob data
        itself to the record buffer
      */
      if (blob_in_rec_buff)
      {
        blob_field->set_image(pos, copy->length + sizeof(char*),
                              blob_field->charset());
        len= copy->length + sizeof(char*);
      }
      else
      {
        /* The blob data follows its stored length in the join buffer */
        blob_field->set_ptr(pos, pos+copy->length);
        len= copy->length + blob_field->get_length();
      }
    }
    break;
  case CACHE_VARSTR1:
    /* Copy the significant part of the short varstring field */
    len= (uint) pos[0] + 1;
    memcpy(copy->str, pos, len);
    break;
  case CACHE_VARSTR2:
    /* Copy the significant part of the long varstring field */
    len= uint2korr(pos) + 2;
    memcpy(copy->str, pos, len);
    break;
  case CACHE_STRIPPED:
    /* Pad the value by the spaces that have been stripped off */
    len= uint2korr(pos);
    memcpy(copy->str, pos+2, len);
    memset(copy->str+len, ' ', copy->length-len);
    len+= 2;
    break;
  case CACHE_ROWID:
    if (!copy->str)
    {
      /* There is no rowid to copy (copy->str == NULL): just skip the bytes */
      len= copy->length;
      break;
    }
    /* fall through */
  default:
    /* Copy the entire image of the field from the record buffer */
    len= copy->length;
    memcpy(copy->str, pos, len);
  }
  /* Advance the cursor past the bytes consumed for this field */
  pos+= len;
  return len;
}
1902
1903
1904 /*
1905 Read a referenced field from the join buffer
1906
1907 SYNOPSIS
1908 read_referenced_field()
1909 copy pointer to the descriptor of the referenced field
1910 rec_ptr pointer to the record that may contain this field
1911 len IN/OUT total length of the record fields
1912
1913 DESCRIPTION
1914 The function checks whether copy points to a data field descriptor
1915 for this cache object. If it does not then the function returns
1916 FALSE. Otherwise the function reads the field of the record in
1917 the join buffer pointed by 'rec_ptr' into the corresponding record
1918 buffer and returns TRUE.
1919 If the value of *len is 0 then the function sets it to the total
1920 length of the record fields including possible trailing offset
1921 values. Otherwise *len is supposed to provide this value that
1922 has been obtained earlier.
1923
1924 NOTE
1925 If the value of the referenced field is null then the offset
1926 for the value is set to 0. If the value of a field can be null
1927 then the value of flag_fields is always positive. So the offset
1928 for any non-null value cannot be 0 in this case.
1929
1930 RETURN VALUE
1931 TRUE 'copy' points to a data descriptor of this join cache
1932 FALSE otherwise
1933 */
1934
bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* 'copy' must be a data field descriptor of this very cache */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /* The field offsets are stored at the end of the record data; pick the
     one that belongs to this referenced field */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  Field *field= copy->field;
  /* Offset 0 marks a null value: with flag_fields > 0 no real field value
     can ever be stored at offset 0 (see the NOTE in the header comment) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
  {
    field->set_null();
    if (!field->real_maybe_null())
      field->table->null_row= 1;
  }
  else
  {
    /* Temporarily reposition 'pos' to read the field, then restore it */
    uchar *save_pos= pos;
    field->set_notnull();
    if (!field->real_maybe_null())
      field->table->null_row= 0;
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}
1978
1979
1980 /*
  Skip record from join buffer if it's already matched: default implementation
1982
1983 SYNOPSIS
1984 skip_if_matched()
1985
1986 DESCRIPTION
1987 This default implementation of the virtual function skip_if_matched
1988 skips the next record from the join buffer if its match flag is set to
1989 MATCH_FOUND.
1990 If the record is skipped the value of 'pos' is set to point to the position
1991 right after the record.
1992
1993 NOTE
1994 Currently this function is called only when generating null complemented
1995 records for outer joins (=> only when join_tab->first_unmatched != NULL).
1996
1997 RETURN VALUE
1998 TRUE the match flag is set to MATCH_FOUND and the record has been skipped
1999 FALSE otherwise
2000 */
2001
skip_if_matched()2002 bool JOIN_CACHE::skip_if_matched()
2003 {
2004 DBUG_ASSERT(with_length);
2005 uint offset= size_of_rec_len;
2006 if (prev_cache)
2007 offset+= prev_cache->get_size_of_rec_offset();
2008 /* Check whether the match flag is MATCH_FOUND */
2009 if (get_match_flag_by_pos_from_join_buffer(pos+offset,
2010 join_tab->first_unmatched) ==
2011 MATCH_FOUND)
2012 {
2013 pos+= size_of_rec_len + get_rec_length(pos);
2014 return TRUE;
2015 }
2016 return FALSE;
2017 }
2018
2019
2020 /*
2021 Skip record from join buffer if the match isn't needed: default implementation
2022
2023 SYNOPSIS
2024 skip_if_not_needed_match()
2025
2026 DESCRIPTION
2027 This default implementation of the virtual function skip_if_not_needed_match
2028 skips the next record from the join when generating join extensions
2029 for the records in the join buffer depending on the value of the match flag.
2030 - In the case of a semi-nest the match flag may be in two states
2031 {MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set
2032 to MATCH_FOUND.
  - In the case of an outer join nest when the not_exists optimization is
    applied the match flag may be in three states {MATCH_NOT_FOUND,
    MATCH_IMPOSSIBLE, MATCH_FOUND}. The record is skipped if the flag is set
    to MATCH_FOUND or to MATCH_IMPOSSIBLE.
2037
2038 If the record is skipped the value of 'pos' is set to point to the position
2039 right after the record.
2040
2041 NOTE
2042 Currently the function is called only when generating non-null complemented
2043 extensions for records in the join buffer.
2044
2045 RETURN VALUE
2046 TRUE the record has to be skipped
2047 FALSE otherwise
2048 */
2049
skip_if_not_needed_match()2050 bool JOIN_CACHE::skip_if_not_needed_match()
2051 {
2052 DBUG_ASSERT(with_length);
2053 enum Match_flag match_fl;
2054 uint offset= size_of_rec_len;
2055 bool skip= FALSE;
2056 if (prev_cache)
2057 offset+= prev_cache->get_size_of_rec_offset();
2058
2059 if (!join_tab->check_only_first_match())
2060 return FALSE;
2061
2062 match_fl= get_match_flag_by_pos(pos+offset);
2063 skip= join_tab->first_sj_inner_tab ?
2064 match_fl == MATCH_FOUND : // the case of semi-join
2065 match_fl != MATCH_NOT_FOUND; // the case of outer-join
2066
2067 if (skip)
2068 {
2069 pos+= size_of_rec_len + get_rec_length(pos);
2070 return TRUE;
2071 }
2072 return FALSE;
2073 }
2074
2075
2076 /*
2077 Restore the fields of the last record from the join buffer
2078
2079 SYNOPSIS
2080 restore_last_record()
2081
2082 DESCRIPTION
2083 This function restore the values of the fields of the last record put
2084 into join buffer in record buffers. The values most probably have been
2085 overwritten by the field values from other records when they were read
2086 from the join buffer into the record buffer in order to check pushdown
2087 predicates.
2088
2089 RETURN
2090 none
2091 */
2092
restore_last_record()2093 void JOIN_CACHE::restore_last_record()
2094 {
2095 if (records)
2096 get_record_by_pos(last_rec_pos);
2097 }
2098
2099
2100 /*
2101 Join records from the join buffer with records from the next join table
2102
2103 SYNOPSIS
2104 join_records()
2105 skip_last do not find matches for the last record from the buffer
2106
2107 DESCRIPTION
2108 The functions extends all records from the join buffer by the matched
2109 records from join_tab. In the case of outer join operation it also
2110 adds null complementing extensions for the records from the join buffer
2111 that have no match.
2112 No extensions are generated for the last record from the buffer if
2113 skip_last is true.
2114
2115 NOTES
2116 The function must make sure that if linked join buffers are used then
2117 a join buffer cannot be refilled again until all extensions in the
2118 buffers chained to this one are generated.
2119 Currently an outer join operation with several inner tables always uses
2120 at least two linked buffers with the match join flags placed in the
2121 first buffer. Any record composed of rows of the inner tables that
2122 matches a record in this buffer must refer to the position of the
2123 corresponding match flag.
2124
2125 IMPLEMENTATION
2126 When generating extensions for outer tables of an outer join operation
2127 first we generate all extensions for those records from the join buffer
2128 that have matches, after which null complementing extension for all
2129 unmatched records from the join buffer are generated.
2130
2131 RETURN VALUE
2132 return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
2133 */
2134
enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  JOIN_TAB *tab;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_records");

  /* Starting a fresh pass: no null-complemented rows produced yet */
  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    bool pfs_batch_update= join_tab->pfs_batch_update(join);
    if (pfs_batch_update)
      join_tab->table->file->start_psi_batch_mode();
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (pfs_batch_update)
      join_tab->table->file->end_psi_batch_mode();
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache && join_tab != join_tab->last_inner)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /*
        Prepare for generation of null complementing extensions.
        For all inner tables of the outer join operation for which
        regular matches have been just found the field 'first_unmatched'
        is set to point to the first inner table. After all null
        complement rows are generated for this outer join this field
        is set back to NULL.
      */
      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    /*
      For key-based access re-read the last cached record into the record
      buffers before generating null complements.
    */
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset(FALSE);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }
  if(next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner &&
      join_tab->first_inner == join_tab->first_unmatched)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= 0;
  }
  /* Put the last cached record back into the record buffers before leaving */
  restore_last_record();
  reset(TRUE);
  DBUG_PRINT("exit", ("rc: %d", rc));
  DBUG_RETURN(rc);
}
2238
2239
2240 /*
2241 Find matches from the next table for records from the join buffer
2242
2243 SYNOPSIS
2244 join_matching_records()
2245 skip_last do not look for matches for the last partial join record
2246
2247 DESCRIPTION
2248 The function retrieves rows of the join_tab table and checks whether they
2249 match partial join records from the join buffer. If a match is found
2250 the function will call the sub_select function trying to look for matches
2251 for the remaining join operations.
2252 This function currently is called only from the function join_records.
2253 If the value of skip_last is true the function writes the partial join
2254 record from the record buffer into the join buffer to save its value for
2255 the future processing in the caller function.
2256
2257 NOTES
2258 If employed by BNL or BNLH join algorithms the function performs a full
2259 scan of join_tab for each refill of the join buffer. If BKA or BKAH
2260 algorithms are used then the function iterates only over those records
2261 from join_tab that can be accessed by keys built over records in the join
2262 buffer. To apply a proper method of iteration the function just calls
2263 virtual iterator methods (open, next, close) of the member join_tab_scan.
2264 The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MMR type.
2265 The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
2266 algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
2267 for BKA/BKAH join algorithms.
2268 When the function looks for records from the join buffer that would
2269 match a record from join_tab it iterates either over all records in
2270 the buffer or only over selected records. If BNL join operation is
2271 performed all records are checked for the match. If BNLH or BKAH
2272 algorithm is employed to join join_tab then the function looks only
2273 through the records with the same join key as the record from join_tab.
2274 With the BKA join algorithm only one record from the join buffer is checked
2275 for a match for any record from join_tab. To iterate over the candidates
2276 for a match the virtual function get_next_candidate_for_match is used,
2277 while the virtual function prepare_look_for_matches is called to prepare
2278 for such iteration process.
2279
2280 NOTES
2281 The function produces all matching extensions for the records in the
2282 join buffer following the path of the employed blocked algorithm.
2283 When an outer join operation is performed all unmatched records from
2284 the join buffer must be extended by null values. The function
2285 'join_null_complements' serves this purpose.
2286
2287 RETURN VALUE
2288 return one of enum_nested_loop_state
2289 */
2290
enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  join_tab->table->null_row= 0;
  /*
    Only the first match is needed either for a semi-join inner table or
    for an outer join nest to which the not_exists optimization applies.
  */
  bool check_only_first_match=
    join_tab->check_only_first_match() &&
    (!join_tab->first_inner || // semi-join case
     join_tab->first_inner == join_tab->first_unmatched); // outer join case
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_matching_records");

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record();

  if (join_tab->use_quick == 2 && join_tab->select->quick)
  {
    /* A dynamic range access was used last. Clean up after it */
    delete join_tab->select->quick;
    join_tab->select->quick= 0;
  }

  /* Run any startup work join_tab needs before it can be scanned */
  if ((rc= join_tab_execution_startup(join_tab)) < 0)
    goto finish2;

  join_tab->build_range_rowid_filter_if_needed();

  /* Prepare to retrieve all records of the joined table */
  if (unlikely((error= join_tab_scan->open())))
  {
    /*
      TODO: if we get here, we will assert in net_send_statement(). Add test
      coverage and fix.
    */
    goto finish;
  }

  while (!(error= join_tab_scan->next()))
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }

    /* Remember the rowid of the current row if it may be needed later */
    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Prepare to read matching candidates from the join buffer */
    if (prepare_look_for_matches(skip_last))
      continue;
    join_tab->jbuf_tracker->r_scans++;

    uchar *rec_ptr;
    /* Read each possible candidate from the buffer and look for matches */
    while ((rec_ptr= get_next_candidate_for_match()))
    {
      join_tab->jbuf_tracker->r_rows++;
      /*
        If only the first match is needed, and, it has been already found for
        the next record read from the join buffer, then the record is skipped.
        Also those records that must be null complemented are not considered
        as candidates for matches.
      */
      if ((!check_only_first_match && !outer_join_first_inner) ||
          !skip_next_candidate_for_match(rec_ptr))
      {
        read_next_candidate_for_match(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
    }
  }

finish:
  /* A negative error means "no more rows"; any other non-zero value is fatal */
  if (error)
    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
finish2:
  join_tab_scan->close();
  DBUG_RETURN(rc);
}
2384
2385
2386 /*
2387 Set match flag for a record in join buffer if it has not been set yet
2388
2389 SYNOPSIS
2390 set_match_flag_if_none()
2391 first_inner the join table to which this flag is attached to
2392 rec_ptr pointer to the record in the join buffer
2393
2394 DESCRIPTION
2395 If the records of the table are accumulated in a join buffer the function
2396 sets the match flag for the record in the buffer that is referred to by
2397 the record from this cache positioned at 'rec_ptr'.
2398 The function also sets the match flag 'found' of the table first inner
2399 if it has not been set before.
2400
2401 NOTES
2402 The function assumes that the match flag for any record in any cache
2403 is placed in the first byte occupied by the record fields.
2404
2405 RETURN VALUE
2406 TRUE the match flag is set by this call for the first time
2407 FALSE the match flag has been set before this call
2408 */
2409
set_match_flag_if_none(JOIN_TAB * first_inner,uchar * rec_ptr)2410 bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
2411 uchar *rec_ptr)
2412 {
2413 if (!first_inner->cache)
2414 {
2415 /*
2416 Records of the first inner table to which the flag is attached to
2417 are not accumulated in a join buffer.
2418 */
2419 if (first_inner->found)
2420 return FALSE;
2421 else
2422 {
2423 first_inner->found= 1;
2424 return TRUE;
2425 }
2426 }
2427 JOIN_CACHE *cache= this;
2428 while (cache->join_tab != first_inner)
2429 {
2430 cache= cache->prev_cache;
2431 DBUG_ASSERT(cache);
2432 rec_ptr= cache->get_rec_ref(rec_ptr);
2433 }
2434 if ((Match_flag) rec_ptr[0] != MATCH_FOUND)
2435 {
2436 rec_ptr[0]= MATCH_FOUND;
2437 first_inner->found= 1;
2438 return TRUE;
2439 }
2440 return FALSE;
2441 }
2442
2443
2444 /*
2445 Generate all full extensions for a partial join record in the buffer
2446
2447 SYNOPSIS
2448 generate_full_extensions()
2449 rec_ptr pointer to the record from join buffer to generate extensions
2450
2451 DESCRIPTION
2452 The function first checks whether the current record of 'join_tab' matches
2453 the partial join record from join buffer located at 'rec_ptr'. If it is the
2454 case the function calls the join_tab->next_select method to generate
2455 all full extension for this partial join match.
2456
2457 RETURN VALUE
2458 return one of enum_nested_loop_state.
2459 */
2460
generate_full_extensions(uchar * rec_ptr)2461 enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
2462 {
2463 enum_nested_loop_state rc= NESTED_LOOP_OK;
2464 DBUG_ENTER("JOIN_CACHE::generate_full_extensions");
2465
2466 /*
2467 Check whether the extended partial join record meets
2468 the pushdown conditions.
2469 */
2470 if (check_match(rec_ptr))
2471 {
2472 int res= 0;
2473
2474 if (!join_tab->check_weed_out_table ||
2475 !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
2476 {
2477 set_curr_rec_link(rec_ptr);
2478 rc= (join_tab->next_select)(join, join_tab+1, 0);
2479 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2480 {
2481 reset(TRUE);
2482 DBUG_RETURN(rc);
2483 }
2484 }
2485 if (res == -1)
2486 {
2487 rc= NESTED_LOOP_ERROR;
2488 DBUG_RETURN(rc);
2489 }
2490 }
2491 else if (unlikely(join->thd->is_error()))
2492 rc= NESTED_LOOP_ERROR;
2493 DBUG_RETURN(rc);
2494 }
2495
2496
2497 /*
2498 Check matching to a partial join record from the join buffer
2499
2500 SYNOPSIS
2501 check_match()
2502 rec_ptr pointer to the record from join buffer to check matching to
2503
2504 DESCRIPTION
2505 The function checks whether the current record of 'join_tab' matches
2506 the partial join record from join buffer located at 'rec_ptr'. If this is
2507 the case and 'join_tab' is the last inner table of a semi-join or an outer
2508 join the function turns on the match flag for the 'rec_ptr' record unless
2509 it has been already set.
2510
2511 NOTES
2512 Setting the match flag on can trigger re-evaluation of pushdown conditions
2513 for the record when join_tab is the last inner table of an outer join.
2514
2515 RETURN VALUE
2516 TRUE there is a match
2517 FALSE there is no match
2518 In this case the caller must also check thd->is_error() to see
2519 if there was a fatal error for the query.
2520 */
2521
inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  /* Check whether pushdown conditions are satisfied */
  DBUG_ENTER("JOIN_CACHE:check_match");

  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
    DBUG_RETURN(FALSE);

  join_tab->jbuf_tracker->r_rows_after_where++;

  /* Match flags are only maintained when join_tab closes an inner nest */
  if (!join_tab->is_last_inner_table())
    DBUG_RETURN(TRUE);

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (first_inner->check_only_first_match() &&
        !join_tab->first_inner)
      DBUG_RETURN(TRUE);
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select && tab->select->skip_record(join->thd) <= 0)
        DBUG_RETURN(FALSE);
    }
  }
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);
  DBUG_RETURN(TRUE);
}
2567
2568
2569 /*
2570 Add null complements for unmatched outer records from join buffer
2571
2572 SYNOPSIS
2573 join_null_complements()
2574 skip_last do not add null complements for the last record
2575
2576 DESCRIPTION
2577 This function is called only for inner tables of outer joins.
2578 The function retrieves all rows from the join buffer and adds null
2579 complements for those of them that do not have matches for outer
2580 table records.
2581 If the 'join_tab' is the last inner table of the embedding outer
2582 join and the null complemented record satisfies the outer join
    condition then the corresponding match flag is turned on
2584 unless it has been set earlier. This setting may trigger
2585 re-evaluation of pushdown conditions for the record.
2586
2587 NOTES
2588 The same implementation of the virtual method join_null_complements
    is used for the BNL/BNLH/BKA/BKAH join algorithms.
2590
2591 RETURN VALUE
2592 return one of enum_nested_loop_state.
2593 */
2594
join_null_complements(bool skip_last)2595 enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
2596 {
2597 ulonglong cnt;
2598 enum_nested_loop_state rc= NESTED_LOOP_OK;
2599 bool is_first_inner= join_tab == join_tab->first_unmatched;
2600 DBUG_ENTER("JOIN_CACHE::join_null_complements");
2601
2602 /* Return at once if there are no records in the join buffer */
2603 if (!records)
2604 DBUG_RETURN(NESTED_LOOP_OK);
2605
2606 cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));
2607
2608 /* This function may be called only for inner tables of outer joins */
2609 DBUG_ASSERT(join_tab->first_inner);
2610
2611 for ( ; cnt; cnt--)
2612 {
2613 if (unlikely(join->thd->check_killed()))
2614 {
2615 /* The user has aborted the execution of the query */
2616 rc= NESTED_LOOP_KILLED;
2617 goto finish;
2618 }
2619 /* Just skip the whole record if a match for it has been already found */
2620 if (!is_first_inner || !skip_if_matched())
2621 {
2622 get_record();
2623 /* The outer row is complemented by nulls for each inner table */
2624 restore_record(join_tab->table, s->default_values);
2625 mark_as_null_row(join_tab->table);
2626 rc= generate_full_extensions(get_curr_rec());
2627 if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
2628 goto finish;
2629 }
2630 }
2631
2632 finish:
2633 DBUG_RETURN(rc);
2634 }
2635
2636
2637 /*
2638 Save data on the join algorithm employed by the join cache
2639
2640 SYNOPSIS
2641 save_explain_data()
2642 str string to add the comment on the employed join algorithm to
2643
2644 DESCRIPTION
2645 This function puts info about the type of the used join buffer (flat or
    incremental) and on the type of the employed join algorithm (BNL,
2647 BNLH, BKA or BKAH) to the data structure
2648
2649 RETURN VALUE
2650 0 ok
2651 1 error
2652 */
2653
save_explain_data(EXPLAIN_BKA_TYPE * explain)2654 bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2655 {
2656 explain->incremental= MY_TEST(prev_cache);
2657
2658 explain->join_buffer_size= get_join_buffer_size();
2659
2660 switch (get_join_alg()) {
2661 case BNL_JOIN_ALG:
2662 explain->join_alg= "BNL";
2663 break;
2664 case BNLH_JOIN_ALG:
2665 explain->join_alg= "BNLH";
2666 break;
2667 case BKA_JOIN_ALG:
2668 explain->join_alg= "BKA";
2669 break;
2670 case BKAH_JOIN_ALG:
2671 explain->join_alg= "BKAH";
2672 break;
2673 default:
2674 DBUG_ASSERT(0);
2675 }
2676 return 0;
2677 }
2678
2679 /**
2680 get thread handle.
2681 */
2682
THD *JOIN_CACHE::thd()
{
  /* The thread handle for this cache is the one owned by its JOIN */
  return join->thd;
}
2687
2688
add_mrr_explain_info(String * str,uint mrr_mode,handler * file)2689 static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file)
2690 {
2691 char mrr_str_buf[128]={0};
2692 int len;
2693 len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf,
2694 sizeof(mrr_str_buf));
2695 if (len > 0)
2696 {
2697 if (str->length())
2698 {
2699 if (str->append(STRING_WITH_LEN("; ")))
2700 return 1;
2701 }
2702 if (str->append(mrr_str_buf, len))
2703 return 1;
2704 }
2705 return 0;
2706 }
2707
2708
save_explain_data(EXPLAIN_BKA_TYPE * explain)2709 bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2710 {
2711 if (JOIN_CACHE::save_explain_data(explain))
2712 return 1;
2713 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file);
2714 }
2715
2716
save_explain_data(EXPLAIN_BKA_TYPE * explain)2717 bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain)
2718 {
2719 if (JOIN_CACHE::save_explain_data(explain))
2720 return 1;
2721 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file);
2722 }
2723
2724
2725 /*
2726 Initialize a hashed join cache
2727
2728 SYNOPSIS
2729 init()
2730 for_explain join buffer is initialized for explain only
2731
2732 DESCRIPTION
2733 The function initializes the cache structure with a hash table in it.
2734 The hash table will be used to store key values for the records from
2735 the join buffer.
2736 The function allocates memory for the join buffer and for descriptors of
2737 the record fields stored in the buffer.
2738 The function also initializes a hash table for record keys within the join
2739 buffer space.
2740
  NOTES
2742 The function is supposed to be called by the init methods of the classes
2743 derived from JOIN_CACHE_HASHED.
2744
2745 RETURN VALUE
2746 0 initialization with buffer allocations has been succeeded
2747 1 otherwise
2748 */
2749
int JOIN_CACHE_HASHED::init(bool for_explain)
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_HASHED::init");

  hash_table= 0;
  key_entries= 0;

  /* Records are hashed on the lookup key of join_tab's ref access */
  key_length= ref->key_length;

  /* Perform the common join cache initialization first */
  if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
    DBUG_RETURN (rc);

  if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
    DBUG_RETURN(1);

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();
  pack_length_with_blob_ptrs+= get_size_of_rec_offset();

  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
  ref_used_key_parts= join_tab->ref.key_parts;

  /*
    The simple (binary) hash/compare functions suffice only when every
    key part compares as a binary string; otherwise fall back to the
    complex variants.
  */
  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;

  KEY_PART_INFO *key_part= ref_key_info->key_part;
  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
  for ( ; key_part < key_part_end; key_part++)
  {
    if (!key_part->field->eq_cmp_as_binary())
    {
      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
      break;
    }
  }

  init_hash_table();

  /* Offset of the data fields within a record written into the buffer */
  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
    (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With an embedded key the flag fields precede the data fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(0);
}
2806
2807
2808 /*
2809 Initialize the hash table of a hashed join cache
2810
2811 SYNOPSIS
2812 init_hash_table()
2813
2814 DESCRIPTION
2815 The function estimates the number of hash table entries in the hash
2816 table to be used and initializes this hash table within the join buffer
2817 space.
2818
2819 RETURN VALUE
2820 Currently the function always returns 0;
2821 */
2822
int JOIN_CACHE_HASHED::init_hash_table()
{
  hash_table= 0;
  key_entries= 0;

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() + // key chain header
                      size_of_key_ofs +          // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    /* Expected space taken by one record with its key and hash entry */
    size_t space_per_rec= avg_record_length +
                          avg_aux_buffer_incr +
                          key_entry_length+size_of_key_ofs;
    size_t n= buff_size / space_per_rec;

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in the join buffer.
    */
    size_t max_n= buff_size / (pack_length-length+
                               key_entry_length+size_of_key_ofs);

    /* Size the hash table for a load factor of about 0.7 */
    hash_entries= (uint) (n / 0.7);
    set_if_bigger(hash_entries, 1);

    /* Stop once size_of_key_ofs can address every possible key entry */
    if (offset_size((uint)(max_n*key_entry_length)) <=
        size_of_key_ofs)
      break;
  }

  /* The hash table occupies the tail end of the join buffer */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  return 0;
}
2865
2866
2867 /*
2868 Reallocate the join buffer of a hashed join cache
2869
2870 SYNOPSIS
2871 realloc_buffer()
2872
  DESCRIPTION
2874 The function reallocates the join buffer of the hashed join cache.
2875 After this it initializes a hash table within the buffer space and
2876 resets the join cache for writing.
2877
2878 NOTES
2879 The function assumes that buff_size contains the new value for the join
2880 buffer size.
2881
2882 RETURN VALUE
2883 0 if the buffer has been successfully reallocated
2884 1 otherwise
2885 */
2886
realloc_buffer()2887 int JOIN_CACHE_HASHED::realloc_buffer()
2888 {
2889 free();
2890 buff= (uchar*) my_malloc(key_memory_JOIN_CACHE, buff_size,
2891 MYF(MY_THREAD_SPECIFIC));
2892 init_hash_table();
2893 reset(TRUE);
2894 return buff == NULL;
2895 }
2896
2897
2898 /*
2899 Get maximum size of the additional space per record used for record keys
2900
  SYNOPSIS
2902 get_max_key_addon_space_per_record()
2903
2904 DESCRIPTION
2905 The function returns the size of the space occupied by one key entry
2906 and one hash table entry.
2907
2908 RETURN VALUE
2909 maximum size of the additional space per record that is used to store
2910 record keys in the hash table
2911 */
2912
get_max_key_addon_space_per_record()2913 uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
2914 {
2915 ulong len;
2916 TABLE_REF *ref= &join_tab->ref;
2917 /*
2918 The total number of hash entries in the hash tables is bounded by
2919 ceiling(N/0.7) where N is the maximum number of records in the buffer.
2920 That's why the multiplier 2 is used in the formula below.
2921 */
2922 len= (use_emb_key ? get_size_of_rec_offset() : ref->key_length) +
2923 size_of_rec_ofs + // size of the key chain header
2924 size_of_rec_ofs + // >= size of the reference to the next key
2925 2*size_of_rec_ofs; // >= 2*( size of hash table entry)
2926 return len;
2927 }
2928
2929
2930 /*
2931 Reset the buffer of a hashed join cache for reading/writing
2932
2933 SYNOPSIS
2934 reset()
2935 for_writing if it's TRUE the function reset the buffer for writing
2936
2937 DESCRIPTION
2938 This implementation of the virtual function reset() resets the join buffer
2939 of the JOIN_CACHE_HASHED class for reading or writing.
2940 Additionally to what the default implementation does this function
2941 cleans up the hash table allocated within the buffer.
2942
2943 RETURN VALUE
2944 none
2945 */
2946
reset(bool for_writing)2947 void JOIN_CACHE_HASHED::reset(bool for_writing)
2948 {
2949 this->JOIN_CACHE::reset(for_writing);
2950 if (for_writing && hash_table)
2951 cleanup_hash_table();
2952 curr_key_entry= hash_table;
2953 }
2954
2955
2956 /*
2957 Add a record into the buffer of a hashed join cache
2958
2959 SYNOPSIS
2960 put_record()
2961
2962 DESCRIPTION
2963 This implementation of the virtual function put_record writes the next
2964 matching record into the join buffer of the JOIN_CACHE_HASHED class.
2965 Additionally to what the default implementation does this function
2966 performs the following.
2967 It extracts from the record the key value used in lookups for matching
2968 records and searches for this key in the hash tables from the join cache.
2969 If it finds the key in the hash table it joins the record to the chain
2970 of records with this key. If the key is not found in the hash table the
2971 key is placed into it and a chain containing only the newly added record
2972 is attached to the key entry. The key value is either placed in the hash
2973 element added for the key or, if the use_emb_key flag is set, remains in
2974 the record from the partial join.
2975 If the match flag field of a record contains MATCH_IMPOSSIBLE the key is
2976 not created for this record.
2977
2978 RETURN VALUE
2979 TRUE if it has been decided that it should be the last record
2980 in the join buffer,
2981 FALSE otherwise
2982 */
2983
bool JOIN_CACHE_HASHED::put_record()
{
  bool is_full;
  uchar *key;
  uint key_len= key_length;
  uchar *key_ref_ptr;
  uchar *link= 0;
  TABLE_REF *ref= &join_tab->ref;
  /* The record starts with a reference to the next record in its key chain */
  uchar *next_ref_ptr= pos;

  pos+= get_size_of_rec_offset();
  /* Write the record into the join buffer */
  if (prev_cache)
    link= prev_cache->get_curr_rec_link();
  write_record_data(link, &is_full);

  /* No key is created for a null-complemented record */
  if (last_written_is_null_compl)
    return is_full;

  if (use_emb_key)
    key= get_curr_emb_key();
  else
  {
    /* Build the key over the fields read into the record buffers */
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    key= ref->key_buff;
  }

  /* Look for the key in the hash table */
  if (key_search(key, key_len, &key_ref_ptr))
  {
    uchar *last_next_ref_ptr;
    /*
      The key is found in the hash table.
      Add the record to the circular list of the records attached to this key.
      Below 'rec' is the record to be added into the record chain for the found
      key, 'key_ref' points to a flatten representation of the st_key_entry
      structure that contains the key and the head of the record chain.
    */
    last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset());
    /* rec->next_rec= key_entry->last_rec->next_rec */
    memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset());
    /* key_entry->last_rec->next_rec= rec */
    store_next_rec_ref(last_next_ref_ptr, next_ref_ptr);
    /* key_entry->last_rec= rec */
    store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr);
  }
  else
  {
    /*
      The key is not found in the hash table.
      Put the key into the join buffer linking it with the keys for the
      corresponding hash entry. Create a circular list with one element
      referencing the record and attach the list to the key in the buffer.
      Key entries grow downwards from last_key_entry towards end_pos.
    */
    uchar *cp= last_key_entry;
    cp-= get_size_of_rec_offset()+get_size_of_key_offset();
    store_next_key_ref(key_ref_ptr, cp);
    store_null_key_ref(cp);
    /* The one-record chain references itself */
    store_next_rec_ref(next_ref_ptr, next_ref_ptr);
    store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr);
    if (use_emb_key)
    {
      /* Store only a reference to the key embedded in the record */
      cp-= get_size_of_rec_offset();
      store_emb_key_ref(cp, key);
    }
    else
    {
      /* Copy the key value itself into the key entry */
      cp-= key_len;
      memcpy(cp, key, key_len);
    }
    last_key_entry= cp;
    /* The key entry area must never overrun the record area */
    DBUG_ASSERT(last_key_entry >= end_pos);
    /* Increment the counter of key_entries in the hash table */
    key_entries++;
  }
  return is_full;
}
3062
3063
3064 /*
3065 Read the next record from the buffer of a hashed join cache
3066
3067 SYNOPSIS
3068 get_record()
3069
3070 DESCRIPTION
3071 Additionally to what the default implementation of the virtual
3072 function get_record does this implementation skips the link element
3073 used to connect the records with the same key into a chain.
3074
3075 RETURN VALUE
3076 TRUE there are no more records to read from the join buffer
3077 FALSE otherwise
3078 */
3079
get_record()3080 bool JOIN_CACHE_HASHED::get_record()
3081 {
3082 pos+= get_size_of_rec_offset();
3083 return this->JOIN_CACHE::get_record();
3084 }
3085
3086
3087 /*
3088 Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND
3089
3090 SYNOPSIS
3091 skip_if_matched()
3092
3093 DESCRIPTION
3094 This implementation of the virtual function skip_if_matched does
3095 the same as the default implementation does, but it takes into account
3096 the link element used to connect the records with the same key into a chain.
3097
3098 RETURN VALUE
3099 TRUE the match flag is MATCH_FOUND and the record has been skipped
3100 FALSE otherwise
3101 */
3102
skip_if_matched()3103 bool JOIN_CACHE_HASHED::skip_if_matched()
3104 {
3105 uchar *save_pos= pos;
3106 pos+= get_size_of_rec_offset();
3107 if (!this->JOIN_CACHE::skip_if_matched())
3108 {
3109 pos= save_pos;
3110 return FALSE;
3111 }
3112 return TRUE;
3113 }
3114
3115
3116 /*
3117 Skip record from a hashed join buffer if its match flag dictates to do so
3118
3119 SYNOPSIS
    skip_if_not_needed_match()
3121
3122 DESCRIPTION
3123 This implementation of the virtual function skip_if_not_needed_match does
3124 the same as the default implementation does, but it takes into account
3125 the link element used to connect the records with the same key into a chain.
3126
3127 RETURN VALUE
3128 TRUE the match flag dictates to skip the record
3129 FALSE the match flag is off
3130 */
3131
skip_if_not_needed_match()3132 bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
3133 {
3134 uchar *save_pos= pos;
3135 pos+= get_size_of_rec_offset();
3136 if (!this->JOIN_CACHE::skip_if_not_needed_match())
3137 {
3138 pos= save_pos;
3139 return FALSE;
3140 }
3141 return TRUE;
3142 }
3143
3144
3145 /*
3146 Search for a key in the hash table of the join buffer
3147
3148 SYNOPSIS
3149 key_search()
3150 key pointer to the key value
3151 key_len key value length
    key_ref_ptr  OUT position of the reference to the next key from
                     the hash element for the found key, or
                     a position where the reference to the hash
                     element for the key is to be added in the
                     case when the key has not been found

  DESCRIPTION
    The function looks for a key in the hash table of the join buffer.
    If the key is found the function returns the position of the reference
    to the next key from the hash element for the given key.
    Otherwise the function returns the position where the reference to the
    newly created hash element for the given key is to be added.
3164
3165 RETURN VALUE
3166 TRUE the key is found in the hash table
3167 FALSE otherwise
3168 */
3169
key_search(uchar * key,uint key_len,uchar ** key_ref_ptr)3170 bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
3171 uchar **key_ref_ptr)
3172 {
3173 bool is_found= FALSE;
3174 uint idx= (this->*hash_func)(key, key_length);
3175 uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
3176 while (!is_null_key_ref(ref_ptr))
3177 {
3178 uchar *next_key;
3179 ref_ptr= get_next_key_ref(ref_ptr);
3180 next_key= use_emb_key ? get_emb_key(ref_ptr-get_size_of_rec_offset()) :
3181 ref_ptr-key_length;
3182
3183 if ((this->*hash_cmp_func)(next_key, key, key_len))
3184 {
3185 is_found= TRUE;
3186 break;
3187 }
3188 }
3189 *key_ref_ptr= ref_ptr;
3190 return is_found;
3191 }
3192
3193
3194 /*
3195 Hash function that considers a key in the hash table as byte array
3196
3197 SYNOPSIS
3198 get_hash_idx_simple()
3199 key pointer to the key value
3200 key_len key value length
3201
3202 DESCRIPTION
3203 The function calculates an index of the hash entry in the hash table
3204 of the join buffer for the given key. It considers the key just as
3205 a sequence of bytes of the length key_len.
3206
3207 RETURN VALUE
3208 the calculated index of the hash entry for the given key
3209 */
3210
3211 inline
get_hash_idx_simple(uchar * key,uint key_len)3212 uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len)
3213 {
3214 ulong nr= 1;
3215 ulong nr2= 4;
3216 uchar *pos= key;
3217 uchar *end= key+key_len;
3218 for (; pos < end ; pos++)
3219 {
3220 nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8);
3221 nr2+= 3;
3222 }
3223 return nr % hash_entries;
3224 }
3225
3226
3227 /*
3228 Hash function that takes into account collations of the components of the key
3229
3230 SYNOPSIS
3231 get_hash_idx_complex()
3232 key pointer to the key value
3233 key_len key value length
3234
3235 DESCRIPTION
3236 The function calculates an index of the hash entry in the hash table
3237 of the join buffer for the given key. It takes into account that the
3238 components of the key may be of a varchar type with different collations.
    The function guarantees the same hash value for any two equal
    keys even if they may differ as byte sequences.
3241 The function takes the info about the components of the key, their
3242 types and used collations from the class member ref_key_info containing
3243 a pointer to the descriptor of the index that can be used for the join
3244 operation.
3245
3246 RETURN VALUE
3247 the calculated index of the hash entry for the given key
3248 */
3249
3250 inline
get_hash_idx_complex(uchar * key,uint key_len)3251 uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len)
3252 {
3253 return
3254 (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries);
3255 }
3256
3257
3258 /*
3259 Compare two key entries in the hash table as sequence of bytes
3260
3261 SYNOPSIS
3262 equal_keys_simple()
3263 key1 pointer to the first key entry
3264 key2 pointer to the second key entry
3265 key_len the length of the key values
3266
3267 DESCRIPTION
3268 The function compares two key entries in the hash table key1 and key2
3269 as two sequences bytes of the length key_len
3270
3271 RETURN VALUE
3272 TRUE key1 coincides with key2
3273 FALSE otherwise
3274 */
3275
3276 inline
equal_keys_simple(uchar * key1,uchar * key2,uint key_len)3277 bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2,
3278 uint key_len)
3279 {
3280 return memcmp(key1, key2, key_len) == 0;
3281 }
3282
3283
3284 /*
3285 Compare two key entries taking into account the used collation
3286
3287 SYNOPSIS
3288 equal_keys_complex()
3289 key1 pointer to the first key entry
3290 key2 pointer to the second key entry
3291 key_len the length of the key values
3292
3293 DESCRIPTION
3294 The function checks whether two key entries in the hash table
3295 key1 and key2 are equal as, possibly, compound keys of a certain
3296 structure whose components may be of a varchar type and may
3297 employ different collations.
3298 The descriptor of the key structure is taken from the class
3299 member ref_key_info.
3300
3301 RETURN VALUE
    TRUE   key1 is equal to key2
3303 FALSE otherwise
3304 */
3305
3306 inline
equal_keys_complex(uchar * key1,uchar * key2,uint key_len)3307 bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
3308 uint key_len)
3309 {
3310 return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
3311 }
3312
3313
3314 /*
3315 Clean up the hash table of the join buffer
3316
3317 SYNOPSIS
3318 cleanup_hash_table()
3319 key pointer to the key value
3320 key_len key value length
3321
3322 DESCRIPTION
3323 The function cleans up the hash table in the join buffer removing all
3324 hash elements from the table.
3325
3326 RETURN VALUE
3327 none
3328 */
3329
cleanup_hash_table()3330 void JOIN_CACHE_HASHED:: cleanup_hash_table()
3331 {
3332 last_key_entry= hash_table;
3333 bzero(hash_table, (buff+buff_size)-hash_table);
3334 key_entries= 0;
3335 }
3336
3337
3338 /*
3339 Check whether all records in a key chain have their match flags set on
3340
3341 SYNOPSIS
3342 check_all_match_flags_for_key()
3343 key_chain_ptr
3344
3345 DESCRIPTION
3346 This function retrieves records in the given circular chain and checks
3347 whether their match flags are set on. The parameter key_chain_ptr shall
3348 point to the position in the join buffer storing the reference to the
3349 last element of this chain.
3350
3351 RETURN VALUE
3352 TRUE if each retrieved record has its match flag set to MATCH_FOUND
3353 FALSE otherwise
3354 */
3355
check_all_match_flags_for_key(uchar * key_chain_ptr)3356 bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr)
3357 {
3358 uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr);
3359 uchar *next_rec_ref_ptr= last_rec_ref_ptr;
3360 do
3361 {
3362 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
3363 uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset;
3364 if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND)
3365 return FALSE;
3366 }
3367 while (next_rec_ref_ptr != last_rec_ref_ptr);
3368 return TRUE;
3369 }
3370
3371
3372 /*
3373 Get the next key built for the records from the buffer of a hashed join cache
3374
3375 SYNOPSIS
3376 get_next_key()
3377 key pointer to the buffer where the key value is to be placed
3378
3379 DESCRIPTION
3380 The function reads the next key value stored in the hash table of the
3381 join buffer. Depending on the value of the use_emb_key flag of the
3382 join cache the value is read either from the table itself or from
3383 the record field where it occurs.
3384
3385 RETURN VALUE
    length of the key value - if the starting value of 'curr_key_entry' refers
    to the position after that referred by the value of 'last_key_entry',
    0 - otherwise.
3389 */
3390
get_next_key(uchar ** key)3391 uint JOIN_CACHE_HASHED::get_next_key(uchar ** key)
3392 {
3393 if (curr_key_entry == last_key_entry)
3394 return 0;
3395
3396 curr_key_entry-= key_entry_length;
3397
3398 *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry;
3399
3400 DBUG_ASSERT(*key >= buff && *key < hash_table);
3401
3402 return key_length;
3403 }
3404
3405
3406 /*
3407 Initiate an iteration process over records in the joined table
3408
3409 SYNOPSIS
3410 open()
3411
3412 DESCRIPTION
3413 The function initiates the process of iteration over records from the
    joined table recurrently performed by the BNL/BNLH join algorithm.
3415
3416 RETURN VALUE
3417 0 the initiation is a success
3418 error code otherwise
3419 */
3420
open()3421 int JOIN_TAB_SCAN::open()
3422 {
3423 save_or_restore_used_tabs(join_tab, FALSE);
3424 is_first_record= TRUE;
3425 join_tab->tracker->r_scans++;
3426 return join_init_read_record(join_tab);
3427 }
3428
3429
3430 /*
3431 Read the next record that can match while scanning the joined table
3432
3433 SYNOPSIS
3434 next()
3435
3436 DESCRIPTION
3437 The function reads the next record from the joined table that can
3438 match some records in the buffer of the join cache 'cache'. To do
3439 this the function calls the function that scans table records and
3440 looks for the next one that meets the condition pushed to the
3441 joined table join_tab.
3442
3443 NOTES
3444 The function catches the signal that kills the query.
3445
3446 RETURN VALUE
3447 0 the next record exists and has been successfully read
3448 error code otherwise
3449 */
3450
int JOIN_TAB_SCAN::next()
{
  int err= 0;
  int skip_rc;
  READ_RECORD *info= &join_tab->read_record;
  SQL_SELECT *select= join_tab->cache_select;
  THD *thd= join->thd;

  /* open() already fetched the first record via join_init_read_record() */
  if (is_first_record)
    is_first_record= FALSE;
  else
    err= info->read_record();

  if (!err)
  {
    join_tab->tracker->r_rows++;
  }

  /* Loop until a record satisfies the condition pushed to join_tab */
  while (!err && select && (skip_rc= select->skip_record(thd)) <= 0)
  {
    /* Give up if the query was killed or evaluating the condition failed */
    if (unlikely(thd->check_killed()) || skip_rc < 0)
      return 1;
    /*
      Move to the next record if the last retrieved record does not
      meet the condition pushed to the table join_tab.
    */
    err= info->read_record();
    if (!err)
    {
      join_tab->tracker->r_rows++;
    }
  }

  if (!err)
    join_tab->tracker->r_rows_after_where++;
  return err;
}
3488
3489
3490 /*
3491 Walk back in join order from join_tab until we encounter a join tab with
3492 tab->cache!=NULL, and save/restore tab->table->status along the way.
3493
3494 @param save TRUE save
3495 FALSE restore
3496 */
3497
save_or_restore_used_tabs(JOIN_TAB * join_tab,bool save)3498 static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save)
3499 {
3500 JOIN_TAB *first= join_tab->bush_root_tab?
3501 join_tab->bush_root_tab->bush_children->start :
3502 join_tab->join->join_tab + join_tab->join->const_tables;
3503
3504 for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--)
3505 {
3506 if (tab->bush_children)
3507 {
3508 for (JOIN_TAB *child= tab->bush_children->start;
3509 child != tab->bush_children->end;
3510 child++)
3511 {
3512 if (save)
3513 child->table->status= child->status;
3514 else
3515 {
3516 tab->status= tab->table->status;
3517 tab->table->status= 0;
3518 }
3519 }
3520 }
3521
3522 if (save)
3523 tab->table->status= tab->status;
3524 else
3525 {
3526 tab->status= tab->table->status;
3527 tab->table->status= 0;
3528 }
3529 }
3530 }
3531
3532
3533 /*
3534 Perform finalizing actions for a scan over the table records
3535
3536 SYNOPSIS
3537 close()
3538
3539 DESCRIPTION
3540 The function performs the necessary restoring actions after
3541 the table scan over the joined table has been finished.
3542
3543 RETURN VALUE
3544 none
3545 */
3546
void JOIN_TAB_SCAN::close()
{
  /* Write back the table statuses stashed by the companion open() call */
  save_or_restore_used_tabs(join_tab, TRUE);
}
3551
3552
3553 /*
3554 Prepare to iterate over the BNL join cache buffer to look for matches
3555
3556 SYNOPSIS
3557 prepare_look_for_matches()
3558 skip_last <-> ignore the last record in the buffer
3559
3560 DESCRIPTION
3561 The function prepares the join cache for an iteration over the
3562 records in the join buffer. The iteration is performed when looking
3563 for matches for the record from the joined table join_tab that
3564 has been placed into the record buffer of the joined table.
3565 If the value of the parameter skip_last is TRUE then the last
3566 record from the join buffer is ignored.
3567 The function initializes the counter of the records that have been
3568 not iterated over yet.
3569
3570 RETURN VALUE
3571 TRUE there are no records in the buffer to iterate over
3572 FALSE otherwise
3573 */
3574
prepare_look_for_matches(bool skip_last)3575 bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last)
3576 {
3577 if (!records)
3578 return TRUE;
3579 reset(FALSE);
3580 rem_records= (uint)records - MY_TEST(skip_last);
3581 return rem_records == 0;
3582 }
3583
3584
3585 /*
3586 Get next record from the BNL join cache buffer when looking for matches
3587
3588 SYNOPSIS
3589 get_next_candidate_for_match
3590
3591 DESCRIPTION
3592 This method is used for iterations over the records from the join
3593 cache buffer when looking for matches for records from join_tab.
3594 The methods performs the necessary preparations to read the next record
3595 from the join buffer into the record buffer by the method
3596 read_next_candidate_for_match, or, to skip the next record from the join
3597 buffer by the method skip_recurrent_candidate_for_match.
3598 This implementation of the virtual method get_next_candidate_for_match
3599 just decrements the counter of the records that are to be iterated over
3600 and returns the current value of the cursor 'pos' as the position of
3601 the record to be processed.
3602
3603 RETURN VALUE
3604 pointer to the position right after the prefix of the current record
3605 in the join buffer if the there is another record to iterate over,
3606 0 - otherwise.
3607 */
3608
get_next_candidate_for_match()3609 uchar *JOIN_CACHE_BNL::get_next_candidate_for_match()
3610 {
3611 if (!rem_records)
3612 return 0;
3613 rem_records--;
3614 return pos+base_prefix_length;
3615 }
3616
3617
3618 /*
3619 Check whether the matching record from the BNL cache is to be skipped
3620
3621 SYNOPSIS
3622 skip_next_candidate_for_match
3623 rec_ptr pointer to the position in the join buffer right after the prefix
3624 of the current record
3625
3626 DESCRIPTION
3627 This implementation of the virtual function just calls the
3628 method skip_if_not_needed_match to check whether the record referenced by
3629 ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the
3630 first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and
3631 join_tab is the first inner table of an outer join.
3632 If so, the function just skips this record setting the value of the
3633 cursor 'pos' to the position right after it.
3634
3635 RETURN VALUE
3636 TRUE the record referenced by rec_ptr has been skipped
3637 FALSE otherwise
3638 */
3639
skip_next_candidate_for_match(uchar * rec_ptr)3640 bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr)
3641 {
3642 pos= rec_ptr-base_prefix_length;
3643 return skip_if_not_needed_match();
3644 }
3645
3646
3647 /*
3648 Read next record from the BNL join cache buffer when looking for matches
3649
3650 SYNOPSIS
3651 read_next_candidate_for_match
3652 rec_ptr pointer to the position in the join buffer right after the prefix
3653 the current record.
3654
3655 DESCRIPTION
3656 This implementation of the virtual method read_next_candidate_for_match
3657 calls the method get_record to read the record referenced by rec_ptr from
3658 the join buffer into the record buffer. If this record refers to the
3659 fields in the other join buffers the call of get_record ensures that
3660 these fields are read into the corresponding record buffers as well.
3661 This function is supposed to be called after a successful call of
3662 the method get_next_candidate_for_match.
3663
3664 RETURN VALUE
3665 none
3666 */
3667
read_next_candidate_for_match(uchar * rec_ptr)3668 void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr)
3669 {
3670 pos= rec_ptr-base_prefix_length;
3671 get_record();
3672 }
3673
3674
3675 /*
3676 Initialize the BNL join cache
3677
3678 SYNOPSIS
3679 init
3680 for_explain join buffer is initialized for explain only
3681
3682 DESCRIPTION
3683 The function initializes the cache structure. It is supposed to be called
3684 right after a constructor for the JOIN_CACHE_BNL.
3685
3686 NOTES
3687 The function first constructs a companion object of the type JOIN_TAB_SCAN,
3688 then it calls the init method of the parent class.
3689
3690 RETURN VALUE
3691 0 initialization with buffer allocations has been succeeded
3692 1 otherwise
3693 */
3694
init(bool for_explain)3695 int JOIN_CACHE_BNL::init(bool for_explain)
3696 {
3697 DBUG_ENTER("JOIN_CACHE_BNL::init");
3698
3699 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
3700 DBUG_RETURN(1);
3701
3702 DBUG_RETURN(JOIN_CACHE::init(for_explain));
3703 }
3704
3705
3706 /*
3707 Get the chain of records from buffer matching the current candidate for join
3708
3709 SYNOPSIS
3710 get_matching_chain_by_join_key()
3711
3712 DESCRIPTION
3713 This function first build a join key for the record of join_tab that
3714 currently is in the join buffer for this table. Then it looks for
3715 the key entry with this key in the hash table of the join cache.
3716 If such a key entry is found the function returns the pointer to
3717 the head of the chain of records in the join_buffer that match this
3718 key.
3719
3720 RETURN VALUE
3721 The pointer to the corresponding circular list of records if
3722 the key entry with the join key is found, 0 - otherwise.
3723 */
3724
get_matching_chain_by_join_key()3725 uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key()
3726 {
3727 uchar *key_ref_ptr;
3728 TABLE *table= join_tab->table;
3729 TABLE_REF *ref= &join_tab->ref;
3730 KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);
3731 /* Build the join key value out of the record in the record buffer */
3732 key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE);
3733 /* Look for this key in the join buffer */
3734 if (!key_search(key_buff, key_length, &key_ref_ptr))
3735 return 0;
3736 return key_ref_ptr+get_size_of_key_offset();
3737 }
3738
3739
3740 /*
3741 Prepare to iterate over the BNLH join cache buffer to look for matches
3742
3743 SYNOPSIS
3744 prepare_look_for_matches()
3745 skip_last <-> ignore the last record in the buffer
3746
3747 DESCRIPTION
3748 The function prepares the join cache for an iteration over the
3749 records in the join buffer. The iteration is performed when looking
3750 for matches for the record from the joined table join_tab that
3751 has been placed into the record buffer of the joined table.
3752 If the value of the parameter skip_last is TRUE then the last
3753 record from the join buffer is ignored.
    The function builds the hashed key from the join fields of join_tab
    and uses this key to look in the hash table of the join cache for
    the chain of matching records in the join buffer. If it finds
    such a chain it sets the member last_matching_rec_ref_ptr to point to
    the last link of the chain while setting next_matching_rec_ref_ptr to 0.
3759
3760 RETURN VALUE
3761 TRUE there are no matching records in the buffer to iterate over
3762 FALSE otherwise
3763 */
3764
prepare_look_for_matches(bool skip_last)3765 bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last)
3766 {
3767 uchar *curr_matching_chain;
3768 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
3769 if (!(curr_matching_chain= get_matching_chain_by_join_key()))
3770 return 1;
3771 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
3772 return 0;
3773 }
3774
3775
3776 /*
3777 Get next record from the BNLH join cache buffer when looking for matches
3778
3779 SYNOPSIS
3780 get_next_candidate_for_match
3781
3782 DESCRIPTION
3783 This method is used for iterations over the records from the join
3784 cache buffer when looking for matches for records from join_tab.
3785 The methods performs the necessary preparations to read the next record
3786 from the join buffer into the record buffer by the method
3787 read_next_candidate_for_match, or, to skip the next record from the join
3788 buffer by the method skip_next_candidate_for_match.
3789 This implementation of the virtual method moves to the next record
3790 in the chain of all records from the join buffer that are to be
3791 equi-joined with the current record from join_tab.
3792
3793 RETURN VALUE
3794 pointer to the beginning of the record fields in the join buffer
3795 if the there is another record to iterate over, 0 - otherwise.
3796 */
3797
get_next_candidate_for_match()3798 uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match()
3799 {
3800 if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr)
3801 return 0;
3802 next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ?
3803 next_matching_rec_ref_ptr :
3804 last_matching_rec_ref_ptr);
3805 return next_matching_rec_ref_ptr+rec_fields_offset;
3806 }
3807
3808
3809 /*
3810 Check whether the matching record from the BNLH cache is to be skipped
3811
3812 SYNOPSIS
3813 skip_next_candidate_for_match
3814 rec_ptr pointer to the position in the join buffer right after
3815 the previous record
3816
3817 DESCRIPTION
3818 This implementation of the virtual function just calls the
3819 method get_match_flag_by_pos to check whether the record referenced
3820 by ref_ptr has its match flag set to MATCH_FOUND.
3821
3822 RETURN VALUE
3823 TRUE the record referenced by rec_ptr has its match flag set to
3824 MATCH_FOUND
3825 FALSE otherwise
3826 */
3827
skip_next_candidate_for_match(uchar * rec_ptr)3828 bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr)
3829 {
3830 return join_tab->check_only_first_match() &&
3831 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
3832 }
3833
3834
3835 /*
3836 Read next record from the BNLH join cache buffer when looking for matches
3837
3838 SYNOPSIS
3839 read_next_candidate_for_match
3840 rec_ptr pointer to the position in the join buffer right after
3841 the previous record
3842
3843 DESCRIPTION
3844 This implementation of the virtual method read_next_candidate_for_match
3845 calls the method get_record_by_pos to read the record referenced by rec_ptr
3846 from the join buffer into the record buffer. If this record refers to
    fields in the other join buffers the call of get_record_by_pos ensures that
3848 these fields are read into the corresponding record buffers as well.
3849 This function is supposed to be called after a successful call of
3850 the method get_next_candidate_for_match.
3851
3852 RETURN VALUE
3853 none
3854 */
3855
void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr)
{
  /* Read the record at rec_ptr (and any chained prefixes) into buffers */
  get_record_by_pos(rec_ptr);
}
3860
3861
3862 /*
3863 Initialize the BNLH join cache
3864
3865 SYNOPSIS
3866 init
3867 for_explain join buffer is initialized for explain only
3868
3869 DESCRIPTION
3870 The function initializes the cache structure. It is supposed to be called
3871 right after a constructor for the JOIN_CACHE_BNLH.
3872
3873 NOTES
3874 The function first constructs a companion object of the type JOIN_TAB_SCAN,
3875 then it calls the init method of the parent class.
3876
3877 RETURN VALUE
3878 0 initialization with buffer allocations has been succeeded
3879 1 otherwise
3880 */
3881
init(bool for_explain)3882 int JOIN_CACHE_BNLH::init(bool for_explain)
3883 {
3884 DBUG_ENTER("JOIN_CACHE_BNLH::init");
3885
3886 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab)))
3887 DBUG_RETURN(1);
3888
3889 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain));
3890 }
3891
3892
3893 /*
3894 Calculate the increment of the MRR buffer for a record write
3895
3896 SYNOPSIS
3897 aux_buffer_incr()
3898
3899 DESCRIPTION
3900 This implementation of the virtual function aux_buffer_incr determines
3901 for how much the size of the MRR buffer should be increased when another
3902 record is added to the cache.
3903
3904 RETURN VALUE
3905 the increment of the size of the MRR buffer for the next record
3906 */
3907
aux_buffer_incr(size_t recno)3908 uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno)
3909 {
3910 uint incr= 0;
3911 TABLE_REF *ref= &join_tab->ref;
3912 TABLE *tab= join_tab->table;
3913 ha_rows rec_per_key=
3914 (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1);
3915 set_if_bigger(rec_per_key, 1);
3916 if (recno == 1)
3917 incr= ref->key_length + tab->file->ref_length;
3918 incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key);
3919 return incr;
3920 }
3921
3922
3923 /*
3924 Initiate iteration over records returned by MRR for the current join buffer
3925
3926 SYNOPSIS
3927 open()
3928
3929 DESCRIPTION
3930 The function initiates the process of iteration over the records from
3931 join_tab returned by the MRR interface functions for records from
3932 the join buffer. Such an iteration is performed by the BKA/BKAH join
3933 algorithm for each new refill of the join buffer.
3934 The function calls the MRR handler function multi_range_read_init to
3935 initiate this process.
3936
3937 RETURN VALUE
3938 0 the initiation is a success
3939 error code otherwise
3940 */
3941
open()3942 int JOIN_TAB_SCAN_MRR::open()
3943 {
3944 handler *file= join_tab->table->file;
3945
3946 join_tab->table->null_row= 0;
3947
3948
3949 /* Dynamic range access is never used with BKA */
3950 DBUG_ASSERT(join_tab->use_quick != 2);
3951
3952 join_tab->tracker->r_scans++;
3953 save_or_restore_used_tabs(join_tab, FALSE);
3954
3955 init_mrr_buff();
3956
3957 /*
3958 Prepare to iterate over keys from the join buffer and to get
3959 matching candidates obtained with MMR handler functions.
3960 */
3961 if (!file->inited)
3962 file->ha_index_init(join_tab->ref.key, 1);
3963 ranges= cache->get_number_of_ranges_for_mrr();
3964 if (!join_tab->cache_idx_cond)
3965 range_seq_funcs.skip_index_tuple= 0;
3966 return file->multi_range_read_init(&range_seq_funcs, (void*) cache,
3967 ranges, mrr_mode, &mrr_buff);
3968 }
3969
3970
3971 /*
3972 Read the next record returned by MRR for the current join buffer
3973
3974 SYNOPSIS
3975 next()
3976
3977 DESCRIPTION
3978 The function reads the next record from the joined table join_tab
3979 returned by the MRR handler function multi_range_read_next for
3980 the current refill of the join buffer. The record is read into
3981 the record buffer used for join_tab records in join operations.
3982
3983 RETURN VALUE
3984 0 the next record exists and has been successfully read
3985 error code otherwise
3986 */
3987
next()3988 int JOIN_TAB_SCAN_MRR::next()
3989 {
3990 char **ptr= (char **) cache->get_curr_association_ptr();
3991
3992 DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr));
3993 int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0;
3994 if (!rc)
3995 {
3996 join_tab->tracker->r_rows++;
3997 join_tab->tracker->r_rows_after_where++;
3998 /*
3999 If a record in in an incremental cache contains no fields then the
4000 association for the last record in cache will be equal to cache->end_pos
4001 */
4002 /*
4003 psergey: this makes no sense where HA_MRR_NO_ASSOC is used.
4004 DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) &&
4005 (uchar *) (*ptr) <= cache->end_pos);
4006 */
4007 }
4008 return rc;
4009 }
4010
4011
4012 static
bka_range_seq_key_info(void * init_params,uint * length,key_part_map * map)4013 void bka_range_seq_key_info(void *init_params, uint *length,
4014 key_part_map *map)
4015 {
4016 TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref);
4017 *length= ref->key_length;
4018 *map= (key_part_map(1) << ref->key_parts) - 1;
4019 }
4020
4021
4022 /*
4023 Initialize retrieval of range sequence for BKA join algorithm
4024
4025 SYNOPSIS
4026 bka_range_seq_init()
4027 init_params pointer to the BKA join cache object
4028 n_ranges the number of ranges obtained
4029 flags combination of MRR flags
4030
4031 DESCRIPTION
4032 The function interprets init_param as a pointer to a JOIN_CACHE_BKA
4033 object. The function prepares for an iteration over the join keys
4034 built for all records from the cache join buffer.
4035
4036 NOTE
4037 This function are used only as a callback function.
4038
4039 RETURN VALUE
4040 init_param value that is to be used as a parameter of bka_range_seq_next()
4041 */
4042
4043 static
bka_range_seq_init(void * init_param,uint n_ranges,uint flags)4044 range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags)
4045 {
4046 DBUG_ENTER("bka_range_seq_init");
4047 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param;
4048 cache->reset(0);
4049 DBUG_RETURN((range_seq_t) init_param);
4050 }
4051
4052
4053 /*
4054 Get the next range/key over records from the join buffer used by a BKA cache
4055
4056 SYNOPSIS
4057 bka_range_seq_next()
4058 seq the value returned by bka_range_seq_init
4059 range OUT reference to the next range
4060
4061 DESCRIPTION
4062 The function interprets seq as a pointer to a JOIN_CACHE_BKA
4063 object. The function returns a pointer to the range descriptor
4064 for the key built over the next record from the join buffer.
4065
4066 NOTE
4067 This function are used only as a callback function.
4068
4069 RETURN VALUE
4070 FALSE ok, the range structure filled with info about the next range/key
4071 TRUE no more ranges
4072 */
4073
4074 static
bka_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)4075 bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
4076 {
4077 DBUG_ENTER("bka_range_seq_next");
4078 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4079 TABLE_REF *ref= &cache->join_tab->ref;
4080 key_range *start_key= &range->start_key;
4081 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
4082 {
4083 start_key->keypart_map= (1 << ref->key_parts) - 1;
4084 start_key->flag= HA_READ_KEY_EXACT;
4085 range->end_key= *start_key;
4086 range->end_key.flag= HA_READ_AFTER_KEY;
4087 range->ptr= (char *) cache->get_curr_rec();
4088 range->range_flag= EQ_RANGE;
4089 DBUG_RETURN(0);
4090 }
4091 DBUG_RETURN(1);
4092 }
4093
4094
4095 /*
4096 Check whether range_info orders to skip the next record from BKA buffer
4097
4098 SYNOPSIS
4099 bka_range_seq_skip_record()
4100 seq value returned by bka_range_seq_init()
4101 range_info information about the next range
4102 rowid [NOT USED] rowid of the record to be checked
4103
4104
4105 DESCRIPTION
4106 The function interprets seq as a pointer to a JOIN_CACHE_BKA object.
4107 The function returns TRUE if the record with this range_info
4108 is to be filtered out from the stream of records returned by
4109 multi_range_read_next().
4110
4111 NOTE
4112 This function are used only as a callback function.
4113
4114 RETURN VALUE
4115 1 record with this range_info is to be filtered out from the stream
4116 of records returned by multi_range_read_next()
4117 0 the record is to be left in the stream
4118 */
4119
4120 static
bka_range_seq_skip_record(range_seq_t rseq,range_id_t range_info,uchar * rowid)4121 bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
4122 {
4123 DBUG_ENTER("bka_range_seq_skip_record");
4124 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4125 bool res= cache->get_match_flag_by_pos((uchar *) range_info) ==
4126 JOIN_CACHE::MATCH_FOUND;
4127 DBUG_RETURN(res);
4128 }
4129
4130
4131 /*
4132 Check if the record combination from BKA cache matches the index condition
4133
4134 SYNOPSIS
4135 bka_skip_index_tuple()
4136 rseq value returned by bka_range_seq_init()
4137 range_info record chain for the next range/key returned by MRR
4138
4139 DESCRIPTION
4140 This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method,
4141 see comments there.
4142
4143 NOTE
4144 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
4145
4146 RETURN VALUE
4147 0 The record combination satisfies the index condition
4148 1 Otherwise
4149 */
4150
4151 static
bka_skip_index_tuple(range_seq_t rseq,range_id_t range_info)4152 bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
4153 {
4154 DBUG_ENTER("bka_skip_index_tuple");
4155 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq;
4156 THD *thd= cache->thd();
4157 bool res;
4158 status_var_increment(thd->status_var.ha_icp_attempts);
4159 if (!(res= cache->skip_index_tuple(range_info)))
4160 status_var_increment(thd->status_var.ha_icp_match);
4161 DBUG_RETURN(res);
4162 }
4163
4164
4165 /*
4166 Prepare to read the record from BKA cache matching the current joined record
4167
4168 SYNOPSIS
4169 prepare_look_for_matches()
4170 skip_last <-> ignore the last record in the buffer (always unused here)
4171
4172 DESCRIPTION
4173 The function prepares to iterate over records in the join cache buffer
4174 matching the record loaded into the record buffer for join_tab when
4175 performing join operation by BKA join algorithm. With BKA algorithms the
4176 record loaded into the record buffer for join_tab always has a direct
4177 reference to the matching records from the join buffer. When the regular
4178 BKA join algorithm is employed the record from join_tab can refer to
4179 only one such record.
4180 The function sets the counter of the remaining records from the cache
4181 buffer that would match the current join_tab record to 1.
4182
4183 RETURN VALUE
4184 TRUE there are no records in the buffer to iterate over
4185 FALSE otherwise
4186 */
4187
prepare_look_for_matches(bool skip_last)4188 bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last)
4189 {
4190 if (!records)
4191 return TRUE;
4192 rem_records= 1;
4193 return FALSE;
4194 }
4195
4196
4197 /*
4198 Get the record from the BKA cache matching the current joined record
4199
4200 SYNOPSIS
4201 get_next_candidate_for_match
4202
4203 DESCRIPTION
4204 This method is used for iterations over the records from the join
4205 cache buffer when looking for matches for records from join_tab.
4206 The method performs the necessary preparations to read the next record
4207 from the join buffer into the record buffer by the method
4208 read_next_candidate_for_match, or, to skip the next record from the join
4209 buffer by the method skip_if_not_needed_match.
4210 This implementation of the virtual method get_next_candidate_for_match
4211 just decrements the counter of the records that are to be iterated over
4212 and returns the value of curr_association as a reference to the position
4213 of the beginning of the record fields in the buffer.
4214
4215 RETURN VALUE
4216 pointer to the start of the record fields in the join buffer
4217 if the there is another record to iterate over, 0 - otherwise.
4218 */
4219
get_next_candidate_for_match()4220 uchar *JOIN_CACHE_BKA::get_next_candidate_for_match()
4221 {
4222 if (!rem_records)
4223 return 0;
4224 rem_records--;
4225 return curr_association;
4226 }
4227
4228
4229 /*
4230 Check whether the matching record from the BKA cache is to be skipped
4231
4232 SYNOPSIS
4233 skip_next_candidate_for_match
4234 rec_ptr pointer to the position in the join buffer right after
4235 the previous record
4236
4237 DESCRIPTION
4238 This implementation of the virtual function just calls the
4239 method get_match_flag_by_pos to check whether the record referenced
4240 by ref_ptr has its match flag set to MATCH_FOUND.
4241
4242 RETURN VALUE
4243 TRUE the record referenced by rec_ptr has its match flag set to
4244 MATCH_FOUND
4245 FALSE otherwise
4246 */
4247
skip_next_candidate_for_match(uchar * rec_ptr)4248 bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr)
4249 {
4250 return join_tab->check_only_first_match() &&
4251 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND);
4252 }
4253
4254
4255 /*
4256 Read the next record from the BKA join cache buffer when looking for matches
4257
4258 SYNOPSIS
4259 read_next_candidate_for_match
4260 rec_ptr pointer to the position in the join buffer right after
4261 the previous record
4262
4263 DESCRIPTION
4264 This implementation of the virtual method read_next_candidate_for_match
4265 calls the method get_record_by_pos to read the record referenced by rec_ptr
4266 from the join buffer into the record buffer. If this record refers to
4267 fields in the other join buffers the call of get_record_by_po ensures that
4268 these fields are read into the corresponding record buffers as well.
4269 This function is supposed to be called after a successful call of
4270 the method get_next_candidate_for_match.
4271
4272 RETURN VALUE
4273 none
4274 */
4275
read_next_candidate_for_match(uchar * rec_ptr)4276 void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr)
4277 {
4278 get_record_by_pos(rec_ptr);
4279 }
4280
4281
4282 /*
4283 Initialize the BKA join cache
4284
4285 SYNOPSIS
4286 init
4287 for_explain join buffer is initialized for explain only
4288
4289
4290 DESCRIPTION
4291 The function initializes the cache structure. It is supposed to be called
4292 right after a constructor for the JOIN_CACHE_BKA.
4293
4294 NOTES
4295 The function first constructs a companion object of the type
4296 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
4297
4298 RETURN VALUE
4299 0 initialization with buffer allocations has been succeeded
4300 1 otherwise
4301 */
4302
init(bool for_explain)4303 int JOIN_CACHE_BKA::init(bool for_explain)
4304 {
4305 int res;
4306 bool check_only_first_match= join_tab->check_only_first_match();
4307
4308 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
4309 bka_range_seq_init,
4310 bka_range_seq_next,
4311 check_only_first_match ? bka_range_seq_skip_record : 0,
4312 bka_skip_index_tuple };
4313
4314 DBUG_ENTER("JOIN_CACHE_BKA::init");
4315
4316 JOIN_TAB_SCAN_MRR *jsm;
4317 if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab,
4318 mrr_mode, rs_funcs)))
4319 DBUG_RETURN(1);
4320
4321 if ((res= JOIN_CACHE::init(for_explain)))
4322 DBUG_RETURN(res);
4323
4324 if (use_emb_key)
4325 jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS;
4326
4327 DBUG_RETURN(0);
4328 }
4329
4330
/*
  Get the key built over the next record from BKA join buffer

  SYNOPSIS
    get_next_key()
      key   pointer to the buffer where the key value is to be placed

  DESCRIPTION
    The function reads key fields from the current record in the join buffer
    and builds the key value out of these fields that will be used to access
    the 'join_tab' table. Some of key fields may belong to previous caches.
    They are accessed via record references to the record parts stored in the
    previous join buffers. The other key fields always are placed right after
    the flag fields of the record.
    If the key is embedded, which means that its value can be read directly
    from the join buffer, then *key is set to the beginning of the key in
    this buffer. Otherwise the key is built in the join_tab->ref->key_buff.
    The function returns the length of the key if it succeeds to read it.
    It is assumed that the function starts reading at the position of
    the record length which is provided for each record in a BKA cache.
    After the key is built the 'pos' value points to the first position after
    the current record.
    The function just skips the records with MATCH_IMPOSSIBLE in the
    match flag field if there is any.
    The function returns 0 if the initial position is after the beginning
    of the record fields for the last record from the join buffer.

  RETURN VALUE
    length of the key value - if the starting value of 'pos' points to
    the position before the fields for the last record,
    0 - otherwise.
*/

uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
{
  uint len;
  uint32 rec_len;
  uchar *init_pos;
  JOIN_CACHE *cache;

start:

  /* Any record in a BKA cache is prepended with its length */
  DBUG_ASSERT(with_length);

  /* Stop if 'pos' has moved past the fields of the last buffered record */
  if ((pos+size_of_rec_len) > last_rec_pos || !records)
    return 0;

  /* Read the length of the record */
  rec_len= get_rec_length(pos);
  pos+= size_of_rec_len;
  init_pos= pos;

  /* Read a reference to the previous cache if any */
  if (prev_cache)
    pos+= prev_cache->get_size_of_rec_offset();

  curr_rec_pos= pos;

  /* Read all flag fields of the record */
  read_flag_fields();

  /* Records marked MATCH_IMPOSSIBLE never need a lookup: skip and restart */
  if (with_match_flag &&
      (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
  {
    pos= init_pos+rec_len;
    goto start;
  }

  if (use_emb_key)
  {
    /* An embedded key is taken directly from the join buffer */
    *key= pos;
    len= emb_key_length;
  }
  else
  {
    /* Read key arguments from previous caches if there are any such fields */
    if (external_key_arg_fields)
    {
      uchar *rec_ptr= curr_rec_pos;
      uint key_arg_count= external_key_arg_fields;
      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
      {
        /* NOTE: this 'len' intentionally shadows the outer key length */
        uint len= 0;
        DBUG_ASSERT(cache);
        rec_ptr= cache->get_rec_ref(rec_ptr);
        /* Walk back through caches that exported no referenced fields */
        while (!cache->referenced_fields)
        {
          cache= cache->prev_cache;
          DBUG_ASSERT(cache);
          rec_ptr= cache->get_rec_ref(rec_ptr);
        }
        while (key_arg_count &&
               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
        {
          copy_ptr++;
          --key_arg_count;
        }
      }
    }

    /*
      Read the other key arguments from the current record. The fields for
      these arguments are always first in the sequence of the record's fields.
    */
    CACHE_FIELD *copy= field_descr+flag_fields;
    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
    for ( ; copy < copy_end; copy++)
      read_record_field(copy, blob_in_rec_buff);

    /* Build the key over the fields read into the record buffers */
    TABLE_REF *ref= &join_tab->ref;
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    *key= ref->key_buff;
    len= ref->key_length;
  }

  /* Advance to the first position after the current record */
  pos= init_pos+rec_len;

  return len;
}
4455
4456
4457 /*
4458 Check the index condition of the joined table for a record from the BKA cache
4459
4460 SYNOPSIS
4461 skip_index_tuple()
4462 range_info pointer to the record returned by MRR
4463
4464 DESCRIPTION
4465 This function is invoked from MRR implementation to check if an index
4466 tuple matches the index condition. It is used in the case where the index
4467 condition actually depends on both columns of the used index and columns
4468 from previous tables.
4469
4470 NOTES
4471 Accessing columns of the previous tables requires special handling with
4472 BKA. The idea of BKA is to collect record combinations in a buffer and
4473 then do a batch of ref access lookups, i.e. by the time we're doing a
4474 lookup its previous-records-combination is not in prev_table->record[0]
4475 but somewhere in the join buffer.
4476 We need to get it from there back into prev_table(s)->record[0] before we
4477 can evaluate the index condition, and that's why we need this function
4478 instead of regular IndexConditionPushdown.
4479
4480 NOTES
4481 Possible optimization:
4482 Before we unpack the record from a previous table
4483 check if this table is used in the condition.
4484 If so then unpack the record otherwise skip the unpacking.
4485 This should be done by a special virtual method
4486 get_partial_record_by_pos().
4487
4488 RETURN VALUE
4489 1 the record combination does not satisfies the index condition
4490 0 otherwise
4491 */
4492
skip_index_tuple(range_id_t range_info)4493 bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info)
4494 {
4495 DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple");
4496 get_record_by_pos((uchar*)range_info);
4497 DBUG_RETURN(!join_tab->cache_idx_cond->val_int());
4498 }
4499
4500
4501
4502 /*
4503 Initialize retrieval of range sequence for the BKAH join algorithm
4504
4505 SYNOPSIS
4506 bkah_range_seq_init()
4507 init_params pointer to the BKAH join cache object
4508 n_ranges the number of ranges obtained
4509 flags combination of MRR flags
4510
4511 DESCRIPTION
4512 The function interprets init_param as a pointer to a JOIN_CACHE_BKAH
4513 object. The function prepares for an iteration over distinct join keys
4514 built over the records from the cache join buffer.
4515
4516 NOTE
4517 This function are used only as a callback function.
4518
4519 RETURN VALUE
4520 init_param value that is to be used as a parameter of
4521 bkah_range_seq_next()
4522 */
4523
4524 static
bkah_range_seq_init(void * init_param,uint n_ranges,uint flags)4525 range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags)
4526 {
4527 DBUG_ENTER("bkah_range_seq_init");
4528 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param;
4529 cache->reset(0);
4530 DBUG_RETURN((range_seq_t) init_param);
4531 }
4532
4533
4534 /*
4535 Get the next range/key over records from the join buffer of a BKAH cache
4536
4537 SYNOPSIS
4538 bkah_range_seq_next()
4539 seq value returned by bkah_range_seq_init()
4540 range OUT reference to the next range
4541
4542 DESCRIPTION
4543 The function interprets seq as a pointer to a JOIN_CACHE_BKAH
4544 object. The function returns a pointer to the range descriptor
4545 for the next unique key built over records from the join buffer.
4546
4547 NOTE
4548 This function are used only as a callback function.
4549
4550 RETURN VALUE
4551 FALSE ok, the range structure filled with info about the next range/key
4552 TRUE no more ranges
4553 */
4554
4555 static
bkah_range_seq_next(range_seq_t rseq,KEY_MULTI_RANGE * range)4556 bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
4557 {
4558 DBUG_ENTER("bkah_range_seq_next");
4559 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4560 TABLE_REF *ref= &cache->join_tab->ref;
4561 key_range *start_key= &range->start_key;
4562 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key)))
4563 {
4564 start_key->keypart_map= (1 << ref->key_parts) - 1;
4565 start_key->flag= HA_READ_KEY_EXACT;
4566 range->end_key= *start_key;
4567 range->end_key.flag= HA_READ_AFTER_KEY;
4568 range->ptr= (char *) cache->get_curr_key_chain();
4569 range->range_flag= EQ_RANGE;
4570 DBUG_RETURN(0);
4571 }
4572 DBUG_RETURN(1);
4573 }
4574
4575
4576 /*
4577 Check whether range_info orders to skip the next record from BKAH join buffer
4578
4579 SYNOPSIS
4580 bkah_range_seq_skip_record()
4581 seq value returned by bkah_range_seq_init()
4582 range_info information about the next range/key returned by MRR
4583 rowid [NOT USED] rowid of the record to be checked (not used)
4584
4585 DESCRIPTION
4586 The function interprets seq as a pointer to a JOIN_CACHE_BKAH
4587 object. The function returns TRUE if the record with this range_info
4588 is to be filtered out from the stream of records returned by
4589 multi_range_read_next().
4590
4591 NOTE
4592 This function are used only as a callback function.
4593
4594 RETURN VALUE
4595 1 record with this range_info is to be filtered out from the stream
4596 of records returned by multi_range_read_next()
4597 0 the record is to be left in the stream
4598 */
4599
4600 static
bkah_range_seq_skip_record(range_seq_t rseq,range_id_t range_info,uchar * rowid)4601 bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid)
4602 {
4603 DBUG_ENTER("bkah_range_seq_skip_record");
4604 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4605 bool res= cache->check_all_match_flags_for_key((uchar *) range_info);
4606 DBUG_RETURN(res);
4607 }
4608
4609
4610 /*
4611 Check if the record combination from BKAH cache matches the index condition
4612
4613 SYNOPSIS
4614 bkah_skip_index_tuple()
4615 rseq value returned by bka_range_seq_init()
4616 range_info record chain for the next range/key returned by MRR
4617
4618 DESCRIPTION
4619 This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method,
4620 see comments there.
4621
4622 NOTE
4623 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback.
4624
4625 RETURN VALUE
4626 0 some records from the chain satisfy the index condition
4627 1 otherwise
4628 */
4629
4630 static
bkah_skip_index_tuple(range_seq_t rseq,range_id_t range_info)4631 bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info)
4632 {
4633 DBUG_ENTER("bka_unique_skip_index_tuple");
4634 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq;
4635 THD *thd= cache->thd();
4636 bool res;
4637 status_var_increment(thd->status_var.ha_icp_attempts);
4638 if (!(res= cache->skip_index_tuple(range_info)))
4639 status_var_increment(thd->status_var.ha_icp_match);
4640 DBUG_RETURN(res);
4641 }
4642
4643
4644 /*
4645 Prepare to read record from BKAH cache matching the current joined record
4646
4647 SYNOPSIS
4648 prepare_look_for_matches()
4649 skip_last <-> ignore the last record in the buffer (always unused here)
4650
4651 DESCRIPTION
4652 The function prepares to iterate over records in the join cache buffer
4653 matching the record loaded into the record buffer for join_tab when
4654 performing join operation by BKAH join algorithm. With BKAH algorithm, if
4655 association labels are used, then record loaded into the record buffer
4656 for join_tab always has a direct reference to the chain of the mathing
4657 records from the join buffer. If association labels are not used then
4658 then the chain of the matching records is obtained by the call of the
4659 get_key_chain_by_join_key function.
4660
4661 RETURN VALUE
4662 TRUE there are no records in the buffer to iterate over
4663 FALSE otherwise
4664 */
4665
prepare_look_for_matches(bool skip_last)4666 bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last)
4667 {
4668 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0;
4669 if (no_association &&
4670 !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!'
4671 return 1;
4672 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain);
4673 return 0;
4674 }
4675
4676 /*
4677 Initialize the BKAH join cache
4678
4679 SYNOPSIS
4680 init
4681 for_explain join buffer is initialized for explain only
4682
4683 DESCRIPTION
4684 The function initializes the cache structure. It is supposed to be called
4685 right after a constructor for the JOIN_CACHE_BKAH.
4686
4687 NOTES
4688 The function first constructs a companion object of the type
4689 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class.
4690
4691 RETURN VALUE
4692 0 initialization with buffer allocations has been succeeded
4693 1 otherwise
4694 */
4695
init(bool for_explain)4696 int JOIN_CACHE_BKAH::init(bool for_explain)
4697 {
4698 bool check_only_first_match= join_tab->check_only_first_match();
4699
4700 no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION);
4701
4702 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info,
4703 bkah_range_seq_init,
4704 bkah_range_seq_next,
4705 check_only_first_match && !no_association ?
4706 bkah_range_seq_skip_record : 0,
4707 bkah_skip_index_tuple };
4708
4709 DBUG_ENTER("JOIN_CACHE_BKAH::init");
4710
4711 if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab,
4712 mrr_mode, rs_funcs)))
4713 DBUG_RETURN(1);
4714
4715 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain));
4716 }
4717
4718
/*
  Check the index condition of the joined table for a record from the BKA cache

  SYNOPSIS
    skip_index_tuple()
      range_info   record chain returned by MRR

  DESCRIPTION
    See JOIN_CACHE_BKA::skip_index_tuple().
    This function is the variant for use with the class JOIN_CACHE_BKAH.
    The difference from the JOIN_CACHE_BKA case is that there may be multiple
    previous table record combinations that share the same key (MRR range).
    As a consequence, we need to loop through the chain of all table record
    combinations that match the given MRR range key range_info until we find
    one that satisfies the index condition.

  NOTE
    Possible optimization:
    Before we unpack the record from a previous table
    check if this table is used in the condition.
    If so then unpack the record otherwise skip the unpacking.
    This should be done by a special virtual method
    get_partial_record_by_pos().

  RETURN VALUE
    1   any record combination from the chain referred by range_info
        does not satisfy the index condition
    0   otherwise
*/

bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info)
{
  /*
    The chain is circular: advancing from the head pointer eventually comes
    back to it, so the do/while visits every record combination exactly once.
  */
  uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info);
  uchar *next_rec_ref_ptr= last_rec_ref_ptr;
  DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple");
  do
  {
    next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr);
    uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset;
    /* Restore this record combination into the record buffers */
    get_record_by_pos(rec_ptr);
    /* One satisfying combination is enough to keep the index tuple */
    if (join_tab->cache_idx_cond->val_int())
      DBUG_RETURN(FALSE);
  } while(next_rec_ref_ptr != last_rec_ref_ptr);
  DBUG_RETURN(TRUE);
}
4766