1 /* Copyright (C) 2000-2006 MySQL AB 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License as published by 5 the Free Software Foundation; version 2 of the License. 6 7 This program is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 GNU General Public License for more details. 11 12 You should have received a copy of the GNU General Public License 13 along with this program; if not, write to the Free Software 14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ 15 16 /** 17 @file 18 19 @brief 20 join cache optimizations 21 22 @defgroup Query_Optimizer Query Optimizer 23 @{ 24 */ 25 26 #ifdef USE_PRAGMA_IMPLEMENTATION 27 #pragma implementation // gcc: Class implementation 28 #endif 29 30 #include "mariadb.h" 31 #include "key.h" 32 #include "sql_base.h" 33 #include "sql_select.h" 34 #include "opt_subselect.h" 35 36 #define NO_MORE_RECORDS_IN_BUFFER (uint)(-1) 37 38 static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save); 39 40 /***************************************************************************** 41 * Join cache module 42 ******************************************************************************/ 43 44 /* 45 Fill in the descriptor of a flag field associated with a join cache 46 47 SYNOPSIS 48 add_field_flag_to_join_cache() 49 str position in a record buffer to copy the field from/to 50 length length of the field 51 field IN/OUT pointer to the field descriptor to fill in 52 53 DESCRIPTION 54 The function fill in the descriptor of a cache flag field to which 55 the parameter 'field' points to. The function uses the first two 56 parameters to set the position in the record buffer from/to which 57 the field value is to be copied and the length of the copied fragment. 
58 Before returning the result the function increments the value of 59 *field by 1. 60 The function ignores the fields 'blob_length' and 'ofset' of the 61 descriptor. 62 63 RETURN VALUE 64 the length of the field 65 */ 66 67 static 68 uint add_flag_field_to_join_cache(uchar *str, uint length, CACHE_FIELD **field) 69 { 70 CACHE_FIELD *copy= *field; 71 copy->str= str; 72 copy->length= length; 73 copy->type= 0; 74 copy->field= 0; 75 copy->referenced_field_no= 0; 76 (*field)++; 77 return length; 78 } 79 80 81 /* 82 Fill in the descriptors of table data fields associated with a join cache 83 84 SYNOPSIS 85 add_table_data_fields_to_join_cache() 86 tab descriptors of fields from this table are to be filled 87 field_set descriptors for only these fields are to be created 88 field_cnt IN/OUT counter of data fields 89 descr IN/OUT pointer to the first descriptor to be filled 90 field_ptr_cnt IN/OUT counter of pointers to the data fields 91 descr_ptr IN/OUT pointer to the first pointer to blob descriptors 92 93 DESCRIPTION 94 The function fills in the descriptors of cache data fields from the table 95 'tab'. The descriptors are filled only for the fields marked in the 96 bitmap 'field_set'. 97 The function fills the descriptors starting from the position pointed 98 by 'descr'. If an added field is of a BLOB type then a pointer to the 99 its descriptor is added to the array descr_ptr. 100 At the return 'descr' points to the position after the last added 101 descriptor while 'descr_ptr' points to the position right after the 102 last added pointer. 

  RETURN VALUE
    the total length of the added fields
*/

static
uint add_table_data_fields_to_join_cache(JOIN_TAB *tab,
                                         MY_BITMAP *field_set,
                                         uint *field_cnt,
                                         CACHE_FIELD **descr,
                                         uint *field_ptr_cnt,
                                         CACHE_FIELD ***descr_ptr)
{
  Field **fld_ptr;
  uint len= 0;
  CACHE_FIELD *copy= *descr;
  CACHE_FIELD **copy_ptr= *descr_ptr;
  uint used_fields= bitmap_bits_set(field_set);
  /* Scan the table's fields until every bit set in field_set is consumed */
  for (fld_ptr= tab->table->field; used_fields; fld_ptr++)
  {
    if (bitmap_is_set(field_set, (*fld_ptr)->field_index))
    {
      len+= (*fld_ptr)->fill_cache_field(copy);
      if (copy->type == CACHE_BLOB)
      {
        /* Blob descriptors are also collected in descr_ptr for later use */
        *copy_ptr= copy;
        copy_ptr++;
        (*field_ptr_cnt)++;
      }
      copy->field= *fld_ptr;
      copy->referenced_field_no= 0;
      copy++;
      (*field_cnt)++;
      used_fields--;
    }
  }
  /* Hand the advanced cursors back to the caller */
  *descr= copy;
  *descr_ptr= copy_ptr;
  return len;
}

/*
  Determine different counters of fields associated with a record in the cache

  SYNOPSIS
    calc_record_fields()

  DESCRIPTION
    The function counts the number of total fields stored in a record
    of the cache and saves this number in the 'fields' member. It also
    determines the number of flag fields and the number of blobs.
    The function sets 'with_match_flag' on if 'join_tab' needs a match flag
    i.e. if it is the first inner table of an outer join or a semi-join.

  RETURN VALUE
    none
*/

void JOIN_CACHE::calc_record_fields()
{
  JOIN_TAB *tab;

  if (prev_cache)
    tab= prev_cache->join_tab;
  else
  {
    if (join_tab->bush_root_tab)
    {
      /*
         --ot1--SJM1--------------ot2--...
                 |
                 |
                 +-it1--...--itN
                              ^____________ this->join_tab is somewhere here,
                                            inside an sjm nest.

        The join buffer should store the values of it1.*, it2.*, ..
        It should not store values of ot1.*.
      */
      tab= join_tab->bush_root_tab->bush_children->start;
    }
    else
    {
      /*
        -ot1--ot2--SJM1--SJM2--------------ot3--...--otN
                    |     |                 ^
                    |     +-it21--...--it2N |
                    |                       \-- we're somewhere here,
                    +-it11--...--it1N           at the top level

        The join buffer should store the values of

          ot1.*, ot2.*, it1{i}, it2{j}.*, ot3.*, ...

        that is, we should start from the first non-const top-level table.

        We will need to store columns of SJ-inner tables (it_X_Y.*), but we're
        not interested in storing the columns of materialization tables
        themselves. Because of that, if the first non-const top-level table is
        a materialized table, we move to its bush_children:
      */
      tab= join->join_tab + join->const_tables;
      if (tab->bush_children)
        tab= tab->bush_children->start;
    }
  }
  DBUG_ASSERT(!tab->bush_children);

  start_tab= tab;
  fields= 0;
  blobs= 0;
  flag_fields= 0;
  data_field_count= 0;
  data_field_ptr_count= 0;
  referenced_fields= 0;

  /*
    The following loop will get inside SJM nests, because data may be unpacked
    to sjm-inner tables.
  */
  for (; tab != join_tab ; tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    tab->calc_used_field_length(FALSE);
    /* One flag byte for a null bitmap, one for a null row flag, if needed */
    flag_fields+= MY_TEST(tab->used_null_fields || tab->used_uneven_bit_fields);
    flag_fields+= MY_TEST(tab->table->maybe_null);
    fields+= tab->used_fields;
    blobs+= tab->used_blobs;
  }
  if ((with_match_flag= join_tab->use_match_flag()))
    flag_fields++;
  fields+= flag_fields;
}


/*
  Collect information on join key arguments

  SYNOPSIS
    collect_info_on_key_args()

  DESCRIPTION
    The function traverses the ref expressions that are used to access the
    joined table join_tab.
For each table 'tab' whose fields are to be stored
    in the join buffer of the cache the function finds the fields from 'tab'
    that occur in the ref expressions and marks these fields in the bitmap
    tab->table->tmp_set. The function counts the number of them stored
    in this cache and the total number of them stored in the previous caches
    and saves the results of the counting in 'local_key_arg_fields' and
    'external_key_arg_fields' respectively.

  NOTES
    The function does not do anything if no key is used to join the records
    from join_tab.

  RETURN VALUE
    none
*/

void JOIN_CACHE::collect_info_on_key_args()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  local_key_arg_fields= 0;
  external_key_arg_fields= 0;

  if (!is_key_access())
    return;

  TABLE_REF *ref= &join_tab->ref;
  /* Walk this cache first, then each previous cache in the chain */
  cache= this;
  do
  {
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      uint key_args;
      bitmap_clear_all(&tab->table->tmp_set);
      /* Mark in tmp_set every field of 'tab' that occurs in a ref item */
      for (uint i= 0; i < ref->key_parts; i++)
      {
        Item *ref_item= ref->items[i];
        if (!(tab->table->map & ref_item->used_tables()))
          continue;
        ref_item->walk(&Item::add_field_to_set_processor, 1, tab->table);
      }
      if ((key_args= bitmap_bits_set(&tab->table->tmp_set)))
      {
        if (cache == this)
          local_key_arg_fields+= key_args;
        else
          external_key_arg_fields+= key_args;
      }
    }
    cache= cache->prev_cache;
  }
  while (cache);

  return;
}


/*
  Allocate memory for descriptors and pointers to them associated with the cache

  SYNOPSIS
    alloc_fields()

  DESCRIPTION
    The function allocates memory for the array of fields descriptors
    and the array of pointers to the field descriptors used to copy
    join record data from record buffers into the join buffer and
    backward.
Some pointers refer to the field descriptor associated
    with previous caches. They are placed at the beginning of the array
    of pointers and its total number is stored in external_key_arg_fields.
    The pointer of the first array is assigned to field_descr and the number
    of the elements in it is precalculated by the function calc_record_fields.
    The allocated arrays are adjacent.

  NOTES
    The memory is allocated in join->thd->mem_root

  RETURN VALUE
    0 if the memory has been successfully allocated, 1 otherwise
*/

int JOIN_CACHE::alloc_fields()
{
  /*
    One chunk holds the descriptor array immediately followed by the pointer
    array: pointers to descriptors of external key arguments, pointers to
    blob descriptors, plus one spare slot.
  */
  uint n_ptrs= external_key_arg_fields + blobs + 1;
  uint descr_sz= sizeof(CACHE_FIELD) * fields;
  field_descr= (CACHE_FIELD*) join->thd->alloc(descr_sz +
                                               sizeof(CACHE_FIELD*) * n_ptrs);
  /* The pointer array starts right after the last field descriptor */
  blob_ptr= (CACHE_FIELD **) ((uchar *) field_descr + descr_sz);
  return field_descr == NULL;
}


/*
  Create descriptors of the record flag fields stored in the join buffer

  SYNOPSIS
    create_flag_fields()

  DESCRIPTION
    The function creates descriptors of the record flag fields stored
    in the join buffer. These are descriptors for:
      - an optional match flag field,
      - table null bitmap fields,
      - table null row fields.
    The match flag field is created when 'join_tab' is the first inner
    table of an outer join or a semi-join. A null bitmap field is
    created for any table whose fields are to be stored in the join
    buffer if at least one of these fields is nullable or is a BIT field
    whose bits are partially stored with null bits. A null row flag
    is created for any table assigned to the cache if it is an inner
    table of an outer join.
    The descriptor for flag fields are placed one after another at the
    beginning of the array of field descriptors 'field_descr' that
    contains 'fields' elements. If there is a match flag field the
    descriptor for it is always first in the sequence of flag fields.
The descriptors for other flag fields can follow in an arbitrary
    order.
    The flag field values follow in a record stored in the join buffer
    in the same order as field descriptors, with the match flag always
    following first.
    The function sets the value of 'flag_fields' to the total number
    of the descriptors created for the flag fields.
    The function sets the value of 'length' to the total length of the
    flag fields.

  RETURN VALUE
    none
*/

void JOIN_CACHE::create_flag_fields()
{
  CACHE_FIELD *copy;
  JOIN_TAB *tab;

  copy= field_descr;

  length=0;

  /* If there is a match flag the first field is always used for this flag */
  if (with_match_flag)
    length+= add_flag_field_to_join_cache((uchar*) &join_tab->found,
                                          sizeof(join_tab->found),
                                          &copy);

  /* Create fields for all null bitmaps and null row flags that are needed */
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    TABLE *table= tab->table;

    /* Create a field for the null bitmap from table if needed */
    if (tab->used_null_fields || tab->used_uneven_bit_fields)
      length+= add_flag_field_to_join_cache(table->null_flags,
                                            table->s->null_bytes,
                                            &copy);

    /* Create a field for the null row flag if needed */
    if (table->maybe_null)
      length+= add_flag_field_to_join_cache((uchar*) &table->null_row,
                                            sizeof(table->null_row),
                                            &copy);
  }

  /* Theoretically the new value of flag_fields can be less than the old one */
  flag_fields= (uint)(copy-field_descr);
}


/*
  Create descriptors of the fields used to build access keys to the joined table

  SYNOPSIS
    create_key_arg_fields()

  DESCRIPTION
    The function creates descriptors of the record fields stored in the join
    buffer that are used to build access keys to the joined table.
These
    fields are put into the buffer ahead of other records fields stored in
    the buffer. Such placement helps to optimize construction of access keys.
    For each field that is used to build access keys to the joined table but
    is stored in some other join cache buffer the function saves a pointer
    to the field descriptor. The array of such pointers are placed in the
    join cache structure just before the array of pointers to the
    blob fields blob_ptr.
    Any field stored in a join cache buffer that is used to construct keys
    to access tables associated with other join caches is called a referenced
    field. It receives a unique number that is saved by the function in the
    member 'referenced_field_no' of the CACHE_FIELD descriptor for the field.
    This number is used as index to the array of offsets to the referenced
    fields that are saved and put in the join cache buffer after all record
    fields.
    The function also finds out whether the keys to access join_tab
    can be considered as embedded and, if so, sets the flag 'use_emb_key' in
    this join cache appropriately.

  NOTES.
    When a key to access the joined table 'join_tab' is constructed the array
    of pointers to the field descriptors for the external fields is looked
    through. For each of this pointers we find out in what previous key cache
    the referenced field is stored. The value of 'referenced_field_no'
    provides us with the index into the array of offsets for referenced
    fields stored in the join cache. The offset read by the index allows
    us to read the field without reading all other fields of the record
    stored the join cache buffer. This optimizes the construction of keys
    to access 'join_tab' when some key arguments are stored in the previous
    join caches.

  NOTES
    The function does not do anything if no key is used to join the records
    from join_tab.

  RETURN VALUE
    none
*/
void JOIN_CACHE::create_key_arg_fields()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;

  if (!is_key_access())
    return;

  /*
    Save pointers to the cache fields in previous caches
    that are used to build keys for this key access.
  */
  cache= this;
  uint ext_key_arg_cnt= external_key_arg_fields;
  CACHE_FIELD *copy;
  CACHE_FIELD **copy_ptr= blob_ptr;
  while (ext_key_arg_cnt)
  {
    cache= cache->prev_cache;
    for (tab= cache->start_tab; tab != cache->join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      CACHE_FIELD *copy_end;
      MY_BITMAP *key_read_set= &tab->table->tmp_set;
      /* key_read_set contains the bitmap of tab's fields referenced by ref */
      if (bitmap_is_clear_all(key_read_set))
        continue;
      copy_end= cache->field_descr+cache->fields;
      for (copy= cache->field_descr+cache->flag_fields; copy < copy_end; copy++)
      {
        /*
          (1) - when we store rowids for DuplicateWeedout, they have
                copy->field==NULL
        */
        if (copy->field &&  // (1)
            copy->field->table == tab->table &&
            bitmap_is_set(key_read_set, copy->field->field_index))
        {
          *copy_ptr++= copy;
          ext_key_arg_cnt--;
          if (!copy->referenced_field_no)
          {
            /*
              Register the referenced field 'copy':
                - set the offset number in copy->referenced_field_no,
                - adjust the value of the flag 'with_length',
                - adjust the values of 'pack_length' and
                  of 'pack_length_with_blob_ptrs'.
            */
            copy->referenced_field_no= ++cache->referenced_fields;
            if (!cache->with_length)
            {
              cache->with_length= TRUE;
              uint sz= cache->get_size_of_rec_length();
              cache->base_prefix_length+= sz;
              cache->pack_length+= sz;
              cache->pack_length_with_blob_ptrs+= sz;
            }
            cache->pack_length+= cache->get_size_of_fld_offset();
            cache->pack_length_with_blob_ptrs+= cache->get_size_of_fld_offset();
          }
        }
      }
    }
  }
  /* After this 'blob_ptr' shall not be changed */
  blob_ptr= copy_ptr;

  /* Now create local fields that are used to build ref for this key access */
  copy= field_descr+flag_fields;
  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    length+= add_table_data_fields_to_join_cache(tab, &tab->table->tmp_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);
  }

  use_emb_key= check_emb_key_usage();

  return;
}


/*
  Create descriptors of all remaining data fields stored in the join buffer

  SYNOPSIS
    create_remaining_fields()

  DESCRIPTION
    The function creates descriptors for all remaining data fields of a
    record from the join buffer. If the value returned by is_key_access() is
    false the function creates fields for all read record fields that
    comprise the partial join record joined with join_tab. Otherwise,
    for each table tab, the set of the read fields for which the descriptors
    have to be added is determined as the difference between all read fields
    and those for which the descriptors have been already created.
    The latter are supposed to be marked in the bitmap tab->table->tmp_set.
    The function increases the value of 'length' to the total length of
    the added fields.

  NOTES
    If is_key_access() returns true the function modifies the value of
    tab->table->tmp_set for each table whose fields are stored in the cache.
The function calls the method Field::fill_cache_field to figure out
    the type of the cache field and the maximal length of its representation
    in the join buffer. If this is a blob field then additionally a pointer
    to this field is added as an element of the array blob_ptr. For a blob
    field only the size of the length of the blob data is taken into account.
    It is assumed that 'data_field_count' contains the number of descriptors
    for data fields that have been already created and 'data_field_ptr_count'
    contains the number of the pointers to such descriptors having been
    stored up to the moment.

  RETURN VALUE
    none
*/

void JOIN_CACHE::create_remaining_fields()
{
  JOIN_TAB *tab;
  bool all_read_fields= !is_key_access();
  CACHE_FIELD *copy= field_descr+flag_fields+data_field_count;
  CACHE_FIELD **copy_ptr= blob_ptr+data_field_ptr_count;

  for (tab= start_tab; tab != join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    MY_BITMAP *rem_field_set;
    TABLE *table= tab->table;

    if (all_read_fields)
      rem_field_set= table->read_set;
    else
    {
      /*
        Key argument fields were already marked in tmp_set by
        collect_info_on_key_args(); take read_set minus tmp_set.
      */
      bitmap_invert(&table->tmp_set);
      bitmap_intersect(&table->tmp_set, table->read_set);
      rem_field_set= &table->tmp_set;
    }

    length+= add_table_data_fields_to_join_cache(tab, rem_field_set,
                                                 &data_field_count, &copy,
                                                 &data_field_ptr_count,
                                                 &copy_ptr);

    /* SemiJoinDuplicateElimination: allocate space for rowid if needed */
    if (tab->keep_current_rowid)
    {
      copy->str= table->file->ref;
      if (copy->str)
        copy->length= table->file->ref_length;
      else
      {
        /* This may happen only for materialized derived tables and views */
        copy->length= 0;
        copy->str= (uchar *) table;
      }
      copy->type= CACHE_ROWID;
      copy->field= 0;
      copy->referenced_field_no= 0;
      /*
        Note: this may seem odd, but at this point we have
        table->file->ref==NULL while table->file->ref_length is already set
        to correct value.
      */
      length += table->file->ref_length;
      data_field_count++;
      copy++;
    }
  }
}



/*
  Calculate and set all cache constants

  SYNOPSIS
    set_constants()

  DESCRIPTION
    The function calculates and sets all precomputed constants that are used
    when writing records into the join buffer and reading them from it.
    It calculates the size of offsets of a record within the join buffer
    and of a field within a record. It also calculates the number of bytes
    used to store record lengths.
    The function also calculates the maximal length of the representation
    of record in the cache excluding blob_data. This value is used when
    making a decision whether more records should be added into the join
    buffer or not.

  RETURN VALUE
    none
*/

void JOIN_CACHE::set_constants()
{
  /*
    Any record from a BKA cache is prepended with the record length.
    We use the record length when reading the buffer and building key values
    for each record. The length allows us not to read the fields that are
    not needed for keys.
    If a record has match flag it also may be skipped when the match flag
    is on. It happens if the cache is used for a semi-join operation or
    for outer join when the 'not exist' optimization can be applied.
    If some of the fields are referenced from other caches then
    the record length allows us to easily reach the saved offsets for
    these fields since the offsets are stored at the very end of the record.
    However at this moment we don't know whether we have referenced fields for
    the cache or not. Later when a referenced field is registered for the cache
    we adjust the value of the flag 'with_length'.
  */
  with_length= is_key_access() ||
               join_tab->is_inner_table_of_semi_join_with_first_match() ||
               join_tab->is_inner_table_of_outer_join();
  /*
    At this moment we don't know yet the value of 'referenced_fields',
    but in any case it can't be greater than the value of 'fields'.
  */
  uint len= length + fields*sizeof(uint)+blobs*sizeof(uchar *) +
            (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
            sizeof(ulong);
  /*
    The values of size_of_rec_ofs, size_of_rec_len, size_of_fld_ofs,
    base_prefix_length, pack_length, pack_length_with_blob_ptrs
    will be recalculated later in this function when we get the estimate
    for the actual value of the join buffer size.
  */
  size_of_rec_ofs= size_of_rec_len= size_of_fld_ofs= 4;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length + fields*sizeof(uint);
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
  min_buff_size= 0;
  min_records= 1;
  buff_size= (size_t)MY_MAX(join->thd->variables.join_buff_size,
                            get_min_join_buffer_size());
  /* Recompute the offset sizes now that the buffer size estimate is known */
  size_of_rec_ofs= offset_size(buff_size);
  size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
  size_of_fld_ofs= size_of_rec_len;
  base_prefix_length= (with_length ? size_of_rec_len : 0) +
                      (prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  /*
    The size of the offsets for referenced fields will be added later.
    The values of 'pack_length' and 'pack_length_with_blob_ptrs' are adjusted
    every time when the first reference to the referenced field is registered.
  */
  pack_length= (with_length ? size_of_rec_len : 0) +
               (prev_cache ? prev_cache->get_size_of_rec_offset() : 0) +
               length;
  pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
}


/*
  Get maximum total length of all affixes of a record in the join cache buffer

  SYNOPSIS
    get_record_max_affix_length()

  DESCRIPTION
    The function calculates the maximum possible total length of all affixes
    of a record in the join cache buffer, that is made of:
      - the length of all prefixes used in this cache,
      - the length of the match flag if it's needed
      - the total length of the maximum possible offsets to the fields of
        a record in the buffer.

  RETURN VALUE
    The maximum total length of all affixes of a record in the join buffer
*/

uint JOIN_CACHE::get_record_max_affix_length()
{
  uint len= get_prefix_length() +
            MY_TEST(with_match_flag) +
            size_of_fld_ofs * data_field_count;
  return len;
}


/*
  Get the minimum possible size of the cache join buffer

  SYNOPSIS
    get_min_join_buffer_size()

  DESCRIPTION
    At the first its invocation for the cache the function calculates the
    minimum possible size of the join buffer of the cache. This value depends
    on the minimal number of records 'min_records' to be stored in the join
    buffer. The number is supposed to be determined by the procedure that
    chooses the best access path to the joined table join_tab in the execution
    plan. After the calculation of the interesting size the function saves it
    in the field 'min_buff_size' in order to use it directly at the next
    invocations of the function.

  NOTES
    Currently the number of minimal records is just set to 1.

  RETURN VALUE
    The minimal possible size of the join buffer of this cache
*/

size_t JOIN_CACHE::get_min_join_buffer_size()
{
  if (!min_buff_size)
  {
    size_t len= 0;
    size_t len_last= 0;
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      /* Maximal length for all but the last record, actual for the last */
      len+= tab->get_max_used_fieldlength();
      len_last+= tab->get_used_fieldlength();
    }
    size_t len_addon= get_record_max_affix_length() +
                      get_max_key_addon_space_per_record();
    len+= len_addon;
    len_last+= len_addon;
    size_t min_sz= len*(min_records-1) + len_last;
    min_sz+= pack_length_with_blob_ptrs;
    size_t add_sz= 0;
    /* Account for the auxiliary buffer space needed per record */
    for (uint i=0; i < min_records; i++)
      add_sz+= join_tab_scan->aux_buffer_incr(i+1);
    avg_aux_buffer_incr= add_sz/min_records;
    min_sz+= add_sz;
    set_if_bigger(min_sz, 1);
    min_buff_size= min_sz;
  }
  return min_buff_size;
}


/*
  Get the maximum possible size of the cache join buffer

  SYNOPSIS
    get_max_join_buffer_size()

      optimize_buff_size  FALSE <-> do not take more memory than needed for
                          the estimated number of records in the partial join

  DESCRIPTION
    At the first its invocation for the cache the function calculates the
    maximum possible size of join buffer for the cache. If the parameter
    optimize_buff_size true then this value does not exceed the size of the
    space needed for the estimated number of records 'max_records' in the
    partial join that joins tables from the first one through join_tab. This
    value is also capped off by the value of join_tab->join_buffer_size_limit,
    if it has been set to a non-zero value, and by the value of the system
    parameter join_buffer_size - otherwise. After the calculation of the
    interesting size the function saves the value in the field 'max_buff_size'
    in order to use it directly at the next invocations of the function.

  NOTES
    Currently the value of join_tab->join_buffer_size_limit is initialized
    to 0 and is never reset.

  RETURN VALUE
    The maximum possible size of the join buffer of this cache
*/

size_t JOIN_CACHE::get_max_join_buffer_size(bool optimize_buff_size)
{
  if (!max_buff_size)
  {
    size_t max_sz;
    size_t min_sz= get_min_join_buffer_size();
    size_t len= 0;
    for (JOIN_TAB *tab= start_tab; tab != join_tab;
         tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
    {
      len+= tab->get_used_fieldlength();
    }
    len+= get_record_max_affix_length();
    avg_record_length= len;
    len+= get_max_key_addon_space_per_record() + avg_aux_buffer_incr;
    space_per_record= len;

    size_t limit_sz= (size_t)join->thd->variables.join_buff_size;
    if (join_tab->join_buffer_size_limit)
      set_if_smaller(limit_sz, join_tab->join_buffer_size_limit);
    if (!optimize_buff_size)
      max_sz= limit_sz;
    else
    {
      /* Take only the space needed for the estimated number of records */
      if (limit_sz / max_records > space_per_record)
        max_sz= space_per_record * max_records;
      else
        max_sz= limit_sz;
      max_sz+= pack_length_with_blob_ptrs;
      set_if_smaller(max_sz, limit_sz);
    }
    set_if_bigger(max_sz, min_sz);
    max_buff_size= max_sz;
  }
  return max_buff_size;
}


/*
  Allocate memory for a join buffer

  SYNOPSIS
    alloc_buffer()

  DESCRIPTION
    The function allocates a lump of memory for the cache join buffer.
    Initially the function sets the size of the buffer buff_size equal to
    the value returned by get_max_join_buffer_size(). If the total size of
    the space intended to be used for the join buffers employed by the
    tables from the first one through join_tab exceeds the value of the
    system parameter join_buff_space_limit, then the function first tries
    to shrink the used buffers to make the occupied space fit the maximum
    memory allowed to be used for all join buffers in total.
After
    this the function tries to allocate a join buffer for join_tab.
    If it fails to do so, it decrements the requested size of the join
    buffer, shrinks proportionally the join buffers used for the previous
    tables and tries to allocate a buffer for join_tab. In the case of a
    failure the function repeats its attempts with smaller and smaller
    requested sizes of the buffer, but not more than 4 times.

  RETURN VALUE
    0   if the memory has been successfully allocated
    1   otherwise
*/

int JOIN_CACHE::alloc_buffer()
{
  JOIN_TAB *tab;
  JOIN_CACHE *cache;
  ulonglong curr_buff_space_sz= 0;
  ulonglong curr_min_buff_space_sz= 0;
  ulonglong join_buff_space_limit=
    join->thd->variables.join_buff_space_limit;
  bool optimize_buff_size=
         optimizer_flag(join->thd, OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE);
  double partial_join_cardinality= (join_tab-1)->get_partial_join_cardinality();
  buff= NULL;
  min_buff_size= 0;
  max_buff_size= 0;
  min_records= 1;
  /* Cap the record estimate by the total join buffer space limit */
  max_records= (size_t) (partial_join_cardinality <= join_buff_space_limit ?
                 (ulonglong) partial_join_cardinality : join_buff_space_limit);
  set_if_bigger(max_records, 10);
  min_buff_size= get_min_join_buffer_size();
  buff_size= get_max_join_buffer_size(optimize_buff_size);

  /* Sum the space already claimed by the caches of the preceding tables */
  for (tab= start_tab; tab!= join_tab;
       tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
  {
    cache= tab->cache;
    if (cache)
    {
      curr_min_buff_space_sz+= cache->get_min_join_buffer_size();
      curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  curr_min_buff_space_sz+= min_buff_size;
  curr_buff_space_sz+= buff_size;

  if (curr_min_buff_space_sz > join_buff_space_limit ||
      (curr_buff_space_sz > join_buff_space_limit &&
       (!optimize_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  join_buff_space_limit))))
    goto fail;

  if (for_explain_only)
    return 0;

  /* Retry the allocation with progressively smaller sizes, at most 4 times */
  for (size_t buff_size_decr= (buff_size-min_buff_size)/4 + 1; ; )
  {
    size_t next_buff_size;

    if ((buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
      break;

    next_buff_size= buff_size > buff_size_decr ? buff_size-buff_size_decr : 0;
    if (next_buff_size < min_buff_size ||
        join->shrink_join_buffers(join_tab, curr_buff_space_sz,
                                  curr_buff_space_sz-buff_size_decr))
      goto fail;
    buff_size= next_buff_size;

    curr_buff_space_sz= 0;
    for (tab= join->join_tab+join->const_tables; tab <= join_tab; tab++)
    {
      cache= tab->cache;
      if (cache)
        curr_buff_space_sz+= cache->get_join_buffer_size();
    }
  }
  return 0;

fail:
  buff_size= 0;
  return 1;
}


/*
  Shrink the size of the cache join buffer in a given ratio

  SYNOPSIS
    shrink_join_buffer_in_ratio()
      n  numerator of the ratio to shrink the buffer in
      d  denominator of the ratio

  DESCRIPTION
    The function first deallocates the join buffer of the cache. Then
    it allocates a buffer that is (n/d) times smaller.
  RETURN VALUE
    FALSE   on success with allocation of the smaller join buffer
    TRUE    otherwise
*/

bool JOIN_CACHE::shrink_join_buffer_in_ratio(ulonglong n, ulonglong d)
{
  size_t next_buff_size;
  /* A ratio with n < d would enlarge the buffer: nothing to shrink then */
  if (n < d)
    return FALSE;
  next_buff_size= (size_t) ((double) buff_size / n * d);
  /* Never shrink below the minimal join buffer size */
  set_if_bigger(next_buff_size, min_buff_size);
  buff_size= next_buff_size;
  return realloc_buffer();
}


/*
  Reallocate the join buffer of a join cache

  SYNOPSIS
    realloc_buffer()

  DESCRIPTION
    The function reallocates the join buffer of the join cache. After this
    it resets the buffer for writing.

  NOTES
    The function assumes that buff_size contains the new value for the join
    buffer size.

  RETURN VALUE
    0   if the buffer has been successfully reallocated
    1   otherwise
*/

int JOIN_CACHE::realloc_buffer()
{
  int rc;
  free();                   /* release the old buffer first */
  rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))));
  /* Reset for writing even on failure so the cache state stays consistent */
  reset(TRUE);
  return rc;
}


/*
  Initialize a join cache

  SYNOPSIS
    init()
      for_explain   join buffer is initialized for explain only

  DESCRIPTION
    The function initializes the join cache structure. It supposed to be called
    by init methods for classes derived from the JOIN_CACHE.
    The function allocates memory for the join buffer and for descriptors of
    the record fields stored in the buffer.

  NOTES
    The code of this function should have been included into the constructor
    code itself. However the new operator for the class JOIN_CACHE would
    never fail while memory allocation for the join buffer is not absolutely
    unlikely to fail. That's why this memory allocation has to be placed in a
    separate function that is called in a couple with a cache constructor.
    It is quite natural to put almost all other constructor actions into
    this function.

  RETURN VALUE
    0   initialization with buffer allocations has been succeeded
    1   otherwise
*/

int JOIN_CACHE::init(bool for_explain)
{
  DBUG_ENTER("JOIN_CACHE::init");

  for_explain_only= for_explain;

  /* Work out how many fields of each kind the buffered records contain */
  calc_record_fields();

  collect_info_on_key_args();

  /* Allocate the arrays of field descriptors; the only fallible step
     before the buffer allocation itself */
  if (alloc_fields())
    DBUG_RETURN(1);

  create_flag_fields();

  create_key_arg_fields();

  create_remaining_fields();

  set_constants();

  if (alloc_buffer())
    DBUG_RETURN(1);

  /* Prepare the freshly allocated buffer for writing */
  reset(TRUE);

  DBUG_RETURN(0);
}


/*
  Check the possibility to read the access keys directly from the join buffer
  SYNOPSIS
    check_emb_key_usage()

  DESCRIPTION
    The function checks some conditions at which the key values can be read
    directly from the join buffer. This is possible when the key values can be
    composed by concatenation of the record fields stored in the join buffer.
    Sometimes when the access key is multi-component the function has to re-order
    the fields written into the join buffer to make keys embedded. If key
    values for the key access are detected as embedded then 'use_emb_key'
    is set to TRUE.

  EXAMPLE
    Let table t2 has an index defined on the columns a,b . Let's assume also
    that the columns t2.a, t2.b as well as the columns t1.a, t1.b are all
    of the integer type. Then if the query
      SELECT COUNT(*) FROM t1, t2 WHERE t1.a=t2.a and t1.b=t2.b
    is executed with a join cache in such a way that t1 is the driving
    table then the key values to access table t2 can be read directly
    from the join buffer.

  NOTES
    In some cases key values could be read directly from the join buffer but
    we still do not consider them embedded.
 In the future we'll expand the
    class of keys which we identify as embedded.

  NOTES
    The function returns FALSE if no key is used to join the records
    from join_tab.

  RETURN VALUE
    TRUE    key values will be considered as embedded,
    FALSE   otherwise.
*/

bool JOIN_CACHE::check_emb_key_usage()
{

  if (!is_key_access())
    return FALSE;

  uint i;
  Item *item;
  KEY_PART_INFO *key_part;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uint len= 0;                       /* total length of the embedded key */
  TABLE_REF *ref= &join_tab->ref;
  KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key);

  /*
    If some of the key arguments are not from the local cache the key
    is not considered as embedded.
    TODO:
    Expand it to the case when ref->key_parts=1 and local_key_arg_fields=0.
  */
  if (external_key_arg_fields != 0)
    return FALSE;
  /*
    If the number of the local key arguments is not equal to the number
    of key parts the key value cannot be read directly from the join buffer.
  */
  if (local_key_arg_fields != ref->key_parts)
    return FALSE;

  /*
    A key is not considered embedded if one of the following is true:
    - one of its key parts is not equal to a field
    - it is a partial key
    - definition of the argument field does not coincide with the
      definition of the corresponding key component
    - some of the key components are nullable
  */
  for (i=0; i < ref->key_parts; i++)
  {
    item= ref->items[i]->real_item();
    if (item->type() != Item::FIELD_ITEM)
      return FALSE;
    key_part= keyinfo->key_part+i;
    if (key_part->key_part_flag & HA_PART_KEY_SEG)
      return FALSE;
    if (!key_part->field->eq_def(((Item_field *) item)->field))
      return FALSE;
    if (key_part->field->maybe_null())
      return FALSE;
  }

  /* Walk the descriptors of the local key argument fields */
  copy= field_descr+flag_fields;
  copy_end= copy+local_key_arg_fields;
  for ( ; copy < copy_end; copy++)
  {
    /*
      If some of the key arguments are of variable length the key
      is not considered as embedded.
    */
    if (copy->type != 0)
      return FALSE;
    /*
      If some of the key arguments are bit fields whose bits are partially
      stored with null bits the key is not considered as embedded.
    */
    if (copy->field->type() == MYSQL_TYPE_BIT &&
        ((Field_bit*) (copy->field))->bit_len)
      return FALSE;
    len+= copy->length;
  }

  emb_key_length= len;

  /*
    Make sure that key fields follow the order of the corresponding
    key components these fields are equal to. For this the descriptors
    of the fields that comprise the key might be re-ordered.
  */
  for (i= 0; i < ref->key_parts; i++)
  {
    uint j;
    Item *item= ref->items[i]->real_item();
    Field *fld= ((Item_field *) item)->field;
    CACHE_FIELD *init_copy= field_descr+flag_fields+i;
    /* Find the descriptor of the field equal to key part i and, if it is
       not already at position i, swap it into place */
    for (j= i, copy= init_copy; j < local_key_arg_fields; j++, copy++)
    {
      if (fld->eq(copy->field))
      {
        if (j != i)
        {
          CACHE_FIELD key_part_copy= *copy;
          *copy= *init_copy;
          *init_copy= key_part_copy;
        }
        break;
      }
    }
  }

  return TRUE;
}


/*
  Write record fields and their required offsets into the join cache buffer

  SYNOPSIS
    write_record_data()
      link        a reference to the associated info in the previous cache
      is_full OUT true if it has been decided that no more records will be
                  added to the join buffer

  DESCRIPTION
    This function put into the cache buffer the following info that it reads
    from the join record buffers or computes somehow:
      (1) the length of all fields written for the record (optional)
      (2) an offset to the associated info in the previous cache (if there is any)
          determined by the link parameter
      (3) all flag fields of the tables whose data field are put into the cache:
          - match flag (optional),
          - null bitmaps for all tables,
          - null row flags for all tables
      (4) values of all data fields including
          - full images of those fixed length data fields that cannot have
            trailing spaces
          - significant part of fixed length fields that can have trailing spaces
            with the prepended length
          - data of non-blob variable length fields with the prepended data length
          - blob data from blob fields with the prepended data length
      (5) record offset values for the data fields that are referred to from
          other caches

    The record is written at the current position stored in the field 'pos'.
  At the end of the function 'pos' points at the position right after the
    written record data.
    The function increments the number of records in the cache that is stored
    in the 'records' field by 1. The function also modifies the values of
    'curr_rec_pos' and 'last_rec_pos' to point to the written record.
    The 'end_pos' cursor is modified accordingly.
    The 'last_rec_blob_data_is_in_rec_buff' is set on if the blob data
    remains in the record buffers and not copied to the join buffer. It may
    happen only to the blob data from the last record added into the cache.
    If on_precond is attached to join_tab and it is not evaluated to TRUE
    then MATCH_IMPOSSIBLE is placed in the match flag field of the record
    written into the join buffer.

  RETURN VALUE
    length of the written record data
*/

uint JOIN_CACHE::write_record_data(uchar * link, bool *is_full)
{
  uint len;
  bool last_record;
  CACHE_FIELD *copy;
  CACHE_FIELD *copy_end;
  uchar *flags_pos;              /* start of the flag fields in the buffer */
  uchar *cp= pos;                /* write cursor within the join buffer */
  uchar *init_pos= cp;
  uchar *rec_len_ptr= 0;
  uint key_extra= extra_key_length();

  records++;  /* Increment the counter of records in the cache */

  len= pack_length + key_extra;

  /* Make an adjustment for the size of the auxiliary buffer if there is any */
  uint incr= aux_buffer_incr(records);
  size_t rem= rem_space();
  aux_buff_size+= len+incr < rem ? incr : rem;

  /*
    For each blob to be put into cache save its length and a pointer
    to the value in the corresponding element of the blob_ptr array.
    Blobs with null values are skipped.
    Increment 'len' by the total length of all these blobs.
  */
  if (blobs)
  {
    CACHE_FIELD **copy_ptr= blob_ptr;
    CACHE_FIELD **copy_ptr_end= copy_ptr+blobs;
    for ( ; copy_ptr < copy_ptr_end; copy_ptr++)
    {
      Field_blob *blob_field= (Field_blob *) (*copy_ptr)->field;
      if (!blob_field->is_null())
      {
        uint blob_len= blob_field->get_length();
        (*copy_ptr)->blob_length= blob_len;
        len+= blob_len;
        (*copy_ptr)->str= blob_field->get_ptr();
      }
    }
  }

  /*
    Check whether we won't be able to add any new record into the cache after
    this one because the cache will be full. Set last_record to TRUE if it's so.
    The assume that the cache will be full after the record has been written
    into it if either the remaining space of the cache is not big enough for the
    record's blob values or if there is a chance that not all non-blob fields
    of the next record can be placed there.
    This function is called only in the case when there is enough space left in
    the cache to store at least non-blob parts of the current record.
  */
  last_record= (len+pack_length_with_blob_ptrs+key_extra) > rem_space();

  /*
    Save the position for the length of the record in the cache if it's needed.
    The length of the record will be inserted here when all fields of the record
    are put into the cache.
  */
  if (with_length)
  {
    rec_len_ptr= cp;
    DBUG_ASSERT(cp + size_of_rec_len <= buff + buff_size);
    cp+= size_of_rec_len;
  }

  /*
    Put a reference to the fields of the record that are stored in the previous
    cache if there is any. This reference is passed by the 'link' parameter.
  */
  if (prev_cache)
  {
    DBUG_ASSERT(cp + prev_cache->get_size_of_rec_offset() <= buff + buff_size);
    cp+= prev_cache->get_size_of_rec_offset();
    prev_cache->store_rec_ref(cp, link);
  }

  curr_rec_pos= cp;

  /* If the there is a match flag set its value to 0 */
  copy= field_descr;
  if (with_match_flag)
    *copy[0].str= 0;

  /* First put into the cache the values of all flag fields */
  copy_end= field_descr+flag_fields;
  flags_pos= cp;
  for ( ; copy < copy_end; copy++)
  {
    DBUG_ASSERT(cp + copy->length <= buff + buff_size);
    memcpy(cp, copy->str, copy->length);
    cp+= copy->length;
  }

  /* Now put the values of the remaining fields as soon as they are not nulls */
  copy_end= field_descr+fields;
  for ( ; copy < copy_end; copy++)
  {
    Field *field= copy->field;
    if (field && field->maybe_null() && field->is_null())
    {
      /* A null value is represented only by the null bitmap written above */
      if (copy->referenced_field_no)
        copy->offset= 0;
      continue;
    }
    /* Save the offset of the field to put it later at the end of the record */
    if (copy->referenced_field_no)
      copy->offset= (uint)(cp-curr_rec_pos);

    switch (copy->type) {
    case CACHE_BLOB:
    {
      Field_blob *blob_field= (Field_blob *) copy->field;
      if (last_record)
      {
        /* The blob data stays in the record buffer; store only its address */
        last_rec_blob_data_is_in_rec_buff= 1;
        /* Put down the length of the blob and the pointer to the data */
        DBUG_ASSERT(cp + copy->length + sizeof(char*) <= buff + buff_size);
        blob_field->get_image(cp, copy->length+sizeof(char*),
                              blob_field->charset());
        cp+= copy->length+sizeof(char*);
      }
      else
      {
        /* First put down the length of the blob and then copy the data */
        blob_field->get_image(cp, copy->length,
                              blob_field->charset());
        DBUG_ASSERT(cp + copy->length + copy->blob_length <= buff + buff_size);
        if (copy->blob_length)
          memcpy(cp+copy->length, copy->str, copy->blob_length);
        cp+= copy->length+copy->blob_length;
      }
      break;
    }
    case CACHE_VARSTR1:
      /* Copy the significant part of the short varstring field */
      len= (uint) copy->str[0] + 1;
      DBUG_ASSERT(cp + len <= buff + buff_size);
      memcpy(cp, copy->str, len);
      cp+= len;
      break;
    case CACHE_VARSTR2:
      /* Copy the significant part of the long varstring field */
      len= uint2korr(copy->str) + 2;
      DBUG_ASSERT(cp + len <= buff + buff_size);
      memcpy(cp, copy->str, len);
      cp+= len;
      break;
    case CACHE_STRIPPED:
    {
      /*
        Put down the field value stripping all trailing spaces off.
        After this insert the length of the written sequence of bytes.
      */
      uchar *str, *end;
      for (str= copy->str, end= str+copy->length;
           end > str && end[-1] == ' ';
           end--) ;
      len=(uint) (end-str);
      DBUG_ASSERT(cp + len + 2 <= buff + buff_size);
      int2store(cp, len);
      memcpy(cp+2, str, len);
      cp+= len+2;
      break;
    }
    case CACHE_ROWID:
      if (!copy->length)
      {
        /*
          This may happen only for ROWID fields of materialized
          derived tables and views.
        */
        TABLE *table= (TABLE *) copy->str;
        copy->str= table->file->ref;
        copy->length= table->file->ref_length;
        if (!copy->str)
        {
          /*
            If table is an empty inner table of an outer join and it is
            a materialized derived table then table->file->ref == NULL.
          */
          cp+= copy->length;
          break;
        }
      }
      /* fall through */
    default:
      /* Copy the entire image of the field from the record buffer */
      DBUG_ASSERT(cp + copy->length <= buff + buff_size);
      if (copy->str)
        memcpy(cp, copy->str, copy->length);
      cp+= copy->length;
    }
  }

  /* Add the offsets of the fields that are referenced from other caches */
  if (referenced_fields)
  {
    uint cnt= 0;
    for (copy= field_descr+flag_fields; copy < copy_end ; copy++)
    {
      if (copy->referenced_field_no)
      {
        store_fld_offset(cp+size_of_fld_ofs*(copy->referenced_field_no-1),
                         copy->offset);
        cnt++;
      }
    }
    DBUG_ASSERT(cp + size_of_fld_ofs*cnt <= buff + buff_size);
    cp+= size_of_fld_ofs*cnt;
  }

  /* Now the total record length is known: store it at the saved position */
  if (rec_len_ptr)
    store_rec_length(rec_len_ptr, (ulong) (cp-rec_len_ptr-size_of_rec_len));
  last_rec_pos= curr_rec_pos;
  end_pos= pos= cp;
  *is_full= last_record;

  last_written_is_null_compl= 0;
  if (!join_tab->first_unmatched && join_tab->on_precond)
  {
    join_tab->found= 0;
    join_tab->not_null_compl= 1;
    /* If the attached precondition fails no match is possible for the record */
    if (!join_tab->on_precond->val_int())
    {
      flags_pos[0]= MATCH_IMPOSSIBLE;
      last_written_is_null_compl= 1;
    }
  }

  return (uint) (cp-init_pos);
}


/*
  Reset the join buffer for reading/writing: default implementation

  SYNOPSIS
    reset()
      for_writing  if it's TRUE the function reset the buffer for writing

  DESCRIPTION
    This default implementation of the virtual function reset() resets
    the join buffer for reading or writing.
    If the buffer is reset for reading only the 'pos' value is reset
    to point to the very beginning of the join buffer. If the buffer is
If the buffer is 1517 reset for writing additionally: 1518 - the counter of the records in the buffer is set to 0, 1519 - the the value of 'last_rec_pos' gets pointing at the position just 1520 before the buffer, 1521 - 'end_pos' is set to point to the beginning of the join buffer, 1522 - the size of the auxiliary buffer is reset to 0, 1523 - the flag 'last_rec_blob_data_is_in_rec_buff' is set to 0. 1524 1525 RETURN VALUE 1526 none 1527 */ 1528 void JOIN_CACHE::reset(bool for_writing) 1529 { 1530 pos= buff; 1531 curr_rec_link= 0; 1532 if (for_writing) 1533 { 1534 records= 0; 1535 last_rec_pos= buff; 1536 aux_buff_size= 0; 1537 end_pos= pos; 1538 last_rec_blob_data_is_in_rec_buff= 0; 1539 } 1540 } 1541 1542 1543 /* 1544 Add a record into the join buffer: the default implementation 1545 1546 SYNOPSIS 1547 put_record() 1548 1549 DESCRIPTION 1550 This default implementation of the virtual function put_record writes 1551 the next matching record into the join buffer. 1552 It also links the record having been written into the join buffer with 1553 the matched record in the previous cache if there is any. 1554 The implementation assumes that the function get_curr_link() 1555 will return exactly the pointer to this matched record. 1556 1557 RETURN VALUE 1558 TRUE if it has been decided that it should be the last record 1559 in the join buffer, 1560 FALSE otherwise 1561 */ 1562 1563 bool JOIN_CACHE::put_record() 1564 { 1565 bool is_full; 1566 uchar *link= 0; 1567 if (prev_cache) 1568 link= prev_cache->get_curr_rec_link(); 1569 write_record_data(link, &is_full); 1570 return is_full; 1571 } 1572 1573 1574 /* 1575 Read the next record from the join buffer: the default implementation 1576 1577 SYNOPSIS 1578 get_record() 1579 1580 DESCRIPTION 1581 This default implementation of the virtual function get_record 1582 reads fields of the next record from the join buffer of this cache. 
1583 The function also reads all other fields associated with this record 1584 from the the join buffers of the previous caches. The fields are read 1585 into the corresponding record buffers. 1586 It is supposed that 'pos' points to the position in the buffer 1587 right after the previous record when the function is called. 1588 When the function returns the 'pos' values is updated to point 1589 to the position after the read record. 1590 The value of 'curr_rec_pos' is also updated by the function to 1591 point to the beginning of the first field of the record in the 1592 join buffer. 1593 1594 RETURN VALUE 1595 TRUE there are no more records to read from the join buffer 1596 FALSE otherwise 1597 */ 1598 1599 bool JOIN_CACHE::get_record() 1600 { 1601 bool res; 1602 uchar *prev_rec_ptr= 0; 1603 if (with_length) 1604 pos+= size_of_rec_len; 1605 if (prev_cache) 1606 { 1607 pos+= prev_cache->get_size_of_rec_offset(); 1608 prev_rec_ptr= prev_cache->get_rec_ref(pos); 1609 } 1610 curr_rec_pos= pos; 1611 if (!(res= read_all_record_fields() == NO_MORE_RECORDS_IN_BUFFER)) 1612 { 1613 pos+= referenced_fields*size_of_fld_ofs; 1614 if (prev_cache) 1615 prev_cache->get_record_by_pos(prev_rec_ptr); 1616 } 1617 return res; 1618 } 1619 1620 1621 /* 1622 Read a positioned record from the join buffer: the default implementation 1623 1624 SYNOPSIS 1625 get_record_by_pos() 1626 rec_ptr position of the first field of the record in the join buffer 1627 1628 DESCRIPTION 1629 This default implementation of the virtual function get_record_pos 1630 reads the fields of the record positioned at 'rec_ptr' from the join buffer. 1631 The function also reads all other fields associated with this record 1632 from the the join buffers of the previous caches. The fields are read 1633 into the corresponding record buffers. 
1634 1635 RETURN VALUE 1636 none 1637 */ 1638 1639 void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) 1640 { 1641 uchar *save_pos= pos; 1642 pos= rec_ptr; 1643 read_all_record_fields(); 1644 pos= save_pos; 1645 if (prev_cache) 1646 { 1647 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); 1648 prev_cache->get_record_by_pos(prev_rec_ptr); 1649 } 1650 } 1651 1652 1653 /* 1654 Get the match flag from the referenced record: the default implementation 1655 1656 SYNOPSIS 1657 get_match_flag_by_pos() 1658 rec_ptr position of the first field of the record in the join buffer 1659 1660 DESCRIPTION 1661 This default implementation of the virtual function get_match_flag_by_pos 1662 get the match flag for the record pointed by the reference at the position 1663 rec_ptr. If the match flag is placed in one of the previous buffers the 1664 function first reaches the linked record fields in this buffer. 1665 The function returns the value of the first encountered match flag. 1666 1667 RETURN VALUE 1668 match flag for the record at the position rec_ptr 1669 */ 1670 1671 enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) 1672 { 1673 Match_flag match_fl= MATCH_NOT_FOUND; 1674 if (with_match_flag) 1675 { 1676 match_fl= (enum Match_flag) rec_ptr[0]; 1677 return match_fl; 1678 } 1679 if (prev_cache) 1680 { 1681 uchar *prev_rec_ptr= prev_cache->get_rec_ref(rec_ptr); 1682 return prev_cache->get_match_flag_by_pos(prev_rec_ptr); 1683 } 1684 DBUG_ASSERT(0); 1685 return match_fl; 1686 } 1687 1688 1689 /* 1690 Get the match flag for the referenced record from specified join buffer 1691 1692 SYNOPSIS 1693 get_match_flag_by_pos_from_join_buffer() 1694 rec_ptr position of the first field of the record in the join buffer 1695 tab join table with join buffer where to look for the match flag 1696 1697 DESCRIPTION 1698 This default implementation of the get_match_flag_by_pos_from_join_buffer 1699 method gets the match flag for the record pointed by the reference at the 
1700 position rec_ptr from the join buffer attached to the join table tab. 1701 1702 RETURN VALUE 1703 match flag for the record at the position rec_ptr from the join 1704 buffer attached to the table tab. 1705 */ 1706 1707 enum JOIN_CACHE::Match_flag 1708 JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, 1709 JOIN_TAB *tab) 1710 { 1711 DBUG_ASSERT(tab->cache && tab->cache->with_match_flag); 1712 for (JOIN_CACHE *cache= this; ; ) 1713 { 1714 if (cache->join_tab == tab) 1715 return (enum Match_flag) rec_ptr[0]; 1716 cache= cache->prev_cache; 1717 rec_ptr= cache->get_rec_ref(rec_ptr); 1718 } 1719 } 1720 1721 1722 /* 1723 Calculate the increment of the auxiliary buffer for a record write 1724 1725 SYNOPSIS 1726 aux_buffer_incr() 1727 recno the number of the record the increment to be calculated for 1728 1729 DESCRIPTION 1730 This function calls the aux_buffer_incr the method of the 1731 companion member join_tab_scan to calculate the growth of the 1732 auxiliary buffer when the recno-th record is added to the 1733 join_buffer of this cache. 1734 1735 RETURN VALUE 1736 the number of bytes in the increment 1737 */ 1738 1739 uint JOIN_CACHE::aux_buffer_incr(size_t recno) 1740 { 1741 return join_tab_scan->aux_buffer_incr(recno); 1742 } 1743 1744 /* 1745 Read all flag and data fields of a record from the join buffer 1746 1747 SYNOPSIS 1748 read_all_record_fields() 1749 1750 DESCRIPTION 1751 The function reads all flag and data fields of a record from the join 1752 buffer into the corresponding record buffers. 1753 The fields are read starting from the position 'pos' which is 1754 supposed to point to the beginning of the first record field. 1755 The function increments the value of 'pos' by the length of the 1756 read data. 
1757 1758 RETURN VALUE 1759 (-1) if there is no more records in the join buffer 1760 length of the data read from the join buffer - otherwise 1761 */ 1762 1763 uint JOIN_CACHE::read_all_record_fields() 1764 { 1765 uchar *init_pos= pos; 1766 1767 if (pos > last_rec_pos || !records) 1768 return NO_MORE_RECORDS_IN_BUFFER; 1769 1770 /* First match flag, read null bitmaps and null_row flag for each table */ 1771 read_flag_fields(); 1772 1773 /* Now read the remaining table fields if needed */ 1774 CACHE_FIELD *copy= field_descr+flag_fields; 1775 CACHE_FIELD *copy_end= field_descr+fields; 1776 bool blob_in_rec_buff= blob_data_is_in_rec_buff(init_pos); 1777 for ( ; copy < copy_end; copy++) 1778 read_record_field(copy, blob_in_rec_buff); 1779 1780 return (uint) (pos-init_pos); 1781 } 1782 1783 1784 /* 1785 Read all flag fields of a record from the join buffer 1786 1787 SYNOPSIS 1788 read_flag_fields() 1789 1790 DESCRIPTION 1791 The function reads all flag fields of a record from the join 1792 buffer into the corresponding record buffers. 1793 The fields are read starting from the position 'pos'. 1794 The function increments the value of 'pos' by the length of the 1795 read data. 
1796 1797 RETURN VALUE 1798 length of the data read from the join buffer 1799 */ 1800 1801 uint JOIN_CACHE::read_flag_fields() 1802 { 1803 uchar *init_pos= pos; 1804 CACHE_FIELD *copy= field_descr; 1805 CACHE_FIELD *copy_end= copy+flag_fields; 1806 if (with_match_flag) 1807 { 1808 copy->str[0]= MY_TEST((Match_flag) pos[0] == MATCH_FOUND); 1809 pos+= copy->length; 1810 copy++; 1811 } 1812 for ( ; copy < copy_end; copy++) 1813 { 1814 memcpy(copy->str, pos, copy->length); 1815 pos+= copy->length; 1816 } 1817 return (uint)(pos-init_pos); 1818 } 1819 1820 1821 /* 1822 Read a data record field from the join buffer 1823 1824 SYNOPSIS 1825 read_record_field() 1826 copy the descriptor of the data field to be read 1827 blob_in_rec_buff indicates whether this is the field from the record 1828 whose blob data are in record buffers 1829 1830 DESCRIPTION 1831 The function reads the data field specified by the parameter copy 1832 from the join buffer into the corresponding record buffer. 1833 The field is read starting from the position 'pos'. 1834 The data of blob values is not copied from the join buffer. 1835 The function increments the value of 'pos' by the length of the 1836 read data. 

  RETURN VALUE
    length of the data read from the join buffer
*/

uint JOIN_CACHE::read_record_field(CACHE_FIELD *copy, bool blob_in_rec_buff)
{
  uint len;
  /* Do not copy the field if its value is null */
  if (copy->field && copy->field->maybe_null() && copy->field->is_null())
    return 0;
  switch (copy->type) {
  case CACHE_BLOB:
  {
    Field_blob *blob_field= (Field_blob *) copy->field;
    /*
      Copy the length and the pointer to data but not the blob data
      itself to the record buffer
    */
    if (blob_in_rec_buff)
    {
      /* The buffer holds the blob length followed by a raw data pointer */
      blob_field->set_image(pos, copy->length + sizeof(char*),
                            blob_field->charset());
      len= copy->length + sizeof(char*);
    }
    else
    {
      /* The blob data follows its length in the join buffer itself */
      blob_field->set_ptr(pos, pos+copy->length);
      len= copy->length + blob_field->get_length();
    }
  }
  break;
  case CACHE_VARSTR1:
    /* Copy the significant part of the short varstring field */
    len= (uint) pos[0] + 1;
    memcpy(copy->str, pos, len);
    break;
  case CACHE_VARSTR2:
    /* Copy the significant part of the long varstring field */
    len= uint2korr(pos) + 2;
    memcpy(copy->str, pos, len);
    break;
  case CACHE_STRIPPED:
    /* Pad the value by spaces that has been stripped off */
    len= uint2korr(pos);
    memcpy(copy->str, pos+2, len);
    memset(copy->str+len, ' ', copy->length-len);
    len+= 2;               /* account for the stored 2-byte length prefix */
    break;
  case CACHE_ROWID:
    if (!copy->str)
    {
      /* No rowid was stored for this record; just skip its slot */
      len= copy->length;
      break;
    }
    /* fall through */
  default:
    /* Copy the entire image of the field from the record buffer */
    len= copy->length;
    memcpy(copy->str, pos, len);
  }
  /* Advance the read cursor past the data consumed for this field */
  pos+= len;
  return len;
}


/*
  Read a referenced field from the join buffer

  SYNOPSIS
    read_referenced_field()
      copy         pointer to the descriptor of the referenced field
      rec_ptr      pointer to the record that may contain this field
      len          IN/OUT
 total length of the record fields

  DESCRIPTION
    The function checks whether copy points to a data field descriptor
    for this cache object. If it does not then the function returns
    FALSE. Otherwise the function reads the field of the record in
    the join buffer pointed by 'rec_ptr' into the corresponding record
    buffer and returns TRUE.
    If the value of *len is 0 then the function sets it to the total
    length of the record fields including possible trailing offset
    values. Otherwise *len is supposed to provide this value that
    has been obtained earlier.

  NOTE
    If the value of the referenced field is null then the offset
    for the value is set to 0. If the value of a field can be null
    then the value of flag_fields is always positive. So the offset
    for any non-null value cannot be 0 in this case.

  RETURN VALUE
    TRUE   'copy' points to a data descriptor of this join cache
    FALSE  otherwise
*/

bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
                                       uchar *rec_ptr,
                                       uint *len)
{
  uchar *ptr;
  uint offset;
  /* The descriptor must belong to this cache's field descriptor array */
  if (copy < field_descr || copy >= field_descr+fields)
    return FALSE;
  if (!*len)
  {
    /* Get the total length of the record fields */
    uchar *len_ptr= rec_ptr;
    if (prev_cache)
      len_ptr-= prev_cache->get_size_of_rec_offset();
    *len= get_rec_length(len_ptr-size_of_rec_len);
  }

  /* The field offsets are stored at the very end of the record data;
     they are laid out in the order of the referenced field numbers */
  ptr= rec_ptr-(prev_cache ? prev_cache->get_size_of_rec_offset() : 0);
  offset= get_fld_offset(ptr+ *len -
                         size_of_fld_ofs*
                         (referenced_fields+1-copy->referenced_field_no));
  bool is_null= FALSE;
  Field *field= copy->field;
  /* A zero offset encodes a null value (see the NOTE above) */
  if (offset == 0 && flag_fields)
    is_null= TRUE;
  if (is_null)
  {
    field->set_null();
    if (!field->real_maybe_null())
      field->table->null_row= 1;
  }
  else
  {
    /* Temporarily move 'pos' to the field data to reuse read_record_field */
    uchar *save_pos= pos;
    field->set_notnull();
    if (!field->real_maybe_null())
      field->table->null_row= 0;
    pos= rec_ptr+offset;
    read_record_field(copy, blob_data_is_in_rec_buff(rec_ptr));
    pos= save_pos;
  }
  return TRUE;
}


/*
  Skip record from join buffer if's already matched: default implementation

  SYNOPSIS
    skip_if_matched()

  DESCRIPTION
    This default implementation of the virtual function skip_if_matched
    skips the next record from the join buffer if its match flag is set to
    MATCH_FOUND.
    If the record is skipped the value of 'pos' is set to point to the position
    right after the record.

  NOTE
    Currently this function is called only when generating null complemented
    records for outer joins (=> only when join_tab->first_unmatched != NULL).

  RETURN VALUE
    TRUE  the match flag is set to MATCH_FOUND and the record has been skipped
    FALSE otherwise
*/

bool JOIN_CACHE::skip_if_matched()
{
  DBUG_ASSERT(with_length);
  /* Skip over the record-length prefix (and prev-cache reference, if any) */
  uint offset= size_of_rec_len;
  if (prev_cache)
    offset+= prev_cache->get_size_of_rec_offset();
  /* Check whether the match flag is MATCH_FOUND */
  if (get_match_flag_by_pos_from_join_buffer(pos+offset,
                                             join_tab->first_unmatched) ==
      MATCH_FOUND)
  {
    /* Advance 'pos' right past the skipped record */
    pos+= size_of_rec_len + get_rec_length(pos);
    return TRUE;
  }
  return FALSE;
}


/*
  Skip record from join buffer if the match isn't needed: default implementation

  SYNOPSIS
    skip_if_not_needed_match()

  DESCRIPTION
    This default implementation of the virtual function skip_if_not_needed_match
    skips the next record from the join when generating join extensions
    for the records in the join buffer depending on the value of the match flag.
    - In the case of a semi-join nest the match flag may be in two states
      {MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set
      to MATCH_FOUND.
    - In the case of an outer join nest when not_exists optimization is applied
      the match flag may be in three states {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE,
      MATCH_FOUND}. The record is skipped if the flag is set to MATCH_FOUND or
      to MATCH_IMPOSSIBLE.

    If the record is skipped the value of 'pos' is set to point to the position
    right after the record.

  NOTE
    Currently the function is called only when generating non-null complemented
    extensions for records in the join buffer.

  RETURN VALUE
    TRUE  the record has to be skipped
    FALSE otherwise
*/

bool JOIN_CACHE::skip_if_not_needed_match()
{
  DBUG_ASSERT(with_length);
  enum Match_flag match_fl;
  uint offset= size_of_rec_len;
  bool skip= FALSE;
  if (prev_cache)
    offset+= prev_cache->get_size_of_rec_offset();

  /* Skipping only makes sense when a single match per record suffices */
  if (!join_tab->check_only_first_match())
    return FALSE;

  match_fl= get_match_flag_by_pos(pos+offset);
  skip= join_tab->first_sj_inner_tab ?
        match_fl == MATCH_FOUND :           // the case of semi-join
        match_fl != MATCH_NOT_FOUND;        // the case of outer-join

  if (skip)
  {
    pos+= size_of_rec_len + get_rec_length(pos);
    return TRUE;
  }
  return FALSE;
}


/*
  Restore the fields of the last record from the join buffer

  SYNOPSIS
    restore_last_record()

  DESCRIPTION
    This function restores the values of the fields of the last record put
    into join buffer in record buffers. The values most probably have been
    overwritten by the field values from other records when they were read
    from the join buffer into the record buffer in order to check pushdown
    predicates.

  RETURN
    none
*/

void JOIN_CACHE::restore_last_record()
{
  if (records)
    get_record_by_pos(last_rec_pos);
}


/*
  Join records from the join buffer with records from the next join table

  SYNOPSIS
    join_records()
      skip_last    do not find matches for the last record from the buffer

  DESCRIPTION
    The function extends all records from the join buffer by the matched
    records from join_tab. In the case of outer join operation it also
    adds null complementing extensions for the records from the join buffer
    that have no match.
    No extensions are generated for the last record from the buffer if
    skip_last is true.

  NOTES
    The function must make sure that if linked join buffers are used then
    a join buffer cannot be refilled again until all extensions in the
    buffers chained to this one are generated.
    Currently an outer join operation with several inner tables always uses
    at least two linked buffers with the match join flags placed in the
    first buffer. Any record composed of rows of the inner tables that
    matches a record in this buffer must refer to the position of the
    corresponding match flag.

  IMPLEMENTATION
    When generating extensions for outer tables of an outer join operation
    first we generate all extensions for those records from the join buffer
    that have matches, after which null complementing extension for all
    unmatched records from the join buffer are generated.

  RETURN VALUE
    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
*/

enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
{
  JOIN_TAB *tab;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_records");

  if (outer_join_first_inner && !join_tab->first_unmatched)
    join_tab->not_null_compl= TRUE;

  if (!join_tab->first_unmatched)
  {
    /* Find all records from join_tab that match records from join buffer */
    rc= join_matching_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
    if (outer_join_first_inner)
    {
      if (next_cache && join_tab != join_tab->last_inner)
      {
        /*
          Ensure that all matches for outer records from join buffer are to be
          found. Now we ensure that all full records are found for records from
          join buffer. Generally this is an overkill.
          TODO: Ensure that only matches of the inner table records have to be
          found for the records from join buffer.
        */
        rc= next_cache->join_records(skip_last);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
      join_tab->not_null_compl= FALSE;
      /*
        Prepare for generation of null complementing extensions.
        For all inner tables of the outer join operation for which
        regular matches have been just found the field 'first_unmatched'
        is set to point to the first inner table. After all null
        complement rows are generated for this outer join this field
        is set back to NULL.
      */
      for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
        tab->first_unmatched= join_tab->first_inner;
    }
  }
  if (join_tab->first_unmatched)
  {
    if (is_key_access())
      restore_last_record();

    /*
      Generate all null complementing extensions for the records from
      join buffer that don't have any matching rows from the inner tables.
    */
    reset(FALSE);
    rc= join_null_complements(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }
  if (next_cache)
  {
    /*
      When using linked caches we must ensure the records in the next caches
      that refer to the records in the join buffer are fully extended.
      Otherwise we could have references to the records that have been
      already erased from the join buffer and replaced for new records.
    */
    rc= next_cache->join_records(skip_last);
    if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      goto finish;
  }

  if (skip_last)
  {
    DBUG_ASSERT(!is_key_access());
    /*
      Restore the last record from the join buffer to generate
      all extensions for it.
    */
    get_record();
  }

finish:
  if (outer_join_first_inner &&
      join_tab->first_inner == join_tab->first_unmatched)
  {
    /*
      All null complemented rows have been already generated for all
      outer records from join buffer. Restore the state of the
      first_unmatched values to 0 to avoid another null complementing.
    */
    for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
      tab->first_unmatched= 0;
  }
  restore_last_record();
  reset(TRUE);
  DBUG_PRINT("exit", ("rc: %d", rc));
  DBUG_RETURN(rc);
}


/*
  Find matches from the next table for records from the join buffer

  SYNOPSIS
    join_matching_records()
      skip_last    do not look for matches for the last partial join record

  DESCRIPTION
    The function retrieves rows of the join_tab table and checks whether they
    match partial join records from the join buffer. If a match is found
    the function will call the sub_select function trying to look for matches
    for the remaining join operations.
    This function currently is called only from the function join_records.
    If the value of skip_last is true the function writes the partial join
    record from the record buffer into the join buffer to save its value for
    the future processing in the caller function.

  NOTES
    If employed by BNL or BNLH join algorithms the function performs a full
    scan of join_tab for each refill of the join buffer. If BKA or BKAH
    algorithms are used then the function iterates only over those records
    from join_tab that can be accessed by keys built over records in the join
    buffer. To apply a proper method of iteration the function just calls
    virtual iterator methods (open, next, close) of the member join_tab_scan.
    The member can be either of the JOIN_TAB_SCAN or JOIN_TAB_SCAN_MRR type.
    The class JOIN_TAB_SCAN provides the iterator methods for BNL/BNLH join
    algorithms. The class JOIN_TAB_SCAN_MRR provides the iterator methods
    for BKA/BKAH join algorithms.
    When the function looks for records from the join buffer that would
    match a record from join_tab it iterates either over all records in
    the buffer or only over selected records. If BNL join operation is
    performed all records are checked for the match. If BNLH or BKAH
    algorithm is employed to join join_tab then the function looks only
    through the records with the same join key as the record from join_tab.
    With the BKA join algorithm only one record from the join buffer is checked
    for a match for any record from join_tab. To iterate over the candidates
    for a match the virtual function get_next_candidate_for_match is used,
    while the virtual function prepare_look_for_matches is called to prepare
    for such iteration process.

  NOTES
    The function produces all matching extensions for the records in the
    join buffer following the path of the employed blocked algorithm.
    When an outer join operation is performed all unmatched records from
    the join buffer must be extended by null values. The function
    'join_null_complements' serves this purpose.

  RETURN VALUE
    return one of enum_nested_loop_state
*/

enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
{
  int error;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  join_tab->table->null_row= 0;
  bool check_only_first_match=
    join_tab->check_only_first_match() &&
    (!join_tab->first_inner ||                            // semi-join case
     join_tab->first_inner == join_tab->first_unmatched); // outer join case
  bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
  DBUG_ENTER("JOIN_CACHE::join_matching_records");

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /*
    When joining we read records from the join buffer back into record buffers.
    If matches for the last partial join record are found through a call to
    the sub_select function then this partial join record must be saved in the
    join buffer in order to be restored just before the sub_select call.
  */
  if (skip_last)
    put_record();

  if (join_tab->use_quick == 2 && join_tab->select->quick)
  {
    /* A dynamic range access was used last. Clean up after it */
    delete join_tab->select->quick;
    join_tab->select->quick= 0;
  }

  /* Start lazily-materialized tables etc; negative rc values are errors */
  if ((rc= join_tab_execution_startup(join_tab)) < 0)
    goto finish2;

  /* Prepare to retrieve all records of the joined table */
  if (unlikely((error= join_tab_scan->open())))
  {
    /*
      TODO: if we get here, we will assert in net_send_statement(). Add test
      coverage and fix.
    */
    goto finish;
  }

  while (!(error= join_tab_scan->next()))
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }

    if (join_tab->keep_current_rowid)
      join_tab->table->file->position(join_tab->table->record[0]);

    /* Prepare to read matching candidates from the join buffer */
    if (prepare_look_for_matches(skip_last))
      continue;
    join_tab->jbuf_tracker->r_scans++;

    uchar *rec_ptr;
    /* Read each possible candidate from the buffer and look for matches */
    while ((rec_ptr= get_next_candidate_for_match()))
    {
      join_tab->jbuf_tracker->r_rows++;
      /*
        If only the first match is needed, and, it has been already found for
        the next record read from the join buffer, then the record is skipped.
        Also those records that must be null complemented are not considered
        as candidates for matches.
      */
      if ((!check_only_first_match && !outer_join_first_inner) ||
          !skip_next_candidate_for_match(rec_ptr))
      {
        read_next_candidate_for_match(rec_ptr);
        rc= generate_full_extensions(rec_ptr);
        if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
          goto finish;
      }
    }
  }

finish:
  if (error)
    rc= error < 0 ? NESTED_LOOP_NO_MORE_ROWS: NESTED_LOOP_ERROR;
finish2:
  join_tab_scan->close();
  DBUG_RETURN(rc);
}


/*
  Set match flag for a record in join buffer if it has not been set yet

  SYNOPSIS
    set_match_flag_if_none()
      first_inner     the join table to which this flag is attached to
      rec_ptr         pointer to the record in the join buffer

  DESCRIPTION
    If the records of the table are accumulated in a join buffer the function
    sets the match flag for the record in the buffer that is referred to by
    the record from this cache positioned at 'rec_ptr'.
    The function also sets the match flag 'found' of the table first inner
    if it has not been set before.

  NOTES
    The function assumes that the match flag for any record in any cache
    is placed in the first byte occupied by the record fields.

  RETURN VALUE
    TRUE   the match flag is set by this call for the first time
    FALSE  the match flag has been set before this call
*/

bool JOIN_CACHE::set_match_flag_if_none(JOIN_TAB *first_inner,
                                        uchar *rec_ptr)
{
  if (!first_inner->cache)
  {
    /*
      Records of the first inner table to which the flag is attached to
      are not accumulated in a join buffer.
    */
    if (first_inner->found)
      return FALSE;
    else
    {
      first_inner->found= 1;
      return TRUE;
    }
  }
  /* Walk back through the chain of linked caches to first_inner's record */
  JOIN_CACHE *cache= this;
  while (cache->join_tab != first_inner)
  {
    cache= cache->prev_cache;
    DBUG_ASSERT(cache);
    rec_ptr= cache->get_rec_ref(rec_ptr);
  }
  /* The match flag occupies the first byte of the record fields */
  if ((Match_flag) rec_ptr[0] != MATCH_FOUND)
  {
    rec_ptr[0]= MATCH_FOUND;
    first_inner->found= 1;
    return TRUE;
  }
  return FALSE;
}


/*
  Generate all full extensions for a partial join record in the buffer

  SYNOPSIS
    generate_full_extensions()
      rec_ptr     pointer to the record from join buffer to generate extensions

  DESCRIPTION
    The function first checks whether the current record of 'join_tab' matches
    the partial join record from join buffer located at 'rec_ptr'. If it is the
    case the function calls the join_tab->next_select method to generate
    all full extension for this partial join match.

  RETURN VALUE
    return one of enum_nested_loop_state.
*/

enum_nested_loop_state JOIN_CACHE::generate_full_extensions(uchar *rec_ptr)
{
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  DBUG_ENTER("JOIN_CACHE::generate_full_extensions");

  /*
    Check whether the extended partial join record meets
    the pushdown conditions.
  */
  if (check_match(rec_ptr))
  {
    int res= 0;

    /* Duplicate weedout: res < 0 is an error, res > 0 means "skip this row" */
    if (!join_tab->check_weed_out_table ||
        !(res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd)))
    {
      set_curr_rec_link(rec_ptr);
      rc= (join_tab->next_select)(join, join_tab+1, 0);
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
      {
        reset(TRUE);
        DBUG_RETURN(rc);
      }
    }
    if (res == -1)
    {
      rc= NESTED_LOOP_ERROR;
      DBUG_RETURN(rc);
    }
  }
  else if (unlikely(join->thd->is_error()))
    rc= NESTED_LOOP_ERROR;
  DBUG_RETURN(rc);
}


/*
  Check matching to a partial join record from the join buffer

  SYNOPSIS
    check_match()
      rec_ptr     pointer to the record from join buffer to check matching to

  DESCRIPTION
    The function checks whether the current record of 'join_tab' matches
    the partial join record from join buffer located at 'rec_ptr'. If this is
    the case and 'join_tab' is the last inner table of a semi-join or an outer
    join the function turns on the match flag for the 'rec_ptr' record unless
    it has been already set.

  NOTES
    Setting the match flag on can trigger re-evaluation of pushdown conditions
    for the record when join_tab is the last inner table of an outer join.

  RETURN VALUE
    TRUE   there is a match
    FALSE  there is no match
           In this case the caller must also check thd->is_error() to see
           if there was a fatal error for the query.
*/

inline bool JOIN_CACHE::check_match(uchar *rec_ptr)
{
  /* Check whether pushdown conditions are satisfied */
  DBUG_ENTER("JOIN_CACHE:check_match");

  if (join_tab->select && join_tab->select->skip_record(join->thd) <= 0)
    DBUG_RETURN(FALSE);

  join_tab->jbuf_tracker->r_rows_after_where++;

  if (!join_tab->is_last_inner_table())
    DBUG_RETURN(TRUE);

  /*
    This is the last inner table of an outer join,
    and maybe of other embedding outer joins, or
    this is the last inner table of a semi-join.
  */
  JOIN_TAB *first_inner= join_tab->get_first_inner_table();
  do
  {
    set_match_flag_if_none(first_inner, rec_ptr);
    if (first_inner->check_only_first_match() &&
        !join_tab->first_inner)
      DBUG_RETURN(TRUE);
    /*
      This is the first match for the outer table row.
      The function set_match_flag_if_none has turned the flag
      first_inner->found on. The pushdown predicates for
      inner tables must be re-evaluated with this flag on.
      Note that, if first_inner is the first inner table
      of a semi-join, but is not an inner table of an outer join
      such that 'not exists' optimization can be applied to it,
      the re-evaluation of the pushdown predicates is not needed.
    */
    for (JOIN_TAB *tab= first_inner; tab <= join_tab; tab++)
    {
      if (tab->select && tab->select->skip_record(join->thd) <= 0)
        DBUG_RETURN(FALSE);
    }
  }
  while ((first_inner= first_inner->first_upper) &&
         first_inner->last_inner == join_tab);
  DBUG_RETURN(TRUE);
}


/*
  Add null complements for unmatched outer records from join buffer

  SYNOPSIS
    join_null_complements()
      skip_last    do not add null complements for the last record

  DESCRIPTION
    This function is called only for inner tables of outer joins.
    The function retrieves all rows from the join buffer and adds null
    complements for those of them that do not have matches for outer
    table records.
    If the 'join_tab' is the last inner table of the embedding outer
    join and the null complemented record satisfies the outer join
    condition then the corresponding match flag is turned on
    unless it has been set earlier. This setting may trigger
    re-evaluation of pushdown conditions for the record.

  NOTES
    The same implementation of the virtual method join_null_complements
    is used for the BNL/BNLH/BKA/BKAH join algorithms.

  RETURN VALUE
    return one of enum_nested_loop_state.
*/

enum_nested_loop_state JOIN_CACHE::join_null_complements(bool skip_last)
{
  ulonglong cnt;
  enum_nested_loop_state rc= NESTED_LOOP_OK;
  bool is_first_inner= join_tab == join_tab->first_unmatched;
  DBUG_ENTER("JOIN_CACHE::join_null_complements");

  /* Return at once if there are no records in the join buffer */
  if (!records)
    DBUG_RETURN(NESTED_LOOP_OK);

  /* For non-key access the last record is processed too even if skip_last */
  cnt= records - (is_key_access() ? 0 : MY_TEST(skip_last));

  /* This function may be called only for inner tables of outer joins */
  DBUG_ASSERT(join_tab->first_inner);

  for ( ; cnt; cnt--)
  {
    if (unlikely(join->thd->check_killed()))
    {
      /* The user has aborted the execution of the query */
      rc= NESTED_LOOP_KILLED;
      goto finish;
    }
    /* Just skip the whole record if a match for it has been already found */
    if (!is_first_inner || !skip_if_matched())
    {
      get_record();
      /* The outer row is complemented by nulls for each inner table */
      restore_record(join_tab->table, s->default_values);
      mark_as_null_row(join_tab->table);
      rc= generate_full_extensions(get_curr_rec());
      if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
        goto finish;
    }
  }

finish:
  DBUG_RETURN(rc);
}


/*
  Save data on the join algorithm employed by the join cache

  SYNOPSIS
    save_explain_data()
      explain  the data structure to fill with info on the join buffer type

  DESCRIPTION
    This function puts info about the type of the used join buffer (flat or
    incremental) and on the type of the employed join algorithm (BNL,
    BNLH, BKA or BKAH) to the data structure

  RETURN VALUE
    0 ok
    1 error
*/

bool JOIN_CACHE::save_explain_data(EXPLAIN_BKA_TYPE *explain)
{
  explain->incremental= MY_TEST(prev_cache);

  explain->join_buffer_size= get_join_buffer_size();

  switch (get_join_alg()) {
  case BNL_JOIN_ALG:
    explain->join_alg= "BNL";
    break;
  case BNLH_JOIN_ALG:
    explain->join_alg= "BNLH";
    break;
  case BKA_JOIN_ALG:
    explain->join_alg= "BKA";
    break;
  case BKAH_JOIN_ALG:
    explain->join_alg= "BKAH";
    break;
  default:
    DBUG_ASSERT(0);
  }
  return 0;
}

/**
  get thread handle.
2673 */ 2674 2675 THD *JOIN_CACHE::thd() 2676 { 2677 return join->thd; 2678 } 2679 2680 2681 static bool add_mrr_explain_info(String *str, uint mrr_mode, handler *file) 2682 { 2683 char mrr_str_buf[128]={0}; 2684 int len; 2685 len= file->multi_range_read_explain_info(mrr_mode, mrr_str_buf, 2686 sizeof(mrr_str_buf)); 2687 if (len > 0) 2688 { 2689 if (str->length()) 2690 { 2691 if (str->append(STRING_WITH_LEN("; "))) 2692 return 1; 2693 } 2694 if (str->append(mrr_str_buf, len)) 2695 return 1; 2696 } 2697 return 0; 2698 } 2699 2700 2701 bool JOIN_CACHE_BKA::save_explain_data(EXPLAIN_BKA_TYPE *explain) 2702 { 2703 if (JOIN_CACHE::save_explain_data(explain)) 2704 return 1; 2705 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); 2706 } 2707 2708 2709 bool JOIN_CACHE_BKAH::save_explain_data(EXPLAIN_BKA_TYPE *explain) 2710 { 2711 if (JOIN_CACHE::save_explain_data(explain)) 2712 return 1; 2713 return add_mrr_explain_info(&explain->mrr_type, mrr_mode, join_tab->table->file); 2714 } 2715 2716 2717 /* 2718 Initialize a hashed join cache 2719 2720 SYNOPSIS 2721 init() 2722 for_explain join buffer is initialized for explain only 2723 2724 DESCRIPTION 2725 The function initializes the cache structure with a hash table in it. 2726 The hash table will be used to store key values for the records from 2727 the join buffer. 2728 The function allocates memory for the join buffer and for descriptors of 2729 the record fields stored in the buffer. 2730 The function also initializes a hash table for record keys within the join 2731 buffer space. 2732 2733 NOTES VALUE 2734 The function is supposed to be called by the init methods of the classes 2735 derived from JOIN_CACHE_HASHED. 

  RETURN VALUE
    0   initialization with buffer allocations has been succeeded
    1   otherwise
*/

int JOIN_CACHE_HASHED::init(bool for_explain)
{
  int rc= 0;
  TABLE_REF *ref= &join_tab->ref;

  DBUG_ENTER("JOIN_CACHE_HASHED::init");

  hash_table= 0;
  key_entries= 0;

  key_length= ref->key_length;

  if ((rc= JOIN_CACHE::init(for_explain)) || for_explain)
    DBUG_RETURN (rc);

  if (!(key_buff= (uchar*) join->thd->alloc(key_length)))
    DBUG_RETURN(1);

  /* Take into account a reference to the next record in the key chain */
  pack_length+= get_size_of_rec_offset();
  pack_length_with_blob_ptrs+= get_size_of_rec_offset();

  ref_key_info= join_tab->get_keyinfo_by_key_no(join_tab->ref.key);
  ref_used_key_parts= join_tab->ref.key_parts;

  /* Use the fast binary hash/compare unless some key part needs collation */
  hash_func= &JOIN_CACHE_HASHED::get_hash_idx_simple;
  hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_simple;

  KEY_PART_INFO *key_part= ref_key_info->key_part;
  KEY_PART_INFO *key_part_end= key_part+ref_used_key_parts;
  for ( ; key_part < key_part_end; key_part++)
  {
    if (!key_part->field->eq_cmp_as_binary())
    {
      hash_func= &JOIN_CACHE_HASHED::get_hash_idx_complex;
      hash_cmp_func= &JOIN_CACHE_HASHED::equal_keys_complex;
      break;
    }
  }

  init_hash_table();

  rec_fields_offset= get_size_of_rec_offset()+get_size_of_rec_length()+
                     (prev_cache ?
                      prev_cache->get_size_of_rec_offset() : 0);

  data_fields_offset= 0;
  if (use_emb_key)
  {
    /* With embedded keys data fields start right after the flag fields */
    CACHE_FIELD *copy= field_descr;
    CACHE_FIELD *copy_end= copy+flag_fields;
    for ( ; copy < copy_end; copy++)
      data_fields_offset+= copy->length;
  }

  DBUG_RETURN(0);
}


/*
  Initialize the hash table of a hashed join cache

  SYNOPSIS
    init_hash_table()

  DESCRIPTION
    The function estimates the number of hash table entries in the hash
    table to be used and initializes this hash table within the join buffer
    space.

  RETURN VALUE
    Currently the function always returns 0;
*/

int JOIN_CACHE_HASHED::init_hash_table()
{
  hash_table= 0;
  key_entries= 0;

  /* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
  uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
  for (size_of_key_ofs= 2;
       size_of_key_ofs <= max_size_of_key_ofs;
       size_of_key_ofs+= 2)
  {
    key_entry_length= get_size_of_rec_offset() + // key chain header
                      size_of_key_ofs +          // reference to the next key
                      (use_emb_key ?  get_size_of_rec_offset() : key_length);

    size_t space_per_rec= avg_record_length +
                          avg_aux_buffer_incr +
                          key_entry_length+size_of_key_ofs;
    size_t n= buff_size / space_per_rec;

    /*
      TODO: Make a better estimate for this upper bound of
      the number of records in the join buffer.
    */
    size_t max_n= buff_size / (pack_length-length+
                               key_entry_length+size_of_key_ofs);

    /* Target a hash table load factor of about 0.7 */
    hash_entries= (uint) (n / 0.7);
    set_if_bigger(hash_entries, 1);

    if (offset_size((uint)(max_n*key_entry_length)) <=
        size_of_key_ofs)
      break;
  }

  /* Initialize the hash table at the very end of the join buffer */
  hash_table= buff + (buff_size-hash_entries*size_of_key_ofs);
  cleanup_hash_table();
  curr_key_entry= hash_table;

  return 0;
}


/*
  Reallocate the join buffer of a hashed join cache

  SYNOPSIS
    realloc_buffer()

  DESCRIPTION
    The function reallocates the join buffer of the hashed join cache.
    After this it initializes a hash table within the buffer space and
    resets the join cache for writing.

  NOTES
    The function assumes that buff_size contains the new value for the join
    buffer size.

  RETURN VALUE
    0   if the buffer has been successfully reallocated
    1   otherwise
*/

int JOIN_CACHE_HASHED::realloc_buffer()
{
  int rc;
  free();
  rc= MY_TEST(!(buff= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))));
  /*
    NOTE(review): init_hash_table() runs even when the allocation above
    failed (buff == NULL); presumably callers check 'rc' before using the
    buffer -- confirm.
  */
  init_hash_table();
  reset(TRUE);
  return rc;
}


/*
  Get maximum size of the additional space per record used for record keys

  SYNOPSIS
    get_max_key_addon_space_per_record()

  DESCRIPTION
    The function returns the size of the space occupied by one key entry
    and one hash table entry.

  RETURN VALUE
    maximum size of the additional space per record that is used to store
    record keys in the hash table
*/

uint JOIN_CACHE_HASHED::get_max_key_addon_space_per_record()
{
  ulong len;
  TABLE_REF *ref= &join_tab->ref;
  /*
    The total number of hash entries in the hash tables is bounded by
    ceiling(N/0.7) where N is the maximum number of records in the buffer.
    That's why the multiplier 2 is used in the formula below.
  */
  len= (use_emb_key ?  get_size_of_rec_offset() : ref->key_length) +
       size_of_rec_ofs +    // size of the key chain header
       size_of_rec_ofs +    // >= size of the reference to the next key
       2*size_of_rec_ofs;   // >= 2*( size of hash table entry)
  return len;
}


/*
  Reset the buffer of a hashed join cache for reading/writing

  SYNOPSIS
    reset()
      for_writing  if it's TRUE the function reset the buffer for writing

  DESCRIPTION
    This implementation of the virtual function reset() resets the join buffer
    of the JOIN_CACHE_HASHED class for reading or writing.
    Additionally to what the default implementation does this function
    cleans up the hash table allocated within the buffer.

  RETURN VALUE
    none
*/

void JOIN_CACHE_HASHED::reset(bool for_writing)
{
  this->JOIN_CACHE::reset(for_writing);
  if (for_writing && hash_table)
    cleanup_hash_table();
  curr_key_entry= hash_table;
}


/*
  Add a record into the buffer of a hashed join cache

  SYNOPSIS
    put_record()

  DESCRIPTION
    This implementation of the virtual function put_record writes the next
    matching record into the join buffer of the JOIN_CACHE_HASHED class.
    Additionally to what the default implementation does this function
    performs the following.
    It extracts from the record the key value used in lookups for matching
    records and searches for this key in the hash tables from the join cache.
    If it finds the key in the hash table it joins the record to the chain
    of records with this key. If the key is not found in the hash table the
    key is placed into it and a chain containing only the newly added record
    is attached to the key entry.
The key value is either placed in the hash 2965 element added for the key or, if the use_emb_key flag is set, remains in 2966 the record from the partial join. 2967 If the match flag field of a record contains MATCH_IMPOSSIBLE the key is 2968 not created for this record. 2969 2970 RETURN VALUE 2971 TRUE if it has been decided that it should be the last record 2972 in the join buffer, 2973 FALSE otherwise 2974 */ 2975 2976 bool JOIN_CACHE_HASHED::put_record() 2977 { 2978 bool is_full; 2979 uchar *key; 2980 uint key_len= key_length; 2981 uchar *key_ref_ptr; 2982 uchar *link= 0; 2983 TABLE_REF *ref= &join_tab->ref; 2984 uchar *next_ref_ptr= pos; 2985 2986 pos+= get_size_of_rec_offset(); 2987 /* Write the record into the join buffer */ 2988 if (prev_cache) 2989 link= prev_cache->get_curr_rec_link(); 2990 write_record_data(link, &is_full); 2991 2992 if (last_written_is_null_compl) 2993 return is_full; 2994 2995 if (use_emb_key) 2996 key= get_curr_emb_key(); 2997 else 2998 { 2999 /* Build the key over the fields read into the record buffers */ 3000 cp_buffer_from_ref(join->thd, join_tab->table, ref); 3001 key= ref->key_buff; 3002 } 3003 3004 /* Look for the key in the hash table */ 3005 if (key_search(key, key_len, &key_ref_ptr)) 3006 { 3007 uchar *last_next_ref_ptr; 3008 /* 3009 The key is found in the hash table. 3010 Add the record to the circular list of the records attached to this key. 3011 Below 'rec' is the record to be added into the record chain for the found 3012 key, 'key_ref' points to a flatten representation of the st_key_entry 3013 structure that contains the key and the head of the record chain. 
3014 */ 3015 last_next_ref_ptr= get_next_rec_ref(key_ref_ptr+get_size_of_key_offset()); 3016 /* rec->next_rec= key_entry->last_rec->next_rec */ 3017 memcpy(next_ref_ptr, last_next_ref_ptr, get_size_of_rec_offset()); 3018 /* key_entry->last_rec->next_rec= rec */ 3019 store_next_rec_ref(last_next_ref_ptr, next_ref_ptr); 3020 /* key_entry->last_rec= rec */ 3021 store_next_rec_ref(key_ref_ptr+get_size_of_key_offset(), next_ref_ptr); 3022 } 3023 else 3024 { 3025 /* 3026 The key is not found in the hash table. 3027 Put the key into the join buffer linking it with the keys for the 3028 corresponding hash entry. Create a circular list with one element 3029 referencing the record and attach the list to the key in the buffer. 3030 */ 3031 uchar *cp= last_key_entry; 3032 cp-= get_size_of_rec_offset()+get_size_of_key_offset(); 3033 store_next_key_ref(key_ref_ptr, cp); 3034 store_null_key_ref(cp); 3035 store_next_rec_ref(next_ref_ptr, next_ref_ptr); 3036 store_next_rec_ref(cp+get_size_of_key_offset(), next_ref_ptr); 3037 if (use_emb_key) 3038 { 3039 cp-= get_size_of_rec_offset(); 3040 store_emb_key_ref(cp, key); 3041 } 3042 else 3043 { 3044 cp-= key_len; 3045 memcpy(cp, key, key_len); 3046 } 3047 last_key_entry= cp; 3048 DBUG_ASSERT(last_key_entry >= end_pos); 3049 /* Increment the counter of key_entries in the hash table */ 3050 key_entries++; 3051 } 3052 return is_full; 3053 } 3054 3055 3056 /* 3057 Read the next record from the buffer of a hashed join cache 3058 3059 SYNOPSIS 3060 get_record() 3061 3062 DESCRIPTION 3063 Additionally to what the default implementation of the virtual 3064 function get_record does this implementation skips the link element 3065 used to connect the records with the same key into a chain. 
  RETURN VALUE
    TRUE    there are no more records to read from the join buffer
    FALSE   otherwise
*/

bool JOIN_CACHE_HASHED::get_record()
{
  /* Step over the chain-link prefix stored before the record fields */
  pos+= get_size_of_rec_offset();
  return this->JOIN_CACHE::get_record();
}


/*
  Skip record from a hashed join buffer if its match flag is set to MATCH_FOUND

  SYNOPSIS
    skip_if_matched()

  DESCRIPTION
    This implementation of the virtual function skip_if_matched does
    the same as the default implementation does, but it takes into account
    the link element used to connect the records with the same key into a chain.

  RETURN VALUE
    TRUE    the match flag is MATCH_FOUND and the record has been skipped
    FALSE   otherwise
*/

bool JOIN_CACHE_HASHED::skip_if_matched()
{
  uchar *save_pos= pos;
  /* Step over the chain-link prefix before delegating to the base class */
  pos+= get_size_of_rec_offset();
  if (!this->JOIN_CACHE::skip_if_matched())
  {
    /* Record was not skipped: restore the cursor position */
    pos= save_pos;
    return FALSE;
  }
  return TRUE;
}


/*
  Skip record from a hashed join buffer if its match flag dictates to do so

  SYNOPSIS
    skip_if_not_needed_match()

  DESCRIPTION
    This implementation of the virtual function skip_if_not_needed_match does
    the same as the default implementation does, but it takes into account
    the link element used to connect the records with the same key into a chain.
  RETURN VALUE
    TRUE    the match flag dictates to skip the record
    FALSE   the match flag is off
*/

bool JOIN_CACHE_HASHED::skip_if_not_needed_match()
{
  uchar *save_pos= pos;
  /* Step over the chain-link prefix before delegating to the base class */
  pos+= get_size_of_rec_offset();
  if (!this->JOIN_CACHE::skip_if_not_needed_match())
  {
    /* Record was not skipped: restore the cursor position */
    pos= save_pos;
    return FALSE;
  }
  return TRUE;
}


/*
  Search for a key in the hash table of the join buffer

  SYNOPSIS
    key_search()
      key               pointer to the key value
      key_len           key value length
      key_ref_ptr  OUT  position of the reference to the next key from
                        the hash element for the found key, or
                        a position where the reference to the hash
                        element for the key is to be added in the
                        case when the key has not been found

  DESCRIPTION
    The function looks for a key in the hash table of the join buffer.
    If the key is found the function returns the position of the reference
    to the next key from the hash element for the given key.
    Otherwise the function returns the position where the reference to the
    newly created hash element for the given key is to be added.

  RETURN VALUE
    TRUE    the key is found in the hash table
    FALSE   otherwise
*/

bool JOIN_CACHE_HASHED::key_search(uchar *key, uint key_len,
                                   uchar **key_ref_ptr)
{
  bool is_found= FALSE;
  uint idx= (this->*hash_func)(key, key_length);
  uchar *ref_ptr= hash_table+size_of_key_ofs*idx;
  while (!is_null_key_ref(ref_ptr))
  {
    uchar *next_key;
    ref_ptr= get_next_key_ref(ref_ptr);
    next_key= use_emb_key ?
get_emb_key(ref_ptr-get_size_of_rec_offset()) : 3173 ref_ptr-key_length; 3174 3175 if ((this->*hash_cmp_func)(next_key, key, key_len)) 3176 { 3177 is_found= TRUE; 3178 break; 3179 } 3180 } 3181 *key_ref_ptr= ref_ptr; 3182 return is_found; 3183 } 3184 3185 3186 /* 3187 Hash function that considers a key in the hash table as byte array 3188 3189 SYNOPSIS 3190 get_hash_idx_simple() 3191 key pointer to the key value 3192 key_len key value length 3193 3194 DESCRIPTION 3195 The function calculates an index of the hash entry in the hash table 3196 of the join buffer for the given key. It considers the key just as 3197 a sequence of bytes of the length key_len. 3198 3199 RETURN VALUE 3200 the calculated index of the hash entry for the given key 3201 */ 3202 3203 inline 3204 uint JOIN_CACHE_HASHED::get_hash_idx_simple(uchar* key, uint key_len) 3205 { 3206 ulong nr= 1; 3207 ulong nr2= 4; 3208 uchar *pos= key; 3209 uchar *end= key+key_len; 3210 for (; pos < end ; pos++) 3211 { 3212 nr^= (ulong) ((((uint) nr & 63)+nr2)*((uint) *pos))+ (nr << 8); 3213 nr2+= 3; 3214 } 3215 return nr % hash_entries; 3216 } 3217 3218 3219 /* 3220 Hash function that takes into account collations of the components of the key 3221 3222 SYNOPSIS 3223 get_hash_idx_complex() 3224 key pointer to the key value 3225 key_len key value length 3226 3227 DESCRIPTION 3228 The function calculates an index of the hash entry in the hash table 3229 of the join buffer for the given key. It takes into account that the 3230 components of the key may be of a varchar type with different collations. 3231 The function guarantees that the same hash value for any two equal 3232 keys that may differ as byte sequences. 3233 The function takes the info about the components of the key, their 3234 types and used collations from the class member ref_key_info containing 3235 a pointer to the descriptor of the index that can be used for the join 3236 operation. 
3237 3238 RETURN VALUE 3239 the calculated index of the hash entry for the given key 3240 */ 3241 3242 inline 3243 uint JOIN_CACHE_HASHED::get_hash_idx_complex(uchar *key, uint key_len) 3244 { 3245 return 3246 (uint) (key_hashnr(ref_key_info, ref_used_key_parts, key) % hash_entries); 3247 } 3248 3249 3250 /* 3251 Compare two key entries in the hash table as sequence of bytes 3252 3253 SYNOPSIS 3254 equal_keys_simple() 3255 key1 pointer to the first key entry 3256 key2 pointer to the second key entry 3257 key_len the length of the key values 3258 3259 DESCRIPTION 3260 The function compares two key entries in the hash table key1 and key2 3261 as two sequences bytes of the length key_len 3262 3263 RETURN VALUE 3264 TRUE key1 coincides with key2 3265 FALSE otherwise 3266 */ 3267 3268 inline 3269 bool JOIN_CACHE_HASHED::equal_keys_simple(uchar *key1, uchar *key2, 3270 uint key_len) 3271 { 3272 return memcmp(key1, key2, key_len) == 0; 3273 } 3274 3275 3276 /* 3277 Compare two key entries taking into account the used collation 3278 3279 SYNOPSIS 3280 equal_keys_complex() 3281 key1 pointer to the first key entry 3282 key2 pointer to the second key entry 3283 key_len the length of the key values 3284 3285 DESCRIPTION 3286 The function checks whether two key entries in the hash table 3287 key1 and key2 are equal as, possibly, compound keys of a certain 3288 structure whose components may be of a varchar type and may 3289 employ different collations. 3290 The descriptor of the key structure is taken from the class 3291 member ref_key_info. 
  RETURN VALUE
    TRUE    key1 is equal to key2
    FALSE   otherwise
*/

inline
bool JOIN_CACHE_HASHED::equal_keys_complex(uchar *key1, uchar *key2,
                                           uint key_len)
{
  return key_buf_cmp(ref_key_info, ref_used_key_parts, key1, key2) == 0;
}


/*
  Clean up the hash table of the join buffer

  SYNOPSIS
    cleanup_hash_table()

  DESCRIPTION
    The function cleans up the hash table in the join buffer removing all
    hash elements from the table.

  RETURN VALUE
    none
*/

void JOIN_CACHE_HASHED:: cleanup_hash_table()
{
  last_key_entry= hash_table;
  /* Zero everything from the start of the hash table to the buffer end */
  bzero(hash_table, (buff+buff_size)-hash_table);
  key_entries= 0;
}


/*
  Check whether all records in a key chain have their match flags set on

  SYNOPSIS
    check_all_match_flags_for_key()
      key_chain_ptr

  DESCRIPTION
    This function retrieves records in the given circular chain and checks
    whether their match flags are set on. The parameter key_chain_ptr shall
    point to the position in the join buffer storing the reference to the
    last element of this chain.
3342 3343 RETURN VALUE 3344 TRUE if each retrieved record has its match flag set to MATCH_FOUND 3345 FALSE otherwise 3346 */ 3347 3348 bool JOIN_CACHE_HASHED::check_all_match_flags_for_key(uchar *key_chain_ptr) 3349 { 3350 uchar *last_rec_ref_ptr= get_next_rec_ref(key_chain_ptr); 3351 uchar *next_rec_ref_ptr= last_rec_ref_ptr; 3352 do 3353 { 3354 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); 3355 uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; 3356 if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) 3357 return FALSE; 3358 } 3359 while (next_rec_ref_ptr != last_rec_ref_ptr); 3360 return TRUE; 3361 } 3362 3363 3364 /* 3365 Get the next key built for the records from the buffer of a hashed join cache 3366 3367 SYNOPSIS 3368 get_next_key() 3369 key pointer to the buffer where the key value is to be placed 3370 3371 DESCRIPTION 3372 The function reads the next key value stored in the hash table of the 3373 join buffer. Depending on the value of the use_emb_key flag of the 3374 join cache the value is read either from the table itself or from 3375 the record field where it occurs. 3376 3377 RETURN VALUE 3378 length of the key value - if the starting value of 'cur_key_entry' refers 3379 to the position after that referred by the the value of 'last_key_entry', 3380 0 - otherwise. 3381 */ 3382 3383 uint JOIN_CACHE_HASHED::get_next_key(uchar ** key) 3384 { 3385 if (curr_key_entry == last_key_entry) 3386 return 0; 3387 3388 curr_key_entry-= key_entry_length; 3389 3390 *key = use_emb_key ? get_emb_key(curr_key_entry) : curr_key_entry; 3391 3392 DBUG_ASSERT(*key >= buff && *key < hash_table); 3393 3394 return key_length; 3395 } 3396 3397 3398 /* 3399 Initiate an iteration process over records in the joined table 3400 3401 SYNOPSIS 3402 open() 3403 3404 DESCRIPTION 3405 The function initiates the process of iteration over records from the 3406 joined table recurrently performed by the BNL/BKLH join algorithm. 
3407 3408 RETURN VALUE 3409 0 the initiation is a success 3410 error code otherwise 3411 */ 3412 3413 int JOIN_TAB_SCAN::open() 3414 { 3415 save_or_restore_used_tabs(join_tab, FALSE); 3416 is_first_record= TRUE; 3417 join_tab->tracker->r_scans++; 3418 return join_init_read_record(join_tab); 3419 } 3420 3421 3422 /* 3423 Read the next record that can match while scanning the joined table 3424 3425 SYNOPSIS 3426 next() 3427 3428 DESCRIPTION 3429 The function reads the next record from the joined table that can 3430 match some records in the buffer of the join cache 'cache'. To do 3431 this the function calls the function that scans table records and 3432 looks for the next one that meets the condition pushed to the 3433 joined table join_tab. 3434 3435 NOTES 3436 The function catches the signal that kills the query. 3437 3438 RETURN VALUE 3439 0 the next record exists and has been successfully read 3440 error code otherwise 3441 */ 3442 3443 int JOIN_TAB_SCAN::next() 3444 { 3445 int err= 0; 3446 int skip_rc; 3447 READ_RECORD *info= &join_tab->read_record; 3448 SQL_SELECT *select= join_tab->cache_select; 3449 THD *thd= join->thd; 3450 3451 if (is_first_record) 3452 is_first_record= FALSE; 3453 else 3454 err= info->read_record(); 3455 3456 if (!err) 3457 { 3458 join_tab->tracker->r_rows++; 3459 } 3460 3461 while (!err && select && (skip_rc= select->skip_record(thd)) <= 0) 3462 { 3463 if (unlikely(thd->check_killed()) || skip_rc < 0) 3464 return 1; 3465 /* 3466 Move to the next record if the last retrieved record does not 3467 meet the condition pushed to the table join_tab. 3468 */ 3469 err= info->read_record(); 3470 if (!err) 3471 { 3472 join_tab->tracker->r_rows++; 3473 } 3474 } 3475 3476 if (!err) 3477 join_tab->tracker->r_rows_after_where++; 3478 return err; 3479 } 3480 3481 3482 /* 3483 Walk back in join order from join_tab until we encounter a join tab with 3484 tab->cache!=NULL, and save/restore tab->table->status along the way. 
3485 3486 @param save TRUE save 3487 FALSE restore 3488 */ 3489 3490 static void save_or_restore_used_tabs(JOIN_TAB *join_tab, bool save) 3491 { 3492 JOIN_TAB *first= join_tab->bush_root_tab? 3493 join_tab->bush_root_tab->bush_children->start : 3494 join_tab->join->join_tab + join_tab->join->const_tables; 3495 3496 for (JOIN_TAB *tab= join_tab-1; tab != first && !tab->cache; tab--) 3497 { 3498 if (tab->bush_children) 3499 { 3500 for (JOIN_TAB *child= tab->bush_children->start; 3501 child != tab->bush_children->end; 3502 child++) 3503 { 3504 if (save) 3505 child->table->status= child->status; 3506 else 3507 { 3508 tab->status= tab->table->status; 3509 tab->table->status= 0; 3510 } 3511 } 3512 } 3513 3514 if (save) 3515 tab->table->status= tab->status; 3516 else 3517 { 3518 tab->status= tab->table->status; 3519 tab->table->status= 0; 3520 } 3521 } 3522 } 3523 3524 3525 /* 3526 Perform finalizing actions for a scan over the table records 3527 3528 SYNOPSIS 3529 close() 3530 3531 DESCRIPTION 3532 The function performs the necessary restoring actions after 3533 the table scan over the joined table has been finished. 3534 3535 RETURN VALUE 3536 none 3537 */ 3538 3539 void JOIN_TAB_SCAN::close() 3540 { 3541 save_or_restore_used_tabs(join_tab, TRUE); 3542 } 3543 3544 3545 /* 3546 Prepare to iterate over the BNL join cache buffer to look for matches 3547 3548 SYNOPSIS 3549 prepare_look_for_matches() 3550 skip_last <-> ignore the last record in the buffer 3551 3552 DESCRIPTION 3553 The function prepares the join cache for an iteration over the 3554 records in the join buffer. The iteration is performed when looking 3555 for matches for the record from the joined table join_tab that 3556 has been placed into the record buffer of the joined table. 3557 If the value of the parameter skip_last is TRUE then the last 3558 record from the join buffer is ignored. 3559 The function initializes the counter of the records that have been 3560 not iterated over yet. 
3561 3562 RETURN VALUE 3563 TRUE there are no records in the buffer to iterate over 3564 FALSE otherwise 3565 */ 3566 3567 bool JOIN_CACHE_BNL::prepare_look_for_matches(bool skip_last) 3568 { 3569 if (!records) 3570 return TRUE; 3571 reset(FALSE); 3572 rem_records= (uint)records - MY_TEST(skip_last); 3573 return rem_records == 0; 3574 } 3575 3576 3577 /* 3578 Get next record from the BNL join cache buffer when looking for matches 3579 3580 SYNOPSIS 3581 get_next_candidate_for_match 3582 3583 DESCRIPTION 3584 This method is used for iterations over the records from the join 3585 cache buffer when looking for matches for records from join_tab. 3586 The methods performs the necessary preparations to read the next record 3587 from the join buffer into the record buffer by the method 3588 read_next_candidate_for_match, or, to skip the next record from the join 3589 buffer by the method skip_recurrent_candidate_for_match. 3590 This implementation of the virtual method get_next_candidate_for_match 3591 just decrements the counter of the records that are to be iterated over 3592 and returns the current value of the cursor 'pos' as the position of 3593 the record to be processed. 3594 3595 RETURN VALUE 3596 pointer to the position right after the prefix of the current record 3597 in the join buffer if the there is another record to iterate over, 3598 0 - otherwise. 
3599 */ 3600 3601 uchar *JOIN_CACHE_BNL::get_next_candidate_for_match() 3602 { 3603 if (!rem_records) 3604 return 0; 3605 rem_records--; 3606 return pos+base_prefix_length; 3607 } 3608 3609 3610 /* 3611 Check whether the matching record from the BNL cache is to be skipped 3612 3613 SYNOPSIS 3614 skip_next_candidate_for_match 3615 rec_ptr pointer to the position in the join buffer right after the prefix 3616 of the current record 3617 3618 DESCRIPTION 3619 This implementation of the virtual function just calls the 3620 method skip_if_not_needed_match to check whether the record referenced by 3621 ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the 3622 first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and 3623 join_tab is the first inner table of an outer join. 3624 If so, the function just skips this record setting the value of the 3625 cursor 'pos' to the position right after it. 3626 3627 RETURN VALUE 3628 TRUE the record referenced by rec_ptr has been skipped 3629 FALSE otherwise 3630 */ 3631 3632 bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr) 3633 { 3634 pos= rec_ptr-base_prefix_length; 3635 return skip_if_not_needed_match(); 3636 } 3637 3638 3639 /* 3640 Read next record from the BNL join cache buffer when looking for matches 3641 3642 SYNOPSIS 3643 read_next_candidate_for_match 3644 rec_ptr pointer to the position in the join buffer right after the prefix 3645 the current record. 3646 3647 DESCRIPTION 3648 This implementation of the virtual method read_next_candidate_for_match 3649 calls the method get_record to read the record referenced by rec_ptr from 3650 the join buffer into the record buffer. If this record refers to the 3651 fields in the other join buffers the call of get_record ensures that 3652 these fields are read into the corresponding record buffers as well. 3653 This function is supposed to be called after a successful call of 3654 the method get_next_candidate_for_match. 
3655 3656 RETURN VALUE 3657 none 3658 */ 3659 3660 void JOIN_CACHE_BNL::read_next_candidate_for_match(uchar *rec_ptr) 3661 { 3662 pos= rec_ptr-base_prefix_length; 3663 get_record(); 3664 } 3665 3666 3667 /* 3668 Initialize the BNL join cache 3669 3670 SYNOPSIS 3671 init 3672 for_explain join buffer is initialized for explain only 3673 3674 DESCRIPTION 3675 The function initializes the cache structure. It is supposed to be called 3676 right after a constructor for the JOIN_CACHE_BNL. 3677 3678 NOTES 3679 The function first constructs a companion object of the type JOIN_TAB_SCAN, 3680 then it calls the init method of the parent class. 3681 3682 RETURN VALUE 3683 0 initialization with buffer allocations has been succeeded 3684 1 otherwise 3685 */ 3686 3687 int JOIN_CACHE_BNL::init(bool for_explain) 3688 { 3689 DBUG_ENTER("JOIN_CACHE_BNL::init"); 3690 3691 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) 3692 DBUG_RETURN(1); 3693 3694 DBUG_RETURN(JOIN_CACHE::init(for_explain)); 3695 } 3696 3697 3698 /* 3699 Get the chain of records from buffer matching the current candidate for join 3700 3701 SYNOPSIS 3702 get_matching_chain_by_join_key() 3703 3704 DESCRIPTION 3705 This function first build a join key for the record of join_tab that 3706 currently is in the join buffer for this table. Then it looks for 3707 the key entry with this key in the hash table of the join cache. 3708 If such a key entry is found the function returns the pointer to 3709 the head of the chain of records in the join_buffer that match this 3710 key. 3711 3712 RETURN VALUE 3713 The pointer to the corresponding circular list of records if 3714 the key entry with the join key is found, 0 - otherwise. 
3715 */ 3716 3717 uchar *JOIN_CACHE_BNLH::get_matching_chain_by_join_key() 3718 { 3719 uchar *key_ref_ptr; 3720 TABLE *table= join_tab->table; 3721 TABLE_REF *ref= &join_tab->ref; 3722 KEY *keyinfo= join_tab->get_keyinfo_by_key_no(ref->key); 3723 /* Build the join key value out of the record in the record buffer */ 3724 key_copy(key_buff, table->record[0], keyinfo, key_length, TRUE); 3725 /* Look for this key in the join buffer */ 3726 if (!key_search(key_buff, key_length, &key_ref_ptr)) 3727 return 0; 3728 return key_ref_ptr+get_size_of_key_offset(); 3729 } 3730 3731 3732 /* 3733 Prepare to iterate over the BNLH join cache buffer to look for matches 3734 3735 SYNOPSIS 3736 prepare_look_for_matches() 3737 skip_last <-> ignore the last record in the buffer 3738 3739 DESCRIPTION 3740 The function prepares the join cache for an iteration over the 3741 records in the join buffer. The iteration is performed when looking 3742 for matches for the record from the joined table join_tab that 3743 has been placed into the record buffer of the joined table. 3744 If the value of the parameter skip_last is TRUE then the last 3745 record from the join buffer is ignored. 3746 The function builds the hashed key from the join fields of join_tab 3747 and uses this key to look in the hash table of the join cache for 3748 the chain of matching records in in the join buffer. If it finds 3749 such a chain it sets the member last_rec_ref_ptr to point to the 3750 last link of the chain while setting the member next_rec_ref_po 0. 
3751 3752 RETURN VALUE 3753 TRUE there are no matching records in the buffer to iterate over 3754 FALSE otherwise 3755 */ 3756 3757 bool JOIN_CACHE_BNLH::prepare_look_for_matches(bool skip_last) 3758 { 3759 uchar *curr_matching_chain; 3760 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; 3761 if (!(curr_matching_chain= get_matching_chain_by_join_key())) 3762 return 1; 3763 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); 3764 return 0; 3765 } 3766 3767 3768 /* 3769 Get next record from the BNLH join cache buffer when looking for matches 3770 3771 SYNOPSIS 3772 get_next_candidate_for_match 3773 3774 DESCRIPTION 3775 This method is used for iterations over the records from the join 3776 cache buffer when looking for matches for records from join_tab. 3777 The methods performs the necessary preparations to read the next record 3778 from the join buffer into the record buffer by the method 3779 read_next_candidate_for_match, or, to skip the next record from the join 3780 buffer by the method skip_next_candidate_for_match. 3781 This implementation of the virtual method moves to the next record 3782 in the chain of all records from the join buffer that are to be 3783 equi-joined with the current record from join_tab. 3784 3785 RETURN VALUE 3786 pointer to the beginning of the record fields in the join buffer 3787 if the there is another record to iterate over, 0 - otherwise. 3788 */ 3789 3790 uchar *JOIN_CACHE_BNLH::get_next_candidate_for_match() 3791 { 3792 if (next_matching_rec_ref_ptr == last_matching_rec_ref_ptr) 3793 return 0; 3794 next_matching_rec_ref_ptr= get_next_rec_ref(next_matching_rec_ref_ptr ? 
3795 next_matching_rec_ref_ptr : 3796 last_matching_rec_ref_ptr); 3797 return next_matching_rec_ref_ptr+rec_fields_offset; 3798 } 3799 3800 3801 /* 3802 Check whether the matching record from the BNLH cache is to be skipped 3803 3804 SYNOPSIS 3805 skip_next_candidate_for_match 3806 rec_ptr pointer to the position in the join buffer right after 3807 the previous record 3808 3809 DESCRIPTION 3810 This implementation of the virtual function just calls the 3811 method get_match_flag_by_pos to check whether the record referenced 3812 by ref_ptr has its match flag set to MATCH_FOUND. 3813 3814 RETURN VALUE 3815 TRUE the record referenced by rec_ptr has its match flag set to 3816 MATCH_FOUND 3817 FALSE otherwise 3818 */ 3819 3820 bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) 3821 { 3822 return join_tab->check_only_first_match() && 3823 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); 3824 } 3825 3826 3827 /* 3828 Read next record from the BNLH join cache buffer when looking for matches 3829 3830 SYNOPSIS 3831 read_next_candidate_for_match 3832 rec_ptr pointer to the position in the join buffer right after 3833 the previous record 3834 3835 DESCRIPTION 3836 This implementation of the virtual method read_next_candidate_for_match 3837 calls the method get_record_by_pos to read the record referenced by rec_ptr 3838 from the join buffer into the record buffer. If this record refers to 3839 fields in the other join buffers the call of get_record_by_po ensures that 3840 these fields are read into the corresponding record buffers as well. 3841 This function is supposed to be called after a successful call of 3842 the method get_next_candidate_for_match. 
3843 3844 RETURN VALUE 3845 none 3846 */ 3847 3848 void JOIN_CACHE_BNLH::read_next_candidate_for_match(uchar *rec_ptr) 3849 { 3850 get_record_by_pos(rec_ptr); 3851 } 3852 3853 3854 /* 3855 Initialize the BNLH join cache 3856 3857 SYNOPSIS 3858 init 3859 for_explain join buffer is initialized for explain only 3860 3861 DESCRIPTION 3862 The function initializes the cache structure. It is supposed to be called 3863 right after a constructor for the JOIN_CACHE_BNLH. 3864 3865 NOTES 3866 The function first constructs a companion object of the type JOIN_TAB_SCAN, 3867 then it calls the init method of the parent class. 3868 3869 RETURN VALUE 3870 0 initialization with buffer allocations has been succeeded 3871 1 otherwise 3872 */ 3873 3874 int JOIN_CACHE_BNLH::init(bool for_explain) 3875 { 3876 DBUG_ENTER("JOIN_CACHE_BNLH::init"); 3877 3878 if (!(join_tab_scan= new JOIN_TAB_SCAN(join, join_tab))) 3879 DBUG_RETURN(1); 3880 3881 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); 3882 } 3883 3884 3885 /* 3886 Calculate the increment of the MRR buffer for a record write 3887 3888 SYNOPSIS 3889 aux_buffer_incr() 3890 3891 DESCRIPTION 3892 This implementation of the virtual function aux_buffer_incr determines 3893 for how much the size of the MRR buffer should be increased when another 3894 record is added to the cache. 
3895 3896 RETURN VALUE 3897 the increment of the size of the MRR buffer for the next record 3898 */ 3899 3900 uint JOIN_TAB_SCAN_MRR::aux_buffer_incr(size_t recno) 3901 { 3902 uint incr= 0; 3903 TABLE_REF *ref= &join_tab->ref; 3904 TABLE *tab= join_tab->table; 3905 ha_rows rec_per_key= 3906 (ha_rows) tab->key_info[ref->key].actual_rec_per_key(ref->key_parts-1); 3907 set_if_bigger(rec_per_key, 1); 3908 if (recno == 1) 3909 incr= ref->key_length + tab->file->ref_length; 3910 incr+= (uint)(tab->file->stats.mrr_length_per_rec * rec_per_key); 3911 return incr; 3912 } 3913 3914 3915 /* 3916 Initiate iteration over records returned by MRR for the current join buffer 3917 3918 SYNOPSIS 3919 open() 3920 3921 DESCRIPTION 3922 The function initiates the process of iteration over the records from 3923 join_tab returned by the MRR interface functions for records from 3924 the join buffer. Such an iteration is performed by the BKA/BKAH join 3925 algorithm for each new refill of the join buffer. 3926 The function calls the MRR handler function multi_range_read_init to 3927 initiate this process. 3928 3929 RETURN VALUE 3930 0 the initiation is a success 3931 error code otherwise 3932 */ 3933 3934 int JOIN_TAB_SCAN_MRR::open() 3935 { 3936 handler *file= join_tab->table->file; 3937 3938 join_tab->table->null_row= 0; 3939 3940 3941 /* Dynamic range access is never used with BKA */ 3942 DBUG_ASSERT(join_tab->use_quick != 2); 3943 3944 join_tab->tracker->r_scans++; 3945 save_or_restore_used_tabs(join_tab, FALSE); 3946 3947 init_mrr_buff(); 3948 3949 /* 3950 Prepare to iterate over keys from the join buffer and to get 3951 matching candidates obtained with MMR handler functions. 
3952 */ 3953 if (!file->inited) 3954 file->ha_index_init(join_tab->ref.key, 1); 3955 ranges= cache->get_number_of_ranges_for_mrr(); 3956 if (!join_tab->cache_idx_cond) 3957 range_seq_funcs.skip_index_tuple= 0; 3958 return file->multi_range_read_init(&range_seq_funcs, (void*) cache, 3959 ranges, mrr_mode, &mrr_buff); 3960 } 3961 3962 3963 /* 3964 Read the next record returned by MRR for the current join buffer 3965 3966 SYNOPSIS 3967 next() 3968 3969 DESCRIPTION 3970 The function reads the next record from the joined table join_tab 3971 returned by the MRR handler function multi_range_read_next for 3972 the current refill of the join buffer. The record is read into 3973 the record buffer used for join_tab records in join operations. 3974 3975 RETURN VALUE 3976 0 the next record exists and has been successfully read 3977 error code otherwise 3978 */ 3979 3980 int JOIN_TAB_SCAN_MRR::next() 3981 { 3982 char **ptr= (char **) cache->get_curr_association_ptr(); 3983 3984 DBUG_ASSERT(sizeof(range_id_t) == sizeof(*ptr)); 3985 int rc= join_tab->table->file->multi_range_read_next((range_id_t*)ptr) ? -1 : 0; 3986 if (!rc) 3987 { 3988 join_tab->tracker->r_rows++; 3989 join_tab->tracker->r_rows_after_where++; 3990 /* 3991 If a record in in an incremental cache contains no fields then the 3992 association for the last record in cache will be equal to cache->end_pos 3993 */ 3994 /* 3995 psergey: this makes no sense where HA_MRR_NO_ASSOC is used. 
3996 DBUG_ASSERT(cache->buff <= (uchar *) (*ptr) && 3997 (uchar *) (*ptr) <= cache->end_pos); 3998 */ 3999 } 4000 return rc; 4001 } 4002 4003 4004 static 4005 void bka_range_seq_key_info(void *init_params, uint *length, 4006 key_part_map *map) 4007 { 4008 TABLE_REF *ref= &(((JOIN_CACHE*)init_params)->join_tab->ref); 4009 *length= ref->key_length; 4010 *map= (key_part_map(1) << ref->key_parts) - 1; 4011 } 4012 4013 4014 /* 4015 Initialize retrieval of range sequence for BKA join algorithm 4016 4017 SYNOPSIS 4018 bka_range_seq_init() 4019 init_params pointer to the BKA join cache object 4020 n_ranges the number of ranges obtained 4021 flags combination of MRR flags 4022 4023 DESCRIPTION 4024 The function interprets init_param as a pointer to a JOIN_CACHE_BKA 4025 object. The function prepares for an iteration over the join keys 4026 built for all records from the cache join buffer. 4027 4028 NOTE 4029 This function are used only as a callback function. 4030 4031 RETURN VALUE 4032 init_param value that is to be used as a parameter of bka_range_seq_next() 4033 */ 4034 4035 static 4036 range_seq_t bka_range_seq_init(void *init_param, uint n_ranges, uint flags) 4037 { 4038 DBUG_ENTER("bka_range_seq_init"); 4039 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) init_param; 4040 cache->reset(0); 4041 DBUG_RETURN((range_seq_t) init_param); 4042 } 4043 4044 4045 /* 4046 Get the next range/key over records from the join buffer used by a BKA cache 4047 4048 SYNOPSIS 4049 bka_range_seq_next() 4050 seq the value returned by bka_range_seq_init 4051 range OUT reference to the next range 4052 4053 DESCRIPTION 4054 The function interprets seq as a pointer to a JOIN_CACHE_BKA 4055 object. The function returns a pointer to the range descriptor 4056 for the key built over the next record from the join buffer. 4057 4058 NOTE 4059 This function are used only as a callback function. 
4060 4061 RETURN VALUE 4062 FALSE ok, the range structure filled with info about the next range/key 4063 TRUE no more ranges 4064 */ 4065 4066 static 4067 bool bka_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) 4068 { 4069 DBUG_ENTER("bka_range_seq_next"); 4070 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; 4071 TABLE_REF *ref= &cache->join_tab->ref; 4072 key_range *start_key= &range->start_key; 4073 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) 4074 { 4075 start_key->keypart_map= (1 << ref->key_parts) - 1; 4076 start_key->flag= HA_READ_KEY_EXACT; 4077 range->end_key= *start_key; 4078 range->end_key.flag= HA_READ_AFTER_KEY; 4079 range->ptr= (char *) cache->get_curr_rec(); 4080 range->range_flag= EQ_RANGE; 4081 DBUG_RETURN(0); 4082 } 4083 DBUG_RETURN(1); 4084 } 4085 4086 4087 /* 4088 Check whether range_info orders to skip the next record from BKA buffer 4089 4090 SYNOPSIS 4091 bka_range_seq_skip_record() 4092 seq value returned by bka_range_seq_init() 4093 range_info information about the next range 4094 rowid [NOT USED] rowid of the record to be checked 4095 4096 4097 DESCRIPTION 4098 The function interprets seq as a pointer to a JOIN_CACHE_BKA object. 4099 The function returns TRUE if the record with this range_info 4100 is to be filtered out from the stream of records returned by 4101 multi_range_read_next(). 4102 4103 NOTE 4104 This function are used only as a callback function. 
4105 4106 RETURN VALUE 4107 1 record with this range_info is to be filtered out from the stream 4108 of records returned by multi_range_read_next() 4109 0 the record is to be left in the stream 4110 */ 4111 4112 static 4113 bool bka_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) 4114 { 4115 DBUG_ENTER("bka_range_seq_skip_record"); 4116 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; 4117 bool res= cache->get_match_flag_by_pos((uchar *) range_info) == 4118 JOIN_CACHE::MATCH_FOUND; 4119 DBUG_RETURN(res); 4120 } 4121 4122 4123 /* 4124 Check if the record combination from BKA cache matches the index condition 4125 4126 SYNOPSIS 4127 bka_skip_index_tuple() 4128 rseq value returned by bka_range_seq_init() 4129 range_info record chain for the next range/key returned by MRR 4130 4131 DESCRIPTION 4132 This is wrapper for JOIN_CACHE_BKA::skip_index_tuple method, 4133 see comments there. 4134 4135 NOTE 4136 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. 
4137 4138 RETURN VALUE 4139 0 The record combination satisfies the index condition 4140 1 Otherwise 4141 */ 4142 4143 static 4144 bool bka_skip_index_tuple(range_seq_t rseq, range_id_t range_info) 4145 { 4146 DBUG_ENTER("bka_skip_index_tuple"); 4147 JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; 4148 THD *thd= cache->thd(); 4149 bool res; 4150 status_var_increment(thd->status_var.ha_icp_attempts); 4151 if (!(res= cache->skip_index_tuple(range_info))) 4152 status_var_increment(thd->status_var.ha_icp_match); 4153 DBUG_RETURN(res); 4154 } 4155 4156 4157 /* 4158 Prepare to read the record from BKA cache matching the current joined record 4159 4160 SYNOPSIS 4161 prepare_look_for_matches() 4162 skip_last <-> ignore the last record in the buffer (always unused here) 4163 4164 DESCRIPTION 4165 The function prepares to iterate over records in the join cache buffer 4166 matching the record loaded into the record buffer for join_tab when 4167 performing join operation by BKA join algorithm. With BKA algorithms the 4168 record loaded into the record buffer for join_tab always has a direct 4169 reference to the matching records from the join buffer. When the regular 4170 BKA join algorithm is employed the record from join_tab can refer to 4171 only one such record. 4172 The function sets the counter of the remaining records from the cache 4173 buffer that would match the current join_tab record to 1. 4174 4175 RETURN VALUE 4176 TRUE there are no records in the buffer to iterate over 4177 FALSE otherwise 4178 */ 4179 4180 bool JOIN_CACHE_BKA::prepare_look_for_matches(bool skip_last) 4181 { 4182 if (!records) 4183 return TRUE; 4184 rem_records= 1; 4185 return FALSE; 4186 } 4187 4188 4189 /* 4190 Get the record from the BKA cache matching the current joined record 4191 4192 SYNOPSIS 4193 get_next_candidate_for_match 4194 4195 DESCRIPTION 4196 This method is used for iterations over the records from the join 4197 cache buffer when looking for matches for records from join_tab. 
4198 The method performs the necessary preparations to read the next record 4199 from the join buffer into the record buffer by the method 4200 read_next_candidate_for_match, or, to skip the next record from the join 4201 buffer by the method skip_if_not_needed_match. 4202 This implementation of the virtual method get_next_candidate_for_match 4203 just decrements the counter of the records that are to be iterated over 4204 and returns the value of curr_association as a reference to the position 4205 of the beginning of the record fields in the buffer. 4206 4207 RETURN VALUE 4208 pointer to the start of the record fields in the join buffer 4209 if the there is another record to iterate over, 0 - otherwise. 4210 */ 4211 4212 uchar *JOIN_CACHE_BKA::get_next_candidate_for_match() 4213 { 4214 if (!rem_records) 4215 return 0; 4216 rem_records--; 4217 return curr_association; 4218 } 4219 4220 4221 /* 4222 Check whether the matching record from the BKA cache is to be skipped 4223 4224 SYNOPSIS 4225 skip_next_candidate_for_match 4226 rec_ptr pointer to the position in the join buffer right after 4227 the previous record 4228 4229 DESCRIPTION 4230 This implementation of the virtual function just calls the 4231 method get_match_flag_by_pos to check whether the record referenced 4232 by ref_ptr has its match flag set to MATCH_FOUND. 
4233 4234 RETURN VALUE 4235 TRUE the record referenced by rec_ptr has its match flag set to 4236 MATCH_FOUND 4237 FALSE otherwise 4238 */ 4239 4240 bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) 4241 { 4242 return join_tab->check_only_first_match() && 4243 (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); 4244 } 4245 4246 4247 /* 4248 Read the next record from the BKA join cache buffer when looking for matches 4249 4250 SYNOPSIS 4251 read_next_candidate_for_match 4252 rec_ptr pointer to the position in the join buffer right after 4253 the previous record 4254 4255 DESCRIPTION 4256 This implementation of the virtual method read_next_candidate_for_match 4257 calls the method get_record_by_pos to read the record referenced by rec_ptr 4258 from the join buffer into the record buffer. If this record refers to 4259 fields in the other join buffers the call of get_record_by_po ensures that 4260 these fields are read into the corresponding record buffers as well. 4261 This function is supposed to be called after a successful call of 4262 the method get_next_candidate_for_match. 4263 4264 RETURN VALUE 4265 none 4266 */ 4267 4268 void JOIN_CACHE_BKA::read_next_candidate_for_match(uchar *rec_ptr) 4269 { 4270 get_record_by_pos(rec_ptr); 4271 } 4272 4273 4274 /* 4275 Initialize the BKA join cache 4276 4277 SYNOPSIS 4278 init 4279 for_explain join buffer is initialized for explain only 4280 4281 4282 DESCRIPTION 4283 The function initializes the cache structure. It is supposed to be called 4284 right after a constructor for the JOIN_CACHE_BKA. 4285 4286 NOTES 4287 The function first constructs a companion object of the type 4288 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. 
4289 4290 RETURN VALUE 4291 0 initialization with buffer allocations has been succeeded 4292 1 otherwise 4293 */ 4294 4295 int JOIN_CACHE_BKA::init(bool for_explain) 4296 { 4297 int res; 4298 bool check_only_first_match= join_tab->check_only_first_match(); 4299 4300 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, 4301 bka_range_seq_init, 4302 bka_range_seq_next, 4303 check_only_first_match ? bka_range_seq_skip_record : 0, 4304 bka_skip_index_tuple }; 4305 4306 DBUG_ENTER("JOIN_CACHE_BKA::init"); 4307 4308 JOIN_TAB_SCAN_MRR *jsm; 4309 if (!(join_tab_scan= jsm= new JOIN_TAB_SCAN_MRR(join, join_tab, 4310 mrr_mode, rs_funcs))) 4311 DBUG_RETURN(1); 4312 4313 if ((res= JOIN_CACHE::init(for_explain))) 4314 DBUG_RETURN(res); 4315 4316 if (use_emb_key) 4317 jsm->mrr_mode |= HA_MRR_MATERIALIZED_KEYS; 4318 4319 DBUG_RETURN(0); 4320 } 4321 4322 4323 /* 4324 Get the key built over the next record from BKA join buffer 4325 4326 SYNOPSIS 4327 get_next_key() 4328 key pointer to the buffer where the key value is to be placed 4329 4330 DESCRIPTION 4331 The function reads key fields from the current record in the join buffer. 4332 and builds the key value out of these fields that will be used to access 4333 the 'join_tab' table. Some of key fields may belong to previous caches. 4334 They are accessed via record references to the record parts stored in the 4335 previous join buffers. The other key fields always are placed right after 4336 the flag fields of the record. 4337 If the key is embedded, which means that its value can be read directly 4338 from the join buffer, then *key is set to the beginning of the key in 4339 this buffer. Otherwise the key is built in the join_tab->ref->key_buff. 4340 The function returns the length of the key if it succeeds ro read it. 4341 If is assumed that the functions starts reading at the position of 4342 the record length which is provided for each records in a BKA cache. 
  After the key is built the 'pos' value points to the first position after
  the current record.
  The function just skips the records with MATCH_IMPOSSIBLE in the
  match flag field if there is any.
  The function returns 0 if the initial position is after the beginning
  of the record fields for last record from the join buffer.

  RETURN VALUE
    length of the key value - if the starting value of 'pos' points to
    the position before the fields for the last record,
    0 - otherwise.
*/

uint JOIN_CACHE_BKA::get_next_key(uchar ** key)
{
  uint len;
  uint32 rec_len;
  uchar *init_pos;
  JOIN_CACHE *cache;

start:

  /* Any record in a BKA cache is prepended with its length */
  DBUG_ASSERT(with_length);

  /* No more records: reading would run past the last stored record */
  if ((pos+size_of_rec_len) > last_rec_pos || !records)
    return 0;

  /* Read the length of the record */
  rec_len= get_rec_length(pos);
  pos+= size_of_rec_len;
  init_pos= pos;              /* remember where the record body starts */

  /* Read a reference to the previous cache if any */
  if (prev_cache)
    pos+= prev_cache->get_size_of_rec_offset();

  curr_rec_pos= pos;

  /* Read all flag fields of the record */
  read_flag_fields();

  /* Records marked MATCH_IMPOSSIBLE can never produce a match: skip them */
  if (with_match_flag &&
      (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE )
  {
    pos= init_pos+rec_len;    /* advance to the next record ... */
    goto start;               /* ... and retry from the top */
  }

  if (use_emb_key)
  {
    /* An embedded key is taken directly from the join buffer */
    *key= pos;
    len= emb_key_length;
  }
  else
  {
    /* Read key arguments from previous caches if there are any such fields */
    if (external_key_arg_fields)
    {
      uchar *rec_ptr= curr_rec_pos;
      uint key_arg_count= external_key_arg_fields;
      CACHE_FIELD **copy_ptr= blob_ptr-key_arg_count;
      /* Walk the chain of previous caches until all key args are read */
      for (cache= prev_cache; key_arg_count; cache= cache->prev_cache)
      {
        /*
          NOTE(review): this inner 'len' deliberately shadows the outer
          'len'; it is only an in/out scratch offset for
          read_referenced_field and never reaches the return value.
        */
        uint len= 0;
        DBUG_ASSERT(cache);
        rec_ptr= cache->get_rec_ref(rec_ptr);
        /* Skip caches that exported no referenced fields */
        while (!cache->referenced_fields)
        {
          cache= cache->prev_cache;
          DBUG_ASSERT(cache);
          rec_ptr= cache->get_rec_ref(rec_ptr);
        }
        while (key_arg_count &&
               cache->read_referenced_field(*copy_ptr, rec_ptr, &len))
        {
          copy_ptr++;
          --key_arg_count;
        }
      }
    }

    /*
      Read the other key arguments from the current record. The fields for
      these arguments are always first in the sequence of the record's fields.
    */
    CACHE_FIELD *copy= field_descr+flag_fields;
    CACHE_FIELD *copy_end= copy+local_key_arg_fields;
    bool blob_in_rec_buff= blob_data_is_in_rec_buff(curr_rec_pos);
    for ( ; copy < copy_end; copy++)
      read_record_field(copy, blob_in_rec_buff);

    /* Build the key over the fields read into the record buffers */
    TABLE_REF *ref= &join_tab->ref;
    cp_buffer_from_ref(join->thd, join_tab->table, ref);
    *key= ref->key_buff;
    len= ref->key_length;
  }

  /* Position at the first byte after the current record */
  pos= init_pos+rec_len;

  return len;
}


/*
  Check the index condition of the joined table for a record from the BKA cache

  SYNOPSIS
    skip_index_tuple()
      range_info  pointer to the record returned by MRR

  DESCRIPTION
    This function is invoked from MRR implementation to check if an index
    tuple matches the index condition. It is used in the case where the index
    condition actually depends on both columns of the used index and columns
    from previous tables.

  NOTES
    Accessing columns of the previous tables requires special handling with
    BKA. The idea of BKA is to collect record combinations in a buffer and
    then do a batch of ref access lookups, i.e. by the time we're doing a
    lookup its previous-records-combination is not in prev_table->record[0]
    but somewhere in the join buffer.
4468 We need to get it from there back into prev_table(s)->record[0] before we 4469 can evaluate the index condition, and that's why we need this function 4470 instead of regular IndexConditionPushdown. 4471 4472 NOTES 4473 Possible optimization: 4474 Before we unpack the record from a previous table 4475 check if this table is used in the condition. 4476 If so then unpack the record otherwise skip the unpacking. 4477 This should be done by a special virtual method 4478 get_partial_record_by_pos(). 4479 4480 RETURN VALUE 4481 1 the record combination does not satisfies the index condition 4482 0 otherwise 4483 */ 4484 4485 bool JOIN_CACHE_BKA::skip_index_tuple(range_id_t range_info) 4486 { 4487 DBUG_ENTER("JOIN_CACHE_BKA::skip_index_tuple"); 4488 get_record_by_pos((uchar*)range_info); 4489 DBUG_RETURN(!join_tab->cache_idx_cond->val_int()); 4490 } 4491 4492 4493 4494 /* 4495 Initialize retrieval of range sequence for the BKAH join algorithm 4496 4497 SYNOPSIS 4498 bkah_range_seq_init() 4499 init_params pointer to the BKAH join cache object 4500 n_ranges the number of ranges obtained 4501 flags combination of MRR flags 4502 4503 DESCRIPTION 4504 The function interprets init_param as a pointer to a JOIN_CACHE_BKAH 4505 object. The function prepares for an iteration over distinct join keys 4506 built over the records from the cache join buffer. 4507 4508 NOTE 4509 This function are used only as a callback function. 
4510 4511 RETURN VALUE 4512 init_param value that is to be used as a parameter of 4513 bkah_range_seq_next() 4514 */ 4515 4516 static 4517 range_seq_t bkah_range_seq_init(void *init_param, uint n_ranges, uint flags) 4518 { 4519 DBUG_ENTER("bkah_range_seq_init"); 4520 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) init_param; 4521 cache->reset(0); 4522 DBUG_RETURN((range_seq_t) init_param); 4523 } 4524 4525 4526 /* 4527 Get the next range/key over records from the join buffer of a BKAH cache 4528 4529 SYNOPSIS 4530 bkah_range_seq_next() 4531 seq value returned by bkah_range_seq_init() 4532 range OUT reference to the next range 4533 4534 DESCRIPTION 4535 The function interprets seq as a pointer to a JOIN_CACHE_BKAH 4536 object. The function returns a pointer to the range descriptor 4537 for the next unique key built over records from the join buffer. 4538 4539 NOTE 4540 This function are used only as a callback function. 4541 4542 RETURN VALUE 4543 FALSE ok, the range structure filled with info about the next range/key 4544 TRUE no more ranges 4545 */ 4546 4547 static 4548 bool bkah_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) 4549 { 4550 DBUG_ENTER("bkah_range_seq_next"); 4551 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; 4552 TABLE_REF *ref= &cache->join_tab->ref; 4553 key_range *start_key= &range->start_key; 4554 if ((start_key->length= cache->get_next_key((uchar **) &start_key->key))) 4555 { 4556 start_key->keypart_map= (1 << ref->key_parts) - 1; 4557 start_key->flag= HA_READ_KEY_EXACT; 4558 range->end_key= *start_key; 4559 range->end_key.flag= HA_READ_AFTER_KEY; 4560 range->ptr= (char *) cache->get_curr_key_chain(); 4561 range->range_flag= EQ_RANGE; 4562 DBUG_RETURN(0); 4563 } 4564 DBUG_RETURN(1); 4565 } 4566 4567 4568 /* 4569 Check whether range_info orders to skip the next record from BKAH join buffer 4570 4571 SYNOPSIS 4572 bkah_range_seq_skip_record() 4573 seq value returned by bkah_range_seq_init() 4574 range_info information about the next 
range/key returned by MRR 4575 rowid [NOT USED] rowid of the record to be checked (not used) 4576 4577 DESCRIPTION 4578 The function interprets seq as a pointer to a JOIN_CACHE_BKAH 4579 object. The function returns TRUE if the record with this range_info 4580 is to be filtered out from the stream of records returned by 4581 multi_range_read_next(). 4582 4583 NOTE 4584 This function are used only as a callback function. 4585 4586 RETURN VALUE 4587 1 record with this range_info is to be filtered out from the stream 4588 of records returned by multi_range_read_next() 4589 0 the record is to be left in the stream 4590 */ 4591 4592 static 4593 bool bkah_range_seq_skip_record(range_seq_t rseq, range_id_t range_info, uchar *rowid) 4594 { 4595 DBUG_ENTER("bkah_range_seq_skip_record"); 4596 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; 4597 bool res= cache->check_all_match_flags_for_key((uchar *) range_info); 4598 DBUG_RETURN(res); 4599 } 4600 4601 4602 /* 4603 Check if the record combination from BKAH cache matches the index condition 4604 4605 SYNOPSIS 4606 bkah_skip_index_tuple() 4607 rseq value returned by bka_range_seq_init() 4608 range_info record chain for the next range/key returned by MRR 4609 4610 DESCRIPTION 4611 This is wrapper for JOIN_CACHE_BKA_UNIQUE::skip_index_tuple method, 4612 see comments there. 4613 4614 NOTE 4615 This function is used as a RANGE_SEQ_IF::skip_index_tuple callback. 
4616 4617 RETURN VALUE 4618 0 some records from the chain satisfy the index condition 4619 1 otherwise 4620 */ 4621 4622 static 4623 bool bkah_skip_index_tuple(range_seq_t rseq, range_id_t range_info) 4624 { 4625 DBUG_ENTER("bka_unique_skip_index_tuple"); 4626 JOIN_CACHE_BKAH *cache= (JOIN_CACHE_BKAH *) rseq; 4627 THD *thd= cache->thd(); 4628 bool res; 4629 status_var_increment(thd->status_var.ha_icp_attempts); 4630 if (!(res= cache->skip_index_tuple(range_info))) 4631 status_var_increment(thd->status_var.ha_icp_match); 4632 DBUG_RETURN(res); 4633 } 4634 4635 4636 /* 4637 Prepare to read record from BKAH cache matching the current joined record 4638 4639 SYNOPSIS 4640 prepare_look_for_matches() 4641 skip_last <-> ignore the last record in the buffer (always unused here) 4642 4643 DESCRIPTION 4644 The function prepares to iterate over records in the join cache buffer 4645 matching the record loaded into the record buffer for join_tab when 4646 performing join operation by BKAH join algorithm. With BKAH algorithm, if 4647 association labels are used, then record loaded into the record buffer 4648 for join_tab always has a direct reference to the chain of the mathing 4649 records from the join buffer. If association labels are not used then 4650 then the chain of the matching records is obtained by the call of the 4651 get_key_chain_by_join_key function. 4652 4653 RETURN VALUE 4654 TRUE there are no records in the buffer to iterate over 4655 FALSE otherwise 4656 */ 4657 4658 bool JOIN_CACHE_BKAH::prepare_look_for_matches(bool skip_last) 4659 { 4660 last_matching_rec_ref_ptr= next_matching_rec_ref_ptr= 0; 4661 if (no_association && 4662 !(curr_matching_chain= get_matching_chain_by_join_key())) //psergey: added '!' 
4663 return 1; 4664 last_matching_rec_ref_ptr= get_next_rec_ref(curr_matching_chain); 4665 return 0; 4666 } 4667 4668 /* 4669 Initialize the BKAH join cache 4670 4671 SYNOPSIS 4672 init 4673 for_explain join buffer is initialized for explain only 4674 4675 DESCRIPTION 4676 The function initializes the cache structure. It is supposed to be called 4677 right after a constructor for the JOIN_CACHE_BKAH. 4678 4679 NOTES 4680 The function first constructs a companion object of the type 4681 JOIN_TAB_SCAN_MRR, then it calls the init method of the parent class. 4682 4683 RETURN VALUE 4684 0 initialization with buffer allocations has been succeeded 4685 1 otherwise 4686 */ 4687 4688 int JOIN_CACHE_BKAH::init(bool for_explain) 4689 { 4690 bool check_only_first_match= join_tab->check_only_first_match(); 4691 4692 no_association= MY_TEST(mrr_mode & HA_MRR_NO_ASSOCIATION); 4693 4694 RANGE_SEQ_IF rs_funcs= { bka_range_seq_key_info, 4695 bkah_range_seq_init, 4696 bkah_range_seq_next, 4697 check_only_first_match && !no_association ? 4698 bkah_range_seq_skip_record : 0, 4699 bkah_skip_index_tuple }; 4700 4701 DBUG_ENTER("JOIN_CACHE_BKAH::init"); 4702 4703 if (!(join_tab_scan= new JOIN_TAB_SCAN_MRR(join, join_tab, 4704 mrr_mode, rs_funcs))) 4705 DBUG_RETURN(1); 4706 4707 DBUG_RETURN(JOIN_CACHE_HASHED::init(for_explain)); 4708 } 4709 4710 4711 /* 4712 Check the index condition of the joined table for a record from the BKA cache 4713 4714 SYNOPSIS 4715 skip_index_tuple() 4716 range_info record chain returned by MRR 4717 4718 DESCRIPTION 4719 See JOIN_CACHE_BKA::skip_index_tuple(). 4720 This function is the variant for use with rhe class JOIN_CACHE_BKAH. 4721 The difference from JOIN_CACHE_BKA case is that there may be multiple 4722 previous table record combinations that share the same key(MRR range). 
4723 As a consequence, we need to loop through the chain of all table record 4724 combinations that match the given MRR range key range_info until we find 4725 one that satisfies the index condition. 4726 4727 NOTE 4728 Possible optimization: 4729 Before we unpack the record from a previous table 4730 check if this table is used in the condition. 4731 If so then unpack the record otherwise skip the unpacking. 4732 This should be done by a special virtual method 4733 get_partial_record_by_pos(). 4734 4735 RETURN VALUE 4736 1 any record combination from the chain referred by range_info 4737 does not satisfy the index condition 4738 0 otherwise 4739 4740 4741 */ 4742 4743 bool JOIN_CACHE_BKAH::skip_index_tuple(range_id_t range_info) 4744 { 4745 uchar *last_rec_ref_ptr= get_next_rec_ref((uchar*) range_info); 4746 uchar *next_rec_ref_ptr= last_rec_ref_ptr; 4747 DBUG_ENTER("JOIN_CACHE_BKAH::skip_index_tuple"); 4748 do 4749 { 4750 next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); 4751 uchar *rec_ptr= next_rec_ref_ptr + rec_fields_offset; 4752 get_record_by_pos(rec_ptr); 4753 if (join_tab->cache_idx_cond->val_int()) 4754 DBUG_RETURN(FALSE); 4755 } while(next_rec_ref_ptr != last_rec_ref_ptr); 4756 DBUG_RETURN(TRUE); 4757 } 4758