1/* 2 Copyright (c) 2018 MariaDB Corporation 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; version 2 of the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16*/ 17 18 19#ifndef MY_FUNCTION_NAME 20#error MY_FUNCTION_NAME is not defined 21#endif 22#ifndef MY_MB_WC 23#error MY_MB_WC is not defined 24#endif 25#ifndef MY_LIKE_RANGE 26#error MY_LIKE_RANGE is not defined 27#endif 28#ifndef MY_UCA_ASCII_OPTIMIZE 29#error MY_ASCII_OPTIMIZE is not defined 30#endif 31#ifndef MY_UCA_COMPILE_CONTRACTIONS 32#error MY_UCA_COMPILE_CONTRACTIONS is not defined 33#endif 34#ifndef MY_UCA_COLL_INIT 35#error MY_UCA_COLL_INIT is not defined 36#endif 37 38#include "ctype-uca-scanner_next.inl" 39#define SCANNER_NEXT_NCHARS 40#include "ctype-uca-scanner_next.inl" 41 42/* 43 Compares two strings according to the collation 44 45 SYNOPSIS: 46 strnncoll_onelevel() 47 cs Character set information 48 level Weight level (0 primary, 1 secondary, 2 tertiary, etc) 49 s First string 50 slen First string length 51 t Second string 52 tlen Seconf string length 53 level DUCETweight level 54 55 NOTES: 56 Initializes two weight scanners and gets weights 57 corresponding to two strings in a loop. If weights are not 58 the same at some step then returns their difference. 59 60 In the while() comparison these situations are possible: 61 1. (s_res>0) and (t_res>0) and (s_res == t_res) 62 Weights are the same so far, continue comparison 63 2. (s_res>0) and (t_res>0) and (s_res!=t_res) 64 A difference has been found, return. 65 3. (s_res>0) and (t_res<0) 66 We have reached the end of the second string, or found 67 an illegal multibyte sequence in the second string. 68 Return a positive number, i.e. the first string is bigger. 69 4. (s_res<0) and (t_res>0) 70 We have reached the end of the first string, or found 71 an illegal multibyte sequence in the first string. 72 Return a negative number, i.e. the second string is bigger. 73 5. (s_res<0) and (t_res<0) 74 Both scanners returned -1. It means we have riched 75 the end-of-string of illegal-sequence in both strings 76 at the same time. Return 0, strings are equal. 77 78 RETURN 79 Difference between two strings, according to the collation: 80 0 - means strings are equal 81 negative number - means the first string is smaller 82 positive number - means the first string is bigger 83*/ 84 85static int 86MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs, 87 const MY_UCA_WEIGHT_LEVEL *level, 88 const uchar *s, size_t slen, 89 const uchar *t, size_t tlen, 90 my_bool t_is_prefix) 91{ 92 my_uca_scanner sscanner; 93 my_uca_scanner tscanner; 94 int s_res; 95 int t_res; 96 97 my_uca_scanner_init_any(&sscanner, cs, level, s, slen); 98 my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); 99 100 do 101 { 102 s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); 103 t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); 104 } while ( s_res == t_res && s_res >0); 105 106 return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res); 107} 108 109 110/* 111 One-level, PAD SPACE. 112*/ 113static int 114MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs, 115 const uchar *s, size_t slen, 116 const uchar *t, size_t tlen, 117 my_bool t_is_prefix) 118{ 119 return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0], 120 s, slen, t, tlen, t_is_prefix); 121} 122 123 124/* 125 Multi-level, PAD SPACE. 126*/ 127static int 128MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs, 129 const uchar *s, size_t slen, 130 const uchar *t, size_t tlen, 131 my_bool t_is_prefix) 132{ 133 uint i, num_level= cs->levels_for_order; 134 for (i= 0; i != num_level; i++) 135 { 136 int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i], 137 s, slen, t, tlen, 138 t_is_prefix); 139 if (ret) 140 return ret; 141 } 142 return 0; 143} 144 145 146/* 147 Compares two strings according to the collation, 148 ignoring trailing spaces. 149 150 SYNOPSIS: 151 strnncollsp_onelevel() 152 cs Character set information 153 level UCA weight level 154 s First string 155 slen First string length 156 t Second string 157 tlen Seconf string length 158 level DUCETweight level 159 160 NOTES: 161 Works exactly the same with my_strnncoll_uca(), 162 but ignores trailing spaces. 163 164 In the while() comparison these situations are possible: 165 1. (s_res>0) and (t_res>0) and (s_res == t_res) 166 Weights are the same so far, continue comparison 167 2. (s_res>0) and (t_res>0) and (s_res!=t_res) 168 A difference has been found, return. 169 3. (s_res>0) and (t_res<0) 170 We have reached the end of the second string, or found 171 an illegal multibyte sequence in the second string. 172 Compare the first string to an infinite array of 173 space characters until difference is found, or until 174 the end of the first string. 175 4. (s_res<0) and (t_res>0) 176 We have reached the end of the first string, or found 177 an illegal multibyte sequence in the first string. 178 Compare the second string to an infinite array of 179 space characters until difference is found or until 180 the end of the second steing. 181 5. (s_res<0) and (t_res<0) 182 Both scanners returned -1. It means we have riched 183 the end-of-string of illegal-sequence in both strings 184 at the same time. Return 0, strings are equal. 185 186 RETURN 187 Difference between two strings, according to the collation: 188 0 - means strings are equal 189 negative number - means the first string is smaller 190 positive number - means the first string is bigger 191*/ 192 193static int 194MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs, 195 const MY_UCA_WEIGHT_LEVEL *level, 196 const uchar *s, size_t slen, 197 const uchar *t, size_t tlen) 198{ 199 my_uca_scanner sscanner, tscanner; 200 int s_res, t_res; 201 202 my_uca_scanner_init_any(&sscanner, cs, level, s, slen); 203 my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); 204 205 do 206 { 207 s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); 208 t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); 209 } while ( s_res == t_res && s_res >0); 210 211 if (s_res > 0 && t_res < 0) 212 { 213 /* Calculate weight for SPACE character */ 214 t_res= my_space_weight(level); 215 216 /* compare the first string to spaces */ 217 do 218 { 219 if (s_res != t_res) 220 return (s_res - t_res); 221 s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner); 222 } while (s_res > 0); 223 return 0; 224 } 225 226 if (s_res < 0 && t_res > 0) 227 { 228 /* Calculate weight for SPACE character */ 229 s_res= my_space_weight(level); 230 231 /* compare the second string to spaces */ 232 do 233 { 234 if (s_res != t_res) 235 return (s_res - t_res); 236 t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner); 237 } while (t_res > 0); 238 return 0; 239 } 240 241 return ( s_res - t_res ); 242} 243 244 245/* 246 One-level, PAD SPACE 247*/ 248static int 249MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs, 250 const uchar *s, size_t slen, 251 const uchar *t, size_t tlen) 252{ 253 return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[0], 254 s, slen, t, tlen); 255} 256 257 258/* 259 One-level, NO PAD 260*/ 261static int 262MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs, 263 const uchar *s, size_t slen, 264 const uchar *t, size_t tlen) 265{ 266 return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0], 267 s, slen, t, tlen, FALSE); 268} 269 270 271/* 272 Multi-level, PAD SPACE 273*/ 274static int 275MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs, 276 const uchar *s, size_t slen, 277 const uchar *t, size_t tlen) 278{ 279 280 uint i, num_level= cs->levels_for_order; 281 for (i= 0; i != num_level; i++) 282 { 283 int ret= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[i], 284 s, slen, t, tlen); 285 if (ret) 286 return ret; 287 } 288 return 0; 289} 290 291 292/* 293 Multi-level, NO PAD 294*/ 295static int 296MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, 297 const uchar *s, size_t slen, 298 const uchar *t, size_t tlen) 299{ 300 uint num_level= cs->levels_for_order; 301 uint i; 302 for (i= 0; i != num_level; i++) 303 { 304 int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i], 305 s, slen, t, tlen, FALSE); 306 if (ret) 307 return ret; 308 } 309 return 0; 310} 311 312 313/* 314 Scan the next weight and perform space padding 315 or trimming according to "nchars". 316*/ 317static inline weight_and_nchars_t 318MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, 319 size_t nchars, 320 uint *generated) 321{ 322 weight_and_nchars_t res; 323 if (nchars > 0 || 324 scanner->wbeg[0] /* Some weights from a previous expansion left */) 325 { 326 if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, 327 nchars)).weight < 0) 328 { 329 /* 330 We reached the end of the string, but the caller wants more weights. 331 Perform space padding. 332 */ 333 res.weight= my_space_weight(scanner->level); 334 res.nchars= 1; 335 (*generated)++; 336 } 337 else if (res.nchars > nchars) 338 { 339 /* 340 We scanned the next collation element, but it does not fit into 341 the "nchars" limit. This is possible in case of: 342 - A contraction, e.g. Czech 'ch' with nchars=1 343 - A sequence of ignorable characters followed by non-ignorable ones, 344 e.g. CONCAT(x'00','a') with nchars=1. 345 Perform trimming. 346 */ 347 res.weight= scanner->cs->state & MY_CS_NOPAD ? 348 0 : my_space_weight(scanner->level); 349 res.nchars= (uint) nchars; 350 (*generated)++; 351 } 352 } 353 else 354 { 355 /* The caller wants nchars==0. Perform trimming. */ 356 res.weight= scanner->cs->state & MY_CS_NOPAD ? 357 0 : my_space_weight(scanner->level); 358 res.nchars= 0; 359 (*generated)++; 360 } 361 return res; 362} 363 364 365static int 366MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs, 367 const MY_UCA_WEIGHT_LEVEL *level, 368 const uchar *s, size_t slen, 369 const uchar *t, size_t tlen, 370 size_t nchars) 371{ 372 my_uca_scanner sscanner; 373 my_uca_scanner tscanner; 374 size_t s_nchars_left= nchars; 375 size_t t_nchars_left= nchars; 376 377 my_uca_scanner_init_any(&sscanner, cs, level, s, slen); 378 my_uca_scanner_init_any(&tscanner, cs, level, t, tlen); 379 380 for ( ; ; ) 381 { 382 weight_and_nchars_t s_res; 383 weight_and_nchars_t t_res; 384 uint generated= 0; 385 int diff; 386 387 s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left, 388 &generated); 389 t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left, 390 &generated); 391 if ((diff= (s_res.weight - t_res.weight))) 392 return diff; 393 394 if (generated == 2) 395 { 396 if (cs->state & MY_CS_NOPAD) 397 { 398 /* 399 Both values are auto-generated. There's no real data any more. 400 We need to handle the remaining virtual trailing spaces. 401 The two strings still have s_nchars_left and t_nchars_left imaginary 402 trailing spaces at the end. If s_nchars_left != t_nchars_left, 403 the strings will be not equal in case of a NOPAD collation. 404 405 Example: 406 "B" is German "U+00DF LATIN SMALL LETTER SHARP S" 407 When we have these values in a 408 CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci 409 column: 410 'B ' (one character, two trailing spaces) 411 'ss ' (two characters, one trailing space) 412 The 'B ' is greater than the 'ss '. 413 They are compared in the following steps: 414 1. 'B' == 'ss' 415 2. ' ' == ' ' 416 3. ' ' > '' 417 418 We need to emulate the same behavior in this function even if 419 it's called with strings 'B' and 'ss' (with space trimmed). 420 The side which has more remaining virtual spaces at the end 421 is greater. 422 */ 423 if (s_nchars_left < t_nchars_left) 424 return -1; 425 if (s_nchars_left > t_nchars_left) 426 return +1; 427 } 428 return 0; 429 } 430 431 DBUG_ASSERT(s_nchars_left >= s_res.nchars); 432 DBUG_ASSERT(t_nchars_left >= t_res.nchars); 433 s_nchars_left-= s_res.nchars; 434 t_nchars_left-= t_res.nchars; 435 } 436 437 return 0; 438} 439 440 441/* 442 One-level collations. 443*/ 444static int 445MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs, 446 const uchar *s, size_t slen, 447 const uchar *t, size_t tlen, 448 size_t nchars) 449{ 450 return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0], 451 s, slen, t, tlen, 452 nchars); 453} 454 455 456/* 457 Multi-level collations. 458*/ 459static int 460MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs, 461 const uchar *s, size_t slen, 462 const uchar *t, size_t tlen, 463 size_t nchars) 464{ 465 uint num_level= cs->levels_for_order; 466 uint i; 467 for (i= 0; i != num_level; i++) 468 { 469 int ret= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, 470 &cs->uca->level[i], 471 s, slen, 472 t, tlen, 473 nchars); 474 if (ret) 475 return ret; 476 } 477 return 0; 478} 479 480 481/* 482 Calculates hash value for the given string, 483 according to the collation, and ignoring trailing spaces. 484 485 SYNOPSIS: 486 hash_sort() 487 cs Character set information 488 s String 489 slen String's length 490 n1 First hash parameter 491 n2 Second hash parameter 492 493 NOTES: 494 Scans consequently weights and updates 495 hash parameters n1 and n2. In a case insensitive collation, 496 upper and lower case of the same letter will return the same 497 weight sequence, and thus will produce the same hash values 498 in n1 and n2. 499 500 This functions is used for one-level and for multi-level collations. 501 We intentionally use only primary level in multi-level collations. 502 This helps to have PARTITION BY KEY put primarily equal records 503 into the same partition. E.g. in utf8_thai_520_ci records that differ 504 only in tone marks go into the same partition. 505 506 RETURN 507 N/A 508*/ 509 510static void 511MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs, 512 const uchar *s, size_t slen, 513 ulong *nr1, ulong *nr2) 514{ 515 int s_res; 516 my_uca_scanner scanner; 517 int space_weight= my_space_weight(&cs->uca->level[0]); 518 register ulong m1= *nr1, m2= *nr2; 519 520 my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen); 521 522 while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0) 523 { 524 if (s_res == space_weight) 525 { 526 /* Combine all spaces to be able to skip end spaces */ 527 uint count= 0; 528 do 529 { 530 count++; 531 if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0) 532 { 533 /* Skip strings at end of string */ 534 goto end; 535 } 536 } 537 while (s_res == space_weight); 538 539 /* Add back that has for the space characters */ 540 do 541 { 542 /* 543 We can't use MY_HASH_ADD_16() here as we, because of a misstake 544 in the original code, where we added the 16 byte variable the 545 opposite way. Changing this would cause old partitioned tables 546 to fail. 547 */ 548 MY_HASH_ADD(m1, m2, space_weight >> 8); 549 MY_HASH_ADD(m1, m2, space_weight & 0xFF); 550 } 551 while (--count != 0); 552 553 } 554 /* See comment above why we can't use MY_HASH_ADD_16() */ 555 MY_HASH_ADD(m1, m2, s_res >> 8); 556 MY_HASH_ADD(m1, m2, s_res & 0xFF); 557 } 558end: 559 *nr1= m1; 560 *nr2= m2; 561} 562 563 564static void 565MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs, 566 const uchar *s, size_t slen, 567 ulong *nr1, ulong *nr2) 568{ 569 int s_res; 570 my_uca_scanner scanner; 571 register ulong m1= *nr1, m2= *nr2; 572 573 my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen); 574 575 while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0) 576 { 577 /* See comment above why we can't use MY_HASH_ADD_16() */ 578 MY_HASH_ADD(m1, m2, s_res >> 8); 579 MY_HASH_ADD(m1, m2, s_res & 0xFF); 580 } 581 *nr1= m1; 582 *nr2= m2; 583} 584 585 586 587/* 588 For the given string creates its "binary image", suitable 589 to be used in binary comparison, i.e. in memcmp(). 590 591 SYNOPSIS: 592 my_strnxfrm_uca() 593 cs Character set information 594 dst Where to write the image 595 dstlen Space available for the image, in bytes 596 src The source string 597 srclen Length of the source string, in bytes 598 599 NOTES: 600 In a loop, scans weights from the source string and writes 601 them into the binary image. In a case insensitive collation, 602 upper and lower cases of the same letter will produce the 603 same image subsequences. When we have reached the end-of-string 604 or found an illegal multibyte sequence, the loop stops. 605 606 It is impossible to restore the original string using its 607 binary image. 608 609 Binary images are used for bulk comparison purposes, 610 e.g. in ORDER BY, when it is more efficient to create 611 a binary image and use it instead of weight scanner 612 for the original strings for every comparison. 613 614 RETURN 615 Number of bytes that have been written into the binary image. 616*/ 617 618static uchar * 619MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs, 620 MY_UCA_WEIGHT_LEVEL *level, 621 uchar *dst, uchar *de, 622 uint *nweights, 623 const uchar *src, size_t srclen) 624{ 625 my_uca_scanner scanner; 626 int s_res; 627 628 DBUG_ASSERT(src || !srclen); 629 630#if MY_UCA_ASCII_OPTIMIZE && !MY_UCA_COMPILE_CONTRACTIONS 631 /* 632 Fast path for the ASCII range with no contractions. 633 */ 634 { 635 const uchar *de2= de - 1; /* Last position where 2 bytes fit */ 636 const uint16 *weights0= level->weights[0]; 637 uint lengths0= level->lengths[0]; 638 for ( ; ; src++, srclen--) 639 { 640 const uint16 *weight; 641 if (!srclen || !*nweights) 642 return dst; /* Done */ 643 if (*src > 0x7F) 644 break; /* Non-ASCII */ 645 646 weight= weights0 + (((uint) *src) * lengths0); 647 if (!(s_res= *weight)) 648 continue; /* Ignorable */ 649 if (weight[1]) /* Expansion (e.g. in a user defined collation */ 650 break; 651 652 /* Here we have a character with extactly one 2-byte UCA weight */ 653 if (dst < de2) /* Most typical case is when both bytes fit */ 654 { 655 *dst++= s_res >> 8; 656 *dst++= s_res & 0xFF; 657 (*nweights)--; 658 continue; 659 } 660 if (dst >= de) /* No space left in "dst" */ 661 return dst; 662 *dst++= s_res >> 8; /* There is space only for one byte */ 663 (*nweights)--; 664 return dst; 665 } 666 } 667#endif 668 669 my_uca_scanner_init_any(&scanner, cs, level, src, srclen); 670 for (; dst < de && *nweights && 671 (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--) 672 { 673 *dst++= s_res >> 8; 674 if (dst < de) 675 *dst++= s_res & 0xFF; 676 } 677 return dst; 678} 679 680 681static uchar * 682MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs, 683 MY_UCA_WEIGHT_LEVEL *level, 684 uchar *dst, uchar *de, uint nweights, 685 const uchar *src, size_t srclen, uint flags) 686{ 687 uchar *d0= dst; 688 dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level, 689 dst, de, &nweights, 690 src, srclen); 691 DBUG_ASSERT(dst <= de); 692 if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE)) 693 dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level)); 694 DBUG_ASSERT(dst <= de); 695 my_strxfrm_desc_and_reverse(d0, dst, flags, 0); 696 return dst; 697} 698 699 700 701static uchar * 702MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs, 703 MY_UCA_WEIGHT_LEVEL *level, 704 uchar *dst, uchar *de, uint nweights, 705 const uchar *src, size_t srclen, 706 uint flags) 707{ 708 uchar *d0= dst; 709 dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level, 710 dst, de, &nweights, 711 src, srclen); 712 DBUG_ASSERT(dst <= de); 713 /* Pad with the minimum possible weight on this level */ 714 if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE)) 715 dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level)); 716 DBUG_ASSERT(dst <= de); 717 my_strxfrm_desc_and_reverse(d0, dst, flags, 0); 718 return dst; 719} 720 721 722static size_t 723MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs, 724 uchar *dst, size_t dstlen, uint nweights, 725 const uchar *src, size_t srclen, uint flags) 726{ 727 uchar *d0= dst; 728 uchar *de= dst + dstlen; 729 730 /* 731 There are two ways to handle trailing spaces for PAD SPACE collations: 732 1. Keep trailing spaces as they are, so have strnxfrm_onelevel() scan 733 spaces as normal characters. This will call scanner_next() for every 734 trailing space and calculate its weight using UCA weights. 735 2. Strip trailing spaces before calling strnxfrm_onelevel(), as it will 736 append weights for implicit spaces anyway, up to the desired key size. 737 This will effectively generate exactly the same sortable key result. 738 The latter is much faster. 739 */ 740 741 if (flags & MY_STRXFRM_PAD_WITH_SPACE) 742 srclen= cs->cset->lengthsp(cs, (const char*) src, srclen); 743 dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0], 744 dst, de, nweights, 745 src, srclen, flags); 746 /* 747 This can probably be changed to memset(dst, 0, de - dst), 748 like my_strnxfrm_uca_multilevel() does. 749 */ 750 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) 751 dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0])); 752 return dst - d0; 753} 754 755 756static size_t 757MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs, 758 uchar *dst, size_t dstlen, 759 uint nweights, 760 const uchar *src, size_t srclen, 761 uint flags) 762{ 763 uchar *d0= dst; 764 uchar *de= dst + dstlen; 765 766 dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0], 767 dst, de, nweights, 768 src, srclen, flags); 769 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de) 770 { 771 memset(dst, 0, de - dst); 772 dst= de; 773 } 774 return dst - d0; 775} 776 777 778static size_t 779MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs, 780 uchar *dst, size_t dstlen, 781 uint nweights, 782 const uchar *src, size_t srclen, 783 uint flags) 784{ 785 uint num_level= cs->levels_for_order; 786 uchar *d0= dst; 787 uchar *de= dst + dstlen; 788 uint current_level; 789 790 for (current_level= 0; current_level != num_level; current_level++) 791 { 792 if (!(flags & MY_STRXFRM_LEVEL_ALL) || 793 (flags & (MY_STRXFRM_LEVEL1 << current_level))) 794 dst= cs->state & MY_CS_NOPAD ? 795 MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, 796 &cs->uca->level[current_level], 797 dst, de, nweights, 798 src, srclen, flags) : 799 MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, 800 &cs->uca->level[current_level], 801 dst, de, nweights, 802 src, srclen, flags); 803 } 804 805 if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN)) 806 { 807 memset(dst, 0, de - dst); 808 dst= de; 809 } 810 811 return dst - d0; 812} 813 814 815/* 816 One-level, PAD SPACE 817*/ 818MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)= 819{ 820 MY_UCA_COLL_INIT, 821 MY_FUNCTION_NAME(strnncoll), 822 MY_FUNCTION_NAME(strnncollsp), 823 MY_FUNCTION_NAME(strnncollsp_nchars), 824 MY_FUNCTION_NAME(strnxfrm), 825 my_strnxfrmlen_any_uca, 826 MY_LIKE_RANGE, 827 my_wildcmp_uca, 828 NULL, /* strcasecmp() */ 829 my_instr_mb, 830 MY_FUNCTION_NAME(hash_sort), 831 my_propagate_complex 832}; 833 834 835/* 836 One-level, NO PAD 837 For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb 838 For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic 839*/ 840MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)= 841{ 842 MY_UCA_COLL_INIT, 843 MY_FUNCTION_NAME(strnncoll), 844 MY_FUNCTION_NAME(strnncollsp_nopad), 845 MY_FUNCTION_NAME(strnncollsp_nchars), 846 MY_FUNCTION_NAME(strnxfrm_nopad), 847 my_strnxfrmlen_any_uca, 848 MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */ 849 my_wildcmp_uca, 850 NULL, /* strcasecmp() */ 851 my_instr_mb, 852 MY_FUNCTION_NAME(hash_sort_nopad), 853 my_propagate_complex 854}; 855 856 857/* 858 Multi-level, PAD SPACE 859*/ 860MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)= 861{ 862 MY_UCA_COLL_INIT, 863 MY_FUNCTION_NAME(strnncoll_multilevel), 864 MY_FUNCTION_NAME(strnncollsp_multilevel), 865 MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), 866 MY_FUNCTION_NAME(strnxfrm_multilevel), 867 my_strnxfrmlen_any_uca_multilevel, 868 MY_LIKE_RANGE, 869 my_wildcmp_uca, 870 NULL, /* strcasecmp() */ 871 my_instr_mb, 872 MY_FUNCTION_NAME(hash_sort), 873 my_propagate_complex 874}; 875 876 877/* 878 Multi-level, NO PAD 879*/ 880MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)= 881{ 882 MY_UCA_COLL_INIT, 883 MY_FUNCTION_NAME(strnncoll_multilevel), 884 MY_FUNCTION_NAME(strnncollsp_nopad_multilevel), 885 MY_FUNCTION_NAME(strnncollsp_nchars_multilevel), 886 MY_FUNCTION_NAME(strnxfrm_multilevel), 887 my_strnxfrmlen_any_uca_multilevel, 888 MY_LIKE_RANGE, 889 my_wildcmp_uca, 890 NULL, /* strcasecmp() */ 891 my_instr_mb, 892 MY_FUNCTION_NAME(hash_sort), 893 my_propagate_complex 894}; 895 896 897MY_COLLATION_HANDLER_PACKAGE MY_FUNCTION_NAME(package)= 898{ 899 &MY_FUNCTION_NAME(collation_handler), 900 &MY_FUNCTION_NAME(collation_handler_nopad), 901 &MY_FUNCTION_NAME(collation_handler_multilevel), 902 &MY_FUNCTION_NAME(collation_handler_nopad_multilevel) 903}; 904 905 906#undef MY_FUNCTION_NAME 907#undef MY_MB_WC 908#undef MY_LIKE_RANGE 909#undef MY_UCA_ASCII_OPTIMIZE 910#undef MY_UCA_COMPILE_CONTRACTIONS 911#undef MY_UCA_COLL_INIT 912