1/*
2  Copyright (c) 2018 MariaDB Corporation
3
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; version 2 of the License.
7
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  GNU General Public License for more details.
12
13  You should have received a copy of the GNU General Public License
14  along with this program; if not, write to the Free Software
15  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18
19#ifndef MY_FUNCTION_NAME
20#error MY_FUNCTION_NAME is not defined
21#endif
22#ifndef MY_MB_WC
23#error MY_MB_WC is not defined
24#endif
25#ifndef MY_LIKE_RANGE
26#error MY_LIKE_RANGE is not defined
27#endif
28#ifndef MY_UCA_ASCII_OPTIMIZE
29#error MY_ASCII_OPTIMIZE is not defined
30#endif
31#ifndef MY_UCA_COMPILE_CONTRACTIONS
32#error MY_UCA_COMPILE_CONTRACTIONS is not defined
33#endif
34#ifndef MY_UCA_COLL_INIT
35#error MY_UCA_COLL_INIT is not defined
36#endif
37
38#include "ctype-uca-scanner_next.inl"
39#define SCANNER_NEXT_NCHARS
40#include "ctype-uca-scanner_next.inl"
41
42/*
43  Compares two strings according to the collation
44
45  SYNOPSIS:
46    strnncoll_onelevel()
47    cs		Character set information
48    level       Weight level (0 primary, 1 secondary, 2 tertiary, etc)
49    s		First string
50    slen	First string length
51    t		Second string
52    tlen	Seconf string length
53    level	DUCETweight level
54
55  NOTES:
56    Initializes two weight scanners and gets weights
57    corresponding to two strings in a loop. If weights are not
58    the same at some step then returns their difference.
59
60    In the while() comparison these situations are possible:
61    1. (s_res>0) and (t_res>0) and (s_res == t_res)
62       Weights are the same so far, continue comparison
63    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
64       A difference has been found, return.
65    3. (s_res>0) and (t_res<0)
66       We have reached the end of the second string, or found
67       an illegal multibyte sequence in the second string.
68       Return a positive number, i.e. the first string is bigger.
69    4. (s_res<0) and (t_res>0)
70       We have reached the end of the first string, or found
71       an illegal multibyte sequence in the first string.
72       Return a negative number, i.e. the second string is bigger.
73    5. (s_res<0) and (t_res<0)
74       Both scanners returned -1. It means we have riched
75       the end-of-string of illegal-sequence in both strings
76       at the same time. Return 0, strings are equal.
77
78  RETURN
79    Difference between two strings, according to the collation:
80    0               - means strings are equal
81    negative number - means the first string is smaller
82    positive number - means the first string is bigger
83*/
84
85static int
86MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
87                                     const MY_UCA_WEIGHT_LEVEL *level,
88                                     const uchar *s, size_t slen,
89                                     const uchar *t, size_t tlen,
90                                     my_bool t_is_prefix)
91{
92  my_uca_scanner sscanner;
93  my_uca_scanner tscanner;
94  int s_res;
95  int t_res;
96
97  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
98  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
99
100  do
101  {
102    s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
103    t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
104  } while ( s_res == t_res && s_res >0);
105
106  return  (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
107}
108
109
110/*
111  One-level, PAD SPACE.
112*/
113static int
114MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
115                            const uchar *s, size_t slen,
116                            const uchar *t, size_t tlen,
117                            my_bool t_is_prefix)
118{
119  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
120                                              s, slen, t, tlen, t_is_prefix);
121}
122
123
124/*
125  Multi-level, PAD SPACE.
126*/
127static int
128MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
129                                       const uchar *s, size_t slen,
130                                       const uchar *t, size_t tlen,
131                                       my_bool t_is_prefix)
132{
133  uint i, num_level= cs->levels_for_order;
134  for (i= 0; i != num_level; i++)
135  {
136    int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
137                                                  s, slen, t, tlen,
138                                                  t_is_prefix);
139    if (ret)
140       return ret;
141  }
142  return 0;
143}
144
145
146/*
147  Compares two strings according to the collation,
148  ignoring trailing spaces.
149
150  SYNOPSIS:
151    strnncollsp_onelevel()
152    cs		Character set information
153    level       UCA weight level
154    s		First string
155    slen	First string length
156    t		Second string
157    tlen	Seconf string length
158    level	DUCETweight level
159
160  NOTES:
161    Works exactly the same with my_strnncoll_uca(),
162    but ignores trailing spaces.
163
164    In the while() comparison these situations are possible:
165    1. (s_res>0) and (t_res>0) and (s_res == t_res)
166       Weights are the same so far, continue comparison
167    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
168       A difference has been found, return.
169    3. (s_res>0) and (t_res<0)
170       We have reached the end of the second string, or found
171       an illegal multibyte sequence in the second string.
172       Compare the first string to an infinite array of
173       space characters until difference is found, or until
174       the end of the first string.
175    4. (s_res<0) and (t_res>0)
176       We have reached the end of the first string, or found
177       an illegal multibyte sequence in the first string.
178       Compare the second string to an infinite array of
179       space characters until difference is found or until
180       the end of the second steing.
181    5. (s_res<0) and (t_res<0)
182       Both scanners returned -1. It means we have riched
183       the end-of-string of illegal-sequence in both strings
184       at the same time. Return 0, strings are equal.
185
186  RETURN
187    Difference between two strings, according to the collation:
188    0               - means strings are equal
189    negative number - means the first string is smaller
190    positive number - means the first string is bigger
191*/
192
193static int
194MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
195                                       const MY_UCA_WEIGHT_LEVEL *level,
196                                       const uchar *s, size_t slen,
197                                       const uchar *t, size_t tlen)
198{
199  my_uca_scanner sscanner, tscanner;
200  int s_res, t_res;
201
202  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
203  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
204
205  do
206  {
207    s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
208    t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
209  } while ( s_res == t_res && s_res >0);
210
211  if (s_res > 0 && t_res < 0)
212  {
213    /* Calculate weight for SPACE character */
214    t_res= my_space_weight(level);
215
216    /* compare the first string to spaces */
217    do
218    {
219      if (s_res != t_res)
220        return (s_res - t_res);
221      s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
222    } while (s_res > 0);
223    return 0;
224  }
225
226  if (s_res < 0 && t_res > 0)
227  {
228    /* Calculate weight for SPACE character */
229    s_res= my_space_weight(level);
230
231    /* compare the second string to spaces */
232    do
233    {
234      if (s_res != t_res)
235        return (s_res - t_res);
236      t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
237    } while (t_res > 0);
238    return 0;
239  }
240
241  return ( s_res - t_res );
242}
243
244
245/*
246  One-level, PAD SPACE
247*/
248static int
249MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
250                              const uchar *s, size_t slen,
251                              const uchar *t, size_t tlen)
252{
253  return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[0],
254                                                s, slen, t, tlen);
255}
256
257
258/*
259  One-level, NO PAD
260*/
261static int
262MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
263                                    const uchar *s, size_t slen,
264                                    const uchar *t, size_t tlen)
265{
266  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
267                                              s, slen, t, tlen, FALSE);
268}
269
270
271/*
272  Multi-level, PAD SPACE
273*/
274static int
275MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
276                                         const uchar *s, size_t slen,
277                                         const uchar *t, size_t tlen)
278{
279
280  uint i, num_level= cs->levels_for_order;
281  for (i= 0; i != num_level; i++)
282  {
283    int ret= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[i],
284                                                    s, slen, t, tlen);
285    if (ret)
286      return ret;
287  }
288  return 0;
289}
290
291
292/*
293  Multi-level, NO PAD
294*/
295static int
296MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
297                                               const uchar *s, size_t slen,
298                                               const uchar *t, size_t tlen)
299{
300  uint num_level= cs->levels_for_order;
301  uint i;
302  for (i= 0; i != num_level; i++)
303  {
304    int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
305                                                  s, slen, t, tlen, FALSE);
306    if (ret)
307       return ret;
308  }
309  return 0;
310}
311
312
313/*
314  Scan the next weight and perform space padding
315  or trimming according to "nchars".
316*/
317static inline weight_and_nchars_t
318MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
319                                        size_t nchars,
320                                        uint *generated)
321{
322  weight_and_nchars_t res;
323  if (nchars > 0 ||
324      scanner->wbeg[0] /* Some weights from a previous expansion left */)
325  {
326    if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner,
327                                                         nchars)).weight < 0)
328    {
329      /*
330        We reached the end of the string, but the caller wants more weights.
331        Perform space padding.
332      */
333      res.weight= my_space_weight(scanner->level);
334      res.nchars= 1;
335      (*generated)++;
336    }
337    else if (res.nchars > nchars)
338    {
339      /*
340        We scanned the next collation element, but it does not fit into
341        the "nchars" limit. This is possible in case of:
342        - A contraction, e.g. Czech 'ch' with nchars=1
343        - A sequence of ignorable characters followed by non-ignorable ones,
344          e.g. CONCAT(x'00','a') with nchars=1.
345        Perform trimming.
346      */
347      res.weight= scanner->cs->state & MY_CS_NOPAD ?
348                  0 : my_space_weight(scanner->level);
349      res.nchars= (uint) nchars;
350      (*generated)++;
351    }
352  }
353  else
354  {
355    /* The caller wants nchars==0. Perform trimming. */
356    res.weight= scanner->cs->state & MY_CS_NOPAD ?
357                0 : my_space_weight(scanner->level);
358    res.nchars= 0;
359    (*generated)++;
360  }
361  return res;
362}
363
364
365static int
366MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
367                                              const MY_UCA_WEIGHT_LEVEL *level,
368                                              const uchar *s, size_t slen,
369                                              const uchar *t, size_t tlen,
370                                              size_t nchars)
371{
372  my_uca_scanner sscanner;
373  my_uca_scanner tscanner;
374  size_t s_nchars_left= nchars;
375  size_t t_nchars_left= nchars;
376
377  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
378  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
379
380  for ( ; ; )
381  {
382    weight_and_nchars_t s_res;
383    weight_and_nchars_t t_res;
384    uint generated= 0;
385    int diff;
386
387    s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left,
388                                                   &generated);
389    t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left,
390                                                   &generated);
391    if ((diff= (s_res.weight - t_res.weight)))
392      return diff;
393
394    if (generated == 2)
395    {
396      if (cs->state & MY_CS_NOPAD)
397      {
398        /*
399          Both values are auto-generated. There's no real data any more.
400          We need to handle the remaining virtual trailing spaces.
401          The two strings still have s_nchars_left and t_nchars_left imaginary
402          trailing spaces at the end. If s_nchars_left != t_nchars_left,
403          the strings will be not equal in case of a NOPAD collation.
404
405          Example:
406          "B" is German "U+00DF LATIN SMALL LETTER SHARP S"
407          When we have these values in a
408          CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci
409          column:
410          'B  '        (one character, two trailing spaces)
411          'ss '        (two characters, one trailing space)
412          The 'B  ' is greater than the 'ss '.
413          They are compared in the following steps:
414            1. 'B' == 'ss'
415            2. ' ' == ' '
416            3. ' ' >   ''
417
418          We need to emulate the same behavior in this function even if
419          it's called with strings 'B' and 'ss' (with space trimmed).
420          The side which has more remaining virtual spaces at the end
421          is greater.
422        */
423        if (s_nchars_left < t_nchars_left)
424          return -1;
425        if (s_nchars_left > t_nchars_left)
426          return +1;
427      }
428      return 0;
429    }
430
431    DBUG_ASSERT(s_nchars_left >= s_res.nchars);
432    DBUG_ASSERT(t_nchars_left >= t_res.nchars);
433    s_nchars_left-= s_res.nchars;
434    t_nchars_left-= t_res.nchars;
435  }
436
437  return 0;
438}
439
440
441/*
442  One-level collations.
443*/
444static int
445MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs,
446                                     const uchar *s, size_t slen,
447                                     const uchar *t, size_t tlen,
448                                     size_t nchars)
449{
450  return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0],
451                                                       s, slen, t, tlen,
452                                                       nchars);
453}
454
455
456/*
457  Multi-level collations.
458*/
459static int
460MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
461                                                const uchar *s, size_t slen,
462                                                const uchar *t, size_t tlen,
463                                                size_t nchars)
464{
465  uint num_level= cs->levels_for_order;
466  uint i;
467  for (i= 0; i != num_level; i++)
468  {
469    int ret= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs,
470                                                           &cs->uca->level[i],
471                                                           s, slen,
472                                                           t, tlen,
473                                                           nchars);
474    if (ret)
475       return ret;
476  }
477  return 0;
478}
479
480
481/*
482  Calculates hash value for the given string,
483  according to the collation, and ignoring trailing spaces.
484
485  SYNOPSIS:
486    hash_sort()
487    cs		Character set information
488    s		String
489    slen	String's length
490    n1		First hash parameter
491    n2		Second hash parameter
492
493  NOTES:
494    Scans consequently weights and updates
495    hash parameters n1 and n2. In a case insensitive collation,
496    upper and lower case of the same letter will return the same
497    weight sequence, and thus will produce the same hash values
498    in n1 and n2.
499
500    This functions is used for one-level and for multi-level collations.
501    We intentionally use only primary level in multi-level collations.
502    This helps to have PARTITION BY KEY put primarily equal records
503    into the same partition. E.g. in utf8_thai_520_ci records that differ
504    only in tone marks go into the same partition.
505
506  RETURN
507    N/A
508*/
509
510static void
511MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
512                            const uchar *s, size_t slen,
513                            ulong *nr1, ulong *nr2)
514{
515  int   s_res;
516  my_uca_scanner scanner;
517  int space_weight= my_space_weight(&cs->uca->level[0]);
518  register ulong m1= *nr1, m2= *nr2;
519
520  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
521
522  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
523  {
524    if (s_res == space_weight)
525    {
526      /* Combine all spaces to be able to skip end spaces */
527      uint count= 0;
528      do
529      {
530        count++;
531        if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
532        {
533          /* Skip strings at end of string */
534          goto end;
535        }
536      }
537      while (s_res == space_weight);
538
539      /* Add back that has for the space characters */
540      do
541      {
542        /*
543          We can't use MY_HASH_ADD_16() here as we, because of a misstake
544          in the original code, where we added the 16 byte variable the
545          opposite way.  Changing this would cause old partitioned tables
546          to fail.
547        */
548        MY_HASH_ADD(m1, m2, space_weight >> 8);
549        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
550      }
551      while (--count != 0);
552
553    }
554    /* See comment above why we can't use MY_HASH_ADD_16() */
555    MY_HASH_ADD(m1, m2, s_res >> 8);
556    MY_HASH_ADD(m1, m2, s_res & 0xFF);
557  }
558end:
559  *nr1= m1;
560  *nr2= m2;
561}
562
563
564static void
565MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
566                                  const uchar *s, size_t slen,
567                                  ulong *nr1, ulong *nr2)
568{
569  int   s_res;
570  my_uca_scanner scanner;
571  register ulong m1= *nr1, m2= *nr2;
572
573  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
574
575  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
576  {
577    /* See comment above why we can't use MY_HASH_ADD_16() */
578    MY_HASH_ADD(m1, m2, s_res >> 8);
579    MY_HASH_ADD(m1, m2, s_res & 0xFF);
580  }
581  *nr1= m1;
582  *nr2= m2;
583}
584
585
586
/*
  For the given string writes the weights of one level of its
  "binary image", suitable to be used in binary comparison,
  i.e. in memcmp().

  SYNOPSIS:
    strnxfrm_onelevel_internal()
    cs		Character set information
    level	Weight level to take the weights from
    dst		Where to write the image
    de		End of the space available for the image (dst <= de)
    nweights	IN/OUT: maximum number of weights to write;
    		decremented by one for every weight written
    src		The source string
    srclen	Length of the source string, in bytes

  NOTES:
    In a loop, scans weights from the source string and writes
    them into the binary image, two bytes per weight, high byte first.
    In a case insensitive collation, upper and lower cases of the
    same letter will produce the same image subsequences.
    The loop stops when the end-of-string or an illegal multibyte
    sequence is reached, when "dst" reaches "de", or when *nweights
    drops to zero. If only one byte of space is left, only the high
    byte of the last weight is written.

    No padding is done here; callers pad the result themselves.

    It is impossible to restore the original string using its
    binary image.

    Binary images are used for bulk comparison purposes,
    e.g. in ORDER BY, when it is more efficient to create
    a binary image and use it instead of weight scanner
    for the original strings for every comparison.

  RETURN
    Pointer to the byte following the last byte written into "dst".
*/

static uchar *
MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
                                             MY_UCA_WEIGHT_LEVEL *level,
                                             uchar *dst, uchar *de,
                                             uint *nweights,
                                             const uchar *src, size_t srclen)
{
  my_uca_scanner scanner;
  int s_res;

  DBUG_ASSERT(src || !srclen);

#if MY_UCA_ASCII_OPTIMIZE && !MY_UCA_COMPILE_CONTRACTIONS
  /*
    Fast path for the ASCII range with no contractions.
    It consumes leading ASCII bytes directly from the weight table.
    On the first byte it cannot handle, it breaks out with "src" and
    "srclen" already advanced, so the generic scanner loop below
    continues from that position.
  */
  {
    const uchar *de2= de - 1; /* dst < de2 means at least 2 bytes are free */
    const uint16 *weights0= level->weights[0];
    uint lengths0= level->lengths[0];
    for ( ; ; src++, srclen--)
    {
      const uint16 *weight;
      if (!srclen || !*nweights)
        return dst;         /* Done */
      if (*src > 0x7F)
        break;              /* Non-ASCII */

      /* Each character owns "lengths0" consecutive uint16 slots */
      weight= weights0 + (((uint) *src) * lengths0);
      if (!(s_res= *weight))
        continue;           /* Ignorable */
      if (weight[1])        /* Expansion (e.g. in a user defined collation) */
        break;

      /* Here we have a character with exactly one 2-byte UCA weight */
      if (dst < de2)        /* Most typical case is when both bytes fit */
      {
        *dst++= s_res >> 8;
        *dst++= s_res & 0xFF;
        (*nweights)--;
        continue;
      }
      if (dst >= de)        /* No space left in "dst" */
        return dst;
      *dst++= s_res >> 8;   /* There is space only for one byte */
      (*nweights)--;
      return dst;
    }
  }
#endif

  /* Generic path: let the scanner produce the remaining weights */
  my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
  for (; dst < de && *nweights &&
         (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--)
  {
    *dst++= s_res >> 8;
    if (dst < de)           /* The low byte is dropped if it does not fit */
      *dst++= s_res & 0xFF;
  }
  return dst;
}
679
680
681static uchar *
682MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
683                                    MY_UCA_WEIGHT_LEVEL *level,
684                                    uchar *dst, uchar *de, uint nweights,
685                                    const uchar *src, size_t srclen, uint flags)
686{
687  uchar *d0= dst;
688  dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
689                                                    dst, de, &nweights,
690                                                    src, srclen);
691  DBUG_ASSERT(dst <= de);
692  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
693    dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
694  DBUG_ASSERT(dst <= de);
695  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
696  return dst;
697}
698
699
700
701static uchar *
702MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
703                                          MY_UCA_WEIGHT_LEVEL *level,
704                                          uchar *dst, uchar *de, uint nweights,
705                                          const uchar *src, size_t srclen,
706                                          uint flags)
707{
708  uchar *d0= dst;
709  dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
710                                                    dst, de, &nweights,
711                                                    src, srclen);
712  DBUG_ASSERT(dst <= de);
713  /*  Pad with the minimum possible weight on this level */
714  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
715    dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
716  DBUG_ASSERT(dst <= de);
717  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
718  return dst;
719}
720
721
722static size_t
723MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
724                           uchar *dst, size_t dstlen, uint nweights,
725                           const uchar *src, size_t srclen, uint flags)
726{
727  uchar *d0= dst;
728  uchar *de= dst + dstlen;
729
730  /*
731    There are two ways to handle trailing spaces for PAD SPACE collations:
732    1. Keep trailing spaces as they are, so have strnxfrm_onelevel() scan
733       spaces as normal characters. This will call scanner_next() for every
734       trailing space and calculate its weight using UCA weights.
735    2. Strip trailing spaces before calling strnxfrm_onelevel(), as it will
736       append weights for implicit spaces anyway, up to the desired key size.
737       This will effectively generate exactly the same sortable key result.
738    The latter is much faster.
739  */
740
741  if (flags & MY_STRXFRM_PAD_WITH_SPACE)
742    srclen= cs->cset->lengthsp(cs, (const char*) src, srclen);
743  dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
744                                           dst, de, nweights,
745                                           src, srclen, flags);
746  /*
747    This can probably be changed to memset(dst, 0, de - dst),
748    like my_strnxfrm_uca_multilevel() does.
749  */
750  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
751    dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
752  return dst - d0;
753}
754
755
756static size_t
757MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
758                                 uchar *dst, size_t dstlen,
759                                 uint nweights,
760                                 const uchar *src, size_t srclen,
761                                 uint flags)
762{
763  uchar *d0= dst;
764  uchar *de= dst + dstlen;
765
766  dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
767                                                 dst, de, nweights,
768                                                 src, srclen, flags);
769  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
770  {
771    memset(dst, 0, de - dst);
772    dst= de;
773  }
774  return dst - d0;
775}
776
777
778static size_t
779MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs,
780                                      uchar *dst, size_t dstlen,
781                                      uint nweights,
782                                      const uchar *src, size_t srclen,
783                                      uint flags)
784{
785  uint num_level= cs->levels_for_order;
786  uchar *d0= dst;
787  uchar *de= dst + dstlen;
788  uint current_level;
789
790  for (current_level= 0; current_level != num_level; current_level++)
791  {
792    if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
793        (flags & (MY_STRXFRM_LEVEL1 << current_level)))
794      dst= cs->state & MY_CS_NOPAD ?
795           MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
796                                          &cs->uca->level[current_level],
797                                          dst, de, nweights,
798                                          src, srclen, flags) :
799           MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
800                                    &cs->uca->level[current_level],
801                                    dst, de, nweights,
802                                    src, srclen, flags);
803  }
804
805  if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
806  {
807    memset(dst, 0, de - dst);
808    dst= de;
809  }
810
811  return dst - d0;
812}
813
814
/*
  One-level, PAD SPACE

  Handler table for collations that compare on level[0] only and
  ignore trailing spaces. The initializer is positional: the order of
  the entries must match the member order of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
{
  MY_UCA_COLL_INIT,
  MY_FUNCTION_NAME(strnncoll),
  MY_FUNCTION_NAME(strnncollsp),
  MY_FUNCTION_NAME(strnncollsp_nchars),
  MY_FUNCTION_NAME(strnxfrm),
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),
  my_propagate_complex
};
833
834
/*
  One-level, NO PAD
  For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
  For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic

  Differs from the one-level PAD SPACE table in three entries:
  strnncollsp_nopad, strnxfrm_nopad and hash_sort_nopad.
  The initializer is positional: the order of the entries must match
  the member order of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
{
  MY_UCA_COLL_INIT,
  MY_FUNCTION_NAME(strnncoll),
  MY_FUNCTION_NAME(strnncollsp_nopad),
  MY_FUNCTION_NAME(strnncollsp_nchars),
  MY_FUNCTION_NAME(strnxfrm_nopad),
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE,    /* my_like_range_mb or my_like_range_generic */
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort_nopad),
  my_propagate_complex
};
855
856
/*
  Multi-level, PAD SPACE

  Handler table for collations that compare on cs->levels_for_order
  levels and ignore trailing spaces. Hashing still uses hash_sort(),
  which takes weights from level[0] only.
  The initializer is positional: the order of the entries must match
  the member order of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
{
  MY_UCA_COLL_INIT,
  MY_FUNCTION_NAME(strnncoll_multilevel),
  MY_FUNCTION_NAME(strnncollsp_multilevel),
  MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
  MY_FUNCTION_NAME(strnxfrm_multilevel),
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),
  my_propagate_complex
};
875
876
/*
  Multi-level, NO PAD

  The PAD/NOPAD distinction for the sort key is made inside
  strnxfrm_multilevel(), which checks MY_CS_NOPAD at run time.
  The initializer is positional: the order of the entries must match
  the member order of MY_COLLATION_HANDLER.

  NOTE(review): this table installs hash_sort(), which skips trailing
  spaces, rather than hash_sort_nopad() as the one-level NO PAD table
  does. Presumably this is kept for compatibility with existing hash
  values - confirm before changing.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
{
  MY_UCA_COLL_INIT,
  MY_FUNCTION_NAME(strnncoll_multilevel),
  MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
  MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
  MY_FUNCTION_NAME(strnxfrm_multilevel),
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,
  my_wildcmp_uca,
  NULL,                                /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),
  my_propagate_complex
};
895
896
/*
  Bundles pointers to the four handler tables defined in this file,
  in this order: one-level PAD SPACE, one-level NO PAD,
  multi-level PAD SPACE, multi-level NO PAD.
*/
MY_COLLATION_HANDLER_PACKAGE MY_FUNCTION_NAME(package)=
{
  &MY_FUNCTION_NAME(collation_handler),
  &MY_FUNCTION_NAME(collation_handler_nopad),
  &MY_FUNCTION_NAME(collation_handler_multilevel),
  &MY_FUNCTION_NAME(collation_handler_nopad_multilevel)
};
904
905
/*
  Remove the template parameters, presumably so that this file can be
  included again with a different set of definitions.
*/
#undef MY_FUNCTION_NAME
#undef MY_MB_WC
#undef MY_LIKE_RANGE
#undef MY_UCA_ASCII_OPTIMIZE
#undef MY_UCA_COMPILE_CONTRACTIONS
#undef MY_UCA_COLL_INIT
912