1 /* Copyright (c) 2003, 2013, Oracle and/or its affiliates
2    Copyright (c) 2009, 2020, MariaDB
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; version 2
7    of the License.
8 
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13 
14    You should have received a copy of the GNU Library General Public
15    License along with this library; if not, write to the Free
16    Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
17    MA 02110-1335  USA */
18 
19 /* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
20 
21 #include "strings_def.h"
22 #include <m_ctype.h>
23 #include <my_sys.h>
24 #include <stdarg.h>
25 
26 #include "ctype-unidata.h"
27 
28 
29 #if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
30 #define HAVE_CHARSET_mb2
31 #endif
32 
33 
34 #if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
35 #define HAVE_CHARSET_mb2_or_mb4
36 #endif
37 
38 #ifndef EILSEQ
39 #define EILSEQ ENOENT
40 #endif
41 
/*
  Numeric limits and decimal scaling factors used by the integer parsers
  in this file.
*/
#undef  ULONGLONG_MAX
#define ULONGLONG_MAX                (~(ulonglong) 0)
#define MAX_NEGATIVE_NUMBER        ((ulonglong) 0x8000000000000000LL)
#define INIT_CNT  9                  /* Decimal digits collected per chunk */
#define LFACTOR   1000000000ULL      /* 10^9  */
#define LFACTOR1  10000000000ULL     /* 10^10 */
#define LFACTOR2  100000000000ULL    /* 10^11 */

#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2)
/* lfactor[n] == 10^n for n = 0..8. Read-only lookup table. */
static const unsigned long lfactor[9]=
{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
#endif
54 
55 
56 #ifdef HAVE_CHARSET_mb2_or_mb4
57 static size_t
my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs,char * s)58 my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs  __attribute__((unused)),
59                          char * s __attribute__((unused)))
60 {
61   DBUG_ASSERT(0);
62   return 0;
63 }
64 
65 
66 static size_t
my_casedn_str_mb2_or_mb4(CHARSET_INFO * cs,char * s)67 my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
68                          char * s __attribute__((unused)))
69 {
70   DBUG_ASSERT(0);
71   return 0;
72 }
73 
74 
75 static int
my_strcasecmp_mb2_or_mb4(CHARSET_INFO * cs,const char * s,const char * t)76 my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
77                          const char *s __attribute__((unused)),
78                          const char *t __attribute__((unused)))
79 {
80   DBUG_ASSERT(0);
81   return 0;
82 }
83 
84 
/* Outcome of copying one (possibly incomplete) character. */
typedef enum
{
  /* The character was valid and was copied */
  MY_CHAR_COPY_OK = 0,
  /* The character was not valid and could not be fixed */
  MY_CHAR_COPY_ERROR = 1,
  /* The character was not valid and was replaced with '?' */
  MY_CHAR_COPY_FIXED = 2
} my_char_copy_status_t;
91 
92 
93 /*
94   Copies an incomplete character, lef-padding it with 0x00 bytes.
95 
96   @param cs           Character set
97   @param dst          The destination string
98   @param dst_length   Space available in dst
99   @param src          The source string
100   @param src_length   Length of src
101   @param nchars       Copy not more than nchars characters.
102                       The "nchars" parameter of the caller.
103                       Only 0 and non-0 are important here.
104   @param fix          What to do if after zero-padding didn't get a valid
105                       character:
106                       - FALSE - exit with error.
107                       - TRUE  - try to put '?' instead.
108 
109   @return  MY_CHAR_COPY_OK     if after zero-padding got a valid character.
110                                cs->mbmaxlen bytes were written to "dst".
111   @return  MY_CHAR_COPY_FIXED  if after zero-padding did not get a valid
112                                character, but wrote '?' to the destination
113                                string instead.
114                                cs->mbminlen bytes were written to "dst".
115   @return  MY_CHAR_COPY_ERROR  If failed and nothing was written to "dst".
116                                Possible reasons:
117                                - dst_length was too short
118                                - nchars was 0
119                                - the character after padding appeared not
120                                  to be valid, and could not fix it to '?'.
121 */
122 static my_char_copy_status_t
my_copy_incomplete_char(CHARSET_INFO * cs,char * dst,size_t dst_length,const char * src,size_t src_length,size_t nchars,my_bool fix)123 my_copy_incomplete_char(CHARSET_INFO *cs,
124                         char *dst, size_t dst_length,
125                         const char *src, size_t src_length,
126                         size_t nchars, my_bool fix)
127 {
128   size_t pad_length;
129   size_t src_offset= src_length % cs->mbminlen;
130   if (dst_length < cs->mbminlen || !nchars)
131     return MY_CHAR_COPY_ERROR;
132 
133   pad_length= cs->mbminlen - src_offset;
134   bzero(dst, pad_length);
135   memmove(dst + pad_length, src, src_offset);
136   /*
137     In some cases left zero-padding can create an incorrect character.
138     For example:
139       INSERT INTO t1 (utf32_column) VALUES (0x110000);
140     We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
141     The valid characters range is limited to 0x00000000..0x0010FFFF.
142 
143     Make sure we didn't pad to an incorrect character.
144   */
145   if (my_ci_charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
146       (int) cs->mbminlen)
147     return MY_CHAR_COPY_OK;
148 
149   if (fix &&
150       my_ci_wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
151       (int) cs->mbminlen)
152     return MY_CHAR_COPY_FIXED;
153 
154   return MY_CHAR_COPY_ERROR;
155 }
156 
157 
158 /*
159   Copy an UCS2/UTF16/UTF32 string, fix bad characters.
160 */
161 static size_t
my_copy_fix_mb2_or_mb4(CHARSET_INFO * cs,char * dst,size_t dst_length,const char * src,size_t src_length,size_t nchars,MY_STRCOPY_STATUS * status)162 my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
163                        char *dst, size_t dst_length,
164                        const char *src, size_t src_length,
165                        size_t nchars, MY_STRCOPY_STATUS *status)
166 {
167   size_t length2, src_offset= src_length % cs->mbminlen;
168   my_char_copy_status_t padstatus;
169 
170   if (!src_offset)
171     return  my_copy_fix_mb(cs, dst, dst_length,
172                                src, src_length, nchars, status);
173   if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
174                                           src, src_length, nchars, TRUE)) ==
175       MY_CHAR_COPY_ERROR)
176   {
177     status->m_source_end_pos= status->m_well_formed_error_pos= src;
178     return 0;
179   }
180   length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
181                           src + src_offset, src_length - src_offset,
182                           nchars - 1, status);
183   if (padstatus == MY_CHAR_COPY_FIXED)
184     status->m_well_formed_error_pos= src;
185   return cs->mbminlen /* The left-padded character */ + length2;
186 }
187 
188 
189 static long
my_strntol_mb2_or_mb4(CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)190 my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
191                       const char *nptr, size_t l, int base,
192                       char **endptr, int *err)
193 {
194   int      negative= 0;
195   int      overflow;
196   int      cnv;
197   my_wc_t  wc;
198   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
199   register unsigned int cutlim;
200   register uint32 cutoff;
201   register uint32 res;
202   register const uchar *s= (const uchar*) nptr;
203   register const uchar *e= (const uchar*) nptr+l;
204   const uchar *save;
205 
206   *err= 0;
207   do
208   {
209     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
210     {
211       switch (wc)
212       {
213         case ' ' : break;
214         case '\t': break;
215         case '-' : negative= !negative; break;
216         case '+' : break;
217         default  : goto bs;
218       }
219     }
220     else /* No more characters or bad multibyte sequence */
221     {
222       if (endptr != NULL )
223         *endptr= (char*) s;
224       err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
225       return 0;
226     }
227     s+= cnv;
228   } while (1);
229 
230 bs:
231 
232   overflow= 0;
233   res= 0;
234   save= s;
235   cutoff= ((uint32)~0L) / (uint32) base;
236   cutlim= (uint) (((uint32)~0L) % (uint32) base);
237 
238   do {
239     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
240     {
241       s+= cnv;
242       if (wc >= '0' && wc <= '9')
243         wc-= '0';
244       else if (wc >= 'A' && wc <= 'Z')
245         wc= wc - 'A' + 10;
246       else if (wc >= 'a' && wc <= 'z')
247         wc= wc - 'a' + 10;
248       else
249         break;
250       if ((int)wc >= base)
251         break;
252       if (res > cutoff || (res == cutoff && wc > cutlim))
253         overflow= 1;
254       else
255       {
256         res*= (uint32) base;
257         res+= wc;
258       }
259     }
260     else if (cnv == MY_CS_ILSEQ)
261     {
262       if (endptr !=NULL )
263         *endptr = (char*) s;
264       err[0]= EILSEQ;
265       return 0;
266     }
267     else
268     {
269       /* No more characters */
270       break;
271     }
272   } while(1);
273 
274   if (endptr != NULL)
275     *endptr = (char *) s;
276 
277   if (s == save)
278   {
279     err[0]= EDOM;
280     return 0L;
281   }
282 
283   if (negative)
284   {
285     if (res > (uint32) INT_MIN32)
286       overflow= 1;
287   }
288   else if (res > INT_MAX32)
289     overflow= 1;
290 
291   if (overflow)
292   {
293     err[0]= ERANGE;
294     return negative ? INT_MIN32 : INT_MAX32;
295   }
296 
297   return (negative ? -((long) res) : (long) res);
298 }
299 
300 
301 static ulong
my_strntoul_mb2_or_mb4(CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)302 my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
303                        const char *nptr, size_t l, int base,
304                        char **endptr, int *err)
305 {
306   int      negative= 0;
307   int      overflow;
308   int      cnv;
309   my_wc_t  wc;
310   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
311   register unsigned int cutlim;
312   register uint32 cutoff;
313   register uint32 res;
314   register const uchar *s= (const uchar*) nptr;
315   register const uchar *e= (const uchar*) nptr + l;
316   const uchar *save;
317 
318   *err= 0;
319   do
320   {
321     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
322     {
323       switch (wc)
324       {
325         case ' ' : break;
326         case '\t': break;
327         case '-' : negative= !negative; break;
328         case '+' : break;
329         default  : goto bs;
330       }
331     }
332     else /* No more characters or bad multibyte sequence */
333     {
334       if (endptr !=NULL )
335         *endptr= (char*)s;
336       err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
337       return 0;
338     }
339     s+= cnv;
340   } while (1);
341 
342 bs:
343 
344   overflow= 0;
345   res= 0;
346   save= s;
347   cutoff= ((uint32)~0L) / (uint32) base;
348   cutlim= (uint) (((uint32)~0L) % (uint32) base);
349 
350   do
351   {
352     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
353     {
354       s+= cnv;
355       if (wc >= '0' && wc <= '9')
356         wc-= '0';
357       else if (wc >= 'A' && wc <= 'Z')
358         wc= wc - 'A' + 10;
359       else if (wc >= 'a' && wc <= 'z')
360         wc= wc - 'a' + 10;
361       else
362         break;
363       if ((int) wc >= base)
364         break;
365       if (res > cutoff || (res == cutoff && wc > cutlim))
366         overflow = 1;
367       else
368       {
369         res*= (uint32) base;
370         res+= wc;
371       }
372     }
373     else if (cnv == MY_CS_ILSEQ)
374     {
375       if (endptr != NULL )
376         *endptr= (char*)s;
377       err[0]= EILSEQ;
378       return 0;
379     }
380     else
381     {
382       /* No more characters */
383       break;
384     }
385   } while(1);
386 
387   if (endptr != NULL)
388     *endptr= (char *) s;
389 
390   if (s == save)
391   {
392     err[0]= EDOM;
393     return 0L;
394   }
395 
396   if (overflow)
397   {
398     err[0]= (ERANGE);
399     return (~(uint32) 0);
400   }
401 
402   return (negative ? -((long) res) : (long) res);
403 }
404 
405 
406 static longlong
my_strntoll_mb2_or_mb4(CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)407 my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
408                        const char *nptr, size_t l, int base,
409                        char **endptr, int *err)
410 {
411   int      negative=0;
412   int      overflow;
413   int      cnv;
414   my_wc_t  wc;
415   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
416   register ulonglong    cutoff;
417   register unsigned int cutlim;
418   register ulonglong    res;
419   register const uchar *s= (const uchar*) nptr;
420   register const uchar *e= (const uchar*) nptr+l;
421   const uchar *save;
422 
423   *err= 0;
424   do
425   {
426     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
427     {
428       switch (wc)
429       {
430         case ' ' : break;
431         case '\t': break;
432         case '-' : negative= !negative; break;
433         case '+' : break;
434         default  : goto bs;
435       }
436     }
437     else /* No more characters or bad multibyte sequence */
438     {
439       if (endptr !=NULL )
440         *endptr = (char*)s;
441       err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
442       return 0;
443     }
444     s+=cnv;
445   } while (1);
446 
447 bs:
448 
449   overflow = 0;
450   res = 0;
451   save = s;
452   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
453   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
454 
455   do {
456     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
457     {
458       s+=cnv;
459       if ( wc>='0' && wc<='9')
460         wc -= '0';
461       else if ( wc>='A' && wc<='Z')
462         wc = wc - 'A' + 10;
463       else if ( wc>='a' && wc<='z')
464         wc = wc - 'a' + 10;
465       else
466         break;
467       if ((int)wc >= base)
468         break;
469       if (res > cutoff || (res == cutoff && wc > cutlim))
470         overflow = 1;
471       else
472       {
473         res *= (ulonglong) base;
474         res += wc;
475       }
476     }
477     else if (cnv==MY_CS_ILSEQ)
478     {
479       if (endptr !=NULL )
480         *endptr = (char*)s;
481       err[0]=EILSEQ;
482       return 0;
483     }
484     else
485     {
486       /* No more characters */
487       break;
488     }
489   } while(1);
490 
491   if (endptr != NULL)
492     *endptr = (char *) s;
493 
494   if (s == save)
495   {
496     err[0]=EDOM;
497     return 0L;
498   }
499 
500   if (negative)
501   {
502     if (res  > (ulonglong) LONGLONG_MIN)
503       overflow = 1;
504   }
505   else if (res > (ulonglong) LONGLONG_MAX)
506     overflow = 1;
507 
508   if (overflow)
509   {
510     err[0]=ERANGE;
511     return negative ? LONGLONG_MIN : LONGLONG_MAX;
512   }
513 
514   return (negative ? -((longlong)res) : (longlong)res);
515 }
516 
517 
518 static ulonglong
my_strntoull_mb2_or_mb4(CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)519 my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
520                         const char *nptr, size_t l, int base,
521                         char **endptr, int *err)
522 {
523   int      negative= 0;
524   int      overflow;
525   int      cnv;
526   my_wc_t  wc;
527   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
528   register ulonglong    cutoff;
529   register unsigned int cutlim;
530   register ulonglong    res;
531   register const uchar *s= (const uchar*) nptr;
532   register const uchar *e= (const uchar*) nptr + l;
533   const uchar *save;
534 
535   *err= 0;
536   do
537   {
538     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
539     {
540       switch (wc)
541       {
542         case ' ' : break;
543         case '\t': break;
544         case '-' : negative= !negative; break;
545         case '+' : break;
546         default  : goto bs;
547       }
548     }
549     else /* No more characters or bad multibyte sequence */
550     {
551       if (endptr !=NULL )
552         *endptr = (char*)s;
553       err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
554       return 0;
555     }
556     s+=cnv;
557   } while (1);
558 
559 bs:
560 
561   overflow = 0;
562   res = 0;
563   save = s;
564   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
565   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
566 
567   do
568   {
569     if ((cnv= mb_wc(cs, &wc, s, e)) > 0)
570     {
571       s+=cnv;
572       if ( wc>='0' && wc<='9')
573         wc -= '0';
574       else if ( wc>='A' && wc<='Z')
575         wc = wc - 'A' + 10;
576       else if ( wc>='a' && wc<='z')
577         wc = wc - 'a' + 10;
578       else
579         break;
580       if ((int)wc >= base)
581         break;
582       if (res > cutoff || (res == cutoff && wc > cutlim))
583         overflow = 1;
584       else
585       {
586         res *= (ulonglong) base;
587         res += wc;
588       }
589     }
590     else if (cnv==MY_CS_ILSEQ)
591     {
592       if (endptr !=NULL )
593         *endptr = (char*)s;
594       err[0]= EILSEQ;
595       return 0;
596     }
597     else
598     {
599       /* No more characters */
600       break;
601     }
602   } while(1);
603 
604   if (endptr != NULL)
605     *endptr = (char *) s;
606 
607   if (s == save)
608   {
609     err[0]= EDOM;
610     return 0L;
611   }
612 
613   if (overflow)
614   {
615     err[0]= ERANGE;
616     return (~(ulonglong) 0);
617   }
618 
619   return (negative ? -((longlong) res) : (longlong) res);
620 }
621 
622 
623 static double
my_strntod_mb2_or_mb4(CHARSET_INFO * cs,char * nptr,size_t length,char ** endptr,int * err)624 my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
625                       char *nptr, size_t length,
626                       char **endptr, int *err)
627 {
628   char     buf[256];
629   double   res;
630   register char *b= buf;
631   register const uchar *s= (const uchar*) nptr;
632   const uchar *end;
633   my_wc_t  wc;
634   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
635   int     cnv;
636 
637   *err= 0;
638   /* Cut too long strings */
639   if (length >= sizeof(buf))
640     length= sizeof(buf) - 1;
641   end= s + length;
642 
643   while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
644   {
645     s+= cnv;
646     if (wc > (int) (uchar) 'e' || !wc)
647       break;                                        /* Can't be part of double */
648     *b++= (char) wc;
649   }
650 
651   *endptr= b;
652   res= my_strtod(buf, endptr, err);
653   *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
654   return res;
655 }
656 
657 
658 static ulonglong
my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO * cs,const char * nptr,size_t length,int unsign_fl,char ** endptr,int * err)659 my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
660                              const char *nptr, size_t length,
661                              int unsign_fl,
662                              char **endptr, int *err)
663 {
664   char  buf[256], *b= buf;
665   ulonglong res;
666   const uchar *end, *s= (const uchar*) nptr;
667   my_wc_t  wc;
668   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
669   int     cnv;
670 
671   /* Cut too long strings */
672   if (length >= sizeof(buf))
673     length= sizeof(buf)-1;
674   end= s + length;
675 
676   while ((cnv= mb_wc(cs, &wc, s, end)) > 0)
677   {
678     s+= cnv;
679     if (wc > (int) (uchar) 'e' || !wc)
680       break;                            /* Can't be a number part */
681     *b++= (char) wc;
682   }
683 
684   res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
685   *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
686   return res;
687 }
688 
689 
690 /*
691   This is a fast version optimized for the case of radix 10 / -10
692 */
693 
694 static size_t
my_l10tostr_mb2_or_mb4(CHARSET_INFO * cs,char * dst,size_t len,int radix,long int val)695 my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
696                        char *dst, size_t len, int radix, long int val)
697 {
698   char buffer[66];
699   register char *p, *db, *de;
700   long int new_val;
701   int  sl= 0;
702   unsigned long int uval = (unsigned long int) val;
703 
704   p= &buffer[sizeof(buffer) - 1];
705   *p= '\0';
706 
707   if (radix < 0)
708   {
709     if (val < 0)
710     {
711       sl= 1;
712       /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
713       uval  = (unsigned long int)0 - uval;
714     }
715   }
716 
717   new_val = (long) (uval / 10);
718   *--p    = '0'+ (char) (uval - (unsigned long) new_val * 10);
719   val= new_val;
720 
721   while (val != 0)
722   {
723     new_val= val / 10;
724     *--p= '0' + (char) (val - new_val * 10);
725     val= new_val;
726   }
727 
728   if (sl)
729   {
730     *--p= '-';
731   }
732 
733   for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
734   {
735     int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
736     if (cnvres > 0)
737       dst+= cnvres;
738     else
739       break;
740   }
741   return (int) (dst - db);
742 }
743 
744 
745 static size_t
my_ll10tostr_mb2_or_mb4(CHARSET_INFO * cs,char * dst,size_t len,int radix,longlong val)746 my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
747                         char *dst, size_t len, int radix, longlong val)
748 {
749   char buffer[65];
750   register char *p, *db, *de;
751   long long_val;
752   int sl= 0;
753   ulonglong uval= (ulonglong) val;
754 
755   if (radix < 0)
756   {
757     if (val < 0)
758     {
759       sl= 1;
760       /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
761       uval = (ulonglong)0 - uval;
762     }
763   }
764 
765   p= &buffer[sizeof(buffer)-1];
766   *p='\0';
767 
768   if (uval == 0)
769   {
770     *--p= '0';
771     goto cnv;
772   }
773 
774   while (uval > (ulonglong) LONG_MAX)
775   {
776     ulonglong quo= uval/(uint) 10;
777     uint rem= (uint) (uval- quo* (uint) 10);
778     *--p= '0' + rem;
779     uval= quo;
780   }
781 
782   long_val= (long) uval;
783   while (long_val != 0)
784   {
785     long quo= long_val/10;
786     *--p= (char) ('0' + (long_val - quo*10));
787     long_val= quo;
788   }
789 
790 cnv:
791   if (sl)
792   {
793     *--p= '-';
794   }
795 
796   for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
797   {
798     int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
799     if (cnvres > 0)
800       dst+= cnvres;
801     else
802       break;
803   }
804   return (int) (dst -db);
805 }
806 
807 #endif /* HAVE_CHARSET_mb2_or_mb4 */
808 
809 
810 #ifdef HAVE_CHARSET_mb2
811 /**
812   Convert a Unicode code point to a digit.
813   @param      wc  - the input Unicode code point
814   @param[OUT] c   - the output character representing the digit value 0..9
815 
816   @return   0     - if wc is a good digit
817   @return   1     - if wc is not a digit
818 */
819 static inline my_bool
wc2digit_uchar(uchar * c,my_wc_t wc)820 wc2digit_uchar(uchar *c, my_wc_t wc)
821 {
822   return wc > '9' || (c[0]= (uchar) (wc - '0')) > 9;
823 }
824 
825 
826 static longlong
my_strtoll10_mb2(CHARSET_INFO * cs,const char * nptr,char ** endptr,int * error)827 my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
828                  const char *nptr, char **endptr, int *error)
829 {
830   const uchar *s, *end, *start, *n_end, *true_end;
831   uchar UNINIT_VAR(c);
832   unsigned long i, j, k;
833   ulonglong li;
834   int negative;
835   ulong cutoff, cutoff2, cutoff3;
836   my_wc_t wc;
837   int res;
838   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
839 
840   s= (const uchar *) nptr;
841   /* If fixed length string */
842   if (endptr)
843   {
844     /*
845       Make sure string length is even.
846       Odd length indicates a bug in the caller.
847       Assert in debug, round in production.
848     */
849     DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0);
850     end= s + ((*endptr - (const char*) s) / 2) * 2;
851 
852     for ( ; ; ) /* Skip leading spaces and tabs */
853     {
854       if ((res= mb_wc(cs, &wc, s, end)) <= 0)
855         goto no_conv;
856       s+= res;
857       if (wc != ' ' && wc != '\t')
858         break;
859     }
860   }
861   else
862   {
863      /* We don't support null terminated strings in UCS2 */
864      goto no_conv;
865   }
866 
867   /* Check for a sign. */
868   negative= 0;
869   if (wc == '-')
870   {
871     *error= -1;                                        /* Mark as negative number */
872     negative= 1;
873     if ((res= mb_wc(cs, &wc, s, end)) <= 0)
874       goto no_conv;
875     s+= res; /* wc is now expected to hold the first digit. */
876     cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
877     cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
878     cutoff3=  MAX_NEGATIVE_NUMBER % 100;
879   }
880   else
881   {
882     *error= 0;
883     if (wc == '+')
884     {
885       if ((res= mb_wc(cs, &wc, s, end)) <= 0)
886         goto no_conv;
887       s+= res; /* wc is now expected to hold the first digit. */
888     }
889     cutoff=  ULONGLONG_MAX / LFACTOR2;
890     cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
891     cutoff3=  ULONGLONG_MAX % 100;
892   }
893 
894   /*
895     The code below assumes that 'wc' holds the first digit
896     and 's' points to the next character after it.
897 
898     Scan pre-zeros if any.
899   */
900   if (wc == '0')
901   {
902     i= 0;
903     for ( ; ; s+= res)
904     {
905       if (s == end)
906         goto end_i;                                /* Return 0 */
907       if ((res= mb_wc(cs, &wc, s, end)) <= 0)
908         goto no_conv;
909       if (wc != '0')
910         break;
911     }
912     n_end= s + 2 * INIT_CNT;
913   }
914   else
915   {
916     /* Read first digit to check that it's a valid number */
917     if ((i= (wc - '0')) > 9)
918       goto no_conv;
919     n_end= s + 2 * (INIT_CNT-1);
920   }
921 
922   /* Handle first 9 digits and store them in i */
923   if (n_end > end)
924     n_end= end;
925   for ( ; ; s+= res)
926   {
927     if ((res= mb_wc(cs, &wc, s, n_end)) <= 0)
928       break;
929     if (wc2digit_uchar(&c, wc))
930       goto end_i;
931     i= i*10+c;
932   }
933   if (s == end)
934     goto end_i;
935 
936   /* Handle next 9 digits and store them in j */
937   j= 0;
938   start= s;                                /* Used to know how much to shift i */
939   n_end= true_end= s + 2 * INIT_CNT;
940   if (n_end > end)
941     n_end= end;
942   do
943   {
944     if ((res= mb_wc(cs, &wc, s, end)) <= 0)
945       goto no_conv;
946     if (wc2digit_uchar(&c, wc))
947       goto end_i_and_j;
948     s+= res;
949     j= j * 10 + c;
950   } while (s != n_end);
951   if (s == end)
952   {
953     if (s != true_end)
954       goto end_i_and_j;
955     goto end3;
956   }
957 
958   /* Handle the next 1 or 2 digits and store them in k */
959   if ((res= mb_wc(cs, &wc, s, end)) <= 0)
960     goto no_conv;
961   if ((k= (wc - '0')) > 9)
962     goto end3;
963   s+= res;
964 
965   if (s == end)
966     goto end4;
967   if ((res= mb_wc(cs, &wc, s, end)) <= 0)
968     goto no_conv;
969   if (wc2digit_uchar(&c, wc))
970     goto end4;
971   s+= res;
972   k= k*10+c;
973   *endptr= (char*) s;
974 
975   /* number string should have ended here */
976   if (s != end && mb_wc(cs, &wc, s, end) > 0 && ((uchar) (wc - '0')) <= 9)
977     goto overflow;
978 
979   /* Check that we didn't get an overflow with the last digit */
980   if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
981                                      k > cutoff3)))
982     goto overflow;
983   li=i*LFACTOR2+ (ulonglong) j*100 + k;
984   return (longlong) li;
985 
986 overflow:                                        /* *endptr is set here */
987   *error= MY_ERRNO_ERANGE;
988   return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
989 
990 end_i:
991   *endptr= (char*) s;
992   return (negative ? ((longlong) -(long) i) : (longlong) i);
993 
994 end_i_and_j:
995   li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
996   *endptr= (char*) s;
997   return (negative ? -((longlong) li) : (longlong) li);
998 
999 end3:
1000   li=(ulonglong) i*LFACTOR+ (ulonglong) j;
1001   *endptr= (char*) s;
1002   return (negative ? -((longlong) li) : (longlong) li);
1003 
1004 end4:
1005   li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
1006   *endptr= (char*) s;
1007   if (negative)
1008   {
1009    if (li > MAX_NEGATIVE_NUMBER)
1010      goto overflow;
1011    return -((longlong) li);
1012   }
1013   return (longlong) li;
1014 
1015 no_conv:
1016   /* There was no number to convert.  */
1017   *error= MY_ERRNO_EDOM;
1018   *endptr= (char *) nptr;
1019   return 0;
1020 }
1021 
1022 
1023 static size_t
my_scan_mb2(CHARSET_INFO * cs,const char * str,const char * end,int sequence_type)1024 my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
1025             const char *str, const char *end, int sequence_type)
1026 {
1027   const char *str0= str;
1028   my_wc_t wc;
1029   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1030   int res;
1031 
1032   switch (sequence_type)
1033   {
1034   case MY_SEQ_SPACES:
1035     for (res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end);
1036          res > 0 && wc == ' ';
1037          str+= res,
1038          res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end))
1039     {
1040     }
1041     return (size_t) (str - str0);
1042   case MY_SEQ_NONSPACES:
1043     DBUG_ASSERT(0); /* Not implemented */
1044     /* pass through */
1045   default:
1046     return 0;
1047   }
1048 }
1049 
1050 
1051 static void
my_fill_mb2(CHARSET_INFO * cs,char * s,size_t slen,int fill)1052 my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill)
1053 {
1054   char buf[10], *last;
1055   size_t buflen, remainder;
1056 
1057   DBUG_ASSERT((slen % 2) == 0);
1058 
1059   buflen= my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
1060                           (uchar*) buf + sizeof(buf));
1061 
1062   DBUG_ASSERT(buflen > 0);
1063 
1064   /*
1065     "last" in the last position where a sequence of "buflen" bytes can start.
1066   */
1067   for (last= s + slen - buflen; s <= last; s+= buflen)
1068   {
1069     /* Enough space for the character */
1070     memcpy(s, buf, buflen);
1071   }
1072 
1073   /*
1074     If there are some more space which is not enough
1075     for the whole multibyte character, then add trailing zeros.
1076   */
1077   if ((remainder= last + buflen - s) > 0)
1078     bzero(s, (size_t) remainder);
1079 }
1080 
1081 
1082 static size_t
my_vsnprintf_mb2(char * dst,size_t n,const char * fmt,va_list ap)1083 my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
1084 {
1085   char *start=dst, *end= dst + n - 1;
1086   for (; *fmt ; fmt++)
1087   {
1088     if (fmt[0] != '%')
1089     {
1090       if (dst == end)                     /* End of buffer */
1091         break;
1092 
1093       *dst++='\0';
1094       *dst++= *fmt;          /* Copy ordinary char */
1095       continue;
1096     }
1097 
1098     fmt++;
1099 
1100     /* Skip if max size is used (to be compatible with printf) */
1101     while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
1102       fmt++;
1103 
1104     if (*fmt == 'l')
1105       fmt++;
1106 
1107     if (*fmt == 's')                      /* String parameter */
1108     {
1109       char *par= va_arg(ap, char *);
1110       size_t plen;
1111       size_t left_len= (size_t)(end-dst);
1112       if (!par)
1113         par= (char*) "(null)";
1114       plen= strlen(par);
1115       if (left_len <= plen * 2)
1116         plen = left_len / 2 - 1;
1117 
1118       for ( ; plen ; plen--, dst+=2, par++)
1119       {
1120         dst[0]= '\0';
1121         dst[1]= par[0];
1122       }
1123       continue;
1124     }
1125     else if (*fmt == 'd' || *fmt == 'u')  /* Integer parameter */
1126     {
1127       int iarg;
1128       char nbuf[16];
1129       char *pbuf= nbuf;
1130 
1131       if ((size_t) (end - dst) < 32)
1132         break;
1133       iarg= va_arg(ap, int);
1134       if (*fmt == 'd')
1135         int10_to_str((long) iarg, nbuf, -10);
1136       else
1137         int10_to_str((long) (uint) iarg, nbuf,10);
1138 
1139       for (; pbuf[0]; pbuf++)
1140       {
1141         *dst++= '\0';
1142         *dst++= *pbuf;
1143       }
1144       continue;
1145     }
1146 
1147     /* We come here on '%%', unknown code or too long parameter */
1148     if (dst == end)
1149       break;
1150     *dst++= '\0';
1151     *dst++= '%';                            /* % used as % or unknown code */
1152   }
1153 
1154   DBUG_ASSERT(dst <= end);
1155   *dst='\0';                                /* End of errmessage */
1156   return (size_t) (dst - start);
1157 }
1158 
1159 
1160 static size_t
my_snprintf_mb2(CHARSET_INFO * cs,char * to,size_t n,const char * fmt,...)1161 my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
1162                 char* to, size_t n, const char* fmt, ...)
1163 {
1164   size_t ret;
1165   va_list args;
1166   va_start(args,fmt);
1167   ret= my_vsnprintf_mb2(to, n, fmt, args);
1168   va_end(args);
1169   return ret;
1170 }
1171 
1172 
1173 static size_t
my_lengthsp_mb2(CHARSET_INFO * cs,const char * ptr,size_t length)1174 my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
1175                 const char *ptr, size_t length)
1176 {
1177   const char *end= ptr + length;
1178   while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
1179     end-= 2;
1180   return (size_t) (end - ptr);
1181 }
1182 
1183 #endif /* HAVE_CHARSET_mb2*/
1184 
1185 
1186 /*
1187   Next part is actually HAVE_CHARSET_utf16-specific,
1188   but the JSON functions needed my_utf16_uni()
1189   so the #ifdef was moved lower.
1190 */
1191 #include "ctype-utf16.h"
1192 
/*
  Byte-pattern tests consumed by the strcoll.inl instantiations that
  follow. Here b0 and b2 (the first byte of each 16-bit unit) are the
  bytes inspected; b1 and b3 are ignored.
  IS_MB2_CHAR: the first unit is not flagged as a surrogate head.
  IS_MB4_CHAR: a high-surrogate head followed by a low-surrogate head.
  NOTE(review): the MY_UTF16_* helpers presumably come from ctype-utf16.h,
  included just above - confirm.
*/
#define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
1195 
my_weight_mb2_utf16mb2_general_ci(uchar b0,uchar b1)1196 static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
1197 {
1198   my_wc_t wc= MY_UTF16_WC2(b0, b1);
1199   MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
1200   return (int) (page ? page[wc & 0xFF].sort : wc);
1201 }
/*
  Instantiate the templates in strcoll.inl with the name suffix
  _utf16_general_ci (MY_FUNCTION_NAME pastes "my_", the template's own
  name and the suffix together). Both DEFINE_STRNXFRM_UNICODE switches
  are set for this inclusion.
  Weights: 2-byte characters go through
  my_weight_mb2_utf16mb2_general_ci(); every 4-byte character weighs
  MY_CS_REPLACEMENT_CHARACTER; an ill-formed byte x weighs 0xFF0000 + x.
  NOTE(review): strcoll.inl presumably #undefs the per-instantiation
  macros at its end, since they are #defined again for the next
  inclusion - confirm.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf16_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
1214 
/*
  Instantiate strcoll.inl with the name suffix _utf16_bin.
  Weights are the code points themselves: MY_UTF16_WC2 for 2-byte and
  MY_UTF16_WC4 for 4-byte characters; an ill-formed byte x weighs
  0xFF0000 + x.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.inl"
1220 
/*
  Instantiate strcoll.inl with the name suffix _utf16_general_nopad_ci
  and DEFINE_STRNNCOLLSP_NOPAD set. The weight macros are the same as
  those used for the _utf16_general_ci suffix.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
1227 
/*
  Instantiate strcoll.inl with the name suffix _utf16_nopad_bin and
  DEFINE_STRNNCOLLSP_NOPAD set. Weights are the code points themselves,
  as for the _utf16_bin suffix.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b0, b1))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b0, b1, b2, b3))
#include "strcoll.inl"

/* Drop the character tests; they are defined again with a different
   byte order before the utf16le instantiations. */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
1237 
/*
  These two functions are used by the JSON library, so they are exported
  and compiled into the library unconditionally.
*/
1242 
1243 /*static*/ int
my_utf16_uni(CHARSET_INFO * cs,my_wc_t * pwc,const uchar * s,const uchar * e)1244 my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
1245              my_wc_t *pwc, const uchar *s, const uchar *e)
1246 {
1247   return my_mb_wc_utf16_quick(pwc, s, e);
1248 }
1249 
1250 
1251 /*static*/ int
my_uni_utf16(CHARSET_INFO * cs,my_wc_t wc,uchar * s,uchar * e)1252 my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
1253              my_wc_t wc, uchar *s, uchar *e)
1254 {
1255   if (wc <= 0xFFFF)
1256   {
1257     if (s + 2 > e)
1258       return MY_CS_TOOSMALL2;
1259     if (MY_UTF16_SURROGATE(wc))
1260       return MY_CS_ILUNI;
1261     *s++= (uchar) (wc >> 8);
1262     *s= (uchar) (wc & 0xFF);
1263     return 2;
1264   }
1265 
1266   if (wc <= 0x10FFFF)
1267   {
1268     if (s + 4 > e)
1269       return MY_CS_TOOSMALL4;
1270     *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1271     *s++= (uchar) (wc >> 10) & 0xFF;
1272     *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1273     *s= (uchar) wc & 0xFF;
1274     return 4;
1275   }
1276 
1277   return MY_CS_ILUNI;
1278 }
1279 
1280 
1281 #ifdef HAVE_CHARSET_utf16
1282 
/* Character set name stored in the "cs name" slot of the utf16le collations */
const char charset_name_utf16le[]= "utf16le";
1284 
1285 static inline void
my_tolower_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1286 my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1287 {
1288   MY_UNICASE_CHARACTER *page;
1289   if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1290     *wc= page[*wc & 0xFF].tolower;
1291 }
1292 
1293 
1294 static inline void
my_toupper_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1295 my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1296 {
1297   MY_UNICASE_CHARACTER *page;
1298   if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1299     *wc= page[*wc & 0xFF].toupper;
1300 }
1301 
1302 
1303 static inline void
my_tosort_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1304 my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1305 {
1306   if (*wc <= uni_plane->maxchar)
1307   {
1308     MY_UNICASE_CHARACTER *page;
1309     if ((page= uni_plane->page[*wc >> 8]))
1310       *wc= page[*wc & 0xFF].sort;
1311   }
1312   else
1313   {
1314     *wc= MY_CS_REPLACEMENT_CHARACTER;
1315   }
1316 }
1317 
1318 
1319 
1320 static size_t
my_caseup_utf16(CHARSET_INFO * cs,const char * src,size_t srclen,char * dst,size_t dstlen)1321 my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
1322                 char *dst, size_t dstlen)
1323 {
1324   my_wc_t wc;
1325   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1326   my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
1327   int res;
1328   const char *srcend= src + srclen;
1329   char *dstend= dst + dstlen;
1330   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1331   DBUG_ASSERT(srclen <= dstlen);
1332 
1333   while ((src < srcend) &&
1334          (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1335   {
1336     my_toupper_utf16(uni_plane, &wc);
1337     if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
1338       break;
1339     src+= res;
1340     dst+= res;
1341   }
1342   return srclen;
1343 }
1344 
1345 
1346 static void
my_hash_sort_utf16_nopad(CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * nr1,ulong * nr2)1347 my_hash_sort_utf16_nopad(CHARSET_INFO *cs,
1348                          const uchar *s, size_t slen,
1349                          ulong *nr1, ulong *nr2)
1350 {
1351   my_wc_t wc;
1352   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1353   int res;
1354   const uchar *e= s + slen;
1355   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1356   register ulong m1= *nr1, m2= *nr2;
1357 
1358   while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
1359   {
1360     my_tosort_utf16(uni_plane, &wc);
1361     MY_HASH_ADD_16(m1, m2, wc);
1362     s+= res;
1363   }
1364   *nr1= m1;
1365   *nr2= m2;
1366 }
1367 
1368 
1369 static void
my_hash_sort_utf16(CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * nr1,ulong * nr2)1370 my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
1371                    ulong *nr1, ulong *nr2)
1372 {
1373   size_t lengthsp= my_ci_lengthsp(cs, (const char *) s, slen);
1374   my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2);
1375 }
1376 
1377 
1378 static size_t
my_casedn_utf16(CHARSET_INFO * cs,const char * src,size_t srclen,char * dst,size_t dstlen)1379 my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
1380                 char *dst, size_t dstlen)
1381 {
1382   my_wc_t wc;
1383   my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1384   my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
1385   int res;
1386   const char *srcend= src + srclen;
1387   char *dstend= dst + dstlen;
1388   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1389   DBUG_ASSERT(srclen <= dstlen);
1390 
1391   while ((src < srcend) &&
1392          (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1393   {
1394     my_tolower_utf16(uni_plane, &wc);
1395     if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend))
1396       break;
1397     src+= res;
1398     dst+= res;
1399   }
1400   return srclen;
1401 }
1402 
1403 
1404 static int
my_charlen_utf16(CHARSET_INFO * cs,const uchar * str,const uchar * end)1405 my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
1406 {
1407   my_wc_t wc;
1408   return my_ci_mb_wc(cs, &wc, str, end);
1409 }
1410 
1411 
/*
  Instantiate ctype-mb.inl with the name suffix _utf16, using
  my_charlen_utf16() as the per-character length function. The three
  macros are removed again right after the inclusion.
*/
#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf16
#define CHARLEN(cs,str,end)       my_charlen_utf16(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.inl"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_utf16 */
1420 
1421 
1422 static size_t
my_numchars_utf16(CHARSET_INFO * cs,const char * b,const char * e)1423 my_numchars_utf16(CHARSET_INFO *cs,
1424                   const char *b, const char *e)
1425 {
1426   size_t nchars= 0;
1427   for ( ; ; nchars++)
1428   {
1429     size_t charlen= my_ismbchar(cs, b, e);
1430     if (!charlen)
1431       break;
1432     b+= charlen;
1433   }
1434   return nchars;
1435 }
1436 
1437 
1438 static size_t
my_charpos_utf16(CHARSET_INFO * cs,const char * b,const char * e,size_t pos)1439 my_charpos_utf16(CHARSET_INFO *cs,
1440                  const char *b, const char *e, size_t pos)
1441 {
1442   const char *b0= b;
1443   uint charlen;
1444 
1445   for ( ; pos; b+= charlen, pos--)
1446   {
1447     if (!(charlen= my_ismbchar(cs, b, e)))
1448       return (e + 2 - b0); /* Error, return pos outside the string */
1449   }
1450   return (size_t) (pos ? (e + 2 - b0) : (b - b0));
1451 }
1452 
1453 
1454 static int
my_wildcmp_utf16_ci(CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)1455 my_wildcmp_utf16_ci(CHARSET_INFO *cs,
1456                     const char *str,const char *str_end,
1457                     const char *wildstr,const char *wildend,
1458                     int escape, int w_one, int w_many)
1459 {
1460   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1461   return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1462                             escape, w_one, w_many, uni_plane);
1463 }
1464 
1465 
1466 static int
my_wildcmp_utf16_bin(CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)1467 my_wildcmp_utf16_bin(CHARSET_INFO *cs,
1468                      const char *str,const char *str_end,
1469                      const char *wildstr,const char *wildend,
1470                      int escape, int w_one, int w_many)
1471 {
1472   return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1473                             escape, w_one, w_many, NULL);
1474 }
1475 
1476 
1477 static void
my_hash_sort_utf16_nopad_bin(CHARSET_INFO * cs,const uchar * pos,size_t len,ulong * nr1,ulong * nr2)1478 my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs  __attribute__((unused)),
1479                              const uchar *pos, size_t len,
1480                              ulong *nr1, ulong *nr2)
1481 {
1482   const uchar *end= pos + len;
1483   register ulong m1= *nr1, m2= *nr2;
1484 
1485   for ( ; pos < end ; pos++)
1486   {
1487     MY_HASH_ADD(m1, m2, (uint)*pos);
1488   }
1489   *nr1= m1;
1490   *nr2= m2;
1491 }
1492 
1493 
1494 static void
my_hash_sort_utf16_bin(CHARSET_INFO * cs,const uchar * pos,size_t len,ulong * nr1,ulong * nr2)1495 my_hash_sort_utf16_bin(CHARSET_INFO *cs,
1496                        const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
1497 {
1498   size_t lengthsp= my_ci_lengthsp(cs, (const char *) pos, len);
1499   my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2);
1500 }
1501 
1502 
/*
  Collation function table for utf16_general_ci.
  The initializers are positional.
  NOTE(review): their order must match the field order of
  MY_COLLATION_HANDLER, which is declared outside this file - confirm
  when adding or moving an entry.
*/
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16_general_ci,
  my_strnncollsp_utf16_general_ci,
  my_strnncollsp_nchars_utf16_general_ci,
  my_strnxfrm_utf16_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf16_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16,
  my_propagate_simple
};
1518 
1519 
/*
  Collation function table for utf16_bin.
  The initializers are positional.
  NOTE(review): their order must match the field order of
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16_bin,
  my_strnncollsp_utf16_bin,
  my_strnncollsp_nchars_utf16_bin,
  my_strnxfrm_unicode_full_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf16_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_bin,
  my_propagate_simple
};
1535 
1536 
/*
  Collation function table for utf16_general_nopad_ci.
  It reuses my_strnncoll_utf16_general_ci and plugs in the nopad
  variants of strnncollsp, strnncollsp_nchars, strnxfrm and hash_sort.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16_general_ci,
  my_strnncollsp_utf16_general_nopad_ci,
  my_strnncollsp_nchars_utf16_general_nopad_ci,
  my_strnxfrm_nopad_utf16_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf16_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_nopad,
  my_propagate_simple
};
1552 
1553 
/*
  Collation function table for utf16_nopad_bin.
  It reuses my_strnncoll_utf16_bin and plugs in the nopad variants of
  strnncollsp, strnncollsp_nchars, strnxfrm and hash_sort.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16_bin,
  my_strnncollsp_utf16_nopad_bin,
  my_strnncollsp_nchars_utf16_nopad_bin,
  my_strnxfrm_unicode_full_nopad_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf16_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_nopad_bin,
  my_propagate_simple
};
1569 
1570 
/*
  Character set function table referenced by the utf16 collations
  defined in this file. It is not static, so it has external linkage.
  NOTE(review): the initializers are positional; their order must match
  MY_CHARSET_HANDLER, which is declared outside this file - confirm.
*/
MY_CHARSET_HANDLER my_charset_utf16_handler=
{
  NULL,                /* init         */
  my_numchars_utf16,
  my_charpos_utf16,
  my_lengthsp_mb2,
  my_numcells_mb,
  my_utf16_uni,        /* mb_wc        */
  my_uni_utf16,        /* wc_mb        */
  my_mb_ctype_mb,
  my_caseup_str_mb2_or_mb4,
  my_casedn_str_mb2_or_mb4,
  my_caseup_utf16,
  my_casedn_utf16,
  my_snprintf_mb2,
  my_l10tostr_mb2_or_mb4,
  my_ll10tostr_mb2_or_mb4,
  my_fill_mb2,
  my_strntol_mb2_or_mb4,
  my_strntoul_mb2_or_mb4,
  my_strntoll_mb2_or_mb4,
  my_strntoull_mb2_or_mb4,
  my_strntod_mb2_or_mb4,
  my_strtoll10_mb2,
  my_strntoull10rnd_mb2_or_mb4,
  my_scan_mb2,
  my_charlen_utf16,
  my_well_formed_char_length_utf16,
  my_copy_fix_mb2_or_mb4,
  my_uni_utf16,
  my_wc_to_printable_generic
};
1603 
1604 
/*
  utf16_general_ci: collation number 54, flagged MY_CS_PRIMARY.
  Characters take 2 to 4 bytes and the pad character is a space.
*/
struct charset_info_st my_charset_utf16_general_ci=
{
  54,0,0,              /* number       */
  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf16,             /* cs name    */
  "utf16_general_ci",  /* name         */
  "UTF-16 Unicode",    /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf16_handler,              /* character set functions */
  &my_collation_utf16_general_ci_handler  /* collation functions     */
};
1636 
1637 
/*
  utf16_bin: collation number 55, flagged MY_CS_BINSORT.
  Characters take 2 to 4 bytes and the pad character is a space.
*/
struct charset_info_st my_charset_utf16_bin=
{
  55,0,0,              /* number       */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf16,             /* cs name      */
  "utf16_bin",         /* name         */
  "UTF-16 Unicode",    /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf16_handler,       /* character set functions */
  &my_collation_utf16_bin_handler  /* collation functions     */
};
1669 
1670 
/*
  utf16_general_nopad_ci: collation number MY_NOPAD_ID(54), flagged
  MY_CS_NOPAD and wired to the nopad general_ci collation function table.
*/
struct charset_info_st my_charset_utf16_general_nopad_ci=
{
  MY_NOPAD_ID(54),0,0, /* number           */
  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
  charset_name_utf16,             /* cs name          */
  "utf16_general_nopad_ci", /* name        */
  "UTF-16 Unicode",    /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf16_handler,                    /* character set functions */
  &my_collation_utf16_general_nopad_ci_handler  /* collation functions     */
};
1702 
1703 
/*
  utf16_nopad_bin: collation number MY_NOPAD_ID(55), flagged
  MY_CS_BINSORT and MY_CS_NOPAD and wired to the nopad binary collation
  function table.
*/
struct charset_info_st my_charset_utf16_nopad_bin=
{
  MY_NOPAD_ID(55),0,0, /* number           */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
  MY_CS_NOPAD,
  charset_name_utf16,             /* cs name          */
  "utf16_nopad_bin",   /* name             */
  "UTF-16 Unicode",    /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf16_handler,             /* character set functions */
  &my_collation_utf16_nopad_bin_handler  /* collation functions     */
};
1736 
1737 
/*
  Byte-pattern tests for the utf16le instantiations of strcoll.inl that
  follow. Here b1 and b3 (the second byte of each 16-bit unit) are the
  bytes inspected; b0 and b2 are ignored.
  IS_MB2_CHAR: the first unit is not flagged as a surrogate head.
  IS_MB4_CHAR: a high-surrogate head followed by a low-surrogate head.
*/
#define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b1))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
1740 
/*
  Instantiate the templates in strcoll.inl with the name suffix
  _utf16le_general_ci. Both DEFINE_STRNXFRM_UNICODE switches are set,
  and characters are decoded through my_ci_mb_wc().
  Weights: the two bytes of a 2-byte character are swapped (b1,b0)
  before being handed to my_weight_mb2_utf16mb2_general_ci(); every
  4-byte character weighs MY_CS_REPLACEMENT_CHARACTER; an ill-formed
  byte x weighs 0xFF0000 + x.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  (my_ci_mb_wc(cs, pwc, s, e))
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
1753 
/*
  Instantiate strcoll.inl with the name suffix _utf16le_bin.
  Weights are the code points themselves; the bytes of each 16-bit unit
  are swapped before being passed to MY_UTF16_WC2 / MY_UTF16_WC4.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.inl"
1759 
/*
  Instantiate strcoll.inl with the name suffix
  _utf16le_general_nopad_ci and DEFINE_STRNNCOLLSP_NOPAD set. The
  weight macros are the same as for the _utf16le_general_ci suffix.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.inl"
1766 
/*
  Instantiate strcoll.inl with the name suffix _utf16le_nopad_bin and
  DEFINE_STRNNCOLLSP_NOPAD set. Weights are the code points themselves,
  as for the _utf16le_bin suffix.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        ((int) MY_UTF16_WC2(b1, b0))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF16_WC4(b1, b0, b3, b2))
#include "strcoll.inl"

/* The utf16le character tests are not needed past this point */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
1776 
1777 static int
my_utf16le_uni(CHARSET_INFO * cs,my_wc_t * pwc,const uchar * s,const uchar * e)1778 my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)),
1779                my_wc_t *pwc, const uchar *s, const uchar *e)
1780 {
1781   my_wc_t lo;
1782 
1783   if (s + 2 > e)
1784     return MY_CS_TOOSMALL2;
1785 
1786   if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
1787       (*pwc > MY_UTF16_SURROGATE_LOW_LAST))
1788     return 2; /* [0000-D7FF,E000-FFFF] */
1789 
1790   if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
1791     return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
1792 
1793   if (s + 4  > e)
1794     return MY_CS_TOOSMALL4;
1795 
1796   s+= 2;
1797 
1798   if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
1799       lo > MY_UTF16_SURROGATE_LOW_LAST)
1800     return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */
1801 
1802   *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
1803   return 4;
1804 }
1805 
1806 
1807 static int
my_uni_utf16le(CHARSET_INFO * cs,my_wc_t wc,uchar * s,uchar * e)1808 my_uni_utf16le(CHARSET_INFO *cs __attribute__((unused)),
1809                my_wc_t wc, uchar *s, uchar *e)
1810 {
1811   uint32 first, second, total;
1812   if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
1813       (wc > MY_UTF16_SURROGATE_LOW_LAST &&
1814        wc <= 0xFFFF))
1815   {
1816     if (s + 2 > e)
1817       return MY_CS_TOOSMALL2;
1818     int2store(s, wc);
1819     return 2; /* [0000-D7FF,E000-FFFF] */
1820   }
1821 
1822   if (wc < 0xFFFF || wc > 0x10FFFF)
1823     return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
1824 
1825   if (s + 4 > e)
1826     return MY_CS_TOOSMALL4;
1827 
1828   wc-= 0x10000;
1829   first=  (0xD800 | ((wc >> 10) & 0x3FF));
1830   second= (0xDC00 | (wc & 0x3FF));
1831   total=  first | (second << 16);
1832   int4store(s, total);
1833   return 4; /* [010000-10FFFF] */
1834 }
1835 
1836 
1837 static size_t
my_lengthsp_utf16le(CHARSET_INFO * cs,const char * ptr,size_t length)1838 my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)),
1839                     const char *ptr, size_t length)
1840 {
1841   const char *end= ptr + length;
1842   while (end > ptr + 1 && uint2korr(end - 2) == ' ')
1843     end-= 2;
1844   return (size_t) (end - ptr);
1845 }
1846 
1847 
/*
  Collation function table for utf16le_general_ci.
  The comparison and strnxfrm entries are the _utf16le_ instantiations;
  the wildcmp and hash_sort entries are the my_*_utf16 functions.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16le_general_ci,
  my_strnncollsp_utf16le_general_ci,
  my_strnncollsp_nchars_utf16le_general_ci,
  my_strnxfrm_utf16le_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf16_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16,
  my_propagate_simple
};
1863 
1864 
/*
  Collation function table for utf16le_bin.
  The comparison entries are the _utf16le_bin instantiations; the
  wildcmp and hash_sort entries are the my_*_utf16_bin functions.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16le_bin,
  my_strnncollsp_utf16le_bin,
  my_strnncollsp_nchars_utf16le_bin,
  my_strnxfrm_unicode_full_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf16_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_bin,
  my_propagate_simple
};
1880 
1881 
/*
  Collation function table for utf16le_general_nopad_ci.
  It reuses my_strnncoll_utf16le_general_ci and plugs in the nopad
  variants of strnncollsp, strnncollsp_nchars, strnxfrm and hash_sort.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16le_general_ci,
  my_strnncollsp_utf16le_general_nopad_ci,
  my_strnncollsp_nchars_utf16le_general_nopad_ci,
  my_strnxfrm_nopad_utf16le_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf16_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_nopad,
  my_propagate_simple
};
1897 
1898 
/*
  Collation function table for utf16le_nopad_bin.
  It reuses my_strnncoll_utf16le_bin and plugs in the nopad variants of
  strnncollsp, strnncollsp_nchars, strnxfrm and hash_sort.
  NOTE(review): the initializers are positional; their order must match
  MY_COLLATION_HANDLER, which is declared outside this file - confirm.
*/
static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
{
  NULL,                /* init */
  my_strnncoll_utf16le_bin,
  my_strnncollsp_utf16le_nopad_bin,
  my_strnncollsp_nchars_utf16le_nopad_bin,
  my_strnxfrm_unicode_full_nopad_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf16_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf16_nopad_bin,
  my_propagate_simple
};
1914 
1915 
/*
  Character set function table for utf16le.
  The utf16le-specific entries are my_lengthsp_utf16le, my_utf16le_uni
  and my_uni_utf16le (which appears twice); the remaining entries are
  the my_*_utf16, *_mb2, *_mb2_or_mb4 and generic functions.
  NOTE(review): the initializers are positional; their order must match
  MY_CHARSET_HANDLER, which is declared outside this file - confirm.
*/
static MY_CHARSET_HANDLER my_charset_utf16le_handler=
{
  NULL,                /* init         */
  my_numchars_utf16,
  my_charpos_utf16,
  my_lengthsp_utf16le,
  my_numcells_mb,
  my_utf16le_uni,      /* mb_wc        */
  my_uni_utf16le,      /* wc_mb        */
  my_mb_ctype_mb,
  my_caseup_str_mb2_or_mb4,
  my_casedn_str_mb2_or_mb4,
  my_caseup_utf16,
  my_casedn_utf16,
  my_snprintf_mb2,
  my_l10tostr_mb2_or_mb4,
  my_ll10tostr_mb2_or_mb4,
  my_fill_mb2,
  my_strntol_mb2_or_mb4,
  my_strntoul_mb2_or_mb4,
  my_strntoll_mb2_or_mb4,
  my_strntoull_mb2_or_mb4,
  my_strntod_mb2_or_mb4,
  my_strtoll10_mb2,
  my_strntoull10rnd_mb2_or_mb4,
  my_scan_mb2,
  my_charlen_utf16,
  my_well_formed_char_length_utf16,
  my_copy_fix_mb2_or_mb4,
  my_uni_utf16le,
  my_wc_to_printable_generic
};
1948 
1949 
/*
  utf16le_general_ci: collation number 56, flagged MY_CS_PRIMARY.
  Characters take 2 to 4 bytes and the pad character is a space.
*/
struct charset_info_st my_charset_utf16le_general_ci=
{
  56,0,0,              /* number       */
  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf16le,           /* cs name    */
  "utf16le_general_ci",/* name         */
  "UTF-16LE Unicode",  /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf16le_handler,              /* character set functions */
  &my_collation_utf16le_general_ci_handler  /* collation functions     */
};
1981 
1982 
/*
  utf16le_bin: collation number 62, flagged MY_CS_BINSORT.
  Characters take 2 to 4 bytes and the pad character is a space.
*/
struct charset_info_st my_charset_utf16le_bin=
{
  62,0,0,              /* number       */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf16le,           /* cs name      */
  "utf16le_bin",       /* name         */
  "UTF-16LE Unicode",  /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf16le_handler,       /* character set functions */
  &my_collation_utf16le_bin_handler  /* collation functions     */
};
2014 
2015 
/*
  Compiled-in descriptor of the "utf16le_general_nopad_ci" collation
  (number MY_NOPAD_ID(56)).  The flag set includes MY_CS_NOPAD, a
  character occupies 2 to 4 bytes.
*/
struct charset_info_st my_charset_utf16le_general_nopad_ci=
{
  MY_NOPAD_ID(56),0,0, /* number           */
  /* flag bits */
  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
  charset_name_utf16le,           /* cs name          */
  "utf16le_general_nopad_ci",/* name       */
  "UTF-16LE Unicode",  /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf16le_handler,                     /* charset handler   */
  &my_collation_utf16le_general_nopad_ci_handler   /* collation handler */
};
2047 
2048 
/*
  Compiled-in descriptor of the "utf16le_nopad_bin" collation
  (number MY_NOPAD_ID(62)).  The flag set includes both MY_CS_BINSORT and
  MY_CS_NOPAD, a character occupies 2 to 4 bytes.
*/
struct charset_info_st my_charset_utf16le_nopad_bin=
{
  MY_NOPAD_ID(62),0,0, /* number           */
  /* flag bits (continued on the next line) */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
  MY_CS_NOPAD,
  charset_name_utf16le,           /* cs name          */
  "utf16le_nopad_bin", /* name             */
  "UTF-16LE Unicode",  /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  2,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf16le_handler,              /* charset handler   */
  &my_collation_utf16le_nopad_bin_handler   /* collation handler */
};
2081 
2082 
2083 #endif /* HAVE_CHARSET_utf16 */
2084 
2085 
2086 #ifdef HAVE_CHARSET_utf32
2087 
2088 #include "ctype-utf32.h"
2089 
/*
  Check whether b0 and b1 start a valid UTF-32 four-byte sequence:
  the first byte must be zero and the second at most 0x10, so
  characters greater than U+10FFFF are not accepted.
*/
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))

/* Four-byte character test; only b0 and b1 are examined, b2/b3 are unused */
#define IS_MB4_CHAR(b0,b1,b2,b3)   (IS_UTF32_MBHEAD4(b0,b1))
2097 
2098 
/*
  Sort weight of one UTF-32 character for the general_ci collations.

  The four bytes are combined with MY_UTF32_WC4().  Code points up to
  U+FFFF are looked up in my_unicase_default_pages: when the page exists
  its "sort" value is the weight, otherwise the code point itself is.
  Anything above U+FFFF weighs MY_CS_REPLACEMENT_CHARACTER.
*/
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
                                             uchar b2, uchar b3)
{
  MY_UNICASE_CHARACTER *page;
  my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);

  if (wc > 0xFFFF)
    return MY_CS_REPLACEMENT_CHARACTER;

  page= my_unicase_default_pages[wc >> 8];
  if (!page)
    return (int) wc;                 /* No case page: weight is the code */
  return (int) page[wc & 0xFF].sort;
}
/*
  Instantiate the comparison functions for the four utf32 collations by
  setting parameter macros and including strcoll.inl once per collation.
  The functions named my_<x>_utf32_... that the collation handler tables
  later in this file refer to are not defined anywhere else in this
  section, so they presumably come from these includes.

  NOTE(review): MY_FUNCTION_NAME, WEIGHT_ILSEQ and WEIGHT_MB4 are
  re-defined before every include without an #undef here, so strcoll.inl
  presumably undefines its parameter macros itself - confirm there.
*/

/* utf32_general_ci: case-insensitive weights, plus strnxfrm variants */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf32_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.inl"

/* utf32_bin: the weight of a character is its code point */
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.inl"

/* utf32_general_nopad_ci: same weights as general_ci, NOPAD strnncollsp */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_general_nopad_ci
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.inl"

/* utf32_nopad_bin: same weights as utf32_bin, NOPAD strnncollsp */
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_nopad_bin
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3)  ((int) MY_UTF32_WC4(b0, b1, b2, b3))
#include "strcoll.inl"

/* The character-class helper macros are not needed past this point */
#undef IS_MB2_CHAR
#undef IS_MB4_CHAR
2141 
2142 
/*
  Decode one character from the byte range [s, e) into *pwc.

  Thin wrapper that forwards to my_mb_wc_utf32_quick(); "cs" is unused.
  Callers in this file treat a result greater than zero as the number of
  bytes consumed and anything else as "stop".
*/
static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
             my_wc_t *pwc, const uchar *s, const uchar *e)
{
  return my_mb_wc_utf32_quick(pwc, s, e);
}
2149 
2150 
2151 static int
my_uni_utf32(CHARSET_INFO * cs,my_wc_t wc,uchar * s,uchar * e)2152 my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
2153              my_wc_t wc, uchar *s, uchar *e)
2154 {
2155   if (s + 4 > e)
2156     return MY_CS_TOOSMALL4;
2157 
2158   if (wc > 0x10FFFF)
2159     return MY_CS_ILUNI;
2160 
2161   s[0]= (uchar) (wc >> 24);
2162   s[1]= (uchar) (wc >> 16) & 0xFF;
2163   s[2]= (uchar) (wc >> 8)  & 0xFF;
2164   s[3]= (uchar) wc & 0xFF;
2165   return 4;
2166 }
2167 
2168 
2169 static inline void
my_tolower_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2170 my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2171 {
2172   MY_UNICASE_CHARACTER *page;
2173   if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2174     *wc= page[*wc & 0xFF].tolower;
2175 }
2176 
2177 
2178 static inline void
my_toupper_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2179 my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2180 {
2181   MY_UNICASE_CHARACTER *page;
2182   if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
2183     *wc= page[*wc & 0xFF].toupper;
2184 }
2185 
2186 
2187 static inline void
my_tosort_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2188 my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2189 {
2190   if (*wc <= uni_plane->maxchar)
2191   {
2192     MY_UNICASE_CHARACTER *page;
2193     if ((page= uni_plane->page[*wc >> 8]))
2194       *wc= page[*wc & 0xFF].sort;
2195   }
2196   else
2197   {
2198     *wc= MY_CS_REPLACEMENT_CHARACTER;
2199   }
2200 }
2201 
2202 
2203 static size_t
my_lengthsp_utf32(CHARSET_INFO * cs,const char * ptr,size_t length)2204 my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
2205                   const char *ptr, size_t length)
2206 {
2207   const char *end= ptr + length;
2208   DBUG_ASSERT((length % 4) == 0);
2209   while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
2210     end-= 4;
2211   return (size_t) (end - ptr);
2212 }
2213 
2214 
2215 static size_t
my_caseup_utf32(CHARSET_INFO * cs,const char * src,size_t srclen,char * dst,size_t dstlen)2216 my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
2217                 char *dst, size_t dstlen)
2218 {
2219   my_wc_t wc;
2220   int res;
2221   const char *srcend= src + srclen;
2222   char *dstend= dst + dstlen;
2223   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2224   DBUG_ASSERT(srclen <= dstlen);
2225 
2226   while ((src < srcend) &&
2227          (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
2228   {
2229     my_toupper_utf32(uni_plane, &wc);
2230     if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
2231       break;
2232     src+= res;
2233     dst+= res;
2234   }
2235   return srclen;
2236 }
2237 
2238 
2239 static void
my_hash_sort_utf32_nopad(CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * nr1,ulong * nr2)2240 my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
2241                          ulong *nr1, ulong *nr2)
2242 {
2243   my_wc_t wc;
2244   int res;
2245   const uchar *e= s + slen;
2246   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2247   register ulong m1= *nr1, m2= *nr2;
2248 
2249   while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
2250   {
2251     my_tosort_utf32(uni_plane, &wc);
2252     MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
2253     MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
2254     MY_HASH_ADD(m1, m2, (uint) (wc >> 8)  & 0xFF);
2255     MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF));
2256     s+= res;
2257   }
2258   *nr1= m1;
2259   *nr2= m2;
2260 }
2261 
2262 
2263 static void
my_hash_sort_utf32(CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * nr1,ulong * nr2)2264 my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
2265                    ulong *nr1, ulong *nr2)
2266 {
2267   size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen);
2268   my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2);
2269 }
2270 
2271 
2272 static size_t
my_casedn_utf32(CHARSET_INFO * cs,const char * src,size_t srclen,char * dst,size_t dstlen)2273 my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
2274                 char *dst, size_t dstlen)
2275 {
2276   my_wc_t wc;
2277   int res;
2278   const char *srcend= src + srclen;
2279   char *dstend= dst + dstlen;
2280   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2281   DBUG_ASSERT(srclen <= dstlen);
2282 
2283   while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
2284   {
2285     my_tolower_utf32(uni_plane,&wc);
2286     if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend))
2287       break;
2288     src+= res;
2289     dst+= res;
2290   }
2291   return srclen;
2292 }
2293 
2294 
2295 static int
my_charlen_utf32(CHARSET_INFO * cs,const uchar * b,const uchar * e)2296 my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
2297                  const uchar *b, const uchar *e)
2298 {
2299   return b + 4 > e ? MY_CS_TOOSMALL4 :
2300          IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ;
2301 }
2302 
2303 
/*
  Instantiate ctype-mb.inl for utf32: function names get the _utf32
  suffix and character lengths are computed by my_charlen_utf32().
  The parameter macros are undefined again right after the include.
*/
#define MY_FUNCTION_NAME(x)       my_ ## x ## _utf32
#define CHARLEN(cs,str,end)       my_charlen_utf32(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.inl"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* The include above defines my_well_formed_char_length_utf32 */
2312 
2313 
2314 static size_t
my_vsnprintf_utf32(char * dst,size_t n,const char * fmt,va_list ap)2315 my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
2316 {
2317   char *start= dst, *end= dst + n;
2318   DBUG_ASSERT((n % 4) == 0);
2319   for (; *fmt ; fmt++)
2320   {
2321     if (fmt[0] != '%')
2322     {
2323       if (dst >= end)                        /* End of buffer */
2324         break;
2325 
2326       *dst++= '\0';
2327       *dst++= '\0';
2328       *dst++= '\0';
2329       *dst++= *fmt;        /* Copy ordinary char */
2330       continue;
2331     }
2332 
2333     fmt++;
2334 
2335     /* Skip if max size is used (to be compatible with printf) */
2336     while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
2337       fmt++;
2338 
2339     if (*fmt == 'l')
2340       fmt++;
2341 
2342     if (*fmt == 's')                                /* String parameter */
2343     {
2344       reg2 char *par= va_arg(ap, char *);
2345       size_t plen;
2346       size_t left_len= (size_t)(end - dst);
2347       if (!par) par= (char*)"(null)";
2348       plen= strlen(par);
2349       if (left_len <= plen*4)
2350         plen= left_len / 4 - 1;
2351 
2352       for ( ; plen ; plen--, dst+= 4, par++)
2353       {
2354         dst[0]= '\0';
2355         dst[1]= '\0';
2356         dst[2]= '\0';
2357         dst[3]= par[0];
2358       }
2359       continue;
2360     }
2361     else if (*fmt == 'd' || *fmt == 'u')        /* Integer parameter */
2362     {
2363       register int iarg;
2364       char nbuf[16];
2365       char *pbuf= nbuf;
2366 
2367       if ((size_t) (end - dst) < 64)
2368         break;
2369       iarg= va_arg(ap, int);
2370       if (*fmt == 'd')
2371         int10_to_str((long) iarg, nbuf, -10);
2372       else
2373         int10_to_str((long) (uint) iarg,nbuf,10);
2374 
2375       for (; pbuf[0]; pbuf++)
2376       {
2377         *dst++= '\0';
2378         *dst++= '\0';
2379         *dst++= '\0';
2380         *dst++= *pbuf;
2381       }
2382       continue;
2383     }
2384 
2385     /* We come here on '%%', unknown code or too long parameter */
2386     if (dst == end)
2387       break;
2388     *dst++= '\0';
2389     *dst++= '\0';
2390     *dst++= '\0';
2391     *dst++= '%';    /* % used as % or unknown code */
2392   }
2393 
2394   DBUG_ASSERT(dst < end);
2395   *dst++= '\0';
2396   *dst++= '\0';
2397   *dst++= '\0';
2398   *dst++= '\0';     /* End of errmessage */
2399   return (size_t) (dst - start - 4);
2400 }
2401 
2402 
/*
  Variadic front end of my_vsnprintf_utf32(): collects the arguments
  following "fmt" into a va_list, formats them into "to" (at most "n"
  bytes) and returns what my_vsnprintf_utf32() returned.  "cs" is unused.
*/
static size_t
my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
                  char* to, size_t n, const char* fmt, ...)
{
  size_t ret;
  va_list args;
  va_start(args,fmt);
  ret= my_vsnprintf_utf32(to, n, fmt, args);
  va_end(args);
  return ret;
}
2414 
2415 
2416 static longlong
my_strtoll10_utf32(CHARSET_INFO * cs,const char * nptr,char ** endptr,int * error)2417 my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
2418                    const char *nptr, char **endptr, int *error)
2419 {
2420   const char *s, *end, *start, *n_end, *true_end;
2421   uchar c;
2422   unsigned long i, j, k;
2423   ulonglong li;
2424   int negative;
2425   ulong cutoff, cutoff2, cutoff3;
2426 
2427   s= nptr;
2428   /* If fixed length string */
2429   if (endptr)
2430   {
2431     /* Make sure string length is even */
2432     end= s + ((*endptr - s) / 4) * 4;
2433     while (s < end && !s[0] && !s[1] && !s[2] &&
2434            (s[3] == ' ' || s[3] == '\t'))
2435       s+= 4;
2436     if (s == end)
2437       goto no_conv;
2438   }
2439   else
2440   {
2441      /* We don't support null terminated strings in UCS2 */
2442      goto no_conv;
2443   }
2444 
2445   /* Check for a sign. */
2446   negative= 0;
2447   if (!s[0] && !s[1] && !s[2] && s[3] == '-')
2448   {
2449     *error= -1;                                        /* Mark as negative number */
2450     negative= 1;
2451     s+= 4;
2452     if (s == end)
2453       goto no_conv;
2454     cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
2455     cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2456     cutoff3=  MAX_NEGATIVE_NUMBER % 100;
2457   }
2458   else
2459   {
2460     *error= 0;
2461     if (!s[0] && !s[1] && !s[2] && s[3] == '+')
2462     {
2463       s+= 4;
2464       if (s == end)
2465         goto no_conv;
2466     }
2467     cutoff=  ULONGLONG_MAX / LFACTOR2;
2468     cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2469     cutoff3=  ULONGLONG_MAX % 100;
2470   }
2471 
2472   /* Handle case where we have a lot of pre-zero */
2473   if (!s[0] && !s[1] && !s[2] && s[3] == '0')
2474   {
2475     i= 0;
2476     do
2477     {
2478       s+= 4;
2479       if (s == end)
2480         goto end_i;                                /* Return 0 */
2481     }
2482     while (!s[0] && !s[1] && !s[2] && s[3] == '0');
2483     n_end= s + 4 * INIT_CNT;
2484   }
2485   else
2486   {
2487     /* Read first digit to check that it's a valid number */
2488     if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2489       goto no_conv;
2490     i= c;
2491     s+= 4;
2492     n_end= s + 4 * (INIT_CNT-1);
2493   }
2494 
2495   /* Handle first 9 digits and store them in i */
2496   if (n_end > end)
2497     n_end= end;
2498   for (; s != n_end ; s+= 4)
2499   {
2500     if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2501       goto end_i;
2502     i= i * 10 + c;
2503   }
2504   if (s == end)
2505     goto end_i;
2506 
2507   /* Handle next 9 digits and store them in j */
2508   j= 0;
2509   start= s;                                /* Used to know how much to shift i */
2510   n_end= true_end= s + 4 * INIT_CNT;
2511   if (n_end > end)
2512     n_end= end;
2513   do
2514   {
2515     if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2516       goto end_i_and_j;
2517     j= j * 10 + c;
2518     s+= 4;
2519   } while (s != n_end);
2520   if (s == end)
2521   {
2522     if (s != true_end)
2523       goto end_i_and_j;
2524     goto end3;
2525   }
2526   if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2527     goto end3;
2528 
2529   /* Handle the next 1 or 2 digits and store them in k */
2530   k=c;
2531   s+= 4;
2532   if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2533     goto end4;
2534   k= k * 10 + c;
2535   s+= 4;
2536   *endptr= (char*) s;
2537 
2538   /* number string should have ended here */
2539   if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
2540     goto overflow;
2541 
2542   /* Check that we didn't get an overflow with the last digit */
2543   if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2544                                      k > cutoff3)))
2545     goto overflow;
2546   li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2547   return (longlong) li;
2548 
2549 overflow:                                        /* *endptr is set here */
2550   *error= MY_ERRNO_ERANGE;
2551   return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
2552 
2553 end_i:
2554   *endptr= (char*) s;
2555   return (negative ? ((longlong) -(long) i) : (longlong) i);
2556 
2557 end_i_and_j:
2558   li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
2559   *endptr= (char*) s;
2560   return (negative ? -((longlong) li) : (longlong) li);
2561 
2562 end3:
2563   li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2564   *endptr= (char*) s;
2565   return (negative ? -((longlong) li) : (longlong) li);
2566 
2567 end4:
2568   li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2569   *endptr= (char*) s;
2570   if (negative)
2571   {
2572    if (li > MAX_NEGATIVE_NUMBER)
2573      goto overflow;
2574    return -((longlong) li);
2575   }
2576   return (longlong) li;
2577 
2578 no_conv:
2579   /* There was no number to convert.  */
2580   *error= MY_ERRNO_EDOM;
2581   *endptr= (char *) nptr;
2582   return 0;
2583 }
2584 
2585 
2586 static size_t
my_numchars_utf32(CHARSET_INFO * cs,const char * b,const char * e)2587 my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
2588                   const char *b, const char *e)
2589 {
2590   return (size_t) (e - b) / 4;
2591 }
2592 
2593 
2594 static size_t
my_charpos_utf32(CHARSET_INFO * cs,const char * b,const char * e,size_t pos)2595 my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
2596                  const char *b, const char *e, size_t pos)
2597 {
2598   size_t string_length= (size_t) (e - b);
2599   return pos * 4 > string_length ? string_length + 4 : pos * 4;
2600 }
2601 
2602 
2603 static
my_fill_utf32(CHARSET_INFO * cs,char * s,size_t slen,int fill)2604 void my_fill_utf32(CHARSET_INFO *cs,
2605                    char *s, size_t slen, int fill)
2606 {
2607   char buf[10];
2608 #ifdef DBUG_ASSERT_EXISTS
2609   uint buflen;
2610 #endif
2611   char *e= s + slen;
2612 
2613   DBUG_ASSERT((slen % 4) == 0);
2614 
2615 #ifdef DBUG_ASSERT_EXISTS
2616   buflen=
2617 #endif
2618     my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf, (uchar*) buf + sizeof(buf));
2619   DBUG_ASSERT(buflen == 4);
2620   while (s < e)
2621   {
2622     memcpy(s, buf, 4);
2623     s+= 4;
2624   }
2625 }
2626 
2627 
2628 static int
my_wildcmp_utf32_ci(CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)2629 my_wildcmp_utf32_ci(CHARSET_INFO *cs,
2630                     const char *str, const char *str_end,
2631                     const char *wildstr, const char *wildend,
2632                     int escape, int w_one, int w_many)
2633 {
2634   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2635   return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2636                             escape, w_one, w_many, uni_plane);
2637 }
2638 
2639 
/*
  Wildcard comparison for the binary utf32 collations: forwards to
  my_wildcmp_unicode() with NULL in place of a case table.
*/
static int
my_wildcmp_utf32_bin(CHARSET_INFO *cs,
                     const char *str,const char *str_end,
                     const char *wildstr,const char *wildend,
                     int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, NULL);
}
2649 
2650 
2651 static size_t
my_scan_utf32(CHARSET_INFO * cs,const char * str,const char * end,int sequence_type)2652 my_scan_utf32(CHARSET_INFO *cs,
2653               const char *str, const char *end, int sequence_type)
2654 {
2655   const char *str0= str;
2656 
2657   switch (sequence_type)
2658   {
2659   case MY_SEQ_SPACES:
2660     for ( ; str < end; )
2661     {
2662       my_wc_t wc;
2663       int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
2664       if (res < 0 || wc != ' ')
2665         break;
2666       str+= res;
2667     }
2668     return (size_t) (str - str0);
2669   case MY_SEQ_NONSPACES:
2670     DBUG_ASSERT(0); /* Not implemented */
2671     /* pass through */
2672   default:
2673     return 0;
2674   }
2675 }
2676 
2677 
/*
  Collation function table used by my_charset_utf32_general_ci.
  Wildcard matching is case-insensitive (my_wildcmp_utf32_ci) and hashing
  ignores trailing spaces (my_hash_sort_utf32).
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
{
  NULL, /* init */
  my_strnncoll_utf32_general_ci,
  my_strnncollsp_utf32_general_ci,
  my_strnncollsp_nchars_utf32_general_ci,
  my_strnxfrm_utf32_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf32_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf32,
  my_propagate_simple
};
2693 
2694 
/*
  Collation function table used by my_charset_utf32_bin.
  Wildcard matching uses my_wildcmp_utf32_bin and hashing ignores
  trailing spaces (my_hash_sort_utf32).
*/
static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
{
  NULL, /* init */
  my_strnncoll_utf32_bin,
  my_strnncollsp_utf32_bin,
  my_strnncollsp_nchars_utf32_bin,
  my_strnxfrm_unicode_full_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf32_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf32,
  my_propagate_simple
};
2710 
2711 
/*
  Collation function table used by my_charset_utf32_general_nopad_ci.
  It shares my_strnncoll_utf32_general_ci and my_wildcmp_utf32_ci with
  the padding general_ci table, but uses the nopad variants of the
  strnncollsp, strnxfrm and hash functions.
*/
static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
{
  NULL, /* init */
  my_strnncoll_utf32_general_ci,
  my_strnncollsp_utf32_general_nopad_ci,
  my_strnncollsp_nchars_utf32_general_nopad_ci,
  my_strnxfrm_nopad_utf32_general_ci,
  my_strnxfrmlen_unicode,
  my_like_range_generic,
  my_wildcmp_utf32_ci,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf32_nopad,
  my_propagate_simple
};
2727 
2728 
/*
  Collation function table used by my_charset_utf32_nopad_bin.
  It shares my_strnncoll_utf32_bin and my_wildcmp_utf32_bin with the
  padding bin table, but uses the nopad variants of the strnncollsp,
  strnxfrm and hash functions.
*/
static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
{
  NULL, /* init */
  my_strnncoll_utf32_bin,
  my_strnncollsp_utf32_nopad_bin,
  my_strnncollsp_nchars_utf32_nopad_bin,
  my_strnxfrm_unicode_full_nopad_bin,
  my_strnxfrmlen_unicode_full_bin,
  my_like_range_generic,
  my_wildcmp_utf32_bin,
  my_strcasecmp_mb2_or_mb4,
  my_instr_mb,
  my_hash_sort_utf32_nopad,
  my_propagate_simple
};
2744 
2745 
/*
  Character set function table shared by the four utf32 charset_info_st
  descriptors defined below.  UTF-32 specific functions from this file
  are combined with generic my_..._mb and my_..._mb2_or_mb4 helpers.
  Note that my_uni_utf32 is listed in two slots.
*/
MY_CHARSET_HANDLER my_charset_utf32_handler=
{
  NULL, /* init */
  my_numchars_utf32,
  my_charpos_utf32,
  my_lengthsp_utf32,
  my_numcells_mb,
  my_utf32_uni,
  my_uni_utf32,
  my_mb_ctype_mb,
  my_caseup_str_mb2_or_mb4,
  my_casedn_str_mb2_or_mb4,
  my_caseup_utf32,
  my_casedn_utf32,
  my_snprintf_utf32,
  my_l10tostr_mb2_or_mb4,
  my_ll10tostr_mb2_or_mb4,
  my_fill_utf32,
  my_strntol_mb2_or_mb4,
  my_strntoul_mb2_or_mb4,
  my_strntoll_mb2_or_mb4,
  my_strntoull_mb2_or_mb4,
  my_strntod_mb2_or_mb4,
  my_strtoll10_utf32,
  my_strntoull10rnd_mb2_or_mb4,
  my_scan_utf32,
  my_charlen_utf32,
  my_well_formed_char_length_utf32,
  my_copy_fix_mb2_or_mb4,
  my_uni_utf32,
  my_wc_to_printable_generic
};
2778 
2779 
/*
  Compiled-in descriptor of the "utf32_general_ci" collation (number 60).
  The flag set includes MY_CS_PRIMARY, every character occupies exactly
  4 bytes, and the pad character is a space.
*/
struct charset_info_st my_charset_utf32_general_ci=
{
  60,0,0,              /* number       */
  /* flag bits */
  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf32,             /* cs name    */
  "utf32_general_ci",  /* name         */
  "UTF-32 Unicode",    /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  4,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf32_handler,              /* charset handler   */
  &my_collation_utf32_general_ci_handler  /* collation handler */
};
2811 
2812 
/*
  Compiled-in descriptor of the "utf32_bin" collation (number 61).
  The flag set includes MY_CS_BINSORT, every character occupies exactly
  4 bytes, and the pad character is a space.
*/
struct charset_info_st my_charset_utf32_bin=
{
  61,0,0,              /* number       */
  /* flag bits */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
  charset_name_utf32,             /* cs name    */
  "utf32_bin",         /* name         */
  "UTF-32 Unicode",    /* comment      */
  NULL,                /* tailoring    */
  NULL,                /* ctype        */
  NULL,                /* to_lower     */
  NULL,                /* to_upper     */
  NULL,                /* sort_order   */
  NULL,                /* uca          */
  NULL,                /* tab_to_uni   */
  NULL,                /* tab_from_uni */
  &my_unicase_default, /* caseinfo     */
  NULL,                /* state_map    */
  NULL,                /* ident_map    */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  4,                   /* mbminlen     */
  4,                   /* mbmaxlen     */
  0,                   /* min_sort_char */
  0xFFFF,              /* max_sort_char */
  ' ',                 /* pad char      */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order   */
  &my_charset_utf32_handler,       /* charset handler   */
  &my_collation_utf32_bin_handler  /* collation handler */
};
2844 
2845 
/*
  Compiled-in descriptor of the "utf32_general_nopad_ci" collation
  (number MY_NOPAD_ID(60)).  The flag set includes MY_CS_NOPAD and every
  character occupies exactly 4 bytes.
*/
struct charset_info_st my_charset_utf32_general_nopad_ci=
{
  MY_NOPAD_ID(60),0,0, /* number           */
  /* flag bits */
  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
  charset_name_utf32,             /* cs name          */
  "utf32_general_nopad_ci", /* name        */
  "UTF-32 Unicode",    /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  4,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf32_handler,                    /* charset handler   */
  &my_collation_utf32_general_nopad_ci_handler  /* collation handler */
};
2877 
2878 
/*
  Compiled-in descriptor of the "utf32_nopad_bin" collation
  (number MY_NOPAD_ID(61)).  The flag set includes both MY_CS_BINSORT and
  MY_CS_NOPAD, and every character occupies exactly 4 bytes.
*/
struct charset_info_st my_charset_utf32_nopad_bin=
{
  MY_NOPAD_ID(61),0,0, /* number           */
  /* flag bits (continued on the next line) */
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
  MY_CS_NOPAD,
  charset_name_utf32,             /* cs name          */
  "utf32_nopad_bin",   /* name             */
  "UTF-32 Unicode",    /* comment          */
  NULL,                /* tailoring        */
  NULL,                /* ctype            */
  NULL,                /* to_lower         */
  NULL,                /* to_upper         */
  NULL,                /* sort_order       */
  NULL,                /* uca              */
  NULL,                /* tab_to_uni       */
  NULL,                /* tab_from_uni     */
  &my_unicase_default, /* caseinfo         */
  NULL,                /* state_map        */
  NULL,                /* ident_map        */
  1,                   /* strxfrm_multiply */
  1,                   /* caseup_multiply  */
  1,                   /* casedn_multiply  */
  4,                   /* mbminlen         */
  4,                   /* mbmaxlen         */
  0,                   /* min_sort_char    */
  0xFFFF,              /* max_sort_char    */
  ' ',                 /* pad char         */
  0,                   /* escape_with_backslash_is_dangerous */
  1,                   /* levels_for_order */
  &my_charset_utf32_handler,             /* charset handler   */
  &my_collation_utf32_nopad_bin_handler  /* collation handler */
};
2911 
2912 
2913 #endif /* HAVE_CHARSET_utf32 */
2914 
2915 
2916 #ifdef HAVE_CHARSET_ucs2
2917 
2918 #include "ctype-ucs2.h"
2919 
/*
  Character-class table used as the "ctype" field of the ucs2 charset
  descriptors in this file. It holds one leading 0 entry followed by 256
  entries (16 rows of 16); the last 128 entries are all 0.
  NOTE(review): the values look like the character-class bit flags declared
  in m_ctype.h, and the leading entry presumably exists for "index + 1"
  lookups - confirm against m_ctype.h.
*/
static const uchar ctype_ucs2[] = {
    0,
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
};
2939 
/*
  256-entry byte map used as the "to_lower" field of the ucs2 charset
  descriptors in this file. Every entry maps to itself except 65..90
  ('A'..'Z'), which map to 97..122 ('a'..'z').
*/
static const uchar to_lower_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
2958 
/*
  256-entry byte map used as the "to_upper" field (and, for the general_ci
  descriptors, the "sort_order" field) of the ucs2 charset descriptors in
  this file. Every entry maps to itself except 97..122 ('a'..'z'), which
  map to 65..90 ('A'..'Z').
*/
static const uchar to_upper_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
2977 
2978 
/* Definitions for strcoll.inl */
/* Expands to 1 for any pair of arguments; the arguments are not evaluated. */
#define IS_MB2_CHAR(x,y)            (1)
/*
  Combine two bytes into one 16-bit code, b0 being the high-order byte.
  Each argument is parenthesized so that the (uchar) cast applies to the
  whole argument expression, not just to its first operand.
*/
#define UCS2_CODE(b0,b1)            ((((uchar) (b0)) << 8) | ((uchar) (b1)))
2982 
2983 
/*
  Weight of the UCS2 character formed from bytes b0 (high) and b1 (low).
  The weight is the "sort" member of the character's entry in
  my_unicase_default_pages; when the page selected by the high byte is
  absent (NULL), the character code itself is the weight.
*/
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
{
  my_wc_t weight= UCS2_CODE(b0, b1);
  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[weight >> 8];
  if (page)
    weight= page[weight & 0xFF].sort;
  return (int) weight;
}
2990 
2991 
/*
  Instantiate strcoll.inl for ucs2_general_ci: MY_FUNCTION_NAME(x) expands
  to my_<x>_ucs2_general_ci, and character weights come from
  my_weight_mb2_ucs2_general_ci().
  NOTE(review): strcoll.inl is not visible here. Presumably it generates the
  my_strnncoll..., my_strnncollsp... and my_strnxfrm... functions that the
  collation handlers in this file reference, and undefines these macros at
  its end (they are defined again for the next instantiation) - confirm.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0            my_unicase_default_page00
#define UNICASE_PAGES            my_unicase_default_pages
/* 0xFF0000 + byte value: larger than any 16-bit character weight */
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.inl"
3003 
3004 
/*
  Instantiate strcoll.inl for ucs2_bin: MY_FUNCTION_NAME(x) expands to
  my_<x>_ucs2_bin, and the weight of a character is its own 16-bit code
  (UCS2_CODE).
  NOTE(review): the exact set of functions generated depends on strcoll.inl,
  which is not visible here - confirm.
*/
#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_bin
#define DEFINE_STRNXFRM_UNICODE_BIN2
#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII           0
/* 0xFF0000 + byte value: larger than any 16-bit character weight */
#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)        UCS2_CODE(b0,b1)
#include "strcoll.inl"
3012 
3013 
/*
  Instantiate strcoll.inl for ucs2_general_nopad_ci with
  DEFINE_STRNNCOLLSP_NOPAD set. MY_FUNCTION_NAME(x) expands to
  my_<x>_ucs2_general_nopad_ci; weights are the same as for ucs2_general_ci.
  No MY_MB_WC or DEFINE_STRNXFRM_* macro is defined for this instantiation.
  NOTE(review): presumably this yields only the NOPAD space-aware comparison
  functions - confirm against strcoll.inl.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)    my_ ## x ## _ucs2_general_nopad_ci
/* 0xFF0000 + byte value: larger than any 16-bit character weight */
#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)      my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.inl"
3019 
3020 
/*
  Instantiate strcoll.inl for ucs2_nopad_bin with DEFINE_STRNNCOLLSP_NOPAD
  set. MY_FUNCTION_NAME(x) expands to my_<x>_ucs2_nopad_bin; the weight of a
  character is its own 16-bit code (UCS2_CODE).
  No MY_MB_WC or DEFINE_STRNXFRM_* macro is defined for this instantiation.
  NOTE(review): presumably this yields only the NOPAD space-aware comparison
  functions - confirm against strcoll.inl.
*/
#define DEFINE_STRNNCOLLSP_NOPAD
#define MY_FUNCTION_NAME(x)    my_ ## x ## _ucs2_nopad_bin
/* 0xFF0000 + byte value: larger than any 16-bit character weight */
#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1)      UCS2_CODE(b0,b1)
#include "strcoll.inl"
3026 
3027 
3028 static int
my_charlen_ucs2(CHARSET_INFO * cs,const uchar * s,const uchar * e)3029 my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3030 		const uchar *s, const uchar *e)
3031 {
3032   return s + 2 > e ? MY_CS_TOOSMALLN(2) : 2;
3033 }
3034 
3035 
/*
  Decode one character from [s, e) into *pwc by delegating to
  my_mb_wc_ucs2_quick(); this is the mb_wc entry of the ucs2 charset handler.
  The helper's result is returned unchanged; callers in this file treat a
  positive result as the number of bytes consumed.
*/
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
                       my_wc_t *pwc, const uchar *s, const uchar *e)
{
  int rc= my_mb_wc_ucs2_quick(pwc, s, e);
  return rc;
}
3041 
/*
  Encode the character wc as two bytes, high-order byte first.

  @param cs  Unused.
  @param wc  Character code to store.
  @param r   Destination.
  @param e   End of the destination buffer.

  @return 2 on success,
          MY_CS_TOOSMALL2 when fewer than two bytes are available in [r, e),
          MY_CS_ILUNI when wc is above 0xFFFF.
          The buffer-size check is made first.
*/
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                       my_wc_t wc, uchar *r, uchar *e)
{
  /* Compare the distance instead of forming r + 2, which may lie beyond e */
  if ((e - r) < 2)
    return MY_CS_TOOSMALL2;

  if (wc > 0xFFFF) /* UCS2 does not support characters outside BMP */
    return MY_CS_ILUNI;

  r[0]= (uchar) (wc >> 8);
  r[1]= (uchar) (wc & 0xFF);
  return 2;
}
3055 
3056 
3057 static inline void
my_tolower_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)3058 my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3059 {
3060   MY_UNICASE_CHARACTER *page;
3061   if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3062     *wc= page[*wc & 0xFF].tolower;
3063 }
3064 
3065 
3066 static inline void
my_toupper_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)3067 my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3068 {
3069   MY_UNICASE_CHARACTER *page;
3070   if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3071     *wc= page[*wc & 0xFF].toupper;
3072 }
3073 
3074 
3075 static inline void
my_tosort_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)3076 my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
3077 {
3078   MY_UNICASE_CHARACTER *page;
3079   if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
3080     *wc= page[*wc & 0xFF].sort;
3081 }
3082 
/*
  Convert src to upper case into dst, one character at a time, using the
  case mapping in cs->caseinfo.
  Conversion stops at the end of src, when a character cannot be decoded,
  or when the converted character cannot be stored in the same number of
  bytes.

  @return srclen, in all cases.
*/
static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *from_end= src + srclen;
  char *to_end= dst + dstlen;
  MY_UNICASE_INFO *casemap= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < from_end)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar *) src, (uchar *) from_end);
    if (nbytes <= 0)
      break;
    my_toupper_ucs2(casemap, &wc);
    if (my_uni_ucs2(cs, wc, (uchar *) dst, (uchar *) to_end) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
3104 
3105 
3106 static void
my_hash_sort_ucs2_nopad(CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * nr1,ulong * nr2)3107 my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
3108                         ulong *nr1, ulong *nr2)
3109 {
3110   my_wc_t wc;
3111   int res;
3112   const uchar *e=s+slen;
3113   MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3114   register ulong m1= *nr1, m2= *nr2;
3115 
3116   while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
3117   {
3118     my_tosort_ucs2(uni_plane, &wc);
3119     MY_HASH_ADD_16(m1, m2, wc);
3120     s+=res;
3121   }
3122   *nr1= m1;
3123   *nr2= m2;
3124 }
3125 
3126 
/*
  Hash [s, s + slen) like my_hash_sort_ucs2_nopad(), but only over the
  length reported by my_lengthsp_mb2() for the string.
*/
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
                              ulong *nr1, ulong *nr2)
{
  my_hash_sort_ucs2_nopad(cs, s,
                          my_lengthsp_mb2(cs, (const char *) s, slen),
                          nr1, nr2);
}
3133 
/*
  Convert src to lower case into dst, one character at a time, using the
  case mapping in cs->caseinfo.
  Conversion stops at the end of src, when a character cannot be decoded,
  or when the converted character cannot be stored in the same number of
  bytes.

  @return srclen, in all cases.
*/
static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
                             char *dst, size_t dstlen)
{
  const char *from_end= src + srclen;
  char *to_end= dst + dstlen;
  MY_UNICASE_INFO *casemap= cs->caseinfo;
  DBUG_ASSERT(srclen <= dstlen);

  while (src < from_end)
  {
    my_wc_t wc;
    int nbytes= my_ucs2_uni(cs, &wc, (uchar *) src, (uchar *) from_end);
    if (nbytes <= 0)
      break;
    my_tolower_ucs2(casemap, &wc);
    if (my_uni_ucs2(cs, wc, (uchar *) dst, (uchar *) to_end) != nbytes)
      break;
    src+= nbytes;
    dst+= nbytes;
  }
  return srclen;
}
3155 
3156 
3157 static void
my_fill_ucs2(CHARSET_INFO * cs,char * s,size_t l,int fill)3158 my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3159              char *s, size_t l, int fill)
3160 {
3161   DBUG_ASSERT(fill <= 0xFFFF);
3162 #ifdef WAITING_FOR_GCC_VECTORIZATION_BUG_TO_BE_FIXED
3163   /*
3164     This code with int2store() is known to be faster on some processors,
3165     but crashes on other processors due to a possible bug in GCC's
3166     -ftree-vectorization (which is enabled in -O3) in case of
3167     a   non-aligned memory. See here for details:
3168     http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58039
3169   */
3170   char *last= s + l - 2;
3171   uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */
3172   DBUG_ASSERT(fill <= 0xFFFF);
3173   for ( ; s <= last; s+= 2)
3174     int2store(s, tmp); /* store little-endian */
3175 #else
3176   for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
3177 #endif
3178 }
3179 
3180 
3181 static
my_numchars_ucs2(CHARSET_INFO * cs,const char * b,const char * e)3182 size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3183                         const char *b, const char *e)
3184 {
3185   return (size_t) (e-b)/2;
3186 }
3187 
3188 
3189 static
my_charpos_ucs2(CHARSET_INFO * cs,const char * b,const char * e,size_t pos)3190 size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3191                        const char *b  __attribute__((unused)),
3192                        const char *e  __attribute__((unused)),
3193                        size_t pos)
3194 {
3195   size_t string_length= (size_t) (e - b);
3196   return pos > string_length ? string_length + 2 : pos * 2;
3197 }
3198 
3199 
3200 static size_t
my_well_formed_char_length_ucs2(CHARSET_INFO * cs,const char * b,const char * e,size_t nchars,MY_STRCOPY_STATUS * status)3201 my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3202                                 const char *b, const char *e,
3203                                 size_t nchars, MY_STRCOPY_STATUS *status)
3204 {
3205   size_t length= e - b;
3206   if (nchars * 2 <= length)
3207   {
3208     status->m_well_formed_error_pos= NULL;
3209     status->m_source_end_pos= b + (nchars * 2);
3210     return nchars;
3211   }
3212   if (length % 2)
3213   {
3214     status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
3215   }
3216   else
3217   {
3218     status->m_well_formed_error_pos= NULL;
3219     status->m_source_end_pos= e;
3220   }
3221   return length / 2;
3222 }
3223 
3224 
3225 static
/*
  Wildcard comparison for the case-insensitive ucs2 collations: forwards all
  arguments to my_wildcmp_unicode() together with the collation's case
  mapping (cs->caseinfo) and returns its result.
*/
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                       const char *str, const char *str_end,
                       const char *wildstr, const char *wildend,
                       int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, cs->caseinfo);
}
3235 
3236 
3237 static
/*
  Wildcard comparison for the binary ucs2 collations: forwards all arguments
  to my_wildcmp_unicode() with a NULL case mapping and returns its result.
*/
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
                        const char *str, const char *str_end,
                        const char *wildstr, const char *wildend,
                        int escape, int w_one, int w_many)
{
  MY_UNICASE_INFO *no_case_mapping= NULL;
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, no_case_mapping);
}
3246 
3247 
3248 static void
my_hash_sort_ucs2_nopad_bin(CHARSET_INFO * cs,const uchar * key,size_t len,ulong * nr1,ulong * nr2)3249 my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
3250                             const uchar *key, size_t len,
3251                             ulong *nr1, ulong *nr2)
3252 {
3253   const uchar *end= key + len;
3254   register ulong m1= *nr1, m2= *nr2;
3255   for ( ; key < end ; key++)
3256   {
3257     MY_HASH_ADD(m1, m2, (uint)*key);
3258   }
3259   *nr1= m1;
3260   *nr2= m2;
3261 }
3262 
3263 
3264 static void
my_hash_sort_ucs2_bin(CHARSET_INFO * cs,const uchar * key,size_t len,ulong * nr1,ulong * nr2)3265 my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
3266                       const uchar *key, size_t len, ulong *nr1, ulong *nr2)
3267 {
3268   size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
3269   my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
3270 }
3271 
3272 
/*
  Collation function table used by my_charset_ucs2_general_ci and
  my_charset_ucs2_general_mysql500_ci. Entries are positional; comparison
  and strnxfrm functions carry the _ucs2_general_ci suffix, and hashing is
  done by my_hash_sort_ucs2().
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
    NULL,		/* init */
    my_strnncoll_ucs2_general_ci,
    my_strnncollsp_ucs2_general_ci,
    my_strnncollsp_nchars_ucs2_general_ci,
    my_strnxfrm_ucs2_general_ci,
    my_strnxfrmlen_unicode,
    my_like_range_generic,
    my_wildcmp_ucs2_ci,
    my_strcasecmp_mb2_or_mb4,
    my_instr_mb,
    my_hash_sort_ucs2,
    my_propagate_simple
};
3288 
3289 
/*
  Collation function table used by my_charset_ucs2_bin. Entries are
  positional; comparison and strnxfrm functions carry the _ucs2_bin suffix,
  wildcard matching uses my_wildcmp_ucs2_bin() and hashing is done by
  my_hash_sort_ucs2_bin().
*/
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
    NULL,		/* init */
    my_strnncoll_ucs2_bin,
    my_strnncollsp_ucs2_bin,
    my_strnncollsp_nchars_ucs2_bin,
    my_strnxfrm_ucs2_bin,
    my_strnxfrmlen_unicode,
    my_like_range_generic,
    my_wildcmp_ucs2_bin,
    my_strcasecmp_mb2_or_mb4,
    my_instr_mb,
    my_hash_sort_ucs2_bin,
    my_propagate_simple
};
3305 
3306 
/*
  Collation function table used by my_charset_ucs2_general_nopad_ci.
  It differs from my_collation_ucs2_general_ci_handler in the
  strnncollsp, strnncollsp_nchars, strnxfrm and hash entries, which are the
  nopad variants; the plain strnncoll entry is shared with ucs2_general_ci.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
{
    NULL,		/* init */
    my_strnncoll_ucs2_general_ci,
    my_strnncollsp_ucs2_general_nopad_ci,
    my_strnncollsp_nchars_ucs2_general_nopad_ci,
    my_strnxfrm_nopad_ucs2_general_ci,
    my_strnxfrmlen_unicode,
    my_like_range_generic,
    my_wildcmp_ucs2_ci,
    my_strcasecmp_mb2_or_mb4,
    my_instr_mb,
    my_hash_sort_ucs2_nopad,
    my_propagate_simple
};
3322 
3323 
/*
  Collation function table used by my_charset_ucs2_nopad_bin.
  It differs from my_collation_ucs2_bin_handler in the strnncollsp,
  strnncollsp_nchars, strnxfrm and hash entries, which are the nopad
  variants; the plain strnncoll entry is shared with ucs2_bin.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
{
    NULL,		/* init */
    my_strnncoll_ucs2_bin,
    my_strnncollsp_ucs2_nopad_bin,
    my_strnncollsp_nchars_ucs2_nopad_bin,
    my_strnxfrm_nopad_ucs2_bin,
    my_strnxfrmlen_unicode,
    my_like_range_generic,
    my_wildcmp_ucs2_bin,
    my_strcasecmp_mb2_or_mb4,
    my_instr_mb,
    my_hash_sort_ucs2_nopad_bin,
    my_propagate_simple
};
3339 
3340 
/*
  Character set function table shared by all ucs2 charset descriptors in
  this file. Entries are positional. The ucs2-specific functions defined in
  this file are combined with generic mb2 / mb2_or_mb4 helpers; note that
  my_uni_ucs2 is listed twice, as the wc_mb entry and again near the end of
  the table.
*/
MY_CHARSET_HANDLER my_charset_ucs2_handler=
{
    NULL,		/* init */
    my_numchars_ucs2,
    my_charpos_ucs2,
    my_lengthsp_mb2,
    my_numcells_mb,
    my_ucs2_uni,	/* mb_wc        */
    my_uni_ucs2,	/* wc_mb        */
    my_mb_ctype_mb,
    my_caseup_str_mb2_or_mb4,
    my_casedn_str_mb2_or_mb4,
    my_caseup_ucs2,
    my_casedn_ucs2,
    my_snprintf_mb2,
    my_l10tostr_mb2_or_mb4,
    my_ll10tostr_mb2_or_mb4,
    my_fill_ucs2,
    my_strntol_mb2_or_mb4,
    my_strntoul_mb2_or_mb4,
    my_strntoll_mb2_or_mb4,
    my_strntoull_mb2_or_mb4,
    my_strntod_mb2_or_mb4,
    my_strtoll10_mb2,
    my_strntoull10rnd_mb2_or_mb4,
    my_scan_mb2,
    my_charlen_ucs2,
    my_well_formed_char_length_ucs2,
    my_copy_fix_mb2_or_mb4,
    my_uni_ucs2,
    my_wc_to_printable_generic
};
3373 
3374 
/*
  Compiled-in collation descriptor "ucs2_general_ci" (number 35). It is the
  only ucs2 descriptor in this file flagged MY_CS_PRIMARY. Characters are
  always 2 bytes long (mbminlen == mbmaxlen == 2); case mapping comes from
  my_unicase_default and comparisons go through
  my_collation_ucs2_general_ci_handler.
  The structure is initialized positionally; the trailing comment on each
  line names the field being set.
*/
struct charset_info_st my_charset_ucs2_general_ci=
{
    35,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
    charset_name_ucs2,		/* cs name    */
    "ucs2_general_ci",	/* name         */
    "",			/* comment      */
    NULL,		/* tailoring    */
    ctype_ucs2,		/* ctype        */
    to_lower_ucs2,	/* to_lower     */
    to_upper_ucs2,	/* to_upper     */
    to_upper_ucs2,	/* sort_order   */
    NULL,		/* uca          */
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    &my_unicase_default,/* caseinfo     */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    1,			/* strxfrm_multiply */
    1,                  /* caseup_multiply  */
    1,                  /* casedn_multiply  */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    0,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    ' ',                /* pad char      */
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_general_ci_handler
};
3406 
3407 
/*
  Compiled-in collation descriptor "ucs2_general_mysql500_ci" (number 159).
  It uses the same handlers and byte tables as ucs2_general_ci but takes its
  case mapping from my_unicase_mysql500 instead of my_unicase_default, and
  it is not flagged MY_CS_PRIMARY.
  The structure is initialized positionally; the trailing comment on each
  line names the field being set.
*/
struct charset_info_st my_charset_ucs2_general_mysql500_ci=
{
  159, 0, 0,                                       /* number           */
  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
  charset_name_ucs2,                                          /* cs name          */
  "ucs2_general_mysql500_ci",                      /* name             */
  "",                                              /* comment          */
  NULL,                                            /* tailoring        */
  ctype_ucs2,                                      /* ctype            */
  to_lower_ucs2,                                   /* to_lower         */
  to_upper_ucs2,                                   /* to_upper         */
  to_upper_ucs2,                                   /* sort_order       */
  NULL,                                            /* uca              */
  NULL,                                            /* tab_to_uni       */
  NULL,                                            /* tab_from_uni     */
  &my_unicase_mysql500,                            /* caseinfo         */
  NULL,                                            /* state_map        */
  NULL,                                            /* ident_map        */
  1,                                               /* strxfrm_multiply */
  1,                                               /* caseup_multiply  */
  1,                                               /* casedn_multiply  */
  2,                                               /* mbminlen         */
  2,                                               /* mbmaxlen         */
  0,                                               /* min_sort_char    */
  0xFFFF,                                          /* max_sort_char    */
  ' ',                                             /* pad char         */
  0,                          /* escape_with_backslash_is_dangerous    */
  1,                                               /* levels_for_order   */
  &my_charset_ucs2_handler,
  &my_collation_ucs2_general_ci_handler
};
3439 
3440 
/*
  Compiled-in collation descriptor "ucs2_bin" (number 90). It is flagged
  MY_CS_BINSORT, has no sort_order table (NULL) and compares through
  my_collation_ucs2_bin_handler.
  The structure is initialized positionally; the trailing comment on each
  line names the field being set.
*/
struct charset_info_st my_charset_ucs2_bin=
{
    90,0,0,		/* number       */
    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
    charset_name_ucs2,		/* cs name    */
    "ucs2_bin",		/* name         */
    "",			/* comment      */
    NULL,		/* tailoring    */
    ctype_ucs2,		/* ctype        */
    to_lower_ucs2,	/* to_lower     */
    to_upper_ucs2,	/* to_upper     */
    NULL,		/* sort_order   */
    NULL,		/* uca          */
    NULL,		/* tab_to_uni   */
    NULL,		/* tab_from_uni */
    &my_unicase_default,/* caseinfo     */
    NULL,		/* state_map    */
    NULL,		/* ident_map    */
    1,			/* strxfrm_multiply */
    1,                  /* caseup_multiply  */
    1,                  /* casedn_multiply  */
    2,			/* mbminlen     */
    2,			/* mbmaxlen     */
    0,			/* min_sort_char */
    0xFFFF,		/* max_sort_char */
    ' ',                /* pad char      */
    0,                  /* escape_with_backslash_is_dangerous */
    1,                  /* levels_for_order   */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_bin_handler
};
3472 
3473 
/*
  Compiled-in collation descriptor "ucs2_general_nopad_ci". Its number is
  derived from 35 (the number of ucs2_general_ci) through MY_NOPAD_ID, its
  state flags add MY_CS_NOPAD, and it compares through
  my_collation_ucs2_general_nopad_ci_handler.
  The structure is initialized positionally; the trailing comment on each
  line names the field being set.
*/
struct charset_info_st my_charset_ucs2_general_nopad_ci=
{
    MY_NOPAD_ID(35),0,0,     /* number           */
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
    charset_name_ucs2,                  /* cs name          */
    "ucs2_general_nopad_ci", /* name             */
    "",                      /* comment          */
    NULL,                    /* tailoring        */
    ctype_ucs2,              /* ctype            */
    to_lower_ucs2,           /* to_lower         */
    to_upper_ucs2,           /* to_upper         */
    to_upper_ucs2,           /* sort_order       */
    NULL,                    /* uca              */
    NULL,                    /* tab_to_uni       */
    NULL,                    /* tab_from_uni     */
    &my_unicase_default,     /* caseinfo         */
    NULL,                    /* state_map        */
    NULL,                    /* ident_map        */
    1,                       /* strxfrm_multiply */
    1,                       /* caseup_multiply  */
    1,                       /* casedn_multiply  */
    2,                       /* mbminlen         */
    2,                       /* mbmaxlen         */
    0,                       /* min_sort_char    */
    0xFFFF,                  /* max_sort_char    */
    ' ',                     /* pad char         */
    0,                       /* escape_with_backslash_is_dangerous */
    1,                       /* levels_for_order */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_general_nopad_ci_handler
};
3505 
3506 
/*
  Compiled-in collation descriptor "ucs2_nopad_bin". Its number is derived
  from 90 (the number of ucs2_bin) through MY_NOPAD_ID, its state flags
  include MY_CS_BINSORT and MY_CS_NOPAD, and it compares through
  my_collation_ucs2_nopad_bin_handler.
  The structure is initialized positionally; the trailing comment on each
  line names the field being set.
*/
struct charset_info_st my_charset_ucs2_nopad_bin=
{
    MY_NOPAD_ID(90),0,0,     /* number           */
    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
    charset_name_ucs2,                  /* cs name          */
    "ucs2_nopad_bin",        /* name             */
    "",                      /* comment          */
    NULL,                    /* tailoring        */
    ctype_ucs2,              /* ctype            */
    to_lower_ucs2,           /* to_lower         */
    to_upper_ucs2,           /* to_upper         */
    NULL,                    /* sort_order       */
    NULL,                    /* uca              */
    NULL,                    /* tab_to_uni       */
    NULL,                    /* tab_from_uni     */
    &my_unicase_default,     /* caseinfo         */
    NULL,                    /* state_map        */
    NULL,                    /* ident_map        */
    1,                       /* strxfrm_multiply */
    1,                       /* caseup_multiply  */
    1,                       /* casedn_multiply  */
    2,                       /* mbminlen         */
    2,                       /* mbmaxlen         */
    0,                       /* min_sort_char    */
    0xFFFF,                  /* max_sort_char    */
    ' ',                     /* pad char         */
    0,                       /* escape_with_backslash_is_dangerous */
    1,                       /* levels_for_order */
    &my_charset_ucs2_handler,
    &my_collation_ucs2_nopad_bin_handler
};
3538 
3539 #endif /* HAVE_CHARSET_ucs2 */
3540