1 /* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Library General Public
5 License as published by the Free Software Foundation; version 2
6 of the License.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Library General Public License for more details.
12
13 You should have received a copy of the GNU Library General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17 /* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
18
19 #include <my_global.h>
20 #include <my_sys.h>
21 #include "m_string.h"
22 #include "m_ctype.h"
23 #include <errno.h>
24 #include <stdarg.h>
25
26
/*
  Derived feature switches:
    HAVE_CHARSET_mb2        - utf16 or ucs2 is compiled in
    HAVE_CHARSET_mb2_or_mb4 - any of the above, or utf32, is compiled in
*/
#if defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf16)
#define HAVE_CHARSET_mb2
#endif

#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2)
#define HAVE_CHARSET_mb2_or_mb4
#endif

/* Use ENOENT as a stand-in when <errno.h> does not provide EILSEQ. */
#if !defined(EILSEQ)
#define EILSEQ ENOENT
#endif
40
/* Limits and decimal scaling factors used by the string-to-integer code. */
#undef  ULONGLONG_MAX
#define ULONGLONG_MAX        (~(ulonglong) 0)
#define MAX_NEGATIVE_NUMBER  ((ulonglong) LL(0x8000000000000000))
#define INIT_CNT             9
#define LFACTOR              ULL(1000000000)      /* 10^9  */
#define LFACTOR1             ULL(10000000000)     /* 10^10 */
#define LFACTOR2             ULL(100000000000)    /* 10^11 */

/* lfactor[n] == 10^n for n = 0..8 */
static unsigned long lfactor[9]=
{
  1L,
  10L,
  100L,
  1000L,
  10000L,
  100000L,
  1000000L,
  10000000L,
  100000000L
};
51
52
53
54 #ifdef HAVE_CHARSET_mb2_or_mb4
55 static inline int
my_bincmp(const uchar * s,const uchar * se,const uchar * t,const uchar * te)56 my_bincmp(const uchar *s, const uchar *se,
57 const uchar *t, const uchar *te)
58 {
59 int slen= (int) (se - s), tlen= (int) (te - t);
60 int len= MY_MIN(slen, tlen);
61 int cmp= memcmp(s, t, len);
62 return cmp ? cmp : slen - tlen;
63 }
64
65
66 static size_t
my_caseup_str_mb2_or_mb4(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * s MY_ATTRIBUTE ((unused)))67 my_caseup_str_mb2_or_mb4(const CHARSET_INFO * cs MY_ATTRIBUTE((unused)),
68 char * s MY_ATTRIBUTE((unused)))
69 {
70 DBUG_ASSERT(0);
71 return 0;
72 }
73
74
75 static size_t
my_casedn_str_mb2_or_mb4(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * s MY_ATTRIBUTE ((unused)))76 my_casedn_str_mb2_or_mb4(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
77 char * s MY_ATTRIBUTE((unused)))
78 {
79 DBUG_ASSERT(0);
80 return 0;
81 }
82
83
84 static int
my_strcasecmp_mb2_or_mb4(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * s MY_ATTRIBUTE ((unused)),const char * t MY_ATTRIBUTE ((unused)))85 my_strcasecmp_mb2_or_mb4(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
86 const char *s MY_ATTRIBUTE((unused)),
87 const char *t MY_ATTRIBUTE((unused)))
88 {
89 DBUG_ASSERT(0);
90 return 0;
91 }
92
93
94 static long
my_strntol_mb2_or_mb4(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)95 my_strntol_mb2_or_mb4(const CHARSET_INFO *cs,
96 const char *nptr, size_t l, int base,
97 char **endptr, int *err)
98 {
99 int negative= 0;
100 int overflow;
101 int cnv;
102 my_wc_t wc;
103 unsigned int cutlim;
104 uint32 cutoff;
105 uint32 res;
106 const uchar *s= (const uchar*) nptr;
107 const uchar *e= (const uchar*) nptr+l;
108 const uchar *save;
109
110 *err= 0;
111 do
112 {
113 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
114 {
115 switch (wc)
116 {
117 case ' ' : break;
118 case '\t': break;
119 case '-' : negative= !negative; break;
120 case '+' : break;
121 default : goto bs;
122 }
123 }
124 else /* No more characters or bad multibyte sequence */
125 {
126 if (endptr != NULL )
127 *endptr= (char*) s;
128 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
129 return 0;
130 }
131 s+= cnv;
132 } while (1);
133
134 bs:
135
136 overflow= 0;
137 res= 0;
138 save= s;
139 cutoff= ((uint32)~0L) / (uint32) base;
140 cutlim= (uint) (((uint32)~0L) % (uint32) base);
141
142 do {
143 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
144 {
145 s+= cnv;
146 if (wc >= '0' && wc <= '9')
147 wc-= '0';
148 else if (wc >= 'A' && wc <= 'Z')
149 wc= wc - 'A' + 10;
150 else if (wc >= 'a' && wc <= 'z')
151 wc= wc - 'a' + 10;
152 else
153 break;
154 if ((int)wc >= base)
155 break;
156 if (res > cutoff || (res == cutoff && wc > cutlim))
157 overflow= 1;
158 else
159 {
160 res*= (uint32) base;
161 res+= wc;
162 }
163 }
164 else if (cnv == MY_CS_ILSEQ)
165 {
166 if (endptr !=NULL )
167 *endptr = (char*) s;
168 err[0]= EILSEQ;
169 return 0;
170 }
171 else
172 {
173 /* No more characters */
174 break;
175 }
176 } while(1);
177
178 if (endptr != NULL)
179 *endptr = (char *) s;
180
181 if (s == save)
182 {
183 err[0]= EDOM;
184 return 0L;
185 }
186
187 if (negative)
188 {
189 if (res > (uint32) INT_MIN32)
190 overflow= 1;
191 }
192 else if (res > INT_MAX32)
193 overflow= 1;
194
195 if (overflow)
196 {
197 err[0]= ERANGE;
198 return negative ? INT_MIN32 : INT_MAX32;
199 }
200
201 return (negative ? -((long) res) : (long) res);
202 }
203
204
205 static ulong
my_strntoul_mb2_or_mb4(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)206 my_strntoul_mb2_or_mb4(const CHARSET_INFO *cs,
207 const char *nptr, size_t l, int base,
208 char **endptr, int *err)
209 {
210 int negative= 0;
211 int overflow;
212 int cnv;
213 my_wc_t wc;
214 unsigned int cutlim;
215 uint32 cutoff;
216 uint32 res;
217 const uchar *s= (const uchar*) nptr;
218 const uchar *e= (const uchar*) nptr + l;
219 const uchar *save;
220
221 *err= 0;
222 do
223 {
224 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
225 {
226 switch (wc)
227 {
228 case ' ' : break;
229 case '\t': break;
230 case '-' : negative= !negative; break;
231 case '+' : break;
232 default : goto bs;
233 }
234 }
235 else /* No more characters or bad multibyte sequence */
236 {
237 if (endptr !=NULL )
238 *endptr= (char*)s;
239 err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
240 return 0;
241 }
242 s+= cnv;
243 } while (1);
244
245 bs:
246
247 overflow= 0;
248 res= 0;
249 save= s;
250 cutoff= ((uint32)~0L) / (uint32) base;
251 cutlim= (uint) (((uint32)~0L) % (uint32) base);
252
253 do
254 {
255 if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
256 {
257 s+= cnv;
258 if (wc >= '0' && wc <= '9')
259 wc-= '0';
260 else if (wc >= 'A' && wc <= 'Z')
261 wc= wc - 'A' + 10;
262 else if (wc >= 'a' && wc <= 'z')
263 wc= wc - 'a' + 10;
264 else
265 break;
266 if ((int) wc >= base)
267 break;
268 if (res > cutoff || (res == cutoff && wc > cutlim))
269 overflow = 1;
270 else
271 {
272 res*= (uint32) base;
273 res+= wc;
274 }
275 }
276 else if (cnv == MY_CS_ILSEQ)
277 {
278 if (endptr != NULL )
279 *endptr= (char*)s;
280 err[0]= EILSEQ;
281 return 0;
282 }
283 else
284 {
285 /* No more characters */
286 break;
287 }
288 } while(1);
289
290 if (endptr != NULL)
291 *endptr= (char *) s;
292
293 if (s == save)
294 {
295 err[0]= EDOM;
296 return 0L;
297 }
298
299 if (overflow)
300 {
301 err[0]= (ERANGE);
302 return (~(uint32) 0);
303 }
304
305 return (negative ? -((long) res) : (long) res);
306 }
307
308
309 static longlong
my_strntoll_mb2_or_mb4(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)310 my_strntoll_mb2_or_mb4(const CHARSET_INFO *cs,
311 const char *nptr, size_t l, int base,
312 char **endptr, int *err)
313 {
314 int negative=0;
315 int overflow;
316 int cnv;
317 my_wc_t wc;
318 ulonglong cutoff;
319 unsigned int cutlim;
320 ulonglong res;
321 const uchar *s= (const uchar*) nptr;
322 const uchar *e= (const uchar*) nptr+l;
323 const uchar *save;
324
325 *err= 0;
326 do
327 {
328 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
329 {
330 switch (wc)
331 {
332 case ' ' : break;
333 case '\t': break;
334 case '-' : negative= !negative; break;
335 case '+' : break;
336 default : goto bs;
337 }
338 }
339 else /* No more characters or bad multibyte sequence */
340 {
341 if (endptr !=NULL )
342 *endptr = (char*)s;
343 err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
344 return 0;
345 }
346 s+=cnv;
347 } while (1);
348
349 bs:
350
351 overflow = 0;
352 res = 0;
353 save = s;
354 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
355 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
356
357 do {
358 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
359 {
360 s+=cnv;
361 if ( wc>='0' && wc<='9')
362 wc -= '0';
363 else if ( wc>='A' && wc<='Z')
364 wc = wc - 'A' + 10;
365 else if ( wc>='a' && wc<='z')
366 wc = wc - 'a' + 10;
367 else
368 break;
369 if ((int)wc >= base)
370 break;
371 if (res > cutoff || (res == cutoff && wc > cutlim))
372 overflow = 1;
373 else
374 {
375 res *= (ulonglong) base;
376 res += wc;
377 }
378 }
379 else if (cnv==MY_CS_ILSEQ)
380 {
381 if (endptr !=NULL )
382 *endptr = (char*)s;
383 err[0]=EILSEQ;
384 return 0;
385 }
386 else
387 {
388 /* No more characters */
389 break;
390 }
391 } while(1);
392
393 if (endptr != NULL)
394 *endptr = (char *) s;
395
396 if (s == save)
397 {
398 err[0]=EDOM;
399 return 0L;
400 }
401
402 if (negative)
403 {
404 if (res > (ulonglong) LONGLONG_MIN)
405 overflow = 1;
406 }
407 else if (res > (ulonglong) LONGLONG_MAX)
408 overflow = 1;
409
410 if (overflow)
411 {
412 err[0]=ERANGE;
413 return negative ? LONGLONG_MIN : LONGLONG_MAX;
414 }
415
416 return (negative ? -((longlong)res) : (longlong)res);
417 }
418
419
420 static ulonglong
my_strntoull_mb2_or_mb4(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)421 my_strntoull_mb2_or_mb4(const CHARSET_INFO *cs,
422 const char *nptr, size_t l, int base,
423 char **endptr, int *err)
424 {
425 int negative= 0;
426 int overflow;
427 int cnv;
428 my_wc_t wc;
429 ulonglong cutoff;
430 unsigned int cutlim;
431 ulonglong res;
432 const uchar *s= (const uchar*) nptr;
433 const uchar *e= (const uchar*) nptr + l;
434 const uchar *save;
435
436 *err= 0;
437 do
438 {
439 if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
440 {
441 switch (wc)
442 {
443 case ' ' : break;
444 case '\t': break;
445 case '-' : negative= !negative; break;
446 case '+' : break;
447 default : goto bs;
448 }
449 }
450 else /* No more characters or bad multibyte sequence */
451 {
452 if (endptr !=NULL )
453 *endptr = (char*)s;
454 err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
455 return 0;
456 }
457 s+=cnv;
458 } while (1);
459
460 bs:
461
462 overflow = 0;
463 res = 0;
464 save = s;
465 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
466 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
467
468 do
469 {
470 if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
471 {
472 s+=cnv;
473 if ( wc>='0' && wc<='9')
474 wc -= '0';
475 else if ( wc>='A' && wc<='Z')
476 wc = wc - 'A' + 10;
477 else if ( wc>='a' && wc<='z')
478 wc = wc - 'a' + 10;
479 else
480 break;
481 if ((int)wc >= base)
482 break;
483 if (res > cutoff || (res == cutoff && wc > cutlim))
484 overflow = 1;
485 else
486 {
487 res *= (ulonglong) base;
488 res += wc;
489 }
490 }
491 else if (cnv==MY_CS_ILSEQ)
492 {
493 if (endptr !=NULL )
494 *endptr = (char*)s;
495 err[0]= EILSEQ;
496 return 0;
497 }
498 else
499 {
500 /* No more characters */
501 break;
502 }
503 } while(1);
504
505 if (endptr != NULL)
506 *endptr = (char *) s;
507
508 if (s == save)
509 {
510 err[0]= EDOM;
511 return 0L;
512 }
513
514 if (overflow)
515 {
516 err[0]= ERANGE;
517 return (~(ulonglong) 0);
518 }
519
520 return (negative ? -((longlong) res) : (longlong) res);
521 }
522
523
524 static double
my_strntod_mb2_or_mb4(const CHARSET_INFO * cs,char * nptr,size_t length,char ** endptr,int * err)525 my_strntod_mb2_or_mb4(const CHARSET_INFO *cs,
526 char *nptr, size_t length,
527 char **endptr, int *err)
528 {
529 char buf[256];
530 double res;
531 char *b= buf;
532 const uchar *s= (const uchar*) nptr;
533 const uchar *end;
534 my_wc_t wc;
535 int cnv;
536
537 *err= 0;
538 /* Cut too long strings */
539 if (length >= sizeof(buf))
540 length= sizeof(buf) - 1;
541 end= s + length;
542
543 while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
544 {
545 s+= cnv;
546 if (wc > (int) (uchar) 'e' || !wc)
547 break; /* Can't be part of double */
548 *b++= (char) wc;
549 }
550
551 *endptr= b;
552 res= my_strtod(buf, endptr, err);
553 *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
554 return res;
555 }
556
557
558 static ulonglong
my_strntoull10rnd_mb2_or_mb4(const CHARSET_INFO * cs,const char * nptr,size_t length,int unsign_fl,char ** endptr,int * err)559 my_strntoull10rnd_mb2_or_mb4(const CHARSET_INFO *cs,
560 const char *nptr, size_t length,
561 int unsign_fl,
562 char **endptr, int *err)
563 {
564 char buf[256], *b= buf;
565 ulonglong res;
566 const uchar *end, *s= (const uchar*) nptr;
567 my_wc_t wc;
568 int cnv;
569
570 /* Cut too long strings */
571 if (length >= sizeof(buf))
572 length= sizeof(buf)-1;
573 end= s + length;
574
575 while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
576 {
577 s+= cnv;
578 if (wc > (int) (uchar) 'e' || !wc)
579 break; /* Can't be a number part */
580 *b++= (char) wc;
581 }
582
583 res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
584 *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
585 return res;
586 }
587
588
589 /*
590 This is a fast version optimized for the case of radix 10 / -10
591 */
592
593 static size_t
my_l10tostr_mb2_or_mb4(const CHARSET_INFO * cs,char * dst,size_t len,int radix,long int val)594 my_l10tostr_mb2_or_mb4(const CHARSET_INFO *cs,
595 char *dst, size_t len, int radix, long int val)
596 {
597 char buffer[66];
598 char *p, *db, *de;
599 long int new_val;
600 int sl= 0;
601 unsigned long int uval = (unsigned long int) val;
602
603 p= &buffer[sizeof(buffer) - 1];
604 *p= '\0';
605
606 if (radix < 0)
607 {
608 if (val < 0)
609 {
610 sl= 1;
611 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
612 uval = (unsigned long int)0 - uval;
613 }
614 }
615
616 new_val = (long) (uval / 10);
617 *--p = '0'+ (char) (uval - (unsigned long) new_val * 10);
618 val= new_val;
619
620 while (val != 0)
621 {
622 new_val= val / 10;
623 *--p= '0' + (char) (val - new_val * 10);
624 val= new_val;
625 }
626
627 if (sl)
628 {
629 *--p= '-';
630 }
631
632 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
633 {
634 int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
635 if (cnvres > 0)
636 dst+= cnvres;
637 else
638 break;
639 }
640 return (int) (dst - db);
641 }
642
643
644 static size_t
my_ll10tostr_mb2_or_mb4(const CHARSET_INFO * cs,char * dst,size_t len,int radix,longlong val)645 my_ll10tostr_mb2_or_mb4(const CHARSET_INFO *cs,
646 char *dst, size_t len, int radix, longlong val)
647 {
648 char buffer[65];
649 char *p, *db, *de;
650 long long_val;
651 int sl= 0;
652 ulonglong uval= (ulonglong) val;
653
654 if (radix < 0)
655 {
656 if (val < 0)
657 {
658 sl= 1;
659 /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
660 uval = (ulonglong)0 - uval;
661 }
662 }
663
664 p= &buffer[sizeof(buffer)-1];
665 *p='\0';
666
667 if (uval == 0)
668 {
669 *--p= '0';
670 goto cnv;
671 }
672
673 while (uval > (ulonglong) LONG_MAX)
674 {
675 ulonglong quo= uval/(uint) 10;
676 uint rem= (uint) (uval- quo* (uint) 10);
677 *--p= '0' + rem;
678 uval= quo;
679 }
680
681 long_val= (long) uval;
682 while (long_val != 0)
683 {
684 long quo= long_val/10;
685 *--p= (char) ('0' + (long_val - quo*10));
686 long_val= quo;
687 }
688
689 cnv:
690 if (sl)
691 {
692 *--p= '-';
693 }
694
695 for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
696 {
697 int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
698 if (cnvres > 0)
699 dst+= cnvres;
700 else
701 break;
702 }
703 return (int) (dst -db);
704 }
705
706 #endif /* HAVE_CHARSET_mb2_or_mb4 */
707
708
709 #ifdef HAVE_CHARSET_mb2
710 static longlong
my_strtoll10_mb2(const CHARSET_INFO * cs,const char * nptr,char ** endptr,int * error)711 my_strtoll10_mb2(const CHARSET_INFO *cs,
712 const char *nptr, char **endptr, int *error)
713 {
714 const char *s, *end, *start, *n_end, *true_end;
715 uchar c;
716 unsigned long i, j, k;
717 ulonglong li;
718 int negative;
719 ulong cutoff, cutoff2, cutoff3;
720 my_wc_t wc;
721 int res;
722
723 s= nptr;
724 /* If fixed length string */
725 if (endptr)
726 {
727 /*
728 Make sure string length is even.
729 Odd length indicates a bug in the caller.
730 Assert in debug, round in production.
731 */
732 DBUG_ASSERT((*endptr - s) % 2 == 0);
733 end= s + ((*endptr - s) / 2) * 2;
734
735 for ( ; ; ) /* Skip leading spaces and tabs */
736 {
737 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
738 if (res <= 0)
739 goto no_conv;
740 s+= res;
741 if (wc != ' ' && wc != '\t')
742 break;
743 }
744 }
745 else
746 {
747 /* We don't support null terminated strings in UCS2 */
748 goto no_conv;
749 }
750
751 /* Check for a sign. */
752 negative= 0;
753 if (wc == '-')
754 {
755 *error= -1; /* Mark as negative number */
756 negative= 1;
757 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
758 if (res < 0)
759 goto no_conv;
760 s+= res;
761 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
762 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
763 cutoff3= MAX_NEGATIVE_NUMBER % 100;
764 }
765 else
766 {
767 *error= 0;
768 if (wc == '+')
769 {
770 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
771 if (res < 0)
772 goto no_conv;
773 s+= res;
774 }
775 cutoff= ULONGLONG_MAX / LFACTOR2;
776 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
777 cutoff3= ULONGLONG_MAX % 100;
778 }
779
780
781 /* Handle case where we have a lot of pre-zero */
782 if (wc == '0')
783 {
784 i= 0;
785 for ( ; ; s+= res)
786 {
787 if (s == end)
788 goto end_i; /* Return 0 */
789 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
790 if (res < 0)
791 goto no_conv;
792 if (wc != '0')
793 break;
794 }
795 while (wc == '0');
796 n_end= s + 2 * INIT_CNT;
797 }
798 else
799 {
800 /* Read first digit to check that it's a valid number */
801 if ((c= (wc - '0')) > 9)
802 goto no_conv;
803 i= c;
804 n_end= s + 2 * (INIT_CNT-1);
805 }
806
807 /* Handle first 9 digits and store them in i */
808 if (n_end > end)
809 n_end= end;
810 for ( ; ; )
811 {
812 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) n_end);
813 if (res < 0)
814 break;
815 s+= res;
816 if ((c= (wc - '0')) > 9)
817 goto end_i;
818 i= i*10+c;
819 }
820 if (s == end)
821 goto end_i;
822
823 /* Handle next 9 digits and store them in j */
824 j= 0;
825 start= s; /* Used to know how much to shift i */
826 n_end= true_end= s + 2 * INIT_CNT;
827 if (n_end > end)
828 n_end= end;
829 do
830 {
831 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
832 if (res < 0)
833 goto no_conv;
834 s+= res;
835 if ((c= (wc - '0')) > 9)
836 goto end_i_and_j;
837 j= j*10+c;
838 } while (s != n_end);
839 if (s == end)
840 {
841 if (s != true_end)
842 goto end_i_and_j;
843 goto end3;
844 }
845 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
846 if (res < 0)
847 goto no_conv;
848 s+= res;
849 if ((c= (wc - '0')) > 9)
850 goto end3;
851
852 /* Handle the next 1 or 2 digits and store them in k */
853 k=c;
854 if (s == end)
855 goto end4;
856 res= cs->cset->mb_wc(cs, &wc, (const uchar *) s, (const uchar *) end);
857 if (res < 0)
858 goto no_conv;
859 s+= res;
860 if ((c= (wc - '0')) > 9)
861 goto end4;
862 k= k*10+c;
863 *endptr= (char*) s;
864
865 /* number string should have ended here */
866 if (s != end && (c= (wc - '0')) <= 9)
867 goto overflow;
868
869 /* Check that we didn't get an overflow with the last digit */
870 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
871 k > cutoff3)))
872 goto overflow;
873 li=i*LFACTOR2+ (ulonglong) j*100 + k;
874 return (longlong) li;
875
876 overflow: /* *endptr is set here */
877 *error= MY_ERRNO_ERANGE;
878 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
879
880 end_i:
881 *endptr= (char*) s;
882 return (negative ? ((longlong) -(long) i) : (longlong) i);
883
884 end_i_and_j:
885 li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
886 *endptr= (char*) s;
887 return (negative ? -((longlong) li) : (longlong) li);
888
889 end3:
890 li=(ulonglong) i*LFACTOR+ (ulonglong) j;
891 *endptr= (char*) s;
892 return (negative ? -((longlong) li) : (longlong) li);
893
894 end4:
895 li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
896 *endptr= (char*) s;
897 if (negative)
898 {
899 if (li > MAX_NEGATIVE_NUMBER)
900 goto overflow;
901 return -((longlong) li);
902 }
903 return (longlong) li;
904
905 no_conv:
906 /* There was no number to convert. */
907 *error= MY_ERRNO_EDOM;
908 *endptr= (char *) nptr;
909 return 0;
910 }
911
912
913 static size_t
my_scan_mb2(const CHARSET_INFO * cs,const char * str,const char * end,int sequence_type)914 my_scan_mb2(const CHARSET_INFO *cs,
915 const char *str, const char *end, int sequence_type)
916 {
917 const char *str0= str;
918 my_wc_t wc;
919 int res;
920
921 switch (sequence_type)
922 {
923 case MY_SEQ_SPACES:
924 for (res= cs->cset->mb_wc(cs, &wc,
925 (const uchar *) str, (const uchar *) end);
926 res > 0 && wc == ' ';
927 str+= res,
928 res= cs->cset->mb_wc(cs, &wc,
929 (const uchar *) str, (const uchar *) end))
930 {
931 }
932 return (size_t) (str - str0);
933 default:
934 return 0;
935 }
936 }
937
938
939 static void
my_fill_mb2(const CHARSET_INFO * cs,char * s,size_t slen,int fill)940 my_fill_mb2(const CHARSET_INFO *cs, char *s, size_t slen, int fill)
941 {
942 char buf[10];
943 int buflen;
944
945 DBUG_ASSERT((slen % 2) == 0);
946
947 buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
948 (uchar*) buf + sizeof(buf));
949
950 DBUG_ASSERT(buflen > 0);
951
952 while (slen >= (size_t) buflen)
953 {
954 /* Enough space for the characer */
955 memcpy(s, buf, (size_t) buflen);
956 s+= buflen;
957 slen-= buflen;
958 }
959
960 /*
961 If there are some more space which is not enough
962 for the whole multibyte character, then add trailing zeros.
963 */
964 for ( ; slen; slen--)
965 {
966 *s++= 0x00;
967 }
968 }
969
970
971 static int
my_vsnprintf_mb2(char * dst,size_t n,const char * fmt,va_list ap)972 my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
973 {
974 char *start=dst, *end= dst + n - 1;
975 for (; *fmt ; fmt++)
976 {
977 if (fmt[0] != '%')
978 {
979 if (dst == end) /* End of buffer */
980 break;
981
982 *dst++='\0';
983 *dst++= *fmt; /* Copy ordinary char */
984 continue;
985 }
986
987 fmt++;
988
989 /* Skip if max size is used (to be compatible with printf) */
990 while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
991 fmt++;
992
993 if (*fmt == 'l')
994 fmt++;
995
996 if (*fmt == 's') /* String parameter */
997 {
998 char *par= va_arg(ap, char *);
999 size_t plen;
1000 size_t left_len= (size_t)(end-dst);
1001 if (!par)
1002 par= (char*) "(null)";
1003 plen= strlen(par);
1004 if (left_len <= plen * 2)
1005 plen = left_len / 2 - 1;
1006
1007 for ( ; plen ; plen--, dst+=2, par++)
1008 {
1009 dst[0]= '\0';
1010 dst[1]= par[0];
1011 }
1012 continue;
1013 }
1014 else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
1015 {
1016 int iarg;
1017 char nbuf[16];
1018 char *pbuf= nbuf;
1019
1020 if ((size_t) (end - dst) < 32)
1021 break;
1022 iarg= va_arg(ap, int);
1023 if (*fmt == 'd')
1024 int10_to_str((long) iarg, nbuf, -10);
1025 else
1026 int10_to_str((long) (uint) iarg, nbuf,10);
1027
1028 for (; pbuf[0]; pbuf++)
1029 {
1030 *dst++= '\0';
1031 *dst++= *pbuf;
1032 }
1033 continue;
1034 }
1035
1036 /* We come here on '%%', unknown code or too long parameter */
1037 if (dst == end)
1038 break;
1039 *dst++= '\0';
1040 *dst++= '%'; /* % used as % or unknown code */
1041 }
1042
1043 DBUG_ASSERT(dst <= end);
1044 *dst='\0'; /* End of errmessage */
1045 return (size_t) (dst - start);
1046 }
1047
1048
1049 static size_t
my_snprintf_mb2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * to,size_t n,const char * fmt,...)1050 my_snprintf_mb2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1051 char* to, size_t n, const char* fmt, ...)
1052 {
1053 size_t retval;
1054 va_list args;
1055 va_start(args,fmt);
1056 retval= my_vsnprintf_mb2(to, n, fmt, args);
1057 va_end(args);
1058 return retval;
1059 }
1060
1061
1062 static size_t
my_lengthsp_mb2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * ptr,size_t length)1063 my_lengthsp_mb2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1064 const char *ptr, size_t length)
1065 {
1066 const char *end= ptr + length;
1067 while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
1068 end-= 2;
1069 return (size_t) (end - ptr);
1070 }
1071
1072 #endif /* HAVE_CHARSET_mb2*/
1073
1074
1075
1076
1077 #ifdef HAVE_CHARSET_utf16
1078
/*
  D800..DB7F - Non-private surrogate high (896 pages)
  DB80..DBFF - Private surrogate high     (128 pages)
  DC00..DFFF - Surrogate low              (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF

/* Tests on the first byte of a 16-bit unit / on a whole code point */
#define MY_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
#define MY_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)

/*
  Code point from two bytes, most significant byte first.
  All macro arguments are parenthesized so that any expression
  can be passed.
*/
#define MY_UTF16_WC2(a, b)       (((a) << 8) + (b))

/*
  Code point from the four bytes of a surrogate pair:
  a= 110110??  (<< 18)
  b= ????????  (<< 10)
  c= 110111??  (<<  8)
  d= ????????  (<<  0)
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)
1103
1104 static int
my_utf16_uni(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t * pwc,const uchar * s,const uchar * e)1105 my_utf16_uni(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1106 my_wc_t *pwc, const uchar *s, const uchar *e)
1107 {
1108 if (s + 2 > e)
1109 return MY_CS_TOOSMALL2;
1110
1111 /*
1112 High bytes: 0xD[89AB] = B'110110??'
1113 Low bytes: 0xD[CDEF] = B'110111??'
1114 Surrogate mask: 0xFC = B'11111100'
1115 */
1116
1117 if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
1118 {
1119 if (s + 4 > e)
1120 return MY_CS_TOOSMALL4;
1121
1122 if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
1123 return MY_CS_ILSEQ;
1124
1125 *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
1126 return 4;
1127 }
1128
1129 if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
1130 return MY_CS_ILSEQ;
1131
1132 *pwc= MY_UTF16_WC2(s[0], s[1]);
1133 return 2;
1134 }
1135
1136
1137 static int
my_uni_utf16(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t wc,uchar * s,uchar * e)1138 my_uni_utf16(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1139 my_wc_t wc, uchar *s, uchar *e)
1140 {
1141 if (wc <= 0xFFFF)
1142 {
1143 if (s + 2 > e)
1144 return MY_CS_TOOSMALL2;
1145 if (MY_UTF16_SURROGATE(wc))
1146 return MY_CS_ILUNI;
1147 *s++= (uchar) (wc >> 8);
1148 *s= (uchar) (wc & 0xFF);
1149 return 2;
1150 }
1151
1152 if (wc <= 0x10FFFF)
1153 {
1154 if (s + 4 > e)
1155 return MY_CS_TOOSMALL4;
1156 *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1157 *s++= (uchar) (wc >> 10) & 0xFF;
1158 *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1159 *s= (uchar) wc & 0xFF;
1160 return 4;
1161 }
1162
1163 return MY_CS_ILUNI;
1164 }
1165
1166
1167 static inline void
my_tolower_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1168 my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1169 {
1170 MY_UNICASE_CHARACTER *page;
1171 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1172 *wc= page[*wc & 0xFF].tolower;
1173 }
1174
1175
1176 static inline void
my_toupper_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1177 my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1178 {
1179 MY_UNICASE_CHARACTER *page;
1180 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1181 *wc= page[*wc & 0xFF].toupper;
1182 }
1183
1184
1185 static inline void
my_tosort_utf16(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1186 my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1187 {
1188 if (*wc <= uni_plane->maxchar)
1189 {
1190 MY_UNICASE_CHARACTER *page;
1191 if ((page= uni_plane->page[*wc >> 8]))
1192 *wc= page[*wc & 0xFF].sort;
1193 }
1194 else
1195 {
1196 *wc= MY_CS_REPLACEMENT_CHARACTER;
1197 }
1198 }
1199
1200
1201
1202 static size_t
my_caseup_utf16(const CHARSET_INFO * cs,char * src,size_t srclen,char * dst MY_ATTRIBUTE ((unused)),size_t dstlen MY_ATTRIBUTE ((unused)))1203 my_caseup_utf16(const CHARSET_INFO *cs, char *src, size_t srclen,
1204 char *dst MY_ATTRIBUTE((unused)),
1205 size_t dstlen MY_ATTRIBUTE((unused)))
1206 {
1207 my_wc_t wc;
1208 int res;
1209 char *srcend= src + srclen;
1210 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1211 DBUG_ASSERT(src == dst && srclen == dstlen);
1212
1213 while ((src < srcend) &&
1214 (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1215 {
1216 my_toupper_utf16(uni_plane, &wc);
1217 if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1218 break;
1219 src+= res;
1220 }
1221 return srclen;
1222 }
1223
1224
1225 static void
my_hash_sort_utf16(const CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * n1,ulong * n2)1226 my_hash_sort_utf16(const CHARSET_INFO *cs, const uchar *s, size_t slen,
1227 ulong *n1, ulong *n2)
1228 {
1229 my_wc_t wc;
1230 int res;
1231 const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
1232 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1233
1234 while ((s < e) && (res= cs->cset->mb_wc(cs, &wc,
1235 (uchar *) s, (uchar *) e)) > 0)
1236 {
1237 my_tosort_utf16(uni_plane, &wc);
1238 n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
1239 n2[0]+= 3;
1240 n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
1241 n2[0]+= 3;
1242 s+= res;
1243 }
1244 }
1245
1246
1247 static size_t
my_casedn_utf16(const CHARSET_INFO * cs,char * src,size_t srclen,char * dst MY_ATTRIBUTE ((unused)),size_t dstlen MY_ATTRIBUTE ((unused)))1248 my_casedn_utf16(const CHARSET_INFO *cs, char *src, size_t srclen,
1249 char *dst MY_ATTRIBUTE((unused)),
1250 size_t dstlen MY_ATTRIBUTE((unused)))
1251 {
1252 my_wc_t wc;
1253 int res;
1254 char *srcend= src + srclen;
1255 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1256 DBUG_ASSERT(src == dst && srclen == dstlen);
1257
1258 while ((src < srcend) &&
1259 (res= cs->cset->mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0)
1260 {
1261 my_tolower_utf16(uni_plane, &wc);
1262 if (res != cs->cset->wc_mb(cs, wc, (uchar *) src, (uchar *) srcend))
1263 break;
1264 src+= res;
1265 }
1266 return srclen;
1267 }
1268
1269
1270 static int
my_strnncoll_utf16(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool t_is_prefix)1271 my_strnncoll_utf16(const CHARSET_INFO *cs,
1272 const uchar *s, size_t slen,
1273 const uchar *t, size_t tlen,
1274 my_bool t_is_prefix)
1275 {
1276 int s_res, t_res;
1277 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1278 const uchar *se= s + slen;
1279 const uchar *te= t + tlen;
1280 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1281
1282 while (s < se && t < te)
1283 {
1284 s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1285 t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1286
1287 if (s_res <= 0 || t_res <= 0)
1288 {
1289 /* Incorrect string, compare by char value */
1290 return my_bincmp(s, se, t, te);
1291 }
1292
1293 my_tosort_utf16(uni_plane, &s_wc);
1294 my_tosort_utf16(uni_plane, &t_wc);
1295
1296 if (s_wc != t_wc)
1297 {
1298 return s_wc > t_wc ? 1 : -1;
1299 }
1300
1301 s+= s_res;
1302 t+= t_res;
1303 }
1304 return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1305 }
1306
1307
/**
  Compare strings, ignoring trailing spaces.

  If one string is shorter than the other, the shorter one is treated as
  if it were padded with spaces to the length of the longer one.

  This will ensure that the following things hold:

    "a"  == "a "
    "a\0" < "a"
    "a\0" < "a "

  @param  cs        Character set pointer.
  @param  s         First string to compare.
  @param  slen      Length of 's'.
  @param  t         Second string to compare.
  @param  tlen      Length of 't'.
  @param  diff_if_only_endspace_difference
                    Honoured only when the build defines
                    VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE:
                    strings that differ only in trailing spaces then
                    compare as different.

  @return Comparison result.
    @retval Negative number, if s is less than t.
    @retval 0, if s is equal to t.
    @retval Positive number, if s is greater than t.
*/
1333
1334 static int
my_strnncollsp_utf16(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference)1335 my_strnncollsp_utf16(const CHARSET_INFO *cs,
1336 const uchar *s, size_t slen,
1337 const uchar *t, size_t tlen,
1338 my_bool diff_if_only_endspace_difference)
1339 {
1340 int res;
1341 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1342 const uchar *se= s + slen, *te= t + tlen;
1343 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1344
1345 DBUG_ASSERT((slen % 2) == 0);
1346 DBUG_ASSERT((tlen % 2) == 0);
1347
1348 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1349 diff_if_only_endspace_difference= FALSE;
1350 #endif
1351
1352 while (s < se && t < te)
1353 {
1354 int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1355 int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1356
1357 if (s_res <= 0 || t_res <= 0)
1358 {
1359 /* Incorrect string, compare bytewise */
1360 return my_bincmp(s, se, t, te);
1361 }
1362
1363 my_tosort_utf16(uni_plane, &s_wc);
1364 my_tosort_utf16(uni_plane, &t_wc);
1365
1366 if (s_wc != t_wc)
1367 {
1368 return s_wc > t_wc ? 1 : -1;
1369 }
1370
1371 s+= s_res;
1372 t+= t_res;
1373 }
1374
1375 slen= (size_t) (se - s);
1376 tlen= (size_t) (te - t);
1377 res= 0;
1378
1379 if (slen != tlen)
1380 {
1381 int s_res, swap= 1;
1382 if (diff_if_only_endspace_difference)
1383 res= 1; /* Assume 's' is bigger */
1384 if (slen < tlen)
1385 {
1386 slen= tlen;
1387 s= t;
1388 se= te;
1389 swap= -1;
1390 res= -res;
1391 }
1392
1393 for ( ; s < se; s+= s_res)
1394 {
1395 if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
1396 {
1397 DBUG_ASSERT(0);
1398 return 0;
1399 }
1400 if (s_wc != ' ')
1401 return (s_wc < ' ') ? -swap : swap;
1402 }
1403 }
1404 return res;
1405 }
1406
1407
1408 static uint
my_ismbchar_utf16(const CHARSET_INFO * cs,const char * b,const char * e)1409 my_ismbchar_utf16(const CHARSET_INFO *cs, const char *b, const char *e)
1410 {
1411 my_wc_t wc;
1412 int res= cs->cset->mb_wc(cs, &wc, (const uchar *) b, (const uchar *) e);
1413 return (uint) (res > 0 ? res : 0);
1414 }
1415
1416
1417 static uint
my_mbcharlen_utf16(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),uint c MY_ATTRIBUTE ((unused)))1418 my_mbcharlen_utf16(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1419 uint c MY_ATTRIBUTE((unused)))
1420 {
1421 DBUG_ASSERT(0);
1422 return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
1423 }
1424
1425
1426 static size_t
my_numchars_utf16(const CHARSET_INFO * cs,const char * b,const char * e)1427 my_numchars_utf16(const CHARSET_INFO *cs,
1428 const char *b, const char *e)
1429 {
1430 size_t nchars= 0;
1431 for ( ; ; nchars++)
1432 {
1433 size_t charlen= my_ismbchar_utf16(cs, b, e);
1434 if (!charlen)
1435 break;
1436 b+= charlen;
1437 }
1438 return nchars;
1439 }
1440
1441
1442 static size_t
my_charpos_utf16(const CHARSET_INFO * cs,const char * b,const char * e,size_t pos)1443 my_charpos_utf16(const CHARSET_INFO *cs,
1444 const char *b, const char *e, size_t pos)
1445 {
1446 const char *b0= b;
1447 uint charlen;
1448
1449 for ( ; pos; b+= charlen, pos--)
1450 {
1451 if (!(charlen= my_ismbchar(cs, b, e)))
1452 return (e + 2 - b0); /* Error, return pos outside the string */
1453 }
1454 return (size_t) (pos ? (e + 2 - b0) : (b - b0));
1455 }
1456
1457
1458 static size_t
my_well_formed_len_utf16(const CHARSET_INFO * cs,const char * b,const char * e,size_t nchars,int * error)1459 my_well_formed_len_utf16(const CHARSET_INFO *cs,
1460 const char *b, const char *e,
1461 size_t nchars, int *error)
1462 {
1463 const char *b0= b;
1464 uint charlen;
1465 *error= 0;
1466
1467 for ( ; nchars; b+= charlen, nchars--)
1468 {
1469 if (!(charlen= my_ismbchar(cs, b, e)))
1470 {
1471 *error= b < e ? 1 : 0;
1472 break;
1473 }
1474 }
1475 return (size_t) (b - b0);
1476 }
1477
1478
1479 static int
my_wildcmp_utf16_ci(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)1480 my_wildcmp_utf16_ci(const CHARSET_INFO *cs,
1481 const char *str,const char *str_end,
1482 const char *wildstr,const char *wildend,
1483 int escape, int w_one, int w_many)
1484 {
1485 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1486 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1487 escape, w_one, w_many, uni_plane);
1488 }
1489
1490
1491 static int
my_wildcmp_utf16_bin(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)1492 my_wildcmp_utf16_bin(const CHARSET_INFO *cs,
1493 const char *str,const char *str_end,
1494 const char *wildstr,const char *wildend,
1495 int escape, int w_one, int w_many)
1496 {
1497 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1498 escape, w_one, w_many, NULL);
1499 }
1500
1501
1502 static int
my_strnncoll_utf16_bin(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool t_is_prefix)1503 my_strnncoll_utf16_bin(const CHARSET_INFO *cs,
1504 const uchar *s, size_t slen,
1505 const uchar *t, size_t tlen,
1506 my_bool t_is_prefix)
1507 {
1508 int s_res,t_res;
1509 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1510 const uchar *se=s+slen;
1511 const uchar *te=t+tlen;
1512
1513 while ( s < se && t < te )
1514 {
1515 s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1516 t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1517
1518 if (s_res <= 0 || t_res <= 0)
1519 {
1520 /* Incorrect string, compare by char value */
1521 return my_bincmp(s, se, t, te);
1522 }
1523 if (s_wc != t_wc)
1524 {
1525 return s_wc > t_wc ? 1 : -1;
1526 }
1527
1528 s+= s_res;
1529 t+= t_res;
1530 }
1531 return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1532 }
1533
1534
1535 static int
my_strnncollsp_utf16_bin(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference)1536 my_strnncollsp_utf16_bin(const CHARSET_INFO *cs,
1537 const uchar *s, size_t slen,
1538 const uchar *t, size_t tlen,
1539 my_bool diff_if_only_endspace_difference)
1540 {
1541 int res;
1542 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
1543 const uchar *se= s + slen, *te= t + tlen;
1544
1545 DBUG_ASSERT((slen % 2) == 0);
1546 DBUG_ASSERT((tlen % 2) == 0);
1547
1548 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1549 diff_if_only_endspace_difference= FALSE;
1550 #endif
1551
1552 while (s < se && t < te)
1553 {
1554 int s_res= cs->cset->mb_wc(cs, &s_wc, s, se);
1555 int t_res= cs->cset->mb_wc(cs, &t_wc, t, te);
1556
1557 if (s_res <= 0 || t_res <= 0)
1558 {
1559 /* Incorrect string, compare bytewise */
1560 return my_bincmp(s, se, t, te);
1561 }
1562
1563 if (s_wc != t_wc)
1564 {
1565 return s_wc > t_wc ? 1 : -1;
1566 }
1567
1568 s+= s_res;
1569 t+= t_res;
1570 }
1571
1572 slen= (size_t) (se - s);
1573 tlen= (size_t) (te - t);
1574 res= 0;
1575
1576 if (slen != tlen)
1577 {
1578 int s_res, swap= 1;
1579 if (diff_if_only_endspace_difference)
1580 res= 1; /* Assume 's' is bigger */
1581 if (slen < tlen)
1582 {
1583 slen= tlen;
1584 s= t;
1585 se= te;
1586 swap= -1;
1587 res= -res;
1588 }
1589
1590 for ( ; s < se; s+= s_res)
1591 {
1592 if ((s_res= cs->cset->mb_wc(cs, &s_wc, s, se)) < 0)
1593 {
1594 DBUG_ASSERT(0);
1595 return 0;
1596 }
1597 if (s_wc != ' ')
1598 return (s_wc < ' ') ? -swap : swap;
1599 }
1600 }
1601 return res;
1602 }
1603
1604
1605 static void
my_hash_sort_utf16_bin(const CHARSET_INFO * cs,const uchar * pos,size_t len,ulong * nr1,ulong * nr2)1606 my_hash_sort_utf16_bin(const CHARSET_INFO *cs,
1607 const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
1608 {
1609 const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len);
1610 for ( ; pos < end ; pos++)
1611 {
1612 nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
1613 ((uint)*pos)) + (nr1[0] << 8);
1614 nr2[0]+= 3;
1615 }
1616 }
1617
1618
1619 static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
1620 {
1621 NULL, /* init */
1622 my_strnncoll_utf16,
1623 my_strnncollsp_utf16,
1624 my_strnxfrm_unicode,
1625 my_strnxfrmlen_simple,
1626 my_like_range_generic,
1627 my_wildcmp_utf16_ci,
1628 my_strcasecmp_mb2_or_mb4,
1629 my_instr_mb,
1630 my_hash_sort_utf16,
1631 my_propagate_simple
1632 };
1633
1634
1635 static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
1636 {
1637 NULL, /* init */
1638 my_strnncoll_utf16_bin,
1639 my_strnncollsp_utf16_bin,
1640 my_strnxfrm_unicode_full_bin,
1641 my_strnxfrmlen_unicode_full_bin,
1642 my_like_range_generic,
1643 my_wildcmp_utf16_bin,
1644 my_strcasecmp_mb2_or_mb4,
1645 my_instr_mb,
1646 my_hash_sort_utf16_bin,
1647 my_propagate_simple
1648 };
1649
1650
1651 MY_CHARSET_HANDLER my_charset_utf16_handler=
1652 {
1653 NULL, /* init */
1654 my_ismbchar_utf16, /* ismbchar */
1655 my_mbcharlen_utf16, /* mbcharlen */
1656 my_numchars_utf16,
1657 my_charpos_utf16,
1658 my_well_formed_len_utf16,
1659 my_lengthsp_mb2,
1660 my_numcells_mb,
1661 my_utf16_uni, /* mb_wc */
1662 my_uni_utf16, /* wc_mb */
1663 my_mb_ctype_mb,
1664 my_caseup_str_mb2_or_mb4,
1665 my_casedn_str_mb2_or_mb4,
1666 my_caseup_utf16,
1667 my_casedn_utf16,
1668 my_snprintf_mb2,
1669 my_l10tostr_mb2_or_mb4,
1670 my_ll10tostr_mb2_or_mb4,
1671 my_fill_mb2,
1672 my_strntol_mb2_or_mb4,
1673 my_strntoul_mb2_or_mb4,
1674 my_strntoll_mb2_or_mb4,
1675 my_strntoull_mb2_or_mb4,
1676 my_strntod_mb2_or_mb4,
1677 my_strtoll10_mb2,
1678 my_strntoull10rnd_mb2_or_mb4,
1679 my_scan_mb2
1680 };
1681
1682
1683 CHARSET_INFO my_charset_utf16_general_ci=
1684 {
1685 54,0,0, /* number */
1686 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1687 "utf16", /* cs name */
1688 "utf16_general_ci", /* name */
1689 "UTF-16 Unicode", /* comment */
1690 NULL, /* tailoring */
1691 NULL, /* ctype */
1692 NULL, /* to_lower */
1693 NULL, /* to_upper */
1694 NULL, /* sort_order */
1695 NULL, /* uca */
1696 NULL, /* tab_to_uni */
1697 NULL, /* tab_from_uni */
1698 &my_unicase_default, /* caseinfo */
1699 NULL, /* state_map */
1700 NULL, /* ident_map */
1701 1, /* strxfrm_multiply */
1702 1, /* caseup_multiply */
1703 1, /* casedn_multiply */
1704 2, /* mbminlen */
1705 4, /* mbmaxlen */
1706 0, /* min_sort_char */
1707 0xFFFF, /* max_sort_char */
1708 ' ', /* pad char */
1709 0, /* escape_with_backslash_is_dangerous */
1710 1, /* levels_for_compare */
1711 1, /* levels_for_order */
1712 &my_charset_utf16_handler,
1713 &my_collation_utf16_general_ci_handler
1714 };
1715
1716
1717 CHARSET_INFO my_charset_utf16_bin=
1718 {
1719 55,0,0, /* number */
1720 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1721 "utf16", /* cs name */
1722 "utf16_bin", /* name */
1723 "UTF-16 Unicode", /* comment */
1724 NULL, /* tailoring */
1725 NULL, /* ctype */
1726 NULL, /* to_lower */
1727 NULL, /* to_upper */
1728 NULL, /* sort_order */
1729 NULL, /* uca */
1730 NULL, /* tab_to_uni */
1731 NULL, /* tab_from_uni */
1732 &my_unicase_default, /* caseinfo */
1733 NULL, /* state_map */
1734 NULL, /* ident_map */
1735 1, /* strxfrm_multiply */
1736 1, /* caseup_multiply */
1737 1, /* casedn_multiply */
1738 2, /* mbminlen */
1739 4, /* mbmaxlen */
1740 0, /* min_sort_char */
1741 0xFFFF, /* max_sort_char */
1742 ' ', /* pad char */
1743 0, /* escape_with_backslash_is_dangerous */
1744 1, /* levels_for_compare */
1745 1, /* levels_for_order */
1746 &my_charset_utf16_handler,
1747 &my_collation_utf16_bin_handler
1748 };
1749
1750
1751 static int
my_utf16le_uni(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t * pwc,const uchar * s,const uchar * e)1752 my_utf16le_uni(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1753 my_wc_t *pwc, const uchar *s, const uchar *e)
1754 {
1755 my_wc_t lo;
1756
1757 if (s + 2 > e)
1758 return MY_CS_TOOSMALL2;
1759
1760 if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST ||
1761 (*pwc > MY_UTF16_SURROGATE_LOW_LAST))
1762 return 2; /* [0000-D7FF,E000-FFFF] */
1763
1764 if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST)
1765 return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */
1766
1767 if (s + 4 > e)
1768 return MY_CS_TOOSMALL4;
1769
1770 s+= 2;
1771
1772 if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST ||
1773 lo > MY_UTF16_SURROGATE_LOW_LAST)
1774 return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */
1775
1776 *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF));
1777 return 4;
1778 }
1779
1780
1781 static int
my_uni_utf16le(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t wc,uchar * s,uchar * e)1782 my_uni_utf16le(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1783 my_wc_t wc, uchar *s, uchar *e)
1784 {
1785 if (wc < MY_UTF16_SURROGATE_HIGH_FIRST ||
1786 (wc > MY_UTF16_SURROGATE_LOW_LAST &&
1787 wc <= 0xFFFF))
1788 {
1789 if (s + 2 > e)
1790 return MY_CS_TOOSMALL2;
1791 int2store(s, wc);
1792 return 2; /* [0000-D7FF,E000-FFFF] */
1793 }
1794
1795 if (wc < 0xFFFF || wc > 0x10FFFF)
1796 return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */
1797
1798 if (s + 4 > e)
1799 return MY_CS_TOOSMALL4;
1800
1801 wc-= 0x10000;
1802 int2store(s, (0xD800 | ((wc >> 10) & 0x3FF))); s+= 2;
1803 int2store(s, (0xDC00 | (wc & 0x3FF)));
1804 return 4; /* [010000-10FFFF] */
1805 }
1806
1807
1808 static size_t
my_lengthsp_utf16le(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * ptr,size_t length)1809 my_lengthsp_utf16le(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1810 const char *ptr, size_t length)
1811 {
1812 const char *end= ptr + length;
1813 while (end > ptr + 1 && uint2korr(end - 2) == 0x20)
1814 end-= 2;
1815 return (size_t) (end - ptr);
1816 }
1817
1818
1819 static MY_CHARSET_HANDLER my_charset_utf16le_handler=
1820 {
1821 NULL, /* init */
1822 my_ismbchar_utf16,
1823 my_mbcharlen_utf16,
1824 my_numchars_utf16,
1825 my_charpos_utf16,
1826 my_well_formed_len_utf16,
1827 my_lengthsp_utf16le,
1828 my_numcells_mb,
1829 my_utf16le_uni, /* mb_wc */
1830 my_uni_utf16le, /* wc_mb */
1831 my_mb_ctype_mb,
1832 my_caseup_str_mb2_or_mb4,
1833 my_casedn_str_mb2_or_mb4,
1834 my_caseup_utf16,
1835 my_casedn_utf16,
1836 my_snprintf_mb2,
1837 my_l10tostr_mb2_or_mb4,
1838 my_ll10tostr_mb2_or_mb4,
1839 my_fill_mb2,
1840 my_strntol_mb2_or_mb4,
1841 my_strntoul_mb2_or_mb4,
1842 my_strntoll_mb2_or_mb4,
1843 my_strntoull_mb2_or_mb4,
1844 my_strntod_mb2_or_mb4,
1845 my_strtoll10_mb2,
1846 my_strntoull10rnd_mb2_or_mb4,
1847 my_scan_mb2
1848 };
1849
1850
1851 CHARSET_INFO my_charset_utf16le_general_ci=
1852 {
1853 56,0,0, /* number */
1854 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1855 "utf16le", /* cs name */
1856 "utf16le_general_ci",/* name */
1857 "UTF-16LE Unicode", /* comment */
1858 NULL, /* tailoring */
1859 NULL, /* ctype */
1860 NULL, /* to_lower */
1861 NULL, /* to_upper */
1862 NULL, /* sort_order */
1863 NULL, /* uca */
1864 NULL, /* tab_to_uni */
1865 NULL, /* tab_from_uni */
1866 &my_unicase_default, /* caseinfo */
1867 NULL, /* state_map */
1868 NULL, /* ident_map */
1869 1, /* strxfrm_multiply */
1870 1, /* caseup_multiply */
1871 1, /* casedn_multiply */
1872 2, /* mbminlen */
1873 4, /* mbmaxlen */
1874 0, /* min_sort_char */
1875 0xFFFF, /* max_sort_char */
1876 ' ', /* pad char */
1877 0, /* escape_with_backslash_is_dangerous */
1878 1, /* levels_for_compare */
1879 1, /* levels_for_order */
1880 &my_charset_utf16le_handler,
1881 &my_collation_utf16_general_ci_handler
1882 };
1883
1884
1885 CHARSET_INFO my_charset_utf16le_bin=
1886 {
1887 62,0,0, /* number */
1888 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1889 "utf16le", /* cs name */
1890 "utf16le_bin", /* name */
1891 "UTF-16LE Unicode", /* comment */
1892 NULL, /* tailoring */
1893 NULL, /* ctype */
1894 NULL, /* to_lower */
1895 NULL, /* to_upper */
1896 NULL, /* sort_order */
1897 NULL, /* uca */
1898 NULL, /* tab_to_uni */
1899 NULL, /* tab_from_uni */
1900 &my_unicase_default, /* caseinfo */
1901 NULL, /* state_map */
1902 NULL, /* ident_map */
1903 1, /* strxfrm_multiply */
1904 1, /* caseup_multiply */
1905 1, /* casedn_multiply */
1906 2, /* mbminlen */
1907 4, /* mbmaxlen */
1908 0, /* min_sort_char */
1909 0xFFFF, /* max_sort_char */
1910 ' ', /* pad char */
1911 0, /* escape_with_backslash_is_dangerous */
1912 1, /* levels_for_compare */
1913 1, /* levels_for_order */
1914 &my_charset_utf16le_handler,
1915 &my_collation_utf16_bin_handler
1916 };
1917
1918
1919 #endif /* HAVE_CHARSET_utf16 */
1920
1921
1922 #ifdef HAVE_CHARSET_utf32
1923
1924 static int
my_utf32_uni(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t * pwc,const uchar * s,const uchar * e)1925 my_utf32_uni(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1926 my_wc_t *pwc, const uchar *s, const uchar *e)
1927 {
1928 if (s + 4 > e)
1929 return MY_CS_TOOSMALL4;
1930 *pwc= (((my_wc_t)s[0]) << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
1931 return 4;
1932 }
1933
1934
1935 static int
my_uni_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),my_wc_t wc,uchar * s,uchar * e)1936 my_uni_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1937 my_wc_t wc, uchar *s, uchar *e)
1938 {
1939 if (s + 4 > e)
1940 return MY_CS_TOOSMALL4;
1941
1942 s[0]= (uchar) (wc >> 24);
1943 s[1]= (uchar) (wc >> 16) & 0xFF;
1944 s[2]= (uchar) (wc >> 8) & 0xFF;
1945 s[3]= (uchar) wc & 0xFF;
1946 return 4;
1947 }
1948
1949
1950 static inline void
my_tolower_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1951 my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1952 {
1953 MY_UNICASE_CHARACTER *page;
1954 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1955 *wc= page[*wc & 0xFF].tolower;
1956 }
1957
1958
1959 static inline void
my_toupper_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1960 my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1961 {
1962 MY_UNICASE_CHARACTER *page;
1963 if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
1964 *wc= page[*wc & 0xFF].toupper;
1965 }
1966
1967
1968 static inline void
my_tosort_utf32(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)1969 my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
1970 {
1971 if (*wc <= uni_plane->maxchar)
1972 {
1973 MY_UNICASE_CHARACTER *page;
1974 if ((page= uni_plane->page[*wc >> 8]))
1975 *wc= page[*wc & 0xFF].sort;
1976 }
1977 else
1978 {
1979 *wc= MY_CS_REPLACEMENT_CHARACTER;
1980 }
1981 }
1982
1983
1984 static size_t
my_caseup_utf32(const CHARSET_INFO * cs,char * src,size_t srclen,char * dst MY_ATTRIBUTE ((unused)),size_t dstlen MY_ATTRIBUTE ((unused)))1985 my_caseup_utf32(const CHARSET_INFO *cs, char *src, size_t srclen,
1986 char *dst MY_ATTRIBUTE((unused)),
1987 size_t dstlen MY_ATTRIBUTE((unused)))
1988 {
1989 my_wc_t wc;
1990 int res;
1991 char *srcend= src + srclen;
1992 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
1993 DBUG_ASSERT(src == dst && srclen == dstlen);
1994
1995 while ((src < srcend) &&
1996 (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
1997 {
1998 my_toupper_utf32(uni_plane, &wc);
1999 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
2000 break;
2001 src+= res;
2002 }
2003 return srclen;
2004 }
2005
2006
2007 static inline void
my_hash_add(ulong * n1,ulong * n2,uint ch)2008 my_hash_add(ulong *n1, ulong *n2, uint ch)
2009 {
2010 n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
2011 n2[0]+= 3;
2012 }
2013
2014
2015 static void
my_hash_sort_utf32(const CHARSET_INFO * cs,const uchar * s,size_t slen,ulong * n1,ulong * n2)2016 my_hash_sort_utf32(const CHARSET_INFO *cs, const uchar *s, size_t slen,
2017 ulong *n1, ulong *n2)
2018 {
2019 my_wc_t wc;
2020 int res;
2021 const uchar *e= s + slen;
2022 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2023
2024 /* Skip trailing spaces */
2025 while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
2026 e-= 4;
2027
2028 while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
2029 {
2030 my_tosort_utf32(uni_plane, &wc);
2031 my_hash_add(n1, n2, (uint) (wc >> 24));
2032 my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
2033 my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
2034 my_hash_add(n1, n2, (uint) (wc & 0xFF));
2035 s+= res;
2036 }
2037 }
2038
2039
2040 static size_t
my_casedn_utf32(const CHARSET_INFO * cs,char * src,size_t srclen,char * dst MY_ATTRIBUTE ((unused)),size_t dstlen MY_ATTRIBUTE ((unused)))2041 my_casedn_utf32(const CHARSET_INFO *cs, char *src, size_t srclen,
2042 char *dst MY_ATTRIBUTE((unused)),
2043 size_t dstlen MY_ATTRIBUTE((unused)))
2044 {
2045 my_wc_t wc;
2046 int res;
2047 char *srcend= src + srclen;
2048 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2049 DBUG_ASSERT(src == dst && srclen == dstlen);
2050
2051 while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
2052 {
2053 my_tolower_utf32(uni_plane,&wc);
2054 if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
2055 break;
2056 src+= res;
2057 }
2058 return srclen;
2059 }
2060
2061
2062 static int
my_strnncoll_utf32(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool t_is_prefix)2063 my_strnncoll_utf32(const CHARSET_INFO *cs,
2064 const uchar *s, size_t slen,
2065 const uchar *t, size_t tlen,
2066 my_bool t_is_prefix)
2067 {
2068 my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
2069 const uchar *se= s + slen;
2070 const uchar *te= t + tlen;
2071 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2072
2073 while (s < se && t < te)
2074 {
2075 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2076 int t_res= my_utf32_uni(cs, &t_wc, t, te);
2077
2078 if ( s_res <= 0 || t_res <= 0)
2079 {
2080 /* Incorrect string, compare by char value */
2081 return my_bincmp(s, se, t, te);
2082 }
2083
2084 my_tosort_utf32(uni_plane, &s_wc);
2085 my_tosort_utf32(uni_plane, &t_wc);
2086
2087 if (s_wc != t_wc)
2088 {
2089 return s_wc > t_wc ? 1 : -1;
2090 }
2091
2092 s+= s_res;
2093 t+= t_res;
2094 }
2095 return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
2096 }
2097
2098
/**
  Compare strings, ignoring trailing spaces.

  If one string is shorter than the other, the shorter one is treated as
  if it were padded with spaces to the length of the longer one.

  This will ensure that the following things hold:

    "a"  == "a "
    "a\0" < "a"
    "a\0" < "a "

  @param  cs        Character set pointer.
  @param  s         First string to compare.
  @param  slen      Length of 's'.
  @param  t         Second string to compare.
  @param  tlen      Length of 't'.
  @param  diff_if_only_endspace_difference
                    Honoured only when the build defines
                    VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE:
                    strings that differ only in trailing spaces then
                    compare as different.

  @return Comparison result.
    @retval Negative number, if s is less than t.
    @retval 0, if s is equal to t.
    @retval Positive number, if s is greater than t.
*/
2124
2125
2126 static int
my_strnncollsp_utf32(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference)2127 my_strnncollsp_utf32(const CHARSET_INFO *cs,
2128 const uchar *s, size_t slen,
2129 const uchar *t, size_t tlen,
2130 my_bool diff_if_only_endspace_difference)
2131 {
2132 int res;
2133 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2134 const uchar *se= s + slen, *te= t + tlen;
2135 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2136
2137 DBUG_ASSERT((slen % 4) == 0);
2138 DBUG_ASSERT((tlen % 4) == 0);
2139
2140 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
2141 diff_if_only_endspace_difference= FALSE;
2142 #endif
2143
2144 while ( s < se && t < te )
2145 {
2146 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2147 int t_res= my_utf32_uni(cs, &t_wc, t, te);
2148
2149 if ( s_res <= 0 || t_res <= 0 )
2150 {
2151 /* Incorrect string, compare bytewise */
2152 return my_bincmp(s, se, t, te);
2153 }
2154
2155 my_tosort_utf32(uni_plane, &s_wc);
2156 my_tosort_utf32(uni_plane, &t_wc);
2157
2158 if ( s_wc != t_wc )
2159 {
2160 return s_wc > t_wc ? 1 : -1;
2161 }
2162
2163 s+= s_res;
2164 t+= t_res;
2165 }
2166
2167 slen= (size_t) (se - s);
2168 tlen= (size_t) (te - t);
2169 res= 0;
2170
2171 if (slen != tlen)
2172 {
2173 int s_res, swap= 1;
2174 if (diff_if_only_endspace_difference)
2175 res= 1; /* Assume 's' is bigger */
2176 if (slen < tlen)
2177 {
2178 slen= tlen;
2179 s= t;
2180 se= te;
2181 swap= -1;
2182 res= -res;
2183 }
2184
2185 for ( ; s < se; s+= s_res)
2186 {
2187 if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0)
2188 {
2189 DBUG_ASSERT(0);
2190 return 0;
2191 }
2192 if (s_wc != ' ')
2193 return (s_wc < ' ') ? -swap : swap;
2194 }
2195 }
2196 return res;
2197 }
2198
2199
2200 static size_t
my_strnxfrmlen_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),size_t len)2201 my_strnxfrmlen_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2202 size_t len)
2203 {
2204 return len / 2;
2205 }
2206
2207
2208 static uint
my_ismbchar_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b MY_ATTRIBUTE ((unused)),const char * e MY_ATTRIBUTE ((unused)))2209 my_ismbchar_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2210 const char *b MY_ATTRIBUTE((unused)),
2211 const char *e MY_ATTRIBUTE((unused)))
2212 {
2213 return 4;
2214 }
2215
2216
2217 static uint
my_mbcharlen_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),uint c MY_ATTRIBUTE ((unused)))2218 my_mbcharlen_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)) ,
2219 uint c MY_ATTRIBUTE((unused)))
2220 {
2221 return 4;
2222 }
2223
2224
2225 static int
my_vsnprintf_utf32(char * dst,size_t n,const char * fmt,va_list ap)2226 my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
2227 {
2228 char *start= dst, *end= dst + n;
2229 DBUG_ASSERT((n % 4) == 0);
2230 for (; *fmt ; fmt++)
2231 {
2232 if (fmt[0] != '%')
2233 {
2234 if (dst >= end) /* End of buffer */
2235 break;
2236
2237 *dst++= '\0';
2238 *dst++= '\0';
2239 *dst++= '\0';
2240 *dst++= *fmt; /* Copy ordinary char */
2241 continue;
2242 }
2243
2244 fmt++;
2245
2246 /* Skip if max size is used (to be compatible with printf) */
2247 while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
2248 fmt++;
2249
2250 if (*fmt == 'l')
2251 fmt++;
2252
2253 if (*fmt == 's') /* String parameter */
2254 {
2255 char *par= va_arg(ap, char *);
2256 size_t plen;
2257 size_t left_len= (size_t)(end - dst);
2258 if (!par) par= (char*)"(null)";
2259 plen= strlen(par);
2260 if (left_len <= plen*4)
2261 plen= left_len / 4 - 1;
2262
2263 for ( ; plen ; plen--, dst+= 4, par++)
2264 {
2265 dst[0]= '\0';
2266 dst[1]= '\0';
2267 dst[2]= '\0';
2268 dst[3]= par[0];
2269 }
2270 continue;
2271 }
2272 else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
2273 {
2274 int iarg;
2275 char nbuf[16];
2276 char *pbuf= nbuf;
2277
2278 if ((size_t) (end - dst) < 64)
2279 break;
2280 iarg= va_arg(ap, int);
2281 if (*fmt == 'd')
2282 int10_to_str((long) iarg, nbuf, -10);
2283 else
2284 int10_to_str((long) (uint) iarg,nbuf,10);
2285
2286 for (; pbuf[0]; pbuf++)
2287 {
2288 *dst++= '\0';
2289 *dst++= '\0';
2290 *dst++= '\0';
2291 *dst++= *pbuf;
2292 }
2293 continue;
2294 }
2295
2296 /* We come here on '%%', unknown code or too long parameter */
2297 if (dst == end)
2298 break;
2299 *dst++= '\0';
2300 *dst++= '\0';
2301 *dst++= '\0';
2302 *dst++= '%'; /* % used as % or unknown code */
2303 }
2304
2305 DBUG_ASSERT(dst < end);
2306 *dst++= '\0';
2307 *dst++= '\0';
2308 *dst++= '\0';
2309 *dst++= '\0'; /* End of errmessage */
2310 return (size_t) (dst - start - 4);
2311 }
2312
2313
2314 static size_t
my_snprintf_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * to,size_t n,const char * fmt,...)2315 my_snprintf_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2316 char* to, size_t n, const char* fmt, ...)
2317 {
2318 size_t retval;
2319 va_list args;
2320 va_start(args,fmt);
2321 retval= my_vsnprintf_utf32(to, n, fmt, args);
2322 va_end(args);
2323 return retval;
2324 }
2325
2326
2327 static longlong
my_strtoll10_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * nptr,char ** endptr,int * error)2328 my_strtoll10_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2329 const char *nptr, char **endptr, int *error)
2330 {
2331 const char *s, *end, *start, *n_end, *true_end;
2332 uchar c;
2333 unsigned long i, j, k;
2334 ulonglong li;
2335 int negative;
2336 ulong cutoff, cutoff2, cutoff3;
2337
2338 s= nptr;
2339 /* If fixed length string */
2340 if (endptr)
2341 {
2342 /* Make sure string length is even */
2343 end= s + ((*endptr - s) / 4) * 4;
2344 while (s < end && !s[0] && !s[1] && !s[2] &&
2345 (s[3] == ' ' || s[3] == '\t'))
2346 s+= 4;
2347 if (s == end)
2348 goto no_conv;
2349 }
2350 else
2351 {
2352 /* We don't support null terminated strings in UCS2 */
2353 goto no_conv;
2354 }
2355
2356 /* Check for a sign. */
2357 negative= 0;
2358 if (!s[0] && !s[1] && !s[2] && s[3] == '-')
2359 {
2360 *error= -1; /* Mark as negative number */
2361 negative= 1;
2362 s+= 4;
2363 if (s == end)
2364 goto no_conv;
2365 cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2;
2366 cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2367 cutoff3= MAX_NEGATIVE_NUMBER % 100;
2368 }
2369 else
2370 {
2371 *error= 0;
2372 if (!s[0] && !s[1] && !s[2] && s[3] == '+')
2373 {
2374 s+= 4;
2375 if (s == end)
2376 goto no_conv;
2377 }
2378 cutoff= ULONGLONG_MAX / LFACTOR2;
2379 cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2380 cutoff3= ULONGLONG_MAX % 100;
2381 }
2382
2383 /* Handle case where we have a lot of pre-zero */
2384 if (!s[0] && !s[1] && !s[2] && s[3] == '0')
2385 {
2386 i= 0;
2387 do
2388 {
2389 s+= 4;
2390 if (s == end)
2391 goto end_i; /* Return 0 */
2392 }
2393 while (!s[0] && !s[1] && !s[2] && s[3] == '0');
2394 n_end= s + 4 * INIT_CNT;
2395 }
2396 else
2397 {
2398 /* Read first digit to check that it's a valid number */
2399 if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2400 goto no_conv;
2401 i= c;
2402 s+= 4;
2403 n_end= s + 4 * (INIT_CNT-1);
2404 }
2405
2406 /* Handle first 9 digits and store them in i */
2407 if (n_end > end)
2408 n_end= end;
2409 for (; s != n_end ; s+= 4)
2410 {
2411 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2412 goto end_i;
2413 i= i * 10 + c;
2414 }
2415 if (s == end)
2416 goto end_i;
2417
2418 /* Handle next 9 digits and store them in j */
2419 j= 0;
2420 start= s; /* Used to know how much to shift i */
2421 n_end= true_end= s + 4 * INIT_CNT;
2422 if (n_end > end)
2423 n_end= end;
2424 do
2425 {
2426 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2427 goto end_i_and_j;
2428 j= j * 10 + c;
2429 s+= 4;
2430 } while (s != n_end);
2431 if (s == end)
2432 {
2433 if (s != true_end)
2434 goto end_i_and_j;
2435 goto end3;
2436 }
2437 if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2438 goto end3;
2439
2440 /* Handle the next 1 or 2 digits and store them in k */
2441 k=c;
2442 s+= 4;
2443 if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2444 goto end4;
2445 k= k * 10 + c;
2446 s+= 2;
2447 *endptr= (char*) s;
2448
2449 /* number string should have ended here */
2450 if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
2451 goto overflow;
2452
2453 /* Check that we didn't get an overflow with the last digit */
2454 if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2455 k > cutoff3)))
2456 goto overflow;
2457 li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2458 return (longlong) li;
2459
2460 overflow: /* *endptr is set here */
2461 *error= MY_ERRNO_ERANGE;
2462 return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
2463
2464 end_i:
2465 *endptr= (char*) s;
2466 return (negative ? ((longlong) -(long) i) : (longlong) i);
2467
2468 end_i_and_j:
2469 li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
2470 *endptr= (char*) s;
2471 return (negative ? -((longlong) li) : (longlong) li);
2472
2473 end3:
2474 li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2475 *endptr= (char*) s;
2476 return (negative ? -((longlong) li) : (longlong) li);
2477
2478 end4:
2479 li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2480 *endptr= (char*) s;
2481 if (negative)
2482 {
2483 if (li > MAX_NEGATIVE_NUMBER)
2484 goto overflow;
2485 return -((longlong) li);
2486 }
2487 return (longlong) li;
2488
2489 no_conv:
2490 /* There was no number to convert. */
2491 *error= MY_ERRNO_EDOM;
2492 *endptr= (char *) nptr;
2493 return 0;
2494 }
2495
2496
2497 static size_t
my_numchars_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,const char * e)2498 my_numchars_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2499 const char *b, const char *e)
2500 {
2501 return (size_t) (e - b) / 4;
2502 }
2503
2504
2505 static size_t
my_charpos_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,const char * e,size_t pos)2506 my_charpos_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2507 const char *b, const char *e, size_t pos)
2508 {
2509 size_t string_length= (size_t) (e - b);
2510 return pos * 4 > string_length ? string_length + 4 : pos * 4;
2511 }
2512
2513
2514 static size_t
my_well_formed_len_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,const char * e,size_t nchars,int * error)2515 my_well_formed_len_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2516 const char *b, const char *e,
2517 size_t nchars, int *error)
2518 {
2519 /* Ensure string length is divisible by 4 */
2520 const char *b0= b;
2521 size_t length= e - b;
2522 DBUG_ASSERT((length % 4) == 0);
2523 *error= 0;
2524 nchars*= 4;
2525 if (length > nchars)
2526 {
2527 length= nchars;
2528 e= b + nchars;
2529 }
2530 for (; b < e; b+= 4)
2531 {
2532 /* Don't accept characters greater than U+10FFFF */
2533 if (b[0] || (uchar) b[1] > 0x10)
2534 {
2535 *error= 1;
2536 return b - b0;
2537 }
2538 }
2539 return length;
2540 }
2541
2542
2543 static
my_fill_utf32(const CHARSET_INFO * cs,char * s,size_t slen,int fill)2544 void my_fill_utf32(const CHARSET_INFO *cs,
2545 char *s, size_t slen, int fill)
2546 {
2547 char buf[10];
2548 char *e= s + slen;
2549
2550 DBUG_ASSERT((slen % 4) == 0);
2551 {
2552 #ifndef DBUG_OFF
2553 uint buflen=
2554 #endif
2555 cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
2556 (uchar*) buf + sizeof(buf));
2557 DBUG_ASSERT(buflen == 4);
2558 }
2559 while (s < e)
2560 {
2561 memcpy(s, buf, 4);
2562 s+= 4;
2563 }
2564 }
2565
2566
2567 static size_t
my_lengthsp_utf32(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * ptr,size_t length)2568 my_lengthsp_utf32(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2569 const char *ptr, size_t length)
2570 {
2571 const char *end= ptr + length;
2572 DBUG_ASSERT((length % 4) == 0);
2573 while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
2574 end-= 4;
2575 return (size_t) (end - ptr);
2576 }
2577
2578
2579 static int
my_wildcmp_utf32_ci(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)2580 my_wildcmp_utf32_ci(const CHARSET_INFO *cs,
2581 const char *str, const char *str_end,
2582 const char *wildstr, const char *wildend,
2583 int escape, int w_one, int w_many)
2584 {
2585 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
2586 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2587 escape, w_one, w_many, uni_plane);
2588 }
2589
2590
2591 static int
my_wildcmp_utf32_bin(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)2592 my_wildcmp_utf32_bin(const CHARSET_INFO *cs,
2593 const char *str,const char *str_end,
2594 const char *wildstr,const char *wildend,
2595 int escape, int w_one, int w_many)
2596 {
2597 return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2598 escape, w_one, w_many, NULL);
2599 }
2600
2601
2602 static int
my_strnncoll_utf32_bin(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool t_is_prefix)2603 my_strnncoll_utf32_bin(const CHARSET_INFO *cs,
2604 const uchar *s, size_t slen,
2605 const uchar *t, size_t tlen,
2606 my_bool t_is_prefix)
2607 {
2608 my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
2609 const uchar *se= s + slen;
2610 const uchar *te= t + tlen;
2611
2612 while (s < se && t < te)
2613 {
2614 int s_res= my_utf32_uni(cs, &s_wc, s, se);
2615 int t_res= my_utf32_uni(cs, &t_wc, t, te);
2616
2617 if (s_res <= 0 || t_res <= 0)
2618 {
2619 /* Incorrect string, compare by char value */
2620 return my_bincmp(s, se, t, te);
2621 }
2622 if (s_wc != t_wc)
2623 {
2624 return s_wc > t_wc ? 1 : -1;
2625 }
2626
2627 s+= s_res;
2628 t+= t_res;
2629 }
2630 return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t)));
2631 }
2632
2633
2634 static inline my_wc_t
my_utf32_get(const uchar * s)2635 my_utf32_get(const uchar *s)
2636 {
2637 return
2638 ((my_wc_t) s[0] << 24) +
2639 ((my_wc_t) s[1] << 16) +
2640 ((my_wc_t) s[2] << 8) +
2641 s[3];
2642 }
2643
2644
2645 static int
my_strnncollsp_utf32_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference MY_ATTRIBUTE ((unused)))2646 my_strnncollsp_utf32_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
2647 const uchar *s, size_t slen,
2648 const uchar *t, size_t tlen,
2649 my_bool diff_if_only_endspace_difference
2650 MY_ATTRIBUTE((unused)))
2651 {
2652 const uchar *se, *te;
2653 size_t minlen;
2654
2655 DBUG_ASSERT((slen % 4) == 0);
2656 DBUG_ASSERT((tlen % 4) == 0);
2657
2658 se= s + slen;
2659 te= t + tlen;
2660
2661 for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4)
2662 {
2663 my_wc_t s_wc= my_utf32_get(s);
2664 my_wc_t t_wc= my_utf32_get(t);
2665 if (s_wc != t_wc)
2666 return s_wc > t_wc ? 1 : -1;
2667
2668 s+= 4;
2669 t+= 4;
2670 }
2671
2672 if (slen != tlen)
2673 {
2674 int swap= 1;
2675 if (slen < tlen)
2676 {
2677 s= t;
2678 se= te;
2679 swap= -1;
2680 }
2681
2682 for ( ; s < se ; s+= 4)
2683 {
2684 my_wc_t s_wc= my_utf32_get(s);
2685 if (s_wc != ' ')
2686 return (s_wc < ' ') ? -swap : swap;
2687 }
2688 }
2689 return 0;
2690 }
2691
2692
2693 static size_t
my_scan_utf32(const CHARSET_INFO * cs,const char * str,const char * end,int sequence_type)2694 my_scan_utf32(const CHARSET_INFO *cs,
2695 const char *str, const char *end, int sequence_type)
2696 {
2697 const char *str0= str;
2698
2699 switch (sequence_type)
2700 {
2701 case MY_SEQ_SPACES:
2702 for ( ; str < end; )
2703 {
2704 my_wc_t wc;
2705 int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end);
2706 if (res < 0 || wc != ' ')
2707 break;
2708 str+= res;
2709 }
2710 return (size_t) (str - str0);
2711 default:
2712 return 0;
2713 }
2714 }
2715
2716
2717 static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
2718 {
2719 NULL, /* init */
2720 my_strnncoll_utf32,
2721 my_strnncollsp_utf32,
2722 my_strnxfrm_unicode,
2723 my_strnxfrmlen_utf32,
2724 my_like_range_generic,
2725 my_wildcmp_utf32_ci,
2726 my_strcasecmp_mb2_or_mb4,
2727 my_instr_mb,
2728 my_hash_sort_utf32,
2729 my_propagate_simple
2730 };
2731
2732
2733 static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
2734 {
2735 NULL, /* init */
2736 my_strnncoll_utf32_bin,
2737 my_strnncollsp_utf32_bin,
2738 my_strnxfrm_unicode_full_bin,
2739 my_strnxfrmlen_unicode_full_bin,
2740 my_like_range_generic,
2741 my_wildcmp_utf32_bin,
2742 my_strcasecmp_mb2_or_mb4,
2743 my_instr_mb,
2744 my_hash_sort_utf32,
2745 my_propagate_simple
2746 };
2747
2748
2749 MY_CHARSET_HANDLER my_charset_utf32_handler=
2750 {
2751 NULL, /* init */
2752 my_ismbchar_utf32,
2753 my_mbcharlen_utf32,
2754 my_numchars_utf32,
2755 my_charpos_utf32,
2756 my_well_formed_len_utf32,
2757 my_lengthsp_utf32,
2758 my_numcells_mb,
2759 my_utf32_uni,
2760 my_uni_utf32,
2761 my_mb_ctype_mb,
2762 my_caseup_str_mb2_or_mb4,
2763 my_casedn_str_mb2_or_mb4,
2764 my_caseup_utf32,
2765 my_casedn_utf32,
2766 my_snprintf_utf32,
2767 my_l10tostr_mb2_or_mb4,
2768 my_ll10tostr_mb2_or_mb4,
2769 my_fill_utf32,
2770 my_strntol_mb2_or_mb4,
2771 my_strntoul_mb2_or_mb4,
2772 my_strntoll_mb2_or_mb4,
2773 my_strntoull_mb2_or_mb4,
2774 my_strntod_mb2_or_mb4,
2775 my_strtoll10_utf32,
2776 my_strntoull10rnd_mb2_or_mb4,
2777 my_scan_utf32
2778 };
2779
2780
2781 CHARSET_INFO my_charset_utf32_general_ci=
2782 {
2783 60,0,0, /* number */
2784 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2785 "utf32", /* cs name */
2786 "utf32_general_ci", /* name */
2787 "UTF-32 Unicode", /* comment */
2788 NULL, /* tailoring */
2789 NULL, /* ctype */
2790 NULL, /* to_lower */
2791 NULL, /* to_upper */
2792 NULL, /* sort_order */
2793 NULL, /* uca */
2794 NULL, /* tab_to_uni */
2795 NULL, /* tab_from_uni */
2796 &my_unicase_default, /* caseinfo */
2797 NULL, /* state_map */
2798 NULL, /* ident_map */
2799 1, /* strxfrm_multiply */
2800 1, /* caseup_multiply */
2801 1, /* casedn_multiply */
2802 4, /* mbminlen */
2803 4, /* mbmaxlen */
2804 0, /* min_sort_char */
2805 0xFFFF, /* max_sort_char */
2806 ' ', /* pad char */
2807 0, /* escape_with_backslash_is_dangerous */
2808 1, /* levels_for_compare */
2809 1, /* levels_for_order */
2810 &my_charset_utf32_handler,
2811 &my_collation_utf32_general_ci_handler
2812 };
2813
2814
2815 CHARSET_INFO my_charset_utf32_bin=
2816 {
2817 61,0,0, /* number */
2818 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2819 "utf32", /* cs name */
2820 "utf32_bin", /* name */
2821 "UTF-32 Unicode", /* comment */
2822 NULL, /* tailoring */
2823 NULL, /* ctype */
2824 NULL, /* to_lower */
2825 NULL, /* to_upper */
2826 NULL, /* sort_order */
2827 NULL, /* uca */
2828 NULL, /* tab_to_uni */
2829 NULL, /* tab_from_uni */
2830 &my_unicase_default, /* caseinfo */
2831 NULL, /* state_map */
2832 NULL, /* ident_map */
2833 1, /* strxfrm_multiply */
2834 1, /* caseup_multiply */
2835 1, /* casedn_multiply */
2836 4, /* mbminlen */
2837 4, /* mbmaxlen */
2838 0, /* min_sort_char */
2839 0xFFFF, /* max_sort_char */
2840 ' ', /* pad char */
2841 0, /* escape_with_backslash_is_dangerous */
2842 1, /* levels_for_compare */
2843 1, /* levels_for_order */
2844 &my_charset_utf32_handler,
2845 &my_collation_utf32_bin_handler
2846 };
2847
2848
2849 #endif /* HAVE_CHARSET_utf32 */
2850
2851
2852 #ifdef HAVE_CHARSET_ucs2
2853
2854 static uchar ctype_ucs2[] = {
2855 0,
2856 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
2857 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2858 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
2859 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
2860 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2861 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
2862 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2863 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
2864 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2865 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2866 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2867 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2868 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2869 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2870 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2871 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2872 };
2873
2874 static uchar to_lower_ucs2[] = {
2875 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2876 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2877 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2878 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2879 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2880 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
2881 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
2882 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
2883 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2884 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2885 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2886 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2887 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2888 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2889 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2890 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
2891 };
2892
2893 static uchar to_upper_ucs2[] = {
2894 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2895 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2896 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2897 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
2898 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2899 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
2900 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
2901 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
2902 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
2903 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
2904 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
2905 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
2906 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
2907 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
2908 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
2909 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
2910 };
2911
2912
/*
  Decode one UCS-2 character (2 bytes, high byte first) from [s, e).

  @param[out] pwc  Receives the decoded code point

  @return 2 on success, MY_CS_TOOSMALL2 when fewer than 2 bytes remain.
*/
static int my_ucs2_uni(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                       my_wc_t * pwc, const uchar *s, const uchar *e)
{
  uchar hi, lo;

  if (e < s + 2)                                /* Need 2 bytes */
    return MY_CS_TOOSMALL2;

  hi= s[0];
  lo= s[1];
  *pwc= (hi << 8) | lo;
  return 2;
}
2922
/*
  Encode one code point as UCS-2 (2 bytes, high byte first) into [r, e).

  @return 2 on success,
          MY_CS_TOOSMALL2 when the output has room for fewer than 2 bytes,
          MY_CS_ILUNI when wc is above 0xFFFF (checked after the room test).
*/
static int my_uni_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)) ,
                       my_wc_t wc, uchar *r, uchar *e)
{
  if (e < r + 2)
    return MY_CS_TOOSMALL2;

  /* UCS2 does not support characters outside BMP */
  if (wc > 0xFFFF)
    return MY_CS_ILUNI;

  *r++= (uchar) (wc >> 8);
  *r= (uchar) (wc & 0xFF);
  return 2;
}
2936
2937
2938 static inline void
my_tolower_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2939 my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2940 {
2941 MY_UNICASE_CHARACTER *page;
2942 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2943 *wc= page[*wc & 0xFF].tolower;
2944 }
2945
2946
2947 static inline void
my_toupper_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2948 my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2949 {
2950 MY_UNICASE_CHARACTER *page;
2951 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2952 *wc= page[*wc & 0xFF].toupper;
2953 }
2954
2955
2956 static inline void
my_tosort_ucs2(MY_UNICASE_INFO * uni_plane,my_wc_t * wc)2957 my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
2958 {
2959 MY_UNICASE_CHARACTER *page;
2960 if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
2961 *wc= page[*wc & 0xFF].sort;
2962 }
2963
2964
/*
  Convert a UCS-2 string to upper case in place.

  src and dst must be the same buffer with the same length (asserted).
  Conversion stops early when a character cannot be decoded or cannot
  be written back in the same number of bytes.

  @return srclen, regardless of where the conversion stopped.
*/
static size_t my_caseup_ucs2(const CHARSET_INFO *cs, char *src, size_t srclen,
                             char *dst MY_ATTRIBUTE((unused)),
                             size_t dstlen MY_ATTRIBUTE((unused)))
{
  my_wc_t wc;
  int res;
  char *cur= src;
  char *end= src + srclen;
  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  for ( ; cur < end; cur+= res)
  {
    res= my_ucs2_uni(cs, &wc, (uchar *) cur, (uchar*) end);
    if (res <= 0)
      break;
    my_toupper_ucs2(uni_plane, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) cur, (uchar*) end) != res)
      break;
  }
  return srclen;
}
2985
2986
/*
  Update the hash state (*n1, *n2) with the sort weights of a UCS-2 string.

  Trailing spaces (bytes 00 20) are stripped first, so strings that
  differ only in trailing spaces hash identically.  Each remaining
  character is mapped through my_tosort_ucs2() and mixed in as two
  steps: low byte of the weight first, then the rest (weight >> 8).
*/
static void my_hash_sort_ucs2(const CHARSET_INFO *cs, const uchar *s,
                              size_t slen, ulong *n1, ulong *n2)
{
  my_wc_t wc;
  int res;
  const uchar *end= s + slen;
  MY_UNICASE_INFO *uni_plane= cs->caseinfo;

  /* Ignore trailing spaces */
  while (end > s + 1 && end[-1] == ' ' && end[-2] == '\0')
    end-= 2;

  for ( ; s < end; s+= res)
  {
    res= my_ucs2_uni(cs, &wc, (uchar *) s, (uchar*) end);
    if (res <= 0)
      break;

    my_tosort_ucs2(uni_plane, &wc);

    *n1^= (((*n1 & 63) + *n2) * (wc & 0xFF)) + (*n1 << 8);
    *n2+= 3;
    *n1^= (((*n1 & 63) + *n2) * (wc >> 8)) + (*n1 << 8);
    *n2+= 3;
  }
}
3008
3009
/*
  Convert a UCS-2 string to lower case in place.

  src and dst must be the same buffer with the same length (asserted).
  Conversion stops early when a character cannot be decoded or cannot
  be written back in the same number of bytes.

  @return srclen, regardless of where the conversion stopped.
*/
static size_t my_casedn_ucs2(const CHARSET_INFO *cs, char *src, size_t srclen,
                             char *dst MY_ATTRIBUTE((unused)),
                             size_t dstlen MY_ATTRIBUTE((unused)))
{
  my_wc_t wc;
  int res;
  char *cur= src;
  char *end= src + srclen;
  MY_UNICASE_INFO *uni_plane= cs->caseinfo;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  for ( ; cur < end; cur+= res)
  {
    res= my_ucs2_uni(cs, &wc, (uchar*) cur, (uchar*) end);
    if (res <= 0)
      break;
    my_tolower_ucs2(uni_plane, &wc);
    if (my_uni_ucs2(cs, wc, (uchar*) cur, (uchar*) end) != res)
      break;
  }
  return srclen;
}
3030
3031
3032 static void
my_fill_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * s,size_t l,int fill)3033 my_fill_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3034 char *s, size_t l, int fill)
3035 {
3036 DBUG_ASSERT(fill <= 0xFFFF);
3037 for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
3038 }
3039
3040
/*
  Compare two UCS-2 strings by the sort weights of their characters.

  Every character of both strings is mapped through my_tosort_ucs2()
  with the case table of the collation before being compared.

  @param t_is_prefix  When set, only the part of s that is as long as t
                      takes part in the comparison: once the common part
                      is equal the result is (t - te), which is 0 when
                      all of t has been consumed.

  @return  negative, 0 or positive for s < t, s == t, s > t.
           When a character cannot be decoded from either string, the
           result is the difference of the current bytes of s and t.
*/
static int my_strnncoll_ucs2(const CHARSET_INFO *cs,
                             const uchar *s, size_t slen,
                             const uchar *t, size_t tlen,
                             my_bool t_is_prefix)
{
  my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  MY_UNICASE_INFO *uni_plane= cs->caseinfo;

  for ( ; s < se && t < te; )
  {
    int s_res= my_ucs2_uni(cs, &s_wc, s, se);
    int t_res= my_ucs2_uni(cs, &t_wc, t, te);

    /* Incorrect string, compare by char value */
    if (s_res <= 0 || t_res <= 0)
      return (int) s[0] - (int) t[0];

    my_tosort_ucs2(uni_plane, &s_wc);
    my_tosort_ucs2(uni_plane, &t_wc);

    if (s_wc < t_wc)
      return -1;
    if (s_wc > t_wc)
      return 1;

    s+= s_res;
    t+= t_res;
  }

  if (t_is_prefix)
    return (int) (t - te);
  return (int) ((se - s) - (te - t));
}
3076
3077 /*
3078 Compare strings, discarding end space
3079
3080 SYNOPSIS
3081 my_strnncollsp_ucs2()
3082 cs character set handler
3083 a First string to compare
3084 a_length Length of 'a'
3085 b Second string to compare
3086 b_length Length of 'b'
3087
3088 IMPLEMENTATION
3089 If one string is shorter as the other, then we space extend the other
3090 so that the strings have equal length.
3091
3092 This will ensure that the following things hold:
3093
3094 "a" == "a "
3095 "a\0" < "a"
3096 "a\0" < "a "
3097
3098 RETURN
3099 < 0 a < b
3100 = 0 a == b
3101 > 0 a > b
3102 */
3103
my_strnncollsp_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference MY_ATTRIBUTE ((unused)))3104 static int my_strnncollsp_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3105 const uchar *s, size_t slen,
3106 const uchar *t, size_t tlen,
3107 my_bool diff_if_only_endspace_difference
3108 MY_ATTRIBUTE((unused)))
3109 {
3110 const uchar *se, *te;
3111 size_t minlen;
3112 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3113
3114 /* extra safety to make sure the lengths are even numbers */
3115 slen&= ~1;
3116 tlen&= ~1;
3117
3118 se= s + slen;
3119 te= t + tlen;
3120
3121 for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
3122 {
3123 int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort :
3124 (((int) s[0]) << 8) + (int) s[1];
3125
3126 int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort :
3127 (((int) t[0]) << 8) + (int) t[1];
3128 if ( s_wc != t_wc )
3129 return s_wc > t_wc ? 1 : -1;
3130
3131 s+= 2;
3132 t+= 2;
3133 }
3134
3135 if (slen != tlen)
3136 {
3137 int swap= 1;
3138 if (slen < tlen)
3139 {
3140 s= t;
3141 se= te;
3142 swap= -1;
3143 }
3144
3145 for ( ; s < se ; s+= 2)
3146 {
3147 if (s[0] || s[1] != ' ')
3148 return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
3149 }
3150 }
3151 return 0;
3152 }
3153
3154
my_ismbchar_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b MY_ATTRIBUTE ((unused)),const char * e MY_ATTRIBUTE ((unused)))3155 static uint my_ismbchar_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3156 const char *b MY_ATTRIBUTE((unused)),
3157 const char *e MY_ATTRIBUTE((unused)))
3158 {
3159 return 2;
3160 }
3161
3162
my_mbcharlen_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),uint c MY_ATTRIBUTE ((unused)))3163 static uint my_mbcharlen_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)) ,
3164 uint c MY_ATTRIBUTE((unused)))
3165 {
3166 return 2;
3167 }
3168
3169
3170 static
my_numchars_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,const char * e)3171 size_t my_numchars_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3172 const char *b, const char *e)
3173 {
3174 return (size_t) (e-b)/2;
3175 }
3176
3177
3178 static
my_charpos_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b MY_ATTRIBUTE ((unused)),const char * e MY_ATTRIBUTE ((unused)),size_t pos)3179 size_t my_charpos_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3180 const char *b MY_ATTRIBUTE((unused)),
3181 const char *e MY_ATTRIBUTE((unused)),
3182 size_t pos)
3183 {
3184 size_t string_length= (size_t) (e - b);
3185 return pos > string_length ? string_length + 2 : pos * 2;
3186 }
3187
3188
3189 static
my_well_formed_len_ucs2(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,const char * e,size_t nchars,int * error)3190 size_t my_well_formed_len_ucs2(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3191 const char *b, const char *e,
3192 size_t nchars, int *error)
3193 {
3194 /* Ensure string length is dividable with 2 */
3195 size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
3196 *error= 0;
3197 nchars*= 2;
3198 return MY_MIN(nbytes, nchars);
3199 }
3200
3201
3202 static
my_wildcmp_ucs2_ci(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)3203 int my_wildcmp_ucs2_ci(const CHARSET_INFO *cs,
3204 const char *str,const char *str_end,
3205 const char *wildstr,const char *wildend,
3206 int escape, int w_one, int w_many)
3207 {
3208 MY_UNICASE_INFO *uni_plane= cs->caseinfo;
3209 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3210 escape,w_one,w_many,uni_plane);
3211 }
3212
3213
3214 static
my_wildcmp_ucs2_bin(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many)3215 int my_wildcmp_ucs2_bin(const CHARSET_INFO *cs,
3216 const char *str,const char *str_end,
3217 const char *wildstr,const char *wildend,
3218 int escape, int w_one, int w_many)
3219 {
3220 return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3221 escape,w_one,w_many,NULL);
3222 }
3223
3224
3225 static
my_strnncoll_ucs2_bin(const CHARSET_INFO * cs,const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool t_is_prefix)3226 int my_strnncoll_ucs2_bin(const CHARSET_INFO *cs,
3227 const uchar *s, size_t slen,
3228 const uchar *t, size_t tlen,
3229 my_bool t_is_prefix)
3230 {
3231 int s_res,t_res;
3232 my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
3233 const uchar *se=s+slen;
3234 const uchar *te=t+tlen;
3235
3236 while ( s < se && t < te )
3237 {
3238 s_res=my_ucs2_uni(cs,&s_wc, s, se);
3239 t_res=my_ucs2_uni(cs,&t_wc, t, te);
3240
3241 if ( s_res <= 0 || t_res <= 0 )
3242 {
3243 /* Incorrect string, compare by char value */
3244 return ((int)s[0]-(int)t[0]);
3245 }
3246 if ( s_wc != t_wc )
3247 {
3248 return s_wc > t_wc ? 1 : -1;
3249 }
3250
3251 s+=s_res;
3252 t+=t_res;
3253 }
3254 return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
3255 }
3256
/*
  Compare two UCS-2 strings by code point value, ignoring trailing spaces.

  The common part is compared character by character.  If the lengths
  differ, the rest of the longer string is compared against spaces: the
  first non-space character decides the result, and a rest made only of
  spaces makes the strings equal.  An odd trailing byte in either string
  is ignored.

  @return  -1, 0 or 1 for s < t, s == t, s > t.
*/
static int my_strnncollsp_ucs2_bin(const CHARSET_INFO *cs
                                   MY_ATTRIBUTE((unused)),
                                   const uchar *s, size_t slen,
                                   const uchar *t, size_t tlen,
                                   my_bool diff_if_only_endspace_difference
                                   MY_ATTRIBUTE((unused)))
{
  const uchar *s_end, *t_end;
  const uchar *rest, *rest_end;
  size_t common;
  int sign= 1;

  /* extra safety to make sure the lengths are even numbers */
  slen-= slen % 2;
  tlen-= tlen % 2;

  s_end= s + slen;
  t_end= t + tlen;

  /* Compare the part both strings have */
  common= MY_MIN(slen, tlen);
  while (common)
  {
    int s_wc= (s[0] << 8) + s[1];
    int t_wc= (t[0] << 8) + t[1];
    if (s_wc != t_wc)
      return s_wc > t_wc ? 1 : -1;

    s+= 2;
    t+= 2;
    common-= 2;
  }

  if (slen == tlen)
    return 0;

  /* Pick the longer string; flip the sign when that string is t */
  rest= s;
  rest_end= s_end;
  if (slen < tlen)
  {
    rest= t;
    rest_end= t_end;
    sign= -1;
  }

  /* The first character of the rest that is not a space decides */
  for ( ; rest < rest_end; rest+= 2)
  {
    if (rest[0] || rest[1] != ' ')
      return (rest[0] == 0 && rest[1] < ' ') ? -sign : sign;
  }
  return 0;
}
3303
3304
3305 static
my_hash_sort_ucs2_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * key,size_t len,ulong * nr1,ulong * nr2)3306 void my_hash_sort_ucs2_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
3307 const uchar *key, size_t len,ulong *nr1, ulong *nr2)
3308 {
3309 const uchar *pos = key;
3310
3311 key+= len;
3312
3313 while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
3314 key-= 2;
3315
3316 for (; pos < (uchar*) key ; pos++)
3317 {
3318 nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
3319 ((uint)*pos)) + (nr1[0] << 8);
3320 nr2[0]+=3;
3321 }
3322 }
3323
3324
3325 static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
3326 {
3327 NULL, /* init */
3328 my_strnncoll_ucs2,
3329 my_strnncollsp_ucs2,
3330 my_strnxfrm_unicode,
3331 my_strnxfrmlen_simple,
3332 my_like_range_generic,
3333 my_wildcmp_ucs2_ci,
3334 my_strcasecmp_mb2_or_mb4,
3335 my_instr_mb,
3336 my_hash_sort_ucs2,
3337 my_propagate_simple
3338 };
3339
3340
3341 static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
3342 {
3343 NULL, /* init */
3344 my_strnncoll_ucs2_bin,
3345 my_strnncollsp_ucs2_bin,
3346 my_strnxfrm_unicode,
3347 my_strnxfrmlen_simple,
3348 my_like_range_generic,
3349 my_wildcmp_ucs2_bin,
3350 my_strcasecmp_mb2_or_mb4,
3351 my_instr_mb,
3352 my_hash_sort_ucs2_bin,
3353 my_propagate_simple
3354 };
3355
3356
3357 MY_CHARSET_HANDLER my_charset_ucs2_handler=
3358 {
3359 NULL, /* init */
3360 my_ismbchar_ucs2, /* ismbchar */
3361 my_mbcharlen_ucs2, /* mbcharlen */
3362 my_numchars_ucs2,
3363 my_charpos_ucs2,
3364 my_well_formed_len_ucs2,
3365 my_lengthsp_mb2,
3366 my_numcells_mb,
3367 my_ucs2_uni, /* mb_wc */
3368 my_uni_ucs2, /* wc_mb */
3369 my_mb_ctype_mb,
3370 my_caseup_str_mb2_or_mb4,
3371 my_casedn_str_mb2_or_mb4,
3372 my_caseup_ucs2,
3373 my_casedn_ucs2,
3374 my_snprintf_mb2,
3375 my_l10tostr_mb2_or_mb4,
3376 my_ll10tostr_mb2_or_mb4,
3377 my_fill_ucs2,
3378 my_strntol_mb2_or_mb4,
3379 my_strntoul_mb2_or_mb4,
3380 my_strntoll_mb2_or_mb4,
3381 my_strntoull_mb2_or_mb4,
3382 my_strntod_mb2_or_mb4,
3383 my_strtoll10_mb2,
3384 my_strntoull10rnd_mb2_or_mb4,
3385 my_scan_mb2
3386 };
3387
3388
3389 CHARSET_INFO my_charset_ucs2_general_ci=
3390 {
3391 35,0,0, /* number */
3392 MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
3393 "ucs2", /* cs name */
3394 "ucs2_general_ci", /* name */
3395 "", /* comment */
3396 NULL, /* tailoring */
3397 ctype_ucs2, /* ctype */
3398 to_lower_ucs2, /* to_lower */
3399 to_upper_ucs2, /* to_upper */
3400 to_upper_ucs2, /* sort_order */
3401 NULL, /* uca */
3402 NULL, /* tab_to_uni */
3403 NULL, /* tab_from_uni */
3404 &my_unicase_default,/* caseinfo */
3405 NULL, /* state_map */
3406 NULL, /* ident_map */
3407 1, /* strxfrm_multiply */
3408 1, /* caseup_multiply */
3409 1, /* casedn_multiply */
3410 2, /* mbminlen */
3411 2, /* mbmaxlen */
3412 0, /* min_sort_char */
3413 0xFFFF, /* max_sort_char */
3414 ' ', /* pad char */
3415 0, /* escape_with_backslash_is_dangerous */
3416 1, /* levels_for_compare */
3417 1, /* levels_for_order */
3418 &my_charset_ucs2_handler,
3419 &my_collation_ucs2_general_ci_handler
3420 };
3421
3422
3423 CHARSET_INFO my_charset_ucs2_general_mysql500_ci=
3424 {
3425 159, 0, 0, /* number */
3426 MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
3427 "ucs2", /* cs name */
3428 "ucs2_general_mysql500_ci", /* name */
3429 "", /* comment */
3430 NULL, /* tailoring */
3431 ctype_ucs2, /* ctype */
3432 to_lower_ucs2, /* to_lower */
3433 to_upper_ucs2, /* to_upper */
3434 to_upper_ucs2, /* sort_order */
3435 NULL, /* uca */
3436 NULL, /* tab_to_uni */
3437 NULL, /* tab_from_uni */
3438 &my_unicase_mysql500, /* caseinfo */
3439 NULL, /* state_map */
3440 NULL, /* ident_map */
3441 1, /* strxfrm_multiply */
3442 1, /* caseup_multiply */
3443 1, /* casedn_multiply */
3444 2, /* mbminlen */
3445 2, /* mbmaxlen */
3446 0, /* min_sort_char */
3447 0xFFFF, /* max_sort_char */
3448 ' ', /* pad char */
3449 0, /* escape_with_backslash_is_dangerous */
3450 1, /* levels_for_compare */
3451 1, /* levels_for_order */
3452 &my_charset_ucs2_handler,
3453 &my_collation_ucs2_general_ci_handler
3454 };
3455
3456
3457 CHARSET_INFO my_charset_ucs2_bin=
3458 {
3459 90,0,0, /* number */
3460 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
3461 "ucs2", /* cs name */
3462 "ucs2_bin", /* name */
3463 "", /* comment */
3464 NULL, /* tailoring */
3465 ctype_ucs2, /* ctype */
3466 to_lower_ucs2, /* to_lower */
3467 to_upper_ucs2, /* to_upper */
3468 NULL, /* sort_order */
3469 NULL, /* uca */
3470 NULL, /* tab_to_uni */
3471 NULL, /* tab_from_uni */
3472 &my_unicase_default,/* caseinfo */
3473 NULL, /* state_map */
3474 NULL, /* ident_map */
3475 1, /* strxfrm_multiply */
3476 1, /* caseup_multiply */
3477 1, /* casedn_multiply */
3478 2, /* mbminlen */
3479 2, /* mbmaxlen */
3480 0, /* min_sort_char */
3481 0xFFFF, /* max_sort_char */
3482 ' ', /* pad char */
3483 0, /* escape_with_backslash_is_dangerous */
3484 1, /* levels_for_compare */
3485 1, /* levels_for_order */
3486 &my_charset_ucs2_handler,
3487 &my_collation_ucs2_bin_handler
3488 };
3489
3490
3491 #endif /* HAVE_CHARSET_ucs2 */
3492