1 /* Copyright (c) 2002, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
27
28 #include <my_global.h>
29 #include "m_string.h"
30 #include "m_ctype.h"
31 #include "my_sys.h" /* Needed for MY_ERRNO_ERANGE */
32 #include <errno.h>
33
34 #include "stdarg.h"
35
36 /*
37 Returns the number of bytes required for strnxfrm().
38 */
39
my_strnxfrmlen_simple(const CHARSET_INFO * cs,size_t len)40 size_t my_strnxfrmlen_simple(const CHARSET_INFO *cs, size_t len)
41 {
42 return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
43 }
44
45
46 /*
47 Converts a string into its sort key.
48
49 SYNOPSIS
50 my_strnxfrm_xxx()
51
52 IMPLEMENTATION
53
54 The my_strxfrm_xxx() function transforms a string pointed to by
55 'src' with length 'srclen' according to the charset+collation
56 pair 'cs' and copies the result key into 'dest'.
57
58 Comparing two strings using memcmp() after my_strnxfrm_xxx()
59 is equal to comparing two original strings with my_strnncollsp_xxx().
60
    Not more than 'dstlen' bytes are written into 'dst'.
    To guarantee that the whole string is transformed, 'dstlen' must be
    at least srclen*cs->strxfrm_multiply bytes long. Otherwise, a
    subsequent memcmp() may return an inaccurate result.
65
66 If the source string is too short to fill whole 'dstlen' bytes,
67 then the 'dest' string is padded up to 'dstlen', ensuring that:
68
69 "a" == "a "
70 "a\0" < "a"
71 "a\0" < "a "
72
73 my_strnxfrm_simple() is implemented for 8bit charsets and
74 simple collations with one-to-one string->key transformation.
75
76 See also implementations for various charsets/collations in
77 other ctype-xxx.c files.
78
79 RETURN
80
81 Target len 'dstlen'.
82
83 */
84
85
86 size_t
my_strnxfrm_simple(const CHARSET_INFO * cs,uchar * dst,size_t dstlen,uint nweights,const uchar * src,size_t srclen,uint flags)87 my_strnxfrm_simple(const CHARSET_INFO *cs,
88 uchar *dst, size_t dstlen, uint nweights,
89 const uchar *src, size_t srclen, uint flags)
90 {
91 const uchar *map= cs->sort_order;
92 uchar *d0= dst;
93 const uchar *end;
94 const uchar *remainder;
95 size_t frmlen;
96 if ((frmlen= MY_MIN(dstlen, nweights)) > srclen)
97 frmlen= srclen;
98 end= src + frmlen;
99
100 // Do the first few bytes.
101 remainder= src + (frmlen % 8);
102 for (; src < remainder;)
103 *dst++= map[*src++];
104
105 // Unroll loop for rest of string.
106 for (; src < end;)
107 {
108 *dst++= map[*src++];
109 *dst++= map[*src++];
110 *dst++= map[*src++];
111 *dst++= map[*src++];
112 *dst++= map[*src++];
113 *dst++= map[*src++];
114 *dst++= map[*src++];
115 *dst++= map[*src++];
116 }
117 return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
118 (uint)(nweights - frmlen), flags, 0);
119 }
120
121
/*
  Compare two strings by their sort_order weights.

  SYNOPSIS
    my_strnncoll_simple()
    cs            Character set; cs->sort_order supplies the weights
    s, slen       First string and its length
    t, tlen       Second string and its length
    t_is_prefix   If set, only the first 'tlen' bytes of 's' take part

  RETURN
    The weight difference at the first differing position, otherwise
    1 / -1 / 0 according to how the lengths compare.
*/
int my_strnncoll_simple(const CHARSET_INFO * cs, const uchar *s, size_t slen,
                        const uchar *t, size_t tlen,
                        my_bool t_is_prefix)
{
  const uchar *weights= cs->sort_order;
  const size_t common= (slen > tlen) ? tlen : slen;
  size_t pos;

  for (pos= 0; pos < common; pos++)
  {
    const int ws= (int) weights[s[pos]];
    const int wt= (int) weights[t[pos]];
    if (ws != wt)
      return ws - wt;
  }

  /* In prefix mode whatever follows the first 'tlen' bytes of 's' is ignored */
  if (t_is_prefix && slen > tlen)
    slen= tlen;

  /*
    (slen - tlen) cannot be returned directly: it may not fit in a signed int
  */
  if (slen > tlen)
    return 1;
  if (slen < tlen)
    return -1;
  return 0;
}
141
142
143 /*
144 Compare strings, discarding end space
145
146 SYNOPSIS
147 my_strnncollsp_simple()
148 cs character set handler
149 a First string to compare
150 a_length Length of 'a'
151 b Second string to compare
152 b_length Length of 'b'
153 diff_if_only_endspace_difference
                    Set to 1 if the strings should be regarded as different
                    when they differ only in trailing spaces
156
157 IMPLEMENTATION
    If one string is shorter than the other, the shorter one is treated
    as if it were extended with spaces to the length of the longer one.
160
161 This will ensure that the following things hold:
162
163 "a" == "a "
164 "a\0" < "a"
165 "a\0" < "a "
166
167 RETURN
168 < 0 a < b
169 = 0 a == b
170 > 0 a > b
171 */
172
int my_strnncollsp_simple(const CHARSET_INFO *cs, const uchar *a,
                          size_t a_length, const uchar *b, size_t b_length,
                          my_bool diff_if_only_endspace_difference)
{
  const uchar *weights= cs->sort_order;
  const size_t common= MY_MIN(a_length, b_length);
  const uchar *tail, *tail_end;
  uchar space_weight;
  int sign;                      /* 1 if 'a' is the longer key, -1 if 'b' is */
  size_t pos;

#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* Compare the part both keys have in common */
  for (pos= 0; pos < common; pos++)
  {
    const int wa= (int) weights[a[pos]];
    const int wb= (int) weights[b[pos]];
    if (wa != wb)
      return wa - wb;
  }

  if (a_length == b_length)
    return 0;

  /* Select the remainder of the longer key */
  if (a_length > b_length)
  {
    tail= a + common;
    tail_end= a + a_length;
    sign= 1;
  }
  else
  {
    tail= b + common;
    tail_end= b + b_length;
    sign= -1;
  }

  /*
    Look for the first character of the remainder whose weight differs from
    the weight of a space. If it sorts before space, the longer key is the
    smaller one; otherwise it is the bigger one.
  */
  space_weight= weights[' '];
  for (; tail < tail_end; tail++)
  {
    if (weights[*tail] != space_weight)
      return (weights[*tail] < space_weight) ? -sign : sign;
  }

  /* The remainder is equivalent to spaces only */
  return diff_if_only_endspace_difference ? sign : 0;
}
217
218
my_caseup_str_8bit(const CHARSET_INFO * cs,char * str)219 size_t my_caseup_str_8bit(const CHARSET_INFO *cs,char *str)
220 {
221 const uchar *map= cs->to_upper;
222 char *str_orig= str;
223 while ((*str= (char) map[(uchar) *str]) != 0)
224 str++;
225 return (size_t) (str - str_orig);
226 }
227
228
my_casedn_str_8bit(const CHARSET_INFO * cs,char * str)229 size_t my_casedn_str_8bit(const CHARSET_INFO *cs,char *str)
230 {
231 const uchar *map= cs->to_lower;
232 char *str_orig= str;
233 while ((*str= (char) map[(uchar) *str]) != 0)
234 str++;
235 return (size_t) (str - str_orig);
236 }
237
238
/*
  Convert 'srclen' bytes of 'src' to upper case in place, using cs->to_upper.
  'dst' and 'dstlen' are only checked by an assertion: the conversion is
  always done in place, so they must equal 'src' and 'srclen'.

  RETURN
    srclen
*/
size_t my_caseup_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
                      char *dst MY_ATTRIBUTE((unused)),
                      size_t dstlen MY_ATTRIBUTE((unused)))
{
  const uchar *upper= cs->to_upper;
  size_t pos;

  assert(src == dst && srclen == dstlen);
  for (pos= 0; pos < srclen; pos++)
    src[pos]= (char) upper[(uchar) src[pos]];
  return srclen;
}
250
251
/*
  Convert 'srclen' bytes of 'src' to lower case in place, using cs->to_lower.
  'dst' and 'dstlen' are only checked by an assertion: the conversion is
  always done in place, so they must equal 'src' and 'srclen'.

  RETURN
    srclen
*/
size_t my_casedn_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
                      char *dst MY_ATTRIBUTE((unused)),
                      size_t dstlen MY_ATTRIBUTE((unused)))
{
  const uchar *lower= cs->to_lower;
  size_t pos;

  assert(src == dst && srclen == dstlen);
  for (pos= 0; pos < srclen; pos++)
    src[pos]= (char) lower[(uchar) src[pos]];
  return srclen;
}
263
my_strcasecmp_8bit(const CHARSET_INFO * cs,const char * s,const char * t)264 int my_strcasecmp_8bit(const CHARSET_INFO *cs,const char *s, const char *t)
265 {
266 const uchar *map=cs->to_upper;
267 while (map[(uchar) *s] == map[(uchar) *t++])
268 if (!*s++) return 0;
269 return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
270 }
271
272
/*
  Convert one byte to its wide-character value via cs->tab_to_uni.

  RETURN
    MY_CS_TOOSMALL   no input byte available (str >= end)
    -1               a non-zero byte maps to 0 in tab_to_uni
    1                one byte was converted; the result is stored in *wc
*/
int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc,
                  const uchar *str,
                  const uchar *end MY_ATTRIBUTE((unused)))
{
  if (str >= end)
    return MY_CS_TOOSMALL;

  *wc= cs->tab_to_uni[str[0]];
  if (*wc == 0 && str[0] != 0)
    return -1;
  return 1;
}
283
/*
  Convert one wide character to its single-byte code.
  cs->tab_from_uni is a list of ranges terminated by an entry whose 'tab'
  is NULL; the first range containing 'wc' is used.

  RETURN
    MY_CS_TOOSMALL   no room in the output buffer (str >= end)
    MY_CS_ILUNI      'wc' is not covered by any range, or a non-zero 'wc'
                     maps to byte 0
    1                one byte was written to str[0]
*/
int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc,
                  uchar *str,
                  uchar *end)
{
  const MY_UNI_IDX *range;

  if (str >= end)
    return MY_CS_TOOSMALL;

  range= cs->tab_from_uni;
  while (range->tab)
  {
    if (wc >= range->from && wc <= range->to)
    {
      *str= range->tab[wc - range->from];
      if (*str == 0 && wc != 0)
        return MY_CS_ILUNI;
      return 1;
    }
    range++;
  }
  return MY_CS_ILUNI;
}
303
304
305 /*
306 We can't use vsprintf here as it's not guaranteed to return
307 the length on all operating systems.
308 This function is also not called in a safe environment, so the
309 end buffer must be checked.
310 */
311
size_t my_snprintf_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        char* to, size_t n MY_ATTRIBUTE((unused)),
                        const char* fmt, ...)
{
  va_list ap;
  size_t written;

  /* Formatting is forwarded to my_vsnprintf() with the buffer size 'n' */
  va_start(ap, fmt);
  written= my_vsnprintf(to, n, fmt, ap);
  va_end(ap);
  return written;
}
323
324
/*
  Update the hash state (*nr1, *nr2) with the sort weights of a key.

  SYNOPSIS
    my_hash_sort_simple()
    cs        Character set; cs->sort_order supplies the weights
    key, len  Key to hash
    nr1, nr2  In/out hash state
*/
void my_hash_sort_simple(const CHARSET_INFO *cs,
                         const uchar *key, size_t len,
                         ulong *nr1, ulong *nr2)
{
  const uchar *weights= cs->sort_order;
  /*
    Trailing spaces do not take part in the hash,
    so that 'A ' and 'A' hash identically.
  */
  const uchar *stop= skip_trailing_space(key, len);
  ulong h1= *nr1;
  ulong h2= *nr2;

  while (key < stop)
  {
    const uint weight= (uint) weights[(uint) *key++];
    h1^= (ulong) ((((uint) h1 & 63) + h2) * weight) + (h1 << 8);
    h2+= 3;
  }

  *nr1= h1;
  *nr2= h2;
}
353
354
my_strntol_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)355 long my_strntol_8bit(const CHARSET_INFO *cs,
356 const char *nptr, size_t l, int base,
357 char **endptr, int *err)
358 {
359 int negative;
360 uint32 cutoff;
361 uint cutlim;
362 uint32 i;
363 const char *s;
364 uchar c;
365 const char *save, *e;
366 int overflow;
367
368 *err= 0; /* Initialize error indicator */
369
370 s = nptr;
371 e = nptr+l;
372
373 for ( ; s<e && my_isspace(cs, *s) ; s++);
374
375 if (s == e)
376 {
377 goto noconv;
378 }
379
380 /* Check for a sign. */
381 if (*s == '-')
382 {
383 negative = 1;
384 ++s;
385 }
386 else if (*s == '+')
387 {
388 negative = 0;
389 ++s;
390 }
391 else
392 negative = 0;
393
394 save = s;
395 cutoff = ((uint32)~0L) / (uint32) base;
396 cutlim = (uint) (((uint32)~0L) % (uint32) base);
397
398 overflow = 0;
399 i = 0;
400 for (c = *s; s != e; c = *++s)
401 {
402 if (c>='0' && c<='9')
403 c -= '0';
404 else if (c>='A' && c<='Z')
405 c = c - 'A' + 10;
406 else if (c>='a' && c<='z')
407 c = c - 'a' + 10;
408 else
409 break;
410 if (c >= base)
411 break;
412 if (i > cutoff || (i == cutoff && c > cutlim))
413 overflow = 1;
414 else
415 {
416 i *= (uint32) base;
417 i += c;
418 }
419 }
420
421 if (s == save)
422 goto noconv;
423
424 if (endptr != NULL)
425 *endptr = (char *) s;
426
427 if (negative)
428 {
429 if (i > (uint32) INT_MIN32)
430 overflow = 1;
431 }
432 else if (i > INT_MAX32)
433 overflow = 1;
434
435 if (overflow)
436 {
437 err[0]= ERANGE;
438 return negative ? INT_MIN32 : INT_MAX32;
439 }
440
441 return (negative ? -((long) i) : (long) i);
442
443 noconv:
444 err[0]= EDOM;
445 if (endptr != NULL)
446 *endptr = (char *) nptr;
447 return 0L;
448 }
449
450
my_strntoul_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)451 ulong my_strntoul_8bit(const CHARSET_INFO *cs,
452 const char *nptr, size_t l, int base,
453 char **endptr, int *err)
454 {
455 int negative;
456 uint32 cutoff;
457 uint cutlim;
458 uint32 i;
459 const char *s;
460 uchar c;
461 const char *save, *e;
462 int overflow;
463
464 *err= 0; /* Initialize error indicator */
465
466 s = nptr;
467 e = nptr+l;
468
469 for( ; s<e && my_isspace(cs, *s); s++);
470
471 if (s==e)
472 {
473 goto noconv;
474 }
475
476 if (*s == '-')
477 {
478 negative = 1;
479 ++s;
480 }
481 else if (*s == '+')
482 {
483 negative = 0;
484 ++s;
485 }
486 else
487 negative = 0;
488
489 save = s;
490 cutoff = ((uint32)~0L) / (uint32) base;
491 cutlim = (uint) (((uint32)~0L) % (uint32) base);
492 overflow = 0;
493 i = 0;
494
495 for (c = *s; s != e; c = *++s)
496 {
497 if (c>='0' && c<='9')
498 c -= '0';
499 else if (c>='A' && c<='Z')
500 c = c - 'A' + 10;
501 else if (c>='a' && c<='z')
502 c = c - 'a' + 10;
503 else
504 break;
505 if (c >= base)
506 break;
507 if (i > cutoff || (i == cutoff && c > cutlim))
508 overflow = 1;
509 else
510 {
511 i *= (uint32) base;
512 i += c;
513 }
514 }
515
516 if (s == save)
517 goto noconv;
518
519 if (endptr != NULL)
520 *endptr = (char *) s;
521
522 if (overflow)
523 {
524 err[0]= ERANGE;
525 return (~(uint32) 0);
526 }
527
528 return (negative ? -((long) i) : (long) i);
529
530 noconv:
531 err[0]= EDOM;
532 if (endptr != NULL)
533 *endptr = (char *) nptr;
534 return 0L;
535 }
536
537
my_strntoll_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * nptr,size_t l,int base,char ** endptr,int * err)538 longlong my_strntoll_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
539 const char *nptr, size_t l, int base,
540 char **endptr,int *err)
541 {
542 int negative;
543 ulonglong cutoff;
544 uint cutlim;
545 ulonglong i;
546 const char *s, *e;
547 const char *save;
548 int overflow;
549
550 *err= 0; /* Initialize error indicator */
551
552 s = nptr;
553 e = nptr+l;
554
555 for(; s<e && my_isspace(cs,*s); s++);
556
557 if (s == e)
558 {
559 goto noconv;
560 }
561
562 if (*s == '-')
563 {
564 negative = 1;
565 ++s;
566 }
567 else if (*s == '+')
568 {
569 negative = 0;
570 ++s;
571 }
572 else
573 negative = 0;
574
575 save = s;
576
577 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
578 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
579
580 overflow = 0;
581 i = 0;
582 for ( ; s != e; s++)
583 {
584 uchar c= *s;
585 if (c>='0' && c<='9')
586 c -= '0';
587 else if (c>='A' && c<='Z')
588 c = c - 'A' + 10;
589 else if (c>='a' && c<='z')
590 c = c - 'a' + 10;
591 else
592 break;
593 if (c >= base)
594 break;
595 if (i > cutoff || (i == cutoff && c > cutlim))
596 overflow = 1;
597 else
598 {
599 i *= (ulonglong) base;
600 i += c;
601 }
602 }
603
604 if (s == save)
605 goto noconv;
606
607 if (endptr != NULL)
608 *endptr = (char *) s;
609
610 if (negative)
611 {
612 if (i > (ulonglong) LLONG_MIN)
613 overflow = 1;
614 }
615 else if (i > (ulonglong) LLONG_MAX)
616 overflow = 1;
617
618 if (overflow)
619 {
620 err[0]= ERANGE;
621 return negative ? LLONG_MIN : LLONG_MAX;
622 }
623
624 return (negative ? -((longlong) i) : (longlong) i);
625
626 noconv:
627 err[0]= EDOM;
628 if (endptr != NULL)
629 *endptr = (char *) nptr;
630 return 0L;
631 }
632
633
my_strntoull_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)634 ulonglong my_strntoull_8bit(const CHARSET_INFO *cs,
635 const char *nptr, size_t l, int base,
636 char **endptr, int *err)
637 {
638 int negative;
639 ulonglong cutoff;
640 uint cutlim;
641 ulonglong i;
642 const char *s, *e;
643 const char *save;
644 int overflow;
645
646 *err= 0; /* Initialize error indicator */
647
648 s = nptr;
649 e = nptr+l;
650
651 for(; s<e && my_isspace(cs,*s); s++);
652
653 if (s == e)
654 {
655 goto noconv;
656 }
657
658 if (*s == '-')
659 {
660 negative = 1;
661 ++s;
662 }
663 else if (*s == '+')
664 {
665 negative = 0;
666 ++s;
667 }
668 else
669 negative = 0;
670
671 save = s;
672
673 cutoff = (~(ulonglong) 0) / (unsigned long int) base;
674 cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
675
676 overflow = 0;
677 i = 0;
678 for ( ; s != e; s++)
679 {
680 uchar c= *s;
681
682 if (c>='0' && c<='9')
683 c -= '0';
684 else if (c>='A' && c<='Z')
685 c = c - 'A' + 10;
686 else if (c>='a' && c<='z')
687 c = c - 'a' + 10;
688 else
689 break;
690 if (c >= base)
691 break;
692 if (i > cutoff || (i == cutoff && c > cutlim))
693 overflow = 1;
694 else
695 {
696 i *= (ulonglong) base;
697 i += c;
698 }
699 }
700
701 if (s == save)
702 goto noconv;
703
704 if (endptr != NULL)
705 *endptr = (char *) s;
706
707 if (overflow)
708 {
709 err[0]= ERANGE;
710 return (~(ulonglong) 0);
711 }
712
713 return (negative ? -((longlong) i) : (longlong) i);
714
715 noconv:
716 err[0]= EDOM;
717 if (endptr != NULL)
718 *endptr = (char *) nptr;
719 return 0L;
720 }
721
722
723 /*
724 Read double from string
725
726 SYNOPSIS:
727 my_strntod_8bit()
728 cs Character set information
729 str String to convert to double
730 length Optional length for string.
731 end result pointer to end of converted string
732 err Error number if failed conversion
733
734 NOTES:
735 If length is not INT_MAX32 or str[length] != 0 then the given str must
736 be writeable
737 If length == INT_MAX32 the str must be \0 terminated.
738
739 It's implemented this way to save a buffer allocation and a memory copy.
740
741 RETURN
742 Value of number in string
743 */
744
745
double my_strntod_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                       char *str, size_t length,
                       char **end, int *err)
{
  /* INT_MAX32 stands for "no explicit length"; 65535 should be big enough */
  const size_t span= (length == INT_MAX32) ? 65535 : length;

  /* *end is set to the end of the input before my_strtod() is called */
  *end= str + span;
  return my_strtod(str, end, err);
}
755
756
757 /*
758 This is a fast version optimized for the case of radix 10 / -10
759
760 Assume len >= 1
761 */
762
size_t my_long10_to_str_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                             char *dst, size_t len, int radix, long int val)
{
  char digits[66];
  char *const tail= &digits[sizeof(digits) - 1];
  char *head= tail;
  unsigned long int magnitude= (unsigned long int) val;
  uint sign= 0;
  size_t ndigits;

  *head= 0;

  /* A negative radix requests signed output */
  if (radix < 0 && val < 0)
  {
    /* Avoid integer overflow in (-val) for the minimum value (BUG#31799). */
    magnitude= (unsigned long int) 0 - magnitude;
    *dst++= '-';
    len--;
    sign= 1;
  }

  /* Produce the digits backwards; zero yields a single '0' */
  do
  {
    *--head= (char) ('0' + (char) (magnitude % 10));
    magnitude/= 10;
  } while (magnitude != 0);

  /* Copy at most 'len' bytes, starting from the most significant digit */
  ndigits= (size_t) (tail - head);
  if (len > ndigits)
    len= ndigits;
  memcpy(dst, head, len);
  return len + sign;
}
802
803
/*
  Write the decimal representation of a longlong into 'dst'.

  SYNOPSIS
    my_longlong10_to_str_8bit()
    dst, len   Output buffer and its size
    radix      If negative, 'val' is treated as signed and a '-' is written
               for negative values; otherwise 'val' is treated as unsigned
    val        Value to convert

  NOTES
    The output is not 0-terminated.
    For val == 0 exactly one byte is written, regardless of 'len'.

  RETURN
    Number of bytes written to 'dst', including the sign.
*/
size_t my_longlong10_to_str_8bit(const CHARSET_INFO *cs
                                 MY_ATTRIBUTE((unused)),
                                 char *dst, size_t len, int radix,
                                 longlong val)
{
  char digits[65];
  char *const tail= &digits[sizeof(digits) - 1];
  char *head= tail;
  ulonglong magnitude= (ulonglong) val;
  uint sign= 0;

  if (radix < 0 && val < 0)
  {
    /* Avoid integer overflow in (-val) for LLONG_MIN (BUG#31799). */
    magnitude= (ulonglong) 0 - magnitude;
    *dst++= '-';
    len--;
    sign= 1;
  }

  *head= 0;

  if (magnitude == 0)
  {
    *--head= '0';
    len= 1;
  }
  else
  {
    /* Produce the digits backwards */
    do
    {
      *--head= (char) ('0' + (uint) (magnitude % 10));
      magnitude/= 10;
    } while (magnitude != 0);

    len= MY_MIN(len, (size_t) (tail - head));
  }

  memcpy(dst, head, len);
  return len + sign;
}
858
859
/*
** Compare string against string with wildcard
**	 0 if matched
**	-1 if not matched, and the string ran out while a wildcard was
**	   being processed
**	 1 if not matched, because a literal character differs or the
**	   pattern ended before the string did
*/
866
867 #define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
868 #define INC_PTR(cs,A,B) (A)++
869
870 static
my_wildcmp_8bit_impl(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many,int recurse_level)871 int my_wildcmp_8bit_impl(const CHARSET_INFO *cs,
872 const char *str,const char *str_end,
873 const char *wildstr,const char *wildend,
874 int escape, int w_one, int w_many, int recurse_level)
875 {
876 int result= -1; /* Not found, using wildcards */
877
878 if (my_string_stack_guard && my_string_stack_guard(recurse_level))
879 return 1;
880 while (wildstr != wildend)
881 {
882 while (*wildstr != w_many && *wildstr != w_one)
883 {
884 if (*wildstr == escape && wildstr+1 != wildend)
885 wildstr++;
886
887 if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
888 return(1); /* No match */
889 if (wildstr == wildend)
890 return(str != str_end); /* Match if both are at end */
891 result=1; /* Found an anchor char */
892 }
893 if (*wildstr == w_one)
894 {
895 do
896 {
897 if (str == str_end) /* Skip one char if possible */
898 return(result);
899 INC_PTR(cs,str,str_end);
900 } while (++wildstr < wildend && *wildstr == w_one);
901 if (wildstr == wildend)
902 break;
903 }
904 if (*wildstr == w_many)
905 { /* Found w_many */
906 uchar cmp;
907
908 wildstr++;
909 /* Remove any '%' and '_' from the wild search string */
910 for (; wildstr != wildend ; wildstr++)
911 {
912 if (*wildstr == w_many)
913 continue;
914 if (*wildstr == w_one)
915 {
916 if (str == str_end)
917 return(-1);
918 INC_PTR(cs,str,str_end);
919 continue;
920 }
921 break; /* Not a wild character */
922 }
923 if (wildstr == wildend)
924 return(0); /* Ok if w_many is last */
925 if (str == str_end)
926 return(-1);
927
928 if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
929 cmp= *++wildstr;
930
931 INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
932 cmp=likeconv(cs,cmp);
933 do
934 {
935 while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
936 str++;
937 if (str++ == str_end) return(-1);
938 {
939 int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
940 wildstr,wildend,escape,w_one,
941 w_many, recurse_level + 1);
942 if (tmp <= 0)
943 return(tmp);
944 }
945 } while (str != str_end && wildstr[0] != w_many);
946 return(-1);
947 }
948 }
949 return(str != str_end ? 1 : 0);
950 }
951
/*
  Public entry point for wildcard comparison in 8-bit character sets.
  Starts my_wildcmp_8bit_impl() at recursion level 1 and returns its result.
*/
int my_wildcmp_8bit(const CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many)
{
  const int first_level= 1;

  return my_wildcmp_8bit_impl(cs,
                              str, str_end,
                              wildstr, wildend,
                              escape, w_one, w_many,
                              first_level);
}
961
962
963 /*
964 ** Calculate min_str and max_str that ranges a LIKE string.
965 ** Arguments:
966 ** ptr Pointer to LIKE string.
967 ** ptr_length Length of LIKE string.
968 ** escape Escape character in LIKE. (Normally '\').
969 ** All escape characters should be removed from min_str and max_str
970 ** res_length Length of min_str and max_str.
971 ** min_str Smallest case sensitive string that ranges LIKE.
972 ** Should be space padded to res_length.
973 ** max_str Largest case sensitive string that ranges LIKE.
974 ** Normally padded with the biggest character sort value.
975 **
976 ** The function should return 0 if ok and 1 if the LIKE string can't be
977 ** optimized !
978 */
979
my_like_range_simple(const CHARSET_INFO * cs,const char * ptr,size_t ptr_length,pbool escape,pbool w_one,pbool w_many,size_t res_length,char * min_str,char * max_str,size_t * min_length,size_t * max_length)980 my_bool my_like_range_simple(const CHARSET_INFO *cs,
981 const char *ptr, size_t ptr_length,
982 pbool escape, pbool w_one, pbool w_many,
983 size_t res_length,
984 char *min_str,char *max_str,
985 size_t *min_length, size_t *max_length)
986 {
987 const char *end= ptr + ptr_length;
988 char *min_org=min_str;
989 char *min_end=min_str+res_length;
990 size_t charlen= res_length / cs->mbmaxlen;
991
992 for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
993 {
994 if (*ptr == escape && ptr+1 != end)
995 {
996 ptr++; /* Skip escape */
997 *min_str++= *max_str++ = *ptr;
998 continue;
999 }
1000 if (*ptr == w_one) /* '_' in SQL */
1001 {
1002 *min_str++='\0'; /* This should be min char */
1003 *max_str++= (char) cs->max_sort_char;
1004 continue;
1005 }
1006 if (*ptr == w_many) /* '%' in SQL */
1007 {
1008 /* Calculate length of keys */
1009 *min_length= ((cs->state & MY_CS_BINSORT) ?
1010 (size_t) (min_str - min_org) :
1011 res_length);
1012 *max_length= res_length;
1013 do
1014 {
1015 *min_str++= 0;
1016 *max_str++= (char) cs->max_sort_char;
1017 } while (min_str != min_end);
1018 return 0;
1019 }
1020 *min_str++= *max_str++ = *ptr;
1021 }
1022
1023 *min_length= *max_length = (size_t) (min_str - min_org);
1024 while (min_str != min_end)
1025 *min_str++= *max_str++ = ' '; /* Because if key compression */
1026 return 0;
1027 }
1028
1029
my_scan_8bit(const CHARSET_INFO * cs,const char * str,const char * end,int sq)1030 size_t my_scan_8bit(const CHARSET_INFO *cs, const char *str, const char *end,
1031 int sq)
1032 {
1033 const char *str0= str;
1034 switch (sq)
1035 {
1036 case MY_SEQ_INTTAIL:
1037 if (*str == '.')
1038 {
1039 for(str++ ; str != end && *str == '0' ; str++);
1040 return (size_t) (str - str0);
1041 }
1042 return 0;
1043
1044 case MY_SEQ_SPACES:
1045 for ( ; str < end ; str++)
1046 {
1047 if (!my_isspace(cs,*str))
1048 break;
1049 }
1050 return (size_t) (str - str0);
1051 default:
1052 return 0;
1053 }
1054 }
1055
1056
/*
  Fill 'l' bytes starting at 's' with the byte value 'fill'.
*/
void my_fill_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                  char *s, size_t l, int fill)
{
  (void) memset(s, fill, l);
}
1062
1063
/*
  Number of characters in the range [b, e).
  Computed as the byte distance between the two pointers.
*/
size_t my_numchars_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *b, const char *e)
{
  const size_t nbytes= (size_t) (e - b);
  return nbytes;
}
1069
1070
/*
  Number of cells taken by the range [b, e).
  Computed as the byte distance between the two pointers.
*/
size_t my_numcells_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *b, const char *e)
{
  const size_t nbytes= (size_t) (e - b);
  return nbytes;
}
1076
1077
/*
  Byte offset of the character at position 'pos'.
  Returns 'pos' unchanged; the range [b, e) is not inspected.
*/
size_t my_charpos_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                       const char *b MY_ATTRIBUTE((unused)),
                       const char *e MY_ATTRIBUTE((unused)),
                       size_t pos)
{
  const size_t byte_offset= pos;
  return byte_offset;
}
1085
1086
/*
  Length of the well-formed prefix of [start, end), limited to 'nchars'.
  No byte is inspected, so *error is always set to 0.

  RETURN
    The smaller of (end - start) and nchars.
*/
size_t my_well_formed_len_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                               const char *start, const char *end,
                               size_t nchars, int *error)
{
  const size_t avail= (size_t) (end - start);

  *error= 0;
  if (avail < nchars)
    return avail;
  return nchars;
}
1095
1096
/*
  Length of a string up to the position returned by skip_trailing_space().
*/
size_t my_lengthsp_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *ptr, size_t length)
{
  const uchar *trimmed_end= skip_trailing_space((const uchar *) ptr, length);

  return (size_t) ((const char *) trimmed_end - ptr);
}
1104
1105
/*
  Find the first occurrence of 's' in 'b', comparing by sort weight.

  SYNOPSIS
    my_instr_simple()
    cs            Character set; cs->sort_order supplies the weights
    b, b_length   String to search in
    s, s_length   String to search for
    match         Result array
    nmatch        Number of elements of 'match' that may be filled

  RETURN
    0   not found
    1   's' is empty; match[0] is zeroed if nmatch > 0
    2   found; match[0] describes the part in front of the occurrence and,
        if nmatch > 1, match[1] describes the occurrence itself
*/
uint my_instr_simple(const CHARSET_INFO *cs,
                     const char *b, size_t b_length,
                     const char *s, size_t s_length,
                     my_match_t *match, uint nmatch)
{
  const uchar *hay= (const uchar *) b;
  const uchar *needle= (const uchar *) s;
  size_t start, last_start, k;

  if (s_length > b_length)
    return 0;

  if (!s_length)
  {
    if (nmatch)
    {
      match->beg= 0;
      match->end= 0;
      match->mb_len= 0;
    }
    return 1;                           /* Empty string is always found */
  }

  last_start= b_length - s_length;
  for (start= 0; start <= last_start; start++)
  {
    /* Compare the candidate at 'start' weight by weight */
    for (k= 0; k < s_length; k++)
    {
      if (cs->sort_order[hay[start + k]] != cs->sort_order[needle[k]])
        break;
    }
    if (k != s_length)
      continue;                         /* Mismatch: try next position */

    if (nmatch > 0)
    {
      match[0].beg= 0;
      match[0].end= (uint) start;
      match[0].mb_len= match[0].end;

      if (nmatch > 1)
      {
        match[1].beg= match[0].end;
        match[1].end= match[0].end + (uint) s_length;
        match[1].mb_len= match[1].end - match[1].beg;
      }
    }
    return 2;
  }
  return 0;
}
1164
/*
  Well-formed length check for 7-bit ASCII data.
  *error is set to 1 if any byte in [start, end) has its high bit set,
  otherwise to 0.

  RETURN
    The smaller of (end - start) and nchars, whether or not an invalid
    byte was seen.
*/
size_t my_well_formed_len_ascii(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                                const char *start, const char *end,
                                size_t nchars, int *error)
{
  /**
    @todo: Currently return warning on invalid character.
           Return error in future release.
  */
  const size_t nbytes= (size_t) (end - start);
  const char *cur;

  *error= 0;
  for (cur= start; cur < end; cur++)
  {
    if ((*cur & 0x80) != 0)
    {
      *error= 1;
      break;
    }
  }
  return MY_MIN(nbytes, nchars);
}
1186
1187 typedef struct
1188 {
1189 int nchars;
1190 MY_UNI_IDX uidx;
1191 } uni_idx;
1192
1193 #define PLANE_SIZE 0x100
1194 #define PLANE_NUM 0x100
1195 #define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
1196
pcmp(const void * f,const void * s)1197 static int pcmp(const void * f, const void * s)
1198 {
1199 const uni_idx *F= (const uni_idx*) f;
1200 const uni_idx *S= (const uni_idx*) s;
1201 int res;
1202
1203 if (!(res=((S->nchars)-(F->nchars))))
1204 res=((F->uidx.from)-(S->uidx.to));
1205 return res;
1206 }
1207
1208 static my_bool
create_fromuni(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader)1209 create_fromuni(CHARSET_INFO *cs,
1210 MY_CHARSET_LOADER *loader)
1211 {
1212 uni_idx idx[PLANE_NUM];
1213 int i,n;
1214 MY_UNI_IDX *tab_from_uni;
1215
1216 /*
1217 Check that Unicode map is loaded.
1218 It can be not loaded when the collation is
1219 listed in Index.xml but not specified
1220 in the character set specific XML file.
1221 */
1222 if (!cs->tab_to_uni)
1223 return TRUE;
1224
1225 /* Clear plane statistics */
1226 memset(idx, 0, sizeof(idx));
1227
1228 /* Count number of characters in each plane */
1229 for (i=0; i< 0x100; i++)
1230 {
1231 uint16 wc=cs->tab_to_uni[i];
1232 int pl= PLANE_NUMBER(wc);
1233
1234 if (wc || !i)
1235 {
1236 if (!idx[pl].nchars)
1237 {
1238 idx[pl].uidx.from=wc;
1239 idx[pl].uidx.to=wc;
1240 }else
1241 {
1242 idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1243 idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1244 }
1245 idx[pl].nchars++;
1246 }
1247 }
1248
1249 /* Sort planes in descending order */
1250 qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1251
1252 for (i=0; i < PLANE_NUM; i++)
1253 {
1254 int ch,numchars;
1255 uchar *tab;
1256
1257 /* Skip empty plane */
1258 if (!idx[i].nchars)
1259 break;
1260
1261 numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1262 if (!(idx[i].uidx.tab= tab= (uchar *)
1263 (loader->once_alloc)
1264 (numchars * sizeof(*idx[i].uidx.tab))))
1265 return TRUE;
1266
1267 memset(tab, 0, numchars*sizeof(*idx[i].uidx.tab));
1268
1269 for (ch=1; ch < PLANE_SIZE; ch++)
1270 {
1271 uint16 wc=cs->tab_to_uni[ch];
1272 if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1273 {
1274 int ofs= wc - idx[i].uidx.from;
1275 /*
1276 Character sets like armscii8 may have two code points for
1277 one character. When converting from UNICODE back to
1278 armscii8, select the lowest one, which is in the ASCII
1279 range.
1280 */
1281 if (tab[ofs] == '\0')
1282 tab[ofs]= ch;
1283 }
1284 }
1285 }
1286
1287 /* Allocate and fill reverse table for each plane */
1288 n=i;
1289 if (!(cs->tab_from_uni= tab_from_uni= (MY_UNI_IDX *)
1290 (loader->once_alloc)
1291 (sizeof(MY_UNI_IDX) * (n + 1))))
1292 return TRUE;
1293
1294 for (i=0; i< n; i++)
1295 tab_from_uni[i]= idx[i].uidx;
1296
1297 /* Set end-of-list marker */
1298 memset(&tab_from_uni[i], 0, sizeof(MY_UNI_IDX));
1299 return FALSE;
1300 }
1301
1302 static my_bool
my_cset_init_8bit(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader)1303 my_cset_init_8bit(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
1304 {
1305 cs->caseup_multiply= 1;
1306 cs->casedn_multiply= 1;
1307 cs->pad_char= ' ';
1308 return create_fromuni(cs, loader);
1309 }
1310
/*
  Make cs->max_sort_char the byte code with the greatest sort weight.

  The weight of the current cs->max_sort_char is the starting point.
  It is replaced only by a strictly greater weight, so on ties the
  current value (or the lowest byte code found so far) is kept.
  Does nothing when the collation has no sort_order table.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uint code;
  uchar best;

  if (!cs->sort_order)
    return;

  best= cs->sort_order[(uchar) cs->max_sort_char];
  code= 0;
  while (code < 256)
  {
    const uchar weight= (uchar) cs->sort_order[code];

    if (weight > best)
    {
      best= weight;
      cs->max_sort_char= code;
    }
    code++;
  }
}
1329
/*
  Collation initializer for simple 8-bit collations.

  Recomputes cs->max_sort_char via set_max_sort_char(). The loader
  argument is not used.

  Always returns FALSE.
*/
static my_bool
my_coll_init_simple(CHARSET_INFO *cs,
                    MY_CHARSET_LOADER *loader MY_ATTRIBUTE((unused)))
{
  set_max_sort_char(cs);
  return FALSE;
}
1337
1338
my_strtoll10_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * nptr,char ** endptr,int * error)1339 longlong my_strtoll10_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1340 const char *nptr, char **endptr, int *error)
1341 {
1342 return my_strtoll10(nptr, endptr, error);
1343 }
1344
1345
my_mb_ctype_8bit(const CHARSET_INFO * cs,int * ctype,const uchar * s,const uchar * e)1346 int my_mb_ctype_8bit(const CHARSET_INFO *cs, int *ctype,
1347 const uchar *s, const uchar *e)
1348 {
1349 if (s >= e)
1350 {
1351 *ctype= 0;
1352 return MY_CS_TOOSMALL;
1353 }
1354 *ctype= cs->ctype[*s + 1];
1355 return 1;
1356 }
1357
1358
1359 #define CUTOFF (ULLONG_MAX / 10)
1360 #define CUTLIM (ULLONG_MAX % 10)
1361 #define DIGITS_IN_ULONGLONG 20
1362
1363 static ulonglong d10[DIGITS_IN_ULONGLONG]=
1364 {
1365 1,
1366 10,
1367 100,
1368 1000,
1369 10000,
1370 100000,
1371 1000000,
1372 10000000,
1373 100000000,
1374 1000000000,
1375 10000000000ULL,
1376 100000000000ULL,
1377 1000000000000ULL,
1378 10000000000000ULL,
1379 100000000000000ULL,
1380 1000000000000000ULL,
1381 10000000000000000ULL,
1382 100000000000000000ULL,
1383 1000000000000000000ULL,
1384 10000000000000000000ULL
1385 };
1386
1387
1388 /*
1389
1390 Convert a string to unsigned long long integer value
1391 with rounding.
1392
1393 SYNOPSYS
1394 my_strntoull10_8bit()
1395 cs in pointer to character set
1396 str in pointer to the string to be converted
1397 length in string length
1398 unsigned_flag in whether the number is unsigned
1399 endptr out pointer to the stop character
1400 error out returned error code
1401
1402 DESCRIPTION
1403 This function takes the decimal representation of integer number
1404 from string str and converts it to an signed or unsigned
1405 long long integer value.
1406 Space characters and tab are ignored.
1407 A sign character might precede the digit characters.
1408 The number may have any number of pre-zero digits.
1409 The number may have decimal point and exponent.
1410 Rounding is always done in "away from zero" style:
1411 0.5 -> 1
1412 -0.5 -> -1
1413
1414 The function stops reading the string str after "length" bytes
1415 or at the first character that is not a part of correct number syntax:
1416
1417 <signed numeric literal> ::=
1418 [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1419
1420 <exact numeric literal> ::=
1421 <unsigned integer> [ <period> [ <unsigned integer> ] ]
1422 | <period> <unsigned integer>
1423 <unsigned integer> ::= <digit>...
1424
1425 RETURN VALUES
1426 Value of string as a signed/unsigned longlong integer
1427
1428 endptr cannot be NULL. The function will store the end pointer
1429 to the stop character here.
1430
1431 The error parameter contains information how things went:
1432 0 ok
1433 ERANGE If the the value of the converted number is out of range
1434 In this case the return value is:
1435 - ULLONG_MAX if unsigned_flag and the number was too big
1436 - 0 if unsigned_flag and the number was negative
1437 - LLONG_MAX if no unsigned_flag and the number is too big
1438 - LLONG_MIN if no unsigned_flag and the number it too big negative
1439
1440 EDOM If the string didn't contain any digits.
1441 In this case the return value is 0.
1442 */
1443
1444 ulonglong
my_strntoull10rnd_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * str,size_t length,int unsigned_flag,char ** endptr,int * error)1445 my_strntoull10rnd_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1446 const char *str, size_t length, int unsigned_flag,
1447 char **endptr, int *error)
1448 {
1449 const char *dot, *end9, *beg, *end= str + length;
1450 ulonglong ull;
1451 ulong ul;
1452 uchar ch;
1453 int shift= 0, digits= 0, negative, addon;
1454
1455 /* Skip leading spaces and tabs */
1456 for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1457
1458 if (str >= end)
1459 goto ret_edom;
1460
1461 if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1462 {
1463 if (++str == end)
1464 goto ret_edom;
1465 }
1466
1467 beg= str;
1468 end9= (str + 9) > end ? end : (str + 9);
1469 /* Accumulate small number into ulong, for performance purposes */
1470 for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1471 {
1472 ul= ul * 10 + ch;
1473 }
1474
1475 if (str >= end) /* Small number without dots and expanents */
1476 {
1477 *endptr= (char*) str;
1478 if (negative)
1479 {
1480 if (unsigned_flag)
1481 {
1482 *error= ul ? MY_ERRNO_ERANGE : 0;
1483 return 0;
1484 }
1485 else
1486 {
1487 *error= 0;
1488 return (ulonglong) (longlong) -(long) ul;
1489 }
1490 }
1491 else
1492 {
1493 *error=0;
1494 return (ulonglong) ul;
1495 }
1496 }
1497
1498 digits= (int)(str - beg);
1499
1500 /* Continue to accumulate into ulonglong */
1501 for (dot= NULL, ull= ul; str < end; str++)
1502 {
1503 if ((ch= (uchar) (*str - '0')) < 10)
1504 {
1505 if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1506 {
1507 ull= ull * 10 + ch;
1508 digits++;
1509 continue;
1510 }
1511 /*
1512 Adding the next digit would overflow.
1513 Remember the next digit in "addon", for rounding.
1514 Scan all digits with an optional single dot.
1515 */
1516 if (ull == CUTOFF)
1517 {
1518 ull= ULLONG_MAX;
1519 addon= 1;
1520 str++;
1521 }
1522 else
1523 addon= (*str >= '5');
1524 if (!dot)
1525 {
1526 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1527 if (str < end && *str == '.')
1528 {
1529 str++;
1530 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1531 }
1532 }
1533 else
1534 {
1535 shift= (int)(dot - str);
1536 for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1537 }
1538 goto exp;
1539 }
1540
1541 if (*str == '.')
1542 {
1543 if (dot)
1544 {
1545 /* The second dot character */
1546 addon= 0;
1547 goto exp;
1548 }
1549 else
1550 {
1551 dot= str + 1;
1552 }
1553 continue;
1554 }
1555
1556 /* Unknown character, exit the loop */
1557 break;
1558 }
1559 shift= dot ? (int)(dot - str) : 0; /* Right shift */
1560 addon= 0;
1561
1562 exp: /* [ E [ <sign> ] <unsigned integer> ] */
1563
1564 if (!digits)
1565 {
1566 str= beg;
1567 goto ret_edom;
1568 }
1569
1570 if (str < end && (*str == 'e' || *str == 'E'))
1571 {
1572 str++;
1573 if (str < end)
1574 {
1575 longlong negative_exp, exponent;
1576 if ((negative_exp= (*str == '-')) || *str=='+')
1577 {
1578 if (++str == end)
1579 goto ret_sign;
1580 }
1581 for (exponent= 0 ;
1582 str < end && (ch= (uchar) (*str - '0')) < 10;
1583 str++)
1584 {
1585 if (exponent <= (LLONG_MAX - ch) / 10)
1586 exponent= exponent * 10 + ch;
1587 else
1588 goto ret_too_big;
1589 }
1590 shift+= negative_exp ? -exponent : exponent;
1591 }
1592 }
1593
1594 if (shift == 0) /* No shift, check addon digit */
1595 {
1596 if (addon)
1597 {
1598 if (ull == ULLONG_MAX)
1599 goto ret_too_big;
1600 ull++;
1601 }
1602 goto ret_sign;
1603 }
1604
1605 if (shift < 0) /* Right shift */
1606 {
1607 ulonglong d, r;
1608
1609 if (shift == INT_MIN32 || -shift >= DIGITS_IN_ULONGLONG)
1610 goto ret_zero; /* Exponent is a big negative number, return 0 */
1611
1612 d= d10[-shift];
1613 r= (ull % d) * 2;
1614 ull /= d;
1615 if (r >= d)
1616 ull++;
1617 goto ret_sign;
1618 }
1619
1620 if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1621 {
1622 if (!ull)
1623 goto ret_sign;
1624 goto ret_too_big;
1625 }
1626
1627 for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1628 {
1629 if (ull > CUTOFF)
1630 goto ret_too_big; /* Overflow, number too big */
1631 }
1632
1633 ret_sign:
1634 *endptr= (char*) str;
1635
1636 if (!unsigned_flag)
1637 {
1638 if (negative)
1639 {
1640 if (ull > (ulonglong) LLONG_MIN)
1641 {
1642 *error= MY_ERRNO_ERANGE;
1643 return (ulonglong) LLONG_MIN;
1644 }
1645 *error= 0;
1646 return (ulonglong) -(longlong) ull;
1647 }
1648 else
1649 {
1650 if (ull > (ulonglong) LLONG_MAX)
1651 {
1652 *error= MY_ERRNO_ERANGE;
1653 return (ulonglong) LLONG_MAX;
1654 }
1655 *error= 0;
1656 return ull;
1657 }
1658 }
1659
1660 /* Unsigned number */
1661 if (negative && ull)
1662 {
1663 *error= MY_ERRNO_ERANGE;
1664 return 0;
1665 }
1666 *error= 0;
1667 return ull;
1668
1669 ret_zero:
1670 *endptr= (char*) str;
1671 *error= 0;
1672 return 0;
1673
1674 ret_edom:
1675 *endptr= (char*) str;
1676 *error= MY_ERRNO_EDOM;
1677 return 0;
1678
1679 ret_too_big:
1680 *endptr= (char*) str;
1681 *error= MY_ERRNO_ERANGE;
1682 return unsigned_flag ?
1683 ULLONG_MAX :
1684 negative ? (ulonglong) LLONG_MIN : (ulonglong) LLONG_MAX;
1685 }
1686
1687
/*
  Check if a constant can be propagated

  SYNOPSIS:
    my_propagate_simple()
    cs		Character set information (not used)
    str		The constant string (not used)
    length	Length of the constant string (not used)

  NOTES:
    Takes the string in the given charset and checks
    if it can be safely propagated in the optimizer.

    create table t1 (
      s char(5) character set latin1 collate latin1_german2_ci);
    insert into t1 values (0xf6); -- o-umlaut
    select * from t1 where length(s)=1 and s='oe';

    The above query should return one row.
    We cannot convert this query into:
    select * from t1 where length('oe')=1 and s='oe';

    Currently we don't check the constant itself,
    and decide not to propagate a constant
    just if the collation itself allows tricky things
    like expansions and contractions. In the future
    we can write more sophisticated functions to
    check the constants. For example, 'oa' can always
    be safely propagated in German2 because unlike
    'oe' it does not have any special meaning.

    This variant never examines its arguments and always
    reports that propagation is safe.

  RETURN
    1 if constant can be safely propagated
    0 if it is not safe to propagate the constant
*/



my_bool my_propagate_simple(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                            const uchar *str MY_ATTRIBUTE((unused)),
                            size_t length MY_ATTRIBUTE((unused)))
{
  return 1;
}
1732
1733
/*
  Counterpart of my_propagate_simple() that always answers "not safe":
  it ignores all of its arguments and returns 0, so a constant is
  never propagated.
  NOTE(review): presumably used by collations with expansions or
  contractions; the users are not visible here, confirm at call sites.
*/
my_bool my_propagate_complex(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                             const uchar *str MY_ATTRIBUTE((unused)),
                             size_t length MY_ATTRIBUTE((unused)))
{
  return 0;
}
1740
1741
1742 /*
1743 Normalize strxfrm flags
1744
1745 SYNOPSIS:
1746 my_strxfrm_flag_normalize()
1747 flags - non-normalized flags
1748 nlevels - number of levels
1749
1750 NOTES:
1751 If levels are omitted, then 1-maximum is assumed.
1752 If any level number is greater than the maximum,
1753 it is treated as the maximum.
1754
1755 RETURN
1756 normalized flags
1757 */
1758
my_strxfrm_flag_normalize(uint flags,uint maximum)1759 uint my_strxfrm_flag_normalize(uint flags, uint maximum)
1760 {
1761 assert(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
1762
1763 /* If levels are omitted, then 1-maximum is assumed*/
1764 if (!(flags & MY_STRXFRM_LEVEL_ALL))
1765 {
1766 static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
1767 uint flag_pad= flags &
1768 (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1769 flags= def_level_flags[maximum] | flag_pad;
1770 }
1771 else
1772 {
1773 uint i;
1774 uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
1775 uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1776 uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
1777 uint flag_pad= flags &
1778 (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
1779
1780 /*
1781 If any level number is greater than the maximum,
1782 it is treated as the maximum.
1783 */
1784 for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
1785 {
1786 uint src_bit= 1 << i;
1787 if (flag_lev & src_bit)
1788 {
1789 uint dst_bit= 1 << MY_MIN(i, maximum);
1790 flags|= dst_bit;
1791 flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
1792 flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
1793 }
1794 }
1795 flags|= flag_pad;
1796 }
1797
1798 return flags;
1799 }
1800
1801
1802 /*
1803 Apply DESC and REVERSE collation rules.
1804
1805 SYNOPSIS:
1806 my_strxfrm_desc_and_reverse()
1807 str - pointer to string
1808 strend - end of string
1809 flags - flags
1810 level - which level, starting from 0.
1811
1812 NOTES:
1813 Apply DESC or REVERSE or both flags.
1814
1815 If DESC flag is given, then the weights
1816 come out NOTed or negated for that level.
1817
1818 If REVERSE flags is given, then the weights come out in
1819 reverse order for that level, that is, starting with
1820 the last character and ending with the first character.
1821
1822 If nether DESC nor REVERSE flags are give,
1823 the string is not changed.
1824
1825 */
1826 void
my_strxfrm_desc_and_reverse(uchar * str,uchar * strend,uint flags,uint level)1827 my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
1828 uint flags, uint level)
1829 {
1830 if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1831 {
1832 if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1833 {
1834 for (strend--; str <= strend;)
1835 {
1836 uchar tmp= *str;
1837 *str++= ~*strend;
1838 *strend--= ~tmp;
1839 }
1840 }
1841 else
1842 {
1843 for (; str < strend; str++)
1844 *str= ~*str;
1845 }
1846 }
1847 else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1848 {
1849 for (strend--; str < strend;)
1850 {
1851 uchar tmp= *str;
1852 *str++= *strend;
1853 *strend--= tmp;
1854 }
1855 }
1856 }
1857
1858
1859 size_t
my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * cs,uchar * str,uchar * frmend,uchar * strend,uint nweights,uint flags,uint level)1860 my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs,
1861 uchar *str, uchar *frmend, uchar *strend,
1862 uint nweights, uint flags, uint level)
1863 {
1864 if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1865 {
1866 uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
1867 cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1868 frmend+= fill_length;
1869 }
1870 my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1871 if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
1872 {
1873 size_t fill_length= strend - frmend;
1874 cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1875 frmend= strend;
1876 }
1877 return frmend - str;
1878 }
1879
1880
/*
  Character set handler for generic 8-bit character sets.

  The initializer is positional: the entries must stay in the order
  of the members of MY_CHARSET_HANDLER (declared outside this file
  section). No ismbchar callback is provided (NULL).
*/
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
    my_cset_init_8bit,
    NULL,			/* ismbchar      */
    my_mbcharlen_8bit,		/* mbcharlen     */
    my_numchars_8bit,
    my_charpos_8bit,
    my_well_formed_len_8bit,
    my_lengthsp_8bit,
    my_numcells_8bit,
    my_mb_wc_8bit,
    my_wc_mb_8bit,
    my_mb_ctype_8bit,
    my_caseup_str_8bit,
    my_casedn_str_8bit,
    my_caseup_8bit,
    my_casedn_8bit,
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
    my_strtoll10_8bit,
    my_strntoull10rnd_8bit,
    my_scan_8bit
};
1911
/*
  Character set handler for 7-bit ASCII.

  Identical to my_charset_8bit_handler except for the well-formedness
  check, which is my_well_formed_len_ascii(): it flags bytes with the
  high bit set. The initializer is positional: the entries must stay
  in the order of the members of MY_CHARSET_HANDLER.
*/
MY_CHARSET_HANDLER my_charset_ascii_handler=
{
    my_cset_init_8bit,
    NULL,			/* ismbchar      */
    my_mbcharlen_8bit,		/* mbcharlen     */
    my_numchars_8bit,
    my_charpos_8bit,
    my_well_formed_len_ascii,	/* ASCII-specific check */
    my_lengthsp_8bit,
    my_numcells_8bit,
    my_mb_wc_8bit,
    my_wc_mb_8bit,
    my_mb_ctype_8bit,
    my_caseup_str_8bit,
    my_casedn_str_8bit,
    my_caseup_8bit,
    my_casedn_8bit,
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
    my_strtoll10_8bit,
    my_strntoull10rnd_8bit,
    my_scan_8bit
};
1942
/*
  Collation handler for simple 8-bit collations.

  Initialization goes through my_coll_init_simple() and constant
  propagation is reported by my_propagate_simple(). The initializer
  is positional: the entries must stay in the order of the members
  of MY_COLLATION_HANDLER (declared outside this file section).
*/
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
{
    my_coll_init_simple,	/* init */
    my_strnncoll_simple,
    my_strnncollsp_simple,
    my_strnxfrm_simple,
    my_strnxfrmlen_simple,
    my_like_range_simple,
    my_wildcmp_8bit,
    my_strcasecmp_8bit,
    my_instr_simple,
    my_hash_sort_simple,
    my_propagate_simple
};
1957