1 /* Copyright (c) 2002, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    Without limiting anything contained in the foregoing, this file,
15    which is part of C Driver for MySQL (Connector/C), is also subject to the
16    Universal FOSS Exception, version 1.0, a copy of which can be found at
17    http://oss.oracle.com/licenses/universal-foss-exception.
18 
19    This program is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22    GNU General Public License, version 2.0, for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
27 
28 #include <my_global.h>
29 #include "m_string.h"
30 #include "m_ctype.h"
31 #include "my_sys.h"  /* Needed for MY_ERRNO_ERANGE */
32 #include <errno.h>
33 
34 #include "stdarg.h"
35 
36 /*
37   Returns the number of bytes required for strnxfrm().
38 */
39 
my_strnxfrmlen_simple(const CHARSET_INFO * cs,size_t len)40 size_t my_strnxfrmlen_simple(const CHARSET_INFO *cs, size_t len)
41 {
42   return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
43 }
44 
45 
46 /*
47   Converts a string into its sort key.
48 
49   SYNOPSIS
50      my_strnxfrm_xxx()
51 
52   IMPLEMENTATION
53 
54      The my_strxfrm_xxx() function transforms a string pointed to by
55      'src' with length 'srclen' according to the charset+collation
56      pair 'cs' and copies the result key into 'dest'.
57 
58      Comparing two strings using memcmp() after my_strnxfrm_xxx()
59      is equal to comparing two original strings with my_strnncollsp_xxx().
60 
     Not more than 'dstlen' bytes are written into 'dst'.
     To guarantee that the whole string is transformed, 'dstlen' must be
     at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
     a subsequent memcmp() may return an inaccurate result.
65 
66      If the source string is too short to fill whole 'dstlen' bytes,
67      then the 'dest' string is padded up to 'dstlen', ensuring that:
68 
69        "a"  == "a "
70        "a\0" < "a"
71        "a\0" < "a "
72 
73      my_strnxfrm_simple() is implemented for 8bit charsets and
74      simple collations with one-to-one string->key transformation.
75 
76      See also implementations for various charsets/collations in
77      other ctype-xxx.c files.
78 
79   RETURN
80 
81     Target len 'dstlen'.
82 
83 */
84 
85 
86 size_t
my_strnxfrm_simple(const CHARSET_INFO * cs,uchar * dst,size_t dstlen,uint nweights,const uchar * src,size_t srclen,uint flags)87 my_strnxfrm_simple(const CHARSET_INFO *cs,
88                    uchar *dst, size_t dstlen, uint nweights,
89                    const uchar *src, size_t srclen, uint flags)
90 {
91   const uchar *map= cs->sort_order;
92   uchar *d0= dst;
93   const uchar *end;
94   const uchar *remainder;
95   size_t frmlen;
96   if ((frmlen= MY_MIN(dstlen, nweights)) > srclen)
97     frmlen= srclen;
98   end= src + frmlen;
99 
100   // Do the first few bytes.
101   remainder= src + (frmlen % 8);
102   for (; src < remainder;)
103     *dst++= map[*src++];
104 
105   // Unroll loop for rest of string.
106   for (; src < end;)
107   {
108     *dst++= map[*src++];
109     *dst++= map[*src++];
110     *dst++= map[*src++];
111     *dst++= map[*src++];
112     *dst++= map[*src++];
113     *dst++= map[*src++];
114     *dst++= map[*src++];
115     *dst++= map[*src++];
116   }
117   return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
118                                          (uint)(nweights - frmlen), flags, 0);
119 }
120 
121 
/*
  Compare two length-bounded strings by their sort_order weights.

  Returns the weight difference at the first position where the weights
  differ. If the common prefix is equal, returns 1 / -1 / 0 according to
  the lengths; with 't_is_prefix' set, a longer 's' counts as equal.
*/
int my_strnncoll_simple(const CHARSET_INFO * cs, const uchar *s, size_t slen,
                        const uchar *t, size_t tlen,
                        my_bool t_is_prefix)
{
  const uchar *weight= cs->sort_order;
  const size_t common= (slen < tlen) ? slen : tlen;
  size_t pos;

  for (pos= 0; pos < common; pos++)
  {
    const int ws= (int) weight[s[pos]];
    const int wt= (int) weight[t[pos]];
    if (ws != wt)
      return ws - wt;
  }

  /* In prefix mode only the first tlen bytes of 's' matter. */
  if (t_is_prefix && slen > tlen)
    slen= tlen;

  /*
    (slen - tlen) is not used here: the difference may not fit in a
    signed int.
  */
  if (slen > tlen)
    return 1;
  if (slen < tlen)
    return -1;
  return 0;
}
141 
142 
143 /*
144   Compare strings, discarding end space
145 
146   SYNOPSIS
147     my_strnncollsp_simple()
148     cs			character set handler
149     a			First string to compare
150     a_length		Length of 'a'
151     b			Second string to compare
152     b_length		Length of 'b'
    diff_if_only_endspace_difference
		        Set to 1 if the strings should be regarded as different
                        if they differ only in end space
156 
157   IMPLEMENTATION
    If one string is shorter than the other, then we space extend the
    shorter one so that the strings have equal length.
160 
161     This will ensure that the following things hold:
162 
163     "a"  == "a "
164     "a\0" < "a"
165     "a\0" < "a "
166 
167   RETURN
168     < 0	 a <  b
169     = 0	 a == b
170     > 0	 a > b
171 */
172 
int my_strnncollsp_simple(const CHARSET_INFO *cs, const uchar *a,
                          size_t a_length, const uchar *b, size_t b_length,
                          my_bool diff_if_only_endspace_difference)
{
  const uchar *weight= cs->sort_order;
  const size_t common= MY_MIN(a_length, b_length);
  const uchar *tail;
  const uchar *tail_end;
  size_t pos;
  int sign;

#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* Compare the part both strings have. */
  for (pos= 0; pos < common; pos++)
  {
    if (weight[a[pos]] != weight[b[pos]])
      return (int) weight[a[pos]] - (int) weight[b[pos]];
  }

  if (a_length == b_length)
    return 0;

  /*
    Pick the remainder of the longer string; 'sign' is +1 when that
    string is 'a' and -1 when it is 'b'.
  */
  if (a_length > b_length)
  {
    tail= a + common;
    tail_end= a + a_length;
    sign= 1;
  }
  else
  {
    tail= b + common;
    tail_end= b + b_length;
    sign= -1;
  }

  /*
    The first character in the remainder whose weight differs from the
    weight of ' ' decides the result: below space means the longer
    string is the smaller one.
  */
  for (; tail < tail_end; tail++)
  {
    if (weight[*tail] != weight[' '])
      return (weight[*tail] < weight[' ']) ? -sign : sign;
  }

  /* Only end space differs. */
  return diff_if_only_endspace_difference ? sign : 0;
}
217 
218 
my_caseup_str_8bit(const CHARSET_INFO * cs,char * str)219 size_t my_caseup_str_8bit(const CHARSET_INFO *cs,char *str)
220 {
221   const uchar *map= cs->to_upper;
222   char *str_orig= str;
223   while ((*str= (char) map[(uchar) *str]) != 0)
224     str++;
225   return (size_t) (str - str_orig);
226 }
227 
228 
my_casedn_str_8bit(const CHARSET_INFO * cs,char * str)229 size_t my_casedn_str_8bit(const CHARSET_INFO *cs,char *str)
230 {
231   const uchar *map= cs->to_lower;
232   char *str_orig= str;
233   while ((*str= (char) map[(uchar) *str]) != 0)
234     str++;
235   return (size_t) (str - str_orig);
236 }
237 
238 
/*
  Map 'srclen' bytes of 'src' through cs->to_upper in place.
  The conversion is in place only: 'dst'/'dstlen' must equal
  'src'/'srclen' (asserted). Returns srclen.
*/
size_t my_caseup_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
                      char *dst MY_ATTRIBUTE((unused)),
                      size_t dstlen MY_ATTRIBUTE((unused)))
{
  const uchar *upper= cs->to_upper;
  size_t pos;

  assert(src == dst && srclen == dstlen);
  for (pos= 0; pos < srclen; pos++)
    src[pos]= (char) upper[(uchar) src[pos]];
  return srclen;
}
250 
251 
/*
  Map 'srclen' bytes of 'src' through cs->to_lower in place.
  The conversion is in place only: 'dst'/'dstlen' must equal
  'src'/'srclen' (asserted). Returns srclen.
*/
size_t my_casedn_8bit(const CHARSET_INFO *cs, char *src, size_t srclen,
                      char *dst MY_ATTRIBUTE((unused)),
                      size_t dstlen MY_ATTRIBUTE((unused)))
{
  const uchar *lower= cs->to_lower;
  size_t pos;

  assert(src == dst && srclen == dstlen);
  for (pos= 0; pos < srclen; pos++)
    src[pos]= (char) lower[(uchar) src[pos]];
  return srclen;
}
263 
my_strcasecmp_8bit(const CHARSET_INFO * cs,const char * s,const char * t)264 int my_strcasecmp_8bit(const CHARSET_INFO *cs,const char *s, const char *t)
265 {
266   const uchar *map=cs->to_upper;
267   while (map[(uchar) *s] == map[(uchar) *t++])
268     if (!*s++) return 0;
269   return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
270 }
271 
272 
/*
  Convert one byte at 'str' to a wide character using cs->tab_to_uni.
  Returns MY_CS_TOOSMALL when no input is left, -1 when a non-zero byte
  maps to 0, and 1 (one byte consumed) otherwise.
*/
int my_mb_wc_8bit(const CHARSET_INFO *cs,my_wc_t *wc,
                  const uchar *str,
                  const uchar *end MY_ATTRIBUTE((unused)))
{
  if (str >= end)
    return MY_CS_TOOSMALL;

  *wc= cs->tab_to_uni[str[0]];
  if (*wc == 0 && str[0] != 0)
    return -1;
  return 1;
}
283 
/*
  Convert wide character 'wc' to one byte at 'str' using the ranges in
  cs->tab_from_uni (the list ends at the entry whose 'tab' is NULL).
  Returns MY_CS_TOOSMALL when there is no room, 1 when a byte was
  stored, and MY_CS_ILUNI when no range covers 'wc' or a non-zero 'wc'
  maps to byte 0.
*/
int my_wc_mb_8bit(const CHARSET_INFO *cs,my_wc_t wc,
                  uchar *str,
                  uchar *end)
{
  const MY_UNI_IDX *range;

  if (str >= end)
    return MY_CS_TOOSMALL;

  range= cs->tab_from_uni;
  while (range->tab)
  {
    if (wc >= range->from && wc <= range->to)
    {
      str[0]= range->tab[wc - range->from];
      if (str[0] == 0 && wc != 0)
        return MY_CS_ILUNI;
      return 1;
    }
    range++;
  }
  return MY_CS_ILUNI;
}
303 
304 
305 /*
306    We can't use vsprintf here as it's not guaranteed to return
307    the length on all operating systems.
308    This function is also not called in a safe environment, so the
309    end buffer must be checked.
310 */
311 
/*
  Variadic front end to my_vsnprintf(): formats into 'to', passing 'n'
  as the size limit, and returns what my_vsnprintf() returns.
*/
size_t my_snprintf_8bit(const CHARSET_INFO *cs  MY_ATTRIBUTE((unused)),
                        char* to, size_t n  MY_ATTRIBUTE((unused)),
                        const char* fmt, ...)
{
  va_list ap;
  size_t written;

  va_start(ap, fmt);
  written= my_vsnprintf(to, n, fmt, ap);
  va_end(ap);

  return written;
}
323 
324 
/*
  Fold the sort_order weights of 'key' into the running hash state
  (*nr1, *nr2). Trailing space is skipped first so that 'A ' and 'A'
  hash identically.
*/
void my_hash_sort_simple(const CHARSET_INFO *cs,
                         const uchar *key, size_t len,
                         ulong *nr1, ulong *nr2)
{
  const uchar *weight= cs->sort_order;
  const uchar *stop= skip_trailing_space(key, len);
  ulong h1= *nr1;
  ulong h2= *nr2;

  while (key < (uchar*) stop)
  {
    h1^=(ulong) ((((uint) h1 & 63) + h2) *
                 ((uint) weight[(uint) *key])) + (h1 << 8);
    h2+=3;
    key++;
  }

  *nr1= h1;
  *nr2= h2;
}
353 
354 
my_strntol_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)355 long my_strntol_8bit(const CHARSET_INFO *cs,
356 		     const char *nptr, size_t l, int base,
357 		     char **endptr, int *err)
358 {
359   int negative;
360   uint32 cutoff;
361   uint cutlim;
362   uint32 i;
363   const char *s;
364   uchar c;
365   const char *save, *e;
366   int overflow;
367 
368   *err= 0;				/* Initialize error indicator */
369 
370   s = nptr;
371   e = nptr+l;
372 
373   for ( ; s<e && my_isspace(cs, *s) ; s++);
374 
375   if (s == e)
376   {
377     goto noconv;
378   }
379 
380   /* Check for a sign.	*/
381   if (*s == '-')
382   {
383     negative = 1;
384     ++s;
385   }
386   else if (*s == '+')
387   {
388     negative = 0;
389     ++s;
390   }
391   else
392     negative = 0;
393 
394   save = s;
395   cutoff = ((uint32)~0L) / (uint32) base;
396   cutlim = (uint) (((uint32)~0L) % (uint32) base);
397 
398   overflow = 0;
399   i = 0;
400   for (c = *s; s != e; c = *++s)
401   {
402     if (c>='0' && c<='9')
403       c -= '0';
404     else if (c>='A' && c<='Z')
405       c = c - 'A' + 10;
406     else if (c>='a' && c<='z')
407       c = c - 'a' + 10;
408     else
409       break;
410     if (c >= base)
411       break;
412     if (i > cutoff || (i == cutoff && c > cutlim))
413       overflow = 1;
414     else
415     {
416       i *= (uint32) base;
417       i += c;
418     }
419   }
420 
421   if (s == save)
422     goto noconv;
423 
424   if (endptr != NULL)
425     *endptr = (char *) s;
426 
427   if (negative)
428   {
429     if (i  > (uint32) INT_MIN32)
430       overflow = 1;
431   }
432   else if (i > INT_MAX32)
433     overflow = 1;
434 
435   if (overflow)
436   {
437     err[0]= ERANGE;
438     return negative ? INT_MIN32 : INT_MAX32;
439   }
440 
441   return (negative ? -((long) i) : (long) i);
442 
443 noconv:
444   err[0]= EDOM;
445   if (endptr != NULL)
446     *endptr = (char *) nptr;
447   return 0L;
448 }
449 
450 
my_strntoul_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)451 ulong my_strntoul_8bit(const CHARSET_INFO *cs,
452 		       const char *nptr, size_t l, int base,
453 		       char **endptr, int *err)
454 {
455   int negative;
456   uint32 cutoff;
457   uint cutlim;
458   uint32 i;
459   const char *s;
460   uchar c;
461   const char *save, *e;
462   int overflow;
463 
464   *err= 0;				/* Initialize error indicator */
465 
466   s = nptr;
467   e = nptr+l;
468 
469   for( ; s<e && my_isspace(cs, *s); s++);
470 
471   if (s==e)
472   {
473     goto noconv;
474   }
475 
476   if (*s == '-')
477   {
478     negative = 1;
479     ++s;
480   }
481   else if (*s == '+')
482   {
483     negative = 0;
484     ++s;
485   }
486   else
487     negative = 0;
488 
489   save = s;
490   cutoff = ((uint32)~0L) / (uint32) base;
491   cutlim = (uint) (((uint32)~0L) % (uint32) base);
492   overflow = 0;
493   i = 0;
494 
495   for (c = *s; s != e; c = *++s)
496   {
497     if (c>='0' && c<='9')
498       c -= '0';
499     else if (c>='A' && c<='Z')
500       c = c - 'A' + 10;
501     else if (c>='a' && c<='z')
502       c = c - 'a' + 10;
503     else
504       break;
505     if (c >= base)
506       break;
507     if (i > cutoff || (i == cutoff && c > cutlim))
508       overflow = 1;
509     else
510     {
511       i *= (uint32) base;
512       i += c;
513     }
514   }
515 
516   if (s == save)
517     goto noconv;
518 
519   if (endptr != NULL)
520     *endptr = (char *) s;
521 
522   if (overflow)
523   {
524     err[0]= ERANGE;
525     return (~(uint32) 0);
526   }
527 
528   return (negative ? -((long) i) : (long) i);
529 
530 noconv:
531   err[0]= EDOM;
532   if (endptr != NULL)
533     *endptr = (char *) nptr;
534   return 0L;
535 }
536 
537 
my_strntoll_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * nptr,size_t l,int base,char ** endptr,int * err)538 longlong my_strntoll_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
539 			  const char *nptr, size_t l, int base,
540 			  char **endptr,int *err)
541 {
542   int negative;
543   ulonglong cutoff;
544   uint cutlim;
545   ulonglong i;
546   const char *s, *e;
547   const char *save;
548   int overflow;
549 
550   *err= 0;				/* Initialize error indicator */
551 
552   s = nptr;
553   e = nptr+l;
554 
555   for(; s<e && my_isspace(cs,*s); s++);
556 
557   if (s == e)
558   {
559     goto noconv;
560   }
561 
562   if (*s == '-')
563   {
564     negative = 1;
565     ++s;
566   }
567   else if (*s == '+')
568   {
569     negative = 0;
570     ++s;
571   }
572   else
573     negative = 0;
574 
575   save = s;
576 
577   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
578   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
579 
580   overflow = 0;
581   i = 0;
582   for ( ; s != e; s++)
583   {
584     uchar c= *s;
585     if (c>='0' && c<='9')
586       c -= '0';
587     else if (c>='A' && c<='Z')
588       c = c - 'A' + 10;
589     else if (c>='a' && c<='z')
590       c = c - 'a' + 10;
591     else
592       break;
593     if (c >= base)
594       break;
595     if (i > cutoff || (i == cutoff && c > cutlim))
596       overflow = 1;
597     else
598     {
599       i *= (ulonglong) base;
600       i += c;
601     }
602   }
603 
604   if (s == save)
605     goto noconv;
606 
607   if (endptr != NULL)
608     *endptr = (char *) s;
609 
610   if (negative)
611   {
612     if (i  > (ulonglong) LLONG_MIN)
613       overflow = 1;
614   }
615   else if (i > (ulonglong) LLONG_MAX)
616     overflow = 1;
617 
618   if (overflow)
619   {
620     err[0]= ERANGE;
621     return negative ? LLONG_MIN : LLONG_MAX;
622   }
623 
624   return (negative ? -((longlong) i) : (longlong) i);
625 
626 noconv:
627   err[0]= EDOM;
628   if (endptr != NULL)
629     *endptr = (char *) nptr;
630   return 0L;
631 }
632 
633 
my_strntoull_8bit(const CHARSET_INFO * cs,const char * nptr,size_t l,int base,char ** endptr,int * err)634 ulonglong my_strntoull_8bit(const CHARSET_INFO *cs,
635 			   const char *nptr, size_t l, int base,
636 			   char **endptr, int *err)
637 {
638   int negative;
639   ulonglong cutoff;
640   uint cutlim;
641   ulonglong i;
642   const char *s, *e;
643   const char *save;
644   int overflow;
645 
646   *err= 0;				/* Initialize error indicator */
647 
648   s = nptr;
649   e = nptr+l;
650 
651   for(; s<e && my_isspace(cs,*s); s++);
652 
653   if (s == e)
654   {
655     goto noconv;
656   }
657 
658   if (*s == '-')
659   {
660     negative = 1;
661     ++s;
662   }
663   else if (*s == '+')
664   {
665     negative = 0;
666     ++s;
667   }
668   else
669     negative = 0;
670 
671   save = s;
672 
673   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
674   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
675 
676   overflow = 0;
677   i = 0;
678   for ( ; s != e; s++)
679   {
680     uchar c= *s;
681 
682     if (c>='0' && c<='9')
683       c -= '0';
684     else if (c>='A' && c<='Z')
685       c = c - 'A' + 10;
686     else if (c>='a' && c<='z')
687       c = c - 'a' + 10;
688     else
689       break;
690     if (c >= base)
691       break;
692     if (i > cutoff || (i == cutoff && c > cutlim))
693       overflow = 1;
694     else
695     {
696       i *= (ulonglong) base;
697       i += c;
698     }
699   }
700 
701   if (s == save)
702     goto noconv;
703 
704   if (endptr != NULL)
705     *endptr = (char *) s;
706 
707   if (overflow)
708   {
709     err[0]= ERANGE;
710     return (~(ulonglong) 0);
711   }
712 
713   return (negative ? -((longlong) i) : (longlong) i);
714 
715 noconv:
716   err[0]= EDOM;
717   if (endptr != NULL)
718     *endptr = (char *) nptr;
719   return 0L;
720 }
721 
722 
723 /*
724   Read double from string
725 
726   SYNOPSIS:
727     my_strntod_8bit()
728     cs		Character set information
729     str		String to convert to double
730     length	Optional length for string.
731     end		result pointer to end of converted string
732     err		Error number if failed conversion
733 
734   NOTES:
735     If length is not INT_MAX32 or str[length] != 0 then the given str must
736     be writeable
737     If length == INT_MAX32 the str must be \0 terminated.
738 
739     It's implemented this way to save a buffer allocation and a memory copy.
740 
741   RETURN
742     Value of number in string
743 */
744 
745 
double my_strntod_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                       char *str, size_t length,
                       char **end, int *err)
{
  /* INT_MAX32 is replaced by a fixed bound of 65535 bytes. */
  const size_t span= (length == INT_MAX32) ? 65535 : length;

  /* 'end' is set to the scan limit before the call to my_strtod(). */
  *end= str + span;
  return my_strtod(str, end, err);
}
755 
756 
757 /*
758   This is a fast version optimized for the case of radix 10 / -10
759 
760   Assume len >= 1
761 */
762 
size_t my_long10_to_str_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                             char *dst, size_t len, int radix, long int val)
{
  char digits[66];
  char *const tail= &digits[sizeof(digits)-1];
  char *head= tail;
  unsigned long int magnitude= (unsigned long int) val;
  uint sign_len= 0;

  *head= 0;

  /* A negative radix asks for signed output: emit '-' and use |val|. */
  if (radix < 0 && val < 0)
  {
    /* Negate in unsigned arithmetic so the minimum value cannot overflow. */
    magnitude= (unsigned long int) 0 - magnitude;
    *dst++= '-';
    len--;
    sign_len= 1;
  }

  /* Produce the digits right to left; there is always at least one. */
  do
  {
    *--head= (char) ('0' + (char) (magnitude % 10));
    magnitude/= 10;
  } while (magnitude != 0);

  /* Copy no more digits than the remaining room in 'dst'. */
  len= MY_MIN(len, (size_t) (tail - head));
  memcpy(dst, head, len);
  return len + sign_len;
}
802 
803 
/*
  Write the decimal representation of 'val' into 'dst' (no terminating
  zero is copied) and return the number of bytes written. With a
  negative 'radix', 'val' is treated as signed and a '-' is emitted for
  negative values; otherwise it is printed as unsigned.
*/
size_t my_longlong10_to_str_8bit(const CHARSET_INFO *cs
                                 MY_ATTRIBUTE((unused)),
                                 char *dst, size_t len, int radix,
                                 longlong val)
{
  char digits[65];
  char *const tail= &digits[sizeof(digits)-1];
  char *head= tail;
  ulonglong magnitude= (ulonglong) val;
  uint sign_len= 0;

  if (radix < 0 && val < 0)
  {
    /* Negate in unsigned arithmetic so LLONG_MIN cannot overflow. */
    magnitude= (ulonglong) 0 - magnitude;
    *dst++= '-';
    len--;
    sign_len= 1;
  }

  *head= 0;

  if (magnitude == 0)
  {
    /* Zero is always written as a single '0', whatever 'len' was. */
    *--head= '0';
    len= 1;
  }
  else
  {
    /* Produce the digits right to left. */
    while (magnitude != 0)
    {
      *--head= (char) ('0' + (uint) (magnitude % 10));
      magnitude/= 10;
    }
    /* Copy no more digits than the remaining room in 'dst'. */
    len= MY_MIN(len, (size_t) (tail - head));
  }

  memcpy(dst, head, len);
  return len + sign_len;
}
858 
859 
/*
** Compare string against string with wildcard
**	 0 if matched
**	-1 if not matched; returned when the string runs out while a
**	   wildcard is being processed
**	 1 if not matched otherwise (e.g. a literal character differs)
*/
866 
/* Look up character A in the sort_order table of charset s. */
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
/* Advance A by one position; the cs and B arguments are not used. */
#define INC_PTR(cs,A,B) (A)++
869 
/*
  Recursive worker for my_wildcmp_8bit().

  Matches [str, str_end) against the pattern [wildstr, wildend), where
  'w_one' matches exactly one character, 'w_many' matches any run of
  characters, and 'escape' makes the following pattern character
  literal. Characters are compared through likeconv(), i.e. by their
  sort_order values.

  'recurse_level' is passed to my_string_stack_guard (when that hook is
  set) and is incremented on every nested call.

  Returns 0 on a match and a non-zero value (1 or -1) otherwise.
*/
static
int my_wildcmp_8bit_impl(const CHARSET_INFO *cs,
                         const char *str,const char *str_end,
                         const char *wildstr,const char *wildend,
                         int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;			/* Not found, using wildcards */

  /* Report "no match" if the optional stack guard returns non-zero. */
  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Literal part: compare character by character up to a wildcard. */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      /* An escape that is not the last pattern char is skipped. */
      if (*wildstr == escape && wildstr+1 != wildend)
	wildstr++;

      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
	return(1);				/* No match */
      if (wildstr == wildend)
	return(str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char     */
    }
    if (*wildstr == w_one)
    {
      /* Each consecutive w_one consumes one character of the string. */
      do
      {
	if (str == str_end)			/* Skip one char if possible */
	  return(result);
	INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
	break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      uchar cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
	if (*wildstr == w_many)
	  continue;
	if (*wildstr == w_one)
	{
	  if (str == str_end)
	    return(-1);
	  INC_PTR(cs,str,str_end);
	  continue;
	}
	break;					/* Not a wild character */
      }
      if (wildstr == wildend)
	return(0);				/* Ok if w_many is last */
      if (str == str_end)
	return(-1);

      /* cmp is the first literal pattern character after the wildcards. */
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
	cmp= *++wildstr;

      INC_PTR(cs,wildstr,wildend);	/* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      /*
        Try every position in the string where cmp occurs and match the
        rest of the pattern recursively from there.
      */
      do
      {
	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
	  str++;
	if (str++ == str_end) return(-1);
	{
	  int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
                                       wildstr,wildend,escape,w_one,
                                       w_many, recurse_level + 1);
	  /* 0 (match) and -1 both end the search; 1 tries the next one. */
	  if (tmp <= 0)
	    return(tmp);
	}
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
  return(str != str_end ? 1 : 0);
}
951 
/*
  Public entry point for wildcard comparison: runs the recursive
  implementation starting at recursion level 1 and returns its result.
*/
int my_wildcmp_8bit(const CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many)
{
  const int first_level= 1;

  return my_wildcmp_8bit_impl(cs, str, str_end, wildstr, wildend,
                              escape, w_one, w_many, first_level);
}
961 
962 
963 /*
964 ** Calculate min_str and max_str that ranges a LIKE string.
965 ** Arguments:
966 ** ptr		Pointer to LIKE string.
967 ** ptr_length	Length of LIKE string.
968 ** escape	Escape character in LIKE.  (Normally '\').
969 **		All escape characters should be removed from min_str and max_str
970 ** res_length	Length of min_str and max_str.
971 ** min_str	Smallest case sensitive string that ranges LIKE.
972 **		Should be space padded to res_length.
973 ** max_str	Largest case sensitive string that ranges LIKE.
974 **		Normally padded with the biggest character sort value.
975 **
976 ** The function should return 0 if ok and 1 if the LIKE string can't be
977 ** optimized !
978 */
979 
my_like_range_simple(const CHARSET_INFO * cs,const char * ptr,size_t ptr_length,pbool escape,pbool w_one,pbool w_many,size_t res_length,char * min_str,char * max_str,size_t * min_length,size_t * max_length)980 my_bool my_like_range_simple(const CHARSET_INFO *cs,
981 			     const char *ptr, size_t ptr_length,
982 			     pbool escape, pbool w_one, pbool w_many,
983 			     size_t res_length,
984 			     char *min_str,char *max_str,
985 			     size_t *min_length, size_t *max_length)
986 {
987   const char *end= ptr + ptr_length;
988   char *min_org=min_str;
989   char *min_end=min_str+res_length;
990   size_t charlen= res_length / cs->mbmaxlen;
991 
992   for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
993   {
994     if (*ptr == escape && ptr+1 != end)
995     {
996       ptr++;					/* Skip escape */
997       *min_str++= *max_str++ = *ptr;
998       continue;
999     }
1000     if (*ptr == w_one)				/* '_' in SQL */
1001     {
1002       *min_str++='\0';				/* This should be min char */
1003       *max_str++= (char) cs->max_sort_char;
1004       continue;
1005     }
1006     if (*ptr == w_many)				/* '%' in SQL */
1007     {
1008       /* Calculate length of keys */
1009       *min_length= ((cs->state & MY_CS_BINSORT) ?
1010                     (size_t) (min_str - min_org) :
1011                     res_length);
1012       *max_length= res_length;
1013       do
1014       {
1015 	*min_str++= 0;
1016 	*max_str++= (char) cs->max_sort_char;
1017       } while (min_str != min_end);
1018       return 0;
1019     }
1020     *min_str++= *max_str++ = *ptr;
1021   }
1022 
1023  *min_length= *max_length = (size_t) (min_str - min_org);
1024   while (min_str != min_end)
1025     *min_str++= *max_str++ = ' ';      /* Because if key compression */
1026   return 0;
1027 }
1028 
1029 
my_scan_8bit(const CHARSET_INFO * cs,const char * str,const char * end,int sq)1030 size_t my_scan_8bit(const CHARSET_INFO *cs, const char *str, const char *end,
1031                     int sq)
1032 {
1033   const char *str0= str;
1034   switch (sq)
1035   {
1036   case MY_SEQ_INTTAIL:
1037     if (*str == '.')
1038     {
1039       for(str++ ; str != end && *str == '0' ; str++);
1040       return (size_t) (str - str0);
1041     }
1042     return 0;
1043 
1044   case MY_SEQ_SPACES:
1045     for ( ; str < end ; str++)
1046     {
1047       if (!my_isspace(cs,*str))
1048         break;
1049     }
1050     return (size_t) (str - str0);
1051   default:
1052     return 0;
1053   }
1054 }
1055 
1056 
/* Set the 'l' bytes starting at 's' to the byte value 'fill'. */
void my_fill_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                  char *s, size_t l, int fill)
{
  (void) memset(s, fill, l);
}
1062 
1063 
/* Number of characters in [b, e): the distance in bytes. */
size_t my_numchars_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *b, const char *e)
{
  const size_t nbytes= (size_t) (e - b);
  return nbytes;
}
1069 
1070 
/* Number of cells for [b, e): the distance in bytes. */
size_t my_numcells_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *b, const char *e)
{
  const size_t nbytes= (size_t) (e - b);
  return nbytes;
}
1076 
1077 
/*
  Byte offset of character number 'pos': returned unchanged, the string
  bounds are not consulted.
*/
size_t my_charpos_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                       const char *b  MY_ATTRIBUTE((unused)),
                       const char *e  MY_ATTRIBUTE((unused)),
                       size_t pos)
{
  const size_t byte_offset= pos;
  return byte_offset;
}
1085 
1086 
/*
  Length in bytes of the first 'nchars' characters of [start, end),
  capped at the number of bytes available. '*error' is always set to 0.
*/
size_t my_well_formed_len_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                               const char *start, const char *end,
                               size_t nchars, int *error)
{
  const size_t avail= (size_t) (end-start);

  *error= 0;
  if (nchars < avail)
    return nchars;
  return avail;
}
1095 
1096 
/*
  Length of a string with its trailing spaces left out.

  The trimming itself is delegated to skip_trailing_space(), which is
  defined outside this file; its result is used as the end of the
  significant part of the string.  The returned value is the distance
  from "ptr" to that end.
*/
size_t my_lengthsp_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        const char *ptr, size_t length)
{
  const uchar *bytes= (const uchar *) ptr;
  const char *trimmed_end= (const char *) skip_trailing_space(bytes, length);

  return (size_t) (trimmed_end - ptr);
}
1104 
1105 
/*
  Find the first occurrence of a substring, comparing bytes through the
  collation's sort_order table.

  SYNOPSIS
    my_instr_simple()
    cs        - collation; cs->sort_order maps every byte to its weight
    b         - string to search in
    b_length  - its length in bytes
    s         - substring to search for
    s_length  - its length in bytes
    match     - array receiving the match description
    nmatch    - number of elements available in "match"

  RETURN
    0  substring not found (also when s is longer than b)
    1  substring is empty; match[0] is zeroed if nmatch is non-zero
    2  substring found; if nmatch > 0, match[0] describes the part of b
       before the match, and if nmatch > 1, match[1] describes the match
*/
uint my_instr_simple(const CHARSET_INFO *cs,
                     const char *b, size_t b_length,
                     const char *s, size_t s_length,
                     my_match_t *match, uint nmatch)
{
  const uchar *haystack= (const uchar *) b;
  const uchar *needle= (const uchar *) s;
  const uchar *needle_end, *last_start, *pos;

  /* A needle longer than the haystack can never match */
  if (s_length > b_length)
    return 0;

  if (s_length == 0)
  {
    if (nmatch)
    {
      match->beg= 0;
      match->end= 0;
      match->mb_len= 0;
    }
    return 1;		/* Empty string is always found */
  }

  needle_end= needle + s_length;
  /* One past the last position where a full needle still fits */
  last_start= haystack + b_length - s_length + 1;

  for (pos= haystack; pos != last_start; pos++)
  {
    const uchar *h= pos;
    const uchar *n= needle;

    /* Compare weights until a mismatch or the end of the needle */
    while (n != needle_end && cs->sort_order[*h] == cs->sort_order[*n])
    {
      h++;
      n++;
    }
    if (n != needle_end)
      continue;			/* Mismatch, try the next position */

    if (nmatch > 0)
    {
      /* match[0]: the prefix of b that precedes the occurrence */
      match[0].beg= 0;
      match[0].end= (uint) (pos - haystack);
      match[0].mb_len= match[0].end;

      if (nmatch > 1)
      {
        /* match[1]: the occurrence itself */
        match[1].beg= match[0].end;
        match[1].end= match[0].end + (uint) s_length;
        match[1].mb_len= match[1].end - match[1].beg;
      }
    }
    return 2;
  }
  return 0;
}
1164 
/*
  Well-formed length for the ASCII handler.

  Scans [start, end) for a byte with the high bit set.  If one is found,
  *error is set to 1, otherwise to 0.  The returned length is NOT
  shortened at the offending byte: it is always the smaller of the byte
  length of the range and "nchars".
*/
size_t my_well_formed_len_ascii(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                                const char *start, const char *end,
                                size_t nchars, int *error)
{
  /**
    @todo: Currently return warning on invalid character.
           Return error in future release.
  */
  const size_t nbytes= (size_t) (end - start);
  const char *p;

  *error= 0;
  for (p= start; p < end; p++)
  {
    if (*p & 0x80)
    {
      /* Non-ASCII byte: flag it and stop scanning */
      *error= 1;
      break;
    }
  }
  return nbytes < nchars ? nbytes : nchars;
}
1186 
/*
  Per-plane bookkeeping used by create_fromuni() while it builds the
  Unicode -> 8-bit conversion tables.
*/
typedef struct
{
  int		nchars;	/* How many charset codes map into this plane */
  MY_UNI_IDX	uidx;	/* Lowest/highest code point seen, plus the plane's table */
} uni_idx;

/*
  PLANE_SIZE is the upper bound used when create_fromuni() scans the
  8-bit codes; PLANE_NUM is the number of uni_idx slots.
  PLANE_NUMBER(x) selects the slot from bits 8..15 of a code point.
*/
#define PLANE_SIZE	0x100
#define PLANE_NUM	0x100
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
1196 
pcmp(const void * f,const void * s)1197 static int pcmp(const void * f, const void * s)
1198 {
1199   const uni_idx *F= (const uni_idx*) f;
1200   const uni_idx *S= (const uni_idx*) s;
1201   int res;
1202 
1203   if (!(res=((S->nchars)-(F->nchars))))
1204     res=((F->uidx.from)-(S->uidx.to));
1205   return res;
1206 }
1207 
1208 static my_bool
create_fromuni(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader)1209 create_fromuni(CHARSET_INFO *cs,
1210                MY_CHARSET_LOADER *loader)
1211 {
1212   uni_idx	idx[PLANE_NUM];
1213   int		i,n;
1214   MY_UNI_IDX	*tab_from_uni;
1215 
1216   /*
1217     Check that Unicode map is loaded.
1218     It can be not loaded when the collation is
1219     listed in Index.xml but not specified
1220     in the character set specific XML file.
1221   */
1222   if (!cs->tab_to_uni)
1223     return TRUE;
1224 
1225   /* Clear plane statistics */
1226   memset(idx, 0, sizeof(idx));
1227 
1228   /* Count number of characters in each plane */
1229   for (i=0; i< 0x100; i++)
1230   {
1231     uint16 wc=cs->tab_to_uni[i];
1232     int pl= PLANE_NUMBER(wc);
1233 
1234     if (wc || !i)
1235     {
1236       if (!idx[pl].nchars)
1237       {
1238         idx[pl].uidx.from=wc;
1239         idx[pl].uidx.to=wc;
1240       }else
1241       {
1242         idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
1243         idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
1244       }
1245       idx[pl].nchars++;
1246     }
1247   }
1248 
1249   /* Sort planes in descending order */
1250   qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
1251 
1252   for (i=0; i < PLANE_NUM; i++)
1253   {
1254     int ch,numchars;
1255     uchar *tab;
1256 
1257     /* Skip empty plane */
1258     if (!idx[i].nchars)
1259       break;
1260 
1261     numchars=idx[i].uidx.to-idx[i].uidx.from+1;
1262     if (!(idx[i].uidx.tab= tab= (uchar *)
1263                                 (loader->once_alloc)
1264                                   (numchars * sizeof(*idx[i].uidx.tab))))
1265       return TRUE;
1266 
1267     memset(tab, 0, numchars*sizeof(*idx[i].uidx.tab));
1268 
1269     for (ch=1; ch < PLANE_SIZE; ch++)
1270     {
1271       uint16 wc=cs->tab_to_uni[ch];
1272       if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
1273       {
1274         int ofs= wc - idx[i].uidx.from;
1275         /*
1276           Character sets like armscii8 may have two code points for
1277           one character. When converting from UNICODE back to
1278           armscii8, select the lowest one, which is in the ASCII
1279           range.
1280         */
1281         if (tab[ofs] == '\0')
1282           tab[ofs]= ch;
1283       }
1284     }
1285   }
1286 
1287   /* Allocate and fill reverse table for each plane */
1288   n=i;
1289   if (!(cs->tab_from_uni= tab_from_uni= (MY_UNI_IDX *)
1290                                         (loader->once_alloc)
1291                                           (sizeof(MY_UNI_IDX) * (n + 1))))
1292     return TRUE;
1293 
1294   for (i=0; i< n; i++)
1295     tab_from_uni[i]= idx[i].uidx;
1296 
1297   /* Set end-of-list marker */
1298   memset(&tab_from_uni[i], 0, sizeof(MY_UNI_IDX));
1299   return FALSE;
1300 }
1301 
1302 static my_bool
my_cset_init_8bit(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader)1303 my_cset_init_8bit(CHARSET_INFO *cs, MY_CHARSET_LOADER *loader)
1304 {
1305   cs->caseup_multiply= 1;
1306   cs->casedn_multiply= 1;
1307   cs->pad_char= ' ';
1308   return create_fromuni(cs, loader);
1309 }
1310 
/*
  Make cs->max_sort_char the byte with the greatest sort weight.

  Starting from the weight of the current cs->max_sort_char, all 256
  byte values are scanned in ascending order; whenever a byte with a
  strictly greater weight is found it becomes the new max_sort_char.
  Nothing is done when the collation has no sort_order table.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar heaviest;
  uint  code;

  if (!cs->sort_order)
    return;

  heaviest= cs->sort_order[(uchar) cs->max_sort_char];
  for (code= 0; code < 256; code++)
  {
    const uchar weight= (uchar) cs->sort_order[code];

    if (weight <= heaviest)
      continue;
    heaviest= weight;
    cs->max_sort_char= code;
  }
}
1329 
1330 static my_bool
my_coll_init_simple(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader MY_ATTRIBUTE ((unused)))1331 my_coll_init_simple(CHARSET_INFO *cs,
1332                     MY_CHARSET_LOADER *loader MY_ATTRIBUTE((unused)))
1333 {
1334   set_max_sort_char(cs);
1335   return FALSE;
1336 }
1337 
1338 
my_strtoll10_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * nptr,char ** endptr,int * error)1339 longlong my_strtoll10_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1340                            const char *nptr, char **endptr, int *error)
1341 {
1342   return my_strtoll10(nptr, endptr, error);
1343 }
1344 
1345 
my_mb_ctype_8bit(const CHARSET_INFO * cs,int * ctype,const uchar * s,const uchar * e)1346 int my_mb_ctype_8bit(const CHARSET_INFO *cs, int *ctype,
1347                    const uchar *s, const uchar *e)
1348 {
1349   if (s >= e)
1350   {
1351     *ctype= 0;
1352     return MY_CS_TOOSMALL;
1353   }
1354   *ctype= cs->ctype[*s + 1];
1355   return 1;
1356 }
1357 
1358 
1359 #define CUTOFF  (ULLONG_MAX / 10)
1360 #define CUTLIM  (ULLONG_MAX % 10)
1361 #define DIGITS_IN_ULONGLONG 20
1362 
1363 static ulonglong d10[DIGITS_IN_ULONGLONG]=
1364 {
1365   1,
1366   10,
1367   100,
1368   1000,
1369   10000,
1370   100000,
1371   1000000,
1372   10000000,
1373   100000000,
1374   1000000000,
1375   10000000000ULL,
1376   100000000000ULL,
1377   1000000000000ULL,
1378   10000000000000ULL,
1379   100000000000000ULL,
1380   1000000000000000ULL,
1381   10000000000000000ULL,
1382   100000000000000000ULL,
1383   1000000000000000000ULL,
1384   10000000000000000000ULL
1385 };
1386 
1387 
1388 /*
1389 
1390   Convert a string to unsigned long long integer value
1391   with rounding.
1392 
  SYNOPSIS
    my_strntoull10rnd_8bit()
1395       cs              in      pointer to character set
1396       str             in      pointer to the string to be converted
1397       length          in      string length
1398       unsigned_flag   in      whether the number is unsigned
1399       endptr          out     pointer to the stop character
1400       error           out     returned error code
1401 
1402   DESCRIPTION
    This function takes the decimal representation of an integer number
    from string str and converts it to a signed or unsigned
    long long integer value.
1406     Space characters and tab are ignored.
1407     A sign character might precede the digit characters.
1408     The number may have any number of pre-zero digits.
1409     The number may have decimal point and exponent.
1410     Rounding is always done in "away from zero" style:
1411       0.5  ->   1
1412      -0.5  ->  -1
1413 
1414     The function stops reading the string str after "length" bytes
1415     or at the first character that is not a part of correct number syntax:
1416 
1417     <signed numeric literal> ::=
1418       [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1419 
1420     <exact numeric literal> ::=
1421                         <unsigned integer> [ <period> [ <unsigned integer> ] ]
1422                       | <period> <unsigned integer>
1423     <unsigned integer>   ::= <digit>...
1424 
1425   RETURN VALUES
1426     Value of string as a signed/unsigned longlong integer
1427 
1428     endptr cannot be NULL. The function will store the end pointer
1429     to the stop character here.
1430 
1431     The error parameter contains information how things went:
1432     0	     ok
    ERANGE   If the value of the converted number is out of range.
    In this case the return value is:
    - ULLONG_MAX if unsigned_flag and the number was too big
    - 0 if unsigned_flag and the number was negative
    - LLONG_MAX if no unsigned_flag and the number is too big
    - LLONG_MIN if no unsigned_flag and the number is too big negative
1439 
1440     EDOM If the string didn't contain any digits.
1441     In this case the return value is 0.
1442 */
1443 
1444 ulonglong
my_strntoull10rnd_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * str,size_t length,int unsigned_flag,char ** endptr,int * error)1445 my_strntoull10rnd_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1446                        const char *str, size_t length, int unsigned_flag,
1447                        char **endptr, int *error)
1448 {
1449   const char *dot, *end9, *beg, *end= str + length;
1450   ulonglong ull;
1451   ulong ul;
1452   uchar ch;
1453   int shift= 0, digits= 0, negative, addon;
1454 
1455   /* Skip leading spaces and tabs */
1456   for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1457 
1458   if (str >= end)
1459     goto ret_edom;
1460 
1461   if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1462   {
1463     if (++str == end)
1464       goto ret_edom;
1465   }
1466 
1467   beg= str;
1468   end9= (str + 9) > end ? end : (str + 9);
1469   /* Accumulate small number into ulong, for performance purposes */
1470   for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1471   {
1472     ul= ul * 10 + ch;
1473   }
1474 
1475   if (str >= end) /* Small number without dots and expanents */
1476   {
1477     *endptr= (char*) str;
1478     if (negative)
1479     {
1480       if (unsigned_flag)
1481       {
1482         *error= ul ? MY_ERRNO_ERANGE : 0;
1483         return 0;
1484       }
1485       else
1486       {
1487         *error= 0;
1488         return (ulonglong) (longlong) -(long) ul;
1489       }
1490     }
1491     else
1492     {
1493       *error=0;
1494       return (ulonglong) ul;
1495     }
1496   }
1497 
1498   digits= (int)(str - beg);
1499 
1500   /* Continue to accumulate into ulonglong */
1501   for (dot= NULL, ull= ul; str < end; str++)
1502   {
1503     if ((ch= (uchar) (*str - '0')) < 10)
1504     {
1505       if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1506       {
1507         ull= ull * 10 + ch;
1508         digits++;
1509         continue;
1510       }
1511       /*
1512         Adding the next digit would overflow.
1513         Remember the next digit in "addon", for rounding.
1514         Scan all digits with an optional single dot.
1515       */
1516       if (ull == CUTOFF)
1517       {
1518         ull= ULLONG_MAX;
1519         addon= 1;
1520         str++;
1521       }
1522       else
1523         addon= (*str >= '5');
1524       if (!dot)
1525       {
1526         for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1527         if (str < end && *str == '.')
1528         {
1529           str++;
1530           for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1531         }
1532       }
1533       else
1534       {
1535         shift= (int)(dot - str);
1536         for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1537       }
1538       goto exp;
1539     }
1540 
1541     if (*str == '.')
1542     {
1543       if (dot)
1544       {
1545         /* The second dot character */
1546         addon= 0;
1547         goto exp;
1548       }
1549       else
1550       {
1551         dot= str + 1;
1552       }
1553       continue;
1554     }
1555 
1556     /* Unknown character, exit the loop */
1557     break;
1558   }
1559   shift= dot ? (int)(dot - str) : 0; /* Right shift */
1560   addon= 0;
1561 
1562 exp:    /* [ E [ <sign> ] <unsigned integer> ] */
1563 
1564   if (!digits)
1565   {
1566     str= beg;
1567     goto ret_edom;
1568   }
1569 
1570   if (str < end && (*str == 'e' || *str == 'E'))
1571   {
1572     str++;
1573     if (str < end)
1574     {
1575       longlong negative_exp, exponent;
1576       if ((negative_exp= (*str == '-')) || *str=='+')
1577       {
1578         if (++str == end)
1579           goto ret_sign;
1580       }
1581       for (exponent= 0 ;
1582            str < end && (ch= (uchar) (*str - '0')) < 10;
1583            str++)
1584       {
1585         if (exponent <= (LLONG_MAX - ch) / 10)
1586          exponent= exponent * 10 + ch;
1587         else
1588          goto ret_too_big;
1589       }
1590       shift+= negative_exp ? -exponent : exponent;
1591     }
1592   }
1593 
1594   if (shift == 0) /* No shift, check addon digit */
1595   {
1596     if (addon)
1597     {
1598       if (ull == ULLONG_MAX)
1599         goto ret_too_big;
1600       ull++;
1601     }
1602     goto ret_sign;
1603   }
1604 
1605   if (shift < 0) /* Right shift */
1606   {
1607     ulonglong d, r;
1608 
1609     if (shift == INT_MIN32 || -shift >= DIGITS_IN_ULONGLONG)
1610       goto ret_zero; /* Exponent is a big negative number, return 0 */
1611 
1612     d= d10[-shift];
1613     r= (ull % d) * 2;
1614     ull /= d;
1615     if (r >= d)
1616       ull++;
1617     goto ret_sign;
1618   }
1619 
1620   if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1621   {
1622     if (!ull)
1623       goto ret_sign;
1624     goto ret_too_big;
1625   }
1626 
1627   for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1628   {
1629     if (ull > CUTOFF)
1630       goto ret_too_big; /* Overflow, number too big */
1631   }
1632 
1633 ret_sign:
1634   *endptr= (char*) str;
1635 
1636   if (!unsigned_flag)
1637   {
1638     if (negative)
1639     {
1640       if (ull > (ulonglong) LLONG_MIN)
1641       {
1642         *error= MY_ERRNO_ERANGE;
1643         return (ulonglong) LLONG_MIN;
1644       }
1645       *error= 0;
1646       return (ulonglong) -(longlong) ull;
1647     }
1648     else
1649     {
1650       if (ull > (ulonglong) LLONG_MAX)
1651       {
1652         *error= MY_ERRNO_ERANGE;
1653         return (ulonglong) LLONG_MAX;
1654       }
1655       *error= 0;
1656       return ull;
1657     }
1658   }
1659 
1660   /* Unsigned number */
1661   if (negative && ull)
1662   {
1663     *error= MY_ERRNO_ERANGE;
1664     return 0;
1665   }
1666   *error= 0;
1667   return ull;
1668 
1669 ret_zero:
1670   *endptr= (char*) str;
1671   *error= 0;
1672   return 0;
1673 
1674 ret_edom:
1675   *endptr= (char*) str;
1676   *error= MY_ERRNO_EDOM;
1677   return 0;
1678 
1679 ret_too_big:
1680   *endptr= (char*) str;
1681   *error= MY_ERRNO_ERANGE;
1682   return unsigned_flag ?
1683          ULLONG_MAX :
1684          negative ? (ulonglong) LLONG_MIN : (ulonglong) LLONG_MAX;
1685 }
1686 
1687 
1688 /*
1689   Check if a constant can be propagated
1690 
1691   SYNOPSIS:
1692     my_propagate_simple()
    cs		Character set information
    str		Constant string to check
    length	Length of the string
1696 
1697   NOTES:
1698    Takes the string in the given charset and check
1699    if it can be safely propagated in the optimizer.
1700 
1701    create table t1 (
1702      s char(5) character set latin1 collate latin1_german2_ci);
1703    insert into t1 values (0xf6); -- o-umlaut
1704    select * from t1 where length(s)=1 and s='oe';
1705 
1706    The above query should return one row.
1707    We cannot convert this query into:
1708    select * from t1 where length('oe')=1 and s='oe';
1709 
   Currently we don't check the constant itself,
   and decide not to propagate a constant
   just if the collation itself allows tricky things
   like expansions and contractions. In the future
   we can write more sophisticated functions to
   check the constants. For example, 'oa' can always
   be safely propagated in German2 because unlike
   'oe' it does not have any special meaning.
1718 
1719   RETURN
1720     1 if constant can be safely propagated
1721     0 if it is not safe to propagate the constant
1722 */
1723 
1724 
1725 
my_propagate_simple(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * str MY_ATTRIBUTE ((unused)),size_t length MY_ATTRIBUTE ((unused)))1726 my_bool my_propagate_simple(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1727                             const uchar *str MY_ATTRIBUTE((unused)),
1728                             size_t length MY_ATTRIBUTE((unused)))
1729 {
1730   return 1;
1731 }
1732 
1733 
my_propagate_complex(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * str MY_ATTRIBUTE ((unused)),size_t length MY_ATTRIBUTE ((unused)))1734 my_bool my_propagate_complex(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
1735                              const uchar *str MY_ATTRIBUTE((unused)),
1736                              size_t length MY_ATTRIBUTE((unused)))
1737 {
1738   return 0;
1739 }
1740 
1741 
1742 /*
1743   Normalize strxfrm flags
1744 
1745   SYNOPSIS:
1746     my_strxfrm_flag_normalize()
    flags    - non-normalized flags
    maximum  - maximum number of levels (1 .. MY_STRXFRM_NLEVELS)
1749 
1750   NOTES:
1751     If levels are omitted, then 1-maximum is assumed.
1752     If any level number is greater than the maximum,
1753     it is treated as the maximum.
1754 
1755   RETURN
1756     normalized flags
1757 */
1758 
uint my_strxfrm_flag_normalize(uint flags, uint maximum)
{
  /* The two padding flags are carried over unchanged in either case */
  const uint pad_flags= flags &
                        (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
  const uint requested= flags & MY_STRXFRM_LEVEL_ALL;
  uint desc_bits, rev_bits, top, lev, result;

  assert(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);

  /* If levels are omitted, then 1-maximum is assumed */
  if (!requested)
  {
    /* def_level_flags[n] has the n lowest level bits set */
    static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
    return def_level_flags[maximum] | pad_flags;
  }

  desc_bits= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
  rev_bits= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
  top= maximum - 1;              /* Highest level index that is allowed */
  result= 0;

  for (lev= 0; lev < MY_STRXFRM_NLEVELS; lev++)
  {
    uint dst_bit;

    if (!(requested & (1 << lev)))
      continue;

    /*
      If any level number is greater than the maximum,
      it is treated as the maximum.
    */
    dst_bit= 1 << (lev < top ? lev : top);
    result|= dst_bit;
    result|= (desc_bits & dst_bit) << MY_STRXFRM_DESC_SHIFT;
    result|= (rev_bits & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
  }

  return result | pad_flags;
}
1800 
1801 
1802 /*
1803   Apply DESC and REVERSE collation rules.
1804 
1805   SYNOPSIS:
1806     my_strxfrm_desc_and_reverse()
1807     str      - pointer to string
1808     strend   - end of string
1809     flags    - flags
1810     level    - which level, starting from 0.
1811 
1812   NOTES:
1813     Apply DESC or REVERSE or both flags.
1814 
1815     If DESC flag is given, then the weights
1816     come out NOTed or negated for that level.
1817 
    If REVERSE flag is given, then the weights come out in
    reverse order for that level, that is, starting with
    the last character and ending with the first character.

    If neither DESC nor REVERSE flags are given,
    the string is not changed.
1824 
1825 */
1826 void
my_strxfrm_desc_and_reverse(uchar * str,uchar * strend,uint flags,uint level)1827 my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
1828                             uint flags, uint level)
1829 {
1830   if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
1831   {
1832     if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1833     {
1834       for (strend--; str <= strend;)
1835       {
1836         uchar tmp= *str;
1837         *str++= ~*strend;
1838         *strend--= ~tmp;
1839       }
1840     }
1841     else
1842     {
1843       for (; str < strend; str++)
1844         *str= ~*str;
1845     }
1846   }
1847   else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
1848   {
1849     for (strend--; str < strend;)
1850     {
1851       uchar tmp= *str;
1852       *str++= *strend;
1853       *strend--= tmp;
1854     }
1855   }
1856 }
1857 
1858 
1859 size_t
my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * cs,uchar * str,uchar * frmend,uchar * strend,uint nweights,uint flags,uint level)1860 my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO *cs,
1861                                 uchar *str, uchar *frmend, uchar *strend,
1862                                 uint nweights, uint flags, uint level)
1863 {
1864   if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1865   {
1866     uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
1867     cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1868     frmend+= fill_length;
1869   }
1870   my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1871   if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
1872   {
1873     size_t fill_length= strend - frmend;
1874     cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1875     frmend= strend;
1876   }
1877   return frmend - str;
1878 }
1879 
1880 
/*
  Character set handler for generic 8-bit character sets.

  The entries are positional initializers of MY_CHARSET_HANDLER, so
  their order must follow the member order of that structure (declared
  outside this file).
*/
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
    my_cset_init_8bit,		/* init          */
    NULL,			/* ismbchar      */
    my_mbcharlen_8bit,		/* mbcharlen     */
    my_numchars_8bit,
    my_charpos_8bit,
    my_well_formed_len_8bit,	/* accepts every byte */
    my_lengthsp_8bit,
    my_numcells_8bit,
    my_mb_wc_8bit,
    my_wc_mb_8bit,
    my_mb_ctype_8bit,
    my_caseup_str_8bit,
    my_casedn_str_8bit,
    my_caseup_8bit,
    my_casedn_8bit,
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
    my_strtoll10_8bit,
    my_strntoull10rnd_8bit,
    my_scan_8bit
};
1911 
/*
  Character set handler for ASCII.

  Same entries as my_charset_8bit_handler, except that the
  well-formedness check is my_well_formed_len_ascii, which flags
  bytes with the high bit set.
*/
MY_CHARSET_HANDLER my_charset_ascii_handler=
{
    my_cset_init_8bit,   /* init          */
    NULL,                /* ismbchar      */
    my_mbcharlen_8bit,   /* mbcharlen     */
    my_numchars_8bit,
    my_charpos_8bit,
    my_well_formed_len_ascii, /* the only entry differing from the 8bit handler */
    my_lengthsp_8bit,
    my_numcells_8bit,
    my_mb_wc_8bit,
    my_wc_mb_8bit,
    my_mb_ctype_8bit,
    my_caseup_str_8bit,
    my_casedn_str_8bit,
    my_caseup_8bit,
    my_casedn_8bit,
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
    my_strtoll10_8bit,
    my_strntoull10rnd_8bit,
    my_scan_8bit
};
1942 
/*
  Collation handler for simple 8-bit collations.

  The entries are positional initializers of MY_COLLATION_HANDLER, so
  their order must follow the member order of that structure (declared
  outside this file).
*/
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
{
    my_coll_init_simple,	/* init */
    my_strnncoll_simple,
    my_strnncollsp_simple,
    my_strnxfrm_simple,
    my_strnxfrmlen_simple,
    my_like_range_simple,
    my_wildcmp_8bit,
    my_strcasecmp_8bit,
    my_instr_simple,
    my_hash_sort_simple,
    my_propagate_simple	/* constants may always be propagated */
};
1957