1 /* Copyright (c) 2002 MySQL AB & tommy@valley.ne.jp
2    Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
3 
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; version 2
7    of the License.
8 
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13 
14    You should have received a copy of the GNU Library General Public
15    License along with this library; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
17 
18 /* This file is for binary pseudo charset, created by bar@mysql.com */
19 
20 
21 #include <my_global.h>
22 #include "m_string.h"
23 #include "m_ctype.h"
24 
25 static uchar ctype_bin[]=
26 {
27   0,
28   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
29   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
30   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
31   132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
32   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
33   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
34   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
35   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
36   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
37   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
38   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
39   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
40   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
41   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
42   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
43   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
44 };
45 
46 
47 /* Dummy array for toupper / tolower / sortorder */
48 
49 static uchar bin_char_array[] =
50 {
51     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
52    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
53    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
54    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
55    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
56    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
57    96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
58   112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
59   128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
60   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
61   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
62   176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
63   192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
64   208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
65   224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
66   240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
67 };
68 
69 
70 static my_bool
my_coll_init_8bit_bin(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader MY_ATTRIBUTE ((unused)))71 my_coll_init_8bit_bin(CHARSET_INFO *cs,
72                       MY_CHARSET_LOADER *loader MY_ATTRIBUTE((unused)))
73 {
74   cs->max_sort_char=255;
75   return FALSE;
76 }
77 
/*
  Compare two binary strings byte by byte.

  SYNOPSIS
    my_strnncoll_binary()
    cs            Character set (not used)
    s             First string
    slen          Length of 's'
    t             Second string
    tlen          Length of 't'
    t_is_prefix   If set, 's' is only compared over the first
                  MIN(slen, tlen) bytes, so a longer 's' that starts
                  with 't' compares as equal

  NOTE
    When the common prefix is equal the result is -1, 0 or 1 and is derived
    by comparing the lengths directly.  Casting the size_t difference to int
    would give a wrong sign (or a false 0) for differences >= 2^31.

  RETURN
    < 0   s < t
    0     s == t
    > 0   s > t
*/

static int my_strnncoll_binary(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                               const uchar *s, size_t slen,
                               const uchar *t, size_t tlen,
                               my_bool t_is_prefix)
{
  size_t len= MY_MIN(slen, tlen);
  size_t s_cmp_len;
  /* memcmp() must not be handed (possibly NULL) pointers when len is 0 */
  int cmp= len ? memcmp(s, t, len) : 0;

  if (cmp)
    return cmp;

  /* Common part is equal: the decision is made on the lengths alone */
  s_cmp_len= t_is_prefix ? len : slen;
  if (s_cmp_len == tlen)
    return 0;
  return (s_cmp_len < tlen) ? -1 : 1;
}
87 
88 
/*
  Length of a binary string "without trailing spaces".

  SYNOPSIS
    my_lengthsp_binary()
    cs          Character set (not used)
    ptr         String (not used)
    length      Length of the string

  RETURN
    'length' unchanged: nothing is stripped from the string
*/

size_t my_lengthsp_binary(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                          const char *ptr MY_ATTRIBUTE((unused)),
                          size_t length)
{
  return length;
}
95 
96 
/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
    my_strnncollsp_binary()
    cs                  Character set
    s                   String to compare
    slen                Length of 's'
    t                   String to compare
    tlen                Length of 't'
    diff_if_only_endspace_difference
                        Not used

  NOTE
   This function is used for real binary strings, i.e. for
   BLOB, BINARY(N) and VARBINARY(N).
   It compares trailing spaces as spaces.

  RETURN
  < 0   s < t
  0     s == t
  > 0   s > t
*/
118 
static int my_strnncollsp_binary(const CHARSET_INFO *cs
                                 MY_ATTRIBUTE((unused)),
                                 const uchar *s, size_t slen,
                                 const uchar *t, size_t tlen,
                                 my_bool diff_if_only_endspace_difference
                                 MY_ATTRIBUTE((unused)))
{
  /*
    No end-space handling here: simply delegate to the plain binary
    comparison with t_is_prefix switched off.
  */
  int cmp_result= my_strnncoll_binary(cs, s, slen, t, tlen, 0);

  return cmp_result;
}
128 
129 
/*
  Compare two character strings of an 8-bit binary collation byte by byte,
  without any end-space handling.

  SYNOPSIS
    my_strnncoll_8bit_bin()
    cs            Character set (not used)
    s             First string
    slen          Length of 's'
    t             Second string
    tlen          Length of 't'
    t_is_prefix   If set, 's' is only compared over the first
                  MIN(slen, tlen) bytes, so a longer 's' that starts
                  with 't' compares as equal

  NOTE
    When the common prefix is equal the result is -1, 0 or 1 and is derived
    by comparing the lengths directly.  Casting the size_t difference to int
    would give a wrong sign (or a false 0) for differences >= 2^31.

  RETURN
    < 0   s < t
    0     s == t
    > 0   s > t
*/

static int my_strnncoll_8bit_bin(const CHARSET_INFO *cs
                                 MY_ATTRIBUTE((unused)),
                                 const uchar *s, size_t slen,
                                 const uchar *t, size_t tlen,
                                 my_bool t_is_prefix)
{
  size_t len= MY_MIN(slen, tlen);
  size_t s_cmp_len;
  /* memcmp() must not be handed (possibly NULL) pointers when len is 0 */
  int cmp= len ? memcmp(s, t, len) : 0;

  if (cmp)
    return cmp;

  /* Common part is equal: the decision is made on the lengths alone */
  s_cmp_len= t_is_prefix ? len : slen;
  if (s_cmp_len == tlen)
    return 0;
  return (s_cmp_len < tlen) ? -1 : 1;
}
140 
141 
/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
    my_strnncollsp_8bit_bin()
    cs                  Character set
    s                   String to compare
    slen                Length of 's'
    t                   String to compare
    tlen                Length of 't'
    diff_if_only_endspace_difference
                        Set to 1 if the strings should be regarded as
                        different when they differ only in end space

  NOTE
   This function is used for character strings with binary collations.
   The shorter string is extended with end space to be as long as the longer
   one.

  RETURN
  < 0   s < t
  0     s == t
  > 0   s > t
*/
166 
static int my_strnncollsp_8bit_bin(const CHARSET_INFO *cs
                                   MY_ATTRIBUTE((unused)),
                                   const uchar *a, size_t a_length,
                                   const uchar *b, size_t b_length,
                                   my_bool diff_if_only_endspace_difference)
{
  size_t common= MY_MIN(a_length, b_length);
  size_t idx;
  const uchar *tail;
  const uchar *tail_end;
  int sign;

#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* The first differing byte inside the common part decides the result */
  for (idx= 0; idx < common; idx++)
  {
    if (a[idx] != b[idx])
      return ((int) a[idx] - (int) b[idx]);
  }

  if (a_length == b_length)
    return 0;

  /*
    Only the longer key has bytes left. 'sign' is the result to report
    when that remainder sorts after a run of spaces: 1 if 'a' is the
    longer key, -1 if 'b' is.
  */
  if (a_length < b_length)
  {
    tail= b + common;
    tail_end= b + b_length;
    sign= -1;
  }
  else
  {
    tail= a + common;
    tail_end= a + a_length;
    sign= 1;
  }

  /*
    Check the next non-space character of the longer key. If it is < ' ',
    the longer key is smaller than the (space padded) shorter key.
  */
  for (; tail < tail_end; tail++)
  {
    if (*tail != ' ')
      return (*tail < ' ') ? -sign : sign;
  }

  /* The keys differ only in trailing spaces */
  return diff_if_only_endspace_difference ? sign : 0;
}
213 
214 
215 /* This function is used for all conversion functions */
216 
/*
  Case conversion of a string in the binary charset: a no-op.

  SYNOPSIS
    my_case_str_bin()
    cs          Character set (not used)
    str         String (not used, left untouched)

  RETURN
    0, always
*/

static size_t my_case_str_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                              char *str MY_ATTRIBUTE((unused)))
{
  return 0;
}
222 
223 
/*
  Case conversion of a length-delimited buffer in the binary charset:
  a no-op that neither reads 'src' nor writes 'dst'.

  SYNOPSIS
    my_case_bin()
    cs          Character set (not used)
    src         Source buffer (not used)
    srclen      Length of the source buffer
    dst         Destination buffer (not used)
    dstlen      Length of the destination buffer (not used)

  RETURN
    'srclen' unchanged
*/

static size_t my_case_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                          char *src MY_ATTRIBUTE((unused)),
                          size_t srclen,
                          char *dst MY_ATTRIBUTE((unused)),
                          size_t dstlen MY_ATTRIBUTE((unused)))
{
  return srclen;
}
232 
233 
/*
  "Case insensitive" comparison of two NUL-terminated strings for the
  binary charset. It is a plain strcmp(), i.e. case is significant.

  SYNOPSIS
    my_strcasecmp_bin()
    cs          Character set (not used)
    s           First NUL-terminated string
    t           Second NUL-terminated string

  RETURN
    The result of strcmp(s, t)
*/

static int my_strcasecmp_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                             const char *s, const char *t)
{
  int cmp_result= strcmp(s, t);

  return cmp_result;
}
239 
240 
my_mbcharlen_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),uint c MY_ATTRIBUTE ((unused)))241 uint my_mbcharlen_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
242                       uint c MY_ATTRIBUTE((unused)))
243 {
244   return 1;
245 }
246 
247 
/*
  Read one character from a binary string as a wide character value.

  SYNOPSIS
    my_mb_wc_bin()
    cs          Character set (not used)
    wc          OUT: receives the value of the byte at 'str'
    str         Current read position
    end         End of the input buffer

  RETURN
    MY_CS_TOOSMALL   No input left (str >= end); '*wc' is not written
    1                One byte was consumed
*/

static int my_mb_wc_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        my_wc_t *wc,
                        const uchar *str,
                        const uchar *end MY_ATTRIBUTE((unused)))
{
  if (str < end)
  {
    *wc= str[0];
    return 1;
  }
  return MY_CS_TOOSMALL;
}
259 
260 
/*
  Store one wide character value into a binary string as a single byte.

  SYNOPSIS
    my_wc_mb_bin()
    cs          Character set (not used)
    wc          Value to store
    s           Current write position
    e           End of the output buffer

  RETURN
    MY_CS_TOOSMALL   No room left in the output buffer (s >= e)
    MY_CS_ILUNI      'wc' does not fit into one byte (wc >= 256)
    1                One byte was written
*/

static int my_wc_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        my_wc_t wc,
                        uchar *s,
                        uchar *e MY_ATTRIBUTE((unused)))
{
  if (s >= e)
    return MY_CS_TOOSMALL;

  if (wc >= 256)
    return MY_CS_ILUNI;

  s[0]= (char) wc;
  return 1;
}
276 
277 
/*
  Fold a character string of an 8-bit binary collation into a running hash.

  SYNOPSIS
    my_hash_sort_8bit_bin()
    cs          Character set (not used)
    key         String to hash
    len         Length of 'key'
    nr1         IN/OUT: running hash value
    nr2         IN/OUT: running multiplier state, increased by 3 per byte

  NOTE
    Trailing spaces are left out of the hash (the end of the hashed range
    is the pointer returned by skip_trailing_space()), so that 'A ' and
    'A' get identical hash values.
*/

void my_hash_sort_8bit_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                           const uchar *key, size_t len,
                           ulong *nr1, ulong *nr2)
{
  const uchar *cur= key;
  const uchar *stop= skip_trailing_space(key, len);

  while (cur < stop)
  {
    *nr1^= (ulong) ((((uint) *nr1 & 63) + *nr2) *
                    ((uint) *cur)) + (*nr1 << 8);
    *nr2+= 3;
    cur++;
  }
}
297 
298 
/*
  Fold a real binary string into a running hash.

  SYNOPSIS
    my_hash_sort_bin()
    cs          Character set (not used)
    key         String to hash
    len         Length of 'key'
    nr1         IN/OUT: running hash value
    nr2         IN/OUT: running multiplier state, increased by 3 per byte

  NOTE
    All 'len' bytes take part in the hash; trailing spaces are not removed.
*/

void my_hash_sort_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                      const uchar *key, size_t len, ulong *nr1, ulong *nr2)
{
  const uchar *cur= key;
  const uchar *stop= key + len;

  while (cur < stop)
  {
    *nr1^= (ulong) ((((uint) *nr1 & 63) + *nr2) *
                    ((uint) *cur)) + (*nr1 << 8);
    *nr2+= 3;
    cur++;
  }
}
313 
314 
315 /*
316   The following defines is here to keep the following code identical to
317   the one in ctype-simple.c
318 */
319 
320 #define likeconv(s,A) (A)
321 #define INC_PTR(cs,A,B) (A)++
322 
323 
324 static
my_wildcmp_bin_impl(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many,int recurse_level)325 int my_wildcmp_bin_impl(const CHARSET_INFO *cs,
326                         const char *str,const char *str_end,
327                         const char *wildstr,const char *wildend,
328                         int escape, int w_one, int w_many, int recurse_level)
329 {
330   int result= -1;			/* Not found, using wildcards */
331 
332   if (my_string_stack_guard && my_string_stack_guard(recurse_level))
333     return 1;
334   while (wildstr != wildend)
335   {
336     while (*wildstr != w_many && *wildstr != w_one)
337     {
338       if (*wildstr == escape && wildstr+1 != wildend)
339 	wildstr++;
340       if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
341 	return(1);			/* No match */
342       if (wildstr == wildend)
343 	return(str != str_end);		/* Match if both are at end */
344       result=1;				/* Found an anchor char */
345     }
346     if (*wildstr == w_one)
347     {
348       do
349       {
350 	if (str == str_end)		/* Skip one char if possible */
351 	  return(result);
352 	INC_PTR(cs,str,str_end);
353       } while (++wildstr < wildend && *wildstr == w_one);
354       if (wildstr == wildend)
355 	break;
356     }
357     if (*wildstr == w_many)
358     {					/* Found w_many */
359       uchar cmp;
360       wildstr++;
361       /* Remove any '%' and '_' from the wild search string */
362       for (; wildstr != wildend ; wildstr++)
363       {
364 	if (*wildstr == w_many)
365 	  continue;
366 	if (*wildstr == w_one)
367 	{
368 	  if (str == str_end)
369 	    return(-1);
370 	  INC_PTR(cs,str,str_end);
371 	  continue;
372 	}
373 	break;				/* Not a wild character */
374       }
375       if (wildstr == wildend)
376 	return(0);			/* match if w_many is last */
377       if (str == str_end)
378 	return(-1);
379 
380       if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
381 	cmp= *++wildstr;
382 
383       INC_PTR(cs,wildstr,wildend);	/* This is compared through cmp */
384       cmp=likeconv(cs,cmp);
385       do
386       {
387 	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
388 	  str++;
389 	if (str++ == str_end)
390 	  return(-1);
391 	{
392 	  int tmp=my_wildcmp_bin_impl(cs,str,str_end,
393                                       wildstr,wildend,escape,
394                                       w_one, w_many, recurse_level + 1);
395 	  if (tmp <= 0)
396 	    return(tmp);
397 	}
398       } while (str != str_end && wildstr[0] != w_many);
399       return(-1);
400     }
401   }
402   return(str != str_end ? 1 : 0);
403 }
404 
/*
  Match a byte string against a wildcard pattern.

  SYNOPSIS
    my_wildcmp_bin()
    cs          Character set
    str         Start of the string to test
    str_end     One past the last byte of the string
    wildstr     Start of the pattern
    wildend     One past the last byte of the pattern
    escape      Escape character
    w_one       Wildcard that matches exactly one byte
    w_many      Wildcard that matches any number of bytes

  RETURN
    The result of my_wildcmp_bin_impl(), started at recursion level 1
*/

int my_wildcmp_bin(const CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
                   int escape, int w_one, int w_many)
{
  int match_result= my_wildcmp_bin_impl(cs, str, str_end,
                                        wildstr, wildend,
                                        escape, w_one, w_many, 1);
  return match_result;
}
414 
415 
416 static size_t
my_strnxfrm_8bit_bin(const CHARSET_INFO * cs,uchar * dst,size_t dstlen,uint nweights,const uchar * src,size_t srclen,uint flags)417 my_strnxfrm_8bit_bin(const CHARSET_INFO *cs,
418                      uchar * dst, size_t dstlen, uint nweights,
419                      const uchar *src, size_t srclen, uint flags)
420 {
421   set_if_smaller(srclen, dstlen);
422   set_if_smaller(srclen, nweights);
423   if (dst != src)
424     memcpy(dst, src, srclen);
425   return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
426                                          nweights - srclen, flags, 0);
427 }
428 
429 
430 static
my_instr_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,size_t b_length,const char * s,size_t s_length,my_match_t * match,uint nmatch)431 uint my_instr_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
432 		  const char *b, size_t b_length,
433 		  const char *s, size_t s_length,
434 		  my_match_t *match, uint nmatch)
435 {
436   const uchar *str, *search, *end, *search_end;
437 
438   if (s_length <= b_length)
439   {
440     if (!s_length)
441     {
442       if (nmatch)
443       {
444         match->beg= 0;
445         match->end= 0;
446         match->mb_len= 0;
447       }
448       return 1;		/* Empty string is always found */
449     }
450 
451     str= (const uchar*) b;
452     search= (const uchar*) s;
453     end= (const uchar*) b+b_length-s_length+1;
454     search_end= (const uchar*) s + s_length;
455 
456 skip:
457     while (str != end)
458     {
459       if ( (*str++) == (*search))
460       {
461 	const uchar *i,*j;
462 
463 	i= str;
464 	j= search+1;
465 
466 	while (j != search_end)
467 	  if ((*i++) != (*j++))
468             goto skip;
469 
470         if (nmatch > 0)
471 	{
472 	  match[0].beg= 0;
473 	  match[0].end= (size_t) (str- (const uchar*)b-1);
474 	  match[0].mb_len= match[0].end;
475 
476 	  if (nmatch > 1)
477 	  {
478 	    match[1].beg= match[0].end;
479 	    match[1].end= match[0].end+s_length;
480 	    match[1].mb_len= match[1].end-match[1].beg;
481 	  }
482 	}
483 	return 2;
484       }
485     }
486   }
487   return 0;
488 }
489 
490 
491 MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
492 {
493   my_coll_init_8bit_bin,
494   my_strnncoll_8bit_bin,
495   my_strnncollsp_8bit_bin,
496   my_strnxfrm_8bit_bin,
497   my_strnxfrmlen_simple,
498   my_like_range_simple,
499   my_wildcmp_bin,
500   my_strcasecmp_bin,
501   my_instr_bin,
502   my_hash_sort_8bit_bin,
503   my_propagate_simple
504 };
505 
506 
507 static MY_COLLATION_HANDLER my_collation_binary_handler =
508 {
509   NULL,			/* init */
510   my_strnncoll_binary,
511   my_strnncollsp_binary,
512   my_strnxfrm_8bit_bin,
513   my_strnxfrmlen_simple,
514   my_like_range_simple,
515   my_wildcmp_bin,
516   my_strcasecmp_bin,
517   my_instr_bin,
518   my_hash_sort_bin,
519   my_propagate_simple
520 };
521 
522 
523 static MY_CHARSET_HANDLER my_charset_handler=
524 {
525   NULL,			/* init */
526   NULL,			/* ismbchar      */
527   my_mbcharlen_8bit,	/* mbcharlen     */
528   my_numchars_8bit,
529   my_charpos_8bit,
530   my_well_formed_len_8bit,
531   my_lengthsp_binary,
532   my_numcells_8bit,
533   my_mb_wc_bin,
534   my_wc_mb_bin,
535   my_mb_ctype_8bit,
536   my_case_str_bin,
537   my_case_str_bin,
538   my_case_bin,
539   my_case_bin,
540   my_snprintf_8bit,
541   my_long10_to_str_8bit,
542   my_longlong10_to_str_8bit,
543   my_fill_8bit,
544   my_strntol_8bit,
545   my_strntoul_8bit,
546   my_strntoll_8bit,
547   my_strntoull_8bit,
548   my_strntod_8bit,
549   my_strtoll10_8bit,
550   my_strntoull10rnd_8bit,
551   my_scan_8bit
552 };
553 
554 
555 CHARSET_INFO my_charset_bin =
556 {
557     63,0,0,			/* number        */
558     MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
559     "binary",			/* cs name    */
560     "binary",			/* name          */
561     "",				/* comment       */
562     NULL,			/* tailoring     */
563     ctype_bin,			/* ctype         */
564     bin_char_array,		/* to_lower      */
565     bin_char_array,		/* to_upper      */
566     NULL,			/* sort_order    */
567     NULL,			/* uca           */
568     NULL,			/* tab_to_uni    */
569     NULL,			/* tab_from_uni  */
570     &my_unicase_default,        /* caseinfo     */
571     NULL,			/* state_map    */
572     NULL,			/* ident_map    */
573     1,				/* strxfrm_multiply */
574     1,                          /* caseup_multiply  */
575     1,                          /* casedn_multiply  */
576     1,				/* mbminlen      */
577     1,				/* mbmaxlen      */
578     0,				/* min_sort_char */
579     255,			/* max_sort_char */
580     0,                          /* pad char      */
581     0,                          /* escape_with_backslash_is_dangerous */
582     1,                          /* levels_for_compare */
583     1,                          /* levels_for_order   */
584     &my_charset_handler,
585     &my_collation_binary_handler
586 };
587