1 /* Copyright (c) 2002 MySQL AB & tommy@valley.ne.jp
2 Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
3
4 This library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; version 2
7 of the License.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
13
14 You should have received a copy of the GNU Library General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
17
18 /* This file is for binary pseudo charset, created by bar@mysql.com */
19
20
21 #include <my_global.h>
22 #include "m_string.h"
23 #include "m_ctype.h"
24
25 static uchar ctype_bin[]=
26 {
27 0,
28 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
29 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
30 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
31 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
32 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
34 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
35 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44 };
45
46
47 /* Dummy array for toupper / tolower / sortorder */
48
49 static uchar bin_char_array[] =
50 {
51 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
52 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
53 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
54 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
55 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
56 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
57 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
58 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
59 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
60 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
61 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
62 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
63 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
64 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
65 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
66 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
67 };
68
69
70 static my_bool
my_coll_init_8bit_bin(CHARSET_INFO * cs,MY_CHARSET_LOADER * loader MY_ATTRIBUTE ((unused)))71 my_coll_init_8bit_bin(CHARSET_INFO *cs,
72 MY_CHARSET_LOADER *loader MY_ATTRIBUTE((unused)))
73 {
74 cs->max_sort_char=255;
75 return FALSE;
76 }
77
/*
  Compare two binary strings byte by byte.

  SYNOPSIS
    my_strnncoll_binary()
    cs           Character set (not used)
    s            First string
    slen         Length of 's' in bytes
    t            Second string
    tlen         Length of 't' in bytes
    t_is_prefix  If non-zero, only the first MY_MIN(slen, tlen) bytes of 's'
                 take part, so 's' compares equal when it starts with 't'

  NOTE
    memcmp() is skipped when there is nothing to compare, because passing
    it invalid (e.g. NULL) pointers is undefined even for a zero length.
    When the common bytes are equal the lengths are compared directly
    instead of narrowing their size_t difference to int, which could yield
    0 or the wrong sign for very large length differences.

  RETURN
    < 0   s < t
    0     s == t
    > 0   s > t
*/
static int my_strnncoll_binary(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                               const uchar *s, size_t slen,
                               const uchar *t, size_t tlen,
                               my_bool t_is_prefix)
{
  size_t len= MY_MIN(slen, tlen);
  size_t s_effective_len;
  int cmp= len ? memcmp(s, t, len) : 0;

  if (cmp)
    return cmp;

  s_effective_len= t_is_prefix ? len : slen;
  if (s_effective_len == tlen)
    return 0;
  return s_effective_len < tlen ? -1 : 1;
}
87
88
my_lengthsp_binary(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * ptr MY_ATTRIBUTE ((unused)),size_t length)89 size_t my_lengthsp_binary(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
90 const char *ptr MY_ATTRIBUTE((unused)),
91 size_t length)
92 {
93 return length;
94 }
95
96
97 /*
98 Compare two strings. Result is sign(first_argument - second_argument)
99
100 SYNOPSIS
101 my_strnncollsp_binary()
102 cs Chararacter set
103 s String to compare
104 slen Length of 's'
105 t String to compare
106 tlen Length of 't'
107
108 NOTE
109 This function is used for real binary strings, i.e. for
110 BLOB, BINARY(N) and VARBINARY(N).
111 It compares trailing spaces as spaces.
112
113 RETURN
114 < 0 s < t
115 0 s == t
116 > 0 s > t
117 */
118
my_strnncollsp_binary(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * s,size_t slen,const uchar * t,size_t tlen,my_bool diff_if_only_endspace_difference MY_ATTRIBUTE ((unused)))119 static int my_strnncollsp_binary(const CHARSET_INFO *cs
120 MY_ATTRIBUTE((unused)),
121 const uchar *s, size_t slen,
122 const uchar *t, size_t tlen,
123 my_bool diff_if_only_endspace_difference
124 MY_ATTRIBUTE((unused)))
125 {
126 return my_strnncoll_binary(cs,s,slen,t,tlen,0);
127 }
128
129
/*
  Compare two character strings of a binary (_bin) 8-bit collation
  byte by byte.

  SYNOPSIS
    my_strnncoll_8bit_bin()
    cs           Character set (not used)
    s            First string
    slen         Length of 's' in bytes
    t            Second string
    tlen         Length of 't' in bytes
    t_is_prefix  If non-zero, only the first MY_MIN(slen, tlen) bytes of 's'
                 take part, so 's' compares equal when it starts with 't'

  NOTE
    memcmp() is skipped when there is nothing to compare, because passing
    it invalid (e.g. NULL) pointers is undefined even for a zero length.
    When the common bytes are equal the lengths are compared directly
    instead of narrowing their size_t difference to int, which could yield
    0 or the wrong sign for very large length differences.

  RETURN
    < 0   s < t
    0     s == t
    > 0   s > t
*/
static int my_strnncoll_8bit_bin(const CHARSET_INFO *cs
                                 MY_ATTRIBUTE((unused)),
                                 const uchar *s, size_t slen,
                                 const uchar *t, size_t tlen,
                                 my_bool t_is_prefix)
{
  size_t len= MY_MIN(slen, tlen);
  size_t s_effective_len;
  int cmp= len ? memcmp(s, t, len) : 0;

  if (cmp)
    return cmp;

  s_effective_len= t_is_prefix ? len : slen;
  if (s_effective_len == tlen)
    return 0;
  return s_effective_len < tlen ? -1 : 1;
}
140
141
142 /*
143 Compare two strings. Result is sign(first_argument - second_argument)
144
145 SYNOPSIS
146 my_strnncollsp_8bit_bin()
147 cs Chararacter set
148 s String to compare
149 slen Length of 's'
150 t String to compare
151 tlen Length of 't'
152 diff_if_only_endspace_difference
153 Set to 1 if the strings should be regarded as different
154 if they only difference in end space
155
156 NOTE
157 This function is used for character strings with binary collations.
158 The shorter string is extended with end space to be as long as the longer
159 one.
160
161 RETURN
162 < 0 s < t
163 0 s == t
164 > 0 s > t
165 */
166
my_strnncollsp_8bit_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const uchar * a,size_t a_length,const uchar * b,size_t b_length,my_bool diff_if_only_endspace_difference)167 static int my_strnncollsp_8bit_bin(const CHARSET_INFO *cs
168 MY_ATTRIBUTE((unused)),
169 const uchar *a, size_t a_length,
170 const uchar *b, size_t b_length,
171 my_bool diff_if_only_endspace_difference)
172 {
173 const uchar *end;
174 size_t length;
175 int res;
176
177 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
178 diff_if_only_endspace_difference= 0;
179 #endif
180
181 end= a + (length= MY_MIN(a_length, b_length));
182 while (a < end)
183 {
184 if (*a++ != *b++)
185 return ((int) a[-1] - (int) b[-1]);
186 }
187 res= 0;
188 if (a_length != b_length)
189 {
190 int swap= 1;
191 /*
192 Check the next not space character of the longer key. If it's < ' ',
193 then it's smaller than the other key.
194 */
195 if (diff_if_only_endspace_difference)
196 res= 1; /* Assume 'a' is bigger */
197 if (a_length < b_length)
198 {
199 /* put shorter key in s */
200 a_length= b_length;
201 a= b;
202 swap= -1; /* swap sign of result */
203 res= -res;
204 }
205 for (end= a + a_length-length; a < end ; a++)
206 {
207 if (*a != ' ')
208 return (*a < ' ') ? -swap : swap;
209 }
210 }
211 return res;
212 }
213
214
215 /* This function is used for all conversion functions */
216
my_case_str_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * str MY_ATTRIBUTE ((unused)))217 static size_t my_case_str_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
218 char *str MY_ATTRIBUTE((unused)))
219 {
220 return 0;
221 }
222
223
my_case_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),char * src MY_ATTRIBUTE ((unused)),size_t srclen,char * dst MY_ATTRIBUTE ((unused)),size_t dstlen MY_ATTRIBUTE ((unused)))224 static size_t my_case_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
225 char *src MY_ATTRIBUTE((unused)),
226 size_t srclen,
227 char *dst MY_ATTRIBUTE((unused)),
228 size_t dstlen MY_ATTRIBUTE((unused)))
229 {
230 return srclen;
231 }
232
233
/*
  "Case-insensitive" comparison of two zero-terminated strings.

  The binary charset has no notion of case, so this is a plain strcmp().

  RETURN
    The value of strcmp(s, t): < 0, 0 or > 0
*/
static int my_strcasecmp_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                             const char *s, const char *t)
{
  const int order= strcmp(s, t);
  return order;
}
239
240
my_mbcharlen_8bit(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),uint c MY_ATTRIBUTE ((unused)))241 uint my_mbcharlen_8bit(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
242 uint c MY_ATTRIBUTE((unused)))
243 {
244 return 1;
245 }
246
247
/*
  Decode one character of a binary string into a wide character.

  SYNOPSIS
    my_mb_wc_bin()
    cs    Character set (not used)
    wc    OUT: receives the value of the byte at 'str'
    str   Current read position
    end   End of the input; nothing is read when str >= end

  NOTE
    'end' is read by the bounds check, so it is deliberately not tagged
    as an unused parameter.

  RETURN
    1                One byte was consumed
    MY_CS_TOOSMALL   No input left (str >= end)
*/
static int my_mb_wc_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        my_wc_t *wc,
                        const uchar *str,
                        const uchar *end)
{
  if (str >= end)
    return MY_CS_TOOSMALL;

  *wc= str[0];
  return 1;
}
259
260
/*
  Encode one wide character as a single byte of a binary string.

  SYNOPSIS
    my_wc_mb_bin()
    cs    Character set (not used)
    wc    Wide character to store
    s     Current write position
    e     End of the output buffer; nothing is written when s >= e

  NOTE
    'e' is read by the bounds check, so it is deliberately not tagged as
    an unused parameter.  The value is cast to uchar, the element type of
    the destination buffer.

  RETURN
    1                One byte was written (wc < 256)
    MY_CS_TOOSMALL   No room in the output buffer (s >= e)
    MY_CS_ILUNI      wc does not fit into one byte (wc >= 256)
*/
static int my_wc_mb_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                        my_wc_t wc,
                        uchar *s,
                        uchar *e)
{
  if (s >= e)
    return MY_CS_TOOSMALL;

  if (wc < 256)
  {
    s[0]= (uchar) wc;
    return 1;
  }
  return MY_CS_ILUNI;
}
276
277
/*
  Fold a character string of a binary 8-bit collation into a running hash.

  SYNOPSIS
    my_hash_sort_8bit_bin()
    cs    Character set (not used)
    key   String to hash
    len   Length of 'key' in bytes
    nr1   IN/OUT: hash value, updated once per hashed byte
    nr2   IN/OUT: hash increment state, advanced by 3 per hashed byte

  NOTE
    Trailing spaces are left out of the hash, so that 'A ' and 'A' hash
    to the same value and can be treated as identical.
*/
void my_hash_sort_8bit_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                           const uchar *key, size_t len,
                           ulong *nr1, ulong *nr2)
{
  /* End of the significant part, i.e. start of the trailing spaces */
  const uchar *hash_end= skip_trailing_space(key, len);
  const uchar *cur= key;

  while (cur < hash_end)
  {
    ulong mix= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
                        ((uint) *cur)) + (nr1[0] << 8);
    nr1[0]^= mix;
    nr2[0]+= 3;
    cur++;
  }
}
297
298
/*
  Fold a real binary string into a running hash.

  SYNOPSIS
    my_hash_sort_bin()
    cs    Character set (not used)
    key   String to hash
    len   Length of 'key' in bytes
    nr1   IN/OUT: hash value, updated once per byte
    nr2   IN/OUT: hash increment state, advanced by 3 per byte

  NOTE
    All 'len' bytes take part in the hash; trailing spaces are
    significant here.
*/
void my_hash_sort_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
                      const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
  const uchar *hash_end= key + len;
  const uchar *cur= key;

  while (cur < hash_end)
  {
    ulong mix= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) *
                        ((uint) *cur)) + (nr1[0] << 8);
    nr1[0]^= mix;
    nr2[0]+= 3;
    cur++;
  }
}
313
314
/*
  The following defines are here to keep the code below identical to
  the one in ctype-simple.c.

  likeconv(s,A)     No conversion is applied before comparing: yields A.
  INC_PTR(cs,A,B)   Advances pointer A by one byte; cs and B are ignored.
                    The expansion is fully parenthesized so that it stays
                    a single expression wherever it is used.
*/

#define likeconv(s,A) (A)
#define INC_PTR(cs,A,B) ((A)++)
322
323
324 static
my_wildcmp_bin_impl(const CHARSET_INFO * cs,const char * str,const char * str_end,const char * wildstr,const char * wildend,int escape,int w_one,int w_many,int recurse_level)325 int my_wildcmp_bin_impl(const CHARSET_INFO *cs,
326 const char *str,const char *str_end,
327 const char *wildstr,const char *wildend,
328 int escape, int w_one, int w_many, int recurse_level)
329 {
330 int result= -1; /* Not found, using wildcards */
331
332 if (my_string_stack_guard && my_string_stack_guard(recurse_level))
333 return 1;
334 while (wildstr != wildend)
335 {
336 while (*wildstr != w_many && *wildstr != w_one)
337 {
338 if (*wildstr == escape && wildstr+1 != wildend)
339 wildstr++;
340 if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
341 return(1); /* No match */
342 if (wildstr == wildend)
343 return(str != str_end); /* Match if both are at end */
344 result=1; /* Found an anchor char */
345 }
346 if (*wildstr == w_one)
347 {
348 do
349 {
350 if (str == str_end) /* Skip one char if possible */
351 return(result);
352 INC_PTR(cs,str,str_end);
353 } while (++wildstr < wildend && *wildstr == w_one);
354 if (wildstr == wildend)
355 break;
356 }
357 if (*wildstr == w_many)
358 { /* Found w_many */
359 uchar cmp;
360 wildstr++;
361 /* Remove any '%' and '_' from the wild search string */
362 for (; wildstr != wildend ; wildstr++)
363 {
364 if (*wildstr == w_many)
365 continue;
366 if (*wildstr == w_one)
367 {
368 if (str == str_end)
369 return(-1);
370 INC_PTR(cs,str,str_end);
371 continue;
372 }
373 break; /* Not a wild character */
374 }
375 if (wildstr == wildend)
376 return(0); /* match if w_many is last */
377 if (str == str_end)
378 return(-1);
379
380 if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
381 cmp= *++wildstr;
382
383 INC_PTR(cs,wildstr,wildend); /* This is compared through cmp */
384 cmp=likeconv(cs,cmp);
385 do
386 {
387 while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
388 str++;
389 if (str++ == str_end)
390 return(-1);
391 {
392 int tmp=my_wildcmp_bin_impl(cs,str,str_end,
393 wildstr,wildend,escape,
394 w_one, w_many, recurse_level + 1);
395 if (tmp <= 0)
396 return(tmp);
397 }
398 } while (str != str_end && wildstr[0] != w_many);
399 return(-1);
400 }
401 }
402 return(str != str_end ? 1 : 0);
403 }
404
/*
  Match a string against a wildcard pattern (binary comparison).

  Entry point for my_wildcmp_bin_impl(): forwards all arguments and
  starts the recursion depth counter at 1.

  RETURN
    The result of my_wildcmp_bin_impl(); 0 means the string matches.
*/
int my_wildcmp_bin(const CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
                   int escape, int w_one, int w_many)
{
  const int first_recurse_level= 1;

  return my_wildcmp_bin_impl(cs, str, str_end, wildstr, wildend,
                             escape, w_one, w_many, first_recurse_level);
}
414
415
416 static size_t
my_strnxfrm_8bit_bin(const CHARSET_INFO * cs,uchar * dst,size_t dstlen,uint nweights,const uchar * src,size_t srclen,uint flags)417 my_strnxfrm_8bit_bin(const CHARSET_INFO *cs,
418 uchar * dst, size_t dstlen, uint nweights,
419 const uchar *src, size_t srclen, uint flags)
420 {
421 set_if_smaller(srclen, dstlen);
422 set_if_smaller(srclen, nweights);
423 if (dst != src)
424 memcpy(dst, src, srclen);
425 return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
426 nweights - srclen, flags, 0);
427 }
428
429
430 static
my_instr_bin(const CHARSET_INFO * cs MY_ATTRIBUTE ((unused)),const char * b,size_t b_length,const char * s,size_t s_length,my_match_t * match,uint nmatch)431 uint my_instr_bin(const CHARSET_INFO *cs MY_ATTRIBUTE((unused)),
432 const char *b, size_t b_length,
433 const char *s, size_t s_length,
434 my_match_t *match, uint nmatch)
435 {
436 const uchar *str, *search, *end, *search_end;
437
438 if (s_length <= b_length)
439 {
440 if (!s_length)
441 {
442 if (nmatch)
443 {
444 match->beg= 0;
445 match->end= 0;
446 match->mb_len= 0;
447 }
448 return 1; /* Empty string is always found */
449 }
450
451 str= (const uchar*) b;
452 search= (const uchar*) s;
453 end= (const uchar*) b+b_length-s_length+1;
454 search_end= (const uchar*) s + s_length;
455
456 skip:
457 while (str != end)
458 {
459 if ( (*str++) == (*search))
460 {
461 const uchar *i,*j;
462
463 i= str;
464 j= search+1;
465
466 while (j != search_end)
467 if ((*i++) != (*j++))
468 goto skip;
469
470 if (nmatch > 0)
471 {
472 match[0].beg= 0;
473 match[0].end= (size_t) (str- (const uchar*)b-1);
474 match[0].mb_len= match[0].end;
475
476 if (nmatch > 1)
477 {
478 match[1].beg= match[0].end;
479 match[1].end= match[0].end+s_length;
480 match[1].mb_len= match[1].end-match[1].beg;
481 }
482 }
483 return 2;
484 }
485 }
486 }
487 return 0;
488 }
489
490
491 MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
492 {
493 my_coll_init_8bit_bin,
494 my_strnncoll_8bit_bin,
495 my_strnncollsp_8bit_bin,
496 my_strnxfrm_8bit_bin,
497 my_strnxfrmlen_simple,
498 my_like_range_simple,
499 my_wildcmp_bin,
500 my_strcasecmp_bin,
501 my_instr_bin,
502 my_hash_sort_8bit_bin,
503 my_propagate_simple
504 };
505
506
507 static MY_COLLATION_HANDLER my_collation_binary_handler =
508 {
509 NULL, /* init */
510 my_strnncoll_binary,
511 my_strnncollsp_binary,
512 my_strnxfrm_8bit_bin,
513 my_strnxfrmlen_simple,
514 my_like_range_simple,
515 my_wildcmp_bin,
516 my_strcasecmp_bin,
517 my_instr_bin,
518 my_hash_sort_bin,
519 my_propagate_simple
520 };
521
522
523 static MY_CHARSET_HANDLER my_charset_handler=
524 {
525 NULL, /* init */
526 NULL, /* ismbchar */
527 my_mbcharlen_8bit, /* mbcharlen */
528 my_numchars_8bit,
529 my_charpos_8bit,
530 my_well_formed_len_8bit,
531 my_lengthsp_binary,
532 my_numcells_8bit,
533 my_mb_wc_bin,
534 my_wc_mb_bin,
535 my_mb_ctype_8bit,
536 my_case_str_bin,
537 my_case_str_bin,
538 my_case_bin,
539 my_case_bin,
540 my_snprintf_8bit,
541 my_long10_to_str_8bit,
542 my_longlong10_to_str_8bit,
543 my_fill_8bit,
544 my_strntol_8bit,
545 my_strntoul_8bit,
546 my_strntoll_8bit,
547 my_strntoull_8bit,
548 my_strntod_8bit,
549 my_strtoll10_8bit,
550 my_strntoull10rnd_8bit,
551 my_scan_8bit
552 };
553
554
555 CHARSET_INFO my_charset_bin =
556 {
557 63,0,0, /* number */
558 MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
559 "binary", /* cs name */
560 "binary", /* name */
561 "", /* comment */
562 NULL, /* tailoring */
563 ctype_bin, /* ctype */
564 bin_char_array, /* to_lower */
565 bin_char_array, /* to_upper */
566 NULL, /* sort_order */
567 NULL, /* uca */
568 NULL, /* tab_to_uni */
569 NULL, /* tab_from_uni */
570 &my_unicase_default, /* caseinfo */
571 NULL, /* state_map */
572 NULL, /* ident_map */
573 1, /* strxfrm_multiply */
574 1, /* caseup_multiply */
575 1, /* casedn_multiply */
576 1, /* mbminlen */
577 1, /* mbmaxlen */
578 0, /* min_sort_char */
579 255, /* max_sort_char */
580 0, /* pad char */
581 0, /* escape_with_backslash_is_dangerous */
582 1, /* levels_for_compare */
583 1, /* levels_for_order */
584 &my_charset_handler,
585 &my_collation_binary_handler
586 };
587