1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 Without limiting anything contained in the foregoing, this file,
15 which is part of C Driver for MySQL (Connector/C), is also subject to the
16 Universal FOSS Exception, version 1.0, a copy of which can be found at
17 http://oss.oracle.com/licenses/universal-foss-exception.
18
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License, version 2.0, for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
27
28 #include "mysys_priv.h"
29 #include "my_sys.h"
30 #include "mysys_err.h"
31 #include <m_ctype.h>
32 #include <m_string.h>
33 #include <my_dir.h>
34 #include <my_xml.h>
35 #include "mysql/psi/mysql_file.h"
36 #include "sql_chars.h"
37
38 /*
39 The code below implements this functionality:
40
41 - Initializing charset related structures
42 - Loading dynamic charsets
43 - Searching for a proper CHARSET_INFO
44 using charset name, collation name or collation ID
45 - Setting server default character set
46 */
47
my_charset_same(const CHARSET_INFO * cs1,const CHARSET_INFO * cs2)48 my_bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
49 {
50 return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
51 }
52
53
54 static uint
get_collation_number_internal(const char * name)55 get_collation_number_internal(const char *name)
56 {
57 CHARSET_INFO **cs;
58 for (cs= all_charsets;
59 cs < all_charsets + array_elements(all_charsets);
60 cs++)
61 {
62 if ( cs[0] && cs[0]->name &&
63 !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
64 return cs[0]->number;
65 }
66 return 0;
67 }
68
69
/**
  Attach the 8-bit handlers to a collation.

  The character set handler is always my_charset_8bit_handler. The
  collation handler is the binary one when MY_CS_BINSORT is set in
  cs->state, and the simple case-insensitive one otherwise.

  @param cs  Collation to update.
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->cset = &my_charset_8bit_handler;
  cs->coll = (cs->state & MY_CS_BINSORT)
               ? &my_collation_8bit_bin_handler
               : &my_collation_8bit_simple_ci_handler;
}
79
80
81
/**
  Copy the data that is present in "from" into "to".

  Only members that are set (non-zero / non-NULL) in "from" are copied.
  Strings and tables are duplicated with my_once_strdup() and
  my_once_memdup(). After the ctype table has been copied,
  init_state_maps() is run on "to".

  @param to    Destination collation.
  @param from  Source collation.

  @return 0 on success, 1 when a duplication or init_state_maps() failed.
*/
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  const myf wme = MYF(MY_WME);

  if (from->number)
    to->number = from->number;

  if (from->csname &&
      !(to->csname = my_once_strdup(from->csname, wme)))
    return 1;

  if (from->name &&
      !(to->name = my_once_strdup(from->name, wme)))
    return 1;

  if (from->comment &&
      !(to->comment = my_once_strdup(from->comment, wme)))
    return 1;

  if (from->ctype)
  {
    to->ctype = (uchar *) my_once_memdup((char *) from->ctype,
                                         MY_CS_CTYPE_TABLE_SIZE, wme);
    if (!to->ctype)
      return 1;
    if (init_state_maps(to))
      return 1;
  }

  if (from->to_lower &&
      !(to->to_lower = (uchar *) my_once_memdup((char *) from->to_lower,
                                                MY_CS_TO_LOWER_TABLE_SIZE,
                                                wme)))
    return 1;

  if (from->to_upper &&
      !(to->to_upper = (uchar *) my_once_memdup((char *) from->to_upper,
                                                MY_CS_TO_UPPER_TABLE_SIZE,
                                                wme)))
    return 1;

  if (from->sort_order &&
      !(to->sort_order = (uchar *) my_once_memdup((char *) from->sort_order,
                                                  MY_CS_SORT_ORDER_TABLE_SIZE,
                                                  wme)))
    return 1;

  if (from->tab_to_uni)
  {
    const uint table_bytes = MY_CS_TO_UNI_TABLE_SIZE * sizeof(uint16);

    to->tab_to_uni = (uint16 *) my_once_memdup((char *) from->tab_to_uni,
                                               table_bytes, wme);
    if (!to->tab_to_uni)
      return 1;
  }

  if (from->tailoring &&
      !(to->tailoring = my_once_strdup(from->tailoring, wme)))
    return 1;

  return 0;
}
142
143
144
simple_cs_is_full(CHARSET_INFO * cs)145 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
146 {
147 return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
148 cs->to_lower) &&
149 (cs->number && cs->name &&
150 (cs->sort_order || (cs->state & MY_CS_BINSORT) )));
151 }
152
153
154 static void
copy_uca_collation(CHARSET_INFO * to,CHARSET_INFO * from)155 copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
156 {
157 to->cset= from->cset;
158 to->coll= from->coll;
159 to->strxfrm_multiply= from->strxfrm_multiply;
160 to->min_sort_char= from->min_sort_char;
161 to->max_sort_char= from->max_sort_char;
162 to->mbminlen= from->mbminlen;
163 to->mbmaxlen= from->mbmaxlen;
164 to->caseup_multiply= from->caseup_multiply;
165 to->casedn_multiply= from->casedn_multiply;
166 to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
167 MY_CS_STRNXFRM | MY_CS_UNICODE;
168 }
169
170
/**
  Register a collation description (as filled in by the charset loader)
  in all_charsets.

  The collation is ignored (MY_XML_OK is returned and "cs" is left
  untouched) when it has no name, when no collation number is given and
  none can be found by name, or when the number does not fit into
  all_charsets.

  Otherwise the slot all_charsets[cs->number] is allocated on demand and:
  - if the slot is not flagged MY_CS_COMPILED, the data of "cs" is copied
    into it and the handlers are chosen from the character set name;
  - if it is flagged MY_CS_COMPILED, only number, comment, csname and
    name are recorded.
  On success the identifying members of "cs" are reset so that the
  structure can be reused for the next collation.

  @param cs  Collation description to register.

  @return MY_XML_OK on success or when the collation is ignored,
          MY_XML_ERROR on allocation/initialization failure or when a
          non-compiled collation carries no character set name.
*/
static int add_collation(CHARSET_INFO *cs)
{
  CHARSET_INFO *slot;

  if (!cs->name)
    return MY_XML_OK;
  if (!cs->number &&
      !(cs->number= get_collation_number_internal(cs->name)))
    return MY_XML_OK;
  if (cs->number >= array_elements(all_charsets))
    return MY_XML_OK;

  slot= all_charsets[cs->number];
  if (!slot)
  {
    slot= (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO), MYF(0));
    if (!slot)
      return MY_XML_ERROR;
    memset(slot, 0, sizeof(CHARSET_INFO));
    all_charsets[cs->number]= slot;
  }

  if (cs->primary_number == cs->number)
    cs->state|= MY_CS_PRIMARY;

  if (cs->binary_number == cs->number)
    cs->state|= MY_CS_BINSORT;

  /*
    The non-compiled path below selects handlers with strcmp() on
    cs->csname. Reject a description without a character set name
    instead of passing NULL to strcmp().
  */
  if (!cs->csname && !((slot->state | cs->state) & MY_CS_COMPILED))
    return MY_XML_ERROR;

  slot->state|= cs->state;

  if (!(slot->state & MY_CS_COMPILED))
  {
    if (cs_copy_data(slot, cs))
      return MY_XML_ERROR;

    slot->caseup_multiply= slot->casedn_multiply= 1;
    slot->levels_for_compare= 1;
    slot->levels_for_order= 1;

    if (!strcmp(cs->csname, "ucs2"))
    {
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(slot, &my_charset_ucs2_unicode_ci);
      slot->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
    {
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(slot, &my_charset_utf8_unicode_ci);
      slot->ctype= my_charset_utf8_unicode_ci.ctype;
      if (init_state_maps(slot))
        return MY_XML_ERROR;
#endif
    }
    else if (!strcmp(cs->csname, "utf8mb4"))
    {
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(slot, &my_charset_utf8mb4_unicode_ci);
      slot->ctype= my_charset_utf8mb4_unicode_ci.ctype;
      slot->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
    }
    else if (!strcmp(cs->csname, "utf16"))
    {
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(slot, &my_charset_utf16_unicode_ci);
      slot->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else if (!strcmp(cs->csname, "utf32"))
    {
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
      copy_uca_collation(slot, &my_charset_utf32_unicode_ci);
      slot->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
    }
    else
    {
      /* Any other character set is handled as a simple 8-bit one. */
      const uchar *sort_order= slot->sort_order;

      simple_cs_init_functions(slot);
      slot->mbminlen= 1;
      slot->mbmaxlen= 1;
      if (simple_cs_is_full(slot))
        slot->state|= MY_CS_LOADED;
      slot->state|= MY_CS_AVAILABLE;

      /*
        Check if case sensitive sort order: A < a < B.
        We need MY_CS_FLAG for regex library, and for
        case sensitivity flag for 5.0 client protocol,
        to support isCaseSensitive() method in JDBC driver
      */
      if (sort_order && sort_order['A'] < sort_order['a'] &&
                        sort_order['a'] < sort_order['B'])
        slot->state|= MY_CS_CSSORT;

      if (my_charset_is_8bit_pure_ascii(slot))
        slot->state|= MY_CS_PUREASCII;
      /* ASCII compatibility is evaluated on the parsed input "cs". */
      if (!my_charset_is_ascii_compatible(cs))
        slot->state|= MY_CS_NONASCII;
    }
  }
  else
  {
    /*
      We need the below to make get_charset_name()
      and get_charset_number() working even if a
      character set has not been really compiled in.
      The above functions are used for example
      in error message compiler extra/comp_err.c.
      If a character set was compiled, this information
      will get lost and overwritten in add_compiled_collation().
    */
    slot->number= cs->number;
    if (cs->comment)
      if (!(slot->comment= my_once_strdup(cs->comment, MYF(MY_WME))))
        return MY_XML_ERROR;
    if (cs->csname)
      if (!(slot->csname= my_once_strdup(cs->csname, MYF(MY_WME))))
        return MY_XML_ERROR;
    if (cs->name)
      if (!(slot->name= my_once_strdup(cs->name, MYF(MY_WME))))
        return MY_XML_ERROR;
  }

  /* Reset the per-collation members for the next description. */
  cs->number= 0;
  cs->primary_number= 0;
  cs->binary_number= 0;
  cs->name= NULL;
  cs->state= 0;
  cs->sort_order= NULL;

  return MY_XML_OK;
}
302
303
304 /**
305 Report character set initialization errors and warnings.
306 Be silent by default: no warnings on the client side.
307 */
308 static void
default_reporter(enum loglevel level MY_ATTRIBUTE ((unused)),const char * format MY_ATTRIBUTE ((unused)),...)309 default_reporter(enum loglevel level MY_ATTRIBUTE ((unused)),
310 const char *format MY_ATTRIBUTE ((unused)),
311 ...)
312 {
313 }
314 my_error_reporter my_charset_error_reporter= default_reporter;
315
316
/**
  Wrappers for memory functions my_malloc (and friends)
  with C-compatible API without extra "myf" argument.
*/
321 static void *
my_once_alloc_c(size_t size)322 my_once_alloc_c(size_t size)
323 { return my_once_alloc(size, MYF(MY_WME)); }
324
325
326 static void *
my_malloc_c(size_t size)327 my_malloc_c(size_t size)
328 { return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); }
329
330
331 static void *
my_realloc_c(void * old,size_t size)332 my_realloc_c(void *old, size_t size)
333 { return my_realloc(key_memory_charset_loader,
334 old, size, MYF(MY_WME)); }
335
/** Plain forwarder to my_free(), used as the loader's mem_free callback. */
static void
my_free_c(void *ptr)
{ my_free(ptr); }
341
342 /**
343 Initialize character set loader to use mysys memory management functions.
344 @param loader Loader to initialize
345 */
346 void
my_charset_loader_init_mysys(MY_CHARSET_LOADER * loader)347 my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)
348 {
349 loader->error[0]= '\0';
350 loader->once_alloc= my_once_alloc_c;
351 loader->mem_malloc= my_malloc_c;
352 loader->mem_realloc= my_realloc_c;
353 loader->mem_free= my_free_c;
354 loader->reporter= my_charset_error_reporter;
355 loader->add_collation= add_collation;
356 }
357
358
/*
  Upper bound, in bytes, for a character set definition file.
  Parenthesized so that the macro expands to a single value inside any
  surrounding expression (casts, division, ...).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the index file located in the character sets directory. */
#define MY_CHARSET_INDEX "Index.xml"

/* Character sets directory override; NULL means "use the default". */
const char *charsets_dir= NULL;
363
364
365 static my_bool
my_read_charset_file(MY_CHARSET_LOADER * loader,const char * filename,myf myflags)366 my_read_charset_file(MY_CHARSET_LOADER *loader,
367 const char *filename,
368 myf myflags)
369 {
370 uchar *buf;
371 int fd;
372 size_t len, tmp_len;
373 MY_STAT stat_info;
374
375 if (!my_stat(filename, &stat_info, MYF(myflags)))
376 return TRUE;
377
378 len= stat_info.st_size;
379 if ((len > MY_MAX_ALLOWED_BUF) && (myflags & MY_WME))
380 {
381 my_printf_error(EE_UNKNOWN_CHARSET,
382 "Error while reading '%s': its length %llu is larger than "
383 "maximum allowed length %llu\n", MYF(0), filename,
384 (unsigned long long)len,
385 (unsigned long long)MY_MAX_ALLOWED_BUF);
386 return TRUE;
387 }
388
389 buf= my_malloc(key_memory_charset_file, len, myflags);
390 if (!buf)
391 return TRUE;
392
393 if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0)
394 goto error;
395 tmp_len= mysql_file_read(fd, buf, len, myflags);
396 mysql_file_close(fd, myflags);
397 if (tmp_len != len)
398 goto error;
399
400 if (my_parse_charset_xml(loader, (char *) buf, len))
401 {
402 my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",
403 MYF(0), filename, loader->error);
404 goto error;
405 }
406
407 my_free(buf);
408 return FALSE;
409
410 error:
411 my_free(buf);
412 return TRUE;
413 }
414
415
get_charsets_dir(char * buf)416 char *get_charsets_dir(char *buf)
417 {
418 const char *sharedir= SHAREDIR;
419 char *res;
420 DBUG_ENTER("get_charsets_dir");
421
422 if (charsets_dir != NULL)
423 strmake(buf, charsets_dir, FN_REFLEN-1);
424 else
425 {
426 if (test_if_hard_path(sharedir) ||
427 is_prefix(sharedir, DEFAULT_CHARSET_HOME))
428 strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
429 else
430 strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
431 NullS);
432 }
433 res= convert_dirname(buf,buf,NullS);
434 DBUG_PRINT("info",("charsets dir: '%s'", buf));
435 DBUG_RETURN(res);
436 }
437
438 CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
439 CHARSET_INFO *default_charset_info = &my_charset_latin1;
440
/**
  Register a collation in all_charsets at index cs->number and mark it
  MY_CS_AVAILABLE.

  @param cs  Collation to register; its number must be within all_charsets
             (checked with assert()).
*/
void add_compiled_collation(CHARSET_INFO *cs)
{
  const uint slot = cs->number;

  assert(slot < array_elements(all_charsets));
  cs->state |= MY_CS_AVAILABLE;
  all_charsets[slot] = cs;
}
447
448
449 static my_thread_once_t charsets_initialized= MY_THREAD_ONCE_INIT;
450 static my_thread_once_t charsets_template= MY_THREAD_ONCE_INIT;
451
init_available_charsets(void)452 static void init_available_charsets(void)
453 {
454 char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
455 MY_CHARSET_LOADER loader;
456
457 memset(&all_charsets, 0, sizeof(all_charsets));
458 init_compiled_charsets(MYF(0));
459
460 /* Copy compiled charsets */
461
462 my_charset_loader_init_mysys(&loader);
463 my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
464 my_read_charset_file(&loader, fname,
465 #ifdef MYSQL_SERVER
466 MYF(MY_WME)
467 #else
468 MYF(0)
469 #endif
470 );
471 }
472
473
free_charsets(void)474 void free_charsets(void)
475 {
476 charsets_initialized= charsets_template;
477 }
478
479
/**
  Translate a "utf8mb3_xxx" collation name into "utf8_xxx".

  @param name     Collation name to translate.
  @param buf      Buffer receiving the translated name.
  @param bufsize  Size of "buf".

  @return "buf" when "name" starts with "utf8mb3_" (compared with
          native_strncasecmp()), NULL otherwise.
*/
static const char*
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
{
  if (native_strncasecmp(name, "utf8mb3_", 8) != 0)
    return NULL;

  my_snprintf(buf, bufsize, "utf8_%s", name + 8);
  return buf;
}
490
491
get_collation_number(const char * name)492 uint get_collation_number(const char *name)
493 {
494 uint id;
495 char alias[64];
496 my_thread_once(&charsets_initialized, init_available_charsets);
497 if ((id= get_collation_number_internal(name)))
498 return id;
499 if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
500 return get_collation_number_internal(name);
501 return 0;
502 }
503
504
505 static uint
get_charset_number_internal(const char * charset_name,uint cs_flags)506 get_charset_number_internal(const char *charset_name, uint cs_flags)
507 {
508 CHARSET_INFO **cs;
509
510 for (cs= all_charsets;
511 cs < all_charsets + array_elements(all_charsets);
512 cs++)
513 {
514 if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
515 !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
516 return cs[0]->number;
517 }
518 return 0;
519 }
520
521
522 static const char*
get_charset_name_alias(const char * name)523 get_charset_name_alias(const char *name)
524 {
525 if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
526 return "utf8";
527 return NULL;
528 }
529
530
/**
  Find a collation number by character set name and state flags, trying
  the "utf8mb3" alias when the name itself is not found.

  Makes sure the character set tables are initialized first.

  @param charset_name  Character set name.
  @param cs_flags      State bits passed to the internal lookup.

  @return Collation number, or 0 when nothing matches.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  uint id;
  const char *alias_name;

  my_thread_once(&charsets_initialized, init_available_charsets);

  id = get_charset_number_internal(charset_name, cs_flags);
  if (id)
    return id;

  alias_name = get_charset_name_alias(charset_name);
  if (!alias_name)
    return 0;
  return get_charset_number_internal(alias_name, cs_flags);
}
541
542
/**
  Return the "name" member of the entry registered under a number.

  Makes sure the character set tables are initialized first.

  @param charset_number  Index into all_charsets.

  @return The entry's name, or "?" when the number is out of range, the
          slot is empty, the entry's number differs from the index, or the
          entry has no name.
*/
const char *get_charset_name(uint charset_number)
{
  const CHARSET_INFO *cs;

  my_thread_once(&charsets_initialized, init_available_charsets);

  if (charset_number >= array_elements(all_charsets))
    return "?";                             /* this mimics find_type() */

  cs = all_charsets[charset_number];
  if (!cs || cs->number != charset_number || !cs->name)
    return "?";                             /* this mimics find_type() */

  return cs->name;
}
557
558
559 static CHARSET_INFO *
get_internal_charset(MY_CHARSET_LOADER * loader,uint cs_number,myf flags)560 get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
561 {
562 char buf[FN_REFLEN];
563 CHARSET_INFO *cs;
564
565 assert(cs_number < array_elements(all_charsets));
566
567 if ((cs= all_charsets[cs_number]))
568 {
569 if (cs->state & MY_CS_READY) /* if CS is already initialized */
570 return cs;
571
572 /*
573 To make things thread safe we are not allowing other threads to interfere
574 while we may changing the cs_info_table
575 */
576 mysql_mutex_lock(&THR_LOCK_charset);
577
578 if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
579 {
580 MY_CHARSET_LOADER loader;
581 strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
582 my_charset_loader_init_mysys(&loader);
583 my_read_charset_file(&loader, buf, flags);
584 }
585
586 if (cs->state & MY_CS_AVAILABLE)
587 {
588 if (!(cs->state & MY_CS_READY))
589 {
590 if ((cs->cset->init && cs->cset->init(cs, loader)) ||
591 (cs->coll->init && cs->coll->init(cs, loader)))
592 {
593 cs= NULL;
594 }
595 else
596 cs->state|= MY_CS_READY;
597 }
598 }
599 else
600 cs= NULL;
601
602 mysql_mutex_unlock(&THR_LOCK_charset);
603 }
604 return cs;
605 }
606
607
get_charset(uint cs_number,myf flags)608 CHARSET_INFO *get_charset(uint cs_number, myf flags)
609 {
610 CHARSET_INFO *cs;
611 MY_CHARSET_LOADER loader;
612
613 if (cs_number == default_charset_info->number)
614 return default_charset_info;
615
616 my_thread_once(&charsets_initialized, init_available_charsets);
617
618 if (cs_number >= array_elements(all_charsets))
619 return NULL;
620
621 my_charset_loader_init_mysys(&loader);
622 cs= get_internal_charset(&loader, cs_number, flags);
623
624 if (!cs && (flags & MY_WME))
625 {
626 char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
627 my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
628 cs_string[0]='#';
629 int10_to_str(cs_number, cs_string+1, 10);
630 my_error(EE_UNKNOWN_CHARSET, MYF(0), cs_string, index_file);
631 }
632 return cs;
633 }
634
635
636 /**
637 Find collation by name: extended version of get_charset_by_name()
638 to return error messages to the caller.
639 @param loader Character set loader
640 @param name Collation name
641 @param flags Flags
642 @return NULL on error, pointer to collation on success
643 */
644
645 CHARSET_INFO *
my_collation_get_by_name(MY_CHARSET_LOADER * loader,const char * name,myf flags)646 my_collation_get_by_name(MY_CHARSET_LOADER *loader,
647 const char *name, myf flags)
648 {
649 uint cs_number;
650 CHARSET_INFO *cs;
651 my_thread_once(&charsets_initialized, init_available_charsets);
652
653 cs_number= get_collation_number(name);
654 my_charset_loader_init_mysys(loader);
655 cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
656
657 if (!cs && (flags & MY_WME))
658 {
659 char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
660 my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
661 my_error(EE_UNKNOWN_COLLATION, MYF(0), name, index_file);
662 }
663 return cs;
664 }
665
666
get_charset_by_name(const char * cs_name,myf flags)667 CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
668 {
669 MY_CHARSET_LOADER loader;
670 my_charset_loader_init_mysys(&loader);
671 return my_collation_get_by_name(&loader, cs_name, flags);
672 }
673
674
675 /**
676 Find character set by name: extended version of get_charset_by_csname()
677 to return error messages to the caller.
678 @param loader Character set loader
679 @param name Collation name
680 @param cs_flags Character set flags (e.g. default or binary collation)
681 @param flags Flags
682 @return NULL on error, pointer to collation on success
683 */
684 CHARSET_INFO *
my_charset_get_by_name(MY_CHARSET_LOADER * loader,const char * cs_name,uint cs_flags,myf flags)685 my_charset_get_by_name(MY_CHARSET_LOADER *loader,
686 const char *cs_name, uint cs_flags, myf flags)
687 {
688 uint cs_number;
689 CHARSET_INFO *cs;
690 DBUG_ENTER("get_charset_by_csname");
691 DBUG_PRINT("enter",("name: '%s'", cs_name));
692
693 my_thread_once(&charsets_initialized, init_available_charsets);
694
695 cs_number= get_charset_number(cs_name, cs_flags);
696 cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
697
698 if (!cs && (flags & MY_WME))
699 {
700 char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
701 my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
702 my_error(EE_UNKNOWN_CHARSET, MYF(0), cs_name, index_file);
703 }
704
705 DBUG_RETURN(cs);
706 }
707
708
709 CHARSET_INFO *
get_charset_by_csname(const char * cs_name,uint cs_flags,myf flags)710 get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)
711 {
712 MY_CHARSET_LOADER loader;
713 my_charset_loader_init_mysys(&loader);
714 return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);
715 }
716
717
718 /**
719 Resolve character set by the character set name (utf8, latin1, ...).
720
721 The function tries to resolve character set by the specified name. If
722 there is character set with the given name, it is assigned to the "cs"
723 parameter and FALSE is returned. If there is no such character set,
724 "default_cs" is assigned to the "cs" and TRUE is returned.
725
726 @param[in] cs_name Character set name.
727 @param[in] default_cs Default character set.
728 @param[out] cs Variable to store character set.
729
730 @return FALSE if character set was resolved successfully; TRUE if there
731 is no character set with given name.
732 */
733
resolve_charset(const char * cs_name,const CHARSET_INFO * default_cs,const CHARSET_INFO ** cs)734 my_bool resolve_charset(const char *cs_name,
735 const CHARSET_INFO *default_cs,
736 const CHARSET_INFO **cs)
737 {
738 *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
739
740 if (*cs == NULL)
741 {
742 *cs= default_cs;
743 return TRUE;
744 }
745
746 return FALSE;
747 }
748
749
750 /**
751 Resolve collation by the collation name (utf8_general_ci, ...).
752
753 The function tries to resolve collation by the specified name. If there
754 is collation with the given name, it is assigned to the "cl" parameter
755 and FALSE is returned. If there is no such collation, "default_cl" is
756 assigned to the "cl" and TRUE is returned.
757
758 @param[out] cl Variable to store collation.
759 @param[in] cl_name Collation name.
760 @param[in] default_cl Default collation.
761
762 @return FALSE if collation was resolved successfully; TRUE if there is no
763 collation with given name.
764 */
765
resolve_collation(const char * cl_name,const CHARSET_INFO * default_cl,const CHARSET_INFO ** cl)766 my_bool resolve_collation(const char *cl_name,
767 const CHARSET_INFO *default_cl,
768 const CHARSET_INFO **cl)
769 {
770 *cl= get_charset_by_name(cl_name, MYF(0));
771
772 if (*cl == NULL)
773 {
774 *cl= default_cl;
775 return TRUE;
776 }
777
778 return FALSE;
779 }
780
781
782 /*
783 Escape string with backslashes (\)
784
785 SYNOPSIS
786 escape_string_for_mysql()
787 charset_info Charset of the strings
788 to Buffer for escaped string
789 to_length Length of destination buffer, or 0
790 from The string to escape
791 length The length of the string to escape
792
793 DESCRIPTION
794 This escapes the contents of a string by adding backslashes before special
795 characters, and turning others into specific escape sequences, such as
796 turning newlines into \n and null bytes into \0.
797
798 NOTE
799 To maintain compatibility with the old C API, to_length may be 0 to mean
800 "big enough"
801
802 RETURN VALUES
803 (size_t) -1 The escaped string did not fit in the to buffer
804 # The length of the escaped string
805 */
806
escape_string_for_mysql(const CHARSET_INFO * charset_info,char * to,size_t to_length,const char * from,size_t length)807 size_t escape_string_for_mysql(const CHARSET_INFO *charset_info,
808 char *to, size_t to_length,
809 const char *from, size_t length)
810 {
811 const char *to_start= to;
812 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
813 my_bool overflow= FALSE;
814 my_bool use_mb_flag= use_mb(charset_info);
815 for (end= from + length; from < end; from++)
816 {
817 char escape= 0;
818 int tmp_length;
819 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
820 {
821 if (to + tmp_length > to_end)
822 {
823 overflow= TRUE;
824 break;
825 }
826 while (tmp_length--)
827 *to++= *from++;
828 from--;
829 continue;
830 }
831 /*
832 If the next character appears to begin a multi-byte character, we
833 escape that first byte of that apparent multi-byte character. (The
834 character just looks like a multi-byte character -- if it were actually
835 a multi-byte character, it would have been passed through in the test
836 above.)
837
838 Without this check, we can create a problem by converting an invalid
839 multi-byte character into a valid one. For example, 0xbf27 is not
840 a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
841 */
842 tmp_length= use_mb_flag ? my_mbcharlen_ptr(charset_info, from, end) : 0;
843 if (tmp_length > 1)
844 escape= *from;
845 else
846 switch (*from) {
847 case 0: /* Must be escaped for 'mysql' */
848 escape= '0';
849 break;
850 case '\n': /* Must be escaped for logs */
851 escape= 'n';
852 break;
853 case '\r':
854 escape= 'r';
855 break;
856 case '\\':
857 escape= '\\';
858 break;
859 case '\'':
860 escape= '\'';
861 break;
862 case '"': /* Better safe than sorry */
863 escape= '"';
864 break;
865 case '\032': /* This gives problems on Win32 */
866 escape= 'Z';
867 break;
868 }
869 if (escape)
870 {
871 if (to + 2 > to_end)
872 {
873 overflow= TRUE;
874 break;
875 }
876 *to++= '\\';
877 *to++= escape;
878 }
879 else
880 {
881 if (to + 1 > to_end)
882 {
883 overflow= TRUE;
884 break;
885 }
886 *to++= *from;
887 }
888 }
889 *to= 0;
890 return overflow ? (size_t) -1 : (size_t) (to - to_start);
891 }
892
893
#ifdef _WIN32
/* Result of the first fs_character_set() call; NULL until then. */
static CHARSET_INFO *fs_cset_cache = NULL;

/**
  Return the character set chosen from the system default ANSI code page.

  The code page is queried once with GetLocaleInfo() and turned into a
  "cp<NNN>" string. The result is my_charset_cp932_japanese_ci when that
  string is "cp932" and HAVE_CHARSET_cp932 is defined, my_charset_bin
  otherwise. The choice is cached for later calls.

  @return Pointer to the selected character set.
*/
CHARSET_INFO *fs_character_set()
{
  char buf[10] = "cp";
  CHARSET_INFO *detected = &my_charset_bin;

  if (fs_cset_cache)
    return fs_cset_cache;

  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                buf + 2, sizeof(buf) - 3);
  /*
    We cannot call get_charset_by_name here
    because fs_character_set() is executed before
    LOCK_THD_charset mutex initialization, which
    is used inside get_charset_by_name.
    As we're now interested in cp932 only,
    let's just detect it using strcmp().
  */
#ifdef HAVE_CHARSET_cp932
  if (!strcmp(buf, "cp932"))
    detected = &my_charset_cp932_japanese_ci;
#endif

  fs_cset_cache = detected;
  return fs_cset_cache;
}
#endif
921
922 /*
923 Escape apostrophes by doubling them up
924
925 SYNOPSIS
926 escape_quotes_for_mysql()
927 charset_info Charset of the strings
928 to Buffer for escaped string
929 to_length Length of destination buffer, or 0
930 from The string to escape
931 length The length of the string to escape
932 quote The quote the buffer will be escaped against
933
934 DESCRIPTION
935 This escapes the contents of a string by doubling up any character
936 specified by the quote parameter. This is used when the
937 NO_BACKSLASH_ESCAPES SQL_MODE is in effect on the server.
938
939 NOTE
940 To be consistent with escape_string_for_mysql(), to_length may be 0 to
941 mean "big enough"
942
943 RETURN VALUES
944 ~0 The escaped string did not fit in the to buffer
945 >=0 The length of the escaped string
946 */
947
escape_quotes_for_mysql(CHARSET_INFO * charset_info,char * to,size_t to_length,const char * from,size_t length,char quote)948 size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
949 char *to, size_t to_length,
950 const char *from, size_t length, char quote)
951 {
952 const char *to_start= to;
953 const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
954 my_bool overflow= FALSE;
955 my_bool use_mb_flag= use_mb(charset_info);
956 for (end= from + length; from < end; from++)
957 {
958 int tmp_length;
959 if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
960 {
961 if (to + tmp_length > to_end)
962 {
963 overflow= TRUE;
964 break;
965 }
966 while (tmp_length--)
967 *to++= *from++;
968 from--;
969 continue;
970 }
971 /*
972 We don't have the same issue here with a non-multi-byte character being
973 turned into a multi-byte character by the addition of an escaping
974 character, because we are only escaping the ' character with itself.
975 */
976 if (*from == quote)
977 {
978 if (to + 2 > to_end)
979 {
980 overflow= TRUE;
981 break;
982 }
983 *to++= quote;
984 *to++= quote;
985 }
986 else
987 {
988 if (to + 1 > to_end)
989 {
990 overflow= TRUE;
991 break;
992 }
993 *to++= *from;
994 }
995 }
996 *to= 0;
997 return overflow ? (ulong)~0 : (ulong) (to - to_start);
998 }
999