1 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    Without limiting anything contained in the foregoing, this file,
15    which is part of C Driver for MySQL (Connector/C), is also subject to the
16    Universal FOSS Exception, version 1.0, a copy of which can be found at
17    http://oss.oracle.com/licenses/universal-foss-exception.
18 
19    This program is distributed in the hope that it will be useful,
20    but WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22    GNU General Public License, version 2.0, for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
27 
28 #include "mysys_priv.h"
29 #include "my_sys.h"
30 #include "mysys_err.h"
31 #include <m_ctype.h>
32 #include <m_string.h>
33 #include <my_dir.h>
34 #include <my_xml.h>
35 #include "mysql/psi/mysql_file.h"
36 #include "sql_chars.h"
37 
38 /*
39   The code below implements this functionality:
40 
41     - Initializing charset related structures
42     - Loading dynamic charsets
43     - Searching for a proper CHARSET_INFO
44       using charset name, collation name or collation ID
45     - Setting server default character set
46 */
47 
my_charset_same(const CHARSET_INFO * cs1,const CHARSET_INFO * cs2)48 my_bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
49 {
50   return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
51 }
52 
53 
54 static uint
get_collation_number_internal(const char * name)55 get_collation_number_internal(const char *name)
56 {
57   CHARSET_INFO **cs;
58   for (cs= all_charsets;
59        cs < all_charsets + array_elements(all_charsets);
60        cs++)
61   {
62     if ( cs[0] && cs[0]->name &&
63          !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
64       return cs[0]->number;
65   }
66   return 0;
67 }
68 
69 
/**
  Attach the 8-bit handlers to a character set description.

  The collation handler depends on MY_CS_BINSORT in cs->state: the binary
  8-bit handler when the flag is set, the simple case-insensitive 8-bit
  handler otherwise. The charset handler is always the 8-bit one.

  @param cs  Character set whose "coll" and "cset" pointers are assigned
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  cs->cset= &my_charset_8bit_handler;
  cs->coll= (cs->state & MY_CS_BINSORT) ?
            &my_collation_8bit_bin_handler :
            &my_collation_8bit_simple_ci_handler;
}
79 
80 
81 
/**
  Copy the data members that are set in "from" into "to".

  Strings are duplicated with my_once_strdup() and tables with
  my_once_memdup(). Members that are NULL (or a zero number) in "from"
  leave the corresponding member of "to" untouched. After the ctype table
  has been copied, init_state_maps() is run on "to".

  @param to    Destination character set
  @param from  Source character set
  @return 0 on success, 1 if a duplication or init_state_maps() failed
          (members copied before the failure stay assigned)
*/
static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  if (from->number)
    to->number= from->number;

  if (from->csname &&
      !(to->csname= my_once_strdup(from->csname, MYF(MY_WME))))
    return 1;

  if (from->name &&
      !(to->name= my_once_strdup(from->name, MYF(MY_WME))))
    return 1;

  if (from->comment &&
      !(to->comment= my_once_strdup(from->comment, MYF(MY_WME))))
    return 1;

  if (from->ctype)
  {
    to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                       MY_CS_CTYPE_TABLE_SIZE,
                                       MYF(MY_WME));
    if (!to->ctype)
      return 1;
    /* The state maps are built from the freshly copied ctype table. */
    if (init_state_maps(to))
      return 1;
  }

  if (from->to_lower)
  {
    to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                          MY_CS_TO_LOWER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_lower)
      return 1;
  }

  if (from->to_upper)
  {
    to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
                                          MY_CS_TO_UPPER_TABLE_SIZE,
                                          MYF(MY_WME));
    if (!to->to_upper)
      return 1;
  }

  if (from->sort_order)
  {
    to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
                                            MY_CS_SORT_ORDER_TABLE_SIZE,
                                            MYF(MY_WME));
    if (!to->sort_order)
      return 1;
  }

  if (from->tab_to_uni)
  {
    uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
    to->tab_to_uni= (uint16*) my_once_memdup((char*) from->tab_to_uni,
                                             sz, MYF(MY_WME));
    if (!to->tab_to_uni)
      return 1;
  }

  if (from->tailoring &&
      !(to->tailoring= my_once_strdup(from->tailoring, MYF(MY_WME))))
    return 1;

  return 0;
}
142 
143 
144 
simple_cs_is_full(CHARSET_INFO * cs)145 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
146 {
147   return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
148 	   cs->to_lower) &&
149 	  (cs->number && cs->name &&
150 	  (cs->sort_order || (cs->state & MY_CS_BINSORT) )));
151 }
152 
153 
154 static void
copy_uca_collation(CHARSET_INFO * to,CHARSET_INFO * from)155 copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
156 {
157   to->cset= from->cset;
158   to->coll= from->coll;
159   to->strxfrm_multiply= from->strxfrm_multiply;
160   to->min_sort_char= from->min_sort_char;
161   to->max_sort_char= from->max_sort_char;
162   to->mbminlen= from->mbminlen;
163   to->mbmaxlen= from->mbmaxlen;
164   to->caseup_multiply= from->caseup_multiply;
165   to->casedn_multiply= from->casedn_multiply;
166   to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
167               MY_CS_STRNXFRM  | MY_CS_UNICODE;
168 }
169 
170 
/**
  Register a collation description in all_charsets[].

  This function is installed as MY_CHARSET_LOADER::add_collation by
  my_charset_loader_init_mysys().

  A description without a name, whose number is zero and cannot be
  resolved by name, or whose number is outside all_charsets[] is ignored
  and MY_XML_OK is returned.

  Otherwise the slot all_charsets[cs->number] is allocated if it is empty,
  and then:
    - if the slot is not MY_CS_COMPILED, the data of "cs" is copied into
      it and its handlers and flags are set up according to the character
      set name;
    - if the slot is MY_CS_COMPILED, only number, comment, csname and name
      are stored.

  On the processed path the members number, primary_number, binary_number,
  name, state and sort_order of "cs" are reset before returning.

  @param cs  Collation description to add
  @return MY_XML_OK on success or when the description is ignored,
          MY_XML_ERROR if an allocation or initialization step failed
*/
static int add_collation(CHARSET_INFO *cs)
{
  if (cs->name && (cs->number ||
                   (cs->number=get_collation_number_internal(cs->name))) &&
      cs->number < array_elements(all_charsets))
  {
    /*
      A collation may arrive without a character set name. Passing NULL
      to strcmp() is undefined behaviour, so compare against an empty
      string instead; it matches none of the special-cased names and the
      description is handled by the generic 8-bit branch.
    */
    const char *csname= cs->csname ? cs->csname : "";

    if (!all_charsets[cs->number])
    {
      if (!(all_charsets[cs->number]=
         (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
        return MY_XML_ERROR;
      memset(all_charsets[cs->number], 0, sizeof(CHARSET_INFO));
    }

    if (cs->primary_number == cs->number)
      cs->state |= MY_CS_PRIMARY;

    if (cs->binary_number == cs->number)
      cs->state |= MY_CS_BINSORT;

    all_charsets[cs->number]->state|= cs->state;

    if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
    {
      CHARSET_INFO *newcs= all_charsets[cs->number];
      if (cs_copy_data(newcs, cs))
        return MY_XML_ERROR;

      newcs->caseup_multiply= newcs->casedn_multiply= 1;
      newcs->levels_for_compare= 1;
      newcs->levels_for_order= 1;

      if (!strcmp(csname, "ucs2"))
      {
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else if (!strcmp(csname, "utf8") || !strcmp(csname, "utf8mb3"))
      {
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
        newcs->ctype= my_charset_utf8_unicode_ci.ctype;
        if (init_state_maps(newcs))
          return MY_XML_ERROR;
#endif
      }
      else if (!strcmp(csname, "utf8mb4"))
      {
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
        newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
      }
      else if (!strcmp(csname, "utf16"))
      {
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else if (!strcmp(csname, "utf32"))
      {
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
        copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
        newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
      }
      else
      {
        /* Generic 8-bit character set. */
        const uchar *sort_order= newcs->sort_order;
        simple_cs_init_functions(newcs);
        newcs->mbminlen= 1;
        newcs->mbmaxlen= 1;
        if (simple_cs_is_full(newcs))
        {
          newcs->state |= MY_CS_LOADED;
        }
        newcs->state|= MY_CS_AVAILABLE;

        /*
          Check if case sensitive sort order: A < a < B.
          We need MY_CS_FLAG for regex library, and for
          case sensitivity flag for 5.0 client protocol,
          to support isCaseSensitive() method in JDBC driver
        */
        if (sort_order && sort_order['A'] < sort_order['a'] &&
                          sort_order['a'] < sort_order['B'])
          newcs->state|= MY_CS_CSSORT;

        if (my_charset_is_8bit_pure_ascii(newcs))
          newcs->state|= MY_CS_PUREASCII;
        if (!my_charset_is_ascii_compatible(cs))
          newcs->state|= MY_CS_NONASCII;
      }
    }
    else
    {
      /*
        We need the below to make get_charset_name()
        and get_charset_number() work even if a
        character set has not really been compiled in.
        The above functions are used for example
        in error message compiler extra/comp_err.c.
        If a character set was compiled, this information
        will get lost and overwritten in add_compiled_collation().
      */
      CHARSET_INFO *dst= all_charsets[cs->number];
      dst->number= cs->number;
      if (cs->comment)
        if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
          return MY_XML_ERROR;
      if (cs->csname)
        if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
          return MY_XML_ERROR;
      if (cs->name)
        if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
          return MY_XML_ERROR;
    }

    /* Reset the per-collation members of the description. */
    cs->number= 0;
    cs->primary_number= 0;
    cs->binary_number= 0;
    cs->name= NULL;
    cs->state= 0;
    cs->sort_order= NULL;
  }
  return MY_XML_OK;
}
302 
303 
304 /**
305   Report character set initialization errors and warnings.
306   Be silent by default: no warnings on the client side.
307 */
308 static void
default_reporter(enum loglevel level MY_ATTRIBUTE ((unused)),const char * format MY_ATTRIBUTE ((unused)),...)309 default_reporter(enum loglevel level  MY_ATTRIBUTE ((unused)),
310                  const char *format  MY_ATTRIBUTE ((unused)),
311                  ...)
312 {
313 }
314 my_error_reporter my_charset_error_reporter= default_reporter;
315 
316 
/**
  Wrappers for memory functions my_malloc (and friends)
  with C-compatible API without extra "myf" argument.
*/
321 static void *
my_once_alloc_c(size_t size)322 my_once_alloc_c(size_t size)
323 { return my_once_alloc(size, MYF(MY_WME)); }
324 
325 
326 static void *
my_malloc_c(size_t size)327 my_malloc_c(size_t size)
328 { return my_malloc(key_memory_charset_loader, size, MYF(MY_WME)); }
329 
330 
331 static void *
my_realloc_c(void * old,size_t size)332 my_realloc_c(void *old, size_t size)
333 { return my_realloc(key_memory_charset_loader,
334                     old, size, MYF(MY_WME)); }
335 
/* Plain forwarder to my_free(), with the signature the loader expects. */
static void
my_free_c(void *ptr)
{ my_free(ptr); }
341 
342 /**
343   Initialize character set loader to use mysys memory management functions.
344   @param loader  Loader to initialize
345 */
346 void
my_charset_loader_init_mysys(MY_CHARSET_LOADER * loader)347 my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)
348 {
349   loader->error[0]= '\0';
350   loader->once_alloc= my_once_alloc_c;
351   loader->mem_malloc= my_malloc_c;
352   loader->mem_realloc= my_realloc_c;
353   loader->mem_free= my_free_c;
354   loader->reporter= my_charset_error_reporter;
355   loader->add_collation= add_collation;
356 }
357 
358 
/*
  Largest charset definition file, in bytes, that my_read_charset_file()
  accepts. The expansion is parenthesized so that the macro behaves as a
  single value inside any expression (e.g. x / MY_MAX_ALLOWED_BUF).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* Name of the index file looked up inside the charsets directory. */
#define MY_CHARSET_INDEX "Index.xml"

/*
  Optional override of the charsets directory. When it is not NULL,
  get_charsets_dir() uses it instead of the compiled-in location.
*/
const char *charsets_dir= NULL;
363 
364 
365 static my_bool
my_read_charset_file(MY_CHARSET_LOADER * loader,const char * filename,myf myflags)366 my_read_charset_file(MY_CHARSET_LOADER *loader,
367                      const char *filename,
368                      myf myflags)
369 {
370   uchar *buf;
371   int  fd;
372   size_t len, tmp_len;
373   MY_STAT stat_info;
374 
375   if (!my_stat(filename, &stat_info, MYF(myflags)))
376     return TRUE;
377 
378   len= stat_info.st_size;
379   if ((len > MY_MAX_ALLOWED_BUF) && (myflags & MY_WME))
380   {
381     my_printf_error(EE_UNKNOWN_CHARSET,
382                     "Error while reading '%s': its length %llu is larger than "
383                     "maximum allowed length %llu\n", MYF(0), filename,
384                     (unsigned long long)len,
385                     (unsigned long long)MY_MAX_ALLOWED_BUF);
386     return TRUE;
387   }
388 
389   buf= my_malloc(key_memory_charset_file, len, myflags);
390   if (!buf)
391     return TRUE;
392 
393   if ((fd= mysql_file_open(key_file_charset, filename, O_RDONLY, myflags)) < 0)
394     goto error;
395   tmp_len= mysql_file_read(fd, buf, len, myflags);
396   mysql_file_close(fd, myflags);
397   if (tmp_len != len)
398     goto error;
399 
400   if (my_parse_charset_xml(loader, (char *) buf, len))
401   {
402     my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",
403                     MYF(0), filename, loader->error);
404     goto error;
405   }
406 
407   my_free(buf);
408   return FALSE;
409 
410 error:
411   my_free(buf);
412   return TRUE;
413 }
414 
415 
get_charsets_dir(char * buf)416 char *get_charsets_dir(char *buf)
417 {
418   const char *sharedir= SHAREDIR;
419   char *res;
420   DBUG_ENTER("get_charsets_dir");
421 
422   if (charsets_dir != NULL)
423     strmake(buf, charsets_dir, FN_REFLEN-1);
424   else
425   {
426     if (test_if_hard_path(sharedir) ||
427 	is_prefix(sharedir, DEFAULT_CHARSET_HOME))
428       strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
429     else
430       strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
431 	      NullS);
432   }
433   res= convert_dirname(buf,buf,NullS);
434   DBUG_PRINT("info",("charsets dir: '%s'", buf));
435   DBUG_RETURN(res);
436 }
437 
/*
  Table of known collations. Entries are stored at the index given by
  their "number" field; unused slots are NULL.
*/
CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL};
/*
  Default collation, initially my_charset_latin1. get_charset() returns it
  directly when asked for its number.
*/
CHARSET_INFO *default_charset_info = &my_charset_latin1;
440 
/**
  Store a collation in all_charsets[] at the index given by its number
  and mark it MY_CS_AVAILABLE.

  @param cs  Collation to register; its number must be smaller than the
             size of all_charsets[] (checked with assert())
*/
void add_compiled_collation(CHARSET_INFO *cs)
{
  assert(cs->number < array_elements(all_charsets));
  cs->state|= MY_CS_AVAILABLE;
  all_charsets[cs->number]= cs;
}
447 
448 
/* Once-guard passed to my_thread_once() with init_available_charsets(). */
static my_thread_once_t charsets_initialized= MY_THREAD_ONCE_INIT;
/*
  Untouched MY_THREAD_ONCE_INIT value; free_charsets() copies it back into
  charsets_initialized.
*/
static my_thread_once_t charsets_template= MY_THREAD_ONCE_INIT;
451 
init_available_charsets(void)452 static void init_available_charsets(void)
453 {
454   char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
455   MY_CHARSET_LOADER loader;
456 
457   memset(&all_charsets, 0, sizeof(all_charsets));
458   init_compiled_charsets(MYF(0));
459 
460   /* Copy compiled charsets */
461 
462   my_charset_loader_init_mysys(&loader);
463   my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
464   my_read_charset_file(&loader, fname,
465 #ifdef MYSQL_SERVER
466                        MYF(MY_WME)
467 #else
468                        MYF(0)
469 #endif
470                        );
471 }
472 
473 
free_charsets(void)474 void free_charsets(void)
475 {
476   charsets_initialized= charsets_template;
477 }
478 
479 
/**
  Translate a "utf8mb3_*" collation name into the matching "utf8_*" name.

  The prefix is matched case-insensitively over its first 8 characters.

  @param name     Collation name to translate
  @param buf      Buffer that receives the translated name
  @param bufsize  Size of buf in bytes
  @return buf when name starts with "utf8mb3_", NULL otherwise
*/
static const char*
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
{
  if (native_strncasecmp(name, "utf8mb3_", 8) != 0)
    return NULL;

  /* Keep everything after the prefix and prepend "utf8_". */
  my_snprintf(buf, bufsize, "utf8_%s", name + 8);
  return buf;
}
490 
491 
get_collation_number(const char * name)492 uint get_collation_number(const char *name)
493 {
494   uint id;
495   char alias[64];
496   my_thread_once(&charsets_initialized, init_available_charsets);
497   if ((id= get_collation_number_internal(name)))
498     return id;
499   if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
500     return get_collation_number_internal(name);
501   return 0;
502 }
503 
504 
505 static uint
get_charset_number_internal(const char * charset_name,uint cs_flags)506 get_charset_number_internal(const char *charset_name, uint cs_flags)
507 {
508   CHARSET_INFO **cs;
509 
510   for (cs= all_charsets;
511        cs < all_charsets + array_elements(all_charsets);
512        cs++)
513   {
514     if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
515          !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
516       return cs[0]->number;
517   }
518   return 0;
519 }
520 
521 
522 static const char*
get_charset_name_alias(const char * name)523 get_charset_name_alias(const char *name)
524 {
525   if (!my_strcasecmp(&my_charset_latin1, name, "utf8mb3"))
526     return "utf8";
527   return NULL;
528 }
529 
530 
/**
  Find a collation number by character set name and state flags.

  The charset table is initialized on first use. If the name is not found
  as given, its alias from get_charset_name_alias() is tried.

  @param charset_name  Character set name
  @param cs_flags      State bits the entry must share
  @return Collation number, or 0 if neither the name nor its alias matches
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  uint id;
  const char *alias_name;

  my_thread_once(&charsets_initialized, init_available_charsets);

  id= get_charset_number_internal(charset_name, cs_flags);
  if (id)
    return id;

  alias_name= get_charset_name_alias(charset_name);
  if (!alias_name)
    return 0;
  return get_charset_number_internal(alias_name, cs_flags);
}
541 
542 
/**
  Return the "name" field of the all_charsets[] entry with a given number.

  The charset table is initialized on first use.

  @param charset_number  Index into all_charsets[]
  @return The entry's name, or "?" when the number is out of range, the
          slot is empty, the entry's number differs from charset_number,
          or the entry has no name
*/
const char *get_charset_name(uint charset_number)
{
  CHARSET_INFO *cs;

  my_thread_once(&charsets_initialized, init_available_charsets);

  if (charset_number >= array_elements(all_charsets))
    return "?";   /* this mimics find_type() */

  cs= all_charsets[charset_number];
  if (!cs || cs->number != charset_number || !cs->name)
    return "?";   /* this mimics find_type() */

  return (char*) cs->name;
}
557 
558 
559 static CHARSET_INFO *
get_internal_charset(MY_CHARSET_LOADER * loader,uint cs_number,myf flags)560 get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
561 {
562   char  buf[FN_REFLEN];
563   CHARSET_INFO *cs;
564 
565   assert(cs_number < array_elements(all_charsets));
566 
567   if ((cs= all_charsets[cs_number]))
568   {
569     if (cs->state & MY_CS_READY)  /* if CS is already initialized */
570         return cs;
571 
572     /*
573       To make things thread safe we are not allowing other threads to interfere
574       while we may changing the cs_info_table
575     */
576     mysql_mutex_lock(&THR_LOCK_charset);
577 
578     if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
579     {
580       MY_CHARSET_LOADER loader;
581       strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
582       my_charset_loader_init_mysys(&loader);
583       my_read_charset_file(&loader, buf, flags);
584     }
585 
586     if (cs->state & MY_CS_AVAILABLE)
587     {
588       if (!(cs->state & MY_CS_READY))
589       {
590         if ((cs->cset->init && cs->cset->init(cs, loader)) ||
591             (cs->coll->init && cs->coll->init(cs, loader)))
592         {
593           cs= NULL;
594         }
595         else
596           cs->state|= MY_CS_READY;
597       }
598     }
599     else
600       cs= NULL;
601 
602     mysql_mutex_unlock(&THR_LOCK_charset);
603   }
604   return cs;
605 }
606 
607 
get_charset(uint cs_number,myf flags)608 CHARSET_INFO *get_charset(uint cs_number, myf flags)
609 {
610   CHARSET_INFO *cs;
611   MY_CHARSET_LOADER loader;
612 
613   if (cs_number == default_charset_info->number)
614     return default_charset_info;
615 
616   my_thread_once(&charsets_initialized, init_available_charsets);
617 
618   if (cs_number >= array_elements(all_charsets))
619     return NULL;
620 
621   my_charset_loader_init_mysys(&loader);
622   cs= get_internal_charset(&loader, cs_number, flags);
623 
624   if (!cs && (flags & MY_WME))
625   {
626     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
627     my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
628     cs_string[0]='#';
629     int10_to_str(cs_number, cs_string+1, 10);
630     my_error(EE_UNKNOWN_CHARSET, MYF(0), cs_string, index_file);
631   }
632   return cs;
633 }
634 
635 
636 /**
637   Find collation by name: extended version of get_charset_by_name()
638   to return error messages to the caller.
639   @param   loader  Character set loader
640   @param   name    Collation name
641   @param   flags   Flags
642   @return          NULL on error, pointer to collation on success
643 */
644 
645 CHARSET_INFO *
my_collation_get_by_name(MY_CHARSET_LOADER * loader,const char * name,myf flags)646 my_collation_get_by_name(MY_CHARSET_LOADER *loader,
647                          const char *name, myf flags)
648 {
649   uint cs_number;
650   CHARSET_INFO *cs;
651   my_thread_once(&charsets_initialized, init_available_charsets);
652 
653   cs_number= get_collation_number(name);
654   my_charset_loader_init_mysys(loader);
655   cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
656 
657   if (!cs && (flags & MY_WME))
658   {
659     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
660     my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
661     my_error(EE_UNKNOWN_COLLATION, MYF(0), name, index_file);
662   }
663   return cs;
664 }
665 
666 
get_charset_by_name(const char * cs_name,myf flags)667 CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
668 {
669   MY_CHARSET_LOADER loader;
670   my_charset_loader_init_mysys(&loader);
671   return my_collation_get_by_name(&loader, cs_name, flags);
672 }
673 
674 
675 /**
676   Find character set by name: extended version of get_charset_by_csname()
677   to return error messages to the caller.
678   @param   loader   Character set loader
679   @param   name     Collation name
680   @param   cs_flags Character set flags (e.g. default or binary collation)
681   @param   flags    Flags
682   @return           NULL on error, pointer to collation on success
683 */
684 CHARSET_INFO *
my_charset_get_by_name(MY_CHARSET_LOADER * loader,const char * cs_name,uint cs_flags,myf flags)685 my_charset_get_by_name(MY_CHARSET_LOADER *loader,
686                        const char *cs_name, uint cs_flags, myf flags)
687 {
688   uint cs_number;
689   CHARSET_INFO *cs;
690   DBUG_ENTER("get_charset_by_csname");
691   DBUG_PRINT("enter",("name: '%s'", cs_name));
692 
693   my_thread_once(&charsets_initialized, init_available_charsets);
694 
695   cs_number= get_charset_number(cs_name, cs_flags);
696   cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
697 
698   if (!cs && (flags & MY_WME))
699   {
700     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
701     my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
702     my_error(EE_UNKNOWN_CHARSET, MYF(0), cs_name, index_file);
703   }
704 
705   DBUG_RETURN(cs);
706 }
707 
708 
709 CHARSET_INFO *
get_charset_by_csname(const char * cs_name,uint cs_flags,myf flags)710 get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)
711 {
712   MY_CHARSET_LOADER loader;
713   my_charset_loader_init_mysys(&loader);
714   return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);
715 }
716 
717 
718 /**
719   Resolve character set by the character set name (utf8, latin1, ...).
720 
721   The function tries to resolve character set by the specified name. If
722   there is character set with the given name, it is assigned to the "cs"
723   parameter and FALSE is returned. If there is no such character set,
724   "default_cs" is assigned to the "cs" and TRUE is returned.
725 
726   @param[in] cs_name    Character set name.
727   @param[in] default_cs Default character set.
728   @param[out] cs        Variable to store character set.
729 
730   @return FALSE if character set was resolved successfully; TRUE if there
731   is no character set with given name.
732 */
733 
resolve_charset(const char * cs_name,const CHARSET_INFO * default_cs,const CHARSET_INFO ** cs)734 my_bool resolve_charset(const char *cs_name,
735                         const CHARSET_INFO *default_cs,
736                         const CHARSET_INFO **cs)
737 {
738   *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
739 
740   if (*cs == NULL)
741   {
742     *cs= default_cs;
743     return TRUE;
744   }
745 
746   return FALSE;
747 }
748 
749 
750 /**
751   Resolve collation by the collation name (utf8_general_ci, ...).
752 
753   The function tries to resolve collation by the specified name. If there
754   is collation with the given name, it is assigned to the "cl" parameter
755   and FALSE is returned. If there is no such collation, "default_cl" is
756   assigned to the "cl" and TRUE is returned.
757 
758   @param[out] cl        Variable to store collation.
759   @param[in] cl_name    Collation name.
760   @param[in] default_cl Default collation.
761 
762   @return FALSE if collation was resolved successfully; TRUE if there is no
763   collation with given name.
764 */
765 
resolve_collation(const char * cl_name,const CHARSET_INFO * default_cl,const CHARSET_INFO ** cl)766 my_bool resolve_collation(const char *cl_name,
767                           const CHARSET_INFO *default_cl,
768                           const CHARSET_INFO **cl)
769 {
770   *cl= get_charset_by_name(cl_name, MYF(0));
771 
772   if (*cl == NULL)
773   {
774     *cl= default_cl;
775     return TRUE;
776   }
777 
778   return FALSE;
779 }
780 
781 
/*
  Escape string with backslashes (\)

  SYNOPSIS
    escape_string_for_mysql()
    charset_info        Charset of the strings
    to                  Buffer for escaped string
    to_length           Length of destination buffer, or 0
    from                The string to escape
    length              The length of the string to escape

  DESCRIPTION
    This escapes the contents of a string by adding backslashes before special
    characters, and turning others into specific escape sequences, such as
    turning newlines into \n and null bytes into \0.

    The bytes handled are: NUL (\0), newline (\n), carriage return (\r),
    backslash, single quote, double quote and 0x1A (\Z). In a multi-byte
    charset, complete multi-byte characters are copied unchanged, and a
    byte that only looks like the start of a multi-byte character is
    prefixed with a backslash.

    The result is always terminated with a 0 byte, which is not counted in
    the returned length.

  NOTE
    To maintain compatibility with the old C API, to_length may be 0 to mean
    "big enough". In that case up to 2*length bytes plus the terminating
    0 byte are written to the buffer.

  RETURN VALUES
    (size_t) -1 The escaped string did not fit in the to buffer
    #           The length of the escaped string
*/

size_t escape_string_for_mysql(const CHARSET_INFO *charset_info,
                               char *to, size_t to_length,
                               const char *from, size_t length)
{
  const char *to_start= to;
  /* to_end is the last position where the terminating 0 may be stored. */
  const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
  my_bool overflow= FALSE;
  my_bool use_mb_flag= use_mb(charset_info);
  for (end= from + length; from < end; from++)
  {
    char escape= 0;
    int tmp_length;
    /* A complete multi-byte character is copied through unchanged. */
    if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
    {
      if (to + tmp_length > to_end)
      {
        overflow= TRUE;
        break;
      }
      while (tmp_length--)
	*to++= *from++;
      /* Compensate for the from++ done by the for statement. */
      from--;
      continue;
    }
    /*
     If the next character appears to begin a multi-byte character, we
     escape that first byte of that apparent multi-byte character. (The
     character just looks like a multi-byte character -- if it were actually
     a multi-byte character, it would have been passed through in the test
     above.)

     Without this check, we can create a problem by converting an invalid
     multi-byte character into a valid one. For example, 0xbf27 is not
     a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
    */
    tmp_length= use_mb_flag ? my_mbcharlen_ptr(charset_info, from, end) : 0;
    if (tmp_length > 1)
      escape= *from;
    else
    switch (*from) {
    case 0:				/* Must be escaped for 'mysql' */
      escape= '0';
      break;
    case '\n':				/* Must be escaped for logs */
      escape= 'n';
      break;
    case '\r':
      escape= 'r';
      break;
    case '\\':
      escape= '\\';
      break;
    case '\'':
      escape= '\'';
      break;
    case '"':				/* Better safe than sorry */
      escape= '"';
      break;
    case '\032':			/* This gives problems on Win32 */
      escape= 'Z';
      break;
    }
    if (escape)
    {
      /* An escaped byte takes two output bytes: backslash plus code. */
      if (to + 2 > to_end)
      {
        overflow= TRUE;
        break;
      }
      *to++= '\\';
      *to++= escape;
    }
    else
    {
      if (to + 1 > to_end)
      {
        overflow= TRUE;
        break;
      }
      *to++= *from;
    }
  }
  /* Terminate the output, also after an overflow. */
  *to= 0;
  return overflow ? (size_t) -1 : (size_t) (to - to_start);
}
892 
893 
#ifdef _WIN32
/* Result of the first fs_character_set() call; NULL until then. */
static CHARSET_INFO *fs_cset_cache= NULL;

/**
  Return the character set chosen for the system default ANSI code page.

  The code page is queried with GetLocaleInfo() on the first call and the
  result is cached in fs_cset_cache.

  @return &my_charset_cp932_japanese_ci when HAVE_CHARSET_cp932 is defined
          and the code page string is "cp932"; &my_charset_bin otherwise
*/
CHARSET_INFO *fs_character_set()
{
  char codepage[10]= "cp";
  CHARSET_INFO *detected= &my_charset_bin;

  if (fs_cset_cache)
    return fs_cset_cache;

  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                codepage+2, sizeof(codepage)-3);
  /*
    We cannot call get_charset_by_name here
    because fs_character_set() is executed before
    the THR_LOCK_charset mutex is initialized, and that
    mutex is used inside get_charset_by_name.
    As we're now interested in cp932 only,
    let's just detect it using strcmp().
  */
#ifdef HAVE_CHARSET_cp932
  if (!strcmp(codepage, "cp932"))
    detected= &my_charset_cp932_japanese_ci;
#endif
  fs_cset_cache= detected;
  return fs_cset_cache;
}
#endif
921 
922 /*
923   Escape apostrophes by doubling them up
924 
925   SYNOPSIS
926     escape_quotes_for_mysql()
927     charset_info        Charset of the strings
928     to                  Buffer for escaped string
929     to_length           Length of destination buffer, or 0
930     from                The string to escape
931     length              The length of the string to escape
932     quote               The quote the buffer will be escaped against
933 
934   DESCRIPTION
935     This escapes the contents of a string by doubling up any character
936     specified by the quote parameter. This is used when the
937     NO_BACKSLASH_ESCAPES SQL_MODE is in effect on the server.
938 
939   NOTE
940     To be consistent with escape_string_for_mysql(), to_length may be 0 to
941     mean "big enough"
942 
943   RETURN VALUES
944     ~0          The escaped string did not fit in the to buffer
945     >=0         The length of the escaped string
946 */
947 
escape_quotes_for_mysql(CHARSET_INFO * charset_info,char * to,size_t to_length,const char * from,size_t length,char quote)948 size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
949                                char *to, size_t to_length,
950                                const char *from, size_t length, char quote)
951 {
952   const char *to_start= to;
953   const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
954   my_bool overflow= FALSE;
955   my_bool use_mb_flag= use_mb(charset_info);
956   for (end= from + length; from < end; from++)
957   {
958     int tmp_length;
959     if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
960     {
961       if (to + tmp_length > to_end)
962       {
963         overflow= TRUE;
964         break;
965       }
966       while (tmp_length--)
967 	*to++= *from++;
968       from--;
969       continue;
970     }
971     /*
972       We don't have the same issue here with a non-multi-byte character being
973       turned into a multi-byte character by the addition of an escaping
974       character, because we are only escaping the ' character with itself.
975      */
976     if (*from == quote)
977     {
978       if (to + 2 > to_end)
979       {
980         overflow= TRUE;
981         break;
982       }
983       *to++= quote;
984       *to++= quote;
985     }
986     else
987     {
988       if (to + 1 > to_end)
989       {
990         overflow= TRUE;
991         break;
992       }
993       *to++= *from;
994     }
995   }
996   *to= 0;
997   return overflow ? (ulong)~0 : (ulong) (to - to_start);
998 }
999