1 /*
2  Copyright (c) 2010, 2021, Oracle and/or its affiliates.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License, version 2.0,
6  as published by the Free Software Foundation.
7 
8  This program is also distributed with certain software (including
9  but not limited to OpenSSL) that is licensed under separate terms,
10  as designated in a particular file or component or in included license
11  documentation.  The authors of MySQL hereby grant you an additional
12  permission to link the program and your derivative works with the
13  separately licensed software that they have included with MySQL.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  GNU General Public License, version 2.0, for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 /*
26  *  CharsetMap.cpp
27  */
28 
29 #include "CharsetMap.hpp"
30 #include "CharsetMapImpl.h"
31 #include "my_global.h"
32 #include "mysql.h"
33 #include "my_sys.h"
34 
35 bool m_false_result = false;
36 bool m_true_result = true;
37 
38 /* _map is a static pointer visible only within the scope of this file.
39    A singleton CharsetMapImpl serves every instance of CharsetMap.
40 */
41 static CharsetMapImpl *_map = 0;
42 
43 
44 /* Initialization allocates the CharsetMapImpl and initializes its mutex.
45  But we defer building the map of charset names, so as not to create
46  any sort of ordering dependency that would require mysql_init() to
47  be run first.
48  */
49 
init()50 void CharsetMap::init()
51 {
52     if(_map == 0) _map = new CharsetMapImpl;
53 }
54 
55 
56 /* Free the CharsetMapImpl at shutdown time.
57 */
unload()58 void CharsetMap::unload()
59 {
60     delete _map;
61     _map = 0;
62 }
63 
64 
65 /* On the first invocation of the CharsetMap constructor, it completes the
66    initialization of the CharsetMapImpl by building the map of character set
67    names.
68 */
CharsetMap()69 CharsetMap::CharsetMap()
70 {
71     _map->lock();
72     if(_map->ready == 0)  _map->build_map();
73     _map->unlock();
74 }
75 
76 
getName(int csnum) const77 const char * CharsetMap::getName(int csnum) const
78 {
79     return _map->getName(csnum);
80 }
81 
82 
getMysqlName(int csnum) const83 const char * CharsetMap::getMysqlName(int csnum) const
84 {
85     CHARSET_INFO *cs = get_charset(csnum, MYF(0));
86     return cs ? cs->csname : 0;
87 }
88 
89 
getUTF8CharsetNumber() const90 int CharsetMap::getUTF8CharsetNumber() const
91 {
92     return _map->UTF8Charset;
93 }
94 
95 
getUTF16CharsetNumber() const96 int CharsetMap::getUTF16CharsetNumber() const
97 {
98     return _map->UTF16Charset;
99 }
100 
101 
getCharsetNumber(const char * name) const102 int CharsetMap::getCharsetNumber(const char *name) const
103 {
104     return get_charset_number(name, MY_CS_AVAILABLE);
105 }
106 
isMultibyte(int cs_number) const107 const bool * CharsetMap::isMultibyte(int cs_number) const
108 {
109   CHARSET_INFO * cset = get_charset(cs_number, MYF(0));
110   if(cset == 0) return 0;
111   return use_mb(cset) ? & m_true_result : & m_false_result;
112 }
113 
114 
recode(Int32 * lengths,int From,int To,const void * void_src,void * void_dest) const115 CharsetMap::RecodeStatus CharsetMap::recode(Int32 *lengths, int From, int To,
116                                             const void *void_src,
117                                             void *void_dest) const
118 {
119     Int32 &total_read = lengths[0];     // IN/OUT
120     Int32 &total_written = lengths[1];  // IN/OUT
121     my_wc_t wide;
122     my_wc_t mystery_char = '?';  // used in place of unmappable characters
123     const unsigned char * src = (const unsigned char *) void_src;
124     unsigned char * dest = (unsigned char *) void_dest;
125     CHARSET_INFO * csFrom = get_charset(From, MYF(0));
126     CHARSET_INFO * csTo  =  get_charset(To, MYF(0));
127 
128     if(! (csTo && csFrom)) return RECODE_BAD_CHARSET;
129 
130     Int32 src_len = lengths[0];
131     Int32 dest_len = lengths[1];
132     const unsigned char * src_end = src + src_len;
133     unsigned char * dest_end = dest + dest_len;
134     total_read = 0 ;            // i.e. lengths[0] = 0;
135     total_written = 0;          // i.e. lengths[1] = 0;
136 
137     while(src < src_end) {
138         /* First recode from source character to 32-bit wide character */
139         int nread = csFrom->cset->mb_wc(csFrom, &wide, src, src_end);
140         if(nread < 0) return RECODE_BUFF_TOO_SMALL;
141         if(nread == 0) return RECODE_BAD_SRC;
142 
143         /* Then recode from wide character to target character */
144         int nwritten = csTo->cset->wc_mb(csTo, wide, dest, dest_end);
145         if(nwritten == MY_CS_ILUNI) {
146             /* Character does not exist in target charset */
147             nwritten = csTo->cset->wc_mb(csTo, mystery_char, dest, dest_end);
148         }
149         if(nwritten < 0) return RECODE_BUFF_TOO_SMALL;
150 
151         total_read += nread;            src  += nread;
152         total_written += nwritten;      dest += nwritten;
153     }
154 
155     return RECODE_OK;
156 }
157