1 /*
2 Copyright (c) 2010, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 /*
26 * CharsetMap.cpp
27 */
28
29 #include "CharsetMap.hpp"
30 #include "CharsetMapImpl.h"
31 #include "my_global.h"
32 #include "mysql.h"
33 #include "my_sys.h"
34
/* Addressable boolean constants.  isMultibyte() returns a pointer to one
   of these two objects (or a null pointer) rather than a plain bool.
*/
bool m_false_result = false;
bool m_true_result = true;
37
/* _map is a static pointer visible only within the scope of this file.
   A singleton CharsetMapImpl serves every instance of CharsetMap.
   It is allocated by CharsetMap::init(), released and reset to 0 by
   CharsetMap::unload(), and starts out as 0.
*/
static CharsetMapImpl *_map = 0;
42
43
44 /* Initialization allocates the CharsetMapImpl and initializes its mutex.
45 But we defer building the map of charset names, so as not to create
46 any sort of ordering dependency that would require mysql_init() to
47 be run first.
48 */
49
init()50 void CharsetMap::init()
51 {
52 if(_map == 0) _map = new CharsetMapImpl;
53 }
54
55
56 /* Free the CharsetMapImpl at shutdown time.
57 */
unload()58 void CharsetMap::unload()
59 {
60 delete _map;
61 _map = 0;
62 }
63
64
65 /* On the first invocation of the CharsetMap constructor, it completes the
66 initialization of the CharsetMapImpl by building the map of character set
67 names.
68 */
CharsetMap()69 CharsetMap::CharsetMap()
70 {
71 _map->lock();
72 if(_map->ready == 0) _map->build_map();
73 _map->unlock();
74 }
75
76
getName(int csnum) const77 const char * CharsetMap::getName(int csnum) const
78 {
79 return _map->getName(csnum);
80 }
81
82
getMysqlName(int csnum) const83 const char * CharsetMap::getMysqlName(int csnum) const
84 {
85 CHARSET_INFO *cs = get_charset(csnum, MYF(0));
86 return cs ? cs->csname : 0;
87 }
88
89
getUTF8CharsetNumber() const90 int CharsetMap::getUTF8CharsetNumber() const
91 {
92 return _map->UTF8Charset;
93 }
94
95
getUTF16CharsetNumber() const96 int CharsetMap::getUTF16CharsetNumber() const
97 {
98 return _map->UTF16Charset;
99 }
100
101
getCharsetNumber(const char * name) const102 int CharsetMap::getCharsetNumber(const char *name) const
103 {
104 return get_charset_number(name, MY_CS_AVAILABLE);
105 }
106
isMultibyte(int cs_number) const107 const bool * CharsetMap::isMultibyte(int cs_number) const
108 {
109 CHARSET_INFO * cset = get_charset(cs_number, MYF(0));
110 if(cset == 0) return 0;
111 return use_mb(cset) ? & m_true_result : & m_false_result;
112 }
113
114
recode(Int32 * lengths,int From,int To,const void * void_src,void * void_dest) const115 CharsetMap::RecodeStatus CharsetMap::recode(Int32 *lengths, int From, int To,
116 const void *void_src,
117 void *void_dest) const
118 {
119 Int32 &total_read = lengths[0]; // IN/OUT
120 Int32 &total_written = lengths[1]; // IN/OUT
121 my_wc_t wide;
122 my_wc_t mystery_char = '?'; // used in place of unmappable characters
123 const unsigned char * src = (const unsigned char *) void_src;
124 unsigned char * dest = (unsigned char *) void_dest;
125 CHARSET_INFO * csFrom = get_charset(From, MYF(0));
126 CHARSET_INFO * csTo = get_charset(To, MYF(0));
127
128 if(! (csTo && csFrom)) return RECODE_BAD_CHARSET;
129
130 Int32 src_len = lengths[0];
131 Int32 dest_len = lengths[1];
132 const unsigned char * src_end = src + src_len;
133 unsigned char * dest_end = dest + dest_len;
134 total_read = 0 ; // i.e. lengths[0] = 0;
135 total_written = 0; // i.e. lengths[1] = 0;
136
137 while(src < src_end) {
138 /* First recode from source character to 32-bit wide character */
139 int nread = csFrom->cset->mb_wc(csFrom, &wide, src, src_end);
140 if(nread < 0) return RECODE_BUFF_TOO_SMALL;
141 if(nread == 0) return RECODE_BAD_SRC;
142
143 /* Then recode from wide character to target character */
144 int nwritten = csTo->cset->wc_mb(csTo, wide, dest, dest_end);
145 if(nwritten == MY_CS_ILUNI) {
146 /* Character does not exist in target charset */
147 nwritten = csTo->cset->wc_mb(csTo, mystery_char, dest, dest_end);
148 }
149 if(nwritten < 0) return RECODE_BUFF_TOO_SMALL;
150
151 total_read += nread; src += nread;
152 total_written += nwritten; dest += nwritten;
153 }
154
155 return RECODE_OK;
156 }
157