1 /*
2  Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License, version 2.0,
6  as published by the Free Software Foundation.
7 
8  This program is also distributed with certain software (including
9  but not limited to OpenSSL) that is licensed under separate terms,
10  as designated in a particular file or component or in included license
11  documentation.  The authors of MySQL hereby grant you an additional
12  permission to link the program and your derivative works with the
13  separately licensed software that they have included with MySQL.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  GNU General Public License, version 2.0, for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 /*
25  * CharsetMap.hpp
26  */
27 
28 #ifndef CharsetMap_hpp
29 #define CharsetMap_hpp
30 
31 /*
32  * This API uses the C99 exact-width type aliases as defined in <stdint.h>.
33  * Unfortunately, some C/C++ compilers still lack a stdint.h header file.
34  * (For instance, MS Visual Studio until VS2010.)
35  *
36  * Since this API has no dependency upon NDBAPI, we do not want to use
37  * the exact-width type aliases from there but delegate to a generic helper
38  * file that handles the absence of the <stdint.h>.
39  */
40 #include "mystdint.h"
41 
42 /**
43  * Handles encoding issues for character data
44  * while keeping MySQL's CHARSET_INFO structure hidden.
45  */
class CharsetMap {
public:

    /*
     * Only the default constructor is declared explicitly.  The destructor,
     * the copy constructor, and the assignment operator are deliberately
     * left to the compiler; the generated versions are sufficient.
     */
    CharsetMap();

    /**
     * One-time setup of the global resources used by CharsetMap.
     *
     * Call this exactly once, from a single thread, before using any other
     * CharsetMap function.
     */
    static void init();

    /**
     * Frees every global resource held by CharsetMap.
     *
     * Like init(), this is not thread-safe.
     */
    static void unload();

    /**
     * Maps a character set number to a standard character set name.
     *
     * The charset numbers accepted by getName(), getMysqlName(), and
     * recode() can be obtained from
     * NdbDictionary::Column::getCharsetNumber().
     *
     * In most cases the result is a preferred name from
     * http://www.iana.org/assignments/character-sets which Java
     * (e.g. java.nio, java.io, and java.lang) recognizes and can use.
     * Two exceptions exist: "binary" may be returned for a
     * BLOB / BINARY / VARBINARY column, and an obscure MySQL character set
     * such as "keybcs2" or "dec8" may be reported under its MySQL name.
     */
    const char *getName(int cs_number) const;

    /**
     * Maps a character set number to the internal mysql name of the
     * charset only, without any standardization.
     */
    const char *getMysqlName(int cs_number) const;

    /**
     * Looks up a character set number by name.
     *
     * mysql_name must be the mysql name of the charset, not the
     * standardized name produced by getName().
     */
    int getCharsetNumber(const char *mysql_name) const;

    /**
     * Shortcut that yields the character set number of UTF-8.
     */
    int getUTF8CharsetNumber() const;

    /**
     * Shortcut that yields the character set number of UTF-16.
     */
    int getUTF16CharsetNumber() const;

    /**
     * Outcome of a buffer recode operation, as returned by recode().
     *
     * NOTE(review): apart from RECODE_OK, the per-value meanings below are
     * inferred from the enumerator names -- confirm against the
     * implementation.
     */
    enum RecodeStatus {
        RECODE_OK,             /* the conversion was successful */
        RECODE_BAD_CHARSET,    /* presumably: a charset number was unusable */
        RECODE_BAD_SRC,        /* presumably: src could not be decoded */
        RECODE_BUFF_TOO_SMALL  /* presumably: dest was too small */
    };

    /**
     * Tells whether cs_number refers to a multibyte charset.
     *
     * The answer is delivered through a pointer rather than by value: the
     * pointed-to bool is true for a multibyte charset and false otherwise.
     * NOTE(review): the pointer return type suggests that a null pointer
     * may stand for an unknown charset number -- confirm against the
     * implementation before dereferencing the result.
     */
    const bool *isMultibyte(int cs_number) const;

    /**
     * Converts the string held in a source buffer from one character set
     * to another, writing the result into a destination buffer.
     *
     * @param lengths  IN/OUT array of two ints.  On entry, lengths[0] is
     *                 the source buffer length and lengths[1] is the
     *                 destination buffer length.  On return, lengths[0] is
     *                 set to the number of bytes consumed from src and
     *                 lengths[1] to the number of bytes written to dest.
     * @param cs_from  character set number the content of src is encoded in
     * @param cs_to    character set number to recode into
     * @param src      source buffer
     * @param dest     destination buffer
     *
     * @return RECODE_OK if the conversion is successful; otherwise one of
     *         the other RecodeStatus values.
     */
    RecodeStatus recode(int32_t *lengths /* IN/OUT */,
                        int cs_from,
                        int cs_to,
                        const void *src,
                        void *dest) const;

};
140 
141 #endif // CharsetMap_hpp
142