1 /*
2  Copyright 2010 Sun Microsystems, Inc.
3  All rights reserved. Use is subject to license terms.
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License, version 2.0,
7  as published by the Free Software Foundation.
8 
9  This program is also distributed with certain software (including
10  but not limited to OpenSSL) that is licensed under separate terms,
11  as designated in a particular file or component or in included license
12  documentation.  The authors of MySQL hereby grant you an additional
13  permission to link the program and your derivative works with the
14  separately licensed software that they have included with MySQL.
15 
16  This program is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  GNU General Public License, version 2.0, for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program; if not, write to the Free Software
23  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 */
25 
26 /*
27  *  CharsetMapImpl.cpp
28  *
29 */
30 
31 #include "CharsetMapImpl.h"
32 
33 #include <string.h> // not using namespaces yet
34 
35 #include "my_global.h"
36 #include "mysql.h"
37 #include "my_sys.h"
38 
39 #define MYSQL_BINARY_CHARSET 63
40 
/* build_map():
   Building the map is deferred until after my_init() etc. have fully
   initialized mysql's strings library.  It cannot be done as part of
   static initialization.
*/
build_map()46 void CharsetMapImpl::build_map()
47 {
48     int cs_ucs2 = 0;
49     int cs_utf16 = 0;
50     int cs_utf8 = 0;
51     int cs_utf8_3 = 0;
52     int cs_utf8_4 = 0;
53 
54     /* ISO 8859 Charsets */
55     put("latin1" , "windows-1252");     // Western Europe
56     put("latin2" , "ISO-8859-2");     // Central Europe
57     put("greek" , "ISO-8859-7");
58     put("hebrew", "ISO-8859-8");
59     put("latin5", "ISO-8859-9");     // Turkish
60     put("latin7", "ISO-8859-13");    // Baltics
61 
62     /* IBM & Microsoft code pages */
63     put("cp850", "IBM850");
64     put("cp852", "IBM852");
65     put("cp866", "IBM866");
66     put("cp1250", "windows-1250");
67     put("cp1251", "windows-1251");
68     put("cp1256", "windows-1256");
69     put("cp1257", "windows-1257");
70 
71     /* Asian Encodings */
72     put("ujis", "EUC-JP");
73     put("euckr", "EUC-KR");
74     put("cp932", "windows-31j");
75     put("eucjpms", "EUC_JP_Solaris");
76     put("tis620", "TIS-620");
77 
78     /* Unicode */
79     put("utf8", "UTF-8");
80     put("utf8mb3", "UTF-8");
81     put("utf8mb4", "UTF-8");
82     put("ucs2", "UTF-16");
83     put("utf16", "UTF-16");
84     put("utf32", "UTF-32");
85 
86     /* You could add here:
87      put("filename", "UTF-8");    // No. 17: filename encoding
88      ... but we're going to leave it out for now, because it should not be found
89      in the database. */
90 
91     /* Others */
92     put("hp8", "HP-ROMAN-8");
93     put("swe7", "ISO646-SE");
94     put("koi8r", "KOI8-R");      // Russian Cyrillic
95     put("koi8u", "KOI8-U");      // Ukrainian Cyrillic
96     put("macce", "MacCentralEurope");
97 
98     /* Build the fixed map */
99     for(unsigned int i = 0 ; i < 255 ; i++)
100     {
101         CHARSET_INFO *cs = get_charset(i, MYF(0));
102         register const char *mysql_name = 0;
103         const char *mapped_name = 0;
104 
105         if(cs)
106         {
107             mysql_name = cs->csname;
108             mapped_name = get(mysql_name);
109             if(! cs_ucs2 && ! strcmp(mysql_name, "ucs2"))       cs_ucs2 = i;
110             if(! cs_utf16 && ! strcmp(mysql_name, "utf16"))     cs_utf16 = i;
111             if(! cs_utf8 && ! strcmp(mysql_name, "utf8"))       cs_utf8 = i;
112             if(! cs_utf8_3 && ! strcmp(mysql_name, "utf8mb3"))  cs_utf8_3 = i;
113             if(! cs_utf8_4 && ! strcmp(mysql_name, "utf8mb4"))  cs_utf8_4 = i;
114         }
115 
116         if(mapped_name) mysql_charset_name[i] = mapped_name;
117         else            mysql_charset_name[i] = mysql_name;
118     }
119 
120     if(cs_utf16)
121         UTF16Charset = cs_utf16;
122     else if(cs_ucs2)
123         UTF16Charset = cs_ucs2;
124     else
125         UTF16Charset = 0;
126 
127     if(cs_utf8_4)
128         UTF8Charset = cs_utf8_4;
129     else if(cs_utf8_3)
130         UTF8Charset = cs_utf8_3;
131     else if(cs_utf8)
132         UTF8Charset = cs_utf8;
133     else
134         UTF8Charset = 0;
135 
136     ready = 1;
137 }
138 
139 
getName(int csnum)140 const char * CharsetMapImpl::getName(int csnum)
141 {
142     if((csnum > 255) || (csnum < 0))
143     {
144         return 0;
145     }
146     return mysql_charset_name[csnum];
147 }
148 
149 
hash(const char * name) const150 inline int CharsetMapImpl::hash(const char *name) const
151 {
152     const unsigned char *p;
153     unsigned int h = 0;
154 
155     for (p = (const unsigned char *) name ; *p != '\0' ; p++)
156         h = 27 * h + *p;
157     return h % CHARSET_MAP_HASH_TABLE_SIZE;
158 }
159 
160 
put(const char * name,const char * value)161 void CharsetMapImpl::put(const char *name, const char *value)
162 {
163     unsigned int h = hash(name);
164     MapTableItem *i = & map[h];
165     if(i->name)
166     {
167         i = new MapTableItem;
168         map[h].next = i;
169         collisions++;
170     }
171 
172     i->name = name;
173     i->value = value;
174     n_items++;
175 }
176 
177 
get(const char * name) const178 const char * CharsetMapImpl::get(const char *name) const
179 {
180     unsigned int h = hash(name);
181     const MapTableItem *i = & map[h];
182     if(i->name)
183     {
184         for( ; i ; i = i->next)
185         {
186             if(! strcmp(name, i->name)) return i->value;
187         }
188     }
189     return 0;
190 }
191