1 #include "TCLocalStrings.h"
2 #include "TCDictionary.h"
3 #include "TCSortedStringArray.h"
4 #include "mystring.h"
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <ctype.h>
9
10 #define UTF8_CODE_PAGE 65001
11
12 #ifdef WIN32
13 #if defined(_MSC_VER) && _MSC_VER >= 1400 && defined(_DEBUG)
14 #define new DEBUG_CLIENTBLOCK
15 #endif // _DEBUG
16 #endif // WIN32
17
// Code page 1250 -> Unicode lookup table.  The array index is the single-byte
// character value; the element is the Unicode code point it stands for.
// Chart used as the source of the mapping:
// http://www.microsoft.com/globaldev/reference/sbcs/1250.mspx
// Bytes that are holes in that chart were filled in by hand with 0xFFFF
// (marked <UNMAPPED> below).
static wchar_t g_cp1250[256] =
{
    // 0x00 - 0x7F: every byte maps to the code point of the same value.
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00 - 07
    0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08 - 0F
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10 - 17
    0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18 - 1F
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20 - 27
    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28 - 2F
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30 - 37
    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38 - 3F
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40 - 47
    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48 - 4F
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50 - 57
    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58 - 5F
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60 - 67
    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68 - 6F
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70 - 77
    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78 - 7F

    // 0x80 - 0xFF: code-page specific assignments, one entry per line.
    0x20AC, // 80: EURO SIGN
    0xFFFF, // 81: <UNMAPPED>
    0x201A, // 82: SINGLE LOW-9 QUOTATION MARK
    0xFFFF, // 83: <UNMAPPED>
    0x201E, // 84: DOUBLE LOW-9 QUOTATION MARK
    0x2026, // 85: HORIZONTAL ELLIPSIS
    0x2020, // 86: DAGGER
    0x2021, // 87: DOUBLE DAGGER
    0xFFFF, // 88: <UNMAPPED>
    0x2030, // 89: PER MILLE SIGN
    0x0160, // 8A: LATIN CAPITAL LETTER S WITH CARON
    0x2039, // 8B: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x015A, // 8C: LATIN CAPITAL LETTER S WITH ACUTE
    0x0164, // 8D: LATIN CAPITAL LETTER T WITH CARON
    0x017D, // 8E: LATIN CAPITAL LETTER Z WITH CARON
    0x0179, // 8F: LATIN CAPITAL LETTER Z WITH ACUTE
    0xFFFF, // 90: <UNMAPPED>
    0x2018, // 91: LEFT SINGLE QUOTATION MARK
    0x2019, // 92: RIGHT SINGLE QUOTATION MARK
    0x201C, // 93: LEFT DOUBLE QUOTATION MARK
    0x201D, // 94: RIGHT DOUBLE QUOTATION MARK
    0x2022, // 95: BULLET
    0x2013, // 96: EN DASH
    0x2014, // 97: EM DASH
    0xFFFF, // 98: <UNMAPPED>
    0x2122, // 99: TRADE MARK SIGN
    0x0161, // 9A: LATIN SMALL LETTER S WITH CARON
    0x203A, // 9B: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x015B, // 9C: LATIN SMALL LETTER S WITH ACUTE
    0x0165, // 9D: LATIN SMALL LETTER T WITH CARON
    0x017E, // 9E: LATIN SMALL LETTER Z WITH CARON
    0x017A, // 9F: LATIN SMALL LETTER Z WITH ACUTE
    0x00A0, // A0: NO-BREAK SPACE
    0x02C7, // A1: CARON
    0x02D8, // A2: BREVE
    0x0141, // A3: LATIN CAPITAL LETTER L WITH STROKE
    0x00A4, // A4: CURRENCY SIGN
    0x0104, // A5: LATIN CAPITAL LETTER A WITH OGONEK
    0x00A6, // A6: BROKEN BAR
    0x00A7, // A7: SECTION SIGN
    0x00A8, // A8: DIAERESIS
    0x00A9, // A9: COPYRIGHT SIGN
    0x015E, // AA: LATIN CAPITAL LETTER S WITH CEDILLA
    0x00AB, // AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x00AC, // AC: NOT SIGN
    0x00AD, // AD: SOFT HYPHEN
    0x00AE, // AE: REGISTERED SIGN
    0x017B, // AF: LATIN CAPITAL LETTER Z WITH DOT ABOVE
    0x00B0, // B0: DEGREE SIGN
    0x00B1, // B1: PLUS-MINUS SIGN
    0x02DB, // B2: OGONEK
    0x0142, // B3: LATIN SMALL LETTER L WITH STROKE
    0x00B4, // B4: ACUTE ACCENT
    0x00B5, // B5: MICRO SIGN
    0x00B6, // B6: PILCROW SIGN
    0x00B7, // B7: MIDDLE DOT
    0x00B8, // B8: CEDILLA
    0x0105, // B9: LATIN SMALL LETTER A WITH OGONEK
    0x015F, // BA: LATIN SMALL LETTER S WITH CEDILLA
    0x00BB, // BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0x013D, // BC: LATIN CAPITAL LETTER L WITH CARON
    0x02DD, // BD: DOUBLE ACUTE ACCENT
    0x013E, // BE: LATIN SMALL LETTER L WITH CARON
    0x017C, // BF: LATIN SMALL LETTER Z WITH DOT ABOVE
    0x0154, // C0: LATIN CAPITAL LETTER R WITH ACUTE
    0x00C1, // C1: LATIN CAPITAL LETTER A WITH ACUTE
    0x00C2, // C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0x0102, // C3: LATIN CAPITAL LETTER A WITH BREVE
    0x00C4, // C4: LATIN CAPITAL LETTER A WITH DIAERESIS
    0x0139, // C5: LATIN CAPITAL LETTER L WITH ACUTE
    0x0106, // C6: LATIN CAPITAL LETTER C WITH ACUTE
    0x00C7, // C7: LATIN CAPITAL LETTER C WITH CEDILLA
    0x010C, // C8: LATIN CAPITAL LETTER C WITH CARON
    0x00C9, // C9: LATIN CAPITAL LETTER E WITH ACUTE
    0x0118, // CA: LATIN CAPITAL LETTER E WITH OGONEK
    0x00CB, // CB: LATIN CAPITAL LETTER E WITH DIAERESIS
    0x011A, // CC: LATIN CAPITAL LETTER E WITH CARON
    0x00CD, // CD: LATIN CAPITAL LETTER I WITH ACUTE
    0x00CE, // CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0x010E, // CF: LATIN CAPITAL LETTER D WITH CARON
    0x0110, // D0: LATIN CAPITAL LETTER D WITH STROKE
    0x0143, // D1: LATIN CAPITAL LETTER N WITH ACUTE
    0x0147, // D2: LATIN CAPITAL LETTER N WITH CARON
    0x00D3, // D3: LATIN CAPITAL LETTER O WITH ACUTE
    0x00D4, // D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0x0150, // D5: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
    0x00D6, // D6: LATIN CAPITAL LETTER O WITH DIAERESIS
    0x00D7, // D7: MULTIPLICATION SIGN
    0x0158, // D8: LATIN CAPITAL LETTER R WITH CARON
    0x016E, // D9: LATIN CAPITAL LETTER U WITH RING ABOVE
    0x00DA, // DA: LATIN CAPITAL LETTER U WITH ACUTE
    0x0170, // DB: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
    0x00DC, // DC: LATIN CAPITAL LETTER U WITH DIAERESIS
    0x00DD, // DD: LATIN CAPITAL LETTER Y WITH ACUTE
    0x0162, // DE: LATIN CAPITAL LETTER T WITH CEDILLA
    0x00DF, // DF: LATIN SMALL LETTER SHARP S
    0x0155, // E0: LATIN SMALL LETTER R WITH ACUTE
    0x00E1, // E1: LATIN SMALL LETTER A WITH ACUTE
    0x00E2, // E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0x0103, // E3: LATIN SMALL LETTER A WITH BREVE
    0x00E4, // E4: LATIN SMALL LETTER A WITH DIAERESIS
    0x013A, // E5: LATIN SMALL LETTER L WITH ACUTE
    0x0107, // E6: LATIN SMALL LETTER C WITH ACUTE
    0x00E7, // E7: LATIN SMALL LETTER C WITH CEDILLA
    0x010D, // E8: LATIN SMALL LETTER C WITH CARON
    0x00E9, // E9: LATIN SMALL LETTER E WITH ACUTE
    0x0119, // EA: LATIN SMALL LETTER E WITH OGONEK
    0x00EB, // EB: LATIN SMALL LETTER E WITH DIAERESIS
    0x011B, // EC: LATIN SMALL LETTER E WITH CARON
    0x00ED, // ED: LATIN SMALL LETTER I WITH ACUTE
    0x00EE, // EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0x010F, // EF: LATIN SMALL LETTER D WITH CARON
    0x0111, // F0: LATIN SMALL LETTER D WITH STROKE
    0x0144, // F1: LATIN SMALL LETTER N WITH ACUTE
    0x0148, // F2: LATIN SMALL LETTER N WITH CARON
    0x00F3, // F3: LATIN SMALL LETTER O WITH ACUTE
    0x00F4, // F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0x0151, // F5: LATIN SMALL LETTER O WITH DOUBLE ACUTE
    0x00F6, // F6: LATIN SMALL LETTER O WITH DIAERESIS
    0x00F7, // F7: DIVISION SIGN
    0x0159, // F8: LATIN SMALL LETTER R WITH CARON
    0x016F, // F9: LATIN SMALL LETTER U WITH RING ABOVE
    0x00FA, // FA: LATIN SMALL LETTER U WITH ACUTE
    0x0171, // FB: LATIN SMALL LETTER U WITH DOUBLE ACUTE
    0x00FC, // FC: LATIN SMALL LETTER U WITH DIAERESIS
    0x00FD, // FD: LATIN SMALL LETTER Y WITH ACUTE
    0x0163, // FE: LATIN SMALL LETTER T WITH CEDILLA
    0x02D9  // FF: DOT ABOVE
};
280
// Code page 1251 -> Unicode lookup table.  The array index is the single-byte
// character value; the element is the Unicode code point it stands for.
// The following map came from here:
// http://www.microsoft.com/globaldev/reference/sbcs/1251.mspx
// The <UNMAPPED> entries were added by hand based on the holes in the chart.
// Byte 0x98 is the only hole in this code page.  It is stored as 0xFFFF, the
// same marker the other code page tables in this file use for unassigned
// bytes, so that it cannot be mistaken for a genuine U+0000 (NULL).
static wchar_t g_cp1251[256] =
{
    /*00 = U+*/0x0000, //NULL
    /*01 = U+*/0x0001, //START OF HEADING
    /*02 = U+*/0x0002, //START OF TEXT
    /*03 = U+*/0x0003, //END OF TEXT
    /*04 = U+*/0x0004, //END OF TRANSMISSION
    /*05 = U+*/0x0005, //ENQUIRY
    /*06 = U+*/0x0006, //ACKNOWLEDGE
    /*07 = U+*/0x0007, //BELL
    /*08 = U+*/0x0008, //BACKSPACE
    /*09 = U+*/0x0009, //HORIZONTAL TABULATION
    /*0A = U+*/0x000A, //LINE FEED
    /*0B = U+*/0x000B, //VERTICAL TABULATION
    /*0C = U+*/0x000C, //FORM FEED
    /*0D = U+*/0x000D, //CARRIAGE RETURN
    /*0E = U+*/0x000E, //SHIFT OUT
    /*0F = U+*/0x000F, //SHIFT IN
    /*10 = U+*/0x0010, //DATA LINK ESCAPE
    /*11 = U+*/0x0011, //DEVICE CONTROL ONE
    /*12 = U+*/0x0012, //DEVICE CONTROL TWO
    /*13 = U+*/0x0013, //DEVICE CONTROL THREE
    /*14 = U+*/0x0014, //DEVICE CONTROL FOUR
    /*15 = U+*/0x0015, //NEGATIVE ACKNOWLEDGE
    /*16 = U+*/0x0016, //SYNCHRONOUS IDLE
    /*17 = U+*/0x0017, //END OF TRANSMISSION BLOCK
    /*18 = U+*/0x0018, //CANCEL
    /*19 = U+*/0x0019, //END OF MEDIUM
    /*1A = U+*/0x001A, //SUBSTITUTE
    /*1B = U+*/0x001B, //ESCAPE
    /*1C = U+*/0x001C, //FILE SEPARATOR
    /*1D = U+*/0x001D, //GROUP SEPARATOR
    /*1E = U+*/0x001E, //RECORD SEPARATOR
    /*1F = U+*/0x001F, //UNIT SEPARATOR
    /*20 = U+*/0x0020, //SPACE
    /*21 = U+*/0x0021, //EXCLAMATION MARK
    /*22 = U+*/0x0022, //QUOTATION MARK
    /*23 = U+*/0x0023, //NUMBER SIGN
    /*24 = U+*/0x0024, //DOLLAR SIGN
    /*25 = U+*/0x0025, //PERCENT SIGN
    /*26 = U+*/0x0026, //AMPERSAND
    /*27 = U+*/0x0027, //APOSTROPHE
    /*28 = U+*/0x0028, //LEFT PARENTHESIS
    /*29 = U+*/0x0029, //RIGHT PARENTHESIS
    /*2A = U+*/0x002A, //ASTERISK
    /*2B = U+*/0x002B, //PLUS SIGN
    /*2C = U+*/0x002C, //COMMA
    /*2D = U+*/0x002D, //HYPHEN-MINUS
    /*2E = U+*/0x002E, //FULL STOP
    /*2F = U+*/0x002F, //SOLIDUS
    /*30 = U+*/0x0030, //DIGIT ZERO
    /*31 = U+*/0x0031, //DIGIT ONE
    /*32 = U+*/0x0032, //DIGIT TWO
    /*33 = U+*/0x0033, //DIGIT THREE
    /*34 = U+*/0x0034, //DIGIT FOUR
    /*35 = U+*/0x0035, //DIGIT FIVE
    /*36 = U+*/0x0036, //DIGIT SIX
    /*37 = U+*/0x0037, //DIGIT SEVEN
    /*38 = U+*/0x0038, //DIGIT EIGHT
    /*39 = U+*/0x0039, //DIGIT NINE
    /*3A = U+*/0x003A, //COLON
    /*3B = U+*/0x003B, //SEMICOLON
    /*3C = U+*/0x003C, //LESS-THAN SIGN
    /*3D = U+*/0x003D, //EQUALS SIGN
    /*3E = U+*/0x003E, //GREATER-THAN SIGN
    /*3F = U+*/0x003F, //QUESTION MARK
    /*40 = U+*/0x0040, //COMMERCIAL AT
    /*41 = U+*/0x0041, //LATIN CAPITAL LETTER A
    /*42 = U+*/0x0042, //LATIN CAPITAL LETTER B
    /*43 = U+*/0x0043, //LATIN CAPITAL LETTER C
    /*44 = U+*/0x0044, //LATIN CAPITAL LETTER D
    /*45 = U+*/0x0045, //LATIN CAPITAL LETTER E
    /*46 = U+*/0x0046, //LATIN CAPITAL LETTER F
    /*47 = U+*/0x0047, //LATIN CAPITAL LETTER G
    /*48 = U+*/0x0048, //LATIN CAPITAL LETTER H
    /*49 = U+*/0x0049, //LATIN CAPITAL LETTER I
    /*4A = U+*/0x004A, //LATIN CAPITAL LETTER J
    /*4B = U+*/0x004B, //LATIN CAPITAL LETTER K
    /*4C = U+*/0x004C, //LATIN CAPITAL LETTER L
    /*4D = U+*/0x004D, //LATIN CAPITAL LETTER M
    /*4E = U+*/0x004E, //LATIN CAPITAL LETTER N
    /*4F = U+*/0x004F, //LATIN CAPITAL LETTER O
    /*50 = U+*/0x0050, //LATIN CAPITAL LETTER P
    /*51 = U+*/0x0051, //LATIN CAPITAL LETTER Q
    /*52 = U+*/0x0052, //LATIN CAPITAL LETTER R
    /*53 = U+*/0x0053, //LATIN CAPITAL LETTER S
    /*54 = U+*/0x0054, //LATIN CAPITAL LETTER T
    /*55 = U+*/0x0055, //LATIN CAPITAL LETTER U
    /*56 = U+*/0x0056, //LATIN CAPITAL LETTER V
    /*57 = U+*/0x0057, //LATIN CAPITAL LETTER W
    /*58 = U+*/0x0058, //LATIN CAPITAL LETTER X
    /*59 = U+*/0x0059, //LATIN CAPITAL LETTER Y
    /*5A = U+*/0x005A, //LATIN CAPITAL LETTER Z
    /*5B = U+*/0x005B, //LEFT SQUARE BRACKET
    /*5C = U+*/0x005C, //REVERSE SOLIDUS
    /*5D = U+*/0x005D, //RIGHT SQUARE BRACKET
    /*5E = U+*/0x005E, //CIRCUMFLEX ACCENT
    /*5F = U+*/0x005F, //LOW LINE
    /*60 = U+*/0x0060, //GRAVE ACCENT
    /*61 = U+*/0x0061, //LATIN SMALL LETTER A
    /*62 = U+*/0x0062, //LATIN SMALL LETTER B
    /*63 = U+*/0x0063, //LATIN SMALL LETTER C
    /*64 = U+*/0x0064, //LATIN SMALL LETTER D
    /*65 = U+*/0x0065, //LATIN SMALL LETTER E
    /*66 = U+*/0x0066, //LATIN SMALL LETTER F
    /*67 = U+*/0x0067, //LATIN SMALL LETTER G
    /*68 = U+*/0x0068, //LATIN SMALL LETTER H
    /*69 = U+*/0x0069, //LATIN SMALL LETTER I
    /*6A = U+*/0x006A, //LATIN SMALL LETTER J
    /*6B = U+*/0x006B, //LATIN SMALL LETTER K
    /*6C = U+*/0x006C, //LATIN SMALL LETTER L
    /*6D = U+*/0x006D, //LATIN SMALL LETTER M
    /*6E = U+*/0x006E, //LATIN SMALL LETTER N
    /*6F = U+*/0x006F, //LATIN SMALL LETTER O
    /*70 = U+*/0x0070, //LATIN SMALL LETTER P
    /*71 = U+*/0x0071, //LATIN SMALL LETTER Q
    /*72 = U+*/0x0072, //LATIN SMALL LETTER R
    /*73 = U+*/0x0073, //LATIN SMALL LETTER S
    /*74 = U+*/0x0074, //LATIN SMALL LETTER T
    /*75 = U+*/0x0075, //LATIN SMALL LETTER U
    /*76 = U+*/0x0076, //LATIN SMALL LETTER V
    /*77 = U+*/0x0077, //LATIN SMALL LETTER W
    /*78 = U+*/0x0078, //LATIN SMALL LETTER X
    /*79 = U+*/0x0079, //LATIN SMALL LETTER Y
    /*7A = U+*/0x007A, //LATIN SMALL LETTER Z
    /*7B = U+*/0x007B, //LEFT CURLY BRACKET
    /*7C = U+*/0x007C, //VERTICAL LINE
    /*7D = U+*/0x007D, //RIGHT CURLY BRACKET
    /*7E = U+*/0x007E, //TILDE
    /*7F = U+*/0x007F, //DELETE
    /*80 = U+*/0x0402, //CYRILLIC CAPITAL LETTER DJE
    /*81 = U+*/0x0403, //CYRILLIC CAPITAL LETTER GJE
    /*82 = U+*/0x201A, //SINGLE LOW-9 QUOTATION MARK
    /*83 = U+*/0x0453, //CYRILLIC SMALL LETTER GJE
    /*84 = U+*/0x201E, //DOUBLE LOW-9 QUOTATION MARK
    /*85 = U+*/0x2026, //HORIZONTAL ELLIPSIS
    /*86 = U+*/0x2020, //DAGGER
    /*87 = U+*/0x2021, //DOUBLE DAGGER
    /*88 = U+*/0x20AC, //EURO SIGN
    /*89 = U+*/0x2030, //PER MILLE SIGN
    /*8A = U+*/0x0409, //CYRILLIC CAPITAL LETTER LJE
    /*8B = U+*/0x2039, //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    /*8C = U+*/0x040A, //CYRILLIC CAPITAL LETTER NJE
    /*8D = U+*/0x040C, //CYRILLIC CAPITAL LETTER KJE
    /*8E = U+*/0x040B, //CYRILLIC CAPITAL LETTER TSHE
    /*8F = U+*/0x040F, //CYRILLIC CAPITAL LETTER DZHE
    /*90 = U+*/0x0452, //CYRILLIC SMALL LETTER DJE
    /*91 = U+*/0x2018, //LEFT SINGLE QUOTATION MARK
    /*92 = U+*/0x2019, //RIGHT SINGLE QUOTATION MARK
    /*93 = U+*/0x201C, //LEFT DOUBLE QUOTATION MARK
    /*94 = U+*/0x201D, //RIGHT DOUBLE QUOTATION MARK
    /*95 = U+*/0x2022, //BULLET
    /*96 = U+*/0x2013, //EN DASH
    /*97 = U+*/0x2014, //EM DASH
    /*98 = U+*/0xFFFF, //<UNMAPPED>
    /*99 = U+*/0x2122, //TRADE MARK SIGN
    /*9A = U+*/0x0459, //CYRILLIC SMALL LETTER LJE
    /*9B = U+*/0x203A, //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    /*9C = U+*/0x045A, //CYRILLIC SMALL LETTER NJE
    /*9D = U+*/0x045C, //CYRILLIC SMALL LETTER KJE
    /*9E = U+*/0x045B, //CYRILLIC SMALL LETTER TSHE
    /*9F = U+*/0x045F, //CYRILLIC SMALL LETTER DZHE
    /*A0 = U+*/0x00A0, //NO-BREAK SPACE
    /*A1 = U+*/0x040E, //CYRILLIC CAPITAL LETTER SHORT U
    /*A2 = U+*/0x045E, //CYRILLIC SMALL LETTER SHORT U
    /*A3 = U+*/0x0408, //CYRILLIC CAPITAL LETTER JE
    /*A4 = U+*/0x00A4, //CURRENCY SIGN
    /*A5 = U+*/0x0490, //CYRILLIC CAPITAL LETTER GHE WITH UPTURN
    /*A6 = U+*/0x00A6, //BROKEN BAR
    /*A7 = U+*/0x00A7, //SECTION SIGN
    /*A8 = U+*/0x0401, //CYRILLIC CAPITAL LETTER IO
    /*A9 = U+*/0x00A9, //COPYRIGHT SIGN
    /*AA = U+*/0x0404, //CYRILLIC CAPITAL LETTER UKRAINIAN IE
    /*AB = U+*/0x00AB, //LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    /*AC = U+*/0x00AC, //NOT SIGN
    /*AD = U+*/0x00AD, //SOFT HYPHEN
    /*AE = U+*/0x00AE, //REGISTERED SIGN
    /*AF = U+*/0x0407, //CYRILLIC CAPITAL LETTER YI
    /*B0 = U+*/0x00B0, //DEGREE SIGN
    /*B1 = U+*/0x00B1, //PLUS-MINUS SIGN
    /*B2 = U+*/0x0406, //CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    /*B3 = U+*/0x0456, //CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    /*B4 = U+*/0x0491, //CYRILLIC SMALL LETTER GHE WITH UPTURN
    /*B5 = U+*/0x00B5, //MICRO SIGN
    /*B6 = U+*/0x00B6, //PILCROW SIGN
    /*B7 = U+*/0x00B7, //MIDDLE DOT
    /*B8 = U+*/0x0451, //CYRILLIC SMALL LETTER IO
    /*B9 = U+*/0x2116, //NUMERO SIGN
    /*BA = U+*/0x0454, //CYRILLIC SMALL LETTER UKRAINIAN IE
    /*BB = U+*/0x00BB, //RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    /*BC = U+*/0x0458, //CYRILLIC SMALL LETTER JE
    /*BD = U+*/0x0405, //CYRILLIC CAPITAL LETTER DZE
    /*BE = U+*/0x0455, //CYRILLIC SMALL LETTER DZE
    /*BF = U+*/0x0457, //CYRILLIC SMALL LETTER YI
    /*C0 = U+*/0x0410, //CYRILLIC CAPITAL LETTER A
    /*C1 = U+*/0x0411, //CYRILLIC CAPITAL LETTER BE
    /*C2 = U+*/0x0412, //CYRILLIC CAPITAL LETTER VE
    /*C3 = U+*/0x0413, //CYRILLIC CAPITAL LETTER GHE
    /*C4 = U+*/0x0414, //CYRILLIC CAPITAL LETTER DE
    /*C5 = U+*/0x0415, //CYRILLIC CAPITAL LETTER IE
    /*C6 = U+*/0x0416, //CYRILLIC CAPITAL LETTER ZHE
    /*C7 = U+*/0x0417, //CYRILLIC CAPITAL LETTER ZE
    /*C8 = U+*/0x0418, //CYRILLIC CAPITAL LETTER I
    /*C9 = U+*/0x0419, //CYRILLIC CAPITAL LETTER SHORT I
    /*CA = U+*/0x041A, //CYRILLIC CAPITAL LETTER KA
    /*CB = U+*/0x041B, //CYRILLIC CAPITAL LETTER EL
    /*CC = U+*/0x041C, //CYRILLIC CAPITAL LETTER EM
    /*CD = U+*/0x041D, //CYRILLIC CAPITAL LETTER EN
    /*CE = U+*/0x041E, //CYRILLIC CAPITAL LETTER O
    /*CF = U+*/0x041F, //CYRILLIC CAPITAL LETTER PE
    /*D0 = U+*/0x0420, //CYRILLIC CAPITAL LETTER ER
    /*D1 = U+*/0x0421, //CYRILLIC CAPITAL LETTER ES
    /*D2 = U+*/0x0422, //CYRILLIC CAPITAL LETTER TE
    /*D3 = U+*/0x0423, //CYRILLIC CAPITAL LETTER U
    /*D4 = U+*/0x0424, //CYRILLIC CAPITAL LETTER EF
    /*D5 = U+*/0x0425, //CYRILLIC CAPITAL LETTER HA
    /*D6 = U+*/0x0426, //CYRILLIC CAPITAL LETTER TSE
    /*D7 = U+*/0x0427, //CYRILLIC CAPITAL LETTER CHE
    /*D8 = U+*/0x0428, //CYRILLIC CAPITAL LETTER SHA
    /*D9 = U+*/0x0429, //CYRILLIC CAPITAL LETTER SHCHA
    /*DA = U+*/0x042A, //CYRILLIC CAPITAL LETTER HARD SIGN
    /*DB = U+*/0x042B, //CYRILLIC CAPITAL LETTER YERU
    /*DC = U+*/0x042C, //CYRILLIC CAPITAL LETTER SOFT SIGN
    /*DD = U+*/0x042D, //CYRILLIC CAPITAL LETTER E
    /*DE = U+*/0x042E, //CYRILLIC CAPITAL LETTER YU
    /*DF = U+*/0x042F, //CYRILLIC CAPITAL LETTER YA
    /*E0 = U+*/0x0430, //CYRILLIC SMALL LETTER A
    /*E1 = U+*/0x0431, //CYRILLIC SMALL LETTER BE
    /*E2 = U+*/0x0432, //CYRILLIC SMALL LETTER VE
    /*E3 = U+*/0x0433, //CYRILLIC SMALL LETTER GHE
    /*E4 = U+*/0x0434, //CYRILLIC SMALL LETTER DE
    /*E5 = U+*/0x0435, //CYRILLIC SMALL LETTER IE
    /*E6 = U+*/0x0436, //CYRILLIC SMALL LETTER ZHE
    /*E7 = U+*/0x0437, //CYRILLIC SMALL LETTER ZE
    /*E8 = U+*/0x0438, //CYRILLIC SMALL LETTER I
    /*E9 = U+*/0x0439, //CYRILLIC SMALL LETTER SHORT I
    /*EA = U+*/0x043A, //CYRILLIC SMALL LETTER KA
    /*EB = U+*/0x043B, //CYRILLIC SMALL LETTER EL
    /*EC = U+*/0x043C, //CYRILLIC SMALL LETTER EM
    /*ED = U+*/0x043D, //CYRILLIC SMALL LETTER EN
    /*EE = U+*/0x043E, //CYRILLIC SMALL LETTER O
    /*EF = U+*/0x043F, //CYRILLIC SMALL LETTER PE
    /*F0 = U+*/0x0440, //CYRILLIC SMALL LETTER ER
    /*F1 = U+*/0x0441, //CYRILLIC SMALL LETTER ES
    /*F2 = U+*/0x0442, //CYRILLIC SMALL LETTER TE
    /*F3 = U+*/0x0443, //CYRILLIC SMALL LETTER U
    /*F4 = U+*/0x0444, //CYRILLIC SMALL LETTER EF
    /*F5 = U+*/0x0445, //CYRILLIC SMALL LETTER HA
    /*F6 = U+*/0x0446, //CYRILLIC SMALL LETTER TSE
    /*F7 = U+*/0x0447, //CYRILLIC SMALL LETTER CHE
    /*F8 = U+*/0x0448, //CYRILLIC SMALL LETTER SHA
    /*F9 = U+*/0x0449, //CYRILLIC SMALL LETTER SHCHA
    /*FA = U+*/0x044A, //CYRILLIC SMALL LETTER HARD SIGN
    /*FB = U+*/0x044B, //CYRILLIC SMALL LETTER YERU
    /*FC = U+*/0x044C, //CYRILLIC SMALL LETTER SOFT SIGN
    /*FD = U+*/0x044D, //CYRILLIC SMALL LETTER E
    /*FE = U+*/0x044E, //CYRILLIC SMALL LETTER YU
    /*FF = U+*/0x044F //CYRILLIC SMALL LETTER YA
};
543
544 // The following map came from here:
545 // http://www.microsoft.com/globaldev/reference/sbcs/1252.mspx
546 // The <UNMAPPED> entries were added by hand based on the holes in the chart.
547 static wchar_t g_cp1252[256] =
548 {
549 /*00 = U+*/0x0000, //NULL
550 /*01 = U+*/0x0001, //START OF HEADING
551 /*02 = U+*/0x0002, //START OF TEXT
552 /*03 = U+*/0x0003, //END OF TEXT
553 /*04 = U+*/0x0004, //END OF TRANSMISSION
554 /*05 = U+*/0x0005, //ENQUIRY
555 /*06 = U+*/0x0006, //ACKNOWLEDGE
556 /*07 = U+*/0x0007, //BELL
557 /*08 = U+*/0x0008, //BACKSPACE
558 /*09 = U+*/0x0009, //HORIZONTAL TABULATION
559 /*0A = U+*/0x000A, //LINE FEED
560 /*0B = U+*/0x000B, //VERTICAL TABULATION
561 /*0C = U+*/0x000C, //FORM FEED
562 /*0D = U+*/0x000D, //CARRIAGE RETURN
563 /*0E = U+*/0x000E, //SHIFT OUT
564 /*0F = U+*/0x000F, //SHIFT IN
565 /*10 = U+*/0x0010, //DATA LINK ESCAPE
566 /*11 = U+*/0x0011, //DEVICE CONTROL ONE
567 /*12 = U+*/0x0012, //DEVICE CONTROL TWO
568 /*13 = U+*/0x0013, //DEVICE CONTROL THREE
569 /*14 = U+*/0x0014, //DEVICE CONTROL FOUR
570 /*15 = U+*/0x0015, //NEGATIVE ACKNOWLEDGE
571 /*16 = U+*/0x0016, //SYNCHRONOUS IDLE
572 /*17 = U+*/0x0017, //END OF TRANSMISSION BLOCK
573 /*18 = U+*/0x0018, //CANCEL
574 /*19 = U+*/0x0019, //END OF MEDIUM
575 /*1A = U+*/0x001A, //SUBSTITUTE
576 /*1B = U+*/0x001B, //ESCAPE
577 /*1C = U+*/0x001C, //FILE SEPARATOR
578 /*1D = U+*/0x001D, //GROUP SEPARATOR
579 /*1E = U+*/0x001E, //RECORD SEPARATOR
580 /*1F = U+*/0x001F, //UNIT SEPARATOR
581 /*20 = U+*/0x0020, //SPACE
582 /*21 = U+*/0x0021, //EXCLAMATION MARK
583 /*22 = U+*/0x0022, //QUOTATION MARK
584 /*23 = U+*/0x0023, //NUMBER SIGN
585 /*24 = U+*/0x0024, //DOLLAR SIGN
586 /*25 = U+*/0x0025, //PERCENT SIGN
587 /*26 = U+*/0x0026, //AMPERSAND
588 /*27 = U+*/0x0027, //APOSTROPHE
589 /*28 = U+*/0x0028, //LEFT PARENTHESIS
590 /*29 = U+*/0x0029, //RIGHT PARENTHESIS
591 /*2A = U+*/0x002A, //ASTERISK
592 /*2B = U+*/0x002B, //PLUS SIGN
593 /*2C = U+*/0x002C, //COMMA
594 /*2D = U+*/0x002D, //HYPHEN-MINUS
595 /*2E = U+*/0x002E, //FULL STOP
596 /*2F = U+*/0x002F, //SOLIDUS
597 /*30 = U+*/0x0030, //DIGIT ZERO
598 /*31 = U+*/0x0031, //DIGIT ONE
599 /*32 = U+*/0x0032, //DIGIT TWO
600 /*33 = U+*/0x0033, //DIGIT THREE
601 /*34 = U+*/0x0034, //DIGIT FOUR
602 /*35 = U+*/0x0035, //DIGIT FIVE
603 /*36 = U+*/0x0036, //DIGIT SIX
604 /*37 = U+*/0x0037, //DIGIT SEVEN
605 /*38 = U+*/0x0038, //DIGIT EIGHT
606 /*39 = U+*/0x0039, //DIGIT NINE
607 /*3A = U+*/0x003A, //COLON
608 /*3B = U+*/0x003B, //SEMICOLON
609 /*3C = U+*/0x003C, //LESS-THAN SIGN
610 /*3D = U+*/0x003D, //EQUALS SIGN
611 /*3E = U+*/0x003E, //GREATER-THAN SIGN
612 /*3F = U+*/0x003F, //QUESTION MARK
613 /*40 = U+*/0x0040, //COMMERCIAL AT
614 /*41 = U+*/0x0041, //LATIN CAPITAL LETTER A
615 /*42 = U+*/0x0042, //LATIN CAPITAL LETTER B
616 /*43 = U+*/0x0043, //LATIN CAPITAL LETTER C
617 /*44 = U+*/0x0044, //LATIN CAPITAL LETTER D
618 /*45 = U+*/0x0045, //LATIN CAPITAL LETTER E
619 /*46 = U+*/0x0046, //LATIN CAPITAL LETTER F
620 /*47 = U+*/0x0047, //LATIN CAPITAL LETTER G
621 /*48 = U+*/0x0048, //LATIN CAPITAL LETTER H
622 /*49 = U+*/0x0049, //LATIN CAPITAL LETTER I
623 /*4A = U+*/0x004A, //LATIN CAPITAL LETTER J
624 /*4B = U+*/0x004B, //LATIN CAPITAL LETTER K
625 /*4C = U+*/0x004C, //LATIN CAPITAL LETTER L
626 /*4D = U+*/0x004D, //LATIN CAPITAL LETTER M
627 /*4E = U+*/0x004E, //LATIN CAPITAL LETTER N
628 /*4F = U+*/0x004F, //LATIN CAPITAL LETTER O
629 /*50 = U+*/0x0050, //LATIN CAPITAL LETTER P
630 /*51 = U+*/0x0051, //LATIN CAPITAL LETTER Q
631 /*52 = U+*/0x0052, //LATIN CAPITAL LETTER R
632 /*53 = U+*/0x0053, //LATIN CAPITAL LETTER S
633 /*54 = U+*/0x0054, //LATIN CAPITAL LETTER T
634 /*55 = U+*/0x0055, //LATIN CAPITAL LETTER U
635 /*56 = U+*/0x0056, //LATIN CAPITAL LETTER V
636 /*57 = U+*/0x0057, //LATIN CAPITAL LETTER W
637 /*58 = U+*/0x0058, //LATIN CAPITAL LETTER X
638 /*59 = U+*/0x0059, //LATIN CAPITAL LETTER Y
639 /*5A = U+*/0x005A, //LATIN CAPITAL LETTER Z
640 /*5B = U+*/0x005B, //LEFT SQUARE BRACKET
641 /*5C = U+*/0x005C, //REVERSE SOLIDUS
642 /*5D = U+*/0x005D, //RIGHT SQUARE BRACKET
643 /*5E = U+*/0x005E, //CIRCUMFLEX ACCENT
644 /*5F = U+*/0x005F, //LOW LINE
645 /*60 = U+*/0x0060, //GRAVE ACCENT
646 /*61 = U+*/0x0061, //LATIN SMALL LETTER A
647 /*62 = U+*/0x0062, //LATIN SMALL LETTER B
648 /*63 = U+*/0x0063, //LATIN SMALL LETTER C
649 /*64 = U+*/0x0064, //LATIN SMALL LETTER D
650 /*65 = U+*/0x0065, //LATIN SMALL LETTER E
651 /*66 = U+*/0x0066, //LATIN SMALL LETTER F
652 /*67 = U+*/0x0067, //LATIN SMALL LETTER G
653 /*68 = U+*/0x0068, //LATIN SMALL LETTER H
654 /*69 = U+*/0x0069, //LATIN SMALL LETTER I
655 /*6A = U+*/0x006A, //LATIN SMALL LETTER J
656 /*6B = U+*/0x006B, //LATIN SMALL LETTER K
657 /*6C = U+*/0x006C, //LATIN SMALL LETTER L
658 /*6D = U+*/0x006D, //LATIN SMALL LETTER M
659 /*6E = U+*/0x006E, //LATIN SMALL LETTER N
660 /*6F = U+*/0x006F, //LATIN SMALL LETTER O
661 /*70 = U+*/0x0070, //LATIN SMALL LETTER P
662 /*71 = U+*/0x0071, //LATIN SMALL LETTER Q
663 /*72 = U+*/0x0072, //LATIN SMALL LETTER R
664 /*73 = U+*/0x0073, //LATIN SMALL LETTER S
665 /*74 = U+*/0x0074, //LATIN SMALL LETTER T
666 /*75 = U+*/0x0075, //LATIN SMALL LETTER U
667 /*76 = U+*/0x0076, //LATIN SMALL LETTER V
668 /*77 = U+*/0x0077, //LATIN SMALL LETTER W
669 /*78 = U+*/0x0078, //LATIN SMALL LETTER X
670 /*79 = U+*/0x0079, //LATIN SMALL LETTER Y
671 /*7A = U+*/0x007A, //LATIN SMALL LETTER Z
672 /*7B = U+*/0x007B, //LEFT CURLY BRACKET
673 /*7C = U+*/0x007C, //VERTICAL LINE
674 /*7D = U+*/0x007D, //RIGHT CURLY BRACKET
675 /*7E = U+*/0x007E, //TILDE
676 /*7F = U+*/0x007F, //DELETE
677 /*80 = U+*/0x20AC, //EURO SIGN
678 /*81 = U+*/0xFFFF, //<UNMAPPED>
679 /*82 = U+*/0x201A, //SINGLE LOW-9 QUOTATION MARK
680 /*83 = U+*/0x0192, //LATIN SMALL LETTER F WITH HOOK
681 /*84 = U+*/0x201E, //DOUBLE LOW-9 QUOTATION MARK
682 /*85 = U+*/0x2026, //HORIZONTAL ELLIPSIS
683 /*86 = U+*/0x2020, //DAGGER
684 /*87 = U+*/0x2021, //DOUBLE DAGGER
685 /*88 = U+*/0x02C6, //MODIFIER LETTER CIRCUMFLEX ACCENT
686 /*89 = U+*/0x2030, //PER MILLE SIGN
687 /*8A = U+*/0x0160, //LATIN CAPITAL LETTER S WITH CARON
688 /*8B = U+*/0x2039, //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
689 /*8C = U+*/0x0152, //LATIN CAPITAL LIGATURE OE
690 /*8D = U+*/0xFFFF, //<UNMAPPED>
691 /*8E = U+*/0x017D, //LATIN CAPITAL LETTER Z WITH CARON
692 /*8F = U+*/0xFFFF, //<UNMAPPED>
693 /*90 = U+*/0xFFFF, //<UNMAPPED>
694 /*91 = U+*/0x2018, //LEFT SINGLE QUOTATION MARK
695 /*92 = U+*/0x2019, //RIGHT SINGLE QUOTATION MARK
696 /*93 = U+*/0x201C, //LEFT DOUBLE QUOTATION MARK
697 /*94 = U+*/0x201D, //RIGHT DOUBLE QUOTATION MARK
698 /*95 = U+*/0x2022, //BULLET
699 /*96 = U+*/0x2013, //EN DASH
700 /*97 = U+*/0x2014, //EM DASH
701 /*98 = U+*/0x02DC, //SMALL TILDE
702 /*99 = U+*/0x2122, //TRADE MARK SIGN
703 /*9A = U+*/0x0161, //LATIN SMALL LETTER S WITH CARON
704 /*9B = U+*/0x203A, //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
705 /*9C = U+*/0x0153, //LATIN SMALL LIGATURE OE
706 /*9D = U+*/0xFFFF, //<UNMAPPED>
707 /*9E = U+*/0x017E, //LATIN SMALL LETTER Z WITH CARON
708 /*9F = U+*/0x0178, //LATIN CAPITAL LETTER Y WITH DIAERESIS
709 /*A0 = U+*/0x00A0, //NO-BREAK SPACE
710 /*A1 = U+*/0x00A1, //INVERTED EXCLAMATION MARK
711 /*A2 = U+*/0x00A2, //CENT SIGN
712 /*A3 = U+*/0x00A3, //POUND SIGN
713 /*A4 = U+*/0x00A4, //CURRENCY SIGN
714 /*A5 = U+*/0x00A5, //YEN SIGN
715 /*A6 = U+*/0x00A6, //BROKEN BAR
716 /*A7 = U+*/0x00A7, //SECTION SIGN
717 /*A8 = U+*/0x00A8, //DIAERESIS
718 /*A9 = U+*/0x00A9, //COPYRIGHT SIGN
719 /*AA = U+*/0x00AA, //FEMININE ORDINAL INDICATOR
720 /*AB = U+*/0x00AB, //LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
721 /*AC = U+*/0x00AC, //NOT SIGN
722 /*AD = U+*/0x00AD, //SOFT HYPHEN
723 /*AE = U+*/0x00AE, //REGISTERED SIGN
724 /*AF = U+*/0x00AF, //MACRON
725 /*B0 = U+*/0x00B0, //DEGREE SIGN
726 /*B1 = U+*/0x00B1, //PLUS-MINUS SIGN
727 /*B2 = U+*/0x00B2, //SUPERSCRIPT TWO
728 /*B3 = U+*/0x00B3, //SUPERSCRIPT THREE
729 /*B4 = U+*/0x00B4, //ACUTE ACCENT
730 /*B5 = U+*/0x00B5, //MICRO SIGN
731 /*B6 = U+*/0x00B6, //PILCROW SIGN
732 /*B7 = U+*/0x00B7, //MIDDLE DOT
733 /*B8 = U+*/0x00B8, //CEDILLA
734 /*B9 = U+*/0x00B9, //SUPERSCRIPT ONE
735 /*BA = U+*/0x00BA, //MASCULINE ORDINAL INDICATOR
736 /*BB = U+*/0x00BB, //RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
737 /*BC = U+*/0x00BC, //VULGAR FRACTION ONE QUARTER
738 /*BD = U+*/0x00BD, //VULGAR FRACTION ONE HALF
739 /*BE = U+*/0x00BE, //VULGAR FRACTION THREE QUARTERS
740 /*BF = U+*/0x00BF, //INVERTED QUESTION MARK
741 /*C0 = U+*/0x00C0, //LATIN CAPITAL LETTER A WITH GRAVE
742 /*C1 = U+*/0x00C1, //LATIN CAPITAL LETTER A WITH ACUTE
743 /*C2 = U+*/0x00C2, //LATIN CAPITAL LETTER A WITH CIRCUMFLEX
744 /*C3 = U+*/0x00C3, //LATIN CAPITAL LETTER A WITH TILDE
745 /*C4 = U+*/0x00C4, //LATIN CAPITAL LETTER A WITH DIAERESIS
746 /*C5 = U+*/0x00C5, //LATIN CAPITAL LETTER A WITH RING ABOVE
747 /*C6 = U+*/0x00C6, //LATIN CAPITAL LETTER AE
748 /*C7 = U+*/0x00C7, //LATIN CAPITAL LETTER C WITH CEDILLA
749 /*C8 = U+*/0x00C8, //LATIN CAPITAL LETTER E WITH GRAVE
750 /*C9 = U+*/0x00C9, //LATIN CAPITAL LETTER E WITH ACUTE
751 /*CA = U+*/0x00CA, //LATIN CAPITAL LETTER E WITH CIRCUMFLEX
752 /*CB = U+*/0x00CB, //LATIN CAPITAL LETTER E WITH DIAERESIS
753 /*CC = U+*/0x00CC, //LATIN CAPITAL LETTER I WITH GRAVE
754 /*CD = U+*/0x00CD, //LATIN CAPITAL LETTER I WITH ACUTE
755 /*CE = U+*/0x00CE, //LATIN CAPITAL LETTER I WITH CIRCUMFLEX
756 /*CF = U+*/0x00CF, //LATIN CAPITAL LETTER I WITH DIAERESIS
757 /*D0 = U+*/0x00D0, //LATIN CAPITAL LETTER ETH
758 /*D1 = U+*/0x00D1, //LATIN CAPITAL LETTER N WITH TILDE
759 /*D2 = U+*/0x00D2, //LATIN CAPITAL LETTER O WITH GRAVE
760 /*D3 = U+*/0x00D3, //LATIN CAPITAL LETTER O WITH ACUTE
761 /*D4 = U+*/0x00D4, //LATIN CAPITAL LETTER O WITH CIRCUMFLEX
762 /*D5 = U+*/0x00D5, //LATIN CAPITAL LETTER O WITH TILDE
763 /*D6 = U+*/0x00D6, //LATIN CAPITAL LETTER O WITH DIAERESIS
764 /*D7 = U+*/0x00D7, //MULTIPLICATION SIGN
765 /*D8 = U+*/0x00D8, //LATIN CAPITAL LETTER O WITH STROKE
766 /*D9 = U+*/0x00D9, //LATIN CAPITAL LETTER U WITH GRAVE
767 /*DA = U+*/0x00DA, //LATIN CAPITAL LETTER U WITH ACUTE
768 /*DB = U+*/0x00DB, //LATIN CAPITAL LETTER U WITH CIRCUMFLEX
769 /*DC = U+*/0x00DC, //LATIN CAPITAL LETTER U WITH DIAERESIS
770 /*DD = U+*/0x00DD, //LATIN CAPITAL LETTER Y WITH ACUTE
771 /*DE = U+*/0x00DE, //LATIN CAPITAL LETTER THORN
772 /*DF = U+*/0x00DF, //LATIN SMALL LETTER SHARP S
773 /*E0 = U+*/0x00E0, //LATIN SMALL LETTER A WITH GRAVE
774 /*E1 = U+*/0x00E1, //LATIN SMALL LETTER A WITH ACUTE
775 /*E2 = U+*/0x00E2, //LATIN SMALL LETTER A WITH CIRCUMFLEX
776 /*E3 = U+*/0x00E3, //LATIN SMALL LETTER A WITH TILDE
777 /*E4 = U+*/0x00E4, //LATIN SMALL LETTER A WITH DIAERESIS
778 /*E5 = U+*/0x00E5, //LATIN SMALL LETTER A WITH RING ABOVE
779 /*E6 = U+*/0x00E6, //LATIN SMALL LETTER AE
780 /*E7 = U+*/0x00E7, //LATIN SMALL LETTER C WITH CEDILLA
781 /*E8 = U+*/0x00E8, //LATIN SMALL LETTER E WITH GRAVE
782 /*E9 = U+*/0x00E9, //LATIN SMALL LETTER E WITH ACUTE
783 /*EA = U+*/0x00EA, //LATIN SMALL LETTER E WITH CIRCUMFLEX
784 /*EB = U+*/0x00EB, //LATIN SMALL LETTER E WITH DIAERESIS
785 /*EC = U+*/0x00EC, //LATIN SMALL LETTER I WITH GRAVE
786 /*ED = U+*/0x00ED, //LATIN SMALL LETTER I WITH ACUTE
787 /*EE = U+*/0x00EE, //LATIN SMALL LETTER I WITH CIRCUMFLEX
788 /*EF = U+*/0x00EF, //LATIN SMALL LETTER I WITH DIAERESIS
789 /*F0 = U+*/0x00F0, //LATIN SMALL LETTER ETH
790 /*F1 = U+*/0x00F1, //LATIN SMALL LETTER N WITH TILDE
791 /*F2 = U+*/0x00F2, //LATIN SMALL LETTER O WITH GRAVE
792 /*F3 = U+*/0x00F3, //LATIN SMALL LETTER O WITH ACUTE
793 /*F4 = U+*/0x00F4, //LATIN SMALL LETTER O WITH CIRCUMFLEX
794 /*F5 = U+*/0x00F5, //LATIN SMALL LETTER O WITH TILDE
795 /*F6 = U+*/0x00F6, //LATIN SMALL LETTER O WITH DIAERESIS
796 /*F7 = U+*/0x00F7, //DIVISION SIGN
797 /*F8 = U+*/0x00F8, //LATIN SMALL LETTER O WITH STROKE
798 /*F9 = U+*/0x00F9, //LATIN SMALL LETTER U WITH GRAVE
799 /*FA = U+*/0x00FA, //LATIN SMALL LETTER U WITH ACUTE
800 /*FB = U+*/0x00FB, //LATIN SMALL LETTER U WITH CIRCUMFLEX
801 /*FC = U+*/0x00FC, //LATIN SMALL LETTER U WITH DIAERESIS
802 /*FD = U+*/0x00FD, //LATIN SMALL LETTER Y WITH ACUTE
803 /*FE = U+*/0x00FE, //LATIN SMALL LETTER THORN
804 /*FF = U+*/0x00FF //LATIN SMALL LETTER Y WITH DIAERESIS
805 };
806
807 class TCStringObject : public TCObject
808 {
809 public:
TCStringObject(void)810 TCStringObject(void) : string(NULL) {}
TCStringObject(const char * string)811 TCStringObject(const char *string) : string(copyString(string)) {}
setString(const char * value)812 void setString(const char *value)
813 {
814 if (string != value)
815 {
816 delete[] string;
817 string = copyString(value);
818 }
819 }
getString(void)820 const char *getString(void) { return string; }
821 protected:
822 virtual ~TCStringObject(void);
dealloc(void)823 virtual void dealloc(void)
824 {
825 delete[] string;
826 TCObject::dealloc();
827 }
828
829 char *string;
830 };
831
// I got a compiler warning about this not being inlined when it was in the
// class definition, so I pulled it out.
// The body is intentionally empty: the owned string buffer is freed in
// TCStringObject::dealloc(), not here.
TCStringObject::~TCStringObject(void)
{
}
837
// Static member definitions.  Objects defined in one translation unit are
// initialized in definition order, so localStringsCleanup is constructed
// before sm_codePages.  Its constructor only calls initCodePages(), which is
// empty, so nothing touches sm_codePages before it exists.  Keep that in mind
// before reordering these lines or adding work to initCodePages().

// Shared instance, created on first use by getCurrentLocalStrings().
TCLocalStrings *TCLocalStrings::currentLocalStrings = NULL;
// Static helper whose destructor releases currentLocalStrings.
TCLocalStrings::TCLocalStringsCleanup TCLocalStrings::localStringsCleanup;
// Maps a code page number (1250, 1251, 1252) to its byte-to-wchar_t table.
IntWCharMap TCLocalStrings::sm_codePages;
841
TCLocalStringsCleanup(void)842 TCLocalStrings::TCLocalStringsCleanup::TCLocalStringsCleanup(void)
843 {
844 TCLocalStrings::initCodePages();
845 }
846
~TCLocalStringsCleanup(void)847 TCLocalStrings::TCLocalStringsCleanup::~TCLocalStringsCleanup(void)
848 {
849 if (currentLocalStrings)
850 {
851 currentLocalStrings->release();
852 }
853 }
854
855 // Note: Code Page 1252 is Windows Latin I, which is the default.
TCLocalStrings(void)856 TCLocalStrings::TCLocalStrings(void):
857 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
858 m_textCodec(NULL),
859 #endif // WIN32
860 m_codePage(UTF8_CODE_PAGE)
861 {
862 stringDict = new TCDictionary;
863 sm_codePages[1250] = g_cp1250;
864 sm_codePages[1251] = g_cp1251;
865 sm_codePages[1252] = g_cp1252;
866 }
867
// Intentionally empty: the string dictionary is released in dealloc().
TCLocalStrings::~TCLocalStrings(void)
{
}
871
// Tears the object down: drops the reference on the string dictionary first,
// then hands off to the base class.  The base call must stay last.
void TCLocalStrings::dealloc(void)
{
    TCObject::release(stringDict);
    TCObject::dealloc();
}
877
// Called from the TCLocalStringsCleanup constructor.  Currently does nothing;
// the code page tables are registered in the TCLocalStrings constructor
// instead.
void TCLocalStrings::initCodePages(void)
{
}
881
setStringTable(const char * stringTable,bool replace)882 bool TCLocalStrings::setStringTable(const char *stringTable, bool replace)
883 {
884 return getCurrentLocalStrings()->instSetStringTable(stringTable, replace);
885 }
886
setStringTable(const wchar_t * stringTable,bool replace)887 bool TCLocalStrings::setStringTable(const wchar_t *stringTable, bool replace)
888 {
889 return getCurrentLocalStrings()->instSetStringTable(stringTable, replace);
890 }
891
getCodePage(void)892 int TCLocalStrings::getCodePage(void)
893 {
894 return getCurrentLocalStrings()->instGetCodePage();
895 }
896
897 //#if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
898 //const QString &TCLocalStrings::get(const char *key)
899 //#else // WIN32
get(const char * key)900 const char *TCLocalStrings::get(const char *key)
901 //#endif // WIN32
902 {
903 return getCurrentLocalStrings()->instGetLocalString(key);
904 }
905
getUtf8(const char * key)906 const char *TCLocalStrings::getUtf8(const char *key)
907 {
908 return getCurrentLocalStrings()->instGetUtf8LocalString(key);
909 }
910
get(const wchar_t * key)911 const wchar_t *TCLocalStrings::get(const wchar_t *key)
912 {
913 return getCurrentLocalStrings()->instGetLocalString(key);
914 }
915
setStringTable(const TCByte * data,int tableSize,bool replace)916 bool TCLocalStrings::setStringTable(
917 const TCByte *data,
918 int tableSize,
919 bool replace /*= true*/)
920 {
921 bool retValue = false;
922 bool bUnicode16 = false;
923 bool bBigEndian = true;
924 int offset = 0;
925
926 if (tableSize >= 2 && data[0] == 0xFF && data[1] == 0xFE)
927 {
928 // Little Endian Unicode
929 bUnicode16 = true;
930 bBigEndian = false;
931 }
932 else if (tableSize >= 2 && data[0] == 0xFE && data[1] == 0xFF)
933 {
934 // Big Endian Unicode
935 bUnicode16 = true;
936 }
937 else if (tableSize >= 3 && data[0] == 0xEF && data[1] == 0xBB &&
938 data[2] == 0xBF)
939 {
940 offset = 3;
941 }
942 if (bUnicode16)
943 {
944 std::wstring wstringTable;
945 int i;
946 int count = tableSize / 2;
947
948 wstringTable.reserve(count + 1);
949 // Note: skip first 2 bytes, which are the Byte Order Mark.
950 for (i = 2; i < tableSize; i += 2)
951 {
952 int uByte;
953 int lByte;
954
955 if (bBigEndian)
956 {
957 uByte = data[i];
958 lByte = data[i + 1];
959 }
960 else
961 {
962 uByte = data[i + 1];
963 lByte = data[i];
964 }
965 wchar_t wc = (wchar_t)((uByte << 8) | lByte);
966 wstringTable.append(&wc, 1);
967 }
968 // wstringTable now contains the string table.
969 #ifdef NO_WSTRING
970 retValue = setStringTable(L"", replace);
971 #else // NO_WSTRING
972 retValue = setStringTable(wstringTable.c_str(), replace);
973 #endif // NO_WSTRING
974 }
975 else
976 {
977 char *stringTable = new char[tableSize - offset + 1];
978 memcpy(stringTable, &data[offset], tableSize - offset);
979
980 // Null terminate the string table
981 stringTable[tableSize - offset] = 0;
982 retValue = setStringTable(stringTable, replace);
983 delete[] stringTable;
984 }
985 return retValue;
986 }
987
loadStringTable(const char * filename,bool replace)988 bool TCLocalStrings::loadStringTable(const char *filename, bool replace)
989 {
990 FILE *tableFile = ucfopen(filename, "rb");
991 bool retValue = false;
992
993 if (tableFile)
994 {
995 long fileSize;
996 TCByte *fileData;
997
998 fseek(tableFile, 0, SEEK_END);
999 fileSize = ftell(tableFile);
1000 fseek(tableFile, 0, SEEK_SET);
1001 fileData = new TCByte[fileSize];
1002 if (fread(fileData, 1, fileSize, tableFile) == (unsigned)fileSize)
1003 {
1004 retValue = setStringTable(fileData, (int)fileSize, replace);
1005 }
1006 delete[] fileData;
1007 fclose(tableFile);
1008 }
1009 return retValue;
1010 }
1011
getCurrentLocalStrings(void)1012 TCLocalStrings *TCLocalStrings::getCurrentLocalStrings(void)
1013 {
1014 if (!currentLocalStrings)
1015 {
1016 currentLocalStrings = new TCLocalStrings;
1017 }
1018 return currentLocalStrings;
1019 }
1020
dumpTable(const char * filename,const char * header)1021 void TCLocalStrings::dumpTable(const char *filename, const char *header)
1022 {
1023 getCurrentLocalStrings()->instDumpTable(filename, header);
1024 }
1025
instDumpTable(const char * filename,const char * header)1026 void TCLocalStrings::instDumpTable(const char *filename, const char *header)
1027 {
1028 FILE *file = ucfopen(filename, "w");
1029
1030 if (file)
1031 {
1032 TCSortedStringArray *keys = stringDict->allKeys();
1033 int i;
1034 int count = keys->getCount();
1035
1036 if (header)
1037 {
1038 fprintf(file, "%s\n", header);
1039 }
1040 for (i = 0; i < count; i++)
1041 {
1042 const char *key = keys->stringAtIndex(i);
1043 const char *value = ((TCStringObject *)stringDict->objectForKey(key))->getString();
1044
1045 fprintf(file, "%s = %s\n", key, value);
1046 }
1047 for (WStringWStringMap::iterator it = m_strings.begin(); it != m_strings.end(); ++it)
1048 {
1049 #ifndef NO_WSTRING
1050 fprintf(file, "%S = %S\n", it->first.c_str(), it->second.c_str());
1051 #endif // NO_WSTRING
1052 }
1053 fclose(file);
1054 }
1055 }
1056
clear(void)1057 void TCLocalStrings::clear(void)
1058 {
1059 stringDict->removeAll();
1060 m_strings.clear();
1061 m_utf8Strings.clear();
1062 }
1063
instSetStringTable(const char * stringTable,bool replace)1064 bool TCLocalStrings::instSetStringTable(const char *stringTable, bool replace)
1065 {
1066 bool sectionFound = false;
1067 int lastKeyIndex = -1;
1068 std::string lastKey;
1069
1070 if (replace)
1071 {
1072 clear();
1073 }
1074 while (1)
1075 {
1076 const char *eol = strchr(stringTable, '\n');
1077
1078 if (!eol && strlen(stringTable) > 0)
1079 {
1080 eol = stringTable + strlen(stringTable);
1081 }
1082 if (eol)
1083 {
1084 int len = (int)(eol - stringTable);
1085 char *line = new char[len + 1];
1086
1087 strncpy(line, stringTable, len);
1088 line[len] = 0;
1089 stripCRLF(line);
1090 stripLeadingWhitespace(line);
1091 if (!sectionFound)
1092 {
1093 // We haven't found the [StringTable] section yet
1094 stripTrailingWhitespace(line);
1095 if (stringHasCaseInsensitivePrefix(line, "[StringTable") &&
1096 stringHasSuffix(line, "]"))
1097 {
1098 const char *codePageString = strcasestr(line, "CP=");
1099
1100 sectionFound = true;
1101 if (codePageString)
1102 {
1103 int codePage;
1104
1105 if (sscanf(&codePageString[3], "%d", &codePage) == 1)
1106 {
1107 instSetCodePage(codePage);
1108 }
1109 }
1110 }
1111 // Note that we are ignoring all lines until we find the section
1112 }
1113 else
1114 {
1115 // We're in the [StringTable] section
1116 if (line[0] == '[' && strchr(line, ']'))
1117 {
1118 if (!stringHasCaseInsensitivePrefix(line, "[StringTable") ||
1119 !stringHasSuffix(line, "]"))
1120 {
1121 // We found another section header, which means we are
1122 // at the end of the [StringTable] section, so we're
1123 // done. Note that if we see another [StringTable]
1124 // section, we'll just ignore that and continue on.
1125 break;
1126 }
1127 }
1128 else if (line[0] != ';')
1129 {
1130 // Comment lines begin with ;
1131 char *equalSpot = strchr(line, '=');
1132
1133 if (equalSpot)
1134 {
1135 char *value;
1136 char *key = line;
1137 TCStringObject *stringObject;
1138 int keyLen;
1139
1140 *equalSpot = 0;
1141 stripTrailingWhitespace(key);
1142 keyLen = (int)strlen(key);
1143 if (keyLen)
1144 {
1145 bool appended = false;
1146 std::wstring wkey;
1147 std::wstring wvalue;
1148
1149 mbstowstring(wkey, key, keyLen);
1150 value = copyString(equalSpot + 1);
1151 processEscapedString(value);
1152 mbstowstring(wvalue, value);
1153 // value = stringByReplacingSubstring(equalSpot + 1,
1154 // "\\n", "\n");
1155 if (isdigit(key[keyLen - 1]))
1156 {
1157 int keyIndex;
1158
1159 // If the last character of the key is a digit,
1160 // then it must be a multi-line key. So strip
1161 // off all trailing digits, and append to any
1162 // existing value. Note that keys aren't
1163 // allowed to end in a digit, so even if there
1164 // is only one line, the key still gets the
1165 // number stripped off the end.
1166 while (isdigit(key[keyLen - 1]) && keyLen > 0)
1167 {
1168 keyLen--;
1169 }
1170 keyIndex = atoi(&key[keyLen]);
1171 key[keyLen] = 0;
1172 if (lastKey != key)
1173 {
1174 lastKeyIndex = 0;
1175 }
1176 if (lastKey == key &&
1177 lastKeyIndex + 1 != keyIndex)
1178 {
1179 debugPrintf(
1180 "Key index out of sequence: %s%d\n",
1181 key, keyIndex);
1182 }
1183 lastKeyIndex = keyIndex;
1184 lastKey = key;
1185 mbstowstring(wkey, key, keyLen);
1186 stringObject = (TCStringObject*)stringDict->
1187 objectForKey(key);
1188 if (stringObject)
1189 {
1190 // If we've already got data for this key,
1191 // we need to append to it and note that we
1192 // did so.
1193 char *newValue = new char[strlen(value) +
1194 strlen(stringObject->getString()) + 1];
1195
1196 strcpy(newValue, stringObject->getString());
1197 strcat(newValue, value);
1198 // Note that we don't have to update the
1199 // dict; we're simply updating the text in
1200 // the string object already there.
1201 stringObject->setString(newValue);
1202 delete[] newValue;
1203 appended = true;
1204 // wstring copy constructor broken in VC++
1205 // 2005?!?!? The below doesn't work without
1206 // the .c_str() calls.
1207 #ifndef NO_WSTRING
1208 m_strings[wkey.c_str()] += wvalue.c_str();
1209 #endif // NO_WSTRING
1210 }
1211 }
1212 if (!appended)
1213 {
1214 if (stringDict->objectForKey(line))
1215 {
1216 debugPrintf("Local String key \"%s\" "
1217 "defined multiple times.\n", line);
1218 }
1219 stringObject = new TCStringObject(value);
1220 stringDict->setObjectForKey(stringObject, line);
1221 stringObject->release();
1222 // wstring copy constructor broken in VC++
1223 // 2005?!?!? The below doesn't work without the
1224 // .c_str() calls.
1225 #ifndef NO_WSTRING
1226 m_strings[wkey.c_str()] = wvalue.c_str();
1227 #endif // NO_WSTRING
1228 }
1229 delete[] value;
1230 }
1231 }
1232 }
1233 }
1234 delete[] line;
1235 if (!eol[0])
1236 {
1237 // If there isn't an EOL at the end of the file, we're done now.
1238 break;
1239 }
1240 stringTable += len + 1;
1241 while (stringTable[0] == '\r' || stringTable[0] == '\n')
1242 {
1243 stringTable++;
1244 }
1245 }
1246 else
1247 {
1248 break;
1249 }
1250 }
1251 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1252 //buildQStringMap();
1253 #endif // WIN32
1254 // Note that the load is considered a success if the [StringTable] section
1255 // is found in the data.
1256 return sectionFound;
1257 }
1258
instSetCodePage(int codePage)1259 void TCLocalStrings::instSetCodePage(int codePage)
1260 {
1261 m_codePage = codePage;
1262 // if (m_codePage == 1250)
1263 // {
1264 // return;
1265 // }
1266 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1267 QString name;
1268
1269 name = QString("CP%1").arg(codePage);
1270 m_textCodec =
1271 QTextCodec::codecForName((const char *)name.toLatin1().constData());
1272 #endif // WIN32
1273 }
1274
instSetStringTable(const wchar_t * stringTable,bool replace)1275 bool TCLocalStrings::instSetStringTable(const wchar_t *stringTable,
1276 bool replace)
1277 {
1278 bool sectionFound = false;
1279 int lastKeyIndex = -1;
1280 std::wstring lastKey;
1281
1282 if (replace)
1283 {
1284 clear();
1285 }
1286 while (1)
1287 {
1288 const wchar_t *eol = wcschr(stringTable, '\n');
1289
1290 if (!eol && wcslen(stringTable) > 0)
1291 {
1292 eol = stringTable + wcslen(stringTable);
1293 }
1294 if (eol)
1295 {
1296 int len = (int)(eol - stringTable);
1297 wchar_t *line = new wchar_t[len + 1];
1298
1299 wcsncpy(line, stringTable, len);
1300 line[len] = 0;
1301 stripCRLF(line);
1302 stripLeadingWhitespace(line);
1303 if (!sectionFound)
1304 {
1305 // We haven't found the [StringTable] section yet
1306 stripTrailingWhitespace(line);
1307 if (stringHasCaseInsensitivePrefix(line, L"[StringTable") &&
1308 stringHasSuffix(line, L"]"))
1309 {
1310 sectionFound = true;
1311 }
1312 // Note that we are ignoring all lines until we find the section
1313 }
1314 else
1315 {
1316 // We're in the [StringTable] section
1317 if (line[0] == '[' && wcschr(line, ']'))
1318 {
1319 // We found another section header, which means we are at
1320 // the end of the [StringTable] section, so we're done
1321 break;
1322 }
1323 else if (line[0] != ';')
1324 {
1325 // Comment lines begin with ;
1326 wchar_t *equalSpot = wcschr(line, '=');
1327
1328 if (equalSpot)
1329 {
1330 wchar_t *value;
1331 wchar_t *key = line;
1332 TCStringObject *stringObject;
1333 int keyLen;
1334
1335 *equalSpot = 0;
1336 stripTrailingWhitespace(key);
1337 keyLen = (int)wcslen(key);
1338 if (keyLen)
1339 {
1340 bool appended = false;
1341 std::string skey;
1342 std::string svalue;
1343
1344 wcstostring(skey, key, keyLen);
1345 value = copyString(equalSpot + 1);
1346 processEscapedString(value);
1347 wcstostring(svalue, value);
1348 // value = stringByReplacingSubstring(equalSpot + 1,
1349 // "\\n", "\n");
1350 if (isdigit(key[keyLen - 1]))
1351 {
1352 int keyIndex;
1353
1354 // If the last character of the key is a digit,
1355 // then it must be a multi-line key. So strip
1356 // off all trailing digits, and append to any
1357 // existing value. Note that keys aren't
1358 // allowed to end in a digit, so even if there
1359 // is only one line, the key still gets the
1360 // number stripped off the end.
1361 while (isdigit(key[keyLen - 1]) && keyLen > 0)
1362 {
1363 keyLen--;
1364 }
1365 keyIndex = (int)wcstoul(&key[keyLen], NULL, 10);
1366 key[keyLen] = 0;
1367 if (lastKey != key)
1368 {
1369 lastKeyIndex = 0;
1370 }
1371 if (lastKey == key &&
1372 lastKeyIndex + 1 != keyIndex)
1373 {
1374 debugPrintf(
1375 "Key index out of sequence: %s%d\n",
1376 key, keyIndex);
1377 }
1378 lastKeyIndex = keyIndex;
1379 lastKey = key;
1380 wcstostring(skey, key, keyLen);
1381 stringObject = (TCStringObject*)stringDict->
1382 objectForKey(skey.c_str());
1383 if (stringObject)
1384 {
1385 // If we've already got data for this key,
1386 // we need to append to it and note that we
1387 // did so.
1388 char *newValue = new char[svalue.size() +
1389 strlen(stringObject->getString()) + 1];
1390
1391 strcpy(newValue, stringObject->getString());
1392 strcat(newValue, svalue.c_str());
1393 // Note that we don't have to update the
1394 // dict; we're simply updating the text in
1395 // the string object already there.
1396 stringObject->setString(newValue);
1397 delete[] newValue;
1398 appended = true;
1399 m_strings[key] += value;
1400 }
1401 }
1402 if (!appended)
1403 {
1404 if (stringDict->objectForKey(skey.c_str()))
1405 {
1406 debugPrintf("Local String key \"%s\" "
1407 "defined multiple times.\n",
1408 skey.c_str());
1409 }
1410 stringObject =
1411 new TCStringObject(svalue.c_str());
1412 stringDict->setObjectForKey(stringObject,
1413 skey.c_str());
1414 stringObject->release();
1415 m_strings[key] = value;
1416 }
1417 delete[] value;
1418 }
1419 }
1420 }
1421 }
1422 delete[] line;
1423 if (!eol[0])
1424 {
1425 // If there isn't an EOL at the end of the file, we're done now.
1426 break;
1427 }
1428 stringTable += len + 1;
1429 while (stringTable[0] == '\r' || stringTable[0] == '\n')
1430 {
1431 stringTable++;
1432 }
1433 }
1434 else
1435 {
1436 break;
1437 }
1438 }
1439 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1440 //buildQStringMap();
1441 #endif // WIN32
1442 // Note that the load is considered a success if the [StringTable] section
1443 // is found in the data.
1444 return sectionFound;
1445 }
1446
instGetLocalString(const wchar_t * key)1447 const wchar_t *TCLocalStrings::instGetLocalString(const wchar_t *key)
1448 {
1449 WStringWStringMap::const_iterator it = m_strings.find(key);
1450
1451 if (it != m_strings.end())
1452 {
1453 #ifdef NO_WSTRING
1454 return L"";
1455 #else // NO_WSTRING
1456 return it->second.c_str();
1457 #endif // NO_WSTRING
1458 }
1459 else
1460 {
1461 std::string temp;
1462
1463 wstringtostring(temp, key);
1464 debugPrintf("LocalString %s not found!!!!!!\n", temp.c_str());
1465 // It should really be NULL, but that means a mistake will likely cause
1466 // a crash. At least with an empty string it's less likely to crash.
1467 return L"";
1468 }
1469 }
1470
mbstowstring(std::wstring & dst,const char * src,int length)1471 void TCLocalStrings::mbstowstring(std::wstring &dst, const char *src,
1472 int length /*= -1*/)
1473 {
1474 if (m_codePage == UTF8_CODE_PAGE)
1475 {
1476 utf8towstring(dst, src);
1477 return;
1478 }
1479 wchar_t *codePageTable = NULL;
1480 IntWCharMap::const_iterator it = sm_codePages.find(m_codePage);
1481
1482 if (it != sm_codePages.end())
1483 {
1484 codePageTable = it->second;
1485 }
1486 if (codePageTable)
1487 {
1488 int i;
1489
1490 if (length == -1)
1491 {
1492 length = (int)strlen(src);
1493 }
1494 dst.resize(length);
1495 for (i = 0; i < length; i++)
1496 {
1497 dst[i] = codePageTable[(TCByte)src[i]];
1498 }
1499 }
1500 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1501 else if (m_textCodec)
1502 {
1503 QString unicodeString = m_textCodec->toUnicode(src);
1504 dst.clear();
1505 dst.resize(unicodeString.length());
1506 for (int i = 0; i < (int)unicodeString.length(); i++)
1507 {
1508 QChar qchar = unicodeString.at(i);
1509
1510 dst[i] = (wchar_t)qchar.unicode();
1511 }
1512 }
1513 #endif // WIN32
1514 else
1515 {
1516 ::mbstowstring(dst, src, length);
1517 }
1518 }
1519
1520 /*
1521 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1522 #include <QT/misc.h>
1523 const QString &TCLocalStrings::instGetLocalString(const char *key)
1524 {
1525 QStringQStringMap::iterator it = m_qStrings.find(key);
1526
1527 if (it != m_qStrings.end())
1528 {
1529 return it->second;
1530 }
1531 else
1532 {
1533 debugPrintf("LocalString %s not found!!!!!!\n", key);
1534 return m_emptyQString;
1535 }
1536 }
1537
1538 void TCLocalStrings::buildQStringMap(void)
1539 {
1540 m_qStrings.clear();
1541 for (WStringWStringMap::iterator it = m_strings.begin();
1542 it != m_strings.end(); ++it)
1543 {
1544 QString key;
1545 QString value;
1546
1547 wstringtoqstring(key, it->first);
1548 wstringtoqstring(value, it->second);
1549 m_qStrings[key] = value;
1550 }
1551 }
1552
1553 #else // WIN32
1554 */
instGetLocalString(const char * key)1555 const char *TCLocalStrings::instGetLocalString(const char *key)
1556 {
1557 TCStringObject *stringObject =
1558 (TCStringObject*)stringDict->objectForKey(key);
1559
1560 if (stringObject)
1561 {
1562 return stringObject->getString();
1563 }
1564 else
1565 {
1566 debugPrintf("LocalString %s not found!!!!!!\n", key);
1567 // It should really be NULL, but that means a mistake will likely cause
1568 // a crash. At least with an empty string it's less likely to crash.
1569 return "";
1570 }
1571 }
1572 //#endif // WIN32
1573
instGetUtf8LocalString(const char * key)1574 const char *TCLocalStrings::instGetUtf8LocalString(const char *key)
1575 {
1576 StringStringMap::const_iterator it = m_utf8Strings.find(key);
1577
1578 if (it != m_utf8Strings.end())
1579 {
1580 return it->second.c_str();
1581 }
1582 else
1583 {
1584 #ifdef TC_NO_UNICODE
1585 m_utf8Strings[key] = (const char *)instGetLocalString(key);
1586 #else // TC_NO_UNICODE
1587 std::wstring wKey;
1588 const wchar_t *wValue;
1589 std::string utf8Value;
1590
1591 mbstowstring(wKey, key);
1592 wValue = instGetLocalString(wKey.c_str());
1593 if (ucstringtoutf8(utf8Value, wValue))
1594 {
1595 m_utf8Strings[key] = utf8Value;
1596 }
1597 else
1598 {
1599 m_utf8Strings[key] = instGetLocalString(key);
1600 // m_utf8Strings[key] =
1601 // (const char *)instGetLocalString(key).toLatin1().constData();
1602 }
1603 #endif // TC_NO_UNICODE
1604 return m_utf8Strings[key].c_str();
1605 }
1606 }
1607