1 #include "TCLocalStrings.h"
2 #include "TCDictionary.h"
3 #include "TCSortedStringArray.h"
4 #include "mystring.h"
5 
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <ctype.h>
9 
// Numeric code page identifier used for UTF-8.
#define UTF8_CODE_PAGE 65001

// Debug builds on Windows with a Microsoft compiler reporting
// _MSC_VER >= 1400 replace the `new` keyword with DEBUG_CLIENTBLOCK.
// NOTE(review): DEBUG_CLIENTBLOCK is not defined in the visible part of this
// file; presumably it comes from one of the project headers -- confirm.
#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400 && defined(_DEBUG)
#define new DEBUG_CLIENTBLOCK
#endif // WIN32 && _MSC_VER >= 1400 && _DEBUG
17 
// Code page 1250 -> Unicode lookup table.  The array index is the code page
// byte value and the element is the corresponding Unicode code point.
// The mapping was taken from:
// http://www.microsoft.com/globaldev/reference/sbcs/1250.mspx
// Bytes that have no assignment in that chart (the holes in it) were filled in
// by hand; they hold 0xFFFF and are tagged <UNMAPPED>.
static wchar_t g_cp1250[256] =
{
	// 0x00 - 0x7F: every byte maps to the code point with the same value.
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, // 00-07: NULL .. BELL
	0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, // 08-0F: BACKSPACE .. SHIFT IN
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, // 10-17: DATA LINK ESCAPE .. END OF TRANSMISSION BLOCK
	0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, // 18-1F: CANCEL .. UNIT SEPARATOR
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, // 20-27: SPACE .. APOSTROPHE
	0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, // 28-2F: LEFT PARENTHESIS .. SOLIDUS
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, // 30-37: DIGIT ZERO .. DIGIT SEVEN
	0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, // 38-3F: DIGIT EIGHT .. QUESTION MARK
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, // 40-47: COMMERCIAL AT .. LATIN CAPITAL LETTER G
	0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, // 48-4F: LATIN CAPITAL LETTER H .. O
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, // 50-57: LATIN CAPITAL LETTER P .. W
	0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, // 58-5F: LATIN CAPITAL LETTER X .. LOW LINE
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, // 60-67: GRAVE ACCENT .. LATIN SMALL LETTER G
	0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, // 68-6F: LATIN SMALL LETTER H .. O
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, // 70-77: LATIN SMALL LETTER P .. W
	0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, // 78-7F: LATIN SMALL LETTER X .. DELETE

	// 0x80 - 0xFF: assignments specific to this code page, one per line.
	0x20AC, // 80: EURO SIGN
	0xFFFF, // 81: <UNMAPPED>
	0x201A, // 82: SINGLE LOW-9 QUOTATION MARK
	0xFFFF, // 83: <UNMAPPED>
	0x201E, // 84: DOUBLE LOW-9 QUOTATION MARK
	0x2026, // 85: HORIZONTAL ELLIPSIS
	0x2020, // 86: DAGGER
	0x2021, // 87: DOUBLE DAGGER
	0xFFFF, // 88: <UNMAPPED>
	0x2030, // 89: PER MILLE SIGN
	0x0160, // 8A: LATIN CAPITAL LETTER S WITH CARON
	0x2039, // 8B: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
	0x015A, // 8C: LATIN CAPITAL LETTER S WITH ACUTE
	0x0164, // 8D: LATIN CAPITAL LETTER T WITH CARON
	0x017D, // 8E: LATIN CAPITAL LETTER Z WITH CARON
	0x0179, // 8F: LATIN CAPITAL LETTER Z WITH ACUTE
	0xFFFF, // 90: <UNMAPPED>
	0x2018, // 91: LEFT SINGLE QUOTATION MARK
	0x2019, // 92: RIGHT SINGLE QUOTATION MARK
	0x201C, // 93: LEFT DOUBLE QUOTATION MARK
	0x201D, // 94: RIGHT DOUBLE QUOTATION MARK
	0x2022, // 95: BULLET
	0x2013, // 96: EN DASH
	0x2014, // 97: EM DASH
	0xFFFF, // 98: <UNMAPPED>
	0x2122, // 99: TRADE MARK SIGN
	0x0161, // 9A: LATIN SMALL LETTER S WITH CARON
	0x203A, // 9B: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
	0x015B, // 9C: LATIN SMALL LETTER S WITH ACUTE
	0x0165, // 9D: LATIN SMALL LETTER T WITH CARON
	0x017E, // 9E: LATIN SMALL LETTER Z WITH CARON
	0x017A, // 9F: LATIN SMALL LETTER Z WITH ACUTE
	0x00A0, // A0: NO-BREAK SPACE
	0x02C7, // A1: CARON
	0x02D8, // A2: BREVE
	0x0141, // A3: LATIN CAPITAL LETTER L WITH STROKE
	0x00A4, // A4: CURRENCY SIGN
	0x0104, // A5: LATIN CAPITAL LETTER A WITH OGONEK
	0x00A6, // A6: BROKEN BAR
	0x00A7, // A7: SECTION SIGN
	0x00A8, // A8: DIAERESIS
	0x00A9, // A9: COPYRIGHT SIGN
	0x015E, // AA: LATIN CAPITAL LETTER S WITH CEDILLA
	0x00AB, // AB: LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
	0x00AC, // AC: NOT SIGN
	0x00AD, // AD: SOFT HYPHEN
	0x00AE, // AE: REGISTERED SIGN
	0x017B, // AF: LATIN CAPITAL LETTER Z WITH DOT ABOVE
	0x00B0, // B0: DEGREE SIGN
	0x00B1, // B1: PLUS-MINUS SIGN
	0x02DB, // B2: OGONEK
	0x0142, // B3: LATIN SMALL LETTER L WITH STROKE
	0x00B4, // B4: ACUTE ACCENT
	0x00B5, // B5: MICRO SIGN
	0x00B6, // B6: PILCROW SIGN
	0x00B7, // B7: MIDDLE DOT
	0x00B8, // B8: CEDILLA
	0x0105, // B9: LATIN SMALL LETTER A WITH OGONEK
	0x015F, // BA: LATIN SMALL LETTER S WITH CEDILLA
	0x00BB, // BB: RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
	0x013D, // BC: LATIN CAPITAL LETTER L WITH CARON
	0x02DD, // BD: DOUBLE ACUTE ACCENT
	0x013E, // BE: LATIN SMALL LETTER L WITH CARON
	0x017C, // BF: LATIN SMALL LETTER Z WITH DOT ABOVE
	0x0154, // C0: LATIN CAPITAL LETTER R WITH ACUTE
	0x00C1, // C1: LATIN CAPITAL LETTER A WITH ACUTE
	0x00C2, // C2: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
	0x0102, // C3: LATIN CAPITAL LETTER A WITH BREVE
	0x00C4, // C4: LATIN CAPITAL LETTER A WITH DIAERESIS
	0x0139, // C5: LATIN CAPITAL LETTER L WITH ACUTE
	0x0106, // C6: LATIN CAPITAL LETTER C WITH ACUTE
	0x00C7, // C7: LATIN CAPITAL LETTER C WITH CEDILLA
	0x010C, // C8: LATIN CAPITAL LETTER C WITH CARON
	0x00C9, // C9: LATIN CAPITAL LETTER E WITH ACUTE
	0x0118, // CA: LATIN CAPITAL LETTER E WITH OGONEK
	0x00CB, // CB: LATIN CAPITAL LETTER E WITH DIAERESIS
	0x011A, // CC: LATIN CAPITAL LETTER E WITH CARON
	0x00CD, // CD: LATIN CAPITAL LETTER I WITH ACUTE
	0x00CE, // CE: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
	0x010E, // CF: LATIN CAPITAL LETTER D WITH CARON
	0x0110, // D0: LATIN CAPITAL LETTER D WITH STROKE
	0x0143, // D1: LATIN CAPITAL LETTER N WITH ACUTE
	0x0147, // D2: LATIN CAPITAL LETTER N WITH CARON
	0x00D3, // D3: LATIN CAPITAL LETTER O WITH ACUTE
	0x00D4, // D4: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
	0x0150, // D5: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
	0x00D6, // D6: LATIN CAPITAL LETTER O WITH DIAERESIS
	0x00D7, // D7: MULTIPLICATION SIGN
	0x0158, // D8: LATIN CAPITAL LETTER R WITH CARON
	0x016E, // D9: LATIN CAPITAL LETTER U WITH RING ABOVE
	0x00DA, // DA: LATIN CAPITAL LETTER U WITH ACUTE
	0x0170, // DB: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
	0x00DC, // DC: LATIN CAPITAL LETTER U WITH DIAERESIS
	0x00DD, // DD: LATIN CAPITAL LETTER Y WITH ACUTE
	0x0162, // DE: LATIN CAPITAL LETTER T WITH CEDILLA
	0x00DF, // DF: LATIN SMALL LETTER SHARP S
	0x0155, // E0: LATIN SMALL LETTER R WITH ACUTE
	0x00E1, // E1: LATIN SMALL LETTER A WITH ACUTE
	0x00E2, // E2: LATIN SMALL LETTER A WITH CIRCUMFLEX
	0x0103, // E3: LATIN SMALL LETTER A WITH BREVE
	0x00E4, // E4: LATIN SMALL LETTER A WITH DIAERESIS
	0x013A, // E5: LATIN SMALL LETTER L WITH ACUTE
	0x0107, // E6: LATIN SMALL LETTER C WITH ACUTE
	0x00E7, // E7: LATIN SMALL LETTER C WITH CEDILLA
	0x010D, // E8: LATIN SMALL LETTER C WITH CARON
	0x00E9, // E9: LATIN SMALL LETTER E WITH ACUTE
	0x0119, // EA: LATIN SMALL LETTER E WITH OGONEK
	0x00EB, // EB: LATIN SMALL LETTER E WITH DIAERESIS
	0x011B, // EC: LATIN SMALL LETTER E WITH CARON
	0x00ED, // ED: LATIN SMALL LETTER I WITH ACUTE
	0x00EE, // EE: LATIN SMALL LETTER I WITH CIRCUMFLEX
	0x010F, // EF: LATIN SMALL LETTER D WITH CARON
	0x0111, // F0: LATIN SMALL LETTER D WITH STROKE
	0x0144, // F1: LATIN SMALL LETTER N WITH ACUTE
	0x0148, // F2: LATIN SMALL LETTER N WITH CARON
	0x00F3, // F3: LATIN SMALL LETTER O WITH ACUTE
	0x00F4, // F4: LATIN SMALL LETTER O WITH CIRCUMFLEX
	0x0151, // F5: LATIN SMALL LETTER O WITH DOUBLE ACUTE
	0x00F6, // F6: LATIN SMALL LETTER O WITH DIAERESIS
	0x00F7, // F7: DIVISION SIGN
	0x0159, // F8: LATIN SMALL LETTER R WITH CARON
	0x016F, // F9: LATIN SMALL LETTER U WITH RING ABOVE
	0x00FA, // FA: LATIN SMALL LETTER U WITH ACUTE
	0x0171, // FB: LATIN SMALL LETTER U WITH DOUBLE ACUTE
	0x00FC, // FC: LATIN SMALL LETTER U WITH DIAERESIS
	0x00FD, // FD: LATIN SMALL LETTER Y WITH ACUTE
	0x0163, // FE: LATIN SMALL LETTER T WITH CEDILLA
	0x02D9  // FF: DOT ABOVE
};
280 
// Code page 1251 -> Unicode lookup table.  The array index is the code page
// byte value and the element is the corresponding Unicode code point.
// The mapping was taken from:
// http://www.microsoft.com/globaldev/reference/sbcs/1251.mspx
// The <UNMAPPED> entry was added by hand based on the hole in the chart.
// Byte 0x98 is the only position without an assignment; it is stored as
// 0xFFFF, the same <UNMAPPED> marker g_cp1250 uses, rather than 0x0000, so
// that a stray 0x98 byte cannot turn into a string terminator in the
// converted text.
static wchar_t g_cp1251[256] =
{
	/*00 = U+*/0x0000, //NULL
	/*01 = U+*/0x0001, //START OF HEADING
	/*02 = U+*/0x0002, //START OF TEXT
	/*03 = U+*/0x0003, //END OF TEXT
	/*04 = U+*/0x0004, //END OF TRANSMISSION
	/*05 = U+*/0x0005, //ENQUIRY
	/*06 = U+*/0x0006, //ACKNOWLEDGE
	/*07 = U+*/0x0007, //BELL
	/*08 = U+*/0x0008, //BACKSPACE
	/*09 = U+*/0x0009, //HORIZONTAL TABULATION
	/*0A = U+*/0x000A, //LINE FEED
	/*0B = U+*/0x000B, //VERTICAL TABULATION
	/*0C = U+*/0x000C, //FORM FEED
	/*0D = U+*/0x000D, //CARRIAGE RETURN
	/*0E = U+*/0x000E, //SHIFT OUT
	/*0F = U+*/0x000F, //SHIFT IN
	/*10 = U+*/0x0010, //DATA LINK ESCAPE
	/*11 = U+*/0x0011, //DEVICE CONTROL ONE
	/*12 = U+*/0x0012, //DEVICE CONTROL TWO
	/*13 = U+*/0x0013, //DEVICE CONTROL THREE
	/*14 = U+*/0x0014, //DEVICE CONTROL FOUR
	/*15 = U+*/0x0015, //NEGATIVE ACKNOWLEDGE
	/*16 = U+*/0x0016, //SYNCHRONOUS IDLE
	/*17 = U+*/0x0017, //END OF TRANSMISSION BLOCK
	/*18 = U+*/0x0018, //CANCEL
	/*19 = U+*/0x0019, //END OF MEDIUM
	/*1A = U+*/0x001A, //SUBSTITUTE
	/*1B = U+*/0x001B, //ESCAPE
	/*1C = U+*/0x001C, //FILE SEPARATOR
	/*1D = U+*/0x001D, //GROUP SEPARATOR
	/*1E = U+*/0x001E, //RECORD SEPARATOR
	/*1F = U+*/0x001F, //UNIT SEPARATOR
	/*20 = U+*/0x0020, //SPACE
	/*21 = U+*/0x0021, //EXCLAMATION MARK
	/*22 = U+*/0x0022, //QUOTATION MARK
	/*23 = U+*/0x0023, //NUMBER SIGN
	/*24 = U+*/0x0024, //DOLLAR SIGN
	/*25 = U+*/0x0025, //PERCENT SIGN
	/*26 = U+*/0x0026, //AMPERSAND
	/*27 = U+*/0x0027, //APOSTROPHE
	/*28 = U+*/0x0028, //LEFT PARENTHESIS
	/*29 = U+*/0x0029, //RIGHT PARENTHESIS
	/*2A = U+*/0x002A, //ASTERISK
	/*2B = U+*/0x002B, //PLUS SIGN
	/*2C = U+*/0x002C, //COMMA
	/*2D = U+*/0x002D, //HYPHEN-MINUS
	/*2E = U+*/0x002E, //FULL STOP
	/*2F = U+*/0x002F, //SOLIDUS
	/*30 = U+*/0x0030, //DIGIT ZERO
	/*31 = U+*/0x0031, //DIGIT ONE
	/*32 = U+*/0x0032, //DIGIT TWO
	/*33 = U+*/0x0033, //DIGIT THREE
	/*34 = U+*/0x0034, //DIGIT FOUR
	/*35 = U+*/0x0035, //DIGIT FIVE
	/*36 = U+*/0x0036, //DIGIT SIX
	/*37 = U+*/0x0037, //DIGIT SEVEN
	/*38 = U+*/0x0038, //DIGIT EIGHT
	/*39 = U+*/0x0039, //DIGIT NINE
	/*3A = U+*/0x003A, //COLON
	/*3B = U+*/0x003B, //SEMICOLON
	/*3C = U+*/0x003C, //LESS-THAN SIGN
	/*3D = U+*/0x003D, //EQUALS SIGN
	/*3E = U+*/0x003E, //GREATER-THAN SIGN
	/*3F = U+*/0x003F, //QUESTION MARK
	/*40 = U+*/0x0040, //COMMERCIAL AT
	/*41 = U+*/0x0041, //LATIN CAPITAL LETTER A
	/*42 = U+*/0x0042, //LATIN CAPITAL LETTER B
	/*43 = U+*/0x0043, //LATIN CAPITAL LETTER C
	/*44 = U+*/0x0044, //LATIN CAPITAL LETTER D
	/*45 = U+*/0x0045, //LATIN CAPITAL LETTER E
	/*46 = U+*/0x0046, //LATIN CAPITAL LETTER F
	/*47 = U+*/0x0047, //LATIN CAPITAL LETTER G
	/*48 = U+*/0x0048, //LATIN CAPITAL LETTER H
	/*49 = U+*/0x0049, //LATIN CAPITAL LETTER I
	/*4A = U+*/0x004A, //LATIN CAPITAL LETTER J
	/*4B = U+*/0x004B, //LATIN CAPITAL LETTER K
	/*4C = U+*/0x004C, //LATIN CAPITAL LETTER L
	/*4D = U+*/0x004D, //LATIN CAPITAL LETTER M
	/*4E = U+*/0x004E, //LATIN CAPITAL LETTER N
	/*4F = U+*/0x004F, //LATIN CAPITAL LETTER O
	/*50 = U+*/0x0050, //LATIN CAPITAL LETTER P
	/*51 = U+*/0x0051, //LATIN CAPITAL LETTER Q
	/*52 = U+*/0x0052, //LATIN CAPITAL LETTER R
	/*53 = U+*/0x0053, //LATIN CAPITAL LETTER S
	/*54 = U+*/0x0054, //LATIN CAPITAL LETTER T
	/*55 = U+*/0x0055, //LATIN CAPITAL LETTER U
	/*56 = U+*/0x0056, //LATIN CAPITAL LETTER V
	/*57 = U+*/0x0057, //LATIN CAPITAL LETTER W
	/*58 = U+*/0x0058, //LATIN CAPITAL LETTER X
	/*59 = U+*/0x0059, //LATIN CAPITAL LETTER Y
	/*5A = U+*/0x005A, //LATIN CAPITAL LETTER Z
	/*5B = U+*/0x005B, //LEFT SQUARE BRACKET
	/*5C = U+*/0x005C, //REVERSE SOLIDUS
	/*5D = U+*/0x005D, //RIGHT SQUARE BRACKET
	/*5E = U+*/0x005E, //CIRCUMFLEX ACCENT
	/*5F = U+*/0x005F, //LOW LINE
	/*60 = U+*/0x0060, //GRAVE ACCENT
	/*61 = U+*/0x0061, //LATIN SMALL LETTER A
	/*62 = U+*/0x0062, //LATIN SMALL LETTER B
	/*63 = U+*/0x0063, //LATIN SMALL LETTER C
	/*64 = U+*/0x0064, //LATIN SMALL LETTER D
	/*65 = U+*/0x0065, //LATIN SMALL LETTER E
	/*66 = U+*/0x0066, //LATIN SMALL LETTER F
	/*67 = U+*/0x0067, //LATIN SMALL LETTER G
	/*68 = U+*/0x0068, //LATIN SMALL LETTER H
	/*69 = U+*/0x0069, //LATIN SMALL LETTER I
	/*6A = U+*/0x006A, //LATIN SMALL LETTER J
	/*6B = U+*/0x006B, //LATIN SMALL LETTER K
	/*6C = U+*/0x006C, //LATIN SMALL LETTER L
	/*6D = U+*/0x006D, //LATIN SMALL LETTER M
	/*6E = U+*/0x006E, //LATIN SMALL LETTER N
	/*6F = U+*/0x006F, //LATIN SMALL LETTER O
	/*70 = U+*/0x0070, //LATIN SMALL LETTER P
	/*71 = U+*/0x0071, //LATIN SMALL LETTER Q
	/*72 = U+*/0x0072, //LATIN SMALL LETTER R
	/*73 = U+*/0x0073, //LATIN SMALL LETTER S
	/*74 = U+*/0x0074, //LATIN SMALL LETTER T
	/*75 = U+*/0x0075, //LATIN SMALL LETTER U
	/*76 = U+*/0x0076, //LATIN SMALL LETTER V
	/*77 = U+*/0x0077, //LATIN SMALL LETTER W
	/*78 = U+*/0x0078, //LATIN SMALL LETTER X
	/*79 = U+*/0x0079, //LATIN SMALL LETTER Y
	/*7A = U+*/0x007A, //LATIN SMALL LETTER Z
	/*7B = U+*/0x007B, //LEFT CURLY BRACKET
	/*7C = U+*/0x007C, //VERTICAL LINE
	/*7D = U+*/0x007D, //RIGHT CURLY BRACKET
	/*7E = U+*/0x007E, //TILDE
	/*7F = U+*/0x007F, //DELETE
	/*80 = U+*/0x0402, //CYRILLIC CAPITAL LETTER DJE
	/*81 = U+*/0x0403, //CYRILLIC CAPITAL LETTER GJE
	/*82 = U+*/0x201A, //SINGLE LOW-9 QUOTATION MARK
	/*83 = U+*/0x0453, //CYRILLIC SMALL LETTER GJE
	/*84 = U+*/0x201E, //DOUBLE LOW-9 QUOTATION MARK
	/*85 = U+*/0x2026, //HORIZONTAL ELLIPSIS
	/*86 = U+*/0x2020, //DAGGER
	/*87 = U+*/0x2021, //DOUBLE DAGGER
	/*88 = U+*/0x20AC, //EURO SIGN
	/*89 = U+*/0x2030, //PER MILLE SIGN
	/*8A = U+*/0x0409, //CYRILLIC CAPITAL LETTER LJE
	/*8B = U+*/0x2039, //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
	/*8C = U+*/0x040A, //CYRILLIC CAPITAL LETTER NJE
	/*8D = U+*/0x040C, //CYRILLIC CAPITAL LETTER KJE
	/*8E = U+*/0x040B, //CYRILLIC CAPITAL LETTER TSHE
	/*8F = U+*/0x040F, //CYRILLIC CAPITAL LETTER DZHE
	/*90 = U+*/0x0452, //CYRILLIC SMALL LETTER DJE
	/*91 = U+*/0x2018, //LEFT SINGLE QUOTATION MARK
	/*92 = U+*/0x2019, //RIGHT SINGLE QUOTATION MARK
	/*93 = U+*/0x201C, //LEFT DOUBLE QUOTATION MARK
	/*94 = U+*/0x201D, //RIGHT DOUBLE QUOTATION MARK
	/*95 = U+*/0x2022, //BULLET
	/*96 = U+*/0x2013, //EN DASH
	/*97 = U+*/0x2014, //EM DASH
	/*98 = U+*/0xFFFF, //<UNMAPPED>
	/*99 = U+*/0x2122, //TRADE MARK SIGN
	/*9A = U+*/0x0459, //CYRILLIC SMALL LETTER LJE
	/*9B = U+*/0x203A, //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
	/*9C = U+*/0x045A, //CYRILLIC SMALL LETTER NJE
	/*9D = U+*/0x045C, //CYRILLIC SMALL LETTER KJE
	/*9E = U+*/0x045B, //CYRILLIC SMALL LETTER TSHE
	/*9F = U+*/0x045F, //CYRILLIC SMALL LETTER DZHE
	/*A0 = U+*/0x00A0, //NO-BREAK SPACE
	/*A1 = U+*/0x040E, //CYRILLIC CAPITAL LETTER SHORT U
	/*A2 = U+*/0x045E, //CYRILLIC SMALL LETTER SHORT U
	/*A3 = U+*/0x0408, //CYRILLIC CAPITAL LETTER JE
	/*A4 = U+*/0x00A4, //CURRENCY SIGN
	/*A5 = U+*/0x0490, //CYRILLIC CAPITAL LETTER GHE WITH UPTURN
	/*A6 = U+*/0x00A6, //BROKEN BAR
	/*A7 = U+*/0x00A7, //SECTION SIGN
	/*A8 = U+*/0x0401, //CYRILLIC CAPITAL LETTER IO
	/*A9 = U+*/0x00A9, //COPYRIGHT SIGN
	/*AA = U+*/0x0404, //CYRILLIC CAPITAL LETTER UKRAINIAN IE
	/*AB = U+*/0x00AB, //LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
	/*AC = U+*/0x00AC, //NOT SIGN
	/*AD = U+*/0x00AD, //SOFT HYPHEN
	/*AE = U+*/0x00AE, //REGISTERED SIGN
	/*AF = U+*/0x0407, //CYRILLIC CAPITAL LETTER YI
	/*B0 = U+*/0x00B0, //DEGREE SIGN
	/*B1 = U+*/0x00B1, //PLUS-MINUS SIGN
	/*B2 = U+*/0x0406, //CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
	/*B3 = U+*/0x0456, //CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
	/*B4 = U+*/0x0491, //CYRILLIC SMALL LETTER GHE WITH UPTURN
	/*B5 = U+*/0x00B5, //MICRO SIGN
	/*B6 = U+*/0x00B6, //PILCROW SIGN
	/*B7 = U+*/0x00B7, //MIDDLE DOT
	/*B8 = U+*/0x0451, //CYRILLIC SMALL LETTER IO
	/*B9 = U+*/0x2116, //NUMERO SIGN
	/*BA = U+*/0x0454, //CYRILLIC SMALL LETTER UKRAINIAN IE
	/*BB = U+*/0x00BB, //RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
	/*BC = U+*/0x0458, //CYRILLIC SMALL LETTER JE
	/*BD = U+*/0x0405, //CYRILLIC CAPITAL LETTER DZE
	/*BE = U+*/0x0455, //CYRILLIC SMALL LETTER DZE
	/*BF = U+*/0x0457, //CYRILLIC SMALL LETTER YI
	/*C0 = U+*/0x0410, //CYRILLIC CAPITAL LETTER A
	/*C1 = U+*/0x0411, //CYRILLIC CAPITAL LETTER BE
	/*C2 = U+*/0x0412, //CYRILLIC CAPITAL LETTER VE
	/*C3 = U+*/0x0413, //CYRILLIC CAPITAL LETTER GHE
	/*C4 = U+*/0x0414, //CYRILLIC CAPITAL LETTER DE
	/*C5 = U+*/0x0415, //CYRILLIC CAPITAL LETTER IE
	/*C6 = U+*/0x0416, //CYRILLIC CAPITAL LETTER ZHE
	/*C7 = U+*/0x0417, //CYRILLIC CAPITAL LETTER ZE
	/*C8 = U+*/0x0418, //CYRILLIC CAPITAL LETTER I
	/*C9 = U+*/0x0419, //CYRILLIC CAPITAL LETTER SHORT I
	/*CA = U+*/0x041A, //CYRILLIC CAPITAL LETTER KA
	/*CB = U+*/0x041B, //CYRILLIC CAPITAL LETTER EL
	/*CC = U+*/0x041C, //CYRILLIC CAPITAL LETTER EM
	/*CD = U+*/0x041D, //CYRILLIC CAPITAL LETTER EN
	/*CE = U+*/0x041E, //CYRILLIC CAPITAL LETTER O
	/*CF = U+*/0x041F, //CYRILLIC CAPITAL LETTER PE
	/*D0 = U+*/0x0420, //CYRILLIC CAPITAL LETTER ER
	/*D1 = U+*/0x0421, //CYRILLIC CAPITAL LETTER ES
	/*D2 = U+*/0x0422, //CYRILLIC CAPITAL LETTER TE
	/*D3 = U+*/0x0423, //CYRILLIC CAPITAL LETTER U
	/*D4 = U+*/0x0424, //CYRILLIC CAPITAL LETTER EF
	/*D5 = U+*/0x0425, //CYRILLIC CAPITAL LETTER HA
	/*D6 = U+*/0x0426, //CYRILLIC CAPITAL LETTER TSE
	/*D7 = U+*/0x0427, //CYRILLIC CAPITAL LETTER CHE
	/*D8 = U+*/0x0428, //CYRILLIC CAPITAL LETTER SHA
	/*D9 = U+*/0x0429, //CYRILLIC CAPITAL LETTER SHCHA
	/*DA = U+*/0x042A, //CYRILLIC CAPITAL LETTER HARD SIGN
	/*DB = U+*/0x042B, //CYRILLIC CAPITAL LETTER YERU
	/*DC = U+*/0x042C, //CYRILLIC CAPITAL LETTER SOFT SIGN
	/*DD = U+*/0x042D, //CYRILLIC CAPITAL LETTER E
	/*DE = U+*/0x042E, //CYRILLIC CAPITAL LETTER YU
	/*DF = U+*/0x042F, //CYRILLIC CAPITAL LETTER YA
	/*E0 = U+*/0x0430, //CYRILLIC SMALL LETTER A
	/*E1 = U+*/0x0431, //CYRILLIC SMALL LETTER BE
	/*E2 = U+*/0x0432, //CYRILLIC SMALL LETTER VE
	/*E3 = U+*/0x0433, //CYRILLIC SMALL LETTER GHE
	/*E4 = U+*/0x0434, //CYRILLIC SMALL LETTER DE
	/*E5 = U+*/0x0435, //CYRILLIC SMALL LETTER IE
	/*E6 = U+*/0x0436, //CYRILLIC SMALL LETTER ZHE
	/*E7 = U+*/0x0437, //CYRILLIC SMALL LETTER ZE
	/*E8 = U+*/0x0438, //CYRILLIC SMALL LETTER I
	/*E9 = U+*/0x0439, //CYRILLIC SMALL LETTER SHORT I
	/*EA = U+*/0x043A, //CYRILLIC SMALL LETTER KA
	/*EB = U+*/0x043B, //CYRILLIC SMALL LETTER EL
	/*EC = U+*/0x043C, //CYRILLIC SMALL LETTER EM
	/*ED = U+*/0x043D, //CYRILLIC SMALL LETTER EN
	/*EE = U+*/0x043E, //CYRILLIC SMALL LETTER O
	/*EF = U+*/0x043F, //CYRILLIC SMALL LETTER PE
	/*F0 = U+*/0x0440, //CYRILLIC SMALL LETTER ER
	/*F1 = U+*/0x0441, //CYRILLIC SMALL LETTER ES
	/*F2 = U+*/0x0442, //CYRILLIC SMALL LETTER TE
	/*F3 = U+*/0x0443, //CYRILLIC SMALL LETTER U
	/*F4 = U+*/0x0444, //CYRILLIC SMALL LETTER EF
	/*F5 = U+*/0x0445, //CYRILLIC SMALL LETTER HA
	/*F6 = U+*/0x0446, //CYRILLIC SMALL LETTER TSE
	/*F7 = U+*/0x0447, //CYRILLIC SMALL LETTER CHE
	/*F8 = U+*/0x0448, //CYRILLIC SMALL LETTER SHA
	/*F9 = U+*/0x0449, //CYRILLIC SMALL LETTER SHCHA
	/*FA = U+*/0x044A, //CYRILLIC SMALL LETTER HARD SIGN
	/*FB = U+*/0x044B, //CYRILLIC SMALL LETTER YERU
	/*FC = U+*/0x044C, //CYRILLIC SMALL LETTER SOFT SIGN
	/*FD = U+*/0x044D, //CYRILLIC SMALL LETTER E
	/*FE = U+*/0x044E, //CYRILLIC SMALL LETTER YU
	/*FF = U+*/0x044F  //CYRILLIC SMALL LETTER YA
};
543 
544 // The following map came from here:
545 // http://www.microsoft.com/globaldev/reference/sbcs/1252.mspx
546 // The <UNMAPPED> entries were added by hand based on the holes in the chart.
547 static wchar_t g_cp1252[256] =
548 {
549 	/*00 = U+*/0x0000, //NULL
550 	/*01 = U+*/0x0001, //START OF HEADING
551 	/*02 = U+*/0x0002, //START OF TEXT
552 	/*03 = U+*/0x0003, //END OF TEXT
553 	/*04 = U+*/0x0004, //END OF TRANSMISSION
554 	/*05 = U+*/0x0005, //ENQUIRY
555 	/*06 = U+*/0x0006, //ACKNOWLEDGE
556 	/*07 = U+*/0x0007, //BELL
557 	/*08 = U+*/0x0008, //BACKSPACE
558 	/*09 = U+*/0x0009, //HORIZONTAL TABULATION
559 	/*0A = U+*/0x000A, //LINE FEED
560 	/*0B = U+*/0x000B, //VERTICAL TABULATION
561 	/*0C = U+*/0x000C, //FORM FEED
562 	/*0D = U+*/0x000D, //CARRIAGE RETURN
563 	/*0E = U+*/0x000E, //SHIFT OUT
564 	/*0F = U+*/0x000F, //SHIFT IN
565 	/*10 = U+*/0x0010, //DATA LINK ESCAPE
566 	/*11 = U+*/0x0011, //DEVICE CONTROL ONE
567 	/*12 = U+*/0x0012, //DEVICE CONTROL TWO
568 	/*13 = U+*/0x0013, //DEVICE CONTROL THREE
569 	/*14 = U+*/0x0014, //DEVICE CONTROL FOUR
570 	/*15 = U+*/0x0015, //NEGATIVE ACKNOWLEDGE
571 	/*16 = U+*/0x0016, //SYNCHRONOUS IDLE
572 	/*17 = U+*/0x0017, //END OF TRANSMISSION BLOCK
573 	/*18 = U+*/0x0018, //CANCEL
574 	/*19 = U+*/0x0019, //END OF MEDIUM
575 	/*1A = U+*/0x001A, //SUBSTITUTE
576 	/*1B = U+*/0x001B, //ESCAPE
577 	/*1C = U+*/0x001C, //FILE SEPARATOR
578 	/*1D = U+*/0x001D, //GROUP SEPARATOR
579 	/*1E = U+*/0x001E, //RECORD SEPARATOR
580 	/*1F = U+*/0x001F, //UNIT SEPARATOR
581 	/*20 = U+*/0x0020, //SPACE
582 	/*21 = U+*/0x0021, //EXCLAMATION MARK
583 	/*22 = U+*/0x0022, //QUOTATION MARK
584 	/*23 = U+*/0x0023, //NUMBER SIGN
585 	/*24 = U+*/0x0024, //DOLLAR SIGN
586 	/*25 = U+*/0x0025, //PERCENT SIGN
587 	/*26 = U+*/0x0026, //AMPERSAND
588 	/*27 = U+*/0x0027, //APOSTROPHE
589 	/*28 = U+*/0x0028, //LEFT PARENTHESIS
590 	/*29 = U+*/0x0029, //RIGHT PARENTHESIS
591 	/*2A = U+*/0x002A, //ASTERISK
592 	/*2B = U+*/0x002B, //PLUS SIGN
593 	/*2C = U+*/0x002C, //COMMA
594 	/*2D = U+*/0x002D, //HYPHEN-MINUS
595 	/*2E = U+*/0x002E, //FULL STOP
596 	/*2F = U+*/0x002F, //SOLIDUS
597 	/*30 = U+*/0x0030, //DIGIT ZERO
598 	/*31 = U+*/0x0031, //DIGIT ONE
599 	/*32 = U+*/0x0032, //DIGIT TWO
600 	/*33 = U+*/0x0033, //DIGIT THREE
601 	/*34 = U+*/0x0034, //DIGIT FOUR
602 	/*35 = U+*/0x0035, //DIGIT FIVE
603 	/*36 = U+*/0x0036, //DIGIT SIX
604 	/*37 = U+*/0x0037, //DIGIT SEVEN
605 	/*38 = U+*/0x0038, //DIGIT EIGHT
606 	/*39 = U+*/0x0039, //DIGIT NINE
607 	/*3A = U+*/0x003A, //COLON
608 	/*3B = U+*/0x003B, //SEMICOLON
609 	/*3C = U+*/0x003C, //LESS-THAN SIGN
610 	/*3D = U+*/0x003D, //EQUALS SIGN
611 	/*3E = U+*/0x003E, //GREATER-THAN SIGN
612 	/*3F = U+*/0x003F, //QUESTION MARK
613 	/*40 = U+*/0x0040, //COMMERCIAL AT
614 	/*41 = U+*/0x0041, //LATIN CAPITAL LETTER A
615 	/*42 = U+*/0x0042, //LATIN CAPITAL LETTER B
616 	/*43 = U+*/0x0043, //LATIN CAPITAL LETTER C
617 	/*44 = U+*/0x0044, //LATIN CAPITAL LETTER D
618 	/*45 = U+*/0x0045, //LATIN CAPITAL LETTER E
619 	/*46 = U+*/0x0046, //LATIN CAPITAL LETTER F
620 	/*47 = U+*/0x0047, //LATIN CAPITAL LETTER G
621 	/*48 = U+*/0x0048, //LATIN CAPITAL LETTER H
622 	/*49 = U+*/0x0049, //LATIN CAPITAL LETTER I
623 	/*4A = U+*/0x004A, //LATIN CAPITAL LETTER J
624 	/*4B = U+*/0x004B, //LATIN CAPITAL LETTER K
625 	/*4C = U+*/0x004C, //LATIN CAPITAL LETTER L
626 	/*4D = U+*/0x004D, //LATIN CAPITAL LETTER M
627 	/*4E = U+*/0x004E, //LATIN CAPITAL LETTER N
628 	/*4F = U+*/0x004F, //LATIN CAPITAL LETTER O
629 	/*50 = U+*/0x0050, //LATIN CAPITAL LETTER P
630 	/*51 = U+*/0x0051, //LATIN CAPITAL LETTER Q
631 	/*52 = U+*/0x0052, //LATIN CAPITAL LETTER R
632 	/*53 = U+*/0x0053, //LATIN CAPITAL LETTER S
633 	/*54 = U+*/0x0054, //LATIN CAPITAL LETTER T
634 	/*55 = U+*/0x0055, //LATIN CAPITAL LETTER U
635 	/*56 = U+*/0x0056, //LATIN CAPITAL LETTER V
636 	/*57 = U+*/0x0057, //LATIN CAPITAL LETTER W
637 	/*58 = U+*/0x0058, //LATIN CAPITAL LETTER X
638 	/*59 = U+*/0x0059, //LATIN CAPITAL LETTER Y
639 	/*5A = U+*/0x005A, //LATIN CAPITAL LETTER Z
640 	/*5B = U+*/0x005B, //LEFT SQUARE BRACKET
641 	/*5C = U+*/0x005C, //REVERSE SOLIDUS
642 	/*5D = U+*/0x005D, //RIGHT SQUARE BRACKET
643 	/*5E = U+*/0x005E, //CIRCUMFLEX ACCENT
644 	/*5F = U+*/0x005F, //LOW LINE
645 	/*60 = U+*/0x0060, //GRAVE ACCENT
646 	/*61 = U+*/0x0061, //LATIN SMALL LETTER A
647 	/*62 = U+*/0x0062, //LATIN SMALL LETTER B
648 	/*63 = U+*/0x0063, //LATIN SMALL LETTER C
649 	/*64 = U+*/0x0064, //LATIN SMALL LETTER D
650 	/*65 = U+*/0x0065, //LATIN SMALL LETTER E
651 	/*66 = U+*/0x0066, //LATIN SMALL LETTER F
652 	/*67 = U+*/0x0067, //LATIN SMALL LETTER G
653 	/*68 = U+*/0x0068, //LATIN SMALL LETTER H
654 	/*69 = U+*/0x0069, //LATIN SMALL LETTER I
655 	/*6A = U+*/0x006A, //LATIN SMALL LETTER J
656 	/*6B = U+*/0x006B, //LATIN SMALL LETTER K
657 	/*6C = U+*/0x006C, //LATIN SMALL LETTER L
658 	/*6D = U+*/0x006D, //LATIN SMALL LETTER M
659 	/*6E = U+*/0x006E, //LATIN SMALL LETTER N
660 	/*6F = U+*/0x006F, //LATIN SMALL LETTER O
661 	/*70 = U+*/0x0070, //LATIN SMALL LETTER P
662 	/*71 = U+*/0x0071, //LATIN SMALL LETTER Q
663 	/*72 = U+*/0x0072, //LATIN SMALL LETTER R
664 	/*73 = U+*/0x0073, //LATIN SMALL LETTER S
665 	/*74 = U+*/0x0074, //LATIN SMALL LETTER T
666 	/*75 = U+*/0x0075, //LATIN SMALL LETTER U
667 	/*76 = U+*/0x0076, //LATIN SMALL LETTER V
668 	/*77 = U+*/0x0077, //LATIN SMALL LETTER W
669 	/*78 = U+*/0x0078, //LATIN SMALL LETTER X
670 	/*79 = U+*/0x0079, //LATIN SMALL LETTER Y
671 	/*7A = U+*/0x007A, //LATIN SMALL LETTER Z
672 	/*7B = U+*/0x007B, //LEFT CURLY BRACKET
673 	/*7C = U+*/0x007C, //VERTICAL LINE
674 	/*7D = U+*/0x007D, //RIGHT CURLY BRACKET
675 	/*7E = U+*/0x007E, //TILDE
676 	/*7F = U+*/0x007F, //DELETE
677 	/*80 = U+*/0x20AC, //EURO SIGN
678 	/*81 = U+*/0xFFFF, //<UNMAPPED>
679 	/*82 = U+*/0x201A, //SINGLE LOW-9 QUOTATION MARK
680 	/*83 = U+*/0x0192, //LATIN SMALL LETTER F WITH HOOK
681 	/*84 = U+*/0x201E, //DOUBLE LOW-9 QUOTATION MARK
682 	/*85 = U+*/0x2026, //HORIZONTAL ELLIPSIS
683 	/*86 = U+*/0x2020, //DAGGER
684 	/*87 = U+*/0x2021, //DOUBLE DAGGER
685 	/*88 = U+*/0x02C6, //MODIFIER LETTER CIRCUMFLEX ACCENT
686 	/*89 = U+*/0x2030, //PER MILLE SIGN
687 	/*8A = U+*/0x0160, //LATIN CAPITAL LETTER S WITH CARON
688 	/*8B = U+*/0x2039, //SINGLE LEFT-POINTING ANGLE QUOTATION MARK
689 	/*8C = U+*/0x0152, //LATIN CAPITAL LIGATURE OE
690 	/*8D = U+*/0xFFFF, //<UNMAPPED>
691 	/*8E = U+*/0x017D, //LATIN CAPITAL LETTER Z WITH CARON
692 	/*8F = U+*/0xFFFF, //<UNMAPPED>
693 	/*90 = U+*/0xFFFF, //<UNMAPPED>
694 	/*91 = U+*/0x2018, //LEFT SINGLE QUOTATION MARK
695 	/*92 = U+*/0x2019, //RIGHT SINGLE QUOTATION MARK
696 	/*93 = U+*/0x201C, //LEFT DOUBLE QUOTATION MARK
697 	/*94 = U+*/0x201D, //RIGHT DOUBLE QUOTATION MARK
698 	/*95 = U+*/0x2022, //BULLET
699 	/*96 = U+*/0x2013, //EN DASH
700 	/*97 = U+*/0x2014, //EM DASH
701 	/*98 = U+*/0x02DC, //SMALL TILDE
702 	/*99 = U+*/0x2122, //TRADE MARK SIGN
703 	/*9A = U+*/0x0161, //LATIN SMALL LETTER S WITH CARON
704 	/*9B = U+*/0x203A, //SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
705 	/*9C = U+*/0x0153, //LATIN SMALL LIGATURE OE
706 	/*9D = U+*/0xFFFF, //<UNMAPPED>
707 	/*9E = U+*/0x017E, //LATIN SMALL LETTER Z WITH CARON
708 	/*9F = U+*/0x0178, //LATIN CAPITAL LETTER Y WITH DIAERESIS
709 	/*A0 = U+*/0x00A0, //NO-BREAK SPACE
710 	/*A1 = U+*/0x00A1, //INVERTED EXCLAMATION MARK
711 	/*A2 = U+*/0x00A2, //CENT SIGN
712 	/*A3 = U+*/0x00A3, //POUND SIGN
713 	/*A4 = U+*/0x00A4, //CURRENCY SIGN
714 	/*A5 = U+*/0x00A5, //YEN SIGN
715 	/*A6 = U+*/0x00A6, //BROKEN BAR
716 	/*A7 = U+*/0x00A7, //SECTION SIGN
717 	/*A8 = U+*/0x00A8, //DIAERESIS
718 	/*A9 = U+*/0x00A9, //COPYRIGHT SIGN
719 	/*AA = U+*/0x00AA, //FEMININE ORDINAL INDICATOR
720 	/*AB = U+*/0x00AB, //LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
721 	/*AC = U+*/0x00AC, //NOT SIGN
722 	/*AD = U+*/0x00AD, //SOFT HYPHEN
723 	/*AE = U+*/0x00AE, //REGISTERED SIGN
724 	/*AF = U+*/0x00AF, //MACRON
725 	/*B0 = U+*/0x00B0, //DEGREE SIGN
726 	/*B1 = U+*/0x00B1, //PLUS-MINUS SIGN
727 	/*B2 = U+*/0x00B2, //SUPERSCRIPT TWO
728 	/*B3 = U+*/0x00B3, //SUPERSCRIPT THREE
729 	/*B4 = U+*/0x00B4, //ACUTE ACCENT
730 	/*B5 = U+*/0x00B5, //MICRO SIGN
731 	/*B6 = U+*/0x00B6, //PILCROW SIGN
732 	/*B7 = U+*/0x00B7, //MIDDLE DOT
733 	/*B8 = U+*/0x00B8, //CEDILLA
734 	/*B9 = U+*/0x00B9, //SUPERSCRIPT ONE
735 	/*BA = U+*/0x00BA, //MASCULINE ORDINAL INDICATOR
736 	/*BB = U+*/0x00BB, //RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
737 	/*BC = U+*/0x00BC, //VULGAR FRACTION ONE QUARTER
738 	/*BD = U+*/0x00BD, //VULGAR FRACTION ONE HALF
739 	/*BE = U+*/0x00BE, //VULGAR FRACTION THREE QUARTERS
740 	/*BF = U+*/0x00BF, //INVERTED QUESTION MARK
741 	/*C0 = U+*/0x00C0, //LATIN CAPITAL LETTER A WITH GRAVE
742 	/*C1 = U+*/0x00C1, //LATIN CAPITAL LETTER A WITH ACUTE
743 	/*C2 = U+*/0x00C2, //LATIN CAPITAL LETTER A WITH CIRCUMFLEX
744 	/*C3 = U+*/0x00C3, //LATIN CAPITAL LETTER A WITH TILDE
745 	/*C4 = U+*/0x00C4, //LATIN CAPITAL LETTER A WITH DIAERESIS
746 	/*C5 = U+*/0x00C5, //LATIN CAPITAL LETTER A WITH RING ABOVE
747 	/*C6 = U+*/0x00C6, //LATIN CAPITAL LETTER AE
748 	/*C7 = U+*/0x00C7, //LATIN CAPITAL LETTER C WITH CEDILLA
749 	/*C8 = U+*/0x00C8, //LATIN CAPITAL LETTER E WITH GRAVE
750 	/*C9 = U+*/0x00C9, //LATIN CAPITAL LETTER E WITH ACUTE
751 	/*CA = U+*/0x00CA, //LATIN CAPITAL LETTER E WITH CIRCUMFLEX
752 	/*CB = U+*/0x00CB, //LATIN CAPITAL LETTER E WITH DIAERESIS
753 	/*CC = U+*/0x00CC, //LATIN CAPITAL LETTER I WITH GRAVE
754 	/*CD = U+*/0x00CD, //LATIN CAPITAL LETTER I WITH ACUTE
755 	/*CE = U+*/0x00CE, //LATIN CAPITAL LETTER I WITH CIRCUMFLEX
756 	/*CF = U+*/0x00CF, //LATIN CAPITAL LETTER I WITH DIAERESIS
757 	/*D0 = U+*/0x00D0, //LATIN CAPITAL LETTER ETH
758 	/*D1 = U+*/0x00D1, //LATIN CAPITAL LETTER N WITH TILDE
759 	/*D2 = U+*/0x00D2, //LATIN CAPITAL LETTER O WITH GRAVE
760 	/*D3 = U+*/0x00D3, //LATIN CAPITAL LETTER O WITH ACUTE
761 	/*D4 = U+*/0x00D4, //LATIN CAPITAL LETTER O WITH CIRCUMFLEX
762 	/*D5 = U+*/0x00D5, //LATIN CAPITAL LETTER O WITH TILDE
763 	/*D6 = U+*/0x00D6, //LATIN CAPITAL LETTER O WITH DIAERESIS
764 	/*D7 = U+*/0x00D7, //MULTIPLICATION SIGN
765 	/*D8 = U+*/0x00D8, //LATIN CAPITAL LETTER O WITH STROKE
766 	/*D9 = U+*/0x00D9, //LATIN CAPITAL LETTER U WITH GRAVE
767 	/*DA = U+*/0x00DA, //LATIN CAPITAL LETTER U WITH ACUTE
768 	/*DB = U+*/0x00DB, //LATIN CAPITAL LETTER U WITH CIRCUMFLEX
769 	/*DC = U+*/0x00DC, //LATIN CAPITAL LETTER U WITH DIAERESIS
770 	/*DD = U+*/0x00DD, //LATIN CAPITAL LETTER Y WITH ACUTE
771 	/*DE = U+*/0x00DE, //LATIN CAPITAL LETTER THORN
772 	/*DF = U+*/0x00DF, //LATIN SMALL LETTER SHARP S
773 	/*E0 = U+*/0x00E0, //LATIN SMALL LETTER A WITH GRAVE
774 	/*E1 = U+*/0x00E1, //LATIN SMALL LETTER A WITH ACUTE
775 	/*E2 = U+*/0x00E2, //LATIN SMALL LETTER A WITH CIRCUMFLEX
776 	/*E3 = U+*/0x00E3, //LATIN SMALL LETTER A WITH TILDE
777 	/*E4 = U+*/0x00E4, //LATIN SMALL LETTER A WITH DIAERESIS
778 	/*E5 = U+*/0x00E5, //LATIN SMALL LETTER A WITH RING ABOVE
779 	/*E6 = U+*/0x00E6, //LATIN SMALL LETTER AE
780 	/*E7 = U+*/0x00E7, //LATIN SMALL LETTER C WITH CEDILLA
781 	/*E8 = U+*/0x00E8, //LATIN SMALL LETTER E WITH GRAVE
782 	/*E9 = U+*/0x00E9, //LATIN SMALL LETTER E WITH ACUTE
783 	/*EA = U+*/0x00EA, //LATIN SMALL LETTER E WITH CIRCUMFLEX
784 	/*EB = U+*/0x00EB, //LATIN SMALL LETTER E WITH DIAERESIS
785 	/*EC = U+*/0x00EC, //LATIN SMALL LETTER I WITH GRAVE
786 	/*ED = U+*/0x00ED, //LATIN SMALL LETTER I WITH ACUTE
787 	/*EE = U+*/0x00EE, //LATIN SMALL LETTER I WITH CIRCUMFLEX
788 	/*EF = U+*/0x00EF, //LATIN SMALL LETTER I WITH DIAERESIS
789 	/*F0 = U+*/0x00F0, //LATIN SMALL LETTER ETH
790 	/*F1 = U+*/0x00F1, //LATIN SMALL LETTER N WITH TILDE
791 	/*F2 = U+*/0x00F2, //LATIN SMALL LETTER O WITH GRAVE
792 	/*F3 = U+*/0x00F3, //LATIN SMALL LETTER O WITH ACUTE
793 	/*F4 = U+*/0x00F4, //LATIN SMALL LETTER O WITH CIRCUMFLEX
794 	/*F5 = U+*/0x00F5, //LATIN SMALL LETTER O WITH TILDE
795 	/*F6 = U+*/0x00F6, //LATIN SMALL LETTER O WITH DIAERESIS
796 	/*F7 = U+*/0x00F7, //DIVISION SIGN
797 	/*F8 = U+*/0x00F8, //LATIN SMALL LETTER O WITH STROKE
798 	/*F9 = U+*/0x00F9, //LATIN SMALL LETTER U WITH GRAVE
799 	/*FA = U+*/0x00FA, //LATIN SMALL LETTER U WITH ACUTE
800 	/*FB = U+*/0x00FB, //LATIN SMALL LETTER U WITH CIRCUMFLEX
801 	/*FC = U+*/0x00FC, //LATIN SMALL LETTER U WITH DIAERESIS
802 	/*FD = U+*/0x00FD, //LATIN SMALL LETTER Y WITH ACUTE
803 	/*FE = U+*/0x00FE, //LATIN SMALL LETTER THORN
804 	/*FF = U+*/0x00FF  //LATIN SMALL LETTER Y WITH DIAERESIS
805 };
806 
807 class TCStringObject : public TCObject
808 {
809 public:
TCStringObject(void)810 	TCStringObject(void) : string(NULL) {}
TCStringObject(const char * string)811 	TCStringObject(const char *string) : string(copyString(string)) {}
setString(const char * value)812 	void setString(const char *value)
813 	{
814 		if (string != value)
815 		{
816 			delete[] string;
817 			string = copyString(value);
818 		}
819 	}
getString(void)820 	const char *getString(void) { return string; }
821 protected:
822 	virtual ~TCStringObject(void);
dealloc(void)823 	virtual void dealloc(void)
824 	{
825 		delete[] string;
826 		TCObject::dealloc();
827 	}
828 
829 	char *string;
830 };
831 
// Intentionally defined out of line: when this empty destructor lived inside
// the class definition, the compiler warned about it not being inlined.  The
// held string is freed in dealloc(), not here.
TCStringObject::~TCStringObject(void)
{
}
837 
// Shared instance used by all of the static accessors; created on first use
// by getCurrentLocalStrings().
TCLocalStrings *TCLocalStrings::currentLocalStrings = NULL;
// Static helper object; its destructor releases currentLocalStrings.
// NOTE(review): this object is defined before sm_codePages, so its
// constructor runs first and must not touch sm_codePages.
TCLocalStrings::TCLocalStringsCleanup TCLocalStrings::localStringsCleanup;
// Maps a Windows code page number to its byte-indexed Unicode lookup table.
// Entries are registered by the TCLocalStrings constructor.
IntWCharMap TCLocalStrings::sm_codePages;
841 
TCLocalStringsCleanup(void)842 TCLocalStrings::TCLocalStringsCleanup::TCLocalStringsCleanup(void)
843 {
844 	TCLocalStrings::initCodePages();
845 }
846 
~TCLocalStringsCleanup(void)847 TCLocalStrings::TCLocalStringsCleanup::~TCLocalStringsCleanup(void)
848 {
849 	if (currentLocalStrings)
850 	{
851 		currentLocalStrings->release();
852 	}
853 }
854 
855 // Note: Code Page 1252 is Windows Latin I, which is the default.
TCLocalStrings(void)856 TCLocalStrings::TCLocalStrings(void):
857 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
858 	m_textCodec(NULL),
859 #endif // WIN32
860 	m_codePage(UTF8_CODE_PAGE)
861 {
862 	stringDict = new TCDictionary;
863 	sm_codePages[1250] = g_cp1250;
864 	sm_codePages[1251] = g_cp1251;
865 	sm_codePages[1252] = g_cp1252;
866 }
867 
// Intentionally empty: stringDict is released in dealloc().
TCLocalStrings::~TCLocalStrings(void)
{
}
871 
// Releases the narrow-string dictionary and then hands over to
// TCObject::dealloc() for the rest of the teardown.
void TCLocalStrings::dealloc(void)
{
	// Drop our reference on the dictionary before the base class runs.
	TCObject::release(stringDict);
	TCObject::dealloc();
}
877 
// Called from the TCLocalStringsCleanup constructor.  Currently does nothing;
// the code page tables are registered in the TCLocalStrings constructor
// instead.
void TCLocalStrings::initCodePages(void)
{
}
881 
setStringTable(const char * stringTable,bool replace)882 bool TCLocalStrings::setStringTable(const char *stringTable, bool replace)
883 {
884 	return getCurrentLocalStrings()->instSetStringTable(stringTable, replace);
885 }
886 
setStringTable(const wchar_t * stringTable,bool replace)887 bool TCLocalStrings::setStringTable(const wchar_t *stringTable, bool replace)
888 {
889 	return getCurrentLocalStrings()->instSetStringTable(stringTable, replace);
890 }
891 
getCodePage(void)892 int TCLocalStrings::getCodePage(void)
893 {
894 	return getCurrentLocalStrings()->instGetCodePage();
895 }
896 
897 //#if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
898 //const QString &TCLocalStrings::get(const char *key)
899 //#else // WIN32
get(const char * key)900 const char *TCLocalStrings::get(const char *key)
901 //#endif // WIN32
902 {
903 	return getCurrentLocalStrings()->instGetLocalString(key);
904 }
905 
getUtf8(const char * key)906 const char *TCLocalStrings::getUtf8(const char *key)
907 {
908 	return getCurrentLocalStrings()->instGetUtf8LocalString(key);
909 }
910 
get(const wchar_t * key)911 const wchar_t *TCLocalStrings::get(const wchar_t *key)
912 {
913 	return getCurrentLocalStrings()->instGetLocalString(key);
914 }
915 
setStringTable(const TCByte * data,int tableSize,bool replace)916 bool TCLocalStrings::setStringTable(
917 	const TCByte *data,
918 	int tableSize,
919 	bool replace /*= true*/)
920 {
921 	bool retValue = false;
922 	bool bUnicode16 = false;
923 	bool bBigEndian = true;
924 	int offset = 0;
925 
926 	if (tableSize >= 2 && data[0] == 0xFF && data[1] == 0xFE)
927 	{
928 		// Little Endian Unicode
929 		bUnicode16 = true;
930 		bBigEndian = false;
931 	}
932 	else if (tableSize >= 2 && data[0] == 0xFE && data[1] == 0xFF)
933 	{
934 		// Big Endian Unicode
935 		bUnicode16 = true;
936 	}
937 	else if (tableSize >= 3 && data[0] == 0xEF && data[1] == 0xBB &&
938 		data[2] == 0xBF)
939 	{
940 		offset = 3;
941 	}
942 	if (bUnicode16)
943 	{
944 		std::wstring wstringTable;
945 		int i;
946 		int count = tableSize / 2;
947 
948 		wstringTable.reserve(count + 1);
949 		// Note: skip first 2 bytes, which are the Byte Order Mark.
950 		for (i = 2; i < tableSize; i += 2)
951 		{
952 			int uByte;
953 			int lByte;
954 
955 			if (bBigEndian)
956 			{
957 				uByte = data[i];
958 				lByte = data[i + 1];
959 			}
960 			else
961 			{
962 				uByte = data[i + 1];
963 				lByte = data[i];
964 			}
965 			wchar_t wc = (wchar_t)((uByte << 8) | lByte);
966 			wstringTable.append(&wc, 1);
967 		}
968 		// wstringTable now contains the string table.
969 #ifdef NO_WSTRING
970         retValue = setStringTable(L"", replace);
971 #else // NO_WSTRING
972 		retValue = setStringTable(wstringTable.c_str(), replace);
973 #endif // NO_WSTRING
974 	}
975 	else
976 	{
977 		char *stringTable = new char[tableSize - offset + 1];
978 		memcpy(stringTable, &data[offset], tableSize - offset);
979 
980 		// Null terminate the string table
981 		stringTable[tableSize - offset] = 0;
982 		retValue = setStringTable(stringTable, replace);
983 		delete[] stringTable;
984 	}
985 	return retValue;
986 }
987 
loadStringTable(const char * filename,bool replace)988 bool TCLocalStrings::loadStringTable(const char *filename, bool replace)
989 {
990 	FILE *tableFile = ucfopen(filename, "rb");
991 	bool retValue = false;
992 
993 	if (tableFile)
994 	{
995 		long fileSize;
996 		TCByte *fileData;
997 
998 		fseek(tableFile, 0, SEEK_END);
999 		fileSize = ftell(tableFile);
1000 		fseek(tableFile, 0, SEEK_SET);
1001 		fileData = new TCByte[fileSize];
1002 		if (fread(fileData, 1, fileSize, tableFile) == (unsigned)fileSize)
1003 		{
1004 			retValue = setStringTable(fileData, (int)fileSize, replace);
1005 		}
1006 		delete[] fileData;
1007 		fclose(tableFile);
1008 	}
1009 	return retValue;
1010 }
1011 
getCurrentLocalStrings(void)1012 TCLocalStrings *TCLocalStrings::getCurrentLocalStrings(void)
1013 {
1014 	if (!currentLocalStrings)
1015 	{
1016 		currentLocalStrings = new TCLocalStrings;
1017 	}
1018 	return currentLocalStrings;
1019 }
1020 
dumpTable(const char * filename,const char * header)1021 void TCLocalStrings::dumpTable(const char *filename, const char *header)
1022 {
1023 	getCurrentLocalStrings()->instDumpTable(filename, header);
1024 }
1025 
instDumpTable(const char * filename,const char * header)1026 void TCLocalStrings::instDumpTable(const char *filename, const char *header)
1027 {
1028 	FILE *file = ucfopen(filename, "w");
1029 
1030 	if (file)
1031 	{
1032 		TCSortedStringArray *keys = stringDict->allKeys();
1033 		int i;
1034 		int count = keys->getCount();
1035 
1036 		if (header)
1037 		{
1038 			fprintf(file, "%s\n", header);
1039 		}
1040 		for (i = 0; i < count; i++)
1041 		{
1042 			const char *key = keys->stringAtIndex(i);
1043 			const char *value = ((TCStringObject *)stringDict->objectForKey(key))->getString();
1044 
1045 			fprintf(file, "%s = %s\n", key, value);
1046 		}
1047 		for (WStringWStringMap::iterator it = m_strings.begin(); it != m_strings.end(); ++it)
1048 		{
1049 #ifndef NO_WSTRING
1050 			fprintf(file, "%S = %S\n", it->first.c_str(), it->second.c_str());
1051 #endif // NO_WSTRING
1052 		}
1053 		fclose(file);
1054 	}
1055 }
1056 
clear(void)1057 void TCLocalStrings::clear(void)
1058 {
1059 	stringDict->removeAll();
1060 	m_strings.clear();
1061 	m_utf8Strings.clear();
1062 }
1063 
instSetStringTable(const char * stringTable,bool replace)1064 bool TCLocalStrings::instSetStringTable(const char *stringTable, bool replace)
1065 {
1066 	bool sectionFound = false;
1067 	int lastKeyIndex = -1;
1068 	std::string lastKey;
1069 
1070 	if (replace)
1071 	{
1072 		clear();
1073 	}
1074 	while (1)
1075 	{
1076 		const char *eol = strchr(stringTable, '\n');
1077 
1078 		if (!eol && strlen(stringTable) > 0)
1079 		{
1080 			eol = stringTable + strlen(stringTable);
1081 		}
1082 		if (eol)
1083 		{
1084 			int len = (int)(eol - stringTable);
1085 			char *line = new char[len + 1];
1086 
1087 			strncpy(line, stringTable, len);
1088 			line[len] = 0;
1089 			stripCRLF(line);
1090 			stripLeadingWhitespace(line);
1091 			if (!sectionFound)
1092 			{
1093 				// We haven't found the [StringTable] section yet
1094 				stripTrailingWhitespace(line);
1095 				if (stringHasCaseInsensitivePrefix(line, "[StringTable") &&
1096 					stringHasSuffix(line, "]"))
1097 				{
1098 					const char *codePageString = strcasestr(line, "CP=");
1099 
1100 					sectionFound = true;
1101 					if (codePageString)
1102 					{
1103 						int codePage;
1104 
1105 						if (sscanf(&codePageString[3], "%d", &codePage) == 1)
1106 						{
1107 							instSetCodePage(codePage);
1108 						}
1109 					}
1110 				}
1111 				// Note that we are ignoring all lines until we find the section
1112 			}
1113 			else
1114 			{
1115 				// We're in the [StringTable] section
1116 				if (line[0] == '[' && strchr(line, ']'))
1117 				{
1118 					if (!stringHasCaseInsensitivePrefix(line, "[StringTable") ||
1119 						!stringHasSuffix(line, "]"))
1120 					{
1121 						// We found another section header, which means we are
1122 						// at the end of the [StringTable] section, so we're
1123 						// done.  Note that if we see another [StringTable]
1124 						// section, we'll just ignore that and continue on.
1125 						break;
1126 					}
1127 				}
1128 				else if (line[0] != ';')
1129 				{
1130 					// Comment lines begin with ;
1131 					char *equalSpot = strchr(line, '=');
1132 
1133 					if (equalSpot)
1134 					{
1135 						char *value;
1136 						char *key = line;
1137 						TCStringObject *stringObject;
1138 						int keyLen;
1139 
1140 						*equalSpot = 0;
1141 						stripTrailingWhitespace(key);
1142 						keyLen = (int)strlen(key);
1143 						if (keyLen)
1144 						{
1145 							bool appended = false;
1146 							std::wstring wkey;
1147 							std::wstring wvalue;
1148 
1149 							mbstowstring(wkey, key, keyLen);
1150 							value = copyString(equalSpot + 1);
1151 							processEscapedString(value);
1152 							mbstowstring(wvalue, value);
1153 //							value = stringByReplacingSubstring(equalSpot + 1,
1154 //								"\\n", "\n");
1155 							if (isdigit(key[keyLen - 1]))
1156 							{
1157 								int keyIndex;
1158 
1159 								// If the last character of the key is a digit,
1160 								// then it must be a multi-line key.  So strip
1161 								// off all trailing digits, and append to any
1162 								// existing value.  Note that keys aren't
1163 								// allowed to end in a digit, so even if there
1164 								// is only one line, the key still gets the
1165 								// number stripped off the end.
1166 								while (isdigit(key[keyLen - 1]) && keyLen > 0)
1167 								{
1168 									keyLen--;
1169 								}
1170 								keyIndex = atoi(&key[keyLen]);
1171 								key[keyLen] = 0;
1172 								if (lastKey != key)
1173 								{
1174 									lastKeyIndex = 0;
1175 								}
1176 								if (lastKey == key &&
1177 									lastKeyIndex + 1 != keyIndex)
1178 								{
1179 									debugPrintf(
1180 										"Key index out of sequence: %s%d\n",
1181 										key, keyIndex);
1182 								}
1183 								lastKeyIndex = keyIndex;
1184 								lastKey = key;
1185 								mbstowstring(wkey, key, keyLen);
1186 								stringObject = (TCStringObject*)stringDict->
1187 									objectForKey(key);
1188 								if (stringObject)
1189 								{
1190 									// If we've already got data for this key,
1191 									// we need to append to it and note that we
1192 									// did so.
1193 									char *newValue = new char[strlen(value) +
1194 										strlen(stringObject->getString()) + 1];
1195 
1196 									strcpy(newValue, stringObject->getString());
1197 									strcat(newValue, value);
1198 									// Note that we don't have to update the
1199 									// dict; we're simply updating the text in
1200 									// the string object already there.
1201 									stringObject->setString(newValue);
1202 									delete[] newValue;
1203 									appended = true;
1204 									// wstring copy constructor broken in VC++
1205 									// 2005?!?!?  The below doesn't work without
1206 									// the .c_str() calls.
1207 #ifndef NO_WSTRING
1208 									m_strings[wkey.c_str()] += wvalue.c_str();
1209 #endif // NO_WSTRING
1210 								}
1211 							}
1212 							if (!appended)
1213 							{
1214 								if (stringDict->objectForKey(line))
1215 								{
1216 									debugPrintf("Local String key \"%s\" "
1217 										"defined multiple times.\n", line);
1218 								}
1219 								stringObject = new TCStringObject(value);
1220 								stringDict->setObjectForKey(stringObject, line);
1221 								stringObject->release();
1222 								// wstring copy constructor broken in VC++
1223 								// 2005?!?!?  The below doesn't work without the
1224 								// .c_str() calls.
1225 #ifndef NO_WSTRING
1226 								m_strings[wkey.c_str()] = wvalue.c_str();
1227 #endif // NO_WSTRING
1228 							}
1229 							delete[] value;
1230 						}
1231 					}
1232 				}
1233 			}
1234 			delete[] line;
1235 			if (!eol[0])
1236 			{
1237 				// If there isn't an EOL at the end of the file, we're done now.
1238 				break;
1239 			}
1240 			stringTable += len + 1;
1241 			while (stringTable[0] == '\r' || stringTable[0] == '\n')
1242 			{
1243 				stringTable++;
1244 			}
1245 		}
1246 		else
1247 		{
1248 			break;
1249 		}
1250 	}
1251 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1252 	//buildQStringMap();
1253 #endif // WIN32
1254 	// Note that the load is considered a success if the [StringTable] section
1255 	// is found in the data.
1256 	return sectionFound;
1257 }
1258 
instSetCodePage(int codePage)1259 void TCLocalStrings::instSetCodePage(int codePage)
1260 {
1261 	m_codePage = codePage;
1262 //	if (m_codePage == 1250)
1263 //	{
1264 //		return;
1265 //	}
1266 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1267 	QString name;
1268 
1269 	name = QString("CP%1").arg(codePage);
1270 	m_textCodec =
1271 		QTextCodec::codecForName((const char *)name.toLatin1().constData());
1272 #endif // WIN32
1273 }
1274 
instSetStringTable(const wchar_t * stringTable,bool replace)1275 bool TCLocalStrings::instSetStringTable(const wchar_t *stringTable,
1276 										bool replace)
1277 {
1278 	bool sectionFound = false;
1279 	int lastKeyIndex = -1;
1280 	std::wstring lastKey;
1281 
1282 	if (replace)
1283 	{
1284 		clear();
1285 	}
1286 	while (1)
1287 	{
1288 		const wchar_t *eol = wcschr(stringTable, '\n');
1289 
1290 		if (!eol && wcslen(stringTable) > 0)
1291 		{
1292 			eol = stringTable + wcslen(stringTable);
1293 		}
1294 		if (eol)
1295 		{
1296 			int len = (int)(eol - stringTable);
1297 			wchar_t *line = new wchar_t[len + 1];
1298 
1299 			wcsncpy(line, stringTable, len);
1300 			line[len] = 0;
1301 			stripCRLF(line);
1302 			stripLeadingWhitespace(line);
1303 			if (!sectionFound)
1304 			{
1305 				// We haven't found the [StringTable] section yet
1306 				stripTrailingWhitespace(line);
1307 				if (stringHasCaseInsensitivePrefix(line, L"[StringTable") &&
1308 					stringHasSuffix(line, L"]"))
1309 				{
1310 					sectionFound = true;
1311 				}
1312 				// Note that we are ignoring all lines until we find the section
1313 			}
1314 			else
1315 			{
1316 				// We're in the [StringTable] section
1317 				if (line[0] == '[' && wcschr(line, ']'))
1318 				{
1319 					// We found another section header, which means we are at
1320 					// the end of the [StringTable] section, so we're done
1321 					break;
1322 				}
1323 				else if (line[0] != ';')
1324 				{
1325 					// Comment lines begin with ;
1326 					wchar_t *equalSpot = wcschr(line, '=');
1327 
1328 					if (equalSpot)
1329 					{
1330 						wchar_t *value;
1331 						wchar_t *key = line;
1332 						TCStringObject *stringObject;
1333 						int keyLen;
1334 
1335 						*equalSpot = 0;
1336 						stripTrailingWhitespace(key);
1337 						keyLen = (int)wcslen(key);
1338 						if (keyLen)
1339 						{
1340 							bool appended = false;
1341 							std::string skey;
1342 							std::string svalue;
1343 
1344 							wcstostring(skey, key, keyLen);
1345 							value = copyString(equalSpot + 1);
1346 							processEscapedString(value);
1347 							wcstostring(svalue, value);
1348 //							value = stringByReplacingSubstring(equalSpot + 1,
1349 //								"\\n", "\n");
1350 							if (isdigit(key[keyLen - 1]))
1351 							{
1352 								int keyIndex;
1353 
1354 								// If the last character of the key is a digit,
1355 								// then it must be a multi-line key.  So strip
1356 								// off all trailing digits, and append to any
1357 								// existing value.  Note that keys aren't
1358 								// allowed to end in a digit, so even if there
1359 								// is only one line, the key still gets the
1360 								// number stripped off the end.
1361 								while (isdigit(key[keyLen - 1]) && keyLen > 0)
1362 								{
1363 									keyLen--;
1364 								}
1365 								keyIndex = (int)wcstoul(&key[keyLen], NULL, 10);
1366 								key[keyLen] = 0;
1367 								if (lastKey != key)
1368 								{
1369 									lastKeyIndex = 0;
1370 								}
1371 								if (lastKey == key &&
1372 									lastKeyIndex + 1 != keyIndex)
1373 								{
1374 									debugPrintf(
1375 										"Key index out of sequence: %s%d\n",
1376 										key, keyIndex);
1377 								}
1378 								lastKeyIndex = keyIndex;
1379 								lastKey = key;
1380 								wcstostring(skey, key, keyLen);
1381 								stringObject = (TCStringObject*)stringDict->
1382 									objectForKey(skey.c_str());
1383 								if (stringObject)
1384 								{
1385 									// If we've already got data for this key,
1386 									// we need to append to it and note that we
1387 									// did so.
1388 									char *newValue = new char[svalue.size() +
1389 										strlen(stringObject->getString()) + 1];
1390 
1391 									strcpy(newValue, stringObject->getString());
1392 									strcat(newValue, svalue.c_str());
1393 									// Note that we don't have to update the
1394 									// dict; we're simply updating the text in
1395 									// the string object already there.
1396 									stringObject->setString(newValue);
1397 									delete[] newValue;
1398 									appended = true;
1399 									m_strings[key] += value;
1400 								}
1401 							}
1402 							if (!appended)
1403 							{
1404 								if (stringDict->objectForKey(skey.c_str()))
1405 								{
1406 									debugPrintf("Local String key \"%s\" "
1407 										"defined multiple times.\n",
1408 										skey.c_str());
1409 								}
1410 								stringObject =
1411 									new TCStringObject(svalue.c_str());
1412 								stringDict->setObjectForKey(stringObject,
1413 									skey.c_str());
1414 								stringObject->release();
1415 								m_strings[key] = value;
1416 							}
1417 							delete[] value;
1418 						}
1419 					}
1420 				}
1421 			}
1422 			delete[] line;
1423 			if (!eol[0])
1424 			{
1425 				// If there isn't an EOL at the end of the file, we're done now.
1426 				break;
1427 			}
1428 			stringTable += len + 1;
1429 			while (stringTable[0] == '\r' || stringTable[0] == '\n')
1430 			{
1431 				stringTable++;
1432 			}
1433 		}
1434 		else
1435 		{
1436 			break;
1437 		}
1438 	}
1439 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1440 	//buildQStringMap();
1441 #endif // WIN32
1442 	// Note that the load is considered a success if the [StringTable] section
1443 	// is found in the data.
1444 	return sectionFound;
1445 }
1446 
instGetLocalString(const wchar_t * key)1447 const wchar_t *TCLocalStrings::instGetLocalString(const wchar_t *key)
1448 {
1449 	WStringWStringMap::const_iterator it = m_strings.find(key);
1450 
1451 	if (it != m_strings.end())
1452 	{
1453 #ifdef NO_WSTRING
1454 		return L"";
1455 #else // NO_WSTRING
1456 		return it->second.c_str();
1457 #endif // NO_WSTRING
1458 	}
1459 	else
1460 	{
1461 		std::string temp;
1462 
1463 		wstringtostring(temp, key);
1464 		debugPrintf("LocalString %s not found!!!!!!\n", temp.c_str());
1465 		// It should really be NULL, but that means a mistake will likely cause
1466 		// a crash.  At least with an empty string it's less likely to crash.
1467 		return L"";
1468 	}
1469 }
1470 
mbstowstring(std::wstring & dst,const char * src,int length)1471 void TCLocalStrings::mbstowstring(std::wstring &dst, const char *src,
1472 								  int length /*= -1*/)
1473 {
1474 	if (m_codePage == UTF8_CODE_PAGE)
1475 	{
1476 		utf8towstring(dst, src);
1477 		return;
1478 	}
1479 	wchar_t *codePageTable = NULL;
1480 	IntWCharMap::const_iterator it = sm_codePages.find(m_codePage);
1481 
1482 	if (it != sm_codePages.end())
1483 	{
1484 		codePageTable = it->second;
1485 	}
1486 	if (codePageTable)
1487 	{
1488 		int i;
1489 
1490 		if (length == -1)
1491 		{
1492 			length = (int)strlen(src);
1493 		}
1494 		dst.resize(length);
1495 		for (i = 0; i < length; i++)
1496 		{
1497 			dst[i] = codePageTable[(TCByte)src[i]];
1498 		}
1499 	}
1500 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1501 	else if (m_textCodec)
1502 	{
1503 		QString unicodeString = m_textCodec->toUnicode(src);
1504 		dst.clear();
1505 		dst.resize(unicodeString.length());
1506 		for (int i = 0; i < (int)unicodeString.length(); i++)
1507 		{
1508 			QChar qchar = unicodeString.at(i);
1509 
1510 			dst[i] = (wchar_t)qchar.unicode();
1511 		}
1512 	}
1513 #endif // WIN32
1514 	else
1515 	{
1516 		::mbstowstring(dst, src, length);
1517 	}
1518 }
1519 
1520 /*
1521 #if !defined(WIN32) && !defined(__APPLE__) && !defined(_OSMESA)
1522 #include <QT/misc.h>
1523 const QString &TCLocalStrings::instGetLocalString(const char *key)
1524 {
1525 	QStringQStringMap::iterator it = m_qStrings.find(key);
1526 
1527 	if (it != m_qStrings.end())
1528 	{
1529 		return it->second;
1530 	}
1531 	else
1532 	{
1533 		debugPrintf("LocalString %s not found!!!!!!\n", key);
1534 		return m_emptyQString;
1535 	}
1536 }
1537 
1538 void TCLocalStrings::buildQStringMap(void)
1539 {
1540 	m_qStrings.clear();
1541 	for (WStringWStringMap::iterator it = m_strings.begin();
1542 		it != m_strings.end(); ++it)
1543 	{
1544 		QString key;
1545 		QString value;
1546 
1547 		wstringtoqstring(key, it->first);
1548 		wstringtoqstring(value, it->second);
1549 		m_qStrings[key] = value;
1550 	}
1551 }
1552 
1553 #else // WIN32
1554 */
instGetLocalString(const char * key)1555 const char *TCLocalStrings::instGetLocalString(const char *key)
1556 {
1557 	TCStringObject *stringObject =
1558 		(TCStringObject*)stringDict->objectForKey(key);
1559 
1560 	if (stringObject)
1561 	{
1562 		return stringObject->getString();
1563 	}
1564 	else
1565 	{
1566 		debugPrintf("LocalString %s not found!!!!!!\n", key);
1567 		// It should really be NULL, but that means a mistake will likely cause
1568 		// a crash.  At least with an empty string it's less likely to crash.
1569 		return "";
1570 	}
1571 }
1572 //#endif // WIN32
1573 
instGetUtf8LocalString(const char * key)1574 const char *TCLocalStrings::instGetUtf8LocalString(const char *key)
1575 {
1576 	StringStringMap::const_iterator it = m_utf8Strings.find(key);
1577 
1578 	if (it != m_utf8Strings.end())
1579 	{
1580 		return it->second.c_str();
1581 	}
1582 	else
1583 	{
1584 #ifdef TC_NO_UNICODE
1585 		m_utf8Strings[key] = (const char *)instGetLocalString(key);
1586 #else // TC_NO_UNICODE
1587 		std::wstring wKey;
1588 		const wchar_t *wValue;
1589 		std::string utf8Value;
1590 
1591 		mbstowstring(wKey, key);
1592 		wValue = instGetLocalString(wKey.c_str());
1593 		if (ucstringtoutf8(utf8Value, wValue))
1594 		{
1595 			m_utf8Strings[key] = utf8Value;
1596 		}
1597 		else
1598 		{
1599 			m_utf8Strings[key] = instGetLocalString(key);
1600 //			m_utf8Strings[key] =
1601 //				(const char *)instGetLocalString(key).toLatin1().constData();
1602 		}
1603 #endif // TC_NO_UNICODE
1604 		return m_utf8Strings[key].c_str();
1605 	}
1606 }
1607