1 /*
2 * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
3 * Tool for creating NT-like l_intl.nls file for case mapping of unicode
4 * characters.
5 * Copyright 2000 Timoshkov Dmitry
6 * Copyright 2001 Matei Alexandru
7 *
8 * Sources of information:
9 * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
 * Ove Kåven's investigations http://www.ping.uio.no/~ovehk/nls
11 */
12 #include <windows.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <malloc.h>
16 #include <string.h>
17 #include <ctype.h>
18
/*
 * Forward declarations of two 256-entry tables of WCHAR pointers.
 * NOTE(review): neither table is referenced in the visible part of this
 * file - confirm they are still needed.
 */
static const WCHAR * const uprtable[256];
static const WCHAR * const lwrtable[256];

/* Directory (relative path) that receives the generated *.nls files. */
#define NLSDIR "../../media/nls"
/* Path prefix of the unicode.org input data files. */
#define LIBDIR "unicode.org/"
24
/*
 * Header written at the very start of each generated code page NLS file
 * (see create_nls_file).  The structure is packed because it is written
 * to disk verbatim with fwrite().
 */
typedef struct {
    WORD wSize; /* in words 0x000D */
    WORD CodePage;
    WORD MaxCharSize; /* 1 or 2 */
    BYTE DefaultChar[MAX_DEFAULTCHAR];
    WCHAR UnicodeDefaultChar;
    WCHAR unknown1;
    WCHAR unknown2;
    BYTE LeadByte[MAX_LEADBYTES];
} __attribute__((packed)) NLS_FILE_HEADER;
35
36 /*
37 Support for translation from the multiple unicode chars
38 to the single code page char.
39
40 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
41 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
42 2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
43 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
44 2013;EN DASH;Pd;0;ON;;;;;N;;;;;
45 2014;EM DASH;Pd;0;ON;;;;;N;;;;;
46 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
47 */
48
49 /* HYPHEN-MINUS aliases */
50 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
51
52 static struct {
53 WCHAR cp_char;
54 WCHAR *alias; /* must be 0 terminated */
55 } u2cp_alias[] = {
56 /* HYPHEN-MINUS aliases */
57 {0x002D, hyphen_aliases}
58 };
59
/*
 * Give the Unicode characters listed in u2cp_alias a fallback mapping.
 *
 * u2cp - Unicode -> code page table with 65536 entries; the entries are
 *        WCHARs when cpi->MaxCharSize is 2 and BYTEs otherwise
 * cpi  - code page description (MaxCharSize and DefaultChar are read)
 *
 * An alias is only patched when its table slot still holds the code page
 * default character, i.e. when the code page has no mapping of its own.
 *
 * The alias value is used directly as the table index instead of scanning
 * all 65536 slots for every alias; the per-slot order of updates is the
 * same as with the scan, so the resulting table is identical.
 */
static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
{
    const size_t alias_sets = sizeof(u2cp_alias) / sizeof(u2cp_alias[0]);
    const WCHAR *alias;
    size_t set;

    if (cpi->MaxCharSize == 2) {
        WCHAR *wide = (WCHAR *)u2cp;
        WCHAR default_char;

        /* DefaultChar is a BYTE array: copy it out rather than type-pun it. */
        memcpy(&default_char, cpi->DefaultChar, sizeof(default_char));

        for (set = 0; set < alias_sets; set++) {
            for (alias = u2cp_alias[set].alias; *alias; alias++) {
                if (wide[*alias] == default_char)
                    wide[*alias] = u2cp_alias[set].cp_char;
            }
        }
    }
    else {
        BYTE *narrow = (BYTE *)u2cp;

        for (set = 0; set < alias_sets; set++) {
            /* a target that does not fit into one byte cannot be stored */
            if (u2cp_alias[set].cp_char >= 256)
                continue;

            for (alias = u2cp_alias[set].alias; *alias; alias++) {
                if (narrow[*alias] == cpi->DefaultChar[0])
                    narrow[*alias] = (BYTE)u2cp_alias[set].cp_char;
            }
        }
    }
}
95
write_unicode2cp_table(FILE * out,CPINFOEXA * cpi,WCHAR * table)96 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
97 {
98 void *u2cp;
99 WCHAR *wc;
100 CHAR *c;
101 int i;
102 BOOL ret = TRUE;
103
104 u2cp = malloc(cpi->MaxCharSize * 65536);
105 if(!u2cp) {
106 printf("Not enough memory for Unicode to Codepage table\n");
107 return FALSE;
108 }
109
110 if(cpi->MaxCharSize == 2) {
111 wc = (WCHAR *)u2cp;
112 for(i = 0; i < 65536; i++)
113 wc[i] = *(WCHAR *)cpi->DefaultChar;
114
115 for(i = 0; i < 65536; i++)
116 if (table[i] != '?')
117 wc[table[i]] = (WCHAR)i;
118 }
119 else {
120 c = (CHAR *)u2cp;
121 for(i = 0; i < 65536; i++)
122 c[i] = cpi->DefaultChar[0];
123
124 for(i = 0; i < 256; i++)
125 if (table[i] != '?')
126 c[table[i]] = (CHAR)i;
127 }
128
129 patch_aliases(u2cp, cpi);
130
131 if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536)
132 ret = FALSE;
133
134 free(u2cp);
135
136 return ret;
137 }
138
write_lb_ranges(FILE * out,CPINFOEXA * cpi,WCHAR * table)139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
140 {
141 WCHAR sub_table[256];
142 WORD offset, offsets[256];
143 int i, j, range;
144
145 memset(offsets, 0, sizeof(offsets));
146
147 offset = 0;
148
149 for(i = 0; i < MAX_LEADBYTES; i += 2) {
150 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
151 offset += 256;
152 offsets[range] = offset;
153 }
154 }
155
156 if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets))
157 return FALSE;
158
159 for(i = 0; i < MAX_LEADBYTES; i += 2) {
160 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
161 /*printf("Writing sub table for LeadByte %02X\n", range);*/
162 for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) {
163 sub_table[j - MAKEWORD(0, range)] = table[j];
164 }
165
166 if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
167 return FALSE;
168 }
169 }
170
171 return TRUE;
172 }
173
create_nls_file(char * name,CPINFOEXA * cpi,WCHAR * table,WCHAR * oemtable)174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
175 {
176 FILE *out;
177 NLS_FILE_HEADER nls;
178 WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
179
180 printf("Creating NLS table \"%s\"\n", name);
181
182 if(!(out = fopen(name, "wb"))) {
183 printf("Could not create file \"%s\"\n", name);
184 return FALSE;
185 }
186
187 memset(&nls, 0, sizeof(nls));
188
189 nls.wSize = sizeof(nls) / sizeof(WORD);
190 nls.CodePage = cpi->CodePage;
191 nls.MaxCharSize = cpi->MaxCharSize;
192 memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
193 nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
194 nls.unknown1 = '?';
195 nls.unknown2 = '?';
196 memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
197
198 if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) {
199 fclose(out);
200 printf("Could not write to file \"%s\"\n", name);
201 return FALSE;
202 }
203
204 number_of_lb_ranges = 0;
205 number_of_lb_subtables = 0;
206
207 for(i = 0; i < MAX_LEADBYTES; i += 2) {
208 if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
209 number_of_lb_ranges++;
210 number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
211 }
212 }
213
214 /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
215 /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
216
217 /* Calculate offset to Unicode to CP table in words:
218 * 1. (256 * sizeof(WORD)) primary CP to Unicode table +
219 * 2. (WORD) optional OEM glyph table size in words +
220 * 3. OEM glyph table size in words * sizeof(WORD) +
221 * 4. (WORD) Number of DBCS LeadByte ranges +
222 * 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
223 * 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
224 * 7. (WORD) Unknown flag
225 */
226
227 wValue = (256 * sizeof(WORD) + /* 1 */
228 sizeof(WORD) + /* 2 */
229 ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
230 sizeof(WORD) + /* 4 */
231 ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
232 number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
233 sizeof(WORD) /* 7 */
234 ) / sizeof(WORD);
235
236 /* offset of Unicode to CP table in words */
237 fwrite(&wValue, 1, sizeof(wValue), out);
238
239 /* primary CP to Unicode table */
240 if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
241 fclose(out);
242 printf("Could not write to file \"%s\"\n", name);
243 return FALSE;
244 }
245
246 /* optional OEM glyph table size in words */
247 wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
248 fwrite(&wValue, 1, sizeof(wValue), out);
249
250 /* optional OEM to Unicode table */
251 if (oemtable) {
252 if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
253 fclose(out);
254 printf("Could not write to file \"%s\"\n", name);
255 return FALSE;
256 }
257 }
258
259 /* Number of DBCS LeadByte ranges */
260 fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out);
261
262 /* offsets of lead byte sub tables and lead byte sub tables */
263 if(number_of_lb_ranges > 0) {
264 if(!write_lb_ranges(out, cpi, table)) {
265 fclose(out);
266 printf("Could not write to file \"%s\"\n", name);
267 return FALSE;
268 }
269 }
270
271 /* Unknown flag */
272 wValue = 0;
273 fwrite(&wValue, 1, sizeof(wValue), out);
274
275 if(!write_unicode2cp_table(out, cpi, table)) {
276 fclose(out);
277 printf("Could not write to file \"%s\"\n", name);
278 return FALSE;
279 }
280
281 fclose(out);
282 return TRUE;
283 }
284
285 /* correct the codepage information such as default chars */
patch_codepage_info(CPINFOEXA * cpi)286 static void patch_codepage_info(CPINFOEXA *cpi)
287 {
288 /* currently nothing */
289 }
290
Load_CP2Unicode_Table(char * table_name,UINT cp,CPINFOEXA * cpi)291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
292 {
293 char buf[256];
294 char *p;
295 DWORD n, value;
296 FILE *file;
297 WCHAR *table;
298 int lb_ranges, lb_range_started, line;
299
300 printf("Loading translation table \"%s\"\n", table_name);
301
302 /* Init to default values */
303 memset(cpi, 0, sizeof(CPINFOEXA));
304 cpi->CodePage = cp;
305 *(WCHAR *)cpi->DefaultChar = '?';
306 cpi->MaxCharSize = 1;
307 cpi->UnicodeDefaultChar = '?';
308
309 patch_codepage_info(cpi);
310
311 table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
312 if(!table) {
313 printf("Not enough memory for Codepage to Unicode table\n");
314 return NULL;
315 }
316
317 for(n = 0; n < 256; n++)
318 table[n] = (WCHAR)n;
319
320 for(n = 256; n < 65536; n++)
321 table[n] = cpi->UnicodeDefaultChar;
322
323 file = fopen(table_name, "r");
324 if(file == NULL) {
325 free(table);
326 return NULL;
327 }
328
329 line = 0;
330 lb_ranges = 0;
331 lb_range_started = 0;
332
333 while(fgets(buf, sizeof(buf), file)) {
334 line++;
335 p = buf;
336 while(isspace(*p)) p++;
337
338 if(!*p || p[0] == '#')
339 continue;
340
341 n = strtol(p, &p, 0);
342 if(n > 0xFFFF) {
343 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
344 continue;
345 }
346
347 if(n > 0xFF && cpi->MaxCharSize != 2) {
348 /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
349 cpi->MaxCharSize = 2;
350 }
351
352 while(isspace(*p)) p++;
353
354 if(!*p || p[0] == '#') {
355 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
356 }
357 else {
358 value = strtol(p, &p, 0);
359 if(value > 0xFFFF) {
360 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
361 }
362 table[n] = (WCHAR)value;
363 }
364
365 /* wait for comment */
366 while(*p && *p != '#') p++;
367
368 if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
369 /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
370 if(n > 0xFF) {
371 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
372 continue;
373 }
374
375 table[n] = (WCHAR)0;
376
377 if(lb_range_started) {
378 cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
379 }
380 else {
381 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
382 if(lb_ranges < MAX_LEADBYTES/2) {
383 lb_ranges++;
384 lb_range_started = 1;
385 cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
386 }
387 else
388 printf("Line %d: Error: could not start new lead byte range\n", line);
389 }
390 }
391 else {
392 if(lb_range_started)
393 lb_range_started = 0;
394 }
395 }
396
397 fclose(file);
398
399 return table;
400 }
401
/*
 * Build the OEM glyph table of a code page.
 *
 * table_name - path of the glyph mapping file.  Each data line holds, in
 *              hexadecimal: the Unicode value, the OEM code, and a third
 *              column that is used instead of the second one for code
 *              page 864 ('-' in that column means "no value").
 * def_table  - 65536-entry code page -> Unicode table used as the base
 * cp         - code page number (unused; cpi->CodePage is consulted)
 * cpi        - code page description (only CodePage is read)
 *
 * Returns a malloc()ed copy of def_table, patched with the glyph entries,
 * that the caller must free; NULL when memory is exhausted or the file
 * cannot be opened.
 */
static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
{
    char buf[256];
    char *p, *end;
    unsigned long n, value;
    FILE *file;
    WCHAR *table;
    int line;

    (void)cp;

    printf("Loading oem glyph table \"%s\"\n", table_name);

    table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
    if(!table) {
        printf("Not enough memory for Codepage to Unicode table\n");
        return NULL;
    }

    memcpy(table, def_table, 65536 * sizeof(WCHAR));

    file = fopen(table_name, "r");
    if(file == NULL) {
        free(table);
        return NULL;
    }

    line = 0;

    while(fgets(buf, sizeof(buf), file)) {
        line++;
        p = buf;
        /* <ctype.h> functions need an unsigned char value */
        while(isspace((unsigned char)*p)) p++;

        if(!*p || p[0] == '#')
            continue;

        /* column 1: Unicode value */
        value = strtoul(p, &end, 16);
        if(end == p)
            continue; /* no number: skip the line */
        p = end;

        if(value > 0xFFFF) {
            printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
            continue;
        }

        while(isspace((unsigned char)*p)) p++;

        if(!*p || p[0] == '#') {
            /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
            continue;
        }

        /* column 2: OEM code */
        n = strtoul(p, &end, 16);
        if(end == p)
            continue;
        p = end;

        if(n > 0xFFFF) {
            printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
            continue;
        }

        if(cpi->CodePage == 864) {
            /* column 3 replaces the OEM code for code page 864 */
            while(isspace((unsigned char)*p)) p++;

            if(!*p || p[0] == '#' || p[0] == '-') {
                /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
                continue;
            }

            n = strtoul(p, &end, 16);
            if(end == p)
                continue;

            /*
             * Only corrupted values are skipped.  Previously every
             * code page 864 line was skipped after parsing, so the
             * glyph table was never patched for that code page.
             */
            if(n > 0xFFFF) {
                printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
                continue;
            }
        }

        table[n] = (WCHAR)value;
    }

    fclose(file);

    return table;
}
480
write_nls_files()481 int write_nls_files()
482 {
483 WCHAR *table;
484 WCHAR *oemtable;
485 char nls_filename[256];
486 CPINFOEXA cpi;
487 int i;
488 struct code_page {
489 UINT cp;
490 BOOL oem;
491 char *table_filename;
492 char *comment;
493 } pages[] = {
494 {37, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
495 {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
496 {437, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
497 {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
498 /*{708, FALSE, "", "Arabic ASMO"},*/
499 /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
500 {737, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
501 {775, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
502 {850, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
503 {852, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
504 {855, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
505 {856, TRUE, LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
506 {857, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
507 {860, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
508 {861, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
509 {862, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
510 {863, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
511 {864, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
512 {865, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
513 {866, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
514 {869, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
515 /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
516 {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
517 {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
518 {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
519 {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
520 {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
521 {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
522 {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
523 {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
524 {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
525 {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
526 {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
527 {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
528 {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
529 {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
530 {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
531 {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
532 {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
533 {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
534 {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
535 {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
536 {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
537 {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
538 {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
539 {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
540 /*{20000, FALSE, "", "CNS Taiwan"},*/
541 /*{20001, FALSE, "", "TCA Taiwan"},*/
542 /*{20002, FALSE, "", "Eten Taiwan"},*/
543 /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
544 /*{20004, FALSE, "", "TeleText Taiwan"},*/
545 /*{20005, FALSE, "", "Wang Taiwan"},*/
546 /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
547 /*{20106, FALSE, "", "IA5 German"},*/
548 /*{20107, FALSE, "", "IA5 Swedish"},*/
549 /*{20108, FALSE, "", "IA5 Norwegian"},*/
550 /*{20127, FALSE, "", "US ASCII"}, */
551 /*{20261, FALSE, "", "T.61"},*/
552 /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
553 /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
554 /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
555 /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
556 /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
557 /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
558 /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
559 /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
560 /*{20297, FALSE, "", "IBM EBCDIC France"},*/
561 /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
562 /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/
563 /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
564 /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
565 /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
566 {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"},
567 /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
568 {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
569 /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
570 /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
571 /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
572 {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
573 {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
574 {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
575 {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
576 {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
577 {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
578 {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
579 {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
580 {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
581 };
582
583 for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
584 table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
585 if(!table) {
586 printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
587 continue;
588 }
589
590 if (pages[i].oem) {
591 oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
592 if(!oemtable) {
593 printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
594 continue;
595 }
596 }
597
598 sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
599 if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) {
600 printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
601 }
602
603 if (pages[i].oem)
604 free(oemtable);
605
606 free(table);
607 }
608
609 return 0;
610 }
611
612
613
614 static WORD *to_upper_org = NULL, *to_lower_org = NULL;
615
616 #if 0
617 static WORD diffs[256];
618 static int number_of_diffs;
619 #endif
620
621 static WORD number_of_subtables_with_diffs;
622 /* pointers to subtables with 16 elements in each to the main table */
623 static WORD *subtables_with_diffs[4096];
624
625 static WORD number_of_subtables_with_offsets;
626 /* subtables with 16 elements */
627 static WORD subtables_with_offsets[4096 * 16];
628
/*
 * Smoke test for a packed case table: run every character of a sample
 * string (including its terminating 0) through the three-level lookup and
 * apply the resulting delta.  The converted string is not inspected or
 * stored; the function only exercises the lookups.
 *
 * table - packed table as produced by pack_table()
 */
static void test_packed_table(WCHAR *table)
{
    WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890";
    DWORD pos, count;
    WCHAR ch;
    WORD second_level, third_level;

    count = lstrlenW(test_str);
    count += 1; /* the terminator is looked up as well */

    for(pos = 0; pos < count; pos++) {
        ch = test_str[pos];

        /* level 1: the high byte selects an index subtable */
        second_level = table[HIBYTE(ch)];
        /* level 2: the upper nibble of the low byte selects a diff subtable */
        third_level = table[second_level + (LOBYTE(ch) >> 4)];
        /* level 3: the lower nibble selects the delta to add */
        test_str[pos] += table[third_level + (LOBYTE(ch) & 0x0F)];
    }
}
664
CreateCaseDiff(char * table_name)665 static BOOL CreateCaseDiff(char *table_name)
666 {
667 char buf[256];
668 char *p;
669 WORD code, case_mapping;
670 FILE *file;
671 int line;
672
673 to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
674 if(!to_upper_org) {
675 printf("Not enough memory for to upper table\n");
676 return FALSE;
677 }
678
679 to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
680 if(!to_lower_org) {
681 printf("Not enough memory for to lower table\n");
682 return FALSE;
683 }
684
685 file = fopen(table_name, "r");
686 if(file == NULL) {
687 printf("Could not open file \"%s\"\n", table_name);
688 return FALSE;
689 }
690
691 line = 0;
692
693 while(fgets(buf, sizeof(buf), file)) {
694 line++;
695 p = buf;
696 while(*p && isspace(*p)) p++;
697
698 if(!*p)
699 continue;
700
701 /* 0. Code value */
702 code = (WORD)strtol(p, &p, 16);
703
704 //if(code != 0x9A0 && code != 0xBA0)
705 //continue;
706
707 while(*p && *p != ';') p++;
708 if(!*p)
709 continue;
710 p++;
711
712 /* 1. Character name */
713 while(*p && *p != ';') p++;
714 if(!*p)
715 continue;
716 p++;
717
718 /* 2. General Category */
719 while(*p && *p != ';') p++;
720 if(!*p)
721 continue;
722 p++;
723
724 /* 3. Canonical Combining Classes */
725 while(*p && *p != ';') p++;
726 if(!*p)
727 continue;
728 p++;
729
730 /* 4. Bidirectional Category */
731 while(*p && *p != ';') p++;
732 if(!*p)
733 continue;
734 p++;
735
736 /* 5. Character Decomposition Mapping */
737 while(*p && *p != ';') p++;
738 if(!*p)
739 continue;
740 p++;
741
742 /* 6. Decimal digit value */
743 while(*p && *p != ';') p++;
744 if(!*p)
745 continue;
746 p++;
747
748 /* 7. Digit value */
749 while(*p && *p != ';') p++;
750 if(!*p)
751 continue;
752 p++;
753
754 /* 8. Numeric value */
755 while(*p && *p != ';') p++;
756 if(!*p)
757 continue;
758 p++;
759
760 /* 9. Mirrored */
761 while(*p && *p != ';') p++;
762 if(!*p)
763 continue;
764 p++;
765
766 /* 10. Unicode 1.0 Name */
767 while(*p && *p != ';') p++;
768 if(!*p)
769 continue;
770 p++;
771
772 /* 11. 10646 comment field */
773 while(*p && *p != ';') p++;
774 if(!*p)
775 continue;
776 p++;
777
778 /* 12. Uppercase Mapping */
779 while(*p && isspace(*p)) p++;
780 if(!*p) continue;
781 if(*p != ';') {
782 case_mapping = (WORD)strtol(p, &p, 16);
783 to_upper_org[code] = case_mapping - code;
784 while(*p && *p != ';') p++;
785 }
786 else
787 p++;
788
789 /* 13. Lowercase Mapping */
790 while(*p && isspace(*p)) p++;
791 if(!*p) continue;
792 if(*p != ';') {
793 case_mapping = (WORD)strtol(p, &p, 16);
794 to_lower_org[code] = case_mapping - code;
795 while(*p && *p != ';') p++;
796 }
797 else
798 p++;
799
800 /* 14. Titlecase Mapping */
801 while(*p && *p != ';') p++;
802 if(!*p)
803 continue;
804 p++;
805 }
806
807 fclose(file);
808
809 return TRUE;
810 }
811
#if 0
/*
 * Disabled helper: return the position of the first entry of diffs[]
 * that equals diff, or -1 when there is none.
 */
static int find_diff(WORD diff)
{
    int pos = 0;

    while(pos < number_of_diffs && diffs[pos] != diff)
        pos++;

    return (pos < number_of_diffs) ? pos : -1;
}
#endif
825
/*
 * Return the index of a registered 16-element diff subtable whose contents
 * equal those of `subtable`, registering `subtable` itself when no equal
 * one exists yet.
 *
 * table    - unused
 * subtable - pointer to 16 WORDs; the pointer (not a copy) is stored, so
 *            the memory must stay valid while the registry is in use
 *
 * When the registry already holds 4096 entries a message is printed and 0
 * is returned.
 */
static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
{
    WORD slot = 0;

    (void)table;

    while(slot < number_of_subtables_with_diffs) {
        if(!memcmp(subtables_with_diffs[slot], subtable, 16 * sizeof(WORD)))
            return slot;
        slot++;
    }

    /* not found: slot now equals the number of registered subtables */
    if(number_of_subtables_with_diffs < 4096) {
        subtables_with_diffs[number_of_subtables_with_diffs++] = subtable;
        return slot;
    }

    printf("Could not add new subtable with diffs, storage is full\n");
    return 0;
}
846
/*
 * Return the index of a stored 16-element index subtable whose contents
 * equal those of `subtable`, appending a copy when no equal one exists.
 *
 * subtable - pointer to 16 WORDs; the contents are copied
 *
 * When 4096 subtables are already stored a message is printed and 0 is
 * returned.
 */
static WORD find_subtable_with_offsets(WORD *subtable)
{
    const size_t subtable_bytes = 16 * sizeof(WORD);
    WORD slot = 0;

    while(slot < number_of_subtables_with_offsets) {
        if(!memcmp(&subtables_with_offsets[slot * 16], subtable, subtable_bytes))
            return slot;
        slot++;
    }

    /* not found: slot now equals the number of stored subtables */
    if(number_of_subtables_with_offsets < 4096) {
        memcpy(&subtables_with_offsets[slot * 16], subtable, subtable_bytes);
        number_of_subtables_with_offsets++;
        return slot;
    }

    printf("Could not add new subtable with offsets, storage is full\n");
    return 0;
}
867
/*
 * Compress a 65536-entry table of case deltas into a three-level table.
 *
 * table                - unpacked table, indexed by character
 * packed_size_in_words - receives the size of the result in WORDs
 *
 * Layout of the result (all offsets are in WORDs from its start):
 *   [0 .. 0xFF]   offset of the index subtable for each high byte
 *   index area    16-entry subtables holding offsets of diff subtables,
 *                 selected by the upper nibble of the low byte
 *   diff area     16-entry subtables holding the deltas, selected by the
 *                 lower nibble of the low byte
 * Identical subtables are stored only once.
 *
 * Returns a calloc()ed buffer that the caller must free.  When the packed
 * table would not be addressable with 16-bit offsets, or memory is
 * exhausted, a message is printed, *packed_size_in_words is set to 0 and
 * NULL is returned.
 */
static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
{
    WORD high, low4, index;
    WORD main_index[256];
    WORD temp_subtable[16];
    WORD *packed_table;
    WORD *subtable_src, *subtable_dst;
    unsigned long diffs_base, total_words;

    memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
    number_of_subtables_with_diffs = 0;

    memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
    number_of_subtables_with_offsets = 0;

    for(high = 0; high < 256; high++) {
        for(low4 = 0; low4 < 256; low4 += 16) {
            index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);

            temp_subtable[low4 >> 4] = index;
        }

        index = find_subtable_with_offsets(temp_subtable);
        main_index[high] = index;
    }

    /* compute the sizes in a wide type so that overflow can be detected */
    diffs_base = 0x100 + number_of_subtables_with_offsets * 16UL;
    total_words = diffs_base + number_of_subtables_with_diffs * 16UL;

    if(total_words > 0xFFFF) {
        printf("Packed table is too large (%lu words)\n", total_words);
        *packed_size_in_words = 0;
        return NULL;
    }

    *packed_size_in_words = (WORD)total_words;
    packed_table = calloc(total_words, sizeof(WORD));
    if(!packed_table) {
        printf("Not enough memory for packed table\n");
        *packed_size_in_words = 0;
        return NULL;
    }

    /* fill main index according to the subtables_with_offsets */
    for(high = 0; high < 256; high++) {
        packed_table[high] = 0x100 + main_index[high] * 16;
    }

    /* fill subtable index according to the subtables_with_diffs */
    for(index = 0; index < number_of_subtables_with_offsets; index++) {
        subtable_dst = packed_table + 0x100 + index * 16;
        subtable_src = &subtables_with_offsets[index * 16];

        for(low4 = 0; low4 < 16; low4++) {
            subtable_dst[low4] = (WORD)(diffs_base + subtable_src[low4] * 16);
        }
    }

    /* copy the distinct diff subtables behind the index area */
    for(index = 0; index < number_of_subtables_with_diffs; index++) {
        subtable_dst = packed_table + diffs_base + index * 16;
        memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
    }

    test_packed_table(packed_table);

    return packed_table;
}
925
write_casemap_file(void)926 int write_casemap_file(void)
927 {
928 WORD packed_size_in_words, offset_to_next_table_in_words;
929 WORD *packed_table, value;
930 FILE *file;
931
932 if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
933 return -1;
934
935 file = fopen(NLSDIR"/l_intl.nls", "wb");
936
937 /* write version number */
938 value = 1;
939 fwrite(&value, 1, sizeof(WORD), file);
940
941 /* pack upper case table */
942 packed_table = pack_table(to_upper_org, &packed_size_in_words);
943 offset_to_next_table_in_words = packed_size_in_words + 1;
944 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
945 /* write packed upper case table */
946 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
947 free(packed_table);
948
949 /* pack lower case table */
950 packed_table = pack_table(to_lower_org, &packed_size_in_words);
951 offset_to_next_table_in_words = packed_size_in_words + 1;
952 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
953 /* write packed lower case table */
954 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
955 free(packed_table);
956
957 fclose(file);
958
959 free(to_upper_org);
960 free(to_lower_org);
961
962 return 0;
963 }
964
/*
 * Generate the code page NLS files and the case mapping file.
 *
 * Both generators always run; the exit status is EXIT_FAILURE when either
 * of them reports an error and 0 otherwise.
 */
int main(void)
{
    int failed = 0;

    if(write_nls_files() != 0)
        failed = 1;

    if(write_casemap_file() != 0)
        failed = 1;

    return failed ? EXIT_FAILURE : 0;
}
972