xref: /reactos/sdk/tools/create_nls/create_nls.c (revision 40462c92)
1 /*
2  * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
3  * Tool for creating NT-like l_intl.nls file for case mapping of unicode
4  * characters.
5  * Copyright 2000 Timoshkov Dmitry
6  * Copyright 2001 Matei Alexandru
7  *
8  * Sources of information:
9  * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
10  * Ove Kåven's investigations http://www.ping.uio.no/~ovehk/nls
11  */
12 #include <windows.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <malloc.h>
16 #include <string.h>
17 #include <ctype.h>
18 
19 static const WCHAR * const uprtable[256];
20 static const WCHAR * const lwrtable[256];
21 
22 #define NLSDIR "../../media/nls"
23 #define LIBDIR "unicode.org/"
24 
25 typedef struct {
26     WORD wSize; /* in words 0x000D */
27     WORD CodePage;
28     WORD MaxCharSize; /* 1 or 2 */
29     BYTE DefaultChar[MAX_DEFAULTCHAR];
30     WCHAR UnicodeDefaultChar;
31     WCHAR unknown1;
32     WCHAR unknown2;
33     BYTE LeadByte[MAX_LEADBYTES];
34 } __attribute__((packed)) NLS_FILE_HEADER;
35 
36 /*
37 Support for translation from the multiple unicode chars
38 to the single code page char.
39 
40 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
41 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
42 2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
43 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
44 2013;EN DASH;Pd;0;ON;;;;;N;;;;;
45 2014;EM DASH;Pd;0;ON;;;;;N;;;;;
46 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
47 */
48 
49 /* HYPHEN-MINUS aliases */
50 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
51 
52 static struct {
53     WCHAR cp_char;
54     WCHAR *alias; /* must be 0 terminated */
55 } u2cp_alias[] = {
56 /* HYPHEN-MINUS aliases */
57 {0x002D, hyphen_aliases}
58 };
59 
60 static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
61 {
62     int i, j;
63     WCHAR *wc, *alias;
64     BYTE *c;
65 
66     if(cpi->MaxCharSize == 2) {
67         wc = (WCHAR *)u2cp;
68         for(i = 0; i < 65536; i++) {
69             for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
70                 alias = u2cp_alias[j].alias;
71                 while(*alias) {
72                     if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) {
73                         wc[i] = u2cp_alias[j].cp_char;
74                     }
75                     alias++;
76                 }
77             }
78         }
79     }
80     else {
81         c = (BYTE *)u2cp;
82         for(i = 0; i < 65536; i++) {
83             for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
84                 alias = u2cp_alias[j].alias;
85                 while(*alias) {
86                     if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) {
87                         c[i] = (BYTE)u2cp_alias[j].cp_char;
88                     }
89                     alias++;
90                 }
91             }
92         }
93     }
94 }
95 
96 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
97 {
98     void *u2cp;
99     WCHAR *wc;
100     CHAR *c;
101     int i;
102     BOOL ret = TRUE;
103 
104     u2cp = malloc(cpi->MaxCharSize * 65536);
105     if(!u2cp) {
106         printf("Not enough memory for Unicode to Codepage table\n");
107         return FALSE;
108     }
109 
110     if(cpi->MaxCharSize == 2) {
111         wc = (WCHAR *)u2cp;
112         for(i = 0; i < 65536; i++)
113             wc[i] = *(WCHAR *)cpi->DefaultChar;
114 
115         for(i = 0; i < 65536; i++)
116             if (table[i] != '?')
117                 wc[table[i]] = (WCHAR)i;
118     }
119     else {
120         c = (CHAR *)u2cp;
121         for(i = 0; i < 65536; i++)
122             c[i] = cpi->DefaultChar[0];
123 
124         for(i = 0; i < 256; i++)
125             if (table[i] != '?')
126                 c[table[i]] = (CHAR)i;
127     }
128 
129     patch_aliases(u2cp, cpi);
130 
131     if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536)
132         ret = FALSE;
133 
134     free(u2cp);
135 
136     return ret;
137 }
138 
139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
140 {
141     WCHAR sub_table[256];
142     WORD offset, offsets[256];
143     int i, j, range;
144 
145     memset(offsets, 0, sizeof(offsets));
146 
147     offset = 0;
148 
149     for(i = 0; i < MAX_LEADBYTES; i += 2) {
150         for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
151             offset += 256;
152             offsets[range] = offset;
153         }
154     }
155 
156     if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets))
157         return FALSE;
158 
159     for(i = 0; i < MAX_LEADBYTES; i += 2) {
160         for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
161             /*printf("Writing sub table for LeadByte %02X\n", range);*/
162             for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) {
163                 sub_table[j - MAKEWORD(0, range)] = table[j];
164             }
165 
166             if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
167                 return FALSE;
168         }
169     }
170 
171     return TRUE;
172 }
173 
174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
175 {
176     FILE *out;
177     NLS_FILE_HEADER nls;
178     WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
179 
180     printf("Creating NLS table \"%s\"\n", name);
181 
182     if(!(out = fopen(name, "wb"))) {
183         printf("Could not create file \"%s\"\n", name);
184         return FALSE;
185     }
186 
187     memset(&nls, 0, sizeof(nls));
188 
189     nls.wSize = sizeof(nls) / sizeof(WORD);
190     nls.CodePage = cpi->CodePage;
191     nls.MaxCharSize = cpi->MaxCharSize;
192     memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
193     nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
194     nls.unknown1 = '?';
195     nls.unknown2 = '?';
196     memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
197 
198     if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) {
199         fclose(out);
200         printf("Could not write to file \"%s\"\n", name);
201         return FALSE;
202     }
203 
204     number_of_lb_ranges = 0;
205     number_of_lb_subtables = 0;
206 
207     for(i = 0; i < MAX_LEADBYTES; i += 2) {
208         if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
209             number_of_lb_ranges++;
210             number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
211         }
212     }
213 
214     /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
215     /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
216 
217     /* Calculate offset to Unicode to CP table in words:
218      *  1. (256 * sizeof(WORD)) primary CP to Unicode table +
219      *  2. (WORD) optional OEM glyph table size in words +
220      *  3. OEM glyph table size in words * sizeof(WORD) +
221      *  4. (WORD) Number of DBCS LeadByte ranges +
222      *  5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
223      *  6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
224      *  7. (WORD) Unknown flag
225      */
226 
227     wValue = (256 * sizeof(WORD) + /* 1 */
228               sizeof(WORD) + /* 2 */
229               ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
230               sizeof(WORD) + /* 4 */
231               ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
232               number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
233               sizeof(WORD) /* 7 */
234               ) / sizeof(WORD);
235 
236     /* offset of Unicode to CP table in words */
237     fwrite(&wValue, 1, sizeof(wValue), out);
238 
239     /* primary CP to Unicode table */
240     if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
241         fclose(out);
242         printf("Could not write to file \"%s\"\n", name);
243         return FALSE;
244     }
245 
246     /* optional OEM glyph table size in words */
247     wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
248     fwrite(&wValue, 1, sizeof(wValue), out);
249 
250     /* optional OEM to Unicode table */
251     if (oemtable) {
252         if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
253             fclose(out);
254             printf("Could not write to file \"%s\"\n", name);
255             return FALSE;
256         }
257     }
258 
259     /* Number of DBCS LeadByte ranges */
260     fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out);
261 
262     /* offsets of lead byte sub tables and lead byte sub tables */
263     if(number_of_lb_ranges > 0) {
264         if(!write_lb_ranges(out, cpi, table)) {
265             fclose(out);
266             printf("Could not write to file \"%s\"\n", name);
267             return FALSE;
268         }
269     }
270 
271     /* Unknown flag */
272     wValue = 0;
273     fwrite(&wValue, 1, sizeof(wValue), out);
274 
275     if(!write_unicode2cp_table(out, cpi, table)) {
276         fclose(out);
277         printf("Could not write to file \"%s\"\n", name);
278         return FALSE;
279     }
280 
281     fclose(out);
282     return TRUE;
283 }
284 
285 /* correct the codepage information such as default chars */
286 static void patch_codepage_info(CPINFOEXA *cpi)
287 {
288     /* currently nothing */
289 }
290 
291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
292 {
293     char buf[256];
294     char *p;
295     DWORD n, value;
296     FILE *file;
297     WCHAR *table;
298     int lb_ranges, lb_range_started, line;
299 
300     printf("Loading translation table \"%s\"\n", table_name);
301 
302     /* Init to default values */
303     memset(cpi, 0, sizeof(CPINFOEXA));
304     cpi->CodePage = cp;
305     *(WCHAR *)cpi->DefaultChar = '?';
306     cpi->MaxCharSize = 1;
307     cpi->UnicodeDefaultChar = '?';
308 
309     patch_codepage_info(cpi);
310 
311     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
312     if(!table) {
313         printf("Not enough memory for Codepage to Unicode table\n");
314         return NULL;
315     }
316 
317     for(n = 0; n < 256; n++)
318         table[n] = (WCHAR)n;
319 
320     for(n = 256; n < 65536; n++)
321         table[n] = cpi->UnicodeDefaultChar;
322 
323     file = fopen(table_name, "r");
324     if(file == NULL) {
325         free(table);
326         return NULL;
327     }
328 
329     line = 0;
330     lb_ranges = 0;
331     lb_range_started = 0;
332 
333     while(fgets(buf, sizeof(buf), file)) {
334         line++;
335         p = buf;
336         while(isspace(*p)) p++;
337 
338         if(!*p || p[0] == '#')
339             continue;
340 
341         n = strtol(p, &p, 0);
342         if(n > 0xFFFF) {
343             printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
344             continue;
345         }
346 
347         if(n > 0xFF && cpi->MaxCharSize != 2) {
348             /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
349             cpi->MaxCharSize = 2;
350         }
351 
352         while(isspace(*p)) p++;
353 
354         if(!*p || p[0] == '#') {
355             /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
356         }
357         else {
358             value = strtol(p, &p, 0);
359             if(value > 0xFFFF) {
360                 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
361             }
362             table[n] = (WCHAR)value;
363         }
364 
365         /* wait for comment */
366         while(*p && *p != '#') p++;
367 
368         if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
369             /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
370             if(n > 0xFF) {
371                 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
372                 continue;
373             }
374 
375             table[n] = (WCHAR)0;
376 
377             if(lb_range_started) {
378                 cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
379             }
380             else {
381                 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
382                 if(lb_ranges < MAX_LEADBYTES/2) {
383                     lb_ranges++;
384                     lb_range_started = 1;
385                     cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
386                 }
387                 else
388                     printf("Line %d: Error: could not start new lead byte range\n", line);
389             }
390         }
391         else {
392             if(lb_range_started)
393                 lb_range_started = 0;
394         }
395     }
396 
397     fclose(file);
398 
399     return table;
400 }
401 
402 static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
403 {
404     char buf[256];
405     char *p;
406     DWORD n, value;
407     FILE *file;
408     WCHAR *table;
409     int line;
410 
411     printf("Loading oem glyph table \"%s\"\n", table_name);
412 
413     table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
414     if(!table) {
415         printf("Not enough memory for Codepage to Unicode table\n");
416         return NULL;
417     }
418 
419     memcpy(table, def_table, 65536 * sizeof(WCHAR));
420 
421     file = fopen(table_name, "r");
422     if(file == NULL) {
423         free(table);
424         return NULL;
425     }
426 
427     line = 0;
428 
429     while(fgets(buf, sizeof(buf), file)) {
430         line++;
431         p = buf;
432         while(isspace(*p)) p++;
433 
434         if(!*p || p[0] == '#')
435             continue;
436 
437         value = strtol(p, &p, 16);
438         if(value > 0xFFFF) {
439             printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
440             continue;
441         }
442 
443         while(isspace(*p)) p++;
444 
445         if(!*p || p[0] == '#') {
446             /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
447             continue;
448         }
449         else {
450             n = strtol(p, &p, 16);
451             if(n > 0xFFFF) {
452                 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
453                 continue;
454             }
455         }
456 
457         if (cpi->CodePage == 864) {
458             while(isspace(*p)) p++;
459 
460             if(!*p || p[0] == '#' || p[0] == '-') {
461                 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
462                 continue;
463             }
464             else {
465                 n = strtol(p, &p, 16);
466                 if(n > 0xFFFF) {
467                     printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
468                 }
469                 continue;
470             }
471         }
472 
473         table[n] = (WCHAR)value;
474     }
475 
476     fclose(file);
477 
478     return table;
479 }
480 
481 int write_nls_files()
482 {
483     WCHAR *table;
484     WCHAR *oemtable;
485     char nls_filename[256];
486     CPINFOEXA cpi;
487     int i;
488     struct code_page {
489         UINT cp;
490         BOOL oem;
491         char *table_filename;
492         char *comment;
493     } pages[] = {
494         {37,  FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
495         {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
496         {437, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
497         {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
498         /*{708, FALSE, "", "Arabic ASMO"},*/
499         /*{720, FALSE, "", "Arabic Transparent ASMO"},*/
500         {737, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
501         {775, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
502         {850, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
503         {852, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
504         {855, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
505         {856, TRUE,  LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
506         {857, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
507         {860, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
508         {861, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
509         {862, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
510         {863, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
511         {864, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
512         {865, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
513         {866, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
514         {869, TRUE,  LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
515         /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
516         {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
517         {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
518         {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
519         {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
520         {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
521         {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
522         {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
523         {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
524         {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
525         {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
526         {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
527         {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
528         {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
529         {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
530         {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
531         {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
532         {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
533         {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
534         {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
535         {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
536         {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
537         {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
538         {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
539         {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
540         /*{20000, FALSE, "", "CNS Taiwan"},*/
541         /*{20001, FALSE, "", "TCA Taiwan"},*/
542         /*{20002, FALSE, "", "Eten Taiwan"},*/
543         /*{20003, FALSE, "", "IBM5550 Taiwan"},*/
544         /*{20004, FALSE, "", "TeleText Taiwan"},*/
545         /*{20005, FALSE, "", "Wang Taiwan"},*/
546         /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
547         /*{20106, FALSE, "", "IA5 German"},*/
548         /*{20107, FALSE, "", "IA5 Swedish"},*/
549         /*{20108, FALSE, "", "IA5 Norwegian"},*/
550         /*{20127, FALSE, "", "US ASCII"}, */
551         /*{20261, FALSE, "", "T.61"},*/
552         /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
553         /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
554         /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
555         /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
556         /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
557         /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
558         /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
559         /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
560         /*{20297, FALSE, "", "IBM EBCDIC France"},*/
561         /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
562         /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/
563         /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
564         /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
565         /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
566         {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"},
567         /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
568         {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
569         /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
570         /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
571         /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
572         {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
573         {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
574         {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
575         {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
576         {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
577         {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
578         {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
579         {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
580         {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
581     };
582 
583     for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
584         table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
585         if(!table) {
586             printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
587             continue;
588         }
589 
590         if (pages[i].oem) {
591             oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
592             if(!oemtable) {
593                 printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
594                 continue;
595             }
596         }
597 
598         sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
599         if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) {
600             printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
601         }
602 
603         if (pages[i].oem)
604             free(oemtable);
605 
606         free(table);
607     }
608 
609     return 0;
610 }
611 
612 
613 
614 static WORD *to_upper_org = NULL, *to_lower_org = NULL;
615 
616 #if 0
617 static WORD diffs[256];
618 static int number_of_diffs;
619 #endif
620 
621 static WORD number_of_subtables_with_diffs;
622 /* pointers to subtables with 16 elements in each to the main table */
623 static WORD *subtables_with_diffs[4096];
624 
625 static WORD number_of_subtables_with_offsets;
626 /* subtables with 16 elements  */
627 static WORD subtables_with_offsets[4096 * 16];
628 
629 static void test_packed_table(WCHAR *table)
630 {
631     WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890";
632     //WORD diff, off;
633     //WORD *sub_table;
634     DWORD i, len;
635 
636     len = lstrlenW(test_str);
637 
638     for(i = 0; i < len + 1; i++) {
639         /*off = table[HIBYTE(test_str[i])];
640 
641         sub_table = table + off;
642         off = sub_table[LOBYTE(test_str[i]) >> 4];
643 
644         sub_table = table + off;
645         off = LOBYTE(test_str[i]) & 0x0F;
646 
647         diff = sub_table[off];
648 
649         test_str[i] += diff;*/
650         test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)];
651     }
652 /*
653     {
654         FILE *file;
655         static int n = 0;
656         char name[20];
657 
658         sprintf(name, "text%02d.dat", n++);
659         file = fopen(name, "wb");
660         fwrite(test_str, len * sizeof(WCHAR), 1, file);
661         fclose(file);
662     }*/
663 }
664 
665 static BOOL CreateCaseDiff(char *table_name)
666 {
667     char buf[256];
668     char *p;
669     WORD code, case_mapping;
670     FILE *file;
671     int line;
672 
673     to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
674     if(!to_upper_org) {
675         printf("Not enough memory for to upper table\n");
676         return FALSE;
677     }
678 
679     to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
680     if(!to_lower_org) {
681         printf("Not enough memory for to lower table\n");
682         return FALSE;
683     }
684 
685     file = fopen(table_name, "r");
686     if(file == NULL) {
687         printf("Could not open file \"%s\"\n", table_name);
688         return FALSE;
689     }
690 
691     line = 0;
692 
693     while(fgets(buf, sizeof(buf), file)) {
694         line++;
695         p = buf;
696         while(*p && isspace(*p)) p++;
697 
698         if(!*p)
699             continue;
700 
701         /* 0. Code value */
702         code = (WORD)strtol(p, &p, 16);
703 
704         //if(code != 0x9A0 && code != 0xBA0)
705             //continue;
706 
707         while(*p && *p != ';') p++;
708         if(!*p)
709             continue;
710         p++;
711 
712         /* 1. Character name */
713         while(*p && *p != ';') p++;
714         if(!*p)
715             continue;
716         p++;
717 
718         /* 2. General Category */
719         while(*p && *p != ';') p++;
720         if(!*p)
721             continue;
722         p++;
723 
724         /* 3. Canonical Combining Classes */
725         while(*p && *p != ';') p++;
726         if(!*p)
727             continue;
728         p++;
729 
730         /* 4. Bidirectional Category */
731         while(*p && *p != ';') p++;
732         if(!*p)
733             continue;
734         p++;
735 
736         /* 5. Character Decomposition Mapping */
737         while(*p && *p != ';') p++;
738         if(!*p)
739             continue;
740         p++;
741 
742         /* 6. Decimal digit value */
743         while(*p && *p != ';') p++;
744         if(!*p)
745             continue;
746         p++;
747 
748         /* 7. Digit value */
749         while(*p && *p != ';') p++;
750         if(!*p)
751             continue;
752         p++;
753 
754         /* 8. Numeric value */
755         while(*p && *p != ';') p++;
756         if(!*p)
757             continue;
758         p++;
759 
760         /* 9. Mirrored */
761         while(*p && *p != ';') p++;
762         if(!*p)
763             continue;
764         p++;
765 
766         /* 10. Unicode 1.0 Name */
767         while(*p && *p != ';') p++;
768         if(!*p)
769             continue;
770         p++;
771 
772         /* 11. 10646 comment field */
773         while(*p && *p != ';') p++;
774         if(!*p)
775             continue;
776         p++;
777 
778         /* 12. Uppercase Mapping */
779         while(*p && isspace(*p)) p++;
780         if(!*p) continue;
781         if(*p != ';') {
782             case_mapping = (WORD)strtol(p, &p, 16);
783             to_upper_org[code] = case_mapping - code;
784             while(*p && *p != ';') p++;
785         }
786         else
787             p++;
788 
789         /* 13. Lowercase Mapping */
790         while(*p && isspace(*p)) p++;
791         if(!*p) continue;
792         if(*p != ';') {
793             case_mapping = (WORD)strtol(p, &p, 16);
794             to_lower_org[code] = case_mapping - code;
795             while(*p && *p != ';') p++;
796         }
797         else
798             p++;
799 
800         /* 14. Titlecase Mapping */
801         while(*p && *p != ';') p++;
802         if(!*p)
803             continue;
804         p++;
805     }
806 
807     fclose(file);
808 
809     return TRUE;
810 }
811 
#if 0
/* Disabled: linear search for `diff` in diffs[]; returns its index or -1. */
static int find_diff(WORD diff)
{
    int pos = 0;

    while(pos < number_of_diffs) {
        if(diffs[pos] == diff)
            return pos;
        pos++;
    }

    return -1;
}
#endif
825 
826 static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
827 {
828     WORD index;
829 
830     for(index = 0; index < number_of_subtables_with_diffs; index++) {
831         if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) {
832             return index;
833         }
834     }
835 
836     if(number_of_subtables_with_diffs >= 4096) {
837         printf("Could not add new subtable with diffs, storage is full\n");
838         return 0;
839     }
840 
841     subtables_with_diffs[number_of_subtables_with_diffs] = subtable;
842     number_of_subtables_with_diffs++;
843 
844     return index;
845 }
846 
847 static WORD find_subtable_with_offsets(WORD *subtable)
848 {
849     WORD index;
850 
851     for(index = 0; index < number_of_subtables_with_offsets; index++) {
852         if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) {
853             return index;
854         }
855     }
856 
857     if(number_of_subtables_with_offsets >= 4096) {
858         printf("Could not add new subtable with offsets, storage is full\n");
859         return 0;
860     }
861 
862     memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD));
863     number_of_subtables_with_offsets++;
864 
865     return index;
866 }
867 
868 static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
869 {
870     WORD high, low4, index;
871     WORD main_index[256];
872     WORD temp_subtable[16];
873     WORD *packed_table;
874     WORD *subtable_src, *subtable_dst;
875 
876     memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
877     number_of_subtables_with_diffs = 0;
878 
879     memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
880     number_of_subtables_with_offsets = 0;
881 
882     for(high = 0; high < 256; high++) {
883         for(low4 = 0; low4 < 256; low4 += 16) {
884             index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);
885 
886             temp_subtable[low4 >> 4] = index;
887         }
888 
889         index = find_subtable_with_offsets(temp_subtable);
890         main_index[high] = index;
891     }
892 
893     *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16;
894     packed_table = calloc(*packed_size_in_words, sizeof(WORD));
895 
896     /* fill main index according to the subtables_with_offsets */
897     for(high = 0; high < 256; high++) {
898         packed_table[high] = 0x100 + main_index[high] * 16;
899     }
900 
901     //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16);
902 
903     /* fill subtable index according to the subtables_with_diffs */
904     for(index = 0; index < number_of_subtables_with_offsets; index++) {
905         subtable_dst = packed_table + 0x100 + index * 16;
906         subtable_src = &subtables_with_offsets[index * 16];
907 
908         for(low4 = 0; low4 < 16; low4++) {
909             subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16;
910         }
911     }
912 
913 
914     for(index = 0; index < number_of_subtables_with_diffs; index++) {
915         subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16;
916         memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
917 
918     }
919 
920 
921     test_packed_table(packed_table);
922 
923     return packed_table;
924 }
925 
926 int write_casemap_file(void)
927 {
928     WORD packed_size_in_words, offset_to_next_table_in_words;
929     WORD *packed_table, value;
930     FILE *file;
931 
932     if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
933         return -1;
934 
935     file = fopen(NLSDIR"/l_intl.nls", "wb");
936 
937     /* write version number */
938     value = 1;
939     fwrite(&value, 1, sizeof(WORD), file);
940 
941     /* pack upper case table */
942     packed_table = pack_table(to_upper_org, &packed_size_in_words);
943     offset_to_next_table_in_words = packed_size_in_words + 1;
944     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
945     /* write packed upper case table */
946     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
947     free(packed_table);
948 
949     /* pack lower case table */
950     packed_table = pack_table(to_lower_org, &packed_size_in_words);
951     offset_to_next_table_in_words = packed_size_in_words + 1;
952     fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
953     /* write packed lower case table */
954     fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
955     free(packed_table);
956 
957     fclose(file);
958 
959     free(to_upper_org);
960     free(to_lower_org);
961 
962     return 0;
963 }
964 
/*
 * Generate the code page files and the case mapping file.
 * Both generators always run; the exit status is non-zero when either of
 * them reports a failure, so a calling build script can detect it.
 */
int main(void)
{
    int status = 0;

    if(write_nls_files() != 0)
        status = 1;

    if(write_casemap_file() != 0)
        status = 1;

    return status;
}
972