1 /* 2 * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions. 3 * Tool for creating NT-like l_intl.nls file for case mapping of unicode 4 * characters. 5 * Copyright 2000 Timoshkov Dmitry 6 * Copyright 2001 Matei Alexandru 7 * 8 * Sources of information: 9 * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html 10 * Ove K�ven's investigations http://www.ping.uio.no/~ovehk/nls 11 */ 12 #include <windows.h> 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <malloc.h> 16 #include <string.h> 17 #include <ctype.h> 18 19 static const WCHAR * const uprtable[256]; 20 static const WCHAR * const lwrtable[256]; 21 22 #define NLSDIR "../../media/nls" 23 #define LIBDIR "unicode.org/" 24 25 typedef struct { 26 WORD wSize; /* in words 0x000D */ 27 WORD CodePage; 28 WORD MaxCharSize; /* 1 or 2 */ 29 BYTE DefaultChar[MAX_DEFAULTCHAR]; 30 WCHAR UnicodeDefaultChar; 31 WCHAR unknown1; 32 WCHAR unknown2; 33 BYTE LeadByte[MAX_LEADBYTES]; 34 } __attribute__((packed)) NLS_FILE_HEADER; 35 36 /* 37 Support for translation from the multiple unicode chars 38 to the single code page char. 39 40 002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;; 41 00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;; 42 2010;HYPHEN;Pd;0;ON;;;;;N;;;;; 43 2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;; 44 2013;EN DASH;Pd;0;ON;;;;;N;;;;; 45 2014;EM DASH;Pd;0;ON;;;;;N;;;;; 46 2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;; 47 */ 48 49 /* HYPHEN-MINUS aliases */ 50 static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0}; 51 52 static struct { 53 WCHAR cp_char; 54 WCHAR *alias; /* must be 0 terminated */ 55 } u2cp_alias[] = { 56 /* HYPHEN-MINUS aliases */ 57 {0x002D, hyphen_aliases} 58 }; 59 60 static void patch_aliases(void *u2cp, CPINFOEXA *cpi) 61 { 62 int i, j; 63 WCHAR *wc, *alias; 64 BYTE *c; 65 66 if(cpi->MaxCharSize == 2) { 67 wc = (WCHAR *)u2cp; 68 for(i = 0; i < 65536; i++) { 69 for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { 70 alias = u2cp_alias[j].alias; 71 while(*alias) { 72 if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) { 73 wc[i] = u2cp_alias[j].cp_char; 74 } 75 alias++; 76 } 77 } 78 } 79 } 80 else { 81 c = (BYTE *)u2cp; 82 for(i = 0; i < 65536; i++) { 83 for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { 84 alias = u2cp_alias[j].alias; 85 while(*alias) { 86 if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) { 87 c[i] = (BYTE)u2cp_alias[j].cp_char; 88 } 89 alias++; 90 } 91 } 92 } 93 } 94 } 95 96 static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table) 97 { 98 void *u2cp; 99 WCHAR *wc; 100 CHAR *c; 101 int i; 102 BOOL ret = TRUE; 103 104 u2cp = malloc(cpi->MaxCharSize * 65536); 105 if(!u2cp) { 106 printf("Not enough memory for Unicode to Codepage table\n"); 107 return FALSE; 108 } 109 110 if(cpi->MaxCharSize == 2) { 111 wc = (WCHAR *)u2cp; 112 for(i = 0; i < 65536; i++) 113 wc[i] = *(WCHAR *)cpi->DefaultChar; 114 115 for(i = 0; i < 65536; i++) 116 if (table[i] != '?') 117 wc[table[i]] = (WCHAR)i; 118 } 119 else { 120 c = (CHAR *)u2cp; 121 for(i = 0; i < 65536; i++) 122 c[i] = cpi->DefaultChar[0]; 123 124 for(i = 0; i < 256; i++) 125 if (table[i] != '?') 126 c[table[i]] = (CHAR)i; 127 } 128 129 patch_aliases(u2cp, cpi); 130 131 if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536) 132 ret = FALSE; 133 134 free(u2cp); 135 136 return ret; 137 } 138 139 static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table) 140 { 141 WCHAR sub_table[256]; 142 WORD offset, offsets[256]; 143 int i, j, range; 144 145 memset(offsets, 0, sizeof(offsets)); 146 147 offset = 0; 148 149 for(i = 0; i < MAX_LEADBYTES; i += 2) { 150 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { 151 offset += 256; 152 offsets[range] = offset; 153 } 154 } 155 156 if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets)) 157 return FALSE; 158 159 for(i = 0; i < MAX_LEADBYTES; i += 2) { 160 for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { 161 /*printf("Writing sub table for LeadByte %02X\n", range);*/ 162 for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) { 163 sub_table[j - MAKEWORD(0, range)] = table[j]; 164 } 165 166 if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table)) 167 return FALSE; 168 } 169 } 170 171 return TRUE; 172 } 173 174 static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable) 175 { 176 FILE *out; 177 NLS_FILE_HEADER nls; 178 WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i; 179 180 printf("Creating NLS table \"%s\"\n", name); 181 182 if(!(out = fopen(name, "wb"))) { 183 printf("Could not create file \"%s\"\n", name); 184 return FALSE; 185 } 186 187 memset(&nls, 0, sizeof(nls)); 188 189 nls.wSize = sizeof(nls) / sizeof(WORD); 190 nls.CodePage = cpi->CodePage; 191 nls.MaxCharSize = cpi->MaxCharSize; 192 memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR); 193 nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar; 194 nls.unknown1 = '?'; 195 nls.unknown2 = '?'; 196 memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES); 197 198 if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) { 199 fclose(out); 200 printf("Could not write to file \"%s\"\n", name); 201 return FALSE; 202 } 203 204 number_of_lb_ranges = 0; 205 number_of_lb_subtables = 0; 206 207 for(i = 0; i < MAX_LEADBYTES; i += 2) { 208 if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) { 209 number_of_lb_ranges++; 210 number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1; 211 } 212 } 213 214 /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/ 215 /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/ 216 217 /* Calculate offset to Unicode to CP table in words: 218 * 1. (256 * sizeof(WORD)) primary CP to Unicode table + 219 * 2. (WORD) optional OEM glyph table size in words + 220 * 3. OEM glyph table size in words * sizeof(WORD) + 221 * 4. (WORD) Number of DBCS LeadByte ranges + 222 * 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables 223 * 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables + 224 * 7. (WORD) Unknown flag 225 */ 226 227 wValue = (256 * sizeof(WORD) + /* 1 */ 228 sizeof(WORD) + /* 2 */ 229 ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */ 230 sizeof(WORD) + /* 4 */ 231 ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */ 232 number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */ 233 sizeof(WORD) /* 7 */ 234 ) / sizeof(WORD); 235 236 /* offset of Unicode to CP table in words */ 237 fwrite(&wValue, 1, sizeof(wValue), out); 238 239 /* primary CP to Unicode table */ 240 if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { 241 fclose(out); 242 printf("Could not write to file \"%s\"\n", name); 243 return FALSE; 244 } 245 246 /* optional OEM glyph table size in words */ 247 wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0; 248 fwrite(&wValue, 1, sizeof(wValue), out); 249 250 /* optional OEM to Unicode table */ 251 if (oemtable) { 252 if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { 253 fclose(out); 254 printf("Could not write to file \"%s\"\n", name); 255 return FALSE; 256 } 257 } 258 259 /* Number of DBCS LeadByte ranges */ 260 fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out); 261 262 /* offsets of lead byte sub tables and lead byte sub tables */ 263 if(number_of_lb_ranges > 0) { 264 if(!write_lb_ranges(out, cpi, table)) { 265 fclose(out); 266 printf("Could not write to file \"%s\"\n", name); 267 return FALSE; 268 } 269 } 270 271 /* Unknown flag */ 272 wValue = 0; 273 fwrite(&wValue, 1, sizeof(wValue), out); 274 275 if(!write_unicode2cp_table(out, cpi, table)) { 276 fclose(out); 277 printf("Could not write to file \"%s\"\n", name); 278 return FALSE; 279 } 280 281 fclose(out); 282 return TRUE; 283 } 284 285 /* correct the codepage information such as default chars */ 286 static void patch_codepage_info(CPINFOEXA *cpi) 287 { 288 /* currently nothing */ 289 } 290 291 static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi) 292 { 293 char buf[256]; 294 char *p; 295 DWORD n, value; 296 FILE *file; 297 WCHAR *table; 298 int lb_ranges, lb_range_started, line; 299 300 printf("Loading translation table \"%s\"\n", table_name); 301 302 /* Init to default values */ 303 memset(cpi, 0, sizeof(CPINFOEXA)); 304 cpi->CodePage = cp; 305 *(WCHAR *)cpi->DefaultChar = '?'; 306 cpi->MaxCharSize = 1; 307 cpi->UnicodeDefaultChar = '?'; 308 309 patch_codepage_info(cpi); 310 311 table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); 312 if(!table) { 313 printf("Not enough memory for Codepage to Unicode table\n"); 314 return NULL; 315 } 316 317 for(n = 0; n < 256; n++) 318 table[n] = (WCHAR)n; 319 320 for(n = 256; n < 65536; n++) 321 table[n] = cpi->UnicodeDefaultChar; 322 323 file = fopen(table_name, "r"); 324 if(file == NULL) { 325 free(table); 326 return NULL; 327 } 328 329 line = 0; 330 lb_ranges = 0; 331 lb_range_started = 0; 332 333 while(fgets(buf, sizeof(buf), file)) { 334 line++; 335 p = buf; 336 while(isspace(*p)) p++; 337 338 if(!*p || p[0] == '#') 339 continue; 340 341 n = strtol(p, &p, 0); 342 if(n > 0xFFFF) { 343 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name); 344 continue; 345 } 346 347 if(n > 0xFF && cpi->MaxCharSize != 2) { 348 /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/ 349 cpi->MaxCharSize = 2; 350 } 351 352 while(isspace(*p)) p++; 353 354 if(!*p || p[0] == '#') { 355 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 356 } 357 else { 358 value = strtol(p, &p, 0); 359 if(value > 0xFFFF) { 360 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name); 361 } 362 table[n] = (WCHAR)value; 363 } 364 365 /* wait for comment */ 366 while(*p && *p != '#') p++; 367 368 if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) { 369 /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/ 370 if(n > 0xFF) { 371 printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n); 372 continue; 373 } 374 375 table[n] = (WCHAR)0; 376 377 if(lb_range_started) { 378 cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n; 379 } 380 else { 381 /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/ 382 if(lb_ranges < MAX_LEADBYTES/2) { 383 lb_ranges++; 384 lb_range_started = 1; 385 cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n; 386 } 387 else 388 printf("Line %d: Error: could not start new lead byte range\n", line); 389 } 390 } 391 else { 392 if(lb_range_started) 393 lb_range_started = 0; 394 } 395 } 396 397 fclose(file); 398 399 return table; 400 } 401 402 static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi) 403 { 404 char buf[256]; 405 char *p; 406 DWORD n, value; 407 FILE *file; 408 WCHAR *table; 409 int line; 410 411 printf("Loading oem glyph table \"%s\"\n", table_name); 412 413 table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); 414 if(!table) { 415 printf("Not enough memory for Codepage to Unicode table\n"); 416 return NULL; 417 } 418 419 memcpy(table, def_table, 65536 * sizeof(WCHAR)); 420 421 file = fopen(table_name, "r"); 422 if(file == NULL) { 423 free(table); 424 return NULL; 425 } 426 427 line = 0; 428 429 while(fgets(buf, sizeof(buf), file)) { 430 line++; 431 p = buf; 432 while(isspace(*p)) p++; 433 434 if(!*p || p[0] == '#') 435 continue; 436 437 value = strtol(p, &p, 16); 438 if(value > 0xFFFF) { 439 printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name); 440 continue; 441 } 442 443 while(isspace(*p)) p++; 444 445 if(!*p || p[0] == '#') { 446 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 447 continue; 448 } 449 else { 450 n = strtol(p, &p, 16); 451 if(n > 0xFFFF) { 452 printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name); 453 continue; 454 } 455 } 456 457 if (cpi->CodePage == 864) { 458 while(isspace(*p)) p++; 459 460 if(!*p || p[0] == '#' || p[0] == '-') { 461 /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ 462 continue; 463 } 464 else { 465 n = strtol(p, &p, 16); 466 if(n > 0xFFFF) { 467 printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name); 468 } 469 continue; 470 } 471 } 472 473 table[n] = (WCHAR)value; 474 } 475 476 fclose(file); 477 478 return table; 479 } 480 481 int write_nls_files() 482 { 483 WCHAR *table; 484 WCHAR *oemtable; 485 char nls_filename[256]; 486 CPINFOEXA cpi; 487 int i; 488 struct code_page { 489 UINT cp; 490 BOOL oem; 491 char *table_filename; 492 char *comment; 493 } pages[] = { 494 {37, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"}, 495 {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"}, 496 {437, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"}, 497 {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"}, 498 /*{708, FALSE, "", "Arabic ASMO"},*/ 499 /*{720, FALSE, "", "Arabic Transparent ASMO"},*/ 500 {737, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"}, 501 {775, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"}, 502 {850, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"}, 503 {852, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"}, 504 {855, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" }, 505 {856, TRUE, LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"}, 506 {857, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"}, 507 {860, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"}, 508 {861, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"}, 509 {862, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"}, 510 {863, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"}, 511 {864, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"}, 512 {865, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"}, 513 {866, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"}, 514 {869, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"}, 515 /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/ 516 {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"}, 517 {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"}, 518 {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, 519 {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"}, 520 {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"}, 521 {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"}, 522 {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"}, 523 {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"}, 524 {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"}, 525 {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"}, 526 {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"}, 527 {1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"}, 528 {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"}, 529 {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"}, 530 {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"}, 531 {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"}, 532 {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"}, 533 {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"}, 534 {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"}, 535 {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"}, 536 {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"}, 537 {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"}, 538 {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"}, 539 {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"}, 540 /*{20000, FALSE, "", "CNS Taiwan"},*/ 541 /*{20001, FALSE, "", "TCA Taiwan"},*/ 542 /*{20002, FALSE, "", "Eten Taiwan"},*/ 543 /*{20003, FALSE, "", "IBM5550 Taiwan"},*/ 544 /*{20004, FALSE, "", "TeleText Taiwan"},*/ 545 /*{20005, FALSE, "", "Wang Taiwan"},*/ 546 /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/ 547 /*{20106, FALSE, "", "IA5 German"},*/ 548 /*{20107, FALSE, "", "IA5 Swedish"},*/ 549 /*{20108, FALSE, "", "IA5 Norwegian"},*/ 550 /*{20127, FALSE, "", "US ASCII"}, */ 551 /*{20261, FALSE, "", "T.61"},*/ 552 /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/ 553 /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/ 554 /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/ 555 /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/ 556 /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/ 557 /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/ 558 /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/ 559 /*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/ 560 /*{20297, FALSE, "", "IBM EBCDIC France"},*/ 561 /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/ 562 /*{20423, FALSE, "IBM869.TXT", "IBM EBCDIC Greek"},*/ 563 /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/ 564 /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/ 565 /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/ 566 {20871, FALSE, "ReactOS/IBMCP861.TXT", "IBM EBCDIC Icelandic"}, 567 /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/ 568 {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, 569 /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/ 570 /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/ 571 /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/ 572 {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"}, 573 {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"}, 574 {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"}, 575 {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"}, 576 {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"}, 577 {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"}, 578 {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"}, 579 {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"}, 580 {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"} 581 }; 582 583 for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) { 584 table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi); 585 if(!table) { 586 printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment); 587 continue; 588 } 589 590 if (pages[i].oem) { 591 oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi); 592 if(!oemtable) { 593 printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table"); 594 continue; 595 } 596 } 597 598 sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage); 599 if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) { 600 printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment); 601 } 602 603 if (pages[i].oem) 604 free(oemtable); 605 606 free(table); 607 } 608 609 return 0; 610 } 611 612 613 614 static WORD *to_upper_org = NULL, *to_lower_org = NULL; 615 616 #if 0 617 static WORD diffs[256]; 618 static int number_of_diffs; 619 #endif 620 621 static WORD number_of_subtables_with_diffs; 622 /* pointers to subtables with 16 elements in each to the main table */ 623 static WORD *subtables_with_diffs[4096]; 624 625 static WORD number_of_subtables_with_offsets; 626 /* subtables with 16 elements */ 627 static WORD subtables_with_offsets[4096 * 16]; 628 629 static void test_packed_table(WCHAR *table) 630 { 631 WCHAR test_str[] = L"This is an English text. \x0CF\x0EE-\x0F0\x0F3\x0F1\x0F1\x0EA\x0E8 \x0FF \x0EF\x0E8\x0F1\x0E0\x0F2\x0FC \x0F3\x0EC\x0E5\x0FE \x0ED\x0E5\x0EC\x0ED\x0EE\x0E6\x0EA\x0EE. 1234567890"; 632 //WORD diff, off; 633 //WORD *sub_table; 634 DWORD i, len; 635 636 len = lstrlenW(test_str); 637 638 for(i = 0; i < len + 1; i++) { 639 /*off = table[HIBYTE(test_str[i])]; 640 641 sub_table = table + off; 642 off = sub_table[LOBYTE(test_str[i]) >> 4]; 643 644 sub_table = table + off; 645 off = LOBYTE(test_str[i]) & 0x0F; 646 647 diff = sub_table[off]; 648 649 test_str[i] += diff;*/ 650 test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)]; 651 } 652 /* 653 { 654 FILE *file; 655 static int n = 0; 656 char name[20]; 657 658 sprintf(name, "text%02d.dat", n++); 659 file = fopen(name, "wb"); 660 fwrite(test_str, len * sizeof(WCHAR), 1, file); 661 fclose(file); 662 }*/ 663 } 664 665 static BOOL CreateCaseDiff(char *table_name) 666 { 667 char buf[256]; 668 char *p; 669 WORD code, case_mapping; 670 FILE *file; 671 int line; 672 673 to_upper_org = (WORD *)calloc(65536, sizeof(WORD)); 674 if(!to_upper_org) { 675 printf("Not enough memory for to upper table\n"); 676 return FALSE; 677 } 678 679 to_lower_org = (WORD *)calloc(65536, sizeof(WORD)); 680 if(!to_lower_org) { 681 printf("Not enough memory for to lower table\n"); 682 return FALSE; 683 } 684 685 file = fopen(table_name, "r"); 686 if(file == NULL) { 687 printf("Could not open file \"%s\"\n", table_name); 688 return FALSE; 689 } 690 691 line = 0; 692 693 while(fgets(buf, sizeof(buf), file)) { 694 line++; 695 p = buf; 696 while(*p && isspace(*p)) p++; 697 698 if(!*p) 699 continue; 700 701 /* 0. Code value */ 702 code = (WORD)strtol(p, &p, 16); 703 704 //if(code != 0x9A0 && code != 0xBA0) 705 //continue; 706 707 while(*p && *p != ';') p++; 708 if(!*p) 709 continue; 710 p++; 711 712 /* 1. Character name */ 713 while(*p && *p != ';') p++; 714 if(!*p) 715 continue; 716 p++; 717 718 /* 2. General Category */ 719 while(*p && *p != ';') p++; 720 if(!*p) 721 continue; 722 p++; 723 724 /* 3. Canonical Combining Classes */ 725 while(*p && *p != ';') p++; 726 if(!*p) 727 continue; 728 p++; 729 730 /* 4. Bidirectional Category */ 731 while(*p && *p != ';') p++; 732 if(!*p) 733 continue; 734 p++; 735 736 /* 5. Character Decomposition Mapping */ 737 while(*p && *p != ';') p++; 738 if(!*p) 739 continue; 740 p++; 741 742 /* 6. Decimal digit value */ 743 while(*p && *p != ';') p++; 744 if(!*p) 745 continue; 746 p++; 747 748 /* 7. Digit value */ 749 while(*p && *p != ';') p++; 750 if(!*p) 751 continue; 752 p++; 753 754 /* 8. Numeric value */ 755 while(*p && *p != ';') p++; 756 if(!*p) 757 continue; 758 p++; 759 760 /* 9. Mirrored */ 761 while(*p && *p != ';') p++; 762 if(!*p) 763 continue; 764 p++; 765 766 /* 10. Unicode 1.0 Name */ 767 while(*p && *p != ';') p++; 768 if(!*p) 769 continue; 770 p++; 771 772 /* 11. 10646 comment field */ 773 while(*p && *p != ';') p++; 774 if(!*p) 775 continue; 776 p++; 777 778 /* 12. Uppercase Mapping */ 779 while(*p && isspace(*p)) p++; 780 if(!*p) continue; 781 if(*p != ';') { 782 case_mapping = (WORD)strtol(p, &p, 16); 783 to_upper_org[code] = case_mapping - code; 784 while(*p && *p != ';') p++; 785 } 786 else 787 p++; 788 789 /* 13. Lowercase Mapping */ 790 while(*p && isspace(*p)) p++; 791 if(!*p) continue; 792 if(*p != ';') { 793 case_mapping = (WORD)strtol(p, &p, 16); 794 to_lower_org[code] = case_mapping - code; 795 while(*p && *p != ';') p++; 796 } 797 else 798 p++; 799 800 /* 14. Titlecase Mapping */ 801 while(*p && *p != ';') p++; 802 if(!*p) 803 continue; 804 p++; 805 } 806 807 fclose(file); 808 809 return TRUE; 810 } 811 812 #if 0 813 static int find_diff(WORD diff) 814 { 815 int i; 816 817 for(i = 0; i < number_of_diffs; i++) { 818 if(diffs[i] == diff) 819 return i; 820 } 821 822 return -1; 823 } 824 #endif 825 826 static WORD find_subtable_with_diffs(WORD *table, WORD *subtable) 827 { 828 WORD index; 829 830 for(index = 0; index < number_of_subtables_with_diffs; index++) { 831 if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) { 832 return index; 833 } 834 } 835 836 if(number_of_subtables_with_diffs >= 4096) { 837 printf("Could not add new subtable with diffs, storage is full\n"); 838 return 0; 839 } 840 841 subtables_with_diffs[number_of_subtables_with_diffs] = subtable; 842 number_of_subtables_with_diffs++; 843 844 return index; 845 } 846 847 static WORD find_subtable_with_offsets(WORD *subtable) 848 { 849 WORD index; 850 851 for(index = 0; index < number_of_subtables_with_offsets; index++) { 852 if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) { 853 return index; 854 } 855 } 856 857 if(number_of_subtables_with_offsets >= 4096) { 858 printf("Could not add new subtable with offsets, storage is full\n"); 859 return 0; 860 } 861 862 memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD)); 863 number_of_subtables_with_offsets++; 864 865 return index; 866 } 867 868 static WORD *pack_table(WORD *table, WORD *packed_size_in_words) 869 { 870 WORD high, low4, index; 871 WORD main_index[256]; 872 WORD temp_subtable[16]; 873 WORD *packed_table; 874 WORD *subtable_src, *subtable_dst; 875 876 memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs)); 877 number_of_subtables_with_diffs = 0; 878 879 memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets)); 880 number_of_subtables_with_offsets = 0; 881 882 for(high = 0; high < 256; high++) { 883 for(low4 = 0; low4 < 256; low4 += 16) { 884 index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]); 885 886 temp_subtable[low4 >> 4] = index; 887 } 888 889 index = find_subtable_with_offsets(temp_subtable); 890 main_index[high] = index; 891 } 892 893 *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16; 894 packed_table = calloc(*packed_size_in_words, sizeof(WORD)); 895 896 /* fill main index according to the subtables_with_offsets */ 897 for(high = 0; high < 256; high++) { 898 packed_table[high] = 0x100 + main_index[high] * 16; 899 } 900 901 //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16); 902 903 /* fill subtable index according to the subtables_with_diffs */ 904 for(index = 0; index < number_of_subtables_with_offsets; index++) { 905 subtable_dst = packed_table + 0x100 + index * 16; 906 subtable_src = &subtables_with_offsets[index * 16]; 907 908 for(low4 = 0; low4 < 16; low4++) { 909 subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16; 910 } 911 } 912 913 914 for(index = 0; index < number_of_subtables_with_diffs; index++) { 915 subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16; 916 memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD)); 917 918 } 919 920 921 test_packed_table(packed_table); 922 923 return packed_table; 924 } 925 926 int write_casemap_file(void) 927 { 928 WORD packed_size_in_words, offset_to_next_table_in_words; 929 WORD *packed_table, value; 930 FILE *file; 931 932 if(!CreateCaseDiff(LIBDIR"UnicodeData.txt")) 933 return -1; 934 935 file = fopen(NLSDIR"/l_intl.nls", "wb"); 936 937 /* write version number */ 938 value = 1; 939 fwrite(&value, 1, sizeof(WORD), file); 940 941 /* pack upper case table */ 942 packed_table = pack_table(to_upper_org, &packed_size_in_words); 943 offset_to_next_table_in_words = packed_size_in_words + 1; 944 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); 945 /* write packed upper case table */ 946 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); 947 free(packed_table); 948 949 /* pack lower case table */ 950 packed_table = pack_table(to_lower_org, &packed_size_in_words); 951 offset_to_next_table_in_words = packed_size_in_words + 1; 952 fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); 953 /* write packed lower case table */ 954 fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); 955 free(packed_table); 956 957 fclose(file); 958 959 free(to_upper_org); 960 free(to_lower_org); 961 962 return 0; 963 } 964 965 int main() 966 { 967 write_nls_files(); 968 write_casemap_file(); 969 970 return 0; 971 } 972