1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************************
5  *
6  *   Copyright (C) 1998-2015, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  ********************************************************************************
10  *
11  *
12  *  makeconv.cpp:
13  *  tool creating a binary (compressed) representation of the conversion mapping
14  *  table (IBM NLTC ucmap format).
15  *
16  *  05/04/2000    helena     Added fallback mapping into the picture...
17  *  06/29/2000  helena      Major rewrite of the callback APIs.
18  */
19 
20 #include <stdio.h>
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
23 #include "charstr.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_imp.h"
26 #include "ucnv_cnv.h"
27 #include "cstring.h"
28 #include "cmemory.h"
29 #include "uinvchar.h"
30 #include "filestrm.h"
31 #include "toolutil.h"
32 #include "uoptions.h"
33 #include "unicode/udata.h"
34 #include "unewdata.h"
35 #include "uparse.h"
36 #include "ucm.h"
37 #include "makeconv.h"
38 #include "genmbcs.h"
39 
40 #define DEBUG 0
41 
42 typedef struct ConvData {
43     UCMFile *ucm;
44     NewConverter *cnvData, *extData;
45     UConverterSharedData sharedData;
46     UConverterStaticData staticData;
47 } ConvData;
48 
49 static void
initConvData(ConvData * data)50 initConvData(ConvData *data) {
51     uprv_memset(data, 0, sizeof(ConvData));
52     data->sharedData.structSize=sizeof(UConverterSharedData);
53     data->staticData.structSize=sizeof(UConverterStaticData);
54     data->sharedData.staticData=&data->staticData;
55 }
56 
57 static void
cleanupConvData(ConvData * data)58 cleanupConvData(ConvData *data) {
59     if(data!=NULL) {
60         if(data->cnvData!=NULL) {
61             data->cnvData->close(data->cnvData);
62             data->cnvData=NULL;
63         }
64         if(data->extData!=NULL) {
65             data->extData->close(data->extData);
66             data->extData=NULL;
67         }
68         ucm_close(data->ucm);
69         data->ucm=NULL;
70     }
71 }
72 
73 /*
74  * from ucnvstat.c - static prototypes of data-based converters
75  */
76 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77 
78 /*
79  * Global - verbosity
80  */
81 UBool VERBOSE = FALSE;
82 UBool QUIET = FALSE;
83 UBool SMALL = FALSE;
84 UBool IGNORE_SISO_CHECK = FALSE;
85 
86 static void
87 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
88 
89 /*
90  * Set up the UNewData and write the converter..
91  */
92 static void
93 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94 
95 UBool haveCopyright=TRUE;
96 
97 static UDataInfo dataInfo={
98     sizeof(UDataInfo),
99     0,
100 
101     U_IS_BIG_ENDIAN,
102     U_CHARSET_FAMILY,
103     sizeof(UChar),
104     0,
105 
106     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
107     {6, 2, 0, 0},                 /* formatVersion */
108     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
109 };
110 
111 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113 {
114     UNewDataMemory *mem = NULL;
115     uint32_t sz2;
116     uint32_t size = 0;
117     int32_t tableType;
118 
119     if(U_FAILURE(*status))
120       {
121         return;
122       }
123 
124     tableType=TABLE_NONE;
125     if(data->cnvData!=NULL) {
126         tableType|=TABLE_BASE;
127     }
128     if(data->extData!=NULL) {
129         tableType|=TABLE_EXT;
130     }
131 
132     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133 
134     if(U_FAILURE(*status))
135       {
136         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137                 cnvName,
138                 "cnv",
139                 u_errorName(*status));
140         return;
141       }
142 
143     if(VERBOSE)
144       {
145         printf("- Opened udata %s.%s\n", cnvName, "cnv");
146       }
147 
148 
149     /* all read only, clean, platform independent data.  Mmmm. :)  */
150     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
152     /* Now, write the table */
153     if(tableType&TABLE_BASE) {
154         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155     }
156     if(tableType&TABLE_EXT) {
157         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158     }
159 
160     sz2 = udata_finish(mem, status);
161     if(size != sz2)
162     {
163         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
164         *status=U_INTERNAL_PROGRAM_ERROR;
165     }
166     if(VERBOSE)
167     {
168       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
169     }
170 }
171 
/*
 * Indexes into options[]; the order here must match the order of
 * the entries in that array.
 */
enum {
    OPT_HELP_H = 0,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,
    OPT_SOURCEDIR,

    /* number of options */
    OPT_COUNT
};
186 
187 static UOption options[]={
188     UOPTION_HELP_H,
189     UOPTION_HELP_QUESTION_MARK,
190     UOPTION_COPYRIGHT,
191     UOPTION_VERSION,
192     UOPTION_DESTDIR,
193     UOPTION_VERBOSE,
194     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
195     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
196     UOPTION_QUIET,
197     UOPTION_SOURCEDIR,
198 };
199 
main(int argc,char * argv[])200 int main(int argc, char* argv[])
201 {
202     ConvData data;
203     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
204 
205     U_MAIN_INIT_ARGS(argc, argv);
206 
207     /* Set up the ICU version number */
208     UVersionInfo icuVersion;
209     u_getVersion(icuVersion);
210     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211 
212     /* preset then read command line options */
213     options[OPT_DESTDIR].value=u_getDataDirectory();
214     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215 
216     /* error handling, printing usage message */
217     if(argc<0) {
218         fprintf(stderr,
219             "error in command line argument \"%s\"\n",
220             argv[-argc]);
221     } else if(argc<2) {
222         argc=-1;
223     }
224     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
225         FILE *stdfile=argc<0 ? stderr : stdout;
226         fprintf(stdfile,
227             "usage: %s [-options] files...\n"
228             "\tread .ucm codepage mapping files and write .cnv files\n"
229             "options:\n"
230             "\t-h or -? or --help  this usage text\n"
231             "\t-V or --version     show a version message\n"
232             "\t-c or --copyright   include a copyright notice\n"
233             "\t-d or --destdir     destination directory, followed by the path\n"
234             "\t-v or --verbose     Turn on verbose output\n"
235             "\t-q or --quiet       do not display warnings and progress\n"
236             "\t-s or --sourcedir   source directory, followed by the path\n",
237             argv[0]);
238         fprintf(stdfile,
239             "\t      --small       Generate smaller .cnv files. They will be\n"
240             "\t                    significantly smaller but may not be compatible with\n"
241             "\t                    older versions of ICU and will require heap memory\n"
242             "\t                    allocation when loaded.\n"
243             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
244         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245     }
246 
247     if(options[OPT_VERSION].doesOccur) {
248         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250         printf("%s\n", U_COPYRIGHT_STRING);
251         exit(0);
252     }
253 
254     /* get the options values */
255     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256     const char *destdir = options[OPT_DESTDIR].value;
257     VERBOSE = options[OPT_VERBOSE].doesOccur;
258     QUIET = options[OPT_QUIET].doesOccur;
259     SMALL = options[OPT_SMALL].doesOccur;
260 
261     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
262         IGNORE_SISO_CHECK = TRUE;
263     }
264 
265     icu::CharString outFileName;
266     UErrorCode err = U_ZERO_ERROR;
267     if (destdir != NULL && *destdir != 0) {
268         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
269         if (U_FAILURE(err)) {
270             return err;
271         }
272     }
273     int32_t outBasenameStart = outFileName.length();
274 
275 #if DEBUG
276     {
277       int i;
278       printf("makeconv: processing %d files...\n", argc - 1);
279       for(i=1; i<argc; ++i) {
280         printf("%s ", argv[i]);
281       }
282       printf("\n");
283       fflush(stdout);
284     }
285 #endif
286 
287     UBool printFilename = (UBool) (argc > 2 || VERBOSE);
288     icu::CharString pathBuf;
289     for (++argv; --argc; ++argv)
290     {
291         UErrorCode localError = U_ZERO_ERROR;
292         const char *arg = getLongPathname(*argv);
293 
294         const char* sourcedir = options[OPT_SOURCEDIR].value;
295         if (sourcedir != NULL && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
296             pathBuf.clear();
297             pathBuf.appendPathPart(sourcedir, localError);
298             pathBuf.appendPathPart(arg, localError);
299             arg = pathBuf.data();
300         }
301 
302         /*produces the right destination path for display*/
303         outFileName.truncate(outBasenameStart);
304         if (outBasenameStart != 0)
305         {
306             /* find the last file sepator */
307             const char *basename = findBasename(arg);
308             outFileName.append(basename, localError);
309         }
310         else
311         {
312             outFileName.append(arg, localError);
313         }
314         if (U_FAILURE(localError)) {
315             return localError;
316         }
317 
318         /*removes the extension if any is found*/
319         int32_t lastDotIndex = outFileName.lastIndexOf('.');
320         if (lastDotIndex >= outBasenameStart) {
321             outFileName.truncate(lastDotIndex);
322         }
323 
324         /* the basename without extension is the converter name */
325         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
326             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
327             return U_BUFFER_OVERFLOW_ERROR;
328         }
329         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
330 
331         /*Adds the target extension*/
332         outFileName.append(CONVERTER_FILE_EXTENSION, localError);
333         if (U_FAILURE(localError)) {
334             return localError;
335         }
336 
337 #if DEBUG
338         printf("makeconv: processing %s  ...\n", arg);
339         fflush(stdout);
340 #endif
341         initConvData(&data);
342         createConverter(&data, arg, &localError);
343 
344         if (U_FAILURE(localError))
345         {
346             /* if an error is found, print out an error msg and keep going */
347             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348                     outFileName.data(), arg, u_errorName(localError));
349             if(U_SUCCESS(err)) {
350                 err = localError;
351             }
352         }
353         else
354         {
355             /* Insure the static data name matches the  file name */
356             /* Changed to ignore directory and only compare base name
357              LDH 1/2/08*/
358             char *p;
359             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
360 
361             if(p == NULL)            /* OK, try alternate */
362             {
363                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
364                 if(p == NULL)
365                 {
366                     p=cnvName; /* If no separators, no problem */
367                 }
368             }
369             else
370             {
371                 p++;   /* If found separator, don't include it in compare */
372             }
373             if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374             {
375                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
376                     cnvName,  CONVERTER_FILE_EXTENSION,
377                     data.staticData.name);
378             }
379 
380             uprv_strcpy((char*)data.staticData.name, cnvName);
381 
382             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
383                 fprintf(stderr,
384                     "Error: A converter name must contain only invariant characters.\n"
385                     "%s is not a valid converter name.\n",
386                     data.staticData.name);
387                 if(U_SUCCESS(err)) {
388                     err = U_INVALID_TABLE_FORMAT;
389                 }
390             }
391 
392             localError = U_ZERO_ERROR;
393             writeConverterData(&data, cnvName, destdir, &localError);
394 
395             if(U_FAILURE(localError))
396             {
397                 /* if an error is found, print out an error msg and keep going*/
398                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
399                     u_errorName(localError));
400                 if(U_SUCCESS(err)) {
401                     err = localError;
402                 }
403             }
404             else if (printFilename)
405             {
406                 puts(outFileName.data() + outBasenameStart);
407             }
408         }
409         fflush(stdout);
410         fflush(stderr);
411 
412         cleanupConvData(&data);
413     }
414 
415     return err;
416 }
417 
418 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)419 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
420     if( (name[0]=='i' || name[0]=='I') &&
421         (name[1]=='b' || name[1]=='B') &&
422         (name[2]=='m' || name[2]=='M')
423     ) {
424         name+=3;
425         if(*name=='-') {
426             ++name;
427         }
428         *pPlatform=UCNV_IBM;
429         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
430     } else {
431         *pPlatform=UCNV_UNKNOWN;
432         *pCCSID=0;
433     }
434 }
435 
436 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)437 readHeader(ConvData *data,
438            FileStream* convFile,
439            UErrorCode *pErrorCode) {
440     char line[1024];
441     char *s, *key, *value;
442     const UConverterStaticData *prototype;
443     UConverterStaticData *staticData;
444 
445     if(U_FAILURE(*pErrorCode)) {
446         return;
447     }
448 
449     staticData=&data->staticData;
450     staticData->platform=UCNV_IBM;
451     staticData->subCharLen=0;
452 
453     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
454         /* basic parsing and handling of state-related items */
455         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
456             continue;
457         }
458 
459         /* stop at the beginning of the mapping section */
460         if(uprv_strcmp(line, "CHARMAP")==0) {
461             break;
462         }
463 
464         /* collect the information from the header field, ignore unknown keys */
465         if(uprv_strcmp(key, "code_set_name")==0) {
466             if(*value!=0) {
467                 uprv_strcpy((char *)staticData->name, value);
468                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
469             }
470         } else if(uprv_strcmp(key, "subchar")==0) {
471             uint8_t bytes[UCNV_EXT_MAX_BYTES];
472             int8_t length;
473 
474             s=value;
475             length=ucm_parseBytes(bytes, line, (const char **)&s);
476             if(1<=length && length<=4 && *s==0) {
477                 staticData->subCharLen=length;
478                 uprv_memcpy(staticData->subChar, bytes, length);
479             } else {
480                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
481                 *pErrorCode=U_INVALID_TABLE_FORMAT;
482                 return;
483             }
484         } else if(uprv_strcmp(key, "subchar1")==0) {
485             uint8_t bytes[UCNV_EXT_MAX_BYTES];
486 
487             s=value;
488             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
489                 staticData->subChar1=bytes[0];
490             } else {
491                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
492                 *pErrorCode=U_INVALID_TABLE_FORMAT;
493                 return;
494             }
495         }
496     }
497 
498     /* copy values from the UCMFile to the static data */
499     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
500     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
501     staticData->conversionType=data->ucm->states.conversionType;
502 
503     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
504         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
505         *pErrorCode=U_INVALID_TABLE_FORMAT;
506         return;
507     }
508 
509     /*
510      * Now that we know the type, copy any 'default' values from the table.
511      * We need not check the type any further because the parser only
512      * recognizes what we have prototypes for.
513      *
514      * For delta (extension-only) tables, copy values from the base file
515      * instead, see createConverter().
516      */
517     if(data->ucm->baseName[0]==0) {
518         prototype=ucnv_converterStaticData[staticData->conversionType];
519         if(prototype!=NULL) {
520             if(staticData->name[0]==0) {
521                 uprv_strcpy((char *)staticData->name, prototype->name);
522             }
523 
524             if(staticData->codepage==0) {
525                 staticData->codepage=prototype->codepage;
526             }
527 
528             if(staticData->platform==0) {
529                 staticData->platform=prototype->platform;
530             }
531 
532             if(staticData->minBytesPerChar==0) {
533                 staticData->minBytesPerChar=prototype->minBytesPerChar;
534             }
535 
536             if(staticData->maxBytesPerChar==0) {
537                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
538             }
539 
540             if(staticData->subCharLen==0) {
541                 staticData->subCharLen=prototype->subCharLen;
542                 if(prototype->subCharLen>0) {
543                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
544                 }
545             }
546         }
547     }
548 
549     if(data->ucm->states.outputType<0) {
550         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
551     }
552 
553     if( staticData->subChar1!=0 &&
554             (staticData->minBytesPerChar>1 ||
555                 (staticData->conversionType!=UCNV_MBCS &&
556                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
557     ) {
558         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
559         *pErrorCode=U_INVALID_TABLE_FORMAT;
560     }
561 }
562 
563 /* return TRUE if a base table was read, FALSE for an extension table */
564 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)565 readFile(ConvData *data, const char* converterName,
566          UErrorCode *pErrorCode) {
567     char line[1024];
568     char *end;
569     FileStream *convFile;
570 
571     UCMStates *baseStates;
572     UBool dataIsBase;
573 
574     if(U_FAILURE(*pErrorCode)) {
575         return FALSE;
576     }
577 
578     data->ucm=ucm_open();
579 
580     convFile=T_FileStream_open(converterName, "r");
581     if(convFile==NULL) {
582         *pErrorCode=U_FILE_ACCESS_ERROR;
583         return FALSE;
584     }
585 
586     readHeader(data, convFile, pErrorCode);
587     if(U_FAILURE(*pErrorCode)) {
588         return FALSE;
589     }
590 
591     if(data->ucm->baseName[0]==0) {
592         dataIsBase=TRUE;
593         baseStates=&data->ucm->states;
594         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
595     } else {
596         dataIsBase=FALSE;
597         baseStates=NULL;
598     }
599 
600     /* read the base table */
601     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
602     if(U_FAILURE(*pErrorCode)) {
603         return FALSE;
604     }
605 
606     /* read an extension table if there is one */
607     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
608         end=uprv_strchr(line, 0);
609         while(line<end &&
610               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
611             --end;
612         }
613         *end=0;
614 
615         if(line[0]=='#' || u_skipWhitespace(line)==end) {
616             continue; /* ignore empty and comment lines */
617         }
618 
619         if(0==uprv_strcmp(line, "CHARMAP")) {
620             /* read the extension table */
621             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
622         } else {
623             fprintf(stderr, "unexpected text after the base mapping table\n");
624         }
625         break;
626     }
627 
628     T_FileStream_close(convFile);
629 
630     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
631         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
632         *pErrorCode=U_INVALID_TABLE_FORMAT;
633     }
634 
635     return dataIsBase;
636 }
637 
638 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)639 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
640     ConvData baseData;
641     UBool dataIsBase;
642 
643     UConverterStaticData *staticData;
644     UCMStates *states, *baseStates;
645 
646     if(U_FAILURE(*pErrorCode)) {
647         return;
648     }
649 
650     initConvData(data);
651 
652     dataIsBase=readFile(data, converterName, pErrorCode);
653     if(U_FAILURE(*pErrorCode)) {
654         return;
655     }
656 
657     staticData=&data->staticData;
658     states=&data->ucm->states;
659 
660     if(dataIsBase) {
661         /*
662          * Build a normal .cnv file with a base table
663          * and an optional extension table.
664          */
665         data->cnvData=MBCSOpen(data->ucm);
666         if(data->cnvData==NULL) {
667             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
668 
669         } else if(!data->cnvData->isValid(data->cnvData,
670                             staticData->subChar, staticData->subCharLen)
671         ) {
672             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
673             *pErrorCode=U_INVALID_TABLE_FORMAT;
674 
675         } else if(staticData->subChar1!=0 &&
676                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
677         ) {
678             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
679             *pErrorCode=U_INVALID_TABLE_FORMAT;
680 
681         } else if(
682             data->ucm->ext->mappingsLength>0 &&
683             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
684         ) {
685             *pErrorCode=U_INVALID_TABLE_FORMAT;
686         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
687             /* sort the table so that it can be turned into UTF-8-friendly data */
688             ucm_sortTable(data->ucm->base);
689         }
690 
691         if(U_SUCCESS(*pErrorCode)) {
692             if(
693                 /* add the base table after ucm_checkBaseExt()! */
694                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
695             ) {
696                 *pErrorCode=U_INVALID_TABLE_FORMAT;
697             } else {
698                 /*
699                  * addTable() may have requested moving more mappings to the extension table
700                  * if they fit into the base toUnicode table but not into the
701                  * base fromUnicode table.
702                  * (Especially for UTF-8-friendly fromUnicode tables.)
703                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
704                  * to be excluded from the extension toUnicode data.
705                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
706                  * the base fromUnicode table.
707                  */
708                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
709                 ucm_sortTable(data->ucm->ext);
710                 if(data->ucm->ext->mappingsLength>0) {
711                     /* prepare the extension table, if there is one */
712                     data->extData=CnvExtOpen(data->ucm);
713                     if(data->extData==NULL) {
714                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
715                     } else if(
716                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
717                     ) {
718                         *pErrorCode=U_INVALID_TABLE_FORMAT;
719                     }
720                 }
721             }
722         }
723     } else {
724         /* Build an extension-only .cnv file. */
725         char baseFilename[500];
726         char *basename;
727 
728         initConvData(&baseData);
729 
730         /* assemble a path/filename for data->ucm->baseName */
731         uprv_strcpy(baseFilename, converterName);
732         basename=(char *)findBasename(baseFilename);
733         uprv_strcpy(basename, data->ucm->baseName);
734         uprv_strcat(basename, ".ucm");
735 
736         /* read the base table */
737         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
738         if(U_FAILURE(*pErrorCode)) {
739             return;
740         } else if(!dataIsBase) {
741             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
742             *pErrorCode=U_INVALID_TABLE_FORMAT;
743         } else {
744             /* prepare the extension table */
745             data->extData=CnvExtOpen(data->ucm);
746             if(data->extData==NULL) {
747                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
748             } else {
749                 /* fill in gaps in extension file header fields */
750                 UCMapping *m, *mLimit;
751                 uint8_t fallbackFlags;
752 
753                 baseStates=&baseData.ucm->states;
754                 if(states->conversionType==UCNV_DBCS) {
755                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
756                 } else if(states->minCharLength==0) {
757                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
758                 }
759                 if(states->maxCharLength<states->minCharLength) {
760                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
761                 }
762 
763                 if(staticData->subCharLen==0) {
764                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
765                     staticData->subCharLen=baseData.staticData.subCharLen;
766                 }
767                 /*
768                  * do not copy subChar1 -
769                  * only use what is explicitly specified
770                  * because it cannot be unset in the extension file header
771                  */
772 
773                 /* get the fallback flags */
774                 fallbackFlags=0;
775                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
776                     m<mLimit && fallbackFlags!=3;
777                     ++m
778                 ) {
779                     if(m->f==1) {
780                         fallbackFlags|=1;
781                     } else if(m->f==3) {
782                         fallbackFlags|=2;
783                     }
784                 }
785 
786                 if(fallbackFlags&1) {
787                     staticData->hasFromUnicodeFallback=TRUE;
788                 }
789                 if(fallbackFlags&2) {
790                     staticData->hasToUnicodeFallback=TRUE;
791                 }
792 
793                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
794                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
795                     *pErrorCode=U_INVALID_TABLE_FORMAT;
796 
797                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
798                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
799                     *pErrorCode=U_INVALID_TABLE_FORMAT;
800 
801                 } else if(
802                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
803                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
804                 ) {
805                     *pErrorCode=U_INVALID_TABLE_FORMAT;
806                 } else {
807                     if(states->maxCharLength>1) {
808                         /*
809                          * When building a normal .cnv file with a base table
810                          * for an MBCS (not SBCS) table with explicit precision flags,
811                          * the MBCSAddTable() function marks some mappings for moving
812                          * to the extension table.
813                          * They fit into the base toUnicode table but not into the
814                          * base fromUnicode table.
815                          * (Note: We do have explicit precision flags because they are
816                          * required for extension table generation, and
817                          * ucm_checkBaseExt() verified it.)
818                          *
819                          * We do not call MBCSAddTable() here (we probably could)
820                          * so we need to do the analysis before building the extension table.
821                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
822                          * Redundant mappings in the extension table are ok except they cost some size.
823                          *
824                          * Do this after ucm_checkBaseExt().
825                          */
826                         const MBCSData *mbcsData=MBCSGetDummy();
827                         int32_t needsMove=0;
828                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
829                             m<mLimit;
830                             ++m
831                         ) {
832                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
833                                 m->f|=MBCS_FROM_U_EXT_FLAG;
834                                 m->moveFlag=UCM_MOVE_TO_EXT;
835                                 ++needsMove;
836                             }
837                         }
838 
839                         if(needsMove!=0) {
840                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
841                             ucm_sortTable(data->ucm->ext);
842                         }
843                     }
844                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
845                         *pErrorCode=U_INVALID_TABLE_FORMAT;
846                     }
847                 }
848             }
849         }
850 
851         cleanupConvData(&baseData);
852     }
853 }
854 
855 /*
856  * Hey, Emacs, please set the following:
857  *
858  * Local Variables:
859  * indent-tabs-mode: nil
860  * End:
861  *
862  */
863