1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 *
12 *  ucnv_io.cpp:
13 *  initializes global variables and defines functions pertaining to converter
14 *  name resolution aspect of the conversion code.
15 *
16 *   new implementation:
17 *
18 *   created on: 1999nov22
19 *   created by: Markus W. Scherer
20 *
21 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
22 *   with aliases for converter names.
23 *
24 *   Date        Name        Description
25 *   11/22/1999  markus      Created
26 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
27 *                           Now an alias can map to different converters
28 *                           depending on the specified standard.
29 *******************************************************************************
30 */
31 
32 #include "unicode/utypes.h"
33 
34 #if !UCONFIG_NO_CONVERSION
35 
36 #include "unicode/ucnv.h"
37 #include "unicode/udata.h"
38 
39 #include "umutex.h"
40 #include "uarrsort.h"
41 #include "uassert.h"
42 #include "udataswp.h"
43 #include "cstring.h"
44 #include "cmemory.h"
45 #include "ucnv_io.h"
46 #include "uenumimp.h"
47 #include "ucln_cmn.h"
48 
49 /* Format of cnvalias.icu -----------------------------------------------------
50  *
51  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
52  * This binary form contains several tables. All indexes are to uint16_t
53  * units, and not to the bytes (uint8_t units). Addressing everything on
54  * 16-bit boundaries allows us to store more information with small index
55  * numbers, which are also 16-bit in size. The majority of the table (except
56  * the string table) are 16-bit numbers.
57  *
 * First there is the length of the Table of Contents (TOC). The TOC
 * entries contain the size of each section. To find the offset of a
 * section, sum up the sizes of all the sections that precede it.
 * The TOC length and entries are an array of uint32_t values.
 * The first section starts immediately after the TOC.
63  *
64  * 1) This section contains a list of converters. This list contains indexes
65  * into the string table for the converter name. The index of this list is
66  * also used by other sections, which are mentioned later on.
67  * This list is not sorted.
68  *
69  * 2) This section contains a list of tags. This list contains indexes
70  * into the string table for the tag name. The index of this list is
71  * also used by other sections, which are mentioned later on.
72  * This list is in priority order of standards.
73  *
74  * 3) This section contains a list of sorted unique aliases. This
75  * list contains indexes into the string table for the alias name. The
76  * index of this list is also used by other sections, like the 4th section.
77  * The index for the 3rd and 4th section is used to get the
78  * alias -> converter name mapping. Section 3 and 4 form a two column table.
79  * Some of the most significant bits of each index may contain other
80  * information (see findConverter for details).
81  *
82  * 4) This section contains a list of mapped converter names. Consider this
83  * as a table that maps the 3rd section to the 1st section. This list contains
84  * indexes into the 1st section. The index of this list is the same index in
85  * the 3rd section. There is also some extra information in the high bits of
86  * each converter index in this table. Currently it's only used to say that
87  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
88  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
89  * the predigested form of the 5th section so that an alias lookup can be fast.
90  *
 * 5) This section contains a 2D array with indexes to the 6th section. This
 * section is the full form of all alias mappings. The column index is the
 * index into the converter list (column header). The row index is the index
 * into the tag list (row header). This 2D array is the top part of a 3D
 * array. The third dimension is in the 6th section.
96  *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
 * and is followed by indexes to alias names in the string table. This is
 * the third dimension of section 5. No other section should be referencing
 * this section.
101  *
102  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
103  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
104  * what type of string normalization is used among other potential things in the
105  * future.
106  *
107  * 8) This is the string table. All strings are indexed on an even address.
108  * There are two reasons for this. First many chip architectures locate strings
109  * faster on even address boundaries. Second, since all indexes are 16-bit
110  * numbers, this string table can be 128KB in size instead of 64KB when we
111  * only have strings starting on an even address.
112  *
113  * 9) When present this is a set of prenormalized strings from section 8. This
114  * table contains normalized strings with the dashes and spaces stripped out,
115  * and all strings lowercased. In the future, the options in section 7 may state
116  * other types of normalization.
117  *
118  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
119  * has a unique alias among all converters. That same alias can
120  * be mentioned in other standards on different converters,
121  * but only one alias per tag can be unique.
122  *
123  *
124  *              Converter Names (Usually in TR22 form)
125  *           -------------------------------------------.
126  *     T    /                                          /|
127  *     a   /                                          / |
128  *     g  /                                          /  |
129  *     s /                                          /   |
130  *      /                                          /    |
131  *      ------------------------------------------/     |
132  *    A |                                         |     |
133  *    l |                                         |     |
134  *    i |                                         |    /
135  *    a |                                         |   /
136  *    s |                                         |  /
137  *    e |                                         | /
138  *    s |                                         |/
139  *      -------------------------------------------
140  *
141  *
142  *
143  * Here is what it really looks like. It's like swiss cheese.
144  * There are holes. Some converters aren't recognized by
145  * a standard, or they are really old converters that the
146  * standard doesn't recognize anymore.
147  *
148  *              Converter Names (Usually in TR22 form)
149  *           -------------------------------------------.
150  *     T    /##########################################/|
151  *     a   /     #            #                       /#
152  *     g  /  #      ##     ##     ### # ### ### ### #/
153  *     s / #             #####  ####        ##  ## #/#
154  *      / ### # # ##  #  #   #          ### # #   #/##
155  *      ------------------------------------------/# #
156  *    A |### # # ##  #  #   #          ### # #   #|# #
157  *    l |# # #    #     #               ## #     #|# #
158  *    i |# # #    #     #                #       #|#
159  *    a |#                                       #|#
160  *    s |                                        #|#
161  *    e
162  *    s
163  *
164  */
165 
/**
 * Iteration state kept in the context pointer of the UEnumeration
 * that walks the aliases of one tagged alias list.
 */
struct UAliasContext {
    uint32_t listOffset;    /* offset of the list inside gMainTable.taggedAliasLists; 0 = no list */
    uint32_t listIdx;       /* index of the next alias to be returned from that list */
};
typedef struct UAliasContext UAliasContext;
173 
174 static const char DATA_NAME[] = "cnvalias";
175 static const char DATA_TYPE[] = "icu";
176 
177 static UDataMemory *gAliasData=NULL;
178 static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
179 
/*
 * Positions of the entries in the table of contents (an array of uint32_t)
 * at the start of the alias data. Entry 0 is the TOC length itself; the
 * following entries are the section sizes, in file order.
 */
enum {
    tocLengthIndex = 0,
    converterListIndex,
    tagListIndex,
    aliasListIndex,
    untaggedConvArrayIndex,
    taggedAliasArrayIndex,
    taggedAliasListsIndex,
    tableOptionsIndex,
    stringTableIndex,
    normalizedStringTableIndex,
    offsetsCount,       /* length of the swapper's temporary offsets[] */
    minTocLength = 8    /* min. tocLength in the file, does not count the tocLengthIndex! */
};
194 
195 static const UConverterAliasOptions defaultTableOptions = {
196     UCNV_IO_UNNORMALIZED,
197     0 /* containsCnvOptionInfo */
198 };
199 static UConverterAlias gMainTable;
200 
201 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
202 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
203 
204 static UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)205 isAcceptable(void * /*context*/,
206              const char * /*type*/, const char * /*name*/,
207              const UDataInfo *pInfo) {
208     return (UBool)(
209         pInfo->size>=20 &&
210         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
211         pInfo->charsetFamily==U_CHARSET_FAMILY &&
212         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
213         pInfo->dataFormat[1]==0x76 &&
214         pInfo->dataFormat[2]==0x41 &&
215         pInfo->dataFormat[3]==0x6c &&
216         pInfo->formatVersion[0]==3);
217 }
218 
ucnv_io_cleanup(void)219 static UBool U_CALLCONV ucnv_io_cleanup(void)
220 {
221     if (gAliasData) {
222         udata_close(gAliasData);
223         gAliasData = NULL;
224     }
225     gAliasDataInitOnce.reset();
226 
227     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
228 
229     return TRUE;                   /* Everything was cleaned up */
230 }
231 
initAliasData(UErrorCode & errCode)232 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
233     UDataMemory *data;
234     const uint16_t *table;
235     const uint32_t *sectionSizes;
236     uint32_t tableStart;
237     uint32_t currOffset;
238 
239     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
240 
241     U_ASSERT(gAliasData == NULL);
242     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
243     if(U_FAILURE(errCode)) {
244         return;
245     }
246 
247     sectionSizes = (const uint32_t *)udata_getMemory(data);
248     table = (const uint16_t *)sectionSizes;
249 
250     tableStart      = sectionSizes[0];
251     if (tableStart < minTocLength) {
252         errCode = U_INVALID_FORMAT_ERROR;
253         udata_close(data);
254         return;
255     }
256     gAliasData = data;
257 
258     gMainTable.converterListSize      = sectionSizes[1];
259     gMainTable.tagListSize            = sectionSizes[2];
260     gMainTable.aliasListSize          = sectionSizes[3];
261     gMainTable.untaggedConvArraySize  = sectionSizes[4];
262     gMainTable.taggedAliasArraySize   = sectionSizes[5];
263     gMainTable.taggedAliasListsSize   = sectionSizes[6];
264     gMainTable.optionTableSize        = sectionSizes[7];
265     gMainTable.stringTableSize        = sectionSizes[8];
266 
267     if (tableStart > 8) {
268         gMainTable.normalizedStringTableSize = sectionSizes[9];
269     }
270 
271     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
272     gMainTable.converterList = table + currOffset;
273 
274     currOffset += gMainTable.converterListSize;
275     gMainTable.tagList = table + currOffset;
276 
277     currOffset += gMainTable.tagListSize;
278     gMainTable.aliasList = table + currOffset;
279 
280     currOffset += gMainTable.aliasListSize;
281     gMainTable.untaggedConvArray = table + currOffset;
282 
283     currOffset += gMainTable.untaggedConvArraySize;
284     gMainTable.taggedAliasArray = table + currOffset;
285 
286     /* aliasLists is a 1's based array, but it has a padding character */
287     currOffset += gMainTable.taggedAliasArraySize;
288     gMainTable.taggedAliasLists = table + currOffset;
289 
290     currOffset += gMainTable.taggedAliasListsSize;
291     if (gMainTable.optionTableSize > 0
292         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
293     {
294         /* Faster table */
295         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
296     }
297     else {
298         /* Smaller table, or I can't handle this normalization mode!
299         Use the original slower table lookup. */
300         gMainTable.optionTable = &defaultTableOptions;
301     }
302 
303     currOffset += gMainTable.optionTableSize;
304     gMainTable.stringTable = table + currOffset;
305 
306     currOffset += gMainTable.stringTableSize;
307     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
308         ? gMainTable.stringTable : (table + currOffset));
309 }
310 
311 
312 static UBool
haveAliasData(UErrorCode * pErrorCode)313 haveAliasData(UErrorCode *pErrorCode) {
314     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
315     return U_SUCCESS(*pErrorCode);
316 }
317 
318 static inline UBool
isAlias(const char * alias,UErrorCode * pErrorCode)319 isAlias(const char *alias, UErrorCode *pErrorCode) {
320     if(alias==NULL) {
321         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
322         return FALSE;
323     }
324     return (UBool)(*alias!=0);
325 }
326 
getTagNumber(const char * tagname)327 static uint32_t getTagNumber(const char *tagname) {
328     if (gMainTable.tagList) {
329         uint32_t tagNum;
330         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
331             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
332                 return tagNum;
333             }
334         }
335     }
336 
337     return UINT32_MAX;
338 }
339 
/* character types relevant for ucnv_compareNames() */
enum {
    UIGNORE,    /* character that is skipped by the comparison */
    ZERO,       /* the digit '0' */
    NONZERO,    /* the digits '1'..'9' */
    MINLETTER   /* any values from here on are lowercase letter mappings */
};

/*
 * character types for ASCII 00..7F
 * Digits map to ZERO/NONZERO, letters of either case map to the lowercase
 * letter code, everything else maps to 0 (UIGNORE).
 */
static const uint8_t asciiTypes[128] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Character type of c: table lookup for 00..7F, UIGNORE for anything else.
 * Every use of the argument is parenthesized so that an expression argument
 * (for example a conditional expression) is cast and indexed as a whole.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)
361 
362 /* character types for EBCDIC 80..FF */
363 static const uint8_t ebcdicTypes[128] = {
364     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
365     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
366     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
367     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
368     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
369     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
370     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
371     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
372 };
373 
374 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
375 
376 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
377 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
378 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
379 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
380 #else
381 #   error U_CHARSET_FAMILY is not valid
382 #endif
383 
384 
385 /* @see ucnv_compareNames */
386 U_CAPI char * U_CALLCONV
ucnv_io_stripASCIIForCompare(char * dst,const char * name)387 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
388     char *dstItr = dst;
389     uint8_t type, nextType;
390     char c1;
391     UBool afterDigit = FALSE;
392 
393     while ((c1 = *name++) != 0) {
394         type = GET_ASCII_TYPE(c1);
395         switch (type) {
396         case UIGNORE:
397             afterDigit = FALSE;
398             continue; /* ignore all but letters and digits */
399         case ZERO:
400             if (!afterDigit) {
401                 nextType = GET_ASCII_TYPE(*name);
402                 if (nextType == ZERO || nextType == NONZERO) {
403                     continue; /* ignore leading zero before another digit */
404                 }
405             }
406             break;
407         case NONZERO:
408             afterDigit = TRUE;
409             break;
410         default:
411             c1 = (char)type; /* lowercased letter */
412             afterDigit = FALSE;
413             break;
414         }
415         *dstItr++ = c1;
416     }
417     *dstItr = 0;
418     return dst;
419 }
420 
421 U_CAPI char * U_CALLCONV
ucnv_io_stripEBCDICForCompare(char * dst,const char * name)422 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
423     char *dstItr = dst;
424     uint8_t type, nextType;
425     char c1;
426     UBool afterDigit = FALSE;
427 
428     while ((c1 = *name++) != 0) {
429         type = GET_EBCDIC_TYPE(c1);
430         switch (type) {
431         case UIGNORE:
432             afterDigit = FALSE;
433             continue; /* ignore all but letters and digits */
434         case ZERO:
435             if (!afterDigit) {
436                 nextType = GET_EBCDIC_TYPE(*name);
437                 if (nextType == ZERO || nextType == NONZERO) {
438                     continue; /* ignore leading zero before another digit */
439                 }
440             }
441             break;
442         case NONZERO:
443             afterDigit = TRUE;
444             break;
445         default:
446             c1 = (char)type; /* lowercased letter */
447             afterDigit = FALSE;
448             break;
449         }
450         *dstItr++ = c1;
451     }
452     *dstItr = 0;
453     return dst;
454 }
455 
456 /**
457  * Do a fuzzy compare of two converter/alias names.
458  * The comparison is case-insensitive, ignores leading zeroes if they are not
459  * followed by further digits, and ignores all but letters and digits.
460  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
461  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
462  * at http://www.unicode.org/reports/tr22/
463  *
464  * This is a symmetrical (commutative) operation; order of arguments
465  * is insignificant.  This is an important property for sorting the
466  * list (when the list is preprocessed into binary form) and for
467  * performing binary searches on it at run time.
468  *
469  * @param name1 a converter name or alias, zero-terminated
470  * @param name2 a converter name or alias, zero-terminated
471  * @return 0 if the names match, or a negative value if the name1
472  * lexically precedes name2, or a positive value if the name1
473  * lexically follows name2.
474  *
475  * @see ucnv_io_stripForCompare
476  */
477 U_CAPI int U_EXPORT2
ucnv_compareNames(const char * name1,const char * name2)478 ucnv_compareNames(const char *name1, const char *name2) {
479     int rc;
480     uint8_t type, nextType;
481     char c1, c2;
482     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
483 
484     for (;;) {
485         while ((c1 = *name1++) != 0) {
486             type = GET_CHAR_TYPE(c1);
487             switch (type) {
488             case UIGNORE:
489                 afterDigit1 = FALSE;
490                 continue; /* ignore all but letters and digits */
491             case ZERO:
492                 if (!afterDigit1) {
493                     nextType = GET_CHAR_TYPE(*name1);
494                     if (nextType == ZERO || nextType == NONZERO) {
495                         continue; /* ignore leading zero before another digit */
496                     }
497                 }
498                 break;
499             case NONZERO:
500                 afterDigit1 = TRUE;
501                 break;
502             default:
503                 c1 = (char)type; /* lowercased letter */
504                 afterDigit1 = FALSE;
505                 break;
506             }
507             break; /* deliver c1 */
508         }
509         while ((c2 = *name2++) != 0) {
510             type = GET_CHAR_TYPE(c2);
511             switch (type) {
512             case UIGNORE:
513                 afterDigit2 = FALSE;
514                 continue; /* ignore all but letters and digits */
515             case ZERO:
516                 if (!afterDigit2) {
517                     nextType = GET_CHAR_TYPE(*name2);
518                     if (nextType == ZERO || nextType == NONZERO) {
519                         continue; /* ignore leading zero before another digit */
520                     }
521                 }
522                 break;
523             case NONZERO:
524                 afterDigit2 = TRUE;
525                 break;
526             default:
527                 c2 = (char)type; /* lowercased letter */
528                 afterDigit2 = FALSE;
529                 break;
530             }
531             break; /* deliver c2 */
532         }
533 
534         /* If we reach the ends of both strings then they match */
535         if ((c1|c2)==0) {
536             return 0;
537         }
538 
539         /* Case-insensitive comparison */
540         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
541         if (rc != 0) {
542             return rc;
543         }
544     }
545 }
546 
547 /*
548  * search for an alias
549  * return the converter number index for gConverterList
550  */
551 static inline uint32_t
findConverter(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)552 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
553     uint32_t mid, start, limit;
554     uint32_t lastMid;
555     int result;
556     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
557     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
558 
559     if (!isUnnormalized) {
560         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
561             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
562             return UINT32_MAX;
563         }
564 
565         /* Lower case and remove ignoreable characters. */
566         ucnv_io_stripForCompare(strippedName, alias);
567         alias = strippedName;
568     }
569 
570     /* do a binary search for the alias */
571     start = 0;
572     limit = gMainTable.untaggedConvArraySize;
573     mid = limit;
574     lastMid = UINT32_MAX;
575 
576     for (;;) {
577         mid = (uint32_t)((start + limit) / 2);
578         if (lastMid == mid) {   /* Have we moved? */
579             break;  /* We haven't moved, and it wasn't found. */
580         }
581         lastMid = mid;
582         if (isUnnormalized) {
583             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
584         }
585         else {
586             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
587         }
588 
589         if (result < 0) {
590             limit = mid;
591         } else if (result > 0) {
592             start = mid;
593         } else {
594             /* Since the gencnval tool folds duplicates into one entry,
595              * this alias in gAliasList is unique, but different standards
596              * may map an alias to different converters.
597              */
598             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
599                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
600             }
601             /* State whether the canonical converter name contains an option.
602             This information is contained in this list in order to maintain backward & forward compatibility. */
603             if (containsOption) {
604                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
605                 *containsOption = (UBool)((containsCnvOptionInfo
606                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
607                     || !containsCnvOptionInfo);
608             }
609             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
610         }
611     }
612 
613     return UINT32_MAX;
614 }
615 
616 /*
617  * Is this alias in this list?
618  * alias and listOffset should be non-NULL.
619  */
620 static inline UBool
isAliasInList(const char * alias,uint32_t listOffset)621 isAliasInList(const char *alias, uint32_t listOffset) {
622     if (listOffset) {
623         uint32_t currAlias;
624         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
625         /* +1 to skip listCount */
626         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
627         for (currAlias = 0; currAlias < listCount; currAlias++) {
628             if (currList[currAlias]
629                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
630             {
631                 return TRUE;
632             }
633         }
634     }
635     return FALSE;
636 }
637 
638 /*
639  * Search for an standard name of an alias (what is the default name
640  * that this standard uses?)
641  * return the listOffset for gTaggedAliasLists. If it's 0,
642  * the it couldn't be found, but the parameters are valid.
643  */
644 static uint32_t
findTaggedAliasListsOffset(const char * alias,const char * standard,UErrorCode * pErrorCode)645 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
646     uint32_t idx;
647     uint32_t listOffset;
648     uint32_t convNum;
649     UErrorCode myErr = U_ZERO_ERROR;
650     uint32_t tagNum = getTagNumber(standard);
651 
652     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
653     convNum = findConverter(alias, NULL, &myErr);
654     if (myErr != U_ZERO_ERROR) {
655         *pErrorCode = myErr;
656     }
657 
658     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
659         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
660         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
661             return listOffset;
662         }
663         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
664             /* Uh Oh! They used an ambiguous alias.
665                We have to search the whole swiss cheese starting
666                at the highest standard affinity.
667                This may take a while.
668             */
669             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
670                 listOffset = gMainTable.taggedAliasArray[idx];
671                 if (listOffset && isAliasInList(alias, listOffset)) {
672                     uint32_t currTagNum = idx/gMainTable.converterListSize;
673                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
674                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
675                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
676                         return tempListOffset;
677                     }
678                     /* else keep on looking */
679                     /* We could speed this up by starting on the next row
680                        because an alias is unique per row, right now.
681                        This would change if alias versioning appears. */
682                 }
683             }
684             /* The standard doesn't know about the alias */
685         }
686         /* else no default name */
687         return 0;
688     }
689     /* else converter or tag not found */
690 
691     return UINT32_MAX;
692 }
693 
694 /* Return the canonical name */
695 static uint32_t
findTaggedConverterNum(const char * alias,const char * standard,UErrorCode * pErrorCode)696 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
697     uint32_t idx;
698     uint32_t listOffset;
699     uint32_t convNum;
700     UErrorCode myErr = U_ZERO_ERROR;
701     uint32_t tagNum = getTagNumber(standard);
702 
703     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
704     convNum = findConverter(alias, NULL, &myErr);
705     if (myErr != U_ZERO_ERROR) {
706         *pErrorCode = myErr;
707     }
708 
709     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
710         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
711         if (listOffset && isAliasInList(alias, listOffset)) {
712             return convNum;
713         }
714         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
715             /* Uh Oh! They used an ambiguous alias.
716                We have to search one slice of the swiss cheese.
717                We search only in the requested tag, not the whole thing.
718                This may take a while.
719             */
720             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
721             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
722             for (idx = convStart; idx < convLimit; idx++) {
723                 listOffset = gMainTable.taggedAliasArray[idx];
724                 if (listOffset && isAliasInList(alias, listOffset)) {
725                     return idx-convStart;
726                 }
727             }
728             /* The standard doesn't know about the alias */
729         }
730         /* else no canonical name */
731     }
732     /* else converter or tag not found */
733 
734     return UINT32_MAX;
735 }
736 
737 U_CAPI const char *
ucnv_io_getConverterName(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)738 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
739     const char *aliasTmp = alias;
740     int32_t i = 0;
741     for (i = 0; i < 2; i++) {
742         if (i == 1) {
743             /*
744              * After the first unsuccess converter lookup, check to see if
745              * the name begins with 'x-'. If it does, strip it off and try
746              * again.  This behaviour is similar to how ICU4J does it.
747              */
748             if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
749                 aliasTmp = aliasTmp+2;
750             } else {
751                 break;
752             }
753         }
754         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
755             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
756             if (convNum < gMainTable.converterListSize) {
757                 return GET_STRING(gMainTable.converterList[convNum]);
758             }
759             /* else converter not found */
760         } else {
761             break;
762         }
763     }
764 
765     return NULL;
766 }
767 
768 U_CDECL_BEGIN
769 
770 
771 static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration * enumerator,UErrorCode *)772 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
773     int32_t value = 0;
774     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
775     uint32_t listOffset = myContext->listOffset;
776 
777     if (listOffset) {
778         value = gMainTable.taggedAliasLists[listOffset];
779     }
780     return value;
781 }
782 
783 static const char * U_CALLCONV
ucnv_io_nextStandardAliases(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)784 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
785                             int32_t* resultLength,
786                             UErrorCode * /*pErrorCode*/)
787 {
788     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
789     uint32_t listOffset = myContext->listOffset;
790 
791     if (listOffset) {
792         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
793         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
794 
795         if (myContext->listIdx < listCount) {
796             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
797             if (resultLength) {
798                 *resultLength = (int32_t)uprv_strlen(myStr);
799             }
800             return myStr;
801         }
802     }
803     /* Either we accessed a zero length list, or we enumerated too far. */
804     if (resultLength) {
805         *resultLength = 0;
806     }
807     return NULL;
808 }
809 
810 static void U_CALLCONV
ucnv_io_resetStandardAliases(UEnumeration * enumerator,UErrorCode *)811 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
812     ((UAliasContext *)(enumerator->context))->listIdx = 0;
813 }
814 
815 static void U_CALLCONV
ucnv_io_closeUEnumeration(UEnumeration * enumerator)816 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
817     uprv_free(enumerator->context);
818     uprv_free(enumerator);
819 }
820 
821 U_CDECL_END
822 
823 /* Enumerate the aliases for the specified converter and standard tag */
824 static const UEnumeration gEnumAliases = {
825     NULL,
826     NULL,
827     ucnv_io_closeUEnumeration,
828     ucnv_io_countStandardAliases,
829     uenum_unextDefault,
830     ucnv_io_nextStandardAliases,
831     ucnv_io_resetStandardAliases
832 };
833 
834 U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char * convName,const char * standard,UErrorCode * pErrorCode)835 ucnv_openStandardNames(const char *convName,
836                        const char *standard,
837                        UErrorCode *pErrorCode)
838 {
839     UEnumeration *myEnum = NULL;
840     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
841         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
842 
843         /* When listOffset == 0, we want to acknowledge that the
844            converter name and standard are okay, but there
845            is nothing to enumerate. */
846         if (listOffset < gMainTable.taggedAliasListsSize) {
847             UAliasContext *myContext;
848 
849             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
850             if (myEnum == NULL) {
851                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852                 return NULL;
853             }
854             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
855             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
856             if (myContext == NULL) {
857                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
858                 uprv_free(myEnum);
859                 return NULL;
860             }
861             myContext->listOffset = listOffset;
862             myContext->listIdx = 0;
863             myEnum->context = myContext;
864         }
865         /* else converter or tag not found */
866     }
867     return myEnum;
868 }
869 
870 static uint16_t
ucnv_io_countAliases(const char * alias,UErrorCode * pErrorCode)871 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
872     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
873         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
874         if (convNum < gMainTable.converterListSize) {
875             /* tagListNum - 1 is the ALL tag */
876             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
877 
878             if (listOffset) {
879                 return gMainTable.taggedAliasLists[listOffset];
880             }
881             /* else this shouldn't happen. internal program error */
882         }
883         /* else converter not found */
884     }
885     return 0;
886 }
887 
888 static uint16_t
ucnv_io_getAliases(const char * alias,uint16_t start,const char ** aliases,UErrorCode * pErrorCode)889 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
890     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
891         uint32_t currAlias;
892         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
893         if (convNum < gMainTable.converterListSize) {
894             /* tagListNum - 1 is the ALL tag */
895             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
896 
897             if (listOffset) {
898                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
899                 /* +1 to skip listCount */
900                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
901 
902                 for (currAlias = start; currAlias < listCount; currAlias++) {
903                     aliases[currAlias] = GET_STRING(currList[currAlias]);
904                 }
905             }
906             /* else this shouldn't happen. internal program error */
907         }
908         /* else converter not found */
909     }
910     return 0;
911 }
912 
913 static const char *
ucnv_io_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)914 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
915     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
916         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
917         if (convNum < gMainTable.converterListSize) {
918             /* tagListNum - 1 is the ALL tag */
919             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
920 
921             if (listOffset) {
922                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
923                 /* +1 to skip listCount */
924                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
925 
926                 if (n < listCount)  {
927                     return GET_STRING(currList[n]);
928                 }
929                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
930             }
931             /* else this shouldn't happen. internal program error */
932         }
933         /* else converter not found */
934     }
935     return NULL;
936 }
937 
938 static uint16_t
ucnv_io_countStandards(UErrorCode * pErrorCode)939 ucnv_io_countStandards(UErrorCode *pErrorCode) {
940     if (haveAliasData(pErrorCode)) {
941         /* Don't include the empty list */
942         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
943     }
944 
945     return 0;
946 }
947 
948 U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n,UErrorCode * pErrorCode)949 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
950     if (haveAliasData(pErrorCode)) {
951         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
952             return GET_STRING(gMainTable.tagList[n]);
953         }
954         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
955     }
956 
957     return NULL;
958 }
959 
960 U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char * alias,const char * standard,UErrorCode * pErrorCode)961 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
962     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
963         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
964 
965         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
966             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
967 
968             /* Get the preferred name from this list */
969             if (currList[0]) {
970                 return GET_STRING(currList[0]);
971             }
972             /* else someone screwed up the alias table. */
973             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
974         }
975     }
976 
977     return NULL;
978 }
979 
980 U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char * alias,UErrorCode * pErrorCode)981 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
982 {
983     return ucnv_io_countAliases(alias, pErrorCode);
984 }
985 
986 
987 U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)988 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
989 {
990     return ucnv_io_getAlias(alias, n, pErrorCode);
991 }
992 
993 U_CAPI void U_EXPORT2
ucnv_getAliases(const char * alias,const char ** aliases,UErrorCode * pErrorCode)994 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
995 {
996     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
997 }
998 
999 U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)1000 ucnv_countStandards(void)
1001 {
1002     UErrorCode err = U_ZERO_ERROR;
1003     return ucnv_io_countStandards(&err);
1004 }
1005 
1006 U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char * alias,const char * standard,UErrorCode * pErrorCode)1007 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1008     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1009         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
1010 
1011         if (convNum < gMainTable.converterListSize) {
1012             return GET_STRING(gMainTable.converterList[convNum]);
1013         }
1014     }
1015 
1016     return NULL;
1017 }
1018 
1019 U_CDECL_BEGIN
1020 
1021 
1022 static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration *,UErrorCode *)1023 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
1024     return gMainTable.converterListSize;
1025 }
1026 
1027 static const char * U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)1028 ucnv_io_nextAllConverters(UEnumeration *enumerator,
1029                             int32_t* resultLength,
1030                             UErrorCode * /*pErrorCode*/)
1031 {
1032     uint16_t *myContext = (uint16_t *)(enumerator->context);
1033 
1034     if (*myContext < gMainTable.converterListSize) {
1035         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1036         if (resultLength) {
1037             *resultLength = (int32_t)uprv_strlen(myStr);
1038         }
1039         return myStr;
1040     }
1041     /* Either we accessed a zero length list, or we enumerated too far. */
1042     if (resultLength) {
1043         *resultLength = 0;
1044     }
1045     return NULL;
1046 }
1047 
1048 static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration * enumerator,UErrorCode *)1049 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1050     *((uint16_t *)(enumerator->context)) = 0;
1051 }
1052 U_CDECL_END
1053 static const UEnumeration gEnumAllConverters = {
1054     NULL,
1055     NULL,
1056     ucnv_io_closeUEnumeration,
1057     ucnv_io_countAllConverters,
1058     uenum_unextDefault,
1059     ucnv_io_nextAllConverters,
1060     ucnv_io_resetAllConverters
1061 };
1062 
1063 U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode * pErrorCode)1064 ucnv_openAllNames(UErrorCode *pErrorCode) {
1065     UEnumeration *myEnum = NULL;
1066     if (haveAliasData(pErrorCode)) {
1067         uint16_t *myContext;
1068 
1069         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
1070         if (myEnum == NULL) {
1071             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1072             return NULL;
1073         }
1074         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1075         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
1076         if (myContext == NULL) {
1077             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1078             uprv_free(myEnum);
1079             return NULL;
1080         }
1081         *myContext = 0;
1082         myEnum->context = myContext;
1083     }
1084     return myEnum;
1085 }
1086 
1087 U_CAPI uint16_t
ucnv_io_countKnownConverters(UErrorCode * pErrorCode)1088 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1089     if (haveAliasData(pErrorCode)) {
1090         return (uint16_t)gMainTable.converterListSize;
1091     }
1092     return 0;
1093 }
1094 
1095 /* alias table swapping ----------------------------------------------------- */
1096 
1097 U_CDECL_BEGIN
1098 
1099 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1100 U_CDECL_END
1101 
1102 
/*
 * One row of the temporary array used while re-sorting the alias list.
 *
 * Each row holds a platform-endian charset string index together with the
 * row's original position. The array is sorted by the strings, and the real
 * arrays are then permuted according to the recorded positions.
 */
typedef struct TempRow {
    uint16_t strIndex;      /* string index read from the alias list */
    uint16_t sortIndex;     /* position of this row before sorting */
} TempRow;
1113 
1114 typedef struct TempAliasTable {
1115     const char *chars;
1116     TempRow *rows;
1117     uint16_t *resort;
1118     StripForCompareFn *stripForCompare;
1119 } TempAliasTable;
1120 
/*
 * Number of rows that ucnv_swapAliases() keeps in stack arrays;
 * larger alias lists use a heap allocation instead.
 */
enum {
    STACK_ROW_CAPACITY = 500
};
1124 
1125 static int32_t U_CALLCONV
io_compareRows(const void * context,const void * left,const void * right)1126 io_compareRows(const void *context, const void *left, const void *right) {
1127     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1128          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1129 
1130     TempAliasTable *tempTable=(TempAliasTable *)context;
1131     const char *chars=tempTable->chars;
1132 
1133     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1134                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1135 }
1136 
1137 U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1138 ucnv_swapAliases(const UDataSwapper *ds,
1139                  const void *inData, int32_t length, void *outData,
1140                  UErrorCode *pErrorCode) {
1141     const UDataInfo *pInfo;
1142     int32_t headerSize;
1143 
1144     const uint16_t *inTable;
1145     const uint32_t *inSectionSizes;
1146     uint32_t toc[offsetsCount];
1147     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1148     uint32_t i, count, tocLength, topOffset;
1149 
1150     TempRow rows[STACK_ROW_CAPACITY];
1151     uint16_t resort[STACK_ROW_CAPACITY];
1152     TempAliasTable tempTable;
1153 
1154     /* udata_swapDataHeader checks the arguments */
1155     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1156     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1157         return 0;
1158     }
1159 
1160     /* check data format and format version */
1161     pInfo=(const UDataInfo *)((const char *)inData+4);
1162     if(!(
1163         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1164         pInfo->dataFormat[1]==0x76 &&
1165         pInfo->dataFormat[2]==0x41 &&
1166         pInfo->dataFormat[3]==0x6c &&
1167         pInfo->formatVersion[0]==3
1168     )) {
1169         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1170                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1171                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1172                          pInfo->formatVersion[0]);
1173         *pErrorCode=U_UNSUPPORTED_ERROR;
1174         return 0;
1175     }
1176 
1177     /* an alias table must contain at least the table of contents array */
1178     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1179         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1180                          length-headerSize);
1181         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1182         return 0;
1183     }
1184 
1185     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1186     inTable=(const uint16_t *)inSectionSizes;
1187     uprv_memset(toc, 0, sizeof(toc));
1188     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1189     if(tocLength<minTocLength || offsetsCount<=tocLength) {
1190         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1191         *pErrorCode=U_INVALID_FORMAT_ERROR;
1192         return 0;
1193     }
1194 
1195     /* read the known part of the table of contents */
1196     for(i=converterListIndex; i<=tocLength; ++i) {
1197         toc[i]=ds->readUInt32(inSectionSizes[i]);
1198     }
1199 
1200     /* compute offsets */
1201     uprv_memset(offsets, 0, sizeof(offsets));
1202     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1203     for(i=tagListIndex; i<=tocLength; ++i) {
1204         offsets[i]=offsets[i-1]+toc[i-1];
1205     }
1206 
1207     /* compute the overall size of the after-header data, in numbers of 16-bit units */
1208     topOffset=offsets[i-1]+toc[i-1];
1209 
1210     if(length>=0) {
1211         uint16_t *outTable;
1212         const uint16_t *p, *p2;
1213         uint16_t *q, *q2;
1214         uint16_t oldIndex;
1215 
1216         if((length-headerSize)<(2*(int32_t)topOffset)) {
1217             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1218                              length-headerSize);
1219             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1220             return 0;
1221         }
1222 
1223         outTable=(uint16_t *)((char *)outData+headerSize);
1224 
1225         /* swap the entire table of contents */
1226         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1227 
1228         /* swap unormalized strings & normalized strings */
1229         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1230                              outTable+offsets[stringTableIndex], pErrorCode);
1231         if(U_FAILURE(*pErrorCode)) {
1232             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1233             return 0;
1234         }
1235 
1236         if(ds->inCharset==ds->outCharset) {
1237             /* no need to sort, just swap all 16-bit values together */
1238             ds->swapArray16(ds,
1239                             inTable+offsets[converterListIndex],
1240                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1241                             outTable+offsets[converterListIndex],
1242                             pErrorCode);
1243         } else {
1244             /* allocate the temporary table for sorting */
1245             count=toc[aliasListIndex];
1246 
1247             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1248 
1249             if(count<=STACK_ROW_CAPACITY) {
1250                 tempTable.rows=rows;
1251                 tempTable.resort=resort;
1252             } else {
1253                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1254                 if(tempTable.rows==NULL) {
1255                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1256                                      count);
1257                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1258                     return 0;
1259                 }
1260                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1261             }
1262 
1263             if(ds->outCharset==U_ASCII_FAMILY) {
1264                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1265             } else /* U_EBCDIC_FAMILY */ {
1266                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1267             }
1268 
1269             /*
1270              * Sort unique aliases+mapped names.
1271              *
1272              * We need to sort the list again by outCharset strings because they
1273              * sort differently for different charset families.
1274              * First we set up a temporary table with the string indexes and
1275              * sorting indexes and sort that.
1276              * Then we permutate and copy/swap the actual values.
1277              */
1278             p=inTable+offsets[aliasListIndex];
1279             q=outTable+offsets[aliasListIndex];
1280 
1281             p2=inTable+offsets[untaggedConvArrayIndex];
1282             q2=outTable+offsets[untaggedConvArrayIndex];
1283 
1284             for(i=0; i<count; ++i) {
1285                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1286                 tempTable.rows[i].sortIndex=(uint16_t)i;
1287             }
1288 
1289             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1290                            io_compareRows, &tempTable,
1291                            FALSE, pErrorCode);
1292 
1293             if(U_SUCCESS(*pErrorCode)) {
1294                 /* copy/swap/permutate items */
1295                 if(p!=q) {
1296                     for(i=0; i<count; ++i) {
1297                         oldIndex=tempTable.rows[i].sortIndex;
1298                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1299                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1300                     }
1301                 } else {
1302                     /*
1303                      * If we swap in-place, then the permutation must use another
1304                      * temporary array (tempTable.resort)
1305                      * before the results are copied to the outBundle.
1306                      */
1307                     uint16_t *r=tempTable.resort;
1308 
1309                     for(i=0; i<count; ++i) {
1310                         oldIndex=tempTable.rows[i].sortIndex;
1311                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1312                     }
1313                     uprv_memcpy(q, r, 2*(size_t)count);
1314 
1315                     for(i=0; i<count; ++i) {
1316                         oldIndex=tempTable.rows[i].sortIndex;
1317                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1318                     }
1319                     uprv_memcpy(q2, r, 2*(size_t)count);
1320                 }
1321             }
1322 
1323             if(tempTable.rows!=rows) {
1324                 uprv_free(tempTable.rows);
1325             }
1326 
1327             if(U_FAILURE(*pErrorCode)) {
1328                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1329                                  count);
1330                 return 0;
1331             }
1332 
1333             /* swap remaining 16-bit values */
1334             ds->swapArray16(ds,
1335                             inTable+offsets[converterListIndex],
1336                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1337                             outTable+offsets[converterListIndex],
1338                             pErrorCode);
1339             ds->swapArray16(ds,
1340                             inTable+offsets[taggedAliasArrayIndex],
1341                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1342                             outTable+offsets[taggedAliasArrayIndex],
1343                             pErrorCode);
1344         }
1345     }
1346 
1347     return headerSize+2*(int32_t)topOffset;
1348 }
1349 
1350 #endif
1351 
1352 
1353 /*
1354  * Hey, Emacs, please set the following:
1355  *
1356  * Local Variables:
1357  * indent-tabs-mode: nil
1358  * End:
1359  *
1360  */
1361