1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1999-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *
10 *  ucnv_io.cpp:
11 *  initializes global variables and defines functions pertaining to converter
12 *  name resolution aspect of the conversion code.
13 *
14 *   new implementation:
15 *
16 *   created on: 1999nov22
17 *   created by: Markus W. Scherer
18 *
19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
20 *   with aliases for converter names.
21 *
22 *   Date        Name        Description
23 *   11/22/1999  markus      Created
24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
25 *                           Now an alias can map to different converters
26 *                           depending on the specified standard.
27 *******************************************************************************
28 */
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_CONVERSION
33 
34 #include "unicode/ucnv.h"
35 #include "unicode/udata.h"
36 
37 #include "umutex.h"
38 #include "uarrsort.h"
39 #include "uassert.h"
40 #include "udataswp.h"
41 #include "cstring.h"
42 #include "cmemory.h"
43 #include "ucnv_io.h"
44 #include "uenumimp.h"
45 #include "ucln_cmn.h"
46 
47 /* Format of cnvalias.icu -----------------------------------------------------
48  *
49  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
50  * This binary form contains several tables. All indexes are to uint16_t
51  * units, and not to the bytes (uint8_t units). Addressing everything on
52  * 16-bit boundaries allows us to store more information with small index
53  * numbers, which are also 16-bit in size. The majority of the table (except
54  * the string table) are 16-bit numbers.
55  *
56  * First there is the size of the Table of Contents (TOC). The TOC
57  * entries contain the size of each section. In order to find the offset
 * of a section you just need to sum up the sizes of the sections before it.
59  * The TOC length and entries are an array of uint32_t values.
60  * The first section after the TOC starts immediately after the TOC.
61  *
62  * 1) This section contains a list of converters. This list contains indexes
63  * into the string table for the converter name. The index of this list is
64  * also used by other sections, which are mentioned later on.
65  * This list is not sorted.
66  *
67  * 2) This section contains a list of tags. This list contains indexes
68  * into the string table for the tag name. The index of this list is
69  * also used by other sections, which are mentioned later on.
70  * This list is in priority order of standards.
71  *
72  * 3) This section contains a list of sorted unique aliases. This
73  * list contains indexes into the string table for the alias name. The
74  * index of this list is also used by other sections, like the 4th section.
75  * The index for the 3rd and 4th section is used to get the
76  * alias -> converter name mapping. Section 3 and 4 form a two column table.
77  * Some of the most significant bits of each index may contain other
78  * information (see findConverter for details).
79  *
80  * 4) This section contains a list of mapped converter names. Consider this
81  * as a table that maps the 3rd section to the 1st section. This list contains
82  * indexes into the 1st section. The index of this list is the same index in
83  * the 3rd section. There is also some extra information in the high bits of
84  * each converter index in this table. Currently it's only used to say that
85  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
86  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
87  * the predigested form of the 5th section so that an alias lookup can be fast.
88  *
89  * 5) This section contains a 2D array with indexes to the 6th section. This
90  * section is the full form of all alias mappings. The column index is the
91  * index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
93  * third dimension is in the 6th section.
94  *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
96  * and is followed by indexes to alias names in the string table. This is
97  * the third dimension to the section 5. No other section should be referencing
98  * this section.
99  *
100  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
101  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
102  * what type of string normalization is used among other potential things in the
103  * future.
104  *
105  * 8) This is the string table. All strings are indexed on an even address.
106  * There are two reasons for this. First many chip architectures locate strings
107  * faster on even address boundaries. Second, since all indexes are 16-bit
108  * numbers, this string table can be 128KB in size instead of 64KB when we
109  * only have strings starting on an even address.
110  *
111  * 9) When present this is a set of prenormalized strings from section 8. This
112  * table contains normalized strings with the dashes and spaces stripped out,
113  * and all strings lowercased. In the future, the options in section 7 may state
114  * other types of normalization.
115  *
116  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
117  * has a unique alias among all converters. That same alias can
118  * be mentioned in other standards on different converters,
119  * but only one alias per tag can be unique.
120  *
121  *
122  *              Converter Names (Usually in TR22 form)
123  *           -------------------------------------------.
124  *     T    /                                          /|
125  *     a   /                                          / |
126  *     g  /                                          /  |
127  *     s /                                          /   |
128  *      /                                          /    |
129  *      ------------------------------------------/     |
130  *    A |                                         |     |
131  *    l |                                         |     |
132  *    i |                                         |    /
133  *    a |                                         |   /
134  *    s |                                         |  /
135  *    e |                                         | /
136  *    s |                                         |/
137  *      -------------------------------------------
138  *
139  *
140  *
141  * Here is what it really looks like. It's like swiss cheese.
142  * There are holes. Some converters aren't recognized by
143  * a standard, or they are really old converters that the
144  * standard doesn't recognize anymore.
145  *
146  *              Converter Names (Usually in TR22 form)
147  *           -------------------------------------------.
148  *     T    /##########################################/|
149  *     a   /     #            #                       /#
150  *     g  /  #      ##     ##     ### # ### ### ### #/
151  *     s / #             #####  ####        ##  ## #/#
152  *      / ### # # ##  #  #   #          ### # #   #/##
153  *      ------------------------------------------/# #
154  *    A |### # # ##  #  #   #          ### # #   #|# #
155  *    l |# # #    #     #               ## #     #|# #
156  *    i |# # #    #     #                #       #|#
157  *    a |#                                       #|#
158  *    s |                                        #|#
159  *    e
160  *    s
161  *
162  */
163 
/**
 * Iteration state kept in a UEnumeration's context by the standard-alias
 * enumeration callbacks in this file.
 */
struct UAliasContext {
    /* Position of the alias list (its count slot) inside
     * gMainTable.taggedAliasLists; 0 means "no list". */
    uint32_t listOffset;
    /* Index of the next alias to hand out from that list. */
    uint32_t listIdx;
};
typedef struct UAliasContext UAliasContext;
171 
172 static const char DATA_NAME[] = "cnvalias";
173 static const char DATA_TYPE[] = "icu";
174 
175 static UDataMemory *gAliasData=NULL;
176 static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
177 
/*
 * Positions of the entries in the uint32_t table of contents at the start of
 * cnvalias.icu. Entry 0 is the TOC length; entries 1..9 are section sizes.
 */
enum {
    tocLengthIndex             = 0,
    converterListIndex         = 1,
    tagListIndex               = 2,
    aliasListIndex             = 3,
    untaggedConvArrayIndex     = 4,
    taggedAliasArrayIndex      = 5,
    taggedAliasListsIndex      = 6,
    tableOptionsIndex          = 7,
    stringTableIndex           = 8,
    normalizedStringTableIndex = 9,
    offsetsCount               = 10, /* length of the swapper's temporary offsets[] */
    minTocLength               = 8   /* min. tocLength in the file, does not count the tocLengthIndex! */
};
192 
193 static const UConverterAliasOptions defaultTableOptions = {
194     UCNV_IO_UNNORMALIZED,
195     0 /* containsCnvOptionInfo */
196 };
197 static UConverterAlias gMainTable;
198 
199 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
200 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
201 
202 static UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)203 isAcceptable(void * /*context*/,
204              const char * /*type*/, const char * /*name*/,
205              const UDataInfo *pInfo) {
206     return (UBool)(
207         pInfo->size>=20 &&
208         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
209         pInfo->charsetFamily==U_CHARSET_FAMILY &&
210         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
211         pInfo->dataFormat[1]==0x76 &&
212         pInfo->dataFormat[2]==0x41 &&
213         pInfo->dataFormat[3]==0x6c &&
214         pInfo->formatVersion[0]==3);
215 }
216 
ucnv_io_cleanup(void)217 static UBool U_CALLCONV ucnv_io_cleanup(void)
218 {
219     if (gAliasData) {
220         udata_close(gAliasData);
221         gAliasData = NULL;
222     }
223     gAliasDataInitOnce.reset();
224 
225     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
226 
227     return TRUE;                   /* Everything was cleaned up */
228 }
229 
initAliasData(UErrorCode & errCode)230 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
231     UDataMemory *data;
232     const uint16_t *table;
233     const uint32_t *sectionSizes;
234     uint32_t tableStart;
235     uint32_t currOffset;
236 
237     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
238 
239     U_ASSERT(gAliasData == NULL);
240     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
241     if(U_FAILURE(errCode)) {
242         return;
243     }
244 
245     sectionSizes = (const uint32_t *)udata_getMemory(data);
246     table = (const uint16_t *)sectionSizes;
247 
248     tableStart      = sectionSizes[0];
249     if (tableStart < minTocLength) {
250         errCode = U_INVALID_FORMAT_ERROR;
251         udata_close(data);
252         return;
253     }
254     gAliasData = data;
255 
256     gMainTable.converterListSize      = sectionSizes[1];
257     gMainTable.tagListSize            = sectionSizes[2];
258     gMainTable.aliasListSize          = sectionSizes[3];
259     gMainTable.untaggedConvArraySize  = sectionSizes[4];
260     gMainTable.taggedAliasArraySize   = sectionSizes[5];
261     gMainTable.taggedAliasListsSize   = sectionSizes[6];
262     gMainTable.optionTableSize        = sectionSizes[7];
263     gMainTable.stringTableSize        = sectionSizes[8];
264 
265     if (tableStart > 8) {
266         gMainTable.normalizedStringTableSize = sectionSizes[9];
267     }
268 
269     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
270     gMainTable.converterList = table + currOffset;
271 
272     currOffset += gMainTable.converterListSize;
273     gMainTable.tagList = table + currOffset;
274 
275     currOffset += gMainTable.tagListSize;
276     gMainTable.aliasList = table + currOffset;
277 
278     currOffset += gMainTable.aliasListSize;
279     gMainTable.untaggedConvArray = table + currOffset;
280 
281     currOffset += gMainTable.untaggedConvArraySize;
282     gMainTable.taggedAliasArray = table + currOffset;
283 
284     /* aliasLists is a 1's based array, but it has a padding character */
285     currOffset += gMainTable.taggedAliasArraySize;
286     gMainTable.taggedAliasLists = table + currOffset;
287 
288     currOffset += gMainTable.taggedAliasListsSize;
289     if (gMainTable.optionTableSize > 0
290         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
291     {
292         /* Faster table */
293         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
294     }
295     else {
296         /* Smaller table, or I can't handle this normalization mode!
297         Use the original slower table lookup. */
298         gMainTable.optionTable = &defaultTableOptions;
299     }
300 
301     currOffset += gMainTable.optionTableSize;
302     gMainTable.stringTable = table + currOffset;
303 
304     currOffset += gMainTable.stringTableSize;
305     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
306         ? gMainTable.stringTable : (table + currOffset));
307 }
308 
309 
310 static UBool
haveAliasData(UErrorCode * pErrorCode)311 haveAliasData(UErrorCode *pErrorCode) {
312     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
313     return U_SUCCESS(*pErrorCode);
314 }
315 
316 static inline UBool
isAlias(const char * alias,UErrorCode * pErrorCode)317 isAlias(const char *alias, UErrorCode *pErrorCode) {
318     if(alias==NULL) {
319         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
320         return FALSE;
321     }
322     return (UBool)(*alias!=0);
323 }
324 
getTagNumber(const char * tagname)325 static uint32_t getTagNumber(const char *tagname) {
326     if (gMainTable.tagList) {
327         uint32_t tagNum;
328         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
329             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
330                 return tagNum;
331             }
332         }
333     }
334 
335     return UINT32_MAX;
336 }
337 
/*
 * Character types relevant for ucnv_compareNames() and the strip functions.
 * A table value is one of UIGNORE, ZERO or NONZERO, or (for letters) the
 * code of the corresponding lowercase letter, which is always >= MINLETTER.
 */
enum {
    UIGNORE,
    ZERO,
    NONZERO,
    MINLETTER /* any values from here on are lowercase letter mappings */
};

/* character types for ASCII 00..7F */
static const uint8_t asciiTypes[128] = {
    /* 00..0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F */ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40..4F */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 50..5F */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60..6F */ 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    /* 70..7F */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};

/*
 * Type of a char in the ASCII table; bytes with the high bit set are ignored.
 * The argument is fully parenthesized so that an expression argument indexes
 * the table with the value of the whole expression.
 */
#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)(c)] : (uint8_t)UIGNORE)

/* character types for EBCDIC 80..FF */
static const uint8_t ebcdicTypes[128] = {
    /* 80..8F */ 0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    /* 90..9F */ 0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    /* A0..AF */ 0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* B0..BF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* C0..CF */ 0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    /* D0..DF */ 0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    /* E0..EF */ 0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* F0..FF */ ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/* Type of a char in the EBCDIC table; bytes without the high bit set are ignored. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)

/* GET_CHAR_TYPE classifies a char in the charset family this build uses. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
381 
382 /* @see ucnv_compareNames */
383 U_CFUNC char * U_EXPORT2
ucnv_io_stripASCIIForCompare(char * dst,const char * name)384 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
385     char *dstItr = dst;
386     uint8_t type, nextType;
387     char c1;
388     UBool afterDigit = FALSE;
389 
390     while ((c1 = *name++) != 0) {
391         type = GET_ASCII_TYPE(c1);
392         switch (type) {
393         case UIGNORE:
394             afterDigit = FALSE;
395             continue; /* ignore all but letters and digits */
396         case ZERO:
397             if (!afterDigit) {
398                 nextType = GET_ASCII_TYPE(*name);
399                 if (nextType == ZERO || nextType == NONZERO) {
400                     continue; /* ignore leading zero before another digit */
401                 }
402             }
403             break;
404         case NONZERO:
405             afterDigit = TRUE;
406             break;
407         default:
408             c1 = (char)type; /* lowercased letter */
409             afterDigit = FALSE;
410             break;
411         }
412         *dstItr++ = c1;
413     }
414     *dstItr = 0;
415     return dst;
416 }
417 
418 U_CFUNC char * U_EXPORT2
ucnv_io_stripEBCDICForCompare(char * dst,const char * name)419 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
420     char *dstItr = dst;
421     uint8_t type, nextType;
422     char c1;
423     UBool afterDigit = FALSE;
424 
425     while ((c1 = *name++) != 0) {
426         type = GET_EBCDIC_TYPE(c1);
427         switch (type) {
428         case UIGNORE:
429             afterDigit = FALSE;
430             continue; /* ignore all but letters and digits */
431         case ZERO:
432             if (!afterDigit) {
433                 nextType = GET_EBCDIC_TYPE(*name);
434                 if (nextType == ZERO || nextType == NONZERO) {
435                     continue; /* ignore leading zero before another digit */
436                 }
437             }
438             break;
439         case NONZERO:
440             afterDigit = TRUE;
441             break;
442         default:
443             c1 = (char)type; /* lowercased letter */
444             afterDigit = FALSE;
445             break;
446         }
447         *dstItr++ = c1;
448     }
449     *dstItr = 0;
450     return dst;
451 }
452 
453 /**
454  * Do a fuzzy compare of two converter/alias names.
455  * The comparison is case-insensitive, ignores leading zeroes if they are not
456  * followed by further digits, and ignores all but letters and digits.
457  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
458  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
459  * at http://www.unicode.org/reports/tr22/
460  *
461  * This is a symmetrical (commutative) operation; order of arguments
462  * is insignificant.  This is an important property for sorting the
463  * list (when the list is preprocessed into binary form) and for
464  * performing binary searches on it at run time.
465  *
466  * @param name1 a converter name or alias, zero-terminated
467  * @param name2 a converter name or alias, zero-terminated
468  * @return 0 if the names match, or a negative value if the name1
469  * lexically precedes name2, or a positive value if the name1
470  * lexically follows name2.
471  *
472  * @see ucnv_io_stripForCompare
473  */
474 U_CAPI int U_EXPORT2
ucnv_compareNames(const char * name1,const char * name2)475 ucnv_compareNames(const char *name1, const char *name2) {
476     int rc;
477     uint8_t type, nextType;
478     char c1, c2;
479     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
480 
481     for (;;) {
482         while ((c1 = *name1++) != 0) {
483             type = GET_CHAR_TYPE(c1);
484             switch (type) {
485             case UIGNORE:
486                 afterDigit1 = FALSE;
487                 continue; /* ignore all but letters and digits */
488             case ZERO:
489                 if (!afterDigit1) {
490                     nextType = GET_CHAR_TYPE(*name1);
491                     if (nextType == ZERO || nextType == NONZERO) {
492                         continue; /* ignore leading zero before another digit */
493                     }
494                 }
495                 break;
496             case NONZERO:
497                 afterDigit1 = TRUE;
498                 break;
499             default:
500                 c1 = (char)type; /* lowercased letter */
501                 afterDigit1 = FALSE;
502                 break;
503             }
504             break; /* deliver c1 */
505         }
506         while ((c2 = *name2++) != 0) {
507             type = GET_CHAR_TYPE(c2);
508             switch (type) {
509             case UIGNORE:
510                 afterDigit2 = FALSE;
511                 continue; /* ignore all but letters and digits */
512             case ZERO:
513                 if (!afterDigit2) {
514                     nextType = GET_CHAR_TYPE(*name2);
515                     if (nextType == ZERO || nextType == NONZERO) {
516                         continue; /* ignore leading zero before another digit */
517                     }
518                 }
519                 break;
520             case NONZERO:
521                 afterDigit2 = TRUE;
522                 break;
523             default:
524                 c2 = (char)type; /* lowercased letter */
525                 afterDigit2 = FALSE;
526                 break;
527             }
528             break; /* deliver c2 */
529         }
530 
531         /* If we reach the ends of both strings then they match */
532         if ((c1|c2)==0) {
533             return 0;
534         }
535 
536         /* Case-insensitive comparison */
537         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
538         if (rc != 0) {
539             return rc;
540         }
541     }
542 }
543 
544 /*
545  * search for an alias
546  * return the converter number index for gConverterList
547  */
548 static inline uint32_t
findConverter(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)549 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
550     uint32_t mid, start, limit;
551     uint32_t lastMid;
552     int result;
553     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
554     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
555 
556     if (!isUnnormalized) {
557         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
558             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
559             return UINT32_MAX;
560         }
561 
562         /* Lower case and remove ignoreable characters. */
563         ucnv_io_stripForCompare(strippedName, alias);
564         alias = strippedName;
565     }
566 
567     /* do a binary search for the alias */
568     start = 0;
569     limit = gMainTable.untaggedConvArraySize;
570     mid = limit;
571     lastMid = UINT32_MAX;
572 
573     for (;;) {
574         mid = (uint32_t)((start + limit) / 2);
575         if (lastMid == mid) {   /* Have we moved? */
576             break;  /* We haven't moved, and it wasn't found. */
577         }
578         lastMid = mid;
579         if (isUnnormalized) {
580             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
581         }
582         else {
583             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
584         }
585 
586         if (result < 0) {
587             limit = mid;
588         } else if (result > 0) {
589             start = mid;
590         } else {
591             /* Since the gencnval tool folds duplicates into one entry,
592              * this alias in gAliasList is unique, but different standards
593              * may map an alias to different converters.
594              */
595             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
596                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
597             }
598             /* State whether the canonical converter name contains an option.
599             This information is contained in this list in order to maintain backward & forward compatibility. */
600             if (containsOption) {
601                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
602                 *containsOption = (UBool)((containsCnvOptionInfo
603                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
604                     || !containsCnvOptionInfo);
605             }
606             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
607         }
608     }
609 
610     return UINT32_MAX;
611 }
612 
613 /*
614  * Is this alias in this list?
615  * alias and listOffset should be non-NULL.
616  */
617 static inline UBool
isAliasInList(const char * alias,uint32_t listOffset)618 isAliasInList(const char *alias, uint32_t listOffset) {
619     if (listOffset) {
620         uint32_t currAlias;
621         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
622         /* +1 to skip listCount */
623         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
624         for (currAlias = 0; currAlias < listCount; currAlias++) {
625             if (currList[currAlias]
626                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
627             {
628                 return TRUE;
629             }
630         }
631     }
632     return FALSE;
633 }
634 
635 /*
636  * Search for an standard name of an alias (what is the default name
637  * that this standard uses?)
638  * return the listOffset for gTaggedAliasLists. If it's 0,
639  * the it couldn't be found, but the parameters are valid.
640  */
641 static uint32_t
findTaggedAliasListsOffset(const char * alias,const char * standard,UErrorCode * pErrorCode)642 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
643     uint32_t idx;
644     uint32_t listOffset;
645     uint32_t convNum;
646     UErrorCode myErr = U_ZERO_ERROR;
647     uint32_t tagNum = getTagNumber(standard);
648 
649     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
650     convNum = findConverter(alias, NULL, &myErr);
651     if (myErr != U_ZERO_ERROR) {
652         *pErrorCode = myErr;
653     }
654 
655     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
656         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
657         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
658             return listOffset;
659         }
660         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
661             /* Uh Oh! They used an ambiguous alias.
662                We have to search the whole swiss cheese starting
663                at the highest standard affinity.
664                This may take a while.
665             */
666             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
667                 listOffset = gMainTable.taggedAliasArray[idx];
668                 if (listOffset && isAliasInList(alias, listOffset)) {
669                     uint32_t currTagNum = idx/gMainTable.converterListSize;
670                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
671                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
672                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
673                         return tempListOffset;
674                     }
675                     /* else keep on looking */
676                     /* We could speed this up by starting on the next row
677                        because an alias is unique per row, right now.
678                        This would change if alias versioning appears. */
679                 }
680             }
681             /* The standard doesn't know about the alias */
682         }
683         /* else no default name */
684         return 0;
685     }
686     /* else converter or tag not found */
687 
688     return UINT32_MAX;
689 }
690 
691 /* Return the canonical name */
692 static uint32_t
findTaggedConverterNum(const char * alias,const char * standard,UErrorCode * pErrorCode)693 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
694     uint32_t idx;
695     uint32_t listOffset;
696     uint32_t convNum;
697     UErrorCode myErr = U_ZERO_ERROR;
698     uint32_t tagNum = getTagNumber(standard);
699 
700     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
701     convNum = findConverter(alias, NULL, &myErr);
702     if (myErr != U_ZERO_ERROR) {
703         *pErrorCode = myErr;
704     }
705 
706     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
707         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
708         if (listOffset && isAliasInList(alias, listOffset)) {
709             return convNum;
710         }
711         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
712             /* Uh Oh! They used an ambiguous alias.
713                We have to search one slice of the swiss cheese.
714                We search only in the requested tag, not the whole thing.
715                This may take a while.
716             */
717             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
718             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
719             for (idx = convStart; idx < convLimit; idx++) {
720                 listOffset = gMainTable.taggedAliasArray[idx];
721                 if (listOffset && isAliasInList(alias, listOffset)) {
722                     return idx-convStart;
723                 }
724             }
725             /* The standard doesn't know about the alias */
726         }
727         /* else no canonical name */
728     }
729     /* else converter or tag not found */
730 
731     return UINT32_MAX;
732 }
733 
734 
735 
736 U_CFUNC const char *
ucnv_io_getConverterName(const char * alias,UBool * containsOption,UErrorCode * pErrorCode)737 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
738     const char *aliasTmp = alias;
739     int32_t i = 0;
740     for (i = 0; i < 2; i++) {
741         if (i == 1) {
742             /*
743              * After the first unsuccess converter lookup, check to see if
744              * the name begins with 'x-'. If it does, strip it off and try
745              * again.  This behaviour is similar to how ICU4J does it.
746              */
747             if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
748                 aliasTmp = aliasTmp+2;
749             } else {
750                 break;
751             }
752         }
753         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
754             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
755             if (convNum < gMainTable.converterListSize) {
756                 return GET_STRING(gMainTable.converterList[convNum]);
757             }
758             /* else converter not found */
759         } else {
760             break;
761         }
762     }
763 
764     return NULL;
765 }
766 
767 static int32_t U_CALLCONV
ucnv_io_countStandardAliases(UEnumeration * enumerator,UErrorCode *)768 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
769     int32_t value = 0;
770     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
771     uint32_t listOffset = myContext->listOffset;
772 
773     if (listOffset) {
774         value = gMainTable.taggedAliasLists[listOffset];
775     }
776     return value;
777 }
778 
779 static const char* U_CALLCONV
ucnv_io_nextStandardAliases(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)780 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
781                             int32_t* resultLength,
782                             UErrorCode * /*pErrorCode*/)
783 {
784     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
785     uint32_t listOffset = myContext->listOffset;
786 
787     if (listOffset) {
788         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
789         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
790 
791         if (myContext->listIdx < listCount) {
792             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
793             if (resultLength) {
794                 *resultLength = (int32_t)uprv_strlen(myStr);
795             }
796             return myStr;
797         }
798     }
799     /* Either we accessed a zero length list, or we enumerated too far. */
800     if (resultLength) {
801         *resultLength = 0;
802     }
803     return NULL;
804 }
805 
806 static void U_CALLCONV
ucnv_io_resetStandardAliases(UEnumeration * enumerator,UErrorCode *)807 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
808     ((UAliasContext *)(enumerator->context))->listIdx = 0;
809 }
810 
811 static void U_CALLCONV
ucnv_io_closeUEnumeration(UEnumeration * enumerator)812 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
813     uprv_free(enumerator->context);
814     uprv_free(enumerator);
815 }
816 
817 /* Enumerate the aliases for the specified converter and standard tag */
818 static const UEnumeration gEnumAliases = {
819     NULL,
820     NULL,
821     ucnv_io_closeUEnumeration,
822     ucnv_io_countStandardAliases,
823     uenum_unextDefault,
824     ucnv_io_nextStandardAliases,
825     ucnv_io_resetStandardAliases
826 };
827 
828 U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char * convName,const char * standard,UErrorCode * pErrorCode)829 ucnv_openStandardNames(const char *convName,
830                        const char *standard,
831                        UErrorCode *pErrorCode)
832 {
833     UEnumeration *myEnum = NULL;
834     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
835         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
836 
837         /* When listOffset == 0, we want to acknowledge that the
838            converter name and standard are okay, but there
839            is nothing to enumerate. */
840         if (listOffset < gMainTable.taggedAliasListsSize) {
841             UAliasContext *myContext;
842 
843             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
844             if (myEnum == NULL) {
845                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
846                 return NULL;
847             }
848             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
849             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
850             if (myContext == NULL) {
851                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
852                 uprv_free(myEnum);
853                 return NULL;
854             }
855             myContext->listOffset = listOffset;
856             myContext->listIdx = 0;
857             myEnum->context = myContext;
858         }
859         /* else converter or tag not found */
860     }
861     return myEnum;
862 }
863 
864 static uint16_t
ucnv_io_countAliases(const char * alias,UErrorCode * pErrorCode)865 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
866     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
867         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
868         if (convNum < gMainTable.converterListSize) {
869             /* tagListNum - 1 is the ALL tag */
870             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
871 
872             if (listOffset) {
873                 return gMainTable.taggedAliasLists[listOffset];
874             }
875             /* else this shouldn't happen. internal program error */
876         }
877         /* else converter not found */
878     }
879     return 0;
880 }
881 
882 static uint16_t
ucnv_io_getAliases(const char * alias,uint16_t start,const char ** aliases,UErrorCode * pErrorCode)883 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
884     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
885         uint32_t currAlias;
886         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
887         if (convNum < gMainTable.converterListSize) {
888             /* tagListNum - 1 is the ALL tag */
889             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
890 
891             if (listOffset) {
892                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
893                 /* +1 to skip listCount */
894                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
895 
896                 for (currAlias = start; currAlias < listCount; currAlias++) {
897                     aliases[currAlias] = GET_STRING(currList[currAlias]);
898                 }
899             }
900             /* else this shouldn't happen. internal program error */
901         }
902         /* else converter not found */
903     }
904     return 0;
905 }
906 
907 static const char *
ucnv_io_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)908 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
909     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
910         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
911         if (convNum < gMainTable.converterListSize) {
912             /* tagListNum - 1 is the ALL tag */
913             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
914 
915             if (listOffset) {
916                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
917                 /* +1 to skip listCount */
918                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
919 
920                 if (n < listCount)  {
921                     return GET_STRING(currList[n]);
922                 }
923                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
924             }
925             /* else this shouldn't happen. internal program error */
926         }
927         /* else converter not found */
928     }
929     return NULL;
930 }
931 
932 static uint16_t
ucnv_io_countStandards(UErrorCode * pErrorCode)933 ucnv_io_countStandards(UErrorCode *pErrorCode) {
934     if (haveAliasData(pErrorCode)) {
935         /* Don't include the empty list */
936         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
937     }
938 
939     return 0;
940 }
941 
942 U_CAPI const char * U_EXPORT2
ucnv_getStandard(uint16_t n,UErrorCode * pErrorCode)943 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
944     if (haveAliasData(pErrorCode)) {
945         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
946             return GET_STRING(gMainTable.tagList[n]);
947         }
948         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
949     }
950 
951     return NULL;
952 }
953 
954 U_CAPI const char * U_EXPORT2
ucnv_getStandardName(const char * alias,const char * standard,UErrorCode * pErrorCode)955 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
956     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
957         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
958 
959         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
960             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
961 
962             /* Get the preferred name from this list */
963             if (currList[0]) {
964                 return GET_STRING(currList[0]);
965             }
966             /* else someone screwed up the alias table. */
967             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
968         }
969     }
970 
971     return NULL;
972 }
973 
974 U_CAPI uint16_t U_EXPORT2
ucnv_countAliases(const char * alias,UErrorCode * pErrorCode)975 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
976 {
977     return ucnv_io_countAliases(alias, pErrorCode);
978 }
979 
980 
981 U_CAPI const char* U_EXPORT2
ucnv_getAlias(const char * alias,uint16_t n,UErrorCode * pErrorCode)982 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
983 {
984     return ucnv_io_getAlias(alias, n, pErrorCode);
985 }
986 
987 U_CAPI void U_EXPORT2
ucnv_getAliases(const char * alias,const char ** aliases,UErrorCode * pErrorCode)988 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
989 {
990     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
991 }
992 
993 U_CAPI uint16_t U_EXPORT2
ucnv_countStandards(void)994 ucnv_countStandards(void)
995 {
996     UErrorCode err = U_ZERO_ERROR;
997     return ucnv_io_countStandards(&err);
998 }
999 
1000 U_CAPI const char * U_EXPORT2
ucnv_getCanonicalName(const char * alias,const char * standard,UErrorCode * pErrorCode)1001 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
1002     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
1003         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
1004 
1005         if (convNum < gMainTable.converterListSize) {
1006             return GET_STRING(gMainTable.converterList[convNum]);
1007         }
1008     }
1009 
1010     return NULL;
1011 }
1012 
1013 static int32_t U_CALLCONV
ucnv_io_countAllConverters(UEnumeration *,UErrorCode *)1014 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
1015     return gMainTable.converterListSize;
1016 }
1017 
1018 static const char* U_CALLCONV
ucnv_io_nextAllConverters(UEnumeration * enumerator,int32_t * resultLength,UErrorCode *)1019 ucnv_io_nextAllConverters(UEnumeration *enumerator,
1020                             int32_t* resultLength,
1021                             UErrorCode * /*pErrorCode*/)
1022 {
1023     uint16_t *myContext = (uint16_t *)(enumerator->context);
1024 
1025     if (*myContext < gMainTable.converterListSize) {
1026         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
1027         if (resultLength) {
1028             *resultLength = (int32_t)uprv_strlen(myStr);
1029         }
1030         return myStr;
1031     }
1032     /* Either we accessed a zero length list, or we enumerated too far. */
1033     if (resultLength) {
1034         *resultLength = 0;
1035     }
1036     return NULL;
1037 }
1038 
1039 static void U_CALLCONV
ucnv_io_resetAllConverters(UEnumeration * enumerator,UErrorCode *)1040 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
1041     *((uint16_t *)(enumerator->context)) = 0;
1042 }
1043 
1044 static const UEnumeration gEnumAllConverters = {
1045     NULL,
1046     NULL,
1047     ucnv_io_closeUEnumeration,
1048     ucnv_io_countAllConverters,
1049     uenum_unextDefault,
1050     ucnv_io_nextAllConverters,
1051     ucnv_io_resetAllConverters
1052 };
1053 
1054 U_CAPI UEnumeration * U_EXPORT2
ucnv_openAllNames(UErrorCode * pErrorCode)1055 ucnv_openAllNames(UErrorCode *pErrorCode) {
1056     UEnumeration *myEnum = NULL;
1057     if (haveAliasData(pErrorCode)) {
1058         uint16_t *myContext;
1059 
1060         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
1061         if (myEnum == NULL) {
1062             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1063             return NULL;
1064         }
1065         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
1066         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
1067         if (myContext == NULL) {
1068             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1069             uprv_free(myEnum);
1070             return NULL;
1071         }
1072         *myContext = 0;
1073         myEnum->context = myContext;
1074     }
1075     return myEnum;
1076 }
1077 
1078 U_CFUNC uint16_t
ucnv_io_countKnownConverters(UErrorCode * pErrorCode)1079 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
1080     if (haveAliasData(pErrorCode)) {
1081         return (uint16_t)gMainTable.converterListSize;
1082     }
1083     return 0;
1084 }
1085 
1086 /* alias table swapping ----------------------------------------------------- */
1087 
1088 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
1089 
/*
 * One row of the temporary array used for re-sorting.
 *
 * Each row holds a platform-endian charset string index plus the row's
 * original position. After the array has been sorted by string, the actual
 * arrays are permuted according to the recorded positions.
 */
typedef struct TempRow {
    uint16_t strIndex;   /* string index read from the alias list */
    uint16_t sortIndex;  /* position of this row before sorting */
} TempRow;
1100 
1101 typedef struct TempAliasTable {
1102     const char *chars;
1103     TempRow *rows;
1104     uint16_t *resort;
1105     StripForCompareFn *stripForCompare;
1106 } TempAliasTable;
1107 
/*
 * Number of rows that ucnv_swapAliases() keeps in stack arrays; larger
 * alias lists use heap memory instead.
 */
enum {
    STACK_ROW_CAPACITY = 500
};
1111 
1112 static int32_t
io_compareRows(const void * context,const void * left,const void * right)1113 io_compareRows(const void *context, const void *left, const void *right) {
1114     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
1115          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
1116 
1117     TempAliasTable *tempTable=(TempAliasTable *)context;
1118     const char *chars=tempTable->chars;
1119 
1120     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
1121                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
1122 }
1123 
1124 U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)1125 ucnv_swapAliases(const UDataSwapper *ds,
1126                  const void *inData, int32_t length, void *outData,
1127                  UErrorCode *pErrorCode) {
1128     const UDataInfo *pInfo;
1129     int32_t headerSize;
1130 
1131     const uint16_t *inTable;
1132     const uint32_t *inSectionSizes;
1133     uint32_t toc[offsetsCount];
1134     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
1135     uint32_t i, count, tocLength, topOffset;
1136 
1137     TempRow rows[STACK_ROW_CAPACITY];
1138     uint16_t resort[STACK_ROW_CAPACITY];
1139     TempAliasTable tempTable;
1140 
1141     /* udata_swapDataHeader checks the arguments */
1142     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1143     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1144         return 0;
1145     }
1146 
1147     /* check data format and format version */
1148     pInfo=(const UDataInfo *)((const char *)inData+4);
1149     if(!(
1150         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
1151         pInfo->dataFormat[1]==0x76 &&
1152         pInfo->dataFormat[2]==0x41 &&
1153         pInfo->dataFormat[3]==0x6c &&
1154         pInfo->formatVersion[0]==3
1155     )) {
1156         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
1157                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1158                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1159                          pInfo->formatVersion[0]);
1160         *pErrorCode=U_UNSUPPORTED_ERROR;
1161         return 0;
1162     }
1163 
1164     /* an alias table must contain at least the table of contents array */
1165     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
1166         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1167                          length-headerSize);
1168         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1169         return 0;
1170     }
1171 
1172     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
1173     inTable=(const uint16_t *)inSectionSizes;
1174     uprv_memset(toc, 0, sizeof(toc));
1175     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
1176     if(tocLength<minTocLength || offsetsCount<=tocLength) {
1177         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
1178         *pErrorCode=U_INVALID_FORMAT_ERROR;
1179         return 0;
1180     }
1181 
1182     /* read the known part of the table of contents */
1183     for(i=converterListIndex; i<=tocLength; ++i) {
1184         toc[i]=ds->readUInt32(inSectionSizes[i]);
1185     }
1186 
1187     /* compute offsets */
1188     uprv_memset(offsets, 0, sizeof(offsets));
1189     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
1190     for(i=tagListIndex; i<=tocLength; ++i) {
1191         offsets[i]=offsets[i-1]+toc[i-1];
1192     }
1193 
1194     /* compute the overall size of the after-header data, in numbers of 16-bit units */
1195     topOffset=offsets[i-1]+toc[i-1];
1196 
1197     if(length>=0) {
1198         uint16_t *outTable;
1199         const uint16_t *p, *p2;
1200         uint16_t *q, *q2;
1201         uint16_t oldIndex;
1202 
1203         if((length-headerSize)<(2*(int32_t)topOffset)) {
1204             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
1205                              length-headerSize);
1206             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1207             return 0;
1208         }
1209 
1210         outTable=(uint16_t *)((char *)outData+headerSize);
1211 
1212         /* swap the entire table of contents */
1213         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
1214 
1215         /* swap unormalized strings & normalized strings */
1216         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
1217                              outTable+offsets[stringTableIndex], pErrorCode);
1218         if(U_FAILURE(*pErrorCode)) {
1219             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
1220             return 0;
1221         }
1222 
1223         if(ds->inCharset==ds->outCharset) {
1224             /* no need to sort, just swap all 16-bit values together */
1225             ds->swapArray16(ds,
1226                             inTable+offsets[converterListIndex],
1227                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
1228                             outTable+offsets[converterListIndex],
1229                             pErrorCode);
1230         } else {
1231             /* allocate the temporary table for sorting */
1232             count=toc[aliasListIndex];
1233 
1234             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
1235 
1236             if(count<=STACK_ROW_CAPACITY) {
1237                 tempTable.rows=rows;
1238                 tempTable.resort=resort;
1239             } else {
1240                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
1241                 if(tempTable.rows==NULL) {
1242                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
1243                                      count);
1244                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1245                     return 0;
1246                 }
1247                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
1248             }
1249 
1250             if(ds->outCharset==U_ASCII_FAMILY) {
1251                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
1252             } else /* U_EBCDIC_FAMILY */ {
1253                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
1254             }
1255 
1256             /*
1257              * Sort unique aliases+mapped names.
1258              *
1259              * We need to sort the list again by outCharset strings because they
1260              * sort differently for different charset families.
1261              * First we set up a temporary table with the string indexes and
1262              * sorting indexes and sort that.
1263              * Then we permutate and copy/swap the actual values.
1264              */
1265             p=inTable+offsets[aliasListIndex];
1266             q=outTable+offsets[aliasListIndex];
1267 
1268             p2=inTable+offsets[untaggedConvArrayIndex];
1269             q2=outTable+offsets[untaggedConvArrayIndex];
1270 
1271             for(i=0; i<count; ++i) {
1272                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
1273                 tempTable.rows[i].sortIndex=(uint16_t)i;
1274             }
1275 
1276             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
1277                            io_compareRows, &tempTable,
1278                            FALSE, pErrorCode);
1279 
1280             if(U_SUCCESS(*pErrorCode)) {
1281                 /* copy/swap/permutate items */
1282                 if(p!=q) {
1283                     for(i=0; i<count; ++i) {
1284                         oldIndex=tempTable.rows[i].sortIndex;
1285                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
1286                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
1287                     }
1288                 } else {
1289                     /*
1290                      * If we swap in-place, then the permutation must use another
1291                      * temporary array (tempTable.resort)
1292                      * before the results are copied to the outBundle.
1293                      */
1294                     uint16_t *r=tempTable.resort;
1295 
1296                     for(i=0; i<count; ++i) {
1297                         oldIndex=tempTable.rows[i].sortIndex;
1298                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
1299                     }
1300                     uprv_memcpy(q, r, 2*count);
1301 
1302                     for(i=0; i<count; ++i) {
1303                         oldIndex=tempTable.rows[i].sortIndex;
1304                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
1305                     }
1306                     uprv_memcpy(q2, r, 2*count);
1307                 }
1308             }
1309 
1310             if(tempTable.rows!=rows) {
1311                 uprv_free(tempTable.rows);
1312             }
1313 
1314             if(U_FAILURE(*pErrorCode)) {
1315                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
1316                                  count);
1317                 return 0;
1318             }
1319 
1320             /* swap remaining 16-bit values */
1321             ds->swapArray16(ds,
1322                             inTable+offsets[converterListIndex],
1323                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
1324                             outTable+offsets[converterListIndex],
1325                             pErrorCode);
1326             ds->swapArray16(ds,
1327                             inTable+offsets[taggedAliasArrayIndex],
1328                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
1329                             outTable+offsets[taggedAliasArrayIndex],
1330                             pErrorCode);
1331         }
1332     }
1333 
1334     return headerSize+2*(int32_t)topOffset;
1335 }
1336 
1337 #endif
1338 
1339 
1340 /*
1341  * Hey, Emacs, please set the following:
1342  *
1343  * Local Variables:
1344  * indent-tabs-mode: nil
1345  * End:
1346  *
1347  */
1348