1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  swapimpl.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005may05
16 *   created by: Markus W. Scherer
17 *
18 *   Data file swapping functions moved here from the common library
19 *   because some data is hardcoded in ICU4C and needs not be swapped any more.
20 *   Moving the functions here simplifies testing (for code coverage) because
21 *   we need not jump through hoops (like adding snapshots of these files
22 *   to testdata).
23 *
24 *   The declarations for these functions remain in the internal header files
25 *   in icu/source/common/
26 */
27 
28 #include "unicode/utypes.h"
29 #include "unicode/putil.h"
30 #include "unicode/udata.h"
31 
32 /* Explicit include statement for std_string.h is needed
33  * for compilation on certain platforms. (e.g. AIX/VACPP)
34  */
35 #include "unicode/std_string.h"
36 
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "uassert.h"
41 #include "uarrsort.h"
42 #include "ucmndata.h"
43 #include "udataswp.h"
44 #include "ulayout_props.h"
45 
46 /* swapping implementations in common */
47 
48 #include "emojiprops.h"
49 #include "uresdata.h"
50 #include "ucnv_io.h"
51 #include "uprops.h"
52 #include "ucase.h"
53 #include "ubidi_props.h"
54 #include "ucol_swp.h"
55 #include "ucnv_bld.h"
56 #include "unormimp.h"
57 #include "normalizer2impl.h"
58 #include "sprpimpl.h"
59 #include "propname.h"
60 #include "rbbidata.h"
61 #include "utrie.h"
62 #include "utrie2.h"
63 #include "dictionarydata.h"
64 
65 /* swapping implementations in i18n */
66 
67 #if !UCONFIG_NO_NORMALIZATION
68 #include "uspoof_impl.h"
69 #endif
70 
71 U_NAMESPACE_USE
72 
73 /* definitions */
74 
75 /* Unicode property (value) aliases data swapping --------------------------- */
76 
77 static int32_t U_CALLCONV
upname_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)78 upname_swap(const UDataSwapper *ds,
79             const void *inData, int32_t length, void *outData,
80             UErrorCode *pErrorCode) {
81     /* udata_swapDataHeader checks the arguments */
82     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
83     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
84         return 0;
85     }
86 
87     /* check data format and format version */
88     const UDataInfo *pInfo=
89         reinterpret_cast<const UDataInfo *>(
90             static_cast<const char *>(inData)+4);
91     if(!(
92         pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
93         pInfo->dataFormat[1]==0x6e &&
94         pInfo->dataFormat[2]==0x61 &&
95         pInfo->dataFormat[3]==0x6d &&
96         pInfo->formatVersion[0]==2
97     )) {
98         udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
99                          pInfo->dataFormat[0], pInfo->dataFormat[1],
100                          pInfo->dataFormat[2], pInfo->dataFormat[3],
101                          pInfo->formatVersion[0]);
102         *pErrorCode=U_UNSUPPORTED_ERROR;
103         return 0;
104     }
105 
106     const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
107     uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
108 
109     if(length>=0) {
110         length-=headerSize;
111         // formatVersion 2 initially has indexes[8], 32 bytes.
112         if(length<32) {
113             udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
114                              (int)length);
115             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
116             return 0;
117         }
118     }
119 
120     const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
121     int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
122     if(length>=0) {
123         if(length<totalSize) {
124             udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
125                              "for pnames.icu\n",
126                              (int)length, (int)totalSize);
127             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
128             return 0;
129         }
130 
131         int32_t numBytesIndexesAndValueMaps=
132             udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
133 
134         // Swap the indexes[] and the valueMaps[].
135         ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
136 
137         // Copy the rest of the data.
138         if(inBytes!=outBytes) {
139             uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
140                         inBytes+numBytesIndexesAndValueMaps,
141                         totalSize-numBytesIndexesAndValueMaps);
142         }
143 
144         // We need not swap anything else:
145         //
146         // The ByteTries are already byte-serialized, and are fixed on ASCII.
147         // (On an EBCDIC machine, the input string is converted to lowercase ASCII
148         // while matching.)
149         //
150         // The name groups are mostly invariant characters, but since we only
151         // generate, and keep in subversion, ASCII versions of pnames.icu,
152         // and since only ICU4J uses the pnames.icu data file
153         // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
154         // we just copy those bytes too.
155     }
156 
157     return headerSize+totalSize;
158 }
159 
160 /* Unicode properties data swapping ----------------------------------------- */
161 
162 static int32_t U_CALLCONV
uprops_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)163 uprops_swap(const UDataSwapper *ds,
164             const void *inData, int32_t length, void *outData,
165             UErrorCode *pErrorCode) {
166     const UDataInfo *pInfo;
167     int32_t headerSize, i;
168 
169     int32_t dataIndexes[UPROPS_INDEX_COUNT];
170     const int32_t *inData32;
171 
172     /* udata_swapDataHeader checks the arguments */
173     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
174     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
175         return 0;
176     }
177 
178     /* check data format and format version */
179     pInfo=(const UDataInfo *)((const char *)inData+4);
180     if(!(
181         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
182         pInfo->dataFormat[1]==0x50 &&
183         pInfo->dataFormat[2]==0x72 &&
184         pInfo->dataFormat[3]==0x6f &&
185         (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
186         (pInfo->formatVersion[0]>=7 ||
187             (pInfo->formatVersion[2]==UTRIE_SHIFT &&
188              pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
189     )) {
190         udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
191                          pInfo->dataFormat[0], pInfo->dataFormat[1],
192                          pInfo->dataFormat[2], pInfo->dataFormat[3],
193                          pInfo->formatVersion[0]);
194         *pErrorCode=U_UNSUPPORTED_ERROR;
195         return 0;
196     }
197 
198     /* the properties file must contain at least the indexes array */
199     if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
200         udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
201                          length-headerSize);
202         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
203         return 0;
204     }
205 
206     /* read the indexes */
207     inData32=(const int32_t *)((const char *)inData+headerSize);
208     for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
209         dataIndexes[i]=udata_readInt32(ds, inData32[i]);
210     }
211 
212     /*
213      * comments are copied from the data format description in genprops/store.c
214      * indexes[] constants are in uprops.h
215      */
216     int32_t dataTop;
217     if(length>=0) {
218         int32_t *outData32;
219 
220         /*
221          * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
222          * In earlier formatVersions, it is 0 and a lower dataIndexes entry
223          * has the top of the last item.
224          */
225         for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
226 
227         if((length-headerSize)<(4*dataTop)) {
228             udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
229                              length-headerSize);
230             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
231             return 0;
232         }
233 
234         outData32=(int32_t *)((char *)outData+headerSize);
235 
236         /* copy everything for inaccessible data (padding) */
237         if(inData32!=outData32) {
238             uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
239         }
240 
241         /* swap the indexes[16] */
242         ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
243 
244         /*
245          * swap the main properties UTrie
246          * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
247          */
248         utrie_swapAnyVersion(ds,
249             inData32+UPROPS_INDEX_COUNT,
250             4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
251             outData32+UPROPS_INDEX_COUNT,
252             pErrorCode);
253 
254         /*
255          * swap the properties and exceptions words
256          * P  const uint32_t props32[i1-i0];
257          * E  const uint32_t exceptions[i2-i1];
258          */
259         ds->swapArray32(ds,
260             inData32+dataIndexes[UPROPS_PROPS32_INDEX],
261             4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
262             outData32+dataIndexes[UPROPS_PROPS32_INDEX],
263             pErrorCode);
264 
265         /*
266          * swap the UChars
267          * U  const UChar uchars[2*(i3-i2)];
268          */
269         ds->swapArray16(ds,
270             inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
271             4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
272             outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
273             pErrorCode);
274 
275         /*
276          * swap the additional UTrie
277          * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
278          */
279         utrie_swapAnyVersion(ds,
280             inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
281             4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
282             outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
283             pErrorCode);
284 
285         /*
286          * swap the properties vectors
287          * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
288          */
289         ds->swapArray32(ds,
290             inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
291             4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
292             outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
293             pErrorCode);
294 
295         // swap the Script_Extensions data
296         // SCX const uint16_t scriptExtensions[2*(i7-i6)];
297         ds->swapArray16(ds,
298             inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
299             4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
300             outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
301             pErrorCode);
302     }
303 
304     /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
305     return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
306 }
307 
308 /* Unicode case mapping data swapping --------------------------------------- */
309 
310 static int32_t U_CALLCONV
ucase_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)311 ucase_swap(const UDataSwapper *ds,
312            const void *inData, int32_t length, void *outData,
313            UErrorCode *pErrorCode) {
314     const UDataInfo *pInfo;
315     int32_t headerSize;
316 
317     const uint8_t *inBytes;
318     uint8_t *outBytes;
319 
320     const int32_t *inIndexes;
321     int32_t indexes[16];
322 
323     int32_t i, offset, count, size;
324 
325     /* udata_swapDataHeader checks the arguments */
326     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
327     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
328         return 0;
329     }
330 
331     /* check data format and format version */
332     pInfo=(const UDataInfo *)((const char *)inData+4);
333     if(!(
334         pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
335         pInfo->dataFormat[1]==UCASE_FMT_1 &&
336         pInfo->dataFormat[2]==UCASE_FMT_2 &&
337         pInfo->dataFormat[3]==UCASE_FMT_3 &&
338         ((pInfo->formatVersion[0]==1 &&
339           pInfo->formatVersion[2]==UTRIE_SHIFT &&
340           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
341          (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4))
342     )) {
343         udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
344                          pInfo->dataFormat[0], pInfo->dataFormat[1],
345                          pInfo->dataFormat[2], pInfo->dataFormat[3],
346                          pInfo->formatVersion[0]);
347         *pErrorCode=U_UNSUPPORTED_ERROR;
348         return 0;
349     }
350 
351     inBytes=(const uint8_t *)inData+headerSize;
352     outBytes=(uint8_t *)outData+headerSize;
353 
354     inIndexes=(const int32_t *)inBytes;
355 
356     if(length>=0) {
357         length-=headerSize;
358         if(length<16*4) {
359             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
360                              length);
361             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
362             return 0;
363         }
364     }
365 
366     /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
367     for(i=0; i<16; ++i) {
368         indexes[i]=udata_readInt32(ds, inIndexes[i]);
369     }
370 
371     /* get the total length of the data */
372     size=indexes[UCASE_IX_LENGTH];
373 
374     if(length>=0) {
375         if(length<size) {
376             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
377                              length);
378             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
379             return 0;
380         }
381 
382         /* copy the data for inaccessible bytes */
383         if(inBytes!=outBytes) {
384             uprv_memcpy(outBytes, inBytes, size);
385         }
386 
387         offset=0;
388 
389         /* swap the int32_t indexes[] */
390         count=indexes[UCASE_IX_INDEX_TOP]*4;
391         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
392         offset+=count;
393 
394         /* swap the UTrie */
395         count=indexes[UCASE_IX_TRIE_SIZE];
396         utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
397         offset+=count;
398 
399         /* swap the uint16_t exceptions[] and unfold[] */
400         count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
401         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
402         offset+=count;
403 
404         U_ASSERT(offset==size);
405     }
406 
407     return headerSize+size;
408 }
409 
410 /* Unicode bidi/shaping data swapping --------------------------------------- */
411 
412 static int32_t U_CALLCONV
ubidi_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)413 ubidi_swap(const UDataSwapper *ds,
414            const void *inData, int32_t length, void *outData,
415            UErrorCode *pErrorCode) {
416     const UDataInfo *pInfo;
417     int32_t headerSize;
418 
419     const uint8_t *inBytes;
420     uint8_t *outBytes;
421 
422     const int32_t *inIndexes;
423     int32_t indexes[16];
424 
425     int32_t i, offset, count, size;
426 
427     /* udata_swapDataHeader checks the arguments */
428     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
429     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
430         return 0;
431     }
432 
433     /* check data format and format version */
434     pInfo=(const UDataInfo *)((const char *)inData+4);
435     if(!(
436         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
437         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
438         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
439         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
440         ((pInfo->formatVersion[0]==1 &&
441           pInfo->formatVersion[2]==UTRIE_SHIFT &&
442           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
443          pInfo->formatVersion[0]==2)
444     )) {
445         udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
446                          pInfo->dataFormat[0], pInfo->dataFormat[1],
447                          pInfo->dataFormat[2], pInfo->dataFormat[3],
448                          pInfo->formatVersion[0]);
449         *pErrorCode=U_UNSUPPORTED_ERROR;
450         return 0;
451     }
452 
453     inBytes=(const uint8_t *)inData+headerSize;
454     outBytes=(uint8_t *)outData+headerSize;
455 
456     inIndexes=(const int32_t *)inBytes;
457 
458     if(length>=0) {
459         length-=headerSize;
460         if(length<16*4) {
461             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
462                              length);
463             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
464             return 0;
465         }
466     }
467 
468     /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
469     for(i=0; i<16; ++i) {
470         indexes[i]=udata_readInt32(ds, inIndexes[i]);
471     }
472 
473     /* get the total length of the data */
474     size=indexes[UBIDI_IX_LENGTH];
475 
476     if(length>=0) {
477         if(length<size) {
478             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
479                              length);
480             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
481             return 0;
482         }
483 
484         /* copy the data for inaccessible bytes */
485         if(inBytes!=outBytes) {
486             uprv_memcpy(outBytes, inBytes, size);
487         }
488 
489         offset=0;
490 
491         /* swap the int32_t indexes[] */
492         count=indexes[UBIDI_IX_INDEX_TOP]*4;
493         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
494         offset+=count;
495 
496         /* swap the UTrie */
497         count=indexes[UBIDI_IX_TRIE_SIZE];
498         utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
499         offset+=count;
500 
501         /* swap the uint32_t mirrors[] */
502         count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
503         ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
504         offset+=count;
505 
506         /* just skip the uint8_t jgArray[] and jgArray2[] */
507         count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
508         offset+=count;
509         count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
510         offset+=count;
511 
512         U_ASSERT(offset==size);
513     }
514 
515     return headerSize+size;
516 }
517 
518 /* Unicode normalization data swapping -------------------------------------- */
519 
520 #if !UCONFIG_NO_NORMALIZATION
521 
522 static int32_t U_CALLCONV
unorm_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)523 unorm_swap(const UDataSwapper *ds,
524            const void *inData, int32_t length, void *outData,
525            UErrorCode *pErrorCode) {
526     const UDataInfo *pInfo;
527     int32_t headerSize;
528 
529     const uint8_t *inBytes;
530     uint8_t *outBytes;
531 
532     const int32_t *inIndexes;
533     int32_t indexes[32];
534 
535     int32_t i, offset, count, size;
536 
537     /* udata_swapDataHeader checks the arguments */
538     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
539     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
540         return 0;
541     }
542 
543     /* check data format and format version */
544     pInfo=(const UDataInfo *)((const char *)inData+4);
545     if(!(
546         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
547         pInfo->dataFormat[1]==0x6f &&
548         pInfo->dataFormat[2]==0x72 &&
549         pInfo->dataFormat[3]==0x6d &&
550         pInfo->formatVersion[0]==2
551     )) {
552         udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
553                          pInfo->dataFormat[0], pInfo->dataFormat[1],
554                          pInfo->dataFormat[2], pInfo->dataFormat[3],
555                          pInfo->formatVersion[0]);
556         *pErrorCode=U_UNSUPPORTED_ERROR;
557         return 0;
558     }
559 
560     inBytes=(const uint8_t *)inData+headerSize;
561     outBytes=(uint8_t *)outData+headerSize;
562 
563     inIndexes=(const int32_t *)inBytes;
564 
565     if(length>=0) {
566         length-=headerSize;
567         if(length<32*4) {
568             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
569                              length);
570             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
571             return 0;
572         }
573     }
574 
575     /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
576     for(i=0; i<32; ++i) {
577         indexes[i]=udata_readInt32(ds, inIndexes[i]);
578     }
579 
580     /* calculate the total length of the data */
581     size=
582         32*4+ /* size of indexes[] */
583         indexes[_NORM_INDEX_TRIE_SIZE]+
584         indexes[_NORM_INDEX_UCHAR_COUNT]*2+
585         indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
586         indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
587         indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
588         indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
589 
590     if(length>=0) {
591         if(length<size) {
592             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
593                              length);
594             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
595             return 0;
596         }
597 
598         /* copy the data for inaccessible bytes */
599         if(inBytes!=outBytes) {
600             uprv_memcpy(outBytes, inBytes, size);
601         }
602 
603         offset=0;
604 
605         /* swap the indexes[] */
606         count=32*4;
607         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
608         offset+=count;
609 
610         /* swap the main UTrie */
611         count=indexes[_NORM_INDEX_TRIE_SIZE];
612         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
613         offset+=count;
614 
615         /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
616         count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
617         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
618         offset+=count;
619 
620         /* swap the FCD UTrie */
621         count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
622         if(count!=0) {
623             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
624             offset+=count;
625         }
626 
627         /* swap the aux UTrie */
628         count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
629         if(count!=0) {
630             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
631             offset+=count;
632         }
633 
634         /* swap the uint16_t combiningTable[] */
635         count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
636         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
637         offset+=count;
638     }
639 
640     return headerSize+size;
641 }
642 
643 #endif
644 
645 // Unicode text layout properties data swapping --------------------------------
646 
647 static int32_t U_CALLCONV
ulayout_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)648 ulayout_swap(const UDataSwapper *ds,
649              const void *inData, int32_t length, void *outData,
650              UErrorCode *pErrorCode) {
651     // udata_swapDataHeader checks the arguments.
652     int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
653     if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
654         return 0;
655     }
656 
657     // Check data format and format version.
658     const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
659     if (!(
660             pInfo->dataFormat[0] == ULAYOUT_FMT_0 &&    // dataFormat="Layo"
661             pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
662             pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
663             pInfo->dataFormat[3] == ULAYOUT_FMT_3 &&
664             pInfo->formatVersion[0] == 1)) {
665         udata_printError(ds,
666             "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
667             "is not recognized as text layout properties data\n",
668             pInfo->dataFormat[0], pInfo->dataFormat[1],
669             pInfo->dataFormat[2], pInfo->dataFormat[3],
670             pInfo->formatVersion[0]);
671         *pErrorCode = U_UNSUPPORTED_ERROR;
672         return 0;
673     }
674 
675     const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
676     uint8_t *outBytes = (uint8_t *)outData + headerSize;
677 
678     const int32_t *inIndexes = (const int32_t *)inBytes;
679 
680     if (length >= 0) {
681         length -= headerSize;
682         if (length < 12 * 4) {
683             udata_printError(ds,
684                 "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n",
685                 length);
686             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
687             return 0;
688         }
689     }
690 
691     int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]);
692     if (indexesLength < 12) {
693         udata_printError(ds,
694             "ulayout_swap(): too few indexes (%d) for text layout properties data\n",
695             indexesLength);
696         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
697         return 0;
698     }
699 
700     // Read the data offsets before swapping anything.
701     int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1];
702     for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
703         indexes[i] = udata_readInt32(ds, inIndexes[i]);
704     }
705     int32_t size = indexes[ULAYOUT_IX_TRIES_TOP];
706 
707     if (length >= 0) {
708         if (length < size) {
709             udata_printError(ds,
710                 "ulayout_swap(): too few bytes (%d after header) "
711                 "for all of text layout properties data\n",
712                 length);
713             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
714             return 0;
715         }
716 
717         // Copy the data for inaccessible bytes.
718         if (inBytes != outBytes) {
719             uprv_memcpy(outBytes, inBytes, size);
720         }
721 
722         // Swap the int32_t indexes[].
723         int32_t offset = 0;
724         int32_t count = indexesLength * 4;
725         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
726         offset += count;
727 
728         // Swap each trie.
729         for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
730             int32_t top = indexes[i];
731             count = top - offset;
732             U_ASSERT(count >= 0);
733             if (count >= 16) {
734                 utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
735             }
736             offset = top;
737         }
738 
739         U_ASSERT(offset == size);
740     }
741 
742     return headerSize + size;
743 }
744 
745 // Unicode emoji properties data swapping --------------------------------------
746 
747 static int32_t U_CALLCONV
uemoji_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)748 uemoji_swap(const UDataSwapper *ds,
749             const void *inData, int32_t length, void *outData,
750             UErrorCode *pErrorCode) {
751     // udata_swapDataHeader checks the arguments.
752     int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
753     if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
754         return 0;
755     }
756 
757     // Check data format and format version.
758     const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
759     if (!(
760             pInfo->dataFormat[0] == u'E' &&
761             pInfo->dataFormat[1] == u'm' &&
762             pInfo->dataFormat[2] == u'o' &&
763             pInfo->dataFormat[3] == u'j' &&
764             pInfo->formatVersion[0] == 1)) {
765         udata_printError(ds,
766             "uemoji_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
767             "is not recognized as emoji properties data\n",
768             pInfo->dataFormat[0], pInfo->dataFormat[1],
769             pInfo->dataFormat[2], pInfo->dataFormat[3],
770             pInfo->formatVersion[0]);
771         *pErrorCode = U_UNSUPPORTED_ERROR;
772         return 0;
773     }
774 
775     const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
776     uint8_t *outBytes = (uint8_t *)outData + headerSize;
777 
778     const int32_t *inIndexes = (const int32_t *)inBytes;
779 
780     if (length >= 0) {
781         length -= headerSize;
782         // We expect to read at least EmojiProps::IX_TOTAL_SIZE.
783         if (length < 14 * 4) {
784             udata_printError(ds,
785                 "uemoji_swap(): too few bytes (%d after header) for emoji properties data\n",
786                 length);
787             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
788             return 0;
789         }
790     }
791 
792     // First offset after indexes[].
793     int32_t cpTrieOffset = udata_readInt32(ds, inIndexes[EmojiProps::IX_CPTRIE_OFFSET]);
794     int32_t indexesLength = cpTrieOffset / 4;
795     if (indexesLength < 14) {
796         udata_printError(ds,
797             "uemoji_swap(): too few indexes (%d) for emoji properties data\n",
798             indexesLength);
799         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
800         return 0;
801     }
802 
803     // Read the data offsets before swapping anything.
804     int32_t indexes[EmojiProps::IX_TOTAL_SIZE + 1];
805     indexes[0] = cpTrieOffset;
806     for (int32_t i = 1; i <= EmojiProps::IX_TOTAL_SIZE; ++i) {
807         indexes[i] = udata_readInt32(ds, inIndexes[i]);
808     }
809     int32_t size = indexes[EmojiProps::IX_TOTAL_SIZE];
810 
811     if (length >= 0) {
812         if (length < size) {
813             udata_printError(ds,
814                 "uemoji_swap(): too few bytes (%d after header) "
815                 "for all of emoji properties data\n",
816                 length);
817             *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
818             return 0;
819         }
820 
821         // Copy the data for inaccessible bytes.
822         if (inBytes != outBytes) {
823             uprv_memcpy(outBytes, inBytes, size);
824         }
825 
826         // Swap the int32_t indexes[].
827         int32_t offset = 0;
828         int32_t top = cpTrieOffset;
829         ds->swapArray32(ds, inBytes, top - offset, outBytes, pErrorCode);
830         offset = top;
831 
832         // Swap the code point trie.
833         top = indexes[EmojiProps::IX_CPTRIE_OFFSET + 1];
834         int32_t count = top - offset;
835         U_ASSERT(count >= 0);
836         if (count >= 16) {
837             utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
838         }
839         offset = top;
840 
841         // Swap all of the string tries.
842         // They are all serialized as arrays of 16-bit units.
843         offset = indexes[EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET];
844         top = indexes[EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET + 1];
845         ds->swapArray16(ds, inBytes + offset, top - offset, outBytes + offset, pErrorCode);
846         offset = top;
847 
848         U_ASSERT(offset == size);
849     }
850 
851     return headerSize + size;
852 }
853 
854 /* Swap 'Test' data from gentest */
855 static int32_t U_CALLCONV
test_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)856 test_swap(const UDataSwapper *ds,
857            const void *inData, int32_t length, void *outData,
858            UErrorCode *pErrorCode) {
859     const UDataInfo *pInfo;
860     int32_t headerSize;
861 
862     const uint8_t *inBytes;
863     uint8_t *outBytes;
864 
865     int32_t offset;
866 
867     /* udata_swapDataHeader checks the arguments */
868     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
869     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
870         udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
871         return 0;
872     }
873 
874     /* check data format and format version */
875     pInfo=(const UDataInfo *)((const char *)inData+4);
876     if(!(
877         pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
878         pInfo->dataFormat[1]==0x65 &&
879         pInfo->dataFormat[2]==0x73 &&
880         pInfo->dataFormat[3]==0x74 &&
881         pInfo->formatVersion[0]==1
882     )) {
883         udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
884                          pInfo->dataFormat[0], pInfo->dataFormat[1],
885                          pInfo->dataFormat[2], pInfo->dataFormat[3],
886                          pInfo->formatVersion[0]);
887         *pErrorCode=U_UNSUPPORTED_ERROR;
888         return 0;
889     }
890 
891     inBytes=(const uint8_t *)inData+headerSize;
892     outBytes=(uint8_t *)outData+headerSize;
893 
894     int32_t size16 = 2; // 16bit plus padding
895     int32_t sizeStr = 5; // 4 char inv-str plus null
896     int32_t size = size16 + sizeStr;
897 
898     if(length>=0) {
899         if(length<size) {
900             udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
901                              length, size);
902             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
903             return 0;
904         }
905 
906 	offset =0;
907 	/* swap a 1 entry array */
908         ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
909 	offset+=size16;
910 	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
911     }
912 
913     return headerSize+size;
914 }
915 
916 /* swap any data (except a .dat package) ------------------------------------ */
917 
918 static const struct {
919     uint8_t dataFormat[4];
920     UDataSwapFn *swapFn;
921 } swapFns[]={
922     { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
923 #if !UCONFIG_NO_LEGACY_CONVERSION
924     { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
925 #endif
926 #if !UCONFIG_NO_CONVERSION
927     { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
928 #endif
929 #if !UCONFIG_NO_IDNA
930     { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
931 #endif
932     /* insert data formats here, descending by expected frequency of occurrence */
933     { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
934 
935     { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
936                                   ucase_swap },         /* dataFormat="cAsE" */
937 
938     { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
939                                   ubidi_swap },         /* dataFormat="BiDi" */
940 
941 #if !UCONFIG_NO_NORMALIZATION
942     { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
943     { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
944 #endif
945 
946     { { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
947                                   ulayout_swap },       // dataFormat="Layo"
948 
949     { { u'E', u'm', u'o', u'j' }, uemoji_swap },
950 
951 #if !UCONFIG_NO_COLLATION
952     { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
953     { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
954 #endif
955 #if !UCONFIG_NO_BREAK_ITERATION
956     { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
957     { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
958 #endif
959     { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
960     { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
961 #if !UCONFIG_NO_NORMALIZATION
962     { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
963 #endif
964     { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
965 };
966 
967 U_CAPI int32_t U_EXPORT2
udata_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)968 udata_swap(const UDataSwapper *ds,
969            const void *inData, int32_t length, void *outData,
970            UErrorCode *pErrorCode) {
971     char dataFormatChars[4];
972     const UDataInfo *pInfo;
973     int32_t i, swappedLength;
974 
975     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
976         return 0;
977     }
978 
979     /*
980      * Preflight the header first; checks for illegal arguments, too.
981      * Do not swap the header right away because the format-specific swapper
982      * will swap it, get the headerSize again, and also use the header
983      * information. Otherwise we would have to pass some of the information
984      * and not be able to use the UDataSwapFn signature.
985      */
986     udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
987 
988     /*
989      * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
990      * then we could check here for further known magic values and structures.
991      */
992     if(U_FAILURE(*pErrorCode)) {
993         return 0; /* the data format was not recognized */
994     }
995 
996     pInfo=(const UDataInfo *)((const char *)inData+4);
997 
998     {
999         /* convert the data format from ASCII to Unicode to the system charset */
1000         UChar u[4]={
1001              pInfo->dataFormat[0], pInfo->dataFormat[1],
1002              pInfo->dataFormat[2], pInfo->dataFormat[3]
1003         };
1004 
1005         if(uprv_isInvariantUString(u, 4)) {
1006             u_UCharsToChars(u, dataFormatChars, 4);
1007         } else {
1008             dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
1009         }
1010     }
1011 
1012     /* dispatch to the swap function for the dataFormat */
1013     for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
1014         if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
1015             swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
1016 
1017             if(U_FAILURE(*pErrorCode)) {
1018                 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
1019                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
1020                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
1021                                  dataFormatChars[0], dataFormatChars[1],
1022                                  dataFormatChars[2], dataFormatChars[3],
1023                                  u_errorName(*pErrorCode));
1024             } else if(swappedLength<(length-15)) {
1025                 /* swapped less than expected */
1026                 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
1027                                  swappedLength, length,
1028                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
1029                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
1030                                  dataFormatChars[0], dataFormatChars[1],
1031                                  dataFormatChars[2], dataFormatChars[3],
1032                                  u_errorName(*pErrorCode));
1033             }
1034 
1035             return swappedLength;
1036         }
1037     }
1038 
1039     /* the dataFormat was not recognized */
1040     udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
1041                      pInfo->dataFormat[0], pInfo->dataFormat[1],
1042                      pInfo->dataFormat[2], pInfo->dataFormat[3],
1043                      dataFormatChars[0], dataFormatChars[1],
1044                      dataFormatChars[2], dataFormatChars[3]);
1045 
1046     *pErrorCode=U_UNSUPPORTED_ERROR;
1047     return 0;
1048 }
1049