1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // utrie_swap.cpp
5 // created: 2018aug08 Markus W. Scherer
6 
7 #include "unicode/utypes.h"
8 #include "cmemory.h"
9 #include "ucptrie_impl.h"
10 #include "udataswp.h"
11 #include "utrie.h"
12 #include "utrie2_impl.h"
13 
14 // These functions for swapping different generations of ICU code point tries are here
15 // so that their implementation files need not depend on swapper code,
16 // need not depend on each other, and so that other swapper code
17 // need not depend on other trie code.
18 
namespace {

// Number of ASCII code points (U+0000..U+007F).
// ucptrie_swap() rejects a serialized UCPTrie whose data length is below this value.
constexpr int32_t ASCII_LIMIT = 0x80;

}  // namespace
24 
25 U_CAPI int32_t U_EXPORT2
utrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)26 utrie_swap(const UDataSwapper *ds,
27            const void *inData, int32_t length, void *outData,
28            UErrorCode *pErrorCode) {
29     const UTrieHeader *inTrie;
30     UTrieHeader trie;
31     int32_t size;
32     UBool dataIs32;
33 
34     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
35         return 0;
36     }
37     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
38         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
39         return 0;
40     }
41 
42     /* setup and swapping */
43     if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
44         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
45         return 0;
46     }
47 
48     inTrie=(const UTrieHeader *)inData;
49     trie.signature=ds->readUInt32(inTrie->signature);
50     trie.options=ds->readUInt32(inTrie->options);
51     trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
52     trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
53 
54     if( trie.signature!=0x54726965 ||
55         (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
56         ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
57         trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
58         (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
59         trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
60         (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
61         ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
62     ) {
63         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
64         return 0;
65     }
66 
67     dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
68     size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
69 
70     if(length>=0) {
71         UTrieHeader *outTrie;
72 
73         if(length<size) {
74             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
75             return 0;
76         }
77 
78         outTrie=(UTrieHeader *)outData;
79 
80         /* swap the header */
81         ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
82 
83         /* swap the index and the data */
84         if(dataIs32) {
85             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
86             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
87                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
88         } else {
89             ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
90         }
91     }
92 
93     return size;
94 }
95 
96 U_CAPI int32_t U_EXPORT2
utrie2_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)97 utrie2_swap(const UDataSwapper *ds,
98             const void *inData, int32_t length, void *outData,
99             UErrorCode *pErrorCode) {
100     const UTrie2Header *inTrie;
101     UTrie2Header trie;
102     int32_t dataLength, size;
103     UTrie2ValueBits valueBits;
104 
105     if(U_FAILURE(*pErrorCode)) {
106         return 0;
107     }
108     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
109         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
110         return 0;
111     }
112 
113     /* setup and swapping */
114     if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
115         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
116         return 0;
117     }
118 
119     inTrie=(const UTrie2Header *)inData;
120     trie.signature=ds->readUInt32(inTrie->signature);
121     trie.options=ds->readUInt16(inTrie->options);
122     trie.indexLength=ds->readUInt16(inTrie->indexLength);
123     trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
124 
125     valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
126     dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
127 
128     if( trie.signature!=UTRIE2_SIG ||
129         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
130         trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
131         dataLength<UTRIE2_DATA_START_OFFSET
132     ) {
133         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
134         return 0;
135     }
136 
137     size=sizeof(UTrie2Header)+trie.indexLength*2;
138     switch(valueBits) {
139     case UTRIE2_16_VALUE_BITS:
140         size+=dataLength*2;
141         break;
142     case UTRIE2_32_VALUE_BITS:
143         size+=dataLength*4;
144         break;
145     default:
146         *pErrorCode=U_INVALID_FORMAT_ERROR;
147         return 0;
148     }
149 
150     if(length>=0) {
151         UTrie2Header *outTrie;
152 
153         if(length<size) {
154             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
155             return 0;
156         }
157 
158         outTrie=(UTrie2Header *)outData;
159 
160         /* swap the header */
161         ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
162         ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
163 
164         /* swap the index and the data */
165         switch(valueBits) {
166         case UTRIE2_16_VALUE_BITS:
167             ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
168             break;
169         case UTRIE2_32_VALUE_BITS:
170             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
171             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
172                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
173             break;
174         default:
175             *pErrorCode=U_INVALID_FORMAT_ERROR;
176             return 0;
177         }
178     }
179 
180     return size;
181 }
182 
183 U_CAPI int32_t U_EXPORT2
ucptrie_swap(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)184 ucptrie_swap(const UDataSwapper *ds,
185              const void *inData, int32_t length, void *outData,
186              UErrorCode *pErrorCode) {
187     const UCPTrieHeader *inTrie;
188     UCPTrieHeader trie;
189     int32_t dataLength, size;
190     UCPTrieValueWidth valueWidth;
191 
192     if(U_FAILURE(*pErrorCode)) {
193         return 0;
194     }
195     if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
196         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
197         return 0;
198     }
199 
200     /* setup and swapping */
201     if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
202         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
203         return 0;
204     }
205 
206     inTrie=(const UCPTrieHeader *)inData;
207     trie.signature=ds->readUInt32(inTrie->signature);
208     trie.options=ds->readUInt16(inTrie->options);
209     trie.indexLength=ds->readUInt16(inTrie->indexLength);
210     trie.dataLength = ds->readUInt16(inTrie->dataLength);
211 
212     UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
213     valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
214     dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
215 
216     int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
217         UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
218     if( trie.signature!=UCPTRIE_SIG ||
219         type > UCPTRIE_TYPE_SMALL ||
220         (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
221         valueWidth > UCPTRIE_VALUE_BITS_8 ||
222         trie.indexLength < minIndexLength ||
223         dataLength < ASCII_LIMIT
224     ) {
225         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
226         return 0;
227     }
228 
229     size=sizeof(UCPTrieHeader)+trie.indexLength*2;
230     switch(valueWidth) {
231     case UCPTRIE_VALUE_BITS_16:
232         size+=dataLength*2;
233         break;
234     case UCPTRIE_VALUE_BITS_32:
235         size+=dataLength*4;
236         break;
237     case UCPTRIE_VALUE_BITS_8:
238         size+=dataLength;
239         break;
240     default:
241         *pErrorCode=U_INVALID_FORMAT_ERROR;
242         return 0;
243     }
244 
245     if(length>=0) {
246         UCPTrieHeader *outTrie;
247 
248         if(length<size) {
249             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
250             return 0;
251         }
252 
253         outTrie=(UCPTrieHeader *)outData;
254 
255         /* swap the header */
256         ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
257         ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
258 
259         /* swap the index */
260         const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
261         uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
262         ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
263 
264         /* swap the data */
265         const uint16_t *inData=inIndex+trie.indexLength;
266         uint16_t *outData=outIndex+trie.indexLength;
267         switch(valueWidth) {
268         case UCPTRIE_VALUE_BITS_16:
269             ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
270             break;
271         case UCPTRIE_VALUE_BITS_32:
272             ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
273             break;
274         case UCPTRIE_VALUE_BITS_8:
275             if(inTrie!=outTrie) {
276                 uprv_memmove(outData, inData, dataLength);
277             }
278             break;
279         default:
280             *pErrorCode=U_INVALID_FORMAT_ERROR;
281             return 0;
282         }
283     }
284 
285     return size;
286 }
287 
288 namespace {
289 
290 /**
291  * Gets the trie version from 32-bit-aligned memory containing the serialized form
292  * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
293  *
294  * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
295  * @param length the number of bytes available at data;
296  *               can be more than necessary (see return value)
297  * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
298  *                    If TRUE, opposite-endian serialized forms are recognized as well.
299  * @return the trie version of the serialized form, or 0 if it is not
300  *         recognized as a serialized trie
301  */
302 int32_t
getVersion(const void * data,int32_t length,UBool anyEndianOk)303 getVersion(const void *data, int32_t length, UBool anyEndianOk) {
304     uint32_t signature;
305     if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
306         return 0;
307     }
308     signature=*(const uint32_t *)data;
309     if(signature==UCPTRIE_SIG) {
310         return 3;
311     }
312     if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
313         return 3;
314     }
315     if(signature==UTRIE2_SIG) {
316         return 2;
317     }
318     if(anyEndianOk && signature==UTRIE2_OE_SIG) {
319         return 2;
320     }
321     if(signature==UTRIE_SIG) {
322         return 1;
323     }
324     if(anyEndianOk && signature==UTRIE_OE_SIG) {
325         return 1;
326     }
327     return 0;
328 }
329 
330 }  // namespace
331 
332 U_CAPI int32_t U_EXPORT2
utrie_swapAnyVersion(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)333 utrie_swapAnyVersion(const UDataSwapper *ds,
334                      const void *inData, int32_t length, void *outData,
335                      UErrorCode *pErrorCode) {
336     if(U_FAILURE(*pErrorCode)) { return 0; }
337     switch(getVersion(inData, length, TRUE)) {
338     case 1:
339         return utrie_swap(ds, inData, length, outData, pErrorCode);
340     case 2:
341         return utrie2_swap(ds, inData, length, outData, pErrorCode);
342     case 3:
343         return ucptrie_swap(ds, inData, length, outData, pErrorCode);
344     default:
345         *pErrorCode=U_INVALID_FORMAT_ERROR;
346         return 0;
347     }
348 }
349