// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  udataswp.c
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003jun05
*   created by: Markus W. Scherer
*
*   Definitions for ICU data transformations for different platforms,
*   changing between big- and little-endian data and/or between
*   charset families (ASCII<->EBCDIC).
*/
22 
23 #include <stdarg.h>
24 #include "unicode/utypes.h"
25 #include "unicode/udata.h" /* UDataInfo */
26 #include "ucmndata.h" /* DataHeader */
27 #include "cmemory.h"
28 #include "udataswp.h"
29 
/* swapping primitives ------------------------------------------------------ */
31 
32 static int32_t U_CALLCONV
uprv_swapArray16(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)33 uprv_swapArray16(const UDataSwapper *ds,
34                  const void *inData, int32_t length, void *outData,
35                  UErrorCode *pErrorCode) {
36     const uint16_t *p;
37     uint16_t *q;
38     int32_t count;
39     uint16_t x;
40 
41     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
42         return 0;
43     }
44     if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
45         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
46         return 0;
47     }
48 
49     /* setup and swapping */
50     p=(const uint16_t *)inData;
51     q=(uint16_t *)outData;
52     count=length/2;
53     while(count>0) {
54         x=*p++;
55         *q++=(uint16_t)((x<<8)|(x>>8));
56         --count;
57     }
58 
59     return length;
60 }
61 
62 static int32_t U_CALLCONV
uprv_copyArray16(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)63 uprv_copyArray16(const UDataSwapper *ds,
64                  const void *inData, int32_t length, void *outData,
65                  UErrorCode *pErrorCode) {
66     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
67         return 0;
68     }
69     if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
70         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
71         return 0;
72     }
73 
74     if(length>0 && inData!=outData) {
75         uprv_memcpy(outData, inData, length);
76     }
77     return length;
78 }
79 
80 static int32_t U_CALLCONV
uprv_swapArray32(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)81 uprv_swapArray32(const UDataSwapper *ds,
82                  const void *inData, int32_t length, void *outData,
83                  UErrorCode *pErrorCode) {
84     const uint32_t *p;
85     uint32_t *q;
86     int32_t count;
87     uint32_t x;
88 
89     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
90         return 0;
91     }
92     if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
93         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
94         return 0;
95     }
96 
97     /* setup and swapping */
98     p=(const uint32_t *)inData;
99     q=(uint32_t *)outData;
100     count=length/4;
101     while(count>0) {
102         x=*p++;
103         *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
104         --count;
105     }
106 
107     return length;
108 }
109 
110 static int32_t U_CALLCONV
uprv_copyArray32(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)111 uprv_copyArray32(const UDataSwapper *ds,
112                  const void *inData, int32_t length, void *outData,
113                  UErrorCode *pErrorCode) {
114     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
115         return 0;
116     }
117     if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
118         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
119         return 0;
120     }
121 
122     if(length>0 && inData!=outData) {
123         uprv_memcpy(outData, inData, length);
124     }
125     return length;
126 }
127 
128 static int32_t U_CALLCONV
uprv_swapArray64(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)129 uprv_swapArray64(const UDataSwapper *ds,
130                  const void *inData, int32_t length, void *outData,
131                  UErrorCode *pErrorCode) {
132     const uint64_t *p;
133     uint64_t *q;
134     int32_t count;
135 
136     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
137         return 0;
138     }
139     if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
140         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
141         return 0;
142     }
143 
144     /* setup and swapping */
145     p=(const uint64_t *)inData;
146     q=(uint64_t *)outData;
147     count=length/8;
148     while(count>0) {
149         uint64_t x=*p++;
150         x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
151             ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
152         *q++=x;
153         --count;
154     }
155 
156     return length;
157 }
158 
159 static int32_t U_CALLCONV
uprv_copyArray64(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)160 uprv_copyArray64(const UDataSwapper *ds,
161                  const void *inData, int32_t length, void *outData,
162                  UErrorCode *pErrorCode) {
163     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
164         return 0;
165     }
166     if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
167         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
168         return 0;
169     }
170 
171     if(length>0 && inData!=outData) {
172         uprv_memcpy(outData, inData, length);
173     }
174     return length;
175 }
176 
177 static uint16_t U_CALLCONV
uprv_readSwapUInt16(uint16_t x)178 uprv_readSwapUInt16(uint16_t x) {
179     return (uint16_t)((x<<8)|(x>>8));
180 }
181 
182 static uint16_t U_CALLCONV
uprv_readDirectUInt16(uint16_t x)183 uprv_readDirectUInt16(uint16_t x) {
184     return x;
185 }
186 
187 static uint32_t U_CALLCONV
uprv_readSwapUInt32(uint32_t x)188 uprv_readSwapUInt32(uint32_t x) {
189     return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
190 }
191 
192 static uint32_t U_CALLCONV
uprv_readDirectUInt32(uint32_t x)193 uprv_readDirectUInt32(uint32_t x) {
194     return x;
195 }
196 
197 static void U_CALLCONV
uprv_writeSwapUInt16(uint16_t * p,uint16_t x)198 uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
199     *p=(uint16_t)((x<<8)|(x>>8));
200 }
201 
202 static void U_CALLCONV
uprv_writeDirectUInt16(uint16_t * p,uint16_t x)203 uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
204     *p=x;
205 }
206 
207 static void U_CALLCONV
uprv_writeSwapUInt32(uint32_t * p,uint32_t x)208 uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
209     *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
210 }
211 
212 static void U_CALLCONV
uprv_writeDirectUInt32(uint32_t * p,uint32_t x)213 uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
214     *p=x;
215 }
216 
217 U_CAPI int16_t U_EXPORT2
udata_readInt16(const UDataSwapper * ds,int16_t x)218 udata_readInt16(const UDataSwapper *ds, int16_t x) {
219     return (int16_t)ds->readUInt16((uint16_t)x);
220 }
221 
222 U_CAPI int32_t U_EXPORT2
udata_readInt32(const UDataSwapper * ds,int32_t x)223 udata_readInt32(const UDataSwapper *ds, int32_t x) {
224     return (int32_t)ds->readUInt32((uint32_t)x);
225 }
226 
227 /**
228  * Swap a block of invariant, NUL-terminated strings, but not padding
229  * bytes after the last string.
230  * @internal
231  */
232 U_CAPI int32_t U_EXPORT2
udata_swapInvStringBlock(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)233 udata_swapInvStringBlock(const UDataSwapper *ds,
234                          const void *inData, int32_t length, void *outData,
235                          UErrorCode *pErrorCode) {
236     const char *inChars;
237     int32_t stringsLength;
238 
239     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
240         return 0;
241     }
242     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
243         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
244         return 0;
245     }
246 
247     /* reduce the strings length to not include bytes after the last NUL */
248     inChars=(const char *)inData;
249     stringsLength=length;
250     while(stringsLength>0 && inChars[stringsLength-1]!=0) {
251         --stringsLength;
252     }
253 
254     /* swap up to the last NUL */
255     ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
256 
257     /* copy the bytes after the last NUL */
258     if(inData!=outData && length>stringsLength) {
259         uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
260     }
261 
262     /* return the length including padding bytes */
263     if(U_SUCCESS(*pErrorCode)) {
264         return length;
265     } else {
266         return 0;
267     }
268 }
269 
270 U_CAPI void U_EXPORT2
udata_printError(const UDataSwapper * ds,const char * fmt,...)271 udata_printError(const UDataSwapper *ds,
272                  const char *fmt,
273                  ...) {
274     va_list args;
275 
276     if(ds->printError!=NULL) {
277         va_start(args, fmt);
278         ds->printError(ds->printErrorContext, fmt, args);
279         va_end(args);
280     }
281 }
282 
/* swap a data header ------------------------------------------------------- */
284 
285 U_CAPI int32_t U_EXPORT2
udata_swapDataHeader(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)286 udata_swapDataHeader(const UDataSwapper *ds,
287                      const void *inData, int32_t length, void *outData,
288                      UErrorCode *pErrorCode) {
289     const DataHeader *pHeader;
290     uint16_t headerSize, infoSize;
291 
292     /* argument checking */
293     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
294         return 0;
295     }
296     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
297         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
298         return 0;
299     }
300 
301     /* check minimum length and magic bytes */
302     pHeader=(const DataHeader *)inData;
303     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
304         pHeader->dataHeader.magic1!=0xda ||
305         pHeader->dataHeader.magic2!=0x27 ||
306         pHeader->info.sizeofUChar!=2
307     ) {
308         udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
309         *pErrorCode=U_UNSUPPORTED_ERROR;
310         return 0;
311     }
312 
313     headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
314     infoSize=ds->readUInt16(pHeader->info.size);
315 
316     if( headerSize<sizeof(DataHeader) ||
317         infoSize<sizeof(UDataInfo) ||
318         headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
319         (length>=0 && length<headerSize)
320     ) {
321         udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
322                          headerSize, infoSize, length);
323         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
324         return 0;
325     }
326 
327     if(length>0) {
328         DataHeader *outHeader;
329         const char *s;
330         int32_t maxLength;
331 
332         /* Most of the fields are just bytes and need no swapping. */
333         if(inData!=outData) {
334             uprv_memcpy(outData, inData, headerSize);
335         }
336         outHeader=(DataHeader *)outData;
337 
338         outHeader->info.isBigEndian = ds->outIsBigEndian;
339         outHeader->info.charsetFamily = ds->outCharset;
340 
341         /* swap headerSize */
342         ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
343 
344         /* swap UDataInfo size and reservedWord */
345         ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
346 
347         /* swap copyright statement after the UDataInfo */
348         infoSize+=sizeof(pHeader->dataHeader);
349         s=(const char *)inData+infoSize;
350         maxLength=headerSize-infoSize;
351         /* get the length of the string */
352         for(length=0; length<maxLength && s[length]!=0; ++length) {}
353         /* swap the string contents */
354         ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
355     }
356 
357     return headerSize;
358 }
359 
/* API functions ------------------------------------------------------------ */
361 
362 U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapper(UBool inIsBigEndian,uint8_t inCharset,UBool outIsBigEndian,uint8_t outCharset,UErrorCode * pErrorCode)363 udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
364                   UBool outIsBigEndian, uint8_t outCharset,
365                   UErrorCode *pErrorCode) {
366     UDataSwapper *swapper;
367 
368     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
369         return NULL;
370     }
371     if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
372         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
373         return NULL;
374     }
375 
376     /* allocate the swapper */
377     swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper));
378     if(swapper==NULL) {
379         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
380         return NULL;
381     }
382     uprv_memset(swapper, 0, sizeof(UDataSwapper));
383 
384     /* set values and functions pointers according to in/out parameters */
385     swapper->inIsBigEndian=inIsBigEndian;
386     swapper->inCharset=inCharset;
387     swapper->outIsBigEndian=outIsBigEndian;
388     swapper->outCharset=outCharset;
389 
390     swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
391     swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
392 
393     swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
394     swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
395 
396     swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
397 
398     if(inIsBigEndian==outIsBigEndian) {
399         swapper->swapArray16=uprv_copyArray16;
400         swapper->swapArray32=uprv_copyArray32;
401         swapper->swapArray64=uprv_copyArray64;
402     } else {
403         swapper->swapArray16=uprv_swapArray16;
404         swapper->swapArray32=uprv_swapArray32;
405         swapper->swapArray64=uprv_swapArray64;
406     }
407 
408     if(inCharset==U_ASCII_FAMILY) {
409         swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
410     } else /* U_EBCDIC_FAMILY */ {
411         swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
412     }
413 
414     return swapper;
415 }
416 
417 U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapperForInputData(const void * data,int32_t length,UBool outIsBigEndian,uint8_t outCharset,UErrorCode * pErrorCode)418 udata_openSwapperForInputData(const void *data, int32_t length,
419                               UBool outIsBigEndian, uint8_t outCharset,
420                               UErrorCode *pErrorCode) {
421     const DataHeader *pHeader;
422     uint16_t headerSize, infoSize;
423     UBool inIsBigEndian;
424     int8_t inCharset;
425 
426     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
427         return NULL;
428     }
429     if( data==NULL ||
430         (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
431         outCharset>U_EBCDIC_FAMILY
432     ) {
433         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
434         return NULL;
435     }
436 
437     pHeader=(const DataHeader *)data;
438     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
439         pHeader->dataHeader.magic1!=0xda ||
440         pHeader->dataHeader.magic2!=0x27 ||
441         pHeader->info.sizeofUChar!=2
442     ) {
443         *pErrorCode=U_UNSUPPORTED_ERROR;
444         return 0;
445     }
446 
447     inIsBigEndian=(UBool)pHeader->info.isBigEndian;
448     inCharset=pHeader->info.charsetFamily;
449 
450     if(inIsBigEndian==U_IS_BIG_ENDIAN) {
451         headerSize=pHeader->dataHeader.headerSize;
452         infoSize=pHeader->info.size;
453     } else {
454         headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
455         infoSize=uprv_readSwapUInt16(pHeader->info.size);
456     }
457 
458     if( headerSize<sizeof(DataHeader) ||
459         infoSize<sizeof(UDataInfo) ||
460         headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
461         (length>=0 && length<headerSize)
462     ) {
463         *pErrorCode=U_UNSUPPORTED_ERROR;
464         return 0;
465     }
466 
467     return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
468 }
469 
470 U_CAPI void U_EXPORT2
udata_closeSwapper(UDataSwapper * ds)471 udata_closeSwapper(UDataSwapper *ds) {
472     uprv_free(ds);
473 }
474