1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: udataswp.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jun05
16 * created by: Markus W. Scherer
17 *
18 * Definitions for ICU data transformations for different platforms,
19 * changing between big- and little-endian data and/or between
20 * charset families (ASCII<->EBCDIC).
21 */
22
23 #include <stdarg.h>
24 #include "unicode/utypes.h"
25 #include "unicode/udata.h" /* UDataInfo */
26 #include "ucmndata.h" /* DataHeader */
27 #include "cmemory.h"
28 #include "udataswp.h"
29
30 /* swapping primitives ------------------------------------------------------ */
31
32 static int32_t U_CALLCONV
uprv_swapArray16(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)33 uprv_swapArray16(const UDataSwapper *ds,
34 const void *inData, int32_t length, void *outData,
35 UErrorCode *pErrorCode) {
36 const uint16_t *p;
37 uint16_t *q;
38 int32_t count;
39 uint16_t x;
40
41 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
42 return 0;
43 }
44 if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
45 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
46 return 0;
47 }
48
49 /* setup and swapping */
50 p=(const uint16_t *)inData;
51 q=(uint16_t *)outData;
52 count=length/2;
53 while(count>0) {
54 x=*p++;
55 *q++=(uint16_t)((x<<8)|(x>>8));
56 --count;
57 }
58
59 return length;
60 }
61
62 static int32_t U_CALLCONV
uprv_copyArray16(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)63 uprv_copyArray16(const UDataSwapper *ds,
64 const void *inData, int32_t length, void *outData,
65 UErrorCode *pErrorCode) {
66 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
67 return 0;
68 }
69 if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) {
70 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
71 return 0;
72 }
73
74 if(length>0 && inData!=outData) {
75 uprv_memcpy(outData, inData, length);
76 }
77 return length;
78 }
79
80 static int32_t U_CALLCONV
uprv_swapArray32(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)81 uprv_swapArray32(const UDataSwapper *ds,
82 const void *inData, int32_t length, void *outData,
83 UErrorCode *pErrorCode) {
84 const uint32_t *p;
85 uint32_t *q;
86 int32_t count;
87 uint32_t x;
88
89 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
90 return 0;
91 }
92 if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
93 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
94 return 0;
95 }
96
97 /* setup and swapping */
98 p=(const uint32_t *)inData;
99 q=(uint32_t *)outData;
100 count=length/4;
101 while(count>0) {
102 x=*p++;
103 *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24));
104 --count;
105 }
106
107 return length;
108 }
109
110 static int32_t U_CALLCONV
uprv_copyArray32(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)111 uprv_copyArray32(const UDataSwapper *ds,
112 const void *inData, int32_t length, void *outData,
113 UErrorCode *pErrorCode) {
114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
115 return 0;
116 }
117 if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) {
118 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
119 return 0;
120 }
121
122 if(length>0 && inData!=outData) {
123 uprv_memcpy(outData, inData, length);
124 }
125 return length;
126 }
127
128 static int32_t U_CALLCONV
uprv_swapArray64(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)129 uprv_swapArray64(const UDataSwapper *ds,
130 const void *inData, int32_t length, void *outData,
131 UErrorCode *pErrorCode) {
132 const uint64_t *p;
133 uint64_t *q;
134 int32_t count;
135
136 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
137 return 0;
138 }
139 if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
140 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
141 return 0;
142 }
143
144 /* setup and swapping */
145 p=(const uint64_t *)inData;
146 q=(uint64_t *)outData;
147 count=length/8;
148 while(count>0) {
149 uint64_t x=*p++;
150 x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)|
151 ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56);
152 *q++=x;
153 --count;
154 }
155
156 return length;
157 }
158
159 static int32_t U_CALLCONV
uprv_copyArray64(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)160 uprv_copyArray64(const UDataSwapper *ds,
161 const void *inData, int32_t length, void *outData,
162 UErrorCode *pErrorCode) {
163 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
164 return 0;
165 }
166 if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) {
167 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
168 return 0;
169 }
170
171 if(length>0 && inData!=outData) {
172 uprv_memcpy(outData, inData, length);
173 }
174 return length;
175 }
176
/** Return x with its two bytes exchanged. */
static uint16_t U_CALLCONV
uprv_readSwapUInt16(uint16_t x) {
    const uint16_t highByte=(uint16_t)(x>>8);
    const uint16_t lowByte=(uint16_t)(x&0xff);

    return (uint16_t)((lowByte<<8)|highByte);
}
181
/** Identity reader: returns x unchanged (no byte swapping). */
static uint16_t U_CALLCONV
uprv_readDirectUInt16(uint16_t x) {
    const uint16_t value=x;

    return value;
}
186
/** Return x with its four bytes in reverse order. */
static uint32_t U_CALLCONV
uprv_readSwapUInt32(uint32_t x) {
    const uint32_t byte0=x&0xff;
    const uint32_t byte1=(x>>8)&0xff;
    const uint32_t byte2=(x>>16)&0xff;
    const uint32_t byte3=x>>24;

    return (uint32_t)((byte0<<24)|(byte1<<16)|(byte2<<8)|byte3);
}
191
/** Identity reader: returns x unchanged (no byte swapping). */
static uint32_t U_CALLCONV
uprv_readDirectUInt32(uint32_t x) {
    const uint32_t value=x;

    return value;
}
196
/** Store x into *p with its two bytes exchanged. */
static void U_CALLCONV
uprv_writeSwapUInt16(uint16_t *p, uint16_t x) {
    const uint16_t swapped=(uint16_t)((x<<8)|(x>>8));

    p[0]=swapped;
}
201
/** Store x into *p unchanged (no byte swapping). */
static void U_CALLCONV
uprv_writeDirectUInt16(uint16_t *p, uint16_t x) {
    p[0]=x;
}
206
/** Store x into *p with its four bytes in reverse order. */
static void U_CALLCONV
uprv_writeSwapUInt32(uint32_t *p, uint32_t x) {
    const uint32_t outerBytes=(x<<24)|(x>>24);
    const uint32_t innerBytes=((x<<8)&0xff0000)|((x>>8)&0xff00);

    p[0]=(uint32_t)(outerBytes|innerBytes);
}
211
/** Store x into *p unchanged (no byte swapping). */
static void U_CALLCONV
uprv_writeDirectUInt32(uint32_t *p, uint32_t x) {
    p[0]=x;
}
216
217 U_CAPI int16_t U_EXPORT2
udata_readInt16(const UDataSwapper * ds,int16_t x)218 udata_readInt16(const UDataSwapper *ds, int16_t x) {
219 return (int16_t)ds->readUInt16((uint16_t)x);
220 }
221
222 U_CAPI int32_t U_EXPORT2
udata_readInt32(const UDataSwapper * ds,int32_t x)223 udata_readInt32(const UDataSwapper *ds, int32_t x) {
224 return (int32_t)ds->readUInt32((uint32_t)x);
225 }
226
227 /**
228 * Swap a block of invariant, NUL-terminated strings, but not padding
229 * bytes after the last string.
230 * @internal
231 */
232 U_CAPI int32_t U_EXPORT2
udata_swapInvStringBlock(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)233 udata_swapInvStringBlock(const UDataSwapper *ds,
234 const void *inData, int32_t length, void *outData,
235 UErrorCode *pErrorCode) {
236 const char *inChars;
237 int32_t stringsLength;
238
239 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
240 return 0;
241 }
242 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
243 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
244 return 0;
245 }
246
247 /* reduce the strings length to not include bytes after the last NUL */
248 inChars=(const char *)inData;
249 stringsLength=length;
250 while(stringsLength>0 && inChars[stringsLength-1]!=0) {
251 --stringsLength;
252 }
253
254 /* swap up to the last NUL */
255 ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode);
256
257 /* copy the bytes after the last NUL */
258 if(inData!=outData && length>stringsLength) {
259 uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength);
260 }
261
262 /* return the length including padding bytes */
263 if(U_SUCCESS(*pErrorCode)) {
264 return length;
265 } else {
266 return 0;
267 }
268 }
269
270 U_CAPI void U_EXPORT2
udata_printError(const UDataSwapper * ds,const char * fmt,...)271 udata_printError(const UDataSwapper *ds,
272 const char *fmt,
273 ...) {
274 va_list args;
275
276 if(ds->printError!=NULL) {
277 va_start(args, fmt);
278 ds->printError(ds->printErrorContext, fmt, args);
279 va_end(args);
280 }
281 }
282
283 /* swap a data header ------------------------------------------------------- */
284
285 U_CAPI int32_t U_EXPORT2
udata_swapDataHeader(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)286 udata_swapDataHeader(const UDataSwapper *ds,
287 const void *inData, int32_t length, void *outData,
288 UErrorCode *pErrorCode) {
289 const DataHeader *pHeader;
290 uint16_t headerSize, infoSize;
291
292 /* argument checking */
293 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
294 return 0;
295 }
296 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
297 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
298 return 0;
299 }
300
301 /* check minimum length and magic bytes */
302 pHeader=(const DataHeader *)inData;
303 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
304 pHeader->dataHeader.magic1!=0xda ||
305 pHeader->dataHeader.magic2!=0x27 ||
306 pHeader->info.sizeofUChar!=2
307 ) {
308 udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n");
309 *pErrorCode=U_UNSUPPORTED_ERROR;
310 return 0;
311 }
312
313 headerSize=ds->readUInt16(pHeader->dataHeader.headerSize);
314 infoSize=ds->readUInt16(pHeader->info.size);
315
316 if( headerSize<sizeof(DataHeader) ||
317 infoSize<sizeof(UDataInfo) ||
318 headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
319 (length>=0 && length<headerSize)
320 ) {
321 udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n",
322 headerSize, infoSize, length);
323 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
324 return 0;
325 }
326
327 if(length>0) {
328 DataHeader *outHeader;
329 const char *s;
330 int32_t maxLength;
331
332 /* Most of the fields are just bytes and need no swapping. */
333 if(inData!=outData) {
334 uprv_memcpy(outData, inData, headerSize);
335 }
336 outHeader=(DataHeader *)outData;
337
338 outHeader->info.isBigEndian = ds->outIsBigEndian;
339 outHeader->info.charsetFamily = ds->outCharset;
340
341 /* swap headerSize */
342 ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode);
343
344 /* swap UDataInfo size and reservedWord */
345 ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode);
346
347 /* swap copyright statement after the UDataInfo */
348 infoSize+=sizeof(pHeader->dataHeader);
349 s=(const char *)inData+infoSize;
350 maxLength=headerSize-infoSize;
351 /* get the length of the string */
352 for(length=0; length<maxLength && s[length]!=0; ++length) {}
353 /* swap the string contents */
354 ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode);
355 }
356
357 return headerSize;
358 }
359
360 /* API functions ------------------------------------------------------------ */
361
362 U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapper(UBool inIsBigEndian,uint8_t inCharset,UBool outIsBigEndian,uint8_t outCharset,UErrorCode * pErrorCode)363 udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
364 UBool outIsBigEndian, uint8_t outCharset,
365 UErrorCode *pErrorCode) {
366 UDataSwapper *swapper;
367
368 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
369 return NULL;
370 }
371 if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) {
372 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
373 return NULL;
374 }
375
376 /* allocate the swapper */
377 swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper));
378 if(swapper==NULL) {
379 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
380 return NULL;
381 }
382 uprv_memset(swapper, 0, sizeof(UDataSwapper));
383
384 /* set values and functions pointers according to in/out parameters */
385 swapper->inIsBigEndian=inIsBigEndian;
386 swapper->inCharset=inCharset;
387 swapper->outIsBigEndian=outIsBigEndian;
388 swapper->outCharset=outCharset;
389
390 swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16;
391 swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32;
392
393 swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16;
394 swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32;
395
396 swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic;
397
398 if(inIsBigEndian==outIsBigEndian) {
399 swapper->swapArray16=uprv_copyArray16;
400 swapper->swapArray32=uprv_copyArray32;
401 swapper->swapArray64=uprv_copyArray64;
402 } else {
403 swapper->swapArray16=uprv_swapArray16;
404 swapper->swapArray32=uprv_swapArray32;
405 swapper->swapArray64=uprv_swapArray64;
406 }
407
408 if(inCharset==U_ASCII_FAMILY) {
409 swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii;
410 } else /* U_EBCDIC_FAMILY */ {
411 swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic;
412 }
413
414 return swapper;
415 }
416
417 U_CAPI UDataSwapper * U_EXPORT2
udata_openSwapperForInputData(const void * data,int32_t length,UBool outIsBigEndian,uint8_t outCharset,UErrorCode * pErrorCode)418 udata_openSwapperForInputData(const void *data, int32_t length,
419 UBool outIsBigEndian, uint8_t outCharset,
420 UErrorCode *pErrorCode) {
421 const DataHeader *pHeader;
422 uint16_t headerSize, infoSize;
423 UBool inIsBigEndian;
424 int8_t inCharset;
425
426 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
427 return NULL;
428 }
429 if( data==NULL ||
430 (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
431 outCharset>U_EBCDIC_FAMILY
432 ) {
433 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
434 return NULL;
435 }
436
437 pHeader=(const DataHeader *)data;
438 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
439 pHeader->dataHeader.magic1!=0xda ||
440 pHeader->dataHeader.magic2!=0x27 ||
441 pHeader->info.sizeofUChar!=2
442 ) {
443 *pErrorCode=U_UNSUPPORTED_ERROR;
444 return 0;
445 }
446
447 inIsBigEndian=(UBool)pHeader->info.isBigEndian;
448 inCharset=pHeader->info.charsetFamily;
449
450 if(inIsBigEndian==U_IS_BIG_ENDIAN) {
451 headerSize=pHeader->dataHeader.headerSize;
452 infoSize=pHeader->info.size;
453 } else {
454 headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize);
455 infoSize=uprv_readSwapUInt16(pHeader->info.size);
456 }
457
458 if( headerSize<sizeof(DataHeader) ||
459 infoSize<sizeof(UDataInfo) ||
460 headerSize<(sizeof(pHeader->dataHeader)+infoSize) ||
461 (length>=0 && length<headerSize)
462 ) {
463 *pErrorCode=U_UNSUPPORTED_ERROR;
464 return 0;
465 }
466
467 return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);
468 }
469
470 U_CAPI void U_EXPORT2
udata_closeSwapper(UDataSwapper * ds)471 udata_closeSwapper(UDataSwapper *ds) {
472 uprv_free(ds);
473 }
474