1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  udataswp.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003jun05
16 *   created by: Markus W. Scherer
17 *
18 *   Definitions for ICU data transformations for different platforms,
19 *   changing between big- and little-endian data and/or between
20 *   charset families (ASCII<->EBCDIC).
21 */
22 
23 #ifndef __UDATASWP_H__
24 #define __UDATASWP_H__
25 
26 #include <stdarg.h>
27 #include "unicode/utypes.h"
28 
29 /* forward declaration */
30 
31 U_CDECL_BEGIN
32 
/*
 * Forward declaration of UDataSwapper: the function types declared below
 * take a UDataSwapper pointer, while the struct itself is only defined
 * further down in this header.
 */
33 struct UDataSwapper;
34 typedef struct UDataSwapper UDataSwapper;
35 
36 /**
37  * Function type for data transformation.
38  * Transforms data, or just returns the length of the data if
39  * the input length is -1 (preflighting).
40  * Swap functions assume that their data pointers are aligned properly.
41  *
42  * Quick implementation outline:
43  * (best to copy and adapt an existing swapper implementation)
44  * check that the data looks like the expected format
45  * if(length<0) {
46  *   preflight:
47  *   never dereference outData
48  *   read inData and determine the data size
49  *   assume that inData is long enough for this
50  * } else {
51  *   outData can be NULL if length==0
52  *   inData==outData (in-place swapping) possible but not required!
53  *   verify that length>=(actual size)
54  *   if there is a chance that not every byte up to size is reached
55  *     due to padding etc.:
56  *   if(inData!=outData) {
57  *     memcpy(outData, inData, actual size);
58  *   }
59  *   swap contents
60  * }
61  * return actual size
62  *
63  * Further implementation notes:
64  * - read integers from inData before swapping them
65  *   because in-place swapping can make them unreadable
66  * - compareInvChars compares a local Unicode string with already-swapped
67  *   output charset strings
68  *
69  * @param ds Pointer to UDataSwapper containing global data about the
70  *           transformation and function pointers for handling primitive
71  *           types.
72  * @param inData Pointer to the input data to be transformed or examined.
73  * @param length Length of the data, counting bytes. May be -1 for preflighting.
74  *               If length>=0, then transform the data.
75  *               If length==-1, then only determine the length of the data.
76  *               The length cannot be determined from the data itself for all
77  *               types of data (e.g., not for simple arrays of integers).
78  * @param outData Pointer to the output data buffer.
79  *                If length>=0 (transformation), then the output buffer must
80  *                have a capacity of at least length.
81  *                If length==-1, then outData will not be used and can be NULL.
82  * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
83  *                   fulfill U_SUCCESS on input.
84  * @return The actual length of the data.
85  *
86  * @see UDataSwapper
87  * @internal ICU 2.8
88  */
89 typedef int32_t U_CALLCONV
90 UDataSwapFn(const UDataSwapper *ds,
91             const void *inData, int32_t length, void *outData,
92             UErrorCode *pErrorCode);
93 
94 /**
95  * Convert one uint16_t from input to platform endianness.
 * This is the type of the readUInt16 member of UDataSwapper.
 * @param x The 16-bit value in input endianness.
 * @return x converted to platform endianness.
96  * @internal ICU 2.8
97  */
98 typedef uint16_t U_CALLCONV
99 UDataReadUInt16(uint16_t x);
100 
101 /**
102  * Convert one uint32_t from input to platform endianness.
 * This is the type of the readUInt32 member of UDataSwapper.
 * @param x The 32-bit value in input endianness.
 * @return x converted to platform endianness.
103  * @internal ICU 2.8
104  */
105 typedef uint32_t U_CALLCONV
106 UDataReadUInt32(uint32_t x);
107 
108 /**
109  * Convert one uint16_t from platform to input endianness.
 * This is the type of the writeUInt16 member of UDataSwapper.
 * NOTE(review): UDataSwapper also carries outIsBigEndian; "input endianness"
 * here may be intended to mean the output endianness -- confirm against the
 * implementation before relying on this wording.
 * @param p Pointer through which the converted value is delivered
 *          (presumably the destination of the write -- TODO confirm).
 * @param x The 16-bit value in platform endianness.
110  * @internal ICU 2.8
111  */
112 typedef void U_CALLCONV
113 UDataWriteUInt16(uint16_t *p, uint16_t x);
114 
115 /**
116  * Convert one uint32_t from platform to input endianness.
 * This is the type of the writeUInt32 member of UDataSwapper.
 * NOTE(review): UDataSwapper also carries outIsBigEndian; "input endianness"
 * here may be intended to mean the output endianness -- confirm against the
 * implementation before relying on this wording.
 * @param p Pointer through which the converted value is delivered
 *          (presumably the destination of the write -- TODO confirm).
 * @param x The 32-bit value in platform endianness.
117  * @internal ICU 2.8
118  */
119 typedef void U_CALLCONV
120 UDataWriteUInt32(uint32_t *p, uint32_t x);
121 
122 /**
123  * Compare invariant-character strings, one in the output data and the
124  * other one caller-provided in Unicode.
125  * An output data string is compared because strings are usually swapped
126  * before the rest of the data, to allow for sorting of string tables
127  * according to the output charset.
128  * You can use -1 for the length parameters of NUL-terminated strings as usual.
129  * Returns Unicode code point order for invariant characters.
 *
 * @param ds Pointer to the UDataSwapper for the current transformation.
 * @param outString The string taken from the (already swapped) output data.
 * @param outLength Length of outString, or -1 if it is NUL-terminated.
 * @param localString The caller-provided Unicode string.
 * @param localLength Length of localString, or -1 if it is NUL-terminated.
 * @return The comparison result in Unicode code point order
 *         (NOTE(review): presumably <0, 0 or >0 -- the sign convention is
 *         not stated in this header; confirm).
130  * @internal ICU 2.8
131  */
132 typedef int32_t U_CALLCONV
133 UDataCompareInvChars(const UDataSwapper *ds,
134                      const char *outString, int32_t outLength,
135                      const UChar *localString, int32_t localLength);
136 
137 /**
138  * Function for message output when an error occurs during data swapping.
139  * A format string and variable number of arguments are passed
140  * like for vprintf().
 * The arguments arrive as a va_list, which is why this header includes
 * <stdarg.h>. This is the type of the printError member of UDataSwapper;
 * the context argument corresponds to its printErrorContext member.
141  *
142  * @param context A function-specific context pointer.
143  * @param fmt The format string.
144  * @param args The arguments for format string inserts.
145  *
146  * @internal ICU 2.8
147  */
148 typedef void U_CALLCONV
149 UDataPrintError(void *context, const char *fmt, va_list args);
150 
/**
 * Global data about one data transformation: the input and output
 * characteristics (endianness and charset family) together with the
 * function pointers that swap functions (see UDataSwapFn) use for handling
 * primitive types and for reporting errors.
 * The UDataSwapper typedef for this struct is declared near the top of this
 * header.
 */
151 struct UDataSwapper {
152     /** Input endianness. @internal ICU 2.8 */
153     UBool inIsBigEndian;
154     /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
155     uint8_t inCharset;
156     /** Output endianness. @internal ICU 2.8 */
157     UBool outIsBigEndian;
158     /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
159     uint8_t outCharset;
160 
161     /* basic functions for reading data values */
162 
163     /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
164     UDataReadUInt16 *readUInt16;
165     /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
166     UDataReadUInt32 *readUInt32;
167     /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
168     UDataCompareInvChars *compareInvChars;
169 
170     /* basic functions for writing data values */
    /* NOTE(review): the two descriptions below say "input endianness"; given
       the separate outIsBigEndian field this may be meant as output
       endianness -- confirm against the implementation. */
171 
172     /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
173     UDataWriteUInt16 *writeUInt16;
174     /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
175     UDataWriteUInt32 *writeUInt32;
176 
177     /* basic functions for data transformations */
    /* All four members below share the UDataSwapFn contract
       (preflight with length -1, otherwise transform and return the size). */
178 
179     /** Transform an array of 16-bit integers. @internal ICU 2.8 */
180     UDataSwapFn *swapArray16;
181     /** Transform an array of 32-bit integers. @internal ICU 2.8 */
182     UDataSwapFn *swapArray32;
183     /** Transform an array of 64-bit integers. @internal ICU 53 */
184     UDataSwapFn *swapArray64;
185     /** Transform an invariant-character string. @internal ICU 2.8 */
186     UDataSwapFn *swapInvChars;
187 
188     /**
189      * Function for message output when an error occurs during data swapping.
190      * Can be NULL.
191      * @internal ICU 2.8
192      */
193     UDataPrintError *printError;
194     /** Context pointer for printError. @internal ICU 2.8 */
195     void *printErrorContext;
196 };
197 
198 U_CDECL_END
199 
/**
 * Open a UDataSwapper for explicitly specified input and output
 * characteristics.
 * NOTE(review): the implementation is not part of this header; the parameter
 * descriptions are taken from the identically named UDataSwapper fields --
 * confirm against the implementation.
 *
 * @param inIsBigEndian Input endianness.
 * @param inCharset Input charset family. @see U_CHARSET_FAMILY
 * @param outIsBigEndian Output endianness.
 * @param outCharset Output charset family. @see U_CHARSET_FAMILY
 * @param pErrorCode ICU UErrorCode parameter.
 * @return Pointer to a UDataSwapper (presumably to be released with
 *         udata_closeSwapper() -- TODO confirm).
 *
 * @see udata_openSwapperForInputData
 * @see udata_closeSwapper
 */
200 U_CAPI UDataSwapper * U_EXPORT2
201 udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset,
202                   UBool outIsBigEndian, uint8_t outCharset,
203                   UErrorCode *pErrorCode);
204 
205 /**
206  * Open a UDataSwapper for the given input data and the specified output
207  * characteristics.
208  * Values of -1 for any of the characteristics mean the local platform's
209  * characteristics.
210  *
 * @param data Pointer to the input data whose characteristics determine the
 *             input side of the swapper (NOTE(review): presumably read from
 *             the ICU data header at the start of the data -- confirm).
 * @param length Length of the input data (presumably in bytes, as for
 *               UDataSwapFn -- TODO confirm).
 * @param outIsBigEndian Output endianness.
 * @param outCharset Output charset family. @see U_CHARSET_FAMILY
 * @param pErrorCode ICU UErrorCode parameter.
 * @return Pointer to a UDataSwapper (presumably to be released with
 *         udata_closeSwapper() -- TODO confirm).
 *
211  * @see udata_swap
212  * @internal ICU 2.8
213  */
214 U_CAPI UDataSwapper * U_EXPORT2
215 udata_openSwapperForInputData(const void *data, int32_t length,
216                               UBool outIsBigEndian, uint8_t outCharset,
217                               UErrorCode *pErrorCode);
218 
/**
 * Close a UDataSwapper.
 * NOTE(review): presumably the counterpart of udata_openSwapper() and
 * udata_openSwapperForInputData() that releases the swapper; the
 * implementation is not visible in this header -- confirm, including
 * whether a NULL ds is accepted.
 *
 * @param ds The UDataSwapper to close.
 */
219 U_CAPI void U_EXPORT2
220 udata_closeSwapper(UDataSwapper *ds);
221 
222 /**
223  * Read the beginning of an ICU data piece, recognize magic bytes,
224  * swap the structure.
225  * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
 * The parameter list is the same as that of UDataSwapFn; see that type for
 * the meaning of ds, inData, length, outData and pErrorCode.
226  *
227  * @return The size of the data header, in bytes.
228  *
229  * @internal ICU 2.8
230  */
231 U_CAPI int32_t U_EXPORT2
232 udata_swapDataHeader(const UDataSwapper *ds,
233                      const void *inData, int32_t length, void *outData,
234                      UErrorCode *pErrorCode);
235 
236 /**
237  * Convert one int16_t from input to platform endianness.
 * Signed counterpart of the UDataReadUInt16 function type.
 * @param ds Pointer to the UDataSwapper describing the transformation.
 * @param x The 16-bit value in input endianness.
 * @return x converted to platform endianness.
238  * @internal ICU 2.8
239  */
240 U_CAPI int16_t U_EXPORT2
241 udata_readInt16(const UDataSwapper *ds, int16_t x);
242 
243 /**
244  * Convert one int32_t from input to platform endianness.
 * Signed counterpart of the UDataReadUInt32 function type.
 * @param ds Pointer to the UDataSwapper describing the transformation.
 * @param x The 32-bit value in input endianness.
 * @return x converted to platform endianness.
245  * @internal ICU 2.8
246  */
247 U_CAPI int32_t U_EXPORT2
248 udata_readInt32(const UDataSwapper *ds, int32_t x);
249 
250 /**
251  * Swap a block of invariant, NUL-terminated strings, but not padding
252  * bytes after the last string.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
253  * @internal
254  */
255 U_CAPI int32_t U_EXPORT2
256 udata_swapInvStringBlock(const UDataSwapper *ds,
257                          const void *inData, int32_t length, void *outData,
258                          UErrorCode *pErrorCode);
259 
/**
 * Output an error message for a data swapping operation.
 * Takes a format string followed by a variable number of arguments.
 * NOTE(review): presumably forwards to ds->printError together with
 * ds->printErrorContext, and does nothing when printError is NULL (the
 * struct documents that printError can be NULL) -- confirm against the
 * implementation.
 *
 * @param ds Pointer to the UDataSwapper whose error output is used.
 * @param fmt The format string.
 * @param ... The arguments for format string inserts.
 */
260 U_CAPI void U_EXPORT2
261 udata_printError(const UDataSwapper *ds,
262                  const char *fmt,
263                  ...);
264 
265 /* internal exports from putil.c -------------------------------------------- */
266 
267 /* declared here to keep them out of the public putil.h */
268 
269 /**
270  * Swap invariant char * strings ASCII->EBCDIC.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
271  * @internal
272  */
273 U_CAPI int32_t U_EXPORT2
274 uprv_ebcdicFromAscii(const UDataSwapper *ds,
275                      const void *inData, int32_t length, void *outData,
276                      UErrorCode *pErrorCode);
277 
278 /**
279  * Copy invariant ASCII char * strings and verify they are invariant.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
280  * @internal
281  */
282 U_CFUNC int32_t
283 uprv_copyAscii(const UDataSwapper *ds,
284                const void *inData, int32_t length, void *outData,
285                UErrorCode *pErrorCode);
286 
287 /**
288  * Swap invariant char * strings EBCDIC->ASCII.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
289  * @internal
290  */
291 U_CFUNC int32_t
292 uprv_asciiFromEbcdic(const UDataSwapper *ds,
293                      const void *inData, int32_t length, void *outData,
294                      UErrorCode *pErrorCode);
295 
296 /**
297  * Copy invariant EBCDIC char * strings and verify they are invariant.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
298  * @internal
299  */
300 U_CFUNC int32_t
301 uprv_copyEbcdic(const UDataSwapper *ds,
302                 const void *inData, int32_t length, void *outData,
303                 UErrorCode *pErrorCode);
304 
305 /**
306  * Compare ASCII invariant char * with Unicode invariant UChar *
 * The parameter list and return type are the same as those of
 * UDataCompareInvChars; see that type for the meaning of the parameters.
 * Selected by the uprv_compareInvWithUChar macro when U_CHARSET_FAMILY is
 * U_ASCII_FAMILY.
307  * @internal
308  */
309 U_CFUNC int32_t
310 uprv_compareInvAscii(const UDataSwapper *ds,
311                      const char *outString, int32_t outLength,
312                      const UChar *localString, int32_t localLength);
313 
314 /**
315  * Compare EBCDIC invariant char * with Unicode invariant UChar *
 * The parameter list and return type are the same as those of
 * UDataCompareInvChars; see that type for the meaning of the parameters.
 * Selected by the uprv_compareInvWithUChar macro when U_CHARSET_FAMILY is
 * U_EBCDIC_FAMILY.
316  * @internal
317  */
318 U_CFUNC int32_t
319 uprv_compareInvEbcdic(const UDataSwapper *ds,
320                       const char *outString, int32_t outLength,
321                       const UChar *localString, int32_t localLength);
322 
323 /**
324  * \def uprv_compareInvWithUChar
325  * Compare an invariant-character string with a UChar string.
 * Resolves to uprv_compareInvAscii when U_CHARSET_FAMILY is U_ASCII_FAMILY
 * and to uprv_compareInvEbcdic when it is U_EBCDIC_FAMILY; any other value
 * stops compilation with an #error.
326  * @internal
327  */
328 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
329 #   define uprv_compareInvWithUChar uprv_compareInvAscii
330 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
331 #   define uprv_compareInvWithUChar uprv_compareInvEbcdic
332 #else
333 #   error Unknown charset family!
334 #endif
335 
336 // utrie_swap.cpp -----------------------------------------------------------***
337 
338 /**
339  * Swaps a serialized UTrie.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
340  * @internal
341  */
342 U_CAPI int32_t U_EXPORT2
343 utrie_swap(const UDataSwapper *ds,
344            const void *inData, int32_t length, void *outData,
345            UErrorCode *pErrorCode);
346 
347 /**
348  * Swaps a serialized UTrie2.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
349  * @internal
350  */
351 U_CAPI int32_t U_EXPORT2
352 utrie2_swap(const UDataSwapper *ds,
353             const void *inData, int32_t length, void *outData,
354             UErrorCode *pErrorCode);
355 
356 /**
357  * Swaps a serialized UCPTrie.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
358  * @internal
359  */
360 U_CAPI int32_t U_EXPORT2
361 ucptrie_swap(const UDataSwapper *ds,
362              const void *inData, int32_t length, void *outData,
363              UErrorCode *pErrorCode);
364 
365 /**
366  * Swaps a serialized UTrie, UTrie2, or UCPTrie.
 * The parameter list and return type are the same as those of UDataSwapFn;
 * see that type for the meaning of the parameters.
 * NOTE(review): presumably detects which of the three formats the data is
 * in and handles it like utrie_swap, utrie2_swap or ucptrie_swap -- confirm
 * against the implementation.
367  * @internal
368  */
369 U_CAPI int32_t U_EXPORT2
370 utrie_swapAnyVersion(const UDataSwapper *ds,
371                      const void *inData, int32_t length, void *outData,
372                      UErrorCode *pErrorCode);
373 
374 /* draft material, not compiled (disabled with #if 0) ----------------------- */
375 
/*
 * Everything between this #if 0 and its matching #endif is excluded from
 * compilation, so the udata_openSwap() declaration below is not available
 * to code that includes this header.
 */
376 #if 0
377 
378 /* udata.h */
379 
380 /**
381  * Public API function in udata.c
382  *
383  * Same as udata_openChoice() but automatically swaps the data.
384  * isAcceptable, if not NULL, may accept data with endianness and charset family
385  * different from the current platform's properties.
386  * If the data is acceptable and the platform properties do not match, then
387  * the swap function is called to swap an allocated version of the data.
388  * Preflighting may or may not be performed depending on whether the size of
389  * the loaded data item is known.
390  *
391  * @param isAcceptable Same as for udata_openChoice(). May be NULL.
 * @param swap The swap function (a UDataSwapFn) called when the platform
 *             properties do not match.
 * @param printError Error message output function (a UDataPrintError).
 * @param printErrorContext Context pointer for printError.
392  *
393  * @internal ICU 2.8
394  */
395 U_CAPI UDataMemory * U_EXPORT2
396 udata_openSwap(const char *path, const char *type, const char *name,
397                UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext,
398                UDataSwapFn *swap,
399                UDataPrintError *printError, void *printErrorContext,
400                UErrorCode *pErrorCode);
401 
402 #endif
403 
404 #endif
405