1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2010, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  uinvchar.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:2
14 *
15 *   created on: 2004sep14
16 *   created by: Markus W. Scherer
17 *
18 *   Functions for handling invariant characters, moved here from putil.c
19 *   for better modularization.
20 */
21 
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "udataswp.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include "uassert.h"
28 #include "uinvchar.h"
29 
30 /* invariant-character handling --------------------------------------------- */
31 
32 /*
33  * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
34  * appropriately for most EBCDIC codepages.
35  *
36  * They currently also map most other ASCII graphic characters,
37  * appropriately for codepages 37 and 1047.
38  * Exceptions: The characters for []^ have different codes in 37 & 1047.
39  * Both versions are mapped to ASCII.
40  *
41  *    ASCII 37 1047
42  * [     5B BA   AD
43  * ]     5D BB   BD
44  * ^     5E B0   5F
45  *
46  * There are no mappings for variant characters from Unicode to EBCDIC.
47  *
48  * Currently, C0 control codes are also included in these maps.
49  * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
50  * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
51  * but there is no mapping for ASCII LF back to EBCDIC.
52  *
53  *    ASCII EBCDIC S/390-OE
54  * LF    0A     25       15
55  * NEL   85     15       25
56  *
57  * The maps below explicitly exclude the variant
58  * control and graphical characters that are in ASCII-based
59  * codepages at 0x80 and above.
60  * "No mapping" is expressed by mapping to a 00 byte.
61  *
62  * These tables do not establish a converter or a codepage.
63  */
64 
/*
 * Indexed by an EBCDIC byte value; each entry is the ASCII code for that byte.
 * A 0x00 entry means "no mapping" (index 0 itself maps NUL to NUL).
 * Both the codepage-37 and codepage-1047 codes for [ ] ^ map to ASCII here.
 */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
86 
/*
 * Indexed by an ASCII code; each entry is the corresponding EBCDIC byte.
 * A 0x00 entry means "no mapping" (index 0 itself maps NUL to NUL).
 * Variant characters, ASCII LF (0x0a) and all codes 0x80..0xff have no mapping.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
108 
/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase:
 * the EBCDIC uppercase letters (rows 0xc0..0xe0) yield ASCII 'a'..'z'.
 *
 * NOTE(review): the entry for EBCDIC 0xe0 also differs from asciiFromEbcdic[]
 * (0x7c here, 0x5c there) although 0x5c '\' is not a letter -- confirm whether
 * this is intentional before relying on it or changing it.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
131 
132 /*
133  * Bit sets indicating which characters of the ASCII repertoire
134  * (by ASCII/Unicode code) are "invariant".
135  * See utypes.h for more details.
136  *
 * All characters of the ASCII repertoire are considered invariant, except
 * for the control code 0a (LF) and the following graphic characters:
139  * 21  '!' <exclamation mark>
140  * 23  '#' <number sign>
141  * 24  '$' <dollar sign>
142  *
143  * 40  '@' <commercial at>
144  *
145  * 5b  '[' <left bracket>
146  * 5c  '\' <backslash>
147  * 5d  ']' <right bracket>
148  * 5e  '^' <circumflex>
149  *
150  * 60  '`' <grave accent>
151  *
152  * 7b  '{' <left brace>
153  * 7c  '|' <vertical line>
154  * 7d  '}' <right brace>
155  * 7e  '~' <tilde>
156  */
static const uint32_t invariantChars[4]={
    0xfffffbff, /* 00..1f: everything except 0a */
    0xffffffe5, /* 20..3f: everything except 21 23 24 */
    0x87fffffe, /* 40..5f: everything except 40 5b..5e */
    0x87fffffe  /* 60..7f: everything except 60 7b..7e */
};

/*
 * Invariant-character test for unsigned types (or values known to be
 * non-negative) that hold ASCII-family character values.
 *
 * Word (c>>5) of invariantChars[] covers 32 consecutive codes, and bit
 * (c&0x1f) of that word is set when c is invariant. Values above 0x7f are
 * rejected before the table is consulted.
 * The argument is evaluated more than once.
 */
#define UCHAR_IS_INVARIANT(c) \
    (((c)<=0x7f) && ((invariantChars[(c)>>5]>>((c)&0x1f))&1)!=0)

/* Same test for signed types: negative values are rejected before the lookup. */
#define SCHAR_IS_INVARIANT(c) (((c)>=0) && UCHAR_IS_INVARIANT(c))
172 
/*
 * Conversion between platform-charset codes and Unicode/ASCII codes.
 * On ASCII-family platforms the codes are identical; on EBCDIC-family
 * platforms the value is looked up in the translation tables, where a
 * 0x00 result means "no mapping".
 * Arguments and expansions are parenthesized so that the macros behave
 * like a single operand inside larger expressions.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[(u)])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[(u)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
182 
183 
184 U_CAPI void U_EXPORT2
u_charsToUChars(const char * cs,UChar * us,int32_t length)185 u_charsToUChars(const char *cs, UChar *us, int32_t length) {
186     UChar u;
187     uint8_t c;
188 
189     /*
190      * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
191      * For EBCDIC systems, this works for characters with codes from
192      * codepages 37 and 1047 or compatible.
193      */
194     while(length>0) {
195         c=(uint8_t)(*cs++);
196         u=(UChar)CHAR_TO_UCHAR(c);
197         U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
198         *us++=u;
199         --length;
200     }
201 }
202 
203 U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar * us,char * cs,int32_t length)204 u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
205     UChar u;
206 
207     while(length>0) {
208         u=*us++;
209         if(!UCHAR_IS_INVARIANT(u)) {
210             U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
211             u=0;
212         }
213         *cs++=(char)UCHAR_TO_CHAR(u);
214         --length;
215     }
216 }
217 
218 U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char * s,int32_t length)219 uprv_isInvariantString(const char *s, int32_t length) {
220     uint8_t c;
221 
222     for(;;) {
223         if(length<0) {
224             /* NUL-terminated */
225             c=(uint8_t)*s++;
226             if(c==0) {
227                 break;
228             }
229         } else {
230             /* count length */
231             if(length==0) {
232                 break;
233             }
234             --length;
235             c=(uint8_t)*s++;
236             if(c==0) {
237                 continue; /* NUL is invariant */
238             }
239         }
240         /* c!=0 now, one branch below checks c==0 for variant characters */
241 
242         /*
243          * no assertions here because these functions are legitimately called
244          * for strings with variant characters
245          */
246 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
247         if(!UCHAR_IS_INVARIANT(c)) {
248             return FALSE; /* found a variant char */
249         }
250 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
251         c=CHAR_TO_UCHAR(c);
252         if(c==0 || !UCHAR_IS_INVARIANT(c)) {
253             return FALSE; /* found a variant char */
254         }
255 #else
256 #   error U_CHARSET_FAMILY is not valid
257 #endif
258     }
259     return TRUE;
260 }
261 
262 U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar * s,int32_t length)263 uprv_isInvariantUString(const UChar *s, int32_t length) {
264     UChar c;
265 
266     for(;;) {
267         if(length<0) {
268             /* NUL-terminated */
269             c=*s++;
270             if(c==0) {
271                 break;
272             }
273         } else {
274             /* count length */
275             if(length==0) {
276                 break;
277             }
278             --length;
279             c=*s++;
280         }
281 
282         /*
283          * no assertions here because these functions are legitimately called
284          * for strings with variant characters
285          */
286         if(!UCHAR_IS_INVARIANT(c)) {
287             return FALSE; /* found a variant char */
288         }
289     }
290     return TRUE;
291 }
292 
293 /* UDataSwapFn implementations used in udataswp.c ------- */
294 
295 /* convert ASCII to EBCDIC and verify that all characters are invariant */
296 U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)297 uprv_ebcdicFromAscii(const UDataSwapper *ds,
298                      const void *inData, int32_t length, void *outData,
299                      UErrorCode *pErrorCode) {
300     const uint8_t *s;
301     uint8_t *t;
302     uint8_t c;
303 
304     int32_t count;
305 
306     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
307         return 0;
308     }
309     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
310         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
311         return 0;
312     }
313 
314     /* setup and swapping */
315     s=(const uint8_t *)inData;
316     t=(uint8_t *)outData;
317     count=length;
318     while(count>0) {
319         c=*s++;
320         if(!UCHAR_IS_INVARIANT(c)) {
321             udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
322                              length, length-count);
323             *pErrorCode=U_INVALID_CHAR_FOUND;
324             return 0;
325         }
326         *t++=ebcdicFromAscii[c];
327         --count;
328     }
329 
330     return length;
331 }
332 
333 /* this function only checks and copies ASCII strings without conversion */
334 U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)335 uprv_copyAscii(const UDataSwapper *ds,
336                const void *inData, int32_t length, void *outData,
337                UErrorCode *pErrorCode) {
338     const uint8_t *s;
339     uint8_t c;
340 
341     int32_t count;
342 
343     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
344         return 0;
345     }
346     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
347         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
348         return 0;
349     }
350 
351     /* setup and checking */
352     s=(const uint8_t *)inData;
353     count=length;
354     while(count>0) {
355         c=*s++;
356         if(!UCHAR_IS_INVARIANT(c)) {
357             udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
358                              length, length-count);
359             *pErrorCode=U_INVALID_CHAR_FOUND;
360             return 0;
361         }
362         --count;
363     }
364 
365     if(length>0 && inData!=outData) {
366         uprv_memcpy(outData, inData, length);
367     }
368 
369     return length;
370 }
371 
372 /* convert EBCDIC to ASCII and verify that all characters are invariant */
373 U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)374 uprv_asciiFromEbcdic(const UDataSwapper *ds,
375                      const void *inData, int32_t length, void *outData,
376                      UErrorCode *pErrorCode) {
377     const uint8_t *s;
378     uint8_t *t;
379     uint8_t c;
380 
381     int32_t count;
382 
383     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
384         return 0;
385     }
386     if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
387         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388         return 0;
389     }
390 
391     /* setup and swapping */
392     s=(const uint8_t *)inData;
393     t=(uint8_t *)outData;
394     count=length;
395     while(count>0) {
396         c=*s++;
397         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
398             udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
399                              length, length-count);
400             *pErrorCode=U_INVALID_CHAR_FOUND;
401             return 0;
402         }
403         *t++=c;
404         --count;
405     }
406 
407     return length;
408 }
409 
410 /* this function only checks and copies EBCDIC strings without conversion */
411 U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)412 uprv_copyEbcdic(const UDataSwapper *ds,
413                 const void *inData, int32_t length, void *outData,
414                 UErrorCode *pErrorCode) {
415     const uint8_t *s;
416     uint8_t c;
417 
418     int32_t count;
419 
420     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
421         return 0;
422     }
423     if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
424         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
425         return 0;
426     }
427 
428     /* setup and checking */
429     s=(const uint8_t *)inData;
430     count=length;
431     while(count>0) {
432         c=*s++;
433         if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
434             udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
435                              length, length-count);
436             *pErrorCode=U_INVALID_CHAR_FOUND;
437             return 0;
438         }
439         --count;
440     }
441 
442     if(length>0 && inData!=outData) {
443         uprv_memcpy(outData, inData, length);
444     }
445 
446     return length;
447 }
448 
449 U_CFUNC UBool
uprv_isEbcdicAtSign(char c)450 uprv_isEbcdicAtSign(char c) {
451     static const uint8_t ebcdicAtSigns[] = {
452         0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
453     return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
454 }
455 
456 /* compare invariant strings; variant characters compare less than others and unlike each other */
457 U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)458 uprv_compareInvAscii(const UDataSwapper *ds,
459                      const char *outString, int32_t outLength,
460                      const UChar *localString, int32_t localLength) {
461     (void)ds;
462     int32_t minLength;
463     UChar32 c1, c2;
464     uint8_t c;
465 
466     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
467         return 0;
468     }
469 
470     if(outLength<0) {
471         outLength=(int32_t)uprv_strlen(outString);
472     }
473     if(localLength<0) {
474         localLength=u_strlen(localString);
475     }
476 
477     minLength= outLength<localLength ? outLength : localLength;
478 
479     while(minLength>0) {
480         c=(uint8_t)*outString++;
481         if(UCHAR_IS_INVARIANT(c)) {
482             c1=c;
483         } else {
484             c1=-1;
485         }
486 
487         c2=*localString++;
488         if(!UCHAR_IS_INVARIANT(c2)) {
489             c2=-2;
490         }
491 
492         if((c1-=c2)!=0) {
493             return c1;
494         }
495 
496         --minLength;
497     }
498 
499     /* strings start with same prefix, compare lengths */
500     return outLength-localLength;
501 }
502 
503 U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)504 uprv_compareInvEbcdic(const UDataSwapper *ds,
505                       const char *outString, int32_t outLength,
506                       const UChar *localString, int32_t localLength) {
507     (void)ds;
508     int32_t minLength;
509     UChar32 c1, c2;
510     uint8_t c;
511 
512     if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
513         return 0;
514     }
515 
516     if(outLength<0) {
517         outLength=(int32_t)uprv_strlen(outString);
518     }
519     if(localLength<0) {
520         localLength=u_strlen(localString);
521     }
522 
523     minLength= outLength<localLength ? outLength : localLength;
524 
525     while(minLength>0) {
526         c=(uint8_t)*outString++;
527         if(c==0) {
528             c1=0;
529         } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
530             /* c1 is set */
531         } else {
532             c1=-1;
533         }
534 
535         c2=*localString++;
536         if(!UCHAR_IS_INVARIANT(c2)) {
537             c2=-2;
538         }
539 
540         if((c1-=c2)!=0) {
541             return c1;
542         }
543 
544         --minLength;
545     }
546 
547     /* strings start with same prefix, compare lengths */
548     return outLength-localLength;
549 }
550 
551 U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char * s1,const char * s2)552 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
553     int32_t c1, c2;
554 
555     for(;; ++s1, ++s2) {
556         c1=(uint8_t)*s1;
557         c2=(uint8_t)*s2;
558         if(c1!=c2) {
559             if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
560                 c1=-(int32_t)(uint8_t)*s1;
561             }
562             if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
563                 c2=-(int32_t)(uint8_t)*s2;
564             }
565             return c1-c2;
566         } else if(c1==0) {
567             return 0;
568         }
569     }
570 }
571 
572 U_CAPI char U_EXPORT2
uprv_ebcdicToAscii(char c)573 uprv_ebcdicToAscii(char c) {
574     return (char)asciiFromEbcdic[(uint8_t)c];
575 }
576 
577 U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c)578 uprv_ebcdicToLowercaseAscii(char c) {
579     return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
580 }
581 
582 U_CAPI uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t * dst,const uint8_t * src,int32_t n)583 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
584 {
585   uint8_t *orig_dst = dst;
586 
587   if(n==-1) {
588     n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
589   }
590   /* copy non-null */
591   while(*src && n>0) {
592     *(dst++) = asciiFromEbcdic[*(src++)];
593     n--;
594   }
595   /* pad */
596   while(n>0) {
597     *(dst++) = 0;
598     n--;
599   }
600   return orig_dst;
601 }
602 
603 U_CAPI uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t * dst,const uint8_t * src,int32_t n)604 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
605 {
606   uint8_t *orig_dst = dst;
607 
608   if(n==-1) {
609     n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
610   }
611   /* copy non-null */
612   while(*src && n>0) {
613     char ch = ebcdicFromAscii[*(src++)];
614     if(ch == 0) {
615       ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
616     }
617     *(dst++) = ch;
618     n--;
619   }
620   /* pad */
621   while(n>0) {
622     *(dst++) = 0;
623     n--;
624   }
625   return orig_dst;
626 }
627 
628