1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  uinvchar.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:2
14 *
15 *   created on: 2004sep14
16 *   created by: Markus W. Scherer
17 *
18 *   Definitions for handling invariant characters, moved here from putil.c
19 *   for better modularization.
20 */
21 
22 #ifndef __UINVCHAR_H__
23 #define __UINVCHAR_H__
24 
25 #include "unicode/utypes.h"
26 #ifdef __cplusplus
27 #include "unicode/unistr.h"
28 #endif
29 
30 /**
31  * Check if a char string only contains invariant characters.
32  * See utypes.h for details.
33  *
34  * @param s Input string pointer.
35  * @param length Length of the string, can be -1 if NUL-terminated.
36  * @return true if s contains only invariant characters.
37  *
38  * @internal (ICU 2.8)
39  */
40 U_CAPI UBool U_EXPORT2
41 uprv_isInvariantString(const char *s, int32_t length);
42 
43 /**
44  * Check if a Unicode string only contains invariant characters.
45  * See utypes.h for details.
46  *
47  * @param s Input string pointer.
48  * @param length Length of the string, can be -1 if NUL-terminated.
49  * @return true if s contains only invariant characters.
50  *
51  * @internal (ICU 2.8)
52  */
53 U_CAPI UBool U_EXPORT2
54 uprv_isInvariantUString(const UChar *s, int32_t length);
55 
/**
 * \def U_UPPER_ORDINAL
 * Maps an uppercase invariant character to its ordinal number
 * ('A' --> 0, 'B' --> 1, ...). No validation is performed on x.
 *
 * On EBCDIC the uppercase letters are not contiguous, so the mapping is
 * done piecewise over the ranges starting at 'A', 'J' and 'S'.
 * Note: the EBCDIC form evaluates its argument more than once.
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define U_UPPER_ORDINAL(x) ((x) - 'A')
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define U_UPPER_ORDINAL(x) \
        (((x) < 'J') ? ((x) - 'A') : \
         ((x) < 'S') ? ((x) - 'J' + 9) : \
                       ((x) - 'S' + 18))
#else
#   error Unknown charset family!
#endif
70 
71 #ifdef __cplusplus
72 
73 U_NAMESPACE_BEGIN
74 
/**
 * Like U_UPPER_ORDINAL(x) but with validation.
 *
 * @param c Character code to convert.
 * @return 0..25 for A..Z; for any other input, a value outside 0..25
 *         (negative, or greater than 25).
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // ASCII: A-Z is contiguous, so anything outside it lands outside 0..25.
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'I') { return c - 'A'; }  // A-I --> 0..8 (negative below 'A')
    if (c < 'J') { return -1; }  // gap between I and J
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
    if (c < 'S') { return -1; }  // gap between R and S
    return c - 'S' + 18;  // S-Z --> 18..25 (greater than 25 above 'Z')
#else
#   error Unknown charset family!
#endif
}
94 
/**
 * Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
 *
 * @param c Character code to convert.
 * @return 0..25 for a..z; for any other input, a value outside 0..25
 *         (negative, or greater than 25).
 */
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // ASCII: a-z is contiguous, so anything outside it lands outside 0..25.
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'i') { return c - 'a'; }  // a-i --> 0..8 (negative below 'a')
    if (c < 'j') { return -1; }  // gap between i and j
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    if (c < 's') { return -1; }  // gap between r and s
    return c - 's' + 18;  // s-z --> 18..25 (greater than 25 above 'z')
#else
#   error Unknown charset family!
#endif
}
112 
113 U_NAMESPACE_END
114 
115 #endif
116 
117 /**
118  * Returns true if c == '@' is possible.
119  * The @ sign is variant, and the @ sign used on one
120  * EBCDIC machine won't be compiled the same way on other EBCDIC based machines.
121  * @internal
122  */
123 U_CFUNC UBool
124 uprv_isEbcdicAtSign(char c);
125 
/**
 * \def uprv_isAtSign
 * Evaluates to true if c == '@' is possible.
 * On ASCII platforms this is an exact comparison against '@';
 * on EBCDIC platforms the decision is delegated to uprv_isEbcdicAtSign().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_isAtSign(c) ('@' == (c))
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
#else
#   error Unknown charset family!
#endif
139 
140 /**
141  * Compare two EBCDIC invariant-character strings in ASCII order.
142  * @internal
143  */
144 U_CAPI int32_t U_EXPORT2
145 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
146 
/**
 * \def uprv_compareInvCharsAsAscii
 * Compares two invariant-character strings in ASCII order.
 * On ASCII platforms this expands to uprv_strcmp(); on EBCDIC platforms
 * it expands to uprv_compareInvEbcdicAsAscii().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
#else
#   error Unknown charset family!
#endif
159 
160 /**
161  * Converts an EBCDIC invariant character to ASCII.
162  * @internal
163  */
164 U_CAPI char U_EXPORT2
165 uprv_ebcdicToAscii(char c);
166 
/**
 * \def uprv_invCharToAscii
 * Converts an invariant character to ASCII.
 * On ASCII platforms the character is passed through unchanged;
 * on EBCDIC platforms it is converted by uprv_ebcdicToAscii().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_invCharToAscii(c) (c)
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
#else
#   error Unknown charset family!
#endif
179 
180 /**
181  * Converts an EBCDIC invariant character to lowercase ASCII.
182  * @internal
183  */
184 U_CAPI char U_EXPORT2
185 uprv_ebcdicToLowercaseAscii(char c);
186 
/**
 * \def uprv_invCharToLowercaseAscii
 * Converts an invariant character to lowercase ASCII.
 * This is an object-like alias for the platform-appropriate function:
 * uprv_asciitolower on ASCII platforms, uprv_ebcdicToLowercaseAscii on
 * EBCDIC platforms.
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
#else
#   error Unknown charset family!
#endif
199 
200 /**
201  * Copy EBCDIC to ASCII
202  * @internal
203  * @see uprv_strncpy
204  */
205 U_CAPI uint8_t* U_EXPORT2
206 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
207 
208 
209 /**
210  * Copy ASCII to EBCDIC
211  * @internal
212  * @see uprv_strncpy
213  */
214 U_CAPI uint8_t* U_EXPORT2
215 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n);
216 
217 
218 
219 #endif
220