1 /*
2  *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3  *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef WTF_UNICODE_ICU_H
24 #define WTF_UNICODE_ICU_H
25 
26 #include <stdlib.h>
27 #include <unicode/uchar.h>
28 #include <unicode/ustring.h>
29 #include <unicode/utf16.h>
30 
31 namespace WTF {
32 namespace Unicode {
33 
// Bidirectional character classes.
// Every enumerator is defined as the corresponding ICU UCharDirection
// constant, which is what lets direction() static_cast the result of
// u_charDirection() straight into this type without a lookup table.
// NOTE(review): newer ICU releases appear to define additional
// UCharDirection values (the directional isolates) that have no enumerator
// here — confirm whether callers can ever observe them.
enum Direction {
    LeftToRight = U_LEFT_TO_RIGHT,
    RightToLeft = U_RIGHT_TO_LEFT,
    EuropeanNumber = U_EUROPEAN_NUMBER,
    EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
    EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
    ArabicNumber = U_ARABIC_NUMBER,
    CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
    BlockSeparator = U_BLOCK_SEPARATOR,
    SegmentSeparator = U_SEGMENT_SEPARATOR,
    WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
    OtherNeutral = U_OTHER_NEUTRAL,
    LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
    LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
    RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
    RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
    RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
    PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
    NonSpacingMark = U_DIR_NON_SPACING_MARK,
    BoundaryNeutral = U_BOUNDARY_NEUTRAL
};
55 
56 enum DecompositionType {
57     DecompositionNone = U_DT_NONE,
58     DecompositionCanonical = U_DT_CANONICAL,
59     DecompositionCompat = U_DT_COMPAT,
60     DecompositionCircle = U_DT_CIRCLE,
61     DecompositionFinal = U_DT_FINAL,
62     DecompositionFont = U_DT_FONT,
63     DecompositionFraction = U_DT_FRACTION,
64     DecompositionInitial = U_DT_INITIAL,
65     DecompositionIsolated = U_DT_ISOLATED,
66     DecompositionMedial = U_DT_MEDIAL,
67     DecompositionNarrow = U_DT_NARROW,
68     DecompositionNoBreak = U_DT_NOBREAK,
69     DecompositionSmall = U_DT_SMALL,
70     DecompositionSquare = U_DT_SQUARE,
71     DecompositionSub = U_DT_SUB,
72     DecompositionSuper = U_DT_SUPER,
73     DecompositionVertical = U_DT_VERTICAL,
74     DecompositionWide = U_DT_WIDE,
75 };
76 
// General categories, expressed as mask values rather than ordinals.
// Each enumerator (other than NoCategory) is U_MASK() applied to one ICU
// general-category constant, which is the same form U_GET_GC_MASK() yields;
// category() relies on that to static_cast its result into this type.
// Because the values are masks (per ICU, U_MASK(x) is a single bit), several
// categories can be OR-ed together and tested against a category() result.
enum CharCategory {
    // Zero: no category bit set.
    NoCategory =  0,
    Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
    // Letters.
    Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
    Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
    Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
    Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
    Letter_Other = U_MASK(U_OTHER_LETTER),

    // Marks.
    Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
    Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
    Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),

    // Numbers.
    Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
    Number_Letter = U_MASK(U_LETTER_NUMBER),
    Number_Other = U_MASK(U_OTHER_NUMBER),

    // Separators.
    Separator_Space = U_MASK(U_SPACE_SEPARATOR),
    Separator_Line = U_MASK(U_LINE_SEPARATOR),
    Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),

    // Other (control, format, private use, surrogate).
    Other_Control = U_MASK(U_CONTROL_CHAR),
    Other_Format = U_MASK(U_FORMAT_CHAR),
    Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
    Other_Surrogate = U_MASK(U_SURROGATE),

    // Punctuation.
    Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
    Punctuation_Open = U_MASK(U_START_PUNCTUATION),
    Punctuation_Close = U_MASK(U_END_PUNCTUATION),
    Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
    Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),

    // Symbols.
    Symbol_Math = U_MASK(U_MATH_SYMBOL),
    Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
    Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
    Symbol_Other = U_MASK(U_OTHER_SYMBOL),

    // Quotation punctuation.
    Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
    Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
};
117 
foldCase(UChar32 c)118 inline UChar32 foldCase(UChar32 c)
119 {
120     return u_foldCase(c, U_FOLD_CASE_DEFAULT);
121 }
122 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)123 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
124 {
125     UErrorCode status = U_ZERO_ERROR;
126     int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
127     *error = !U_SUCCESS(status);
128     return realLength;
129 }
130 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)131 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
132 {
133     UErrorCode status = U_ZERO_ERROR;
134     int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
135     *error = !!U_FAILURE(status);
136     return realLength;
137 }
138 
toLower(UChar32 c)139 inline UChar32 toLower(UChar32 c)
140 {
141     return u_tolower(c);
142 }
143 
toUpper(UChar32 c)144 inline UChar32 toUpper(UChar32 c)
145 {
146     return u_toupper(c);
147 }
148 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)149 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
150 {
151     UErrorCode status = U_ZERO_ERROR;
152     int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
153     *error = !!U_FAILURE(status);
154     return realLength;
155 }
156 
toTitleCase(UChar32 c)157 inline UChar32 toTitleCase(UChar32 c)
158 {
159     return u_totitle(c);
160 }
161 
isArabicChar(UChar32 c)162 inline bool isArabicChar(UChar32 c)
163 {
164       return ublock_getCode(c) == UBLOCK_ARABIC;
165 }
166 
isAlphanumeric(UChar32 c)167 inline bool isAlphanumeric(UChar32 c)
168 {
169     return u_isalnum(c);
170 }
171 
isSeparatorSpace(UChar32 c)172 inline bool isSeparatorSpace(UChar32 c)
173 {
174     return u_charType(c) == U_SPACE_SEPARATOR;
175 }
176 
isPrintableChar(UChar32 c)177 inline bool isPrintableChar(UChar32 c)
178 {
179     return !!u_isprint(c);
180 }
181 
isPunct(UChar32 c)182 inline bool isPunct(UChar32 c)
183 {
184     return !!u_ispunct(c);
185 }
186 
hasLineBreakingPropertyComplexContext(UChar32 c)187 inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
188 {
189     return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
190 }
191 
hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)192 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
193 {
194     int32_t prop = u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
195     return prop == U_LB_COMPLEX_CONTEXT || prop == U_LB_IDEOGRAPHIC;
196 }
197 
mirroredChar(UChar32 c)198 inline UChar32 mirroredChar(UChar32 c)
199 {
200     return u_charMirror(c);
201 }
202 
category(UChar32 c)203 inline CharCategory category(UChar32 c)
204 {
205     return static_cast<CharCategory>(U_GET_GC_MASK(c));
206 }
207 
direction(UChar32 c)208 inline Direction direction(UChar32 c)
209 {
210     return static_cast<Direction>(u_charDirection(c));
211 }
212 
isLower(UChar32 c)213 inline bool isLower(UChar32 c)
214 {
215     return !!u_islower(c);
216 }
217 
combiningClass(UChar32 c)218 inline uint8_t combiningClass(UChar32 c)
219 {
220     return u_getCombiningClass(c);
221 }
222 
decompositionType(UChar32 c)223 inline DecompositionType decompositionType(UChar32 c)
224 {
225     return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
226 }
227 
umemcasecmp(const UChar * a,const UChar * b,int len)228 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
229 {
230     return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
231 }
232 
233 } }
234 
235 #endif // WTF_UNICODE_ICU_H
236