1 /*
2 * Copyright (C) 2006 George Staikos <staikos@kde.org>
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef WTF_UNICODE_QT4_H
24 #define WTF_UNICODE_QT4_H
25
26 #include <QChar>
27 #include <QString>
28
29 #include <config.h>
30
31 #include <stdint.h>
32
33 QT_BEGIN_NAMESPACE
34 namespace QUnicodeTables {
35 struct Properties {
36 ushort category : 8;
37 ushort line_break_class : 8;
38 ushort direction : 8;
39 ushort combiningClass :8;
40 ushort joining : 2;
41 signed short digitValue : 6; /* 5 needed */
42 ushort unicodeVersion : 4;
43 ushort lowerCaseSpecial : 1;
44 ushort upperCaseSpecial : 1;
45 ushort titleCaseSpecial : 1;
46 ushort caseFoldSpecial : 1; /* currently unused */
47 signed short mirrorDiff : 16;
48 signed short lowerCaseDiff : 16;
49 signed short upperCaseDiff : 16;
50 signed short titleCaseDiff : 16;
51 signed short caseFoldDiff : 16;
52 };
53 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
54 Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
55 }
56 QT_END_NAMESPACE
57
58 // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
59 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT)
60 typedef wchar_t UChar;
61 #else
62 typedef uint16_t UChar;
63 #endif
64 typedef uint32_t UChar32;
65
// Some defines taken from ICU.

// True if c is a lead (high) surrogate code unit, 0xD800..0xDBFF.
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
// True if c is a trail (low) surrogate code unit, 0xDC00..0xDFFF.
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
// Constant subtracted when combining a lead/trail pair into a code point.
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
// Combines a lead and a trail surrogate into the supplementary code point.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)

// Lead surrogate code unit for a supplementary code point.
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
// Trail surrogate code unit for a supplementary code point.
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)

// True if c lies in the surrogate range 0xD800..0xDFFF.
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
// True if c is a complete code point on its own (not a surrogate).
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// For a c already known to be a surrogate: true if it is the lead one.
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
81
// Reads the code point that starts at s[i] into c and advances i past it.
// When s[i] is a lead surrogate and the next unit (still below length) is a
// trail surrogate, both units are consumed and c receives the combined
// supplementary code point. Otherwise c is the single unit that was read,
// even if that unit is an unpaired surrogate.
#define U16_NEXT(s, i, length, c) { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}
92
// Steps i back by one code point and stores that code point in c.
// When the unit before i is a trail surrogate and the unit before that
// (not below start) is a lead surrogate, both are consumed and c receives
// the combined supplementary code point. Otherwise c is the single unit
// that was read, even if that unit is an unpaired surrogate.
#define U16_PREV(s, start, i, c) { \
    (c) = (s)[--(i)]; \
    if (U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if ((i) > (start) && U16_IS_LEAD(__c2 = (s)[(i) - 1])) { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
}
103
// 32-bit mask with only bit number x set.
#define U_MASK(x) ((uint32_t)1 << (x))
105
106 namespace WTF {
107 namespace Unicode {
108
109 QT_USE_NAMESPACE
110
111 enum Direction {
112 LeftToRight = QChar::DirL,
113 RightToLeft = QChar::DirR,
114 EuropeanNumber = QChar::DirEN,
115 EuropeanNumberSeparator = QChar::DirES,
116 EuropeanNumberTerminator = QChar::DirET,
117 ArabicNumber = QChar::DirAN,
118 CommonNumberSeparator = QChar::DirCS,
119 BlockSeparator = QChar::DirB,
120 SegmentSeparator = QChar::DirS,
121 WhiteSpaceNeutral = QChar::DirWS,
122 OtherNeutral = QChar::DirON,
123 LeftToRightEmbedding = QChar::DirLRE,
124 LeftToRightOverride = QChar::DirLRO,
125 RightToLeftArabic = QChar::DirAL,
126 RightToLeftEmbedding = QChar::DirRLE,
127 RightToLeftOverride = QChar::DirRLO,
128 PopDirectionalFormat = QChar::DirPDF,
129 NonSpacingMark = QChar::DirNSM,
130 BoundaryNeutral = QChar::DirBN
131 };
132
133 enum DecompositionType {
134 DecompositionNone = QChar::NoDecomposition,
135 DecompositionCanonical = QChar::Canonical,
136 DecompositionCompat = QChar::Compat,
137 DecompositionCircle = QChar::Circle,
138 DecompositionFinal = QChar::Final,
139 DecompositionFont = QChar::Font,
140 DecompositionFraction = QChar::Fraction,
141 DecompositionInitial = QChar::Initial,
142 DecompositionIsolated = QChar::Isolated,
143 DecompositionMedial = QChar::Medial,
144 DecompositionNarrow = QChar::Narrow,
145 DecompositionNoBreak = QChar::NoBreak,
146 DecompositionSmall = QChar::Small,
147 DecompositionSquare = QChar::Square,
148 DecompositionSub = QChar::Sub,
149 DecompositionSuper = QChar::Super,
150 DecompositionVertical = QChar::Vertical,
151 DecompositionWide = QChar::Wide
152 };
153
154 enum CharCategory {
155 NoCategory = 0,
156 Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
157 Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
158 Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
159 Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
160 Number_Letter = U_MASK(QChar::Number_Letter),
161 Number_Other = U_MASK(QChar::Number_Other),
162 Separator_Space = U_MASK(QChar::Separator_Space),
163 Separator_Line = U_MASK(QChar::Separator_Line),
164 Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
165 Other_Control = U_MASK(QChar::Other_Control),
166 Other_Format = U_MASK(QChar::Other_Format),
167 Other_Surrogate = U_MASK(QChar::Other_Surrogate),
168 Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
169 Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
170 Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
171 Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
172 Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
173 Letter_Modifier = U_MASK(QChar::Letter_Modifier),
174 Letter_Other = U_MASK(QChar::Letter_Other),
175 Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
176 Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
177 Punctuation_Open = U_MASK(QChar::Punctuation_Open),
178 Punctuation_Close = U_MASK(QChar::Punctuation_Close),
179 Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
180 Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
181 Punctuation_Other = U_MASK(QChar::Punctuation_Other),
182 Symbol_Math = U_MASK(QChar::Symbol_Math),
183 Symbol_Currency = U_MASK(QChar::Symbol_Currency),
184 Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
185 Symbol_Other = U_MASK(QChar::Symbol_Other)
186 };
187
188
189 // FIXME: handle surrogates correctly in all methods
190
toLower(UChar32 ch)191 inline UChar32 toLower(UChar32 ch)
192 {
193 return QChar::toLower(ch);
194 }
195
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)196 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
197 {
198 const UChar *e = src + srcLength;
199 const UChar *s = src;
200 UChar *r = result;
201 uint rindex = 0;
202
203 // this avoids one out of bounds check in the loop
204 if (s < e && QChar(*s).isLowSurrogate()) {
205 if (r)
206 r[rindex] = *s++;
207 ++rindex;
208 }
209
210 int needed = 0;
211 while (s < e && (rindex < uint(resultLength) || !r)) {
212 uint c = *s;
213 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
214 c = QChar::surrogateToUcs4(*(s - 1), c);
215 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
216 if (prop->lowerCaseSpecial) {
217 QString qstring;
218 if (c < 0x10000) {
219 qstring += QChar(c);
220 } else {
221 qstring += QChar(*(s-1));
222 qstring += QChar(*s);
223 }
224 qstring = qstring.toLower();
225 for (int i = 0; i < qstring.length(); ++i) {
226 if (rindex >= uint(resultLength)) {
227 needed += qstring.length() - i;
228 break;
229 }
230 if (r)
231 r[rindex] = qstring.at(i).unicode();
232 ++rindex;
233 }
234 } else {
235 if (r)
236 r[rindex] = *s + prop->lowerCaseDiff;
237 ++rindex;
238 }
239 ++s;
240 }
241 if (s < e)
242 needed += e - s;
243 *error = (needed != 0);
244 if (rindex < uint(resultLength))
245 r[rindex] = 0;
246 return rindex + needed;
247 }
248
toUpper(UChar32 ch)249 inline UChar32 toUpper(UChar32 ch)
250 {
251 return QChar::toUpper(ch);
252 }
253
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)254 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
255 {
256 const UChar *e = src + srcLength;
257 const UChar *s = src;
258 UChar *r = result;
259 int rindex = 0;
260
261 // this avoids one out of bounds check in the loop
262 if (s < e && QChar(*s).isLowSurrogate()) {
263 if (r)
264 r[rindex] = *s++;
265 ++rindex;
266 }
267
268 int needed = 0;
269 while (s < e && (rindex < resultLength || !r)) {
270 uint c = *s;
271 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
272 c = QChar::surrogateToUcs4(*(s - 1), c);
273 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
274 if (prop->upperCaseSpecial) {
275 QString qstring;
276 if (c < 0x10000) {
277 qstring += QChar(c);
278 } else {
279 qstring += QChar(*(s-1));
280 qstring += QChar(*s);
281 }
282 qstring = qstring.toUpper();
283 for (int i = 0; i < qstring.length(); ++i) {
284 if (rindex >= resultLength) {
285 needed += qstring.length() - i;
286 break;
287 }
288 if (r)
289 r[rindex] = qstring.at(i).unicode();
290 ++rindex;
291 }
292 } else {
293 if (r)
294 r[rindex] = *s + prop->upperCaseDiff;
295 ++rindex;
296 }
297 ++s;
298 }
299 if (s < e)
300 needed += e - s;
301 *error = (needed != 0);
302 if (rindex < resultLength)
303 r[rindex] = 0;
304 return rindex + needed;
305 }
306
toTitleCase(UChar32 c)307 inline int toTitleCase(UChar32 c)
308 {
309 return QChar::toTitleCase(c);
310 }
311
foldCase(UChar32 c)312 inline UChar32 foldCase(UChar32 c)
313 {
314 return QChar::toCaseFolded(c);
315 }
316
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)317 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
318 {
319 // FIXME: handle special casing. Easiest with some low level API in Qt
320 *error = false;
321 if (resultLength < srcLength) {
322 *error = true;
323 return srcLength;
324 }
325 for (int i = 0; i < srcLength; ++i)
326 result[i] = QChar::toCaseFolded(ushort(src[i]));
327 return srcLength;
328 }
329
isArabicChar(UChar32 c)330 inline bool isArabicChar(UChar32 c)
331 {
332 return c >= 0x0600 && c <= 0x06FF;
333 }
334
isPrintableChar(UChar32 c)335 inline bool isPrintableChar(UChar32 c)
336 {
337 const uint test = U_MASK(QChar::Other_Control) |
338 U_MASK(QChar::Other_NotAssigned);
339 return !(U_MASK(QChar::category(c)) & test);
340 }
341
isSeparatorSpace(UChar32 c)342 inline bool isSeparatorSpace(UChar32 c)
343 {
344 return QChar::category(c) == QChar::Separator_Space;
345 }
346
isPunct(UChar32 c)347 inline bool isPunct(UChar32 c)
348 {
349 const uint test = U_MASK(QChar::Punctuation_Connector) |
350 U_MASK(QChar::Punctuation_Dash) |
351 U_MASK(QChar::Punctuation_Open) |
352 U_MASK(QChar::Punctuation_Close) |
353 U_MASK(QChar::Punctuation_InitialQuote) |
354 U_MASK(QChar::Punctuation_FinalQuote) |
355 U_MASK(QChar::Punctuation_Other);
356 return U_MASK(QChar::category(c)) & test;
357 }
358
isLower(UChar32 c)359 inline bool isLower(UChar32 c)
360 {
361 return QChar::category(c) == QChar::Letter_Lowercase;
362 }
363
hasLineBreakingPropertyComplexContext(UChar32)364 inline bool hasLineBreakingPropertyComplexContext(UChar32)
365 {
366 // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
367 return false;
368 }
369
mirroredChar(UChar32 c)370 inline UChar32 mirroredChar(UChar32 c)
371 {
372 return QChar::mirroredChar(c);
373 }
374
combiningClass(UChar32 c)375 inline uint8_t combiningClass(UChar32 c)
376 {
377 return QChar::combiningClass(c);
378 }
379
decompositionType(UChar32 c)380 inline DecompositionType decompositionType(UChar32 c)
381 {
382 return (DecompositionType)QChar::decompositionTag(c);
383 }
384
umemcasecmp(const UChar * a,const UChar * b,int len)385 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
386 {
387 // handle surrogates correctly
388 for (int i = 0; i < len; ++i) {
389 uint c1 = QChar::toCaseFolded(ushort(a[i]));
390 uint c2 = QChar::toCaseFolded(ushort(b[i]));
391 if (c1 != c2)
392 return c1 - c2;
393 }
394 return 0;
395 }
396
direction(UChar32 c)397 inline Direction direction(UChar32 c)
398 {
399 return (Direction)QChar::direction(c);
400 }
401
category(UChar32 c)402 inline CharCategory category(UChar32 c)
403 {
404 return (CharCategory) U_MASK(QChar::category(c));
405 }
406
407 } }
408
409 #endif // WTF_UNICODE_QT4_H
410