1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39
40 #include "qunicodetools_p.h"
41
42 #include "qunicodetables_p.h"
43 #include "qvarlengtharray.h"
44
45 #include "qharfbuzz_p.h"
46
// Expands to an int with only bit number x set; used in this file to test a value against
// several enumerators at once, e.g. FLAG(prop->category) & (FLAG(a) | FLAG(b)).
#define FLAG(x) (1 << (x))
48
49 QT_BEGIN_NAMESPACE
50
// Switch consulted by getWordBreaks() in QT_BUILD_INTERNAL builds: when non-zero, FULL STOP (U+002E)
// is classified as MidNumLet and COLON (U+003A) as MidLetter regardless of the table data.
// Defaults to 0 (no remapping).
Q_AUTOTEST_EXPORT int qt_initcharattributes_default_algorithm_only = 0;
52
53 namespace QUnicodeTools {
54
55 // -----------------------------------------------------------------------------------------------------
56 //
57 // The text boundaries determination algorithm.
58 // See http://www.unicode.org/reports/tr29/tr29-31.html
59 //
60 // -----------------------------------------------------------------------------------------------------
61
namespace GB {

/*
 * Most grapheme break rules can be implemented table driven, but rules GB10, GB12 and GB13 need a bit
 * of special treatment.
 *
 * State serves two purposes: it is the entry type of breakTable below, and it is the type of the
 * running state that getGraphemeBreaks() carries from one character to the next.
 */
enum State : uchar {
    Break,  // a grapheme boundary is placed before the current character
    Inside, // no boundary before the current character
    GB10,   // (E_B | EBG) followed by Extend: no boundary, start tracking a GB10 sequence
    GB10_2, // Extend followed by Extend: no boundary; tracking continues only if already tracking
    GB10_3, // Extend followed by E_M: boundary unless a GB10 sequence is being tracked
    GB13, // also covers GB12
};

/*
 * Pair table: the row is the grapheme break class of the previous character, the column is the
 * class of the current character (getGraphemeBreaks() indexes it as breakTable[lcls][cls]).
 */
static const State breakTable[QUnicodeTables::NumGraphemeBreakClasses][QUnicodeTables::NumGraphemeBreakClasses] = {
    // Any CR LF Control Extend ZWJ RI Prepend S-Mark L V T LV LVT E_B E_M GAZ EBG
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Any
    { Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Control
    { Break , Break , Break , Break , GB10_2, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , GB10_3, Break , Break }, // Extend
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Inside, Inside }, // ZWJ
    { Break , Break , Break , Break , Inside, Inside, GB13 , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
    { Inside, Break , Break , Break , Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside, Inside }, // Prepend
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SpacingMark
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Inside, Inside, Break , Inside, Inside, Break , Break , Break , Break }, // L
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Inside, Inside, Break , Break , Break , Break , Break , Break }, // V
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break }, // T
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Inside, Inside, Break , Break , Break , Break , Break , Break }, // LV
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break }, // LVT
    { Break , Break , Break , Break , GB10 , Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Inside, Break , Break }, // E_B
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // E_M
    { Break , Break , Break , Break , Inside, Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Break , Break , Break }, // GAZ
    { Break , Break , Break , Break , GB10 , Inside, Break , Break , Inside, Break , Break , Break , Break , Break , Break , Inside, Break , Break }, // EBG
};

} // namespace GB
100
getGraphemeBreaks(const ushort * string,quint32 len,QCharAttributes * attributes)101 static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
102 {
103 QUnicodeTables::GraphemeBreakClass lcls = QUnicodeTables::GraphemeBreak_LF; // to meet GB1
104 GB::State state = GB::Break; // only required to track some of the rules
105 for (quint32 i = 0; i != len; ++i) {
106 quint32 pos = i;
107 uint ucs4 = string[i];
108 if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
109 ushort low = string[i + 1];
110 if (QChar::isLowSurrogate(low)) {
111 ucs4 = QChar::surrogateToUcs4(ucs4, low);
112 ++i;
113 }
114 }
115
116 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
117 QUnicodeTables::GraphemeBreakClass cls = (QUnicodeTables::GraphemeBreakClass) prop->graphemeBreakClass;
118
119 switch (GB::breakTable[lcls][cls]) {
120 case GB::Break:
121 attributes[pos].graphemeBoundary = true;
122 state = GB::Break;
123 break;
124 case GB::Inside:
125 state = GB::Break;
126 break;
127 case GB::GB10:
128 state = GB::GB10;
129 break;
130 case GB::GB10_2:
131 if (state == GB::GB10 || state == GB::GB10_2)
132 state = GB::GB10_2;
133 else
134 state = GB::Break;
135 break;
136 case GB::GB10_3:
137 if (state != GB::GB10 && state != GB::GB10_2)
138 attributes[pos].graphemeBoundary = true;
139 state = GB::Break;
140 break;
141 case GB::GB13:
142 if (state != GB::GB13) {
143 state = GB::GB13;
144 } else {
145 attributes[pos].graphemeBoundary = true;
146 state = GB::Break;
147 }
148 }
149
150 lcls = cls;
151 }
152
153 attributes[len].graphemeBoundary = true; // GB2
154 }
155
156
namespace WB {

// Entry values of breakTable; getWordBreaks() interprets them as follows.
enum Action {
    NoBreak, // no word break between the two characters
    Break,   // word break between the two characters
    Lookup,  // look ahead: no break only if the next non-Extend/ZWJ/Format character has the class of the previous one
    LookupW  // like Lookup, but HebrewLetter or ALetter ahead also prevents the break
};

/*
 * Pair table: the row is the word break class of the previous character, the column is the
 * class of the current character (getWordBreaks() indexes it as breakTable[cls][ncls]).
 */
static const uchar breakTable[QUnicodeTables::NumWordBreakClasses][QUnicodeTables::NumWordBreakClasses] = {
    // Any CR LF Newline Extend ZWJ Format RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtNumLet E_Base E_Mod GAZ EBG WSeg
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Any
    { Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, NoBreak, Break }, // ZWJ
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Format
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break }, // Katakana
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // HebrewLetter
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // ALetter
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // Numeric
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break }, // ExtendNumLet
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break }, // E_Base
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // E_Mod
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // GAZ
    { Break , Break , Break , Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , NoBreak, Break , Break , Break }, // EBG
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // WSeg
};

} // namespace WB
194
getWordBreaks(const ushort * string,quint32 len,QCharAttributes * attributes)195 static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
196 {
197 enum WordType {
198 WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
199 } currentWordType = WordTypeNone;
200
201 QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
202 for (quint32 i = 0; i != len; ++i) {
203 quint32 pos = i;
204 uint ucs4 = string[i];
205 if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
206 ushort low = string[i + 1];
207 if (QChar::isLowSurrogate(low)) {
208 ucs4 = QChar::surrogateToUcs4(ucs4, low);
209 ++i;
210 }
211 }
212
213 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
214 QUnicodeTables::WordBreakClass ncls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
215 #ifdef QT_BUILD_INTERNAL
216 if (qt_initcharattributes_default_algorithm_only) {
217 // as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
218 // which caused "hi.there" to be treated like if it were just a single word;
219 // we keep the pre-5.1 behavior by remapping these characters in the Unicode tables generator
220 // and this code is needed to pass the coverage tests; remove once the issue is fixed.
221 if (ucs4 == 0x002E) // FULL STOP
222 ncls = QUnicodeTables::WordBreak_MidNumLet;
223 else if (ucs4 == 0x003A) // COLON
224 ncls = QUnicodeTables::WordBreak_MidLetter;
225 }
226 #endif
227
228 uchar action = WB::breakTable[cls][ncls];
229 switch (action) {
230 case WB::Break:
231 break;
232 case WB::NoBreak:
233 if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend || ncls == QUnicodeTables::WordBreak_ZWJ || ncls == QUnicodeTables::WordBreak_Format)) {
234 // WB4: X(Extend|Format)* -> X
235 if (cls != QUnicodeTables::WordBreak_ZWJ) // WB3c
236 continue;
237 }
238 if (Q_UNLIKELY(cls == QUnicodeTables::WordBreak_RegionalIndicator)) {
239 // WB15/WB16: break between pairs of Regional indicator
240 ncls = QUnicodeTables::WordBreak_Any;
241 }
242 break;
243 case WB::Lookup:
244 case WB::LookupW:
245 for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
246 ucs4 = string[lookahead];
247 if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
248 ushort low = string[lookahead + 1];
249 if (QChar::isLowSurrogate(low)) {
250 ucs4 = QChar::surrogateToUcs4(ucs4, low);
251 ++lookahead;
252 }
253 }
254
255 prop = QUnicodeTables::properties(ucs4);
256 QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
257
258 if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend || tcls == QUnicodeTables::WordBreak_ZWJ || tcls == QUnicodeTables::WordBreak_Format)) {
259 // WB4: X(Extend|Format)* -> X
260 continue;
261 }
262
263 if (Q_LIKELY(tcls == cls || (action == WB::LookupW && (tcls == QUnicodeTables::WordBreak_HebrewLetter
264 || tcls == QUnicodeTables::WordBreak_ALetter)))) {
265 i = lookahead;
266 ncls = tcls;
267 action = WB::NoBreak;
268 }
269 break;
270 }
271 if (action != WB::NoBreak) {
272 action = WB::Break;
273 if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_SingleQuote && cls == QUnicodeTables::WordBreak_HebrewLetter))
274 action = WB::NoBreak; // WB7a
275 }
276 break;
277 }
278
279 cls = ncls;
280 if (action == WB::Break) {
281 attributes[pos].wordBreak = true;
282 if (currentWordType != WordTypeNone)
283 attributes[pos].wordEnd = true;
284 switch (cls) {
285 case QUnicodeTables::WordBreak_Katakana:
286 currentWordType = WordTypeHiraganaKatakana;
287 attributes[pos].wordStart = true;
288 break;
289 case QUnicodeTables::WordBreak_HebrewLetter:
290 case QUnicodeTables::WordBreak_ALetter:
291 case QUnicodeTables::WordBreak_Numeric:
292 currentWordType = WordTypeAlphaNumeric;
293 attributes[pos].wordStart = true;
294 break;
295 default:
296 currentWordType = WordTypeNone;
297 break;
298 }
299 }
300 }
301
302 if (currentWordType != WordTypeNone)
303 attributes[len].wordEnd = true;
304 attributes[len].wordBreak = true; // WB2
305 }
306
307
namespace SB {

/*
 * States of the sentence break state machine driven by getSentenceBreaks().
 * Initial .. BAfter are real states and have a row in breakTable; Break and Lookup only occur
 * as table entries and are resolved by getSentenceBreaks() before the next table access.
 */
enum State {
    Initial,
    Lower,
    Upper,
    LUATerm,
    ATerm,
    ATermC,
    ACS,
    STerm,
    STermC,
    SCS,
    BAfterC,
    BAfter,
    Break,  // a sentence boundary precedes the current character
    Lookup  // SB8: the decision requires scanning ahead in the text
};

/*
 * Transition table: the row is the current state, the column is the sentence break class of the
 * current character; the entry is the next state (indexed as breakTable[state][ncls]).
 */
static const uchar breakTable[BAfter + 1][QUnicodeTables::NumSentenceBreakClasses] = {
    // Any CR LF Sep Extend Sp Lower Upper OLetter Numeric ATerm SContinue STerm Close
    { Initial, BAfterC, BAfter , BAfter , Initial, Initial, Lower , Upper , Initial, Initial, ATerm , Initial, STerm , Initial }, // Initial
    { Initial, BAfterC, BAfter , BAfter , Lower , Initial, Initial, Initial, Initial, Initial, LUATerm, Initial, STerm , Initial }, // Lower
    { Initial, BAfterC, BAfter , BAfter , Upper , Initial, Initial, Upper , Initial, Initial, LUATerm, Initial, STerm , Initial }, // Upper

    { Lookup , BAfterC, BAfter , BAfter , LUATerm, ACS , Initial, Upper , Break , Initial, ATerm , STerm , STerm , ATermC }, // LUATerm
    { Lookup , BAfterC, BAfter , BAfter , ATerm , ACS , Initial, Break , Break , Initial, ATerm , STerm , STerm , ATermC }, // ATerm
    { Lookup , BAfterC, BAfter , BAfter , ATermC , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , ATermC }, // ATermC
    { Lookup , BAfterC, BAfter , BAfter , ACS , ACS , Initial, Break , Break , Lookup , ATerm , STerm , STerm , Lookup }, // ACS

    { Break , BAfterC, BAfter , BAfter , STerm , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , STermC }, // STerm,
    { Break , BAfterC, BAfter , BAfter , STermC , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , STermC }, // STermC
    { Break , BAfterC, BAfter , BAfter , SCS , SCS , Break , Break , Break , Break , ATerm , STerm , STerm , Break }, // SCS
    { Break , Break , BAfter , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // BAfterC
    { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // BAfter
};

} // namespace SB
346
getSentenceBreaks(const ushort * string,quint32 len,QCharAttributes * attributes)347 static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
348 {
349 uchar state = SB::BAfter; // to meet SB1
350 for (quint32 i = 0; i != len; ++i) {
351 quint32 pos = i;
352 uint ucs4 = string[i];
353 if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
354 ushort low = string[i + 1];
355 if (QChar::isLowSurrogate(low)) {
356 ucs4 = QChar::surrogateToUcs4(ucs4, low);
357 ++i;
358 }
359 }
360
361 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
362 QUnicodeTables::SentenceBreakClass ncls = (QUnicodeTables::SentenceBreakClass) prop->sentenceBreakClass;
363
364 Q_ASSERT(state <= SB::BAfter);
365 state = SB::breakTable[state][ncls];
366 if (Q_UNLIKELY(state == SB::Lookup)) { // SB8
367 state = SB::Break;
368 for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
369 ucs4 = string[lookahead];
370 if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
371 ushort low = string[lookahead + 1];
372 if (QChar::isLowSurrogate(low)) {
373 ucs4 = QChar::surrogateToUcs4(ucs4, low);
374 ++lookahead;
375 }
376 }
377
378 prop = QUnicodeTables::properties(ucs4);
379 QUnicodeTables::SentenceBreakClass tcls = (QUnicodeTables::SentenceBreakClass) prop->sentenceBreakClass;
380 switch (tcls) {
381 case QUnicodeTables::SentenceBreak_Any:
382 case QUnicodeTables::SentenceBreak_Extend:
383 case QUnicodeTables::SentenceBreak_Sp:
384 case QUnicodeTables::SentenceBreak_Numeric:
385 case QUnicodeTables::SentenceBreak_SContinue:
386 case QUnicodeTables::SentenceBreak_Close:
387 continue;
388 case QUnicodeTables::SentenceBreak_Lower:
389 i = lookahead;
390 state = SB::Initial;
391 break;
392 default:
393 break;
394 }
395 break;
396 }
397 }
398 if (Q_UNLIKELY(state == SB::Break)) {
399 attributes[pos].sentenceBoundary = true;
400 state = SB::breakTable[SB::Initial][ncls];
401 }
402 }
403
404 attributes[len].sentenceBoundary = true; // SB2
405 }
406
407
408 // -----------------------------------------------------------------------------------------------------
409 //
410 // The line breaking algorithm.
411 // See http://www.unicode.org/reports/tr14/tr14-39.html
412 //
413 // -----------------------------------------------------------------------------------------------------
414
415 namespace LB {
416
417 namespace NS { // Number Sequence
418
// LB25 recommends to not break lines inside numbers of the form
// described by the following regular expression:
// (PR|PO)?(OP|HY)?NU(NU|SY|IS)*(CL|CP)?(PR|PO)?

// Entry values of actionTable; getLineBreaks() interprets them as follows.
enum Action {
    None,     // not part of a numeric expression: reset the tracking state
    Start,    // a numeric expression may start here: remember the position
    Continue, // the numeric expression goes on
    Break     // the expression ended: line breaks set inside of it are cleared, then the state is reset
};

// Reduced set of classes the number sequence tracking distinguishes (see toClass()).
enum Class {
    XX,   // anything else
    PRPO, // PR or PO
    OPHY, // OP or HY
    NU,
    SYIS, // SY or IS (and AL math symbols)
    CLCP  // CL or CP
};

// Row: class of the previous element of the sequence; column: class of the current character
// (getLineBreaks() indexes it as actionTable[nelast][necur]).
static const uchar actionTable[CLCP + 1][CLCP + 1] = {
    // XX PRPO OPHY NU SYIS CLCP
    { None , Start , Start , Start , None , None }, // XX
    { None , Start , Continue, Continue, None , None }, // PRPO
    { None , Start , Start , Continue, None , None }, // OPHY
    { Break , Break , Break , Continue, Continue, Continue }, // NU
    { Break , Break , Break , Continue, Continue, Continue }, // SYIS
    { Break , Continue, Break , Break , Break , Break }, // CLCP
};
448
toClass(QUnicodeTables::LineBreakClass lbc,QChar::Category category)449 inline Class toClass(QUnicodeTables::LineBreakClass lbc, QChar::Category category)
450 {
451 switch (lbc) {
452 case QUnicodeTables::LineBreak_AL:// case QUnicodeTables::LineBreak_AI:
453 // resolve AI math symbols in numerical context to IS
454 if (category == QChar::Symbol_Math)
455 return SYIS;
456 break;
457 case QUnicodeTables::LineBreak_PR: case QUnicodeTables::LineBreak_PO:
458 return PRPO;
459 case QUnicodeTables::LineBreak_OP: case QUnicodeTables::LineBreak_HY:
460 return OPHY;
461 case QUnicodeTables::LineBreak_NU:
462 return NU;
463 case QUnicodeTables::LineBreak_SY: case QUnicodeTables::LineBreak_IS:
464 return SYIS;
465 case QUnicodeTables::LineBreak_CL: case QUnicodeTables::LineBreak_CP:
466 return CLCP;
467 default:
468 break;
469 }
470 return XX;
471 }
472
473 } // namespace NS
474
475 /* In order to support the tailored implementation of LB25 properly
476 the following changes were made in the pair table to allow breaks
477 where the numeric expression doesn't match the template (i.e. [^NU](IS|SY)NU):
478 (CL)(PO) from IB to DB
479 (CP)(PO) from IB to DB
480 (CL)(PR) from IB to DB
481 (CP)(PR) from IB to DB
482 (PO)(OP) from IB to DB
483 (PR)(OP) from IB to DB
484 (IS)(NU) from IB to DB
485 (SY)(NU) from IB to DB
486 */
487
/* In order to implement LB21a properly, a special rule HH has been introduced and
489 the following changes were made in the pair table to disallow breaks after Hebrew + Hyphen:
490 (HL)(HY|BA) from IB to CI
491 (HY|BA)(!CB) from DB to HH
492 */
493
// Entry values of the line break pair table below. Each enumerator has a two-letter alias so
// that the table rows stay readable.
enum Action {
    ProhibitedBreak, PB = ProhibitedBreak,
    DirectBreak, DB = DirectBreak,
    IndirectBreak, IB = IndirectBreak,
    CombiningIndirectBreak, CI = CombiningIndirectBreak,
    CombiningProhibitedBreak, CP = CombiningProhibitedBreak,
    // special rule introduced for LB21a (no break after Hebrew + Hyphen)
    ProhibitedBreakAfterHebrewPlusHyphen, HH = ProhibitedBreakAfterHebrewPlusHyphen
};
502
// Line break pair table. Each row is labelled with the class of the first character of a pair,
// each column with the class of the second one; the entry is the Action for a break between them.
// The table has LineBreak_SA rows and columns, i.e. it covers the classes ordered before SA.
static const uchar breakTable[QUnicodeTables::LineBreak_SA][QUnicodeTables::LineBreak_SA] = {
/* OP CL CP QU GL NS EX SY IS PR PO NU AL HL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT RI CB EB EM ZWJ*/
/* OP */ { PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, CP, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB, PB },
/* CL */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* CP */ { DB, PB, PB, IB, IB, PB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* QU */ { PB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB },
/* GL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB },
/* NS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* EX */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* SY */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* IS */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* PR */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, IB, DB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, IB, DB, DB, IB, IB, IB },
/* PO */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* NU */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* AL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* HL */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, DB, IB, CI, CI, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* ID */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* IN */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* HY */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, IB, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB, DB, DB, IB },
/* BA */ { HH, PB, PB, IB, HH, IB, PB, PB, PB, HH, HH, HH, HH, HH, HH, HH, IB, IB, HH, HH, PB, CI, PB, HH, HH, HH, HH, HH, HH, DB, DB, DB, IB },
/* BB */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, DB, IB, IB, IB },
/* B2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, PB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* ZW */ { DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB },
/* CM */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, IB, IB, IB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* WJ */ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB, PB, CI, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, IB },
/* H2 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB, DB, DB, DB, DB, IB },
/* H3 */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB, DB, DB, DB, DB, IB },
/* JL */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, IB, IB, IB, IB, DB, DB, DB, DB, DB, IB },
/* JV */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, IB, IB, DB, DB, DB, DB, IB },
/* JT */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, IB, DB, DB, DB, DB, IB },
/* RI */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, IB, DB, DB, DB, IB },
/* CB */ { DB, PB, PB, IB, IB, DB, PB, PB, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* EB */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, IB, IB },
/* EM */ { DB, PB, PB, IB, IB, IB, PB, PB, PB, DB, IB, DB, DB, DB, DB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, DB, DB, IB },
/* ZWJ*/ { IB, PB, PB, IB, IB, IB, PB, PB, PB, IB, IB, IB, IB, IB, IB, IB, IB, IB, DB, DB, PB, CI, PB, DB, DB, DB, DB, DB, DB, DB, IB, IB, IB }
};
539
540 // The following line break classes are not treated by the pair table
541 // and must be resolved outside:
542 // AI, BK, CB, CJ, CR, LF, NL, SA, SG, SP, XX
543
544 } // namespace LB
545
getLineBreaks(const ushort * string,quint32 len,QCharAttributes * attributes,QUnicodeTools::CharAttributeOptions options)546 static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *attributes, QUnicodeTools::CharAttributeOptions options)
547 {
548 quint32 nestart = 0;
549 LB::NS::Class nelast = LB::NS::XX;
550
551 QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
552 QUnicodeTables::LineBreakClass cls = lcls;
553 for (quint32 i = 0; i != len; ++i) {
554 quint32 pos = i;
555 uint ucs4 = string[i];
556 if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
557 ushort low = string[i + 1];
558 if (QChar::isLowSurrogate(low)) {
559 ucs4 = QChar::surrogateToUcs4(ucs4, low);
560 ++i;
561 }
562 }
563
564 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
565 QUnicodeTables::LineBreakClass ncls = (QUnicodeTables::LineBreakClass) prop->lineBreakClass;
566 QUnicodeTables::LineBreakClass tcls;
567
568 if (options & QUnicodeTools::HangulLineBreakTailoring) {
569 if (Q_UNLIKELY((ncls >= QUnicodeTables::LineBreak_H2
570 && ncls <= QUnicodeTables::LineBreak_JT)
571 || (ucs4 >= 0x3130 && ucs4 <= 0x318F && ncls == QUnicodeTables::LineBreak_ID))
572 ) {
573 // LB27: use SPACE for line breaking
574 // "When Korean uses SPACE for line breaking, the classes in rule LB26,
575 // as well as characters of class ID, are often tailored to AL; see Section 8, Customization."
576 // In case of Korean syllables: "3130..318F HANGUL COMPATIBILITY JAMO"
577 ncls = QUnicodeTables::LineBreak_AL;
578 } else {
579 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_SA)) {
580 // LB1: resolve SA to AL, except of those that have Category Mn or Mc be resolved to CM
581 static const int test = FLAG(QChar::Mark_NonSpacing) | FLAG(QChar::Mark_SpacingCombining);
582 if (FLAG(prop->category) & test)
583 ncls = QUnicodeTables::LineBreak_CM;
584 }
585 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_CM)) {
586 // LB10: treat CM that follows SP, BK, CR, LF, NL, or ZW as AL
587 if (lcls == QUnicodeTables::LineBreak_ZW || lcls >= QUnicodeTables::LineBreak_SP)
588 ncls = QUnicodeTables::LineBreak_AL;
589 }
590 }
591 }
592
593 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_SA)) {
594 // LB1: resolve SA to AL, except of those that have Category Mn or Mc be resolved to CM
595 static const int test = FLAG(QChar::Mark_NonSpacing) | FLAG(QChar::Mark_SpacingCombining);
596 if (FLAG(prop->category) & test)
597 ncls = QUnicodeTables::LineBreak_CM;
598 }
599
600 if (Q_UNLIKELY(lcls >= QUnicodeTables::LineBreak_CR)) {
601 // LB4: BK!, LB5: (CRxLF|CR|LF|NL)!
602 if (lcls > QUnicodeTables::LineBreak_CR || ncls != QUnicodeTables::LineBreak_LF)
603 attributes[pos].lineBreak = attributes[pos].mandatoryBreak = true;
604 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_CM || ncls == QUnicodeTables::LineBreak_ZWJ)) {
605 cls = QUnicodeTables::LineBreak_AL;
606 goto next_no_cls_update;
607 }
608 goto next;
609 }
610
611 if (Q_UNLIKELY(ncls >= QUnicodeTables::LineBreak_SP)) {
612 if (ncls > QUnicodeTables::LineBreak_SP)
613 goto next; // LB6: x(BK|CR|LF|NL)
614 goto next_no_cls_update; // LB7: xSP
615 }
616
617 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_CM || ncls == QUnicodeTables::LineBreak_ZWJ)) {
618 // LB9: treat CM that don't follows SP, BK, CR, LF, NL, or ZW as X
619 if (lcls != QUnicodeTables::LineBreak_ZW && lcls < QUnicodeTables::LineBreak_SP)
620 // don't update anything
621 goto next_no_cls_update;
622 }
623
624 if (Q_UNLIKELY(lcls == QUnicodeTables::LineBreak_ZWJ)) {
625 // LB8a: ZWJ x (ID | EB | EM)
626 if (ncls == QUnicodeTables::LineBreak_ID || ncls == QUnicodeTables::LineBreak_EB || ncls == QUnicodeTables::LineBreak_EM)
627 goto next;
628 }
629
630 // LB25: do not break lines inside numbers
631 {
632 LB::NS::Class necur = LB::NS::toClass(ncls, (QChar::Category)prop->category);
633 switch (LB::NS::actionTable[nelast][necur]) {
634 case LB::NS::Break:
635 // do not change breaks before and after the expression
636 for (quint32 j = nestart + 1; j < pos; ++j)
637 attributes[j].lineBreak = false;
638 Q_FALLTHROUGH();
639 case LB::NS::None:
640 nelast = LB::NS::XX; // reset state
641 break;
642 case LB::NS::Start:
643 nestart = i;
644 Q_FALLTHROUGH();
645 default:
646 nelast = necur;
647 break;
648 }
649 }
650
651 if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_RI && lcls == QUnicodeTables::LineBreak_RI)) {
652 // LB30a
653 ncls = QUnicodeTables::LineBreak_SP;
654 goto next;
655 }
656
657 // for South East Asian chars that require a complex analysis, the Unicode
658 // standard recommends to treat them as AL. tailoring that do dictionary analysis can override
659 if (Q_UNLIKELY(cls >= QUnicodeTables::LineBreak_SA))
660 cls = QUnicodeTables::LineBreak_AL;
661
662 tcls = cls;
663 if (tcls == QUnicodeTables::LineBreak_CM)
664 // LB10
665 tcls = QUnicodeTables::LineBreak_AL;
666 switch (LB::breakTable[tcls][ncls < QUnicodeTables::LineBreak_SA ? ncls : QUnicodeTables::LineBreak_AL]) {
667 case LB::DirectBreak:
668 attributes[pos].lineBreak = true;
669 break;
670 case LB::IndirectBreak:
671 if (lcls == QUnicodeTables::LineBreak_SP)
672 attributes[pos].lineBreak = true;
673 break;
674 case LB::CombiningIndirectBreak:
675 if (lcls != QUnicodeTables::LineBreak_SP)
676 goto next_no_cls_update;
677 attributes[pos].lineBreak = true;
678 break;
679 case LB::CombiningProhibitedBreak:
680 if (lcls != QUnicodeTables::LineBreak_SP)
681 goto next_no_cls_update;
682 break;
683 case LB::ProhibitedBreakAfterHebrewPlusHyphen:
684 if (lcls != QUnicodeTables::LineBreak_HL)
685 attributes[pos].lineBreak = true;
686 break;
687 case LB::ProhibitedBreak:
688 // nothing to do
689 default:
690 break;
691 }
692
693 next:
694 cls = ncls;
695 next_no_cls_update:
696 lcls = ncls;
697 }
698
699 if (Q_UNLIKELY(LB::NS::actionTable[nelast][LB::NS::XX] == LB::NS::Break)) {
700 // LB25: do not break lines inside numbers
701 for (quint32 j = nestart + 1; j < len; ++j)
702 attributes[j].lineBreak = false;
703 }
704
705 attributes[0].lineBreak = attributes[0].mandatoryBreak = false; // LB2
706 attributes[len].lineBreak = attributes[len].mandatoryBreak = true; // LB3
707 }
708
709
getWhiteSpaces(const ushort * string,quint32 len,QCharAttributes * attributes)710 static void getWhiteSpaces(const ushort *string, quint32 len, QCharAttributes *attributes)
711 {
712 for (quint32 i = 0; i != len; ++i) {
713 uint ucs4 = string[i];
714 if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
715 ushort low = string[i + 1];
716 if (QChar::isLowSurrogate(low)) {
717 ucs4 = QChar::surrogateToUcs4(ucs4, low);
718 ++i;
719 }
720 }
721
722 if (Q_UNLIKELY(QChar::isSpace(ucs4)))
723 attributes[i].whiteSpace = true;
724 }
725 }
726
727
initCharAttributes(const ushort * string,int length,const ScriptItem * items,int numItems,QCharAttributes * attributes,CharAttributeOptions options)728 Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
729 const ScriptItem *items, int numItems,
730 QCharAttributes *attributes, CharAttributeOptions options)
731 {
732 if (length <= 0)
733 return;
734
735 if (!(options & DontClearAttributes))
736 ::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
737
738 if (options & GraphemeBreaks)
739 getGraphemeBreaks(string, length, attributes);
740 if (options & WordBreaks)
741 getWordBreaks(string, length, attributes);
742 if (options & SentenceBreaks)
743 getSentenceBreaks(string, length, attributes);
744 if (options & LineBreaks)
745 getLineBreaks(string, length, attributes, options);
746 if (options & WhiteSpaces)
747 getWhiteSpaces(string, length, attributes);
748
749 if (!qt_initcharattributes_default_algorithm_only) {
750 if (!items || numItems <= 0)
751 return;
752
753 QVarLengthArray<HB_ScriptItem, 64> scriptItems;
754 scriptItems.reserve(numItems);
755 int start = 0;
756 HB_Script startScript = script_to_hbscript(items[start].script);
757 if (Q_UNLIKELY(startScript == HB_Script_Inherited))
758 startScript = HB_Script_Common;
759 for (int i = start + 1; i < numItems; ++i) {
760 HB_Script script = script_to_hbscript(items[i].script);
761 if (Q_LIKELY(script == startScript || script == HB_Script_Inherited))
762 continue;
763 Q_ASSERT(items[i].position > items[start].position);
764 HB_ScriptItem item;
765 item.pos = items[start].position;
766 item.length = items[i].position - items[start].position;
767 item.script = startScript;
768 item.bidiLevel = 0; // unused
769 scriptItems.append(item);
770 start = i;
771 startScript = script;
772 }
773 if (items[start].position + 1 < length) {
774 HB_ScriptItem item;
775 item.pos = items[start].position;
776 item.length = length - items[start].position;
777 item.script = startScript;
778 item.bidiLevel = 0; // unused
779 scriptItems.append(item);
780 }
781 Q_STATIC_ASSERT(sizeof(QCharAttributes) == sizeof(HB_CharAttributes));
782 HB_GetTailoredCharAttributes(string, length,
783 scriptItems.constData(), scriptItems.size(),
784 reinterpret_cast<HB_CharAttributes *>(attributes));
785 }
786 }
787
788
789 // ----------------------------------------------------------------------------
790 //
791 // The Unicode script property. See http://www.unicode.org/reports/tr24/tr24-24.html
792 //
793 // ----------------------------------------------------------------------------
794
initScripts(const ushort * string,int length,uchar * scripts)795 Q_CORE_EXPORT void initScripts(const ushort *string, int length, uchar *scripts)
796 {
797 int sor = 0;
798 int eor = 0;
799 uchar script = QChar::Script_Common;
800
801 for (int i = 0; i < length; ++i, eor = i) {
802 uint ucs4 = string[i];
803 if (QChar::isHighSurrogate(ucs4) && i + 1 < length) {
804 ushort low = string[i + 1];
805 if (QChar::isLowSurrogate(low)) {
806 ucs4 = QChar::surrogateToUcs4(ucs4, low);
807 ++i;
808 }
809 }
810
811 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(ucs4);
812
813 uchar nscript = prop->script;
814
815 if (Q_LIKELY(nscript == script || nscript <= QChar::Script_Common))
816 continue;
817
818 // inherit preceding Common-s
819 if (Q_UNLIKELY(script <= QChar::Script_Common)) {
820 // also covers a case where the base character of Common script followed
821 // by one or more combining marks of non-Inherited, non-Common script
822 script = nscript;
823 continue;
824 }
825
826 // Never break between a combining mark (gc= Mc, Mn or Me) and its base character.
827 // Thus, a combining mark - whatever its script property value is - should inherit
828 // the script property value of its base character.
829 static const int test = (FLAG(QChar::Mark_NonSpacing) | FLAG(QChar::Mark_SpacingCombining) | FLAG(QChar::Mark_Enclosing));
830 if (Q_UNLIKELY(FLAG(prop->category) & test))
831 continue;
832
833 Q_ASSERT(script > QChar::Script_Common);
834 Q_ASSERT(sor < eor);
835 ::memset(scripts + sor, script, (eor - sor) * sizeof(uchar));
836 sor = eor;
837
838 script = nscript;
839 }
840
841 Q_ASSERT(script >= QChar::Script_Common);
842 Q_ASSERT(eor == length);
843 ::memset(scripts + sor, script, (eor - sor) * sizeof(uchar));
844 }
845
846 } // namespace QUnicodeTools
847
848 QT_END_NAMESPACE
849