1 // SPDX-License-Identifier: LGPL-3.0-linking-exception
2 unit BGRAUnicode;
3 { Implementation of Unicode bidi algorithm }
4 { Author: circular }
5
6 {$mode objfpc}{$H+}
7 {$modeswitch advancedrecords}
8
9 interface
10
11 uses
12 BGRAClasses, SysUtils;
13
14 type
15 TUnicodeBidiClass = (ubcBoundaryNeutral, ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals,
16 ubcCommonSeparator, ubcNonSpacingMark,
17 ubcLeftToRight, ubcEuropeanNumber, ubcEuropeanNumberSeparator, ubcEuropeanNumberTerminator,
18 ubcRightToLeft, ubcArabicLetter, ubcArabicNumber,
19 ubcUnknown,
20 ubcCombiningLeftToRight, //ubcLeftToRight in Mc category
21 ubcMirroredNeutral); //ubcOtherNeutrals with Mirrored property
22 TUnicodeJoiningType = (ujtNonJoining{U}, ujtTransparent{T}, ujtRightJoining{R}, ujtLeftJoining{L},
23 ujtDualJoining{D}, ujtJoinCausing{C});
24 TFontBidiMode = (fbmAuto, fbmLeftToRight, fbmRightToLeft);
25
26 const
27 ubcNeutral = [ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals];
28
29 BIDI_FLAG_REMOVED = 1; //RLE, LRE, RLO, LRO, PDF and BN are supposed to be removed
30 BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH = 2; //implicit end of paragraph (paragraph spacing below due to end of text)
31 BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH = 4; //explicit end of paragraph (paragraph spacing below due to paragraph split)
32 BIDI_FLAG_END_OF_LINE = 8; //line break <br>
33 BIDI_FLAG_LIGATURE_RIGHT = 16; //joins to the letter on the right (possible for joining type R and D)
34 BIDI_FLAG_LIGATURE_LEFT = 32; //joins to the letter on the left (possible for joining type L and D)
35 BIDI_FLAG_LIGATURE_BOUNDARY = 64; //zero-width joiner or non-joiner
36 BIDI_FLAG_LIGATURE_TRANSPARENT = 128; //does not affect ligature
37 BIDI_FLAG_RTL_SCRIPT = 256; //script is written from right to left (arabic, N'Ko...)
38 BIDI_FLAG_NON_SPACING_MARK = 512; //it is a non-spacing mark
39 BIDI_FLAG_COMBINING_LEFT = 1024; //this letter is to be combined to the left of previous letter
40 BIDI_FLAG_COMBINING_RIGHT = 2048; //this letter is to be combined to the right of previous letter
41 BIDI_FLAG_MULTICHAR_START = 4096; //start of a multichar (letter + non spacing marks, non spacing marks)
42 BIDI_FLAG_MIRRORED = 8192; //the glyph is mirrored when in RTL text
43
44 type
45 PUnicodeBidiInfo = ^TUnicodeBidiInfo;
46
47 { TUnicodeBidiInfo }
48
49 TUnicodeBidiInfo = packed record
50 private
51 function GetDiscardable: boolean;
52 function GetEndOfLine: boolean;
53 function GetEndOfParagraph: boolean;
54 function GetExplicitEndOfParagraph: boolean;
55 function GetHasLigatureLeft: boolean;
56 function GetHasLigatureRight: boolean;
57 function GetImplicitEndOfParagraph: boolean;
58 function GetIsCombiningLeft: boolean;
59 function GetIsCombiningRight: boolean;
60 function GetIsMirrored: boolean;
61 function GetLigatureBoundary: boolean;
62 function GetLigatureTransparent: boolean;
63 function GetMulticharStart: boolean;
64 function GetNonSpacingMark: boolean;
65 function GetRemoved: boolean;
66 function GetRightToLeft: boolean;
67 function GetParagraphRightToLeft: boolean;
68 function GetRightToLeftScript: boolean;
69 public
70 ParagraphBidiLevel, BidiLevel: byte;
71 Flags: Word;
72 class operator =(const AInfo1, AInfo2: TUnicodeBidiInfo): boolean;
73 property IsRemoved: boolean read GetRemoved;
74 property IsRightToLeft: boolean read GetRightToLeft;
75 property IsParagraphRightToLeft: boolean read GetParagraphRightToLeft;
76 property IsEndOfLine: boolean read GetEndOfLine;
77 property IsEndOfParagraph: boolean read GetEndOfParagraph;
78 property IsExplicitEndOfParagraph: boolean read GetExplicitEndOfParagraph;
79 property IsImplicitEndOfParagraph: boolean read GetImplicitEndOfParagraph;
80 property HasLigatureRight: boolean read GetHasLigatureRight;
81 property HasLigatureLeft: boolean read GetHasLigatureLeft;
82 property IsLigatureBoundary: boolean read GetLigatureBoundary;
83 property IsLigatureTransparent: boolean read GetLigatureTransparent;
84 property IsDiscardable: boolean read GetDiscardable;
85 property IsRightToLeftScript: boolean read GetRightToLeftScript;
86 property IsNonSpacingMark: boolean read GetNonSpacingMark;
87 property IsCombiningLeft: boolean read GetIsCombiningLeft;
88 property IsCombiningRight: boolean read GetIsCombiningRight;
89 property IsMulticharStart: boolean read GetMulticharStart;
90 property IsMirrored: boolean read GetIsMirrored;
91 end;
92
93 TUnicodeBidiArray = packed array of TUnicodeBidiInfo;
94 TUnicodeDisplayOrder = array of integer;
95
96 const
97 //maximum nesting level of isolates and bidi-formatting blocks (char bidi level can actually be higher due to char properties)
98 UNICODE_MAX_BIDI_DEPTH = 125;
99
100 UNICODE_NO_BREAK_SPACE = $A0;
101 UNICODE_LINE_SEPARATOR = $2028; //equivalent of <br>
102 UNICODE_PARAGRAPH_SEPARATOR = $2029; //equivalent of </p>
103 UNICODE_NEXT_LINE = $0085; //equivalent of CRLF
104
105 //characters that split lines into top-level bidi blocks
106 UNICODE_LEFT_TO_RIGHT_ISOLATE = $2066;
107 UNICODE_RIGHT_TO_LEFT_ISOLATE = $2067;
108 UNICODE_FIRST_STRONG_ISOLATE = $2068;
109 UNICODE_POP_DIRECTIONAL_ISOLATE = $2069;
110
111 //characters that split into bidi sub-blocks (called "formatting")
112 UNICODE_LEFT_TO_RIGHT_EMBEDDING = $202A;
113 UNICODE_RIGHT_TO_LEFT_EMBEDDING = $202B;
114 UNICODE_LEFT_TO_RIGHT_OVERRIDE = $202D;
115 UNICODE_RIGHT_TO_LEFT_OVERRIDE = $202E;
116 UNICODE_POP_DIRECTIONAL_FORMATTING = $202C;
117
118 //characters that mark direction without splitting the bidi block
119 UNICODE_LEFT_TO_RIGHT_MARK = $200E;
120 UNICODE_RIGHT_TO_LEFT_MARK = $200F;
121 UNICODE_ARABIC_LETTER_MARK = $061C;
122
123 //data separators
124 UNICODE_INFORMATION_SEPARATOR_FOUR = $001C; //end-of-file
125 UNICODE_INFORMATION_SEPARATOR_THREE = $001D; //section separator
126 UNICODE_INFORMATION_SEPARATOR_TWO = $001E; //record separator, kind of equivalent to paragraph separator
127 UNICODE_INFORMATION_SEPARATOR_ONE = $001F; //field separator, kind of equivalent to Tab
128
129 //zero-width
130 UNICODE_ZERO_WIDTH_SPACE = $200B;
131 UNICODE_ZERO_WIDTH_NON_JOINER = $200C;
132 UNICODE_ZERO_WIDTH_NO_BREAK_SPACE = $FEFF; //byte order mark
133 UNICODE_ZERO_WIDTH_JOINER = $200D;
134 UNICODE_COMBINING_GRAPHEME_JOINER = $034F;
135
136 //arabic letters
137 UNICODE_ARABIC_TATWEEL = $0640; //horizontal line that makes a ligature with most letters
138
139 //ideographic punctuation
140 UNICODE_IDEOGRAPHIC_COMMA = $3001;
141 UNICODE_IDEOGRAPHIC_FULL_STOP = $3002;
142 UNICODE_FULLWIDTH_COMMA = $FF0C;
143 UNICODE_HORIZONTAL_ELLIPSIS = $2026;
144
145 //bracket equivalence
146 UNICODE_RIGHT_POINTING_ANGLE_BRACKET = $232A;
147 UNICODE_RIGHT_ANGLE_BRACKET = $3009;
148
149 type //bracket matching
150 TUnicodeBracketInfo = record
151 IsBracket: boolean;
152 OpeningBracket,ClosingBracket: LongWord;
153 end;
154
155 { Returns the Bidi class as defined by Unicode used to determine text direction }
156 function GetUnicodeBidiClass(u: LongWord): TUnicodeBidiClass;
157 { Same as above but returns additional classes: ubcCombiningLeftToRight and ubcMirroredNeutral }
158 function GetUnicodeBidiClassEx(u: LongWord): TUnicodeBidiClass;
159 function GetUnicodeBracketInfo(u: LongWord): TUnicodeBracketInfo;
160 { Returns how the letter can be joined to the surrounding letters (for example in arabic) }
161 function GetUnicodeJoiningType(u: LongWord): TUnicodeJoiningType;
162 { Returns the Combining class defined by unicode for non-spacing marks and combining marks
163 or 255 if the character is not to be combined }
164 function GetUnicodeCombiningClass(u: LongWord): byte;
165 function IsZeroWidthUnicode(u: LongWord): boolean;
166 { Returns if the symbol can be mirrored horizontally for right-to-left text }
167 function IsUnicodeMirrored(u: LongWord): boolean;
168 function IsUnicodeParagraphSeparator(u: LongWord): boolean;
169 function IsUnicodeCrLf(u: LongWord): boolean;
170 function IsUnicodeSpace(u: LongWord): boolean;
171 function IsUnicodeIsolateOrFormatting(u: LongWord): boolean;
172 function IsModifierCombiningMark(u: LongWord): boolean;
173
174 { Analyze unicode and return bidi levels for each character.
175 baseDirection can be either UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE or UNICODE_FIRST_STRONG_ISOLATE }
176 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; baseDirection: LongWord): TUnicodeBidiArray;
177 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; ABidiMode: TFontBidiMode): TUnicodeBidiArray;
178
179 { Determine diplay order, provided the display surface is horizontally infinite }
180 function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder; overload;
181 function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder; overload;
182 function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder; overload;
183
184 implementation
185
186 {$i generatedunicode.inc}
187
188 function GetUnicodeCombiningClass(u: LongWord): byte;
189 var
190 minIndex, maxIndex, midIndex: Integer;
191 compU: LongWord;
192 begin
193 minIndex := 0;
194 maxIndex := high(UnicodeCombiningInfos);
195 repeat
196 midIndex := (minIndex+maxIndex) shr 1;
197 compU := UnicodeCombiningInfos[midIndex].u;
198 if u = compU then exit(UnicodeCombiningInfos[midIndex].c) else
199 if u < compU then maxIndex := midIndex-1
200 else minIndex := midIndex+1;
201 until maxIndex < minIndex;
202 result := 255; //not combining
203 end;
204
205 function GetUnicodeBidiClass(u: LongWord): TUnicodeBidiClass;
206 begin
207 result := GetUnicodeBidiClassEx(u);
208 if result = ubcMirroredNeutral then result := ubcOtherNeutrals
209 else if result = ubcCombiningLeftToRight then result := ubcLeftToRight;
210 end;
211
212 function IsUnicodeMirrored(u: LongWord): boolean;
213 begin
214 result := GetUnicodeBidiClassEx(u) = ubcMirroredNeutral;
215 end;
216
217 function IsZeroWidthUnicode(u: LongWord): boolean;
218 begin
219 case u of
220 UNICODE_ZERO_WIDTH_SPACE, UNICODE_ZERO_WIDTH_NON_JOINER,
221 UNICODE_ZERO_WIDTH_JOINER, UNICODE_ZERO_WIDTH_NO_BREAK_SPACE,
222 UNICODE_LEFT_TO_RIGHT_MARK,UNICODE_RIGHT_TO_LEFT_MARK,
223 UNICODE_ARABIC_LETTER_MARK: result := true;
224 else result := false;
225 end;
226 end;
227
228 function IsUnicodeParagraphSeparator(u: LongWord): boolean;
229 begin
230 case u of
231 $0A, $0D, UNICODE_NEXT_LINE, UNICODE_PARAGRAPH_SEPARATOR,
232 UNICODE_INFORMATION_SEPARATOR_FOUR, UNICODE_INFORMATION_SEPARATOR_THREE, UNICODE_INFORMATION_SEPARATOR_TWO: result := true;
233 else result := false;
234 end;
235 end;
236
237 function IsUnicodeCrLf(u: LongWord): boolean;
238 begin
239 result := (u=10) or (u=13);
240 end;
241
242 function IsUnicodeSpace(u: LongWord): boolean;
243 begin
244 result := GetUnicodeBidiClass(u) = ubcWhiteSpace;
245 end;
246
247 function IsUnicodeIsolateOrFormatting(u: LongWord): boolean;
248 begin
249 case u of
250 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE,
251 UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
252 UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true)
253 else exit(false);
254 end;
255 end;
256
257 function IsModifierCombiningMark(u: LongWord): boolean;
258 begin
259 case u of
260 $0654,$0655,$0658,$06DC,$06E3,$06E7,$06E8,$08D3,$08F3: exit(true);
261 else exit(false);
262 end;
263 end;
264
265 { TUnicodeBidiInfo }
266
GetDiscardablenull267 function TUnicodeBidiInfo.GetDiscardable: boolean;
268 begin
269 result := IsRemoved and not IsLigatureBoundary;
270 end;
271
GetEndOfLinenull272 function TUnicodeBidiInfo.GetEndOfLine: boolean;
273 begin
274 result := (Flags and BIDI_FLAG_END_OF_LINE) <> 0;
275 end;
276
GetEndOfParagraphnull277 function TUnicodeBidiInfo.GetEndOfParagraph: boolean;
278 begin
279 result := (Flags and (BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH or BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH)) <> 0;
280 end;
281
GetExplicitEndOfParagraphnull282 function TUnicodeBidiInfo.GetExplicitEndOfParagraph: boolean;
283 begin
284 result := (Flags and BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH) <> 0;
285 end;
286
GetHasLigatureLeftnull287 function TUnicodeBidiInfo.GetHasLigatureLeft: boolean;
288 begin
289 result := (Flags and BIDI_FLAG_LIGATURE_LEFT) <> 0;
290 end;
291
GetHasLigatureRightnull292 function TUnicodeBidiInfo.GetHasLigatureRight: boolean;
293 begin
294 result := (Flags and BIDI_FLAG_LIGATURE_RIGHT) <> 0;
295 end;
296
GetImplicitEndOfParagraphnull297 function TUnicodeBidiInfo.GetImplicitEndOfParagraph: boolean;
298 begin
299 result := (Flags and BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH) <> 0;
300 end;
301
GetIsCombiningLeftnull302 function TUnicodeBidiInfo.GetIsCombiningLeft: boolean;
303 begin
304 result := (Flags and BIDI_FLAG_COMBINING_LEFT) <> 0;
305 end;
306
GetIsCombiningRightnull307 function TUnicodeBidiInfo.GetIsCombiningRight: boolean;
308 begin
309 result := (Flags and BIDI_FLAG_COMBINING_RIGHT) <> 0;
310 end;
311
GetIsMirrorednull312 function TUnicodeBidiInfo.GetIsMirrored: boolean;
313 begin
314 result := (Flags and BIDI_FLAG_MIRRORED) <> 0;
315 end;
316
GetLigatureBoundarynull317 function TUnicodeBidiInfo.GetLigatureBoundary: boolean;
318 begin
319 result := (Flags and BIDI_FLAG_LIGATURE_BOUNDARY) <> 0;
320 end;
321
GetLigatureTransparentnull322 function TUnicodeBidiInfo.GetLigatureTransparent: boolean;
323 begin
324 result := (Flags and BIDI_FLAG_LIGATURE_TRANSPARENT) <> 0;
325 end;
326
GetMulticharStartnull327 function TUnicodeBidiInfo.GetMulticharStart: boolean;
328 begin
329 result := (Flags and BIDI_FLAG_MULTICHAR_START) <> 0;
330 end;
331
GetNonSpacingMarknull332 function TUnicodeBidiInfo.GetNonSpacingMark: boolean;
333 begin
334 result := (Flags and BIDI_FLAG_NON_SPACING_MARK) <> 0;
335 end;
336
GetRemovednull337 function TUnicodeBidiInfo.GetRemoved: boolean;
338 begin
339 result := (Flags and BIDI_FLAG_REMOVED) <> 0;
340 end;
341
GetRightToLeftnull342 function TUnicodeBidiInfo.GetRightToLeft: boolean;
343 begin
344 result := Odd(BidiLevel);
345 end;
346
GetParagraphRightToLeftnull347 function TUnicodeBidiInfo.GetParagraphRightToLeft: boolean;
348 begin
349 result := Odd(ParagraphBidiLevel);
350 end;
351
GetRightToLeftScriptnull352 function TUnicodeBidiInfo.GetRightToLeftScript: boolean;
353 begin
354 result := (Flags and BIDI_FLAG_RTL_SCRIPT) <> 0;
355 end;
356
357 class operator TUnicodeBidiInfo.=(const AInfo1, AInfo2: TUnicodeBidiInfo
358 ): boolean;
359 begin
360 result := (AInfo1.BidiLevel = AInfo2.BidiLevel) and
361 (AInfo1.Flags = AInfo2.Flags) and
362 (AInfo1.ParagraphBidiLevel = AInfo2.ParagraphBidiLevel);
363 end;
364
365 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; baseDirection: LongWord): TUnicodeBidiArray;
366 type
367 TUnicodeAnalysisElement = record
368 bidiClass: TUnicodeBidiClass;
369 prevInIsolate, nextInIsolate: integer; //next index in current isolate
370 end;
371 TUnicodeAnalysisArray = array of TUnicodeAnalysisElement;
372
373 var
374 a: TUnicodeAnalysisArray;
375
376 procedure ResolveWeakTypes(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
377 var
378 curIndex,backIndex: Integer;
379 latestStrongClass, prevClass: TUnicodeBidiClass;
380 begin
381 //rules W1 and W2
382 prevClass := startOfSequence;
383 latestStrongClass:= prevClass;
384 curIndex := startIndex;
385 while curIndex <> afterEndIndex do
386 begin
387 if not result[curIndex].IsRemoved then
388 begin
389 case a[curIndex].bidiClass of
390 ubcNonSpacingMark: a[curIndex].bidiClass:= prevClass;
391 ubcEuropeanNumber: if latestStrongClass = ubcArabicLetter then a[curIndex].bidiClass:= ubcArabicNumber;
392 end;
393 case u[curIndex] of
394 UNICODE_LEFT_TO_RIGHT_ISOLATE,
395 UNICODE_RIGHT_TO_LEFT_ISOLATE,
396 UNICODE_FIRST_STRONG_ISOLATE,
397 UNICODE_POP_DIRECTIONAL_ISOLATE: prevClass := ubcOtherNeutrals;
398 else prevClass := a[curIndex].bidiClass;
399 end;
400 if prevClass in [ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then latestStrongClass:= prevClass;
401 end;
402 curIndex := a[curIndex].nextInIsolate;
403 end;
404
405 // rule W4 and W5
406 prevClass := startOfSequence;
407 curIndex := startIndex;
408 while curIndex <> afterEndIndex do
409 begin
410 if not result[curIndex].IsRemoved then
411 begin
412 case a[curIndex].bidiClass of
413 ubcArabicLetter: a[curIndex].bidiClass := ubcRightToLeft;
414 ubcEuropeanNumber:
415 begin
416 backIndex := curIndex;
417 while backIndex > startIndex do
418 begin
419 dec(backIndex);
420 if result[backIndex].IsRemoved then continue;
421 if a[backIndex].bidiClass = ubcEuropeanNumberTerminator then
422 a[backIndex].bidiClass := ubcEuropeanNumber
423 else break;
424 end;
425 end;
426 ubcEuropeanNumberSeparator:
427 if (prevClass = ubcEuropeanNumber) and (a[curIndex].nextInIsolate <> afterEndIndex) and
428 (a[a[curIndex].nextInIsolate].bidiClass = ubcEuropeanNumber) then
429 a[curIndex].bidiClass:= ubcEuropeanNumber;
430 ubcCommonSeparator:
431 if (prevClass in[ubcEuropeanNumber,ubcArabicNumber]) and (a[curIndex].nextInIsolate <> afterEndIndex) and
432 (a[a[curIndex].nextInIsolate].bidiClass = prevClass) then
433 a[curIndex].bidiClass:= prevClass;
434 ubcEuropeanNumberTerminator:
435 if prevClass = ubcEuropeanNumber then
436 a[curIndex].bidiClass:= ubcEuropeanNumber;
437 end;
438 prevClass := a[curIndex].bidiClass;
439 end;
440
441 curIndex := a[curIndex].nextInIsolate;
442 end;
443
444 // rule W6 and W7
445 curIndex := startIndex;
446 latestStrongClass := startOfSequence;
447 while curIndex <> afterEndIndex do
448 begin
449 if not result[curIndex].IsRemoved then
450 begin
451 case a[curIndex].bidiClass of
452 ubcEuropeanNumberSeparator,ubcEuropeanNumberTerminator,ubcCommonSeparator: a[curIndex].bidiClass := ubcOtherNeutrals;
453 ubcLeftToRight,ubcRightToLeft,ubcArabicLetter: latestStrongClass:= a[curIndex].bidiClass;
454 ubcEuropeanNumber: if latestStrongClass = ubcLeftToRight then a[curIndex].bidiClass := ubcLeftToRight;
455 end;
456 end;
457 curIndex := a[curIndex].nextInIsolate;
458 end;
459 end;
460
461 procedure ResolveNeutrals(startIndex, afterEndIndex: integer; startOfSequence, endOfSequence: TUnicodeBidiClass);
462 var
463 curIndex,prevIndex,previewIndex: Integer;
464 curRTL, include, rightToLeftEmbedding: Boolean;
465 bidiClass: TUnicodeBidiClass;
466 begin
467 rightToLeftEmbedding := odd(result[startIndex].BidiLevel);
468 curIndex := startIndex;
469 curRTL := startOfSequence in [ubcRightToLeft,ubcArabicLetter];
470 while curIndex <> afterEndIndex do
471 begin
472 case a[curIndex].bidiClass of
473 ubcLeftToRight: curRTL := false;
474 ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber: curRTL := true;
475 else
476 if curRTL <> rightToLeftEmbedding then
477 begin
478 //determine whether following neutral chars are included in reverse direction
479 prevIndex := curIndex;
480 previewIndex := a[curIndex].nextInIsolate;
481 include := false;
482 while previewIndex <> afterEndIndex do //uses endOfSequence for overflow
483 begin
484 if previewIndex = afterEndIndex then
485 bidiClass:= endOfSequence
486 else
487 bidiClass:= a[previewIndex].bidiClass;
488 case bidiClass of
489 ubcLeftToRight:
490 begin
491 include := not curRTL;
492 break;
493 end;
494 ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber:
495 begin
496 include := curRTL;
497 break;
498 end;
499 end;
500 prevIndex := previewIndex;
501 previewIndex := a[previewIndex].nextInIsolate;
502 end;
503 if previewIndex = afterEndIndex then previewIndex := prevIndex;
504 if include then
505 begin
506 while curIndex <> previewIndex do
507 begin
508 if a[curIndex].bidiClass = ubcBoundaryNeutral then
509 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
510
511 if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
512 begin
513 if curRTL then a[curIndex].bidiClass := ubcRightToLeft
514 else a[curIndex].bidiClass := ubcLeftToRight;
515 end;
516
517 curIndex := a[curIndex].nextInIsolate;
518 end;
519 end else
520 curRTL := rightToLeftEmbedding;
521 end;
522 end;
523
524 if a[curIndex].bidiClass = ubcBoundaryNeutral then
525 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
526
527 if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
528 begin
529 if curRTL then a[curIndex].bidiClass := ubcRightToLeft
530 else a[curIndex].bidiClass := ubcLeftToRight;
531 end;
532
533 curIndex := a[curIndex].nextInIsolate;
534 end;
535 end;
536
537 procedure ResolveBrackets(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
538 type TBracketPair = record
539 openIndex,closeIndex: integer;
540 end;
541 var
542 bracketPairs: array of TBracketPair;
543 bracketPairCount: integer;
544 rightToLeft: boolean;
545
546 procedure SortBracketPairs;
547 var
548 i,j,k: Integer;
549 temp: TBracketPair;
550 begin
551 for i := 1 to bracketPairCount-1 do
552 begin
553 for j := 0 to i-1 do
554 if bracketPairs[j].openIndex > bracketPairs[i].openIndex then
555 begin
556 temp := bracketPairs[i];
557 for k := i downto j+1 do
558 bracketPairs[k] := bracketPairs[k-1];
559 bracketPairs[j] := temp;
560 end;
561 end;
562 end;
563
564 procedure FindBrackets; // rule BD16
565 const MAX_BRACKET_STACK = 63;
566 var
567 bracketStack: array[0..MAX_BRACKET_STACK-1] of record
568 bracketCharInfo: TUnicodeBracketInfo;
569 index: integer;
570 end;
571 bracketStackPos,peekPos: integer;
572 curIndex: integer;
573 curBracket: TUnicodeBracketInfo;
574 begin
575 bracketPairCount := 0;
576 bracketStackPos := 0;
577 bracketStack[0].index := -1; //avoid warning
578 curIndex := startIndex;
579 while curIndex <> afterEndIndex do
580 begin
581 if a[curIndex].bidiClass = ubcOtherNeutrals then
582 begin
583 curBracket := GetUnicodeBracketInfo(u[curIndex]);
584 if curBracket.IsBracket then
585 begin
586 // found opening bracket
587 if curBracket.OpeningBracket = u[curIndex] then
588 begin
589 if bracketStackPos <= high(bracketStack) then
590 begin
591 bracketStack[bracketStackPos].bracketCharInfo := curBracket;
592 bracketStack[bracketStackPos].index := curIndex;
593 inc(bracketStackPos);
594 end else
595 break;
596 end else
597 begin
598 for peekPos := bracketStackPos-1 downto 0 do
599 if (bracketStack[peekPos].bracketCharInfo.ClosingBracket = u[curIndex]) or
600 ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_POINTING_ANGLE_BRACKET)) or
601 ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_POINTING_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_ANGLE_BRACKET)) then
602 begin
603 bracketStackPos := peekPos;
604 if bracketPairCount >= length(bracketPairs) then
605 setlength(bracketPairs, bracketPairCount*2 + 8);
606 bracketPairs[bracketPairCount].openIndex := bracketStack[peekPos].index;
607 bracketPairs[bracketPairCount].closeIndex := curIndex;
608 inc(bracketPairCount);
609 break;
610 end;
611 end;
612 end;
613 end;
614 curIndex := a[curIndex].nextInIsolate;
615 end;
616 end;
617
618 procedure SetCharClass(index: integer; newClass: TUnicodeBidiClass);
619 begin
620 a[index].bidiClass:= newClass;
621 index := a[index].nextInIsolate;
622 while (index <> afterEndIndex) and (GetUnicodeBidiClass(u[index]) = ubcNonSpacingMark) do
623 begin
624 a[index].bidiClass := newClass;
625 index := a[index].nextInIsolate;
626 end;
627 end;
628
629 procedure ResolveBrackets; // rule N0
630 var
631 i, curIndex: Integer;
632 sameDirection, oppositeDirection, oppositeContext: boolean;
633 begin
634 for i := 0 to bracketPairCount-1 do
635 begin
636 curIndex := bracketPairs[i].openIndex+1;
637 sameDirection:= false;
638 oppositeDirection:= false;
639 while curIndex <> bracketPairs[i].closeIndex do
640 begin
641 Assert((curIndex >= startIndex) and (curIndex < length(a)), 'Expecting valid index');
642 case a[curIndex].bidiClass of
643 ubcLeftToRight:
644 if not rightToLeft then
645 begin
646 sameDirection := true;
647 break;
648 end else oppositeDirection:= true;
649 ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
650 if rightToLeft then
651 begin
652 sameDirection := true;
653 break;
654 end else oppositeDirection:= true;
655 end;
656 curIndex := a[curIndex].nextInIsolate;
657 end;
658 if sameDirection then
659 begin
660 if rightToLeft then
661 begin
662 SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
663 SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
664 end else
665 begin
666 SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
667 SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
668 end;
669 end else
670 if oppositeDirection then
671 begin
672 curIndex := a[bracketPairs[i].openIndex].prevInIsolate;
673 oppositeContext := false;
674 while curIndex >= startIndex do
675 begin
676 case a[curIndex].bidiClass of
677 ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
678 begin
679 oppositeContext := not rightToLeft;
680 break;
681 end;
682 ubcLeftToRight:
683 begin
684 oppositeContext := rightToLeft;
685 break;
686 end;
687 end;
688 curIndex := a[curIndex].prevInIsolate;
689 end;
690 if rightToLeft xor oppositeContext then
691 begin
692 SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
693 SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
694 end else
695 begin
696 SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
697 SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
698 end;
699 end;
700 end;
701 end;
702
703 begin
704 rightToLeft:= startOfSequence in[ubcRightToLeft,ubcArabicLetter];
705 FindBrackets;
706 SortBracketPairs;
707 ResolveBrackets;
708 end;
709
710 procedure ResolveLigature(startIndex: integer);
711 var
712 prevJoiningType, joiningType: TUnicodeJoiningType;
713 prevJoiningTypeBidilevel: byte;
714 prevJoiningTypeIndex: integer;
715 curIndex: Integer;
716 begin
717 prevJoiningType := ujtNonJoining;
718 prevJoiningTypeIndex := -1;
719 prevJoiningTypeBidilevel:= 0;
720 curIndex := startIndex;
721 while curIndex <> -1 do
722 begin
723 if prevJoiningTypeBidilevel <> result[curIndex].BidiLevel then
724 prevJoiningType := ujtNonJoining;
725 if result[curIndex].IsNonSpacingMark then
726 joiningType := ujtTransparent //NSM are always joining-transparent
727 else joiningType := GetUnicodeJoiningType(u[curIndex]);
728 if joiningType = ujtTransparent then
729 result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_TRANSPARENT;
730 if result[curIndex].IsRightToLeft then
731 begin
732 if (joiningType in[ujtRightJoining,ujtDualJoining])
733 and (prevJoiningType in[ujtLeftJoining,ujtDualJoining,ujtJoinCausing]) then
734 result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_RIGHT;
735 if (prevJoiningType in[ujtLeftJoining,ujtDualJoining]) and (prevJoiningTypeIndex <> -1) and
736 (joiningType in[ujtRightJoining,ujtDualJoining,ujtJoinCausing]) then
737 result[prevJoiningTypeIndex].Flags:= result[prevJoiningTypeIndex].Flags or BIDI_FLAG_LIGATURE_LEFT;
738 end else
739 begin
740 if (joiningType in[ujtLeftJoining,ujtDualJoining])
741 and (prevJoiningType in[ujtRightJoining,ujtDualJoining,ujtJoinCausing]) then
742 result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_LEFT;
743 if (prevJoiningType in[ujtRightJoining,ujtDualJoining]) and (prevJoiningTypeIndex <> -1) and
744 (joiningType in[ujtLeftJoining,ujtDualJoining,ujtJoinCausing]) then
745 result[prevJoiningTypeIndex].Flags:= result[prevJoiningTypeIndex].Flags or BIDI_FLAG_LIGATURE_RIGHT;
746 end;
747 if joiningType <> ujtTransparent then
748 begin
749 prevJoiningType := joiningType;
750 prevJoiningTypeIndex:= curIndex;
751 prevJoiningTypeBidilevel:= result[curIndex].BidiLevel;
752 end;
753 curIndex := a[curIndex].nextInIsolate;
754 end;
755 end;
756
757 procedure AnalyzeSequence(startIndex, afterEndIndex: integer; sos, eos: TUnicodeBidiClass);
758 begin
759 if afterEndIndex = startIndex then exit;
760 ResolveWeakTypes(startIndex, afterEndIndex, sos, eos);
761 ResolveBrackets(startIndex, afterEndIndex, sos, eos);
762 ResolveNeutrals(startIndex, afterEndIndex, sos, eos);
763 end;
764
765 procedure SameLevelRuns(startIndex: integer);
766 var
767 curBidiLevel: byte;
768 latestIndex,curIndex, curStartIndex: Integer;
769 curSos,eos: TUnicodeBidiClass;
770 begin
771 curIndex := startIndex;
772 while (curIndex<>-1) and result[curIndex].IsRemoved do
773 curIndex := a[curIndex].nextInIsolate;
774 if curIndex = -1 then exit;
775
776 curStartIndex:= curIndex;
777 curBidiLevel := result[curIndex].bidiLevel;
778 if odd(curBidiLevel) then curSos := ubcRightToLeft else curSos := ubcLeftToRight;
779 latestIndex := -1;
780 while curIndex <> -1 do
781 begin
782 if not result[curIndex].IsRemoved then
783 begin
784 if (latestIndex <> -1) and (result[curIndex].bidiLevel <> curBidiLevel) then
785 begin
786 if result[curIndex].bidiLevel > curBidiLevel then
787 begin
788 if odd(result[curIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
789 end else
790 begin
791 if odd(curBidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
792 end;
793
794 AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
795
796 curSos := eos;
797 curBidiLevel:= result[curIndex].bidiLevel;
798 curStartIndex:= curIndex;
799 end;
800 latestIndex := curIndex;
801 end;
802
803 if (a[curIndex].nextInIsolate = -1) and (latestIndex<>-1) then
804 begin
805 if odd(result[latestIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
806 AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
807 break;
808 end;
809
810 curIndex := a[curIndex].nextInIsolate;
811 end;
812 end;
813
814 //analyse bidi formatting of an embedding or an override block
815 procedure AnalyzeFormattingBlocks(startIndex, lastIndex: integer; minBidiLevel: byte; formattingCode: LongWord);
816 var curIndex, nextIndex, levelIncrease: integer;
817 subFormatBeforeStart, subFormatStart, formatNesting: integer;
818 subFormatCode: LongWord;
819 begin
820 case formattingCode of
821 UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
822 if odd(minBidiLevel) then inc(minBidiLevel);
823 UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
824 if not odd(minBidiLevel) then inc(minBidiLevel);
825 end;
826 nextIndex := startIndex;
827 repeat
828 Assert(nextIndex >= 0, 'Expecting valid index');
829 curIndex := nextIndex;
830 nextIndex := a[curIndex].nextInIsolate;
831 result[curIndex].bidiLevel := minBidiLevel;
832
833 //apply override
834 if formattingCode = UNICODE_LEFT_TO_RIGHT_OVERRIDE then a[curIndex].bidiClass := ubcLeftToRight
835 else if formattingCode = UNICODE_RIGHT_TO_LEFT_OVERRIDE then a[curIndex].bidiClass := ubcRightToLeft;
836
837 case u[curIndex] of
838 UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
839 UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE:
840 begin
841 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
842 case u[curIndex] of
843 UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
844 if odd(minBidiLevel) then levelIncrease := 1
845 else levelIncrease := 2;
846 UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
847 if odd(minBidiLevel) then levelIncrease := 2
848 else levelIncrease := 1;
849 else levelIncrease:= 2;
850 end;
851 if minBidiLevel <= UNICODE_MAX_BIDI_DEPTH-levelIncrease-1 then
852 begin
853 subFormatCode:= u[curIndex];
854 subFormatBeforeStart := curIndex;
855 subFormatStart := nextIndex;
856 formatNesting:= 1;
857 while formatNesting > 0 do
858 begin
859 //sub-format ends because no more chars
860 if curIndex = lastIndex then
861 begin
862 if curIndex <> subFormatBeforeStart then
863 AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
864 break;
865 end;
866
867 Assert(nextIndex >= 0, 'Expecting valid index');
868 case u[nextIndex] of
869 UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
870 UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: inc(formatNesting);
871 UNICODE_POP_DIRECTIONAL_FORMATTING:
872 begin
873 dec(formatNesting);
874 if formatNesting = 0 then
875 begin
876 //sub-format ends because enough matching pop chars found
877 if curIndex <> subFormatBeforeStart then
878 AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
879
880 curIndex := nextIndex;
881 nextIndex := a[curIndex].nextInIsolate;
882 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
883 break;
884 end;
885 end;
886 end;
887
888 curIndex := nextIndex;
889 nextIndex := a[curIndex].nextInIsolate;
890 end;
891 end;
892 end;
893 UNICODE_POP_DIRECTIONAL_FORMATTING: //ignored when no matching formatting code
894 begin
895 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
896 end;
897 end;
898 until curIndex = lastIndex;
899 end;
900
901 procedure ResolveImplicitLevels(startIndex: integer); // rule I1 and I2
902 var
903 curIndex: Integer;
904 begin
905 curIndex := startIndex;
906 while curIndex <> -1 do
907 begin
908 case a[curIndex].bidiClass of
909 ubcRightToLeft,ubcArabicLetter:
910 if not Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel);
911 ubcEuropeanNumber,ubcArabicNumber:
912 if Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel)
913 else inc(result[curIndex].bidiLevel, 2);
914 ubcLeftToRight: if Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel);
915 end;
916 curIndex := a[curIndex].nextInIsolate;
917 end;
918 end;
919
920 procedure ResetEndOfParagraphLevels(startIndex: integer); // rule L1
921 var
922 prevIndex,curIndex: Integer;
923
924 procedure TweakWhiteSpaceBefore(index: integer);
925 var
926 isWhiteSpaceOrIsolate: boolean;
927 begin
928 while index <> -1 do
929 begin
930 case u[index] of
931 UNICODE_FIRST_STRONG_ISOLATE, UNICODE_POP_DIRECTIONAL_ISOLATE,
932 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE:
933 isWhiteSpaceOrIsolate:= true;
934 else
935 isWhiteSpaceOrIsolate:= GetUnicodeBidiClass(u[index]) = ubcWhiteSpace;
936 end;
937 if isWhiteSpaceOrIsolate then
938 result[index].bidiLevel := result[index].ParagraphBidiLevel
939 else
940 break;
941 index := a[index].prevInIsolate;
942 end;
943 end;
944
945 begin
946 prevIndex := -1;
947 curIndex := startIndex;
948 while curIndex <> -1 do
949 begin
950 case GetUnicodeBidiClass(u[curIndex]) of
951 ubcSegmentSeparator, ubcParagraphSeparator:
952 begin
953 result[curIndex].bidiLevel := result[curIndex].ParagraphBidiLevel;
954 TweakWhiteSpaceBefore(prevIndex);
955 end;
956 end;
957 prevIndex := curIndex;
958 curIndex := a[curIndex].nextInIsolate;
959 end;
960 TweakWhiteSpaceBefore(prevIndex);
961 end;
962
963 function DetermineIsolateDirectionFromFirstStrongClass(startIndex: integer): LongWord;
964 var
965 curIndex: Integer;
966 begin
967 curIndex := startIndex;
968 while curIndex <> -1 do
969 begin
970 Assert(curIndex >= 0, 'Expecting valid index');
971 case a[curIndex].bidiClass of
972 ubcLeftToRight: exit(UNICODE_LEFT_TO_RIGHT_ISOLATE);
973 ubcRightToLeft,ubcArabicLetter: exit(UNICODE_RIGHT_TO_LEFT_ISOLATE);
974 end;
975 case u[curIndex] of
976 UNICODE_LEFT_TO_RIGHT_OVERRIDE: exit(UNICODE_LEFT_TO_RIGHT_ISOLATE);
977 UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(UNICODE_RIGHT_TO_LEFT_ISOLATE);
978 end;
979 curIndex := a[curIndex].nextInIsolate;
980 end;
981 result := UNICODE_LEFT_TO_RIGHT_ISOLATE;
982 end;
983
984 procedure LinkCharsInIsolate(startIndex: integer; charCount: integer; out endIndex : integer);
985 var
986 curIndex,isolateStackPos,
987 prevIndex: Integer;
988 begin
989 a[startIndex].prevInIsolate := -1;
990 prevIndex := -1;
991 curIndex := startIndex;
992 isolateStackPos:= 0;
993 while curIndex < startIndex+charCount do
994 begin
995 if u[curIndex] = UNICODE_POP_DIRECTIONAL_ISOLATE then
996 if isolateStackPos > 0 then dec(isolateStackPos);
997
998 if isolateStackPos = 0 then
999 begin
1000 if prevIndex<>-1 then a[prevIndex].nextInIsolate := curIndex;
1001 a[curIndex].prevInIsolate := prevIndex;
1002
1003 prevIndex := curIndex;
1004 end;
1005
1006 case u[curIndex] of
1007 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE: inc(isolateStackPos);
1008 end;
1009 inc(curIndex);
1010 end;
1011 a[prevIndex].nextInIsolate := -1;
1012 endIndex := prevIndex;
1013 end;
1014
1015 //split isolates in order to format them independently
1016 procedure AnalyzeIsolates(startIndex: integer; charCount: integer; isolateDirection: LongWord; minBidiLevel: byte = 0;
1017 isParagraph: boolean = false);
1018 var curIndex, endIndex: integer;
1019 nextIndex: integer;
1020 subBidiLevel, levelIncrease: byte;
1021 subIsolateStart: integer;
1022 subIsolateDirection: LongWord;
1023 begin
1024 if charCount = 0 then exit;
1025 Assert(startIndex>=0, 'Invalid start index');
1026
1027 LinkCharsInIsolate(startIndex, charCount, endIndex);
1028
1029 if isolateDirection = UNICODE_FIRST_STRONG_ISOLATE then
1030 isolateDirection := DetermineIsolateDirectionFromFirstStrongClass(startIndex);
1031
1032 case isolateDirection of
1033 UNICODE_LEFT_TO_RIGHT_ISOLATE: if Odd(minBidiLevel) then inc(minBidiLevel);
1034 UNICODE_RIGHT_TO_LEFT_ISOLATE: if not Odd(minBidiLevel) then inc(minBidiLevel);
1035 else
1036 raise EInvalidOperation.Create('Unknown isolate direction');
1037 end;
1038
1039 if isParagraph then
1040 begin
1041 curIndex := startIndex;
1042 while curIndex <> -1 do
1043 begin
1044 result[curIndex].ParagraphBidiLevel := minBidiLevel;
1045 curIndex := a[curIndex].nextInIsolate;
1046 end;
1047 end;
1048
1049 case isolateDirection of
1050 UNICODE_LEFT_TO_RIGHT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_LEFT_TO_RIGHT_EMBEDDING);
1051 UNICODE_RIGHT_TO_LEFT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_RIGHT_TO_LEFT_EMBEDDING);
1052 end;
1053
1054 SameLevelRuns(startIndex);
1055 ResolveImplicitLevels(startIndex);
1056 ResolveLigature(startIndex);
1057
1058 if isParagraph then
1059 ResetEndOfParagraphLevels(startIndex);
1060
1061 //analyse sub-isolates
1062 curIndex := startIndex;
1063 while curIndex <> -1 do
1064 begin
1065 Assert(curIndex >= 0, 'Expecting valid index');
1066 case u[curIndex] of
1067 UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE:
1068 begin
1069 subBidiLevel := result[curIndex].bidiLevel;
1070 nextIndex := a[curIndex].nextInIsolate;
1071 if nextIndex <> -1 then
1072 begin
1073 if result[nextIndex].bidiLevel > subBidiLevel then
1074 subBidiLevel:= result[nextIndex].bidiLevel;
1075 end;
1076 if ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
1077 (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) or
1078 ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
1079 (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) then
1080 levelIncrease := 1
1081 else
1082 levelIncrease:= 2;
1083 if subBidiLevel+levelIncrease <= UNICODE_MAX_BIDI_DEPTH-1 then
1084 begin
1085 subIsolateDirection := u[curIndex];
1086 subIsolateStart:= curIndex+1;
1087 curIndex := nextIndex;
1088
1089 //sub-isolates ends because no more chars
1090 if curIndex = -1 then
1091 begin
1092 AnalyzeIsolates(subIsolateStart, startIndex+charCount-subIsolateStart, subIsolateDirection, subBidiLevel+1);
1093 break;
1094 end else
1095 begin
1096 AnalyzeIsolates(subIsolateStart, curIndex-subIsolateStart, subIsolateDirection, subBidiLevel+1);
1097 continue;
1098 end;
1099 end;
1100 end;
1101 end;
1102 curIndex := a[curIndex].nextInIsolate;
1103 end;
1104 end;
1105
1106 //split UTF8 string into paragraphs
1107 procedure SplitParagraphs;
1108 var
1109 lineStartIndex, curIndex: integer;
1110 begin
1111 curIndex := 0;
1112 lineStartIndex := curIndex;
1113 while curIndex < ALength do
1114 begin
1115 if a[curIndex].bidiClass = ubcParagraphSeparator then
1116 begin
1117 //skip second CRLF char
1118 if IsUnicodeCrLf(u[curIndex]) and (curIndex+1 < ALength) and
1119 IsUnicodeCrLf(u[curIndex+1]) and (u[curIndex+1] <> u[curIndex]) then
1120 begin
1121 inc(curIndex);
1122 result[curIndex].Flags := result[curIndex].Flags and not BIDI_FLAG_MULTICHAR_START;
1123 end;
1124
1125 result[curIndex].Flags := result[curIndex].Flags or BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH;
1126
1127 AnalyzeIsolates(lineStartIndex, curIndex+1-lineStartIndex, baseDirection, 0, true);
1128 lineStartIndex := curIndex+1;
1129 end;
1130 inc(curIndex);
1131 end;
1132 if curIndex > lineStartIndex then
1133 begin
1134 result[curIndex-1].Flags := result[curIndex-1].Flags or BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH;
1135 AnalyzeIsolates(lineStartIndex, curIndex-lineStartIndex, baseDirection, 0, true);
1136 end;
1137 end;
1138
1139 var i: integer;
1140 classEx: TUnicodeBidiClass;
1141 begin
1142 setlength(a, ALength);
1143 setlength(result, ALength);
1144 if ALength > 0 then
1145 begin
1146 for i := 0 to high(a) do
1147 begin
1148 classEx := GetUnicodeBidiClassEx(u[i]);
1149 case classEx of
1150 ubcMirroredNeutral:
1151 begin
1152 result[i].Flags := result[i].Flags or BIDI_FLAG_MIRRORED;
1153 a[i].bidiClass := ubcOtherNeutrals;
1154 end;
1155 ubcCombiningLeftToRight:
1156 begin
1157 case GetUnicodeCombiningClass(u[i]) of
1158 208, 224: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_LEFT;
1159 210, 226, 9: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_RIGHT;
1160 0: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_LEFT OR BIDI_FLAG_COMBINING_RIGHT;
1161 end;
1162 a[i].bidiClass := ubcLeftToRight;
1163 end;
1164 otherwise
1165 a[i].bidiClass := classEx;
1166 end;
1167 case u[i] of
1168 UNICODE_LINE_SEPARATOR: //line separator within paragraph
1169 result[i].Flags := result[i].Flags or BIDI_FLAG_END_OF_LINE;
1170 UNICODE_ZERO_WIDTH_JOINER, UNICODE_ZERO_WIDTH_NON_JOINER:
1171 result[i].Flags := result[i].Flags OR BIDI_FLAG_LIGATURE_BOUNDARY;
1172 end;
1173 case a[i].bidiClass of
1174 ubcArabicLetter,ubcArabicNumber,ubcRightToLeft:
1175 result[i].Flags := result[i].Flags OR BIDI_FLAG_RTL_SCRIPT;
1176 ubcNonSpacingMark: result[i].Flags := result[i].Flags OR BIDI_FLAG_NON_SPACING_MARK;
1177 end;
1178 if (result[i].Flags and (BIDI_FLAG_NON_SPACING_MARK or BIDI_FLAG_COMBINING_LEFT
1179 or BIDI_FLAG_COMBINING_RIGHT) = 0) or
1180 (i = 0) or (a[i-1].bidiClass in [ubcSegmentSeparator, ubcParagraphSeparator]) then
1181 result[i].Flags := result[i].Flags OR BIDI_FLAG_MULTICHAR_START;
1182 end;
1183 SplitParagraphs;
1184 end;
1185 end;
1186
1187 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer;
1188 ABidiMode: TFontBidiMode): TUnicodeBidiArray;
1189 begin
1190 case ABidiMode of
1191 fbmLeftToRight: result := AnalyzeBidiUnicode(u, ALength, UNICODE_LEFT_TO_RIGHT_ISOLATE);
1192 fbmRightToLeft: result := AnalyzeBidiUnicode(u, ALength, UNICODE_RIGHT_TO_LEFT_ISOLATE);
1193 else
1194 {fbmAuto} result := AnalyzeBidiUnicode(u, ALength, UNICODE_FIRST_STRONG_ISOLATE);
1195 end;
1196 end;
1197
1198 function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder;
1199 begin
1200 if length(AInfo)=0 then
1201 result := nil
1202 else
1203 result := GetUnicodeDisplayOrder(@AInfo[0], sizeof(TUnicodeBidiInfo), length(AInfo));
1204 end;
1205
1206 function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder;
1207
1208 procedure DetermineDisplayOrderRec(AOffset: integer; AStartIndex, ABlockCount: integer; AEmbeddingLevel: byte);
1209 var minLevel: byte;
1210 blockIndex,subStartIndex,subCount, subOffset: integer;
1211 begin
1212 //writeln('DetermineDisplayOrderRec('+inttostr(AOffset)+'/'+inttostr(ACount)+',' + inttostr(AStartIndex) +',*' +inttostr(ABlockCount)+','+inttostr(AEmbeddingLevel)+')');
1213 blockIndex := 0;
1214 subStartIndex := 0; //avoid warning
1215 while blockIndex < ABlockCount do
1216 begin
1217 Assert(AOffset < ACount, 'Offset out of bounds');
1218 if ALevels[AOffset] = AEmbeddingLevel then
1219 begin
1220 if odd(AEmbeddingLevel) then
1221 result[AStartIndex+ABlockCount-1-blockIndex] := AOffset
1222 else
1223 result[AStartIndex+blockIndex] := AOffset;
1224 inc(AOffset);
1225 inc(blockIndex);
1226 end else
1227 begin
1228 if not odd(AEmbeddingLevel) then
1229 subStartIndex := AStartIndex+blockIndex;
1230 subOffset := AOffset;
1231 minLevel := ALevels[AOffset];
1232 inc(AOffset);
1233 inc(blockIndex);
1234 subCount := 1;
1235 while true do
1236 begin
1237 if (blockIndex < ABlockCount) and (ALevels[AOffset] > AEmbeddingLevel) then
1238 begin
1239 Assert(AOffset < ACount, 'Offset out of bounds');
1240 if ALevels[AOffset] < minLevel then
1241 minLevel:= ALevels[AOffset];
1242 inc(AOffset);
1243 inc(blockIndex);
1244 inc(subCount);
1245 end else
1246 begin
1247 if odd(AEmbeddingLevel) then
1248 subStartIndex := AStartIndex+ABlockCount-1-(blockIndex-1);
1249 DetermineDisplayOrderRec(subOffset, subStartIndex, subCount, minLevel);
1250 break;
1251 end;
1252 end;
1253 end;
1254 end;
1255 end;
1256
1257 begin
1258 setlength(result, ACount);
1259 DetermineDisplayOrderRec(0, 0, ACount, 0);
1260 end;
1261
1262 function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder;
1263 var
1264 levels: packed array of byte;
1265 originalIndices: array of integer;
1266 index,len, i: integer;
1267 p: PByte;
1268 begin
1269 len := 0;
1270 p := PByte(ABidiInfo);
1271 for i := 0 to ACount-1 do
1272 begin
1273 if not PUnicodeBidiInfo(p)^.IsRemoved then inc(len);
1274 inc(p, AStride);
1275 end;
1276 if len = 0 then
1277 result := nil
1278 else
1279 begin
1280 setlength(levels, len);
1281 setlength(originalIndices, len);
1282 p := PByte(ABidiInfo);
1283 index := 0;
1284 for i := 0 to ACount-1 do
1285 begin
1286 if not PUnicodeBidiInfo(p)^.IsRemoved then
1287 begin
1288 levels[index] := PUnicodeBidiInfo(p)^.BidiLevel;
1289 originalIndices[index] := i;
1290 inc(index);
1291 end;
1292 inc(p, AStride);
1293 end;
1294 result := GetUnicodeDisplayOrder(@levels[0], len);
1295 for i := 0 to len-1 do
1296 result[i] := originalIndices[result[i]];
1297 end;
1298 end;
1299
1300 end.
1301
1302