1 // SPDX-License-Identifier: LGPL-3.0-linking-exception
2 unit BGRAUnicode;
3 { Implementation of Unicode bidi algorithm }
4 { Author: circular }
5 
6 {$mode objfpc}{$H+}
7 {$modeswitch advancedrecords}
8 
9 interface
10 
11 uses
12   BGRAClasses, SysUtils;
13 
14 type
15   TUnicodeBidiClass = (ubcBoundaryNeutral, ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals,
16                       ubcCommonSeparator, ubcNonSpacingMark,
17                       ubcLeftToRight, ubcEuropeanNumber, ubcEuropeanNumberSeparator, ubcEuropeanNumberTerminator,
18                       ubcRightToLeft, ubcArabicLetter, ubcArabicNumber,
19                       ubcUnknown,
20                       ubcCombiningLeftToRight,   //ubcLeftToRight in Mc category
21                       ubcMirroredNeutral);       //ubcOtherNeutrals with Mirrored property
22   TUnicodeJoiningType = (ujtNonJoining{U}, ujtTransparent{T}, ujtRightJoining{R}, ujtLeftJoining{L},
23                          ujtDualJoining{D}, ujtJoinCausing{C});
24   TFontBidiMode = (fbmAuto, fbmLeftToRight, fbmRightToLeft);
25 
26 const
27   ubcNeutral = [ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals];
28 
29   BIDI_FLAG_REMOVED = 1;                   //RLE, LRE, RLO, LRO, PDF and BN are supposed to be removed
30   BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH = 2; //implicit end of paragraph (paragraph spacing below due to end of text)
31   BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH = 4; //explicit end of paragraph (paragraph spacing below due to paragraph split)
32   BIDI_FLAG_END_OF_LINE = 8;               //line break <br>
33   BIDI_FLAG_LIGATURE_RIGHT = 16;           //joins to the letter on the right (possible for joining type R and D)
34   BIDI_FLAG_LIGATURE_LEFT = 32;            //joins to the letter on the left (possible for joining type L and D)
35   BIDI_FLAG_LIGATURE_BOUNDARY = 64;        //zero-width joiner or non-joiner
36   BIDI_FLAG_LIGATURE_TRANSPARENT = 128;    //does not affect ligature
37   BIDI_FLAG_RTL_SCRIPT = 256;              //script is written from right to left (arabic, N'Ko...)
38   BIDI_FLAG_NON_SPACING_MARK = 512;        //it is a non-spacing mark
39   BIDI_FLAG_COMBINING_LEFT = 1024;         //this letter is to be combined to the left of previous letter
40   BIDI_FLAG_COMBINING_RIGHT = 2048;        //this letter is to be combined to the right of previous letter
41   BIDI_FLAG_MULTICHAR_START = 4096;        //start of a multichar (letter + non spacing marks, non spacing marks)
42   BIDI_FLAG_MIRRORED = 8192;               //the glyph is mirrored when in RTL text
43 
44 type
45   PUnicodeBidiInfo = ^TUnicodeBidiInfo;
46 
47   { TUnicodeBidiInfo }
48 
49   TUnicodeBidiInfo = packed record
50   private
51     function GetDiscardable: boolean;
52     function GetEndOfLine: boolean;
53     function GetEndOfParagraph: boolean;
54     function GetExplicitEndOfParagraph: boolean;
55     function GetHasLigatureLeft: boolean;
56     function GetHasLigatureRight: boolean;
57     function GetImplicitEndOfParagraph: boolean;
58     function GetIsCombiningLeft: boolean;
59     function GetIsCombiningRight: boolean;
60     function GetIsMirrored: boolean;
61     function GetLigatureBoundary: boolean;
62     function GetLigatureTransparent: boolean;
63     function GetMulticharStart: boolean;
64     function GetNonSpacingMark: boolean;
65     function GetRemoved: boolean;
66     function GetRightToLeft: boolean;
67     function GetParagraphRightToLeft: boolean;
68     function GetRightToLeftScript: boolean;
69   public
70     ParagraphBidiLevel, BidiLevel: byte;
71     Flags: Word;
72     class operator =(const AInfo1, AInfo2: TUnicodeBidiInfo): boolean;
73     property IsRemoved: boolean read GetRemoved;
74     property IsRightToLeft: boolean read GetRightToLeft;
75     property IsParagraphRightToLeft: boolean read GetParagraphRightToLeft;
76     property IsEndOfLine: boolean read GetEndOfLine;
77     property IsEndOfParagraph: boolean read GetEndOfParagraph;
78     property IsExplicitEndOfParagraph: boolean read GetExplicitEndOfParagraph;
79     property IsImplicitEndOfParagraph: boolean read GetImplicitEndOfParagraph;
80     property HasLigatureRight: boolean read GetHasLigatureRight;
81     property HasLigatureLeft: boolean read GetHasLigatureLeft;
82     property IsLigatureBoundary: boolean read GetLigatureBoundary;
83     property IsLigatureTransparent: boolean read GetLigatureTransparent;
84     property IsDiscardable: boolean read GetDiscardable;
85     property IsRightToLeftScript: boolean read GetRightToLeftScript;
86     property IsNonSpacingMark: boolean read GetNonSpacingMark;
87     property IsCombiningLeft: boolean read GetIsCombiningLeft;
88     property IsCombiningRight: boolean read GetIsCombiningRight;
89     property IsMulticharStart: boolean read GetMulticharStart;
90     property IsMirrored: boolean read GetIsMirrored;
91   end;
92 
93   TUnicodeBidiArray = packed array of TUnicodeBidiInfo;
94   TUnicodeDisplayOrder = array of integer;
95 
96 const
97   //maximum nesting level of isolates and bidi-formatting blocks (char bidi level can actually be higher due to char properties)
98   UNICODE_MAX_BIDI_DEPTH = 125;
99 
100   UNICODE_NO_BREAK_SPACE = $A0;
101   UNICODE_LINE_SEPARATOR = $2028;      //equivalent of <br>
102   UNICODE_PARAGRAPH_SEPARATOR = $2029; //equivalent of </p>
103   UNICODE_NEXT_LINE = $0085;           //equivalent of CRLF
104 
105   //characters that split lines into top-level bidi blocks
106   UNICODE_LEFT_TO_RIGHT_ISOLATE = $2066;
107   UNICODE_RIGHT_TO_LEFT_ISOLATE = $2067;
108   UNICODE_FIRST_STRONG_ISOLATE = $2068;
109   UNICODE_POP_DIRECTIONAL_ISOLATE = $2069;
110 
111   //characters that split into bidi sub-blocks (called "formatting")
112   UNICODE_LEFT_TO_RIGHT_EMBEDDING = $202A;
113   UNICODE_RIGHT_TO_LEFT_EMBEDDING = $202B;
114   UNICODE_LEFT_TO_RIGHT_OVERRIDE = $202D;
115   UNICODE_RIGHT_TO_LEFT_OVERRIDE = $202E;
116   UNICODE_POP_DIRECTIONAL_FORMATTING = $202C;
117 
118   //characters that mark direction without splitting the bidi block
119   UNICODE_LEFT_TO_RIGHT_MARK = $200E;
120   UNICODE_RIGHT_TO_LEFT_MARK = $200F;
121   UNICODE_ARABIC_LETTER_MARK = $061C;
122 
123   //data separators
124   UNICODE_INFORMATION_SEPARATOR_FOUR = $001C;   //end-of-file
125   UNICODE_INFORMATION_SEPARATOR_THREE = $001D;  //section separator
126   UNICODE_INFORMATION_SEPARATOR_TWO = $001E;    //record separator, kind of equivalent to paragraph separator
127   UNICODE_INFORMATION_SEPARATOR_ONE = $001F;    //field separator, kind of equivalent to Tab
128 
129   //zero-width
130   UNICODE_ZERO_WIDTH_SPACE = $200B;
131   UNICODE_ZERO_WIDTH_NON_JOINER = $200C;
132   UNICODE_ZERO_WIDTH_NO_BREAK_SPACE = $FEFF;   //byte order mark
133   UNICODE_ZERO_WIDTH_JOINER = $200D;
134   UNICODE_COMBINING_GRAPHEME_JOINER = $034F;
135 
136   //arabic letters
137   UNICODE_ARABIC_TATWEEL = $0640;    //horizontal line that makes a ligature with most letters
138 
139   //ideographic punctuation
140   UNICODE_IDEOGRAPHIC_COMMA = $3001;
141   UNICODE_IDEOGRAPHIC_FULL_STOP = $3002;
142   UNICODE_FULLWIDTH_COMMA = $FF0C;
143   UNICODE_HORIZONTAL_ELLIPSIS = $2026;
144 
145   //bracket equivalence
146   UNICODE_RIGHT_POINTING_ANGLE_BRACKET = $232A;
147   UNICODE_RIGHT_ANGLE_BRACKET = $3009;
148 
149 type //bracket matching
150   TUnicodeBracketInfo = record
151     IsBracket: boolean;
152     OpeningBracket,ClosingBracket: LongWord;
153   end;
154 
155 { Returns the Bidi class as defined by Unicode used to determine text direction }
156 function GetUnicodeBidiClass(u: LongWord): TUnicodeBidiClass;
157 { Same as above but returns additional classes: ubcCombiningLeftToRight and ubcMirroredNeutral }
158 function GetUnicodeBidiClassEx(u: LongWord): TUnicodeBidiClass;
159 function GetUnicodeBracketInfo(u: LongWord): TUnicodeBracketInfo;
160 { Returns how the letter can be joined to the surrounding letters (for example in arabic) }
161 function GetUnicodeJoiningType(u: LongWord): TUnicodeJoiningType;
162 { Returns the Combining class defined by unicode for non-spacing marks and combining marks
163   or 255 if the character is not to be combined }
164 function GetUnicodeCombiningClass(u: LongWord): byte;
165 function IsZeroWidthUnicode(u: LongWord): boolean;
166 { Returns if the symbol can be mirrored horizontally for right-to-left text }
167 function IsUnicodeMirrored(u: LongWord): boolean;
168 function IsUnicodeParagraphSeparator(u: LongWord): boolean;
169 function IsUnicodeCrLf(u: LongWord): boolean;
170 function IsUnicodeSpace(u: LongWord): boolean;
171 function IsUnicodeIsolateOrFormatting(u: LongWord): boolean;
172 function IsModifierCombiningMark(u: LongWord): boolean;
173 
174 { Analyze unicode and return bidi levels for each character.
175   baseDirection can be either UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE or UNICODE_FIRST_STRONG_ISOLATE }
176 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; baseDirection: LongWord): TUnicodeBidiArray;
177 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; ABidiMode: TFontBidiMode): TUnicodeBidiArray;
178 
179 { Determine diplay order, provided the display surface is horizontally infinite }
180 function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder; overload;
181 function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder; overload;
182 function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder; overload;
183 
184 implementation
185 
186 {$i generatedunicode.inc}
187 
188 function GetUnicodeCombiningClass(u: LongWord): byte;
189 var
190   minIndex, maxIndex, midIndex: Integer;
191   compU: LongWord;
192 begin
193   minIndex := 0;
194   maxIndex := high(UnicodeCombiningInfos);
195   repeat
196     midIndex := (minIndex+maxIndex) shr 1;
197     compU := UnicodeCombiningInfos[midIndex].u;
198     if u = compU then exit(UnicodeCombiningInfos[midIndex].c) else
199     if u < compU then maxIndex := midIndex-1
200     else minIndex := midIndex+1;
201   until maxIndex < minIndex;
202   result := 255; //not combining
203 end;
204 
205 function GetUnicodeBidiClass(u: LongWord): TUnicodeBidiClass;
206 begin
207   result := GetUnicodeBidiClassEx(u);
208   if result = ubcMirroredNeutral then result := ubcOtherNeutrals
209   else if result = ubcCombiningLeftToRight then result := ubcLeftToRight;
210 end;
211 
212 function IsUnicodeMirrored(u: LongWord): boolean;
213 begin
214   result := GetUnicodeBidiClassEx(u) = ubcMirroredNeutral;
215 end;
216 
217 function IsZeroWidthUnicode(u: LongWord): boolean;
218 begin
219   case u of
220   UNICODE_ZERO_WIDTH_SPACE, UNICODE_ZERO_WIDTH_NON_JOINER,
221   UNICODE_ZERO_WIDTH_JOINER, UNICODE_ZERO_WIDTH_NO_BREAK_SPACE,
222   UNICODE_LEFT_TO_RIGHT_MARK,UNICODE_RIGHT_TO_LEFT_MARK,
223   UNICODE_ARABIC_LETTER_MARK: result := true;
224   else result := false;
225   end;
226 end;
227 
228 function IsUnicodeParagraphSeparator(u: LongWord): boolean;
229 begin
230   case u of
231   $0A, $0D, UNICODE_NEXT_LINE, UNICODE_PARAGRAPH_SEPARATOR,
232   UNICODE_INFORMATION_SEPARATOR_FOUR, UNICODE_INFORMATION_SEPARATOR_THREE, UNICODE_INFORMATION_SEPARATOR_TWO: result := true;
233   else result := false;
234   end;
235 end;
236 
237 function IsUnicodeCrLf(u: LongWord): boolean;
238 begin
239   result := (u=10) or (u=13);
240 end;
241 
242 function IsUnicodeSpace(u: LongWord): boolean;
243 begin
244   result := GetUnicodeBidiClass(u) = ubcWhiteSpace;
245 end;
246 
247 function IsUnicodeIsolateOrFormatting(u: LongWord): boolean;
248 begin
249   case u of
250   UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE,
251   UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
252   UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(true)
253   else exit(false);
254   end;
255 end;
256 
257 function IsModifierCombiningMark(u: LongWord): boolean;
258 begin
259   case u of
260   $0654,$0655,$0658,$06DC,$06E3,$06E7,$06E8,$08D3,$08F3: exit(true);
261   else exit(false);
262   end;
263 end;
264 
265 { TUnicodeBidiInfo }
266 
GetDiscardablenull267 function TUnicodeBidiInfo.GetDiscardable: boolean;
268 begin
269   result := IsRemoved and not IsLigatureBoundary;
270 end;
271 
GetEndOfLinenull272 function TUnicodeBidiInfo.GetEndOfLine: boolean;
273 begin
274   result := (Flags and BIDI_FLAG_END_OF_LINE) <> 0;
275 end;
276 
GetEndOfParagraphnull277 function TUnicodeBidiInfo.GetEndOfParagraph: boolean;
278 begin
279   result := (Flags and (BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH or BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH)) <> 0;
280 end;
281 
GetExplicitEndOfParagraphnull282 function TUnicodeBidiInfo.GetExplicitEndOfParagraph: boolean;
283 begin
284   result := (Flags and BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH) <> 0;
285 end;
286 
GetHasLigatureLeftnull287 function TUnicodeBidiInfo.GetHasLigatureLeft: boolean;
288 begin
289   result := (Flags and BIDI_FLAG_LIGATURE_LEFT) <> 0;
290 end;
291 
GetHasLigatureRightnull292 function TUnicodeBidiInfo.GetHasLigatureRight: boolean;
293 begin
294   result := (Flags and BIDI_FLAG_LIGATURE_RIGHT) <> 0;
295 end;
296 
GetImplicitEndOfParagraphnull297 function TUnicodeBidiInfo.GetImplicitEndOfParagraph: boolean;
298 begin
299   result := (Flags and BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH) <> 0;
300 end;
301 
GetIsCombiningLeftnull302 function TUnicodeBidiInfo.GetIsCombiningLeft: boolean;
303 begin
304   result := (Flags and BIDI_FLAG_COMBINING_LEFT) <> 0;
305 end;
306 
GetIsCombiningRightnull307 function TUnicodeBidiInfo.GetIsCombiningRight: boolean;
308 begin
309   result := (Flags and BIDI_FLAG_COMBINING_RIGHT) <> 0;
310 end;
311 
GetIsMirrorednull312 function TUnicodeBidiInfo.GetIsMirrored: boolean;
313 begin
314   result := (Flags and BIDI_FLAG_MIRRORED) <> 0;
315 end;
316 
GetLigatureBoundarynull317 function TUnicodeBidiInfo.GetLigatureBoundary: boolean;
318 begin
319   result := (Flags and BIDI_FLAG_LIGATURE_BOUNDARY) <> 0;
320 end;
321 
GetLigatureTransparentnull322 function TUnicodeBidiInfo.GetLigatureTransparent: boolean;
323 begin
324   result := (Flags and BIDI_FLAG_LIGATURE_TRANSPARENT) <> 0;
325 end;
326 
GetMulticharStartnull327 function TUnicodeBidiInfo.GetMulticharStart: boolean;
328 begin
329   result := (Flags and BIDI_FLAG_MULTICHAR_START) <> 0;
330 end;
331 
GetNonSpacingMarknull332 function TUnicodeBidiInfo.GetNonSpacingMark: boolean;
333 begin
334   result := (Flags and BIDI_FLAG_NON_SPACING_MARK) <> 0;
335 end;
336 
GetRemovednull337 function TUnicodeBidiInfo.GetRemoved: boolean;
338 begin
339   result := (Flags and BIDI_FLAG_REMOVED) <> 0;
340 end;
341 
GetRightToLeftnull342 function TUnicodeBidiInfo.GetRightToLeft: boolean;
343 begin
344   result := Odd(BidiLevel);
345 end;
346 
GetParagraphRightToLeftnull347 function TUnicodeBidiInfo.GetParagraphRightToLeft: boolean;
348 begin
349   result := Odd(ParagraphBidiLevel);
350 end;
351 
GetRightToLeftScriptnull352 function TUnicodeBidiInfo.GetRightToLeftScript: boolean;
353 begin
354   result := (Flags and BIDI_FLAG_RTL_SCRIPT) <> 0;
355 end;
356 
357 class operator TUnicodeBidiInfo.=(const AInfo1, AInfo2: TUnicodeBidiInfo
358   ): boolean;
359 begin
360   result := (AInfo1.BidiLevel = AInfo2.BidiLevel) and
361     (AInfo1.Flags = AInfo2.Flags) and
362     (AInfo1.ParagraphBidiLevel = AInfo2.ParagraphBidiLevel);
363 end;
364 
365 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer; baseDirection: LongWord): TUnicodeBidiArray;
366 type
367   TUnicodeAnalysisElement = record
368     bidiClass: TUnicodeBidiClass;
369     prevInIsolate, nextInIsolate: integer; //next index in current isolate
370   end;
371   TUnicodeAnalysisArray = array of TUnicodeAnalysisElement;
372 
373 var
374   a: TUnicodeAnalysisArray;
375 
376   procedure ResolveWeakTypes(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
377   var
378     curIndex,backIndex: Integer;
379     latestStrongClass, prevClass: TUnicodeBidiClass;
380   begin
381     //rules W1 and W2
382     prevClass := startOfSequence;
383     latestStrongClass:= prevClass;
384     curIndex := startIndex;
385     while curIndex <> afterEndIndex do
386     begin
387       if not result[curIndex].IsRemoved then
388       begin
389         case a[curIndex].bidiClass of
390           ubcNonSpacingMark: a[curIndex].bidiClass:= prevClass;
391           ubcEuropeanNumber: if latestStrongClass = ubcArabicLetter then a[curIndex].bidiClass:= ubcArabicNumber;
392         end;
393         case u[curIndex] of
394         UNICODE_LEFT_TO_RIGHT_ISOLATE,
395         UNICODE_RIGHT_TO_LEFT_ISOLATE,
396         UNICODE_FIRST_STRONG_ISOLATE,
397         UNICODE_POP_DIRECTIONAL_ISOLATE: prevClass := ubcOtherNeutrals;
398         else prevClass := a[curIndex].bidiClass;
399         end;
400         if prevClass in [ubcLeftToRight,ubcRightToLeft,ubcArabicLetter] then latestStrongClass:= prevClass;
401       end;
402       curIndex := a[curIndex].nextInIsolate;
403     end;
404 
405     // rule W4 and W5
406     prevClass := startOfSequence;
407     curIndex := startIndex;
408     while curIndex <> afterEndIndex do
409     begin
410       if not result[curIndex].IsRemoved then
411       begin
412         case a[curIndex].bidiClass of
413           ubcArabicLetter: a[curIndex].bidiClass := ubcRightToLeft;
414           ubcEuropeanNumber:
415             begin
416               backIndex := curIndex;
417               while backIndex > startIndex do
418               begin
419                 dec(backIndex);
420                 if result[backIndex].IsRemoved then continue;
421                 if a[backIndex].bidiClass = ubcEuropeanNumberTerminator then
422                   a[backIndex].bidiClass := ubcEuropeanNumber
423                 else break;
424               end;
425             end;
426           ubcEuropeanNumberSeparator:
427             if (prevClass = ubcEuropeanNumber) and (a[curIndex].nextInIsolate <> afterEndIndex) and
428               (a[a[curIndex].nextInIsolate].bidiClass = ubcEuropeanNumber) then
429                 a[curIndex].bidiClass:= ubcEuropeanNumber;
430           ubcCommonSeparator:
431             if (prevClass in[ubcEuropeanNumber,ubcArabicNumber]) and (a[curIndex].nextInIsolate <> afterEndIndex) and
432               (a[a[curIndex].nextInIsolate].bidiClass = prevClass) then
433                 a[curIndex].bidiClass:= prevClass;
434           ubcEuropeanNumberTerminator:
435             if prevClass = ubcEuropeanNumber then
436               a[curIndex].bidiClass:= ubcEuropeanNumber;
437         end;
438         prevClass := a[curIndex].bidiClass;
439       end;
440 
441       curIndex := a[curIndex].nextInIsolate;
442     end;
443 
444     // rule W6 and W7
445     curIndex := startIndex;
446     latestStrongClass := startOfSequence;
447     while curIndex <> afterEndIndex do
448     begin
449       if not result[curIndex].IsRemoved then
450       begin
451         case a[curIndex].bidiClass of
452           ubcEuropeanNumberSeparator,ubcEuropeanNumberTerminator,ubcCommonSeparator: a[curIndex].bidiClass := ubcOtherNeutrals;
453           ubcLeftToRight,ubcRightToLeft,ubcArabicLetter: latestStrongClass:= a[curIndex].bidiClass;
454           ubcEuropeanNumber: if latestStrongClass = ubcLeftToRight then a[curIndex].bidiClass := ubcLeftToRight;
455         end;
456       end;
457       curIndex := a[curIndex].nextInIsolate;
458     end;
459   end;
460 
461   procedure ResolveNeutrals(startIndex, afterEndIndex: integer; startOfSequence, endOfSequence: TUnicodeBidiClass);
462   var
463     curIndex,prevIndex,previewIndex: Integer;
464     curRTL, include, rightToLeftEmbedding: Boolean;
465     bidiClass: TUnicodeBidiClass;
466   begin
467     rightToLeftEmbedding := odd(result[startIndex].BidiLevel);
468     curIndex := startIndex;
469     curRTL := startOfSequence in [ubcRightToLeft,ubcArabicLetter];
470     while curIndex <> afterEndIndex do
471     begin
472       case a[curIndex].bidiClass of
473         ubcLeftToRight: curRTL := false;
474         ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber: curRTL := true;
475       else
476         if curRTL <> rightToLeftEmbedding then
477         begin
478           //determine whether following neutral chars are included in reverse direction
479           prevIndex := curIndex;
480           previewIndex := a[curIndex].nextInIsolate;
481           include := false;
482           while previewIndex <> afterEndIndex do //uses endOfSequence for overflow
483           begin
484             if previewIndex = afterEndIndex then
485               bidiClass:= endOfSequence
486             else
487               bidiClass:= a[previewIndex].bidiClass;
488             case bidiClass of
489               ubcLeftToRight:
490                 begin
491                   include := not curRTL;
492                   break;
493                 end;
494               ubcRightToLeft,ubcArabicLetter,ubcArabicNumber,ubcEuropeanNumber:
495                 begin
496                   include := curRTL;
497                   break;
498                 end;
499             end;
500             prevIndex := previewIndex;
501             previewIndex := a[previewIndex].nextInIsolate;
502           end;
503           if previewIndex = afterEndIndex then previewIndex := prevIndex;
504           if include then
505           begin
506             while curIndex <> previewIndex do
507             begin
508               if a[curIndex].bidiClass = ubcBoundaryNeutral then
509                 result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
510 
511               if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
512               begin
513                 if curRTL then a[curIndex].bidiClass := ubcRightToLeft
514                 else a[curIndex].bidiClass := ubcLeftToRight;
515               end;
516 
517               curIndex := a[curIndex].nextInIsolate;
518             end;
519           end else
520             curRTL := rightToLeftEmbedding;
521         end;
522       end;
523 
524       if a[curIndex].bidiClass = ubcBoundaryNeutral then
525         result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED; //supposed to be removed for rendering
526 
527       if a[curIndex].bidiClass in (ubcNeutral+[ubcBoundaryNeutral,ubcUnknown]) then
528       begin
529         if curRTL then a[curIndex].bidiClass := ubcRightToLeft
530         else a[curIndex].bidiClass := ubcLeftToRight;
531       end;
532 
533       curIndex := a[curIndex].nextInIsolate;
534     end;
535   end;
536 
537   procedure ResolveBrackets(startIndex, afterEndIndex: integer; startOfSequence, {%H-}endOfSequence: TUnicodeBidiClass);
538   type TBracketPair = record
539                   openIndex,closeIndex: integer;
540                 end;
541   var
542     bracketPairs: array of TBracketPair;
543     bracketPairCount: integer;
544     rightToLeft: boolean;
545 
546     procedure SortBracketPairs;
547     var
548       i,j,k: Integer;
549       temp: TBracketPair;
550     begin
551       for i := 1 to bracketPairCount-1 do
552       begin
553         for j := 0 to i-1 do
554           if bracketPairs[j].openIndex > bracketPairs[i].openIndex then
555           begin
556             temp := bracketPairs[i];
557             for k := i downto j+1 do
558               bracketPairs[k] := bracketPairs[k-1];
559             bracketPairs[j] := temp;
560           end;
561       end;
562     end;
563 
564     procedure FindBrackets; // rule BD16
565     const MAX_BRACKET_STACK = 63;
566     var
567       bracketStack: array[0..MAX_BRACKET_STACK-1] of record
568           bracketCharInfo: TUnicodeBracketInfo;
569           index: integer;
570         end;
571       bracketStackPos,peekPos: integer;
572       curIndex: integer;
573       curBracket: TUnicodeBracketInfo;
574     begin
575       bracketPairCount := 0;
576       bracketStackPos := 0;
577       bracketStack[0].index := -1; //avoid warning
578       curIndex := startIndex;
579       while curIndex <> afterEndIndex do
580       begin
581         if a[curIndex].bidiClass = ubcOtherNeutrals then
582         begin
583           curBracket := GetUnicodeBracketInfo(u[curIndex]);
584           if curBracket.IsBracket then
585           begin
586             // found opening bracket
587             if curBracket.OpeningBracket = u[curIndex] then
588             begin
589               if bracketStackPos <= high(bracketStack) then
590               begin
591                 bracketStack[bracketStackPos].bracketCharInfo := curBracket;
592                 bracketStack[bracketStackPos].index := curIndex;
593                 inc(bracketStackPos);
594               end else
595                 break;
596             end else
597             begin
598               for peekPos := bracketStackPos-1 downto 0 do
599                 if (bracketStack[peekPos].bracketCharInfo.ClosingBracket = u[curIndex]) or
600                   ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_POINTING_ANGLE_BRACKET)) or
601                   ((bracketStack[peekPos].bracketCharInfo.ClosingBracket = UNICODE_RIGHT_POINTING_ANGLE_BRACKET) and (u[curIndex] = UNICODE_RIGHT_ANGLE_BRACKET)) then
602                 begin
603                   bracketStackPos := peekPos;
604                   if bracketPairCount >= length(bracketPairs) then
605                     setlength(bracketPairs, bracketPairCount*2 + 8);
606                   bracketPairs[bracketPairCount].openIndex := bracketStack[peekPos].index;
607                   bracketPairs[bracketPairCount].closeIndex := curIndex;
608                   inc(bracketPairCount);
609                   break;
610                 end;
611             end;
612           end;
613         end;
614         curIndex := a[curIndex].nextInIsolate;
615       end;
616     end;
617 
618     procedure SetCharClass(index: integer; newClass: TUnicodeBidiClass);
619     begin
620       a[index].bidiClass:= newClass;
621       index := a[index].nextInIsolate;
622       while (index <> afterEndIndex) and (GetUnicodeBidiClass(u[index]) = ubcNonSpacingMark) do
623       begin
624         a[index].bidiClass := newClass;
625         index := a[index].nextInIsolate;
626       end;
627     end;
628 
629     procedure ResolveBrackets; // rule N0
630     var
631       i, curIndex: Integer;
632       sameDirection, oppositeDirection, oppositeContext: boolean;
633     begin
634       for i := 0 to bracketPairCount-1 do
635       begin
636         curIndex := bracketPairs[i].openIndex+1;
637         sameDirection:= false;
638         oppositeDirection:= false;
639         while curIndex <> bracketPairs[i].closeIndex do
640         begin
641           Assert((curIndex >= startIndex) and (curIndex < length(a)), 'Expecting valid index');
642           case a[curIndex].bidiClass of
643           ubcLeftToRight:
644             if not rightToLeft then
645             begin
646               sameDirection := true;
647               break;
648             end else oppositeDirection:= true;
649           ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
650             if rightToLeft then
651             begin
652               sameDirection := true;
653               break;
654             end else oppositeDirection:= true;
655           end;
656           curIndex := a[curIndex].nextInIsolate;
657         end;
658         if sameDirection then
659         begin
660           if rightToLeft then
661           begin
662             SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
663             SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
664           end else
665           begin
666             SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
667             SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
668           end;
669         end else
670         if oppositeDirection then
671         begin
672           curIndex := a[bracketPairs[i].openIndex].prevInIsolate;
673           oppositeContext := false;
674           while curIndex >= startIndex do
675           begin
676             case a[curIndex].bidiClass of
677             ubcRightToLeft,ubcArabicLetter,ubcEuropeanNumber,ubcArabicNumber:
678               begin
679                 oppositeContext := not rightToLeft;
680                 break;
681               end;
682             ubcLeftToRight:
683               begin
684                 oppositeContext := rightToLeft;
685                 break;
686               end;
687             end;
688             curIndex := a[curIndex].prevInIsolate;
689           end;
690           if rightToLeft xor oppositeContext then
691           begin
692             SetCharClass(bracketPairs[i].openIndex, ubcRightToLeft);
693             SetCharClass(bracketPairs[i].closeIndex, ubcRightToLeft);
694           end else
695           begin
696             SetCharClass(bracketPairs[i].openIndex, ubcLeftToRight);
697             SetCharClass(bracketPairs[i].closeIndex, ubcLeftToRight);
698           end;
699         end;
700       end;
701     end;
702 
703   begin
704     rightToLeft:= startOfSequence in[ubcRightToLeft,ubcArabicLetter];
705     FindBrackets;
706     SortBracketPairs;
707     ResolveBrackets;
708   end;
709 
710   procedure ResolveLigature(startIndex: integer);
711   var
712     prevJoiningType, joiningType: TUnicodeJoiningType;
713     prevJoiningTypeBidilevel: byte;
714     prevJoiningTypeIndex: integer;
715     curIndex: Integer;
716   begin
717     prevJoiningType := ujtNonJoining;
718     prevJoiningTypeIndex := -1;
719     prevJoiningTypeBidilevel:= 0;
720     curIndex := startIndex;
721     while curIndex <> -1 do
722     begin
723       if prevJoiningTypeBidilevel <> result[curIndex].BidiLevel then
724         prevJoiningType := ujtNonJoining;
725       if result[curIndex].IsNonSpacingMark then
726         joiningType := ujtTransparent //NSM are always joining-transparent
727         else joiningType := GetUnicodeJoiningType(u[curIndex]);
728       if joiningType = ujtTransparent then
729         result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_TRANSPARENT;
730       if result[curIndex].IsRightToLeft then
731       begin
732         if (joiningType in[ujtRightJoining,ujtDualJoining])
733           and (prevJoiningType in[ujtLeftJoining,ujtDualJoining,ujtJoinCausing]) then
734           result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_RIGHT;
735         if (prevJoiningType in[ujtLeftJoining,ujtDualJoining]) and (prevJoiningTypeIndex <> -1) and
736           (joiningType in[ujtRightJoining,ujtDualJoining,ujtJoinCausing]) then
737           result[prevJoiningTypeIndex].Flags:= result[prevJoiningTypeIndex].Flags or BIDI_FLAG_LIGATURE_LEFT;
738       end else
739       begin
740         if (joiningType in[ujtLeftJoining,ujtDualJoining])
741           and (prevJoiningType in[ujtRightJoining,ujtDualJoining,ujtJoinCausing]) then
742           result[curIndex].Flags:= result[curIndex].Flags or BIDI_FLAG_LIGATURE_LEFT;
743         if (prevJoiningType in[ujtRightJoining,ujtDualJoining]) and (prevJoiningTypeIndex <> -1) and
744           (joiningType in[ujtLeftJoining,ujtDualJoining,ujtJoinCausing]) then
745           result[prevJoiningTypeIndex].Flags:= result[prevJoiningTypeIndex].Flags or BIDI_FLAG_LIGATURE_RIGHT;
746       end;
747       if joiningType <> ujtTransparent then
748       begin
749         prevJoiningType := joiningType;
750         prevJoiningTypeIndex:= curIndex;
751         prevJoiningTypeBidilevel:= result[curIndex].BidiLevel;
752       end;
753       curIndex := a[curIndex].nextInIsolate;
754     end;
755   end;
756 
757   procedure AnalyzeSequence(startIndex, afterEndIndex: integer; sos, eos: TUnicodeBidiClass);
758   begin
759     if afterEndIndex = startIndex then exit;
760     ResolveWeakTypes(startIndex, afterEndIndex, sos, eos);
761     ResolveBrackets(startIndex, afterEndIndex, sos, eos);
762     ResolveNeutrals(startIndex, afterEndIndex, sos, eos);
763   end;
764 
765   procedure SameLevelRuns(startIndex: integer);
766   var
767     curBidiLevel: byte;
768     latestIndex,curIndex, curStartIndex: Integer;
769     curSos,eos: TUnicodeBidiClass;
770   begin
771     curIndex := startIndex;
772     while (curIndex<>-1) and result[curIndex].IsRemoved do
773       curIndex := a[curIndex].nextInIsolate;
774     if curIndex = -1 then exit;
775 
776     curStartIndex:= curIndex;
777     curBidiLevel := result[curIndex].bidiLevel;
778     if odd(curBidiLevel) then curSos := ubcRightToLeft else curSos := ubcLeftToRight;
779     latestIndex := -1;
780     while curIndex <> -1 do
781     begin
782       if not result[curIndex].IsRemoved then
783       begin
784         if (latestIndex <> -1) and (result[curIndex].bidiLevel <> curBidiLevel) then
785         begin
786           if result[curIndex].bidiLevel > curBidiLevel then
787           begin
788             if odd(result[curIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
789           end else
790           begin
791             if odd(curBidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
792           end;
793 
794           AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
795 
796           curSos := eos;
797           curBidiLevel:= result[curIndex].bidiLevel;
798           curStartIndex:= curIndex;
799         end;
800         latestIndex := curIndex;
801       end;
802 
803       if (a[curIndex].nextInIsolate = -1) and (latestIndex<>-1) then
804       begin
805         if odd(result[latestIndex].bidiLevel) then eos := ubcRightToLeft else eos := ubcLeftToRight;
806         AnalyzeSequence(curStartIndex, a[latestIndex].nextInIsolate, curSos, eos);
807         break;
808       end;
809 
810       curIndex := a[curIndex].nextInIsolate;
811     end;
812   end;
813 
814   //analyse bidi formatting of an embedding or an override block
815   procedure AnalyzeFormattingBlocks(startIndex, lastIndex: integer; minBidiLevel: byte; formattingCode: LongWord);
816   var curIndex, nextIndex, levelIncrease: integer;
817     subFormatBeforeStart, subFormatStart, formatNesting: integer;
818     subFormatCode: LongWord;
819   begin
820     case formattingCode of
821     UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
822       if odd(minBidiLevel) then inc(minBidiLevel);
823     UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
824       if not odd(minBidiLevel) then inc(minBidiLevel);
825     end;
826     nextIndex := startIndex;
827     repeat
828       Assert(nextIndex >= 0, 'Expecting valid index');
829       curIndex := nextIndex;
830       nextIndex := a[curIndex].nextInIsolate;
831       result[curIndex].bidiLevel := minBidiLevel;
832 
833       //apply override
834       if formattingCode = UNICODE_LEFT_TO_RIGHT_OVERRIDE then a[curIndex].bidiClass := ubcLeftToRight
835       else if formattingCode = UNICODE_RIGHT_TO_LEFT_OVERRIDE then a[curIndex].bidiClass := ubcRightToLeft;
836 
837       case u[curIndex] of
838       UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
839       UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE:
840         begin
841           result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
842           case u[curIndex] of
843             UNICODE_LEFT_TO_RIGHT_OVERRIDE,UNICODE_LEFT_TO_RIGHT_EMBEDDING:
844               if odd(minBidiLevel) then levelIncrease := 1
845               else levelIncrease := 2;
846             UNICODE_RIGHT_TO_LEFT_OVERRIDE,UNICODE_RIGHT_TO_LEFT_EMBEDDING:
847               if odd(minBidiLevel) then levelIncrease := 2
848               else levelIncrease := 1;
849           else levelIncrease:= 2;
850           end;
851           if minBidiLevel <= UNICODE_MAX_BIDI_DEPTH-levelIncrease-1 then
852           begin
853             subFormatCode:= u[curIndex];
854             subFormatBeforeStart := curIndex;
855             subFormatStart := nextIndex;
856             formatNesting:= 1;
857             while formatNesting > 0 do
858             begin
859               //sub-format ends because no more chars
860               if curIndex = lastIndex then
861               begin
862                 if curIndex <> subFormatBeforeStart then
863                   AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
864                 break;
865               end;
866 
867               Assert(nextIndex >= 0, 'Expecting valid index');
868               case u[nextIndex] of
869               UNICODE_LEFT_TO_RIGHT_EMBEDDING, UNICODE_RIGHT_TO_LEFT_EMBEDDING,
870               UNICODE_LEFT_TO_RIGHT_OVERRIDE, UNICODE_RIGHT_TO_LEFT_OVERRIDE: inc(formatNesting);
871               UNICODE_POP_DIRECTIONAL_FORMATTING:
872                 begin
873                   dec(formatNesting);
874                   if formatNesting = 0 then
875                   begin
876                     //sub-format ends because enough matching pop chars found
877                     if curIndex <> subFormatBeforeStart then
878                       AnalyzeFormattingBlocks(subFormatStart, curIndex, minBidiLevel+levelIncrease, subFormatCode);
879 
880                     curIndex := nextIndex;
881                     nextIndex := a[curIndex].nextInIsolate;
882                     result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
883                     break;
884                   end;
885                 end;
886               end;
887 
888               curIndex := nextIndex;
889               nextIndex := a[curIndex].nextInIsolate;
890             end;
891           end;
892         end;
893       UNICODE_POP_DIRECTIONAL_FORMATTING: //ignored when no matching formatting code
894         begin
895           result[curIndex].Flags := result[curIndex].Flags OR BIDI_FLAG_REMOVED;
896         end;
897       end;
898     until curIndex = lastIndex;
899   end;
900 
901   procedure ResolveImplicitLevels(startIndex: integer); // rule I1 and I2
902   var
903     curIndex: Integer;
904   begin
905     curIndex := startIndex;
906     while curIndex <> -1 do
907     begin
908       case a[curIndex].bidiClass of
909       ubcRightToLeft,ubcArabicLetter:
910         if not Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel);
911       ubcEuropeanNumber,ubcArabicNumber:
912         if Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel)
913         else inc(result[curIndex].bidiLevel, 2);
914       ubcLeftToRight: if Odd(result[curIndex].bidiLevel) then inc(result[curIndex].bidiLevel);
915       end;
916       curIndex := a[curIndex].nextInIsolate;
917     end;
918   end;
919 
920   procedure ResetEndOfParagraphLevels(startIndex: integer);  // rule L1
921   var
922     prevIndex,curIndex: Integer;
923 
924     procedure TweakWhiteSpaceBefore(index: integer);
925     var
926       isWhiteSpaceOrIsolate: boolean;
927     begin
928       while index <> -1 do
929       begin
930         case u[index] of
931         UNICODE_FIRST_STRONG_ISOLATE, UNICODE_POP_DIRECTIONAL_ISOLATE,
932         UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE:
933           isWhiteSpaceOrIsolate:= true;
934         else
935           isWhiteSpaceOrIsolate:= GetUnicodeBidiClass(u[index]) = ubcWhiteSpace;
936         end;
937         if isWhiteSpaceOrIsolate then
938           result[index].bidiLevel := result[index].ParagraphBidiLevel
939         else
940           break;
941         index := a[index].prevInIsolate;
942       end;
943     end;
944 
945   begin
946     prevIndex := -1;
947     curIndex := startIndex;
948     while curIndex <> -1 do
949     begin
950       case GetUnicodeBidiClass(u[curIndex]) of
951         ubcSegmentSeparator, ubcParagraphSeparator:
952         begin
953           result[curIndex].bidiLevel := result[curIndex].ParagraphBidiLevel;
954           TweakWhiteSpaceBefore(prevIndex);
955         end;
956       end;
957       prevIndex := curIndex;
958       curIndex := a[curIndex].nextInIsolate;
959     end;
960     TweakWhiteSpaceBefore(prevIndex);
961   end;
962 
963   function DetermineIsolateDirectionFromFirstStrongClass(startIndex: integer): LongWord;
964   var
965     curIndex: Integer;
966   begin
967     curIndex := startIndex;
968     while curIndex <> -1 do
969     begin
970       Assert(curIndex >= 0, 'Expecting valid index');
971       case a[curIndex].bidiClass of
972         ubcLeftToRight: exit(UNICODE_LEFT_TO_RIGHT_ISOLATE);
973         ubcRightToLeft,ubcArabicLetter: exit(UNICODE_RIGHT_TO_LEFT_ISOLATE);
974       end;
975       case u[curIndex] of
976         UNICODE_LEFT_TO_RIGHT_OVERRIDE: exit(UNICODE_LEFT_TO_RIGHT_ISOLATE);
977         UNICODE_RIGHT_TO_LEFT_OVERRIDE: exit(UNICODE_RIGHT_TO_LEFT_ISOLATE);
978       end;
979       curIndex := a[curIndex].nextInIsolate;
980     end;
981     result := UNICODE_LEFT_TO_RIGHT_ISOLATE;
982   end;
983 
984   procedure LinkCharsInIsolate(startIndex: integer; charCount: integer; out endIndex : integer);
985   var
986     curIndex,isolateStackPos,
987     prevIndex: Integer;
988   begin
989     a[startIndex].prevInIsolate := -1;
990     prevIndex := -1;
991     curIndex := startIndex;
992     isolateStackPos:= 0;
993     while curIndex < startIndex+charCount do
994     begin
995       if u[curIndex] = UNICODE_POP_DIRECTIONAL_ISOLATE then
996         if isolateStackPos > 0 then dec(isolateStackPos);
997 
998       if isolateStackPos = 0 then
999       begin
1000         if prevIndex<>-1 then a[prevIndex].nextInIsolate := curIndex;
1001         a[curIndex].prevInIsolate := prevIndex;
1002 
1003         prevIndex := curIndex;
1004       end;
1005 
1006       case u[curIndex] of
1007       UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE: inc(isolateStackPos);
1008       end;
1009       inc(curIndex);
1010     end;
1011     a[prevIndex].nextInIsolate := -1;
1012     endIndex := prevIndex;
1013   end;
1014 
1015   //split isolates in order to format them independently
1016   procedure AnalyzeIsolates(startIndex: integer; charCount: integer; isolateDirection: LongWord; minBidiLevel: byte = 0;
1017                             isParagraph: boolean = false);
1018   var curIndex, endIndex: integer;
1019     nextIndex: integer;
1020     subBidiLevel, levelIncrease: byte;
1021     subIsolateStart: integer;
1022     subIsolateDirection: LongWord;
1023   begin
1024     if charCount = 0 then exit;
1025     Assert(startIndex>=0, 'Invalid start index');
1026 
1027     LinkCharsInIsolate(startIndex, charCount, endIndex);
1028 
1029     if isolateDirection = UNICODE_FIRST_STRONG_ISOLATE then
1030       isolateDirection := DetermineIsolateDirectionFromFirstStrongClass(startIndex);
1031 
1032     case isolateDirection of
1033     UNICODE_LEFT_TO_RIGHT_ISOLATE: if Odd(minBidiLevel) then inc(minBidiLevel);
1034     UNICODE_RIGHT_TO_LEFT_ISOLATE: if not Odd(minBidiLevel) then inc(minBidiLevel);
1035     else
1036       raise EInvalidOperation.Create('Unknown isolate direction');
1037     end;
1038 
1039     if isParagraph then
1040     begin
1041       curIndex := startIndex;
1042       while curIndex <> -1 do
1043       begin
1044         result[curIndex].ParagraphBidiLevel := minBidiLevel;
1045         curIndex := a[curIndex].nextInIsolate;
1046       end;
1047     end;
1048 
1049     case isolateDirection of
1050     UNICODE_LEFT_TO_RIGHT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_LEFT_TO_RIGHT_EMBEDDING);
1051     UNICODE_RIGHT_TO_LEFT_ISOLATE: AnalyzeFormattingBlocks(startIndex, endIndex, minBidiLevel, UNICODE_RIGHT_TO_LEFT_EMBEDDING);
1052     end;
1053 
1054     SameLevelRuns(startIndex);
1055     ResolveImplicitLevels(startIndex);
1056     ResolveLigature(startIndex);
1057 
1058     if isParagraph then
1059       ResetEndOfParagraphLevels(startIndex);
1060 
1061     //analyse sub-isolates
1062     curIndex := startIndex;
1063     while curIndex <> -1 do
1064     begin
1065       Assert(curIndex >= 0, 'Expecting valid index');
1066       case u[curIndex] of
1067       UNICODE_LEFT_TO_RIGHT_ISOLATE, UNICODE_RIGHT_TO_LEFT_ISOLATE, UNICODE_FIRST_STRONG_ISOLATE:
1068         begin
1069           subBidiLevel := result[curIndex].bidiLevel;
1070           nextIndex := a[curIndex].nextInIsolate;
1071           if nextIndex <> -1 then
1072           begin
1073             if result[nextIndex].bidiLevel > subBidiLevel then
1074               subBidiLevel:= result[nextIndex].bidiLevel;
1075           end;
1076           if ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
1077              (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) or
1078              ((isolateDirection = UNICODE_LEFT_TO_RIGHT_ISOLATE) and
1079              (u[curIndex] = UNICODE_RIGHT_TO_LEFT_ISOLATE)) then
1080             levelIncrease := 1
1081           else
1082             levelIncrease:= 2;
1083           if subBidiLevel+levelIncrease <= UNICODE_MAX_BIDI_DEPTH-1 then
1084           begin
1085             subIsolateDirection := u[curIndex];
1086             subIsolateStart:= curIndex+1;
1087             curIndex := nextIndex;
1088 
1089             //sub-isolates ends because no more chars
1090             if curIndex = -1 then
1091             begin
1092               AnalyzeIsolates(subIsolateStart, startIndex+charCount-subIsolateStart, subIsolateDirection, subBidiLevel+1);
1093               break;
1094             end else
1095             begin
1096               AnalyzeIsolates(subIsolateStart, curIndex-subIsolateStart, subIsolateDirection, subBidiLevel+1);
1097               continue;
1098             end;
1099           end;
1100         end;
1101       end;
1102       curIndex := a[curIndex].nextInIsolate;
1103     end;
1104   end;
1105 
1106   //split UTF8 string into paragraphs
1107   procedure SplitParagraphs;
1108   var
1109     lineStartIndex, curIndex: integer;
1110   begin
1111     curIndex := 0;
1112     lineStartIndex := curIndex;
1113     while curIndex < ALength do
1114     begin
1115       if a[curIndex].bidiClass = ubcParagraphSeparator then
1116       begin
1117         //skip second CRLF char
1118         if IsUnicodeCrLf(u[curIndex]) and (curIndex+1 < ALength) and
1119            IsUnicodeCrLf(u[curIndex+1]) and (u[curIndex+1] <> u[curIndex]) then
1120         begin
1121           inc(curIndex);
1122           result[curIndex].Flags := result[curIndex].Flags and not BIDI_FLAG_MULTICHAR_START;
1123         end;
1124 
1125         result[curIndex].Flags := result[curIndex].Flags or BIDI_FLAG_EXPLICIT_END_OF_PARAGRAPH;
1126 
1127         AnalyzeIsolates(lineStartIndex, curIndex+1-lineStartIndex, baseDirection, 0, true);
1128         lineStartIndex := curIndex+1;
1129       end;
1130       inc(curIndex);
1131     end;
1132     if curIndex > lineStartIndex then
1133     begin
1134       result[curIndex-1].Flags := result[curIndex-1].Flags or BIDI_FLAG_IMPLICIT_END_OF_PARAGRAPH;
1135       AnalyzeIsolates(lineStartIndex, curIndex-lineStartIndex, baseDirection, 0, true);
1136     end;
1137   end;
1138 
1139 var i: integer;
1140   classEx: TUnicodeBidiClass;
1141 begin
1142   setlength(a, ALength);
1143   setlength(result, ALength);
1144   if ALength > 0 then
1145   begin
1146     for i := 0 to high(a) do
1147     begin
1148       classEx := GetUnicodeBidiClassEx(u[i]);
1149       case classEx of
1150       ubcMirroredNeutral:
1151         begin
1152           result[i].Flags := result[i].Flags or BIDI_FLAG_MIRRORED;
1153           a[i].bidiClass := ubcOtherNeutrals;
1154         end;
1155       ubcCombiningLeftToRight:
1156         begin
1157           case GetUnicodeCombiningClass(u[i]) of
1158           208, 224: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_LEFT;
1159           210, 226, 9: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_RIGHT;
1160           0: result[i].Flags := result[i].Flags OR BIDI_FLAG_COMBINING_LEFT OR BIDI_FLAG_COMBINING_RIGHT;
1161           end;
1162           a[i].bidiClass := ubcLeftToRight;
1163         end;
1164       otherwise
1165         a[i].bidiClass := classEx;
1166       end;
1167       case u[i] of
1168       UNICODE_LINE_SEPARATOR: //line separator within paragraph
1169         result[i].Flags := result[i].Flags or BIDI_FLAG_END_OF_LINE;
1170       UNICODE_ZERO_WIDTH_JOINER, UNICODE_ZERO_WIDTH_NON_JOINER:
1171         result[i].Flags := result[i].Flags OR BIDI_FLAG_LIGATURE_BOUNDARY;
1172       end;
1173       case a[i].bidiClass of
1174       ubcArabicLetter,ubcArabicNumber,ubcRightToLeft:
1175         result[i].Flags := result[i].Flags OR BIDI_FLAG_RTL_SCRIPT;
1176       ubcNonSpacingMark: result[i].Flags := result[i].Flags OR BIDI_FLAG_NON_SPACING_MARK;
1177       end;
1178       if (result[i].Flags and (BIDI_FLAG_NON_SPACING_MARK or BIDI_FLAG_COMBINING_LEFT
1179                                or BIDI_FLAG_COMBINING_RIGHT) = 0) or
1180         (i = 0) or (a[i-1].bidiClass in [ubcSegmentSeparator, ubcParagraphSeparator]) then
1181         result[i].Flags := result[i].Flags OR BIDI_FLAG_MULTICHAR_START;
1182     end;
1183     SplitParagraphs;
1184   end;
1185 end;
1186 
1187 function AnalyzeBidiUnicode(u: PLongWord; ALength: integer;
1188   ABidiMode: TFontBidiMode): TUnicodeBidiArray;
1189 begin
1190   case ABidiMode of
1191     fbmLeftToRight: result := AnalyzeBidiUnicode(u, ALength, UNICODE_LEFT_TO_RIGHT_ISOLATE);
1192     fbmRightToLeft: result := AnalyzeBidiUnicode(u, ALength, UNICODE_RIGHT_TO_LEFT_ISOLATE);
1193   else
1194     {fbmAuto} result := AnalyzeBidiUnicode(u, ALength, UNICODE_FIRST_STRONG_ISOLATE);
1195   end;
1196 end;
1197 
1198 function GetUnicodeDisplayOrder(const AInfo: TUnicodeBidiArray): TUnicodeDisplayOrder;
1199 begin
1200   if length(AInfo)=0 then
1201     result := nil
1202   else
1203     result := GetUnicodeDisplayOrder(@AInfo[0], sizeof(TUnicodeBidiInfo), length(AInfo));
1204 end;
1205 
1206 function GetUnicodeDisplayOrder(ALevels: PByte; ACount: integer): TUnicodeDisplayOrder;
1207 
1208   procedure DetermineDisplayOrderRec(AOffset: integer; AStartIndex, ABlockCount: integer; AEmbeddingLevel: byte);
1209   var minLevel: byte;
1210     blockIndex,subStartIndex,subCount, subOffset: integer;
1211   begin
1212     //writeln('DetermineDisplayOrderRec('+inttostr(AOffset)+'/'+inttostr(ACount)+',' + inttostr(AStartIndex) +',*' +inttostr(ABlockCount)+','+inttostr(AEmbeddingLevel)+')');
1213     blockIndex := 0;
1214     subStartIndex := 0; //avoid warning
1215     while blockIndex < ABlockCount do
1216     begin
1217       Assert(AOffset < ACount, 'Offset out of bounds');
1218       if ALevels[AOffset] = AEmbeddingLevel then
1219       begin
1220         if odd(AEmbeddingLevel) then
1221           result[AStartIndex+ABlockCount-1-blockIndex] := AOffset
1222         else
1223           result[AStartIndex+blockIndex] := AOffset;
1224         inc(AOffset);
1225         inc(blockIndex);
1226       end else
1227       begin
1228         if not odd(AEmbeddingLevel) then
1229           subStartIndex := AStartIndex+blockIndex;
1230         subOffset := AOffset;
1231         minLevel := ALevels[AOffset];
1232         inc(AOffset);
1233         inc(blockIndex);
1234         subCount := 1;
1235         while true do
1236         begin
1237           if (blockIndex < ABlockCount) and (ALevels[AOffset] > AEmbeddingLevel) then
1238           begin
1239             Assert(AOffset < ACount, 'Offset out of bounds');
1240             if ALevels[AOffset] < minLevel then
1241               minLevel:= ALevels[AOffset];
1242             inc(AOffset);
1243             inc(blockIndex);
1244             inc(subCount);
1245           end else
1246           begin
1247             if odd(AEmbeddingLevel) then
1248               subStartIndex := AStartIndex+ABlockCount-1-(blockIndex-1);
1249             DetermineDisplayOrderRec(subOffset, subStartIndex, subCount, minLevel);
1250             break;
1251           end;
1252         end;
1253       end;
1254     end;
1255   end;
1256 
1257 begin
1258   setlength(result, ACount);
1259   DetermineDisplayOrderRec(0, 0, ACount, 0);
1260 end;
1261 
1262 function GetUnicodeDisplayOrder(ABidiInfo: PUnicodeBidiInfo; AStride, ACount: integer): TUnicodeDisplayOrder;
1263 var
1264   levels: packed array of byte;
1265   originalIndices: array of integer;
1266   index,len, i: integer;
1267   p: PByte;
1268 begin
1269   len := 0;
1270   p := PByte(ABidiInfo);
1271   for i := 0 to ACount-1 do
1272   begin
1273     if not PUnicodeBidiInfo(p)^.IsRemoved then inc(len);
1274     inc(p, AStride);
1275   end;
1276   if len = 0 then
1277     result := nil
1278   else
1279   begin
1280     setlength(levels, len);
1281     setlength(originalIndices, len);
1282     p := PByte(ABidiInfo);
1283     index := 0;
1284     for i := 0 to ACount-1 do
1285     begin
1286       if not PUnicodeBidiInfo(p)^.IsRemoved then
1287       begin
1288         levels[index] := PUnicodeBidiInfo(p)^.BidiLevel;
1289         originalIndices[index] := i;
1290         inc(index);
1291       end;
1292       inc(p, AStride);
1293     end;
1294     result := GetUnicodeDisplayOrder(@levels[0], len);
1295     for i := 0 to len-1 do
1296       result[i] := originalIndices[result[i]];
1297   end;
1298 end;
1299 
1300 end.
1301 
1302