1 {   Unicode parser helper unit.
2 
3     Copyright (c) 2012-2015 by Inoussa OUEDRAOGO
4 
5     The source code is distributed under the Library GNU
6     General Public License with the following modification:
7 
8         - object files and libraries linked into an application may be
9           distributed without source code.
10 
11     If you didn't receive a copy of the file COPYING, contact:
12           Free Software Foundation
13           675 Mass Ave
14           Cambridge, MA  02139
15           USA
16 
17     This program is distributed in the hope that it will be useful,
18     but WITHOUT ANY WARRANTY; without even the implied warranty of
19     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
20 unit helper;
21 
22 {$mode delphi}
23 {$H+}
24 {$PACKENUM 1}
25 {$pointermath on}
26 {$typedaddress on}
27 {$warn 4056 off}  //Conversion between ordinals and pointers is not portable
28 
29 {$macro on}
30 {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
31   {$define X_PACKED:=}
32 {$else FPC_REQUIRES_PROPER_ALIGNMENT}
33   {$define X_PACKED:=packed}
34 {$endif FPC_REQUIRES_PROPER_ALIGNMENT}
35 
36 interface
37 
38 uses
39   Classes, SysUtils, StrUtils;
40 
41 const
42   SLicenseText =
43     '    {   Unicode implementation tables. ' + sLineBreak +
44     ' ' + sLineBreak +
45     '        Copyright (c) 2013 - 2017 by Inoussa OUEDRAOGO ' + sLineBreak +
46     ' ' + sLineBreak +
47     '        Permission is hereby granted, free of charge, to any person ' + sLineBreak +
48     '        obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
49     '        documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
50     '        associated documentation (the "Software") to deal in the Data ' + sLineBreak +
51     '        Files or Software without restriction, including without ' + sLineBreak +
52     '        limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
53     '        distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
54     '        and to permit persons to whom the Data Files or Software are ' + sLineBreak +
55     '        furnished to do so, provided that (a) the above copyright ' + sLineBreak +
56     '        notice(s) and this permission notice appear with all copies ' + sLineBreak +
57     '        of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
58     '        notice(s) and this permission notice appear in associated ' + sLineBreak +
59     '        documentation, and (c) there is clear notice in each modified ' + sLineBreak +
60     '        Data File or in the Software as well as in the documentation ' + sLineBreak +
61     '        associated with the Data File(s) or Software that the data or ' + sLineBreak +
62     '        software has been modified. ' + sLineBreak +
63     ' ' + sLineBreak +
64     ' ' + sLineBreak +
65     '        This program is distributed in the hope that it will be useful, ' + sLineBreak +
66     '        but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
67     '        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
68 
69   WEIGHT_LEVEL_COUNT = 3;
70 
71 type
72   // Unicode General Category
73   TUnicodeCategory = (
74     ucUppercaseLetter,             // Lu = Letter, uppercase
75     ucLowercaseLetter,             //  Ll = Letter, lowercase
76     ucTitlecaseLetter,             //  Lt = Letter, titlecase
77     ucModifierLetter,              //  Lm = Letter, modifier
78     ucOtherLetter,                 //  Lo = Letter, other
79 
80     ucNonSpacingMark,              //  Mn = Mark, nonspacing
81     ucCombiningMark,               //  Mc = Mark, spacing combining
82     ucEnclosingMark,               //  Me = Mark, enclosing
83 
84     ucDecimalNumber,               //  Nd = Number, decimal digit
85     ucLetterNumber,                //  Nl = Number, letter
86     ucOtherNumber,                 //  No = Number, other
87 
88     ucConnectPunctuation,          //  Pc = Punctuation, connector
89     ucDashPunctuation,             //  Pd = Punctuation, dash
90     ucOpenPunctuation,             //  Ps = Punctuation, open
91     ucClosePunctuation,            //  Pe = Punctuation, close
92     ucInitialPunctuation,          //  Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
93     ucFinalPunctuation,            //  Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
94     ucOtherPunctuation,            //  Po = Punctuation, other
95 
96     ucMathSymbol,                  //  Sm = Symbol, math
97     ucCurrencySymbol,              //  Sc = Symbol, currency
98     ucModifierSymbol,              //  Sk = Symbol, modifier
99     ucOtherSymbol,                 //  So = Symbol, other
100 
101     ucSpaceSeparator,              //  Zs = Separator, space
102     ucLineSeparator,               //  Zl = Separator, line
103     ucParagraphSeparator,          //  Zp = Separator, paragraph
104 
105     ucControl,                     //  Cc = Other, control
106     ucFormat,                      //  Cf = Other, format
107     ucSurrogate,                   //  Cs = Other, surrogate
108     ucPrivateUse,                  //  Co = Other, private use
109     ucUnassigned                   //  Cn = Other, not assigned (including noncharacters)
110   );
111 
112 
113   TUInt24Rec = packed record
114   public
115   {$ifdef FPC_LITTLE_ENDIAN}
116     byte0, byte1, byte2 : Byte;
117   {$else FPC_LITTLE_ENDIAN}
118     byte2, byte1, byte0 : Byte;
119   {$endif FPC_LITTLE_ENDIAN}
120   public
121     class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
122     class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
123     class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
124     class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
125     class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
126 
127     class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
128 
129     class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
130 
131     class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
132     class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
133 
134     class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
135     class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
136 
137     class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
138     class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
139 
140     class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
141     class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
142 
143     class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
144     class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
145     class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
146     class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
147     class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
148     class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
149     class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
150     class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
151     class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
152     class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
153     class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
154     class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
155     class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
156     class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
157     class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
158   end;
159 
160   UInt24 = TUInt24Rec;
161   PUInt24 = ^UInt24;
162   TUnicodeCodePoint = Cardinal;
163   TUnicodeCodePointArray = array of TUnicodeCodePoint;
164   TDecompositionArray = array of TUnicodeCodePointArray;
165   TNumericValue = Double;
166   TNumericValueArray = array of TNumericValue;
167 
168   TBlockItemRec = packed record
169     RangeStart    : TUnicodeCodePoint;
170     RangeEnd      : TUnicodeCodePoint;
171     Name          : string[120];
172     CanonicalName : string[120];
173   end;
174   TBlocks = array of TBlockItemRec;
175 
176   PPropRec = ^TPropRec;
177 
178   { TPropRec }
179 
180   TPropRec = packed record
181   private
GetCategorynull182     function GetCategory : TUnicodeCategory;inline;
183     procedure SetCategory(AValue : TUnicodeCategory);
GetWhiteSpacenull184     function GetWhiteSpace : Boolean;inline;
185     procedure SetWhiteSpace(AValue : Boolean);
GetHangulSyllablenull186     function GetHangulSyllable : Boolean;inline;
187     procedure SetHangulSyllable(AValue : Boolean);
188   public
189     CategoryData    : Byte;
190 
191     PropID          : Word;
192     CCC             : Byte; // Canonical Combining Class
193     NumericIndex    : Byte;
194     SimpleUpperCase : UInt24;
195     SimpleLowerCase : UInt24;
196     DecompositionID : SmallInt;
197   public
198     property Category : TUnicodeCategory read GetCategory write SetCategory;
199     property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
200     property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
201   end;
202   TPropRecArray = array of TPropRec;
203 
204   TDecompositionIndexRec = packed record
205     StartPosition : Word;
206     Length        : Byte;
207   end;
208   TDecompositionBook = X_PACKED record
209     Index      : array of TDecompositionIndexRec;
210     CodePoints : array of TUnicodeCodePoint;
211   end;
212 
213   PDataLineRec = ^TDataLineRec;
214   TDataLineRec = record
215     PropID    : Integer;
216     case LineType : Byte of
217       0 : (CodePoint : TUnicodeCodePoint);
218       1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
219   end;
220   TDataLineRecArray = array of TDataLineRec;
221 
222   TCodePointRec = record
223     case LineType : Byte of
224       0 : (CodePoint : TUnicodeCodePoint);
225       1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
226   end;
227   TCodePointRecArray = array of TCodePointRec;
228 
229   TPropListLineRec = packed record
230     CodePoint : TCodePointRec;
231     PropName  : string[123];
232   end;
233   TPropListLineRecArray = array of TPropListLineRec;
234 
235   { TUCA_WeightRec }
236 
237   TUCA_WeightRec = packed record
238   public
239     Weights  : array[0..3] of Cardinal;
240     Variable : Boolean;
241   public
242     class operator Equal(a, b: TUCA_WeightRec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
243   end;
244   TUCA_WeightRecArray = array of TUCA_WeightRec;
245 
246   PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
247   TUCA_LineContextItemRec = X_PACKED record
248   public
249     CodePoints : TUnicodeCodePointArray;
250     Weights    : TUCA_WeightRecArray;
251   public
252     procedure Clear();
253     procedure Assign(ASource : PUCA_LineContextItemRec);
Clonenull254     function Clone() : TUCA_LineContextItemRec;
255   end;
256 
257   PUCA_LineContextRec = ^TUCA_LineContextRec;
258   TUCA_LineContextRec = X_PACKED record
259   public
260     Data : array of TUCA_LineContextItemRec;
261   public
262     procedure Clear();
263     procedure Assign(ASource : PUCA_LineContextRec);
Clonenull264     function Clone() : TUCA_LineContextRec;
265   end;
266 
267   PUCA_LineRec = ^TUCA_LineRec;
268   TUCA_LineRec = X_PACKED record
269   public
270     CodePoints : TUnicodeCodePointArray;
271     Weights    : TUCA_WeightRecArray;
272     Context    : TUCA_LineContextRec;
273     //Variable   : Boolean;
274     Deleted    : Boolean;
275     Stored     : Boolean;
276   public
277     procedure Clear();
278     procedure Assign(ASource : PUCA_LineRec);
Clonenull279     function Clone() : TUCA_LineRec;
HasContextnull280     function HasContext() : Boolean;
281   end;
282   TUCA_VariableKind = (
283     ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
284     ucaIgnoreSP
285   );
286   TUCA_DataBook = X_PACKED record
287     Version        : string;
288     VariableWeight : TUCA_VariableKind;
289     Backwards      : array[0..3] of Boolean;
290     Lines          : array of TUCA_LineRec;
291   end;
292   PUCA_DataBook = ^TUCA_DataBook;
293   TUCA_DataBookIndex = array of Integer;
294 
295 type
296   TUCA_PropWeights = packed record
297     Weights  : array[0..2] of Word;
298     //Variable : Byte;
299   end;
300   PUCA_PropWeights = ^TUCA_PropWeights;
301 
302   TUCA_PropItemContextRec = packed record
303     CodePointCount : Byte;
304     WeightCount    : Byte;
305     //CodePoints     : UInt24;
306     //Weights        : TUCA_PropWeights;
307   end;
308   PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
309   TUCA_PropItemContextTreeNodeRec = packed record
310     Left    : Word;
311     Right   : Word;
312     Data    : TUCA_PropItemContextRec;
313   end;
314   PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
315 
316   TUCA_PropItemContextTreeRec = packed record
317   public
318     Size : UInt24;
319   public
GetDatanull320     function GetData:PUCA_PropItemContextTreeNodeRec;inline;
321     property Data : PUCA_PropItemContextTreeNodeRec read GetData;
322   end;
323   PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
324 
325   { TUCA_PropItemRec }
326 
327   TUCA_PropItemRec = packed record
328   private
329     const FLAG_VALID      = 0;
330     const FLAG_CODEPOINT  = 1;
331     const FLAG_CONTEXTUAL = 2;
332     const FLAG_DELETION   = 3;
333     const FLAG_COMPRESS_WEIGHT_1 = 6;
334     const FLAG_COMPRESS_WEIGHT_2 = 7;
335   private
GetWeightSizenull336     function GetWeightSize : Word;inline;
337   public
338     WeightLength : Byte;
339     ChildCount   : Byte;
340     Size         : Word;
341     Flags        : Byte;
342   public
HasCodePointnull343     function HasCodePoint() : Boolean;inline;
GetCodePointnull344     function GetCodePoint() : UInt24;//inline;
345     property CodePoint : UInt24 read GetCodePoint;
346     //Weights    : array[0..WeightLength] of TUCA_PropWeights;
347     procedure GetWeightArray(ADest : PUCA_PropWeights);
GetSelfOnlySizenull348     function GetSelfOnlySize() : Cardinal;inline;
349 
350     procedure SetContextual(AValue : Boolean);inline;
GetContextualnull351     function GetContextual() : Boolean;inline;
352     property Contextual : Boolean read GetContextual write setContextual;
GetContextnull353     function GetContext() : PUCA_PropItemContextTreeRec;
354     procedure SetDeleted(AValue : Boolean);inline;
IsDeletednull355     function IsDeleted() : Boolean;inline;
IsValidnull356     function IsValid() : Boolean;inline;
IsWeightCompress_1null357     function IsWeightCompress_1() : Boolean;inline;
IsWeightCompress_2null358     function IsWeightCompress_2() : Boolean;inline;
359   end;
360   PUCA_PropItemRec = ^TUCA_PropItemRec;
361   TUCA_PropIndexItem = packed record
362     CodePoint : Cardinal;
363     Position  : Integer;
364   end;
365   PUCA_PropIndexItem = ^TUCA_PropIndexItem;
366   TUCA_PropBook = X_PACKED record
367     ItemSize      : Integer;
368     Index         : array of TUCA_PropIndexItem;
369     Items         : PUCA_PropItemRec; //Native Endian
370     ItemsOtherEndian  : PUCA_PropItemRec;//Non Native Endian
371     VariableLowLimit  : Word;
372     VariableHighLimit : Word;
373   end;
374   PUCA_PropBook = ^TUCA_PropBook;
375 
376   TBmpFirstTable = array[0..255] of Byte;
377   TBmpSecondTableItem = array[0..255] of Word;
378   TBmpSecondTable = array of TBmpSecondTableItem;
379 
380   T3lvlBmp1Table = array[0..255] of Byte;
381   T3lvlBmp2TableItem = array[0..15] of Word;
382   T3lvlBmp2Table = array of T3lvlBmp2TableItem;
383   T3lvlBmp3TableItem = array[0..15] of Word;
384   T3lvlBmp3Table = array of T3lvlBmp3TableItem;
385 
386   TucaBmpFirstTable = array[0..255] of Byte;
387   TucaBmpSecondTableItem = array[0..255] of Cardinal;
388   TucaBmpSecondTable = array of TucaBmpSecondTableItem;
389   PucaBmpFirstTable = ^TucaBmpFirstTable;
390   PucaBmpSecondTable = ^TucaBmpSecondTable;
391 
392 const
393   LOW_SURROGATE_BEGIN  = Word($DC00);
394   LOW_SURROGATE_END    = Word($DFFF);
395   LOW_SURROGATE_COUNT  = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
396 
397   HIGH_SURROGATE_BEGIN = Word($D800);
398   HIGH_SURROGATE_END   = Word($DBFF);
399   HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
400 type
401   TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
402   TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
403   TOBmpSecondTable = array of TOBmpSecondTableItem;
404 
405   T3lvlOBmp1Table = array[0..1023] of Byte;
406   T3lvlOBmp2TableItem = array[0..31] of Word;
407   T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
408   T3lvlOBmp3TableItem = array[0..31] of Word;
409   T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
410 
411   TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
412   TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
413   TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
414   PucaOBmpFirstTable = ^TucaOBmpFirstTable;
415   PucaOBmpSecondTable = ^TucaOBmpSecondTable;
416 
417 type
418   TEndianKind = (ekLittle, ekBig);
419 const
420   ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
421 {$IFDEF ENDIAN_LITTLE}
422   ENDIAN_NATIVE     = ekLittle;
423   ENDIAN_NON_NATIVE = ekBig;
424 {$ENDIF ENDIAN_LITTLE}
425 {$IFDEF ENDIAN_BIG}
426   ENDIAN_NATIVE = ekBig;
427   ENDIAN_NON_NATIVE = ekLittle;
428 {$ENDIF ENDIAN_BIG}
429 
430   procedure GenerateLicenceText(ADest : TStream);
431 
BoolToBytenull432   function BoolToByte(AValue : Boolean): Byte;inline;
433 
IsHangulSyllablenull434   function IsHangulSyllable(
435     const ACodePoint  : TUnicodeCodePoint;
436     const AHangulList : TCodePointRecArray
437   ) : Boolean;
438   procedure ParseHangulSyllableTypes(
439         ADataAStream   : TMemoryStream;
440     var ACodePointList : TCodePointRecArray
441   );
442 
443   procedure ParseProps(
444         ADataAStream   : TMemoryStream;
445     var APropList      : TPropListLineRecArray
446   );
FindCodePointsByPropertynull447   function FindCodePointsByProperty(
448     const APropName : string;
449     const APropList : TPropListLineRecArray
450   ) : TCodePointRecArray;
451 
452   procedure ParseBlokcs(
453         ADataAStream   : TMemoryStream;
454     var ABlocks        : TBlocks
455   );
456   procedure ParseUCAFile(
457         ADataAStream : TMemoryStream;
458     var ABook        : TUCA_DataBook
459   );
460   procedure MakeUCA_Props(
461           ABook         : PUCA_DataBook;
462     out   AProps        : PUCA_PropBook
463   );
464   procedure FreeUcaBook(var ABook : PUCA_PropBook);
465   procedure MakeUCA_BmpTables(
466     var   AFirstTable   : TucaBmpFirstTable;
467     var   ASecondTable  : TucaBmpSecondTable;
468     const APropBook     : PUCA_PropBook
469   );
470   procedure MakeUCA_OBmpTables(
471     var   AFirstTable   : TucaOBmpFirstTable;
472     var   ASecondTable  : TucaOBmpSecondTable;
473     const APropBook     : PUCA_PropBook
474   );
GetPropPositionnull475   function GetPropPosition(
476     const AHighS,
477           ALowS         : Word;
478     const AFirstTable   : PucaOBmpFirstTable;
479     const ASecondTable  : PucaOBmpSecondTable
480   ): Integer;inline;overload;
481 
482   procedure GenerateUCA_Head(
483     ADest  : TStream;
484     ABook  : PUCA_DataBook;
485     AProps : PUCA_PropBook
486   );
487   procedure GenerateUCA_BmpTables(
488           AStream,
489           ANativeEndianStream,
490           ANonNativeEndianStream : TStream;
491     var   AFirstTable            : TucaBmpFirstTable;
492     var   ASecondTable           : TucaBmpSecondTable
493   );
494   procedure GenerateBinaryUCA_BmpTables(
495           ANativeEndianStream,
496           ANonNativeEndianStream : TStream;
497     var   AFirstTable            : TucaBmpFirstTable;
498     var   ASecondTable           : TucaBmpSecondTable
499   );
500   procedure GenerateUCA_PropTable(
501           ADest     : TStream;
502     const APropBook : PUCA_PropBook;
503     const AEndian   : TEndianKind
504   );
505   procedure GenerateBinaryUCA_PropTable(
506   // WARNING : files must be generated for each endianess (Little / Big)
507           ANativeEndianStream,
508           ANonNativeEndianStream : TStream;
509     const APropBook              : PUCA_PropBook
510   );
511   procedure GenerateUCA_OBmpTables(
512           AStream,
513           ANativeEndianStream,
514           ANonNativeEndianStream : TStream;
515     var   AFirstTable            : TucaOBmpFirstTable;
516     var   ASecondTable           : TucaOBmpSecondTable
517   );
518   procedure GenerateBinaryUCA_OBmpTables(
519           ANativeEndianStream,
520           ANonNativeEndianStream : TStream;
521     var   AFirstTable            : TucaOBmpFirstTable;
522     var   ASecondTable           : TucaOBmpSecondTable
523   );
524 
525   procedure Parse_UnicodeData(
526           ADataAStream   : TMemoryStream;
527     var   APropList      : TPropRecArray;
528     var   ANumericTable  : TNumericValueArray;
529     var   ADataLineList  : TDataLineRecArray;
530     var   ADecomposition : TDecompositionArray;
531     const AHangulList    : TCodePointRecArray;
532     const AWhiteSpaces   : TCodePointRecArray
533   );
534   procedure MakeDecomposition(
535     const ARawData : TDecompositionArray;
536     var   ABook    : TDecompositionBook
537   );
538 
539   procedure MakeBmpTables(
540     var   AFirstTable   : TBmpFirstTable;
541     var   ASecondTable  : TBmpSecondTable;
542     const ADataLineList : TDataLineRecArray
543   );
544   procedure MakeBmpTables3Levels(
545     var   AFirstTable   : T3lvlBmp1Table;
546     var   ASecondTable  : T3lvlBmp2Table;
547     var   AThirdTable  : T3lvlBmp3Table;
548     const ADataLineList : TDataLineRecArray
549   );
550   procedure GenerateBmpTables(
551           ADest : TStream;
552     var   AFirstTable   : TBmpFirstTable;
553     var   ASecondTable  : TBmpSecondTable
554   );
555   procedure Generate3lvlBmpTables(
556           ADest : TStream;
557     var   AFirstTable   : T3lvlBmp1Table;
558     var   ASecondTable  : T3lvlBmp2Table;
559     var   AThirdTable   : T3lvlBmp3Table
560   );
561   procedure GeneratePropTable(
562           ADest     : TStream;
563     const APropList : TPropRecArray;
564     const AEndian   : TEndianKind
565   );
566   procedure GenerateNumericTable(
567           ADest         : TStream;
568     const ANumList      : TNumericValueArray;
569     const ACompleteUnit : Boolean
570   );
571   procedure GenerateDecompositionBookTable(
572           ADest   : TStream;
573     const ABook   : TDecompositionBook;
574     const AEndian : TEndianKind
575   );
576   procedure GenerateOutBmpTable(
577           ADest     : TStream;
578     const AList : TDataLineRecArray
579   );
580 
Compressnull581   function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
582 
EvaluateFloatnull583   function EvaluateFloat(const AStr : string) : Double;
StrToCategorynull584   function StrToCategory(const AStr : string) : TUnicodeCategory;
StringToCodePointnull585   function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
IsWhiteSpacenull586   function IsWhiteSpace(
587     const ACodePoint   : TUnicodeCodePoint;
588     const AWhiteSpaces : TCodePointRecArray
589   ) : Boolean;
590 
GetPropIDnull591   function GetPropID(
592           ACodePoint    : TUnicodeCodePoint;
593     const ADataLineList : TDataLineRecArray
594   ) : Cardinal;
595 
596 //--------------------
597   procedure MakeOBmpTables(
598     var   AFirstTable   : TOBmpFirstTable;
599     var   ASecondTable  : TOBmpSecondTable;
600     const ADataLineList : TDataLineRecArray
601   );
602   procedure MakeOBmpTables3Levels(
603     var   AFirstTable   : T3lvlOBmp1Table;
604     var   ASecondTable  : T3lvlOBmp2Table;
605     var   AThirdTable  : T3lvlOBmp3Table;
606     const ADataLineList : TDataLineRecArray
607   );
608   procedure GenerateOBmpTables(
609           ADest : TStream;
610     var   AFirstTable   : TOBmpFirstTable;
611     var   ASecondTable  : TOBmpSecondTable
612   );
613   procedure Generate3lvlOBmpTables(
614           ADest : TStream;
615     var   AFirstTable   : T3lvlOBmp1Table;
616     var   ASecondTable  : T3lvlOBmp2Table;
617     var   AThirdTable   : T3lvlOBmp3Table
618   );
GetPropnull619   function GetProp(
620     const AHighS,
621           ALowS         : Word;
622     const AProps        : TPropRecArray;
623     var   AFirstTable   : TOBmpFirstTable;
624     var   ASecondTable  : TOBmpSecondTable
625   ): PPropRec; inline;overload;
GetPropnull626   function GetProp(
627     const AHighS,
628           ALowS         : Word;
629     const AProps        : TPropRecArray;
630     var   AFirstTable   : T3lvlOBmp1Table;
631     var   ASecondTable  : T3lvlOBmp2Table;
632     var   AThirdTable   : T3lvlOBmp3Table
633   ): PPropRec; inline;overload;
634   procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
ToUCS4null635   function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
636 
637 type
638   TBitOrder = 0..7;
639 
IsBitONnull640   function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
641   procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
642 
GenerateEndianIncludeFileNamenull643   function GenerateEndianIncludeFileName(
644     const AStoreName : string;
645     const AEndian    : TEndianKind
646   ): string;inline;
647 
648   procedure ReverseFromNativeEndian(
649     const AData    : PUCA_PropItemRec;
650     const ADataLen : Cardinal;
651     const ADest    : PUCA_PropItemRec
652   );
653   procedure ReverseToNativeEndian(
654     const AData    : PUCA_PropItemRec;
655     const ADataLen : Cardinal;
656     const ADest    : PUCA_PropItemRec
657   );
658   procedure CompareProps(
659     const AProp1,
660           AProp2   : PUCA_PropItemRec;
661     const ADataLen : Integer
662   );
663 
664 type
665   TCollationName = array[0..(128-1)] of Byte;
666   TCollationVersion = TCollationName;
667   TSerializedCollationHeader = packed record
668     Base               : TCollationName;
669     Version            : TCollationVersion;
670     CollationName      : TCollationName;
671     CollationAliases   : TCollationName; // ";" separated
672     VariableWeight     : Byte;
673     Backwards          : Byte;
674     BMP_Table1Length   : DWord;
675     BMP_Table2Length   : DWord;
676     OBMP_Table1Length  : DWord;
677     OBMP_Table2Length  : DWord;
678     PropCount          : DWord;
679     VariableLowLimit   : Word;
680     VariableHighLimit  : Word;
681     NoNormalization    : Byte;
682     Strength           : Byte;
683     ChangedFields      : Byte;
684   end;
685   PSerializedCollationHeader = ^TSerializedCollationHeader;
686 
687   procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);overload;
688   procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);overload;
689 
690   procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
691   procedure ReverseBytes(var AData; const ALength : Integer);
692   procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
693 
CalcMaxLevel2Valuenull694   function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
695   procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
RewriteLevel2null696   function RewriteLevel2(
697     const ALevel1Value : Cardinal;
698           ALines       : PUCA_LineRec;
699     const ALinesLength : Integer
700   ) : Integer;
701 
702 resourcestring
703   SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
704 
705 implementation
706 uses
707   typinfo, Math, AVL_Tree,
708   trie;
709 
710 
711 type
712 
713   TCardinalRec = packed record
714   {$ifdef FPC_LITTLE_ENDIAN}
715     byte0, byte1, byte2, byte3 : Byte;
716   {$else FPC_LITTLE_ENDIAN}
717     byte3, byte2, byte1, byte0 : Byte;
718   {$endif FPC_LITTLE_ENDIAN}
719   end;
720 
721   TWordRec = packed record
722   {$ifdef FPC_LITTLE_ENDIAN}
723     byte0, byte1 : Byte;
724   {$else FPC_LITTLE_ENDIAN}
725     byte1, byte0 : Byte;
726   {$endif FPC_LITTLE_ENDIAN}
727   end;
728 
729 { TUInt24Rec }
730 
731 class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
732 begin
733   TCardinalRec(Result).byte0 := a.byte0;
734   TCardinalRec(Result).byte1 := a.byte1;
735   TCardinalRec(Result).byte2 := a.byte2;
736   TCardinalRec(Result).byte3 := 0;
737 end;
738 
739 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
740 begin
741   TCardinalRec(Result).byte0 := a.byte0;
742   TCardinalRec(Result).byte1 := a.byte1;
743   TCardinalRec(Result).byte2 := a.byte2;
744   TCardinalRec(Result).byte3 := 0;
745 end;
746 
747 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
748 begin
749   Result := Cardinal(a);
750 end;
751 
752 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
753 begin
754 {$IFOPT R+}
755   if (a.byte2 > 0) then
756     Error(reIntOverflow);
757 {$ENDIF R+}
758   TWordRec(Result).byte0 := a.byte0;
759   TWordRec(Result).byte1 := a.byte1;
760 end;
761 
762 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
763 begin
764 {$IFOPT R+}
765   if (a.byte1 > 0) or (a.byte2 > 0) then
766     Error(reIntOverflow);
767 {$ENDIF R+}
768   Result := a.byte0;
769 end;
770 
771 class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
772 begin
773 {$IFOPT R+}
774   if (a > $FFFFFF) then
775     Error(reIntOverflow);
776 {$ENDIF R+}
777   Result.byte0 := TCardinalRec(a).byte0;
778   Result.byte1 := TCardinalRec(a).byte1;
779   Result.byte2 := TCardinalRec(a).byte2;
780 end;
781 
782 class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
783 begin
784   Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
785 end;
786 
787 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
788 begin
789   Result := (TCardinalRec(b).byte3 = 0) and
790             (a.byte0 = TCardinalRec(b).byte0) and
791             (a.byte1 = TCardinalRec(b).byte1) and
792             (a.byte2 = TCardinalRec(b).byte2);
793 end;
794 
795 class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
796 begin
797   Result := (b = a);
798 end;
799 
800 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
801 begin
802   Result := (LongInt(a) = b);
803 end;
804 
805 class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
806 begin
807   Result := (b = a);
808 end;
809 
810 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
811 begin
812   Result := (a.byte2 = 0) and
813             (a.byte0 = TWordRec(b).byte0) and
814             (a.byte1 = TWordRec(b).byte1);
815 end;
816 
817 class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
818 begin
819   Result := (b = a);
820 end;
821 
822 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
823 begin
824   Result := (a.byte2 = 0) and
825             (a.byte1 = 0) and
826             (a.byte0 = b);
827 end;
828 
829 class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
830 begin
831   Result := (b = a);
832 end;
833 
834 class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
835 begin
836   Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
837 end;
838 
839 class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
840 begin
841   Result := (TCardinalRec(b).byte3 <> 0) or
842             (a.byte0 <> TCardinalRec(b).byte0) or
843             (a.byte1 <> TCardinalRec(b).byte1) or
844             (a.byte2 <> TCardinalRec(b).byte2);
845 end;
846 
847 class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
848 begin
849   Result := (b <> a);
850 end;
851 
852 class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
853 begin
854   Result := (a.byte2 > b.byte2) or
855             ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
856             ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
857 end;
858 
859 class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
860 begin
861   Result := Cardinal(a) > b;
862 end;
863 
864 class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
865 begin
866   Result := a > Cardinal(b);
867 end;
868 
869 class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
870 begin
871   Result := (a.byte2 > b.byte2) or
872             ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
873             ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
874 end;
875 
876 class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
877 begin
878   Result := Cardinal(a) >= b;
879 end;
880 
881 class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
882 begin
883   Result := a >= Cardinal(b);
884 end;
885 
886 class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
887 begin
888   Result := (b > a);
889 end;
890 
891 class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
892 begin
893   Result := Cardinal(a) < b;
894 end;
895 
896 class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
897 begin
898   Result := a < Cardinal(b);
899 end;
900 
901 class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
902 begin
903   Result := (b >= a);
904 end;
905 
906 class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
907 begin
908   Result := Cardinal(a) <= b;
909 end;
910 
911 class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
912 begin
913   Result := a <= Cardinal(b);
914 end;
915 
916 { TUCA_WeightRec }
917 
918 class operator TUCA_WeightRec.Equal(a, b : TUCA_WeightRec) : Boolean;
919 begin
920   Result := (a.Weights[0] = b.Weights[0]) and (a.Weights[1] = b.Weights[1]) and
921             (a.Weights[2] = b.Weights[2]) and (a.Weights[3] = b.Weights[3]) and
922             (a.Variable = b.Variable);
923 end;
924 
925 procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);
926 begin
927   StringToByteArray(AStr,@(ABuffer[Low(ABuffer)]),Length(ABuffer));
928 end;
929 
930 procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);
931 var
932   c, i, bl : Integer;
933   ps : PWord;
934   pb : PByte;
935 begin
936   if (ABufferLength < 1) then
937     exit;
938   c := Length(AStr);
939   if (c > ABufferLength) then
940     c := ABufferLength;
941   bl := 0;
942   pb := ABuffer;
943   if (c > 0) then begin
944     ps := PWord(@AStr[1]);
945     for i := 1 to c do begin
946       if (ps^ <= High(Byte)) then begin
947         pb^ := ps^;
948         bl := bl+1;
949         Inc(pb);
950       end;
951       Inc(ps);
952     end;
953   end;
954   if (bl < ABufferLength) then begin
955     for i := bl+1 to ABufferLength do begin
956       pb^:= 0;
957       Inc(pb);
958     end;
959   end;
960 end;
961 
GenerateEndianIncludeFileNamenull962 function GenerateEndianIncludeFileName(
963   const AStoreName : string;
964   const AEndian    : TEndianKind
965 ): string;inline;
966 begin
967   Result := ExtractFilePath(AStoreName) +
968             ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
969 end;
970 
IsBitONnull971 function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
972 begin
973   Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
974 end;
975 
976 procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
977 begin
978   if AValue then
979     AData := AData or (1 shl (ABit mod 8))
980   else
981     AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
982 end;
983 
984 var
985   FS : TFormatSettings;
986 
EvaluateFloatnull987 function EvaluateFloat(const AStr : string) : Double;
988 var
989   s, n, d : string;
990   i : Integer;
991 begin
992   Result := 0;
993   s := Trim(AStr);
994   if (Length(s) > 0) then begin
995     i := Pos('/',s);
996     if (i < 1) then
997       Result := StrToFloat(s,FS)
998     else begin
999       n := Copy(s,1,i-1);
1000       d := Copy(s,i+1,MaxInt);
1001       Result := StrToInt(n) / StrToInt(d);
1002     end;
1003   end;
1004 end;
1005 
StrToCategorynull1006 function StrToCategory(const AStr : string) : TUnicodeCategory;
1007 var
1008   s : string;
1009 begin
1010   s := UpperCase(Trim(AStr));
1011   if (s = 'LU') then
1012     Result := ucUppercaseLetter
1013   else if (s = 'LL') then
1014     Result := ucLowercaseLetter
1015   else if (s = 'LT') then
1016     Result := ucTitlecaseLetter
1017   else if (s = 'LM') then
1018     Result := ucModifierLetter
1019   else if (s = 'LO') then
1020     Result := ucOtherLetter
1021   else
1022 
1023   if (s = 'MN') then
1024     Result := ucNonSpacingMark
1025   else if (s = 'MC') then
1026     Result := ucCombiningMark
1027   else if (s = 'ME') then
1028     Result := ucEnclosingMark
1029   else
1030 
1031   if (s = 'ND') then
1032     Result := ucDecimalNumber
1033   else if (s = 'NL') then
1034     Result := ucLetterNumber
1035   else if (s = 'NO') then
1036     Result := ucOtherNumber
1037   else
1038 
1039   if (s = 'PC') then
1040     Result := ucConnectPunctuation
1041   else if (s = 'PD') then
1042     Result := ucDashPunctuation
1043   else if (s = 'PS') then
1044     Result := ucOpenPunctuation
1045   else if (s = 'PE') then
1046     Result := ucClosePunctuation
1047   else if (s = 'PI') then
1048     Result := ucInitialPunctuation
1049   else     if (s = 'PF') then
1050     Result := ucFinalPunctuation
1051   else if (s = 'PO') then
1052     Result := ucOtherPunctuation
1053   else
1054 
1055   if (s = 'SM') then
1056     Result := ucMathSymbol
1057   else if (s = 'SC') then
1058     Result := ucCurrencySymbol
1059   else if (s = 'SK') then
1060     Result := ucModifierSymbol
1061   else if (s = 'SO') then
1062     Result := ucOtherSymbol
1063   else
1064 
1065   if (s = 'ZS') then
1066     Result := ucSpaceSeparator
1067   else if (s = 'ZL') then
1068     Result := ucLineSeparator
1069   else if (s = 'ZP') then
1070     Result := ucParagraphSeparator
1071   else
1072 
1073   if (s = 'CC') then
1074     Result := ucControl
1075   else if (s = 'CF') then
1076     Result := ucFormat
1077   else if (s = 'CS') then
1078     Result := ucSurrogate
1079   else if (s = 'CO') then
1080     Result := ucPrivateUse
1081   else
1082     Result := ucUnassigned;
1083 end;
1084 
StringToCodePointnull1085 function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
1086 var
1087   s : string;
1088 begin
1089   s := Trim(ACP);
1090   Result := 0;
1091   if (Length(s) > 0) and (s <> '#') then
1092     Result := StrToInt('$' + s);
1093 end;
1094 
1095 {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
1096 begin
1097   case ACodePoint of
1098     $0009..$000D  : Result := True;// White_Space # Cc   [5] <control-0009>..<control-000D>
1099     $0020          : Result := True;// White_Space # Zs       SPACE
1100     $0085          : Result := True;// White_Space # Cc       <control-0085>
1101     $00A0          : Result := True;// White_Space # Zs       NO-BREAK SPACE
1102     $1680          : Result := True;// White_Space # Zs       OGHAM SPACE MARK
1103     $180E          : Result := True;// White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
1104     $2000..$200A   : Result := True;// White_Space # Zs  [11] EN QUAD..HAIR SPACE
1105     $2028          : Result := True;// White_Space # Zl       LINE SEPARATOR
1106     $2029          : Result := True;// White_Space # Zp       PARAGRAPH SEPARATOR
1107     $202F          : Result := True;// White_Space # Zs       NARROW NO-BREAK SPACE
1108     $205F          : Result := True;// White_Space # Zs       MEDIUM MATHEMATICAL SPACE
1109     $3000          : Result := True;// White_Space # Zs       IDEOGRAPHIC SPACE
1110     else
1111       Result := False;
1112   end;
1113 end;}
IsWhiteSpacenull1114 function IsWhiteSpace(
1115   const ACodePoint   : TUnicodeCodePoint;
1116   const AWhiteSpaces : TCodePointRecArray
1117 ) : Boolean;
1118 var
1119   i : Integer;
1120   p : ^TCodePointRec;
1121 begin
1122   p := @AWhiteSpaces[Low(AWhiteSpaces)];
1123   for i := Low(AWhiteSpaces) to High(AWhiteSpaces) do begin
1124     if (p^.LineType = 0) then begin
1125       if (p^.CodePoint = ACodePoint) then
1126         exit(True);
1127     end else begin
1128       if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then
1129         exit(True);
1130     end;
1131     Inc(p);
1132   end;
1133   Result := False;
1134 end;
1135 
NormalizeBlockNamenull1136 function NormalizeBlockName(const AName : string) : string;
1137 var
1138   i, c, k : Integer;
1139   s : string;
1140 begin
1141   c := Length(AName);
1142   SetLength(Result,c);
1143   s := LowerCase(AName);
1144   k := 0;
1145   for i := 1 to c do begin
1146     if (s[1] in ['a'..'z','0'..'9','-']) then begin
1147       k := k + 1;
1148       Result[k] := s[i];
1149     end;
1150   end;
1151   SetLength(Result,k);
1152 end;
1153 
1154 procedure ParseBlokcs(
1155       ADataAStream   : TMemoryStream;
1156   var ABlocks        : TBlocks
1157 );
1158 const
1159   LINE_LENGTH        = 1024;
1160   DATA_LENGTH        = 25000;
1161 var
1162   p : PAnsiChar;
1163   actualDataLen : Integer;
1164   bufferLength, bufferPos, lineLength, linePos : Integer;
1165   line : ansistring;
1166 
NextLinenull1167   function NextLine() : Boolean;
1168   var
1169     locOldPos : Integer;
1170     locOldPointer : PAnsiChar;
1171   begin
1172     Result := False;
1173     locOldPointer := p;
1174     locOldPos := bufferPos;
1175     while (bufferPos < bufferLength) and (p^ <> #10) do begin
1176       Inc(p);
1177       Inc(bufferPos);
1178     end;
1179     if (locOldPos = bufferPos) and (p^ = #10) then begin
1180       lineLength := 0;
1181       Inc(p);
1182       Inc(bufferPos);
1183       linePos := 1;
1184       Result := True;
1185     end else  if (locOldPos < bufferPos) then begin
1186       lineLength := (bufferPos - locOldPos);
1187       Move(locOldPointer^,line[1],lineLength);
1188       if (p^ = #10) then begin
1189         Dec(lineLength);
1190         Inc(p);
1191         Inc(bufferPos);
1192       end;
1193       linePos := 1;
1194       Result := True;
1195     end;
1196   end;
1197 
NextTokennull1198   function NextToken() : ansistring;
1199   var
1200     k : Integer;
1201   begin
1202     k := linePos;
1203     if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
1204       Inc(linePos);
1205       Result := Copy(line,k,(linePos-k));
1206       exit;
1207     end;
1208     while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1209       Inc(linePos);
1210     if (linePos > k) then begin
1211       if (line[linePos] in [';','#','.']) then
1212         Result := Copy(line,k,(linePos-k))
1213       else
1214         Result := Copy(line,k,(linePos-k+1));
1215       Result := Trim(Result);
1216     end else begin
1217       Result := '';
1218     end;
1219   end;
1220 
1221   procedure ParseLine();
1222   var
1223     locData : TBlockItemRec;
1224     s : ansistring;
1225   begin
1226     s := NextToken();
1227     if (s = '') or (s[1] = '#') then
1228       exit;
1229     locData.RangeStart := StrToInt('$'+s);
1230     s := NextToken();
1231     if (s <> '.') then
1232       raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1233     s := NextToken();
1234     if (s <> '.') then
1235       raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1236     s := NextToken();
1237     locData.RangeEnd := StrToInt('$'+s);
1238     s := NextToken();
1239     if (s <> ';') then
1240       raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
1241     locData.Name := Trim(NextToken());
1242     locData.CanonicalName := NormalizeBlockName(locData.Name);
1243     if (Length(ABlocks) <= actualDataLen) then
1244       SetLength(ABlocks,Length(ABlocks)*2);
1245     ABlocks[actualDataLen] := locData;
1246     Inc(actualDataLen);
1247   end;
1248 
1249   procedure Prepare();
1250   begin
1251     SetLength(ABlocks,DATA_LENGTH);
1252     actualDataLen := 0;
1253     bufferLength := ADataAStream.Size;
1254     bufferPos := 0;
1255     p := ADataAStream.Memory;
1256     lineLength := 0;
1257     SetLength(line,LINE_LENGTH);
1258   end;
1259 
1260 begin
1261   Prepare();
1262   while NextLine() do
1263     ParseLine();
1264   SetLength(ABlocks,actualDataLen);
1265 end;
1266 
1267 procedure ParseProps(
1268       ADataAStream   : TMemoryStream;
1269   var APropList      : TPropListLineRecArray
1270 );
1271 const
1272   LINE_LENGTH        = 1024;
1273   DATA_LENGTH        = 25000;
1274 var
1275   p : PAnsiChar;
1276   actualDataLen : Integer;
1277   bufferLength, bufferPos, lineLength, linePos : Integer;
1278   line : ansistring;
1279 
NextLinenull1280   function NextLine() : Boolean;
1281   var
1282     locOldPos : Integer;
1283     locOldPointer : PAnsiChar;
1284   begin
1285     Result := False;
1286     locOldPointer := p;
1287     locOldPos := bufferPos;
1288     while (bufferPos < bufferLength) and (p^ <> #10) do begin
1289       Inc(p);
1290       Inc(bufferPos);
1291     end;
1292     if (locOldPos = bufferPos) and (p^ = #10) then begin
1293       lineLength := 0;
1294       Inc(p);
1295       Inc(bufferPos);
1296       linePos := 1;
1297       Result := True;
1298     end else  if (locOldPos < bufferPos) then begin
1299       lineLength := (bufferPos - locOldPos);
1300       Move(locOldPointer^,line[1],lineLength);
1301       if (p^ = #10) then begin
1302         Dec(lineLength);
1303         Inc(p);
1304         Inc(bufferPos);
1305       end;
1306       linePos := 1;
1307       Result := True;
1308     end;
1309   end;
1310 
NextTokennull1311   function NextToken() : ansistring;
1312   var
1313     k : Integer;
1314   begin
1315     k := linePos;
1316     if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
1317       Inc(linePos);
1318       Result := Copy(line,k,(linePos-k));
1319       exit;
1320     end;
1321     while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1322       Inc(linePos);
1323     if (linePos > k) then begin
1324       if (line[linePos] in [';','#','.']) then
1325         Result := Copy(line,k,(linePos-k))
1326       else
1327         Result := Copy(line,k,(linePos-k+1));
1328       Result := Trim(Result);
1329     end else begin
1330       Result := '';
1331     end;
1332   end;
1333 
1334   procedure ParseLine();
1335   var
1336     locCP : Cardinal;
1337     locData : TPropListLineRec;
1338     s : ansistring;
1339   begin
1340     s := NextToken();
1341     if (s = '') or (s[1] = '#') then
1342       exit;
1343     locCP := StrToInt('$'+s);
1344     s := NextToken();
1345     if (s = ';') then begin
1346       locData.CodePoint.LineType := 0;
1347       locData.CodePoint.CodePoint := locCP;
1348     end else begin
1349       if (s = '') or (s <> '.') or (NextToken() <> '.') then
1350         raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
1351       locData.CodePoint.LineType := 1;
1352       locData.CodePoint.StartCodePoint := locCP;
1353       locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
1354       s := NextToken();
1355       if (s <> ';') then
1356         raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1357     end;
1358     locData.PropName := Trim(NextToken());
1359     if (Length(APropList) <= actualDataLen) then
1360       SetLength(APropList,Length(APropList)*2);
1361     APropList[actualDataLen] := locData;
1362     Inc(actualDataLen);
1363   end;
1364 
1365   procedure Prepare();
1366   begin
1367     SetLength(APropList,DATA_LENGTH);
1368     actualDataLen := 0;
1369     bufferLength := ADataAStream.Size;
1370     bufferPos := 0;
1371     p := ADataAStream.Memory;
1372     lineLength := 0;
1373     SetLength(line,LINE_LENGTH);
1374   end;
1375 
1376 begin
1377   Prepare();
1378   while NextLine() do
1379     ParseLine();
1380   SetLength(APropList,actualDataLen);
1381 end;
1382 
FindCodePointsByPropertynull1383 function FindCodePointsByProperty(
1384   const APropName : string;
1385   const APropList : TPropListLineRecArray
1386 ) : TCodePointRecArray;
1387 var
1388   r : TCodePointRecArray;
1389   i, k : Integer;
1390   s : string;
1391 begin
1392   k := 0;
1393   r := nil;
1394   s := LowerCase(Trim(APropName));
1395   for i := Low(APropList) to High(APropList) do begin
1396     if (LowerCase(APropList[i].PropName) = s) then begin
1397       if (k >= Length(r)) then begin
1398         if (k = 0) then
1399           SetLength(r,24)
1400         else
1401           SetLength(r,(2*Length(r)));
1402       end;
1403       r[k] := APropList[i].CodePoint;
1404       Inc(k);
1405     end;
1406   end;
1407   SetLength(r,k);
1408   Result := r;
1409 end;
1410 
1411 procedure ParseHangulSyllableTypes(
1412       ADataAStream   : TMemoryStream;
1413   var ACodePointList : TCodePointRecArray
1414 );
1415 const
1416   LINE_LENGTH        = 1024;
1417   DATA_LENGTH        = 25000;
1418 var
1419   p : PAnsiChar;
1420   actualDataLen : Integer;
1421   bufferLength, bufferPos, lineLength, linePos : Integer;
1422   line : ansistring;
1423 
NextLinenull1424   function NextLine() : Boolean;
1425   var
1426     locOldPos : Integer;
1427     locOldPointer : PAnsiChar;
1428   begin
1429     Result := False;
1430     locOldPointer := p;
1431     locOldPos := bufferPos;
1432     while (bufferPos < bufferLength) and (p^ <> #10) do begin
1433       Inc(p);
1434       Inc(bufferPos);
1435     end;
1436     if (locOldPos = bufferPos) and (p^ = #10) then begin
1437       lineLength := 0;
1438       Inc(p);
1439       Inc(bufferPos);
1440       linePos := 1;
1441       Result := True;
1442     end else  if (locOldPos < bufferPos) then begin
1443       lineLength := (bufferPos - locOldPos);
1444       Move(locOldPointer^,line[1],lineLength);
1445       if (p^ = #10) then begin
1446         Dec(lineLength);
1447         Inc(p);
1448         Inc(bufferPos);
1449       end;
1450       linePos := 1;
1451       Result := True;
1452     end;
1453   end;
1454 
NextTokennull1455   function NextToken() : ansistring;
1456   var
1457     k : Integer;
1458   begin
1459     k := linePos;
1460     if (linePos < lineLength) and (line[linePos] = '.') then begin
1461       Inc(linePos);
1462       while (linePos < lineLength) and (line[linePos] = '.') do begin
1463         Inc(linePos);
1464       end;
1465       Result := Copy(line,k,(linePos-k));
1466       exit;
1467     end;
1468     while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1469       Inc(linePos);
1470     if (linePos > k) then begin
1471       if (line[linePos] in [';','#','.']) then
1472         Result := Copy(line,k,(linePos-k))
1473       else
1474         Result := Copy(line,k,(linePos-k+1));
1475       Result := Trim(Result);
1476     end else begin
1477       Result := '';
1478     end;
1479     //Inc(linePos);
1480   end;
1481 
1482   procedure ParseLine();
1483   var
1484     locData : TCodePointRec;
1485     s : ansistring;
1486   begin
1487     s := NextToken();
1488     if (s = '') or (s[1] = '#') then
1489       exit;
1490     locData.CodePoint := StrToInt('$'+s);
1491     s := NextToken();
1492     if (s = '') or (s[1] in [';','#']) then begin
1493       locData.LineType := 0;
1494     end else begin
1495       if (s <> '..') then
1496         raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
1497       locData.StartCodePoint := locData.CodePoint;
1498       locData.EndCodePoint := StrToInt('$'+NextToken());
1499       locData.LineType := 1;
1500     end;
1501     if (Length(ACodePointList) <= actualDataLen) then
1502       SetLength(ACodePointList,Length(ACodePointList)*2);
1503     ACodePointList[actualDataLen] := locData;
1504     Inc(actualDataLen);
1505   end;
1506 
1507   procedure Prepare();
1508   begin
1509     SetLength(ACodePointList,DATA_LENGTH);
1510     actualDataLen := 0;
1511     bufferLength := ADataAStream.Size;
1512     bufferPos := 0;
1513     p := ADataAStream.Memory;
1514     lineLength := 0;
1515     SetLength(line,LINE_LENGTH);
1516   end;
1517 
1518 begin
1519   Prepare();
1520   while NextLine() do
1521     ParseLine();
1522   SetLength(ACodePointList,actualDataLen);
1523 end;
1524 
IsHangulSyllablenull1525 function IsHangulSyllable(
1526   const ACodePoint  : TUnicodeCodePoint;
1527   const AHangulList : TCodePointRecArray
1528 ) : Boolean;
1529 var
1530   i : Integer;
1531   p : ^TCodePointRec;
1532 begin
1533   Result := False;
1534   p := @AHangulList[Low(AHangulList)];
1535   for i := Low(AHangulList) to High(AHangulList) do begin
1536     if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
1537        ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
1538     then begin
1539       Result := True;
1540       Break;
1541     end;
1542     Inc(p);
1543   end;
1544 end;
1545 
IndexOfnull1546 function IndexOf(
1547   const AProp     : TPropRec;
1548   const APropList : TPropRecArray;
1549   const AActualLen : Integer
1550 ) : Integer;overload;
1551 var
1552   i : Integer;
1553   p : PPropRec;
1554 begin
1555   Result := -1;
1556   if (AActualLen > 0) then begin
1557     p := @APropList[0];
1558     for i := 0 to AActualLen - 1 do begin
1559       if (AProp.Category = p^.Category) and
1560          (AProp.CCC = p^.CCC) and
1561          (AProp.NumericIndex = p^.NumericIndex) and
1562          (AProp.SimpleUpperCase = p^.SimpleUpperCase) and
1563          (AProp.SimpleLowerCase = p^.SimpleLowerCase) and
1564          (AProp.WhiteSpace = p^.WhiteSpace) and
1565          //
1566          (AProp.DecompositionID =  p^.DecompositionID) and
1567          (*   ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
1568              ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
1569          *)
1570          (AProp.HangulSyllable = p^.HangulSyllable)
1571       then begin
1572         Result := i;
1573         Break;
1574       end;
1575       Inc(p);
1576     end;
1577   end;
1578 end;
1579 
IndexOfnull1580 function IndexOf(
1581   const AItem : TUnicodeCodePointArray;
1582   const AList : TDecompositionArray
1583 ) : Integer;overload;
1584 var
1585   p : TUnicodeCodePointArray;
1586   i : Integer;
1587 begin
1588   Result := -1;
1589   if (Length(AList) = 0) then
1590     exit;
1591   for i := Low(AList) to High(AList) do begin
1592     p := AList[i];
1593     if (Length(p) = Length(AItem)) then begin
1594       if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
1595         exit(i);
1596     end;
1597   end;
1598   Result := -1;
1599 end;
1600 
IndexOfnull1601 function IndexOf(
1602   const AItem : TNumericValue;
1603   const AList : TNumericValueArray;
1604   const AActualLen : Integer
1605 ) : Integer;overload;
1606 var
1607   p : ^TNumericValue;
1608   i : Integer;
1609 begin
1610   Result := -1;
1611   if (AActualLen = 0) then
1612     exit;
1613   p := @AList[Low(AList)];
1614   for i := Low(AList) to AActualLen - 1 do begin
1615     if (AItem = p^) then
1616       exit(i);
1617     Inc(p);
1618   end;
1619   Result := -1;
1620 end;
1621 
1622 procedure Parse_UnicodeData(
1623         ADataAStream   : TMemoryStream;
1624   var   APropList      : TPropRecArray;
1625   var   ANumericTable  : TNumericValueArray;
1626   var   ADataLineList  : TDataLineRecArray;
1627   var   ADecomposition : TDecompositionArray;
1628   const AHangulList    : TCodePointRecArray;
1629   const AWhiteSpaces   : TCodePointRecArray
1630 );
1631 const
1632   LINE_LENGTH        = 1024;
1633   PROP_LENGTH        = 5000;
1634   DATA_LENGTH        = 25000;
1635 var
1636   p : PAnsiChar;
1637   bufferLength, bufferPos : Integer;
1638   actualPropLen, actualDataLen, actualNumLen : Integer;
1639   line : ansistring;
1640   lineLength, linePos : Integer;
1641 
NextLinenull1642   function NextLine() : Boolean;
1643   var
1644     locOldPos : Integer;
1645     locOldPointer : PAnsiChar;
1646   begin
1647     Result := False;
1648     locOldPointer := p;
1649     locOldPos := bufferPos;
1650     while (bufferPos < bufferLength) and (p^ <> #10) do begin
1651       Inc(p);
1652       Inc(bufferPos);
1653     end;
1654     if (locOldPos < bufferPos) then begin
1655       lineLength := (bufferPos - locOldPos);
1656       Move(locOldPointer^,line[1],lineLength);
1657       if (p^ = #10) then begin
1658         Dec(lineLength);
1659         Inc(p);
1660         Inc(bufferPos);
1661       end;
1662       if (lineLength > 7) then begin
1663         linePos := 1;
1664         Result := True;
1665       end;
1666     end;
1667   end;
1668 
NextTokennull1669   function NextToken() : ansistring;
1670   var
1671     k : Integer;
1672   begin
1673     k := linePos;
1674     while (linePos < lineLength) and not(line[linePos] in [';','#']) do
1675       Inc(linePos);
1676     if (linePos > k) then begin
1677       if (line[linePos] in [';','#']) then
1678         Result := Copy(line,k,(linePos-k))
1679       else
1680         Result := Copy(line,k,(linePos-k+1));
1681       Result := Trim(Result);
1682     end else begin
1683       Result := '';
1684     end;
1685     Inc(linePos);
1686   end;
1687 
ParseCanonicalDecompositionnull1688   function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
1689   var
1690     locStr, ks : ansistring;
1691     k0,k : Integer;
1692   begin
1693     SetLength(Result,0);
1694     locStr := UpperCase(Trim(AStr));
1695     if (locStr = '') or (locStr[1] = '<') then
1696       exit;
1697     k0 := 1;
1698     k := 1;
1699     while (k <= Length(locStr)) do begin
1700       while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
1701         inc(k);
1702       ks := Trim(Copy(locStr,k0,k-k0));
1703       SetLength(Result,Length(Result)+1);
1704       Result[Length(Result)-1] := StringToCodePoint(ks);
1705       Inc(k);
1706       k0 := k;
1707     end;
1708   end;
1709 
1710   procedure ParseLine();
1711   var
1712     locCP : TUnicodeCodePoint;
1713     locProp : TPropRec;
1714     locData : TDataLineRec;
1715     s : ansistring;
1716     locRangeStart, locRangeEnd : Boolean;
1717     k : Integer;
1718     locDecompItem : TUnicodeCodePointArray;
1719     numVal : TNumericValue;
1720   begin
1721     FillChar(locData,SizeOf(locData),#0);
1722     FillChar(locProp,SizeOf(locProp),#0);
1723     locCP := StrToInt('$'+NextToken());
1724     s := NextToken();
1725     locRangeStart := AnsiEndsText(', First>',s);
1726     if locRangeStart then
1727       locRangeEnd := False
1728     else
1729       locRangeEnd := AnsiEndsText(', Last>',s);
1730     if locRangeStart then begin
1731       locData.LineType := 1;
1732       locData.StartCodePoint := locCP;
1733     end else if locRangeEnd then begin
1734       ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
1735       exit;
1736       //locData.EndCodePoint := locCP;
1737     end else begin
1738       locData.LineType := 0;
1739       locData.CodePoint := locCP;
1740     end;
1741     locProp.Category := StrToCategory(NextToken());
1742     locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
1743     NextToken();//Bidi_Class
1744     s := NextToken();//Decomposition_Type
1745     locDecompItem := ParseCanonicalDecomposition(s);
1746     if (Length(locDecompItem) = 0) then
1747       locProp.DecompositionID := -1
1748     else begin
1749       locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
1750       if (locProp.DecompositionID = -1) then begin
1751         k := Length(ADecomposition);
1752         locProp.DecompositionID := k;
1753         SetLength(ADecomposition,k+1);
1754         ADecomposition[k] := locDecompItem;
1755       end;
1756     end;
1757 
1758     numVal := EvaluateFloat(NextToken());
1759     if (numVal <> Double(0.0)) then begin
1760       NextToken();
1761       NextToken();
1762     end else begin
1763       s := NextToken();
1764       if (s <> '') then
1765         numVal := EvaluateFloat(s);
1766       s := NextToken();
1767       if (numVal = Double(0.0)) then
1768         numVal := EvaluateFloat(s);
1769     end;
1770     k := IndexOf(numVal,ANumericTable,actualNumLen);
1771     if (k = -1) then begin
1772       if (actualNumLen >= Length(ANumericTable)) then
1773         SetLength(ANumericTable,(actualNumLen*2));
1774       ANumericTable[actualNumLen] := numVal;
1775       k := actualNumLen;
1776       Inc(actualNumLen);
1777     end;
1778     locProp.NumericIndex := k;
1779 
1780     NextToken();//Bidi_Mirroed
1781     NextToken();//Unicode_l_Name
1782     NextToken();//ISO_Comment
1783     locProp.SimpleUpperCase := StringToCodePoint(NextToken());
1784     locProp.SimpleLowerCase := StringToCodePoint(NextToken());
1785     NextToken();//Simple_Title_Case_Mapping
1786     locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
1787     locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
1788     k := IndexOf(locProp,APropList,actualPropLen);
1789     if (k = -1) then begin
1790       k := actualPropLen;
1791       locProp.PropID := k{ + 1};
1792       APropList[k] := locProp;
1793       Inc(actualPropLen);
1794     end;
1795     locData.PropID := k;
1796     if (actualDataLen >= Length(ADataLineList)) then
1797       SetLength(ADataLineList,(2*Length(ADataLineList)));
1798     ADataLineList[actualDataLen] := locData;
1799     Inc(actualDataLen);
1800   end;
1801 
1802   procedure Prepare();
1803   var
1804     r : TPropRec;
1805   begin
1806     SetLength(APropList,PROP_LENGTH);
1807     actualPropLen := 0;
1808     SetLength(ADataLineList,DATA_LENGTH);
1809     actualDataLen := 0;
1810     bufferLength := ADataAStream.Size;
1811     bufferPos := 0;
1812     p := ADataAStream.Memory;
1813     lineLength := 0;
1814     SetLength(line,LINE_LENGTH);
1815     SetLength(ANumericTable,500);
1816     actualNumLen := 0;
1817 
1818     FillChar(r,SizeOf(r),#0);
1819     r.PropID := 0;
1820     r.Category := ucUnassigned;
1821     r.DecompositionID := -1;
1822     r.NumericIndex := 0;
1823     APropList[0] := r;
1824     Inc(actualPropLen);
1825     ANumericTable[0] := 0;
1826     Inc(actualNumLen);
1827   end;
1828 
1829 begin
1830   Prepare();
1831   while NextLine() do
1832     ParseLine();
1833   SetLength(APropList,actualPropLen);
1834   SetLength(ADataLineList,actualDataLen);
1835   SetLength(ANumericTable,actualNumLen);
1836 end;
1837 
GetPropIDnull1838 function GetPropID(
1839         ACodePoint    : TUnicodeCodePoint;
1840   const ADataLineList : TDataLineRecArray
1841 ) : Cardinal;
1842 var
1843   i : Integer;
1844   p : PDataLineRec;
1845 begin
1846   Result := 0;
1847   p := @ADataLineList[Low(ADataLineList)];
1848   for i := Low(ADataLineList) to High(ADataLineList) do begin
1849     if (p^.LineType = 0) then begin
1850       if (p^.CodePoint = ACodePoint) then begin
1851         Result := p^.PropID;
1852         Break;
1853       end;
1854     end else begin
1855       if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
1856         Result := p^.PropID;
1857         Break;
1858       end;
1859     end;
1860     Inc(p);
1861   end;
1862 end;
1863 
1864 procedure MakeDecomposition(
1865   const ARawData : TDecompositionArray;
1866   var   ABook    : TDecompositionBook
1867 );
1868 var
1869   i, c, locPos : Integer;
1870   locItem : TUnicodeCodePointArray;
1871 begin
1872   c := 0;
1873   for i := Low(ARawData) to High(ARawData) do
1874     c := c + Length(ARawData[i]);
1875   SetLength(ABook.CodePoints,c);
1876   SetLength(ABook.Index,Length(ARawData));
1877   locPos := 0;
1878   for i := Low(ARawData) to High(ARawData) do begin
1879     locItem := ARawData[i];
1880     ABook.Index[i].StartPosition := locPos;
1881     ABook.Index[i].Length := Length(locItem);
1882     Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
1883     locPos := locPos + Length(locItem);
1884   end;
1885 end;
1886 
1887 type
1888   PBmpSecondTableItem = ^TBmpSecondTableItem;
IndexOfnull1889 function IndexOf(
1890   const AItem  : PBmpSecondTableItem;
1891   const ATable : TBmpSecondTable;
1892   const ATableActualLength : Integer
1893 ) : Integer;overload;
1894 var
1895   i : Integer;
1896   p : PBmpSecondTableItem;
1897 begin
1898   Result := -1;
1899   if (ATableActualLength > 0) then begin
1900     p := @ATable[0];
1901     for i := 0 to ATableActualLength - 1 do begin
1902       if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
1903         Result := i;
1904         Break;
1905       end;
1906       Inc(p);
1907     end;
1908   end;
1909 end;
1910 
1911 procedure MakeBmpTables(
1912   var   AFirstTable   : TBmpFirstTable;
1913   var   ASecondTable  : TBmpSecondTable;
1914   const ADataLineList : TDataLineRecArray
1915 );
1916 var
1917   locLowByte, locHighByte : Byte;
1918   locTableItem : TBmpSecondTableItem;
1919   locCP : TUnicodeCodePoint;
1920   i, locSecondActualLen : Integer;
1921 begin
1922   SetLength(ASecondTable,120);
1923   locSecondActualLen := 0;
1924   for locHighByte := 0 to 255 do begin
1925     FillChar(locTableItem,SizeOf(locTableItem),#0);
1926     for locLowByte := 0 to 255 do begin
1927       locCP := (locHighByte * 256) + locLowByte;
1928       locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
1929     end;
1930     i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
1931     if (i = -1) then begin
1932       if (locSecondActualLen = Length(ASecondTable)) then
1933         SetLength(ASecondTable,locSecondActualLen + 50);
1934       i := locSecondActualLen;
1935       ASecondTable[i] := locTableItem;
1936       Inc(locSecondActualLen);
1937     end;
1938     AFirstTable[locHighByte] := i;
1939   end;
1940   SetLength(ASecondTable,locSecondActualLen);
1941 end;
1942 
1943 type
1944   P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
IndexOfnull1945 function IndexOf(
1946   const AItem  : P3lvlBmp3TableItem;
1947   const ATable : T3lvlBmp3Table;
1948   const ATableActualLength : Integer
1949 ) : Integer;overload;
1950 var
1951   i : Integer;
1952   p : P3lvlBmp3TableItem;
1953 begin
1954   Result := -1;
1955   if (ATableActualLength > 0) then begin
1956     p := @ATable[0];
1957     for i := 0 to ATableActualLength - 1 do begin
1958       if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
1959         Result := i;
1960         Break;
1961       end;
1962       Inc(p);
1963     end;
1964   end;
1965 end;
1966 
1967 type
1968   P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
IndexOfnull1969 function IndexOf(
1970   const AItem  : P3lvlBmp2TableItem;
1971   const ATable : T3lvlBmp2Table
1972 ) : Integer;overload;
1973 var
1974   i : Integer;
1975   p : P3lvlBmp2TableItem;
1976 begin
1977   Result := -1;
1978   if (Length(ATable) > 0) then begin
1979     p := @ATable[0];
1980     for i := 0 to Length(ATable) - 1 do begin
1981       if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
1982         Result := i;
1983         Break;
1984       end;
1985       Inc(p);
1986     end;
1987   end;
1988 end;
1989 procedure MakeBmpTables3Levels(
1990   var   AFirstTable   : T3lvlBmp1Table;
1991   var   ASecondTable  : T3lvlBmp2Table;
1992   var   AThirdTable  : T3lvlBmp3Table;
1993   const ADataLineList : TDataLineRecArray
1994 );
1995 var
1996   locLowByte0, locLowByte1, locHighByte : Byte;
1997   locTableItem2 : T3lvlBmp2TableItem;
1998   locTableItem3 : T3lvlBmp3TableItem;
1999   locCP : TUnicodeCodePoint;
2000   i, locThirdActualLen : Integer;
2001 begin
2002   SetLength(AThirdTable,120);
2003   locThirdActualLen := 0;
2004   for locHighByte := 0 to 255 do begin
2005     FillChar(locTableItem2,SizeOf(locTableItem2),#0);
2006     for locLowByte0 := 0 to 15 do begin
2007       FillChar(locTableItem3,SizeOf(locTableItem3),#0);
2008       for locLowByte1 := 0 to 15 do begin
2009         locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
2010         locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
2011       end;
2012       i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
2013       if (i = -1) then begin
2014         if (locThirdActualLen = Length(AThirdTable)) then
2015           SetLength(AThirdTable,locThirdActualLen + 50);
2016         i := locThirdActualLen;
2017         AThirdTable[i] := locTableItem3;
2018         Inc(locThirdActualLen);
2019       end;
2020       locTableItem2[locLowByte0] := i;
2021     end;
2022     i := IndexOf(@locTableItem2,ASecondTable);
2023     if (i = -1) then begin
2024       i := Length(ASecondTable);
2025       SetLength(ASecondTable,(i + 1));
2026       ASecondTable[i] := locTableItem2;
2027     end;
2028     AFirstTable[locHighByte] := i;
2029   end;
2030   SetLength(AThirdTable,locThirdActualLen);
2031 end;
2032 
2033 procedure GenerateLicenceText(ADest : TStream);
2034 var
2035   s : ansistring;
2036 begin
2037   s := SLicenseText + sLineBreak + sLineBreak;
2038   ADest.Write(s[1],Length(s));
2039 end;
2040 
2041 procedure GenerateBmpTables(
2042         ADest : TStream;
2043   var   AFirstTable   : TBmpFirstTable;
2044   var   ASecondTable  : TBmpSecondTable
2045 );
2046   procedure AddLine(const ALine : ansistring);
2047   var
2048     buffer : ansistring;
2049   begin
2050     buffer := ALine + sLineBreak;
2051     ADest.Write(buffer[1],Length(buffer));
2052   end;
2053 
2054 var
2055   i, j, c : Integer;
2056   locLine : string;
2057 begin
2058   AddLine('const');
2059   AddLine('  UC_TABLE_1 : array[0..255] of Byte = (');
2060   locLine := '';
2061   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
2062     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
2063     if (((i+1) mod 16) = 0) then begin
2064       locLine := '    ' + locLine;
2065       AddLine(locLine);
2066       locLine := '';
2067     end;
2068   end;
2069   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
2070   locLine := '    ' + locLine;
2071   AddLine(locLine);
2072   AddLine('  );' + sLineBreak);
2073 
2074   AddLine('  UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
2075   c := High(ASecondTable);
2076   for i := Low(ASecondTable) to c do begin
2077     locLine := '';
2078     for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
2079       locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
2080       if (((j+1) mod 16) = 0) then begin
2081         if (i = c) and (j = 255) then
2082           Delete(locLine,Length(locLine),1);
2083         locLine := '    ' + locLine;
2084         AddLine(locLine);
2085         locLine := '';
2086       end;
2087     end;
2088   end;
2089   AddLine('  );' + sLineBreak);
2090 end;
2091 
2092 //----------------------------------
2093 procedure Generate3lvlBmpTables(
2094         ADest : TStream;
2095   var   AFirstTable   : T3lvlBmp1Table;
2096   var   ASecondTable  : T3lvlBmp2Table;
2097   var   AThirdTable   : T3lvlBmp3Table
2098 );
2099 
2100   procedure AddLine(const ALine : ansistring);
2101   var
2102     buffer : ansistring;
2103   begin
2104     buffer := ALine + sLineBreak;
2105     ADest.Write(buffer[1],Length(buffer));
2106   end;
2107 
2108 var
2109   i, j, c : Integer;
2110   locLine : string;
2111 begin
2112   AddLine('const');
2113   AddLine('  UC_TABLE_1 : array[0..255] of Byte = (');
2114   locLine := '';
2115   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
2116     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
2117     if (((i+1) mod 16) = 0) then begin
2118       locLine := '    ' + locLine;
2119       AddLine(locLine);
2120       locLine := '';
2121     end;
2122   end;
2123   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
2124   locLine := '    ' + locLine;
2125   AddLine(locLine);
2126   AddLine('  );' + sLineBreak);
2127 
2128   AddLine('  UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
2129   c := High(ASecondTable);
2130   for i := Low(ASecondTable) to c do begin
2131     locLine := '(';
2132     for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
2133       locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
2134     Delete(locLine,Length(locLine),1);
2135     locLine := '    ' + locLine + ')';
2136     if (i < c) then
2137       locLine := locLine + ',';
2138     AddLine(locLine);
2139   end;
2140   AddLine('  );' + sLineBreak);
2141 
2142   AddLine('  UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
2143   c := High(AThirdTable);
2144   for i := Low(AThirdTable) to c do begin
2145     locLine := '(';
2146     for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
2147       locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
2148     Delete(locLine,Length(locLine),1);
2149     locLine := '    ' + locLine + ')';
2150     if (i < c) then
2151       locLine := locLine + ',';
2152     AddLine(locLine);
2153   end;
2154   AddLine('  );' + sLineBreak);
2155 end;
2156 
UInt24ToStrnull2157 function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
2158 begin
2159   if (AEndian = ekBig) then
2160     Result := Format(
2161                 '(byte2 : $%s; byte1 : $%s; byte0 : $%s;)',
2162                 [ IntToHex(AValue.byte2,2), IntToHex(AValue.byte1,2),
2163                   IntToHex(AValue.byte0,2)
2164                 ]
2165               )
2166   else
2167     Result := Format(
2168                 '(byte0 : $%s; byte1 : $%s; byte2 : $%s;)',
2169                 [ IntToHex(AValue.byte0,2), IntToHex(AValue.byte1,2),
2170                   IntToHex(AValue.byte2,2)
2171                 ]
2172               );
2173 end;
2174 
2175 procedure GeneratePropTable(
2176         ADest     : TStream;
2177   const APropList : TPropRecArray;
2178   const AEndian   : TEndianKind
2179 );
2180 
2181   procedure AddLine(const ALine : ansistring);
2182   var
2183     buffer : ansistring;
2184   begin
2185     buffer := ALine + sLineBreak;
2186     ADest.Write(buffer[1],Length(buffer));
2187   end;
2188 
2189 var
2190   i : Integer;
2191   locLine : string;
2192   p : PPropRec;
2193 begin
2194   AddLine('');
2195   AddLine('const');
2196   AddLine('  UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
2197   AddLine('  UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
2198   p := @APropList[0];
2199   for i := Low(APropList) to High(APropList) - 1 do begin
2200     locLine := '    (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
2201                ' CCC : ' + IntToStr(p^.CCC) + ';' +
2202                ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
2203                ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
2204                ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
2205                ' DecompositionID : ' + IntToStr(p^.DecompositionID) + '),';
2206     AddLine(locLine);
2207     Inc(p);
2208   end;
2209   locLine := //'    (Category : TUnicodeCategory.' + GetEnumName(pti,Ord(p^.Category)) + ';' +
2210              '    (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
2211              ' CCC : ' + IntToStr(p^.CCC) + ';' +
2212              ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
2213              ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
2214              ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
2215              ' DecompositionID : ' + IntToStr(p^.DecompositionID) + ')';
2216   AddLine(locLine);
2217   AddLine('  );' + sLineBreak);
2218 end;
2219 
2220 procedure GenerateNumericTable(
2221         ADest         : TStream;
2222   const ANumList      : TNumericValueArray;
2223   const ACompleteUnit : Boolean
2224 );
2225 
2226   procedure AddLine(const ALine : ansistring);
2227   var
2228     buffer : ansistring;
2229   begin
2230     buffer := ALine + sLineBreak;
2231     ADest.Write(buffer[1],Length(buffer));
2232   end;
2233 
2234 var
2235   i : Integer;
2236   locLine : string;
2237   p : ^TNumericValue;
2238 begin
2239   if ACompleteUnit then begin
2240     GenerateLicenceText(ADest);
2241     AddLine('unit unicodenumtable;');
2242     AddLine('interface');
2243     AddLine('');
2244   end;
2245   AddLine('');
2246   AddLine('const');
2247   AddLine('  UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
2248   AddLine('  UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
2249   locLine := '';
2250   p := @ANumList[0];
2251   for i := Low(ANumList) to High(ANumList) - 1 do begin
2252     locLine := locLine + FloatToStr(p^,FS) + ' ,';
2253     if (i > 0) and ((i mod 8) = 0) then begin
2254       AddLine('    ' + locLine);
2255       locLine := '';
2256     end;
2257     Inc(p);
2258   end;
2259   locLine := locLine + FloatToStr(p^,FS);
2260   AddLine('    ' + locLine);
2261   AddLine('  );' + sLineBreak);
2262   if ACompleteUnit then begin
2263     AddLine('');
2264     AddLine('implementation');
2265     AddLine('');
2266     AddLine('end.');
2267   end;
2268 end;
2269 
2270 procedure GenerateDecompositionBookTable(
2271         ADest   : TStream;
2272   const ABook   : TDecompositionBook;
2273   const AEndian : TEndianKind
2274 );
2275 
2276   procedure AddLine(const ALine : ansistring);
2277   var
2278     buffer : ansistring;
2279   begin
2280     buffer := ALine + sLineBreak;
2281     ADest.Write(buffer[1],Length(buffer));
2282   end;
2283 
2284 var
2285   i, k : Integer;
2286   p : ^TDecompositionIndexRec;
2287   cp : ^TUnicodeCodePoint;
2288   cp24 : UInt24;
2289   locLine : string;
2290 begin
2291   AddLine('const');
2292   AddLine('  UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
2293   AddLine('  UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
2294   AddLine('type');
2295   AddLine('  TDecompositionIndexRec = packed record');
2296   AddLine('    StartPosition : Word;');
2297   AddLine('    Length        : Byte;');
2298   AddLine('  end;');
2299   AddLine('  TDecompositionBookRec = packed record');
2300   AddLine('    Index      : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
2301   AddLine('    CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
2302   AddLine('  end;');
2303   AddLine('const');
2304   AddLine('  UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
2305   p := @ABook.Index[0];
2306   AddLine('    Index : (// Index BEGIN');
2307   k := 0;
2308   locLine := '      ';
2309   for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
2310     locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
2311                ' Length : ' + IntToStr(p^.Length)  + '), ';
2312     k := k + 1;
2313     if (k >= 2) then begin
2314       AddLine(locLine);
2315       locLine := '      ';
2316       k := 0;
2317     end;
2318     Inc(p);
2319   end;
2320   locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
2321              ' Length : ' + IntToStr(p^.Length)  + ')';
2322   AddLine(locLine);
2323   AddLine('    ); // Index END');
2324 
2325   cp := @ABook.CodePoints[0];
2326   AddLine('    CodePoints : (// CodePoints BEGIN');
2327   k := 0;
2328   locLine := '      ';
2329   for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
2330     cp24 := cp^;
2331     locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
2332     Inc(k);
2333     if (k >= 16) then begin
2334       AddLine(locLine);
2335       k := 0;
2336       locLine := '      ';
2337     end;
2338     Inc(cp);
2339   end;
2340   cp24 := cp^;
2341   locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
2342   AddLine(locLine);
2343   AddLine('    ); // CodePoints END');
2344   AddLine('  );' + sLineBreak);
2345 end;
2346 
2347 procedure GenerateOutBmpTable(
2348         ADest     : TStream;
2349   const AList : TDataLineRecArray
2350 );
2351   procedure AddLine(const ALine : ansistring);
2352   var
2353     buffer : ansistring;
2354   begin
2355     buffer := ALine + sLineBreak;
2356     ADest.Write(buffer[1],Length(buffer));
2357   end;
2358 
2359 var
2360   i, j : Integer;
2361   locLine : string;
2362   p : PDataLineRec;
2363 begin
2364   AddLine('');
2365   //AddLine('  UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
2366   //AddLine('  UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
2367   j := -1;
2368   p := @AList[0];
2369   for i := 0 to Length(AList) - 1 do begin
2370     if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
2371        (p^.StartCodePoint > $FFFF)
2372     then begin
2373       j := i;
2374       Break;
2375     end;
2376     Inc(p);
2377   end;
2378   if (j < 0) then
2379     exit;
2380 
2381   for i := j to Length(AList) - 2 do begin
2382     locLine := '    (PropID : ' + IntToStr(p^.PropID) + ';' +
2383                ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
2384                ' RangeEnd : ' + IntToStr(p^.EndCodePoint) +  '),' ;
2385     AddLine(locLine);
2386     Inc(p);
2387   end;
2388   locLine := '    (PropID : ' + IntToStr(p^.PropID) + ';' +
2389              ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
2390              ' RangeEnd : ' + IntToStr(p^.EndCodePoint) +  ')' ;
2391   AddLine(locLine);
2392   AddLine('  );' + sLineBreak);
2393 end;
2394 
Compressnull2395 function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
2396 var
2397   k, i, locResLen : Integer;
2398   q, p, pr : PDataLineRec;
2399   k_end : TUnicodeCodePoint;
2400 begin
2401   locResLen := 1;
2402   SetLength(Result,Length(AData));
2403   FillChar(Result[0],Length(Result),#0);
2404   Result[0] := AData[0];
2405   q := @AData[0];
2406   k := 0;
2407   while (k < Length(AData)) do begin
2408     if (q^.LineType = 0) then
2409       k_end := q^.CodePoint
2410     else
2411       k_end := q^.EndCodePoint;
2412     if ((k+1) = Length(AData)) then begin
2413       i := k;
2414     end else begin
2415       p := @AData[k+1];
2416       i := k +1;
2417       while (i < (Length(AData) {- 1})) do begin
2418         if (p^.PropID <> q^.PropID) then begin
2419           i := i - 1;
2420           Break;
2421         end;
2422         if (p^.LineType = 0) then begin
2423           if (p^.CodePoint <> (k_end + 1)) then begin
2424             i := i - 1;
2425             Break;
2426           end;
2427           Inc(k_end);
2428         end else begin
2429           if (p^.StartCodePoint <> (k_end + 1)) then begin
2430             i := i - 1;
2431             Break;
2432           end;
2433           k_end := p^.EndCodePoint;
2434         end;
2435         Inc(i);
2436         Inc(p);
2437       end;
2438     end;
2439     {if (i = k) then begin
2440       Result[locResLen] := q^;
2441       Inc(locResLen);
2442     end else begin }
2443       p := @AData[i];
2444       pr := @Result[locResLen];
2445       pr^.PropID := q^.PropID;
2446       if (q^.LineType = 0) then
2447         pr^.StartCodePoint := q^.CodePoint
2448       else
2449         pr^.StartCodePoint := q^.StartCodePoint;
2450       pr^.LineType := 1;
2451       if (p^.LineType = 0) then
2452         pr^.EndCodePoint := p^.CodePoint
2453       else
2454         pr^.EndCodePoint := p^.EndCodePoint;
2455       Inc(locResLen);
2456     //end;
2457     k := i + 1;
2458     if (k = Length(AData)) then
2459       Break;
2460     q := @AData[k];
2461   end;
2462   SetLength(Result,locResLen);
2463 end;
2464 
2465 procedure ParseUCAFile(
2466       ADataAStream : TMemoryStream;
2467   var ABook        : TUCA_DataBook
2468 );
2469 const
2470   LINE_LENGTH        = 1024;
2471   DATA_LENGTH        = 25000;
2472 var
2473   p : PAnsiChar;
2474   actualDataLen : Integer;
2475   bufferLength, bufferPos, lineLength, linePos : Integer;
2476   line : ansistring;
2477 
NextLinenull2478   function NextLine() : Boolean;
2479   var
2480     locOldPos : Integer;
2481     locOldPointer : PAnsiChar;
2482   begin
2483     Result := False;
2484     locOldPointer := p;
2485     locOldPos := bufferPos;
2486     while (bufferPos < bufferLength) and (p^ <> #10) do begin
2487       Inc(p);
2488       Inc(bufferPos);
2489     end;
2490     if (locOldPos = bufferPos) and (p^ = #10) then begin
2491       lineLength := 0;
2492       Inc(p);
2493       Inc(bufferPos);
2494       linePos := 1;
2495       Result := True;
2496     end else  if (locOldPos < bufferPos) then begin
2497       lineLength := (bufferPos - locOldPos) + 1;
2498       Move(locOldPointer^,line[1],lineLength);
2499       if (p^ = #10) then begin
2500         Dec(lineLength);
2501         Inc(p);
2502         Inc(bufferPos);
2503       end;
2504       linePos := 1;
2505       Result := True;
2506     end;
2507   end;
2508 
2509   procedure SkipSpace();
2510   begin
2511     while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
2512       Inc(linePos);
2513   end;
2514 
NextTokennull2515   function NextToken() : ansistring;
2516   const C_SEPARATORS  = [';','#','.','[',']','*','@'];
2517   var
2518     k : Integer;
2519   begin
2520     SkipSpace();
2521     k := linePos;
2522     if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
2523       Result := line[linePos];
2524       Inc(linePos);
2525       exit;
2526     end;
2527     while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
2528       Inc(linePos);
2529     if (linePos > k) then begin
2530       if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
2531         Result := Copy(line,k,(linePos-k))
2532       else
2533         Result := Copy(line,k,(linePos-k+1));
2534       Result := Trim(Result);
2535     end else begin
2536       Result := '';
2537     end;
2538   end;
2539 
2540   procedure CheckToken(const AToken : string);
2541   var
2542     a, b : string;
2543   begin
2544     a := LowerCase(Trim(AToken));
2545     b := LowerCase(Trim(NextToken()));
2546     if (a <> b) then
2547       raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]);
2548   end;
2549 
ReadWeightBlocknull2550   function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
2551   var
2552     s :AnsiString;
2553     k : Integer;
2554   begin
2555     Result := False;
2556     s := NextToken();
2557     if (s <> '[') then
2558       exit;
2559     s := NextToken();
2560     if (s = '.') then
2561       ADest.Variable := False
2562     else begin
2563       if (s <> '*') then
2564         raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
2565       ADest.Variable := True;
2566     end;
2567     ADest.Weights[0] := StrToInt('$'+NextToken());
2568     for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin
2569       CheckToken('.');
2570       ADest.Weights[k] := StrToInt('$'+NextToken());
2571     end;
2572     CheckToken(']');
2573     Result := True;
2574   end;
2575 
2576   procedure ParseHeaderVar();
2577   var
2578     s,ss : string;
2579     k : Integer;
2580   begin
2581     s := NextToken();
2582     if (s = 'version') then begin
2583       ss := '';
2584       while True do begin
2585         s := NextToken();
2586         if (s = '') then
2587           Break;
2588         ss := ss + s;
2589       end;
2590       ABook.Version := ss;
2591     end else if (s = 'variable') then begin
2592       if (s = 'blanked') then
2593         ABook.VariableWeight := ucaBlanked
2594       else if (s = 'non-ignorable') then
2595         ABook.VariableWeight := ucaNonIgnorable
2596       else if (s = 'shifted') then
2597         ABook.VariableWeight := ucaShifted
2598       else if (s = 'shift-trimmed') then
2599         ABook.VariableWeight := ucaShiftedTrimmed
2600       else if (s = 'ignoresp') then
2601         ABook.VariableWeight := ucaIgnoreSP
2602       else
2603         raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
2604     end else if (s = 'backwards') or (s = 'forwards') then begin
2605       ss := s;
2606       s := NextToken();
2607       k := StrToInt(s);
2608       if (k < 1) or (k > 4) then
2609         raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
2610       ABook.Backwards[k] := (s = 'backwards');
2611     end;
2612   end;
2613 
2614   procedure ParseLine();
2615   var
2616     locData : ^TUCA_LineRec;
2617     s : ansistring;
2618     kc : Integer;
2619   begin
2620     if (Length(ABook.Lines) <= actualDataLen) then
2621       SetLength(ABook.Lines,Length(ABook.Lines)*2);
2622     locData := @ABook.Lines[actualDataLen];
2623     s := NextToken();
2624     if (s = '') or (s[1] = '#') then
2625       exit;
2626     if (s[1] = '@') then begin
2627       ParseHeaderVar();
2628       exit;
2629     end;
2630     SetLength(locData^.CodePoints,10);
2631     locData^.CodePoints[0] := StrToInt('$'+s);
2632     kc := 1;
2633     while True do begin
2634       s := Trim(NextToken());
2635       if (s = '') then
2636         exit;
2637       if (s = ';') then
2638         Break;
2639       locData^.CodePoints[kc] := StrToInt('$'+s);
2640       Inc(kc);
2641     end;
2642     if (kc = 0) then
2643       exit;
2644     SetLength(locData^.CodePoints,kc);
2645     SetLength(locData^.Weights,24);
2646     kc := 0;
2647     while ReadWeightBlock(locData^.Weights[kc]) do begin
2648       Inc(kc);
2649     end;
2650     SetLength(locData^.Weights,kc);
2651     Inc(actualDataLen);
2652   end;
2653 
2654   procedure Prepare();
2655   var
2656     k : Integer;
2657   begin
2658     ABook.VariableWeight := ucaShifted;
2659     for k := Low(ABook.Backwards) to High(ABook.Backwards) do
2660       ABook.Backwards[k] := False;
2661     SetLength(ABook.Lines,DATA_LENGTH);
2662     actualDataLen := 0;
2663     bufferLength := ADataAStream.Size;
2664     bufferPos := 0;
2665     p := ADataAStream.Memory;
2666     lineLength := 0;
2667     SetLength(line,LINE_LENGTH);
2668   end;
2669 
2670 begin
2671   Prepare();
2672   while NextLine() do
2673     ParseLine();
2674   SetLength(ABook.Lines,actualDataLen);
2675 end;
2676 
2677 procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
2678 var
2679   i : Integer;
2680 begin
2681   Write(ATitle, ' ');
2682   for i := 0 to Length(X) - 1 do
2683     Write(X[i],' ');
2684   WriteLn();
2685 end;
2686 
IsGreaterThannull2687 function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
2688 var
2689   i, hb : Integer;
2690 begin
2691   if (A=B) then
2692     exit(0);
2693   Result := 1;
2694   hb := Length(B^.CodePoints) - 1;
2695   for i := 0 to Length(A^.CodePoints) - 1 do begin
2696     if (i > hb) then
2697       exit;
2698     if (A^.CodePoints[i] < B^.CodePoints[i]) then
2699       exit(-1);
2700     if (A^.CodePoints[i] > B^.CodePoints[i]) then
2701       exit(1);
2702   end;
2703   if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
2704     exit(0);
2705   exit(-1);
2706 end;
2707 
2708 procedure QuickSort(
2709   var AList : TUCA_DataBookIndex;
2710       L, R  : Longint;
2711       ABook : PUCA_DataBook
2712 );overload;
2713 var
2714   I, J : Longint;
2715   P, Q : Integer;
2716 begin
2717  repeat
2718    I := L;
2719    J := R;
2720    P := AList[ (L + R) div 2 ];
2721    repeat
2722      while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
2723        I := I + 1;
2724      while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
2725        J := J - 1;
2726      If I <= J then
2727      begin
2728        Q := AList[I];
2729        AList[I] := AList[J];
2730        AList[J] := Q;
2731        I := I + 1;
2732        J := J - 1;
2733      end;
2734    until I > J;
2735    // sort the smaller range recursively
2736    // sort the bigger range via the loop
2737    // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
2738    if J - L < R - I then
2739    begin
2740      if L < J then
2741        QuickSort(AList, L, J, ABook);
2742      L := I;
2743    end
2744    else
2745    begin
2746      if I < R then
2747        QuickSort(AList, I, R, ABook);
2748      R := J;
2749    end;
2750  until L >= R;
2751 end;
2752 
CreateIndexnull2753 function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
2754 var
2755   r : TUCA_DataBookIndex;
2756   i, c : Integer;
2757 begin
2758   c := Length(ABook^.Lines);
2759   SetLength(r,c);
2760   for i := 0 to c - 1 do
2761     r[i] := i;
2762   QuickSort(r,0,c-1,ABook);
2763   Result := r;
2764 end;
2765 
2766 function ConstructContextTree(
2767   const AContext : PUCA_LineContextRec;
2768   var   ADestBuffer;
2769   const ADestBufferLength : Cardinal
2770 ) : PUCA_PropItemContextTreeRec;forward;
ConstructItemnull2771 function ConstructItem(
2772         AItem         : PUCA_PropItemRec;
2773         ACodePoint    : Cardinal;
2774         AValid        : Byte;
2775         AChildCount   : Byte;
2776   const AWeights      : array of TUCA_WeightRec;
2777   const AStoreCP      : Boolean;
2778   const AContext      : PUCA_LineContextRec;
2779   const ADeleted      : Boolean
2780 ) : Cardinal;
2781 var
2782   i : Integer;
2783   p : PUCA_PropItemRec;
2784   pw : PUCA_PropWeights;
2785   pb : PByte;
2786   hasContext : Boolean;
2787   contextTree : PUCA_PropItemContextTreeRec;
2788   wl : Integer;
2789 begin
2790   p := AItem;
2791   p^.Size := 0;
2792   p^.Flags := 0;
2793   p^.WeightLength := 0;
2794   SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
2795   p^.ChildCount := AChildCount;
2796   hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
2797   if hasContext then
2798     wl := 0
2799   else
2800     wl := Length(AWeights);
2801   p^.WeightLength := wl;
2802   if (wl = 0) then begin
2803     Result := SizeOf(TUCA_PropItemRec);
2804     if ADeleted then
2805       SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
2806   end else begin
2807     Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
2808     pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
2809     Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
2810     pb := pb + 2;
2811     if (AWeights[0].Weights[1] > High(Byte)) then begin
2812       Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
2813       pb := pb + 2;
2814     end else begin
2815       SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
2816       pb^ := AWeights[0].Weights[1];
2817       pb := pb + 1;
2818       Result := Result - 1;
2819     end;
2820     if (AWeights[0].Weights[2] > High(Byte)) then begin
2821       Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
2822       pb := pb + 2;
2823     end else begin
2824       SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
2825       pb^ := AWeights[0].Weights[2];
2826       pb := pb + 1;
2827       Result := Result - 1;
2828     end;
2829     pw := PUCA_PropWeights(pb);
2830     for i := 1 to wl - 1 do begin
2831       pw^.Weights[0] := AWeights[i].Weights[0];
2832       pw^.Weights[1] := AWeights[i].Weights[1];
2833       pw^.Weights[2] := AWeights[i].Weights[2];
2834       //pw^.Variable := BoolToByte(AWeights[i].Variable);
2835       Inc(pw);
2836     end;
2837   end;
2838   hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
2839   if AStoreCP or hasContext then begin
2840     Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
2841     Result := Result + SizeOf(UInt24);
2842     SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
2843   end;
2844   if hasContext then begin
2845     contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
2846     Result := Result + Cardinal(contextTree^.Size);
2847     SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
2848   end;
2849   p^.Size := Result;
2850 end;
2851 
CalcCharChildCountnull2852 function CalcCharChildCount(
2853   const ASearchStartPos : Integer;
2854   const ALinePos        : Integer;
2855   const ABookLines      : PUCA_LineRec;
2856   const AMaxLength      : Integer;
2857   const ABookIndex      : TUCA_DataBookIndex;
2858   out   ALineCount      : Word
2859 ) : Byte;
2860 var
2861   locLinePos : Integer;
2862   p : PUCA_LineRec;
2863 
2864   procedure IncP();
2865   begin
2866     Inc(locLinePos);
2867     p := @ABookLines[ABookIndex[locLinePos]];
2868   end;
2869 
2870 var
2871   i, locTargetLen, locTargetBufferSize, r : Integer;
2872   locTarget : array[0..127] of Cardinal;
2873   locLastChar : Cardinal;
2874 begin
2875   locLinePos := ALinePos;
2876   p := @ABookLines[ABookIndex[locLinePos]];
2877   locTargetLen := ASearchStartPos;
2878   locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
2879   Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
2880   if (Length(p^.CodePoints) = ASearchStartPos) then begin
2881     r := 0;
2882     locLastChar := High(Cardinal);
2883   end else begin
2884     r := 1;
2885     locLastChar := p^.CodePoints[ASearchStartPos];
2886   end;
2887   i := 1;
2888   while (i < AMaxLength) do begin
2889     IncP();
2890     if (Length(p^.CodePoints) < locTargetLen) then
2891       Break;
2892     if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
2893       Break;
2894     if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
2895       Inc(r);
2896       locLastChar := p^.CodePoints[ASearchStartPos];
2897     end;
2898     Inc(i);
2899   end;
2900   ALineCount := i;
2901   Result := r;
2902 end;
2903 
BuildTrienull2904 function BuildTrie(
2905   const ALinePos        : Integer;
2906   const ABookLines      : PUCA_LineRec;
2907   const AMaxLength      : Integer;
2908   const ABookIndex      : TUCA_DataBookIndex
2909 ) : PTrieNode;
2910 var
2911   p : PUCA_LineRec;
2912   root : PTrieNode;
2913   ki, k, i : Integer;
2914   key : array of TKeyType;
2915 begin
2916   k := ABookIndex[ALinePos];
2917   p := @ABookLines[k];
2918   if (Length(p^.CodePoints) = 1) then
2919     root := CreateNode(p^.CodePoints[0],k)
2920   else
2921     root := CreateNode(p^.CodePoints[0]);
2922 
2923   for i := ALinePos to ALinePos + AMaxLength - 1 do begin
2924     k := ABookIndex[i];
2925     p := @ABookLines[k];
2926     if (Length(p^.CodePoints) = 1) then begin
2927       InsertWord(root,p^.CodePoints[0],k);
2928     end else begin
2929       SetLength(key,Length(p^.CodePoints));
2930       for ki := 0 to Length(p^.CodePoints) - 1 do
2931         key[ki] := p^.CodePoints[ki];
2932       InsertWord(root,key,k);
2933     end;
2934   end;
2935   Result := root;
2936 end;
2937 
BoolToBytenull2938 function BoolToByte(AValue : Boolean): Byte;inline;
2939 begin
2940   if AValue then
2941     Result := 1
2942   else
2943     Result := 0;
2944 end;
2945 
InternalConstructFromTrienull2946 function InternalConstructFromTrie(
2947   const ATrie  : PTrieNode;
2948   const AItem  : PUCA_PropItemRec;
2949   const ALines : PUCA_LineRec;
2950   const AStoreCp : Boolean
2951 ) : Cardinal;
2952 var
2953   i : Integer;
2954   size : Cardinal;
2955   p : PUCA_PropItemRec;
2956   n : PTrieNode;
2957 begin
2958   if (ATrie = nil) then
2959     exit(0);
2960   p := AItem;
2961   n := ATrie;
2962   if n^.DataNode then
2963     size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
2964   else
2965     size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
2966   Result := size;
2967   if (n^.ChildCount > 0) then begin
2968     for i := 0 to n^.ChildCount - 1 do begin
2969       p := PUCA_PropItemRec(PtrUInt(p) + size);
2970       size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
2971       Result := Result + size;
2972     end;
2973   end;
2974   AItem^.Size := Result;
2975 end;
2976 
ConstructFromTrienull2977 function ConstructFromTrie(
2978   const ATrie  : PTrieNode;
2979   const AItem  : PUCA_PropItemRec;
2980   const ALines : PUCA_LineRec
2981 ) : Integer;
2982 begin
2983   Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
2984 end;
2985 
2986 procedure MakeUCA_Props(
2987         ABook         : PUCA_DataBook;
2988   out   AProps        : PUCA_PropBook
2989 );
2990 var
2991   propIndexCount : Integer;
2992 
2993   procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
2994   begin
2995     AProps^.Index[propIndexCount].CodePoint := ACodePoint;
2996     AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
2997     propIndexCount := propIndexCount + 1;
2998   end;
2999 
3000 var
3001   locIndex : TUCA_DataBookIndex;
3002   i, c, k, kc : Integer;
3003   p, p1, p2 : PUCA_PropItemRec;
3004   lines, pl1, pl2 : PUCA_LineRec;
3005   childCount, lineCount : Word;
3006   size : Cardinal;
3007   trieRoot : PTrieNode;
3008   MaxChildCount, MaxSize : Cardinal;
3009   childList : array of PUCA_PropItemRec;
3010 begin
3011   locIndex := CreateIndex(ABook);
3012   i := Length(ABook^.Lines);
3013   i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
3014   AProps := AllocMem(SizeOf(TUCA_PropBook));
3015   AProps^.ItemSize := i;
3016   AProps^.Items := AllocMem(i);
3017   propIndexCount := 0;
3018   SetLength(AProps^.Index,Length(ABook^.Lines));
3019   p := AProps^.Items;
3020   lines := @ABook^.Lines[0];
3021   c := Length(locIndex);
3022   i := 0;
3023   MaxChildCount := 0; MaxSize := 0;
3024   while (i < (c-1)) do begin
3025     pl1 := @lines[locIndex[i]];
3026     if not pl1^.Stored then begin
3027       i := i + 1;
3028       Continue;
3029     end;
3030     pl2 := @lines[locIndex[i+1]];
3031     if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
3032       if (Length(pl1^.CodePoints) = 1) then begin
3033         size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
3034         CapturePropIndex(p,pl1^.CodePoints[0]);
3035         p := PUCA_PropItemRec(PtrUInt(p) + size);
3036         if (size > MaxSize) then
3037           MaxSize := size;
3038       end else begin
3039         kc := Length(pl1^.CodePoints);
3040         SetLength(childList,kc);
3041         for k := 0 to kc - 2 do begin
3042           size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
3043           if (k = 0) then
3044             CapturePropIndex(p,pl1^.CodePoints[k]);
3045           childList[k] := p;
3046           p := PUCA_PropItemRec(PtrUInt(p) + size);
3047         end;
3048         size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
3049         childList[kc-1] := p;
3050         p := PUCA_PropItemRec(PtrUInt(p) + size);
3051         for k := kc - 2 downto 0 do begin
3052           p1 := childList[k];
3053           p2 := childList[k+1];
3054           p1^.Size := p1^.Size + p2^.Size;
3055         end;
3056         if (p1^.Size > MaxSize) then
3057           MaxSize := p1^.Size;
3058       end;
3059       lineCount := 1;
3060     end else begin
3061       childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
3062       if (childCount < 1) then
3063         raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
3064       if (lineCount < 2) then
3065         raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
3066       if (childCount > MaxChildCount) then
3067         MaxChildCount := childCount;
3068       trieRoot := BuildTrie(i,lines,lineCount,locIndex);
3069       size := ConstructFromTrie(trieRoot,p,lines);
3070       CapturePropIndex(p,pl1^.CodePoints[0]);
3071       FreeNode(trieRoot);
3072       p := PUCA_PropItemRec(PtrUInt(p) + size);
3073       if (size > MaxSize) then
3074         MaxSize := size;
3075     end;
3076     i := i + lineCount;
3077   end;
3078   if (i = (c-1)) then begin
3079     pl1 := @lines[locIndex[i]];
3080     if (Length(pl1^.CodePoints) = 1) then begin
3081       size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
3082       CapturePropIndex(p,pl1^.CodePoints[0]);
3083       p := PUCA_PropItemRec(PtrUInt(p) + size);
3084       if (size > MaxSize) then
3085         MaxSize := size;
3086     end else begin
3087       kc := Length(pl1^.CodePoints);
3088       SetLength(childList,kc);
3089       for k := 0 to kc - 2 do begin
3090         size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
3091         if (k = 0) then
3092           CapturePropIndex(p,pl1^.CodePoints[0]);
3093         childList[k] := p;
3094         p := PUCA_PropItemRec(PtrUInt(p) + size);
3095       end;
3096       size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
3097       childList[kc-1] := p;
3098       p := PUCA_PropItemRec(PtrUInt(p) + size);
3099       for i := kc - 2 downto 0 do begin
3100         p1 := childList[i];
3101         p2 := childList[i+1];
3102         p1^.Size := p1^.Size + p2^.Size;
3103       end;
3104       if (size > MaxSize) then
3105         MaxSize := size;
3106     end;
3107   end;
3108   //c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
3109   c := UInt64(PtrUInt(p)) - UInt64(PtrUInt(AProps^.Items));
3110   ReAllocMem(AProps^.Items,c);
3111   AProps^.ItemSize := c;
3112   SetLength(AProps^.Index,propIndexCount);
3113   AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
3114   ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
3115 
3116   k := 0;
3117   c := High(Word);
3118   for i := 0 to Length(ABook^.Lines) - 1 do begin
3119     if (Length(ABook^.Lines[i].Weights) > 0) then begin
3120       if (ABook^.Lines[i].Weights[0].Variable) then begin
3121         if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
3122           k := ABook^.Lines[i].Weights[0].Weights[0];
3123         if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
3124           c := ABook^.Lines[i].Weights[0].Weights[0];
3125       end;
3126     end;
3127   end;
3128   AProps^.VariableHighLimit := k;
3129   AProps^.VariableLowLimit := c;
3130 end;
3131 
3132 procedure FreeUcaBook(var ABook : PUCA_PropBook);
3133 var
3134   p : PUCA_PropBook;
3135 begin
3136   if (ABook = nil) then
3137     exit;
3138   p := ABook;
3139   ABook := nil;
3140   p^.Index := nil;
3141   FreeMem(p^.Items,p^.ItemSize);
3142   FreeMem(p^.ItemsOtherEndian,p^.ItemSize);
3143   FreeMem(p,SizeOf(p^));
3144 end;
3145 
IndexOfnull3146 function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
3147 var
3148   i : Integer;
3149 begin
3150   for i := 0 to Length(APropBook^.Index) - 1 do begin
3151     if (ACodePoint = APropBook^.Index[i].CodePoint) then
3152       exit(i);
3153   end;
3154   Result := -1;
3155 end;
3156 
3157 type
3158   PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
IndexOfnull3159 function IndexOf(
3160   const AItem  : PucaBmpSecondTableItem;
3161   const ATable : TucaBmpSecondTable;
3162   const ATableActualLength : Integer
3163 ) : Integer;overload;
3164 var
3165   i : Integer;
3166   p : PucaBmpSecondTableItem;
3167 begin
3168   Result := -1;
3169   if (ATableActualLength > 0) then begin
3170     p := @ATable[0];
3171     for i := 0 to ATableActualLength - 1 do begin
3172       if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
3173         Result := i;
3174         Break;
3175       end;
3176       Inc(p);
3177     end;
3178   end;
3179 end;
3180 
3181 procedure MakeUCA_BmpTables(
3182   var   AFirstTable   : TucaBmpFirstTable;
3183   var   ASecondTable  : TucaBmpSecondTable;
3184   const APropBook     : PUCA_PropBook
3185 );
3186 var
3187   locLowByte, locHighByte : Byte;
3188   locTableItem : TucaBmpSecondTableItem;
3189   locCP : TUnicodeCodePoint;
3190   i, locSecondActualLen : Integer;
3191   k : Integer;
3192 begin
3193   SetLength(ASecondTable,120);
3194   locSecondActualLen := 0;
3195   for locHighByte := 0 to 255 do begin
3196     FillChar(locTableItem,SizeOf(locTableItem),#0);
3197     for locLowByte := 0 to 255 do begin
3198       locCP := (locHighByte * 256) + locLowByte;
3199       k := IndexOf(locCP,APropBook);
3200       if (k = -1) then
3201         k := 0
3202       else
3203         k := APropBook^.Index[k].Position + 1;
3204       locTableItem[locLowByte] := k;
3205     end;
3206     i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3207     if (i = -1) then begin
3208       if (locSecondActualLen = Length(ASecondTable)) then
3209         SetLength(ASecondTable,locSecondActualLen + 50);
3210       i := locSecondActualLen;
3211       ASecondTable[i] := locTableItem;
3212       Inc(locSecondActualLen);
3213     end;
3214     AFirstTable[locHighByte] := i;
3215   end;
3216   SetLength(ASecondTable,locSecondActualLen);
3217 end;
3218 
ToUCS4null3219 function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
3220 begin
3221   //copied from utf16toutf32
3222   Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
3223 end;
3224 
3225 procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
3226 begin
3227   AHighS := Word((AValue - $10000) shr 10 + $d800);
3228   ALowS := Word((AValue - $10000) and $3ff + $dc00);
3229 end;
3230 
3231 type
3232   PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
IndexOfnull3233 function IndexOf(
3234   const AItem  : PucaOBmpSecondTableItem;
3235   const ATable : TucaOBmpSecondTable;
3236   const ATableActualLength : Integer
3237 ) : Integer;overload;
3238 var
3239   i : Integer;
3240   p : PucaOBmpSecondTableItem;
3241 begin
3242   Result := -1;
3243   if (ATableActualLength > 0) then begin
3244     p := @ATable[0];
3245     for i := 0 to ATableActualLength - 1 do begin
3246       if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
3247         Result := i;
3248         Break;
3249       end;
3250       Inc(p);
3251     end;
3252   end;
3253 end;
3254 
3255 procedure MakeUCA_OBmpTables(
3256   var   AFirstTable   : TucaOBmpFirstTable;
3257   var   ASecondTable  : TucaOBmpSecondTable;
3258   const APropBook     : PUCA_PropBook
3259 );
3260 var
3261   locLowByte, locHighByte : Word;
3262   locTableItem : TucaOBmpSecondTableItem;
3263   locCP : TUnicodeCodePoint;
3264   i, locSecondActualLen : Integer;
3265   k : Integer;
3266 begin
3267   if (Length(ASecondTable) = 0) then
3268     SetLength(ASecondTable,2000);
3269   locSecondActualLen := 0;
3270   for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
3271     FillChar(locTableItem,SizeOf(locTableItem),#0);
3272     for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
3273       locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
3274       k := IndexOf(locCP,APropBook);
3275       if (k = -1) then
3276         k := 0
3277       else
3278         k := APropBook^.Index[k].Position + 1;
3279       locTableItem[locLowByte] := k;
3280     end;
3281     i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3282     if (i = -1) then begin
3283       if (locSecondActualLen = Length(ASecondTable)) then
3284         SetLength(ASecondTable,locSecondActualLen + 50);
3285       i := locSecondActualLen;
3286       ASecondTable[i] := locTableItem;
3287       Inc(locSecondActualLen);
3288     end;
3289     AFirstTable[locHighByte] := i;
3290   end;
3291   SetLength(ASecondTable,locSecondActualLen);
3292 end;
3293 
GetPropPositionnull3294 function GetPropPosition(
3295   const AHighS,
3296         ALowS         : Word;
3297   const AFirstTable   : PucaOBmpFirstTable;
3298   const ASecondTable  : PucaOBmpSecondTable
3299 ): Integer;inline;overload;
3300 begin
3301   Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
3302 end;
3303 
3304 procedure GenerateUCA_Head(
3305   ADest  : TStream;
3306   ABook  : PUCA_DataBook;
3307   AProps : PUCA_PropBook
3308 );
3309 
3310   procedure AddLine(const ALine : ansistring);
3311   var
3312     buffer : ansistring;
3313   begin
3314     buffer := ALine + sLineBreak;
3315     ADest.Write(buffer[1],Length(buffer));
3316   end;
3317 
3318 begin
3319   AddLine('const');
3320   //AddLine('  VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
3321   AddLine('  VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
3322   AddLine('  VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
3323   AddLine('  VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
3324   AddLine('  BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
3325   AddLine('  BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
3326   AddLine('  BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
3327   AddLine('  BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
3328   AddLine('  PROP_COUNT  = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
3329 
3330   AddLine('');
3331 end;
3332 
3333 procedure GenerateUCA_BmpTables(
3334         AStream,
3335         ANativeEndianStream,
3336         ANonNativeEndianStream : TStream;
3337   var   AFirstTable            : TucaBmpFirstTable;
3338   var   ASecondTable           : TucaBmpSecondTable
3339 );
3340 
3341   procedure AddLine(AOut : TStream; const ALine : ansistring);
3342   var
3343     buffer : ansistring;
3344   begin
3345     buffer := ALine + sLineBreak;
3346     AOut.Write(buffer[1],Length(buffer));
3347   end;
3348 
3349 var
3350   i, j, c : Integer;
3351   locLine : string;
3352   value : UInt24;
3353 begin
3354   AddLine(AStream,'const');
3355   AddLine(AStream,'  UCA_TABLE_1 : array[0..255] of Byte = (');
3356   locLine := '';
3357   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3358     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3359     if (((i+1) mod 16) = 0) then begin
3360       locLine := '    ' + locLine;
3361       AddLine(AStream,locLine);
3362       locLine := '';
3363     end;
3364   end;
3365   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3366   locLine := '    ' + locLine;
3367   AddLine(AStream,locLine);
3368   AddLine(AStream,'  );' + sLineBreak);
3369 
3370   AddLine(ANativeEndianStream,'const');
3371   AddLine(ANativeEndianStream,'  UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3372   c := High(ASecondTable);
3373   for i := Low(ASecondTable) to c do begin
3374     locLine := '';
3375     for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3376       value := ASecondTable[i][j];
3377       locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
3378       if (((j+1) mod 2) = 0) then begin
3379         if (i = c) and (j = 255) then
3380           Delete(locLine,Length(locLine),1);
3381         locLine := '    ' + locLine;
3382         AddLine(ANativeEndianStream,locLine);
3383         locLine := '';
3384       end;
3385     end;
3386   end;
3387   AddLine(ANativeEndianStream,'  );' + sLineBreak);
3388 
3389   AddLine(ANonNativeEndianStream,'const');
3390   AddLine(ANonNativeEndianStream,'  UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3391   c := High(ASecondTable);
3392   for i := Low(ASecondTable) to c do begin
3393     locLine := '';
3394     for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3395       value := ASecondTable[i][j];
3396       locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
3397       if (((j+1) mod 2) = 0) then begin
3398         if (i = c) and (j = 255) then
3399           Delete(locLine,Length(locLine),1);
3400         locLine := '    ' + locLine;
3401         AddLine(ANonNativeEndianStream,locLine);
3402         locLine := '';
3403       end;
3404     end;
3405   end;
3406   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
3407 end;
3408 
3409 procedure GenerateBinaryUCA_BmpTables(
3410         ANativeEndianStream,
3411         ANonNativeEndianStream : TStream;
3412   var   AFirstTable            : TucaBmpFirstTable;
3413   var   ASecondTable           : TucaBmpSecondTable
3414 );
3415 var
3416   i, j : Integer;
3417   value : UInt24;
3418 begin
3419   ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
3420   ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
3421   for i := Low(ASecondTable) to High(ASecondTable) do begin
3422     for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3423       value := ASecondTable[i][j];
3424       ANativeEndianStream.Write(value,SizeOf(value));
3425       ReverseBytes(value,SizeOf(value));
3426       ANonNativeEndianStream.Write(value,SizeOf(value));
3427     end;
3428   end;
3429 end;
3430 
3431 procedure GenerateUCA_PropTable(
3432 // WARNING : files must be generated for each endianess (Little / Big)
3433         ADest     : TStream;
3434   const APropBook : PUCA_PropBook;
3435   const AEndian   : TEndianKind
3436 );
3437 
3438   procedure AddLine(const ALine : ansistring);
3439   var
3440     buffer : ansistring;
3441   begin
3442     buffer := ALine + sLineBreak;
3443     ADest.Write(buffer[1],Length(buffer));
3444   end;
3445 
3446 var
3447   i, c : Integer;
3448   locLine : string;
3449   p : PByte;
3450 begin
3451   c := APropBook^.ItemSize;
3452   AddLine('const');
3453   AddLine('  UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
3454   locLine := '';
3455   if (AEndian = ENDIAN_NATIVE) then
3456     p := PByte(APropBook^.Items)
3457   else
3458     p := PByte(APropBook^.ItemsOtherEndian);
3459   for i := 0 to c - 2 do begin
3460     locLine := locLine + IntToStr(p[i]) + ',';
3461     if (((i+1) mod 60) = 0) then begin
3462       locLine := '    ' + locLine;
3463       AddLine(locLine);
3464       locLine := '';
3465     end;
3466   end;
3467   locLine := locLine + IntToStr(p[c-1]);
3468   locLine := '    ' + locLine;
3469   AddLine(locLine);
3470   AddLine('  );' + sLineBreak);
3471 end;
3472 
3473 procedure GenerateBinaryUCA_PropTable(
3474 // WARNING : files must be generated for each endianess (Little / Big)
3475         ANativeEndianStream,
3476         ANonNativeEndianStream : TStream;
3477   const APropBook              : PUCA_PropBook
3478 );
3479 begin
3480   ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
3481   ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
3482 end;
3483 
3484 procedure GenerateUCA_OBmpTables(
3485         AStream,
3486         ANativeEndianStream,
3487         ANonNativeEndianStream : TStream;
3488   var   AFirstTable            : TucaOBmpFirstTable;
3489   var   ASecondTable           : TucaOBmpSecondTable
3490 );
3491 
3492   procedure AddLine(AOut : TStream; const ALine : ansistring);
3493   var
3494     buffer : ansistring;
3495   begin
3496     buffer := ALine + sLineBreak;
3497     AOut.Write(buffer[1],Length(buffer));
3498   end;
3499 
3500 var
3501   i, j, c : Integer;
3502   locLine : string;
3503   value : UInt24;
3504 begin
3505   AddLine(AStream,'const');
3506   AddLine(AStream,'  UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
3507   locLine := '';
3508   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3509     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3510     if (((i+1) mod 16) = 0) then begin
3511       locLine := '    ' + locLine;
3512       AddLine(AStream,locLine);
3513       locLine := '';
3514     end;
3515   end;
3516   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3517   locLine := '    ' + locLine;
3518   AddLine(AStream,locLine);
3519   AddLine(AStream,'  );' + sLineBreak);
3520 
3521   AddLine(ANativeEndianStream,'  UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3522   c := High(ASecondTable);
3523   for i := Low(ASecondTable) to c do begin
3524     locLine := '';
3525     for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3526       value := ASecondTable[i][j];
3527       locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
3528       if (((j+1) mod 2) = 0) then begin
3529         if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
3530           Delete(locLine,Length(locLine),1);
3531         locLine := '    ' + locLine;
3532         AddLine(ANativeEndianStream,locLine);
3533         locLine := '';
3534       end;
3535     end;
3536   end;
3537   AddLine(ANativeEndianStream,'  );' + sLineBreak);
3538 
3539   AddLine(ANonNativeEndianStream,'  UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3540   c := High(ASecondTable);
3541   for i := Low(ASecondTable) to c do begin
3542     locLine := '';
3543     for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3544       value := ASecondTable[i][j];
3545       locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
3546       if (((j+1) mod 2) = 0) then begin
3547         if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
3548           Delete(locLine,Length(locLine),1);
3549         locLine := '    ' + locLine;
3550         AddLine(ANonNativeEndianStream,locLine);
3551         locLine := '';
3552       end;
3553     end;
3554   end;
3555   AddLine(ANonNativeEndianStream,'  );' + sLineBreak);
3556 end;
3557 
3558 procedure GenerateBinaryUCA_OBmpTables(
3559         ANativeEndianStream,
3560         ANonNativeEndianStream : TStream;
3561   var   AFirstTable            : TucaOBmpFirstTable;
3562   var   ASecondTable           : TucaOBmpSecondTable
3563 );
3564 var
3565   i, j : Integer;
3566   locLine : string;
3567   wordValue : Word;
3568   value : UInt24;
3569 begin
3570   for i := Low(AFirstTable) to High(AFirstTable) do begin
3571     wordValue := AFirstTable[i];
3572     ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
3573     ReverseBytes(wordValue,SizeOf(wordValue));
3574     ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
3575   end;
3576 
3577   for i := Low(ASecondTable) to High(ASecondTable) do begin
3578     for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3579       value := ASecondTable[i][j];
3580       ANativeEndianStream.Write(value,SizeOf(value));
3581       ReverseBytes(value,SizeOf(value));
3582       ANonNativeEndianStream.Write(value,SizeOf(value));
3583     end;
3584   end;
3585 end;
3586 
3587 type
3588   POBmpSecondTableItem = ^TOBmpSecondTableItem;
IndexOfnull3589 function IndexOf(
3590   const AItem  : POBmpSecondTableItem;
3591   const ATable : TOBmpSecondTable;
3592   const ATableActualLength : Integer
3593 ) : Integer;overload;
3594 var
3595   i : Integer;
3596   p : POBmpSecondTableItem;
3597 begin
3598   Result := -1;
3599   if (ATableActualLength > 0) then begin
3600     p := @ATable[0];
3601     for i := 0 to ATableActualLength - 1 do begin
3602       if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
3603         Result := i;
3604         Break;
3605       end;
3606       Inc(p);
3607     end;
3608   end;
3609 end;
3610 
3611 procedure MakeOBmpTables(
3612   var   AFirstTable   : TOBmpFirstTable;
3613   var   ASecondTable  : TOBmpSecondTable;
3614   const ADataLineList : TDataLineRecArray
3615 );
3616 var
3617   locLowByte, locHighByte : Word;
3618   locTableItem : TOBmpSecondTableItem;
3619   locCP : TUnicodeCodePoint;
3620   i, locSecondActualLen : Integer;
3621 begin
3622   SetLength(ASecondTable,2000);
3623   locSecondActualLen := 0;
3624   for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
3625     FillChar(locTableItem,SizeOf(locTableItem),#0);
3626     for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
3627       locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
3628       locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
3629     end;
3630     i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3631     if (i = -1) then begin
3632       if (locSecondActualLen = Length(ASecondTable)) then
3633         SetLength(ASecondTable,locSecondActualLen + 50);
3634       i := locSecondActualLen;
3635       ASecondTable[i] := locTableItem;
3636       Inc(locSecondActualLen);
3637     end;
3638     AFirstTable[locHighByte] := i;
3639   end;
3640   SetLength(ASecondTable,locSecondActualLen);
3641 end;
3642 
3643 type
3644   P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
IndexOfnull3645 function IndexOf(
3646   const AItem  : P3lvlOBmp3TableItem;
3647   const ATable : T3lvlOBmp3Table;
3648   const ATableActualLength : Integer
3649 ) : Integer;overload;
3650 var
3651   i : Integer;
3652   p : P3lvlOBmp3TableItem;
3653 begin
3654   Result := -1;
3655   if (ATableActualLength > 0) then begin
3656     p := @ATable[0];
3657     for i := 0 to ATableActualLength - 1 do begin
3658       if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
3659         Result := i;
3660         Break;
3661       end;
3662       Inc(p);
3663     end;
3664   end;
3665 end;
3666 
3667 type
3668   P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
IndexOfnull3669 function IndexOf(
3670   const AItem  : P3lvlOBmp2TableItem;
3671   const ATable : T3lvlOBmp2Table
3672 ) : Integer;overload;
3673 var
3674   i : Integer;
3675   p : P3lvlOBmp2TableItem;
3676 begin
3677   Result := -1;
3678   if (Length(ATable) > 0) then begin
3679     p := @ATable[0];
3680     for i := 0 to Length(ATable) - 1 do begin
3681       if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
3682         Result := i;
3683         Break;
3684       end;
3685       Inc(p);
3686     end;
3687   end;
3688 end;
3689 procedure MakeOBmpTables3Levels(
3690   var   AFirstTable   : T3lvlOBmp1Table;
3691   var   ASecondTable  : T3lvlOBmp2Table;
3692   var   AThirdTable  : T3lvlOBmp3Table;
3693   const ADataLineList : TDataLineRecArray
3694 );
3695 var
3696   locLowByte0, locLowByte1, locHighByte : Word;
3697   locTableItem2 : T3lvlOBmp2TableItem;
3698   locTableItem3 : T3lvlOBmp3TableItem;
3699   locCP : TUnicodeCodePoint;
3700   i, locThirdActualLen : Integer;
3701 begin
3702   SetLength(AThirdTable,120);
3703   locThirdActualLen := 0;
3704   for locHighByte := 0 to 1023 do begin
3705     FillChar(locTableItem2,SizeOf(locTableItem2),#0);
3706     for locLowByte0 := 0 to 31 do begin
3707       FillChar(locTableItem3,SizeOf(locTableItem3),#0);
3708       for locLowByte1 := 0 to 31 do begin
3709         locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
3710         locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
3711       end;
3712       i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
3713       if (i = -1) then begin
3714         if (locThirdActualLen = Length(AThirdTable)) then
3715           SetLength(AThirdTable,locThirdActualLen + 50);
3716         i := locThirdActualLen;
3717         AThirdTable[i] := locTableItem3;
3718         Inc(locThirdActualLen);
3719       end;
3720       locTableItem2[locLowByte0] := i;
3721     end;
3722     i := IndexOf(@locTableItem2,ASecondTable);
3723     if (i = -1) then begin
3724       i := Length(ASecondTable);
3725       SetLength(ASecondTable,(i + 1));
3726       ASecondTable[i] := locTableItem2;
3727     end;
3728     AFirstTable[locHighByte] := i;
3729   end;
3730   SetLength(AThirdTable,locThirdActualLen);
3731 end;
3732 
3733 procedure GenerateOBmpTables(
3734         ADest : TStream;
3735   var   AFirstTable   : TOBmpFirstTable;
3736   var   ASecondTable  : TOBmpSecondTable
3737 );
3738   procedure AddLine(const ALine : ansistring);
3739   var
3740     buffer : ansistring;
3741   begin
3742     buffer := ALine + sLineBreak;
3743     ADest.Write(buffer[1],Length(buffer));
3744   end;
3745 
3746 var
3747   i, j, c : Integer;
3748   locLine : string;
3749 begin
3750   AddLine('const');
3751   AddLine('  UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
3752   locLine := '';
3753   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3754     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3755     if (((i+1) mod 16) = 0) then begin
3756       locLine := '    ' + locLine;
3757       AddLine(locLine);
3758       locLine := '';
3759     end;
3760   end;
3761   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3762   locLine := '    ' + locLine;
3763   AddLine(locLine);
3764   AddLine('  );' + sLineBreak);
3765 
3766   AddLine('  UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
3767   c := High(ASecondTable);
3768   for i := Low(ASecondTable) to c do begin
3769     locLine := '';
3770     for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
3771       locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
3772       if (((j+1) mod 16) = 0) then begin
3773         if (i = c) and (j = High(TOBmpSecondTableItem)) then
3774           Delete(locLine,Length(locLine),1);
3775         locLine := '    ' + locLine;
3776         AddLine(locLine);
3777         locLine := '';
3778       end;
3779     end;
3780   end;
3781   AddLine('  );' + sLineBreak);
3782 end;
3783 
3784 
3785 //----------------------------------
3786 procedure Generate3lvlOBmpTables(
3787         ADest : TStream;
3788   var   AFirstTable   : T3lvlOBmp1Table;
3789   var   ASecondTable  : T3lvlOBmp2Table;
3790   var   AThirdTable   : T3lvlOBmp3Table
3791 );
3792 
3793   procedure AddLine(const ALine : ansistring);
3794   var
3795     buffer : ansistring;
3796   begin
3797     buffer := ALine + sLineBreak;
3798     ADest.Write(buffer[1],Length(buffer));
3799   end;
3800 
3801 var
3802   i, j, c : Integer;
3803   locLine : string;
3804 begin
3805   AddLine('const');
3806   AddLine('  UCO_TABLE_1 : array[0..1023] of Word = (');
3807   locLine := '';
3808   for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3809     locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3810     if (((i+1) mod 16) = 0) then begin
3811       locLine := '    ' + locLine;
3812       AddLine(locLine);
3813       locLine := '';
3814     end;
3815   end;
3816   locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3817   locLine := '    ' + locLine;
3818   AddLine(locLine);
3819   AddLine('  );' + sLineBreak);
3820 
3821   AddLine('  UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
3822   c := High(ASecondTable);
3823   for i := Low(ASecondTable) to c do begin
3824     locLine := '(';
3825     for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
3826       locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
3827     Delete(locLine,Length(locLine),1);
3828     locLine := '    ' + locLine + ')';
3829     if (i < c) then
3830       locLine := locLine + ',';
3831     AddLine(locLine);
3832   end;
3833   AddLine('  );' + sLineBreak);
3834 
3835   AddLine('  UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
3836   c := High(AThirdTable);
3837   for i := Low(AThirdTable) to c do begin
3838     locLine := '(';
3839     for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
3840       locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
3841     Delete(locLine,Length(locLine),1);
3842     locLine := '    ' + locLine + ')';
3843     if (i < c) then
3844       locLine := locLine + ',';
3845     AddLine(locLine);
3846   end;
3847   AddLine('  );' + sLineBreak);
3848 end;
3849 
GetPropnull3850 function GetProp(
3851   const AHighS,
3852         ALowS         : Word;
3853   const AProps        : TPropRecArray;
3854   var   AFirstTable   : TOBmpFirstTable;
3855   var   ASecondTable  : TOBmpSecondTable
3856 ): PPropRec;
3857 begin
3858   Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
3859 end;
3860 
GetPropnull3861 function GetProp(
3862   const AHighS,
3863         ALowS         : Word;
3864   const AProps        : TPropRecArray;
3865   var   AFirstTable   : T3lvlOBmp1Table;
3866   var   ASecondTable  : T3lvlOBmp2Table;
3867   var   AThirdTable   : T3lvlOBmp3Table
3868 ): PPropRec;
3869 begin
3870   Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
3871   //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
3872 end;
3873 
3874 { TUCA_PropItemContextTreeRec }
3875 
TUCA_PropItemContextTreeRec.GetDatanull3876 function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
3877 begin
3878   if (Size = 0) then
3879     Result := nil
3880   else
3881     Result := PUCA_PropItemContextTreeNodeRec(
3882                 PtrUInt(
3883                   PtrUInt(@Self) + SizeOf(UInt24){Size}
3884                 )
3885               );
3886 end;
3887 
3888 { TUCA_LineContextRec }
3889 
3890 procedure TUCA_LineContextRec.Clear;
3891 begin
3892   Data := nil
3893 end;
3894 
3895 procedure TUCA_LineContextRec.Assign(ASource : PUCA_LineContextRec);
3896 var
3897   c, i : Integer;
3898 begin
3899   if (ASource = nil) then begin
3900     Clear();
3901     exit;
3902   end;
3903   c := Length(ASource^.Data);
3904   SetLength(Self.Data,c);
3905   for i := 0 to c-1 do
3906     Self.Data[i].Assign(@ASource^.Data[i]);
3907 end;
3908 
Clonenull3909 function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
3910 begin
3911   Result.Clear();
3912   Result.Assign(@Self);
3913 end;
3914 
3915 { TUCA_LineContextItemRec }
3916 
3917 procedure TUCA_LineContextItemRec.Clear();
3918 begin
3919   CodePoints := nil;
3920   Weights := nil;
3921 end;
3922 
3923 procedure TUCA_LineContextItemRec.Assign(ASource : PUCA_LineContextItemRec);
3924 begin
3925   if (ASource = nil) then begin
3926     Clear();
3927     exit;
3928   end;
3929   Self.CodePoints := Copy(ASource^.CodePoints);
3930   Self.Weights := Copy(ASource^.Weights);
3931 end;
3932 
TUCA_LineContextItemRec.Clonenull3933 function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
3934 begin
3935   Result.Clear();
3936   Result.Assign(@Self);
3937 end;
3938 
3939 { TUCA_LineRec }
3940 
3941 procedure TUCA_LineRec.Clear;
3942 begin
3943   CodePoints := nil;
3944   Weights := nil;
3945   Deleted := False;
3946   Stored := False;
3947   Context.Clear();
3948 end;
3949 
3950 procedure TUCA_LineRec.Assign(ASource : PUCA_LineRec);
3951 begin
3952   if (ASource = nil) then begin
3953     Clear();
3954     exit;
3955   end;
3956   Self.CodePoints := Copy(ASource^.CodePoints);
3957   Self.Weights := Copy(ASource^.Weights);
3958   Self.Deleted := ASource^.Deleted;
3959   Self.Stored := ASource^.Stored;
3960   Self.Context.Assign(@ASource^.Context);
3961 end;
3962 
TUCA_LineRec.Clonenull3963 function TUCA_LineRec.Clone : TUCA_LineRec;
3964 begin
3965   Result.Clear();
3966   Result.Assign(@Self);
3967 end;
3968 
TUCA_LineRec.HasContextnull3969 function TUCA_LineRec.HasContext() : Boolean;
3970 begin
3971   Result := (Length(Context.Data) > 0);
3972 end;
3973 
3974 { TPropRec }
3975 
TPropRec.GetCategorynull3976 function TPropRec.GetCategory: TUnicodeCategory;
3977 begin
3978   Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
3979 end;
3980 
3981 procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
3982 var
3983   b : Byte;
3984 begin
3985   b := Ord(AValue);
3986   b := b shl 3;
3987   CategoryData := CategoryData or b;
3988   //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
3989 end;
3990 
TPropRec.GetWhiteSpacenull3991 function TPropRec.GetWhiteSpace: Boolean;
3992 begin
3993   Result := IsBitON(CategoryData,0);
3994 end;
3995 
3996 procedure TPropRec.SetWhiteSpace(AValue: Boolean);
3997 begin
3998   SetBit(CategoryData,0,AValue);
3999 end;
4000 
TPropRec.GetHangulSyllablenull4001 function TPropRec.GetHangulSyllable: Boolean;
4002 begin
4003   Result := IsBitON(CategoryData,1);
4004 end;
4005 
4006 procedure TPropRec.SetHangulSyllable(AValue: Boolean);
4007 begin
4008    SetBit(CategoryData,1,AValue);
4009 end;
4010 
4011 { TUCA_PropItemRec }
4012 
TUCA_PropItemRec.GetWeightSizenull4013 function TUCA_PropItemRec.GetWeightSize : Word;
4014 var
4015   c : Integer;
4016 begin
4017   c := WeightLength;
4018   if (c = 0) then
4019     exit(0);
4020   Result := c*SizeOf(TUCA_PropWeights);
4021   if IsWeightCompress_1() then
4022     Result := Result - 1;
4023   if IsWeightCompress_2() then
4024     Result := Result - 1;
4025 end;
4026 
TUCA_PropItemRec.HasCodePointnull4027 function TUCA_PropItemRec.HasCodePoint(): Boolean;
4028 begin
4029   Result := IsBitON(Flags,FLAG_CODEPOINT);
4030 end;
4031 
4032 procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
4033 var
4034   c : Integer;
4035   p : PByte;
4036   pd : PUCA_PropWeights;
4037 begin
4038   c := WeightLength;
4039   p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
4040   pd := ADest;
4041   pd^.Weights[0] := PWord(p)^;
4042   p := p + 2;
4043   if not IsWeightCompress_1() then begin
4044     pd^.Weights[1] := PWord(p)^;
4045     p := p + 2;
4046   end else begin
4047     pd^.Weights[1] := p^;
4048     p := p + 1;
4049   end;
4050   if not IsWeightCompress_2() then begin
4051     pd^.Weights[2] := PWord(p)^;
4052     p := p + 2;
4053   end else begin
4054     pd^.Weights[2] := p^;
4055     p := p + 1;
4056   end;
4057   if (c > 1) then
4058     Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
4059 end;
4060 
TUCA_PropItemRec.GetSelfOnlySizenull4061 function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
4062 begin
4063   Result := SizeOf(TUCA_PropItemRec);
4064   if (WeightLength > 0) then begin
4065     Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
4066     if IsWeightCompress_1() then
4067       Result := Result - 1;
4068     if IsWeightCompress_2() then
4069       Result := Result - 1;
4070   end;
4071   if HasCodePoint() then
4072     Result := Result + SizeOf(UInt24);
4073   if Contextual then
4074     Result := Result + Cardinal(GetContext()^.Size);
4075 end;
4076 
4077 procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
4078 begin
4079   SetBit(Flags,FLAG_CONTEXTUAL,AValue);
4080 end;
4081 
TUCA_PropItemRec.GetContextualnull4082 function TUCA_PropItemRec.GetContextual : Boolean;
4083 begin
4084   Result := IsBitON(Flags,FLAG_CONTEXTUAL);
4085 end;
4086 
TUCA_PropItemRec.GetContextnull4087 function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
4088 var
4089   p : PtrUInt;
4090 begin
4091   if not Contextual then
4092     exit(nil);
4093   p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
4094   if IsBitON(Flags,FLAG_CODEPOINT) then
4095     p := p + SizeOf(UInt24);
4096   Result := PUCA_PropItemContextTreeRec(p);
4097 end;
4098 
4099 procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
4100 begin
4101   SetBit(Flags,FLAG_DELETION,AValue);
4102 end;
4103 
IsDeletednull4104 function TUCA_PropItemRec.IsDeleted: Boolean;
4105 begin
4106   Result := IsBitON(Flags,FLAG_DELETION);
4107 end;
4108 
TUCA_PropItemRec.IsValidnull4109 function TUCA_PropItemRec.IsValid() : Boolean;
4110 begin
4111   Result := IsBitON(Flags,FLAG_VALID);
4112 end;
4113 
IsWeightCompress_1null4114 function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
4115 begin
4116   Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
4117 end;
4118 
IsWeightCompress_2null4119 function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
4120 begin
4121   Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
4122 end;
4123 
TUCA_PropItemRec.GetCodePointnull4124 function TUCA_PropItemRec.GetCodePoint: UInt24;
4125 begin
4126   if HasCodePoint() then begin
4127     if Contextual then
4128       Result := PUInt24(
4129                   PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
4130                   Cardinal(GetContext()^.Size)
4131                 )^
4132     else
4133       Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
4134   end else begin
4135     raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
4136   end
4137 end;
4138 
avl_CompareCodePointsnull4139 function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
4140 var
4141   a, b : PUCA_LineContextItemRec;
4142   i, hb : Integer;
4143 begin
4144   if (Item1 = Item2) then
4145     exit(0);
4146   if (Item1 = nil) then
4147     exit(-1);
4148   if (Item2 = nil) then
4149     exit(1);
4150   a := Item1;
4151   b := Item2;
4152   if (a^.CodePoints = b^.CodePoints) then
4153     exit(0);
4154   Result := 1;
4155   hb := Length(b^.CodePoints) - 1;
4156   for i := 0 to Length(a^.CodePoints) - 1 do begin
4157     if (i > hb) then
4158       exit;
4159     if (a^.CodePoints[i] < b^.CodePoints[i]) then
4160       exit(-1);
4161     if (a^.CodePoints[i] > b^.CodePoints[i]) then
4162       exit(1);
4163   end;
4164   if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
4165     exit(0);
4166   exit(-1);
4167 end;
4168 
ConstructAvlContextTreenull4169 function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
4170 var
4171   r : TAVLTree;
4172   i : Integer;
4173 begin
4174   r := TAVLTree.Create(@avl_CompareCodePoints);
4175   try
4176     for i := 0 to Length(AContext^.Data) - 1 do
4177       r.Add(@AContext^.Data[i]);
4178     Result := r;
4179   except
4180     FreeAndNil(r);
4181     raise;
4182   end;
4183 end;
4184 
ConstructContextTreenull4185 function ConstructContextTree(
4186   const AContext : PUCA_LineContextRec;
4187   var   ADestBuffer;
4188   const ADestBufferLength : Cardinal
4189 ) : PUCA_PropItemContextTreeRec;
4190 
CalcItemOnlySizenull4191   function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
4192   var
4193     kitem : PUCA_LineContextItemRec;
4194   begin
4195     if (AItem = nil) then
4196       exit(0);
4197     kitem := AItem.Data;
4198     Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
4199               SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
4200               SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
4201                 (Length(kitem^.CodePoints)*SizeOf(UInt24)) +
4202               SizeOf(PUCA_PropItemContextRec^.WeightCount) +
4203                 (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
4204   end;
4205 
CalcItemSizenull4206   function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
4207   begin
4208     if (AItem = nil) then
4209       exit(0);
4210     Result := CalcItemOnlySize(AItem);
4211     if (AItem.Left <> nil) then
4212       Result := Result + CalcItemSize(AItem.Left);
4213     if (AItem.Right <> nil) then
4214       Result := Result + CalcItemSize(AItem.Right);
4215   end;
4216 
CalcSizenull4217   function CalcSize(AData : TAVLTree) : Cardinal;
4218   begin
4219     Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
4220   end;
4221 
ConstructItemnull4222   function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
4223   var
4224     k : Integer;
4225     kitem : PUCA_LineContextItemRec;
4226     kpcp : PUInt24;
4227     kpw : PUCA_PropWeights;
4228     pextra : PtrUInt;
4229     pnext : PUCA_PropItemContextTreeNodeRec;
4230   begin
4231     kitem := ASource.Data;
4232     ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
4233     ADest^.Data.WeightCount := Length(kitem^.Weights);
4234     pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
4235               SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
4236     if (ADest^.Data.CodePointCount > 0) then begin
4237       kpcp := PUInt24(pextra);
4238       for k := 0 to ADest^.Data.CodePointCount - 1 do begin
4239         kpcp^ := kitem^.CodePoints[k];
4240         Inc(kpcp);
4241       end;
4242     end;
4243     if (ADest^.Data.WeightCount > 0) then begin
4244       kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
4245       for k := 0 to ADest^.Data.WeightCount - 1 do begin
4246         kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
4247         kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
4248         kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
4249         Inc(kpw);
4250       end;
4251     end;
4252     Result := CalcItemOnlySize(ASource);
4253     if (ASource.Left <> nil) then begin
4254       pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
4255       ADest^.Left := Result;
4256       Result := Result + ConstructItem(ASource.Left,pnext);
4257     end else begin
4258       ADest^.Left := 0;
4259     end;
4260     if (ASource.Right <> nil) then begin
4261       pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
4262       ADest^.Right := Result;
4263       Result := Result + ConstructItem(ASource.Right,pnext);
4264     end else begin
4265       ADest^.Right := 0;
4266     end;
4267   end;
4268 
4269 var
4270   c : PtrUInt;
4271   r : PUCA_PropItemContextTreeRec;
4272   p : PUCA_PropItemContextTreeNodeRec;
4273   tempTree : TAVLTree;
4274 begin
4275   tempTree := ConstructAvlContextTree(AContext);
4276   try
4277     c := CalcSize(tempTree);
4278     if (ADestBufferLength > 0) and (c > ADestBufferLength) then
4279       raise Exception.Create(SInsufficientMemoryBuffer);
4280     r := @ADestBuffer;
4281     r^.Size := c;
4282     p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
4283     ConstructItem(tempTree.Root,p);
4284   finally
4285     tempTree.Free();
4286   end;
4287   Result := r;
4288 end;
4289 
4290 procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
4291 begin
4292   ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
4293   ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
4294   ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
4295   ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
4296   ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
4297   ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
4298   ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
4299 end;
4300 
4301 procedure ReverseBytes(var AData; const ALength : Integer);
4302 var
4303   i,j : PtrInt;
4304   c : Byte;
4305   p : PByte;
4306 begin
4307   if (ALength = 1) then
4308     exit;
4309   p := @AData;
4310   j := ALength div 2;
4311   for i := 0 to Pred(j) do begin
4312     c := p[i];
4313     p[i] := p[(ALength - 1 ) - i];
4314     p[(ALength - 1 ) - i] := c;
4315   end;
4316 end;
4317 
4318 procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
4319 var
4320   p : PByte;
4321   i : PtrInt;
4322 begin
4323   if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
4324     p := @AValue;
4325     for i := 0 to Pred(AArrayLength) do begin
4326       ReverseBytes(p^,AItemSize);
4327       Inc(p,AItemSize);
4328     end;
4329   end;
4330 end;
4331 
4332 procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
4333 var
4334   k : PtrUInt;
4335   p_s, p_d : PByte;
4336 begin
4337   d^.Left := s^.Left;
4338     ReverseBytes(d^.Left,SizeOf(d^.Left));
4339   d^.Right := s^.Right;
4340     ReverseBytes(d^.Right,SizeOf(d^.Right));
4341   d^.Data.CodePointCount := s^.Data.CodePointCount;
4342     ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
4343   d^.Data.WeightCount := s^.Data.WeightCount;
4344     ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
4345 
4346   k := SizeOf(TUCA_PropItemContextTreeNodeRec);
4347   p_s := PByte(PtrUInt(s) + k);
4348   p_d := PByte(PtrUInt(d) + k);
4349   k := (s^.Data.CodePointCount*SizeOf(UInt24));
4350   Move(p_s^,p_d^, k);
4351     ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
4352   p_s := PByte(PtrUInt(p_s) + k);
4353   p_d := PByte(PtrUInt(p_d) + k);
4354   k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
4355   Move(p_s^,p_d^,k);
4356     ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
4357   if (s^.Left > 0) then
4358     ReverseContextNodeFromNativeEndian(
4359       PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
4360       PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
4361     );
4362   if (s^.Right > 0) then
4363     ReverseContextNodeFromNativeEndian(
4364       PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
4365       PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
4366     );
4367 end;
4368 
4369 procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
4370 var
4371   k : PtrUInt;
4372 begin
4373   d^.Size := s^.Size;
4374     ReverseBytes(d^.Size,SizeOf(d^.Size));
4375   if (s^.Size = 0) then
4376     exit;
4377   k := SizeOf(s^.Size);
4378   ReverseContextNodeFromNativeEndian(
4379     PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
4380     PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
4381   );
4382 end;
4383 
4384 procedure ReverseFromNativeEndian(
4385   const AData    : PUCA_PropItemRec;
4386   const ADataLen : Cardinal;
4387   const ADest    : PUCA_PropItemRec
4388 );
4389 var
4390   s, d : PUCA_PropItemRec;
4391   sCtx, dCtx : PUCA_PropItemContextTreeRec;
4392   dataEnd : PtrUInt;
4393   k, i : PtrUInt;
4394   p_s, p_d : PByte;
4395   pw_s, pw_d : PUCA_PropWeights;
4396 begin
4397   dataEnd := PtrUInt(AData) + ADataLen;
4398   s := AData;
4399   d := ADest;
4400   while True do begin
4401     d^.WeightLength := s^.WeightLength;
4402       ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
4403     d^.ChildCount := s^.ChildCount;
4404       ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
4405     d^.Size := s^.Size;
4406       ReverseBytes(d^.Size,SizeOf(d^.Size));
4407     d^.Flags := s^.Flags;
4408       ReverseBytes(d^.Flags,SizeOf(d^.Flags));
4409     if s^.Contextual then begin
4410       k := SizeOf(TUCA_PropItemRec);
4411       if s^.HasCodePoint() then
4412         k := k + SizeOf(UInt24);
4413       sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
4414       dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
4415       ReverseContextFromNativeEndian(sCtx,dCtx);
4416     end;
4417     if s^.HasCodePoint() then begin
4418       if s^.Contextual then
4419         k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
4420       else
4421         k := s^.GetSelfOnlySize() - SizeOf(UInt24);
4422       p_s := PByte(PtrUInt(s) + k);
4423       p_d := PByte(PtrUInt(d) + k);
4424       Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
4425         ReverseBytes(p_d^,SizeOf(UInt24));
4426     end;
4427     if (s^.WeightLength > 0) then begin
4428       k := SizeOf(TUCA_PropItemRec);
4429       p_s := PByte(PtrUInt(s) + k);
4430       p_d := PByte(PtrUInt(d) + k);
4431       k := SizeOf(Word);
4432       Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4433         ReverseBytes(Unaligned(p_d^),k);
4434       p_s := PByte(PtrUInt(p_s) + k);
4435       p_d := PByte(PtrUInt(p_d) + k);
4436       if s^.IsWeightCompress_1() then begin
4437         k := SizeOf(Byte);
4438         PByte(p_d)^ := PByte(p_s)^;
4439       end else begin
4440         k := SizeOf(Word);
4441         Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4442       end;
4443       ReverseBytes(p_d^,k);
4444       p_s := PByte(PtrUInt(p_s) + k);
4445       p_d := PByte(PtrUInt(p_d) + k);
4446       if s^.IsWeightCompress_2() then begin
4447         k := SizeOf(Byte);
4448         PByte(p_d)^ := PByte(p_s)^;
4449       end else begin
4450         k := SizeOf(Word);
4451         Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4452       end;
4453       ReverseBytes(p_d^,k);
4454       if (s^.WeightLength > 1) then begin
4455         pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
4456         pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
4457         for i := 1 to s^.WeightLength - 1 do begin
4458           pw_d^.Weights[0] := pw_s^.Weights[0];
4459           pw_d^.Weights[1] := pw_s^.Weights[1];
4460           pw_d^.Weights[2] := pw_s^.Weights[2];
4461           ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
4462           Inc(pw_s);
4463           Inc(pw_d);
4464         end;
4465       end;
4466     end;
4467     k := s^.GetSelfOnlySize();
4468     s := PUCA_PropItemRec(PtrUInt(s)+k);
4469     d := PUCA_PropItemRec(PtrUInt(d)+k);
4470     if (PtrUInt(s) >= dataEnd) then
4471       Break;
4472   end;
4473   if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
4474     raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
4475 end;
4476 //------------------------------------------------------------------------------
4477 
4478 procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
4479 var
4480   k : PtrUInt;
4481   p_s, p_d : PByte;
4482 begin
4483   d^.Left := s^.Left;
4484     ReverseBytes(d^.Left,SizeOf(d^.Left));
4485   d^.Right := s^.Right;
4486     ReverseBytes(d^.Right,SizeOf(d^.Right));
4487   d^.Data.CodePointCount := s^.Data.CodePointCount;
4488     ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
4489   d^.Data.WeightCount := s^.Data.WeightCount;
4490     ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
4491 
4492   k := SizeOf(TUCA_PropItemContextTreeNodeRec);
4493   p_s := PByte(PtrUInt(s) + k);
4494   p_d := PByte(PtrUInt(d) + k);
4495   k := (d^.Data.CodePointCount*SizeOf(UInt24));
4496   Move(p_s^,p_d^, k);
4497     ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
4498   p_s := PByte(PtrUInt(p_s) + k);
4499   p_d := PByte(PtrUInt(p_d) + k);
4500   k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
4501   Move(p_s^,p_d^,k);
4502     ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
4503   if (d^.Left > 0) then
4504     ReverseContextNodeToNativeEndian(
4505       PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
4506       PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
4507     );
4508   if (d^.Right > 0) then
4509     ReverseContextNodeToNativeEndian(
4510       PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
4511       PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
4512     );
4513 end;
4514 
4515 procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
4516 var
4517   k : PtrUInt;
4518 begin
4519   d^.Size := s^.Size;
4520     ReverseBytes(d^.Size,SizeOf(d^.Size));
4521   if (s^.Size = 0) then
4522     exit;
4523   k := SizeOf(s^.Size);
4524   ReverseContextNodeToNativeEndian(
4525     PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
4526     PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
4527   );
4528 end;
4529 
4530 procedure ReverseToNativeEndian(
4531   const AData    : PUCA_PropItemRec;
4532   const ADataLen : Cardinal;
4533   const ADest    : PUCA_PropItemRec
4534 );
4535 var
4536   s, d : PUCA_PropItemRec;
4537   sCtx, dCtx : PUCA_PropItemContextTreeRec;
4538   dataEnd : PtrUInt;
4539   k, i : PtrUInt;
4540   p_s, p_d : PByte;
4541   pw_s, pw_d : PUCA_PropWeights;
4542 begin
4543   dataEnd := PtrUInt(AData) + ADataLen;
4544   s := AData;
4545   d := ADest;
4546   while True do begin
4547     d^.WeightLength := s^.WeightLength;
4548       ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
4549     d^.ChildCount := s^.ChildCount;
4550       ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
4551     d^.Size := s^.Size;
4552       ReverseBytes(d^.Size,SizeOf(d^.Size));
4553     d^.Flags := s^.Flags;
4554       ReverseBytes(d^.Flags,SizeOf(d^.Flags));
4555     if d^.Contextual then begin
4556       k := SizeOf(TUCA_PropItemRec);
4557       if d^.HasCodePoint() then
4558         k := k + SizeOf(UInt24);
4559       sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
4560       dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
4561       ReverseContextToNativeEndian(sCtx,dCtx);
4562     end;
4563     if d^.HasCodePoint() then begin
4564       if d^.Contextual then
4565         k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
4566       else
4567         k := d^.GetSelfOnlySize() - SizeOf(UInt24);
4568       p_s := PByte(PtrUInt(s) + k);
4569       p_d := PByte(PtrUInt(d) + k);
4570       Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
4571         ReverseBytes(p_d^,SizeOf(UInt24));
4572     end;
4573     if (d^.WeightLength > 0) then begin
4574       k := SizeOf(TUCA_PropItemRec);
4575       p_s := PByte(PtrUInt(s) + k);
4576       p_d := PByte(PtrUInt(d) + k);
4577       k := SizeOf(Word);
4578       Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4579         ReverseBytes(p_d^,k);
4580       p_s := PByte(PtrUInt(p_s) + k);
4581       p_d := PByte(PtrUInt(p_d) + k);
4582       if d^.IsWeightCompress_1() then begin
4583         k := SizeOf(Byte);
4584         PByte(p_d)^ := PByte(p_s)^;
4585       end else begin
4586         k := SizeOf(Word);
4587         Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4588       end;
4589       ReverseBytes(p_d^,k);
4590       p_s := PByte(PtrUInt(p_s) + k);
4591       p_d := PByte(PtrUInt(p_d) + k);
4592       if d^.IsWeightCompress_2() then begin
4593         k := SizeOf(Byte);
4594         PByte(p_d)^ := PByte(p_s)^;
4595       end else begin
4596         k := SizeOf(Word);
4597         Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4598       end;
4599       ReverseBytes(p_d^,k);
4600       if (d^.WeightLength > 1) then begin
4601         pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
4602         pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
4603         for i := 1 to d^.WeightLength - 1 do begin
4604           pw_d^.Weights[0] := pw_s^.Weights[0];
4605           pw_d^.Weights[1] := pw_s^.Weights[1];
4606           pw_d^.Weights[2] := pw_s^.Weights[2];
4607           ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
4608           Inc(pw_s);
4609           Inc(pw_d);
4610         end;
4611       end;
4612     end;
4613     k := d^.GetSelfOnlySize();
4614     s := PUCA_PropItemRec(PtrUInt(s)+k);
4615     d := PUCA_PropItemRec(PtrUInt(d)+k);
4616     if (PtrUInt(s) >= dataEnd) then
4617       Break;
4618   end;
4619   if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
4620     raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
4621 end;
4622 
4623 procedure Check(const ACondition : Boolean; const AMsg : string);overload;
4624 begin
4625   if not ACondition then
4626     raise Exception.Create(AMsg);
4627 end;
4628 
4629 procedure Check(
4630   const ACondition : Boolean;
4631   const AFormatMsg : string;
4632   const AArgs      : array of const
4633 );overload;
4634 begin
4635   Check(ACondition,Format(AFormatMsg,AArgs));
4636 end;
4637 
4638 procedure Check(const ACondition : Boolean);overload;
4639 begin
4640   Check(ACondition,'Check failed.')
4641 end;
4642 
4643 procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
4644 var
4645   i : Integer;
4646 begin
4647   if (ALength > 0) then begin
4648     for i := 0 to ALength - 1 do begin
4649       Check(a[i].Weights[0]=b[i].Weights[0]);
4650       Check(a[i].Weights[1]=b[i].Weights[1]);
4651       Check(a[i].Weights[2]=b[i].Weights[2]);
4652     end;
4653   end;
4654 end;
4655 
4656 procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
4657 var
4658   i : Integer;
4659 begin
4660   if (ALength > 0) then begin
4661     for i := 0 to ALength - 1 do
4662       Check(a[i]=b[i]);
4663   end;
4664 end;
4665 
4666 procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
4667 var
4668   a, b : PUCA_PropItemContextTreeNodeRec;
4669   k : Cardinal;
4670 begin
4671   if (AProp1=nil) then begin
4672     Check(AProp2=nil);
4673     exit;
4674   end;
4675   a := AProp1;
4676   b := AProp2;
4677   Check(a^.Left=b^.Left);
4678   Check(a^.Right=b^.Right);
4679   Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
4680   Check(a^.Data.WeightCount=b^.Data.WeightCount);
4681   k := SizeOf(a^.Data);
4682   CompareCodePoints(
4683     PUInt24(PtrUInt(a)+k),
4684     PUInt24(PtrUInt(b)+k),
4685     a^.Data.CodePointCount
4686   );
4687   k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
4688   CompareWeights(
4689     PUCA_PropWeights(PtrUInt(a)+k),
4690     PUCA_PropWeights(PtrUInt(b)+k),
4691     a^.Data.WeightCount
4692   );
4693   if (a^.Left > 0) then begin
4694     k := a^.Left;
4695     CompareContextNode(
4696       PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
4697       PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
4698     );
4699   end;
4700   if (a^.Right > 0) then begin
4701     k := a^.Right;
4702     CompareContextNode(
4703       PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
4704       PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
4705     );
4706   end;
4707 end;
4708 
4709 procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
4710 var
4711   a, b : PUCA_PropItemContextTreeNodeRec;
4712   k : Integer;
4713 begin
4714   if (AProp1=nil) then begin
4715     Check(AProp2=nil);
4716     exit;
4717   end;
4718   Check(AProp1^.Size=AProp2^.Size);
4719   k := Cardinal(AProp1^.Size);
4720   a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
4721   b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
4722   CompareContextNode(a,b);
4723 end;
4724 
4725 procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
4726 var
4727   a, b, pend : PUCA_PropItemRec;
4728   wa, wb : array of TUCA_PropWeights;
4729   k : Integer;
4730 begin
4731   if (ADataLen <= 0) then
4732     exit;
4733   a := PUCA_PropItemRec(AProp1);
4734   b := PUCA_PropItemRec(AProp2);
4735   pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
4736   while (a<pend) do begin
4737     Check(a^.WeightLength=b^.WeightLength);
4738     Check(a^.ChildCount=b^.ChildCount);
4739     Check(a^.Size=b^.Size);
4740     Check(a^.Flags=b^.Flags);
4741     if a^.HasCodePoint() then
4742       Check(a^.CodePoint = b^.CodePoint);
4743     if (a^.WeightLength > 0) then begin
4744       k := a^.WeightLength;
4745       SetLength(wa,k);
4746       SetLength(wb,k);
4747       a^.GetWeightArray(@wa[0]);
4748       b^.GetWeightArray(@wb[0]);
4749       CompareWeights(@wa[0],@wb[0],k);
4750     end;
4751     if a^.Contextual then
4752       CompareContext(a^.GetContext(),b^.GetContext());
4753     Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
4754     k := a^.GetSelfOnlySize();
4755     a := PUCA_PropItemRec(PtrUInt(a)+k);
4756     b := PUCA_PropItemRec(PtrUInt(b)+k);
4757   end;
4758 end;
4759 
4760 Procedure QuickSort(AList : PCardinal; L, R : Longint);overload;
4761 var
4762   I, J : Longint;
4763   P, Q : Cardinal;
4764 begin
4765  repeat
4766    I := L;
4767    J := R;
4768    P := AList[ (L + R) div 2 ];
4769    repeat
4770      while (P > AList[i]) do
4771        I := I + 1;
4772      while (P < AList[J]) do
4773        J := J - 1;
4774      If I <= J then
4775      begin
4776        Q := AList[I];
4777        AList[I] := AList[J];
4778        AList[J] := Q;
4779        I := I + 1;
4780        J := J - 1;
4781      end;
4782    until I > J;
4783    if J - L < R - I then
4784    begin
4785      if L < J then
4786        QuickSort(AList, L, J);
4787      L := I;
4788    end
4789    else
4790    begin
4791      if I < R then
4792        QuickSort(AList, I, R);
4793      R := J;
4794    end;
4795  until L >= R;
4796 end;
4797 
CalcMaxLevel2Countnull4798 function CalcMaxLevel2Count(
4799   const ALevel1Value : Cardinal;
4800         ALines       : array of TUCA_LineRec
4801 ) : Integer;
4802 var
4803   i, c, k : Integer;
4804   ac : Integer;
4805   items : array of Cardinal;
4806   p : PUCA_LineRec;
4807   pw : ^TUCA_WeightRec;
4808 begin
4809   c := Length(ALines);
4810   if (c < 1) then
4811     exit(0);
4812   SetLength(items,0);
4813   ac := 0;
4814   p := @ALines[Low(ALines)];
4815   for i := 0 to c-1 do begin
4816     if (Length(p^.Weights) > 0) then begin
4817       pw := @p^.Weights[Low(p^.Weights)];
4818       for k := 0 to Length(p^.Weights)-1 do begin
4819         if (pw^.Weights[0] = ALevel1Value) then begin
4820           if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
4821             if (ac >= Length(items)) then
4822               SetLength(items,Length(items)+256);
4823             items[ac] := pw^.Weights[1];
4824             ac := ac+1;
4825           end;
4826         end;
4827         Inc(pw);
4828       end;
4829     end;
4830     Inc(p);
4831   end;
4832   Result := ac;
4833 end;
4834 
RewriteLevel2null4835 function RewriteLevel2(
4836   const ALevel1Value : Cardinal;
4837         ALines       : PUCA_LineRec;
4838   const ALinesLength : Integer
4839 ) : Integer;
4840 var
4841   i, c, k : Integer;
4842   ac : Integer;
4843   items : array of Cardinal;
4844   p : PUCA_LineRec;
4845   pw : ^TUCA_WeightRec;
4846   newValue : Int64;
4847 begin
4848   c := ALinesLength;
4849   if (c < 1) then
4850     exit(0);
4851   SetLength(items,256);
4852   ac := 0;
4853   p := ALines;
4854   for i := 0 to c-1 do begin
4855     if (Length(p^.Weights) > 0) then begin
4856       for k := 0 to Length(p^.Weights)-1 do begin
4857         pw := @p^.Weights[k];
4858         if (pw^.Weights[0] = ALevel1Value) then begin
4859           if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
4860             if (ac >= Length(items)) then
4861               SetLength(items,Length(items)+256);
4862             items[ac] := pw^.Weights[1];
4863             ac := ac+1;
4864           end;
4865         end;
4866       end;
4867     end;
4868     Inc(p);
4869   end;
4870   SetLength(items,ac);
4871   if (ac > 1) then
4872     QuickSort(@items[0],0,(ac-1));
4873 
4874   p := ALines;
4875   for i := 0 to c-1 do begin
4876     if (Length(p^.Weights) > 0) then begin
4877       for k := 0 to Length(p^.Weights)-1 do begin
4878         pw := @p^.Weights[k];
4879         if (pw^.Weights[0] = ALevel1Value) then begin
4880           newValue := IndexDWord(items[0],ac,pw^.Weights[1]);
4881           if (newValue < 0) then
4882             raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]);
4883           pw^.Weights[1] := newValue;//+1;
4884         end;
4885       end;
4886     end;
4887     Inc(p);
4888   end;
4889   if (Length(items) > 0) then
4890     Result := items[Length(items)-1]
4891   else
4892     Result := 0;
4893 end;
4894 
4895 procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
4896 var
4897   c, i, ac, k : Integer;
4898   p : PUCA_LineRec;
4899   level1List : array of Cardinal;
4900   pw : ^TUCA_WeightRec;
4901 begin
4902   c := ALength;
4903   if (c < 1) then
4904     exit;
4905   ac := 0;
4906   SetLength(level1List,c);
4907   p := ALines;
4908   for i := 0 to c-1 do begin
4909     if (Length(p^.Weights) > 0) then begin
4910       for k := 0 to Length(p^.Weights)-1 do begin
4911         pw := @p^.Weights[k];
4912         if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin
4913           if (ac >= Length(level1List)) then
4914             SetLength(level1List,ac+1000);
4915           level1List[ac] := pw^.Weights[0];
4916           RewriteLevel2(level1List[ac],ALines,ALength);
4917           ac := ac+1;
4918         end;
4919       end;
4920     end;
4921     Inc(p);
4922   end;
4923 end;
4924 
CalcMaxLevel2Valuenull4925 function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
4926 var
4927   i, c, k, tempValue : Integer;
4928   p : PUCA_LineRec;
4929   maxLevel : Cardinal;
4930   maxValue : Integer;
4931 begin
4932   c := Length(ALines);
4933   if (c < 2) then
4934     exit(0);
4935   maxLevel := 0;
4936   maxValue := CalcMaxLevel2Count(maxLevel,ALines);
4937   p := @ALines[Low(ALines)+1];
4938   for i := 1 to c-1 do begin
4939     if (Length(p^.Weights) > 0) then begin
4940       for k := 0 to Length(p^.Weights)-1 do begin
4941         if (p^.Weights[k].Weights[0] <> maxLevel) then begin
4942           tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines);
4943           if (tempValue > maxValue) then begin
4944             maxLevel := p^.Weights[k].Weights[0];
4945             maxValue := tempValue;
4946           end;
4947         end;
4948       end;
4949     end;
4950     Inc(p);
4951   end;
4952   Result := maxValue;
4953 end;
4954 
4955 initialization
4956   FS := DefaultFormatSettings;
4957   FS.DecimalSeparator := '.';
4958 
4959 end.
4960