1 { Unicode parser helper unit.
2
3 Copyright (c) 2012-2015 by Inoussa OUEDRAOGO
4
5 The source code is distributed under the Library GNU
6 General Public License with the following modification:
7
8 - object files and libraries linked into an application may be
9 distributed without source code.
10
11 If you didn't receive a copy of the file COPYING, contact:
12 Free Software Foundation
13 675 Mass Ave
14 Cambridge, MA 02139
15 USA
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
20 unit helper;
21
22 {$mode delphi}
23 {$H+}
24 {$PACKENUM 1}
25 {$pointermath on}
26 {$typedaddress on}
27 {$warn 4056 off} //Conversion between ordinals and pointers is not portable
28
29 {$macro on}
30 {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
31 {$define X_PACKED:=}
32 {$else FPC_REQUIRES_PROPER_ALIGNMENT}
33 {$define X_PACKED:=packed}
34 {$endif FPC_REQUIRES_PROPER_ALIGNMENT}
35
36 interface
37
38 uses
39 Classes, SysUtils, StrUtils;
40
41 const
42 SLicenseText =
43 ' { Unicode implementation tables. ' + sLineBreak +
44 ' ' + sLineBreak +
45 ' Copyright (c) 2013 - 2017 by Inoussa OUEDRAOGO ' + sLineBreak +
46 ' ' + sLineBreak +
47 ' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
48 ' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
49 ' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
50 ' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
51 ' Files or Software without restriction, including without ' + sLineBreak +
52 ' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
53 ' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
54 ' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
55 ' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
56 ' notice(s) and this permission notice appear with all copies ' + sLineBreak +
57 ' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
58 ' notice(s) and this permission notice appear in associated ' + sLineBreak +
59 ' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
60 ' Data File or in the Software as well as in the documentation ' + sLineBreak +
61 ' associated with the Data File(s) or Software that the data or ' + sLineBreak +
62 ' software has been modified. ' + sLineBreak +
63 ' ' + sLineBreak +
64 ' ' + sLineBreak +
65 ' This program is distributed in the hope that it will be useful, ' + sLineBreak +
66 ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
67 ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
68
69 WEIGHT_LEVEL_COUNT = 3;
70
71 type
72 // Unicode General Category
73 TUnicodeCategory = (
74 ucUppercaseLetter, // Lu = Letter, uppercase
75 ucLowercaseLetter, // Ll = Letter, lowercase
76 ucTitlecaseLetter, // Lt = Letter, titlecase
77 ucModifierLetter, // Lm = Letter, modifier
78 ucOtherLetter, // Lo = Letter, other
79
80 ucNonSpacingMark, // Mn = Mark, nonspacing
81 ucCombiningMark, // Mc = Mark, spacing combining
82 ucEnclosingMark, // Me = Mark, enclosing
83
84 ucDecimalNumber, // Nd = Number, decimal digit
85 ucLetterNumber, // Nl = Number, letter
86 ucOtherNumber, // No = Number, other
87
88 ucConnectPunctuation, // Pc = Punctuation, connector
89 ucDashPunctuation, // Pd = Punctuation, dash
90 ucOpenPunctuation, // Ps = Punctuation, open
91 ucClosePunctuation, // Pe = Punctuation, close
92 ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
93 ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
94 ucOtherPunctuation, // Po = Punctuation, other
95
96 ucMathSymbol, // Sm = Symbol, math
97 ucCurrencySymbol, // Sc = Symbol, currency
98 ucModifierSymbol, // Sk = Symbol, modifier
99 ucOtherSymbol, // So = Symbol, other
100
101 ucSpaceSeparator, // Zs = Separator, space
102 ucLineSeparator, // Zl = Separator, line
103 ucParagraphSeparator, // Zp = Separator, paragraph
104
105 ucControl, // Cc = Other, control
106 ucFormat, // Cf = Other, format
107 ucSurrogate, // Cs = Other, surrogate
108 ucPrivateUse, // Co = Other, private use
109 ucUnassigned // Cn = Other, not assigned (including noncharacters)
110 );
111
112
113 TUInt24Rec = packed record
114 public
115 {$ifdef FPC_LITTLE_ENDIAN}
116 byte0, byte1, byte2 : Byte;
117 {$else FPC_LITTLE_ENDIAN}
118 byte2, byte1, byte0 : Byte;
119 {$endif FPC_LITTLE_ENDIAN}
120 public
121 class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
122 class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
123 class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
124 class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
125 class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
126
127 class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
128
129 class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
130
131 class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
132 class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
133
134 class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
135 class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
136
137 class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
138 class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
139
140 class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
141 class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
142
143 class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
144 class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
145 class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
146 class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
147 class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
148 class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
149 class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
150 class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
151 class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
152 class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
153 class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
154 class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
155 class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
156 class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
157 class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
158 end;
159
160 UInt24 = TUInt24Rec;
161 PUInt24 = ^UInt24;
162 TUnicodeCodePoint = Cardinal;
163 TUnicodeCodePointArray = array of TUnicodeCodePoint;
164 TDecompositionArray = array of TUnicodeCodePointArray;
165 TNumericValue = Double;
166 TNumericValueArray = array of TNumericValue;
167
168 TBlockItemRec = packed record
169 RangeStart : TUnicodeCodePoint;
170 RangeEnd : TUnicodeCodePoint;
171 Name : string[120];
172 CanonicalName : string[120];
173 end;
174 TBlocks = array of TBlockItemRec;
175
176 PPropRec = ^TPropRec;
177
178 { TPropRec }
179
180 TPropRec = packed record
181 private
GetCategorynull182 function GetCategory : TUnicodeCategory;inline;
183 procedure SetCategory(AValue : TUnicodeCategory);
GetWhiteSpacenull184 function GetWhiteSpace : Boolean;inline;
185 procedure SetWhiteSpace(AValue : Boolean);
GetHangulSyllablenull186 function GetHangulSyllable : Boolean;inline;
187 procedure SetHangulSyllable(AValue : Boolean);
188 public
189 CategoryData : Byte;
190
191 PropID : Word;
192 CCC : Byte; // Canonical Combining Class
193 NumericIndex : Byte;
194 SimpleUpperCase : UInt24;
195 SimpleLowerCase : UInt24;
196 DecompositionID : SmallInt;
197 public
198 property Category : TUnicodeCategory read GetCategory write SetCategory;
199 property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
200 property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
201 end;
202 TPropRecArray = array of TPropRec;
203
204 TDecompositionIndexRec = packed record
205 StartPosition : Word;
206 Length : Byte;
207 end;
208 TDecompositionBook = X_PACKED record
209 Index : array of TDecompositionIndexRec;
210 CodePoints : array of TUnicodeCodePoint;
211 end;
212
213 PDataLineRec = ^TDataLineRec;
214 TDataLineRec = record
215 PropID : Integer;
216 case LineType : Byte of
217 0 : (CodePoint : TUnicodeCodePoint);
218 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
219 end;
220 TDataLineRecArray = array of TDataLineRec;
221
222 TCodePointRec = record
223 case LineType : Byte of
224 0 : (CodePoint : TUnicodeCodePoint);
225 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
226 end;
227 TCodePointRecArray = array of TCodePointRec;
228
229 TPropListLineRec = packed record
230 CodePoint : TCodePointRec;
231 PropName : string[123];
232 end;
233 TPropListLineRecArray = array of TPropListLineRec;
234
235 { TUCA_WeightRec }
236
237 TUCA_WeightRec = packed record
238 public
239 Weights : array[0..3] of Cardinal;
240 Variable : Boolean;
241 public
242 class operator Equal(a, b: TUCA_WeightRec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
243 end;
244 TUCA_WeightRecArray = array of TUCA_WeightRec;
245
246 PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
247 TUCA_LineContextItemRec = X_PACKED record
248 public
249 CodePoints : TUnicodeCodePointArray;
250 Weights : TUCA_WeightRecArray;
251 public
252 procedure Clear();
253 procedure Assign(ASource : PUCA_LineContextItemRec);
Clonenull254 function Clone() : TUCA_LineContextItemRec;
255 end;
256
257 PUCA_LineContextRec = ^TUCA_LineContextRec;
258 TUCA_LineContextRec = X_PACKED record
259 public
260 Data : array of TUCA_LineContextItemRec;
261 public
262 procedure Clear();
263 procedure Assign(ASource : PUCA_LineContextRec);
Clonenull264 function Clone() : TUCA_LineContextRec;
265 end;
266
267 PUCA_LineRec = ^TUCA_LineRec;
268 TUCA_LineRec = X_PACKED record
269 public
270 CodePoints : TUnicodeCodePointArray;
271 Weights : TUCA_WeightRecArray;
272 Context : TUCA_LineContextRec;
273 //Variable : Boolean;
274 Deleted : Boolean;
275 Stored : Boolean;
276 public
277 procedure Clear();
278 procedure Assign(ASource : PUCA_LineRec);
Clonenull279 function Clone() : TUCA_LineRec;
HasContextnull280 function HasContext() : Boolean;
281 end;
282 TUCA_VariableKind = (
283 ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
284 ucaIgnoreSP
285 );
286 TUCA_DataBook = X_PACKED record
287 Version : string;
288 VariableWeight : TUCA_VariableKind;
289 Backwards : array[0..3] of Boolean;
290 Lines : array of TUCA_LineRec;
291 end;
292 PUCA_DataBook = ^TUCA_DataBook;
293 TUCA_DataBookIndex = array of Integer;
294
295 type
296 TUCA_PropWeights = packed record
297 Weights : array[0..2] of Word;
298 //Variable : Byte;
299 end;
300 PUCA_PropWeights = ^TUCA_PropWeights;
301
302 TUCA_PropItemContextRec = packed record
303 CodePointCount : Byte;
304 WeightCount : Byte;
305 //CodePoints : UInt24;
306 //Weights : TUCA_PropWeights;
307 end;
308 PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
309 TUCA_PropItemContextTreeNodeRec = packed record
310 Left : Word;
311 Right : Word;
312 Data : TUCA_PropItemContextRec;
313 end;
314 PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
315
316 TUCA_PropItemContextTreeRec = packed record
317 public
318 Size : UInt24;
319 public
GetDatanull320 function GetData:PUCA_PropItemContextTreeNodeRec;inline;
321 property Data : PUCA_PropItemContextTreeNodeRec read GetData;
322 end;
323 PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
324
325 { TUCA_PropItemRec }
326
327 TUCA_PropItemRec = packed record
328 private
329 const FLAG_VALID = 0;
330 const FLAG_CODEPOINT = 1;
331 const FLAG_CONTEXTUAL = 2;
332 const FLAG_DELETION = 3;
333 const FLAG_COMPRESS_WEIGHT_1 = 6;
334 const FLAG_COMPRESS_WEIGHT_2 = 7;
335 private
GetWeightSizenull336 function GetWeightSize : Word;inline;
337 public
338 WeightLength : Byte;
339 ChildCount : Byte;
340 Size : Word;
341 Flags : Byte;
342 public
HasCodePointnull343 function HasCodePoint() : Boolean;inline;
GetCodePointnull344 function GetCodePoint() : UInt24;//inline;
345 property CodePoint : UInt24 read GetCodePoint;
346 //Weights : array[0..WeightLength] of TUCA_PropWeights;
347 procedure GetWeightArray(ADest : PUCA_PropWeights);
GetSelfOnlySizenull348 function GetSelfOnlySize() : Cardinal;inline;
349
350 procedure SetContextual(AValue : Boolean);inline;
GetContextualnull351 function GetContextual() : Boolean;inline;
352 property Contextual : Boolean read GetContextual write setContextual;
GetContextnull353 function GetContext() : PUCA_PropItemContextTreeRec;
354 procedure SetDeleted(AValue : Boolean);inline;
IsDeletednull355 function IsDeleted() : Boolean;inline;
IsValidnull356 function IsValid() : Boolean;inline;
IsWeightCompress_1null357 function IsWeightCompress_1() : Boolean;inline;
IsWeightCompress_2null358 function IsWeightCompress_2() : Boolean;inline;
359 end;
360 PUCA_PropItemRec = ^TUCA_PropItemRec;
361 TUCA_PropIndexItem = packed record
362 CodePoint : Cardinal;
363 Position : Integer;
364 end;
365 PUCA_PropIndexItem = ^TUCA_PropIndexItem;
366 TUCA_PropBook = X_PACKED record
367 ItemSize : Integer;
368 Index : array of TUCA_PropIndexItem;
369 Items : PUCA_PropItemRec; //Native Endian
370 ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
371 VariableLowLimit : Word;
372 VariableHighLimit : Word;
373 end;
374 PUCA_PropBook = ^TUCA_PropBook;
375
376 TBmpFirstTable = array[0..255] of Byte;
377 TBmpSecondTableItem = array[0..255] of Word;
378 TBmpSecondTable = array of TBmpSecondTableItem;
379
380 T3lvlBmp1Table = array[0..255] of Byte;
381 T3lvlBmp2TableItem = array[0..15] of Word;
382 T3lvlBmp2Table = array of T3lvlBmp2TableItem;
383 T3lvlBmp3TableItem = array[0..15] of Word;
384 T3lvlBmp3Table = array of T3lvlBmp3TableItem;
385
386 TucaBmpFirstTable = array[0..255] of Byte;
387 TucaBmpSecondTableItem = array[0..255] of Cardinal;
388 TucaBmpSecondTable = array of TucaBmpSecondTableItem;
389 PucaBmpFirstTable = ^TucaBmpFirstTable;
390 PucaBmpSecondTable = ^TucaBmpSecondTable;
391
392 const
393 LOW_SURROGATE_BEGIN = Word($DC00);
394 LOW_SURROGATE_END = Word($DFFF);
395 LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
396
397 HIGH_SURROGATE_BEGIN = Word($D800);
398 HIGH_SURROGATE_END = Word($DBFF);
399 HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
400 type
401 TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
402 TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
403 TOBmpSecondTable = array of TOBmpSecondTableItem;
404
405 T3lvlOBmp1Table = array[0..1023] of Byte;
406 T3lvlOBmp2TableItem = array[0..31] of Word;
407 T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
408 T3lvlOBmp3TableItem = array[0..31] of Word;
409 T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
410
411 TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
412 TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
413 TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
414 PucaOBmpFirstTable = ^TucaOBmpFirstTable;
415 PucaOBmpSecondTable = ^TucaOBmpSecondTable;
416
417 type
418 TEndianKind = (ekLittle, ekBig);
419 const
420 ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
421 {$IFDEF ENDIAN_LITTLE}
422 ENDIAN_NATIVE = ekLittle;
423 ENDIAN_NON_NATIVE = ekBig;
424 {$ENDIF ENDIAN_LITTLE}
425 {$IFDEF ENDIAN_BIG}
426 ENDIAN_NATIVE = ekBig;
427 ENDIAN_NON_NATIVE = ekLittle;
428 {$ENDIF ENDIAN_BIG}
429
430 procedure GenerateLicenceText(ADest : TStream);
431
BoolToBytenull432 function BoolToByte(AValue : Boolean): Byte;inline;
433
IsHangulSyllablenull434 function IsHangulSyllable(
435 const ACodePoint : TUnicodeCodePoint;
436 const AHangulList : TCodePointRecArray
437 ) : Boolean;
438 procedure ParseHangulSyllableTypes(
439 ADataAStream : TMemoryStream;
440 var ACodePointList : TCodePointRecArray
441 );
442
443 procedure ParseProps(
444 ADataAStream : TMemoryStream;
445 var APropList : TPropListLineRecArray
446 );
FindCodePointsByPropertynull447 function FindCodePointsByProperty(
448 const APropName : string;
449 const APropList : TPropListLineRecArray
450 ) : TCodePointRecArray;
451
452 procedure ParseBlokcs(
453 ADataAStream : TMemoryStream;
454 var ABlocks : TBlocks
455 );
456 procedure ParseUCAFile(
457 ADataAStream : TMemoryStream;
458 var ABook : TUCA_DataBook
459 );
460 procedure MakeUCA_Props(
461 ABook : PUCA_DataBook;
462 out AProps : PUCA_PropBook
463 );
464 procedure FreeUcaBook(var ABook : PUCA_PropBook);
465 procedure MakeUCA_BmpTables(
466 var AFirstTable : TucaBmpFirstTable;
467 var ASecondTable : TucaBmpSecondTable;
468 const APropBook : PUCA_PropBook
469 );
470 procedure MakeUCA_OBmpTables(
471 var AFirstTable : TucaOBmpFirstTable;
472 var ASecondTable : TucaOBmpSecondTable;
473 const APropBook : PUCA_PropBook
474 );
GetPropPositionnull475 function GetPropPosition(
476 const AHighS,
477 ALowS : Word;
478 const AFirstTable : PucaOBmpFirstTable;
479 const ASecondTable : PucaOBmpSecondTable
480 ): Integer;inline;overload;
481
482 procedure GenerateUCA_Head(
483 ADest : TStream;
484 ABook : PUCA_DataBook;
485 AProps : PUCA_PropBook
486 );
487 procedure GenerateUCA_BmpTables(
488 AStream,
489 ANativeEndianStream,
490 ANonNativeEndianStream : TStream;
491 var AFirstTable : TucaBmpFirstTable;
492 var ASecondTable : TucaBmpSecondTable
493 );
494 procedure GenerateBinaryUCA_BmpTables(
495 ANativeEndianStream,
496 ANonNativeEndianStream : TStream;
497 var AFirstTable : TucaBmpFirstTable;
498 var ASecondTable : TucaBmpSecondTable
499 );
500 procedure GenerateUCA_PropTable(
501 ADest : TStream;
502 const APropBook : PUCA_PropBook;
503 const AEndian : TEndianKind
504 );
505 procedure GenerateBinaryUCA_PropTable(
506 // WARNING : files must be generated for each endianess (Little / Big)
507 ANativeEndianStream,
508 ANonNativeEndianStream : TStream;
509 const APropBook : PUCA_PropBook
510 );
511 procedure GenerateUCA_OBmpTables(
512 AStream,
513 ANativeEndianStream,
514 ANonNativeEndianStream : TStream;
515 var AFirstTable : TucaOBmpFirstTable;
516 var ASecondTable : TucaOBmpSecondTable
517 );
518 procedure GenerateBinaryUCA_OBmpTables(
519 ANativeEndianStream,
520 ANonNativeEndianStream : TStream;
521 var AFirstTable : TucaOBmpFirstTable;
522 var ASecondTable : TucaOBmpSecondTable
523 );
524
525 procedure Parse_UnicodeData(
526 ADataAStream : TMemoryStream;
527 var APropList : TPropRecArray;
528 var ANumericTable : TNumericValueArray;
529 var ADataLineList : TDataLineRecArray;
530 var ADecomposition : TDecompositionArray;
531 const AHangulList : TCodePointRecArray;
532 const AWhiteSpaces : TCodePointRecArray
533 );
534 procedure MakeDecomposition(
535 const ARawData : TDecompositionArray;
536 var ABook : TDecompositionBook
537 );
538
539 procedure MakeBmpTables(
540 var AFirstTable : TBmpFirstTable;
541 var ASecondTable : TBmpSecondTable;
542 const ADataLineList : TDataLineRecArray
543 );
544 procedure MakeBmpTables3Levels(
545 var AFirstTable : T3lvlBmp1Table;
546 var ASecondTable : T3lvlBmp2Table;
547 var AThirdTable : T3lvlBmp3Table;
548 const ADataLineList : TDataLineRecArray
549 );
550 procedure GenerateBmpTables(
551 ADest : TStream;
552 var AFirstTable : TBmpFirstTable;
553 var ASecondTable : TBmpSecondTable
554 );
555 procedure Generate3lvlBmpTables(
556 ADest : TStream;
557 var AFirstTable : T3lvlBmp1Table;
558 var ASecondTable : T3lvlBmp2Table;
559 var AThirdTable : T3lvlBmp3Table
560 );
561 procedure GeneratePropTable(
562 ADest : TStream;
563 const APropList : TPropRecArray;
564 const AEndian : TEndianKind
565 );
566 procedure GenerateNumericTable(
567 ADest : TStream;
568 const ANumList : TNumericValueArray;
569 const ACompleteUnit : Boolean
570 );
571 procedure GenerateDecompositionBookTable(
572 ADest : TStream;
573 const ABook : TDecompositionBook;
574 const AEndian : TEndianKind
575 );
576 procedure GenerateOutBmpTable(
577 ADest : TStream;
578 const AList : TDataLineRecArray
579 );
580
Compressnull581 function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
582
EvaluateFloatnull583 function EvaluateFloat(const AStr : string) : Double;
StrToCategorynull584 function StrToCategory(const AStr : string) : TUnicodeCategory;
StringToCodePointnull585 function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
IsWhiteSpacenull586 function IsWhiteSpace(
587 const ACodePoint : TUnicodeCodePoint;
588 const AWhiteSpaces : TCodePointRecArray
589 ) : Boolean;
590
GetPropIDnull591 function GetPropID(
592 ACodePoint : TUnicodeCodePoint;
593 const ADataLineList : TDataLineRecArray
594 ) : Cardinal;
595
596 //--------------------
597 procedure MakeOBmpTables(
598 var AFirstTable : TOBmpFirstTable;
599 var ASecondTable : TOBmpSecondTable;
600 const ADataLineList : TDataLineRecArray
601 );
602 procedure MakeOBmpTables3Levels(
603 var AFirstTable : T3lvlOBmp1Table;
604 var ASecondTable : T3lvlOBmp2Table;
605 var AThirdTable : T3lvlOBmp3Table;
606 const ADataLineList : TDataLineRecArray
607 );
608 procedure GenerateOBmpTables(
609 ADest : TStream;
610 var AFirstTable : TOBmpFirstTable;
611 var ASecondTable : TOBmpSecondTable
612 );
613 procedure Generate3lvlOBmpTables(
614 ADest : TStream;
615 var AFirstTable : T3lvlOBmp1Table;
616 var ASecondTable : T3lvlOBmp2Table;
617 var AThirdTable : T3lvlOBmp3Table
618 );
GetPropnull619 function GetProp(
620 const AHighS,
621 ALowS : Word;
622 const AProps : TPropRecArray;
623 var AFirstTable : TOBmpFirstTable;
624 var ASecondTable : TOBmpSecondTable
625 ): PPropRec; inline;overload;
GetPropnull626 function GetProp(
627 const AHighS,
628 ALowS : Word;
629 const AProps : TPropRecArray;
630 var AFirstTable : T3lvlOBmp1Table;
631 var ASecondTable : T3lvlOBmp2Table;
632 var AThirdTable : T3lvlOBmp3Table
633 ): PPropRec; inline;overload;
634 procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
ToUCS4null635 function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
636
637 type
638 TBitOrder = 0..7;
639
IsBitONnull640 function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
641 procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
642
GenerateEndianIncludeFileNamenull643 function GenerateEndianIncludeFileName(
644 const AStoreName : string;
645 const AEndian : TEndianKind
646 ): string;inline;
647
648 procedure ReverseFromNativeEndian(
649 const AData : PUCA_PropItemRec;
650 const ADataLen : Cardinal;
651 const ADest : PUCA_PropItemRec
652 );
653 procedure ReverseToNativeEndian(
654 const AData : PUCA_PropItemRec;
655 const ADataLen : Cardinal;
656 const ADest : PUCA_PropItemRec
657 );
658 procedure CompareProps(
659 const AProp1,
660 AProp2 : PUCA_PropItemRec;
661 const ADataLen : Integer
662 );
663
664 type
665 TCollationName = array[0..(128-1)] of Byte;
666 TCollationVersion = TCollationName;
667 TSerializedCollationHeader = packed record
668 Base : TCollationName;
669 Version : TCollationVersion;
670 CollationName : TCollationName;
671 CollationAliases : TCollationName; // ";" separated
672 VariableWeight : Byte;
673 Backwards : Byte;
674 BMP_Table1Length : DWord;
675 BMP_Table2Length : DWord;
676 OBMP_Table1Length : DWord;
677 OBMP_Table2Length : DWord;
678 PropCount : DWord;
679 VariableLowLimit : Word;
680 VariableHighLimit : Word;
681 NoNormalization : Byte;
682 Strength : Byte;
683 ChangedFields : Byte;
684 end;
685 PSerializedCollationHeader = ^TSerializedCollationHeader;
686
687 procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);overload;
688 procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);overload;
689
690 procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
691 procedure ReverseBytes(var AData; const ALength : Integer);
692 procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
693
CalcMaxLevel2Valuenull694 function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
695 procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
RewriteLevel2null696 function RewriteLevel2(
697 const ALevel1Value : Cardinal;
698 ALines : PUCA_LineRec;
699 const ALinesLength : Integer
700 ) : Integer;
701
702 resourcestring
703 SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
704
705 implementation
706 uses
707 typinfo, Math, AVL_Tree,
708 trie;
709
710
711 type
712
713 TCardinalRec = packed record
714 {$ifdef FPC_LITTLE_ENDIAN}
715 byte0, byte1, byte2, byte3 : Byte;
716 {$else FPC_LITTLE_ENDIAN}
717 byte3, byte2, byte1, byte0 : Byte;
718 {$endif FPC_LITTLE_ENDIAN}
719 end;
720
721 TWordRec = packed record
722 {$ifdef FPC_LITTLE_ENDIAN}
723 byte0, byte1 : Byte;
724 {$else FPC_LITTLE_ENDIAN}
725 byte1, byte0 : Byte;
726 {$endif FPC_LITTLE_ENDIAN}
727 end;
728
729 { TUInt24Rec }
730
731 class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
732 begin
733 TCardinalRec(Result).byte0 := a.byte0;
734 TCardinalRec(Result).byte1 := a.byte1;
735 TCardinalRec(Result).byte2 := a.byte2;
736 TCardinalRec(Result).byte3 := 0;
737 end;
738
739 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
740 begin
741 TCardinalRec(Result).byte0 := a.byte0;
742 TCardinalRec(Result).byte1 := a.byte1;
743 TCardinalRec(Result).byte2 := a.byte2;
744 TCardinalRec(Result).byte3 := 0;
745 end;
746
747 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
748 begin
749 Result := Cardinal(a);
750 end;
751
752 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
753 begin
754 {$IFOPT R+}
755 if (a.byte2 > 0) then
756 Error(reIntOverflow);
757 {$ENDIF R+}
758 TWordRec(Result).byte0 := a.byte0;
759 TWordRec(Result).byte1 := a.byte1;
760 end;
761
762 class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
763 begin
764 {$IFOPT R+}
765 if (a.byte1 > 0) or (a.byte2 > 0) then
766 Error(reIntOverflow);
767 {$ENDIF R+}
768 Result := a.byte0;
769 end;
770
771 class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
772 begin
773 {$IFOPT R+}
774 if (a > $FFFFFF) then
775 Error(reIntOverflow);
776 {$ENDIF R+}
777 Result.byte0 := TCardinalRec(a).byte0;
778 Result.byte1 := TCardinalRec(a).byte1;
779 Result.byte2 := TCardinalRec(a).byte2;
780 end;
781
782 class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
783 begin
784 Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
785 end;
786
787 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
788 begin
789 Result := (TCardinalRec(b).byte3 = 0) and
790 (a.byte0 = TCardinalRec(b).byte0) and
791 (a.byte1 = TCardinalRec(b).byte1) and
792 (a.byte2 = TCardinalRec(b).byte2);
793 end;
794
795 class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
796 begin
797 Result := (b = a);
798 end;
799
800 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
801 begin
802 Result := (LongInt(a) = b);
803 end;
804
805 class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
806 begin
807 Result := (b = a);
808 end;
809
810 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
811 begin
812 Result := (a.byte2 = 0) and
813 (a.byte0 = TWordRec(b).byte0) and
814 (a.byte1 = TWordRec(b).byte1);
815 end;
816
817 class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
818 begin
819 Result := (b = a);
820 end;
821
822 class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
823 begin
824 Result := (a.byte2 = 0) and
825 (a.byte1 = 0) and
826 (a.byte0 = b);
827 end;
828
829 class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
830 begin
831 Result := (b = a);
832 end;
833
834 class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
835 begin
836 Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
837 end;
838
839 class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
840 begin
841 Result := (TCardinalRec(b).byte3 <> 0) or
842 (a.byte0 <> TCardinalRec(b).byte0) or
843 (a.byte1 <> TCardinalRec(b).byte1) or
844 (a.byte2 <> TCardinalRec(b).byte2);
845 end;
846
847 class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
848 begin
849 Result := (b <> a);
850 end;
851
852 class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
853 begin
854 Result := (a.byte2 > b.byte2) or
855 ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
856 ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
857 end;
858
859 class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
860 begin
861 Result := Cardinal(a) > b;
862 end;
863
864 class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
865 begin
866 Result := a > Cardinal(b);
867 end;
868
869 class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
870 begin
871 Result := (a.byte2 > b.byte2) or
872 ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
873 ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
874 end;
875
876 class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
877 begin
878 Result := Cardinal(a) >= b;
879 end;
880
881 class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
882 begin
883 Result := a >= Cardinal(b);
884 end;
885
886 class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
887 begin
888 Result := (b > a);
889 end;
890
891 class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
892 begin
893 Result := Cardinal(a) < b;
894 end;
895
896 class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
897 begin
898 Result := a < Cardinal(b);
899 end;
900
901 class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
902 begin
903 Result := (b >= a);
904 end;
905
906 class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
907 begin
908 Result := Cardinal(a) <= b;
909 end;
910
911 class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
912 begin
913 Result := a <= Cardinal(b);
914 end;
915
916 { TUCA_WeightRec }
917
918 class operator TUCA_WeightRec.Equal(a, b : TUCA_WeightRec) : Boolean;
919 begin
920 Result := (a.Weights[0] = b.Weights[0]) and (a.Weights[1] = b.Weights[1]) and
921 (a.Weights[2] = b.Weights[2]) and (a.Weights[3] = b.Weights[3]) and
922 (a.Variable = b.Variable);
923 end;
924
925 procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);
926 begin
927 StringToByteArray(AStr,@(ABuffer[Low(ABuffer)]),Length(ABuffer));
928 end;
929
930 procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);
931 var
932 c, i, bl : Integer;
933 ps : PWord;
934 pb : PByte;
935 begin
936 if (ABufferLength < 1) then
937 exit;
938 c := Length(AStr);
939 if (c > ABufferLength) then
940 c := ABufferLength;
941 bl := 0;
942 pb := ABuffer;
943 if (c > 0) then begin
944 ps := PWord(@AStr[1]);
945 for i := 1 to c do begin
946 if (ps^ <= High(Byte)) then begin
947 pb^ := ps^;
948 bl := bl+1;
949 Inc(pb);
950 end;
951 Inc(ps);
952 end;
953 end;
954 if (bl < ABufferLength) then begin
955 for i := bl+1 to ABufferLength do begin
956 pb^:= 0;
957 Inc(pb);
958 end;
959 end;
960 end;
961
GenerateEndianIncludeFileNamenull962 function GenerateEndianIncludeFileName(
963 const AStoreName : string;
964 const AEndian : TEndianKind
965 ): string;inline;
966 begin
967 Result := ExtractFilePath(AStoreName) +
968 ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
969 end;
970
IsBitONnull971 function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
972 begin
973 Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
974 end;
975
976 procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
977 begin
978 if AValue then
979 AData := AData or (1 shl (ABit mod 8))
980 else
981 AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
982 end;
983
984 var
985 FS : TFormatSettings;
986
EvaluateFloatnull987 function EvaluateFloat(const AStr : string) : Double;
988 var
989 s, n, d : string;
990 i : Integer;
991 begin
992 Result := 0;
993 s := Trim(AStr);
994 if (Length(s) > 0) then begin
995 i := Pos('/',s);
996 if (i < 1) then
997 Result := StrToFloat(s,FS)
998 else begin
999 n := Copy(s,1,i-1);
1000 d := Copy(s,i+1,MaxInt);
1001 Result := StrToInt(n) / StrToInt(d);
1002 end;
1003 end;
1004 end;
1005
StrToCategorynull1006 function StrToCategory(const AStr : string) : TUnicodeCategory;
1007 var
1008 s : string;
1009 begin
1010 s := UpperCase(Trim(AStr));
1011 if (s = 'LU') then
1012 Result := ucUppercaseLetter
1013 else if (s = 'LL') then
1014 Result := ucLowercaseLetter
1015 else if (s = 'LT') then
1016 Result := ucTitlecaseLetter
1017 else if (s = 'LM') then
1018 Result := ucModifierLetter
1019 else if (s = 'LO') then
1020 Result := ucOtherLetter
1021 else
1022
1023 if (s = 'MN') then
1024 Result := ucNonSpacingMark
1025 else if (s = 'MC') then
1026 Result := ucCombiningMark
1027 else if (s = 'ME') then
1028 Result := ucEnclosingMark
1029 else
1030
1031 if (s = 'ND') then
1032 Result := ucDecimalNumber
1033 else if (s = 'NL') then
1034 Result := ucLetterNumber
1035 else if (s = 'NO') then
1036 Result := ucOtherNumber
1037 else
1038
1039 if (s = 'PC') then
1040 Result := ucConnectPunctuation
1041 else if (s = 'PD') then
1042 Result := ucDashPunctuation
1043 else if (s = 'PS') then
1044 Result := ucOpenPunctuation
1045 else if (s = 'PE') then
1046 Result := ucClosePunctuation
1047 else if (s = 'PI') then
1048 Result := ucInitialPunctuation
1049 else if (s = 'PF') then
1050 Result := ucFinalPunctuation
1051 else if (s = 'PO') then
1052 Result := ucOtherPunctuation
1053 else
1054
1055 if (s = 'SM') then
1056 Result := ucMathSymbol
1057 else if (s = 'SC') then
1058 Result := ucCurrencySymbol
1059 else if (s = 'SK') then
1060 Result := ucModifierSymbol
1061 else if (s = 'SO') then
1062 Result := ucOtherSymbol
1063 else
1064
1065 if (s = 'ZS') then
1066 Result := ucSpaceSeparator
1067 else if (s = 'ZL') then
1068 Result := ucLineSeparator
1069 else if (s = 'ZP') then
1070 Result := ucParagraphSeparator
1071 else
1072
1073 if (s = 'CC') then
1074 Result := ucControl
1075 else if (s = 'CF') then
1076 Result := ucFormat
1077 else if (s = 'CS') then
1078 Result := ucSurrogate
1079 else if (s = 'CO') then
1080 Result := ucPrivateUse
1081 else
1082 Result := ucUnassigned;
1083 end;
1084
StringToCodePointnull1085 function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
1086 var
1087 s : string;
1088 begin
1089 s := Trim(ACP);
1090 Result := 0;
1091 if (Length(s) > 0) and (s <> '#') then
1092 Result := StrToInt('$' + s);
1093 end;
1094
1095 {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
1096 begin
1097 case ACodePoint of
1098 $0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
1099 $0020 : Result := True;// White_Space # Zs SPACE
1100 $0085 : Result := True;// White_Space # Cc <control-0085>
1101 $00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
1102 $1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
1103 $180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
1104 $2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
1105 $2028 : Result := True;// White_Space # Zl LINE SEPARATOR
1106 $2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
1107 $202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
1108 $205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
1109 $3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
1110 else
1111 Result := False;
1112 end;
1113 end;}
IsWhiteSpacenull1114 function IsWhiteSpace(
1115 const ACodePoint : TUnicodeCodePoint;
1116 const AWhiteSpaces : TCodePointRecArray
1117 ) : Boolean;
1118 var
1119 i : Integer;
1120 p : ^TCodePointRec;
1121 begin
1122 p := @AWhiteSpaces[Low(AWhiteSpaces)];
1123 for i := Low(AWhiteSpaces) to High(AWhiteSpaces) do begin
1124 if (p^.LineType = 0) then begin
1125 if (p^.CodePoint = ACodePoint) then
1126 exit(True);
1127 end else begin
1128 if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then
1129 exit(True);
1130 end;
1131 Inc(p);
1132 end;
1133 Result := False;
1134 end;
1135
NormalizeBlockNamenull1136 function NormalizeBlockName(const AName : string) : string;
1137 var
1138 i, c, k : Integer;
1139 s : string;
1140 begin
1141 c := Length(AName);
1142 SetLength(Result,c);
1143 s := LowerCase(AName);
1144 k := 0;
1145 for i := 1 to c do begin
1146 if (s[1] in ['a'..'z','0'..'9','-']) then begin
1147 k := k + 1;
1148 Result[k] := s[i];
1149 end;
1150 end;
1151 SetLength(Result,k);
1152 end;
1153
1154 procedure ParseBlokcs(
1155 ADataAStream : TMemoryStream;
1156 var ABlocks : TBlocks
1157 );
1158 const
1159 LINE_LENGTH = 1024;
1160 DATA_LENGTH = 25000;
1161 var
1162 p : PAnsiChar;
1163 actualDataLen : Integer;
1164 bufferLength, bufferPos, lineLength, linePos : Integer;
1165 line : ansistring;
1166
NextLinenull1167 function NextLine() : Boolean;
1168 var
1169 locOldPos : Integer;
1170 locOldPointer : PAnsiChar;
1171 begin
1172 Result := False;
1173 locOldPointer := p;
1174 locOldPos := bufferPos;
1175 while (bufferPos < bufferLength) and (p^ <> #10) do begin
1176 Inc(p);
1177 Inc(bufferPos);
1178 end;
1179 if (locOldPos = bufferPos) and (p^ = #10) then begin
1180 lineLength := 0;
1181 Inc(p);
1182 Inc(bufferPos);
1183 linePos := 1;
1184 Result := True;
1185 end else if (locOldPos < bufferPos) then begin
1186 lineLength := (bufferPos - locOldPos);
1187 Move(locOldPointer^,line[1],lineLength);
1188 if (p^ = #10) then begin
1189 Dec(lineLength);
1190 Inc(p);
1191 Inc(bufferPos);
1192 end;
1193 linePos := 1;
1194 Result := True;
1195 end;
1196 end;
1197
NextTokennull1198 function NextToken() : ansistring;
1199 var
1200 k : Integer;
1201 begin
1202 k := linePos;
1203 if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
1204 Inc(linePos);
1205 Result := Copy(line,k,(linePos-k));
1206 exit;
1207 end;
1208 while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1209 Inc(linePos);
1210 if (linePos > k) then begin
1211 if (line[linePos] in [';','#','.']) then
1212 Result := Copy(line,k,(linePos-k))
1213 else
1214 Result := Copy(line,k,(linePos-k+1));
1215 Result := Trim(Result);
1216 end else begin
1217 Result := '';
1218 end;
1219 end;
1220
1221 procedure ParseLine();
1222 var
1223 locData : TBlockItemRec;
1224 s : ansistring;
1225 begin
1226 s := NextToken();
1227 if (s = '') or (s[1] = '#') then
1228 exit;
1229 locData.RangeStart := StrToInt('$'+s);
1230 s := NextToken();
1231 if (s <> '.') then
1232 raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1233 s := NextToken();
1234 if (s <> '.') then
1235 raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1236 s := NextToken();
1237 locData.RangeEnd := StrToInt('$'+s);
1238 s := NextToken();
1239 if (s <> ';') then
1240 raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
1241 locData.Name := Trim(NextToken());
1242 locData.CanonicalName := NormalizeBlockName(locData.Name);
1243 if (Length(ABlocks) <= actualDataLen) then
1244 SetLength(ABlocks,Length(ABlocks)*2);
1245 ABlocks[actualDataLen] := locData;
1246 Inc(actualDataLen);
1247 end;
1248
1249 procedure Prepare();
1250 begin
1251 SetLength(ABlocks,DATA_LENGTH);
1252 actualDataLen := 0;
1253 bufferLength := ADataAStream.Size;
1254 bufferPos := 0;
1255 p := ADataAStream.Memory;
1256 lineLength := 0;
1257 SetLength(line,LINE_LENGTH);
1258 end;
1259
1260 begin
1261 Prepare();
1262 while NextLine() do
1263 ParseLine();
1264 SetLength(ABlocks,actualDataLen);
1265 end;
1266
1267 procedure ParseProps(
1268 ADataAStream : TMemoryStream;
1269 var APropList : TPropListLineRecArray
1270 );
1271 const
1272 LINE_LENGTH = 1024;
1273 DATA_LENGTH = 25000;
1274 var
1275 p : PAnsiChar;
1276 actualDataLen : Integer;
1277 bufferLength, bufferPos, lineLength, linePos : Integer;
1278 line : ansistring;
1279
NextLinenull1280 function NextLine() : Boolean;
1281 var
1282 locOldPos : Integer;
1283 locOldPointer : PAnsiChar;
1284 begin
1285 Result := False;
1286 locOldPointer := p;
1287 locOldPos := bufferPos;
1288 while (bufferPos < bufferLength) and (p^ <> #10) do begin
1289 Inc(p);
1290 Inc(bufferPos);
1291 end;
1292 if (locOldPos = bufferPos) and (p^ = #10) then begin
1293 lineLength := 0;
1294 Inc(p);
1295 Inc(bufferPos);
1296 linePos := 1;
1297 Result := True;
1298 end else if (locOldPos < bufferPos) then begin
1299 lineLength := (bufferPos - locOldPos);
1300 Move(locOldPointer^,line[1],lineLength);
1301 if (p^ = #10) then begin
1302 Dec(lineLength);
1303 Inc(p);
1304 Inc(bufferPos);
1305 end;
1306 linePos := 1;
1307 Result := True;
1308 end;
1309 end;
1310
NextTokennull1311 function NextToken() : ansistring;
1312 var
1313 k : Integer;
1314 begin
1315 k := linePos;
1316 if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
1317 Inc(linePos);
1318 Result := Copy(line,k,(linePos-k));
1319 exit;
1320 end;
1321 while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1322 Inc(linePos);
1323 if (linePos > k) then begin
1324 if (line[linePos] in [';','#','.']) then
1325 Result := Copy(line,k,(linePos-k))
1326 else
1327 Result := Copy(line,k,(linePos-k+1));
1328 Result := Trim(Result);
1329 end else begin
1330 Result := '';
1331 end;
1332 end;
1333
1334 procedure ParseLine();
1335 var
1336 locCP : Cardinal;
1337 locData : TPropListLineRec;
1338 s : ansistring;
1339 begin
1340 s := NextToken();
1341 if (s = '') or (s[1] = '#') then
1342 exit;
1343 locCP := StrToInt('$'+s);
1344 s := NextToken();
1345 if (s = ';') then begin
1346 locData.CodePoint.LineType := 0;
1347 locData.CodePoint.CodePoint := locCP;
1348 end else begin
1349 if (s = '') or (s <> '.') or (NextToken() <> '.') then
1350 raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
1351 locData.CodePoint.LineType := 1;
1352 locData.CodePoint.StartCodePoint := locCP;
1353 locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
1354 s := NextToken();
1355 if (s <> ';') then
1356 raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
1357 end;
1358 locData.PropName := Trim(NextToken());
1359 if (Length(APropList) <= actualDataLen) then
1360 SetLength(APropList,Length(APropList)*2);
1361 APropList[actualDataLen] := locData;
1362 Inc(actualDataLen);
1363 end;
1364
1365 procedure Prepare();
1366 begin
1367 SetLength(APropList,DATA_LENGTH);
1368 actualDataLen := 0;
1369 bufferLength := ADataAStream.Size;
1370 bufferPos := 0;
1371 p := ADataAStream.Memory;
1372 lineLength := 0;
1373 SetLength(line,LINE_LENGTH);
1374 end;
1375
1376 begin
1377 Prepare();
1378 while NextLine() do
1379 ParseLine();
1380 SetLength(APropList,actualDataLen);
1381 end;
1382
FindCodePointsByPropertynull1383 function FindCodePointsByProperty(
1384 const APropName : string;
1385 const APropList : TPropListLineRecArray
1386 ) : TCodePointRecArray;
1387 var
1388 r : TCodePointRecArray;
1389 i, k : Integer;
1390 s : string;
1391 begin
1392 k := 0;
1393 r := nil;
1394 s := LowerCase(Trim(APropName));
1395 for i := Low(APropList) to High(APropList) do begin
1396 if (LowerCase(APropList[i].PropName) = s) then begin
1397 if (k >= Length(r)) then begin
1398 if (k = 0) then
1399 SetLength(r,24)
1400 else
1401 SetLength(r,(2*Length(r)));
1402 end;
1403 r[k] := APropList[i].CodePoint;
1404 Inc(k);
1405 end;
1406 end;
1407 SetLength(r,k);
1408 Result := r;
1409 end;
1410
1411 procedure ParseHangulSyllableTypes(
1412 ADataAStream : TMemoryStream;
1413 var ACodePointList : TCodePointRecArray
1414 );
1415 const
1416 LINE_LENGTH = 1024;
1417 DATA_LENGTH = 25000;
1418 var
1419 p : PAnsiChar;
1420 actualDataLen : Integer;
1421 bufferLength, bufferPos, lineLength, linePos : Integer;
1422 line : ansistring;
1423
NextLinenull1424 function NextLine() : Boolean;
1425 var
1426 locOldPos : Integer;
1427 locOldPointer : PAnsiChar;
1428 begin
1429 Result := False;
1430 locOldPointer := p;
1431 locOldPos := bufferPos;
1432 while (bufferPos < bufferLength) and (p^ <> #10) do begin
1433 Inc(p);
1434 Inc(bufferPos);
1435 end;
1436 if (locOldPos = bufferPos) and (p^ = #10) then begin
1437 lineLength := 0;
1438 Inc(p);
1439 Inc(bufferPos);
1440 linePos := 1;
1441 Result := True;
1442 end else if (locOldPos < bufferPos) then begin
1443 lineLength := (bufferPos - locOldPos);
1444 Move(locOldPointer^,line[1],lineLength);
1445 if (p^ = #10) then begin
1446 Dec(lineLength);
1447 Inc(p);
1448 Inc(bufferPos);
1449 end;
1450 linePos := 1;
1451 Result := True;
1452 end;
1453 end;
1454
NextTokennull1455 function NextToken() : ansistring;
1456 var
1457 k : Integer;
1458 begin
1459 k := linePos;
1460 if (linePos < lineLength) and (line[linePos] = '.') then begin
1461 Inc(linePos);
1462 while (linePos < lineLength) and (line[linePos] = '.') do begin
1463 Inc(linePos);
1464 end;
1465 Result := Copy(line,k,(linePos-k));
1466 exit;
1467 end;
1468 while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
1469 Inc(linePos);
1470 if (linePos > k) then begin
1471 if (line[linePos] in [';','#','.']) then
1472 Result := Copy(line,k,(linePos-k))
1473 else
1474 Result := Copy(line,k,(linePos-k+1));
1475 Result := Trim(Result);
1476 end else begin
1477 Result := '';
1478 end;
1479 //Inc(linePos);
1480 end;
1481
1482 procedure ParseLine();
1483 var
1484 locData : TCodePointRec;
1485 s : ansistring;
1486 begin
1487 s := NextToken();
1488 if (s = '') or (s[1] = '#') then
1489 exit;
1490 locData.CodePoint := StrToInt('$'+s);
1491 s := NextToken();
1492 if (s = '') or (s[1] in [';','#']) then begin
1493 locData.LineType := 0;
1494 end else begin
1495 if (s <> '..') then
1496 raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
1497 locData.StartCodePoint := locData.CodePoint;
1498 locData.EndCodePoint := StrToInt('$'+NextToken());
1499 locData.LineType := 1;
1500 end;
1501 if (Length(ACodePointList) <= actualDataLen) then
1502 SetLength(ACodePointList,Length(ACodePointList)*2);
1503 ACodePointList[actualDataLen] := locData;
1504 Inc(actualDataLen);
1505 end;
1506
1507 procedure Prepare();
1508 begin
1509 SetLength(ACodePointList,DATA_LENGTH);
1510 actualDataLen := 0;
1511 bufferLength := ADataAStream.Size;
1512 bufferPos := 0;
1513 p := ADataAStream.Memory;
1514 lineLength := 0;
1515 SetLength(line,LINE_LENGTH);
1516 end;
1517
1518 begin
1519 Prepare();
1520 while NextLine() do
1521 ParseLine();
1522 SetLength(ACodePointList,actualDataLen);
1523 end;
1524
IsHangulSyllablenull1525 function IsHangulSyllable(
1526 const ACodePoint : TUnicodeCodePoint;
1527 const AHangulList : TCodePointRecArray
1528 ) : Boolean;
1529 var
1530 i : Integer;
1531 p : ^TCodePointRec;
1532 begin
1533 Result := False;
1534 p := @AHangulList[Low(AHangulList)];
1535 for i := Low(AHangulList) to High(AHangulList) do begin
1536 if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
1537 ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
1538 then begin
1539 Result := True;
1540 Break;
1541 end;
1542 Inc(p);
1543 end;
1544 end;
1545
IndexOfnull1546 function IndexOf(
1547 const AProp : TPropRec;
1548 const APropList : TPropRecArray;
1549 const AActualLen : Integer
1550 ) : Integer;overload;
1551 var
1552 i : Integer;
1553 p : PPropRec;
1554 begin
1555 Result := -1;
1556 if (AActualLen > 0) then begin
1557 p := @APropList[0];
1558 for i := 0 to AActualLen - 1 do begin
1559 if (AProp.Category = p^.Category) and
1560 (AProp.CCC = p^.CCC) and
1561 (AProp.NumericIndex = p^.NumericIndex) and
1562 (AProp.SimpleUpperCase = p^.SimpleUpperCase) and
1563 (AProp.SimpleLowerCase = p^.SimpleLowerCase) and
1564 (AProp.WhiteSpace = p^.WhiteSpace) and
1565 //
1566 (AProp.DecompositionID = p^.DecompositionID) and
1567 (* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
1568 ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
1569 *)
1570 (AProp.HangulSyllable = p^.HangulSyllable)
1571 then begin
1572 Result := i;
1573 Break;
1574 end;
1575 Inc(p);
1576 end;
1577 end;
1578 end;
1579
IndexOfnull1580 function IndexOf(
1581 const AItem : TUnicodeCodePointArray;
1582 const AList : TDecompositionArray
1583 ) : Integer;overload;
1584 var
1585 p : TUnicodeCodePointArray;
1586 i : Integer;
1587 begin
1588 Result := -1;
1589 if (Length(AList) = 0) then
1590 exit;
1591 for i := Low(AList) to High(AList) do begin
1592 p := AList[i];
1593 if (Length(p) = Length(AItem)) then begin
1594 if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
1595 exit(i);
1596 end;
1597 end;
1598 Result := -1;
1599 end;
1600
IndexOfnull1601 function IndexOf(
1602 const AItem : TNumericValue;
1603 const AList : TNumericValueArray;
1604 const AActualLen : Integer
1605 ) : Integer;overload;
1606 var
1607 p : ^TNumericValue;
1608 i : Integer;
1609 begin
1610 Result := -1;
1611 if (AActualLen = 0) then
1612 exit;
1613 p := @AList[Low(AList)];
1614 for i := Low(AList) to AActualLen - 1 do begin
1615 if (AItem = p^) then
1616 exit(i);
1617 Inc(p);
1618 end;
1619 Result := -1;
1620 end;
1621
1622 procedure Parse_UnicodeData(
1623 ADataAStream : TMemoryStream;
1624 var APropList : TPropRecArray;
1625 var ANumericTable : TNumericValueArray;
1626 var ADataLineList : TDataLineRecArray;
1627 var ADecomposition : TDecompositionArray;
1628 const AHangulList : TCodePointRecArray;
1629 const AWhiteSpaces : TCodePointRecArray
1630 );
1631 const
1632 LINE_LENGTH = 1024;
1633 PROP_LENGTH = 5000;
1634 DATA_LENGTH = 25000;
1635 var
1636 p : PAnsiChar;
1637 bufferLength, bufferPos : Integer;
1638 actualPropLen, actualDataLen, actualNumLen : Integer;
1639 line : ansistring;
1640 lineLength, linePos : Integer;
1641
NextLinenull1642 function NextLine() : Boolean;
1643 var
1644 locOldPos : Integer;
1645 locOldPointer : PAnsiChar;
1646 begin
1647 Result := False;
1648 locOldPointer := p;
1649 locOldPos := bufferPos;
1650 while (bufferPos < bufferLength) and (p^ <> #10) do begin
1651 Inc(p);
1652 Inc(bufferPos);
1653 end;
1654 if (locOldPos < bufferPos) then begin
1655 lineLength := (bufferPos - locOldPos);
1656 Move(locOldPointer^,line[1],lineLength);
1657 if (p^ = #10) then begin
1658 Dec(lineLength);
1659 Inc(p);
1660 Inc(bufferPos);
1661 end;
1662 if (lineLength > 7) then begin
1663 linePos := 1;
1664 Result := True;
1665 end;
1666 end;
1667 end;
1668
NextTokennull1669 function NextToken() : ansistring;
1670 var
1671 k : Integer;
1672 begin
1673 k := linePos;
1674 while (linePos < lineLength) and not(line[linePos] in [';','#']) do
1675 Inc(linePos);
1676 if (linePos > k) then begin
1677 if (line[linePos] in [';','#']) then
1678 Result := Copy(line,k,(linePos-k))
1679 else
1680 Result := Copy(line,k,(linePos-k+1));
1681 Result := Trim(Result);
1682 end else begin
1683 Result := '';
1684 end;
1685 Inc(linePos);
1686 end;
1687
ParseCanonicalDecompositionnull1688 function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
1689 var
1690 locStr, ks : ansistring;
1691 k0,k : Integer;
1692 begin
1693 SetLength(Result,0);
1694 locStr := UpperCase(Trim(AStr));
1695 if (locStr = '') or (locStr[1] = '<') then
1696 exit;
1697 k0 := 1;
1698 k := 1;
1699 while (k <= Length(locStr)) do begin
1700 while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
1701 inc(k);
1702 ks := Trim(Copy(locStr,k0,k-k0));
1703 SetLength(Result,Length(Result)+1);
1704 Result[Length(Result)-1] := StringToCodePoint(ks);
1705 Inc(k);
1706 k0 := k;
1707 end;
1708 end;
1709
1710 procedure ParseLine();
1711 var
1712 locCP : TUnicodeCodePoint;
1713 locProp : TPropRec;
1714 locData : TDataLineRec;
1715 s : ansistring;
1716 locRangeStart, locRangeEnd : Boolean;
1717 k : Integer;
1718 locDecompItem : TUnicodeCodePointArray;
1719 numVal : TNumericValue;
1720 begin
1721 FillChar(locData,SizeOf(locData),#0);
1722 FillChar(locProp,SizeOf(locProp),#0);
1723 locCP := StrToInt('$'+NextToken());
1724 s := NextToken();
1725 locRangeStart := AnsiEndsText(', First>',s);
1726 if locRangeStart then
1727 locRangeEnd := False
1728 else
1729 locRangeEnd := AnsiEndsText(', Last>',s);
1730 if locRangeStart then begin
1731 locData.LineType := 1;
1732 locData.StartCodePoint := locCP;
1733 end else if locRangeEnd then begin
1734 ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
1735 exit;
1736 //locData.EndCodePoint := locCP;
1737 end else begin
1738 locData.LineType := 0;
1739 locData.CodePoint := locCP;
1740 end;
1741 locProp.Category := StrToCategory(NextToken());
1742 locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
1743 NextToken();//Bidi_Class
1744 s := NextToken();//Decomposition_Type
1745 locDecompItem := ParseCanonicalDecomposition(s);
1746 if (Length(locDecompItem) = 0) then
1747 locProp.DecompositionID := -1
1748 else begin
1749 locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
1750 if (locProp.DecompositionID = -1) then begin
1751 k := Length(ADecomposition);
1752 locProp.DecompositionID := k;
1753 SetLength(ADecomposition,k+1);
1754 ADecomposition[k] := locDecompItem;
1755 end;
1756 end;
1757
1758 numVal := EvaluateFloat(NextToken());
1759 if (numVal <> Double(0.0)) then begin
1760 NextToken();
1761 NextToken();
1762 end else begin
1763 s := NextToken();
1764 if (s <> '') then
1765 numVal := EvaluateFloat(s);
1766 s := NextToken();
1767 if (numVal = Double(0.0)) then
1768 numVal := EvaluateFloat(s);
1769 end;
1770 k := IndexOf(numVal,ANumericTable,actualNumLen);
1771 if (k = -1) then begin
1772 if (actualNumLen >= Length(ANumericTable)) then
1773 SetLength(ANumericTable,(actualNumLen*2));
1774 ANumericTable[actualNumLen] := numVal;
1775 k := actualNumLen;
1776 Inc(actualNumLen);
1777 end;
1778 locProp.NumericIndex := k;
1779
1780 NextToken();//Bidi_Mirroed
1781 NextToken();//Unicode_l_Name
1782 NextToken();//ISO_Comment
1783 locProp.SimpleUpperCase := StringToCodePoint(NextToken());
1784 locProp.SimpleLowerCase := StringToCodePoint(NextToken());
1785 NextToken();//Simple_Title_Case_Mapping
1786 locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
1787 locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
1788 k := IndexOf(locProp,APropList,actualPropLen);
1789 if (k = -1) then begin
1790 k := actualPropLen;
1791 locProp.PropID := k{ + 1};
1792 APropList[k] := locProp;
1793 Inc(actualPropLen);
1794 end;
1795 locData.PropID := k;
1796 if (actualDataLen >= Length(ADataLineList)) then
1797 SetLength(ADataLineList,(2*Length(ADataLineList)));
1798 ADataLineList[actualDataLen] := locData;
1799 Inc(actualDataLen);
1800 end;
1801
1802 procedure Prepare();
1803 var
1804 r : TPropRec;
1805 begin
1806 SetLength(APropList,PROP_LENGTH);
1807 actualPropLen := 0;
1808 SetLength(ADataLineList,DATA_LENGTH);
1809 actualDataLen := 0;
1810 bufferLength := ADataAStream.Size;
1811 bufferPos := 0;
1812 p := ADataAStream.Memory;
1813 lineLength := 0;
1814 SetLength(line,LINE_LENGTH);
1815 SetLength(ANumericTable,500);
1816 actualNumLen := 0;
1817
1818 FillChar(r,SizeOf(r),#0);
1819 r.PropID := 0;
1820 r.Category := ucUnassigned;
1821 r.DecompositionID := -1;
1822 r.NumericIndex := 0;
1823 APropList[0] := r;
1824 Inc(actualPropLen);
1825 ANumericTable[0] := 0;
1826 Inc(actualNumLen);
1827 end;
1828
1829 begin
1830 Prepare();
1831 while NextLine() do
1832 ParseLine();
1833 SetLength(APropList,actualPropLen);
1834 SetLength(ADataLineList,actualDataLen);
1835 SetLength(ANumericTable,actualNumLen);
1836 end;
1837
GetPropIDnull1838 function GetPropID(
1839 ACodePoint : TUnicodeCodePoint;
1840 const ADataLineList : TDataLineRecArray
1841 ) : Cardinal;
1842 var
1843 i : Integer;
1844 p : PDataLineRec;
1845 begin
1846 Result := 0;
1847 p := @ADataLineList[Low(ADataLineList)];
1848 for i := Low(ADataLineList) to High(ADataLineList) do begin
1849 if (p^.LineType = 0) then begin
1850 if (p^.CodePoint = ACodePoint) then begin
1851 Result := p^.PropID;
1852 Break;
1853 end;
1854 end else begin
1855 if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
1856 Result := p^.PropID;
1857 Break;
1858 end;
1859 end;
1860 Inc(p);
1861 end;
1862 end;
1863
1864 procedure MakeDecomposition(
1865 const ARawData : TDecompositionArray;
1866 var ABook : TDecompositionBook
1867 );
1868 var
1869 i, c, locPos : Integer;
1870 locItem : TUnicodeCodePointArray;
1871 begin
1872 c := 0;
1873 for i := Low(ARawData) to High(ARawData) do
1874 c := c + Length(ARawData[i]);
1875 SetLength(ABook.CodePoints,c);
1876 SetLength(ABook.Index,Length(ARawData));
1877 locPos := 0;
1878 for i := Low(ARawData) to High(ARawData) do begin
1879 locItem := ARawData[i];
1880 ABook.Index[i].StartPosition := locPos;
1881 ABook.Index[i].Length := Length(locItem);
1882 Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
1883 locPos := locPos + Length(locItem);
1884 end;
1885 end;
1886
1887 type
1888 PBmpSecondTableItem = ^TBmpSecondTableItem;
IndexOfnull1889 function IndexOf(
1890 const AItem : PBmpSecondTableItem;
1891 const ATable : TBmpSecondTable;
1892 const ATableActualLength : Integer
1893 ) : Integer;overload;
1894 var
1895 i : Integer;
1896 p : PBmpSecondTableItem;
1897 begin
1898 Result := -1;
1899 if (ATableActualLength > 0) then begin
1900 p := @ATable[0];
1901 for i := 0 to ATableActualLength - 1 do begin
1902 if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
1903 Result := i;
1904 Break;
1905 end;
1906 Inc(p);
1907 end;
1908 end;
1909 end;
1910
1911 procedure MakeBmpTables(
1912 var AFirstTable : TBmpFirstTable;
1913 var ASecondTable : TBmpSecondTable;
1914 const ADataLineList : TDataLineRecArray
1915 );
1916 var
1917 locLowByte, locHighByte : Byte;
1918 locTableItem : TBmpSecondTableItem;
1919 locCP : TUnicodeCodePoint;
1920 i, locSecondActualLen : Integer;
1921 begin
1922 SetLength(ASecondTable,120);
1923 locSecondActualLen := 0;
1924 for locHighByte := 0 to 255 do begin
1925 FillChar(locTableItem,SizeOf(locTableItem),#0);
1926 for locLowByte := 0 to 255 do begin
1927 locCP := (locHighByte * 256) + locLowByte;
1928 locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
1929 end;
1930 i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
1931 if (i = -1) then begin
1932 if (locSecondActualLen = Length(ASecondTable)) then
1933 SetLength(ASecondTable,locSecondActualLen + 50);
1934 i := locSecondActualLen;
1935 ASecondTable[i] := locTableItem;
1936 Inc(locSecondActualLen);
1937 end;
1938 AFirstTable[locHighByte] := i;
1939 end;
1940 SetLength(ASecondTable,locSecondActualLen);
1941 end;
1942
1943 type
1944 P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
IndexOfnull1945 function IndexOf(
1946 const AItem : P3lvlBmp3TableItem;
1947 const ATable : T3lvlBmp3Table;
1948 const ATableActualLength : Integer
1949 ) : Integer;overload;
1950 var
1951 i : Integer;
1952 p : P3lvlBmp3TableItem;
1953 begin
1954 Result := -1;
1955 if (ATableActualLength > 0) then begin
1956 p := @ATable[0];
1957 for i := 0 to ATableActualLength - 1 do begin
1958 if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
1959 Result := i;
1960 Break;
1961 end;
1962 Inc(p);
1963 end;
1964 end;
1965 end;
1966
1967 type
1968 P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
IndexOfnull1969 function IndexOf(
1970 const AItem : P3lvlBmp2TableItem;
1971 const ATable : T3lvlBmp2Table
1972 ) : Integer;overload;
1973 var
1974 i : Integer;
1975 p : P3lvlBmp2TableItem;
1976 begin
1977 Result := -1;
1978 if (Length(ATable) > 0) then begin
1979 p := @ATable[0];
1980 for i := 0 to Length(ATable) - 1 do begin
1981 if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
1982 Result := i;
1983 Break;
1984 end;
1985 Inc(p);
1986 end;
1987 end;
1988 end;
1989 procedure MakeBmpTables3Levels(
1990 var AFirstTable : T3lvlBmp1Table;
1991 var ASecondTable : T3lvlBmp2Table;
1992 var AThirdTable : T3lvlBmp3Table;
1993 const ADataLineList : TDataLineRecArray
1994 );
1995 var
1996 locLowByte0, locLowByte1, locHighByte : Byte;
1997 locTableItem2 : T3lvlBmp2TableItem;
1998 locTableItem3 : T3lvlBmp3TableItem;
1999 locCP : TUnicodeCodePoint;
2000 i, locThirdActualLen : Integer;
2001 begin
2002 SetLength(AThirdTable,120);
2003 locThirdActualLen := 0;
2004 for locHighByte := 0 to 255 do begin
2005 FillChar(locTableItem2,SizeOf(locTableItem2),#0);
2006 for locLowByte0 := 0 to 15 do begin
2007 FillChar(locTableItem3,SizeOf(locTableItem3),#0);
2008 for locLowByte1 := 0 to 15 do begin
2009 locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
2010 locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
2011 end;
2012 i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
2013 if (i = -1) then begin
2014 if (locThirdActualLen = Length(AThirdTable)) then
2015 SetLength(AThirdTable,locThirdActualLen + 50);
2016 i := locThirdActualLen;
2017 AThirdTable[i] := locTableItem3;
2018 Inc(locThirdActualLen);
2019 end;
2020 locTableItem2[locLowByte0] := i;
2021 end;
2022 i := IndexOf(@locTableItem2,ASecondTable);
2023 if (i = -1) then begin
2024 i := Length(ASecondTable);
2025 SetLength(ASecondTable,(i + 1));
2026 ASecondTable[i] := locTableItem2;
2027 end;
2028 AFirstTable[locHighByte] := i;
2029 end;
2030 SetLength(AThirdTable,locThirdActualLen);
2031 end;
2032
2033 procedure GenerateLicenceText(ADest : TStream);
2034 var
2035 s : ansistring;
2036 begin
2037 s := SLicenseText + sLineBreak + sLineBreak;
2038 ADest.Write(s[1],Length(s));
2039 end;
2040
2041 procedure GenerateBmpTables(
2042 ADest : TStream;
2043 var AFirstTable : TBmpFirstTable;
2044 var ASecondTable : TBmpSecondTable
2045 );
2046 procedure AddLine(const ALine : ansistring);
2047 var
2048 buffer : ansistring;
2049 begin
2050 buffer := ALine + sLineBreak;
2051 ADest.Write(buffer[1],Length(buffer));
2052 end;
2053
2054 var
2055 i, j, c : Integer;
2056 locLine : string;
2057 begin
2058 AddLine('const');
2059 AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
2060 locLine := '';
2061 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
2062 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
2063 if (((i+1) mod 16) = 0) then begin
2064 locLine := ' ' + locLine;
2065 AddLine(locLine);
2066 locLine := '';
2067 end;
2068 end;
2069 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
2070 locLine := ' ' + locLine;
2071 AddLine(locLine);
2072 AddLine(' );' + sLineBreak);
2073
2074 AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
2075 c := High(ASecondTable);
2076 for i := Low(ASecondTable) to c do begin
2077 locLine := '';
2078 for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
2079 locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
2080 if (((j+1) mod 16) = 0) then begin
2081 if (i = c) and (j = 255) then
2082 Delete(locLine,Length(locLine),1);
2083 locLine := ' ' + locLine;
2084 AddLine(locLine);
2085 locLine := '';
2086 end;
2087 end;
2088 end;
2089 AddLine(' );' + sLineBreak);
2090 end;
2091
2092 //----------------------------------
2093 procedure Generate3lvlBmpTables(
2094 ADest : TStream;
2095 var AFirstTable : T3lvlBmp1Table;
2096 var ASecondTable : T3lvlBmp2Table;
2097 var AThirdTable : T3lvlBmp3Table
2098 );
2099
2100 procedure AddLine(const ALine : ansistring);
2101 var
2102 buffer : ansistring;
2103 begin
2104 buffer := ALine + sLineBreak;
2105 ADest.Write(buffer[1],Length(buffer));
2106 end;
2107
2108 var
2109 i, j, c : Integer;
2110 locLine : string;
2111 begin
2112 AddLine('const');
2113 AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
2114 locLine := '';
2115 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
2116 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
2117 if (((i+1) mod 16) = 0) then begin
2118 locLine := ' ' + locLine;
2119 AddLine(locLine);
2120 locLine := '';
2121 end;
2122 end;
2123 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
2124 locLine := ' ' + locLine;
2125 AddLine(locLine);
2126 AddLine(' );' + sLineBreak);
2127
2128 AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
2129 c := High(ASecondTable);
2130 for i := Low(ASecondTable) to c do begin
2131 locLine := '(';
2132 for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
2133 locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
2134 Delete(locLine,Length(locLine),1);
2135 locLine := ' ' + locLine + ')';
2136 if (i < c) then
2137 locLine := locLine + ',';
2138 AddLine(locLine);
2139 end;
2140 AddLine(' );' + sLineBreak);
2141
2142 AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
2143 c := High(AThirdTable);
2144 for i := Low(AThirdTable) to c do begin
2145 locLine := '(';
2146 for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
2147 locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
2148 Delete(locLine,Length(locLine),1);
2149 locLine := ' ' + locLine + ')';
2150 if (i < c) then
2151 locLine := locLine + ',';
2152 AddLine(locLine);
2153 end;
2154 AddLine(' );' + sLineBreak);
2155 end;
2156
UInt24ToStrnull2157 function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
2158 begin
2159 if (AEndian = ekBig) then
2160 Result := Format(
2161 '(byte2 : $%s; byte1 : $%s; byte0 : $%s;)',
2162 [ IntToHex(AValue.byte2,2), IntToHex(AValue.byte1,2),
2163 IntToHex(AValue.byte0,2)
2164 ]
2165 )
2166 else
2167 Result := Format(
2168 '(byte0 : $%s; byte1 : $%s; byte2 : $%s;)',
2169 [ IntToHex(AValue.byte0,2), IntToHex(AValue.byte1,2),
2170 IntToHex(AValue.byte2,2)
2171 ]
2172 );
2173 end;
2174
2175 procedure GeneratePropTable(
2176 ADest : TStream;
2177 const APropList : TPropRecArray;
2178 const AEndian : TEndianKind
2179 );
2180
2181 procedure AddLine(const ALine : ansistring);
2182 var
2183 buffer : ansistring;
2184 begin
2185 buffer := ALine + sLineBreak;
2186 ADest.Write(buffer[1],Length(buffer));
2187 end;
2188
2189 var
2190 i : Integer;
2191 locLine : string;
2192 p : PPropRec;
2193 begin
2194 AddLine('');
2195 AddLine('const');
2196 AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
2197 AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
2198 p := @APropList[0];
2199 for i := Low(APropList) to High(APropList) - 1 do begin
2200 locLine := ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
2201 ' CCC : ' + IntToStr(p^.CCC) + ';' +
2202 ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
2203 ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
2204 ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
2205 ' DecompositionID : ' + IntToStr(p^.DecompositionID) + '),';
2206 AddLine(locLine);
2207 Inc(p);
2208 end;
2209 locLine := //' (Category : TUnicodeCategory.' + GetEnumName(pti,Ord(p^.Category)) + ';' +
2210 ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
2211 ' CCC : ' + IntToStr(p^.CCC) + ';' +
2212 ' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
2213 ' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
2214 ' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
2215 ' DecompositionID : ' + IntToStr(p^.DecompositionID) + ')';
2216 AddLine(locLine);
2217 AddLine(' );' + sLineBreak);
2218 end;
2219
2220 procedure GenerateNumericTable(
2221 ADest : TStream;
2222 const ANumList : TNumericValueArray;
2223 const ACompleteUnit : Boolean
2224 );
2225
2226 procedure AddLine(const ALine : ansistring);
2227 var
2228 buffer : ansistring;
2229 begin
2230 buffer := ALine + sLineBreak;
2231 ADest.Write(buffer[1],Length(buffer));
2232 end;
2233
2234 var
2235 i : Integer;
2236 locLine : string;
2237 p : ^TNumericValue;
2238 begin
2239 if ACompleteUnit then begin
2240 GenerateLicenceText(ADest);
2241 AddLine('unit unicodenumtable;');
2242 AddLine('interface');
2243 AddLine('');
2244 end;
2245 AddLine('');
2246 AddLine('const');
2247 AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
2248 AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
2249 locLine := '';
2250 p := @ANumList[0];
2251 for i := Low(ANumList) to High(ANumList) - 1 do begin
2252 locLine := locLine + FloatToStr(p^,FS) + ' ,';
2253 if (i > 0) and ((i mod 8) = 0) then begin
2254 AddLine(' ' + locLine);
2255 locLine := '';
2256 end;
2257 Inc(p);
2258 end;
2259 locLine := locLine + FloatToStr(p^,FS);
2260 AddLine(' ' + locLine);
2261 AddLine(' );' + sLineBreak);
2262 if ACompleteUnit then begin
2263 AddLine('');
2264 AddLine('implementation');
2265 AddLine('');
2266 AddLine('end.');
2267 end;
2268 end;
2269
2270 procedure GenerateDecompositionBookTable(
2271 ADest : TStream;
2272 const ABook : TDecompositionBook;
2273 const AEndian : TEndianKind
2274 );
2275
2276 procedure AddLine(const ALine : ansistring);
2277 var
2278 buffer : ansistring;
2279 begin
2280 buffer := ALine + sLineBreak;
2281 ADest.Write(buffer[1],Length(buffer));
2282 end;
2283
2284 var
2285 i, k : Integer;
2286 p : ^TDecompositionIndexRec;
2287 cp : ^TUnicodeCodePoint;
2288 cp24 : UInt24;
2289 locLine : string;
2290 begin
2291 AddLine('const');
2292 AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
2293 AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
2294 AddLine('type');
2295 AddLine(' TDecompositionIndexRec = packed record');
2296 AddLine(' StartPosition : Word;');
2297 AddLine(' Length : Byte;');
2298 AddLine(' end;');
2299 AddLine(' TDecompositionBookRec = packed record');
2300 AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
2301 AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
2302 AddLine(' end;');
2303 AddLine('const');
2304 AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
2305 p := @ABook.Index[0];
2306 AddLine(' Index : (// Index BEGIN');
2307 k := 0;
2308 locLine := ' ';
2309 for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
2310 locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
2311 ' Length : ' + IntToStr(p^.Length) + '), ';
2312 k := k + 1;
2313 if (k >= 2) then begin
2314 AddLine(locLine);
2315 locLine := ' ';
2316 k := 0;
2317 end;
2318 Inc(p);
2319 end;
2320 locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
2321 ' Length : ' + IntToStr(p^.Length) + ')';
2322 AddLine(locLine);
2323 AddLine(' ); // Index END');
2324
2325 cp := @ABook.CodePoints[0];
2326 AddLine(' CodePoints : (// CodePoints BEGIN');
2327 k := 0;
2328 locLine := ' ';
2329 for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
2330 cp24 := cp^;
2331 locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
2332 Inc(k);
2333 if (k >= 16) then begin
2334 AddLine(locLine);
2335 k := 0;
2336 locLine := ' ';
2337 end;
2338 Inc(cp);
2339 end;
2340 cp24 := cp^;
2341 locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
2342 AddLine(locLine);
2343 AddLine(' ); // CodePoints END');
2344 AddLine(' );' + sLineBreak);
2345 end;
2346
2347 procedure GenerateOutBmpTable(
2348 ADest : TStream;
2349 const AList : TDataLineRecArray
2350 );
2351 procedure AddLine(const ALine : ansistring);
2352 var
2353 buffer : ansistring;
2354 begin
2355 buffer := ALine + sLineBreak;
2356 ADest.Write(buffer[1],Length(buffer));
2357 end;
2358
2359 var
2360 i, j : Integer;
2361 locLine : string;
2362 p : PDataLineRec;
2363 begin
2364 AddLine('');
2365 //AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
2366 //AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
2367 j := -1;
2368 p := @AList[0];
2369 for i := 0 to Length(AList) - 1 do begin
2370 if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
2371 (p^.StartCodePoint > $FFFF)
2372 then begin
2373 j := i;
2374 Break;
2375 end;
2376 Inc(p);
2377 end;
2378 if (j < 0) then
2379 exit;
2380
2381 for i := j to Length(AList) - 2 do begin
2382 locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
2383 ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
2384 ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
2385 AddLine(locLine);
2386 Inc(p);
2387 end;
2388 locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
2389 ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
2390 ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
2391 AddLine(locLine);
2392 AddLine(' );' + sLineBreak);
2393 end;
2394
Compressnull2395 function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
2396 var
2397 k, i, locResLen : Integer;
2398 q, p, pr : PDataLineRec;
2399 k_end : TUnicodeCodePoint;
2400 begin
2401 locResLen := 1;
2402 SetLength(Result,Length(AData));
2403 FillChar(Result[0],Length(Result),#0);
2404 Result[0] := AData[0];
2405 q := @AData[0];
2406 k := 0;
2407 while (k < Length(AData)) do begin
2408 if (q^.LineType = 0) then
2409 k_end := q^.CodePoint
2410 else
2411 k_end := q^.EndCodePoint;
2412 if ((k+1) = Length(AData)) then begin
2413 i := k;
2414 end else begin
2415 p := @AData[k+1];
2416 i := k +1;
2417 while (i < (Length(AData) {- 1})) do begin
2418 if (p^.PropID <> q^.PropID) then begin
2419 i := i - 1;
2420 Break;
2421 end;
2422 if (p^.LineType = 0) then begin
2423 if (p^.CodePoint <> (k_end + 1)) then begin
2424 i := i - 1;
2425 Break;
2426 end;
2427 Inc(k_end);
2428 end else begin
2429 if (p^.StartCodePoint <> (k_end + 1)) then begin
2430 i := i - 1;
2431 Break;
2432 end;
2433 k_end := p^.EndCodePoint;
2434 end;
2435 Inc(i);
2436 Inc(p);
2437 end;
2438 end;
2439 {if (i = k) then begin
2440 Result[locResLen] := q^;
2441 Inc(locResLen);
2442 end else begin }
2443 p := @AData[i];
2444 pr := @Result[locResLen];
2445 pr^.PropID := q^.PropID;
2446 if (q^.LineType = 0) then
2447 pr^.StartCodePoint := q^.CodePoint
2448 else
2449 pr^.StartCodePoint := q^.StartCodePoint;
2450 pr^.LineType := 1;
2451 if (p^.LineType = 0) then
2452 pr^.EndCodePoint := p^.CodePoint
2453 else
2454 pr^.EndCodePoint := p^.EndCodePoint;
2455 Inc(locResLen);
2456 //end;
2457 k := i + 1;
2458 if (k = Length(AData)) then
2459 Break;
2460 q := @AData[k];
2461 end;
2462 SetLength(Result,locResLen);
2463 end;
2464
2465 procedure ParseUCAFile(
2466 ADataAStream : TMemoryStream;
2467 var ABook : TUCA_DataBook
2468 );
2469 const
2470 LINE_LENGTH = 1024;
2471 DATA_LENGTH = 25000;
2472 var
2473 p : PAnsiChar;
2474 actualDataLen : Integer;
2475 bufferLength, bufferPos, lineLength, linePos : Integer;
2476 line : ansistring;
2477
NextLinenull2478 function NextLine() : Boolean;
2479 var
2480 locOldPos : Integer;
2481 locOldPointer : PAnsiChar;
2482 begin
2483 Result := False;
2484 locOldPointer := p;
2485 locOldPos := bufferPos;
2486 while (bufferPos < bufferLength) and (p^ <> #10) do begin
2487 Inc(p);
2488 Inc(bufferPos);
2489 end;
2490 if (locOldPos = bufferPos) and (p^ = #10) then begin
2491 lineLength := 0;
2492 Inc(p);
2493 Inc(bufferPos);
2494 linePos := 1;
2495 Result := True;
2496 end else if (locOldPos < bufferPos) then begin
2497 lineLength := (bufferPos - locOldPos) + 1;
2498 Move(locOldPointer^,line[1],lineLength);
2499 if (p^ = #10) then begin
2500 Dec(lineLength);
2501 Inc(p);
2502 Inc(bufferPos);
2503 end;
2504 linePos := 1;
2505 Result := True;
2506 end;
2507 end;
2508
2509 procedure SkipSpace();
2510 begin
2511 while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
2512 Inc(linePos);
2513 end;
2514
NextTokennull2515 function NextToken() : ansistring;
2516 const C_SEPARATORS = [';','#','.','[',']','*','@'];
2517 var
2518 k : Integer;
2519 begin
2520 SkipSpace();
2521 k := linePos;
2522 if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
2523 Result := line[linePos];
2524 Inc(linePos);
2525 exit;
2526 end;
2527 while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
2528 Inc(linePos);
2529 if (linePos > k) then begin
2530 if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
2531 Result := Copy(line,k,(linePos-k))
2532 else
2533 Result := Copy(line,k,(linePos-k+1));
2534 Result := Trim(Result);
2535 end else begin
2536 Result := '';
2537 end;
2538 end;
2539
2540 procedure CheckToken(const AToken : string);
2541 var
2542 a, b : string;
2543 begin
2544 a := LowerCase(Trim(AToken));
2545 b := LowerCase(Trim(NextToken()));
2546 if (a <> b) then
2547 raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]);
2548 end;
2549
ReadWeightBlocknull2550 function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
2551 var
2552 s :AnsiString;
2553 k : Integer;
2554 begin
2555 Result := False;
2556 s := NextToken();
2557 if (s <> '[') then
2558 exit;
2559 s := NextToken();
2560 if (s = '.') then
2561 ADest.Variable := False
2562 else begin
2563 if (s <> '*') then
2564 raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
2565 ADest.Variable := True;
2566 end;
2567 ADest.Weights[0] := StrToInt('$'+NextToken());
2568 for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin
2569 CheckToken('.');
2570 ADest.Weights[k] := StrToInt('$'+NextToken());
2571 end;
2572 CheckToken(']');
2573 Result := True;
2574 end;
2575
2576 procedure ParseHeaderVar();
2577 var
2578 s,ss : string;
2579 k : Integer;
2580 begin
2581 s := NextToken();
2582 if (s = 'version') then begin
2583 ss := '';
2584 while True do begin
2585 s := NextToken();
2586 if (s = '') then
2587 Break;
2588 ss := ss + s;
2589 end;
2590 ABook.Version := ss;
2591 end else if (s = 'variable') then begin
2592 if (s = 'blanked') then
2593 ABook.VariableWeight := ucaBlanked
2594 else if (s = 'non-ignorable') then
2595 ABook.VariableWeight := ucaNonIgnorable
2596 else if (s = 'shifted') then
2597 ABook.VariableWeight := ucaShifted
2598 else if (s = 'shift-trimmed') then
2599 ABook.VariableWeight := ucaShiftedTrimmed
2600 else if (s = 'ignoresp') then
2601 ABook.VariableWeight := ucaIgnoreSP
2602 else
2603 raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
2604 end else if (s = 'backwards') or (s = 'forwards') then begin
2605 ss := s;
2606 s := NextToken();
2607 k := StrToInt(s);
2608 if (k < 1) or (k > 4) then
2609 raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
2610 ABook.Backwards[k] := (s = 'backwards');
2611 end;
2612 end;
2613
2614 procedure ParseLine();
2615 var
2616 locData : ^TUCA_LineRec;
2617 s : ansistring;
2618 kc : Integer;
2619 begin
2620 if (Length(ABook.Lines) <= actualDataLen) then
2621 SetLength(ABook.Lines,Length(ABook.Lines)*2);
2622 locData := @ABook.Lines[actualDataLen];
2623 s := NextToken();
2624 if (s = '') or (s[1] = '#') then
2625 exit;
2626 if (s[1] = '@') then begin
2627 ParseHeaderVar();
2628 exit;
2629 end;
2630 SetLength(locData^.CodePoints,10);
2631 locData^.CodePoints[0] := StrToInt('$'+s);
2632 kc := 1;
2633 while True do begin
2634 s := Trim(NextToken());
2635 if (s = '') then
2636 exit;
2637 if (s = ';') then
2638 Break;
2639 locData^.CodePoints[kc] := StrToInt('$'+s);
2640 Inc(kc);
2641 end;
2642 if (kc = 0) then
2643 exit;
2644 SetLength(locData^.CodePoints,kc);
2645 SetLength(locData^.Weights,24);
2646 kc := 0;
2647 while ReadWeightBlock(locData^.Weights[kc]) do begin
2648 Inc(kc);
2649 end;
2650 SetLength(locData^.Weights,kc);
2651 Inc(actualDataLen);
2652 end;
2653
2654 procedure Prepare();
2655 var
2656 k : Integer;
2657 begin
2658 ABook.VariableWeight := ucaShifted;
2659 for k := Low(ABook.Backwards) to High(ABook.Backwards) do
2660 ABook.Backwards[k] := False;
2661 SetLength(ABook.Lines,DATA_LENGTH);
2662 actualDataLen := 0;
2663 bufferLength := ADataAStream.Size;
2664 bufferPos := 0;
2665 p := ADataAStream.Memory;
2666 lineLength := 0;
2667 SetLength(line,LINE_LENGTH);
2668 end;
2669
2670 begin
2671 Prepare();
2672 while NextLine() do
2673 ParseLine();
2674 SetLength(ABook.Lines,actualDataLen);
2675 end;
2676
2677 procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
2678 var
2679 i : Integer;
2680 begin
2681 Write(ATitle, ' ');
2682 for i := 0 to Length(X) - 1 do
2683 Write(X[i],' ');
2684 WriteLn();
2685 end;
2686
IsGreaterThannull2687 function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
2688 var
2689 i, hb : Integer;
2690 begin
2691 if (A=B) then
2692 exit(0);
2693 Result := 1;
2694 hb := Length(B^.CodePoints) - 1;
2695 for i := 0 to Length(A^.CodePoints) - 1 do begin
2696 if (i > hb) then
2697 exit;
2698 if (A^.CodePoints[i] < B^.CodePoints[i]) then
2699 exit(-1);
2700 if (A^.CodePoints[i] > B^.CodePoints[i]) then
2701 exit(1);
2702 end;
2703 if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
2704 exit(0);
2705 exit(-1);
2706 end;
2707
2708 procedure QuickSort(
2709 var AList : TUCA_DataBookIndex;
2710 L, R : Longint;
2711 ABook : PUCA_DataBook
2712 );overload;
2713 var
2714 I, J : Longint;
2715 P, Q : Integer;
2716 begin
2717 repeat
2718 I := L;
2719 J := R;
2720 P := AList[ (L + R) div 2 ];
2721 repeat
2722 while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
2723 I := I + 1;
2724 while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
2725 J := J - 1;
2726 If I <= J then
2727 begin
2728 Q := AList[I];
2729 AList[I] := AList[J];
2730 AList[J] := Q;
2731 I := I + 1;
2732 J := J - 1;
2733 end;
2734 until I > J;
2735 // sort the smaller range recursively
2736 // sort the bigger range via the loop
2737 // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
2738 if J - L < R - I then
2739 begin
2740 if L < J then
2741 QuickSort(AList, L, J, ABook);
2742 L := I;
2743 end
2744 else
2745 begin
2746 if I < R then
2747 QuickSort(AList, I, R, ABook);
2748 R := J;
2749 end;
2750 until L >= R;
2751 end;
2752
CreateIndexnull2753 function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
2754 var
2755 r : TUCA_DataBookIndex;
2756 i, c : Integer;
2757 begin
2758 c := Length(ABook^.Lines);
2759 SetLength(r,c);
2760 for i := 0 to c - 1 do
2761 r[i] := i;
2762 QuickSort(r,0,c-1,ABook);
2763 Result := r;
2764 end;
2765
2766 function ConstructContextTree(
2767 const AContext : PUCA_LineContextRec;
2768 var ADestBuffer;
2769 const ADestBufferLength : Cardinal
2770 ) : PUCA_PropItemContextTreeRec;forward;
ConstructItemnull2771 function ConstructItem(
2772 AItem : PUCA_PropItemRec;
2773 ACodePoint : Cardinal;
2774 AValid : Byte;
2775 AChildCount : Byte;
2776 const AWeights : array of TUCA_WeightRec;
2777 const AStoreCP : Boolean;
2778 const AContext : PUCA_LineContextRec;
2779 const ADeleted : Boolean
2780 ) : Cardinal;
2781 var
2782 i : Integer;
2783 p : PUCA_PropItemRec;
2784 pw : PUCA_PropWeights;
2785 pb : PByte;
2786 hasContext : Boolean;
2787 contextTree : PUCA_PropItemContextTreeRec;
2788 wl : Integer;
2789 begin
2790 p := AItem;
2791 p^.Size := 0;
2792 p^.Flags := 0;
2793 p^.WeightLength := 0;
2794 SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
2795 p^.ChildCount := AChildCount;
2796 hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
2797 if hasContext then
2798 wl := 0
2799 else
2800 wl := Length(AWeights);
2801 p^.WeightLength := wl;
2802 if (wl = 0) then begin
2803 Result := SizeOf(TUCA_PropItemRec);
2804 if ADeleted then
2805 SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
2806 end else begin
2807 Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
2808 pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
2809 Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
2810 pb := pb + 2;
2811 if (AWeights[0].Weights[1] > High(Byte)) then begin
2812 Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
2813 pb := pb + 2;
2814 end else begin
2815 SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
2816 pb^ := AWeights[0].Weights[1];
2817 pb := pb + 1;
2818 Result := Result - 1;
2819 end;
2820 if (AWeights[0].Weights[2] > High(Byte)) then begin
2821 Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
2822 pb := pb + 2;
2823 end else begin
2824 SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
2825 pb^ := AWeights[0].Weights[2];
2826 pb := pb + 1;
2827 Result := Result - 1;
2828 end;
2829 pw := PUCA_PropWeights(pb);
2830 for i := 1 to wl - 1 do begin
2831 pw^.Weights[0] := AWeights[i].Weights[0];
2832 pw^.Weights[1] := AWeights[i].Weights[1];
2833 pw^.Weights[2] := AWeights[i].Weights[2];
2834 //pw^.Variable := BoolToByte(AWeights[i].Variable);
2835 Inc(pw);
2836 end;
2837 end;
2838 hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
2839 if AStoreCP or hasContext then begin
2840 Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
2841 Result := Result + SizeOf(UInt24);
2842 SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
2843 end;
2844 if hasContext then begin
2845 contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
2846 Result := Result + Cardinal(contextTree^.Size);
2847 SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
2848 end;
2849 p^.Size := Result;
2850 end;
2851
CalcCharChildCountnull2852 function CalcCharChildCount(
2853 const ASearchStartPos : Integer;
2854 const ALinePos : Integer;
2855 const ABookLines : PUCA_LineRec;
2856 const AMaxLength : Integer;
2857 const ABookIndex : TUCA_DataBookIndex;
2858 out ALineCount : Word
2859 ) : Byte;
2860 var
2861 locLinePos : Integer;
2862 p : PUCA_LineRec;
2863
2864 procedure IncP();
2865 begin
2866 Inc(locLinePos);
2867 p := @ABookLines[ABookIndex[locLinePos]];
2868 end;
2869
2870 var
2871 i, locTargetLen, locTargetBufferSize, r : Integer;
2872 locTarget : array[0..127] of Cardinal;
2873 locLastChar : Cardinal;
2874 begin
2875 locLinePos := ALinePos;
2876 p := @ABookLines[ABookIndex[locLinePos]];
2877 locTargetLen := ASearchStartPos;
2878 locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
2879 Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
2880 if (Length(p^.CodePoints) = ASearchStartPos) then begin
2881 r := 0;
2882 locLastChar := High(Cardinal);
2883 end else begin
2884 r := 1;
2885 locLastChar := p^.CodePoints[ASearchStartPos];
2886 end;
2887 i := 1;
2888 while (i < AMaxLength) do begin
2889 IncP();
2890 if (Length(p^.CodePoints) < locTargetLen) then
2891 Break;
2892 if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
2893 Break;
2894 if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
2895 Inc(r);
2896 locLastChar := p^.CodePoints[ASearchStartPos];
2897 end;
2898 Inc(i);
2899 end;
2900 ALineCount := i;
2901 Result := r;
2902 end;
2903
BuildTrienull2904 function BuildTrie(
2905 const ALinePos : Integer;
2906 const ABookLines : PUCA_LineRec;
2907 const AMaxLength : Integer;
2908 const ABookIndex : TUCA_DataBookIndex
2909 ) : PTrieNode;
2910 var
2911 p : PUCA_LineRec;
2912 root : PTrieNode;
2913 ki, k, i : Integer;
2914 key : array of TKeyType;
2915 begin
2916 k := ABookIndex[ALinePos];
2917 p := @ABookLines[k];
2918 if (Length(p^.CodePoints) = 1) then
2919 root := CreateNode(p^.CodePoints[0],k)
2920 else
2921 root := CreateNode(p^.CodePoints[0]);
2922
2923 for i := ALinePos to ALinePos + AMaxLength - 1 do begin
2924 k := ABookIndex[i];
2925 p := @ABookLines[k];
2926 if (Length(p^.CodePoints) = 1) then begin
2927 InsertWord(root,p^.CodePoints[0],k);
2928 end else begin
2929 SetLength(key,Length(p^.CodePoints));
2930 for ki := 0 to Length(p^.CodePoints) - 1 do
2931 key[ki] := p^.CodePoints[ki];
2932 InsertWord(root,key,k);
2933 end;
2934 end;
2935 Result := root;
2936 end;
2937
BoolToBytenull2938 function BoolToByte(AValue : Boolean): Byte;inline;
2939 begin
2940 if AValue then
2941 Result := 1
2942 else
2943 Result := 0;
2944 end;
2945
InternalConstructFromTrienull2946 function InternalConstructFromTrie(
2947 const ATrie : PTrieNode;
2948 const AItem : PUCA_PropItemRec;
2949 const ALines : PUCA_LineRec;
2950 const AStoreCp : Boolean
2951 ) : Cardinal;
2952 var
2953 i : Integer;
2954 size : Cardinal;
2955 p : PUCA_PropItemRec;
2956 n : PTrieNode;
2957 begin
2958 if (ATrie = nil) then
2959 exit(0);
2960 p := AItem;
2961 n := ATrie;
2962 if n^.DataNode then
2963 size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
2964 else
2965 size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
2966 Result := size;
2967 if (n^.ChildCount > 0) then begin
2968 for i := 0 to n^.ChildCount - 1 do begin
2969 p := PUCA_PropItemRec(PtrUInt(p) + size);
2970 size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
2971 Result := Result + size;
2972 end;
2973 end;
2974 AItem^.Size := Result;
2975 end;
2976
ConstructFromTrienull2977 function ConstructFromTrie(
2978 const ATrie : PTrieNode;
2979 const AItem : PUCA_PropItemRec;
2980 const ALines : PUCA_LineRec
2981 ) : Integer;
2982 begin
2983 Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
2984 end;
2985
2986 procedure MakeUCA_Props(
2987 ABook : PUCA_DataBook;
2988 out AProps : PUCA_PropBook
2989 );
2990 var
2991 propIndexCount : Integer;
2992
2993 procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
2994 begin
2995 AProps^.Index[propIndexCount].CodePoint := ACodePoint;
2996 AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
2997 propIndexCount := propIndexCount + 1;
2998 end;
2999
3000 var
3001 locIndex : TUCA_DataBookIndex;
3002 i, c, k, kc : Integer;
3003 p, p1, p2 : PUCA_PropItemRec;
3004 lines, pl1, pl2 : PUCA_LineRec;
3005 childCount, lineCount : Word;
3006 size : Cardinal;
3007 trieRoot : PTrieNode;
3008 MaxChildCount, MaxSize : Cardinal;
3009 childList : array of PUCA_PropItemRec;
3010 begin
3011 locIndex := CreateIndex(ABook);
3012 i := Length(ABook^.Lines);
3013 i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
3014 AProps := AllocMem(SizeOf(TUCA_PropBook));
3015 AProps^.ItemSize := i;
3016 AProps^.Items := AllocMem(i);
3017 propIndexCount := 0;
3018 SetLength(AProps^.Index,Length(ABook^.Lines));
3019 p := AProps^.Items;
3020 lines := @ABook^.Lines[0];
3021 c := Length(locIndex);
3022 i := 0;
3023 MaxChildCount := 0; MaxSize := 0;
3024 while (i < (c-1)) do begin
3025 pl1 := @lines[locIndex[i]];
3026 if not pl1^.Stored then begin
3027 i := i + 1;
3028 Continue;
3029 end;
3030 pl2 := @lines[locIndex[i+1]];
3031 if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
3032 if (Length(pl1^.CodePoints) = 1) then begin
3033 size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
3034 CapturePropIndex(p,pl1^.CodePoints[0]);
3035 p := PUCA_PropItemRec(PtrUInt(p) + size);
3036 if (size > MaxSize) then
3037 MaxSize := size;
3038 end else begin
3039 kc := Length(pl1^.CodePoints);
3040 SetLength(childList,kc);
3041 for k := 0 to kc - 2 do begin
3042 size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
3043 if (k = 0) then
3044 CapturePropIndex(p,pl1^.CodePoints[k]);
3045 childList[k] := p;
3046 p := PUCA_PropItemRec(PtrUInt(p) + size);
3047 end;
3048 size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
3049 childList[kc-1] := p;
3050 p := PUCA_PropItemRec(PtrUInt(p) + size);
3051 for k := kc - 2 downto 0 do begin
3052 p1 := childList[k];
3053 p2 := childList[k+1];
3054 p1^.Size := p1^.Size + p2^.Size;
3055 end;
3056 if (p1^.Size > MaxSize) then
3057 MaxSize := p1^.Size;
3058 end;
3059 lineCount := 1;
3060 end else begin
3061 childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
3062 if (childCount < 1) then
3063 raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
3064 if (lineCount < 2) then
3065 raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
3066 if (childCount > MaxChildCount) then
3067 MaxChildCount := childCount;
3068 trieRoot := BuildTrie(i,lines,lineCount,locIndex);
3069 size := ConstructFromTrie(trieRoot,p,lines);
3070 CapturePropIndex(p,pl1^.CodePoints[0]);
3071 FreeNode(trieRoot);
3072 p := PUCA_PropItemRec(PtrUInt(p) + size);
3073 if (size > MaxSize) then
3074 MaxSize := size;
3075 end;
3076 i := i + lineCount;
3077 end;
3078 if (i = (c-1)) then begin
3079 pl1 := @lines[locIndex[i]];
3080 if (Length(pl1^.CodePoints) = 1) then begin
3081 size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
3082 CapturePropIndex(p,pl1^.CodePoints[0]);
3083 p := PUCA_PropItemRec(PtrUInt(p) + size);
3084 if (size > MaxSize) then
3085 MaxSize := size;
3086 end else begin
3087 kc := Length(pl1^.CodePoints);
3088 SetLength(childList,kc);
3089 for k := 0 to kc - 2 do begin
3090 size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
3091 if (k = 0) then
3092 CapturePropIndex(p,pl1^.CodePoints[0]);
3093 childList[k] := p;
3094 p := PUCA_PropItemRec(PtrUInt(p) + size);
3095 end;
3096 size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
3097 childList[kc-1] := p;
3098 p := PUCA_PropItemRec(PtrUInt(p) + size);
3099 for i := kc - 2 downto 0 do begin
3100 p1 := childList[i];
3101 p2 := childList[i+1];
3102 p1^.Size := p1^.Size + p2^.Size;
3103 end;
3104 if (size > MaxSize) then
3105 MaxSize := size;
3106 end;
3107 end;
3108 //c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
3109 c := UInt64(PtrUInt(p)) - UInt64(PtrUInt(AProps^.Items));
3110 ReAllocMem(AProps^.Items,c);
3111 AProps^.ItemSize := c;
3112 SetLength(AProps^.Index,propIndexCount);
3113 AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
3114 ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
3115
3116 k := 0;
3117 c := High(Word);
3118 for i := 0 to Length(ABook^.Lines) - 1 do begin
3119 if (Length(ABook^.Lines[i].Weights) > 0) then begin
3120 if (ABook^.Lines[i].Weights[0].Variable) then begin
3121 if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
3122 k := ABook^.Lines[i].Weights[0].Weights[0];
3123 if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
3124 c := ABook^.Lines[i].Weights[0].Weights[0];
3125 end;
3126 end;
3127 end;
3128 AProps^.VariableHighLimit := k;
3129 AProps^.VariableLowLimit := c;
3130 end;
3131
3132 procedure FreeUcaBook(var ABook : PUCA_PropBook);
3133 var
3134 p : PUCA_PropBook;
3135 begin
3136 if (ABook = nil) then
3137 exit;
3138 p := ABook;
3139 ABook := nil;
3140 p^.Index := nil;
3141 FreeMem(p^.Items,p^.ItemSize);
3142 FreeMem(p^.ItemsOtherEndian,p^.ItemSize);
3143 FreeMem(p,SizeOf(p^));
3144 end;
3145
IndexOfnull3146 function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
3147 var
3148 i : Integer;
3149 begin
3150 for i := 0 to Length(APropBook^.Index) - 1 do begin
3151 if (ACodePoint = APropBook^.Index[i].CodePoint) then
3152 exit(i);
3153 end;
3154 Result := -1;
3155 end;
3156
3157 type
3158 PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
IndexOfnull3159 function IndexOf(
3160 const AItem : PucaBmpSecondTableItem;
3161 const ATable : TucaBmpSecondTable;
3162 const ATableActualLength : Integer
3163 ) : Integer;overload;
3164 var
3165 i : Integer;
3166 p : PucaBmpSecondTableItem;
3167 begin
3168 Result := -1;
3169 if (ATableActualLength > 0) then begin
3170 p := @ATable[0];
3171 for i := 0 to ATableActualLength - 1 do begin
3172 if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
3173 Result := i;
3174 Break;
3175 end;
3176 Inc(p);
3177 end;
3178 end;
3179 end;
3180
3181 procedure MakeUCA_BmpTables(
3182 var AFirstTable : TucaBmpFirstTable;
3183 var ASecondTable : TucaBmpSecondTable;
3184 const APropBook : PUCA_PropBook
3185 );
3186 var
3187 locLowByte, locHighByte : Byte;
3188 locTableItem : TucaBmpSecondTableItem;
3189 locCP : TUnicodeCodePoint;
3190 i, locSecondActualLen : Integer;
3191 k : Integer;
3192 begin
3193 SetLength(ASecondTable,120);
3194 locSecondActualLen := 0;
3195 for locHighByte := 0 to 255 do begin
3196 FillChar(locTableItem,SizeOf(locTableItem),#0);
3197 for locLowByte := 0 to 255 do begin
3198 locCP := (locHighByte * 256) + locLowByte;
3199 k := IndexOf(locCP,APropBook);
3200 if (k = -1) then
3201 k := 0
3202 else
3203 k := APropBook^.Index[k].Position + 1;
3204 locTableItem[locLowByte] := k;
3205 end;
3206 i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3207 if (i = -1) then begin
3208 if (locSecondActualLen = Length(ASecondTable)) then
3209 SetLength(ASecondTable,locSecondActualLen + 50);
3210 i := locSecondActualLen;
3211 ASecondTable[i] := locTableItem;
3212 Inc(locSecondActualLen);
3213 end;
3214 AFirstTable[locHighByte] := i;
3215 end;
3216 SetLength(ASecondTable,locSecondActualLen);
3217 end;
3218
ToUCS4null3219 function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
3220 begin
3221 //copied from utf16toutf32
3222 Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
3223 end;
3224
3225 procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
3226 begin
3227 AHighS := Word((AValue - $10000) shr 10 + $d800);
3228 ALowS := Word((AValue - $10000) and $3ff + $dc00);
3229 end;
3230
3231 type
3232 PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
IndexOfnull3233 function IndexOf(
3234 const AItem : PucaOBmpSecondTableItem;
3235 const ATable : TucaOBmpSecondTable;
3236 const ATableActualLength : Integer
3237 ) : Integer;overload;
3238 var
3239 i : Integer;
3240 p : PucaOBmpSecondTableItem;
3241 begin
3242 Result := -1;
3243 if (ATableActualLength > 0) then begin
3244 p := @ATable[0];
3245 for i := 0 to ATableActualLength - 1 do begin
3246 if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
3247 Result := i;
3248 Break;
3249 end;
3250 Inc(p);
3251 end;
3252 end;
3253 end;
3254
3255 procedure MakeUCA_OBmpTables(
3256 var AFirstTable : TucaOBmpFirstTable;
3257 var ASecondTable : TucaOBmpSecondTable;
3258 const APropBook : PUCA_PropBook
3259 );
3260 var
3261 locLowByte, locHighByte : Word;
3262 locTableItem : TucaOBmpSecondTableItem;
3263 locCP : TUnicodeCodePoint;
3264 i, locSecondActualLen : Integer;
3265 k : Integer;
3266 begin
3267 if (Length(ASecondTable) = 0) then
3268 SetLength(ASecondTable,2000);
3269 locSecondActualLen := 0;
3270 for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
3271 FillChar(locTableItem,SizeOf(locTableItem),#0);
3272 for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
3273 locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
3274 k := IndexOf(locCP,APropBook);
3275 if (k = -1) then
3276 k := 0
3277 else
3278 k := APropBook^.Index[k].Position + 1;
3279 locTableItem[locLowByte] := k;
3280 end;
3281 i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3282 if (i = -1) then begin
3283 if (locSecondActualLen = Length(ASecondTable)) then
3284 SetLength(ASecondTable,locSecondActualLen + 50);
3285 i := locSecondActualLen;
3286 ASecondTable[i] := locTableItem;
3287 Inc(locSecondActualLen);
3288 end;
3289 AFirstTable[locHighByte] := i;
3290 end;
3291 SetLength(ASecondTable,locSecondActualLen);
3292 end;
3293
GetPropPositionnull3294 function GetPropPosition(
3295 const AHighS,
3296 ALowS : Word;
3297 const AFirstTable : PucaOBmpFirstTable;
3298 const ASecondTable : PucaOBmpSecondTable
3299 ): Integer;inline;overload;
3300 begin
3301 Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
3302 end;
3303
3304 procedure GenerateUCA_Head(
3305 ADest : TStream;
3306 ABook : PUCA_DataBook;
3307 AProps : PUCA_PropBook
3308 );
3309
3310 procedure AddLine(const ALine : ansistring);
3311 var
3312 buffer : ansistring;
3313 begin
3314 buffer := ALine + sLineBreak;
3315 ADest.Write(buffer[1],Length(buffer));
3316 end;
3317
3318 begin
3319 AddLine('const');
3320 //AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
3321 AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
3322 AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
3323 AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
3324 AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
3325 AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
3326 AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
3327 AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
3328 AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
3329
3330 AddLine('');
3331 end;
3332
3333 procedure GenerateUCA_BmpTables(
3334 AStream,
3335 ANativeEndianStream,
3336 ANonNativeEndianStream : TStream;
3337 var AFirstTable : TucaBmpFirstTable;
3338 var ASecondTable : TucaBmpSecondTable
3339 );
3340
3341 procedure AddLine(AOut : TStream; const ALine : ansistring);
3342 var
3343 buffer : ansistring;
3344 begin
3345 buffer := ALine + sLineBreak;
3346 AOut.Write(buffer[1],Length(buffer));
3347 end;
3348
3349 var
3350 i, j, c : Integer;
3351 locLine : string;
3352 value : UInt24;
3353 begin
3354 AddLine(AStream,'const');
3355 AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
3356 locLine := '';
3357 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3358 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3359 if (((i+1) mod 16) = 0) then begin
3360 locLine := ' ' + locLine;
3361 AddLine(AStream,locLine);
3362 locLine := '';
3363 end;
3364 end;
3365 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3366 locLine := ' ' + locLine;
3367 AddLine(AStream,locLine);
3368 AddLine(AStream,' );' + sLineBreak);
3369
3370 AddLine(ANativeEndianStream,'const');
3371 AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3372 c := High(ASecondTable);
3373 for i := Low(ASecondTable) to c do begin
3374 locLine := '';
3375 for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3376 value := ASecondTable[i][j];
3377 locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
3378 if (((j+1) mod 2) = 0) then begin
3379 if (i = c) and (j = 255) then
3380 Delete(locLine,Length(locLine),1);
3381 locLine := ' ' + locLine;
3382 AddLine(ANativeEndianStream,locLine);
3383 locLine := '';
3384 end;
3385 end;
3386 end;
3387 AddLine(ANativeEndianStream,' );' + sLineBreak);
3388
3389 AddLine(ANonNativeEndianStream,'const');
3390 AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3391 c := High(ASecondTable);
3392 for i := Low(ASecondTable) to c do begin
3393 locLine := '';
3394 for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3395 value := ASecondTable[i][j];
3396 locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
3397 if (((j+1) mod 2) = 0) then begin
3398 if (i = c) and (j = 255) then
3399 Delete(locLine,Length(locLine),1);
3400 locLine := ' ' + locLine;
3401 AddLine(ANonNativeEndianStream,locLine);
3402 locLine := '';
3403 end;
3404 end;
3405 end;
3406 AddLine(ANonNativeEndianStream,' );' + sLineBreak);
3407 end;
3408
3409 procedure GenerateBinaryUCA_BmpTables(
3410 ANativeEndianStream,
3411 ANonNativeEndianStream : TStream;
3412 var AFirstTable : TucaBmpFirstTable;
3413 var ASecondTable : TucaBmpSecondTable
3414 );
3415 var
3416 i, j : Integer;
3417 value : UInt24;
3418 begin
3419 ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
3420 ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
3421 for i := Low(ASecondTable) to High(ASecondTable) do begin
3422 for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
3423 value := ASecondTable[i][j];
3424 ANativeEndianStream.Write(value,SizeOf(value));
3425 ReverseBytes(value,SizeOf(value));
3426 ANonNativeEndianStream.Write(value,SizeOf(value));
3427 end;
3428 end;
3429 end;
3430
3431 procedure GenerateUCA_PropTable(
3432 // WARNING : files must be generated for each endianess (Little / Big)
3433 ADest : TStream;
3434 const APropBook : PUCA_PropBook;
3435 const AEndian : TEndianKind
3436 );
3437
3438 procedure AddLine(const ALine : ansistring);
3439 var
3440 buffer : ansistring;
3441 begin
3442 buffer := ALine + sLineBreak;
3443 ADest.Write(buffer[1],Length(buffer));
3444 end;
3445
3446 var
3447 i, c : Integer;
3448 locLine : string;
3449 p : PByte;
3450 begin
3451 c := APropBook^.ItemSize;
3452 AddLine('const');
3453 AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
3454 locLine := '';
3455 if (AEndian = ENDIAN_NATIVE) then
3456 p := PByte(APropBook^.Items)
3457 else
3458 p := PByte(APropBook^.ItemsOtherEndian);
3459 for i := 0 to c - 2 do begin
3460 locLine := locLine + IntToStr(p[i]) + ',';
3461 if (((i+1) mod 60) = 0) then begin
3462 locLine := ' ' + locLine;
3463 AddLine(locLine);
3464 locLine := '';
3465 end;
3466 end;
3467 locLine := locLine + IntToStr(p[c-1]);
3468 locLine := ' ' + locLine;
3469 AddLine(locLine);
3470 AddLine(' );' + sLineBreak);
3471 end;
3472
3473 procedure GenerateBinaryUCA_PropTable(
3474 // WARNING : files must be generated for each endianess (Little / Big)
3475 ANativeEndianStream,
3476 ANonNativeEndianStream : TStream;
3477 const APropBook : PUCA_PropBook
3478 );
3479 begin
3480 ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
3481 ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
3482 end;
3483
3484 procedure GenerateUCA_OBmpTables(
3485 AStream,
3486 ANativeEndianStream,
3487 ANonNativeEndianStream : TStream;
3488 var AFirstTable : TucaOBmpFirstTable;
3489 var ASecondTable : TucaOBmpSecondTable
3490 );
3491
3492 procedure AddLine(AOut : TStream; const ALine : ansistring);
3493 var
3494 buffer : ansistring;
3495 begin
3496 buffer := ALine + sLineBreak;
3497 AOut.Write(buffer[1],Length(buffer));
3498 end;
3499
3500 var
3501 i, j, c : Integer;
3502 locLine : string;
3503 value : UInt24;
3504 begin
3505 AddLine(AStream,'const');
3506 AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
3507 locLine := '';
3508 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3509 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3510 if (((i+1) mod 16) = 0) then begin
3511 locLine := ' ' + locLine;
3512 AddLine(AStream,locLine);
3513 locLine := '';
3514 end;
3515 end;
3516 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3517 locLine := ' ' + locLine;
3518 AddLine(AStream,locLine);
3519 AddLine(AStream,' );' + sLineBreak);
3520
3521 AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3522 c := High(ASecondTable);
3523 for i := Low(ASecondTable) to c do begin
3524 locLine := '';
3525 for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3526 value := ASecondTable[i][j];
3527 locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
3528 if (((j+1) mod 2) = 0) then begin
3529 if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
3530 Delete(locLine,Length(locLine),1);
3531 locLine := ' ' + locLine;
3532 AddLine(ANativeEndianStream,locLine);
3533 locLine := '';
3534 end;
3535 end;
3536 end;
3537 AddLine(ANativeEndianStream,' );' + sLineBreak);
3538
3539 AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
3540 c := High(ASecondTable);
3541 for i := Low(ASecondTable) to c do begin
3542 locLine := '';
3543 for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3544 value := ASecondTable[i][j];
3545 locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
3546 if (((j+1) mod 2) = 0) then begin
3547 if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
3548 Delete(locLine,Length(locLine),1);
3549 locLine := ' ' + locLine;
3550 AddLine(ANonNativeEndianStream,locLine);
3551 locLine := '';
3552 end;
3553 end;
3554 end;
3555 AddLine(ANonNativeEndianStream,' );' + sLineBreak);
3556 end;
3557
3558 procedure GenerateBinaryUCA_OBmpTables(
3559 ANativeEndianStream,
3560 ANonNativeEndianStream : TStream;
3561 var AFirstTable : TucaOBmpFirstTable;
3562 var ASecondTable : TucaOBmpSecondTable
3563 );
3564 var
3565 i, j : Integer;
3566 locLine : string;
3567 wordValue : Word;
3568 value : UInt24;
3569 begin
3570 for i := Low(AFirstTable) to High(AFirstTable) do begin
3571 wordValue := AFirstTable[i];
3572 ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
3573 ReverseBytes(wordValue,SizeOf(wordValue));
3574 ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
3575 end;
3576
3577 for i := Low(ASecondTable) to High(ASecondTable) do begin
3578 for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
3579 value := ASecondTable[i][j];
3580 ANativeEndianStream.Write(value,SizeOf(value));
3581 ReverseBytes(value,SizeOf(value));
3582 ANonNativeEndianStream.Write(value,SizeOf(value));
3583 end;
3584 end;
3585 end;
3586
3587 type
3588 POBmpSecondTableItem = ^TOBmpSecondTableItem;
IndexOfnull3589 function IndexOf(
3590 const AItem : POBmpSecondTableItem;
3591 const ATable : TOBmpSecondTable;
3592 const ATableActualLength : Integer
3593 ) : Integer;overload;
3594 var
3595 i : Integer;
3596 p : POBmpSecondTableItem;
3597 begin
3598 Result := -1;
3599 if (ATableActualLength > 0) then begin
3600 p := @ATable[0];
3601 for i := 0 to ATableActualLength - 1 do begin
3602 if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
3603 Result := i;
3604 Break;
3605 end;
3606 Inc(p);
3607 end;
3608 end;
3609 end;
3610
3611 procedure MakeOBmpTables(
3612 var AFirstTable : TOBmpFirstTable;
3613 var ASecondTable : TOBmpSecondTable;
3614 const ADataLineList : TDataLineRecArray
3615 );
3616 var
3617 locLowByte, locHighByte : Word;
3618 locTableItem : TOBmpSecondTableItem;
3619 locCP : TUnicodeCodePoint;
3620 i, locSecondActualLen : Integer;
3621 begin
3622 SetLength(ASecondTable,2000);
3623 locSecondActualLen := 0;
3624 for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
3625 FillChar(locTableItem,SizeOf(locTableItem),#0);
3626 for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
3627 locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
3628 locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
3629 end;
3630 i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
3631 if (i = -1) then begin
3632 if (locSecondActualLen = Length(ASecondTable)) then
3633 SetLength(ASecondTable,locSecondActualLen + 50);
3634 i := locSecondActualLen;
3635 ASecondTable[i] := locTableItem;
3636 Inc(locSecondActualLen);
3637 end;
3638 AFirstTable[locHighByte] := i;
3639 end;
3640 SetLength(ASecondTable,locSecondActualLen);
3641 end;
3642
3643 type
3644 P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
IndexOfnull3645 function IndexOf(
3646 const AItem : P3lvlOBmp3TableItem;
3647 const ATable : T3lvlOBmp3Table;
3648 const ATableActualLength : Integer
3649 ) : Integer;overload;
3650 var
3651 i : Integer;
3652 p : P3lvlOBmp3TableItem;
3653 begin
3654 Result := -1;
3655 if (ATableActualLength > 0) then begin
3656 p := @ATable[0];
3657 for i := 0 to ATableActualLength - 1 do begin
3658 if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
3659 Result := i;
3660 Break;
3661 end;
3662 Inc(p);
3663 end;
3664 end;
3665 end;
3666
3667 type
3668 P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
IndexOfnull3669 function IndexOf(
3670 const AItem : P3lvlOBmp2TableItem;
3671 const ATable : T3lvlOBmp2Table
3672 ) : Integer;overload;
3673 var
3674 i : Integer;
3675 p : P3lvlOBmp2TableItem;
3676 begin
3677 Result := -1;
3678 if (Length(ATable) > 0) then begin
3679 p := @ATable[0];
3680 for i := 0 to Length(ATable) - 1 do begin
3681 if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
3682 Result := i;
3683 Break;
3684 end;
3685 Inc(p);
3686 end;
3687 end;
3688 end;
3689 procedure MakeOBmpTables3Levels(
3690 var AFirstTable : T3lvlOBmp1Table;
3691 var ASecondTable : T3lvlOBmp2Table;
3692 var AThirdTable : T3lvlOBmp3Table;
3693 const ADataLineList : TDataLineRecArray
3694 );
3695 var
3696 locLowByte0, locLowByte1, locHighByte : Word;
3697 locTableItem2 : T3lvlOBmp2TableItem;
3698 locTableItem3 : T3lvlOBmp3TableItem;
3699 locCP : TUnicodeCodePoint;
3700 i, locThirdActualLen : Integer;
3701 begin
3702 SetLength(AThirdTable,120);
3703 locThirdActualLen := 0;
3704 for locHighByte := 0 to 1023 do begin
3705 FillChar(locTableItem2,SizeOf(locTableItem2),#0);
3706 for locLowByte0 := 0 to 31 do begin
3707 FillChar(locTableItem3,SizeOf(locTableItem3),#0);
3708 for locLowByte1 := 0 to 31 do begin
3709 locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
3710 locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
3711 end;
3712 i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
3713 if (i = -1) then begin
3714 if (locThirdActualLen = Length(AThirdTable)) then
3715 SetLength(AThirdTable,locThirdActualLen + 50);
3716 i := locThirdActualLen;
3717 AThirdTable[i] := locTableItem3;
3718 Inc(locThirdActualLen);
3719 end;
3720 locTableItem2[locLowByte0] := i;
3721 end;
3722 i := IndexOf(@locTableItem2,ASecondTable);
3723 if (i = -1) then begin
3724 i := Length(ASecondTable);
3725 SetLength(ASecondTable,(i + 1));
3726 ASecondTable[i] := locTableItem2;
3727 end;
3728 AFirstTable[locHighByte] := i;
3729 end;
3730 SetLength(AThirdTable,locThirdActualLen);
3731 end;
3732
3733 procedure GenerateOBmpTables(
3734 ADest : TStream;
3735 var AFirstTable : TOBmpFirstTable;
3736 var ASecondTable : TOBmpSecondTable
3737 );
3738 procedure AddLine(const ALine : ansistring);
3739 var
3740 buffer : ansistring;
3741 begin
3742 buffer := ALine + sLineBreak;
3743 ADest.Write(buffer[1],Length(buffer));
3744 end;
3745
3746 var
3747 i, j, c : Integer;
3748 locLine : string;
3749 begin
3750 AddLine('const');
3751 AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
3752 locLine := '';
3753 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3754 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3755 if (((i+1) mod 16) = 0) then begin
3756 locLine := ' ' + locLine;
3757 AddLine(locLine);
3758 locLine := '';
3759 end;
3760 end;
3761 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3762 locLine := ' ' + locLine;
3763 AddLine(locLine);
3764 AddLine(' );' + sLineBreak);
3765
3766 AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
3767 c := High(ASecondTable);
3768 for i := Low(ASecondTable) to c do begin
3769 locLine := '';
3770 for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
3771 locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
3772 if (((j+1) mod 16) = 0) then begin
3773 if (i = c) and (j = High(TOBmpSecondTableItem)) then
3774 Delete(locLine,Length(locLine),1);
3775 locLine := ' ' + locLine;
3776 AddLine(locLine);
3777 locLine := '';
3778 end;
3779 end;
3780 end;
3781 AddLine(' );' + sLineBreak);
3782 end;
3783
3784
3785 //----------------------------------
3786 procedure Generate3lvlOBmpTables(
3787 ADest : TStream;
3788 var AFirstTable : T3lvlOBmp1Table;
3789 var ASecondTable : T3lvlOBmp2Table;
3790 var AThirdTable : T3lvlOBmp3Table
3791 );
3792
3793 procedure AddLine(const ALine : ansistring);
3794 var
3795 buffer : ansistring;
3796 begin
3797 buffer := ALine + sLineBreak;
3798 ADest.Write(buffer[1],Length(buffer));
3799 end;
3800
3801 var
3802 i, j, c : Integer;
3803 locLine : string;
3804 begin
3805 AddLine('const');
3806 AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
3807 locLine := '';
3808 for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
3809 locLine := locLine + IntToStr(AFirstTable[i]) + ',';
3810 if (((i+1) mod 16) = 0) then begin
3811 locLine := ' ' + locLine;
3812 AddLine(locLine);
3813 locLine := '';
3814 end;
3815 end;
3816 locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
3817 locLine := ' ' + locLine;
3818 AddLine(locLine);
3819 AddLine(' );' + sLineBreak);
3820
3821 AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
3822 c := High(ASecondTable);
3823 for i := Low(ASecondTable) to c do begin
3824 locLine := '(';
3825 for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
3826 locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
3827 Delete(locLine,Length(locLine),1);
3828 locLine := ' ' + locLine + ')';
3829 if (i < c) then
3830 locLine := locLine + ',';
3831 AddLine(locLine);
3832 end;
3833 AddLine(' );' + sLineBreak);
3834
3835 AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
3836 c := High(AThirdTable);
3837 for i := Low(AThirdTable) to c do begin
3838 locLine := '(';
3839 for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
3840 locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
3841 Delete(locLine,Length(locLine),1);
3842 locLine := ' ' + locLine + ')';
3843 if (i < c) then
3844 locLine := locLine + ',';
3845 AddLine(locLine);
3846 end;
3847 AddLine(' );' + sLineBreak);
3848 end;
3849
GetPropnull3850 function GetProp(
3851 const AHighS,
3852 ALowS : Word;
3853 const AProps : TPropRecArray;
3854 var AFirstTable : TOBmpFirstTable;
3855 var ASecondTable : TOBmpSecondTable
3856 ): PPropRec;
3857 begin
3858 Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
3859 end;
3860
GetPropnull3861 function GetProp(
3862 const AHighS,
3863 ALowS : Word;
3864 const AProps : TPropRecArray;
3865 var AFirstTable : T3lvlOBmp1Table;
3866 var ASecondTable : T3lvlOBmp2Table;
3867 var AThirdTable : T3lvlOBmp3Table
3868 ): PPropRec;
3869 begin
3870 Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
3871 //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
3872 end;
3873
3874 { TUCA_PropItemContextTreeRec }
3875
TUCA_PropItemContextTreeRec.GetDatanull3876 function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
3877 begin
3878 if (Size = 0) then
3879 Result := nil
3880 else
3881 Result := PUCA_PropItemContextTreeNodeRec(
3882 PtrUInt(
3883 PtrUInt(@Self) + SizeOf(UInt24){Size}
3884 )
3885 );
3886 end;
3887
3888 { TUCA_LineContextRec }
3889
3890 procedure TUCA_LineContextRec.Clear;
3891 begin
3892 Data := nil
3893 end;
3894
3895 procedure TUCA_LineContextRec.Assign(ASource : PUCA_LineContextRec);
3896 var
3897 c, i : Integer;
3898 begin
3899 if (ASource = nil) then begin
3900 Clear();
3901 exit;
3902 end;
3903 c := Length(ASource^.Data);
3904 SetLength(Self.Data,c);
3905 for i := 0 to c-1 do
3906 Self.Data[i].Assign(@ASource^.Data[i]);
3907 end;
3908
Clonenull3909 function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
3910 begin
3911 Result.Clear();
3912 Result.Assign(@Self);
3913 end;
3914
3915 { TUCA_LineContextItemRec }
3916
3917 procedure TUCA_LineContextItemRec.Clear();
3918 begin
3919 CodePoints := nil;
3920 Weights := nil;
3921 end;
3922
3923 procedure TUCA_LineContextItemRec.Assign(ASource : PUCA_LineContextItemRec);
3924 begin
3925 if (ASource = nil) then begin
3926 Clear();
3927 exit;
3928 end;
3929 Self.CodePoints := Copy(ASource^.CodePoints);
3930 Self.Weights := Copy(ASource^.Weights);
3931 end;
3932
TUCA_LineContextItemRec.Clonenull3933 function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
3934 begin
3935 Result.Clear();
3936 Result.Assign(@Self);
3937 end;
3938
3939 { TUCA_LineRec }
3940
3941 procedure TUCA_LineRec.Clear;
3942 begin
3943 CodePoints := nil;
3944 Weights := nil;
3945 Deleted := False;
3946 Stored := False;
3947 Context.Clear();
3948 end;
3949
3950 procedure TUCA_LineRec.Assign(ASource : PUCA_LineRec);
3951 begin
3952 if (ASource = nil) then begin
3953 Clear();
3954 exit;
3955 end;
3956 Self.CodePoints := Copy(ASource^.CodePoints);
3957 Self.Weights := Copy(ASource^.Weights);
3958 Self.Deleted := ASource^.Deleted;
3959 Self.Stored := ASource^.Stored;
3960 Self.Context.Assign(@ASource^.Context);
3961 end;
3962
TUCA_LineRec.Clonenull3963 function TUCA_LineRec.Clone : TUCA_LineRec;
3964 begin
3965 Result.Clear();
3966 Result.Assign(@Self);
3967 end;
3968
TUCA_LineRec.HasContextnull3969 function TUCA_LineRec.HasContext() : Boolean;
3970 begin
3971 Result := (Length(Context.Data) > 0);
3972 end;
3973
3974 { TPropRec }
3975
TPropRec.GetCategorynull3976 function TPropRec.GetCategory: TUnicodeCategory;
3977 begin
3978 Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
3979 end;
3980
3981 procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
3982 var
3983 b : Byte;
3984 begin
3985 b := Ord(AValue);
3986 b := b shl 3;
3987 CategoryData := CategoryData or b;
3988 //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
3989 end;
3990
TPropRec.GetWhiteSpacenull3991 function TPropRec.GetWhiteSpace: Boolean;
3992 begin
3993 Result := IsBitON(CategoryData,0);
3994 end;
3995
3996 procedure TPropRec.SetWhiteSpace(AValue: Boolean);
3997 begin
3998 SetBit(CategoryData,0,AValue);
3999 end;
4000
TPropRec.GetHangulSyllablenull4001 function TPropRec.GetHangulSyllable: Boolean;
4002 begin
4003 Result := IsBitON(CategoryData,1);
4004 end;
4005
4006 procedure TPropRec.SetHangulSyllable(AValue: Boolean);
4007 begin
4008 SetBit(CategoryData,1,AValue);
4009 end;
4010
4011 { TUCA_PropItemRec }
4012
TUCA_PropItemRec.GetWeightSizenull4013 function TUCA_PropItemRec.GetWeightSize : Word;
4014 var
4015 c : Integer;
4016 begin
4017 c := WeightLength;
4018 if (c = 0) then
4019 exit(0);
4020 Result := c*SizeOf(TUCA_PropWeights);
4021 if IsWeightCompress_1() then
4022 Result := Result - 1;
4023 if IsWeightCompress_2() then
4024 Result := Result - 1;
4025 end;
4026
TUCA_PropItemRec.HasCodePointnull4027 function TUCA_PropItemRec.HasCodePoint(): Boolean;
4028 begin
4029 Result := IsBitON(Flags,FLAG_CODEPOINT);
4030 end;
4031
4032 procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
4033 var
4034 c : Integer;
4035 p : PByte;
4036 pd : PUCA_PropWeights;
4037 begin
4038 c := WeightLength;
4039 p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
4040 pd := ADest;
4041 pd^.Weights[0] := PWord(p)^;
4042 p := p + 2;
4043 if not IsWeightCompress_1() then begin
4044 pd^.Weights[1] := PWord(p)^;
4045 p := p + 2;
4046 end else begin
4047 pd^.Weights[1] := p^;
4048 p := p + 1;
4049 end;
4050 if not IsWeightCompress_2() then begin
4051 pd^.Weights[2] := PWord(p)^;
4052 p := p + 2;
4053 end else begin
4054 pd^.Weights[2] := p^;
4055 p := p + 1;
4056 end;
4057 if (c > 1) then
4058 Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
4059 end;
4060
TUCA_PropItemRec.GetSelfOnlySizenull4061 function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
4062 begin
4063 Result := SizeOf(TUCA_PropItemRec);
4064 if (WeightLength > 0) then begin
4065 Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
4066 if IsWeightCompress_1() then
4067 Result := Result - 1;
4068 if IsWeightCompress_2() then
4069 Result := Result - 1;
4070 end;
4071 if HasCodePoint() then
4072 Result := Result + SizeOf(UInt24);
4073 if Contextual then
4074 Result := Result + Cardinal(GetContext()^.Size);
4075 end;
4076
4077 procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
4078 begin
4079 SetBit(Flags,FLAG_CONTEXTUAL,AValue);
4080 end;
4081
TUCA_PropItemRec.GetContextualnull4082 function TUCA_PropItemRec.GetContextual : Boolean;
4083 begin
4084 Result := IsBitON(Flags,FLAG_CONTEXTUAL);
4085 end;
4086
TUCA_PropItemRec.GetContextnull4087 function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
4088 var
4089 p : PtrUInt;
4090 begin
4091 if not Contextual then
4092 exit(nil);
4093 p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
4094 if IsBitON(Flags,FLAG_CODEPOINT) then
4095 p := p + SizeOf(UInt24);
4096 Result := PUCA_PropItemContextTreeRec(p);
4097 end;
4098
4099 procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
4100 begin
4101 SetBit(Flags,FLAG_DELETION,AValue);
4102 end;
4103
IsDeletednull4104 function TUCA_PropItemRec.IsDeleted: Boolean;
4105 begin
4106 Result := IsBitON(Flags,FLAG_DELETION);
4107 end;
4108
TUCA_PropItemRec.IsValidnull4109 function TUCA_PropItemRec.IsValid() : Boolean;
4110 begin
4111 Result := IsBitON(Flags,FLAG_VALID);
4112 end;
4113
IsWeightCompress_1null4114 function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
4115 begin
4116 Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
4117 end;
4118
IsWeightCompress_2null4119 function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
4120 begin
4121 Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
4122 end;
4123
TUCA_PropItemRec.GetCodePointnull4124 function TUCA_PropItemRec.GetCodePoint: UInt24;
4125 begin
4126 if HasCodePoint() then begin
4127 if Contextual then
4128 Result := PUInt24(
4129 PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
4130 Cardinal(GetContext()^.Size)
4131 )^
4132 else
4133 Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
4134 end else begin
4135 raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
4136 end
4137 end;
4138
avl_CompareCodePointsnull4139 function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
4140 var
4141 a, b : PUCA_LineContextItemRec;
4142 i, hb : Integer;
4143 begin
4144 if (Item1 = Item2) then
4145 exit(0);
4146 if (Item1 = nil) then
4147 exit(-1);
4148 if (Item2 = nil) then
4149 exit(1);
4150 a := Item1;
4151 b := Item2;
4152 if (a^.CodePoints = b^.CodePoints) then
4153 exit(0);
4154 Result := 1;
4155 hb := Length(b^.CodePoints) - 1;
4156 for i := 0 to Length(a^.CodePoints) - 1 do begin
4157 if (i > hb) then
4158 exit;
4159 if (a^.CodePoints[i] < b^.CodePoints[i]) then
4160 exit(-1);
4161 if (a^.CodePoints[i] > b^.CodePoints[i]) then
4162 exit(1);
4163 end;
4164 if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
4165 exit(0);
4166 exit(-1);
4167 end;
4168
ConstructAvlContextTreenull4169 function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
4170 var
4171 r : TAVLTree;
4172 i : Integer;
4173 begin
4174 r := TAVLTree.Create(@avl_CompareCodePoints);
4175 try
4176 for i := 0 to Length(AContext^.Data) - 1 do
4177 r.Add(@AContext^.Data[i]);
4178 Result := r;
4179 except
4180 FreeAndNil(r);
4181 raise;
4182 end;
4183 end;
4184
ConstructContextTreenull4185 function ConstructContextTree(
4186 const AContext : PUCA_LineContextRec;
4187 var ADestBuffer;
4188 const ADestBufferLength : Cardinal
4189 ) : PUCA_PropItemContextTreeRec;
4190
CalcItemOnlySizenull4191 function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
4192 var
4193 kitem : PUCA_LineContextItemRec;
4194 begin
4195 if (AItem = nil) then
4196 exit(0);
4197 kitem := AItem.Data;
4198 Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
4199 SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
4200 SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
4201 (Length(kitem^.CodePoints)*SizeOf(UInt24)) +
4202 SizeOf(PUCA_PropItemContextRec^.WeightCount) +
4203 (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
4204 end;
4205
CalcItemSizenull4206 function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
4207 begin
4208 if (AItem = nil) then
4209 exit(0);
4210 Result := CalcItemOnlySize(AItem);
4211 if (AItem.Left <> nil) then
4212 Result := Result + CalcItemSize(AItem.Left);
4213 if (AItem.Right <> nil) then
4214 Result := Result + CalcItemSize(AItem.Right);
4215 end;
4216
CalcSizenull4217 function CalcSize(AData : TAVLTree) : Cardinal;
4218 begin
4219 Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
4220 end;
4221
ConstructItemnull4222 function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
4223 var
4224 k : Integer;
4225 kitem : PUCA_LineContextItemRec;
4226 kpcp : PUInt24;
4227 kpw : PUCA_PropWeights;
4228 pextra : PtrUInt;
4229 pnext : PUCA_PropItemContextTreeNodeRec;
4230 begin
4231 kitem := ASource.Data;
4232 ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
4233 ADest^.Data.WeightCount := Length(kitem^.Weights);
4234 pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
4235 SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
4236 if (ADest^.Data.CodePointCount > 0) then begin
4237 kpcp := PUInt24(pextra);
4238 for k := 0 to ADest^.Data.CodePointCount - 1 do begin
4239 kpcp^ := kitem^.CodePoints[k];
4240 Inc(kpcp);
4241 end;
4242 end;
4243 if (ADest^.Data.WeightCount > 0) then begin
4244 kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
4245 for k := 0 to ADest^.Data.WeightCount - 1 do begin
4246 kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
4247 kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
4248 kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
4249 Inc(kpw);
4250 end;
4251 end;
4252 Result := CalcItemOnlySize(ASource);
4253 if (ASource.Left <> nil) then begin
4254 pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
4255 ADest^.Left := Result;
4256 Result := Result + ConstructItem(ASource.Left,pnext);
4257 end else begin
4258 ADest^.Left := 0;
4259 end;
4260 if (ASource.Right <> nil) then begin
4261 pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
4262 ADest^.Right := Result;
4263 Result := Result + ConstructItem(ASource.Right,pnext);
4264 end else begin
4265 ADest^.Right := 0;
4266 end;
4267 end;
4268
4269 var
4270 c : PtrUInt;
4271 r : PUCA_PropItemContextTreeRec;
4272 p : PUCA_PropItemContextTreeNodeRec;
4273 tempTree : TAVLTree;
4274 begin
4275 tempTree := ConstructAvlContextTree(AContext);
4276 try
4277 c := CalcSize(tempTree);
4278 if (ADestBufferLength > 0) and (c > ADestBufferLength) then
4279 raise Exception.Create(SInsufficientMemoryBuffer);
4280 r := @ADestBuffer;
4281 r^.Size := c;
4282 p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
4283 ConstructItem(tempTree.Root,p);
4284 finally
4285 tempTree.Free();
4286 end;
4287 Result := r;
4288 end;
4289
4290 procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
4291 begin
4292 ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
4293 ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
4294 ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
4295 ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
4296 ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
4297 ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
4298 ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
4299 end;
4300
4301 procedure ReverseBytes(var AData; const ALength : Integer);
4302 var
4303 i,j : PtrInt;
4304 c : Byte;
4305 p : PByte;
4306 begin
4307 if (ALength = 1) then
4308 exit;
4309 p := @AData;
4310 j := ALength div 2;
4311 for i := 0 to Pred(j) do begin
4312 c := p[i];
4313 p[i] := p[(ALength - 1 ) - i];
4314 p[(ALength - 1 ) - i] := c;
4315 end;
4316 end;
4317
4318 procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
4319 var
4320 p : PByte;
4321 i : PtrInt;
4322 begin
4323 if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
4324 p := @AValue;
4325 for i := 0 to Pred(AArrayLength) do begin
4326 ReverseBytes(p^,AItemSize);
4327 Inc(p,AItemSize);
4328 end;
4329 end;
4330 end;
4331
4332 procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
4333 var
4334 k : PtrUInt;
4335 p_s, p_d : PByte;
4336 begin
4337 d^.Left := s^.Left;
4338 ReverseBytes(d^.Left,SizeOf(d^.Left));
4339 d^.Right := s^.Right;
4340 ReverseBytes(d^.Right,SizeOf(d^.Right));
4341 d^.Data.CodePointCount := s^.Data.CodePointCount;
4342 ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
4343 d^.Data.WeightCount := s^.Data.WeightCount;
4344 ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
4345
4346 k := SizeOf(TUCA_PropItemContextTreeNodeRec);
4347 p_s := PByte(PtrUInt(s) + k);
4348 p_d := PByte(PtrUInt(d) + k);
4349 k := (s^.Data.CodePointCount*SizeOf(UInt24));
4350 Move(p_s^,p_d^, k);
4351 ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
4352 p_s := PByte(PtrUInt(p_s) + k);
4353 p_d := PByte(PtrUInt(p_d) + k);
4354 k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
4355 Move(p_s^,p_d^,k);
4356 ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
4357 if (s^.Left > 0) then
4358 ReverseContextNodeFromNativeEndian(
4359 PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
4360 PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
4361 );
4362 if (s^.Right > 0) then
4363 ReverseContextNodeFromNativeEndian(
4364 PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
4365 PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
4366 );
4367 end;
4368
4369 procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
4370 var
4371 k : PtrUInt;
4372 begin
4373 d^.Size := s^.Size;
4374 ReverseBytes(d^.Size,SizeOf(d^.Size));
4375 if (s^.Size = 0) then
4376 exit;
4377 k := SizeOf(s^.Size);
4378 ReverseContextNodeFromNativeEndian(
4379 PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
4380 PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
4381 );
4382 end;
4383
4384 procedure ReverseFromNativeEndian(
4385 const AData : PUCA_PropItemRec;
4386 const ADataLen : Cardinal;
4387 const ADest : PUCA_PropItemRec
4388 );
4389 var
4390 s, d : PUCA_PropItemRec;
4391 sCtx, dCtx : PUCA_PropItemContextTreeRec;
4392 dataEnd : PtrUInt;
4393 k, i : PtrUInt;
4394 p_s, p_d : PByte;
4395 pw_s, pw_d : PUCA_PropWeights;
4396 begin
4397 dataEnd := PtrUInt(AData) + ADataLen;
4398 s := AData;
4399 d := ADest;
4400 while True do begin
4401 d^.WeightLength := s^.WeightLength;
4402 ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
4403 d^.ChildCount := s^.ChildCount;
4404 ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
4405 d^.Size := s^.Size;
4406 ReverseBytes(d^.Size,SizeOf(d^.Size));
4407 d^.Flags := s^.Flags;
4408 ReverseBytes(d^.Flags,SizeOf(d^.Flags));
4409 if s^.Contextual then begin
4410 k := SizeOf(TUCA_PropItemRec);
4411 if s^.HasCodePoint() then
4412 k := k + SizeOf(UInt24);
4413 sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
4414 dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
4415 ReverseContextFromNativeEndian(sCtx,dCtx);
4416 end;
4417 if s^.HasCodePoint() then begin
4418 if s^.Contextual then
4419 k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
4420 else
4421 k := s^.GetSelfOnlySize() - SizeOf(UInt24);
4422 p_s := PByte(PtrUInt(s) + k);
4423 p_d := PByte(PtrUInt(d) + k);
4424 Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
4425 ReverseBytes(p_d^,SizeOf(UInt24));
4426 end;
4427 if (s^.WeightLength > 0) then begin
4428 k := SizeOf(TUCA_PropItemRec);
4429 p_s := PByte(PtrUInt(s) + k);
4430 p_d := PByte(PtrUInt(d) + k);
4431 k := SizeOf(Word);
4432 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4433 ReverseBytes(Unaligned(p_d^),k);
4434 p_s := PByte(PtrUInt(p_s) + k);
4435 p_d := PByte(PtrUInt(p_d) + k);
4436 if s^.IsWeightCompress_1() then begin
4437 k := SizeOf(Byte);
4438 PByte(p_d)^ := PByte(p_s)^;
4439 end else begin
4440 k := SizeOf(Word);
4441 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4442 end;
4443 ReverseBytes(p_d^,k);
4444 p_s := PByte(PtrUInt(p_s) + k);
4445 p_d := PByte(PtrUInt(p_d) + k);
4446 if s^.IsWeightCompress_2() then begin
4447 k := SizeOf(Byte);
4448 PByte(p_d)^ := PByte(p_s)^;
4449 end else begin
4450 k := SizeOf(Word);
4451 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4452 end;
4453 ReverseBytes(p_d^,k);
4454 if (s^.WeightLength > 1) then begin
4455 pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
4456 pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
4457 for i := 1 to s^.WeightLength - 1 do begin
4458 pw_d^.Weights[0] := pw_s^.Weights[0];
4459 pw_d^.Weights[1] := pw_s^.Weights[1];
4460 pw_d^.Weights[2] := pw_s^.Weights[2];
4461 ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
4462 Inc(pw_s);
4463 Inc(pw_d);
4464 end;
4465 end;
4466 end;
4467 k := s^.GetSelfOnlySize();
4468 s := PUCA_PropItemRec(PtrUInt(s)+k);
4469 d := PUCA_PropItemRec(PtrUInt(d)+k);
4470 if (PtrUInt(s) >= dataEnd) then
4471 Break;
4472 end;
4473 if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
4474 raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
4475 end;
4476 //------------------------------------------------------------------------------
4477
4478 procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
4479 var
4480 k : PtrUInt;
4481 p_s, p_d : PByte;
4482 begin
4483 d^.Left := s^.Left;
4484 ReverseBytes(d^.Left,SizeOf(d^.Left));
4485 d^.Right := s^.Right;
4486 ReverseBytes(d^.Right,SizeOf(d^.Right));
4487 d^.Data.CodePointCount := s^.Data.CodePointCount;
4488 ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
4489 d^.Data.WeightCount := s^.Data.WeightCount;
4490 ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
4491
4492 k := SizeOf(TUCA_PropItemContextTreeNodeRec);
4493 p_s := PByte(PtrUInt(s) + k);
4494 p_d := PByte(PtrUInt(d) + k);
4495 k := (d^.Data.CodePointCount*SizeOf(UInt24));
4496 Move(p_s^,p_d^, k);
4497 ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
4498 p_s := PByte(PtrUInt(p_s) + k);
4499 p_d := PByte(PtrUInt(p_d) + k);
4500 k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
4501 Move(p_s^,p_d^,k);
4502 ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
4503 if (d^.Left > 0) then
4504 ReverseContextNodeToNativeEndian(
4505 PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
4506 PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
4507 );
4508 if (d^.Right > 0) then
4509 ReverseContextNodeToNativeEndian(
4510 PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
4511 PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
4512 );
4513 end;
4514
4515 procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
4516 var
4517 k : PtrUInt;
4518 begin
4519 d^.Size := s^.Size;
4520 ReverseBytes(d^.Size,SizeOf(d^.Size));
4521 if (s^.Size = 0) then
4522 exit;
4523 k := SizeOf(s^.Size);
4524 ReverseContextNodeToNativeEndian(
4525 PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
4526 PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
4527 );
4528 end;
4529
4530 procedure ReverseToNativeEndian(
4531 const AData : PUCA_PropItemRec;
4532 const ADataLen : Cardinal;
4533 const ADest : PUCA_PropItemRec
4534 );
4535 var
4536 s, d : PUCA_PropItemRec;
4537 sCtx, dCtx : PUCA_PropItemContextTreeRec;
4538 dataEnd : PtrUInt;
4539 k, i : PtrUInt;
4540 p_s, p_d : PByte;
4541 pw_s, pw_d : PUCA_PropWeights;
4542 begin
4543 dataEnd := PtrUInt(AData) + ADataLen;
4544 s := AData;
4545 d := ADest;
4546 while True do begin
4547 d^.WeightLength := s^.WeightLength;
4548 ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
4549 d^.ChildCount := s^.ChildCount;
4550 ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
4551 d^.Size := s^.Size;
4552 ReverseBytes(d^.Size,SizeOf(d^.Size));
4553 d^.Flags := s^.Flags;
4554 ReverseBytes(d^.Flags,SizeOf(d^.Flags));
4555 if d^.Contextual then begin
4556 k := SizeOf(TUCA_PropItemRec);
4557 if d^.HasCodePoint() then
4558 k := k + SizeOf(UInt24);
4559 sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
4560 dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
4561 ReverseContextToNativeEndian(sCtx,dCtx);
4562 end;
4563 if d^.HasCodePoint() then begin
4564 if d^.Contextual then
4565 k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
4566 else
4567 k := d^.GetSelfOnlySize() - SizeOf(UInt24);
4568 p_s := PByte(PtrUInt(s) + k);
4569 p_d := PByte(PtrUInt(d) + k);
4570 Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
4571 ReverseBytes(p_d^,SizeOf(UInt24));
4572 end;
4573 if (d^.WeightLength > 0) then begin
4574 k := SizeOf(TUCA_PropItemRec);
4575 p_s := PByte(PtrUInt(s) + k);
4576 p_d := PByte(PtrUInt(d) + k);
4577 k := SizeOf(Word);
4578 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4579 ReverseBytes(p_d^,k);
4580 p_s := PByte(PtrUInt(p_s) + k);
4581 p_d := PByte(PtrUInt(p_d) + k);
4582 if d^.IsWeightCompress_1() then begin
4583 k := SizeOf(Byte);
4584 PByte(p_d)^ := PByte(p_s)^;
4585 end else begin
4586 k := SizeOf(Word);
4587 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4588 end;
4589 ReverseBytes(p_d^,k);
4590 p_s := PByte(PtrUInt(p_s) + k);
4591 p_d := PByte(PtrUInt(p_d) + k);
4592 if d^.IsWeightCompress_2() then begin
4593 k := SizeOf(Byte);
4594 PByte(p_d)^ := PByte(p_s)^;
4595 end else begin
4596 k := SizeOf(Word);
4597 Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
4598 end;
4599 ReverseBytes(p_d^,k);
4600 if (d^.WeightLength > 1) then begin
4601 pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
4602 pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
4603 for i := 1 to d^.WeightLength - 1 do begin
4604 pw_d^.Weights[0] := pw_s^.Weights[0];
4605 pw_d^.Weights[1] := pw_s^.Weights[1];
4606 pw_d^.Weights[2] := pw_s^.Weights[2];
4607 ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
4608 Inc(pw_s);
4609 Inc(pw_d);
4610 end;
4611 end;
4612 end;
4613 k := d^.GetSelfOnlySize();
4614 s := PUCA_PropItemRec(PtrUInt(s)+k);
4615 d := PUCA_PropItemRec(PtrUInt(d)+k);
4616 if (PtrUInt(s) >= dataEnd) then
4617 Break;
4618 end;
4619 if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
4620 raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
4621 end;
4622
4623 procedure Check(const ACondition : Boolean; const AMsg : string);overload;
4624 begin
4625 if not ACondition then
4626 raise Exception.Create(AMsg);
4627 end;
4628
4629 procedure Check(
4630 const ACondition : Boolean;
4631 const AFormatMsg : string;
4632 const AArgs : array of const
4633 );overload;
4634 begin
4635 Check(ACondition,Format(AFormatMsg,AArgs));
4636 end;
4637
4638 procedure Check(const ACondition : Boolean);overload;
4639 begin
4640 Check(ACondition,'Check failed.')
4641 end;
4642
4643 procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
4644 var
4645 i : Integer;
4646 begin
4647 if (ALength > 0) then begin
4648 for i := 0 to ALength - 1 do begin
4649 Check(a[i].Weights[0]=b[i].Weights[0]);
4650 Check(a[i].Weights[1]=b[i].Weights[1]);
4651 Check(a[i].Weights[2]=b[i].Weights[2]);
4652 end;
4653 end;
4654 end;
4655
4656 procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
4657 var
4658 i : Integer;
4659 begin
4660 if (ALength > 0) then begin
4661 for i := 0 to ALength - 1 do
4662 Check(a[i]=b[i]);
4663 end;
4664 end;
4665
4666 procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
4667 var
4668 a, b : PUCA_PropItemContextTreeNodeRec;
4669 k : Cardinal;
4670 begin
4671 if (AProp1=nil) then begin
4672 Check(AProp2=nil);
4673 exit;
4674 end;
4675 a := AProp1;
4676 b := AProp2;
4677 Check(a^.Left=b^.Left);
4678 Check(a^.Right=b^.Right);
4679 Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
4680 Check(a^.Data.WeightCount=b^.Data.WeightCount);
4681 k := SizeOf(a^.Data);
4682 CompareCodePoints(
4683 PUInt24(PtrUInt(a)+k),
4684 PUInt24(PtrUInt(b)+k),
4685 a^.Data.CodePointCount
4686 );
4687 k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
4688 CompareWeights(
4689 PUCA_PropWeights(PtrUInt(a)+k),
4690 PUCA_PropWeights(PtrUInt(b)+k),
4691 a^.Data.WeightCount
4692 );
4693 if (a^.Left > 0) then begin
4694 k := a^.Left;
4695 CompareContextNode(
4696 PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
4697 PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
4698 );
4699 end;
4700 if (a^.Right > 0) then begin
4701 k := a^.Right;
4702 CompareContextNode(
4703 PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
4704 PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
4705 );
4706 end;
4707 end;
4708
4709 procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
4710 var
4711 a, b : PUCA_PropItemContextTreeNodeRec;
4712 k : Integer;
4713 begin
4714 if (AProp1=nil) then begin
4715 Check(AProp2=nil);
4716 exit;
4717 end;
4718 Check(AProp1^.Size=AProp2^.Size);
4719 k := Cardinal(AProp1^.Size);
4720 a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
4721 b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
4722 CompareContextNode(a,b);
4723 end;
4724
4725 procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
4726 var
4727 a, b, pend : PUCA_PropItemRec;
4728 wa, wb : array of TUCA_PropWeights;
4729 k : Integer;
4730 begin
4731 if (ADataLen <= 0) then
4732 exit;
4733 a := PUCA_PropItemRec(AProp1);
4734 b := PUCA_PropItemRec(AProp2);
4735 pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
4736 while (a<pend) do begin
4737 Check(a^.WeightLength=b^.WeightLength);
4738 Check(a^.ChildCount=b^.ChildCount);
4739 Check(a^.Size=b^.Size);
4740 Check(a^.Flags=b^.Flags);
4741 if a^.HasCodePoint() then
4742 Check(a^.CodePoint = b^.CodePoint);
4743 if (a^.WeightLength > 0) then begin
4744 k := a^.WeightLength;
4745 SetLength(wa,k);
4746 SetLength(wb,k);
4747 a^.GetWeightArray(@wa[0]);
4748 b^.GetWeightArray(@wb[0]);
4749 CompareWeights(@wa[0],@wb[0],k);
4750 end;
4751 if a^.Contextual then
4752 CompareContext(a^.GetContext(),b^.GetContext());
4753 Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
4754 k := a^.GetSelfOnlySize();
4755 a := PUCA_PropItemRec(PtrUInt(a)+k);
4756 b := PUCA_PropItemRec(PtrUInt(b)+k);
4757 end;
4758 end;
4759
4760 Procedure QuickSort(AList : PCardinal; L, R : Longint);overload;
4761 var
4762 I, J : Longint;
4763 P, Q : Cardinal;
4764 begin
4765 repeat
4766 I := L;
4767 J := R;
4768 P := AList[ (L + R) div 2 ];
4769 repeat
4770 while (P > AList[i]) do
4771 I := I + 1;
4772 while (P < AList[J]) do
4773 J := J - 1;
4774 If I <= J then
4775 begin
4776 Q := AList[I];
4777 AList[I] := AList[J];
4778 AList[J] := Q;
4779 I := I + 1;
4780 J := J - 1;
4781 end;
4782 until I > J;
4783 if J - L < R - I then
4784 begin
4785 if L < J then
4786 QuickSort(AList, L, J);
4787 L := I;
4788 end
4789 else
4790 begin
4791 if I < R then
4792 QuickSort(AList, I, R);
4793 R := J;
4794 end;
4795 until L >= R;
4796 end;
4797
CalcMaxLevel2Countnull4798 function CalcMaxLevel2Count(
4799 const ALevel1Value : Cardinal;
4800 ALines : array of TUCA_LineRec
4801 ) : Integer;
4802 var
4803 i, c, k : Integer;
4804 ac : Integer;
4805 items : array of Cardinal;
4806 p : PUCA_LineRec;
4807 pw : ^TUCA_WeightRec;
4808 begin
4809 c := Length(ALines);
4810 if (c < 1) then
4811 exit(0);
4812 SetLength(items,0);
4813 ac := 0;
4814 p := @ALines[Low(ALines)];
4815 for i := 0 to c-1 do begin
4816 if (Length(p^.Weights) > 0) then begin
4817 pw := @p^.Weights[Low(p^.Weights)];
4818 for k := 0 to Length(p^.Weights)-1 do begin
4819 if (pw^.Weights[0] = ALevel1Value) then begin
4820 if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
4821 if (ac >= Length(items)) then
4822 SetLength(items,Length(items)+256);
4823 items[ac] := pw^.Weights[1];
4824 ac := ac+1;
4825 end;
4826 end;
4827 Inc(pw);
4828 end;
4829 end;
4830 Inc(p);
4831 end;
4832 Result := ac;
4833 end;
4834
RewriteLevel2null4835 function RewriteLevel2(
4836 const ALevel1Value : Cardinal;
4837 ALines : PUCA_LineRec;
4838 const ALinesLength : Integer
4839 ) : Integer;
4840 var
4841 i, c, k : Integer;
4842 ac : Integer;
4843 items : array of Cardinal;
4844 p : PUCA_LineRec;
4845 pw : ^TUCA_WeightRec;
4846 newValue : Int64;
4847 begin
4848 c := ALinesLength;
4849 if (c < 1) then
4850 exit(0);
4851 SetLength(items,256);
4852 ac := 0;
4853 p := ALines;
4854 for i := 0 to c-1 do begin
4855 if (Length(p^.Weights) > 0) then begin
4856 for k := 0 to Length(p^.Weights)-1 do begin
4857 pw := @p^.Weights[k];
4858 if (pw^.Weights[0] = ALevel1Value) then begin
4859 if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
4860 if (ac >= Length(items)) then
4861 SetLength(items,Length(items)+256);
4862 items[ac] := pw^.Weights[1];
4863 ac := ac+1;
4864 end;
4865 end;
4866 end;
4867 end;
4868 Inc(p);
4869 end;
4870 SetLength(items,ac);
4871 if (ac > 1) then
4872 QuickSort(@items[0],0,(ac-1));
4873
4874 p := ALines;
4875 for i := 0 to c-1 do begin
4876 if (Length(p^.Weights) > 0) then begin
4877 for k := 0 to Length(p^.Weights)-1 do begin
4878 pw := @p^.Weights[k];
4879 if (pw^.Weights[0] = ALevel1Value) then begin
4880 newValue := IndexDWord(items[0],ac,pw^.Weights[1]);
4881 if (newValue < 0) then
4882 raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]);
4883 pw^.Weights[1] := newValue;//+1;
4884 end;
4885 end;
4886 end;
4887 Inc(p);
4888 end;
4889 if (Length(items) > 0) then
4890 Result := items[Length(items)-1]
4891 else
4892 Result := 0;
4893 end;
4894
4895 procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
4896 var
4897 c, i, ac, k : Integer;
4898 p : PUCA_LineRec;
4899 level1List : array of Cardinal;
4900 pw : ^TUCA_WeightRec;
4901 begin
4902 c := ALength;
4903 if (c < 1) then
4904 exit;
4905 ac := 0;
4906 SetLength(level1List,c);
4907 p := ALines;
4908 for i := 0 to c-1 do begin
4909 if (Length(p^.Weights) > 0) then begin
4910 for k := 0 to Length(p^.Weights)-1 do begin
4911 pw := @p^.Weights[k];
4912 if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin
4913 if (ac >= Length(level1List)) then
4914 SetLength(level1List,ac+1000);
4915 level1List[ac] := pw^.Weights[0];
4916 RewriteLevel2(level1List[ac],ALines,ALength);
4917 ac := ac+1;
4918 end;
4919 end;
4920 end;
4921 Inc(p);
4922 end;
4923 end;
4924
CalcMaxLevel2Valuenull4925 function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
4926 var
4927 i, c, k, tempValue : Integer;
4928 p : PUCA_LineRec;
4929 maxLevel : Cardinal;
4930 maxValue : Integer;
4931 begin
4932 c := Length(ALines);
4933 if (c < 2) then
4934 exit(0);
4935 maxLevel := 0;
4936 maxValue := CalcMaxLevel2Count(maxLevel,ALines);
4937 p := @ALines[Low(ALines)+1];
4938 for i := 1 to c-1 do begin
4939 if (Length(p^.Weights) > 0) then begin
4940 for k := 0 to Length(p^.Weights)-1 do begin
4941 if (p^.Weights[k].Weights[0] <> maxLevel) then begin
4942 tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines);
4943 if (tempValue > maxValue) then begin
4944 maxLevel := p^.Weights[k].Weights[0];
4945 maxValue := tempValue;
4946 end;
4947 end;
4948 end;
4949 end;
4950 Inc(p);
4951 end;
4952 Result := maxValue;
4953 end;
4954
4955 initialization
4956 FS := DefaultFormatSettings;
4957 FS.DecimalSeparator := '.';
4958
4959 end.
4960