1 {
2 **********************************************************************
3 This file is part of LazUtils.
4 It is copied and modified from a file in Free Component Library.
5
6 See the file COPYING.FPC, included in this distribution,
7 for details about the license.
8 **********************************************************************
9
10 XML utility routines.
11 Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
12
13 }
14 unit laz2_xmlutils;
15
16 {$ifdef fpc}{$mode objfpc}{$endif}
17 {$H+}
18 {$ifopt Q+}{$define overflow_check}{$endif}
19 {$R-}
20
21 interface
22
23 uses
24 SysUtils, Classes, LazUTF8;
25
26 type
27 TXMLUtilString = AnsiString;
28 TXMLUtilChar = Char;
29 PXMLUtilChar = PChar;
30 PXMLUtilString = ^TXMLUtilString;
31
IsXmlNamenull32 function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean; overload;
IsXmlNamenull33 function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
IsXmlNamesnull34 function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsXmlNmTokennull35 function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsXmlNmTokensnull36 function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsValidXmlEncodingnull37 function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
Xml11NamePagesnull38 function Xml11NamePages: PByteArray;
39 procedure NormalizeSpaces(var Value: TXMLUtilString);
IsXmlWhiteSpacenull40 function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
Hashnull41 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
42 { beware, works in ASCII range only }
XUStrLICompnull43 function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
44
45 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
46
47 { a simple hash table with TXMLUtilString keys }
48
49 type
50 {$ifndef fpc}
51 PtrInt = LongInt;
52 TFPList = TList;
53 {$endif}
54
55 PPHashItem = ^PHashItem;
56 PHashItem = ^THashItem;
57 THashItem = record
58 Key: TXMLUtilString;
59 HashValue: LongWord;
60 Next: PHashItem;
61 Data: TObject;
62 end;
63 THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
64 PHashItemArray = ^THashItemArray;
65
ntrynull66 THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
67
68 THashTable = class(TObject)
69 private
70 FCount: LongWord;
71 FBucketCount: LongWord;
72 FBucket: PHashItemArray;
73 FOwnsObjects: Boolean;
Lookupnull74 function Lookup(Key: PXMLUtilChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
75 procedure Resize(NewCapacity: LongWord);
76 public
77 constructor Create(InitSize: Integer; OwnObjects: Boolean);
78 destructor Destroy; override;
79 procedure Clear;
Findnull80 function Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
FindOrAddnull81 function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer; out Found: Boolean): PHashItem; overload;
FindOrAddnull82 function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem; overload;
Getnull83 function Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
Removenull84 function Remove(Entry: PHashItem): Boolean;
RemoveDatanull85 function RemoveData(aData: TObject): Boolean;
86 procedure ForEach(proc: THashForEach; arg: Pointer);
87 property Count: LongWord read FCount;
88 end;
89
90 { another hash, for detecting duplicate namespaced attributes without memory allocations }
91
92 TExpHashEntry = record
93 rev: LongWord;
94 hash: LongWord;
95 uriPtr: PXMLUtilString;
96 lname: PXMLUtilChar;
97 lnameLen: Integer;
98 end;
99 TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
100 PExpHashEntryArray = ^TExpHashEntryArray;
101
102 TDblHashArray = class(TObject)
103 private
104 FSizeLog: Integer;
105 FRevision: LongWord;
106 FData: PExpHashEntryArray;
107 public
108 procedure Init(NumSlots: Integer);
Locatenull109 function Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
110 destructor Destroy; override;
111 end;
112
113 TBinding = class
114 public
115 uri: TXMLUtilString;
116 next: TBinding;
117 prevPrefixBinding: TObject;
118 Prefix: PHashItem;
119 end;
120
121 TAttributeAction = (
122 aaUnchanged,
123 aaPrefix, // only override the prefix
124 aaBoth // override prefix and emit namespace definition
125 );
126
127 TNSSupport = class(TObject)
128 private
129 FNesting: Integer;
130 FPrefixSeqNo: Integer;
131 FFreeBindings: TBinding;
132 FBindings: TFPList;
133 FBindingStack: array of TBinding;
134 FPrefixes: THashTable;
135 FDefaultPrefix: THashItem;
136 public
137 constructor Create;
138 destructor Destroy; override;
139 procedure DefineBinding(const Prefix, nsURI: TXMLUtilString; out Binding: TBinding);
CheckAttributenull140 function CheckAttribute(const Prefix, nsURI: TXMLUtilString;
141 out Binding: TBinding): TAttributeAction;
IsPrefixBoundnull142 function IsPrefixBound(P: PXMLUtilChar; Len: Integer; out Prefix: PHashItem): Boolean;
GetPrefixnull143 function GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
BindPrefixnull144 function BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
DefaultNSBindingnull145 function DefaultNSBinding: TBinding;
146 procedure StartElement;
147 procedure EndElement;
148 end;
149
150 {$i laz2_names.inc}
151
152 implementation
153
154 var
155 Xml11Pg: PByteArray = nil;
156
Xml11NamePagesnull157 function Xml11NamePages: PByteArray;
158 var
159 I: Integer;
160 p: PByteArray;
161 begin
162 if Xml11Pg = nil then
163 begin
164 GetMem(p, 512);
165 for I := 0 to 255 do
166 p^[I] := ord(Byte(I) in Xml11HighPages);
167 p^[0] := 2;
168 p^[3] := $2c;
169 p^[$20] := $2a;
170 p^[$21] := $2b;
171 p^[$2f] := $29;
172 p^[$30] := $2d;
173 p^[$fd] := $28;
174 p^[$ff] := $30;
175
176 Move(p^, p^[256], 256);
177 p^[$100] := $19;
178 p^[$103] := $2E;
179 p^[$120] := $2F;
180 Xml11Pg := p;
181 end;
182 Result := Xml11Pg;
183 end;
184
IsXml11Charnull185 function IsXml11Char(Value: PXMLUtilChar; var Index: Integer): Boolean; overload;
186 begin
187 if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
188 begin
189 Inc(Index);
190 Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
191 end
192 else
193 Result := False;
194 end;
195
IsXml11Charnull196 function IsXml11Char(const Value: TXMLUtilString; var Index: Integer): Boolean; overload;
197 begin
198 if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
199 begin
200 Inc(Index);
201 Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
202 end
203 else
204 Result := False;
205 end;
206
IsXmlNamenull207 function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
208 begin
209 Result := IsXmlName(PXMLUtilChar(Value), Length(Value), Xml11);
210 end;
211
IsXmlNamenull212 function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean;
213 var
214 Pages: PByteArray;
215 I: Integer;
216 begin
217 Result := False;
218 if Xml11 then
219 Pages := Xml11NamePages
220 else
221 Pages := @NamePages;
222
223 I := 0;
224 if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
225 (Value[I] = ':') or
226 (Xml11 and IsXml11Char(Value, I))) then
227 Exit;
228 Inc(I);
229 while I < Len do
230 begin
231 if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
232 (Value[I] = ':') or
233 (Xml11 and IsXml11Char(Value, I))) then
234 Exit;
235 Inc(I);
236 end;
237 Result := True;
238 end;
239
IsXmlNamesnull240 function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
241 var
242 Pages: PByteArray;
243 I: Integer;
244 Offset: Integer;
245 begin
246 if Xml11 then
247 Pages := Xml11NamePages
248 else
249 Pages := @NamePages;
250 Result := False;
251 if Value = '' then
252 Exit;
253 I := 1;
254 Offset := 0;
255 while I <= Length(Value) do
256 begin
257 if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
258 (Value[I] = ':') or
259 (Xml11 and IsXml11Char(Value, I))) then
260 begin
261 if (I = Length(Value)) or (Value[I] <> #32) then
262 Exit;
263 Offset := 0;
264 Inc(I);
265 Continue;
266 end;
267 Offset := $100;
268 Inc(I);
269 end;
270 Result := True;
271 end;
272
IsXmlNmTokennull273 function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
274 var
275 I: Integer;
276 Pages: PByteArray;
277 begin
278 if Xml11 then
279 Pages := Xml11NamePages
280 else
281 Pages := @NamePages;
282 Result := False;
283 if Value = '' then
284 Exit;
285 I := 1;
286 while I <= Length(Value) do
287 begin
288 if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
289 (Value[I] = ':') or
290 (Xml11 and IsXml11Char(Value, I))) then
291 Exit;
292 Inc(I);
293 end;
294 Result := True;
295 end;
296
IsXmlNmTokensnull297 function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
298 var
299 I: Integer;
300 Pages: PByteArray;
301 begin
302 if Xml11 then
303 Pages := Xml11NamePages
304 else
305 Pages := @NamePages;
306 I := 1;
307 Result := False;
308 if Value = '' then
309 Exit;
310 while I <= Length(Value) do
311 begin
312 if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
313 (Value[I] = ':') or
314 (Xml11 and IsXml11Char(Value, I))) then
315 begin
316 if (I = Length(Value)) or (Value[I] <> #32) then
317 Exit;
318 end;
319 Inc(I);
320 end;
321 Result := True;
322 end;
323
IsValidXmlEncodingnull324 function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
325 var
326 I: Integer;
327 begin
328 Result := False;
329 if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
330 Exit;
331 for I := 2 to Length(Value) do
332 if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
333 Exit;
334 Result := True;
335 end;
336
337 procedure NormalizeSpaces(var Value: TXMLUtilString);
338 var
339 I, J: Integer;
340 begin
341 I := Length(Value);
342 // speed: trim only whed needed
343 if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
344 Value := Trim(Value);
345 I := 1;
346 while I < Length(Value) do
347 begin
348 if Value[I] = #32 then
349 begin
350 J := I+1;
351 while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
352 if J-I > 1 then Delete(Value, I+1, J-I-1);
353 end;
354 Inc(I);
355 end;
356 end;
357
IsXmlWhiteSpacenull358 function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
359 begin
360 Result := c^ in [#32,#9,#10,#13];
361 end;
362
XUStrLICompnull363 function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
364 var
365 counter: Integer;
366 c1, c2: Word;
367 begin
368 counter := 0;
369 result := 0;
370 if Len = 0 then
371 exit;
372 repeat
373 c1 := ord(S1[counter]);
374 c2 := ord(S2[counter]);
375 if (c1 = 0) or (c2 = 0) then break;
376 if c1 <> c2 then
377 begin
378 if c1 in [97..122] then
379 Dec(c1, 32);
380 if c2 in [97..122] then
381 Dec(c2, 32);
382 if c1 <> c2 then
383 Break;
384 end;
385 Inc(counter);
386 until counter >= Len;
387 result := c1 - c2;
388 end;
389
390 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
391 { replaces characters in s.
392 The mapping is defined by SrcChars and DstChars.
393 The n-th UTF-8 character of SrcChars will be replaced with the n-th
394 character of DstChars. If there is no n-th character in DstChars then the
395 character will be deleted in s.
396 }
397 type
398 TItem = packed record
399 SrcLen: byte;
400 Src: array[0..4] of char;
401 DstLen: byte;
402 Dst: array[0..4] of char;
403 end;
404 PItem = ^TItem;
405
406 var
407 unique: boolean;
408
IsASCIInull409 function IsASCII(const h: string): boolean;
410 var
411 i: Integer;
412 begin
413 for i:=1 to length(h) do
414 if ord(h[i])>=128 then exit(false);
415 Result:=true;
416 end;
417
418 procedure UniqString(var p: PChar); inline;
419 var
420 OldPos: SizeInt;
421 begin
422 if unique then exit;
423 unique:=true;
424 OldPos:=p-PChar(s);
425 UniqueString(s);
426 p:=PChar(s)+OldPos;
427 end;
428
429 procedure ReplaceASCIIWithDelete(Src: PChar);
430 var
431 c: Char;
432 Dst: PChar;
433 NewLen: SizeInt;
434 i: SizeInt;
435 begin
436 UniqString(Src);
437 Dst:=Src;
438 while true do begin
439 c:=Src^;
440 if (c=#0) and (Src-PChar(s)=length(s)) then break;
441 i:=Pos(c,SrcChars);
442 if i<1 then begin
443 // keep character
444 Dst^:=c;
445 inc(Src);
446 inc(Dst);
447 end else begin
448 if i<=length(DstChars) then begin
449 // replace a character
450 Dst^:=DstChars[i];
451 inc(Src);
452 inc(Dst);
453 end else begin
454 // delete a character = skip
455 inc(Src);
456 end;
457 end;
458 end;
459 NewLen:=Dst-PChar(s);
460 SetLength(s,NewLen);
461 end;
462
463 procedure ReplaceASCII;
464 // use a simple byte replace
465 // if a delete is needed then switch to another algorithm
466 var
467 i: SizeInt;
468 p: PChar;
469 c: Char;
470 begin
471 p:=PChar(s);
472 while true do begin
473 c:=p^;
474 if (c=#0) and (p-PChar(s)=length(s)) then break;
475 i:=Pos(c,SrcChars);
476 if i<1 then begin
477 // keep character
478 inc(p);
479 end else begin
480 if i<=length(DstChars) then begin
481 // replace a character
482 UniqString(p);
483 p^:=DstChars[i];
484 inc(p);
485 end else begin
486 // delete a character
487 // all following characters are moved
488 // => use an optimized algorithm for this
489 ReplaceASCIIWithDelete(p);
490 exit;
491 end;
492 end;
493 end;
494 end;
495
496 procedure BuildMultiByteCompareArray(var List: PItem; var Count: SizeInt);
497 var
498 SrcP: PChar;
499 DstP: PChar;
500 Item: PItem;
501 i: Integer;
502 begin
503 Count:=UTF8Length(SrcChars);
504 GetMem(List,Count*SizeOf(TItem));
505 FillByte(List^,Count*SizeOf(TItem),0);
506 SrcP:=PChar(SrcChars);
507 DstP:=PChar(DstChars);
508 Item:=List;
509 for i:=1 to Count do begin
510 Item^.SrcLen:=UTF8CodepointSize(SrcP);
511 Move(SrcP^,Item^.Src[0],Item^.SrcLen);
512 if (DstP^<>#0) or (DstP-PChar(DstChars)<length(DstChars)) then begin
513 Item^.DstLen:=UTF8CodepointSize(DstP);
514 Move(DstP^,Item^.Dst[0],Item^.DstLen);
515 end;
516 inc(Item);
517 inc(SrcP,UTF8CodepointSize(SrcP));
518 inc(DstP,UTF8CodepointSize(DstP));
519 end;
520 end;
521
FindItemnull522 function FindItem(var List: PItem; var ListLen: SizeInt; p: PChar; clen: integer): PItem; inline;
523 // Search p in list
524 var
525 Item: PItem;
526 i: SizeInt;
527 c: Char;
528 j: Integer;
529 begin
530 if List=nil then
531 BuildMultiByteCompareArray(List,ListLen);
532 Item:=List;
533 c:=p^;
534 for i:=0 to ListLen-1 do begin
535 if (Item^.SrcLen=clen)
536 and (Item^.Src[0]=c) then begin
537 j:=1;
538 while true do begin
539 if (Item^.Src[j]=#0) then
540 exit(Item);
541 if (Item^.Src[j]<>p[j]) then break;
542 inc(j);
543 end;
544 end;
545 inc(Item);
546 end;
547 Result:=nil;
548 end;
549
550 procedure ReplaceMultiByteWithResize(List: PItem; ListLen: SizeInt; Src: PChar);
551 var
552 c: Char;
553 clen: Integer;
554 NewSIndex, i: SizeInt;
555 Item: PItem;
556 NewS: string;
557 NewSP: PChar;
558 NewCharLen: Integer;
559 NewCharP: PChar;
560 begin
561 SetLength(NewS,length(s));
562 NewSIndex:=Src-PChar(s)+1;
563 if NewSIndex>1 then
564 Move(s[1],NewS[1],NewSIndex-1);
565 while true do begin
566 c:=Src^;
567 if (c=#0) and (Src-PChar(s)=length(s)) then break;
568 clen:=UTF8CodepointSize(Src);
569 NewCharP:=Src;
570 NewCharLen:=clen;
571 // do a quick test via Pos
572 i:=Pos(c,SrcChars);
573 if i>0 then begin
574 // quick test positive, now search correctly
575 Item:=FindItem(List,ListLen,Src,clen);
576 if Item<>nil then begin
577 // replace
578 NewCharP:=@Item^.Dst[0];
579 NewCharLen:=Item^.DstLen;
580 end;
581 end;
582 inc(Src,clen);
583 if NewSIndex+NewCharLen-1>length(NewS) then begin
584 // need more space => grow
585 SetLength(NewS,NewSIndex+((length(NewS)-NewSIndex-NewCharLen)*3 div 2)+2);
586 end;
587 // copy character
588 NewSP:=@NewS[NewSIndex];
589 for i:=1 to NewCharLen do begin
590 NewSP^:=NewCharP^;
591 inc(NewCharP);
592 inc(NewSP);
593 end;
594 inc(NewSIndex,NewCharLen);
595 end;
596 s:=LeftStr(NewS,NewSIndex-1);
597 end;
598
599 procedure ReplaceMultiByte;
600 var
601 p: PChar;
602 clen: Integer;
603 c: Char;
604 i: SizeInt;
605 List: PItem;
606 ListLen: SizeInt;
607 Item: PItem;
608 begin
609 p:=PChar(s);
610 List:=nil;
611 ListLen:=0;
612 try
613 while true do begin
614 c:=p^;
615 if (c=#0) and (p-PChar(s)=length(s)) then break;
616 clen:=UTF8CodepointSize(p);
617 // do a quick test via Pos
618 i:=Pos(c,SrcChars);
619 if i>0 then begin
620 // quick test positive, now search correctly
621 Item:=FindItem(List,ListLen,p,clen);
622 if Item<>nil then begin
623 // replace
624 if Item^.DstLen=clen then begin
625 // simple replace
626 UniqString(p);
627 Move(Item^.Dst[0],p^,clen);
628 end else begin
629 // replace with different size
630 // all following characters are moved
631 // => use an optimized algorithm for this
632 ReplaceMultiByteWithResize(List,ListLen,p);
633 exit;
634 end;
635 end;
636 end;
637 inc(p,clen);
638 end;
639 finally
640 if List<>nil then Freemem(List);
641 end;
642 end;
643
644 begin
645 if (SrcChars='') or (s='') or (SrcChars=DstChars) then exit;
646
647 unique:=false;
648 if IsASCII(SrcChars) and IsASCII(DstChars) then begin
649 // search and replace single byte characters
650 ReplaceASCII;
651 exit;
652 end;
653 // search for multi byte UTF-8 characters
654 ReplaceMultiByte;
655 end;
656
Hashnull657 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
658 begin
659 Result := InitValue;
660 while KeyLen <> 0 do
661 begin
662 {$ifdef overflow_check}{$q-}{$endif}
663 Result := Result * $F4243 xor ord(Key^);
664 {$ifdef overflow_check}{$q+}{$endif}
665 Inc(Key);
666 Dec(KeyLen);
667 end;
668 end;
669
KeyComparenull670 function KeyCompare(const Key1: TXMLUtilString; Key2: Pointer; Key2Len: Integer): Boolean;
671 begin
672 {$IF defined(FPC) and (SizeOf(TXMLUtilChar)=2)}
673 Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
674 {$ELSE}
675 Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*SizeOf(TXMLUtilChar));
676 {$ENDIF}
677 end;
678
679 { THashTable }
680
681 constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
682 var
683 I: Integer;
684 begin
685 inherited Create;
686 FOwnsObjects := OwnObjects;
687 I := 256;
688 while I < InitSize do I := I shl 1;
689 FBucketCount := I;
690 FBucket := AllocMem(I * sizeof(PHashItem));
691 end;
692
693 destructor THashTable.Destroy;
694 begin
695 Clear;
696 FreeMem(FBucket);
697 inherited Destroy;
698 end;
699
700 procedure THashTable.Clear;
701 var
702 I: Integer;
703 item, next: PHashItem;
704 begin
705 for I := 0 to FBucketCount-1 do
706 begin
707 item := FBucket^[I];
708 while Assigned(item) do
709 begin
710 next := item^.Next;
711 if FOwnsObjects then
712 item^.Data.Free;
713 Dispose(item);
714 item := next;
715 end;
716 FBucket^[I] := nil;
717 end;
718 end;
719
THashTable.Findnull720 function THashTable.Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
721 var
722 Dummy: Boolean;
723 begin
724 Result := Lookup(Key, KeyLen, Dummy, False);
725 end;
726
FindOrAddnull727 function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer;
728 out Found: Boolean): PHashItem;
729 begin
730 Result := Lookup(Key, KeyLen, Found, True);
731 end;
732
FindOrAddnull733 function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
734 var
735 Dummy: Boolean;
736 begin
737 Result := Lookup(Key, KeyLen, Dummy, True);
738 end;
739
Getnull740 function THashTable.Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
741 var
742 e: PHashItem;
743 Dummy: Boolean;
744 begin
745 e := Lookup(Key, KeyLen, Dummy, False);
746 if Assigned(e) then
747 Result := e^.Data
748 else
749 Result := nil;
750 end;
751
Lookupnull752 function THashTable.Lookup(Key: PXMLUtilChar; KeyLength: Integer;
753 out Found: Boolean; CanCreate: Boolean): PHashItem;
754 var
755 Entry: PPHashItem;
756 h: LongWord;
757 begin
758 h := Hash(0, Key, KeyLength);
759 Entry := @FBucket^[h mod FBucketCount];
760 while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
761 Entry := @Entry^^.Next;
762 Found := Assigned(Entry^);
763 if Found or (not CanCreate) then
764 begin
765 Result := Entry^;
766 Exit;
767 end;
768 if FCount > ((FBucketCount*7) div 8) then
769 begin
770 Resize(FBucketCount * 2);
771 Result := Lookup(Key, KeyLength, Found, CanCreate);
772 end
773 else
774 begin
775 New(Result);
776 // SetString for TXMLUtilStrings trims on zero chars [fixed, #14740]
777 SetLength(Result^.Key, KeyLength);
778 Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(TXMLUtilChar));
779 Result^.HashValue := h;
780 Result^.Data := nil;
781 Result^.Next := nil;
782 Inc(FCount);
783 Entry^ := Result;
784 end;
785 end;
786
787 procedure THashTable.Resize(NewCapacity: LongWord);
788 var
789 p: PHashItemArray;
790 chain: PPHashItem;
791 i: Integer;
792 e, n: PHashItem;
793 begin
794 p := AllocMem(NewCapacity * sizeof(PHashItem));
795 for i := 0 to FBucketCount-1 do
796 begin
797 e := FBucket^[i];
798 while Assigned(e) do
799 begin
800 chain := @p^[e^.HashValue mod NewCapacity];
801 n := e^.Next;
802 e^.Next := chain^;
803 chain^ := e;
804 e := n;
805 end;
806 end;
807 FBucketCount := NewCapacity;
808 FreeMem(FBucket);
809 FBucket := p;
810 end;
811
THashTable.Removenull812 function THashTable.Remove(Entry: PHashItem): Boolean;
813 var
814 chain: PPHashItem;
815 begin
816 chain := @FBucket^[Entry^.HashValue mod FBucketCount];
817 while Assigned(chain^) do
818 begin
819 if chain^ = Entry then
820 begin
821 chain^ := Entry^.Next;
822 if FOwnsObjects then
823 Entry^.Data.Free;
824 Dispose(Entry);
825 Dec(FCount);
826 Result := True;
827 Exit;
828 end;
829 chain := @chain^^.Next;
830 end;
831 Result := False;
832 end;
833
834 // this does not free the aData object
THashTable.RemoveDatanull835 function THashTable.RemoveData(aData: TObject): Boolean;
836 var
837 i: Integer;
838 chain: PPHashItem;
839 e: PHashItem;
840 begin
841 for i := 0 to FBucketCount-1 do
842 begin
843 chain := @FBucket^[i];
844 while Assigned(chain^) do
845 begin
846 if chain^^.Data = aData then
847 begin
848 e := chain^;
849 chain^ := e^.Next;
850 Dispose(e);
851 Dec(FCount);
852 Result := True;
853 Exit;
854 end;
855 chain := @chain^^.Next;
856 end;
857 end;
858 Result := False;
859 end;
860
861 procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
862 var
863 i: Integer;
864 e: PHashItem;
865 begin
866 for i := 0 to FBucketCount-1 do
867 begin
868 e := FBucket^[i];
869 while Assigned(e) do
870 begin
871 if not proc(e, arg) then
872 Exit;
873 e := e^.Next;
874 end;
875 end;
876 end;
877
878 { TDblHashArray }
879
880 destructor TDblHashArray.Destroy;
881 begin
882 FreeMem(FData);
883 inherited Destroy;
884 end;
885
886 procedure TDblHashArray.Init(NumSlots: Integer);
887 var
888 i: Integer;
889 begin
890 if ((NumSlots * 2) shr FSizeLog) <> 0 then // need at least twice more entries, and no less than 8
891 begin
892 FSizeLog := 3;
893 while (NumSlots shr FSizeLog) <> 0 do
894 Inc(FSizeLog);
895 ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
896 FRevision := 0;
897 end;
898 if FRevision = 0 then
899 begin
900 FRevision := $FFFFFFFF;
901 for i := (1 shl FSizeLog)-1 downto 0 do
902 FData^[i].rev := FRevision;
903 end;
904 Dec(FRevision);
905 end;
906
Locatenull907 function TDblHashArray.Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
908 var
909 step: Byte;
910 mask: LongWord;
911 idx: Integer;
912 HashValue: LongWord;
913 begin
914 HashValue := Hash(0, PXMLUtilChar(uri^), Length(uri^));
915 HashValue := Hash(HashValue, localName, localLength);
916
917 mask := (1 shl FSizeLog) - 1;
918 step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
919 idx := HashValue and mask;
920 result := True;
921 while FData^[idx].rev = FRevision do
922 begin
923 if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
924 (FData^[idx].lnameLen = localLength) and
925 CompareMem(FData^[idx].lname, localName, localLength * sizeof(TXMLUtilChar)) then
926 Exit;
927 if idx < step then
928 Inc(idx, (1 shl FSizeLog) - step)
929 else
930 Dec(idx, step);
931 end;
932 with FData^[idx] do
933 begin
934 rev := FRevision;
935 hash := HashValue;
936 uriPtr := uri;
937 lname := localName;
938 lnameLen := localLength;
939 end;
940 result := False;
941 end;
942
943 { TNSSupport }
944
945 constructor TNSSupport.Create;
946 var
947 b: TBinding;
948 begin
949 inherited Create;
950 FPrefixes := THashTable.Create(16, False);
951 FBindings := TFPList.Create;
952 SetLength(FBindingStack, 16);
953
954 { provide implicit binding for the 'xml' prefix }
955 // TODO: move stduri_xml, etc. to this unit, so they are reused.
956 DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
957 end;
958
959 destructor TNSSupport.Destroy;
960 var
961 I: Integer;
962 begin
963 for I := FBindings.Count-1 downto 0 do
964 TObject(FBindings.List^[I]).Free;
965 FBindings.Free;
966 FPrefixes.Free;
967 inherited Destroy;
968 end;
969
BindPrefixnull970 function TNSSupport.BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
971 begin
972 { try to reuse an existing binding }
973 result := FFreeBindings;
974 if Assigned(result) then
975 FFreeBindings := result.Next
976 else { no free bindings, create a new one }
977 begin
978 result := TBinding.Create;
979 FBindings.Add(result);
980 end;
981
982 { link it into chain of bindings at the current element level }
983 result.Next := FBindingStack[FNesting];
984 FBindingStack[FNesting] := result;
985
986 { bind }
987 result.uri := nsURI;
988 result.Prefix := aPrefix;
989 result.PrevPrefixBinding := aPrefix^.Data;
990 aPrefix^.Data := result;
991 end;
992
TNSSupport.DefaultNSBindingnull993 function TNSSupport.DefaultNSBinding: TBinding;
994 begin
995 result := TBinding(FDefaultPrefix.Data);
996 end;
997
998 procedure TNSSupport.DefineBinding(const Prefix, nsURI: TXMLUtilString;
999 out Binding: TBinding);
1000 var
1001 Pfx: PHashItem;
1002 begin
1003 Pfx := @FDefaultPrefix;
1004 if (nsURI <> '') and (Prefix <> '') then
1005 Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix));
1006 if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
1007 Binding := BindPrefix(nsURI, Pfx)
1008 else
1009 Binding := nil;
1010 end;
1011
TNSSupport.CheckAttributenull1012 function TNSSupport.CheckAttribute(const Prefix, nsURI: TXMLUtilString;
1013 out Binding: TBinding): TAttributeAction;
1014 var
1015 Pfx: PHashItem;
1016 I: Integer;
1017 b: TBinding;
1018 buf: array[0..31] of TXMLUtilChar;
1019 p: PXMLUtilChar;
1020 begin
1021 Binding := nil;
1022 Pfx := nil;
1023 Result := aaUnchanged;
1024 if Prefix <> '' then
1025 Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix))
1026 else if nsURI = '' then
1027 Exit;
1028 { if the prefix is already bound to correct URI, we're done }
1029 if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
1030 Exit;
1031
1032 { see if there's another prefix bound to the target URI }
1033 // TODO: should use something faster than linear search
1034 for i := FNesting downto 0 do
1035 begin
1036 b := FBindingStack[i];
1037 while Assigned(b) do
1038 begin
1039 if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
1040 begin
1041 Binding := b; // found one -> override the attribute's prefix
1042 Result := aaPrefix;
1043 Exit;
1044 end;
1045 b := b.Next;
1046 end;
1047 end;
1048 { no prefix, or bound (to wrong URI) -> use generated prefix instead }
1049 if (Pfx = nil) or Assigned(Pfx^.Data) then
1050 repeat
1051 Inc(FPrefixSeqNo);
1052 i := FPrefixSeqNo; // This is just 'NS'+IntToStr(FPrefixSeqNo);
1053 p := @Buf[high(Buf)]; // done without using strings
1054 while i <> 0 do
1055 begin
1056 p^ := TXMLUtilChar(i mod 10+ord('0'));
1057 dec(p);
1058 i := i div 10;
1059 end;
1060 p^ := 'S'; dec(p);
1061 p^ := 'N';
1062 Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
1063 until Pfx^.Data = nil;
1064 Binding := BindPrefix(nsURI, Pfx);
1065 Result := aaBoth;
1066 end;
1067
IsPrefixBoundnull1068 function TNSSupport.IsPrefixBound(P: PXMLUtilChar; Len: Integer; out
1069 Prefix: PHashItem): Boolean;
1070 begin
1071 Prefix := FPrefixes.FindOrAdd(P, Len);
1072 Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
1073 end;
1074
GetPrefixnull1075 function TNSSupport.GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
1076 begin
1077 if Assigned(P) and (Len > 0) then
1078 Result := FPrefixes.FindOrAdd(P, Len)
1079 else
1080 Result := @FDefaultPrefix;
1081 end;
1082
1083 procedure TNSSupport.StartElement;
1084 begin
1085 Inc(FNesting);
1086 if FNesting >= Length(FBindingStack) then
1087 SetLength(FBindingStack, FNesting * 2);
1088 end;
1089
1090 procedure TNSSupport.EndElement;
1091 var
1092 b, temp: TBinding;
1093 begin
1094 temp := FBindingStack[FNesting];
1095 while Assigned(temp) do
1096 begin
1097 b := temp;
1098 temp := b.next;
1099 b.next := FFreeBindings;
1100 FFreeBindings := b;
1101 b.Prefix^.Data := b.prevPrefixBinding;
1102 end;
1103 FBindingStack[FNesting] := nil;
1104 if FNesting > 0 then
1105 Dec(FNesting);
1106 end;
1107
1108
1109 initialization
1110
1111 finalization
1112 if Assigned(Xml11Pg) then
1113 FreeMem(Xml11Pg);
1114
1115 end.
1116