1 {
2  **********************************************************************
3   This file is part of LazUtils.
4   It is copied and modified from a file in Free Component Library.
5 
6   See the file COPYING.FPC, included in this distribution,
7   for details about the license.
8  **********************************************************************
9 
10   XML utility routines.
11   Copyright (c) 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
12 
13 }
14 unit laz2_xmlutils;
15 
16 {$ifdef fpc}{$mode objfpc}{$endif}
17 {$H+}
18 {$ifopt Q+}{$define overflow_check}{$endif}
19 {$R-}
20 
21 interface
22 
23 uses
24   SysUtils, Classes, LazUTF8;
25 
26 type
27   TXMLUtilString = AnsiString;
28   TXMLUtilChar = Char;
29   PXMLUtilChar = PChar;
30   PXMLUtilString = ^TXMLUtilString;
31 
IsXmlNamenull32 function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean; overload;
IsXmlNamenull33 function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean; overload;
IsXmlNamesnull34 function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsXmlNmTokennull35 function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsXmlNmTokensnull36 function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean = False): Boolean;
IsValidXmlEncodingnull37 function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
Xml11NamePagesnull38 function Xml11NamePages: PByteArray;
39 procedure NormalizeSpaces(var Value: TXMLUtilString);
IsXmlWhiteSpacenull40 function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
Hashnull41 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
42 { beware, works in ASCII range only }
XUStrLICompnull43 function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
44 
45 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
46 
47 { a simple hash table with TXMLUtilString keys }
48 
49 type
50 {$ifndef fpc}
51   PtrInt = LongInt;
52   TFPList = TList;
53 {$endif}
54 
55   PPHashItem = ^PHashItem;
56   PHashItem = ^THashItem;
57   THashItem = record
58     Key: TXMLUtilString;
59     HashValue: LongWord;
60     Next: PHashItem;
61     Data: TObject;
62   end;
63   THashItemArray = array[0..MaxInt div sizeof(Pointer)-1] of PHashItem;
64   PHashItemArray = ^THashItemArray;
65 
ntrynull66   THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
67 
68   THashTable = class(TObject)
69   private
70     FCount: LongWord;
71     FBucketCount: LongWord;
72     FBucket: PHashItemArray;
73     FOwnsObjects: Boolean;
Lookupnull74     function Lookup(Key: PXMLUtilChar; KeyLength: Integer; out Found: Boolean; CanCreate: Boolean): PHashItem;
75     procedure Resize(NewCapacity: LongWord);
76   public
77     constructor Create(InitSize: Integer; OwnObjects: Boolean);
78     destructor Destroy; override;
79     procedure Clear;
Findnull80     function Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
FindOrAddnull81     function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer; out Found: Boolean): PHashItem; overload;
FindOrAddnull82     function FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem; overload;
Getnull83     function Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
Removenull84     function Remove(Entry: PHashItem): Boolean;
RemoveDatanull85     function RemoveData(aData: TObject): Boolean;
86     procedure ForEach(proc: THashForEach; arg: Pointer);
87     property Count: LongWord read FCount;
88   end;
89 
90 { another hash, for detecting duplicate namespaced attributes without memory allocations }
91 
92   TExpHashEntry = record
93     rev: LongWord;
94     hash: LongWord;
95     uriPtr: PXMLUtilString;
96     lname: PXMLUtilChar;
97     lnameLen: Integer;
98   end;
99   TExpHashEntryArray = array[0..MaxInt div sizeof(TExpHashEntry)-1] of TExpHashEntry;
100   PExpHashEntryArray = ^TExpHashEntryArray;
101 
102   TDblHashArray = class(TObject)
103   private
104     FSizeLog: Integer;
105     FRevision: LongWord;
106     FData: PExpHashEntryArray;
107   public
108     procedure Init(NumSlots: Integer);
Locatenull109     function Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
110     destructor Destroy; override;
111   end;
112 
113   TBinding = class
114   public
115     uri: TXMLUtilString;
116     next: TBinding;
117     prevPrefixBinding: TObject;
118     Prefix: PHashItem;
119   end;
120 
121   TAttributeAction = (
122     aaUnchanged,
123     aaPrefix,         // only override the prefix
124     aaBoth            // override prefix and emit namespace definition
125   );
126 
127   TNSSupport = class(TObject)
128   private
129     FNesting: Integer;
130     FPrefixSeqNo: Integer;
131     FFreeBindings: TBinding;
132     FBindings: TFPList;
133     FBindingStack: array of TBinding;
134     FPrefixes: THashTable;
135     FDefaultPrefix: THashItem;
136   public
137     constructor Create;
138     destructor Destroy; override;
139     procedure DefineBinding(const Prefix, nsURI: TXMLUtilString; out Binding: TBinding);
CheckAttributenull140     function CheckAttribute(const Prefix, nsURI: TXMLUtilString;
141       out Binding: TBinding): TAttributeAction;
IsPrefixBoundnull142     function IsPrefixBound(P: PXMLUtilChar; Len: Integer; out Prefix: PHashItem): Boolean;
GetPrefixnull143     function GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
BindPrefixnull144     function BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
DefaultNSBindingnull145     function DefaultNSBinding: TBinding;
146     procedure StartElement;
147     procedure EndElement;
148   end;
149 
150 {$i laz2_names.inc}
151 
152 implementation
153 
154 var
155   Xml11Pg: PByteArray = nil;
156 
Xml11NamePagesnull157 function Xml11NamePages: PByteArray;
158 var
159   I: Integer;
160   p: PByteArray;
161 begin
162   if Xml11Pg = nil then
163   begin
164     GetMem(p, 512);
165     for I := 0 to 255 do
166       p^[I] := ord(Byte(I) in Xml11HighPages);
167     p^[0] := 2;
168     p^[3] := $2c;
169     p^[$20] := $2a;
170     p^[$21] := $2b;
171     p^[$2f] := $29;
172     p^[$30] := $2d;
173     p^[$fd] := $28;
174     p^[$ff] := $30;
175 
176     Move(p^, p^[256], 256);
177     p^[$100] := $19;
178     p^[$103] := $2E;
179     p^[$120] := $2F;
180     Xml11Pg := p;
181   end;
182   Result := Xml11Pg;
183 end;
184 
IsXml11Charnull185 function IsXml11Char(Value: PXMLUtilChar; var Index: Integer): Boolean; overload;
186 begin
187   if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
188   begin
189     Inc(Index);
190     Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
191   end
192   else
193     Result := False;
194 end;
195 
IsXml11Charnull196 function IsXml11Char(const Value: TXMLUtilString; var Index: Integer): Boolean; overload;
197 begin
198   if (Value[Index] >= #$D800) and (Value[Index] <= #$DB7F) then
199   begin
200     Inc(Index);
201     Result := (Value[Index] >= #$DC00) and (Value[Index] <= #$DFFF);
202   end
203   else
204     Result := False;
205 end;
206 
IsXmlNamenull207 function IsXmlName(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
208 begin
209   Result := IsXmlName(PXMLUtilChar(Value), Length(Value), Xml11);
210 end;
211 
IsXmlNamenull212 function IsXmlName(Value: PXMLUtilChar; Len: Integer; Xml11: Boolean = False): Boolean;
213 var
214   Pages: PByteArray;
215   I: Integer;
216 begin
217   Result := False;
218   if Xml11 then
219     Pages := Xml11NamePages
220   else
221     Pages := @NamePages;
222 
223   I := 0;
224   if (Len = 0) or not ((Byte(Value[I]) in NamingBitmap[Pages^[hi(Word(Value[I]))]]) or
225     (Value[I] = ':') or
226     (Xml11 and IsXml11Char(Value, I))) then
227       Exit;
228   Inc(I);
229   while I < Len do
230   begin
231     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
232       (Value[I] = ':') or
233       (Xml11 and IsXml11Char(Value, I))) then
234         Exit;
235     Inc(I);
236   end;
237   Result := True;
238 end;
239 
IsXmlNamesnull240 function IsXmlNames(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
241 var
242   Pages: PByteArray;
243   I: Integer;
244   Offset: Integer;
245 begin
246   if Xml11 then
247     Pages := Xml11NamePages
248   else
249     Pages := @NamePages;
250   Result := False;
251   if Value = '' then
252     Exit;
253   I := 1;
254   Offset := 0;
255   while I <= Length(Value) do
256   begin
257     if not ((Byte(Value[I]) in NamingBitmap[Pages^[Offset+hi(Word(Value[I]))]]) or
258       (Value[I] = ':') or
259       (Xml11 and IsXml11Char(Value, I))) then
260     begin
261       if (I = Length(Value)) or (Value[I] <> #32) then
262         Exit;
263       Offset := 0;
264       Inc(I);
265       Continue;
266     end;
267     Offset := $100;
268     Inc(I);
269   end;
270   Result := True;
271 end;
272 
IsXmlNmTokennull273 function IsXmlNmToken(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
274 var
275   I: Integer;
276   Pages: PByteArray;
277 begin
278   if Xml11 then
279     Pages := Xml11NamePages
280   else
281     Pages := @NamePages;
282   Result := False;
283   if Value = '' then
284     Exit;
285   I := 1;
286   while I <= Length(Value) do
287   begin
288     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
289       (Value[I] = ':') or
290       (Xml11 and IsXml11Char(Value, I))) then
291         Exit;
292     Inc(I);
293   end;
294   Result := True;
295 end;
296 
IsXmlNmTokensnull297 function IsXmlNmTokens(const Value: TXMLUtilString; Xml11: Boolean): Boolean;
298 var
299   I: Integer;
300   Pages: PByteArray;
301 begin
302   if Xml11 then
303     Pages := Xml11NamePages
304   else
305     Pages := @NamePages;
306   I := 1;
307   Result := False;
308   if Value = '' then
309     Exit;
310   while I <= Length(Value) do
311   begin
312     if not ((Byte(Value[I]) in NamingBitmap[Pages^[$100+hi(Word(Value[I]))]]) or
313       (Value[I] = ':') or
314       (Xml11 and IsXml11Char(Value, I))) then
315     begin
316       if (I = Length(Value)) or (Value[I] <> #32) then
317         Exit;
318     end;
319     Inc(I);
320   end;
321   Result := True;
322 end;
323 
IsValidXmlEncodingnull324 function IsValidXmlEncoding(const Value: TXMLUtilString): Boolean;
325 var
326   I: Integer;
327 begin
328   Result := False;
329   if (Value = '') or (Value[1] > #255) or not (char(ord(Value[1])) in ['A'..'Z', 'a'..'z']) then
330     Exit;
331   for I := 2 to Length(Value) do
332     if (Value[I] > #255) or not (char(ord(Value[I])) in ['A'..'Z', 'a'..'z', '0'..'9', '.', '_', '-']) then
333       Exit;
334   Result := True;
335 end;
336 
337 procedure NormalizeSpaces(var Value: TXMLUtilString);
338 var
339   I, J: Integer;
340 begin
341   I := Length(Value);
342   // speed: trim only whed needed
343   if (I > 0) and ((Value[1] = #32) or (Value[I] = #32)) then
344     Value := Trim(Value);
345   I := 1;
346   while I < Length(Value) do
347   begin
348     if Value[I] = #32 then
349     begin
350       J := I+1;
351       while (J <= Length(Value)) and (Value[J] = #32) do Inc(J);
352       if J-I > 1 then Delete(Value, I+1, J-I-1);
353     end;
354     Inc(I);
355   end;
356 end;
357 
IsXmlWhiteSpacenull358 function IsXmlWhiteSpace(c: PXMLUtilChar): Boolean;
359 begin
360   Result := c^ in [#32,#9,#10,#13];
361 end;
362 
XUStrLICompnull363 function XUStrLIComp(S1, S2: PXMLUtilChar; Len: Integer): Integer;
364 var
365   counter: Integer;
366   c1, c2: Word;
367 begin
368   counter := 0;
369   result := 0;
370   if Len = 0 then
371     exit;
372   repeat
373     c1 := ord(S1[counter]);
374     c2 := ord(S2[counter]);
375     if (c1 = 0) or (c2 = 0) then break;
376     if c1 <> c2 then
377     begin
378       if c1 in [97..122] then
379         Dec(c1, 32);
380       if c2 in [97..122] then
381         Dec(c2, 32);
382       if c1 <> c2 then
383         Break;
384     end;
385     Inc(counter);
386   until counter >= Len;
387   result := c1 - c2;
388 end;
389 
390 procedure TranslateUTF8Chars(var s: TXMLUtilString; SrcChars, DstChars: string);
391 { replaces characters in s.
392   The mapping is defined by SrcChars and DstChars.
393   The n-th UTF-8 character of SrcChars will be replaced with the n-th
394   character of DstChars. If there is no n-th character in DstChars then the
395   character will be deleted in s.
396 }
397 type
398   TItem = packed record
399     SrcLen: byte;
400     Src: array[0..4] of char;
401     DstLen: byte;
402     Dst: array[0..4] of char;
403   end;
404   PItem = ^TItem;
405 
406 var
407   unique: boolean;
408 
IsASCIInull409   function IsASCII(const h: string): boolean;
410   var
411     i: Integer;
412   begin
413     for i:=1 to length(h) do
414       if ord(h[i])>=128 then exit(false);
415     Result:=true;
416   end;
417 
418   procedure UniqString(var p: PChar); inline;
419   var
420     OldPos: SizeInt;
421   begin
422     if unique then exit;
423     unique:=true;
424     OldPos:=p-PChar(s);
425     UniqueString(s);
426     p:=PChar(s)+OldPos;
427   end;
428 
429   procedure ReplaceASCIIWithDelete(Src: PChar);
430   var
431     c: Char;
432     Dst: PChar;
433     NewLen: SizeInt;
434     i: SizeInt;
435   begin
436     UniqString(Src);
437     Dst:=Src;
438     while true do begin
439       c:=Src^;
440       if (c=#0) and (Src-PChar(s)=length(s)) then break;
441       i:=Pos(c,SrcChars);
442       if i<1 then begin
443         // keep character
444         Dst^:=c;
445         inc(Src);
446         inc(Dst);
447       end else begin
448         if i<=length(DstChars) then begin
449           // replace a character
450           Dst^:=DstChars[i];
451           inc(Src);
452           inc(Dst);
453         end else begin
454           // delete a character = skip
455           inc(Src);
456         end;
457       end;
458     end;
459     NewLen:=Dst-PChar(s);
460     SetLength(s,NewLen);
461   end;
462 
463   procedure ReplaceASCII;
464   // use a simple byte replace
465   // if a delete is needed then switch to another algorithm
466   var
467     i: SizeInt;
468     p: PChar;
469     c: Char;
470   begin
471     p:=PChar(s);
472     while true do begin
473       c:=p^;
474       if (c=#0) and (p-PChar(s)=length(s)) then break;
475       i:=Pos(c,SrcChars);
476       if i<1 then begin
477         // keep character
478         inc(p);
479       end else begin
480         if i<=length(DstChars) then begin
481           // replace a character
482           UniqString(p);
483           p^:=DstChars[i];
484           inc(p);
485         end else begin
486           // delete a character
487           // all following characters are moved
488           // => use an optimized algorithm for this
489           ReplaceASCIIWithDelete(p);
490           exit;
491         end;
492       end;
493     end;
494   end;
495 
496   procedure BuildMultiByteCompareArray(var List: PItem; var Count: SizeInt);
497   var
498     SrcP: PChar;
499     DstP: PChar;
500     Item: PItem;
501     i: Integer;
502   begin
503     Count:=UTF8Length(SrcChars);
504     GetMem(List,Count*SizeOf(TItem));
505     FillByte(List^,Count*SizeOf(TItem),0);
506     SrcP:=PChar(SrcChars);
507     DstP:=PChar(DstChars);
508     Item:=List;
509     for i:=1 to Count do begin
510       Item^.SrcLen:=UTF8CodepointSize(SrcP);
511       Move(SrcP^,Item^.Src[0],Item^.SrcLen);
512       if (DstP^<>#0) or (DstP-PChar(DstChars)<length(DstChars)) then begin
513         Item^.DstLen:=UTF8CodepointSize(DstP);
514         Move(DstP^,Item^.Dst[0],Item^.DstLen);
515       end;
516       inc(Item);
517       inc(SrcP,UTF8CodepointSize(SrcP));
518       inc(DstP,UTF8CodepointSize(DstP));
519     end;
520   end;
521 
FindItemnull522   function FindItem(var List: PItem; var ListLen: SizeInt; p: PChar; clen: integer): PItem; inline;
523   // Search p in list
524   var
525     Item: PItem;
526     i: SizeInt;
527     c: Char;
528     j: Integer;
529   begin
530     if List=nil then
531       BuildMultiByteCompareArray(List,ListLen);
532     Item:=List;
533     c:=p^;
534     for i:=0 to ListLen-1 do begin
535       if (Item^.SrcLen=clen)
536       and (Item^.Src[0]=c) then begin
537         j:=1;
538         while true do begin
539           if (Item^.Src[j]=#0) then
540             exit(Item);
541           if (Item^.Src[j]<>p[j]) then break;
542           inc(j);
543         end;
544       end;
545       inc(Item);
546     end;
547     Result:=nil;
548   end;
549 
550   procedure ReplaceMultiByteWithResize(List: PItem; ListLen: SizeInt; Src: PChar);
551   var
552     c: Char;
553     clen: Integer;
554     NewSIndex, i: SizeInt;
555     Item: PItem;
556     NewS: string;
557     NewSP: PChar;
558     NewCharLen: Integer;
559     NewCharP: PChar;
560   begin
561     SetLength(NewS,length(s));
562     NewSIndex:=Src-PChar(s)+1;
563     if NewSIndex>1 then
564       Move(s[1],NewS[1],NewSIndex-1);
565     while true do begin
566       c:=Src^;
567       if (c=#0) and (Src-PChar(s)=length(s)) then break;
568       clen:=UTF8CodepointSize(Src);
569       NewCharP:=Src;
570       NewCharLen:=clen;
571       // do a quick test via Pos
572       i:=Pos(c,SrcChars);
573       if i>0 then begin
574         // quick test positive, now search correctly
575         Item:=FindItem(List,ListLen,Src,clen);
576         if Item<>nil then begin
577           // replace
578           NewCharP:=@Item^.Dst[0];
579           NewCharLen:=Item^.DstLen;
580         end;
581       end;
582       inc(Src,clen);
583       if NewSIndex+NewCharLen-1>length(NewS) then begin
584         // need more space => grow
585         SetLength(NewS,NewSIndex+((length(NewS)-NewSIndex-NewCharLen)*3 div 2)+2);
586       end;
587       // copy character
588       NewSP:=@NewS[NewSIndex];
589       for i:=1 to NewCharLen do begin
590         NewSP^:=NewCharP^;
591         inc(NewCharP);
592         inc(NewSP);
593       end;
594       inc(NewSIndex,NewCharLen);
595     end;
596     s:=LeftStr(NewS,NewSIndex-1);
597   end;
598 
599   procedure ReplaceMultiByte;
600   var
601     p: PChar;
602     clen: Integer;
603     c: Char;
604     i: SizeInt;
605     List: PItem;
606     ListLen: SizeInt;
607     Item: PItem;
608   begin
609     p:=PChar(s);
610     List:=nil;
611     ListLen:=0;
612     try
613       while true do begin
614         c:=p^;
615         if (c=#0) and (p-PChar(s)=length(s)) then break;
616         clen:=UTF8CodepointSize(p);
617         // do a quick test via Pos
618         i:=Pos(c,SrcChars);
619         if i>0 then begin
620           // quick test positive, now search correctly
621           Item:=FindItem(List,ListLen,p,clen);
622           if Item<>nil then begin
623             // replace
624             if Item^.DstLen=clen then begin
625               // simple replace
626               UniqString(p);
627               Move(Item^.Dst[0],p^,clen);
628             end else begin
629               // replace with different size
630               // all following characters are moved
631               // => use an optimized algorithm for this
632               ReplaceMultiByteWithResize(List,ListLen,p);
633               exit;
634             end;
635           end;
636         end;
637         inc(p,clen);
638       end;
639     finally
640       if List<>nil then Freemem(List);
641     end;
642   end;
643 
644 begin
645   if (SrcChars='') or (s='') or (SrcChars=DstChars) then exit;
646 
647   unique:=false;
648   if IsASCII(SrcChars) and IsASCII(DstChars) then begin
649     // search and replace single byte characters
650     ReplaceASCII;
651     exit;
652   end;
653   // search for multi byte UTF-8 characters
654   ReplaceMultiByte;
655 end;
656 
Hashnull657 function Hash(InitValue: LongWord; Key: PXMLUtilChar; KeyLen: Integer): LongWord;
658 begin
659   Result := InitValue;
660   while KeyLen <> 0 do
661   begin
662 {$ifdef overflow_check}{$q-}{$endif}
663     Result := Result * $F4243 xor ord(Key^);
664 {$ifdef overflow_check}{$q+}{$endif}
665     Inc(Key);
666     Dec(KeyLen);
667   end;
668 end;
669 
KeyComparenull670 function KeyCompare(const Key1: TXMLUtilString; Key2: Pointer; Key2Len: Integer): Boolean;
671 begin
672 {$IF defined(FPC) and (SizeOf(TXMLUtilChar)=2)}
673   Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
674 {$ELSE}
675   Result := (Length(Key1)=Key2Len) and CompareMem(Pointer(Key1), Key2, Key2Len*SizeOf(TXMLUtilChar));
676 {$ENDIF}
677 end;
678 
679 { THashTable }
680 
681 constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
682 var
683   I: Integer;
684 begin
685   inherited Create;
686   FOwnsObjects := OwnObjects;
687   I := 256;
688   while I < InitSize do I := I shl 1;
689   FBucketCount := I;
690   FBucket := AllocMem(I * sizeof(PHashItem));
691 end;
692 
693 destructor THashTable.Destroy;
694 begin
695   Clear;
696   FreeMem(FBucket);
697   inherited Destroy;
698 end;
699 
700 procedure THashTable.Clear;
701 var
702   I: Integer;
703   item, next: PHashItem;
704 begin
705   for I := 0 to FBucketCount-1 do
706   begin
707     item := FBucket^[I];
708     while Assigned(item) do
709     begin
710       next := item^.Next;
711       if FOwnsObjects then
712         item^.Data.Free;
713       Dispose(item);
714       item := next;
715     end;
716     FBucket^[I] := nil;
717   end;
718 end;
719 
THashTable.Findnull720 function THashTable.Find(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
721 var
722   Dummy: Boolean;
723 begin
724   Result := Lookup(Key, KeyLen, Dummy, False);
725 end;
726 
FindOrAddnull727 function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer;
728   out Found: Boolean): PHashItem;
729 begin
730   Result := Lookup(Key, KeyLen, Found, True);
731 end;
732 
FindOrAddnull733 function THashTable.FindOrAdd(Key: PXMLUtilChar; KeyLen: Integer): PHashItem;
734 var
735   Dummy: Boolean;
736 begin
737   Result := Lookup(Key, KeyLen, Dummy, True);
738 end;
739 
Getnull740 function THashTable.Get(Key: PXMLUtilChar; KeyLen: Integer): TObject;
741 var
742   e: PHashItem;
743   Dummy: Boolean;
744 begin
745   e := Lookup(Key, KeyLen, Dummy, False);
746   if Assigned(e) then
747     Result := e^.Data
748   else
749     Result := nil;
750 end;
751 
Lookupnull752 function THashTable.Lookup(Key: PXMLUtilChar; KeyLength: Integer;
753   out Found: Boolean; CanCreate: Boolean): PHashItem;
754 var
755   Entry: PPHashItem;
756   h: LongWord;
757 begin
758   h := Hash(0, Key, KeyLength);
759   Entry := @FBucket^[h mod FBucketCount];
760   while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
761     Entry := @Entry^^.Next;
762   Found := Assigned(Entry^);
763   if Found or (not CanCreate) then
764   begin
765     Result := Entry^;
766     Exit;
767   end;
768   if FCount > ((FBucketCount*7) div 8) then
769   begin
770     Resize(FBucketCount * 2);
771     Result := Lookup(Key, KeyLength, Found, CanCreate);
772   end
773   else
774   begin
775     New(Result);
776     // SetString for TXMLUtilStrings trims on zero chars [fixed, #14740]
777     SetLength(Result^.Key, KeyLength);
778     Move(Key^, Pointer(Result^.Key)^, KeyLength*sizeof(TXMLUtilChar));
779     Result^.HashValue := h;
780     Result^.Data := nil;
781     Result^.Next := nil;
782     Inc(FCount);
783     Entry^ := Result;
784   end;
785 end;
786 
787 procedure THashTable.Resize(NewCapacity: LongWord);
788 var
789   p: PHashItemArray;
790   chain: PPHashItem;
791   i: Integer;
792   e, n: PHashItem;
793 begin
794   p := AllocMem(NewCapacity * sizeof(PHashItem));
795   for i := 0 to FBucketCount-1 do
796   begin
797     e := FBucket^[i];
798     while Assigned(e) do
799     begin
800       chain := @p^[e^.HashValue mod NewCapacity];
801       n := e^.Next;
802       e^.Next := chain^;
803       chain^ := e;
804       e := n;
805     end;
806   end;
807   FBucketCount := NewCapacity;
808   FreeMem(FBucket);
809   FBucket := p;
810 end;
811 
THashTable.Removenull812 function THashTable.Remove(Entry: PHashItem): Boolean;
813 var
814   chain: PPHashItem;
815 begin
816   chain := @FBucket^[Entry^.HashValue mod FBucketCount];
817   while Assigned(chain^) do
818   begin
819     if chain^ = Entry then
820     begin
821       chain^ := Entry^.Next;
822       if FOwnsObjects then
823         Entry^.Data.Free;
824       Dispose(Entry);
825       Dec(FCount);
826       Result := True;
827       Exit;
828     end;
829     chain := @chain^^.Next;
830   end;
831   Result := False;
832 end;
833 
834 // this does not free the aData object
THashTable.RemoveDatanull835 function THashTable.RemoveData(aData: TObject): Boolean;
836 var
837   i: Integer;
838   chain: PPHashItem;
839   e: PHashItem;
840 begin
841   for i := 0 to FBucketCount-1 do
842   begin
843     chain := @FBucket^[i];
844     while Assigned(chain^) do
845     begin
846       if chain^^.Data = aData then
847       begin
848         e := chain^;
849         chain^ := e^.Next;
850         Dispose(e);
851         Dec(FCount);
852         Result := True;
853         Exit;
854       end;
855       chain := @chain^^.Next;
856     end;
857   end;
858   Result := False;
859 end;
860 
861 procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
862 var
863   i: Integer;
864   e: PHashItem;
865 begin
866   for i := 0 to FBucketCount-1 do
867   begin
868     e := FBucket^[i];
869     while Assigned(e) do
870     begin
871       if not proc(e, arg) then
872         Exit;
873       e := e^.Next;
874     end;
875   end;
876 end;
877 
878 { TDblHashArray }
879 
880 destructor TDblHashArray.Destroy;
881 begin
882   FreeMem(FData);
883   inherited Destroy;
884 end;
885 
886 procedure TDblHashArray.Init(NumSlots: Integer);
887 var
888   i: Integer;
889 begin
890   if ((NumSlots * 2) shr FSizeLog) <> 0 then   // need at least twice more entries, and no less than 8
891   begin
892     FSizeLog := 3;
893     while (NumSlots shr FSizeLog) <> 0 do
894       Inc(FSizeLog);
895     ReallocMem(FData, (1 shl FSizeLog) * sizeof(TExpHashEntry));
896     FRevision := 0;
897   end;
898   if FRevision = 0 then
899   begin
900     FRevision := $FFFFFFFF;
901     for i := (1 shl FSizeLog)-1 downto 0 do
902       FData^[i].rev := FRevision;
903   end;
904   Dec(FRevision);
905 end;
906 
Locatenull907 function TDblHashArray.Locate(uri: PXMLUtilString; localName: PXMLUtilChar; localLength: Integer): Boolean;
908 var
909   step: Byte;
910   mask: LongWord;
911   idx: Integer;
912   HashValue: LongWord;
913 begin
914   HashValue := Hash(0, PXMLUtilChar(uri^), Length(uri^));
915   HashValue := Hash(HashValue, localName, localLength);
916 
917   mask := (1 shl FSizeLog) - 1;
918   step := (HashValue and (not mask)) shr (FSizeLog-1) and (mask shr 2) or 1;
919   idx := HashValue and mask;
920   result := True;
921   while FData^[idx].rev = FRevision do
922   begin
923     if (HashValue = FData^[idx].hash) and (FData^[idx].uriPtr^ = uri^) and
924       (FData^[idx].lnameLen = localLength) and
925        CompareMem(FData^[idx].lname, localName, localLength * sizeof(TXMLUtilChar)) then
926       Exit;
927     if idx < step then
928       Inc(idx, (1 shl FSizeLog) - step)
929     else
930       Dec(idx, step);
931   end;
932   with FData^[idx] do
933   begin
934     rev := FRevision;
935     hash := HashValue;
936     uriPtr := uri;
937     lname := localName;
938     lnameLen := localLength;
939   end;
940   result := False;
941 end;
942 
943 { TNSSupport }
944 
945 constructor TNSSupport.Create;
946 var
947   b: TBinding;
948 begin
949   inherited Create;
950   FPrefixes := THashTable.Create(16, False);
951   FBindings := TFPList.Create;
952   SetLength(FBindingStack, 16);
953 
954   { provide implicit binding for the 'xml' prefix }
955   // TODO: move stduri_xml, etc. to this unit, so they are reused.
956   DefineBinding('xml', 'http://www.w3.org/XML/1998/namespace', b);
957 end;
958 
959 destructor TNSSupport.Destroy;
960 var
961   I: Integer;
962 begin
963   for I := FBindings.Count-1 downto 0 do
964     TObject(FBindings.List^[I]).Free;
965   FBindings.Free;
966   FPrefixes.Free;
967   inherited Destroy;
968 end;
969 
BindPrefixnull970 function TNSSupport.BindPrefix(const nsURI: TXMLUtilString; aPrefix: PHashItem): TBinding;
971 begin
972   { try to reuse an existing binding }
973   result := FFreeBindings;
974   if Assigned(result) then
975     FFreeBindings := result.Next
976   else { no free bindings, create a new one }
977   begin
978     result := TBinding.Create;
979     FBindings.Add(result);
980   end;
981 
982   { link it into chain of bindings at the current element level }
983   result.Next := FBindingStack[FNesting];
984   FBindingStack[FNesting] := result;
985 
986   { bind }
987   result.uri := nsURI;
988   result.Prefix := aPrefix;
989   result.PrevPrefixBinding := aPrefix^.Data;
990   aPrefix^.Data := result;
991 end;
992 
TNSSupport.DefaultNSBindingnull993 function TNSSupport.DefaultNSBinding: TBinding;
994 begin
995   result := TBinding(FDefaultPrefix.Data);
996 end;
997 
998 procedure TNSSupport.DefineBinding(const Prefix, nsURI: TXMLUtilString;
999   out Binding: TBinding);
1000 var
1001   Pfx: PHashItem;
1002 begin
1003   Pfx := @FDefaultPrefix;
1004   if (nsURI <> '') and (Prefix <> '') then
1005     Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix));
1006   if (Pfx^.Data = nil) or (TBinding(Pfx^.Data).uri <> nsURI) then
1007     Binding := BindPrefix(nsURI, Pfx)
1008   else
1009     Binding := nil;
1010 end;
1011 
TNSSupport.CheckAttributenull1012 function TNSSupport.CheckAttribute(const Prefix, nsURI: TXMLUtilString;
1013   out Binding: TBinding): TAttributeAction;
1014 var
1015   Pfx: PHashItem;
1016   I: Integer;
1017   b: TBinding;
1018   buf: array[0..31] of TXMLUtilChar;
1019   p: PXMLUtilChar;
1020 begin
1021   Binding := nil;
1022   Pfx := nil;
1023   Result := aaUnchanged;
1024   if Prefix <> '' then
1025     Pfx := FPrefixes.FindOrAdd(PXMLUtilChar(Prefix), Length(Prefix))
1026   else if nsURI = '' then
1027     Exit;
1028   { if the prefix is already bound to correct URI, we're done }
1029   if Assigned(Pfx) and Assigned(Pfx^.Data) and (TBinding(Pfx^.Data).uri = nsURI) then
1030     Exit;
1031 
1032   { see if there's another prefix bound to the target URI }
1033   // TODO: should use something faster than linear search
1034   for i := FNesting downto 0 do
1035   begin
1036     b := FBindingStack[i];
1037     while Assigned(b) do
1038     begin
1039       if (b.uri = nsURI) and (b.Prefix <> @FDefaultPrefix) then
1040       begin
1041         Binding := b;   // found one -> override the attribute's prefix
1042         Result := aaPrefix;
1043         Exit;
1044       end;
1045       b := b.Next;
1046     end;
1047   end;
1048   { no prefix, or bound (to wrong URI) -> use generated prefix instead }
1049   if (Pfx = nil) or Assigned(Pfx^.Data) then
1050   repeat
1051     Inc(FPrefixSeqNo);
1052     i := FPrefixSeqNo;    // This is just 'NS'+IntToStr(FPrefixSeqNo);
1053     p := @Buf[high(Buf)]; // done without using strings
1054     while i <> 0 do
1055     begin
1056       p^ := TXMLUtilChar(i mod 10+ord('0'));
1057       dec(p);
1058       i := i div 10;
1059     end;
1060     p^ := 'S'; dec(p);
1061     p^ := 'N';
1062     Pfx := FPrefixes.FindOrAdd(p, @Buf[high(Buf)]-p+1);
1063   until Pfx^.Data = nil;
1064   Binding := BindPrefix(nsURI, Pfx);
1065   Result := aaBoth;
1066 end;
1067 
IsPrefixBoundnull1068 function TNSSupport.IsPrefixBound(P: PXMLUtilChar; Len: Integer; out
1069   Prefix: PHashItem): Boolean;
1070 begin
1071   Prefix := FPrefixes.FindOrAdd(P, Len);
1072   Result := Assigned(Prefix^.Data) and (TBinding(Prefix^.Data).uri <> '');
1073 end;
1074 
GetPrefixnull1075 function TNSSupport.GetPrefix(P: PXMLUtilChar; Len: Integer): PHashItem;
1076 begin
1077   if Assigned(P) and (Len > 0) then
1078     Result := FPrefixes.FindOrAdd(P, Len)
1079   else
1080     Result := @FDefaultPrefix;
1081 end;
1082 
1083 procedure TNSSupport.StartElement;
1084 begin
1085   Inc(FNesting);
1086   if FNesting >= Length(FBindingStack) then
1087     SetLength(FBindingStack, FNesting * 2);
1088 end;
1089 
1090 procedure TNSSupport.EndElement;
1091 var
1092   b, temp: TBinding;
1093 begin
1094   temp := FBindingStack[FNesting];
1095   while Assigned(temp) do
1096   begin
1097     b := temp;
1098     temp := b.next;
1099     b.next := FFreeBindings;
1100     FFreeBindings := b;
1101     b.Prefix^.Data := b.prevPrefixBinding;
1102   end;
1103   FBindingStack[FNesting] := nil;
1104   if FNesting > 0 then
1105     Dec(FNesting);
1106 end;
1107 
1108 
1109 initialization
1110 
1111 finalization
1112   if Assigned(Xml11Pg) then
1113     FreeMem(Xml11Pg);
1114 
1115 end.
1116