1 { *************************************************************************** }
2 {                                                                             }
3 { EControl Syntax Editor SDK                                                  }
4 {                                                                             }
5 { Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael                    }
6 {     www.econtrol.ru                                                         }
7 {     support@econtrol.ru                                                     }
8 {                                                                             }
9 { Changes in Lazarus port: by Alexey Torgashin (CudaText)                     }
10 {                                                                             }
11 { *************************************************************************** }
12 
13 {$mode delphi}
14 
15 unit ec_RegExpr;
16 
17 interface
18 
19 uses
20   Classes,
21   {$IFDEF RE_DEBUG}ComCtrls,{$ENDIF}
22   ec_StrUtils;
23 
24 const
25   MaskModI = 1;  // modifier /i bit in fModifiers
26   MaskModR = 2;  // -"- /r
27   MaskModS = 4;  // -"- /s
28   MaskModG = 8;  // -"- /g
29   MaskModM = 16; // -"- /m
30   MaskModX = 32; // -"- /x
31 
32 type
33   TecRegExpr = class //(TPersistent)
34   private
35     FProgRoot: TObject;
36     FModifiers: Word;
37     FMatchOK: Boolean;
38     FExpression: ecString;
39     FModifiersStatic: Word;
40     procedure SetModifiers(const Value: Word);
GetModifiernull41     function GetModifier(const Index: Integer): boolean;
GetModifierStrnull42     function GetModifierStr: ecString;
43     procedure SetModifier(const Index: Integer; const Value: boolean);
44     procedure SetModifierStr(const Value: ecString);
GetIsInvalidnull45     function GetIsInvalid: Boolean;
GetMatchLennull46     function GetMatchLen(Idx: integer): integer;
GetMatchPosnull47     function GetMatchPos(Idx: integer): integer;
GetSubExprMatchCountnull48     function GetSubExprMatchCount: integer;
49     procedure SetExpression(const Value: ecString);
50     procedure ClearRoot;
IsEmptynull51     function IsEmpty: Boolean;
52     procedure ParseModifiers(const S: PWideChar; Len: integer; var Modifiers: Word); // Alexey
53   public
54     constructor Create;
55     destructor Destroy; override;
56     procedure Assign(Source: TecRegExpr);
57 
Compilenull58     function Compile: Boolean; overload;
59     procedure Compile(const AExpression: UCString); overload;
Matchnull60     function Match(const InputString: UCString; var aPos: integer; Back: Boolean = False): Boolean; overload;
MatchLengthnull61     //function MatchLength(const InputString: AnsiString; aPos: integer; Back: Boolean = False): integer; overload;
62     function MatchLength(const InputString: UCString; aPos: integer; Back: Boolean = False): integer; overload;
GetMatchnull63     //function GetMatch(const InputString: AnsiString; SubIdx: integer): AnsiString; overload;
64     function GetMatch(const InputString: UCString; SubIdx: integer): UCString; overload;
Substitutenull65     function Substitute (const InputString, ATemplate : ecString) : ecString;
66 
67     property ModifierMask: Word read FModifiers write SetModifiers;
68     property ModifierI: boolean index MaskModI read GetModifier write SetModifier;
69     property ModifierR: boolean index MaskModR read GetModifier write SetModifier;
70     property ModifierS: boolean index MaskModS read GetModifier write SetModifier;
71     property ModifierG: boolean index MaskModG read GetModifier write SetModifier;
72     property ModifierM: boolean index MaskModM read GetModifier write SetModifier;
73     property ModifierX: boolean index MaskModX read GetModifier write SetModifier;
74     property IsInvalid: Boolean read GetIsInvalid;
75 
76     property MatchPos[Idx: integer]: integer read GetMatchPos;
77     property MatchLen[Idx: integer]: integer read GetMatchLen;
78     property SubExprMatchCount: integer read GetSubExprMatchCount;
79   published
80     property Expression: ecString read FExpression write SetExpression;
81     property ModifierStr: ecString read GetModifierStr write SetModifierStr stored False;
82     property ModifierFlags: Word read FModifiers write FModifiers;
83     property ModifiersStatic: Word read FModifiersStatic write FModifiersStatic;
84   end;
85 
86 type
lassCodenull87   TGetCustomCharClass = function(ClassCode: UCChar): Boolean;
lassCodenull88   TCheckCustomCharClass = function(ClassCode: UCChar; CharCode: Word; var IsInClass: Boolean): Boolean;
89 
90 var
91   DefaultModifiers: integer = MaskModI or MaskModR or MaskModG or MaskModM or MaskModX;
92   //GetCustomCharClassProc: TGetCustomCharClass = nil;
93   //CheckCustomCharClassProc: TCheckCustomCharClass = nil;
94 
95 const
96   // Error messages
97   zreUnexpectedEnd = 'Unexpected end of expression';
98   zreUnexpectedModifier = 'Unexpected modifier';
99   zreUnexpectedBracket = 'Unexpected bracket';
100   zreInvalidZeroWidth = 'Invalid zero-width expression';
101 
102 {$IFDEF RE_DEBUG}
103 type TREDebugOnMatchProc = procedure(const TraceStr: string) of object;
104 var  REDebugOnMatchProc: TREDebugOnMatchProc = nil;
105 procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
106 {$ENDIF}
107 
108 
109 implementation
110 
111 uses SysUtils, Contnrs, Math;
112 
BufferHexToIntnull113 function BufferHexToInt(p: PWideChar; Len: integer): integer; //Alexey
114 var
115   N, i: integer;
116   ch: WideChar;
117 begin
118   Result:= 0;
119   for i:= 1 to Len do
120   begin
121     ch:= p^;
122     case ch of
123       '0'..'9':
124         N:= Ord(ch)-Ord('0');
125       'a'..'f':
126         N:= Ord(ch)-(Ord('a')-10);
127       'A'..'F':
128         N:= Ord(ch)-(Ord('A')-10);
129       else
130         exit(-1);
131     end;
132     Inc(p);
133     Result:= Result*16+N;
134   end;
135 end;
136 
BufferStrToIntnull137 function BufferStrToInt(p: PWideChar; Len: integer): integer; //Alexey
138 var
139   N, i: integer;
140   ch: WideChar;
141 begin
142   Result:= 0;
143   for i:= 1 to Len do
144   begin
145     ch:= p^;
146     if IsDigitChar(ch) then
147       N:= Ord(ch)-Ord('0')
148     else
149       exit(-1);
150     Inc(p);
151     Result:= Result*10+N;
152   end;
153 end;
154 
155 
156 {$IFDEF RE_DEBUG}
157 var
158   LastNodeID: integer;
159 {$ENDIF}
160 
161 type
162   TreSubExpr = class;
163   TreRootNode = class;
164   // Base node class
165   TRENodeBase = class
166   private
167     {$IFDEF RE_DEBUG}
168     FNodeId: integer;
169     {$ENDIF}
170     FLoopMin: integer;    // repeat at least
171     FLoopMax: integer;    // not more than
172     FNonGreedy: Boolean;  // is greedy
173     FNext: TRENodeBase;
174     FOwner: TreSubExpr;
GetRootnull175     function GetRoot: TreRootNode;
176   protected
177   public
178     constructor Create(AOwner: TreSubExpr); virtual;
179 
Matchnull180     function Match(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
BackMatchnull181     function BackMatch(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
182 
183     property Next: TRENodeBase read FNext write FNext;
184     property Owner: TreSubExpr read FOwner;
185     property Root: TreRootNode read GetRoot;
186   end;
187 
188   // Char set & String
189   TCharSetNode = class(TRENodeBase)
190   private
191     FIgnoreCase: Boolean;
192     FInvert: Boolean;
193     FCharRanges: UCString;
194     FCharSets: UCString;
195     FCharArray: UCString;
196     {
197     procedure AddChar(C: AnsiChar); overload;
198     procedure AddRange(st, en: AnsiChar); overload;
199     function HasChar(C: AnsiChar): Boolean; overload;
200     }
201     procedure AddChar(C: UCChar); overload;
202     procedure AddRange(st, en: UCChar); overload;
203     procedure AddSet(C: UCChar); overload;
HasCharnull204     function HasChar(C: UCChar): Boolean; overload;
205   public
Matchnull206     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull207     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
208   end;
209 
210   // Simple text
211   TCharSeqNode = class(TRENodeBase)
212   private
213     FIgnoreCase: Boolean;
214     FChar: UCChar;
215   public
Matchnull216     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull217     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
218   end;
219 
220   // Special ckecking
221   TSpecCheckNode = class(TRENodeBase)
222   private
223     FCheckType: UCChar;
224   public
Matchnull225     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull226     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
227   end;
228 
229   // Reference to sub expression
230 
231   { TRefNode }
232 
233   TRefNode = class(TRENodeBase)
234   private
235     FRef: integer;
236     FIgnoreCase: Boolean;
GetExprStrnull237     //function GetExprStr(const InputString: AnsiString): AnsiString; overload;
238     //function GetExprStr(const InputString: UCString): UCString; overload;
239     procedure GetExprPtr(const InputString: UCString; out Ptr: PWideChar; out Len: integer); // Alexey
240   public
Matchnull241     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull242     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
243   end;
244 
245   // Base class of containers
246   TreListNodeBase = class(TRENodeBase)
247   private
248     FList: TFPObjectList;
249   public
250     constructor Create(AOwner: TreSubExpr); override;
251     destructor Destroy; override;
252     procedure Clear; virtual;
253   end;
254 
255   // Branch of sub expression
256   TreBranchNode = class(TreListNodeBase)
257   private
GetClassCharnull258     function GetClassChar(C: UCChar; Modifiers: Word): UCChar;
259   public
260     procedure Add(Node: TRENodeBase);
261     procedure Invert;
262     {
263     procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
264     }
265     procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
Matchnull266     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull267     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
268   end;
269 
270   // Zero-width testing
271   // (?=RE) - positive, ahead
272   // (?!RE) - negative, ahead
273   // (?<=RE) - positive, behind
274   // (?<!RE) - negative, behind
275   TZeroWidth = class(TRENodeBase)
276   private
277     FIsBack: Boolean;
278     FNegative: Boolean;
279     FBranch: TreBranchNode;
DoResultnull280     function DoResult(MatchRes: integer): integer;
281   public
Matchnull282     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull283     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
284     destructor Destroy; override;
285   end;
286 
287 
288   // Select operation (list of TreListNodeBase) => sub expression
289   TreSubExpr = class(TreListNodeBase)
290   private
291     FStart: integer; // first char of sub expression
292     FEnd: integer; // first char after sub expression
293   public
294     constructor Create(AOwner: TreSubExpr); override;
295     //procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
296     procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
Matchnull297     function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull298     function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
299   end;
300 
301   // Root node
302   TreRootNode = class(TreSubExpr)
303   private
304     FSubExpr: TFPList;
305     FOwner: TecRegExpr;
306   public
307     constructor Create(AOwner: TecRegExpr); reintroduce;
308     destructor Destroy; override;
309     procedure Clear; override;
MatchStrnull310     function MatchStr(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; overload;
311 
312     property Owner: TecRegExpr read FOwner;
313   end;
314 
315 
316 // =============================================================================
317 // Utils functions
318 // =============================================================================
319 
GetAbsoluteNextnull320 function GetAbsoluteNext(Node: TRENodeBase):TRENodeBase;
321 begin
322   Result := Node.Next;
323   if (Result = nil) and (Node.Owner <> nil) then
324     Result := GetAbsoluteNext(Node.Owner);
325 end;
326 
IsInRangenull327 function IsInRange(RngType: UCChar; C: UCChar): Boolean; overload;
328 begin
329   Result := False;
330   { // Alexey
331   if not Assigned(CheckCustomCharClassProc) or
332      not CheckCustomCharClassProc(RngType, Ord(C), Result)
333   then
334   }
335   case RngType of
336     // \c is \w without (?r)
337     'c': Result := IsIdentChar(C);
338     'C': Result := not IsIdentChar(C);
339     // \k is \d without (?r)
340     'k': Result := IsIdentDigitChar(C);
341     'K': Result := not IsIdentDigitChar(C);
342     // \w with (?r)
343     'w': Result := IsWordChar(C);
344     'W': Result := not IsWordChar(C);
345     // \d with (?r)
346     'd': Result := IsDigitChar(C);
347     'D': Result := not IsDigitChar(C);
348     // space
349     's': Result := IsSpaceOrBreakChar(C);
350     'S': Result := not IsSpaceOrBreakChar(C);
351     // hex, rarely used
352     'h': Result := IsHexDigitChar(C);
353     'H': Result := not IsHexDigitChar(C);
354     // Alexey: changed /l and /L meaning, now it's: word char except '_'
355     'l': Result := IsWordChar(C) and (C<>'_');
356     'L': Result := not IsWordChar(C) or (C='_');
357     { //Alexey: removed
358     'g': Result := IsIdentLetterChar(C);
359     'G': Result := not IsIdentLetterChar(C);
360     }
361   end;
362 end;
363 
364 function GetEscape(const Expression: UCString; var aPos: integer): UCChar; overload;
365 var
366   strt: integer;
367   N: integer;
368 begin
369   Result := #0;
370   case Expression[aPos] of
371     't': Result := #$9;
372     'n': Result := #$A;
373     'r': Result := #$D;
374     'f': Result := #$C;
375     'a': Result := #$7;
376     'e': Result := #$1B;
377     'x': begin
378           inc(aPos);
379           if aPos > Length(Expression) then
380              raise Exception.Create('Invalid escape char');
381           if Expression[aPos] = '{' then
382            begin
383              inc(aPos);
384              strt := aPos;
385              while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do
386                inc(aPos);
387              N := BufferHexToInt(@Expression[strt], aPos - strt);
388              if N<0 then
389                raise Exception.Create('Invalid hex digit in \x');
390              Result := UCChar(N);
391            end else
392             begin
393               N := BufferHexToInt(@Expression[aPos], 2);
394               if N<0 then
395                 raise Exception.Create('Invalid hex digit in \x');
396               Result := UCChar(N);
397               Inc(aPos);
398             end;
399           if Result = '' then
400             raise Exception.Create('Invalid hex digit in \x');
401          end;
402   end;
403 end;
404 
405 { TRENodeBase }
406 
407 constructor TRENodeBase.Create(AOwner: TreSubExpr);
408 begin
409   inherited Create;
410   FOwner := AOwner;
411   FLoopMin := 1;
412   FLoopMax := 1;
413   {$IFDEF RE_DEBUG}
414   Inc(LastNodeID);
415   FNodeId := LastNodeID;
416   {$ENDIF}
417 end;
418 
GetRootnull419 function TRENodeBase.GetRoot: TreRootNode;
420 var Node: TRENodeBase;
421 begin
422   Node := Self;
423   while Node.Owner <> nil do
424     Node := Node.Owner;
425   Result := Node as TreRootNode;
426 end;
427 
428 { TCharSetNode }
429 
HasCharnull430 function TCharSetNode.HasChar(C: UCChar): Boolean;
431 var i, N, k: integer;
432 begin
433   N := Length(FCharRanges);
434   if N > 0 then
435    for i := 1 to N shr 1 do
436     begin
437      k := i shl 1;
438      if (Ord(FCharRanges[k]) >= Ord(C)) and
439         (Ord(FCharRanges[k - 1]) <= Ord(C)) then
440       begin
441         Result := True;
442         Exit;
443       end;
444     end;
445   for i := 1 to Length(FCharSets) do
446    if IsInRange(FCharSets[i], C) then
447     begin
448      Result := True;
449      Exit;
450     end;
451   Result := Pos(C, FCharArray) <> 0;
452 end;
453 
454 procedure TCharSetNode.AddChar(C: UCChar);
455 begin
456   if FIgnoreCase then
457     CharToUpCase(C);
458   if Pos(C, FCharArray) = 0 then
459     FCharArray := FCharArray + C;
460 end;
461 
462 procedure TCharSetNode.AddRange(st, en: UCChar);
463 begin
464   if FIgnoreCase then
465    begin
466     CharToUpCase(st);
467     CharToUpCase(en);
468    end;
469   FCharRanges := FCharRanges + st + en;
470 end;
471 
472 procedure TCharSetNode.AddSet(C: UCChar);
473 begin
474   FCharSets := FCharSets + C;
475 end;
476 
Matchnull477 function TCharSetNode.Match(const InputString: UCString; var aPos: integer): integer;
478 var C: UCChar;
479     b: Boolean;
480 begin
481   if aPos > Length(InputString) then
482     begin
483       Result := 0;
484       Exit;
485     end;
486 
487   C := InputString[aPos];
488   if FIgnoreCase then CharToUpCase(C);
489   b := HasChar(C) xor FInvert;
490   if b then Inc(aPos);
491   if b then Result := 1 else Result := 0;
492 end;
493 
BackMatchnull494 function TCharSetNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
495 var C: UCChar;
496     b: Boolean;
497 begin
498   if aPos <= 1 then
499     begin
500       Result := 0;
501       Exit;
502     end;
503 
504   C := InputString[aPos - 1];
505   if FIgnoreCase then CharToUpCase(C);
506   b := HasChar(C) xor FInvert;
507   if b then Dec(aPos);
508   if b then Result := 1 else Result := 0;
509 end;
510 
511 { TCharSeqNode }
512 
BackMatchnull513 function TCharSeqNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
514 var C: UCChar;
515 begin
516   Result := 0;
517   if aPos > 1 then
518     begin
519       C := InputString[aPos - 1];
520       if FIgnoreCase then
521         CharToUpCase(C);
522       if C = FChar then
523         begin
524           Dec(aPos);
525           Result := 1;
526         end;
527     end;
528 end;
529 
Matchnull530 function TCharSeqNode.Match(const InputString: UCString; var aPos: integer): integer;
531 var C: UCChar;
532 begin
533   Result := 0;
534   if aPos <= Length(InputString) then
535     begin
536       C := InputString[aPos];
537       if FIgnoreCase then
538         CharToUpCase(C);
539       if C = FChar then
540         begin
541           Inc(aPos, 1);
542           Result := 1;
543         end;
544     end;
545 end;
546 
547 { TSpecCheckNode }
548 
BackMatchnull549 function TSpecCheckNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
550 var C: UCChar;
551     b: Boolean;
552 begin
553   b := False;
554   if aPos <= 1 then
555    begin
556     if aPos < 1 then
557       begin
558         Result := 0;
559         Exit;
560       end;
561     C := #0;
562    end
563   else C := InputString[aPos - 1];
564 
565   case FCheckType of
566     '^': b := (C = #0) or (C = #10) or
567                    (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] <> #10);
568     '$': b := (aPos > Length(InputString)) or (InputString[aPos] = #13) or
569                    (InputString[aPos] = #10) and (C <> #13);
570     'A': b := C = #0;
571     'Z': b := aPos > Length(InputString);
572     'b': b := IsWordBreak(aPos, InputString);
573     'B': b := not IsWordBreak(aPos, InputString);
574     'z': begin
575            b := IsLineBreakChar(C);
576            if b then
577              begin
578                Dec(aPos);
579                if (C = #10) and (aPos > 0) and (InputString[aPos] = #13) then
580                  Dec(aPos);
581              end;
582          end;
583   else if C<> #0 then
584    begin
585      case FCheckType of
586       '.': b := True;
587       ':': b := not IsLineBreakChar(C);
588       else b := IsInRange(FCheckType, C);
589      end;
590      if b then Dec(aPos);
591    end;
592   end;
593   if b then Result := 1 else Result := 0;
594 end;
595 
Matchnull596 function TSpecCheckNode.Match(const InputString: UCString; var aPos: integer): integer;
597 var C: UCChar;
598     b: Boolean;
599 begin
600   b := False;
601   if aPos > Length(InputString) then
602    begin
603     if aPos - 1 > Length(InputString) then
604       begin
605         Result := 0;
606         Exit;
607       end;
608     C := #0;
609    end
610   else C := InputString[aPos];
611 
612   case FCheckType of
613     '^': b := (aPos = 1) or (InputString[aPos - 1] = #10) or
614                    (InputString[aPos - 1] = #13) and (C <> #10);
615     '$': b := (C = #13) or (C = #10) and (aPos > 1) and (InputString[aPos - 1] <> #13) or (C = #0);
616     'A': b := aPos = 1;
617     'Z': b := C = #0;
618     'b': b := IsWordBreak(aPos, InputString);
619     'B': b := not IsWordBreak(aPos, InputString);
620     'z': begin
621            b := IsLineBreakChar(C);
622            if b then
623              begin
624                Inc(aPos);
625                if (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] = #10) then
626                  Inc(aPos);
627              end;
628          end;
629   else if C<> #0 then
630    begin
631      case FCheckType of
632       '.': b := True;
633       ':': b := not IsLineBreakChar(C);
634       else b := IsInRange(FCheckType, C);
635      end;
636      if b then Inc(aPos);
637    end;
638   end;
639   if b then Result := 1 else Result := 0;
640 end;
641 
642 { TRefNode }
643 
644 procedure TRefNode.GetExprPtr(const InputString: UCString; out Ptr: PWideChar; out Len: integer); // Alexey
645 var se: TreSubExpr;
646 begin
647   Ptr := nil;
648   Len := 0;
649   with Root do
650     if FSubExpr.Count > FRef then
651       begin
652         se := TreSubExpr(FSubExpr[FRef]);
653         Len := abs(se.FEnd - se.FStart);
654         if (se.FStart > 0) and (se.FEnd > 0) then
655           Ptr := @InputString[Min(se.FStart, se.FEnd)];
656       end;
657 end;
658 
BackMatchnull659 function TRefNode.BackMatch(const InputString: UCString; var aPos: integer): integer; // Alexey
660 var
661   P1, P2: PWideChar;
662   L: integer;
663   b: Boolean;
664 begin
665   Result := 0;
666   GetExprPtr(InputString, P1, L);
667   if (P1 <> nil) and (L < aPos) then
668     begin
669       P2 := @InputString[aPos - L];
670       if FIgnoreCase then
671         b := strlicomp(P1, P2, L) = 0
672       else
673         b := strlcomp(P1, P2, L) = 0;
674       if b then
675         begin
676           Dec(aPos, L);
677           Result := 1;
678         end;
679     end;
680 end;
681 
Matchnull682 function TRefNode.Match(const InputString: UCString; var aPos: integer): integer; // Alexey
683 var
684   P1, P2: PWideChar;
685   L: integer;
686   b: Boolean;
687 begin
688   Result := 0;
689   GetExprPtr(InputString, P1, L);
690   if (P1 <> nil) and (L <= Length(InputString) - aPos + 1) then
691     begin
692       P2 := @InputString[aPos];
693       if FIgnoreCase then
694         b := strlicomp(P1, P2, L) = 0
695       else
696         b := strlcomp(P1, P2, L) = 0;
697       if b then
698         begin
699           Inc(aPos, L);
700           Result := 1;
701         end;
702     end;
703 end;
704 
705 { TZeroWidth }
706 
707 destructor TZeroWidth.Destroy;
708 begin
709   FreeAndNil(FBranch);
710   inherited;
711 end;
712 
DoResultnull713 function TZeroWidth.DoResult(MatchRes: integer): integer;
714 begin
715   if FNegative then
716     begin
717       If MatchRes = 0 then
718         Result := 2
719       else
720         Result := 0;
721     end else
722       Result := MatchRes;
723 end;
724 
BackMatchnull725 function TZeroWidth.BackMatch(const InputString: UCString;
726   var aPos: integer): integer;
727 var testPos: integer;
728 begin
729   testPos := aPos;
730   if FIsBack then
731     Result := DoResult(FBranch.Match(InputString, testPos))
732   else
733     Result := DoResult(FBranch.BackMatch(InputString, testPos));
734 end;
735 
Matchnull736 function TZeroWidth.Match(const InputString: UCString;
737   var aPos: integer): integer;
738 var testPos: integer;
739 begin
740   testPos := aPos;
741   if FIsBack then
742     Result := DoResult(FBranch.BackMatch(InputString, testPos))
743   else
744     Result := DoResult(FBranch.Match(InputString, testPos));
745 end;
746 
747 { TreListNodeBase }
748 
749 procedure TreListNodeBase.Clear;
750 begin
751   FList.Clear;
752 end;
753 
754 constructor TreListNodeBase.Create(AOwner: TreSubExpr);
755 begin
756   inherited;
757   FList := TFPObjectList.Create;
758 end;
759 
760 destructor TreListNodeBase.Destroy;
761 begin
762   FList.Free;
763   inherited;
764 end;
765 
766 { TreBranchNode }
767 
GetClassCharnull768 function TreBranchNode.GetClassChar(C: UCChar; Modifiers: Word): UCChar;
769 // Alexey: rewritten
770 begin
771   { // Alexey
772   if Assigned(GetCustomCharClassProc) and GetCustomCharClassProc(C) then
773     Result := C else // User defined character class
774   }
775   case C of
776     'w':
777       begin
778         if (MaskModR and Modifiers) = 0 then
779           Result := 'c'
780         else
781           Result := 'w';
782       end;
783     'W':
784       begin
785         if (MaskModR and Modifiers) = 0 then
786           Result := 'C'
787         else
788           Result := 'W';
789       end;
790     'd':
791       begin
792         if (MaskModR and Modifiers) = 0 then
793           Result := 'k'
794         else
795           Result := 'd';
796       end;
797     'D':
798       begin
799         if (MaskModR and Modifiers) = 0 then
800           Result := 'K'
801         else
802           Result := 'D';
803       end;
804     // Alexey
805     's', 'S',
806     'h', 'H',
807     'c', 'C',
808     'l', 'L',
809     //'g', 'G', // Alexey: removed
810     'k', 'K':
811       Result := C;
812     else
813       Result := #0;
814   end;
815 end;
816 
817 procedure TreBranchNode.Add(Node: TRENodeBase);
818 begin
819   if FList.Count > 0 then
820     TRENodeBase(FList.Last).Next := Node;
821   FList.Add(Node);
822 end;
823 
824 procedure TreBranchNode.Invert;
825 var i, N: integer;
826     NextLast: TRENodeBase;
827 begin
828   N := FList.Count;
829   if N = 0 then Exit;
830   NextLast := TRENodeBase(FList.Last).Next;
831   for i := 0 to (N div 2) - 1 do
832     FList.Exchange(i, N - 1 - i);
833   for i := 0 to N - 2 do
834     TRENodeBase(FList[i]).Next := TRENodeBase(FList[i + 1]);
835   TRENodeBase(FList.Last).Next := NextLast;
836 end;
837 
838 // All characters are ANSI characters.
839 // UCChar is used only for holding char codes less 255
840 // This is made to have unified storage of regular expr. nodes
841 
842 procedure TreBranchNode.Compile(const Expression: UCString; var aPos: integer;
843   Modifiers: Word);
844 var Len: integer;
845     sub: TreSubExpr;
846     C: UCChar;
847 
848     function SafeInc(RaiseEx: Boolean = False): UCChar; // Increment position to the next significant char
849     var
850       C: WideChar;
851       bComment: boolean;
852     begin
853       inc(aPos);
854       // Skip spaces and comments
855       if (Modifiers and MaskModX) <> 0 then
856         while (aPos <= Len) do
857          begin
858            C := Expression[aPos];
859            bComment:= C = '#';
860            if not (IsSpaceChar(C) or bComment) then Break;
861            if bComment then
862            begin
863             Inc(aPos);
864             while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do
865               Inc(aPos);
866            end;
867           Inc(aPos);
868          end;
869 
870       if aPos <= Len then Result := Expression[aPos] else
871        begin
872          Result := #0;
873          if RaiseEx then
874            raise Exception.Create(zreUnexpectedEnd);
875        end;
876     end;
877 
878     function ReadNumber: integer;
879     var strt: integer;
880     begin
881       strt := aPos;
882       while (aPos <= Len) and IsDigitChar(Expression[aPos]) do
883        Inc(aPos);
884       if strt = aPos then
885        raise Exception.Create('Number is expected');
886       Result := BufferStrToInt(@Expression[strt], aPos - strt);
887       Dec(aPos);
888     end;
889 
890     // Read repeaters for node
891     procedure ReadRepeaters(Node: TRENodeBase);
892     begin
893       if aPos > Len then Exit;// repeaters are optional
894       case SafeInc of
895         '+': Node.FLoopMax := -1;
896         '?': Node.FLoopMin := 0;
897         '*': begin
898                Node.FLoopMax := -1;
899                Node.FLoopMin := 0;
900              end;
901         '{': begin
902               SafeInc;
903               Node.FLoopMin := ReadNumber;
904               case SafeInc of
905                 ',': if SafeInc <> '}' then
906                        begin
907                         Node.FLoopMax := ReadNumber;
908                         if SafeInc <> '}' then
909                           raise Exception.Create('There is no closing "}"');
910                        end
911                      else
912                        Node.FLoopMax := -1;
913                 '}': Node.FLoopMax := Node.FLoopMin;
914                 else raise Exception.Create('Invalid loop specifier');
915               end;
916               if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then
917                 raise Exception.Create('Loop minimum must be less then loop maximum');
918              end;
919         else begin
920               Dec(aPos);
921               Exit;
922              end
923       end;
924       if SafeInc = '?' then Node.FNonGreedy := True else
925        begin
926         Node.FNonGreedy := (Modifiers and MaskModG) = 0;
927         Dec(aPos);
928        end;
929     end;
930 
931     procedure AddCharSeq(const C: UCChar);
932     var csNode: TCharSeqNode;
933     begin
934       csNode := TCharSeqNode.Create(Owner);
935       csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0;
936       if csNode.FIgnoreCase then csNode.FChar := ecUpCase(C)
937                             else csNode.FChar := C;
938       Add(csNode);
939       ReadRepeaters(csNode);
940     end;
941 
942     procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True);
943     var sn: TSpecCheckNode;
944     begin
945       sn := TSpecCheckNode.Create(Owner);
946       sn.FCheckType := C;
947       Add(sn);
948       if WithRepeat then ReadRepeaters(sn);
949     end;
950 
951     function PickSetChar(cs: TCharSetNode): UCChar;
952     begin
953       Result := Expression[aPos];
954       if Result = '\' then
955        begin
956          Inc(aPos);
957          if aPos > Length(Expression) then
958           Exit;
959          Result := GetEscape(Expression, aPos);
960          if Result = #0 then
961           begin
962             Result := GetClassChar(Expression[aPos], Modifiers);
963             if Result = #0 then Result := Expression[aPos]
964               else
965                 begin
966                   cs.AddSet(Result);
967                   Result := #0;
968                 end;
969           end;
970        end;
971     end;
972 
973     procedure ReadCharSet;
974     var cs: TCharSetNode;
975         Cstrt, Cend: UCChar;
976     begin
977       cs := TCharSetNode.Create(Owner);
978       cs.FIgnoreCase := (Modifiers and MaskModI) <> 0;
979       Add(cs);
980       Cstrt := #0;
981       if SafeInc(True) = '^' then cs.FInvert := True
982         else Cstrt := PickSetChar(cs);
983       while SafeInc(True) <> ']' do
984        begin
985          if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range
986           if SafeInc = ']' then
987            begin
988             cs.AddChar(Cstrt);
989             Cstrt := '-';
990             Break;
991            end else
992            begin
993             Cend := PickSetChar(cs);
994             if Cend = #0 then
995              begin
996               cs.AddChar(Cstrt);
997               Cstrt := '-';
998              end else
999              if Ord(Cend) < Ord(Cstrt) then
1000               raise Exception.Create('Invalid set range') else
1001               begin
1002                // Extended russian support
1003                cs.AddRange(Cstrt, Cend);
1004                Cstrt := #0;
1005                if SafeInc(True) = ']' then Break;
1006               end;
1007            end;
1008          if Cstrt <> #0 then cs.AddChar(Cstrt);
1009          Cstrt := PickSetChar(cs);
1010        end;
1011       if Cstrt <> #0 then cs.AddChar(Cstrt);
1012       ReadRepeaters(cs);
1013     end;
1014 
1015     procedure AddRefNode(RefIdx: integer);
1016     var rn: TRefNode;
1017     begin
1018       if TreRootNode(Root).FSubExpr.Count <= RefIdx then
1019        raise Exception.Create('Invalid reference');
1020       rn := TRefNode.Create(Owner);
1021       rn.FRef := RefIdx;
1022       rn.FIgnoreCase := (Modifiers and MaskModI) <> 0;
1023       Add(rn);
1024       ReadRepeaters(rn);
1025     end;
1026 
1027     procedure AddZeroWidth(IsBack: Boolean);
1028     var Negative: Boolean;
1029         Branch: TreBranchNode;
1030         Node: TZeroWidth;
1031     begin
1032       case Expression[aPos] of
1033         '!': Negative := True;
1034         '=': Negative := False;
1035         else
1036           raise Exception.Create(zreInvalidZeroWidth);
1037       end;
1038       SafeInc(True);
1039       Branch := TreBranchNode.Create(nil);
1040       try
1041         Branch.Compile(Expression, aPos, Modifiers);
1042         Node := TZeroWidth.Create(Owner);
1043         Node.FIsBack := IsBack;
1044         Node.FNegative := Negative;
1045         Node.FBranch := Branch;
1046         Add(Node);
1047         if IsBack then
1048           Branch.Invert;
1049       except
1050         Branch.Free;
1051       end;
1052     end;
1053 
1054 var tp: integer;
1055 begin
1056   Clear;
1057   Len := Length(Expression);
1058   Dec(aPos);
1059   while aPos <= Len do
1060    begin
1061      case SafeInc of
1062        ')', '|', #0: Exit; // end of branch
1063        '(': begin
1064              if SafeInc = '?' then
1065               begin // Change modifiers
1066                 case SafeInc(True) of
1067                   '<': begin
1068                          SafeInc(True);
1069                          AddZeroWidth(True);
1070                        end;
1071                   '=', '!': AddZeroWidth(False);
1072                   else
1073                     begin
1074                       tp := aPos;
1075                       repeat // skip comment
1076                         Inc(aPos);
1077                       until (aPos > Len) or (Expression[aPos] = ')');
1078                       if Expression[tp] <> '#' then
1079                         Root.Owner.ParseModifiers(@Expression[tp], aPos - tp, Modifiers)
1080                     end;
1081                 end;
1082               end else
1083               begin // sub expression
1084                 sub := TreSubExpr.Create(Owner);
1085                 Add(sub);
1086                 sub.Compile(Expression, aPos, Modifiers);
1087                 if (aPos > Len) or (Expression[aPos] <> ')') then
1088                  raise Exception.Create('Do not closed sub expression');
1089                 ReadRepeaters(sub);
1090               end;
1091             end;
1092        '[': begin    // char set node
1093               ReadCharSet;
1094             end;
1095        '^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False)  // begin of text
1096                                             else AddSpecNode('^', False); // begin of line
1097        '$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False)  // end of text
1098                                             else AddSpecNode('$', False); // end of line
1099        '.': if (Modifiers and MaskModS) <> 0  then AddSpecNode('.')  // all
1100                                              else AddSpecNode(':'); // all without line separators
1101        '\': begin
1102               Inc(aPos);
1103               if aPos > Len then C := '\'
1104                else C := GetEscape(Expression, aPos);
1105               if C <> #0 then AddCharSeq(C) else
1106                begin
1107                  C := GetClassChar(Expression[aPos], Modifiers);
1108                  if C <> #0 then AddSpecNode(C) else
1109                  case Expression[aPos] of
1110                    'A', 'Z', 'b', 'B', 'z': AddSpecNode(Expression[aPos]);
1111                    '1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0'));
1112                    else      AddCharSeq(Expression[aPos]);
1113                  end;
1114                end;
1115             end;
1116        else AddCharSeq(Expression[aPos]); // Simple char
1117      end;
1118    end;
1119 end;
1120 
1121 // Main mtaching routine (recursive)
1122 // Returns:
1123 //  0 - does not match
1124 //  1 - match the Node
1125 //  2 - match The Node and all next nodes
1126 function MatchNode(Node: TRENodeBase; const InputString: UCString;
1127    var aPos: integer): integer; overload;
1128 
1129 var save, k, sv, LastSucc, total, Success: integer;
1130     IsBrEnd: Boolean;
1131 
1132 begin
1133   if Node = nil then
1134     begin
1135       Result := 2;
1136       Exit;
1137     end;
1138   Result := 0;
1139 
1140   // required minimum repeat
1141   save := aPos;
1142   Success := 0;
1143   for k := 1 to node.FLoopMin do
1144     begin
1145       Success := Node.Match(InputString, aPos);
1146       if Success = 0 then
1147         begin
1148           aPos := save;
1149           {$IFDEF RE_DEBUG}
1150           if Assigned(REDebugOnMatchProc) then
1151             REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
1152           {$ENDIF}
1153           Exit;
1154         end;
1155     end;
1156 
1157   k := node.FLoopMin - 1;
1158 //  k := 0;
1159   LastSucc := 0;
1160   total := 0;
1161 
1162   IsBrEnd := Assigned(Node.Owner) and
1163              ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
1164   repeat
1165     Inc(k);
1166     sv := aPos;
1167     if IsBrEnd then Node.Owner.FEnd := aPos;
1168     if Node is TreSubExpr then
1169       begin
1170         if Success <> 2 then
1171           Success := MatchNode(GetAbsoluteNext(Node), InputString, aPos);
1172       end else
1173     if Node.Next <> nil then
1174       Success := MatchNode(Node.Next, InputString, aPos) else
1175     if IsBrEnd then
1176       Success := MatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
1177       Success := 2;
1178 
1179     if Success = 2 then // success all next nodes
1180      begin
1181        {$IFDEF RE_DEBUG}
1182        if Assigned(REDebugOnMatchProc) then
1183           REDebugOnMatchProc(Format('Success at %d;  Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
1184        {$ENDIF}
1185        total := aPos;
1186        LastSucc := sv;
1187        aPos := LastSucc;
1188        Result := Success;
1189        if node.FNonGreedy then
1190          Break; // for non GREEDY mode
1191      end else
1192     if IsBrEnd then
1193       begin
1194        total := sv;
1195        Result := 1;
1196       end else
1197     if Success = 1 then
1198       begin
1199         aPos := sv;
1200         total := sv;
1201         Result := 1;
1202         if node.FNonGreedy then
1203           Break; // for non GREEDY mode
1204       end;
1205 
1206     if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
1207       begin
1208         if not IsBrEnd then  //v2.36
1209           Result := Success; //v2.33
1210         Break;
1211       end;
1212 
1213     sv := aPos;
1214     Success := Node.Match(InputString, aPos);
1215   until (Success = 0) or (aPos = sv);
1216 
1217 //  if (node.FLoopMin > 0) and (k < node.FLoopMin) then
1218 //    Result := 0;
1219 {$IFDEF RE_DEBUG}
1220   if Assigned(REDebugOnMatchProc) then
1221     REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
1222 {$ENDIF}
1223   if Result > 0 then
1224    begin
1225     aPos := total;
1226     // Save sub-expression result
1227     if IsBrEnd and (LastSucc > 0) then
1228       Node.Owner.FEnd := LastSucc;
1229    end else
1230    begin
1231     aPos := save;
1232     if IsBrEnd then Node.Owner.FEnd := 0;
1233    end;
1234 end;
1235 
1236 // Main mtaching routine (recursive)
1237 function BackMatchNode(Node: TRENodeBase; const InputString: UCString;
1238   var aPos: integer): integer; overload;
1239 
1240 var save, k, sv, LastSucc, total, Success: integer;
1241     IsBrEnd: Boolean;
1242 
1243 begin
1244   if Node = nil then
1245     begin
1246       Result := 2;
1247       Exit;
1248     end;
1249   Result := 0;
1250 
1251   // required minimum repeat
1252   save := aPos;
1253   Success := 0;
1254   for k := 1 to node.FLoopMin do
1255     begin
1256       Success := Node.BackMatch(InputString, aPos);
1257       if Success = 0 then
1258         begin
1259           aPos := save;
1260           {$IFDEF RE_DEBUG}
1261           if Assigned(REDebugOnMatchProc) then
1262             REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
1263           {$ENDIF}
1264           Exit;
1265         end;
1266     end;
1267 
1268   k := node.FLoopMin - 1;
1269   LastSucc := 0;
1270   total := 0;
1271 
1272   IsBrEnd := Assigned(Node.Owner) and
1273              ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
1274   repeat
1275     Inc(k);
1276     sv := aPos;
1277     if IsBrEnd then Node.Owner.FStart := aPos;
1278     if Node is TreSubExpr then
1279       begin
1280         if Success <> 2 then
1281           Success := BackMatchNode(GetAbsoluteNext(Node), InputString, aPos);
1282       end else
1283     if Node.Next <> nil then
1284       Success := BackMatchNode(Node.Next, InputString, aPos) else
1285     if IsBrEnd then
1286       Success := BackMatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
1287       Success := 2;
1288 
1289     if Success = 2 then
1290      begin
1291        {$IFDEF RE_DEBUG}
1292        if Assigned(REDebugOnMatchProc) then
1293           REDebugOnMatchProc(Format('Success at %d;  Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
1294        {$ENDIF}
1295        total := aPos;
1296        LastSucc := sv;
1297        aPos := LastSucc;
1298        Result := Success;
1299        if node.FNonGreedy then Break; // for non GREEDY mode
1300      end else
1301     if IsBrEnd then
1302       begin
1303        total := sv;
1304        Result := 1;
1305       end else
1306     if Success = 1 then
1307       begin
1308         aPos := sv;
1309         total := sv;
1310         Result := 1;
1311       end;
1312 
1313     if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
1314       begin
1315         if not IsBrEnd then  //v2.36
1316           Result := Success; //v2.33
1317         Break;
1318       end;
1319 
1320     sv := aPos;
1321     Success := Node.BackMatch(InputString, aPos);
1322   until (Success = 0) or (aPos = sv);
1323 
1324 {$IFDEF RE_DEBUG}
1325   if Assigned(REDebugOnMatchProc) then
1326     REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
1327 {$ENDIF}
1328   if Result > 0 then
1329    begin
1330     aPos := total;
1331     // Save sub-expression result
1332     if IsBrEnd and (LastSucc > 0) then
1333       Node.Owner.FStart := LastSucc;
1334    end else
1335    begin
1336     aPos := save;
1337     if IsBrEnd then Node.Owner.FStart := 0;
1338    end;
1339 end;
1340 
Matchnull1341 function TreBranchNode.Match(const InputString: UCString; var aPos: integer): integer;
1342 begin
1343 {$IFDEF RE_DEBUG}
1344   if Assigned(REDebugOnMatchProc) then
1345     REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
1346 {$ENDIF}
1347   if FList.Count = 0 then Result := 1
1348     else Result := MatchNode(TRENodeBase(FList.First), InputString, aPos);
1349 {$IFDEF RE_DEBUG}
1350   if Assigned(REDebugOnMatchProc) then
1351     REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
1352 {$ENDIF}
1353 end;
1354 
BackMatchnull1355 function TreBranchNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
1356 begin
1357 {$IFDEF RE_DEBUG}
1358   if Assigned(REDebugOnMatchProc) then
1359     REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
1360 {$ENDIF}
1361   if FList.Count = 0 then Result := 1
1362     else Result := BackMatchNode(TRENodeBase(FList.First), InputString, aPos);
1363 {$IFDEF RE_DEBUG}
1364   if Assigned(REDebugOnMatchProc) then
1365     REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
1366 {$ENDIF}
1367 end;
1368 
1369 { TreSubExpr }
1370 
1371 procedure TreSubExpr.Compile(const Expression: UCString; var aPos: integer;
1372   Modifiers: Word);
1373 var Br: TreBranchNode;
1374 begin
1375   Dec(aPos);
1376   repeat
1377     Inc(aPos);
1378     Br := TreBranchNode.Create(Self);
1379     FList.Add(Br);
1380     Br.Compile(Expression, aPos, Modifiers);
1381     if Br.FList.Count = 0 then FList.Remove(Br);
1382   until (aPos > Length(Expression)) or (Expression[aPos] = ')');
1383 end;
1384 
1385 constructor TreSubExpr.Create(AOwner: TreSubExpr);
1386 begin
1387   inherited;
1388   if Owner <> nil then
1389     TreRootNode(Root).FSubExpr.Add(Self);
1390 end;
1391 
Matchnull1392 function TreSubExpr.Match(const InputString: UCString; var aPos: integer): integer;
1393 var i: integer;
1394     OldEnd, OldStart, CurRes, svPos: integer;
1395 begin
1396   OldEnd := FEnd;
1397   OldStart := FStart;
1398   FStart := aPos;
1399   FEnd := 0;
1400   Result := 0;
1401   svPos := aPos;
1402   for i := 0 to FList.Count - 1 do
1403     begin
1404       CurRes := TreBranchNode(FList[i]).Match(InputString, aPos);
1405       if FNonGreedy then
1406         begin
1407           if CurRes > 0 then
1408             begin
1409               OldEnd := FEnd;
1410               OldStart := FStart;
1411               Result := CurRes;
1412               aPos := svPos;
1413               Break;
1414             end;
1415         end else
1416           if CurRes > Result then
1417             begin
1418               OldEnd := FEnd;
1419               OldStart := FStart;
1420               Result := CurRes;
1421               aPos := svPos;
1422               if Result = 2 then
1423                 Break;
1424             end;
1425     end;
1426   FEnd := OldEnd;
1427   FStart := OldStart;
1428   if (FEnd > 0) and (Result > 0) then
1429     aPos := FEnd;
1430 end;
1431 
BackMatchnull1432 function TreSubExpr.BackMatch(const InputString: UCString; var aPos: integer): integer;
1433 var i: integer;
1434     OldEnd, OldStart, CurRes, svPos: integer;
1435 begin
1436   OldEnd := FEnd;
1437   OldStart := FStart;
1438   FStart := 0;
1439   FEnd := aPos;
1440   Result := 0;
1441   svPos := aPos;
1442   for i := 0 to FList.Count - 1 do
1443     begin
1444       CurRes := TreBranchNode(FList[i]).BackMatch(InputString, aPos);
1445       if FNonGreedy then
1446         begin
1447           if CurRes > 0 then
1448             begin
1449               OldEnd := FEnd;
1450               OldStart := FStart;
1451               Result := CurRes;
1452               aPos := svPos;
1453               Break;
1454             end;
1455         end else
1456           if CurRes > Result then
1457             begin
1458               OldEnd := FEnd;
1459               OldStart := FStart;
1460               Result := CurRes;
1461               aPos := svPos;
1462               if Result = 2 then
1463                 Break;
1464             end;
1465     end;
1466   FEnd := OldEnd;
1467   FStart := OldStart;
1468   if (FStart > 0) and (Result > 0) then
1469     aPos := FStart;
1470 end;
1471 
1472 { TreRootNode }
1473 
1474 procedure TreRootNode.Clear;
1475 begin
1476   inherited;
1477   FSubExpr.Clear;
1478   FSubExpr.Add(Self);
1479 end;
1480 
1481 constructor TreRootNode.Create(AOwner: TecRegExpr);
1482 begin
1483   inherited Create(nil);
1484   FOwner := AOwner;
1485   FSubExpr := TFPList.Create;
1486   FSubExpr.Add(Self);
1487 end;
1488 
1489 destructor TreRootNode.Destroy;
1490 begin
1491   FSubExpr.Free;
1492   inherited;
1493 end;
1494 
MatchStrnull1495 function TreRootNode.MatchStr(const InputString: UCString;
1496   var aPos: integer; Back: Boolean): Boolean;
1497 var i: integer;
1498 begin
1499   for i := 0 to FSubExpr.Count - 1 do
1500    begin
1501      TreSubExpr(FSubExpr[i]).FStart := -1;
1502      TreSubExpr(FSubExpr[i]).FEnd := -1;
1503    end;
1504   if Back then
1505     Result := BackMatchNode(Self, InputString, aPos) <> 0
1506   else
1507     Result := MatchNode(Self, InputString, aPos) <> 0;
1508 end;
1509 
1510 // =============================================================================
1511 //   Application Level
1512 // =============================================================================
1513 
1514 { TecRegExpr }
1515 
1516 constructor TecRegExpr.Create;
1517 begin
1518   inherited;
1519   FModifiers := DefaultModifiers;
1520 end;
1521 
1522 destructor TecRegExpr.Destroy;
1523 begin
1524   FreeAndNil(FProgRoot);
1525   inherited;
1526 end;
1527 
1528 procedure TecRegExpr.ClearRoot;
1529 begin
1530   FreeAndNil(FProgRoot);
1531   FMatchOK := False;
1532 end;
1533 
IsEmptynull1534 function TecRegExpr.IsEmpty: Boolean;
1535 begin
1536   Result := not Assigned(FProgRoot) or (TreRootNode(FProgRoot).FList.Count = 0);
1537 end;
1538 
1539 procedure TecRegExpr.Compile(const AExpression: UCString);
1540 var Pos: integer;
1541 begin
1542   {$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF}
1543   FMatchOK := False;
1544   if not Assigned(FProgRoot) then
1545     FProgRoot := TreRootNode.Create(Self)
1546   else
1547     TreRootNode(FProgRoot).Clear;
1548 
1549   Pos := 1;
1550   try
1551     if AExpression <> '' then
1552       TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers);
1553   except
1554     ClearRoot;
1555     raise;
1556   end;
1557   //FUnicodeCompiled := True;
1558 end;
1559 
Compilenull1560 function TecRegExpr.Compile: Boolean;
1561 begin
1562   try
1563     if IsEmpty then
1564       Compile(FExpression);
1565   except
1566   end;
1567   Result := not IsEmpty;
1568 end;
1569 
GetIsInvalidnull1570 function TecRegExpr.GetIsInvalid: Boolean;
1571 begin
1572   Result := not Compile;
1573 end;
1574 
GetModifiernull1575 function TecRegExpr.GetModifier(const Index: Integer): boolean;
1576 begin
1577   Result := (FModifiers and Index) <> 0;
1578 end;
1579 
GetModifierStrnull1580 function TecRegExpr.GetModifierStr: ecString;
1581 const ModLet: ecString = 'irsgmx';
1582 var s1, s2: string;
1583     i: integer;
1584 begin
1585   s1 := ''; s2 := '';
1586   for i := 0 to 5 do
1587    if (FModifiers and (1 shl i)) <> 0 then
1588     s1 := s1 + ModLet[i + 1]
1589    else
1590     s2 := s2 + ModLet[i + 1];
1591 
1592   Result := '(?' + s1;
1593   if s2 <> '' then
1594    Result := Result + '-' + s2;
1595   Result := Result + ')';
1596 end;
1597 
Matchnull1598 function TecRegExpr.Match(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean;
1599 begin
1600   Result := Compile; // ensure compiling and validity
1601   if Result then
1602     begin
1603       if aPos < 1 then
1604         aPos := 1;
1605       Result := TreRootNode(FProgRoot).MatchStr(InputString, aPos, Back);
1606       FMatchOK := Result;
1607     end;
1608 end;
1609 
MatchLengthnull1610 function TecRegExpr.MatchLength(const InputString: UCString;
1611   aPos: integer; Back: Boolean): integer;
1612 begin
1613   Result := aPos;
1614   if Match(InputString, aPos, Back) then
1615     begin
1616      if Back then
1617        Result := Result - aPos
1618      else
1619        Result := aPos - Result;
1620     end
1621   else
1622     Result := 0;
1623 end;
1624 
1625 
1626 //Delete regex comments "#...." in multi-line text
1627 function _MultilineToString(const Value: ecString): ecString;
1628   //
1629   function IsEol(C: WideChar): boolean; inline;
1630   begin
1631     Result:= (C=#10) or (C=#13);
1632   end;
1633   //
1634 var
1635   NBegin, NEnd, i: integer;
1636 begin
1637   Result:= Value;
1638 
1639   repeat
1640     NBegin:= 0;
1641     for i:= 1 to Length(Result) do
1642       if (Result[i]='#') and ((i=1) or (Result[i-1]<>'\')) then
1643       begin
1644         NBegin:= i;
1645         Break;
1646       end;
1647     if NBegin=0 then Break;
1648 
1649     NEnd:= Length(Result);
1650     for i:= NBegin+1 to Length(Result) do
1651       if IsEol(Result[i]) then
1652       begin
1653         NEnd:= i-1;
1654         Break;
1655       end;
1656 
1657     Delete(Result, NBegin, NEnd-NBegin+1);
1658   until false;
1659 
1660   for i:= 1 to Length(Result) do
1661      if IsEol(Result[i]) then
1662        Result[i]:= ' ';
1663 end;
1664 
1665 procedure TecRegExpr.SetExpression(const Value: ecString);
1666 begin
1667   FExpression:= _MultilineToString(Value); // Alexey: fix to handle #-comments in regex
1668   ClearRoot;
1669 end;
1670 
1671 procedure TecRegExpr.SetModifier(const Index: Integer;
1672   const Value: boolean);
1673 begin
1674   if Value then FModifiers := FModifiers or Index
1675    else FModifiers := FModifiers and not Index;
1676   ClearRoot;
1677 end;
1678 
1679 procedure TecRegExpr.SetModifiers(const Value: Word);
1680 begin
1681   FModifiers := Value;
1682   ClearRoot;
1683 end;
1684 
1685 procedure TecRegExpr.SetModifierStr(const Value: ecString);
1686 begin
1687   if (Length(Value) >= 3) and (Value[1] = '(') and (Value[2] = '?') then
1688     ParseModifiers(@Value[3], Length(Value) - 3, FModifiers);
1689 end;
1690 
GetMatchLennull1691 function TecRegExpr.GetMatchLen(Idx: integer): integer;
1692 begin
1693   Result := -1;
1694   if FMatchOK then
1695     with TreRootNode(FProgRoot) do
1696       if (idx < FSubExpr.Count) then
1697         with TreSubExpr(FSubExpr[Idx]) do
1698           if (FStart <> -1) and (FEnd <> -1) then
1699             Result := FEnd - FStart;
1700 end;
1701 
GetMatchPosnull1702 function TecRegExpr.GetMatchPos(Idx: integer): integer;
1703 begin
1704   Result := -1;
1705   if FMatchOK then
1706     with TreRootNode(FProgRoot) do
1707       if (idx < FSubExpr.Count) then
1708         with TreSubExpr(FSubExpr[Idx]) do
1709           Result := FStart;
1710 end;
1711 
GetSubExprMatchCountnull1712 function TecRegExpr.GetSubExprMatchCount: integer;
1713 var i: integer;
1714 begin
1715   Result := -1;
1716   if FMatchOK then
1717     with TreRootNode(FProgRoot) do
1718       for i := 0 to FSubExpr.Count - 1 do
1719         with TreSubExpr(FSubExpr[i]) do
1720           if (FStart <> -1) and (FEnd <> -1) then
1721             Inc(Result);
1722 end;
1723 
GetMatchnull1724 function TecRegExpr.GetMatch(const InputString: UCString;
1725   SubIdx: integer): UCString;
1726 begin
1727   Result := '';
1728   if FMatchOK then
1729     with TreRootNode(FProgRoot) do
1730       if (SubIdx < FSubExpr.Count) then
1731         with TreSubExpr(FSubExpr[SubIdx]) do
1732           if (FStart <> -1) and (FEnd <> -1) then
1733             Result := Copy(InputString, FStart, FEnd - FStart);
1734 end;
1735 
Substitutenull1736 function TecRegExpr.Substitute(const InputString, ATemplate: ecString): ecString;
1737 var i: integer;
1738     C: ecChar;
1739 begin
1740 //  if not FMatchOK then
1741 //   raise Exception.Create('No matched string');
1742   Result := '';
1743   i := 1;
1744   while i <= Length(ATemplate) do
1745    begin
1746      if (ATemplate[i] = '\') and (i < Length(ATemplate)) then
1747       begin
1748        inc(i);
1749        if IsDigitChar(ATemplate[i]) then
1750          Result := Result + GetMatch(InputString, StrToInt(ATemplate[i]))
1751        else
1752         begin
1753          C := ecChar(Ord(GetEscape(ATemplate, i)));
1754          if C = #0 then C := ATemplate[i];
1755          Result := Result + C;
1756         end;
1757       end else Result := Result + ATemplate[i];
1758      inc(i);
1759    end;
1760 end;
1761 
1762 procedure TecRegExpr.Assign(Source: TecRegExpr);
1763 begin
1764   Self.Expression := Source.Expression;
1765   Self.ModifierMask := Source.ModifierMask;
1766 end;
1767 
1768 procedure TecRegExpr.ParseModifiers(const S: PWideChar; Len: integer; var Modifiers: Word); // Alexey
1769 var IsOn : boolean;
1770     i: integer;
1771   procedure SetModif(m: integer); inline;
1772   begin
1773     if (m and FModifiersStatic) = 0 then
1774       if IsOn then Modifiers := Modifiers or m
1775        else Modifiers := Modifiers and not m;
1776   end;
1777 begin
1778   IsOn := true;
1779   for i := 0 to Len-1 do
1780     case S[i] of
1781       '-': IsOn := false;
1782       'i','I': SetModif(MaskModI);
1783       'r','R': SetModif(MaskModR);
1784       's','S': SetModif(MaskModS);
1785       'g','G': SetModif(MaskModG);
1786       'm','M': SetModif(MaskModM);
1787       'x','X': SetModif(MaskModX);
1788     else
1789       raise Exception.Create(zreUnexpectedModifier);
1790     end;
1791 end;
1792 
1793 // =============================================================================
1794 //  DEBUGGER
1795 // =============================================================================
1796 {$IFDEF RE_DEBUG}
1797 // Fill tree with compiled nodes {debug purpose}
1798 procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
1799   function GetNodeCaption(Node: TRENodeBase): string;
1800   begin
1801    if Node.ClassType = TCharSeqNode then
1802     Result := ecChar(TCharSeqNode(Node).FChar) else
1803    if Node.ClassType = TCharSetNode then
1804     begin
1805       Result := '[ ... ]';
1806     end else
1807    if Node.ClassType = TSpecCheckNode then
1808      Result := '! '+ TSpecCheckNode(Node).FCheckType + ' !' else
1809    if Node.ClassType = TZeroWidth then
1810      Result := '0'
1811    else
1812      Result := '<' + Node.ClassName + '>';
1813    Result := IntToStr(Node.FNodeId) + '  ' + Result + Format(' {%d, %d}', [Node.FLoopMin, Node.FLoopMax]);
1814   end;
1815 
1816   procedure AddNode(Node: TRENodeBase; Prn: TTreeNode);
1817   var tn: TTreeNode;
1818       i: integer;
1819   begin
1820    tn := TV.Items.AddChild(Prn, GetNodeCaption(Node));
1821    if Node is TreListNodeBase then
1822     for i := 0 to TreListNodeBase(Node).FList.Count - 1 do
1823      AddNode(TRENodeBase(TreListNodeBase(Node).FList[i]), tn);
1824    if Node is TZeroWidth then
1825      AddNode(TZeroWidth(Node).FBranch, tn);
1826   end;
1827 begin
1828   TV.Items.Clear;
1829   AddNode(TreRootNode(RE.FProgRoot), nil);
1830 end;
1831 {$ENDIF}
1832 
1833 end.
1834 
1835