1 { *************************************************************************** }
2 { }
3 { EControl Syntax Editor SDK }
4 { }
5 { Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael }
6 { www.econtrol.ru }
7 { support@econtrol.ru }
8 { }
9 { Changes in Lazarus port: by Alexey Torgashin (CudaText) }
10 { }
11 { *************************************************************************** }
12
13 {$mode delphi}
14
15 unit ec_RegExpr;
16
17 interface
18
19 uses
20 Classes,
21 {$IFDEF RE_DEBUG}ComCtrls,{$ENDIF}
22 ec_StrUtils;
23
24 const
25 MaskModI = 1; // modifier /i bit in fModifiers
26 MaskModR = 2; // -"- /r
27 MaskModS = 4; // -"- /s
28 MaskModG = 8; // -"- /g
29 MaskModM = 16; // -"- /m
30 MaskModX = 32; // -"- /x
31
32 type
33 TecRegExpr = class //(TPersistent)
34 private
35 FProgRoot: TObject;
36 FModifiers: Word;
37 FMatchOK: Boolean;
38 FExpression: ecString;
39 FModifiersStatic: Word;
40 procedure SetModifiers(const Value: Word);
GetModifiernull41 function GetModifier(const Index: Integer): boolean;
GetModifierStrnull42 function GetModifierStr: ecString;
43 procedure SetModifier(const Index: Integer; const Value: boolean);
44 procedure SetModifierStr(const Value: ecString);
GetIsInvalidnull45 function GetIsInvalid: Boolean;
GetMatchLennull46 function GetMatchLen(Idx: integer): integer;
GetMatchPosnull47 function GetMatchPos(Idx: integer): integer;
GetSubExprMatchCountnull48 function GetSubExprMatchCount: integer;
49 procedure SetExpression(const Value: ecString);
50 procedure ClearRoot;
IsEmptynull51 function IsEmpty: Boolean;
52 procedure ParseModifiers(const S: PWideChar; Len: integer; var Modifiers: Word); // Alexey
53 public
54 constructor Create;
55 destructor Destroy; override;
56 procedure Assign(Source: TecRegExpr);
57
Compilenull58 function Compile: Boolean; overload;
59 procedure Compile(const AExpression: UCString); overload;
Matchnull60 function Match(const InputString: UCString; var aPos: integer; Back: Boolean = False): Boolean; overload;
MatchLengthnull61 //function MatchLength(const InputString: AnsiString; aPos: integer; Back: Boolean = False): integer; overload;
62 function MatchLength(const InputString: UCString; aPos: integer; Back: Boolean = False): integer; overload;
GetMatchnull63 //function GetMatch(const InputString: AnsiString; SubIdx: integer): AnsiString; overload;
64 function GetMatch(const InputString: UCString; SubIdx: integer): UCString; overload;
Substitutenull65 function Substitute (const InputString, ATemplate : ecString) : ecString;
66
67 property ModifierMask: Word read FModifiers write SetModifiers;
68 property ModifierI: boolean index MaskModI read GetModifier write SetModifier;
69 property ModifierR: boolean index MaskModR read GetModifier write SetModifier;
70 property ModifierS: boolean index MaskModS read GetModifier write SetModifier;
71 property ModifierG: boolean index MaskModG read GetModifier write SetModifier;
72 property ModifierM: boolean index MaskModM read GetModifier write SetModifier;
73 property ModifierX: boolean index MaskModX read GetModifier write SetModifier;
74 property IsInvalid: Boolean read GetIsInvalid;
75
76 property MatchPos[Idx: integer]: integer read GetMatchPos;
77 property MatchLen[Idx: integer]: integer read GetMatchLen;
78 property SubExprMatchCount: integer read GetSubExprMatchCount;
79 published
80 property Expression: ecString read FExpression write SetExpression;
81 property ModifierStr: ecString read GetModifierStr write SetModifierStr stored False;
82 property ModifierFlags: Word read FModifiers write FModifiers;
83 property ModifiersStatic: Word read FModifiersStatic write FModifiersStatic;
84 end;
85
86 type
lassCodenull87 TGetCustomCharClass = function(ClassCode: UCChar): Boolean;
lassCodenull88 TCheckCustomCharClass = function(ClassCode: UCChar; CharCode: Word; var IsInClass: Boolean): Boolean;
89
90 var
91 DefaultModifiers: integer = MaskModI or MaskModR or MaskModG or MaskModM or MaskModX;
92 //GetCustomCharClassProc: TGetCustomCharClass = nil;
93 //CheckCustomCharClassProc: TCheckCustomCharClass = nil;
94
95 const
96 // Error messages
97 zreUnexpectedEnd = 'Unexpected end of expression';
98 zreUnexpectedModifier = 'Unexpected modifier';
99 zreUnexpectedBracket = 'Unexpected bracket';
100 zreInvalidZeroWidth = 'Invalid zero-width expression';
101
102 {$IFDEF RE_DEBUG}
103 type TREDebugOnMatchProc = procedure(const TraceStr: string) of object;
104 var REDebugOnMatchProc: TREDebugOnMatchProc = nil;
105 procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
106 {$ENDIF}
107
108
109 implementation
110
111 uses SysUtils, Contnrs, Math;
112
BufferHexToIntnull113 function BufferHexToInt(p: PWideChar; Len: integer): integer; //Alexey
114 var
115 N, i: integer;
116 ch: WideChar;
117 begin
118 Result:= 0;
119 for i:= 1 to Len do
120 begin
121 ch:= p^;
122 case ch of
123 '0'..'9':
124 N:= Ord(ch)-Ord('0');
125 'a'..'f':
126 N:= Ord(ch)-(Ord('a')-10);
127 'A'..'F':
128 N:= Ord(ch)-(Ord('A')-10);
129 else
130 exit(-1);
131 end;
132 Inc(p);
133 Result:= Result*16+N;
134 end;
135 end;
136
BufferStrToIntnull137 function BufferStrToInt(p: PWideChar; Len: integer): integer; //Alexey
138 var
139 N, i: integer;
140 ch: WideChar;
141 begin
142 Result:= 0;
143 for i:= 1 to Len do
144 begin
145 ch:= p^;
146 if IsDigitChar(ch) then
147 N:= Ord(ch)-Ord('0')
148 else
149 exit(-1);
150 Inc(p);
151 Result:= Result*10+N;
152 end;
153 end;
154
155
156 {$IFDEF RE_DEBUG}
157 var
158 LastNodeID: integer;
159 {$ENDIF}
160
161 type
162 TreSubExpr = class;
163 TreRootNode = class;
164 // Base node class
165 TRENodeBase = class
166 private
167 {$IFDEF RE_DEBUG}
168 FNodeId: integer;
169 {$ENDIF}
170 FLoopMin: integer; // repeat at least
171 FLoopMax: integer; // not more than
172 FNonGreedy: Boolean; // is greedy
173 FNext: TRENodeBase;
174 FOwner: TreSubExpr;
GetRootnull175 function GetRoot: TreRootNode;
176 protected
177 public
178 constructor Create(AOwner: TreSubExpr); virtual;
179
Matchnull180 function Match(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
BackMatchnull181 function BackMatch(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
182
183 property Next: TRENodeBase read FNext write FNext;
184 property Owner: TreSubExpr read FOwner;
185 property Root: TreRootNode read GetRoot;
186 end;
187
188 // Char set & String
189 TCharSetNode = class(TRENodeBase)
190 private
191 FIgnoreCase: Boolean;
192 FInvert: Boolean;
193 FCharRanges: UCString;
194 FCharSets: UCString;
195 FCharArray: UCString;
196 {
197 procedure AddChar(C: AnsiChar); overload;
198 procedure AddRange(st, en: AnsiChar); overload;
199 function HasChar(C: AnsiChar): Boolean; overload;
200 }
201 procedure AddChar(C: UCChar); overload;
202 procedure AddRange(st, en: UCChar); overload;
203 procedure AddSet(C: UCChar); overload;
HasCharnull204 function HasChar(C: UCChar): Boolean; overload;
205 public
Matchnull206 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull207 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
208 end;
209
210 // Simple text
211 TCharSeqNode = class(TRENodeBase)
212 private
213 FIgnoreCase: Boolean;
214 FChar: UCChar;
215 public
Matchnull216 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull217 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
218 end;
219
220 // Special ckecking
221 TSpecCheckNode = class(TRENodeBase)
222 private
223 FCheckType: UCChar;
224 public
Matchnull225 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull226 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
227 end;
228
229 // Reference to sub expression
230
231 { TRefNode }
232
233 TRefNode = class(TRENodeBase)
234 private
235 FRef: integer;
236 FIgnoreCase: Boolean;
GetExprStrnull237 //function GetExprStr(const InputString: AnsiString): AnsiString; overload;
238 //function GetExprStr(const InputString: UCString): UCString; overload;
239 procedure GetExprPtr(const InputString: UCString; out Ptr: PWideChar; out Len: integer); // Alexey
240 public
Matchnull241 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull242 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
243 end;
244
245 // Base class of containers
246 TreListNodeBase = class(TRENodeBase)
247 private
248 FList: TFPObjectList;
249 public
250 constructor Create(AOwner: TreSubExpr); override;
251 destructor Destroy; override;
252 procedure Clear; virtual;
253 end;
254
255 // Branch of sub expression
256 TreBranchNode = class(TreListNodeBase)
257 private
GetClassCharnull258 function GetClassChar(C: UCChar; Modifiers: Word): UCChar;
259 public
260 procedure Add(Node: TRENodeBase);
261 procedure Invert;
262 {
263 procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
264 }
265 procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
Matchnull266 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull267 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
268 end;
269
270 // Zero-width testing
271 // (?=RE) - positive, ahead
272 // (?!RE) - negative, ahead
273 // (?<=RE) - positive, behind
274 // (?<!RE) - negative, behind
275 TZeroWidth = class(TRENodeBase)
276 private
277 FIsBack: Boolean;
278 FNegative: Boolean;
279 FBranch: TreBranchNode;
DoResultnull280 function DoResult(MatchRes: integer): integer;
281 public
Matchnull282 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull283 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
284 destructor Destroy; override;
285 end;
286
287
288 // Select operation (list of TreListNodeBase) => sub expression
289 TreSubExpr = class(TreListNodeBase)
290 private
291 FStart: integer; // first char of sub expression
292 FEnd: integer; // first char after sub expression
293 public
294 constructor Create(AOwner: TreSubExpr); override;
295 //procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
296 procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
Matchnull297 function Match(const InputString: UCString; var aPos: integer): integer; override;
BackMatchnull298 function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
299 end;
300
301 // Root node
302 TreRootNode = class(TreSubExpr)
303 private
304 FSubExpr: TFPList;
305 FOwner: TecRegExpr;
306 public
307 constructor Create(AOwner: TecRegExpr); reintroduce;
308 destructor Destroy; override;
309 procedure Clear; override;
MatchStrnull310 function MatchStr(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; overload;
311
312 property Owner: TecRegExpr read FOwner;
313 end;
314
315
316 // =============================================================================
317 // Utils functions
318 // =============================================================================
319
GetAbsoluteNextnull320 function GetAbsoluteNext(Node: TRENodeBase):TRENodeBase;
321 begin
322 Result := Node.Next;
323 if (Result = nil) and (Node.Owner <> nil) then
324 Result := GetAbsoluteNext(Node.Owner);
325 end;
326
IsInRangenull327 function IsInRange(RngType: UCChar; C: UCChar): Boolean; overload;
328 begin
329 Result := False;
330 { // Alexey
331 if not Assigned(CheckCustomCharClassProc) or
332 not CheckCustomCharClassProc(RngType, Ord(C), Result)
333 then
334 }
335 case RngType of
336 // \c is \w without (?r)
337 'c': Result := IsIdentChar(C);
338 'C': Result := not IsIdentChar(C);
339 // \k is \d without (?r)
340 'k': Result := IsIdentDigitChar(C);
341 'K': Result := not IsIdentDigitChar(C);
342 // \w with (?r)
343 'w': Result := IsWordChar(C);
344 'W': Result := not IsWordChar(C);
345 // \d with (?r)
346 'd': Result := IsDigitChar(C);
347 'D': Result := not IsDigitChar(C);
348 // space
349 's': Result := IsSpaceOrBreakChar(C);
350 'S': Result := not IsSpaceOrBreakChar(C);
351 // hex, rarely used
352 'h': Result := IsHexDigitChar(C);
353 'H': Result := not IsHexDigitChar(C);
354 // Alexey: changed /l and /L meaning, now it's: word char except '_'
355 'l': Result := IsWordChar(C) and (C<>'_');
356 'L': Result := not IsWordChar(C) or (C='_');
357 { //Alexey: removed
358 'g': Result := IsIdentLetterChar(C);
359 'G': Result := not IsIdentLetterChar(C);
360 }
361 end;
362 end;
363
364 function GetEscape(const Expression: UCString; var aPos: integer): UCChar; overload;
365 var
366 strt: integer;
367 N: integer;
368 begin
369 Result := #0;
370 case Expression[aPos] of
371 't': Result := #$9;
372 'n': Result := #$A;
373 'r': Result := #$D;
374 'f': Result := #$C;
375 'a': Result := #$7;
376 'e': Result := #$1B;
377 'x': begin
378 inc(aPos);
379 if aPos > Length(Expression) then
380 raise Exception.Create('Invalid escape char');
381 if Expression[aPos] = '{' then
382 begin
383 inc(aPos);
384 strt := aPos;
385 while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do
386 inc(aPos);
387 N := BufferHexToInt(@Expression[strt], aPos - strt);
388 if N<0 then
389 raise Exception.Create('Invalid hex digit in \x');
390 Result := UCChar(N);
391 end else
392 begin
393 N := BufferHexToInt(@Expression[aPos], 2);
394 if N<0 then
395 raise Exception.Create('Invalid hex digit in \x');
396 Result := UCChar(N);
397 Inc(aPos);
398 end;
399 if Result = '' then
400 raise Exception.Create('Invalid hex digit in \x');
401 end;
402 end;
403 end;
404
405 { TRENodeBase }
406
407 constructor TRENodeBase.Create(AOwner: TreSubExpr);
408 begin
409 inherited Create;
410 FOwner := AOwner;
411 FLoopMin := 1;
412 FLoopMax := 1;
413 {$IFDEF RE_DEBUG}
414 Inc(LastNodeID);
415 FNodeId := LastNodeID;
416 {$ENDIF}
417 end;
418
GetRootnull419 function TRENodeBase.GetRoot: TreRootNode;
420 var Node: TRENodeBase;
421 begin
422 Node := Self;
423 while Node.Owner <> nil do
424 Node := Node.Owner;
425 Result := Node as TreRootNode;
426 end;
427
428 { TCharSetNode }
429
HasCharnull430 function TCharSetNode.HasChar(C: UCChar): Boolean;
431 var i, N, k: integer;
432 begin
433 N := Length(FCharRanges);
434 if N > 0 then
435 for i := 1 to N shr 1 do
436 begin
437 k := i shl 1;
438 if (Ord(FCharRanges[k]) >= Ord(C)) and
439 (Ord(FCharRanges[k - 1]) <= Ord(C)) then
440 begin
441 Result := True;
442 Exit;
443 end;
444 end;
445 for i := 1 to Length(FCharSets) do
446 if IsInRange(FCharSets[i], C) then
447 begin
448 Result := True;
449 Exit;
450 end;
451 Result := Pos(C, FCharArray) <> 0;
452 end;
453
454 procedure TCharSetNode.AddChar(C: UCChar);
455 begin
456 if FIgnoreCase then
457 CharToUpCase(C);
458 if Pos(C, FCharArray) = 0 then
459 FCharArray := FCharArray + C;
460 end;
461
462 procedure TCharSetNode.AddRange(st, en: UCChar);
463 begin
464 if FIgnoreCase then
465 begin
466 CharToUpCase(st);
467 CharToUpCase(en);
468 end;
469 FCharRanges := FCharRanges + st + en;
470 end;
471
472 procedure TCharSetNode.AddSet(C: UCChar);
473 begin
474 FCharSets := FCharSets + C;
475 end;
476
Matchnull477 function TCharSetNode.Match(const InputString: UCString; var aPos: integer): integer;
478 var C: UCChar;
479 b: Boolean;
480 begin
481 if aPos > Length(InputString) then
482 begin
483 Result := 0;
484 Exit;
485 end;
486
487 C := InputString[aPos];
488 if FIgnoreCase then CharToUpCase(C);
489 b := HasChar(C) xor FInvert;
490 if b then Inc(aPos);
491 if b then Result := 1 else Result := 0;
492 end;
493
BackMatchnull494 function TCharSetNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
495 var C: UCChar;
496 b: Boolean;
497 begin
498 if aPos <= 1 then
499 begin
500 Result := 0;
501 Exit;
502 end;
503
504 C := InputString[aPos - 1];
505 if FIgnoreCase then CharToUpCase(C);
506 b := HasChar(C) xor FInvert;
507 if b then Dec(aPos);
508 if b then Result := 1 else Result := 0;
509 end;
510
511 { TCharSeqNode }
512
BackMatchnull513 function TCharSeqNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
514 var C: UCChar;
515 begin
516 Result := 0;
517 if aPos > 1 then
518 begin
519 C := InputString[aPos - 1];
520 if FIgnoreCase then
521 CharToUpCase(C);
522 if C = FChar then
523 begin
524 Dec(aPos);
525 Result := 1;
526 end;
527 end;
528 end;
529
Matchnull530 function TCharSeqNode.Match(const InputString: UCString; var aPos: integer): integer;
531 var C: UCChar;
532 begin
533 Result := 0;
534 if aPos <= Length(InputString) then
535 begin
536 C := InputString[aPos];
537 if FIgnoreCase then
538 CharToUpCase(C);
539 if C = FChar then
540 begin
541 Inc(aPos, 1);
542 Result := 1;
543 end;
544 end;
545 end;
546
547 { TSpecCheckNode }
548
BackMatchnull549 function TSpecCheckNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
550 var C: UCChar;
551 b: Boolean;
552 begin
553 b := False;
554 if aPos <= 1 then
555 begin
556 if aPos < 1 then
557 begin
558 Result := 0;
559 Exit;
560 end;
561 C := #0;
562 end
563 else C := InputString[aPos - 1];
564
565 case FCheckType of
566 '^': b := (C = #0) or (C = #10) or
567 (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] <> #10);
568 '$': b := (aPos > Length(InputString)) or (InputString[aPos] = #13) or
569 (InputString[aPos] = #10) and (C <> #13);
570 'A': b := C = #0;
571 'Z': b := aPos > Length(InputString);
572 'b': b := IsWordBreak(aPos, InputString);
573 'B': b := not IsWordBreak(aPos, InputString);
574 'z': begin
575 b := IsLineBreakChar(C);
576 if b then
577 begin
578 Dec(aPos);
579 if (C = #10) and (aPos > 0) and (InputString[aPos] = #13) then
580 Dec(aPos);
581 end;
582 end;
583 else if C<> #0 then
584 begin
585 case FCheckType of
586 '.': b := True;
587 ':': b := not IsLineBreakChar(C);
588 else b := IsInRange(FCheckType, C);
589 end;
590 if b then Dec(aPos);
591 end;
592 end;
593 if b then Result := 1 else Result := 0;
594 end;
595
Matchnull596 function TSpecCheckNode.Match(const InputString: UCString; var aPos: integer): integer;
597 var C: UCChar;
598 b: Boolean;
599 begin
600 b := False;
601 if aPos > Length(InputString) then
602 begin
603 if aPos - 1 > Length(InputString) then
604 begin
605 Result := 0;
606 Exit;
607 end;
608 C := #0;
609 end
610 else C := InputString[aPos];
611
612 case FCheckType of
613 '^': b := (aPos = 1) or (InputString[aPos - 1] = #10) or
614 (InputString[aPos - 1] = #13) and (C <> #10);
615 '$': b := (C = #13) or (C = #10) and (aPos > 1) and (InputString[aPos - 1] <> #13) or (C = #0);
616 'A': b := aPos = 1;
617 'Z': b := C = #0;
618 'b': b := IsWordBreak(aPos, InputString);
619 'B': b := not IsWordBreak(aPos, InputString);
620 'z': begin
621 b := IsLineBreakChar(C);
622 if b then
623 begin
624 Inc(aPos);
625 if (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] = #10) then
626 Inc(aPos);
627 end;
628 end;
629 else if C<> #0 then
630 begin
631 case FCheckType of
632 '.': b := True;
633 ':': b := not IsLineBreakChar(C);
634 else b := IsInRange(FCheckType, C);
635 end;
636 if b then Inc(aPos);
637 end;
638 end;
639 if b then Result := 1 else Result := 0;
640 end;
641
642 { TRefNode }
643
644 procedure TRefNode.GetExprPtr(const InputString: UCString; out Ptr: PWideChar; out Len: integer); // Alexey
645 var se: TreSubExpr;
646 begin
647 Ptr := nil;
648 Len := 0;
649 with Root do
650 if FSubExpr.Count > FRef then
651 begin
652 se := TreSubExpr(FSubExpr[FRef]);
653 Len := abs(se.FEnd - se.FStart);
654 if (se.FStart > 0) and (se.FEnd > 0) then
655 Ptr := @InputString[Min(se.FStart, se.FEnd)];
656 end;
657 end;
658
BackMatchnull659 function TRefNode.BackMatch(const InputString: UCString; var aPos: integer): integer; // Alexey
660 var
661 P1, P2: PWideChar;
662 L: integer;
663 b: Boolean;
664 begin
665 Result := 0;
666 GetExprPtr(InputString, P1, L);
667 if (P1 <> nil) and (L < aPos) then
668 begin
669 P2 := @InputString[aPos - L];
670 if FIgnoreCase then
671 b := strlicomp(P1, P2, L) = 0
672 else
673 b := strlcomp(P1, P2, L) = 0;
674 if b then
675 begin
676 Dec(aPos, L);
677 Result := 1;
678 end;
679 end;
680 end;
681
Matchnull682 function TRefNode.Match(const InputString: UCString; var aPos: integer): integer; // Alexey
683 var
684 P1, P2: PWideChar;
685 L: integer;
686 b: Boolean;
687 begin
688 Result := 0;
689 GetExprPtr(InputString, P1, L);
690 if (P1 <> nil) and (L <= Length(InputString) - aPos + 1) then
691 begin
692 P2 := @InputString[aPos];
693 if FIgnoreCase then
694 b := strlicomp(P1, P2, L) = 0
695 else
696 b := strlcomp(P1, P2, L) = 0;
697 if b then
698 begin
699 Inc(aPos, L);
700 Result := 1;
701 end;
702 end;
703 end;
704
705 { TZeroWidth }
706
707 destructor TZeroWidth.Destroy;
708 begin
709 FreeAndNil(FBranch);
710 inherited;
711 end;
712
DoResultnull713 function TZeroWidth.DoResult(MatchRes: integer): integer;
714 begin
715 if FNegative then
716 begin
717 If MatchRes = 0 then
718 Result := 2
719 else
720 Result := 0;
721 end else
722 Result := MatchRes;
723 end;
724
BackMatchnull725 function TZeroWidth.BackMatch(const InputString: UCString;
726 var aPos: integer): integer;
727 var testPos: integer;
728 begin
729 testPos := aPos;
730 if FIsBack then
731 Result := DoResult(FBranch.Match(InputString, testPos))
732 else
733 Result := DoResult(FBranch.BackMatch(InputString, testPos));
734 end;
735
Matchnull736 function TZeroWidth.Match(const InputString: UCString;
737 var aPos: integer): integer;
738 var testPos: integer;
739 begin
740 testPos := aPos;
741 if FIsBack then
742 Result := DoResult(FBranch.BackMatch(InputString, testPos))
743 else
744 Result := DoResult(FBranch.Match(InputString, testPos));
745 end;
746
747 { TreListNodeBase }
748
749 procedure TreListNodeBase.Clear;
750 begin
751 FList.Clear;
752 end;
753
754 constructor TreListNodeBase.Create(AOwner: TreSubExpr);
755 begin
756 inherited;
757 FList := TFPObjectList.Create;
758 end;
759
760 destructor TreListNodeBase.Destroy;
761 begin
762 FList.Free;
763 inherited;
764 end;
765
766 { TreBranchNode }
767
GetClassCharnull768 function TreBranchNode.GetClassChar(C: UCChar; Modifiers: Word): UCChar;
769 // Alexey: rewritten
770 begin
771 { // Alexey
772 if Assigned(GetCustomCharClassProc) and GetCustomCharClassProc(C) then
773 Result := C else // User defined character class
774 }
775 case C of
776 'w':
777 begin
778 if (MaskModR and Modifiers) = 0 then
779 Result := 'c'
780 else
781 Result := 'w';
782 end;
783 'W':
784 begin
785 if (MaskModR and Modifiers) = 0 then
786 Result := 'C'
787 else
788 Result := 'W';
789 end;
790 'd':
791 begin
792 if (MaskModR and Modifiers) = 0 then
793 Result := 'k'
794 else
795 Result := 'd';
796 end;
797 'D':
798 begin
799 if (MaskModR and Modifiers) = 0 then
800 Result := 'K'
801 else
802 Result := 'D';
803 end;
804 // Alexey
805 's', 'S',
806 'h', 'H',
807 'c', 'C',
808 'l', 'L',
809 //'g', 'G', // Alexey: removed
810 'k', 'K':
811 Result := C;
812 else
813 Result := #0;
814 end;
815 end;
816
817 procedure TreBranchNode.Add(Node: TRENodeBase);
818 begin
819 if FList.Count > 0 then
820 TRENodeBase(FList.Last).Next := Node;
821 FList.Add(Node);
822 end;
823
824 procedure TreBranchNode.Invert;
825 var i, N: integer;
826 NextLast: TRENodeBase;
827 begin
828 N := FList.Count;
829 if N = 0 then Exit;
830 NextLast := TRENodeBase(FList.Last).Next;
831 for i := 0 to (N div 2) - 1 do
832 FList.Exchange(i, N - 1 - i);
833 for i := 0 to N - 2 do
834 TRENodeBase(FList[i]).Next := TRENodeBase(FList[i + 1]);
835 TRENodeBase(FList.Last).Next := NextLast;
836 end;
837
838 // All characters are ANSI characters.
839 // UCChar is used only for holding char codes less 255
840 // This is made to have unified storage of regular expr. nodes
841
842 procedure TreBranchNode.Compile(const Expression: UCString; var aPos: integer;
843 Modifiers: Word);
844 var Len: integer;
845 sub: TreSubExpr;
846 C: UCChar;
847
848 function SafeInc(RaiseEx: Boolean = False): UCChar; // Increment position to the next significant char
849 var
850 C: WideChar;
851 bComment: boolean;
852 begin
853 inc(aPos);
854 // Skip spaces and comments
855 if (Modifiers and MaskModX) <> 0 then
856 while (aPos <= Len) do
857 begin
858 C := Expression[aPos];
859 bComment:= C = '#';
860 if not (IsSpaceChar(C) or bComment) then Break;
861 if bComment then
862 begin
863 Inc(aPos);
864 while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do
865 Inc(aPos);
866 end;
867 Inc(aPos);
868 end;
869
870 if aPos <= Len then Result := Expression[aPos] else
871 begin
872 Result := #0;
873 if RaiseEx then
874 raise Exception.Create(zreUnexpectedEnd);
875 end;
876 end;
877
878 function ReadNumber: integer;
879 var strt: integer;
880 begin
881 strt := aPos;
882 while (aPos <= Len) and IsDigitChar(Expression[aPos]) do
883 Inc(aPos);
884 if strt = aPos then
885 raise Exception.Create('Number is expected');
886 Result := BufferStrToInt(@Expression[strt], aPos - strt);
887 Dec(aPos);
888 end;
889
890 // Read repeaters for node
891 procedure ReadRepeaters(Node: TRENodeBase);
892 begin
893 if aPos > Len then Exit;// repeaters are optional
894 case SafeInc of
895 '+': Node.FLoopMax := -1;
896 '?': Node.FLoopMin := 0;
897 '*': begin
898 Node.FLoopMax := -1;
899 Node.FLoopMin := 0;
900 end;
901 '{': begin
902 SafeInc;
903 Node.FLoopMin := ReadNumber;
904 case SafeInc of
905 ',': if SafeInc <> '}' then
906 begin
907 Node.FLoopMax := ReadNumber;
908 if SafeInc <> '}' then
909 raise Exception.Create('There is no closing "}"');
910 end
911 else
912 Node.FLoopMax := -1;
913 '}': Node.FLoopMax := Node.FLoopMin;
914 else raise Exception.Create('Invalid loop specifier');
915 end;
916 if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then
917 raise Exception.Create('Loop minimum must be less then loop maximum');
918 end;
919 else begin
920 Dec(aPos);
921 Exit;
922 end
923 end;
924 if SafeInc = '?' then Node.FNonGreedy := True else
925 begin
926 Node.FNonGreedy := (Modifiers and MaskModG) = 0;
927 Dec(aPos);
928 end;
929 end;
930
931 procedure AddCharSeq(const C: UCChar);
932 var csNode: TCharSeqNode;
933 begin
934 csNode := TCharSeqNode.Create(Owner);
935 csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0;
936 if csNode.FIgnoreCase then csNode.FChar := ecUpCase(C)
937 else csNode.FChar := C;
938 Add(csNode);
939 ReadRepeaters(csNode);
940 end;
941
942 procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True);
943 var sn: TSpecCheckNode;
944 begin
945 sn := TSpecCheckNode.Create(Owner);
946 sn.FCheckType := C;
947 Add(sn);
948 if WithRepeat then ReadRepeaters(sn);
949 end;
950
951 function PickSetChar(cs: TCharSetNode): UCChar;
952 begin
953 Result := Expression[aPos];
954 if Result = '\' then
955 begin
956 Inc(aPos);
957 if aPos > Length(Expression) then
958 Exit;
959 Result := GetEscape(Expression, aPos);
960 if Result = #0 then
961 begin
962 Result := GetClassChar(Expression[aPos], Modifiers);
963 if Result = #0 then Result := Expression[aPos]
964 else
965 begin
966 cs.AddSet(Result);
967 Result := #0;
968 end;
969 end;
970 end;
971 end;
972
973 procedure ReadCharSet;
974 var cs: TCharSetNode;
975 Cstrt, Cend: UCChar;
976 begin
977 cs := TCharSetNode.Create(Owner);
978 cs.FIgnoreCase := (Modifiers and MaskModI) <> 0;
979 Add(cs);
980 Cstrt := #0;
981 if SafeInc(True) = '^' then cs.FInvert := True
982 else Cstrt := PickSetChar(cs);
983 while SafeInc(True) <> ']' do
984 begin
985 if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range
986 if SafeInc = ']' then
987 begin
988 cs.AddChar(Cstrt);
989 Cstrt := '-';
990 Break;
991 end else
992 begin
993 Cend := PickSetChar(cs);
994 if Cend = #0 then
995 begin
996 cs.AddChar(Cstrt);
997 Cstrt := '-';
998 end else
999 if Ord(Cend) < Ord(Cstrt) then
1000 raise Exception.Create('Invalid set range') else
1001 begin
1002 // Extended russian support
1003 cs.AddRange(Cstrt, Cend);
1004 Cstrt := #0;
1005 if SafeInc(True) = ']' then Break;
1006 end;
1007 end;
1008 if Cstrt <> #0 then cs.AddChar(Cstrt);
1009 Cstrt := PickSetChar(cs);
1010 end;
1011 if Cstrt <> #0 then cs.AddChar(Cstrt);
1012 ReadRepeaters(cs);
1013 end;
1014
1015 procedure AddRefNode(RefIdx: integer);
1016 var rn: TRefNode;
1017 begin
1018 if TreRootNode(Root).FSubExpr.Count <= RefIdx then
1019 raise Exception.Create('Invalid reference');
1020 rn := TRefNode.Create(Owner);
1021 rn.FRef := RefIdx;
1022 rn.FIgnoreCase := (Modifiers and MaskModI) <> 0;
1023 Add(rn);
1024 ReadRepeaters(rn);
1025 end;
1026
1027 procedure AddZeroWidth(IsBack: Boolean);
1028 var Negative: Boolean;
1029 Branch: TreBranchNode;
1030 Node: TZeroWidth;
1031 begin
1032 case Expression[aPos] of
1033 '!': Negative := True;
1034 '=': Negative := False;
1035 else
1036 raise Exception.Create(zreInvalidZeroWidth);
1037 end;
1038 SafeInc(True);
1039 Branch := TreBranchNode.Create(nil);
1040 try
1041 Branch.Compile(Expression, aPos, Modifiers);
1042 Node := TZeroWidth.Create(Owner);
1043 Node.FIsBack := IsBack;
1044 Node.FNegative := Negative;
1045 Node.FBranch := Branch;
1046 Add(Node);
1047 if IsBack then
1048 Branch.Invert;
1049 except
1050 Branch.Free;
1051 end;
1052 end;
1053
1054 var tp: integer;
1055 begin
1056 Clear;
1057 Len := Length(Expression);
1058 Dec(aPos);
1059 while aPos <= Len do
1060 begin
1061 case SafeInc of
1062 ')', '|', #0: Exit; // end of branch
1063 '(': begin
1064 if SafeInc = '?' then
1065 begin // Change modifiers
1066 case SafeInc(True) of
1067 '<': begin
1068 SafeInc(True);
1069 AddZeroWidth(True);
1070 end;
1071 '=', '!': AddZeroWidth(False);
1072 else
1073 begin
1074 tp := aPos;
1075 repeat // skip comment
1076 Inc(aPos);
1077 until (aPos > Len) or (Expression[aPos] = ')');
1078 if Expression[tp] <> '#' then
1079 Root.Owner.ParseModifiers(@Expression[tp], aPos - tp, Modifiers)
1080 end;
1081 end;
1082 end else
1083 begin // sub expression
1084 sub := TreSubExpr.Create(Owner);
1085 Add(sub);
1086 sub.Compile(Expression, aPos, Modifiers);
1087 if (aPos > Len) or (Expression[aPos] <> ')') then
1088 raise Exception.Create('Do not closed sub expression');
1089 ReadRepeaters(sub);
1090 end;
1091 end;
1092 '[': begin // char set node
1093 ReadCharSet;
1094 end;
1095 '^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False) // begin of text
1096 else AddSpecNode('^', False); // begin of line
1097 '$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False) // end of text
1098 else AddSpecNode('$', False); // end of line
1099 '.': if (Modifiers and MaskModS) <> 0 then AddSpecNode('.') // all
1100 else AddSpecNode(':'); // all without line separators
1101 '\': begin
1102 Inc(aPos);
1103 if aPos > Len then C := '\'
1104 else C := GetEscape(Expression, aPos);
1105 if C <> #0 then AddCharSeq(C) else
1106 begin
1107 C := GetClassChar(Expression[aPos], Modifiers);
1108 if C <> #0 then AddSpecNode(C) else
1109 case Expression[aPos] of
1110 'A', 'Z', 'b', 'B', 'z': AddSpecNode(Expression[aPos]);
1111 '1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0'));
1112 else AddCharSeq(Expression[aPos]);
1113 end;
1114 end;
1115 end;
1116 else AddCharSeq(Expression[aPos]); // Simple char
1117 end;
1118 end;
1119 end;
1120
1121 // Main mtaching routine (recursive)
1122 // Returns:
1123 // 0 - does not match
1124 // 1 - match the Node
1125 // 2 - match The Node and all next nodes
1126 function MatchNode(Node: TRENodeBase; const InputString: UCString;
1127 var aPos: integer): integer; overload;
1128
1129 var save, k, sv, LastSucc, total, Success: integer;
1130 IsBrEnd: Boolean;
1131
1132 begin
1133 if Node = nil then
1134 begin
1135 Result := 2;
1136 Exit;
1137 end;
1138 Result := 0;
1139
1140 // required minimum repeat
1141 save := aPos;
1142 Success := 0;
1143 for k := 1 to node.FLoopMin do
1144 begin
1145 Success := Node.Match(InputString, aPos);
1146 if Success = 0 then
1147 begin
1148 aPos := save;
1149 {$IFDEF RE_DEBUG}
1150 if Assigned(REDebugOnMatchProc) then
1151 REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
1152 {$ENDIF}
1153 Exit;
1154 end;
1155 end;
1156
1157 k := node.FLoopMin - 1;
1158 // k := 0;
1159 LastSucc := 0;
1160 total := 0;
1161
1162 IsBrEnd := Assigned(Node.Owner) and
1163 ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
1164 repeat
1165 Inc(k);
1166 sv := aPos;
1167 if IsBrEnd then Node.Owner.FEnd := aPos;
1168 if Node is TreSubExpr then
1169 begin
1170 if Success <> 2 then
1171 Success := MatchNode(GetAbsoluteNext(Node), InputString, aPos);
1172 end else
1173 if Node.Next <> nil then
1174 Success := MatchNode(Node.Next, InputString, aPos) else
1175 if IsBrEnd then
1176 Success := MatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
1177 Success := 2;
1178
1179 if Success = 2 then // success all next nodes
1180 begin
1181 {$IFDEF RE_DEBUG}
1182 if Assigned(REDebugOnMatchProc) then
1183 REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
1184 {$ENDIF}
1185 total := aPos;
1186 LastSucc := sv;
1187 aPos := LastSucc;
1188 Result := Success;
1189 if node.FNonGreedy then
1190 Break; // for non GREEDY mode
1191 end else
1192 if IsBrEnd then
1193 begin
1194 total := sv;
1195 Result := 1;
1196 end else
1197 if Success = 1 then
1198 begin
1199 aPos := sv;
1200 total := sv;
1201 Result := 1;
1202 if node.FNonGreedy then
1203 Break; // for non GREEDY mode
1204 end;
1205
1206 if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
1207 begin
1208 if not IsBrEnd then //v2.36
1209 Result := Success; //v2.33
1210 Break;
1211 end;
1212
1213 sv := aPos;
1214 Success := Node.Match(InputString, aPos);
1215 until (Success = 0) or (aPos = sv);
1216
1217 // if (node.FLoopMin > 0) and (k < node.FLoopMin) then
1218 // Result := 0;
1219 {$IFDEF RE_DEBUG}
1220 if Assigned(REDebugOnMatchProc) then
1221 REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
1222 {$ENDIF}
1223 if Result > 0 then
1224 begin
1225 aPos := total;
1226 // Save sub-expression result
1227 if IsBrEnd and (LastSucc > 0) then
1228 Node.Owner.FEnd := LastSucc;
1229 end else
1230 begin
1231 aPos := save;
1232 if IsBrEnd then Node.Owner.FEnd := 0;
1233 end;
1234 end;
1235
1236 // Main mtaching routine (recursive)
1237 function BackMatchNode(Node: TRENodeBase; const InputString: UCString;
1238 var aPos: integer): integer; overload;
1239
1240 var save, k, sv, LastSucc, total, Success: integer;
1241 IsBrEnd: Boolean;
1242
1243 begin
1244 if Node = nil then
1245 begin
1246 Result := 2;
1247 Exit;
1248 end;
1249 Result := 0;
1250
1251 // required minimum repeat
1252 save := aPos;
1253 Success := 0;
1254 for k := 1 to node.FLoopMin do
1255 begin
1256 Success := Node.BackMatch(InputString, aPos);
1257 if Success = 0 then
1258 begin
1259 aPos := save;
1260 {$IFDEF RE_DEBUG}
1261 if Assigned(REDebugOnMatchProc) then
1262 REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
1263 {$ENDIF}
1264 Exit;
1265 end;
1266 end;
1267
1268 k := node.FLoopMin - 1;
1269 LastSucc := 0;
1270 total := 0;
1271
1272 IsBrEnd := Assigned(Node.Owner) and
1273 ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
1274 repeat
1275 Inc(k);
1276 sv := aPos;
1277 if IsBrEnd then Node.Owner.FStart := aPos;
1278 if Node is TreSubExpr then
1279 begin
1280 if Success <> 2 then
1281 Success := BackMatchNode(GetAbsoluteNext(Node), InputString, aPos);
1282 end else
1283 if Node.Next <> nil then
1284 Success := BackMatchNode(Node.Next, InputString, aPos) else
1285 if IsBrEnd then
1286 Success := BackMatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
1287 Success := 2;
1288
1289 if Success = 2 then
1290 begin
1291 {$IFDEF RE_DEBUG}
1292 if Assigned(REDebugOnMatchProc) then
1293 REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
1294 {$ENDIF}
1295 total := aPos;
1296 LastSucc := sv;
1297 aPos := LastSucc;
1298 Result := Success;
1299 if node.FNonGreedy then Break; // for non GREEDY mode
1300 end else
1301 if IsBrEnd then
1302 begin
1303 total := sv;
1304 Result := 1;
1305 end else
1306 if Success = 1 then
1307 begin
1308 aPos := sv;
1309 total := sv;
1310 Result := 1;
1311 end;
1312
1313 if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
1314 begin
1315 if not IsBrEnd then //v2.36
1316 Result := Success; //v2.33
1317 Break;
1318 end;
1319
1320 sv := aPos;
1321 Success := Node.BackMatch(InputString, aPos);
1322 until (Success = 0) or (aPos = sv);
1323
1324 {$IFDEF RE_DEBUG}
1325 if Assigned(REDebugOnMatchProc) then
1326 REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
1327 {$ENDIF}
1328 if Result > 0 then
1329 begin
1330 aPos := total;
1331 // Save sub-expression result
1332 if IsBrEnd and (LastSucc > 0) then
1333 Node.Owner.FStart := LastSucc;
1334 end else
1335 begin
1336 aPos := save;
1337 if IsBrEnd then Node.Owner.FStart := 0;
1338 end;
1339 end;
1340
Matchnull1341 function TreBranchNode.Match(const InputString: UCString; var aPos: integer): integer;
1342 begin
1343 {$IFDEF RE_DEBUG}
1344 if Assigned(REDebugOnMatchProc) then
1345 REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
1346 {$ENDIF}
1347 if FList.Count = 0 then Result := 1
1348 else Result := MatchNode(TRENodeBase(FList.First), InputString, aPos);
1349 {$IFDEF RE_DEBUG}
1350 if Assigned(REDebugOnMatchProc) then
1351 REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
1352 {$ENDIF}
1353 end;
1354
BackMatchnull1355 function TreBranchNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
1356 begin
1357 {$IFDEF RE_DEBUG}
1358 if Assigned(REDebugOnMatchProc) then
1359 REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
1360 {$ENDIF}
1361 if FList.Count = 0 then Result := 1
1362 else Result := BackMatchNode(TRENodeBase(FList.First), InputString, aPos);
1363 {$IFDEF RE_DEBUG}
1364 if Assigned(REDebugOnMatchProc) then
1365 REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
1366 {$ENDIF}
1367 end;
1368
1369 { TreSubExpr }
1370
1371 procedure TreSubExpr.Compile(const Expression: UCString; var aPos: integer;
1372 Modifiers: Word);
1373 var Br: TreBranchNode;
1374 begin
1375 Dec(aPos);
1376 repeat
1377 Inc(aPos);
1378 Br := TreBranchNode.Create(Self);
1379 FList.Add(Br);
1380 Br.Compile(Expression, aPos, Modifiers);
1381 if Br.FList.Count = 0 then FList.Remove(Br);
1382 until (aPos > Length(Expression)) or (Expression[aPos] = ')');
1383 end;
1384
1385 constructor TreSubExpr.Create(AOwner: TreSubExpr);
1386 begin
1387 inherited;
1388 if Owner <> nil then
1389 TreRootNode(Root).FSubExpr.Add(Self);
1390 end;
1391
Matchnull1392 function TreSubExpr.Match(const InputString: UCString; var aPos: integer): integer;
1393 var i: integer;
1394 OldEnd, OldStart, CurRes, svPos: integer;
1395 begin
1396 OldEnd := FEnd;
1397 OldStart := FStart;
1398 FStart := aPos;
1399 FEnd := 0;
1400 Result := 0;
1401 svPos := aPos;
1402 for i := 0 to FList.Count - 1 do
1403 begin
1404 CurRes := TreBranchNode(FList[i]).Match(InputString, aPos);
1405 if FNonGreedy then
1406 begin
1407 if CurRes > 0 then
1408 begin
1409 OldEnd := FEnd;
1410 OldStart := FStart;
1411 Result := CurRes;
1412 aPos := svPos;
1413 Break;
1414 end;
1415 end else
1416 if CurRes > Result then
1417 begin
1418 OldEnd := FEnd;
1419 OldStart := FStart;
1420 Result := CurRes;
1421 aPos := svPos;
1422 if Result = 2 then
1423 Break;
1424 end;
1425 end;
1426 FEnd := OldEnd;
1427 FStart := OldStart;
1428 if (FEnd > 0) and (Result > 0) then
1429 aPos := FEnd;
1430 end;
1431
BackMatchnull1432 function TreSubExpr.BackMatch(const InputString: UCString; var aPos: integer): integer;
1433 var i: integer;
1434 OldEnd, OldStart, CurRes, svPos: integer;
1435 begin
1436 OldEnd := FEnd;
1437 OldStart := FStart;
1438 FStart := 0;
1439 FEnd := aPos;
1440 Result := 0;
1441 svPos := aPos;
1442 for i := 0 to FList.Count - 1 do
1443 begin
1444 CurRes := TreBranchNode(FList[i]).BackMatch(InputString, aPos);
1445 if FNonGreedy then
1446 begin
1447 if CurRes > 0 then
1448 begin
1449 OldEnd := FEnd;
1450 OldStart := FStart;
1451 Result := CurRes;
1452 aPos := svPos;
1453 Break;
1454 end;
1455 end else
1456 if CurRes > Result then
1457 begin
1458 OldEnd := FEnd;
1459 OldStart := FStart;
1460 Result := CurRes;
1461 aPos := svPos;
1462 if Result = 2 then
1463 Break;
1464 end;
1465 end;
1466 FEnd := OldEnd;
1467 FStart := OldStart;
1468 if (FStart > 0) and (Result > 0) then
1469 aPos := FStart;
1470 end;
1471
1472 { TreRootNode }
1473
1474 procedure TreRootNode.Clear;
1475 begin
1476 inherited;
1477 FSubExpr.Clear;
1478 FSubExpr.Add(Self);
1479 end;
1480
1481 constructor TreRootNode.Create(AOwner: TecRegExpr);
1482 begin
1483 inherited Create(nil);
1484 FOwner := AOwner;
1485 FSubExpr := TFPList.Create;
1486 FSubExpr.Add(Self);
1487 end;
1488
1489 destructor TreRootNode.Destroy;
1490 begin
1491 FSubExpr.Free;
1492 inherited;
1493 end;
1494
MatchStrnull1495 function TreRootNode.MatchStr(const InputString: UCString;
1496 var aPos: integer; Back: Boolean): Boolean;
1497 var i: integer;
1498 begin
1499 for i := 0 to FSubExpr.Count - 1 do
1500 begin
1501 TreSubExpr(FSubExpr[i]).FStart := -1;
1502 TreSubExpr(FSubExpr[i]).FEnd := -1;
1503 end;
1504 if Back then
1505 Result := BackMatchNode(Self, InputString, aPos) <> 0
1506 else
1507 Result := MatchNode(Self, InputString, aPos) <> 0;
1508 end;
1509
1510 // =============================================================================
1511 // Application Level
1512 // =============================================================================
1513
1514 { TecRegExpr }
1515
1516 constructor TecRegExpr.Create;
1517 begin
1518 inherited;
1519 FModifiers := DefaultModifiers;
1520 end;
1521
1522 destructor TecRegExpr.Destroy;
1523 begin
1524 FreeAndNil(FProgRoot);
1525 inherited;
1526 end;
1527
1528 procedure TecRegExpr.ClearRoot;
1529 begin
1530 FreeAndNil(FProgRoot);
1531 FMatchOK := False;
1532 end;
1533
IsEmptynull1534 function TecRegExpr.IsEmpty: Boolean;
1535 begin
1536 Result := not Assigned(FProgRoot) or (TreRootNode(FProgRoot).FList.Count = 0);
1537 end;
1538
1539 procedure TecRegExpr.Compile(const AExpression: UCString);
1540 var Pos: integer;
1541 begin
1542 {$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF}
1543 FMatchOK := False;
1544 if not Assigned(FProgRoot) then
1545 FProgRoot := TreRootNode.Create(Self)
1546 else
1547 TreRootNode(FProgRoot).Clear;
1548
1549 Pos := 1;
1550 try
1551 if AExpression <> '' then
1552 TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers);
1553 except
1554 ClearRoot;
1555 raise;
1556 end;
1557 //FUnicodeCompiled := True;
1558 end;
1559
Compilenull1560 function TecRegExpr.Compile: Boolean;
1561 begin
1562 try
1563 if IsEmpty then
1564 Compile(FExpression);
1565 except
1566 end;
1567 Result := not IsEmpty;
1568 end;
1569
GetIsInvalidnull1570 function TecRegExpr.GetIsInvalid: Boolean;
1571 begin
1572 Result := not Compile;
1573 end;
1574
GetModifiernull1575 function TecRegExpr.GetModifier(const Index: Integer): boolean;
1576 begin
1577 Result := (FModifiers and Index) <> 0;
1578 end;
1579
GetModifierStrnull1580 function TecRegExpr.GetModifierStr: ecString;
1581 const ModLet: ecString = 'irsgmx';
1582 var s1, s2: string;
1583 i: integer;
1584 begin
1585 s1 := ''; s2 := '';
1586 for i := 0 to 5 do
1587 if (FModifiers and (1 shl i)) <> 0 then
1588 s1 := s1 + ModLet[i + 1]
1589 else
1590 s2 := s2 + ModLet[i + 1];
1591
1592 Result := '(?' + s1;
1593 if s2 <> '' then
1594 Result := Result + '-' + s2;
1595 Result := Result + ')';
1596 end;
1597
Matchnull1598 function TecRegExpr.Match(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean;
1599 begin
1600 Result := Compile; // ensure compiling and validity
1601 if Result then
1602 begin
1603 if aPos < 1 then
1604 aPos := 1;
1605 Result := TreRootNode(FProgRoot).MatchStr(InputString, aPos, Back);
1606 FMatchOK := Result;
1607 end;
1608 end;
1609
MatchLengthnull1610 function TecRegExpr.MatchLength(const InputString: UCString;
1611 aPos: integer; Back: Boolean): integer;
1612 begin
1613 Result := aPos;
1614 if Match(InputString, aPos, Back) then
1615 begin
1616 if Back then
1617 Result := Result - aPos
1618 else
1619 Result := aPos - Result;
1620 end
1621 else
1622 Result := 0;
1623 end;
1624
1625
1626 //Delete regex comments "#...." in multi-line text
1627 function _MultilineToString(const Value: ecString): ecString;
1628 //
1629 function IsEol(C: WideChar): boolean; inline;
1630 begin
1631 Result:= (C=#10) or (C=#13);
1632 end;
1633 //
1634 var
1635 NBegin, NEnd, i: integer;
1636 begin
1637 Result:= Value;
1638
1639 repeat
1640 NBegin:= 0;
1641 for i:= 1 to Length(Result) do
1642 if (Result[i]='#') and ((i=1) or (Result[i-1]<>'\')) then
1643 begin
1644 NBegin:= i;
1645 Break;
1646 end;
1647 if NBegin=0 then Break;
1648
1649 NEnd:= Length(Result);
1650 for i:= NBegin+1 to Length(Result) do
1651 if IsEol(Result[i]) then
1652 begin
1653 NEnd:= i-1;
1654 Break;
1655 end;
1656
1657 Delete(Result, NBegin, NEnd-NBegin+1);
1658 until false;
1659
1660 for i:= 1 to Length(Result) do
1661 if IsEol(Result[i]) then
1662 Result[i]:= ' ';
1663 end;
1664
1665 procedure TecRegExpr.SetExpression(const Value: ecString);
1666 begin
1667 FExpression:= _MultilineToString(Value); // Alexey: fix to handle #-comments in regex
1668 ClearRoot;
1669 end;
1670
1671 procedure TecRegExpr.SetModifier(const Index: Integer;
1672 const Value: boolean);
1673 begin
1674 if Value then FModifiers := FModifiers or Index
1675 else FModifiers := FModifiers and not Index;
1676 ClearRoot;
1677 end;
1678
1679 procedure TecRegExpr.SetModifiers(const Value: Word);
1680 begin
1681 FModifiers := Value;
1682 ClearRoot;
1683 end;
1684
1685 procedure TecRegExpr.SetModifierStr(const Value: ecString);
1686 begin
1687 if (Length(Value) >= 3) and (Value[1] = '(') and (Value[2] = '?') then
1688 ParseModifiers(@Value[3], Length(Value) - 3, FModifiers);
1689 end;
1690
GetMatchLennull1691 function TecRegExpr.GetMatchLen(Idx: integer): integer;
1692 begin
1693 Result := -1;
1694 if FMatchOK then
1695 with TreRootNode(FProgRoot) do
1696 if (idx < FSubExpr.Count) then
1697 with TreSubExpr(FSubExpr[Idx]) do
1698 if (FStart <> -1) and (FEnd <> -1) then
1699 Result := FEnd - FStart;
1700 end;
1701
GetMatchPosnull1702 function TecRegExpr.GetMatchPos(Idx: integer): integer;
1703 begin
1704 Result := -1;
1705 if FMatchOK then
1706 with TreRootNode(FProgRoot) do
1707 if (idx < FSubExpr.Count) then
1708 with TreSubExpr(FSubExpr[Idx]) do
1709 Result := FStart;
1710 end;
1711
GetSubExprMatchCountnull1712 function TecRegExpr.GetSubExprMatchCount: integer;
1713 var i: integer;
1714 begin
1715 Result := -1;
1716 if FMatchOK then
1717 with TreRootNode(FProgRoot) do
1718 for i := 0 to FSubExpr.Count - 1 do
1719 with TreSubExpr(FSubExpr[i]) do
1720 if (FStart <> -1) and (FEnd <> -1) then
1721 Inc(Result);
1722 end;
1723
GetMatchnull1724 function TecRegExpr.GetMatch(const InputString: UCString;
1725 SubIdx: integer): UCString;
1726 begin
1727 Result := '';
1728 if FMatchOK then
1729 with TreRootNode(FProgRoot) do
1730 if (SubIdx < FSubExpr.Count) then
1731 with TreSubExpr(FSubExpr[SubIdx]) do
1732 if (FStart <> -1) and (FEnd <> -1) then
1733 Result := Copy(InputString, FStart, FEnd - FStart);
1734 end;
1735
Substitutenull1736 function TecRegExpr.Substitute(const InputString, ATemplate: ecString): ecString;
1737 var i: integer;
1738 C: ecChar;
1739 begin
1740 // if not FMatchOK then
1741 // raise Exception.Create('No matched string');
1742 Result := '';
1743 i := 1;
1744 while i <= Length(ATemplate) do
1745 begin
1746 if (ATemplate[i] = '\') and (i < Length(ATemplate)) then
1747 begin
1748 inc(i);
1749 if IsDigitChar(ATemplate[i]) then
1750 Result := Result + GetMatch(InputString, StrToInt(ATemplate[i]))
1751 else
1752 begin
1753 C := ecChar(Ord(GetEscape(ATemplate, i)));
1754 if C = #0 then C := ATemplate[i];
1755 Result := Result + C;
1756 end;
1757 end else Result := Result + ATemplate[i];
1758 inc(i);
1759 end;
1760 end;
1761
1762 procedure TecRegExpr.Assign(Source: TecRegExpr);
1763 begin
1764 Self.Expression := Source.Expression;
1765 Self.ModifierMask := Source.ModifierMask;
1766 end;
1767
1768 procedure TecRegExpr.ParseModifiers(const S: PWideChar; Len: integer; var Modifiers: Word); // Alexey
1769 var IsOn : boolean;
1770 i: integer;
1771 procedure SetModif(m: integer); inline;
1772 begin
1773 if (m and FModifiersStatic) = 0 then
1774 if IsOn then Modifiers := Modifiers or m
1775 else Modifiers := Modifiers and not m;
1776 end;
1777 begin
1778 IsOn := true;
1779 for i := 0 to Len-1 do
1780 case S[i] of
1781 '-': IsOn := false;
1782 'i','I': SetModif(MaskModI);
1783 'r','R': SetModif(MaskModR);
1784 's','S': SetModif(MaskModS);
1785 'g','G': SetModif(MaskModG);
1786 'm','M': SetModif(MaskModM);
1787 'x','X': SetModif(MaskModX);
1788 else
1789 raise Exception.Create(zreUnexpectedModifier);
1790 end;
1791 end;
1792
1793 // =============================================================================
1794 // DEBUGGER
1795 // =============================================================================
1796 {$IFDEF RE_DEBUG}
1797 // Fill tree with compiled nodes {debug purpose}
1798 procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
1799 function GetNodeCaption(Node: TRENodeBase): string;
1800 begin
1801 if Node.ClassType = TCharSeqNode then
1802 Result := ecChar(TCharSeqNode(Node).FChar) else
1803 if Node.ClassType = TCharSetNode then
1804 begin
1805 Result := '[ ... ]';
1806 end else
1807 if Node.ClassType = TSpecCheckNode then
1808 Result := '! '+ TSpecCheckNode(Node).FCheckType + ' !' else
1809 if Node.ClassType = TZeroWidth then
1810 Result := '0'
1811 else
1812 Result := '<' + Node.ClassName + '>';
1813 Result := IntToStr(Node.FNodeId) + ' ' + Result + Format(' {%d, %d}', [Node.FLoopMin, Node.FLoopMax]);
1814 end;
1815
1816 procedure AddNode(Node: TRENodeBase; Prn: TTreeNode);
1817 var tn: TTreeNode;
1818 i: integer;
1819 begin
1820 tn := TV.Items.AddChild(Prn, GetNodeCaption(Node));
1821 if Node is TreListNodeBase then
1822 for i := 0 to TreListNodeBase(Node).FList.Count - 1 do
1823 AddNode(TRENodeBase(TreListNodeBase(Node).FList[i]), tn);
1824 if Node is TZeroWidth then
1825 AddNode(TZeroWidth(Node).FBranch, tn);
1826 end;
1827 begin
1828 TV.Items.Clear;
1829 AddNode(TreRootNode(RE.FProgRoot), nil);
1830 end;
1831 {$ENDIF}
1832
1833 end.
1834
1835