1------------------------------------------------------------------------------ 2-- -- 3-- GNAT LIBRARY COMPONENTS -- 4-- -- 5-- G N A T . S P I T B O L . P A T T E R N S -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1998-2019, AdaCore -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32-- Note: the data structures and general approach used in this implementation 33-- are derived from the original MINIMAL sources for SPITBOL. The code is not 34-- a direct translation, but the approach is followed closely. In particular, 35-- we use the one stack approach developed in the SPITBOL implementation. 36 37with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux; 38 39with GNAT.Debug_Utilities; use GNAT.Debug_Utilities; 40 41with System; use System; 42 43with Ada.Unchecked_Conversion; 44with Ada.Unchecked_Deallocation; 45 46package body GNAT.Spitbol.Patterns is 47 48 ------------------------ 49 -- Internal Debugging -- 50 ------------------------ 51 52 Internal_Debug : constant Boolean := False; 53 -- Set this flag to True to activate some built-in debugging traceback 54 -- These are all lines output with PutD and Put_LineD. 55 56 procedure New_LineD; 57 pragma Inline (New_LineD); 58 -- Output new blank line with New_Line if Internal_Debug is True 59 60 procedure PutD (Str : String); 61 pragma Inline (PutD); 62 -- Output string with Put if Internal_Debug is True 63 64 procedure Put_LineD (Str : String); 65 pragma Inline (Put_LineD); 66 -- Output string with Put_Line if Internal_Debug is True 67 68 ----------------------------- 69 -- Local Type Declarations -- 70 ----------------------------- 71 72 subtype String_Ptr is Ada.Strings.Unbounded.String_Access; 73 subtype File_Ptr is Ada.Text_IO.File_Access; 74 75 function To_Address is new Ada.Unchecked_Conversion (PE_Ptr, Address); 76 -- Used only for debugging output purposes 77 78 subtype AFC is Ada.Finalization.Controlled; 79 80 N : constant PE_Ptr := null; 81 -- Shorthand used to initialize Copy fields to null 82 83 type Natural_Ptr is access all Natural; 84 type Pattern_Ptr is access all Pattern; 85 86 -------------------------------------------------- 87 -- Description of Algorithm and Data Structures -- 88 -------------------------------------------------- 89 90 -- A pattern structure is represented as a linked graph of nodes 91 -- with the following structure: 92 93 -- +------------------------------------+ 94 -- I Pcode I 95 -- +------------------------------------+ 96 -- I Index I 97 -- +------------------------------------+ 98 -- I Pthen I 99 -- +------------------------------------+ 100 -- I parameter(s) I 101 -- +------------------------------------+ 102 103 -- Pcode is a code value indicating the type of the pattern node. This 104 -- code is used both as the discriminant value for the record, and as 105 -- the case index in the main match routine that branches to the proper 106 -- match code for the given element. 107 108 -- Index is a serial index number. The use of these serial index 109 -- numbers is described in a separate section. 110 111 -- Pthen is a pointer to the successor node, i.e the node to be matched 112 -- if the attempt to match the node succeeds. If this is the last node 113 -- of the pattern to be matched, then Pthen points to a dummy node 114 -- of kind PC_EOP (end of pattern), which initializes pattern exit. 115 116 -- The parameter or parameters are present for certain node types, 117 -- and the type varies with the pattern code. 118 119 type Pattern_Code is ( 120 PC_Arb_Y, 121 PC_Assign, 122 PC_Bal, 123 PC_BreakX_X, 124 PC_Cancel, 125 PC_EOP, 126 PC_Fail, 127 PC_Fence, 128 PC_Fence_X, 129 PC_Fence_Y, 130 PC_R_Enter, 131 PC_R_Remove, 132 PC_R_Restore, 133 PC_Rest, 134 PC_Succeed, 135 PC_Unanchored, 136 137 PC_Alt, 138 PC_Arb_X, 139 PC_Arbno_S, 140 PC_Arbno_X, 141 142 PC_Rpat, 143 144 PC_Pred_Func, 145 146 PC_Assign_Imm, 147 PC_Assign_OnM, 148 PC_Any_VP, 149 PC_Break_VP, 150 PC_BreakX_VP, 151 PC_NotAny_VP, 152 PC_NSpan_VP, 153 PC_Span_VP, 154 PC_String_VP, 155 156 PC_Write_Imm, 157 PC_Write_OnM, 158 159 PC_Null, 160 PC_String, 161 162 PC_String_2, 163 PC_String_3, 164 PC_String_4, 165 PC_String_5, 166 PC_String_6, 167 168 PC_Setcur, 169 170 PC_Any_CH, 171 PC_Break_CH, 172 PC_BreakX_CH, 173 PC_Char, 174 PC_NotAny_CH, 175 PC_NSpan_CH, 176 PC_Span_CH, 177 178 PC_Any_CS, 179 PC_Break_CS, 180 PC_BreakX_CS, 181 PC_NotAny_CS, 182 PC_NSpan_CS, 183 PC_Span_CS, 184 185 PC_Arbno_Y, 186 PC_Len_Nat, 187 PC_Pos_Nat, 188 PC_RPos_Nat, 189 PC_RTab_Nat, 190 PC_Tab_Nat, 191 192 PC_Pos_NF, 193 PC_Len_NF, 194 PC_RPos_NF, 195 PC_RTab_NF, 196 PC_Tab_NF, 197 198 PC_Pos_NP, 199 PC_Len_NP, 200 PC_RPos_NP, 201 PC_RTab_NP, 202 PC_Tab_NP, 203 204 PC_Any_VF, 205 PC_Break_VF, 206 PC_BreakX_VF, 207 PC_NotAny_VF, 208 PC_NSpan_VF, 209 PC_Span_VF, 210 PC_String_VF); 211 212 type IndexT is range 0 .. +(2 **15 - 1); 213 214 type PE (Pcode : Pattern_Code) is record 215 216 Index : IndexT; 217 -- Serial index number of pattern element within pattern 218 219 Pthen : PE_Ptr; 220 -- Successor element, to be matched after this one 221 222 case Pcode is 223 when PC_Arb_Y 224 | PC_Assign 225 | PC_Bal 226 | PC_BreakX_X 227 | PC_Cancel 228 | PC_EOP 229 | PC_Fail 230 | PC_Fence 231 | PC_Fence_X 232 | PC_Fence_Y 233 | PC_Null 234 | PC_R_Enter 235 | PC_R_Remove 236 | PC_R_Restore 237 | PC_Rest 238 | PC_Succeed 239 | PC_Unanchored 240 => 241 null; 242 243 when PC_Alt 244 | PC_Arb_X 245 | PC_Arbno_S 246 | PC_Arbno_X 247 => 248 Alt : PE_Ptr; 249 250 when PC_Rpat => 251 PP : Pattern_Ptr; 252 253 when PC_Pred_Func => 254 BF : Boolean_Func; 255 256 when PC_Assign_Imm 257 | PC_Assign_OnM 258 | PC_Any_VP 259 | PC_Break_VP 260 | PC_BreakX_VP 261 | PC_NotAny_VP 262 | PC_NSpan_VP 263 | PC_Span_VP 264 | PC_String_VP 265 => 266 VP : VString_Ptr; 267 268 when PC_Write_Imm 269 | PC_Write_OnM 270 => 271 FP : File_Ptr; 272 273 when PC_String => 274 Str : String_Ptr; 275 276 when PC_String_2 => 277 Str2 : String (1 .. 2); 278 279 when PC_String_3 => 280 Str3 : String (1 .. 3); 281 282 when PC_String_4 => 283 Str4 : String (1 .. 4); 284 285 when PC_String_5 => 286 Str5 : String (1 .. 5); 287 288 when PC_String_6 => 289 Str6 : String (1 .. 6); 290 291 when PC_Setcur => 292 Var : Natural_Ptr; 293 294 when PC_Any_CH 295 | PC_Break_CH 296 | PC_BreakX_CH 297 | PC_Char 298 | PC_NotAny_CH 299 | PC_NSpan_CH 300 | PC_Span_CH 301 => 302 Char : Character; 303 304 when PC_Any_CS 305 | PC_Break_CS 306 | PC_BreakX_CS 307 | PC_NotAny_CS 308 | PC_NSpan_CS 309 | PC_Span_CS 310 => 311 CS : Character_Set; 312 313 when PC_Arbno_Y 314 | PC_Len_Nat 315 | PC_Pos_Nat 316 | PC_RPos_Nat 317 | PC_RTab_Nat 318 | PC_Tab_Nat 319 => 320 Nat : Natural; 321 322 when PC_Pos_NF 323 | PC_Len_NF 324 | PC_RPos_NF 325 | PC_RTab_NF 326 | PC_Tab_NF 327 => 328 NF : Natural_Func; 329 330 when PC_Pos_NP 331 | PC_Len_NP 332 | PC_RPos_NP 333 | PC_RTab_NP 334 | PC_Tab_NP 335 => 336 NP : Natural_Ptr; 337 338 when PC_Any_VF 339 | PC_Break_VF 340 | PC_BreakX_VF 341 | PC_NotAny_VF 342 | PC_NSpan_VF 343 | PC_Span_VF 344 | PC_String_VF 345 => 346 VF : VString_Func; 347 end case; 348 end record; 349 350 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X; 351 -- Range of pattern codes that has an Alt field. This is used in the 352 -- recursive traversals, since these links must be followed. 353 354 EOP_Element : aliased constant PE := (PC_EOP, 0, N); 355 -- This is the end of pattern element, and is thus the representation of 356 -- a null pattern. It has a zero index element since it is never placed 357 -- inside a pattern. Furthermore it does not need a successor, since it 358 -- marks the end of the pattern, so that no more successors are needed. 359 360 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access; 361 -- This is the end of pattern pointer, that is used in the Pthen pointer 362 -- of other nodes to signal end of pattern. 363 364 -- The following array is used to determine if a pattern used as an 365 -- argument for Arbno is eligible for treatment using the simple Arbno 366 -- structure (i.e. it is a pattern that is guaranteed to match at least 367 -- one character on success, and not to make any entries on the stack. 368 369 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean := 370 (PC_Any_CS | 371 PC_Any_CH | 372 PC_Any_VF | 373 PC_Any_VP | 374 PC_Char | 375 PC_Len_Nat | 376 PC_NotAny_CS | 377 PC_NotAny_CH | 378 PC_NotAny_VF | 379 PC_NotAny_VP | 380 PC_Span_CS | 381 PC_Span_CH | 382 PC_Span_VF | 383 PC_Span_VP | 384 PC_String | 385 PC_String_2 | 386 PC_String_3 | 387 PC_String_4 | 388 PC_String_5 | 389 PC_String_6 => True, 390 others => False); 391 392 ------------------------------- 393 -- The Pattern History Stack -- 394 ------------------------------- 395 396 -- The pattern history stack is used for controlling backtracking when 397 -- a match fails. The idea is to stack entries that give a cursor value 398 -- to be restored, and a node to be reestablished as the current node to 399 -- attempt an appropriate rematch operation. The processing for a pattern 400 -- element that has rematch alternatives pushes an appropriate entry or 401 -- entry on to the stack, and the proceeds. If a match fails at any point, 402 -- the top element of the stack is popped off, resetting the cursor and 403 -- the match continues by accessing the node stored with this entry. 404 405 type Stack_Entry is record 406 407 Cursor : Integer; 408 -- Saved cursor value that is restored when this entry is popped 409 -- from the stack if a match attempt fails. Occasionally, this 410 -- field is used to store a history stack pointer instead of a 411 -- cursor. Such cases are noted in the documentation and the value 412 -- stored is negative since stack pointer values are always negative. 413 414 Node : PE_Ptr; 415 -- This pattern element reference is reestablished as the current 416 -- Node to be matched (which will attempt an appropriate rematch). 417 418 end record; 419 420 subtype Stack_Range is Integer range -Stack_Size .. -1; 421 422 type Stack_Type is array (Stack_Range) of Stack_Entry; 423 -- The type used for a history stack. The actual instance of the stack 424 -- is declared as a local variable in the Match routine, to properly 425 -- handle recursive calls to Match. All stack pointer values are negative 426 -- to distinguish them from normal cursor values. 427 428 -- Note: the pattern matching stack is used only to handle backtracking. 429 -- If no backtracking occurs, its entries are never accessed, and never 430 -- popped off, and in particular it is normal for a successful match 431 -- to terminate with entries on the stack that are simply discarded. 432 433 -- Note: in subsequent diagrams of the stack, we always place element 434 -- zero (the deepest element) at the top of the page, then build the 435 -- stack down on the page with the most recent (top of stack) element 436 -- being the bottom-most entry on the page. 437 438 -- Stack checking is handled by labeling every pattern with the maximum 439 -- number of stack entries that are required, so a single check at the 440 -- start of matching the pattern suffices. There are two exceptions. 441 442 -- First, the count does not include entries for recursive pattern 443 -- references. Such recursions must therefore perform a specific 444 -- stack check with respect to the number of stack entries required 445 -- by the recursive pattern that is accessed and the amount of stack 446 -- that remains unused. 447 448 -- Second, the count includes only one iteration of an Arbno pattern, 449 -- so a specific check must be made on subsequent iterations that there 450 -- is still enough stack space left. The Arbno node has a field that 451 -- records the number of stack entries required by its argument for 452 -- this purpose. 453 454 --------------------------------------------------- 455 -- Use of Serial Index Field in Pattern Elements -- 456 --------------------------------------------------- 457 458 -- The serial index numbers for the pattern elements are assigned as 459 -- a pattern is constructed from its constituent elements. Note that there 460 -- is never any sharing of pattern elements between patterns (copies are 461 -- always made), so the serial index numbers are unique to a particular 462 -- pattern as referenced from the P field of a value of type Pattern. 463 464 -- The index numbers meet three separate invariants, which are used for 465 -- various purposes as described in this section. 466 467 -- First, the numbers uniquely identify the pattern elements within a 468 -- pattern. If Num is the number of elements in a given pattern, then 469 -- the serial index numbers for the elements of this pattern will range 470 -- from 1 .. Num, so that each element has a separate value. 471 472 -- The purpose of this assignment is to provide a convenient auxiliary 473 -- data structure mechanism during operations which must traverse a 474 -- pattern (e.g. copy and finalization processing). Once constructed 475 -- patterns are strictly read only. This is necessary to allow sharing 476 -- of patterns between tasks. This means that we cannot go marking the 477 -- pattern (e.g. with a visited bit). Instead we construct a separate 478 -- vector that contains the necessary information indexed by the Index 479 -- values in the pattern elements. For this purpose the only requirement 480 -- is that they be uniquely assigned. 481 482 -- Second, the pattern element referenced directly, i.e. the leading 483 -- pattern element, is always the maximum numbered element and therefore 484 -- indicates the total number of elements in the pattern. More precisely, 485 -- the element referenced by the P field of a pattern value, or the 486 -- element returned by any of the internal pattern construction routines 487 -- in the body (that return a value of type PE_Ptr) always is this 488 -- maximum element, 489 490 -- The purpose of this requirement is to allow an immediate determination 491 -- of the number of pattern elements within a pattern. This is used to 492 -- properly size the vectors used to contain auxiliary information for 493 -- traversal as described above. 494 495 -- Third, as compound pattern structures are constructed, the way in which 496 -- constituent parts of the pattern are constructed is stylized. This is 497 -- an automatic consequence of the way that these compound structures 498 -- are constructed, and basically what we are doing is simply documenting 499 -- and specifying the natural result of the pattern construction. The 500 -- section describing compound pattern structures gives details of the 501 -- numbering of each compound pattern structure. 502 503 -- The purpose of specifying the stylized numbering structures for the 504 -- compound patterns is to help simplify the processing in the Image 505 -- function, since it eases the task of retrieving the original recursive 506 -- structure of the pattern from the flat graph structure of elements. 507 -- This use in the Image function is the only point at which the code 508 -- makes use of the stylized structures. 509 510 type Ref_Array is array (IndexT range <>) of PE_Ptr; 511 -- This type is used to build an array whose N'th entry references the 512 -- element in a pattern whose Index value is N. See Build_Ref_Array. 513 514 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array); 515 -- Given a pattern element which is the leading element of a pattern 516 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the 517 -- Ref_Array so that its N'th entry references the element of the 518 -- referenced pattern whose Index value is N. 519 520 ------------------------------- 521 -- Recursive Pattern Matches -- 522 ------------------------------- 523 524 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func 525 -- causes a recursive pattern match. This cannot be handled by an actual 526 -- recursive call to the outer level Match routine, since this would not 527 -- allow for possible backtracking into the region matched by the inner 528 -- pattern. Indeed this is the classical clash between recursion and 529 -- backtracking, and a simple recursive stack structure does not suffice. 530 531 -- This section describes how this recursion and the possible associated 532 -- backtracking is handled. We still use a single stack, but we establish 533 -- the concept of nested regions on this stack, each of which has a stack 534 -- base value pointing to the deepest stack entry of the region. The base 535 -- value for the outer level is zero. 536 537 -- When a recursive match is established, two special stack entries are 538 -- made. The first entry is used to save the original node that starts 539 -- the recursive match. This is saved so that the successor field of 540 -- this node is accessible at the end of the match, but it is never 541 -- popped and executed. 542 543 -- The second entry corresponds to a standard new region action. A 544 -- PC_R_Remove node is stacked, whose cursor field is used to store 545 -- the outer stack base, and the stack base is reset to point to 546 -- this PC_R_Remove node. Then the recursive pattern is matched and 547 -- it can make history stack entries in the normal matter, so now 548 -- the stack looks like: 549 550 -- (stack entries made by outer level) 551 552 -- (Special entry, node is (+P) successor 553 -- cursor entry is not used) 554 555 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base 556 -- saved base value for the enclosing region) 557 558 -- (stack entries made by inner level) 559 560 -- If a subsequent failure occurs and pops the PC_R_Remove node, it 561 -- removes itself and the special entry immediately underneath it, 562 -- restores the stack base value for the enclosing region, and then 563 -- again signals failure to look for alternatives that were stacked 564 -- before the recursion was initiated. 565 566 -- Now we need to consider what happens if the inner pattern succeeds, as 567 -- signalled by accessing the special PC_EOP pattern primitive. First we 568 -- recognize the nested case by looking at the Base value. If this Base 569 -- value is Stack'First, then the entire match has succeeded, but if the 570 -- base value is greater than Stack'First, then we have successfully 571 -- matched an inner pattern, and processing continues at the outer level. 572 573 -- There are two cases. The simple case is when the inner pattern has made 574 -- no stack entries, as recognized by the fact that the current stack 575 -- pointer is equal to the current base value. In this case it is fine to 576 -- remove all trace of the recursion by restoring the outer base value and 577 -- using the special entry to find the appropriate successor node. 578 579 -- The more complex case arises when the inner match does make stack 580 -- entries. In this case, the PC_EOP processing stacks a special entry 581 -- whose cursor value saves the saved inner base value (the one that 582 -- references the corresponding PC_R_Remove value), and whose node 583 -- pointer references a PC_R_Restore node, so the stack looks like: 584 585 -- (stack entries made by outer level) 586 587 -- (Special entry, node is (+P) successor, 588 -- cursor entry is not used) 589 590 -- (PC_R_Remove entry, "cursor" value is (negative) 591 -- saved base value for the enclosing region) 592 593 -- (stack entries made by inner level) 594 595 -- (PC_Region_Replace entry, "cursor" value is (negative) 596 -- stack pointer value referencing the PC_R_Remove entry). 597 598 -- If the entire match succeeds, then these stack entries are, as usual, 599 -- ignored and abandoned. If on the other hand a subsequent failure 600 -- causes the PC_Region_Replace entry to be popped, it restores the 601 -- inner base value from its saved "cursor" value and then fails again. 602 -- Note that it is OK that the cursor is temporarily clobbered by this 603 -- pop, since the second failure will reestablish a proper cursor value. 604 605 --------------------------------- 606 -- Compound Pattern Structures -- 607 --------------------------------- 608 609 -- This section discusses the compound structures used to represent 610 -- constructed patterns. It shows the graph structures of pattern 611 -- elements that are constructed, and in the case of patterns that 612 -- provide backtracking possibilities, describes how the history 613 -- stack is used to control the backtracking. Finally, it notes the 614 -- way in which the Index numbers are assigned to the structure. 615 616 -- In all diagrams, solid lines (built with minus signs or vertical 617 -- bars, represent successor pointers (Pthen fields) with > or V used 618 -- to indicate the direction of the pointer. The initial node of the 619 -- structure is in the upper left of the diagram. A dotted line is an 620 -- alternative pointer from the element above it to the element below 621 -- it. See individual sections for details on how alternatives are used. 622 623 ------------------- 624 -- Concatenation -- 625 ------------------- 626 627 -- In the pattern structures listed in this section, a line that looks 628 -- like ----> with nothing to the right indicates an end of pattern 629 -- (EOP) pointer that represents the end of the match. 630 631 -- When a pattern concatenation (L & R) occurs, the resulting structure 632 -- is obtained by finding all such EOP pointers in L, and replacing 633 -- them to point to R. This is the most important flattening that 634 -- occurs in constructing a pattern, and it means that the pattern 635 -- matching circuitry does not have to keep track of the structure 636 -- of a pattern with respect to concatenation, since the appropriate 637 -- successor is always at hand. 638 639 -- Concatenation itself generates no additional possibilities for 640 -- backtracking, but the constituent patterns of the concatenated 641 -- structure will make stack entries as usual. The maximum amount 642 -- of stack required by the structure is thus simply the sum of the 643 -- maximums required by L and R. 644 645 -- The index numbering of a concatenation structure works by leaving 646 -- the numbering of the right hand pattern, R, unchanged and adjusting 647 -- the numbers in the left hand pattern, L up by the count of elements 648 -- in R. This ensures that the maximum numbered element is the leading 649 -- element as required (given that it was the leading element in L). 650 651 ----------------- 652 -- Alternation -- 653 ----------------- 654 655 -- A pattern (L or R) constructs the structure: 656 657 -- +---+ +---+ 658 -- | A |---->| L |----> 659 -- +---+ +---+ 660 -- . 661 -- . 662 -- +---+ 663 -- | R |----> 664 -- +---+ 665 666 -- The A element here is a PC_Alt node, and the dotted line represents 667 -- the contents of the Alt field. When the PC_Alt element is matched, 668 -- it stacks a pointer to the leading element of R on the history stack 669 -- so that on subsequent failure, a match of R is attempted. 670 671 -- The A node is the highest numbered element in the pattern. The 672 -- original index numbers of R are unchanged, but the index numbers 673 -- of the L pattern are adjusted up by the count of elements in R. 674 675 -- Note that the difference between the index of the L leading element 676 -- the index of the R leading element (after building the alt structure) 677 -- indicates the number of nodes in L, and this is true even after the 678 -- structure is incorporated into some larger structure. For example, 679 -- if the A node has index 16, and L has index 15 and R has index 680 -- 5, then we know that L has 10 (15-5) elements in it. 681 682 -- Suppose that we now concatenate this structure to another pattern 683 -- with 9 elements in it. We will now have the A node with an index 684 -- of 25, L with an index of 24 and R with an index of 14. We still 685 -- know that L has 10 (24-14) elements in it, numbered 15-24, and 686 -- consequently the successor of the alternation structure has an 687 -- index with a value less than 15. This is used in Image to figure 688 -- out the original recursive structure of a pattern. 689 690 -- To clarify the interaction of the alternation and concatenation 691 -- structures, here is a more complex example of the structure built 692 -- for the pattern: 693 694 -- (V or W or X) (Y or Z) 695 696 -- where A,B,C,D,E are all single element patterns: 697 698 -- +---+ +---+ +---+ +---+ 699 -- I A I---->I V I---+-->I A I---->I Y I----> 700 -- +---+ +---+ I +---+ +---+ 701 -- . I . 702 -- . I . 703 -- +---+ +---+ I +---+ 704 -- I A I---->I W I-->I I Z I----> 705 -- +---+ +---+ I +---+ 706 -- . I 707 -- . I 708 -- +---+ I 709 -- I X I------------>+ 710 -- +---+ 711 712 -- The numbering of the nodes would be as follows: 713 714 -- +---+ +---+ +---+ +---+ 715 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I----> 716 -- +---+ +---+ I +---+ +---+ 717 -- . I . 718 -- . I . 719 -- +---+ +---+ I +---+ 720 -- I 6 I---->I 5 I-->I I 1 I----> 721 -- +---+ +---+ I +---+ 722 -- . I 723 -- . I 724 -- +---+ I 725 -- I 4 I------------>+ 726 -- +---+ 727 728 -- Note: The above structure actually corresponds to 729 730 -- (A or (B or C)) (D or E) 731 732 -- rather than 733 734 -- ((A or B) or C) (D or E) 735 736 -- which is the more natural interpretation, but in fact alternation 737 -- is associative, and the construction of an alternative changes the 738 -- left grouped pattern to the right grouped pattern in any case, so 739 -- that the Image function produces a more natural looking output. 740 741 --------- 742 -- Arb -- 743 --------- 744 745 -- An Arb pattern builds the structure 746 747 -- +---+ 748 -- | X |----> 749 -- +---+ 750 -- . 751 -- . 752 -- +---+ 753 -- | Y |----> 754 -- +---+ 755 756 -- The X node is a PC_Arb_X node, which matches null, and stacks a 757 -- pointer to Y node, which is the PC_Arb_Y node that matches one 758 -- extra character and restacks itself. 759 760 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1 761 762 ------------------------- 763 -- Arbno (simple case) -- 764 ------------------------- 765 766 -- The simple form of Arbno can be used where the pattern always 767 -- matches at least one character if it succeeds, and it is known 768 -- not to make any history stack entries. In this case, Arbno (P) 769 -- can construct the following structure: 770 771 -- +-------------+ 772 -- | ^ 773 -- V | 774 -- +---+ | 775 -- | S |----> | 776 -- +---+ | 777 -- . | 778 -- . | 779 -- +---+ | 780 -- | P |---------->+ 781 -- +---+ 782 783 -- The S (PC_Arbno_S) node matches null stacking a pointer to the 784 -- pattern P. If a subsequent failure causes P to be matched and 785 -- this match succeeds, then node A gets restacked to try another 786 -- instance if needed by a subsequent failure. 787 788 -- The node numbering of the constituent pattern P is not affected. 789 -- The S node has a node number of P.Index + 1. 790 791 -------------------------- 792 -- Arbno (complex case) -- 793 -------------------------- 794 795 -- A call to Arbno (P), where P can match null (or at least is not 796 -- known to require a non-null string) and/or P requires pattern stack 797 -- entries, constructs the following structure: 798 799 -- +--------------------------+ 800 -- | ^ 801 -- V | 802 -- +---+ | 803 -- | X |----> | 804 -- +---+ | 805 -- . | 806 -- . | 807 -- +---+ +---+ +---+ | 808 -- | E |---->| P |---->| Y |--->+ 809 -- +---+ +---+ +---+ 810 811 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the 812 -- E-P-X structure used to match one Arbno instance. 813 814 -- Here E is the PC_R_Enter node which matches null and creates two 815 -- stack entries. The first is a special entry whose node field is 816 -- not used at all, and whose cursor field has the initial cursor. 817 818 -- The second entry corresponds to a standard new region action. A 819 -- PC_R_Remove node is stacked, whose cursor field is used to store 820 -- the outer stack base, and the stack base is reset to point to 821 -- this PC_R_Remove node. Then the pattern P is matched, and it can 822 -- make history stack entries in the normal manner, so now the stack 823 -- looks like: 824 825 -- (stack entries made before assign pattern) 826 827 -- (Special entry, node field not used, 828 -- used only to save initial cursor) 829 830 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 831 -- saved base value for the enclosing region) 832 833 -- (stack entries made by matching P) 834 835 -- If the match of P fails, then the PC_R_Remove entry is popped and 836 -- it removes both itself and the special entry underneath it, 837 -- restores the outer stack base, and signals failure. 838 839 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops 840 -- the inner region. There are two possibilities. If matching P left 841 -- no stack entries, then all traces of the inner region can be removed. 842 -- If there are stack entries, then we push an PC_Region_Replace stack 843 -- entry whose "cursor" value is the inner stack base value, and then 844 -- restore the outer stack base value, so the stack looks like: 845 846 -- (stack entries made before assign pattern) 847 848 -- (Special entry, node field not used, 849 -- used only to save initial cursor) 850 851 -- (PC_R_Remove entry, "cursor" value is (negative) 852 -- saved base value for the enclosing region) 853 854 -- (stack entries made by matching P) 855 856 -- (PC_Region_Replace entry, "cursor" value is (negative) 857 -- stack pointer value referencing the PC_R_Remove entry). 858 859 -- Now that we have matched another instance of the Arbno pattern, 860 -- we need to move to the successor. There are two cases. If the 861 -- Arbno pattern matched null, then there is no point in seeking 862 -- alternatives, since we would just match a whole bunch of nulls. 863 -- In this case we look through the alternative node, and move 864 -- directly to its successor (i.e. the successor of the Arbno 865 -- pattern). If on the other hand a non-null string was matched, 866 -- we simply follow the successor to the alternative node, which 867 -- sets up for another possible match of the Arbno pattern. 868 869 -- As noted in the section on stack checking, the stack count (and 870 -- hence the stack check) for a pattern includes only one iteration 871 -- of the Arbno pattern. To make sure that multiple iterations do not 872 -- overflow the stack, the Arbno node saves the stack count required 873 -- by a single iteration, and the Concat function increments this to 874 -- include stack entries required by any successor. The PC_Arbno_Y 875 -- node uses this count to ensure that sufficient stack remains 876 -- before proceeding after matching each new instance. 877 878 -- The node numbering of the constituent pattern P is not affected. 879 -- Where N is the number of nodes in P, the Y node is numbered N + 1, 880 -- the E node is N + 2, and the X node is N + 3. 881 882 ---------------------- 883 -- Assign Immediate -- 884 ---------------------- 885 886 -- Immediate assignment (P * V) constructs the following structure 887 888 -- +---+ +---+ +---+ 889 -- | E |---->| P |---->| A |----> 890 -- +---+ +---+ +---+ 891 892 -- Here E is the PC_R_Enter node which matches null and creates two 893 -- stack entries. The first is a special entry whose node field is 894 -- not used at all, and whose cursor field has the initial cursor. 895 896 -- The second entry corresponds to a standard new region action. A 897 -- PC_R_Remove node is stacked, whose cursor field is used to store 898 -- the outer stack base, and the stack base is reset to point to 899 -- this PC_R_Remove node. Then the pattern P is matched, and it can 900 -- make history stack entries in the normal manner, so now the stack 901 -- looks like: 902 903 -- (stack entries made before assign pattern) 904 905 -- (Special entry, node field not used, 906 -- used only to save initial cursor) 907 908 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 909 -- saved base value for the enclosing region) 910 911 -- (stack entries made by matching P) 912 913 -- If the match of P fails, then the PC_R_Remove entry is popped 914 -- and it removes both itself and the special entry underneath it, 915 -- restores the outer stack base, and signals failure. 916 917 -- If the match of P succeeds, then node A, which is the actual 918 -- PC_Assign_Imm node, executes the assignment (using the stack 919 -- base to locate the entry with the saved starting cursor value), 920 -- and the pops the inner region. There are two possibilities, if 921 -- matching P left no stack entries, then all traces of the inner 922 -- region can be removed. If there are stack entries, then we push 923 -- an PC_Region_Replace stack entry whose "cursor" value is the 924 -- inner stack base value, and then restore the outer stack base 925 -- value, so the stack looks like: 926 927 -- (stack entries made before assign pattern) 928 929 -- (Special entry, node field not used, 930 -- used only to save initial cursor) 931 932 -- (PC_R_Remove entry, "cursor" value is (negative) 933 -- saved base value for the enclosing region) 934 935 -- (stack entries made by matching P) 936 937 -- (PC_Region_Replace entry, "cursor" value is the (negative) 938 -- stack pointer value referencing the PC_R_Remove entry). 939 940 -- If a subsequent failure occurs, the PC_Region_Replace node restores 941 -- the inner stack base value and signals failure to explore rematches 942 -- of the pattern P. 943 944 -- The node numbering of the constituent pattern P is not affected. 945 -- Where N is the number of nodes in P, the A node is numbered N + 1, 946 -- and the E node is N + 2. 947 948 --------------------- 949 -- Assign On Match -- 950 --------------------- 951 952 -- The assign on match (**) pattern is quite similar to the assign 953 -- immediate pattern, except that the actual assignment has to be 954 -- delayed. The following structure is constructed: 955 956 -- +---+ +---+ +---+ 957 -- | E |---->| P |---->| A |----> 958 -- +---+ +---+ +---+ 959 960 -- The operation of this pattern is identical to that described above 961 -- for deferred assignment, up to the point where P has been matched. 962 963 -- The A node, which is the PC_Assign_OnM node first pushes a 964 -- PC_Assign node onto the history stack. This node saves the ending 965 -- cursor and acts as a flag for the final assignment, as further 966 -- described below. 967 968 -- It then stores a pointer to itself in the special entry node field. 969 -- This was otherwise unused, and is now used to retrieve the address 970 -- of the variable to be assigned at the end of the pattern. 971 972 -- After that the inner region is terminated in the usual manner, 973 -- by stacking a PC_R_Restore entry as described for the assign 974 -- immediate case. Note that the optimization of completely 975 -- removing the inner region does not happen in this case, since 976 -- we have at least one stack entry (the PC_Assign one we just made). 977 -- The stack now looks like: 978 979 -- (stack entries made before assign pattern) 980 981 -- (Special entry, node points to copy of 982 -- the PC_Assign_OnM node, and the 983 -- cursor field saves the initial cursor). 984 985 -- (PC_R_Remove entry, "cursor" value is (negative) 986 -- saved base value for the enclosing region) 987 988 -- (stack entries made by matching P) 989 990 -- (PC_Assign entry, saves final cursor) 991 992 -- (PC_Region_Replace entry, "cursor" value is (negative) 993 -- stack pointer value referencing the PC_R_Remove entry). 994 995 -- If a subsequent failure causes the PC_Assign node to execute it 996 -- simply removes itself and propagates the failure. 997 998 -- If the match succeeds, then the history stack is scanned for 999 -- PC_Assign nodes, and the assignments are executed (examination 1000 -- of the above diagram will show that all the necessary data is 1001 -- at hand for the assignment). 1002 1003 -- To optimize the common case where no assign-on-match operations 1004 -- are present, a global flag Assign_OnM is maintained which is 1005 -- initialize to False, and gets set True as part of the execution 1006 -- of the PC_Assign_OnM node. The scan of the history stack for 1007 -- PC_Assign entries is done only if this flag is set. 1008 1009 -- The node numbering of the constituent pattern P is not affected. 1010 -- Where N is the number of nodes in P, the A node is numbered N + 1, 1011 -- and the E node is N + 2. 1012 1013 --------- 1014 -- Bal -- 1015 --------- 1016 1017 -- Bal builds a single node: 1018 1019 -- +---+ 1020 -- | B |----> 1021 -- +---+ 1022 1023 -- The node B is the PC_Bal node which matches a parentheses balanced 1024 -- string, starting at the current cursor position. It then updates 1025 -- the cursor past this matched string, and stacks a pointer to itself 1026 -- with this updated cursor value on the history stack, to extend the 1027 -- matched string on a subsequent failure. 1028 1029 -- Since this is a single node it is numbered 1 (the reason we include 1030 -- it in the compound patterns section is that it backtracks). 1031 1032 ------------ 1033 -- BreakX -- 1034 ------------ 1035 1036 -- BreakX builds the structure 1037 1038 -- +---+ +---+ 1039 -- | B |---->| A |----> 1040 -- +---+ +---+ 1041 -- ^ . 1042 -- | . 1043 -- | +---+ 1044 -- +<------| X | 1045 -- +---+ 1046 1047 -- Here the B node is the BreakX_xx node that performs a normal Break 1048 -- function. The A node is an alternative (PC_Alt) node that matches 1049 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which 1050 -- extends the match one character (to eat up the previously detected 1051 -- break character), and then rematches the break. 1052 1053 -- The B node is numbered 3, the alternative node is 1, and the X 1054 -- node is 2. 1055 1056 ----------- 1057 -- Fence -- 1058 ----------- 1059 1060 -- Fence builds a single node: 1061 1062 -- +---+ 1063 -- | F |----> 1064 -- +---+ 1065 1066 -- The element F, PC_Fence, matches null, and stacks a pointer to a 1067 -- PC_Cancel element which will abort the match on a subsequent failure. 1068 1069 -- Since this is a single element it is numbered 1 (the reason we 1070 -- include it in the compound patterns section is that it backtracks). 1071 1072 -------------------- 1073 -- Fence Function -- 1074 -------------------- 1075 1076 -- A call to the Fence function builds the structure: 1077 1078 -- +---+ +---+ +---+ 1079 -- | E |---->| P |---->| X |----> 1080 -- +---+ +---+ +---+ 1081 1082 -- Here E is the PC_R_Enter node which matches null and creates two 1083 -- stack entries. The first is a special entry which is not used at 1084 -- all in the fence case (it is present merely for uniformity with 1085 -- other cases of region enter operations). 1086 1087 -- The second entry corresponds to a standard new region action. A 1088 -- PC_R_Remove node is stacked, whose cursor field is used to store 1089 -- the outer stack base, and the stack base is reset to point to 1090 -- this PC_R_Remove node. Then the pattern P is matched, and it can 1091 -- make history stack entries in the normal manner, so now the stack 1092 -- looks like: 1093 1094 -- (stack entries made before fence pattern) 1095 1096 -- (Special entry, not used at all) 1097 1098 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 1099 -- saved base value for the enclosing region) 1100 1101 -- (stack entries made by matching P) 1102 1103 -- If the match of P fails, then the PC_R_Remove entry is popped 1104 -- and it removes both itself and the special entry underneath it, 1105 -- restores the outer stack base, and signals failure. 1106 1107 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets 1108 -- control. One might be tempted to think that at this point, the 1109 -- history stack entries made by matching P can just be removed since 1110 -- they certainly are not going to be used for rematching (that is 1111 -- whole point of Fence after all). However, this is wrong, because 1112 -- it would result in the loss of possible assign-on-match entries 1113 -- for deferred pattern assignments. 1114 1115 -- Instead what we do is to make a special entry whose node references 1116 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e. 1117 -- the pointer to the PC_R_Remove entry. Then the outer stack base 1118 -- pointer is restored, so the stack looks like: 1119 1120 -- (stack entries made before assign pattern) 1121 1122 -- (Special entry, not used at all) 1123 1124 -- (PC_R_Remove entry, "cursor" value is (negative) 1125 -- saved base value for the enclosing region) 1126 1127 -- (stack entries made by matching P) 1128 1129 -- (PC_Fence_Y entry, "cursor" value is (negative) stack 1130 -- pointer value referencing the PC_R_Remove entry). 1131 1132 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes 1133 -- the entire inner region, including all entries made by matching P, 1134 -- and alternatives prior to the Fence pattern are sought. 1135 1136 -- The node numbering of the constituent pattern P is not affected. 1137 -- Where N is the number of nodes in P, the X node is numbered N + 1, 1138 -- and the E node is N + 2. 1139 1140 ------------- 1141 -- Succeed -- 1142 ------------- 1143 1144 -- Succeed builds a single node: 1145 1146 -- +---+ 1147 -- | S |----> 1148 -- +---+ 1149 1150 -- The node S is the PC_Succeed node which matches null, and stacks 1151 -- a pointer to itself on the history stack, so that a subsequent 1152 -- failure repeats the same match. 1153 1154 -- Since this is a single node it is numbered 1 (the reason we include 1155 -- it in the compound patterns section is that it backtracks). 1156 1157 --------------------- 1158 -- Write Immediate -- 1159 --------------------- 1160 1161 -- The structure built for a write immediate operation (P * F, where 1162 -- F is a file access value) is: 1163 1164 -- +---+ +---+ +---+ 1165 -- | E |---->| P |---->| W |----> 1166 -- +---+ +---+ +---+ 1167 1168 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The 1169 -- handling is identical to that described above for Assign Immediate, 1170 -- except that at the point where a successful match occurs, the matched 1171 -- substring is written to the referenced file. 1172 1173 -- The node numbering of the constituent pattern P is not affected. 1174 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1175 -- and the E node is N + 2. 1176 1177 -------------------- 1178 -- Write On Match -- 1179 -------------------- 1180 1181 -- The structure built for a write on match operation (P ** F, where 1182 -- F is a file access value) is: 1183 1184 -- +---+ +---+ +---+ 1185 -- | E |---->| P |---->| W |----> 1186 -- +---+ +---+ +---+ 1187 1188 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The 1189 -- handling is identical to that described above for Assign On Match, 1190 -- except that at the point where a successful match has completed, 1191 -- the matched substring is written to the referenced file. 1192 1193 -- The node numbering of the constituent pattern P is not affected. 1194 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1195 -- and the E node is N + 2. 1196 ----------------------- 1197 -- Constant Patterns -- 1198 ----------------------- 1199 1200 -- The following pattern elements are referenced only from the pattern 1201 -- history stack. In each case the processing for the pattern element 1202 -- results in pattern match abort, or further failure, so there is no 1203 -- need for a successor and no need for a node number 1204 1205 CP_Assign : aliased PE := (PC_Assign, 0, N); 1206 CP_Cancel : aliased PE := (PC_Cancel, 0, N); 1207 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N); 1208 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N); 1209 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N); 1210 1211 ----------------------- 1212 -- Local Subprograms -- 1213 ----------------------- 1214 1215 function Alternate (L, R : PE_Ptr) return PE_Ptr; 1216 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate; 1217 -- Build pattern structure corresponding to the alternation of L, R. 1218 -- (i.e. try to match L, and if that fails, try to match R). 1219 1220 function Arbno_Simple (P : PE_Ptr) return PE_Ptr; 1221 -- Build simple Arbno pattern, P is a pattern that is guaranteed to 1222 -- match at least one character if it succeeds and to require no 1223 -- stack entries under all circumstances. The result returned is 1224 -- a simple Arbno structure as previously described. 1225 1226 function Bracket (E, P, A : PE_Ptr) return PE_Ptr; 1227 -- Given two single node pattern elements E and A, and a (possible 1228 -- complex) pattern P, construct the concatenation E-->P-->A and 1229 -- return a pointer to E. The concatenation does not affect the 1230 -- node numbering in P. A has a number one higher than the maximum 1231 -- number in P, and E has a number two higher than the maximum 1232 -- number in P (see for example the Assign_Immediate structure to 1233 -- understand a typical use of this function). 1234 1235 function BreakX_Make (B : PE_Ptr) return Pattern; 1236 -- Given a pattern element for a Break pattern, returns the 1237 -- corresponding BreakX compound pattern structure. 1238 1239 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr; 1240 -- Creates a pattern element that represents a concatenation of the 1241 -- two given pattern elements (i.e. the pattern L followed by R). 1242 -- The result returned is always the same as L, but the pattern 1243 -- referenced by L is modified to have R as a successor. This 1244 -- procedure does not copy L or R, so if a copy is required, it 1245 -- is the responsibility of the caller. The Incr parameter is an 1246 -- amount to be added to the Nat field of any P_Arbno_Y node that is 1247 -- in the left operand, it represents the additional stack space 1248 -- required by the right operand. 1249 1250 function C_To_PE (C : PChar) return PE_Ptr; 1251 -- Given a character, constructs a pattern element that matches 1252 -- the single character. 1253 1254 function Copy (P : PE_Ptr) return PE_Ptr; 1255 -- Creates a copy of the pattern element referenced by the given 1256 -- pattern element reference. This is a deep copy, which means that 1257 -- it follows the Next and Alt pointers. 1258 1259 function Image (P : PE_Ptr) return String; 1260 -- Returns the image of the address of the referenced pattern element. 1261 -- This is equivalent to Image (To_Address (P)); 1262 1263 function Is_In (C : Character; Str : String) return Boolean; 1264 pragma Inline (Is_In); 1265 -- Determines if the character C is in string Str 1266 1267 procedure Logic_Error; 1268 -- Called to raise Program_Error with an appropriate message if an 1269 -- internal logic error is detected. 1270 1271 function Str_BF (A : Boolean_Func) return String; 1272 function Str_FP (A : File_Ptr) return String; 1273 function Str_NF (A : Natural_Func) return String; 1274 function Str_NP (A : Natural_Ptr) return String; 1275 function Str_PP (A : Pattern_Ptr) return String; 1276 function Str_VF (A : VString_Func) return String; 1277 function Str_VP (A : VString_Ptr) return String; 1278 -- These are debugging routines, which return a representation of the 1279 -- given access value (they are called only by Image and Dump) 1280 1281 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr); 1282 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes 1283 -- are made. In particular, Succ is unchanged, and no index numbers 1284 -- are modified. Note that Pat may not be equal to EOP on entry. 1285 1286 function S_To_PE (Str : PString) return PE_Ptr; 1287 -- Given a string, constructs a pattern element that matches the string 1288 1289 procedure Uninitialized_Pattern; 1290 pragma No_Return (Uninitialized_Pattern); 1291 -- Called to raise Program_Error with an appropriate error message if 1292 -- an uninitialized pattern is used in any pattern construction or 1293 -- pattern matching operation. 1294 1295 procedure XMatch 1296 (Subject : String; 1297 Pat_P : PE_Ptr; 1298 Pat_S : Natural; 1299 Start : out Natural; 1300 Stop : out Natural); 1301 -- This is the common pattern match routine. It is passed a string and 1302 -- a pattern, and it indicates success or failure, and on success the 1303 -- section of the string matched. It does not perform any assignments 1304 -- to the subject string, so pattern replacement is for the caller. 1305 -- 1306 -- Subject The subject string. The lower bound is always one. In the 1307 -- Match procedures, it is fine to use strings whose lower bound 1308 -- is not one, but we perform a one time conversion before the 1309 -- call to XMatch, so that XMatch does not have to be bothered 1310 -- with strange lower bounds. 1311 -- 1312 -- Pat_P Points to initial pattern element of pattern to be matched 1313 -- 1314 -- Pat_S Maximum required stack entries for pattern to be matched 1315 -- 1316 -- Start If match is successful, starting index of matched section. 1317 -- This value is always non-zero. A value of zero is used to 1318 -- indicate a failed match. 1319 -- 1320 -- Stop If match is successful, ending index of matched section. 1321 -- This can be zero if we match the null string at the start, 1322 -- in which case Start is set to zero, and Stop to one. If the 1323 -- Match fails, then the contents of Stop is undefined. 1324 1325 procedure XMatchD 1326 (Subject : String; 1327 Pat_P : PE_Ptr; 1328 Pat_S : Natural; 1329 Start : out Natural; 1330 Stop : out Natural); 1331 -- Identical in all respects to XMatch, except that trace information is 1332 -- output on Standard_Output during execution of the match. This is the 1333 -- version that is called if the original Match call has Debug => True. 1334 1335 --------- 1336 -- "&" -- 1337 --------- 1338 1339 function "&" (L : PString; R : Pattern) return Pattern is 1340 begin 1341 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk)); 1342 end "&"; 1343 1344 function "&" (L : Pattern; R : PString) return Pattern is 1345 begin 1346 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0)); 1347 end "&"; 1348 1349 function "&" (L : PChar; R : Pattern) return Pattern is 1350 begin 1351 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk)); 1352 end "&"; 1353 1354 function "&" (L : Pattern; R : PChar) return Pattern is 1355 begin 1356 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0)); 1357 end "&"; 1358 1359 function "&" (L : Pattern; R : Pattern) return Pattern is 1360 begin 1361 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk)); 1362 end "&"; 1363 1364 --------- 1365 -- "*" -- 1366 --------- 1367 1368 -- Assign immediate 1369 1370 -- +---+ +---+ +---+ 1371 -- | E |---->| P |---->| A |----> 1372 -- +---+ +---+ +---+ 1373 1374 -- The node numbering of the constituent pattern P is not affected. 1375 -- Where N is the number of nodes in P, the A node is numbered N + 1, 1376 -- and the E node is N + 2. 1377 1378 function "*" (P : Pattern; Var : VString_Var) return Pattern is 1379 Pat : constant PE_Ptr := Copy (P.P); 1380 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1381 A : constant PE_Ptr := 1382 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1383 begin 1384 return (AFC with P.Stk + 3, Bracket (E, Pat, A)); 1385 end "*"; 1386 1387 function "*" (P : PString; Var : VString_Var) return Pattern is 1388 Pat : constant PE_Ptr := S_To_PE (P); 1389 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1390 A : constant PE_Ptr := 1391 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1392 begin 1393 return (AFC with 3, Bracket (E, Pat, A)); 1394 end "*"; 1395 1396 function "*" (P : PChar; Var : VString_Var) return Pattern is 1397 Pat : constant PE_Ptr := C_To_PE (P); 1398 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1399 A : constant PE_Ptr := 1400 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1401 begin 1402 return (AFC with 3, Bracket (E, Pat, A)); 1403 end "*"; 1404 1405 -- Write immediate 1406 1407 -- +---+ +---+ +---+ 1408 -- | E |---->| P |---->| W |----> 1409 -- +---+ +---+ +---+ 1410 1411 -- The node numbering of the constituent pattern P is not affected. 1412 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1413 -- and the E node is N + 2. 1414 1415 function "*" (P : Pattern; Fil : File_Access) return Pattern is 1416 Pat : constant PE_Ptr := Copy (P.P); 1417 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1418 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1419 begin 1420 return (AFC with 3, Bracket (E, Pat, W)); 1421 end "*"; 1422 1423 function "*" (P : PString; Fil : File_Access) return Pattern is 1424 Pat : constant PE_Ptr := S_To_PE (P); 1425 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1426 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1427 begin 1428 return (AFC with 3, Bracket (E, Pat, W)); 1429 end "*"; 1430 1431 function "*" (P : PChar; Fil : File_Access) return Pattern is 1432 Pat : constant PE_Ptr := C_To_PE (P); 1433 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1434 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1435 begin 1436 return (AFC with 3, Bracket (E, Pat, W)); 1437 end "*"; 1438 1439 ---------- 1440 -- "**" -- 1441 ---------- 1442 1443 -- Assign on match 1444 1445 -- +---+ +---+ +---+ 1446 -- | E |---->| P |---->| A |----> 1447 -- +---+ +---+ +---+ 1448 1449 -- The node numbering of the constituent pattern P is not affected. 1450 -- Where N is the number of nodes in P, the A node is numbered N + 1, 1451 -- and the E node is N + 2. 1452 1453 function "**" (P : Pattern; Var : VString_Var) return Pattern is 1454 Pat : constant PE_Ptr := Copy (P.P); 1455 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1456 A : constant PE_Ptr := 1457 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1458 begin 1459 return (AFC with P.Stk + 3, Bracket (E, Pat, A)); 1460 end "**"; 1461 1462 function "**" (P : PString; Var : VString_Var) return Pattern is 1463 Pat : constant PE_Ptr := S_To_PE (P); 1464 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1465 A : constant PE_Ptr := 1466 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1467 begin 1468 return (AFC with 3, Bracket (E, Pat, A)); 1469 end "**"; 1470 1471 function "**" (P : PChar; Var : VString_Var) return Pattern is 1472 Pat : constant PE_Ptr := C_To_PE (P); 1473 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1474 A : constant PE_Ptr := 1475 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1476 begin 1477 return (AFC with 3, Bracket (E, Pat, A)); 1478 end "**"; 1479 1480 -- Write on match 1481 1482 -- +---+ +---+ +---+ 1483 -- | E |---->| P |---->| W |----> 1484 -- +---+ +---+ +---+ 1485 1486 -- The node numbering of the constituent pattern P is not affected. 1487 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1488 -- and the E node is N + 2. 1489 1490 function "**" (P : Pattern; Fil : File_Access) return Pattern is 1491 Pat : constant PE_Ptr := Copy (P.P); 1492 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1493 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1494 begin 1495 return (AFC with P.Stk + 3, Bracket (E, Pat, W)); 1496 end "**"; 1497 1498 function "**" (P : PString; Fil : File_Access) return Pattern is 1499 Pat : constant PE_Ptr := S_To_PE (P); 1500 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1501 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1502 begin 1503 return (AFC with 3, Bracket (E, Pat, W)); 1504 end "**"; 1505 1506 function "**" (P : PChar; Fil : File_Access) return Pattern is 1507 Pat : constant PE_Ptr := C_To_PE (P); 1508 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1509 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1510 begin 1511 return (AFC with 3, Bracket (E, Pat, W)); 1512 end "**"; 1513 1514 --------- 1515 -- "+" -- 1516 --------- 1517 1518 function "+" (Str : VString_Var) return Pattern is 1519 begin 1520 return 1521 (AFC with 0, 1522 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access)); 1523 end "+"; 1524 1525 function "+" (Str : VString_Func) return Pattern is 1526 begin 1527 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str)); 1528 end "+"; 1529 1530 function "+" (P : Pattern_Var) return Pattern is 1531 begin 1532 return 1533 (AFC with 3, 1534 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access)); 1535 end "+"; 1536 1537 function "+" (P : Boolean_Func) return Pattern is 1538 begin 1539 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P)); 1540 end "+"; 1541 1542 ---------- 1543 -- "or" -- 1544 ---------- 1545 1546 function "or" (L : PString; R : Pattern) return Pattern is 1547 begin 1548 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P)); 1549 end "or"; 1550 1551 function "or" (L : Pattern; R : PString) return Pattern is 1552 begin 1553 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R)); 1554 end "or"; 1555 1556 function "or" (L : PString; R : PString) return Pattern is 1557 begin 1558 return (AFC with 1, S_To_PE (L) or S_To_PE (R)); 1559 end "or"; 1560 1561 function "or" (L : Pattern; R : Pattern) return Pattern is 1562 begin 1563 return (AFC with 1564 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P)); 1565 end "or"; 1566 1567 function "or" (L : PChar; R : Pattern) return Pattern is 1568 begin 1569 return (AFC with 1, C_To_PE (L) or Copy (R.P)); 1570 end "or"; 1571 1572 function "or" (L : Pattern; R : PChar) return Pattern is 1573 begin 1574 return (AFC with 1, Copy (L.P) or C_To_PE (R)); 1575 end "or"; 1576 1577 function "or" (L : PChar; R : PChar) return Pattern is 1578 begin 1579 return (AFC with 1, C_To_PE (L) or C_To_PE (R)); 1580 end "or"; 1581 1582 function "or" (L : PString; R : PChar) return Pattern is 1583 begin 1584 return (AFC with 1, S_To_PE (L) or C_To_PE (R)); 1585 end "or"; 1586 1587 function "or" (L : PChar; R : PString) return Pattern is 1588 begin 1589 return (AFC with 1, C_To_PE (L) or S_To_PE (R)); 1590 end "or"; 1591 1592 ------------ 1593 -- Adjust -- 1594 ------------ 1595 1596 -- No two patterns share the same pattern elements, so the adjust 1597 -- procedure for a Pattern assignment must do a deep copy of the 1598 -- pattern element structure. 1599 1600 procedure Adjust (Object : in out Pattern) is 1601 begin 1602 Object.P := Copy (Object.P); 1603 end Adjust; 1604 1605 --------------- 1606 -- Alternate -- 1607 --------------- 1608 1609 function Alternate (L, R : PE_Ptr) return PE_Ptr is 1610 begin 1611 -- If the left pattern is null, then we just add the alternation 1612 -- node with an index one greater than the right hand pattern. 1613 1614 if L = EOP then 1615 return new PE'(PC_Alt, R.Index + 1, EOP, R); 1616 1617 -- If the left pattern is non-null, then build a reference vector 1618 -- for its elements, and adjust their index values to accommodate 1619 -- the right hand elements. Then add the alternation node. 1620 1621 else 1622 declare 1623 Refs : Ref_Array (1 .. L.Index); 1624 1625 begin 1626 Build_Ref_Array (L, Refs); 1627 1628 for J in Refs'Range loop 1629 Refs (J).Index := Refs (J).Index + R.Index; 1630 end loop; 1631 end; 1632 1633 return new PE'(PC_Alt, L.Index + 1, L, R); 1634 end if; 1635 end Alternate; 1636 1637 --------- 1638 -- Any -- 1639 --------- 1640 1641 function Any (Str : String) return Pattern is 1642 begin 1643 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str))); 1644 end Any; 1645 1646 function Any (Str : VString) return Pattern is 1647 begin 1648 return Any (S (Str)); 1649 end Any; 1650 1651 function Any (Str : Character) return Pattern is 1652 begin 1653 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str)); 1654 end Any; 1655 1656 function Any (Str : Character_Set) return Pattern is 1657 begin 1658 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str)); 1659 end Any; 1660 1661 function Any (Str : not null access VString) return Pattern is 1662 begin 1663 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str))); 1664 end Any; 1665 1666 function Any (Str : VString_Func) return Pattern is 1667 begin 1668 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str)); 1669 end Any; 1670 1671 --------- 1672 -- Arb -- 1673 --------- 1674 1675 -- +---+ 1676 -- | X |----> 1677 -- +---+ 1678 -- . 1679 -- . 1680 -- +---+ 1681 -- | Y |----> 1682 -- +---+ 1683 1684 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1 1685 1686 function Arb return Pattern is 1687 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP); 1688 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y); 1689 begin 1690 return (AFC with 1, X); 1691 end Arb; 1692 1693 ----------- 1694 -- Arbno -- 1695 ----------- 1696 1697 function Arbno (P : PString) return Pattern is 1698 begin 1699 if P'Length = 0 then 1700 return (AFC with 0, EOP); 1701 else 1702 return (AFC with 0, Arbno_Simple (S_To_PE (P))); 1703 end if; 1704 end Arbno; 1705 1706 function Arbno (P : PChar) return Pattern is 1707 begin 1708 return (AFC with 0, Arbno_Simple (C_To_PE (P))); 1709 end Arbno; 1710 1711 function Arbno (P : Pattern) return Pattern is 1712 Pat : constant PE_Ptr := Copy (P.P); 1713 1714 begin 1715 if P.Stk = 0 1716 and then OK_For_Simple_Arbno (Pat.Pcode) 1717 then 1718 return (AFC with 0, Arbno_Simple (Pat)); 1719 end if; 1720 1721 -- This is the complex case, either the pattern makes stack entries 1722 -- or it is possible for the pattern to match the null string (more 1723 -- accurately, we don't know that this is not the case). 1724 1725 -- +--------------------------+ 1726 -- | ^ 1727 -- V | 1728 -- +---+ | 1729 -- | X |----> | 1730 -- +---+ | 1731 -- . | 1732 -- . | 1733 -- +---+ +---+ +---+ | 1734 -- | E |---->| P |---->| Y |--->+ 1735 -- +---+ +---+ +---+ 1736 1737 -- The node numbering of the constituent pattern P is not affected. 1738 -- Where N is the number of nodes in P, the Y node is numbered N + 1, 1739 -- the E node is N + 2, and the X node is N + 3. 1740 1741 declare 1742 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1743 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E); 1744 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3); 1745 EPY : constant PE_Ptr := Bracket (E, Pat, Y); 1746 begin 1747 X.Alt := EPY; 1748 X.Index := EPY.Index + 1; 1749 return (AFC with P.Stk + 3, X); 1750 end; 1751 end Arbno; 1752 1753 ------------------ 1754 -- Arbno_Simple -- 1755 ------------------ 1756 1757 -- +-------------+ 1758 -- | ^ 1759 -- V | 1760 -- +---+ | 1761 -- | S |----> | 1762 -- +---+ | 1763 -- . | 1764 -- . | 1765 -- +---+ | 1766 -- | P |---------->+ 1767 -- +---+ 1768 1769 -- The node numbering of the constituent pattern P is not affected. 1770 -- The S node has a node number of P.Index + 1. 1771 1772 -- Note that we know that P cannot be EOP, because a null pattern 1773 -- does not meet the requirements for simple Arbno. 1774 1775 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is 1776 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P); 1777 begin 1778 Set_Successor (P, S); 1779 return S; 1780 end Arbno_Simple; 1781 1782 --------- 1783 -- Bal -- 1784 --------- 1785 1786 function Bal return Pattern is 1787 begin 1788 return (AFC with 1, new PE'(PC_Bal, 1, EOP)); 1789 end Bal; 1790 1791 ------------- 1792 -- Bracket -- 1793 ------------- 1794 1795 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is 1796 begin 1797 if P = EOP then 1798 E.Pthen := A; 1799 E.Index := 2; 1800 A.Index := 1; 1801 1802 else 1803 E.Pthen := P; 1804 Set_Successor (P, A); 1805 E.Index := P.Index + 2; 1806 A.Index := P.Index + 1; 1807 end if; 1808 1809 return E; 1810 end Bracket; 1811 1812 ----------- 1813 -- Break -- 1814 ----------- 1815 1816 function Break (Str : String) return Pattern is 1817 begin 1818 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str))); 1819 end Break; 1820 1821 function Break (Str : VString) return Pattern is 1822 begin 1823 return Break (S (Str)); 1824 end Break; 1825 1826 function Break (Str : Character) return Pattern is 1827 begin 1828 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str)); 1829 end Break; 1830 1831 function Break (Str : Character_Set) return Pattern is 1832 begin 1833 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str)); 1834 end Break; 1835 1836 function Break (Str : not null access VString) return Pattern is 1837 begin 1838 return (AFC with 0, 1839 new PE'(PC_Break_VP, 1, EOP, Str.all'Unchecked_Access)); 1840 end Break; 1841 1842 function Break (Str : VString_Func) return Pattern is 1843 begin 1844 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str)); 1845 end Break; 1846 1847 ------------ 1848 -- BreakX -- 1849 ------------ 1850 1851 function BreakX (Str : String) return Pattern is 1852 begin 1853 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str))); 1854 end BreakX; 1855 1856 function BreakX (Str : VString) return Pattern is 1857 begin 1858 return BreakX (S (Str)); 1859 end BreakX; 1860 1861 function BreakX (Str : Character) return Pattern is 1862 begin 1863 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str)); 1864 end BreakX; 1865 1866 function BreakX (Str : Character_Set) return Pattern is 1867 begin 1868 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str)); 1869 end BreakX; 1870 1871 function BreakX (Str : not null access VString) return Pattern is 1872 begin 1873 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str))); 1874 end BreakX; 1875 1876 function BreakX (Str : VString_Func) return Pattern is 1877 begin 1878 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str)); 1879 end BreakX; 1880 1881 ----------------- 1882 -- BreakX_Make -- 1883 ----------------- 1884 1885 -- +---+ +---+ 1886 -- | B |---->| A |----> 1887 -- +---+ +---+ 1888 -- ^ . 1889 -- | . 1890 -- | +---+ 1891 -- +<------| X | 1892 -- +---+ 1893 1894 -- The B node is numbered 3, the alternative node is 1, and the X 1895 -- node is 2. 1896 1897 function BreakX_Make (B : PE_Ptr) return Pattern is 1898 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B); 1899 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X); 1900 begin 1901 B.Pthen := A; 1902 return (AFC with 2, B); 1903 end BreakX_Make; 1904 1905 --------------------- 1906 -- Build_Ref_Array -- 1907 --------------------- 1908 1909 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is 1910 1911 procedure Record_PE (E : PE_Ptr); 1912 -- Record given pattern element if not already recorded in RA, 1913 -- and also record any referenced pattern elements recursively. 1914 1915 --------------- 1916 -- Record_PE -- 1917 --------------- 1918 1919 procedure Record_PE (E : PE_Ptr) is 1920 begin 1921 PutD (" Record_PE called with PE_Ptr = " & Image (E)); 1922 1923 if E = EOP or else RA (E.Index) /= null then 1924 Put_LineD (", nothing to do"); 1925 return; 1926 1927 else 1928 Put_LineD (", recording" & IndexT'Image (E.Index)); 1929 RA (E.Index) := E; 1930 Record_PE (E.Pthen); 1931 1932 if E.Pcode in PC_Has_Alt then 1933 Record_PE (E.Alt); 1934 end if; 1935 end if; 1936 end Record_PE; 1937 1938 -- Start of processing for Build_Ref_Array 1939 1940 begin 1941 New_LineD; 1942 Put_LineD ("Entering Build_Ref_Array"); 1943 Record_PE (E); 1944 New_LineD; 1945 end Build_Ref_Array; 1946 1947 ------------- 1948 -- C_To_PE -- 1949 ------------- 1950 1951 function C_To_PE (C : PChar) return PE_Ptr is 1952 begin 1953 return new PE'(PC_Char, 1, EOP, C); 1954 end C_To_PE; 1955 1956 ------------ 1957 -- Cancel -- 1958 ------------ 1959 1960 function Cancel return Pattern is 1961 begin 1962 return (AFC with 0, new PE'(PC_Cancel, 1, EOP)); 1963 end Cancel; 1964 1965 ------------ 1966 -- Concat -- 1967 ------------ 1968 1969 -- Concat needs to traverse the left operand performing the following 1970 -- set of fixups: 1971 1972 -- a) Any successor pointers (Pthen fields) that are set to EOP are 1973 -- reset to point to the second operand. 1974 1975 -- b) Any PC_Arbno_Y node has its stack count field incremented 1976 -- by the parameter Incr provided for this purpose. 1977 1978 -- d) Num fields of all pattern elements in the left operand are 1979 -- adjusted to include the elements of the right operand. 1980 1981 -- Note: we do not use Set_Successor in the processing for Concat, since 1982 -- there is no point in doing two traversals, we may as well do everything 1983 -- at the same time. 1984 1985 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is 1986 begin 1987 if L = EOP then 1988 return R; 1989 1990 elsif R = EOP then 1991 return L; 1992 1993 else 1994 declare 1995 Refs : Ref_Array (1 .. L.Index); 1996 -- We build a reference array for L whose N'th element points to 1997 -- the pattern element of L whose original Index value is N. 1998 1999 P : PE_Ptr; 2000 2001 begin 2002 Build_Ref_Array (L, Refs); 2003 2004 for J in Refs'Range loop 2005 P := Refs (J); 2006 2007 P.Index := P.Index + R.Index; 2008 2009 if P.Pcode = PC_Arbno_Y then 2010 P.Nat := P.Nat + Incr; 2011 end if; 2012 2013 if P.Pthen = EOP then 2014 P.Pthen := R; 2015 end if; 2016 2017 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then 2018 P.Alt := R; 2019 end if; 2020 end loop; 2021 end; 2022 2023 return L; 2024 end if; 2025 end Concat; 2026 2027 ---------- 2028 -- Copy -- 2029 ---------- 2030 2031 function Copy (P : PE_Ptr) return PE_Ptr is 2032 begin 2033 if P = null then 2034 Uninitialized_Pattern; 2035 2036 else 2037 declare 2038 Refs : Ref_Array (1 .. P.Index); 2039 -- References to elements in P, indexed by Index field 2040 2041 Copy : Ref_Array (1 .. P.Index); 2042 -- Holds copies of elements of P, indexed by Index field 2043 2044 E : PE_Ptr; 2045 2046 begin 2047 Build_Ref_Array (P, Refs); 2048 2049 -- Now copy all nodes 2050 2051 for J in Refs'Range loop 2052 Copy (J) := new PE'(Refs (J).all); 2053 end loop; 2054 2055 -- Adjust all internal references 2056 2057 for J in Copy'Range loop 2058 E := Copy (J); 2059 2060 -- Adjust successor pointer to point to copy 2061 2062 if E.Pthen /= EOP then 2063 E.Pthen := Copy (E.Pthen.Index); 2064 end if; 2065 2066 -- Adjust Alt pointer if there is one to point to copy 2067 2068 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then 2069 E.Alt := Copy (E.Alt.Index); 2070 end if; 2071 2072 -- Copy referenced string 2073 2074 if E.Pcode = PC_String then 2075 E.Str := new String'(E.Str.all); 2076 end if; 2077 end loop; 2078 2079 return Copy (P.Index); 2080 end; 2081 end if; 2082 end Copy; 2083 2084 ---------- 2085 -- Dump -- 2086 ---------- 2087 2088 procedure Dump (P : Pattern) is 2089 procedure Write_Node_Id (E : PE_Ptr; Cols : Natural); 2090 -- Writes out a string identifying the given pattern element. Cols is 2091 -- the column indentation level. 2092 2093 ------------------- 2094 -- Write_Node_Id -- 2095 ------------------- 2096 2097 procedure Write_Node_Id (E : PE_Ptr; Cols : Natural) is 2098 begin 2099 if E = EOP then 2100 Put ("EOP"); 2101 2102 for J in 4 .. Cols loop 2103 Put (' '); 2104 end loop; 2105 2106 else 2107 declare 2108 Str : String (1 .. Cols); 2109 N : Natural := Natural (E.Index); 2110 2111 begin 2112 Put ("#"); 2113 2114 for J in reverse Str'Range loop 2115 Str (J) := Character'Val (48 + N mod 10); 2116 N := N / 10; 2117 end loop; 2118 2119 Put (Str); 2120 end; 2121 end if; 2122 end Write_Node_Id; 2123 2124 -- Local variables 2125 2126 Cols : Natural := 2; 2127 -- Number of columns used for pattern numbers, minimum is 2 2128 2129 E : PE_Ptr; 2130 2131 subtype Count is Ada.Text_IO.Count; 2132 Scol : Count; 2133 -- Used to keep track of column in dump output 2134 2135 -- Start of processing for Dump 2136 2137 begin 2138 New_Line; 2139 Put 2140 ("Pattern Dump Output (pattern at " 2141 & Image (P'Address) 2142 & ", S = " 2143 & Natural'Image (P.Stk) & ')'); 2144 New_Line; 2145 2146 Scol := Col; 2147 2148 while Col < Scol loop 2149 Put ('-'); 2150 end loop; 2151 2152 New_Line; 2153 2154 -- If uninitialized pattern, dump line and we are done 2155 2156 if P.P = null then 2157 Put_Line ("Uninitialized pattern value"); 2158 return; 2159 end if; 2160 2161 -- If null pattern, just dump it and we are all done 2162 2163 if P.P = EOP then 2164 Put_Line ("EOP (null pattern)"); 2165 return; 2166 end if; 2167 2168 declare 2169 Refs : Ref_Array (1 .. P.P.Index); 2170 -- We build a reference array whose N'th element points to the 2171 -- pattern element whose Index value is N. 2172 2173 begin 2174 Build_Ref_Array (P.P, Refs); 2175 2176 -- Set number of columns required for node numbers 2177 2178 while 10 ** Cols - 1 < Integer (P.P.Index) loop 2179 Cols := Cols + 1; 2180 end loop; 2181 2182 -- Now dump the nodes in reverse sequence. We output them in reverse 2183 -- sequence since this corresponds to the natural order used to 2184 -- construct the patterns. 2185 2186 for J in reverse Refs'Range loop 2187 E := Refs (J); 2188 Write_Node_Id (E, Cols); 2189 Set_Col (Count (Cols) + 4); 2190 Put (Image (E)); 2191 Put (" "); 2192 Put (Pattern_Code'Image (E.Pcode)); 2193 Put (" "); 2194 Set_Col (21 + Count (Cols) + Address_Image_Length); 2195 Write_Node_Id (E.Pthen, Cols); 2196 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length); 2197 2198 case E.Pcode is 2199 when PC_Alt 2200 | PC_Arb_X 2201 | PC_Arbno_S 2202 | PC_Arbno_X 2203 => 2204 Write_Node_Id (E.Alt, Cols); 2205 2206 when PC_Rpat => 2207 Put (Str_PP (E.PP)); 2208 2209 when PC_Pred_Func => 2210 Put (Str_BF (E.BF)); 2211 2212 when PC_Assign_Imm 2213 | PC_Assign_OnM 2214 | PC_Any_VP 2215 | PC_Break_VP 2216 | PC_BreakX_VP 2217 | PC_NotAny_VP 2218 | PC_NSpan_VP 2219 | PC_Span_VP 2220 | PC_String_VP 2221 => 2222 Put (Str_VP (E.VP)); 2223 2224 when PC_Write_Imm 2225 | PC_Write_OnM 2226 => 2227 Put (Str_FP (E.FP)); 2228 2229 when PC_String => 2230 Put (Image (E.Str.all)); 2231 2232 when PC_String_2 => 2233 Put (Image (E.Str2)); 2234 2235 when PC_String_3 => 2236 Put (Image (E.Str3)); 2237 2238 when PC_String_4 => 2239 Put (Image (E.Str4)); 2240 2241 when PC_String_5 => 2242 Put (Image (E.Str5)); 2243 2244 when PC_String_6 => 2245 Put (Image (E.Str6)); 2246 2247 when PC_Setcur => 2248 Put (Str_NP (E.Var)); 2249 2250 when PC_Any_CH 2251 | PC_Break_CH 2252 | PC_BreakX_CH 2253 | PC_Char 2254 | PC_NotAny_CH 2255 | PC_NSpan_CH 2256 | PC_Span_CH 2257 => 2258 Put (''' & E.Char & '''); 2259 2260 when PC_Any_CS 2261 | PC_Break_CS 2262 | PC_BreakX_CS 2263 | PC_NotAny_CS 2264 | PC_NSpan_CS 2265 | PC_Span_CS 2266 => 2267 Put ('"' & To_Sequence (E.CS) & '"'); 2268 2269 when PC_Arbno_Y 2270 | PC_Len_Nat 2271 | PC_Pos_Nat 2272 | PC_RPos_Nat 2273 | PC_RTab_Nat 2274 | PC_Tab_Nat 2275 => 2276 Put (S (E.Nat)); 2277 2278 when PC_Pos_NF 2279 | PC_Len_NF 2280 | PC_RPos_NF 2281 | PC_RTab_NF 2282 | PC_Tab_NF 2283 => 2284 Put (Str_NF (E.NF)); 2285 2286 when PC_Pos_NP 2287 | PC_Len_NP 2288 | PC_RPos_NP 2289 | PC_RTab_NP 2290 | PC_Tab_NP 2291 => 2292 Put (Str_NP (E.NP)); 2293 2294 when PC_Any_VF 2295 | PC_Break_VF 2296 | PC_BreakX_VF 2297 | PC_NotAny_VF 2298 | PC_NSpan_VF 2299 | PC_Span_VF 2300 | PC_String_VF 2301 => 2302 Put (Str_VF (E.VF)); 2303 2304 when others => 2305 null; 2306 end case; 2307 2308 New_Line; 2309 end loop; 2310 2311 New_Line; 2312 end; 2313 end Dump; 2314 2315 ---------- 2316 -- Fail -- 2317 ---------- 2318 2319 function Fail return Pattern is 2320 begin 2321 return (AFC with 0, new PE'(PC_Fail, 1, EOP)); 2322 end Fail; 2323 2324 ----------- 2325 -- Fence -- 2326 ----------- 2327 2328 -- Simple case 2329 2330 function Fence return Pattern is 2331 begin 2332 return (AFC with 1, new PE'(PC_Fence, 1, EOP)); 2333 end Fence; 2334 2335 -- Function case 2336 2337 -- +---+ +---+ +---+ 2338 -- | E |---->| P |---->| X |----> 2339 -- +---+ +---+ +---+ 2340 2341 -- The node numbering of the constituent pattern P is not affected. 2342 -- Where N is the number of nodes in P, the X node is numbered N + 1, 2343 -- and the E node is N + 2. 2344 2345 function Fence (P : Pattern) return Pattern is 2346 Pat : constant PE_Ptr := Copy (P.P); 2347 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 2348 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP); 2349 begin 2350 return (AFC with P.Stk + 1, Bracket (E, Pat, X)); 2351 end Fence; 2352 2353 -------------- 2354 -- Finalize -- 2355 -------------- 2356 2357 procedure Finalize (Object : in out Pattern) is 2358 2359 procedure Free is new Ada.Unchecked_Deallocation (PE, PE_Ptr); 2360 procedure Free is new Ada.Unchecked_Deallocation (String, String_Ptr); 2361 2362 begin 2363 -- Nothing to do if already freed 2364 2365 if Object.P = null then 2366 return; 2367 2368 -- Otherwise we must free all elements 2369 2370 else 2371 declare 2372 Refs : Ref_Array (1 .. Object.P.Index); 2373 -- References to elements in pattern to be finalized 2374 2375 begin 2376 Build_Ref_Array (Object.P, Refs); 2377 2378 for J in Refs'Range loop 2379 if Refs (J).Pcode = PC_String then 2380 Free (Refs (J).Str); 2381 end if; 2382 2383 Free (Refs (J)); 2384 end loop; 2385 2386 Object.P := null; 2387 end; 2388 end if; 2389 end Finalize; 2390 2391 ----------- 2392 -- Image -- 2393 ----------- 2394 2395 function Image (P : PE_Ptr) return String is 2396 begin 2397 return Image (To_Address (P)); 2398 end Image; 2399 2400 function Image (P : Pattern) return String is 2401 begin 2402 return S (Image (P)); 2403 end Image; 2404 2405 function Image (P : Pattern) return VString is 2406 2407 Kill_Ampersand : Boolean := False; 2408 -- Set True to delete next & to be output to Result 2409 2410 Result : VString := Nul; 2411 -- The result is accumulated here, using Append 2412 2413 Refs : Ref_Array (1 .. P.P.Index); 2414 -- We build a reference array whose N'th element points to the 2415 -- pattern element whose Index value is N. 2416 2417 procedure Delete_Ampersand; 2418 -- Deletes the ampersand at the end of Result 2419 2420 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean); 2421 -- E refers to a pattern structure whose successor is given by Succ. 2422 -- This procedure appends to Result a representation of this pattern. 2423 -- The Paren parameter indicates whether parentheses are required if 2424 -- the output is more than one element. 2425 2426 procedure Image_One (E : in out PE_Ptr); 2427 -- E refers to a pattern structure. This procedure appends to Result 2428 -- a representation of the single simple or compound pattern structure 2429 -- at the start of E and updates E to point to its successor. 2430 2431 ---------------------- 2432 -- Delete_Ampersand -- 2433 ---------------------- 2434 2435 procedure Delete_Ampersand is 2436 L : constant Natural := Length (Result); 2437 begin 2438 if L > 2 then 2439 Delete (Result, L - 1, L); 2440 end if; 2441 end Delete_Ampersand; 2442 2443 --------------- 2444 -- Image_One -- 2445 --------------- 2446 2447 procedure Image_One (E : in out PE_Ptr) is 2448 2449 ER : PE_Ptr := E.Pthen; 2450 -- Successor set as result in E unless reset 2451 2452 begin 2453 case E.Pcode is 2454 when PC_Cancel => 2455 Append (Result, "Cancel"); 2456 2457 when PC_Alt => Alt : declare 2458 2459 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index; 2460 -- Number of elements in left pattern of alternation 2461 2462 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L; 2463 -- Number of lowest index in elements of left pattern 2464 2465 E1 : PE_Ptr; 2466 2467 begin 2468 -- The successor of the alternation node must have a lower 2469 -- index than any node that is in the left pattern or a 2470 -- higher index than the alternation node itself. 2471 2472 while ER /= EOP 2473 and then ER.Index >= Lowest_In_L 2474 and then ER.Index < E.Index 2475 loop 2476 ER := ER.Pthen; 2477 end loop; 2478 2479 Append (Result, '('); 2480 2481 E1 := E; 2482 loop 2483 Image_Seq (E1.Pthen, ER, False); 2484 Append (Result, " or "); 2485 E1 := E1.Alt; 2486 exit when E1.Pcode /= PC_Alt; 2487 end loop; 2488 2489 Image_Seq (E1, ER, False); 2490 Append (Result, ')'); 2491 end Alt; 2492 2493 when PC_Any_CS => 2494 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')'); 2495 2496 when PC_Any_VF => 2497 Append (Result, "Any (" & Str_VF (E.VF) & ')'); 2498 2499 when PC_Any_VP => 2500 Append (Result, "Any (" & Str_VP (E.VP) & ')'); 2501 2502 when PC_Arb_X => 2503 Append (Result, "Arb"); 2504 2505 when PC_Arbno_S => 2506 Append (Result, "Arbno ("); 2507 Image_Seq (E.Alt, E, False); 2508 Append (Result, ')'); 2509 2510 when PC_Arbno_X => 2511 Append (Result, "Arbno ("); 2512 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False); 2513 Append (Result, ')'); 2514 2515 when PC_Assign_Imm => 2516 Delete_Ampersand; 2517 Append (Result, "* " & Str_VP (Refs (E.Index).VP)); 2518 2519 when PC_Assign_OnM => 2520 Delete_Ampersand; 2521 Append (Result, "** " & Str_VP (Refs (E.Index).VP)); 2522 2523 when PC_Any_CH => 2524 Append (Result, "Any ('" & E.Char & "')"); 2525 2526 when PC_Bal => 2527 Append (Result, "Bal"); 2528 2529 when PC_Break_CH => 2530 Append (Result, "Break ('" & E.Char & "')"); 2531 2532 when PC_Break_CS => 2533 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')'); 2534 2535 when PC_Break_VF => 2536 Append (Result, "Break (" & Str_VF (E.VF) & ')'); 2537 2538 when PC_Break_VP => 2539 Append (Result, "Break (" & Str_VP (E.VP) & ')'); 2540 2541 when PC_BreakX_CH => 2542 Append (Result, "BreakX ('" & E.Char & "')"); 2543 ER := ER.Pthen; 2544 2545 when PC_BreakX_CS => 2546 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')'); 2547 ER := ER.Pthen; 2548 2549 when PC_BreakX_VF => 2550 Append (Result, "BreakX (" & Str_VF (E.VF) & ')'); 2551 ER := ER.Pthen; 2552 2553 when PC_BreakX_VP => 2554 Append (Result, "BreakX (" & Str_VP (E.VP) & ')'); 2555 ER := ER.Pthen; 2556 2557 when PC_Char => 2558 Append (Result, ''' & E.Char & '''); 2559 2560 when PC_Fail => 2561 Append (Result, "Fail"); 2562 2563 when PC_Fence => 2564 Append (Result, "Fence"); 2565 2566 when PC_Fence_X => 2567 Append (Result, "Fence ("); 2568 Image_Seq (E.Pthen, Refs (E.Index - 1), False); 2569 Append (Result, ")"); 2570 ER := Refs (E.Index - 1).Pthen; 2571 2572 when PC_Len_Nat => 2573 Append (Result, "Len (" & E.Nat & ')'); 2574 2575 when PC_Len_NF => 2576 Append (Result, "Len (" & Str_NF (E.NF) & ')'); 2577 2578 when PC_Len_NP => 2579 Append (Result, "Len (" & Str_NP (E.NP) & ')'); 2580 2581 when PC_NotAny_CH => 2582 Append (Result, "NotAny ('" & E.Char & "')"); 2583 2584 when PC_NotAny_CS => 2585 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')'); 2586 2587 when PC_NotAny_VF => 2588 Append (Result, "NotAny (" & Str_VF (E.VF) & ')'); 2589 2590 when PC_NotAny_VP => 2591 Append (Result, "NotAny (" & Str_VP (E.VP) & ')'); 2592 2593 when PC_NSpan_CH => 2594 Append (Result, "NSpan ('" & E.Char & "')"); 2595 2596 when PC_NSpan_CS => 2597 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')'); 2598 2599 when PC_NSpan_VF => 2600 Append (Result, "NSpan (" & Str_VF (E.VF) & ')'); 2601 2602 when PC_NSpan_VP => 2603 Append (Result, "NSpan (" & Str_VP (E.VP) & ')'); 2604 2605 when PC_Null => 2606 Append (Result, """"""); 2607 2608 when PC_Pos_Nat => 2609 Append (Result, "Pos (" & E.Nat & ')'); 2610 2611 when PC_Pos_NF => 2612 Append (Result, "Pos (" & Str_NF (E.NF) & ')'); 2613 2614 when PC_Pos_NP => 2615 Append (Result, "Pos (" & Str_NP (E.NP) & ')'); 2616 2617 when PC_R_Enter => 2618 Kill_Ampersand := True; 2619 2620 when PC_Rest => 2621 Append (Result, "Rest"); 2622 2623 when PC_Rpat => 2624 Append (Result, "(+ " & Str_PP (E.PP) & ')'); 2625 2626 when PC_Pred_Func => 2627 Append (Result, "(+ " & Str_BF (E.BF) & ')'); 2628 2629 when PC_RPos_Nat => 2630 Append (Result, "RPos (" & E.Nat & ')'); 2631 2632 when PC_RPos_NF => 2633 Append (Result, "RPos (" & Str_NF (E.NF) & ')'); 2634 2635 when PC_RPos_NP => 2636 Append (Result, "RPos (" & Str_NP (E.NP) & ')'); 2637 2638 when PC_RTab_Nat => 2639 Append (Result, "RTab (" & E.Nat & ')'); 2640 2641 when PC_RTab_NF => 2642 Append (Result, "RTab (" & Str_NF (E.NF) & ')'); 2643 2644 when PC_RTab_NP => 2645 Append (Result, "RTab (" & Str_NP (E.NP) & ')'); 2646 2647 when PC_Setcur => 2648 Append (Result, "Setcur (" & Str_NP (E.Var) & ')'); 2649 2650 when PC_Span_CH => 2651 Append (Result, "Span ('" & E.Char & "')"); 2652 2653 when PC_Span_CS => 2654 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')'); 2655 2656 when PC_Span_VF => 2657 Append (Result, "Span (" & Str_VF (E.VF) & ')'); 2658 2659 when PC_Span_VP => 2660 Append (Result, "Span (" & Str_VP (E.VP) & ')'); 2661 2662 when PC_String => 2663 Append (Result, Image (E.Str.all)); 2664 2665 when PC_String_2 => 2666 Append (Result, Image (E.Str2)); 2667 2668 when PC_String_3 => 2669 Append (Result, Image (E.Str3)); 2670 2671 when PC_String_4 => 2672 Append (Result, Image (E.Str4)); 2673 2674 when PC_String_5 => 2675 Append (Result, Image (E.Str5)); 2676 2677 when PC_String_6 => 2678 Append (Result, Image (E.Str6)); 2679 2680 when PC_String_VF => 2681 Append (Result, "(+" & Str_VF (E.VF) & ')'); 2682 2683 when PC_String_VP => 2684 Append (Result, "(+" & Str_VP (E.VP) & ')'); 2685 2686 when PC_Succeed => 2687 Append (Result, "Succeed"); 2688 2689 when PC_Tab_Nat => 2690 Append (Result, "Tab (" & E.Nat & ')'); 2691 2692 when PC_Tab_NF => 2693 Append (Result, "Tab (" & Str_NF (E.NF) & ')'); 2694 2695 when PC_Tab_NP => 2696 Append (Result, "Tab (" & Str_NP (E.NP) & ')'); 2697 2698 when PC_Write_Imm => 2699 Append (Result, '('); 2700 Image_Seq (E, Refs (E.Index - 1), True); 2701 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP)); 2702 ER := Refs (E.Index - 1).Pthen; 2703 2704 when PC_Write_OnM => 2705 Append (Result, '('); 2706 Image_Seq (E.Pthen, Refs (E.Index - 1), True); 2707 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP)); 2708 ER := Refs (E.Index - 1).Pthen; 2709 2710 -- Other pattern codes should not appear as leading elements 2711 2712 when PC_Arb_Y 2713 | PC_Arbno_Y 2714 | PC_Assign 2715 | PC_BreakX_X 2716 | PC_EOP 2717 | PC_Fence_Y 2718 | PC_R_Remove 2719 | PC_R_Restore 2720 | PC_Unanchored 2721 => 2722 Append (Result, "???"); 2723 end case; 2724 2725 E := ER; 2726 end Image_One; 2727 2728 --------------- 2729 -- Image_Seq -- 2730 --------------- 2731 2732 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is 2733 Indx : constant Natural := Length (Result); 2734 E1 : PE_Ptr := E; 2735 Mult : Boolean := False; 2736 2737 begin 2738 -- The image of EOP is "" (the null string) 2739 2740 if E = EOP then 2741 Append (Result, """"""); 2742 2743 -- Else generate appropriate concatenation sequence 2744 2745 else 2746 loop 2747 Image_One (E1); 2748 exit when E1 = Succ; 2749 exit when E1 = EOP; 2750 Mult := True; 2751 2752 if Kill_Ampersand then 2753 Kill_Ampersand := False; 2754 else 2755 Append (Result, " & "); 2756 end if; 2757 end loop; 2758 end if; 2759 2760 if Mult and Paren then 2761 Insert (Result, Indx + 1, "("); 2762 Append (Result, ")"); 2763 end if; 2764 end Image_Seq; 2765 2766 -- Start of processing for Image 2767 2768 begin 2769 Build_Ref_Array (P.P, Refs); 2770 Image_Seq (P.P, EOP, False); 2771 return Result; 2772 end Image; 2773 2774 ----------- 2775 -- Is_In -- 2776 ----------- 2777 2778 function Is_In (C : Character; Str : String) return Boolean is 2779 begin 2780 for J in Str'Range loop 2781 if Str (J) = C then 2782 return True; 2783 end if; 2784 end loop; 2785 2786 return False; 2787 end Is_In; 2788 2789 --------- 2790 -- Len -- 2791 --------- 2792 2793 function Len (Count : Natural) return Pattern is 2794 begin 2795 -- Note, the following is not just an optimization, it is needed 2796 -- to ensure that Arbno (Len (0)) does not generate an infinite 2797 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno). 2798 2799 if Count = 0 then 2800 return (AFC with 0, new PE'(PC_Null, 1, EOP)); 2801 2802 else 2803 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count)); 2804 end if; 2805 end Len; 2806 2807 function Len (Count : Natural_Func) return Pattern is 2808 begin 2809 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count)); 2810 end Len; 2811 2812 function Len (Count : not null access Natural) return Pattern is 2813 begin 2814 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count))); 2815 end Len; 2816 2817 ----------------- 2818 -- Logic_Error -- 2819 ----------------- 2820 2821 procedure Logic_Error is 2822 begin 2823 raise Program_Error with 2824 "Internal logic error in GNAT.Spitbol.Patterns"; 2825 end Logic_Error; 2826 2827 ----------- 2828 -- Match -- 2829 ----------- 2830 2831 function Match 2832 (Subject : VString; 2833 Pat : Pattern) return Boolean 2834 is 2835 S : Big_String_Access; 2836 L : Natural; 2837 Start : Natural; 2838 Stop : Natural; 2839 pragma Unreferenced (Stop); 2840 2841 begin 2842 Get_String (Subject, S, L); 2843 2844 if Debug_Mode then 2845 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2846 else 2847 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2848 end if; 2849 2850 return Start /= 0; 2851 end Match; 2852 2853 function Match 2854 (Subject : String; 2855 Pat : Pattern) return Boolean 2856 is 2857 Start, Stop : Natural; 2858 pragma Unreferenced (Stop); 2859 2860 subtype String1 is String (1 .. Subject'Length); 2861 2862 begin 2863 if Debug_Mode then 2864 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2865 else 2866 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2867 end if; 2868 2869 return Start /= 0; 2870 end Match; 2871 2872 function Match 2873 (Subject : VString_Var; 2874 Pat : Pattern; 2875 Replace : VString) return Boolean 2876 is 2877 Start : Natural; 2878 Stop : Natural; 2879 S : Big_String_Access; 2880 L : Natural; 2881 2882 begin 2883 Get_String (Subject, S, L); 2884 2885 if Debug_Mode then 2886 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2887 else 2888 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2889 end if; 2890 2891 if Start = 0 then 2892 return False; 2893 else 2894 Get_String (Replace, S, L); 2895 Replace_Slice 2896 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); 2897 return True; 2898 end if; 2899 end Match; 2900 2901 function Match 2902 (Subject : VString_Var; 2903 Pat : Pattern; 2904 Replace : String) return Boolean 2905 is 2906 Start : Natural; 2907 Stop : Natural; 2908 S : Big_String_Access; 2909 L : Natural; 2910 2911 begin 2912 Get_String (Subject, S, L); 2913 2914 if Debug_Mode then 2915 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2916 else 2917 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2918 end if; 2919 2920 if Start = 0 then 2921 return False; 2922 else 2923 Replace_Slice 2924 (Subject'Unrestricted_Access.all, Start, Stop, Replace); 2925 return True; 2926 end if; 2927 end Match; 2928 2929 procedure Match 2930 (Subject : VString; 2931 Pat : Pattern) 2932 is 2933 S : Big_String_Access; 2934 L : Natural; 2935 2936 Start : Natural; 2937 Stop : Natural; 2938 pragma Unreferenced (Start, Stop); 2939 2940 begin 2941 Get_String (Subject, S, L); 2942 2943 if Debug_Mode then 2944 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2945 else 2946 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2947 end if; 2948 end Match; 2949 2950 procedure Match 2951 (Subject : String; 2952 Pat : Pattern) 2953 is 2954 Start, Stop : Natural; 2955 pragma Unreferenced (Start, Stop); 2956 2957 subtype String1 is String (1 .. Subject'Length); 2958 2959 begin 2960 if Debug_Mode then 2961 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2962 else 2963 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2964 end if; 2965 end Match; 2966 2967 procedure Match 2968 (Subject : in out VString; 2969 Pat : Pattern; 2970 Replace : VString) 2971 is 2972 Start : Natural; 2973 Stop : Natural; 2974 S : Big_String_Access; 2975 L : Natural; 2976 2977 begin 2978 Get_String (Subject, S, L); 2979 2980 if Debug_Mode then 2981 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2982 else 2983 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2984 end if; 2985 2986 if Start /= 0 then 2987 Get_String (Replace, S, L); 2988 Replace_Slice (Subject, Start, Stop, S (1 .. L)); 2989 end if; 2990 end Match; 2991 2992 procedure Match 2993 (Subject : in out VString; 2994 Pat : Pattern; 2995 Replace : String) 2996 is 2997 Start : Natural; 2998 Stop : Natural; 2999 S : Big_String_Access; 3000 L : Natural; 3001 3002 begin 3003 Get_String (Subject, S, L); 3004 3005 if Debug_Mode then 3006 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3007 else 3008 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3009 end if; 3010 3011 if Start /= 0 then 3012 Replace_Slice (Subject, Start, Stop, Replace); 3013 end if; 3014 end Match; 3015 3016 function Match 3017 (Subject : VString; 3018 Pat : PString) return Boolean 3019 is 3020 Pat_Len : constant Natural := Pat'Length; 3021 S : Big_String_Access; 3022 L : Natural; 3023 3024 begin 3025 Get_String (Subject, S, L); 3026 3027 if Anchored_Mode then 3028 if Pat_Len > L then 3029 return False; 3030 else 3031 return Pat = S (1 .. Pat_Len); 3032 end if; 3033 3034 else 3035 for J in 1 .. L - Pat_Len + 1 loop 3036 if Pat = S (J .. J + (Pat_Len - 1)) then 3037 return True; 3038 end if; 3039 end loop; 3040 3041 return False; 3042 end if; 3043 end Match; 3044 3045 function Match 3046 (Subject : String; 3047 Pat : PString) return Boolean 3048 is 3049 Pat_Len : constant Natural := Pat'Length; 3050 Sub_Len : constant Natural := Subject'Length; 3051 SFirst : constant Natural := Subject'First; 3052 3053 begin 3054 if Anchored_Mode then 3055 if Pat_Len > Sub_Len then 3056 return False; 3057 else 3058 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1); 3059 end if; 3060 3061 else 3062 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop 3063 if Pat = Subject (J .. J + (Pat_Len - 1)) then 3064 return True; 3065 end if; 3066 end loop; 3067 3068 return False; 3069 end if; 3070 end Match; 3071 3072 function Match 3073 (Subject : VString_Var; 3074 Pat : PString; 3075 Replace : VString) return Boolean 3076 is 3077 Start : Natural; 3078 Stop : Natural; 3079 S : Big_String_Access; 3080 L : Natural; 3081 3082 begin 3083 Get_String (Subject, S, L); 3084 3085 if Debug_Mode then 3086 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3087 else 3088 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3089 end if; 3090 3091 if Start = 0 then 3092 return False; 3093 else 3094 Get_String (Replace, S, L); 3095 Replace_Slice 3096 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); 3097 return True; 3098 end if; 3099 end Match; 3100 3101 function Match 3102 (Subject : VString_Var; 3103 Pat : PString; 3104 Replace : String) return Boolean 3105 is 3106 Start : Natural; 3107 Stop : Natural; 3108 S : Big_String_Access; 3109 L : Natural; 3110 3111 begin 3112 Get_String (Subject, S, L); 3113 3114 if Debug_Mode then 3115 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3116 else 3117 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3118 end if; 3119 3120 if Start = 0 then 3121 return False; 3122 else 3123 Replace_Slice 3124 (Subject'Unrestricted_Access.all, Start, Stop, Replace); 3125 return True; 3126 end if; 3127 end Match; 3128 3129 procedure Match 3130 (Subject : VString; 3131 Pat : PString) 3132 is 3133 S : Big_String_Access; 3134 L : Natural; 3135 3136 Start : Natural; 3137 Stop : Natural; 3138 pragma Unreferenced (Start, Stop); 3139 3140 begin 3141 Get_String (Subject, S, L); 3142 3143 if Debug_Mode then 3144 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3145 else 3146 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3147 end if; 3148 end Match; 3149 3150 procedure Match 3151 (Subject : String; 3152 Pat : PString) 3153 is 3154 Start, Stop : Natural; 3155 pragma Unreferenced (Start, Stop); 3156 3157 subtype String1 is String (1 .. Subject'Length); 3158 3159 begin 3160 if Debug_Mode then 3161 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); 3162 else 3163 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); 3164 end if; 3165 end Match; 3166 3167 procedure Match 3168 (Subject : in out VString; 3169 Pat : PString; 3170 Replace : VString) 3171 is 3172 Start : Natural; 3173 Stop : Natural; 3174 S : Big_String_Access; 3175 L : Natural; 3176 3177 begin 3178 Get_String (Subject, S, L); 3179 3180 if Debug_Mode then 3181 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3182 else 3183 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3184 end if; 3185 3186 if Start /= 0 then 3187 Get_String (Replace, S, L); 3188 Replace_Slice (Subject, Start, Stop, S (1 .. L)); 3189 end if; 3190 end Match; 3191 3192 procedure Match 3193 (Subject : in out VString; 3194 Pat : PString; 3195 Replace : String) 3196 is 3197 Start : Natural; 3198 Stop : Natural; 3199 S : Big_String_Access; 3200 L : Natural; 3201 3202 begin 3203 Get_String (Subject, S, L); 3204 3205 if Debug_Mode then 3206 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3207 else 3208 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3209 end if; 3210 3211 if Start /= 0 then 3212 Replace_Slice (Subject, Start, Stop, Replace); 3213 end if; 3214 end Match; 3215 3216 function Match 3217 (Subject : VString_Var; 3218 Pat : Pattern; 3219 Result : Match_Result_Var) return Boolean 3220 is 3221 Start : Natural; 3222 Stop : Natural; 3223 S : Big_String_Access; 3224 L : Natural; 3225 3226 begin 3227 Get_String (Subject, S, L); 3228 3229 if Debug_Mode then 3230 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3231 else 3232 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3233 end if; 3234 3235 if Start = 0 then 3236 Result'Unrestricted_Access.all.Var := null; 3237 return False; 3238 3239 else 3240 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access; 3241 Result'Unrestricted_Access.all.Start := Start; 3242 Result'Unrestricted_Access.all.Stop := Stop; 3243 return True; 3244 end if; 3245 end Match; 3246 3247 procedure Match 3248 (Subject : in out VString; 3249 Pat : Pattern; 3250 Result : out Match_Result) 3251 is 3252 Start : Natural; 3253 Stop : Natural; 3254 S : Big_String_Access; 3255 L : Natural; 3256 3257 begin 3258 Get_String (Subject, S, L); 3259 3260 if Debug_Mode then 3261 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3262 else 3263 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3264 end if; 3265 3266 if Start = 0 then 3267 Result.Var := null; 3268 else 3269 Result.Var := Subject'Unrestricted_Access; 3270 Result.Start := Start; 3271 Result.Stop := Stop; 3272 end if; 3273 end Match; 3274 3275 --------------- 3276 -- New_LineD -- 3277 --------------- 3278 3279 procedure New_LineD is 3280 begin 3281 if Internal_Debug then 3282 New_Line; 3283 end if; 3284 end New_LineD; 3285 3286 ------------ 3287 -- NotAny -- 3288 ------------ 3289 3290 function NotAny (Str : String) return Pattern is 3291 begin 3292 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str))); 3293 end NotAny; 3294 3295 function NotAny (Str : VString) return Pattern is 3296 begin 3297 return NotAny (S (Str)); 3298 end NotAny; 3299 3300 function NotAny (Str : Character) return Pattern is 3301 begin 3302 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str)); 3303 end NotAny; 3304 3305 function NotAny (Str : Character_Set) return Pattern is 3306 begin 3307 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str)); 3308 end NotAny; 3309 3310 function NotAny (Str : not null access VString) return Pattern is 3311 begin 3312 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str))); 3313 end NotAny; 3314 3315 function NotAny (Str : VString_Func) return Pattern is 3316 begin 3317 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str)); 3318 end NotAny; 3319 3320 ----------- 3321 -- NSpan -- 3322 ----------- 3323 3324 function NSpan (Str : String) return Pattern is 3325 begin 3326 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str))); 3327 end NSpan; 3328 3329 function NSpan (Str : VString) return Pattern is 3330 begin 3331 return NSpan (S (Str)); 3332 end NSpan; 3333 3334 function NSpan (Str : Character) return Pattern is 3335 begin 3336 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str)); 3337 end NSpan; 3338 3339 function NSpan (Str : Character_Set) return Pattern is 3340 begin 3341 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str)); 3342 end NSpan; 3343 3344 function NSpan (Str : not null access VString) return Pattern is 3345 begin 3346 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str))); 3347 end NSpan; 3348 3349 function NSpan (Str : VString_Func) return Pattern is 3350 begin 3351 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str)); 3352 end NSpan; 3353 3354 --------- 3355 -- Pos -- 3356 --------- 3357 3358 function Pos (Count : Natural) return Pattern is 3359 begin 3360 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count)); 3361 end Pos; 3362 3363 function Pos (Count : Natural_Func) return Pattern is 3364 begin 3365 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count)); 3366 end Pos; 3367 3368 function Pos (Count : not null access Natural) return Pattern is 3369 begin 3370 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count))); 3371 end Pos; 3372 3373 ---------- 3374 -- PutD -- 3375 ---------- 3376 3377 procedure PutD (Str : String) is 3378 begin 3379 if Internal_Debug then 3380 Put (Str); 3381 end if; 3382 end PutD; 3383 3384 --------------- 3385 -- Put_LineD -- 3386 --------------- 3387 3388 procedure Put_LineD (Str : String) is 3389 begin 3390 if Internal_Debug then 3391 Put_Line (Str); 3392 end if; 3393 end Put_LineD; 3394 3395 ------------- 3396 -- Replace -- 3397 ------------- 3398 3399 procedure Replace 3400 (Result : in out Match_Result; 3401 Replace : VString) 3402 is 3403 S : Big_String_Access; 3404 L : Natural; 3405 3406 begin 3407 Get_String (Replace, S, L); 3408 3409 if Result.Var /= null then 3410 Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L)); 3411 Result.Var := null; 3412 end if; 3413 end Replace; 3414 3415 ---------- 3416 -- Rest -- 3417 ---------- 3418 3419 function Rest return Pattern is 3420 begin 3421 return (AFC with 0, new PE'(PC_Rest, 1, EOP)); 3422 end Rest; 3423 3424 ---------- 3425 -- Rpos -- 3426 ---------- 3427 3428 function Rpos (Count : Natural) return Pattern is 3429 begin 3430 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count)); 3431 end Rpos; 3432 3433 function Rpos (Count : Natural_Func) return Pattern is 3434 begin 3435 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count)); 3436 end Rpos; 3437 3438 function Rpos (Count : not null access Natural) return Pattern is 3439 begin 3440 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count))); 3441 end Rpos; 3442 3443 ---------- 3444 -- Rtab -- 3445 ---------- 3446 3447 function Rtab (Count : Natural) return Pattern is 3448 begin 3449 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count)); 3450 end Rtab; 3451 3452 function Rtab (Count : Natural_Func) return Pattern is 3453 begin 3454 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count)); 3455 end Rtab; 3456 3457 function Rtab (Count : not null access Natural) return Pattern is 3458 begin 3459 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count))); 3460 end Rtab; 3461 3462 ------------- 3463 -- S_To_PE -- 3464 ------------- 3465 3466 function S_To_PE (Str : PString) return PE_Ptr is 3467 Len : constant Natural := Str'Length; 3468 3469 begin 3470 case Len is 3471 when 0 => 3472 return new PE'(PC_Null, 1, EOP); 3473 3474 when 1 => 3475 return new PE'(PC_Char, 1, EOP, Str (Str'First)); 3476 3477 when 2 => 3478 return new PE'(PC_String_2, 1, EOP, Str); 3479 3480 when 3 => 3481 return new PE'(PC_String_3, 1, EOP, Str); 3482 3483 when 4 => 3484 return new PE'(PC_String_4, 1, EOP, Str); 3485 3486 when 5 => 3487 return new PE'(PC_String_5, 1, EOP, Str); 3488 3489 when 6 => 3490 return new PE'(PC_String_6, 1, EOP, Str); 3491 3492 when others => 3493 return new PE'(PC_String, 1, EOP, new String'(Str)); 3494 end case; 3495 end S_To_PE; 3496 3497 ------------------- 3498 -- Set_Successor -- 3499 ------------------- 3500 3501 -- Note: this procedure is not used by the normal concatenation circuit, 3502 -- since other fixups are required on the left operand in this case, and 3503 -- they might as well be done all together. 3504 3505 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is 3506 begin 3507 if Pat = null then 3508 Uninitialized_Pattern; 3509 3510 elsif Pat = EOP then 3511 Logic_Error; 3512 3513 else 3514 declare 3515 Refs : Ref_Array (1 .. Pat.Index); 3516 -- We build a reference array for L whose N'th element points to 3517 -- the pattern element of L whose original Index value is N. 3518 3519 P : PE_Ptr; 3520 3521 begin 3522 Build_Ref_Array (Pat, Refs); 3523 3524 for J in Refs'Range loop 3525 P := Refs (J); 3526 3527 if P.Pthen = EOP then 3528 P.Pthen := Succ; 3529 end if; 3530 3531 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then 3532 P.Alt := Succ; 3533 end if; 3534 end loop; 3535 end; 3536 end if; 3537 end Set_Successor; 3538 3539 ------------ 3540 -- Setcur -- 3541 ------------ 3542 3543 function Setcur (Var : not null access Natural) return Pattern is 3544 begin 3545 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var))); 3546 end Setcur; 3547 3548 ---------- 3549 -- Span -- 3550 ---------- 3551 3552 function Span (Str : String) return Pattern is 3553 begin 3554 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str))); 3555 end Span; 3556 3557 function Span (Str : VString) return Pattern is 3558 begin 3559 return Span (S (Str)); 3560 end Span; 3561 3562 function Span (Str : Character) return Pattern is 3563 begin 3564 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str)); 3565 end Span; 3566 3567 function Span (Str : Character_Set) return Pattern is 3568 begin 3569 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str)); 3570 end Span; 3571 3572 function Span (Str : not null access VString) return Pattern is 3573 begin 3574 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str))); 3575 end Span; 3576 3577 function Span (Str : VString_Func) return Pattern is 3578 begin 3579 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str)); 3580 end Span; 3581 3582 ------------ 3583 -- Str_BF -- 3584 ------------ 3585 3586 function Str_BF (A : Boolean_Func) return String is 3587 function To_A is new Ada.Unchecked_Conversion (Boolean_Func, Address); 3588 begin 3589 return "BF(" & Image (To_A (A)) & ')'; 3590 end Str_BF; 3591 3592 ------------ 3593 -- Str_FP -- 3594 ------------ 3595 3596 function Str_FP (A : File_Ptr) return String is 3597 begin 3598 return "FP(" & Image (A.all'Address) & ')'; 3599 end Str_FP; 3600 3601 ------------ 3602 -- Str_NF -- 3603 ------------ 3604 3605 function Str_NF (A : Natural_Func) return String is 3606 function To_A is new Ada.Unchecked_Conversion (Natural_Func, Address); 3607 begin 3608 return "NF(" & Image (To_A (A)) & ')'; 3609 end Str_NF; 3610 3611 ------------ 3612 -- Str_NP -- 3613 ------------ 3614 3615 function Str_NP (A : Natural_Ptr) return String is 3616 begin 3617 return "NP(" & Image (A.all'Address) & ')'; 3618 end Str_NP; 3619 3620 ------------ 3621 -- Str_PP -- 3622 ------------ 3623 3624 function Str_PP (A : Pattern_Ptr) return String is 3625 begin 3626 return "PP(" & Image (A.all'Address) & ')'; 3627 end Str_PP; 3628 3629 ------------ 3630 -- Str_VF -- 3631 ------------ 3632 3633 function Str_VF (A : VString_Func) return String is 3634 function To_A is new Ada.Unchecked_Conversion (VString_Func, Address); 3635 begin 3636 return "VF(" & Image (To_A (A)) & ')'; 3637 end Str_VF; 3638 3639 ------------ 3640 -- Str_VP -- 3641 ------------ 3642 3643 function Str_VP (A : VString_Ptr) return String is 3644 begin 3645 return "VP(" & Image (A.all'Address) & ')'; 3646 end Str_VP; 3647 3648 ------------- 3649 -- Succeed -- 3650 ------------- 3651 3652 function Succeed return Pattern is 3653 begin 3654 return (AFC with 1, new PE'(PC_Succeed, 1, EOP)); 3655 end Succeed; 3656 3657 --------- 3658 -- Tab -- 3659 --------- 3660 3661 function Tab (Count : Natural) return Pattern is 3662 begin 3663 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count)); 3664 end Tab; 3665 3666 function Tab (Count : Natural_Func) return Pattern is 3667 begin 3668 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count)); 3669 end Tab; 3670 3671 function Tab (Count : not null access Natural) return Pattern is 3672 begin 3673 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count))); 3674 end Tab; 3675 3676 --------------------------- 3677 -- Uninitialized_Pattern -- 3678 --------------------------- 3679 3680 procedure Uninitialized_Pattern is 3681 begin 3682 raise Program_Error with 3683 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern"; 3684 end Uninitialized_Pattern; 3685 3686 ------------ 3687 -- XMatch -- 3688 ------------ 3689 3690 procedure XMatch 3691 (Subject : String; 3692 Pat_P : PE_Ptr; 3693 Pat_S : Natural; 3694 Start : out Natural; 3695 Stop : out Natural) 3696 is 3697 Node : PE_Ptr; 3698 -- Pointer to current pattern node. Initialized from Pat_P, and then 3699 -- updated as the match proceeds through its constituent elements. 3700 3701 Length : constant Natural := Subject'Length; 3702 -- Length of string (= Subject'Last, since Subject'First is always 1) 3703 3704 Cursor : Integer := 0; 3705 -- If the value is non-negative, then this value is the index showing 3706 -- the current position of the match in the subject string. The next 3707 -- character to be matched is at Subject (Cursor + 1). Note that since 3708 -- our view of the subject string in XMatch always has a lower bound 3709 -- of one, regardless of original bounds, that this definition exactly 3710 -- corresponds to the cursor value as referenced by functions like Pos. 3711 -- 3712 -- If the value is negative, then this is a saved stack pointer, 3713 -- typically a base pointer of an inner or outer region. Cursor 3714 -- temporarily holds such a value when it is popped from the stack 3715 -- by Fail. In all cases, Cursor is reset to a proper non-negative 3716 -- cursor value before the match proceeds (e.g. by propagating the 3717 -- failure and popping a "real" cursor value from the stack. 3718 3719 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P); 3720 -- Dummy pattern element used in the unanchored case 3721 3722 Stack : Stack_Type; 3723 -- The pattern matching failure stack for this call to Match 3724 3725 Stack_Ptr : Stack_Range; 3726 -- Current stack pointer. This points to the top element of the stack 3727 -- that is currently in use. At the outer level this is the special 3728 -- entry placed on the stack according to the anchor mode. 3729 3730 Stack_Init : constant Stack_Range := Stack'First + 1; 3731 -- This is the initial value of the Stack_Ptr and Stack_Base. The 3732 -- initial (Stack'First) element of the stack is not used so that 3733 -- when we pop the last element off, Stack_Ptr is still in range. 3734 3735 Stack_Base : Stack_Range; 3736 -- This value is the stack base value, i.e. the stack pointer for the 3737 -- first history stack entry in the current stack region. See separate 3738 -- section on handling of recursive pattern matches. 3739 3740 Assign_OnM : Boolean := False; 3741 -- Set True if assign-on-match or write-on-match operations may be 3742 -- present in the history stack, which must then be scanned on a 3743 -- successful match. 3744 3745 procedure Pop_Region; 3746 pragma Inline (Pop_Region); 3747 -- Used at the end of processing of an inner region. If the inner 3748 -- region left no stack entries, then all trace of it is removed. 3749 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper 3750 -- handling of alternatives in the inner region. 3751 3752 procedure Push (Node : PE_Ptr); 3753 pragma Inline (Push); 3754 -- Make entry in pattern matching stack with current cursor value 3755 3756 procedure Push_Region; 3757 pragma Inline (Push_Region); 3758 -- This procedure makes a new region on the history stack. The 3759 -- caller first establishes the special entry on the stack, but 3760 -- does not push the stack pointer. Then this call stacks a 3761 -- PC_Remove_Region node, on top of this entry, using the cursor 3762 -- field of the PC_Remove_Region entry to save the outer level 3763 -- stack base value, and resets the stack base to point to this 3764 -- PC_Remove_Region node. 3765 3766 ---------------- 3767 -- Pop_Region -- 3768 ---------------- 3769 3770 procedure Pop_Region is 3771 begin 3772 -- If nothing was pushed in the inner region, we can just get 3773 -- rid of it entirely, leaving no traces that it was ever there 3774 3775 if Stack_Ptr = Stack_Base then 3776 Stack_Ptr := Stack_Base - 2; 3777 Stack_Base := Stack (Stack_Ptr + 2).Cursor; 3778 3779 -- If stuff was pushed in the inner region, then we have to 3780 -- push a PC_R_Restore node so that we properly handle possible 3781 -- rematches within the region. 3782 3783 else 3784 Stack_Ptr := Stack_Ptr + 1; 3785 Stack (Stack_Ptr).Cursor := Stack_Base; 3786 Stack (Stack_Ptr).Node := CP_R_Restore'Access; 3787 Stack_Base := Stack (Stack_Base).Cursor; 3788 end if; 3789 end Pop_Region; 3790 3791 ---------- 3792 -- Push -- 3793 ---------- 3794 3795 procedure Push (Node : PE_Ptr) is 3796 begin 3797 Stack_Ptr := Stack_Ptr + 1; 3798 Stack (Stack_Ptr).Cursor := Cursor; 3799 Stack (Stack_Ptr).Node := Node; 3800 end Push; 3801 3802 ----------------- 3803 -- Push_Region -- 3804 ----------------- 3805 3806 procedure Push_Region is 3807 begin 3808 Stack_Ptr := Stack_Ptr + 2; 3809 Stack (Stack_Ptr).Cursor := Stack_Base; 3810 Stack (Stack_Ptr).Node := CP_R_Remove'Access; 3811 Stack_Base := Stack_Ptr; 3812 end Push_Region; 3813 3814 -- Start of processing for XMatch 3815 3816 begin 3817 if Pat_P = null then 3818 Uninitialized_Pattern; 3819 end if; 3820 3821 -- Check we have enough stack for this pattern. This check deals with 3822 -- every possibility except a match of a recursive pattern, where we 3823 -- make a check at each recursion level. 3824 3825 if Pat_S >= Stack_Size - 1 then 3826 raise Pattern_Stack_Overflow; 3827 end if; 3828 3829 -- In anchored mode, the bottom entry on the stack is an abort entry 3830 3831 if Anchored_Mode then 3832 Stack (Stack_Init).Node := CP_Cancel'Access; 3833 Stack (Stack_Init).Cursor := 0; 3834 3835 -- In unanchored more, the bottom entry on the stack references 3836 -- the special pattern element PE_Unanchored, whose Pthen field 3837 -- points to the initial pattern element. The cursor value in this 3838 -- entry is the number of anchor moves so far. 3839 3840 else 3841 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access; 3842 Stack (Stack_Init).Cursor := 0; 3843 end if; 3844 3845 Stack_Ptr := Stack_Init; 3846 Stack_Base := Stack_Ptr; 3847 Cursor := 0; 3848 Node := Pat_P; 3849 goto Match; 3850 3851 ----------------------------------------- 3852 -- Main Pattern Matching State Control -- 3853 ----------------------------------------- 3854 3855 -- This is a state machine which uses gotos to change state. The 3856 -- initial state is Match, to initiate the matching of the first 3857 -- element, so the goto Match above starts the match. In the 3858 -- following descriptions, we indicate the global values that 3859 -- are relevant for the state transition. 3860 3861 -- Come here if entire match fails 3862 3863 <<Match_Fail>> 3864 Start := 0; 3865 Stop := 0; 3866 return; 3867 3868 -- Come here if entire match succeeds 3869 3870 -- Cursor current position in subject string 3871 3872 <<Match_Succeed>> 3873 Start := Stack (Stack_Init).Cursor + 1; 3874 Stop := Cursor; 3875 3876 -- Scan history stack for deferred assignments or writes 3877 3878 if Assign_OnM then 3879 for S in Stack_Init .. Stack_Ptr loop 3880 if Stack (S).Node = CP_Assign'Access then 3881 declare 3882 Inner_Base : constant Stack_Range := 3883 Stack (S + 1).Cursor; 3884 Special_Entry : constant Stack_Range := 3885 Inner_Base - 1; 3886 Node_OnM : constant PE_Ptr := 3887 Stack (Special_Entry).Node; 3888 Start : constant Natural := 3889 Stack (Special_Entry).Cursor + 1; 3890 Stop : constant Natural := Stack (S).Cursor; 3891 3892 begin 3893 if Node_OnM.Pcode = PC_Assign_OnM then 3894 Set_Unbounded_String 3895 (Node_OnM.VP.all, Subject (Start .. Stop)); 3896 3897 elsif Node_OnM.Pcode = PC_Write_OnM then 3898 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop)); 3899 3900 else 3901 Logic_Error; 3902 end if; 3903 end; 3904 end if; 3905 end loop; 3906 end if; 3907 3908 return; 3909 3910 -- Come here if attempt to match current element fails 3911 3912 -- Stack_Base current stack base 3913 -- Stack_Ptr current stack pointer 3914 3915 <<Fail>> 3916 Cursor := Stack (Stack_Ptr).Cursor; 3917 Node := Stack (Stack_Ptr).Node; 3918 Stack_Ptr := Stack_Ptr - 1; 3919 goto Match; 3920 3921 -- Come here if attempt to match current element succeeds 3922 3923 -- Cursor current position in subject string 3924 -- Node pointer to node successfully matched 3925 -- Stack_Base current stack base 3926 -- Stack_Ptr current stack pointer 3927 3928 <<Succeed>> 3929 Node := Node.Pthen; 3930 3931 -- Come here to match the next pattern element 3932 3933 -- Cursor current position in subject string 3934 -- Node pointer to node to be matched 3935 -- Stack_Base current stack base 3936 -- Stack_Ptr current stack pointer 3937 3938 <<Match>> 3939 3940 -------------------------------------------------- 3941 -- Main Pattern Match Element Matching Routines -- 3942 -------------------------------------------------- 3943 3944 -- Here is the case statement that processes the current node. The 3945 -- processing for each element does one of five things: 3946 3947 -- goto Succeed to move to the successor 3948 -- goto Match_Succeed if the entire match succeeds 3949 -- goto Match_Fail if the entire match fails 3950 -- goto Fail to signal failure of current match 3951 3952 -- Processing is NOT allowed to fall through 3953 3954 case Node.Pcode is 3955 3956 -- Cancel 3957 3958 when PC_Cancel => 3959 goto Match_Fail; 3960 3961 -- Alternation 3962 3963 when PC_Alt => 3964 Push (Node.Alt); 3965 Node := Node.Pthen; 3966 goto Match; 3967 3968 -- Any (one character case) 3969 3970 when PC_Any_CH => 3971 if Cursor < Length 3972 and then Subject (Cursor + 1) = Node.Char 3973 then 3974 Cursor := Cursor + 1; 3975 goto Succeed; 3976 else 3977 goto Fail; 3978 end if; 3979 3980 -- Any (character set case) 3981 3982 when PC_Any_CS => 3983 if Cursor < Length 3984 and then Is_In (Subject (Cursor + 1), Node.CS) 3985 then 3986 Cursor := Cursor + 1; 3987 goto Succeed; 3988 else 3989 goto Fail; 3990 end if; 3991 3992 -- Any (string function case) 3993 3994 when PC_Any_VF => declare 3995 U : constant VString := Node.VF.all; 3996 S : Big_String_Access; 3997 L : Natural; 3998 3999 begin 4000 Get_String (U, S, L); 4001 4002 if Cursor < Length 4003 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4004 then 4005 Cursor := Cursor + 1; 4006 goto Succeed; 4007 else 4008 goto Fail; 4009 end if; 4010 end; 4011 4012 -- Any (string pointer case) 4013 4014 when PC_Any_VP => declare 4015 U : constant VString := Node.VP.all; 4016 S : Big_String_Access; 4017 L : Natural; 4018 4019 begin 4020 Get_String (U, S, L); 4021 4022 if Cursor < Length 4023 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4024 then 4025 Cursor := Cursor + 1; 4026 goto Succeed; 4027 else 4028 goto Fail; 4029 end if; 4030 end; 4031 4032 -- Arb (initial match) 4033 4034 when PC_Arb_X => 4035 Push (Node.Alt); 4036 Node := Node.Pthen; 4037 goto Match; 4038 4039 -- Arb (extension) 4040 4041 when PC_Arb_Y => 4042 if Cursor < Length then 4043 Cursor := Cursor + 1; 4044 Push (Node); 4045 goto Succeed; 4046 else 4047 goto Fail; 4048 end if; 4049 4050 -- Arbno_S (simple Arbno initialize). This is the node that 4051 -- initiates the match of a simple Arbno structure. 4052 4053 when PC_Arbno_S => 4054 Push (Node.Alt); 4055 Node := Node.Pthen; 4056 goto Match; 4057 4058 -- Arbno_X (Arbno initialize). This is the node that initiates 4059 -- the match of a complex Arbno structure. 4060 4061 when PC_Arbno_X => 4062 Push (Node.Alt); 4063 Node := Node.Pthen; 4064 goto Match; 4065 4066 -- Arbno_Y (Arbno rematch). This is the node that is executed 4067 -- following successful matching of one instance of a complex 4068 -- Arbno pattern. 4069 4070 when PC_Arbno_Y => declare 4071 Null_Match : constant Boolean := 4072 Cursor = Stack (Stack_Base - 1).Cursor; 4073 4074 begin 4075 Pop_Region; 4076 4077 -- If arbno extension matched null, then immediately fail 4078 4079 if Null_Match then 4080 goto Fail; 4081 end if; 4082 4083 -- Here we must do a stack check to make sure enough stack 4084 -- is left. This check will happen once for each instance of 4085 -- the Arbno pattern that is matched. The Nat field of a 4086 -- PC_Arbno pattern contains the maximum stack entries needed 4087 -- for the Arbno with one instance and the successor pattern 4088 4089 if Stack_Ptr + Node.Nat >= Stack'Last then 4090 raise Pattern_Stack_Overflow; 4091 end if; 4092 4093 goto Succeed; 4094 end; 4095 4096 -- Assign. If this node is executed, it means the assign-on-match 4097 -- or write-on-match operation will not happen after all, so we 4098 -- is propagate the failure, removing the PC_Assign node. 4099 4100 when PC_Assign => 4101 goto Fail; 4102 4103 -- Assign immediate. This node performs the actual assignment 4104 4105 when PC_Assign_Imm => 4106 Set_Unbounded_String 4107 (Node.VP.all, 4108 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 4109 Pop_Region; 4110 goto Succeed; 4111 4112 -- Assign on match. This node sets up for the eventual assignment 4113 4114 when PC_Assign_OnM => 4115 Stack (Stack_Base - 1).Node := Node; 4116 Push (CP_Assign'Access); 4117 Pop_Region; 4118 Assign_OnM := True; 4119 goto Succeed; 4120 4121 -- Bal 4122 4123 when PC_Bal => 4124 if Cursor >= Length or else Subject (Cursor + 1) = ')' then 4125 goto Fail; 4126 4127 elsif Subject (Cursor + 1) = '(' then 4128 declare 4129 Paren_Count : Natural := 1; 4130 4131 begin 4132 loop 4133 Cursor := Cursor + 1; 4134 4135 if Cursor >= Length then 4136 goto Fail; 4137 4138 elsif Subject (Cursor + 1) = '(' then 4139 Paren_Count := Paren_Count + 1; 4140 4141 elsif Subject (Cursor + 1) = ')' then 4142 Paren_Count := Paren_Count - 1; 4143 exit when Paren_Count = 0; 4144 end if; 4145 end loop; 4146 end; 4147 end if; 4148 4149 Cursor := Cursor + 1; 4150 Push (Node); 4151 goto Succeed; 4152 4153 -- Break (one character case) 4154 4155 when PC_Break_CH => 4156 while Cursor < Length loop 4157 if Subject (Cursor + 1) = Node.Char then 4158 goto Succeed; 4159 else 4160 Cursor := Cursor + 1; 4161 end if; 4162 end loop; 4163 4164 goto Fail; 4165 4166 -- Break (character set case) 4167 4168 when PC_Break_CS => 4169 while Cursor < Length loop 4170 if Is_In (Subject (Cursor + 1), Node.CS) then 4171 goto Succeed; 4172 else 4173 Cursor := Cursor + 1; 4174 end if; 4175 end loop; 4176 4177 goto Fail; 4178 4179 -- Break (string function case) 4180 4181 when PC_Break_VF => declare 4182 U : constant VString := Node.VF.all; 4183 S : Big_String_Access; 4184 L : Natural; 4185 4186 begin 4187 Get_String (U, S, L); 4188 4189 while Cursor < Length loop 4190 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4191 goto Succeed; 4192 else 4193 Cursor := Cursor + 1; 4194 end if; 4195 end loop; 4196 4197 goto Fail; 4198 end; 4199 4200 -- Break (string pointer case) 4201 4202 when PC_Break_VP => declare 4203 U : constant VString := Node.VP.all; 4204 S : Big_String_Access; 4205 L : Natural; 4206 4207 begin 4208 Get_String (U, S, L); 4209 4210 while Cursor < Length loop 4211 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4212 goto Succeed; 4213 else 4214 Cursor := Cursor + 1; 4215 end if; 4216 end loop; 4217 4218 goto Fail; 4219 end; 4220 4221 -- BreakX (one character case) 4222 4223 when PC_BreakX_CH => 4224 while Cursor < Length loop 4225 if Subject (Cursor + 1) = Node.Char then 4226 goto Succeed; 4227 else 4228 Cursor := Cursor + 1; 4229 end if; 4230 end loop; 4231 4232 goto Fail; 4233 4234 -- BreakX (character set case) 4235 4236 when PC_BreakX_CS => 4237 while Cursor < Length loop 4238 if Is_In (Subject (Cursor + 1), Node.CS) then 4239 goto Succeed; 4240 else 4241 Cursor := Cursor + 1; 4242 end if; 4243 end loop; 4244 4245 goto Fail; 4246 4247 -- BreakX (string function case) 4248 4249 when PC_BreakX_VF => declare 4250 U : constant VString := Node.VF.all; 4251 S : Big_String_Access; 4252 L : Natural; 4253 4254 begin 4255 Get_String (U, S, L); 4256 4257 while Cursor < Length loop 4258 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4259 goto Succeed; 4260 else 4261 Cursor := Cursor + 1; 4262 end if; 4263 end loop; 4264 4265 goto Fail; 4266 end; 4267 4268 -- BreakX (string pointer case) 4269 4270 when PC_BreakX_VP => declare 4271 U : constant VString := Node.VP.all; 4272 S : Big_String_Access; 4273 L : Natural; 4274 4275 begin 4276 Get_String (U, S, L); 4277 4278 while Cursor < Length loop 4279 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4280 goto Succeed; 4281 else 4282 Cursor := Cursor + 1; 4283 end if; 4284 end loop; 4285 4286 goto Fail; 4287 end; 4288 4289 -- BreakX_X (BreakX extension). See section on "Compound Pattern 4290 -- Structures". This node is the alternative that is stacked to 4291 -- skip past the break character and extend the break. 4292 4293 when PC_BreakX_X => 4294 Cursor := Cursor + 1; 4295 goto Succeed; 4296 4297 -- Character (one character string) 4298 4299 when PC_Char => 4300 if Cursor < Length 4301 and then Subject (Cursor + 1) = Node.Char 4302 then 4303 Cursor := Cursor + 1; 4304 goto Succeed; 4305 else 4306 goto Fail; 4307 end if; 4308 4309 -- End of Pattern 4310 4311 when PC_EOP => 4312 if Stack_Base = Stack_Init then 4313 goto Match_Succeed; 4314 4315 -- End of recursive inner match. See separate section on 4316 -- handing of recursive pattern matches for details. 4317 4318 else 4319 Node := Stack (Stack_Base - 1).Node; 4320 Pop_Region; 4321 goto Match; 4322 end if; 4323 4324 -- Fail 4325 4326 when PC_Fail => 4327 goto Fail; 4328 4329 -- Fence (built in pattern) 4330 4331 when PC_Fence => 4332 Push (CP_Cancel'Access); 4333 goto Succeed; 4334 4335 -- Fence function node X. This is the node that gets control 4336 -- after a successful match of the fenced pattern. 4337 4338 when PC_Fence_X => 4339 Stack_Ptr := Stack_Ptr + 1; 4340 Stack (Stack_Ptr).Cursor := Stack_Base; 4341 Stack (Stack_Ptr).Node := CP_Fence_Y'Access; 4342 Stack_Base := Stack (Stack_Base).Cursor; 4343 goto Succeed; 4344 4345 -- Fence function node Y. This is the node that gets control on 4346 -- a failure that occurs after the fenced pattern has matched. 4347 4348 -- Note: the Cursor at this stage is actually the inner stack 4349 -- base value. We don't reset this, but we do use it to strip 4350 -- off all the entries made by the fenced pattern. 4351 4352 when PC_Fence_Y => 4353 Stack_Ptr := Cursor - 2; 4354 goto Fail; 4355 4356 -- Len (integer case) 4357 4358 when PC_Len_Nat => 4359 if Cursor + Node.Nat > Length then 4360 goto Fail; 4361 else 4362 Cursor := Cursor + Node.Nat; 4363 goto Succeed; 4364 end if; 4365 4366 -- Len (Integer function case) 4367 4368 when PC_Len_NF => declare 4369 N : constant Natural := Node.NF.all; 4370 begin 4371 if Cursor + N > Length then 4372 goto Fail; 4373 else 4374 Cursor := Cursor + N; 4375 goto Succeed; 4376 end if; 4377 end; 4378 4379 -- Len (integer pointer case) 4380 4381 when PC_Len_NP => 4382 if Cursor + Node.NP.all > Length then 4383 goto Fail; 4384 else 4385 Cursor := Cursor + Node.NP.all; 4386 goto Succeed; 4387 end if; 4388 4389 -- NotAny (one character case) 4390 4391 when PC_NotAny_CH => 4392 if Cursor < Length 4393 and then Subject (Cursor + 1) /= Node.Char 4394 then 4395 Cursor := Cursor + 1; 4396 goto Succeed; 4397 else 4398 goto Fail; 4399 end if; 4400 4401 -- NotAny (character set case) 4402 4403 when PC_NotAny_CS => 4404 if Cursor < Length 4405 and then not Is_In (Subject (Cursor + 1), Node.CS) 4406 then 4407 Cursor := Cursor + 1; 4408 goto Succeed; 4409 else 4410 goto Fail; 4411 end if; 4412 4413 -- NotAny (string function case) 4414 4415 when PC_NotAny_VF => declare 4416 U : constant VString := Node.VF.all; 4417 S : Big_String_Access; 4418 L : Natural; 4419 4420 begin 4421 Get_String (U, S, L); 4422 4423 if Cursor < Length 4424 and then 4425 not Is_In (Subject (Cursor + 1), S (1 .. L)) 4426 then 4427 Cursor := Cursor + 1; 4428 goto Succeed; 4429 else 4430 goto Fail; 4431 end if; 4432 end; 4433 4434 -- NotAny (string pointer case) 4435 4436 when PC_NotAny_VP => declare 4437 U : constant VString := Node.VP.all; 4438 S : Big_String_Access; 4439 L : Natural; 4440 4441 begin 4442 Get_String (U, S, L); 4443 4444 if Cursor < Length 4445 and then 4446 not Is_In (Subject (Cursor + 1), S (1 .. L)) 4447 then 4448 Cursor := Cursor + 1; 4449 goto Succeed; 4450 else 4451 goto Fail; 4452 end if; 4453 end; 4454 4455 -- NSpan (one character case) 4456 4457 when PC_NSpan_CH => 4458 while Cursor < Length 4459 and then Subject (Cursor + 1) = Node.Char 4460 loop 4461 Cursor := Cursor + 1; 4462 end loop; 4463 4464 goto Succeed; 4465 4466 -- NSpan (character set case) 4467 4468 when PC_NSpan_CS => 4469 while Cursor < Length 4470 and then Is_In (Subject (Cursor + 1), Node.CS) 4471 loop 4472 Cursor := Cursor + 1; 4473 end loop; 4474 4475 goto Succeed; 4476 4477 -- NSpan (string function case) 4478 4479 when PC_NSpan_VF => declare 4480 U : constant VString := Node.VF.all; 4481 S : Big_String_Access; 4482 L : Natural; 4483 4484 begin 4485 Get_String (U, S, L); 4486 4487 while Cursor < Length 4488 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4489 loop 4490 Cursor := Cursor + 1; 4491 end loop; 4492 4493 goto Succeed; 4494 end; 4495 4496 -- NSpan (string pointer case) 4497 4498 when PC_NSpan_VP => declare 4499 U : constant VString := Node.VP.all; 4500 S : Big_String_Access; 4501 L : Natural; 4502 4503 begin 4504 Get_String (U, S, L); 4505 4506 while Cursor < Length 4507 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4508 loop 4509 Cursor := Cursor + 1; 4510 end loop; 4511 4512 goto Succeed; 4513 end; 4514 4515 -- Null string 4516 4517 when PC_Null => 4518 goto Succeed; 4519 4520 -- Pos (integer case) 4521 4522 when PC_Pos_Nat => 4523 if Cursor = Node.Nat then 4524 goto Succeed; 4525 else 4526 goto Fail; 4527 end if; 4528 4529 -- Pos (Integer function case) 4530 4531 when PC_Pos_NF => declare 4532 N : constant Natural := Node.NF.all; 4533 begin 4534 if Cursor = N then 4535 goto Succeed; 4536 else 4537 goto Fail; 4538 end if; 4539 end; 4540 4541 -- Pos (integer pointer case) 4542 4543 when PC_Pos_NP => 4544 if Cursor = Node.NP.all then 4545 goto Succeed; 4546 else 4547 goto Fail; 4548 end if; 4549 4550 -- Predicate function 4551 4552 when PC_Pred_Func => 4553 if Node.BF.all then 4554 goto Succeed; 4555 else 4556 goto Fail; 4557 end if; 4558 4559 -- Region Enter. Initiate new pattern history stack region 4560 4561 when PC_R_Enter => 4562 Stack (Stack_Ptr + 1).Cursor := Cursor; 4563 Push_Region; 4564 goto Succeed; 4565 4566 -- Region Remove node. This is the node stacked by an R_Enter. 4567 -- It removes the special format stack entry right underneath, and 4568 -- then restores the outer level stack base and signals failure. 4569 4570 -- Note: the cursor value at this stage is actually the (negative) 4571 -- stack base value for the outer level. 4572 4573 when PC_R_Remove => 4574 Stack_Base := Cursor; 4575 Stack_Ptr := Stack_Ptr - 1; 4576 goto Fail; 4577 4578 -- Region restore node. This is the node stacked at the end of an 4579 -- inner level match. Its function is to restore the inner level 4580 -- region, so that alternatives in this region can be sought. 4581 4582 -- Note: the Cursor at this stage is actually the negative of the 4583 -- inner stack base value, which we use to restore the inner region. 4584 4585 when PC_R_Restore => 4586 Stack_Base := Cursor; 4587 goto Fail; 4588 4589 -- Rest 4590 4591 when PC_Rest => 4592 Cursor := Length; 4593 goto Succeed; 4594 4595 -- Initiate recursive match (pattern pointer case) 4596 4597 when PC_Rpat => 4598 Stack (Stack_Ptr + 1).Node := Node.Pthen; 4599 Push_Region; 4600 4601 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then 4602 raise Pattern_Stack_Overflow; 4603 else 4604 Node := Node.PP.all.P; 4605 goto Match; 4606 end if; 4607 4608 -- RPos (integer case) 4609 4610 when PC_RPos_Nat => 4611 if Cursor = (Length - Node.Nat) then 4612 goto Succeed; 4613 else 4614 goto Fail; 4615 end if; 4616 4617 -- RPos (integer function case) 4618 4619 when PC_RPos_NF => declare 4620 N : constant Natural := Node.NF.all; 4621 begin 4622 if Length - Cursor = N then 4623 goto Succeed; 4624 else 4625 goto Fail; 4626 end if; 4627 end; 4628 4629 -- RPos (integer pointer case) 4630 4631 when PC_RPos_NP => 4632 if Cursor = (Length - Node.NP.all) then 4633 goto Succeed; 4634 else 4635 goto Fail; 4636 end if; 4637 4638 -- RTab (integer case) 4639 4640 when PC_RTab_Nat => 4641 if Cursor <= (Length - Node.Nat) then 4642 Cursor := Length - Node.Nat; 4643 goto Succeed; 4644 else 4645 goto Fail; 4646 end if; 4647 4648 -- RTab (integer function case) 4649 4650 when PC_RTab_NF => declare 4651 N : constant Natural := Node.NF.all; 4652 begin 4653 if Length - Cursor >= N then 4654 Cursor := Length - N; 4655 goto Succeed; 4656 else 4657 goto Fail; 4658 end if; 4659 end; 4660 4661 -- RTab (integer pointer case) 4662 4663 when PC_RTab_NP => 4664 if Cursor <= (Length - Node.NP.all) then 4665 Cursor := Length - Node.NP.all; 4666 goto Succeed; 4667 else 4668 goto Fail; 4669 end if; 4670 4671 -- Cursor assignment 4672 4673 when PC_Setcur => 4674 Node.Var.all := Cursor; 4675 goto Succeed; 4676 4677 -- Span (one character case) 4678 4679 when PC_Span_CH => declare 4680 P : Natural; 4681 4682 begin 4683 P := Cursor; 4684 while P < Length 4685 and then Subject (P + 1) = Node.Char 4686 loop 4687 P := P + 1; 4688 end loop; 4689 4690 if P /= Cursor then 4691 Cursor := P; 4692 goto Succeed; 4693 else 4694 goto Fail; 4695 end if; 4696 end; 4697 4698 -- Span (character set case) 4699 4700 when PC_Span_CS => declare 4701 P : Natural; 4702 4703 begin 4704 P := Cursor; 4705 while P < Length 4706 and then Is_In (Subject (P + 1), Node.CS) 4707 loop 4708 P := P + 1; 4709 end loop; 4710 4711 if P /= Cursor then 4712 Cursor := P; 4713 goto Succeed; 4714 else 4715 goto Fail; 4716 end if; 4717 end; 4718 4719 -- Span (string function case) 4720 4721 when PC_Span_VF => declare 4722 U : constant VString := Node.VF.all; 4723 S : Big_String_Access; 4724 L : Natural; 4725 P : Natural; 4726 4727 begin 4728 Get_String (U, S, L); 4729 4730 P := Cursor; 4731 while P < Length 4732 and then Is_In (Subject (P + 1), S (1 .. L)) 4733 loop 4734 P := P + 1; 4735 end loop; 4736 4737 if P /= Cursor then 4738 Cursor := P; 4739 goto Succeed; 4740 else 4741 goto Fail; 4742 end if; 4743 end; 4744 4745 -- Span (string pointer case) 4746 4747 when PC_Span_VP => declare 4748 U : constant VString := Node.VP.all; 4749 S : Big_String_Access; 4750 L : Natural; 4751 P : Natural; 4752 4753 begin 4754 Get_String (U, S, L); 4755 4756 P := Cursor; 4757 while P < Length 4758 and then Is_In (Subject (P + 1), S (1 .. L)) 4759 loop 4760 P := P + 1; 4761 end loop; 4762 4763 if P /= Cursor then 4764 Cursor := P; 4765 goto Succeed; 4766 else 4767 goto Fail; 4768 end if; 4769 end; 4770 4771 -- String (two character case) 4772 4773 when PC_String_2 => 4774 if (Length - Cursor) >= 2 4775 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2 4776 then 4777 Cursor := Cursor + 2; 4778 goto Succeed; 4779 else 4780 goto Fail; 4781 end if; 4782 4783 -- String (three character case) 4784 4785 when PC_String_3 => 4786 if (Length - Cursor) >= 3 4787 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3 4788 then 4789 Cursor := Cursor + 3; 4790 goto Succeed; 4791 else 4792 goto Fail; 4793 end if; 4794 4795 -- String (four character case) 4796 4797 when PC_String_4 => 4798 if (Length - Cursor) >= 4 4799 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4 4800 then 4801 Cursor := Cursor + 4; 4802 goto Succeed; 4803 else 4804 goto Fail; 4805 end if; 4806 4807 -- String (five character case) 4808 4809 when PC_String_5 => 4810 if (Length - Cursor) >= 5 4811 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5 4812 then 4813 Cursor := Cursor + 5; 4814 goto Succeed; 4815 else 4816 goto Fail; 4817 end if; 4818 4819 -- String (six character case) 4820 4821 when PC_String_6 => 4822 if (Length - Cursor) >= 6 4823 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6 4824 then 4825 Cursor := Cursor + 6; 4826 goto Succeed; 4827 else 4828 goto Fail; 4829 end if; 4830 4831 -- String (case of more than six characters) 4832 4833 when PC_String => declare 4834 Len : constant Natural := Node.Str'Length; 4835 begin 4836 if (Length - Cursor) >= Len 4837 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len) 4838 then 4839 Cursor := Cursor + Len; 4840 goto Succeed; 4841 else 4842 goto Fail; 4843 end if; 4844 end; 4845 4846 -- String (function case) 4847 4848 when PC_String_VF => declare 4849 U : constant VString := Node.VF.all; 4850 S : Big_String_Access; 4851 L : Natural; 4852 4853 begin 4854 Get_String (U, S, L); 4855 4856 if (Length - Cursor) >= L 4857 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 4858 then 4859 Cursor := Cursor + L; 4860 goto Succeed; 4861 else 4862 goto Fail; 4863 end if; 4864 end; 4865 4866 -- String (pointer case) 4867 4868 when PC_String_VP => declare 4869 U : constant VString := Node.VP.all; 4870 S : Big_String_Access; 4871 L : Natural; 4872 4873 begin 4874 Get_String (U, S, L); 4875 4876 if (Length - Cursor) >= L 4877 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 4878 then 4879 Cursor := Cursor + L; 4880 goto Succeed; 4881 else 4882 goto Fail; 4883 end if; 4884 end; 4885 4886 -- Succeed 4887 4888 when PC_Succeed => 4889 Push (Node); 4890 goto Succeed; 4891 4892 -- Tab (integer case) 4893 4894 when PC_Tab_Nat => 4895 if Cursor <= Node.Nat then 4896 Cursor := Node.Nat; 4897 goto Succeed; 4898 else 4899 goto Fail; 4900 end if; 4901 4902 -- Tab (integer function case) 4903 4904 when PC_Tab_NF => declare 4905 N : constant Natural := Node.NF.all; 4906 begin 4907 if Cursor <= N then 4908 Cursor := N; 4909 goto Succeed; 4910 else 4911 goto Fail; 4912 end if; 4913 end; 4914 4915 -- Tab (integer pointer case) 4916 4917 when PC_Tab_NP => 4918 if Cursor <= Node.NP.all then 4919 Cursor := Node.NP.all; 4920 goto Succeed; 4921 else 4922 goto Fail; 4923 end if; 4924 4925 -- Unanchored movement 4926 4927 when PC_Unanchored => 4928 4929 -- All done if we tried every position 4930 4931 if Cursor > Length then 4932 goto Match_Fail; 4933 4934 -- Otherwise extend the anchor point, and restack ourself 4935 4936 else 4937 Cursor := Cursor + 1; 4938 Push (Node); 4939 goto Succeed; 4940 end if; 4941 4942 -- Write immediate. This node performs the actual write 4943 4944 when PC_Write_Imm => 4945 Put_Line 4946 (Node.FP.all, 4947 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 4948 Pop_Region; 4949 goto Succeed; 4950 4951 -- Write on match. This node sets up for the eventual write 4952 4953 when PC_Write_OnM => 4954 Stack (Stack_Base - 1).Node := Node; 4955 Push (CP_Assign'Access); 4956 Pop_Region; 4957 Assign_OnM := True; 4958 goto Succeed; 4959 end case; 4960 4961 -- We are NOT allowed to fall though this case statement, since every 4962 -- match routine must end by executing a goto to the appropriate point 4963 -- in the finite state machine model. 4964 4965 pragma Warnings (Off); 4966 Logic_Error; 4967 pragma Warnings (On); 4968 end XMatch; 4969 4970 ------------- 4971 -- XMatchD -- 4972 ------------- 4973 4974 -- Maintenance note: There is a LOT of code duplication between XMatch 4975 -- and XMatchD. This is quite intentional, the point is to avoid any 4976 -- unnecessary debugging overhead in the XMatch case, but this does mean 4977 -- that any changes to XMatchD must be mirrored in XMatch. In case of 4978 -- any major changes, the proper approach is to delete XMatch, make the 4979 -- changes to XMatchD, and then make a copy of XMatchD, removing all 4980 -- calls to Dout, and all Put and Put_Line operations. This copy becomes 4981 -- the new XMatch. 4982 4983 procedure XMatchD 4984 (Subject : String; 4985 Pat_P : PE_Ptr; 4986 Pat_S : Natural; 4987 Start : out Natural; 4988 Stop : out Natural) 4989 is 4990 Node : PE_Ptr; 4991 -- Pointer to current pattern node. Initialized from Pat_P, and then 4992 -- updated as the match proceeds through its constituent elements. 4993 4994 Length : constant Natural := Subject'Length; 4995 -- Length of string (= Subject'Last, since Subject'First is always 1) 4996 4997 Cursor : Integer := 0; 4998 -- If the value is non-negative, then this value is the index showing 4999 -- the current position of the match in the subject string. The next 5000 -- character to be matched is at Subject (Cursor + 1). Note that since 5001 -- our view of the subject string in XMatch always has a lower bound 5002 -- of one, regardless of original bounds, that this definition exactly 5003 -- corresponds to the cursor value as referenced by functions like Pos. 5004 -- 5005 -- If the value is negative, then this is a saved stack pointer, 5006 -- typically a base pointer of an inner or outer region. Cursor 5007 -- temporarily holds such a value when it is popped from the stack 5008 -- by Fail. In all cases, Cursor is reset to a proper non-negative 5009 -- cursor value before the match proceeds (e.g. by propagating the 5010 -- failure and popping a "real" cursor value from the stack. 5011 5012 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P); 5013 -- Dummy pattern element used in the unanchored case 5014 5015 Region_Level : Natural := 0; 5016 -- Keeps track of recursive region level. This is used only for 5017 -- debugging, it is the number of saved history stack base values. 5018 5019 Stack : Stack_Type; 5020 -- The pattern matching failure stack for this call to Match 5021 5022 Stack_Ptr : Stack_Range; 5023 -- Current stack pointer. This points to the top element of the stack 5024 -- that is currently in use. At the outer level this is the special 5025 -- entry placed on the stack according to the anchor mode. 5026 5027 Stack_Init : constant Stack_Range := Stack'First + 1; 5028 -- This is the initial value of the Stack_Ptr and Stack_Base. The 5029 -- initial (Stack'First) element of the stack is not used so that 5030 -- when we pop the last element off, Stack_Ptr is still in range. 5031 5032 Stack_Base : Stack_Range; 5033 -- This value is the stack base value, i.e. the stack pointer for the 5034 -- first history stack entry in the current stack region. See separate 5035 -- section on handling of recursive pattern matches. 5036 5037 Assign_OnM : Boolean := False; 5038 -- Set True if assign-on-match or write-on-match operations may be 5039 -- present in the history stack, which must then be scanned on a 5040 -- successful match. 5041 5042 procedure Dout (Str : String); 5043 -- Output string to standard error with bars indicating region level 5044 5045 procedure Dout (Str : String; A : Character); 5046 -- Calls Dout with the string S ('A') 5047 5048 procedure Dout (Str : String; A : Character_Set); 5049 -- Calls Dout with the string S ("A") 5050 5051 procedure Dout (Str : String; A : Natural); 5052 -- Calls Dout with the string S (A) 5053 5054 procedure Dout (Str : String; A : String); 5055 -- Calls Dout with the string S ("A") 5056 5057 function Img (P : PE_Ptr) return String; 5058 -- Returns a string of the form #nnn where nnn is P.Index 5059 5060 procedure Pop_Region; 5061 pragma Inline (Pop_Region); 5062 -- Used at the end of processing of an inner region. If the inner 5063 -- region left no stack entries, then all trace of it is removed. 5064 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper 5065 -- handling of alternatives in the inner region. 5066 5067 procedure Push (Node : PE_Ptr); 5068 pragma Inline (Push); 5069 -- Make entry in pattern matching stack with current cursor value 5070 5071 procedure Push_Region; 5072 pragma Inline (Push_Region); 5073 -- This procedure makes a new region on the history stack. The 5074 -- caller first establishes the special entry on the stack, but 5075 -- does not push the stack pointer. Then this call stacks a 5076 -- PC_Remove_Region node, on top of this entry, using the cursor 5077 -- field of the PC_Remove_Region entry to save the outer level 5078 -- stack base value, and resets the stack base to point to this 5079 -- PC_Remove_Region node. 5080 5081 ---------- 5082 -- Dout -- 5083 ---------- 5084 5085 procedure Dout (Str : String) is 5086 begin 5087 for J in 1 .. Region_Level loop 5088 Put ("| "); 5089 end loop; 5090 5091 Put_Line (Str); 5092 end Dout; 5093 5094 procedure Dout (Str : String; A : Character) is 5095 begin 5096 Dout (Str & " ('" & A & "')"); 5097 end Dout; 5098 5099 procedure Dout (Str : String; A : Character_Set) is 5100 begin 5101 Dout (Str & " (" & Image (To_Sequence (A)) & ')'); 5102 end Dout; 5103 5104 procedure Dout (Str : String; A : Natural) is 5105 begin 5106 Dout (Str & " (" & A & ')'); 5107 end Dout; 5108 5109 procedure Dout (Str : String; A : String) is 5110 begin 5111 Dout (Str & " (" & Image (A) & ')'); 5112 end Dout; 5113 5114 --------- 5115 -- Img -- 5116 --------- 5117 5118 function Img (P : PE_Ptr) return String is 5119 begin 5120 return "#" & Integer (P.Index) & " "; 5121 end Img; 5122 5123 ---------------- 5124 -- Pop_Region -- 5125 ---------------- 5126 5127 procedure Pop_Region is 5128 begin 5129 Region_Level := Region_Level - 1; 5130 5131 -- If nothing was pushed in the inner region, we can just get 5132 -- rid of it entirely, leaving no traces that it was ever there 5133 5134 if Stack_Ptr = Stack_Base then 5135 Stack_Ptr := Stack_Base - 2; 5136 Stack_Base := Stack (Stack_Ptr + 2).Cursor; 5137 5138 -- If stuff was pushed in the inner region, then we have to 5139 -- push a PC_R_Restore node so that we properly handle possible 5140 -- rematches within the region. 5141 5142 else 5143 Stack_Ptr := Stack_Ptr + 1; 5144 Stack (Stack_Ptr).Cursor := Stack_Base; 5145 Stack (Stack_Ptr).Node := CP_R_Restore'Access; 5146 Stack_Base := Stack (Stack_Base).Cursor; 5147 end if; 5148 end Pop_Region; 5149 5150 ---------- 5151 -- Push -- 5152 ---------- 5153 5154 procedure Push (Node : PE_Ptr) is 5155 begin 5156 Stack_Ptr := Stack_Ptr + 1; 5157 Stack (Stack_Ptr).Cursor := Cursor; 5158 Stack (Stack_Ptr).Node := Node; 5159 end Push; 5160 5161 ----------------- 5162 -- Push_Region -- 5163 ----------------- 5164 5165 procedure Push_Region is 5166 begin 5167 Region_Level := Region_Level + 1; 5168 Stack_Ptr := Stack_Ptr + 2; 5169 Stack (Stack_Ptr).Cursor := Stack_Base; 5170 Stack (Stack_Ptr).Node := CP_R_Remove'Access; 5171 Stack_Base := Stack_Ptr; 5172 end Push_Region; 5173 5174 -- Start of processing for XMatchD 5175 5176 begin 5177 New_Line; 5178 Put_Line ("Initiating pattern match, subject = " & Image (Subject)); 5179 Put ("--------------------------------------"); 5180 5181 for J in 1 .. Length loop 5182 Put ('-'); 5183 end loop; 5184 5185 New_Line; 5186 Put_Line ("subject length = " & Length); 5187 5188 if Pat_P = null then 5189 Uninitialized_Pattern; 5190 end if; 5191 5192 -- Check we have enough stack for this pattern. This check deals with 5193 -- every possibility except a match of a recursive pattern, where we 5194 -- make a check at each recursion level. 5195 5196 if Pat_S >= Stack_Size - 1 then 5197 raise Pattern_Stack_Overflow; 5198 end if; 5199 5200 -- In anchored mode, the bottom entry on the stack is an abort entry 5201 5202 if Anchored_Mode then 5203 Stack (Stack_Init).Node := CP_Cancel'Access; 5204 Stack (Stack_Init).Cursor := 0; 5205 5206 -- In unanchored more, the bottom entry on the stack references 5207 -- the special pattern element PE_Unanchored, whose Pthen field 5208 -- points to the initial pattern element. The cursor value in this 5209 -- entry is the number of anchor moves so far. 5210 5211 else 5212 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access; 5213 Stack (Stack_Init).Cursor := 0; 5214 end if; 5215 5216 Stack_Ptr := Stack_Init; 5217 Stack_Base := Stack_Ptr; 5218 Cursor := 0; 5219 Node := Pat_P; 5220 goto Match; 5221 5222 ----------------------------------------- 5223 -- Main Pattern Matching State Control -- 5224 ----------------------------------------- 5225 5226 -- This is a state machine which uses gotos to change state. The 5227 -- initial state is Match, to initiate the matching of the first 5228 -- element, so the goto Match above starts the match. In the 5229 -- following descriptions, we indicate the global values that 5230 -- are relevant for the state transition. 5231 5232 -- Come here if entire match fails 5233 5234 <<Match_Fail>> 5235 Dout ("match fails"); 5236 New_Line; 5237 Start := 0; 5238 Stop := 0; 5239 return; 5240 5241 -- Come here if entire match succeeds 5242 5243 -- Cursor current position in subject string 5244 5245 <<Match_Succeed>> 5246 Dout ("match succeeds"); 5247 Start := Stack (Stack_Init).Cursor + 1; 5248 Stop := Cursor; 5249 Dout ("first matched character index = " & Start); 5250 Dout ("last matched character index = " & Stop); 5251 Dout ("matched substring = " & Image (Subject (Start .. Stop))); 5252 5253 -- Scan history stack for deferred assignments or writes 5254 5255 if Assign_OnM then 5256 for S in Stack'First .. Stack_Ptr loop 5257 if Stack (S).Node = CP_Assign'Access then 5258 declare 5259 Inner_Base : constant Stack_Range := 5260 Stack (S + 1).Cursor; 5261 Special_Entry : constant Stack_Range := 5262 Inner_Base - 1; 5263 Node_OnM : constant PE_Ptr := 5264 Stack (Special_Entry).Node; 5265 Start : constant Natural := 5266 Stack (Special_Entry).Cursor + 1; 5267 Stop : constant Natural := Stack (S).Cursor; 5268 5269 begin 5270 if Node_OnM.Pcode = PC_Assign_OnM then 5271 Set_Unbounded_String 5272 (Node_OnM.VP.all, Subject (Start .. Stop)); 5273 Dout 5274 (Img (Stack (S).Node) & 5275 "deferred assignment of " & 5276 Image (Subject (Start .. Stop))); 5277 5278 elsif Node_OnM.Pcode = PC_Write_OnM then 5279 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop)); 5280 Dout 5281 (Img (Stack (S).Node) & 5282 "deferred write of " & 5283 Image (Subject (Start .. Stop))); 5284 5285 else 5286 Logic_Error; 5287 end if; 5288 end; 5289 end if; 5290 end loop; 5291 end if; 5292 5293 New_Line; 5294 return; 5295 5296 -- Come here if attempt to match current element fails 5297 5298 -- Stack_Base current stack base 5299 -- Stack_Ptr current stack pointer 5300 5301 <<Fail>> 5302 Cursor := Stack (Stack_Ptr).Cursor; 5303 Node := Stack (Stack_Ptr).Node; 5304 Stack_Ptr := Stack_Ptr - 1; 5305 5306 if Cursor >= 0 then 5307 Dout ("failure, cursor reset to " & Cursor); 5308 end if; 5309 5310 goto Match; 5311 5312 -- Come here if attempt to match current element succeeds 5313 5314 -- Cursor current position in subject string 5315 -- Node pointer to node successfully matched 5316 -- Stack_Base current stack base 5317 -- Stack_Ptr current stack pointer 5318 5319 <<Succeed>> 5320 Dout ("success, cursor = " & Cursor); 5321 Node := Node.Pthen; 5322 5323 -- Come here to match the next pattern element 5324 5325 -- Cursor current position in subject string 5326 -- Node pointer to node to be matched 5327 -- Stack_Base current stack base 5328 -- Stack_Ptr current stack pointer 5329 5330 <<Match>> 5331 5332 -------------------------------------------------- 5333 -- Main Pattern Match Element Matching Routines -- 5334 -------------------------------------------------- 5335 5336 -- Here is the case statement that processes the current node. The 5337 -- processing for each element does one of five things: 5338 5339 -- goto Succeed to move to the successor 5340 -- goto Match_Succeed if the entire match succeeds 5341 -- goto Match_Fail if the entire match fails 5342 -- goto Fail to signal failure of current match 5343 5344 -- Processing is NOT allowed to fall through 5345 5346 case Node.Pcode is 5347 5348 -- Cancel 5349 5350 when PC_Cancel => 5351 Dout (Img (Node) & "matching Cancel"); 5352 goto Match_Fail; 5353 5354 -- Alternation 5355 5356 when PC_Alt => 5357 Dout (Img (Node) & "setting up alternative " & Img (Node.Alt)); 5358 Push (Node.Alt); 5359 Node := Node.Pthen; 5360 goto Match; 5361 5362 -- Any (one character case) 5363 5364 when PC_Any_CH => 5365 Dout (Img (Node) & "matching Any", Node.Char); 5366 5367 if Cursor < Length 5368 and then Subject (Cursor + 1) = Node.Char 5369 then 5370 Cursor := Cursor + 1; 5371 goto Succeed; 5372 else 5373 goto Fail; 5374 end if; 5375 5376 -- Any (character set case) 5377 5378 when PC_Any_CS => 5379 Dout (Img (Node) & "matching Any", Node.CS); 5380 5381 if Cursor < Length 5382 and then Is_In (Subject (Cursor + 1), Node.CS) 5383 then 5384 Cursor := Cursor + 1; 5385 goto Succeed; 5386 else 5387 goto Fail; 5388 end if; 5389 5390 -- Any (string function case) 5391 5392 when PC_Any_VF => declare 5393 U : constant VString := Node.VF.all; 5394 S : Big_String_Access; 5395 L : Natural; 5396 5397 begin 5398 Get_String (U, S, L); 5399 5400 Dout (Img (Node) & "matching Any", S (1 .. L)); 5401 5402 if Cursor < Length 5403 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5404 then 5405 Cursor := Cursor + 1; 5406 goto Succeed; 5407 else 5408 goto Fail; 5409 end if; 5410 end; 5411 5412 -- Any (string pointer case) 5413 5414 when PC_Any_VP => declare 5415 U : constant VString := Node.VP.all; 5416 S : Big_String_Access; 5417 L : Natural; 5418 5419 begin 5420 Get_String (U, S, L); 5421 Dout (Img (Node) & "matching Any", S (1 .. L)); 5422 5423 if Cursor < Length 5424 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5425 then 5426 Cursor := Cursor + 1; 5427 goto Succeed; 5428 else 5429 goto Fail; 5430 end if; 5431 end; 5432 5433 -- Arb (initial match) 5434 5435 when PC_Arb_X => 5436 Dout (Img (Node) & "matching Arb"); 5437 Push (Node.Alt); 5438 Node := Node.Pthen; 5439 goto Match; 5440 5441 -- Arb (extension) 5442 5443 when PC_Arb_Y => 5444 Dout (Img (Node) & "extending Arb"); 5445 5446 if Cursor < Length then 5447 Cursor := Cursor + 1; 5448 Push (Node); 5449 goto Succeed; 5450 else 5451 goto Fail; 5452 end if; 5453 5454 -- Arbno_S (simple Arbno initialize). This is the node that 5455 -- initiates the match of a simple Arbno structure. 5456 5457 when PC_Arbno_S => 5458 Dout (Img (Node) & 5459 "setting up Arbno alternative " & Img (Node.Alt)); 5460 Push (Node.Alt); 5461 Node := Node.Pthen; 5462 goto Match; 5463 5464 -- Arbno_X (Arbno initialize). This is the node that initiates 5465 -- the match of a complex Arbno structure. 5466 5467 when PC_Arbno_X => 5468 Dout (Img (Node) & 5469 "setting up Arbno alternative " & Img (Node.Alt)); 5470 Push (Node.Alt); 5471 Node := Node.Pthen; 5472 goto Match; 5473 5474 -- Arbno_Y (Arbno rematch). This is the node that is executed 5475 -- following successful matching of one instance of a complex 5476 -- Arbno pattern. 5477 5478 when PC_Arbno_Y => declare 5479 Null_Match : constant Boolean := 5480 Cursor = Stack (Stack_Base - 1).Cursor; 5481 5482 begin 5483 Dout (Img (Node) & "extending Arbno"); 5484 Pop_Region; 5485 5486 -- If arbno extension matched null, then immediately fail 5487 5488 if Null_Match then 5489 Dout ("Arbno extension matched null, so fails"); 5490 goto Fail; 5491 end if; 5492 5493 -- Here we must do a stack check to make sure enough stack 5494 -- is left. This check will happen once for each instance of 5495 -- the Arbno pattern that is matched. The Nat field of a 5496 -- PC_Arbno pattern contains the maximum stack entries needed 5497 -- for the Arbno with one instance and the successor pattern 5498 5499 if Stack_Ptr + Node.Nat >= Stack'Last then 5500 raise Pattern_Stack_Overflow; 5501 end if; 5502 5503 goto Succeed; 5504 end; 5505 5506 -- Assign. If this node is executed, it means the assign-on-match 5507 -- or write-on-match operation will not happen after all, so we 5508 -- is propagate the failure, removing the PC_Assign node. 5509 5510 when PC_Assign => 5511 Dout (Img (Node) & "deferred assign/write cancelled"); 5512 goto Fail; 5513 5514 -- Assign immediate. This node performs the actual assignment 5515 5516 when PC_Assign_Imm => 5517 Dout 5518 (Img (Node) & "executing immediate assignment of " & 5519 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor))); 5520 Set_Unbounded_String 5521 (Node.VP.all, 5522 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 5523 Pop_Region; 5524 goto Succeed; 5525 5526 -- Assign on match. This node sets up for the eventual assignment 5527 5528 when PC_Assign_OnM => 5529 Dout (Img (Node) & "registering deferred assignment"); 5530 Stack (Stack_Base - 1).Node := Node; 5531 Push (CP_Assign'Access); 5532 Pop_Region; 5533 Assign_OnM := True; 5534 goto Succeed; 5535 5536 -- Bal 5537 5538 when PC_Bal => 5539 Dout (Img (Node) & "matching or extending Bal"); 5540 if Cursor >= Length or else Subject (Cursor + 1) = ')' then 5541 goto Fail; 5542 5543 elsif Subject (Cursor + 1) = '(' then 5544 declare 5545 Paren_Count : Natural := 1; 5546 5547 begin 5548 loop 5549 Cursor := Cursor + 1; 5550 5551 if Cursor >= Length then 5552 goto Fail; 5553 5554 elsif Subject (Cursor + 1) = '(' then 5555 Paren_Count := Paren_Count + 1; 5556 5557 elsif Subject (Cursor + 1) = ')' then 5558 Paren_Count := Paren_Count - 1; 5559 exit when Paren_Count = 0; 5560 end if; 5561 end loop; 5562 end; 5563 end if; 5564 5565 Cursor := Cursor + 1; 5566 Push (Node); 5567 goto Succeed; 5568 5569 -- Break (one character case) 5570 5571 when PC_Break_CH => 5572 Dout (Img (Node) & "matching Break", Node.Char); 5573 5574 while Cursor < Length loop 5575 if Subject (Cursor + 1) = Node.Char then 5576 goto Succeed; 5577 else 5578 Cursor := Cursor + 1; 5579 end if; 5580 end loop; 5581 5582 goto Fail; 5583 5584 -- Break (character set case) 5585 5586 when PC_Break_CS => 5587 Dout (Img (Node) & "matching Break", Node.CS); 5588 5589 while Cursor < Length loop 5590 if Is_In (Subject (Cursor + 1), Node.CS) then 5591 goto Succeed; 5592 else 5593 Cursor := Cursor + 1; 5594 end if; 5595 end loop; 5596 5597 goto Fail; 5598 5599 -- Break (string function case) 5600 5601 when PC_Break_VF => declare 5602 U : constant VString := Node.VF.all; 5603 S : Big_String_Access; 5604 L : Natural; 5605 5606 begin 5607 Get_String (U, S, L); 5608 Dout (Img (Node) & "matching Break", S (1 .. L)); 5609 5610 while Cursor < Length loop 5611 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5612 goto Succeed; 5613 else 5614 Cursor := Cursor + 1; 5615 end if; 5616 end loop; 5617 5618 goto Fail; 5619 end; 5620 5621 -- Break (string pointer case) 5622 5623 when PC_Break_VP => declare 5624 U : constant VString := Node.VP.all; 5625 S : Big_String_Access; 5626 L : Natural; 5627 5628 begin 5629 Get_String (U, S, L); 5630 Dout (Img (Node) & "matching Break", S (1 .. L)); 5631 5632 while Cursor < Length loop 5633 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5634 goto Succeed; 5635 else 5636 Cursor := Cursor + 1; 5637 end if; 5638 end loop; 5639 5640 goto Fail; 5641 end; 5642 5643 -- BreakX (one character case) 5644 5645 when PC_BreakX_CH => 5646 Dout (Img (Node) & "matching BreakX", Node.Char); 5647 5648 while Cursor < Length loop 5649 if Subject (Cursor + 1) = Node.Char then 5650 goto Succeed; 5651 else 5652 Cursor := Cursor + 1; 5653 end if; 5654 end loop; 5655 5656 goto Fail; 5657 5658 -- BreakX (character set case) 5659 5660 when PC_BreakX_CS => 5661 Dout (Img (Node) & "matching BreakX", Node.CS); 5662 5663 while Cursor < Length loop 5664 if Is_In (Subject (Cursor + 1), Node.CS) then 5665 goto Succeed; 5666 else 5667 Cursor := Cursor + 1; 5668 end if; 5669 end loop; 5670 5671 goto Fail; 5672 5673 -- BreakX (string function case) 5674 5675 when PC_BreakX_VF => declare 5676 U : constant VString := Node.VF.all; 5677 S : Big_String_Access; 5678 L : Natural; 5679 5680 begin 5681 Get_String (U, S, L); 5682 Dout (Img (Node) & "matching BreakX", S (1 .. L)); 5683 5684 while Cursor < Length loop 5685 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5686 goto Succeed; 5687 else 5688 Cursor := Cursor + 1; 5689 end if; 5690 end loop; 5691 5692 goto Fail; 5693 end; 5694 5695 -- BreakX (string pointer case) 5696 5697 when PC_BreakX_VP => declare 5698 U : constant VString := Node.VP.all; 5699 S : Big_String_Access; 5700 L : Natural; 5701 5702 begin 5703 Get_String (U, S, L); 5704 Dout (Img (Node) & "matching BreakX", S (1 .. L)); 5705 5706 while Cursor < Length loop 5707 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5708 goto Succeed; 5709 else 5710 Cursor := Cursor + 1; 5711 end if; 5712 end loop; 5713 5714 goto Fail; 5715 end; 5716 5717 -- BreakX_X (BreakX extension). See section on "Compound Pattern 5718 -- Structures". This node is the alternative that is stacked 5719 -- to skip past the break character and extend the break. 5720 5721 when PC_BreakX_X => 5722 Dout (Img (Node) & "extending BreakX"); 5723 Cursor := Cursor + 1; 5724 goto Succeed; 5725 5726 -- Character (one character string) 5727 5728 when PC_Char => 5729 Dout (Img (Node) & "matching '" & Node.Char & '''); 5730 5731 if Cursor < Length 5732 and then Subject (Cursor + 1) = Node.Char 5733 then 5734 Cursor := Cursor + 1; 5735 goto Succeed; 5736 else 5737 goto Fail; 5738 end if; 5739 5740 -- End of Pattern 5741 5742 when PC_EOP => 5743 if Stack_Base = Stack_Init then 5744 Dout ("end of pattern"); 5745 goto Match_Succeed; 5746 5747 -- End of recursive inner match. See separate section on 5748 -- handing of recursive pattern matches for details. 5749 5750 else 5751 Dout ("terminating recursive match"); 5752 Node := Stack (Stack_Base - 1).Node; 5753 Pop_Region; 5754 goto Match; 5755 end if; 5756 5757 -- Fail 5758 5759 when PC_Fail => 5760 Dout (Img (Node) & "matching Fail"); 5761 goto Fail; 5762 5763 -- Fence (built in pattern) 5764 5765 when PC_Fence => 5766 Dout (Img (Node) & "matching Fence"); 5767 Push (CP_Cancel'Access); 5768 goto Succeed; 5769 5770 -- Fence function node X. This is the node that gets control 5771 -- after a successful match of the fenced pattern. 5772 5773 when PC_Fence_X => 5774 Dout (Img (Node) & "matching Fence function"); 5775 Stack_Ptr := Stack_Ptr + 1; 5776 Stack (Stack_Ptr).Cursor := Stack_Base; 5777 Stack (Stack_Ptr).Node := CP_Fence_Y'Access; 5778 Stack_Base := Stack (Stack_Base).Cursor; 5779 Region_Level := Region_Level - 1; 5780 goto Succeed; 5781 5782 -- Fence function node Y. This is the node that gets control on 5783 -- a failure that occurs after the fenced pattern has matched. 5784 5785 -- Note: the Cursor at this stage is actually the inner stack 5786 -- base value. We don't reset this, but we do use it to strip 5787 -- off all the entries made by the fenced pattern. 5788 5789 when PC_Fence_Y => 5790 Dout (Img (Node) & "pattern matched by Fence caused failure"); 5791 Stack_Ptr := Cursor - 2; 5792 goto Fail; 5793 5794 -- Len (integer case) 5795 5796 when PC_Len_Nat => 5797 Dout (Img (Node) & "matching Len", Node.Nat); 5798 5799 if Cursor + Node.Nat > Length then 5800 goto Fail; 5801 else 5802 Cursor := Cursor + Node.Nat; 5803 goto Succeed; 5804 end if; 5805 5806 -- Len (Integer function case) 5807 5808 when PC_Len_NF => declare 5809 N : constant Natural := Node.NF.all; 5810 5811 begin 5812 Dout (Img (Node) & "matching Len", N); 5813 5814 if Cursor + N > Length then 5815 goto Fail; 5816 else 5817 Cursor := Cursor + N; 5818 goto Succeed; 5819 end if; 5820 end; 5821 5822 -- Len (integer pointer case) 5823 5824 when PC_Len_NP => 5825 Dout (Img (Node) & "matching Len", Node.NP.all); 5826 5827 if Cursor + Node.NP.all > Length then 5828 goto Fail; 5829 else 5830 Cursor := Cursor + Node.NP.all; 5831 goto Succeed; 5832 end if; 5833 5834 -- NotAny (one character case) 5835 5836 when PC_NotAny_CH => 5837 Dout (Img (Node) & "matching NotAny", Node.Char); 5838 5839 if Cursor < Length 5840 and then Subject (Cursor + 1) /= Node.Char 5841 then 5842 Cursor := Cursor + 1; 5843 goto Succeed; 5844 else 5845 goto Fail; 5846 end if; 5847 5848 -- NotAny (character set case) 5849 5850 when PC_NotAny_CS => 5851 Dout (Img (Node) & "matching NotAny", Node.CS); 5852 5853 if Cursor < Length 5854 and then not Is_In (Subject (Cursor + 1), Node.CS) 5855 then 5856 Cursor := Cursor + 1; 5857 goto Succeed; 5858 else 5859 goto Fail; 5860 end if; 5861 5862 -- NotAny (string function case) 5863 5864 when PC_NotAny_VF => declare 5865 U : constant VString := Node.VF.all; 5866 S : Big_String_Access; 5867 L : Natural; 5868 5869 begin 5870 Get_String (U, S, L); 5871 Dout (Img (Node) & "matching NotAny", S (1 .. L)); 5872 5873 if Cursor < Length 5874 and then 5875 not Is_In (Subject (Cursor + 1), S (1 .. L)) 5876 then 5877 Cursor := Cursor + 1; 5878 goto Succeed; 5879 else 5880 goto Fail; 5881 end if; 5882 end; 5883 5884 -- NotAny (string pointer case) 5885 5886 when PC_NotAny_VP => declare 5887 U : constant VString := Node.VP.all; 5888 S : Big_String_Access; 5889 L : Natural; 5890 5891 begin 5892 Get_String (U, S, L); 5893 Dout (Img (Node) & "matching NotAny", S (1 .. L)); 5894 5895 if Cursor < Length 5896 and then 5897 not Is_In (Subject (Cursor + 1), S (1 .. L)) 5898 then 5899 Cursor := Cursor + 1; 5900 goto Succeed; 5901 else 5902 goto Fail; 5903 end if; 5904 end; 5905 5906 -- NSpan (one character case) 5907 5908 when PC_NSpan_CH => 5909 Dout (Img (Node) & "matching NSpan", Node.Char); 5910 5911 while Cursor < Length 5912 and then Subject (Cursor + 1) = Node.Char 5913 loop 5914 Cursor := Cursor + 1; 5915 end loop; 5916 5917 goto Succeed; 5918 5919 -- NSpan (character set case) 5920 5921 when PC_NSpan_CS => 5922 Dout (Img (Node) & "matching NSpan", Node.CS); 5923 5924 while Cursor < Length 5925 and then Is_In (Subject (Cursor + 1), Node.CS) 5926 loop 5927 Cursor := Cursor + 1; 5928 end loop; 5929 5930 goto Succeed; 5931 5932 -- NSpan (string function case) 5933 5934 when PC_NSpan_VF => declare 5935 U : constant VString := Node.VF.all; 5936 S : Big_String_Access; 5937 L : Natural; 5938 5939 begin 5940 Get_String (U, S, L); 5941 Dout (Img (Node) & "matching NSpan", S (1 .. L)); 5942 5943 while Cursor < Length 5944 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5945 loop 5946 Cursor := Cursor + 1; 5947 end loop; 5948 5949 goto Succeed; 5950 end; 5951 5952 -- NSpan (string pointer case) 5953 5954 when PC_NSpan_VP => declare 5955 U : constant VString := Node.VP.all; 5956 S : Big_String_Access; 5957 L : Natural; 5958 5959 begin 5960 Get_String (U, S, L); 5961 Dout (Img (Node) & "matching NSpan", S (1 .. L)); 5962 5963 while Cursor < Length 5964 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5965 loop 5966 Cursor := Cursor + 1; 5967 end loop; 5968 5969 goto Succeed; 5970 end; 5971 5972 when PC_Null => 5973 Dout (Img (Node) & "matching null"); 5974 goto Succeed; 5975 5976 -- Pos (integer case) 5977 5978 when PC_Pos_Nat => 5979 Dout (Img (Node) & "matching Pos", Node.Nat); 5980 5981 if Cursor = Node.Nat then 5982 goto Succeed; 5983 else 5984 goto Fail; 5985 end if; 5986 5987 -- Pos (Integer function case) 5988 5989 when PC_Pos_NF => declare 5990 N : constant Natural := Node.NF.all; 5991 5992 begin 5993 Dout (Img (Node) & "matching Pos", N); 5994 5995 if Cursor = N then 5996 goto Succeed; 5997 else 5998 goto Fail; 5999 end if; 6000 end; 6001 6002 -- Pos (integer pointer case) 6003 6004 when PC_Pos_NP => 6005 Dout (Img (Node) & "matching Pos", Node.NP.all); 6006 6007 if Cursor = Node.NP.all then 6008 goto Succeed; 6009 else 6010 goto Fail; 6011 end if; 6012 6013 -- Predicate function 6014 6015 when PC_Pred_Func => 6016 Dout (Img (Node) & "matching predicate function"); 6017 6018 if Node.BF.all then 6019 goto Succeed; 6020 else 6021 goto Fail; 6022 end if; 6023 6024 -- Region Enter. Initiate new pattern history stack region 6025 6026 when PC_R_Enter => 6027 Dout (Img (Node) & "starting match of nested pattern"); 6028 Stack (Stack_Ptr + 1).Cursor := Cursor; 6029 Push_Region; 6030 goto Succeed; 6031 6032 -- Region Remove node. This is the node stacked by an R_Enter. 6033 -- It removes the special format stack entry right underneath, and 6034 -- then restores the outer level stack base and signals failure. 6035 6036 -- Note: the cursor value at this stage is actually the (negative) 6037 -- stack base value for the outer level. 6038 6039 when PC_R_Remove => 6040 Dout ("failure, match of nested pattern terminated"); 6041 Stack_Base := Cursor; 6042 Region_Level := Region_Level - 1; 6043 Stack_Ptr := Stack_Ptr - 1; 6044 goto Fail; 6045 6046 -- Region restore node. This is the node stacked at the end of an 6047 -- inner level match. Its function is to restore the inner level 6048 -- region, so that alternatives in this region can be sought. 6049 6050 -- Note: the Cursor at this stage is actually the negative of the 6051 -- inner stack base value, which we use to restore the inner region. 6052 6053 when PC_R_Restore => 6054 Dout ("failure, search for alternatives in nested pattern"); 6055 Region_Level := Region_Level + 1; 6056 Stack_Base := Cursor; 6057 goto Fail; 6058 6059 -- Rest 6060 6061 when PC_Rest => 6062 Dout (Img (Node) & "matching Rest"); 6063 Cursor := Length; 6064 goto Succeed; 6065 6066 -- Initiate recursive match (pattern pointer case) 6067 6068 when PC_Rpat => 6069 Stack (Stack_Ptr + 1).Node := Node.Pthen; 6070 Push_Region; 6071 Dout (Img (Node) & "initiating recursive match"); 6072 6073 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then 6074 raise Pattern_Stack_Overflow; 6075 else 6076 Node := Node.PP.all.P; 6077 goto Match; 6078 end if; 6079 6080 -- RPos (integer case) 6081 6082 when PC_RPos_Nat => 6083 Dout (Img (Node) & "matching RPos", Node.Nat); 6084 6085 if Cursor = (Length - Node.Nat) then 6086 goto Succeed; 6087 else 6088 goto Fail; 6089 end if; 6090 6091 -- RPos (integer function case) 6092 6093 when PC_RPos_NF => declare 6094 N : constant Natural := Node.NF.all; 6095 6096 begin 6097 Dout (Img (Node) & "matching RPos", N); 6098 6099 if Length - Cursor = N then 6100 goto Succeed; 6101 else 6102 goto Fail; 6103 end if; 6104 end; 6105 6106 -- RPos (integer pointer case) 6107 6108 when PC_RPos_NP => 6109 Dout (Img (Node) & "matching RPos", Node.NP.all); 6110 6111 if Cursor = (Length - Node.NP.all) then 6112 goto Succeed; 6113 else 6114 goto Fail; 6115 end if; 6116 6117 -- RTab (integer case) 6118 6119 when PC_RTab_Nat => 6120 Dout (Img (Node) & "matching RTab", Node.Nat); 6121 6122 if Cursor <= (Length - Node.Nat) then 6123 Cursor := Length - Node.Nat; 6124 goto Succeed; 6125 else 6126 goto Fail; 6127 end if; 6128 6129 -- RTab (integer function case) 6130 6131 when PC_RTab_NF => declare 6132 N : constant Natural := Node.NF.all; 6133 6134 begin 6135 Dout (Img (Node) & "matching RPos", N); 6136 6137 if Length - Cursor >= N then 6138 Cursor := Length - N; 6139 goto Succeed; 6140 else 6141 goto Fail; 6142 end if; 6143 end; 6144 6145 -- RTab (integer pointer case) 6146 6147 when PC_RTab_NP => 6148 Dout (Img (Node) & "matching RPos", Node.NP.all); 6149 6150 if Cursor <= (Length - Node.NP.all) then 6151 Cursor := Length - Node.NP.all; 6152 goto Succeed; 6153 else 6154 goto Fail; 6155 end if; 6156 6157 -- Cursor assignment 6158 6159 when PC_Setcur => 6160 Dout (Img (Node) & "matching Setcur"); 6161 Node.Var.all := Cursor; 6162 goto Succeed; 6163 6164 -- Span (one character case) 6165 6166 when PC_Span_CH => declare 6167 P : Natural := Cursor; 6168 6169 begin 6170 Dout (Img (Node) & "matching Span", Node.Char); 6171 6172 while P < Length 6173 and then Subject (P + 1) = Node.Char 6174 loop 6175 P := P + 1; 6176 end loop; 6177 6178 if P /= Cursor then 6179 Cursor := P; 6180 goto Succeed; 6181 else 6182 goto Fail; 6183 end if; 6184 end; 6185 6186 -- Span (character set case) 6187 6188 when PC_Span_CS => declare 6189 P : Natural := Cursor; 6190 6191 begin 6192 Dout (Img (Node) & "matching Span", Node.CS); 6193 6194 while P < Length 6195 and then Is_In (Subject (P + 1), Node.CS) 6196 loop 6197 P := P + 1; 6198 end loop; 6199 6200 if P /= Cursor then 6201 Cursor := P; 6202 goto Succeed; 6203 else 6204 goto Fail; 6205 end if; 6206 end; 6207 6208 -- Span (string function case) 6209 6210 when PC_Span_VF => declare 6211 U : constant VString := Node.VF.all; 6212 S : Big_String_Access; 6213 L : Natural; 6214 P : Natural; 6215 6216 begin 6217 Get_String (U, S, L); 6218 Dout (Img (Node) & "matching Span", S (1 .. L)); 6219 6220 P := Cursor; 6221 while P < Length 6222 and then Is_In (Subject (P + 1), S (1 .. L)) 6223 loop 6224 P := P + 1; 6225 end loop; 6226 6227 if P /= Cursor then 6228 Cursor := P; 6229 goto Succeed; 6230 else 6231 goto Fail; 6232 end if; 6233 end; 6234 6235 -- Span (string pointer case) 6236 6237 when PC_Span_VP => declare 6238 U : constant VString := Node.VP.all; 6239 S : Big_String_Access; 6240 L : Natural; 6241 P : Natural; 6242 6243 begin 6244 Get_String (U, S, L); 6245 Dout (Img (Node) & "matching Span", S (1 .. L)); 6246 6247 P := Cursor; 6248 while P < Length 6249 and then Is_In (Subject (P + 1), S (1 .. L)) 6250 loop 6251 P := P + 1; 6252 end loop; 6253 6254 if P /= Cursor then 6255 Cursor := P; 6256 goto Succeed; 6257 else 6258 goto Fail; 6259 end if; 6260 end; 6261 6262 -- String (two character case) 6263 6264 when PC_String_2 => 6265 Dout (Img (Node) & "matching " & Image (Node.Str2)); 6266 6267 if (Length - Cursor) >= 2 6268 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2 6269 then 6270 Cursor := Cursor + 2; 6271 goto Succeed; 6272 else 6273 goto Fail; 6274 end if; 6275 6276 -- String (three character case) 6277 6278 when PC_String_3 => 6279 Dout (Img (Node) & "matching " & Image (Node.Str3)); 6280 6281 if (Length - Cursor) >= 3 6282 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3 6283 then 6284 Cursor := Cursor + 3; 6285 goto Succeed; 6286 else 6287 goto Fail; 6288 end if; 6289 6290 -- String (four character case) 6291 6292 when PC_String_4 => 6293 Dout (Img (Node) & "matching " & Image (Node.Str4)); 6294 6295 if (Length - Cursor) >= 4 6296 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4 6297 then 6298 Cursor := Cursor + 4; 6299 goto Succeed; 6300 else 6301 goto Fail; 6302 end if; 6303 6304 -- String (five character case) 6305 6306 when PC_String_5 => 6307 Dout (Img (Node) & "matching " & Image (Node.Str5)); 6308 6309 if (Length - Cursor) >= 5 6310 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5 6311 then 6312 Cursor := Cursor + 5; 6313 goto Succeed; 6314 else 6315 goto Fail; 6316 end if; 6317 6318 -- String (six character case) 6319 6320 when PC_String_6 => 6321 Dout (Img (Node) & "matching " & Image (Node.Str6)); 6322 6323 if (Length - Cursor) >= 6 6324 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6 6325 then 6326 Cursor := Cursor + 6; 6327 goto Succeed; 6328 else 6329 goto Fail; 6330 end if; 6331 6332 -- String (case of more than six characters) 6333 6334 when PC_String => declare 6335 Len : constant Natural := Node.Str'Length; 6336 6337 begin 6338 Dout (Img (Node) & "matching " & Image (Node.Str.all)); 6339 6340 if (Length - Cursor) >= Len 6341 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len) 6342 then 6343 Cursor := Cursor + Len; 6344 goto Succeed; 6345 else 6346 goto Fail; 6347 end if; 6348 end; 6349 6350 -- String (function case) 6351 6352 when PC_String_VF => declare 6353 U : constant VString := Node.VF.all; 6354 S : Big_String_Access; 6355 L : Natural; 6356 6357 begin 6358 Get_String (U, S, L); 6359 Dout (Img (Node) & "matching " & Image (S (1 .. L))); 6360 6361 if (Length - Cursor) >= L 6362 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 6363 then 6364 Cursor := Cursor + L; 6365 goto Succeed; 6366 else 6367 goto Fail; 6368 end if; 6369 end; 6370 6371 -- String (vstring pointer case) 6372 6373 when PC_String_VP => declare 6374 U : constant VString := Node.VP.all; 6375 S : Big_String_Access; 6376 L : Natural; 6377 6378 begin 6379 Get_String (U, S, L); 6380 Dout (Img (Node) & "matching " & Image (S (1 .. L))); 6381 6382 if (Length - Cursor) >= L 6383 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 6384 then 6385 Cursor := Cursor + L; 6386 goto Succeed; 6387 else 6388 goto Fail; 6389 end if; 6390 end; 6391 6392 -- Succeed 6393 6394 when PC_Succeed => 6395 Dout (Img (Node) & "matching Succeed"); 6396 Push (Node); 6397 goto Succeed; 6398 6399 -- Tab (integer case) 6400 6401 when PC_Tab_Nat => 6402 Dout (Img (Node) & "matching Tab", Node.Nat); 6403 6404 if Cursor <= Node.Nat then 6405 Cursor := Node.Nat; 6406 goto Succeed; 6407 else 6408 goto Fail; 6409 end if; 6410 6411 -- Tab (integer function case) 6412 6413 when PC_Tab_NF => declare 6414 N : constant Natural := Node.NF.all; 6415 6416 begin 6417 Dout (Img (Node) & "matching Tab ", N); 6418 6419 if Cursor <= N then 6420 Cursor := N; 6421 goto Succeed; 6422 else 6423 goto Fail; 6424 end if; 6425 end; 6426 6427 -- Tab (integer pointer case) 6428 6429 when PC_Tab_NP => 6430 Dout (Img (Node) & "matching Tab ", Node.NP.all); 6431 6432 if Cursor <= Node.NP.all then 6433 Cursor := Node.NP.all; 6434 goto Succeed; 6435 else 6436 goto Fail; 6437 end if; 6438 6439 -- Unanchored movement 6440 6441 when PC_Unanchored => 6442 Dout ("attempting to move anchor point"); 6443 6444 -- All done if we tried every position 6445 6446 if Cursor > Length then 6447 goto Match_Fail; 6448 6449 -- Otherwise extend the anchor point, and restack ourself 6450 6451 else 6452 Cursor := Cursor + 1; 6453 Push (Node); 6454 goto Succeed; 6455 end if; 6456 6457 -- Write immediate. This node performs the actual write 6458 6459 when PC_Write_Imm => 6460 Dout (Img (Node) & "executing immediate write of " & 6461 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 6462 6463 Put_Line 6464 (Node.FP.all, 6465 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 6466 Pop_Region; 6467 goto Succeed; 6468 6469 -- Write on match. This node sets up for the eventual write 6470 6471 when PC_Write_OnM => 6472 Dout (Img (Node) & "registering deferred write"); 6473 Stack (Stack_Base - 1).Node := Node; 6474 Push (CP_Assign'Access); 6475 Pop_Region; 6476 Assign_OnM := True; 6477 goto Succeed; 6478 end case; 6479 6480 -- We are NOT allowed to fall though this case statement, since every 6481 -- match routine must end by executing a goto to the appropriate point 6482 -- in the finite state machine model. 6483 6484 pragma Warnings (Off); 6485 Logic_Error; 6486 pragma Warnings (On); 6487 end XMatchD; 6488 6489end GNAT.Spitbol.Patterns; 6490