1------------------------------------------------------------------------------ 2-- -- 3-- GNAT LIBRARY COMPONENTS -- 4-- -- 5-- G N A T . S P I T B O L . P A T T E R N S -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1998-2013, AdaCore -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. -- 17-- -- 18-- As a special exception under Section 7 of GPL version 3, you are granted -- 19-- additional permissions described in the GCC Runtime Library Exception, -- 20-- version 3.1, as published by the Free Software Foundation. -- 21-- -- 22-- You should have received a copy of the GNU General Public License and -- 23-- a copy of the GCC Runtime Library Exception along with this program; -- 24-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 25-- <http://www.gnu.org/licenses/>. -- 26-- -- 27-- GNAT was originally developed by the GNAT team at New York University. -- 28-- Extensive contributions were provided by Ada Core Technologies Inc. -- 29-- -- 30------------------------------------------------------------------------------ 31 32-- Note: the data structures and general approach used in this implementation 33-- are derived from the original MINIMAL sources for SPITBOL. The code is not 34-- a direct translation, but the approach is followed closely. In particular, 35-- we use the one stack approach developed in the SPITBOL implementation. 36 37with Ada.Strings.Unbounded.Aux; use Ada.Strings.Unbounded.Aux; 38 39with GNAT.Debug_Utilities; use GNAT.Debug_Utilities; 40 41with System; use System; 42 43with Ada.Unchecked_Conversion; 44with Ada.Unchecked_Deallocation; 45 46package body GNAT.Spitbol.Patterns is 47 48 ------------------------ 49 -- Internal Debugging -- 50 ------------------------ 51 52 Internal_Debug : constant Boolean := False; 53 -- Set this flag to True to activate some built-in debugging traceback 54 -- These are all lines output with PutD and Put_LineD. 55 56 procedure New_LineD; 57 pragma Inline (New_LineD); 58 -- Output new blank line with New_Line if Internal_Debug is True 59 60 procedure PutD (Str : String); 61 pragma Inline (PutD); 62 -- Output string with Put if Internal_Debug is True 63 64 procedure Put_LineD (Str : String); 65 pragma Inline (Put_LineD); 66 -- Output string with Put_Line if Internal_Debug is True 67 68 ----------------------------- 69 -- Local Type Declarations -- 70 ----------------------------- 71 72 subtype String_Ptr is Ada.Strings.Unbounded.String_Access; 73 subtype File_Ptr is Ada.Text_IO.File_Access; 74 75 function To_Address is new Ada.Unchecked_Conversion (PE_Ptr, Address); 76 -- Used only for debugging output purposes 77 78 subtype AFC is Ada.Finalization.Controlled; 79 80 N : constant PE_Ptr := null; 81 -- Shorthand used to initialize Copy fields to null 82 83 type Natural_Ptr is access all Natural; 84 type Pattern_Ptr is access all Pattern; 85 86 -------------------------------------------------- 87 -- Description of Algorithm and Data Structures -- 88 -------------------------------------------------- 89 90 -- A pattern structure is represented as a linked graph of nodes 91 -- with the following structure: 92 93 -- +------------------------------------+ 94 -- I Pcode I 95 -- +------------------------------------+ 96 -- I Index I 97 -- +------------------------------------+ 98 -- I Pthen I 99 -- +------------------------------------+ 100 -- I parameter(s) I 101 -- +------------------------------------+ 102 103 -- Pcode is a code value indicating the type of the pattern node. This 104 -- code is used both as the discriminant value for the record, and as 105 -- the case index in the main match routine that branches to the proper 106 -- match code for the given element. 107 108 -- Index is a serial index number. The use of these serial index 109 -- numbers is described in a separate section. 110 111 -- Pthen is a pointer to the successor node, i.e the node to be matched 112 -- if the attempt to match the node succeeds. If this is the last node 113 -- of the pattern to be matched, then Pthen points to a dummy node 114 -- of kind PC_EOP (end of pattern), which initializes pattern exit. 115 116 -- The parameter or parameters are present for certain node types, 117 -- and the type varies with the pattern code. 118 119 type Pattern_Code is ( 120 PC_Arb_Y, 121 PC_Assign, 122 PC_Bal, 123 PC_BreakX_X, 124 PC_Cancel, 125 PC_EOP, 126 PC_Fail, 127 PC_Fence, 128 PC_Fence_X, 129 PC_Fence_Y, 130 PC_R_Enter, 131 PC_R_Remove, 132 PC_R_Restore, 133 PC_Rest, 134 PC_Succeed, 135 PC_Unanchored, 136 137 PC_Alt, 138 PC_Arb_X, 139 PC_Arbno_S, 140 PC_Arbno_X, 141 142 PC_Rpat, 143 144 PC_Pred_Func, 145 146 PC_Assign_Imm, 147 PC_Assign_OnM, 148 PC_Any_VP, 149 PC_Break_VP, 150 PC_BreakX_VP, 151 PC_NotAny_VP, 152 PC_NSpan_VP, 153 PC_Span_VP, 154 PC_String_VP, 155 156 PC_Write_Imm, 157 PC_Write_OnM, 158 159 PC_Null, 160 PC_String, 161 162 PC_String_2, 163 PC_String_3, 164 PC_String_4, 165 PC_String_5, 166 PC_String_6, 167 168 PC_Setcur, 169 170 PC_Any_CH, 171 PC_Break_CH, 172 PC_BreakX_CH, 173 PC_Char, 174 PC_NotAny_CH, 175 PC_NSpan_CH, 176 PC_Span_CH, 177 178 PC_Any_CS, 179 PC_Break_CS, 180 PC_BreakX_CS, 181 PC_NotAny_CS, 182 PC_NSpan_CS, 183 PC_Span_CS, 184 185 PC_Arbno_Y, 186 PC_Len_Nat, 187 PC_Pos_Nat, 188 PC_RPos_Nat, 189 PC_RTab_Nat, 190 PC_Tab_Nat, 191 192 PC_Pos_NF, 193 PC_Len_NF, 194 PC_RPos_NF, 195 PC_RTab_NF, 196 PC_Tab_NF, 197 198 PC_Pos_NP, 199 PC_Len_NP, 200 PC_RPos_NP, 201 PC_RTab_NP, 202 PC_Tab_NP, 203 204 PC_Any_VF, 205 PC_Break_VF, 206 PC_BreakX_VF, 207 PC_NotAny_VF, 208 PC_NSpan_VF, 209 PC_Span_VF, 210 PC_String_VF); 211 212 type IndexT is range 0 .. +(2 **15 - 1); 213 214 type PE (Pcode : Pattern_Code) is record 215 216 Index : IndexT; 217 -- Serial index number of pattern element within pattern 218 219 Pthen : PE_Ptr; 220 -- Successor element, to be matched after this one 221 222 case Pcode is 223 224 when PC_Arb_Y | 225 PC_Assign | 226 PC_Bal | 227 PC_BreakX_X | 228 PC_Cancel | 229 PC_EOP | 230 PC_Fail | 231 PC_Fence | 232 PC_Fence_X | 233 PC_Fence_Y | 234 PC_Null | 235 PC_R_Enter | 236 PC_R_Remove | 237 PC_R_Restore | 238 PC_Rest | 239 PC_Succeed | 240 PC_Unanchored => null; 241 242 when PC_Alt | 243 PC_Arb_X | 244 PC_Arbno_S | 245 PC_Arbno_X => Alt : PE_Ptr; 246 247 when PC_Rpat => PP : Pattern_Ptr; 248 249 when PC_Pred_Func => BF : Boolean_Func; 250 251 when PC_Assign_Imm | 252 PC_Assign_OnM | 253 PC_Any_VP | 254 PC_Break_VP | 255 PC_BreakX_VP | 256 PC_NotAny_VP | 257 PC_NSpan_VP | 258 PC_Span_VP | 259 PC_String_VP => VP : VString_Ptr; 260 261 when PC_Write_Imm | 262 PC_Write_OnM => FP : File_Ptr; 263 264 when PC_String => Str : String_Ptr; 265 266 when PC_String_2 => Str2 : String (1 .. 2); 267 268 when PC_String_3 => Str3 : String (1 .. 3); 269 270 when PC_String_4 => Str4 : String (1 .. 4); 271 272 when PC_String_5 => Str5 : String (1 .. 5); 273 274 when PC_String_6 => Str6 : String (1 .. 6); 275 276 when PC_Setcur => Var : Natural_Ptr; 277 278 when PC_Any_CH | 279 PC_Break_CH | 280 PC_BreakX_CH | 281 PC_Char | 282 PC_NotAny_CH | 283 PC_NSpan_CH | 284 PC_Span_CH => Char : Character; 285 286 when PC_Any_CS | 287 PC_Break_CS | 288 PC_BreakX_CS | 289 PC_NotAny_CS | 290 PC_NSpan_CS | 291 PC_Span_CS => CS : Character_Set; 292 293 when PC_Arbno_Y | 294 PC_Len_Nat | 295 PC_Pos_Nat | 296 PC_RPos_Nat | 297 PC_RTab_Nat | 298 PC_Tab_Nat => Nat : Natural; 299 300 when PC_Pos_NF | 301 PC_Len_NF | 302 PC_RPos_NF | 303 PC_RTab_NF | 304 PC_Tab_NF => NF : Natural_Func; 305 306 when PC_Pos_NP | 307 PC_Len_NP | 308 PC_RPos_NP | 309 PC_RTab_NP | 310 PC_Tab_NP => NP : Natural_Ptr; 311 312 when PC_Any_VF | 313 PC_Break_VF | 314 PC_BreakX_VF | 315 PC_NotAny_VF | 316 PC_NSpan_VF | 317 PC_Span_VF | 318 PC_String_VF => VF : VString_Func; 319 320 end case; 321 end record; 322 323 subtype PC_Has_Alt is Pattern_Code range PC_Alt .. PC_Arbno_X; 324 -- Range of pattern codes that has an Alt field. This is used in the 325 -- recursive traversals, since these links must be followed. 326 327 EOP_Element : aliased constant PE := (PC_EOP, 0, N); 328 -- This is the end of pattern element, and is thus the representation of 329 -- a null pattern. It has a zero index element since it is never placed 330 -- inside a pattern. Furthermore it does not need a successor, since it 331 -- marks the end of the pattern, so that no more successors are needed. 332 333 EOP : constant PE_Ptr := EOP_Element'Unrestricted_Access; 334 -- This is the end of pattern pointer, that is used in the Pthen pointer 335 -- of other nodes to signal end of pattern. 336 337 -- The following array is used to determine if a pattern used as an 338 -- argument for Arbno is eligible for treatment using the simple Arbno 339 -- structure (i.e. it is a pattern that is guaranteed to match at least 340 -- one character on success, and not to make any entries on the stack. 341 342 OK_For_Simple_Arbno : constant array (Pattern_Code) of Boolean := 343 (PC_Any_CS | 344 PC_Any_CH | 345 PC_Any_VF | 346 PC_Any_VP | 347 PC_Char | 348 PC_Len_Nat | 349 PC_NotAny_CS | 350 PC_NotAny_CH | 351 PC_NotAny_VF | 352 PC_NotAny_VP | 353 PC_Span_CS | 354 PC_Span_CH | 355 PC_Span_VF | 356 PC_Span_VP | 357 PC_String | 358 PC_String_2 | 359 PC_String_3 | 360 PC_String_4 | 361 PC_String_5 | 362 PC_String_6 => True, 363 others => False); 364 365 ------------------------------- 366 -- The Pattern History Stack -- 367 ------------------------------- 368 369 -- The pattern history stack is used for controlling backtracking when 370 -- a match fails. The idea is to stack entries that give a cursor value 371 -- to be restored, and a node to be reestablished as the current node to 372 -- attempt an appropriate rematch operation. The processing for a pattern 373 -- element that has rematch alternatives pushes an appropriate entry or 374 -- entry on to the stack, and the proceeds. If a match fails at any point, 375 -- the top element of the stack is popped off, resetting the cursor and 376 -- the match continues by accessing the node stored with this entry. 377 378 type Stack_Entry is record 379 380 Cursor : Integer; 381 -- Saved cursor value that is restored when this entry is popped 382 -- from the stack if a match attempt fails. Occasionally, this 383 -- field is used to store a history stack pointer instead of a 384 -- cursor. Such cases are noted in the documentation and the value 385 -- stored is negative since stack pointer values are always negative. 386 387 Node : PE_Ptr; 388 -- This pattern element reference is reestablished as the current 389 -- Node to be matched (which will attempt an appropriate rematch). 390 391 end record; 392 393 subtype Stack_Range is Integer range -Stack_Size .. -1; 394 395 type Stack_Type is array (Stack_Range) of Stack_Entry; 396 -- The type used for a history stack. The actual instance of the stack 397 -- is declared as a local variable in the Match routine, to properly 398 -- handle recursive calls to Match. All stack pointer values are negative 399 -- to distinguish them from normal cursor values. 400 401 -- Note: the pattern matching stack is used only to handle backtracking. 402 -- If no backtracking occurs, its entries are never accessed, and never 403 -- popped off, and in particular it is normal for a successful match 404 -- to terminate with entries on the stack that are simply discarded. 405 406 -- Note: in subsequent diagrams of the stack, we always place element 407 -- zero (the deepest element) at the top of the page, then build the 408 -- stack down on the page with the most recent (top of stack) element 409 -- being the bottom-most entry on the page. 410 411 -- Stack checking is handled by labeling every pattern with the maximum 412 -- number of stack entries that are required, so a single check at the 413 -- start of matching the pattern suffices. There are two exceptions. 414 415 -- First, the count does not include entries for recursive pattern 416 -- references. Such recursions must therefore perform a specific 417 -- stack check with respect to the number of stack entries required 418 -- by the recursive pattern that is accessed and the amount of stack 419 -- that remains unused. 420 421 -- Second, the count includes only one iteration of an Arbno pattern, 422 -- so a specific check must be made on subsequent iterations that there 423 -- is still enough stack space left. The Arbno node has a field that 424 -- records the number of stack entries required by its argument for 425 -- this purpose. 426 427 --------------------------------------------------- 428 -- Use of Serial Index Field in Pattern Elements -- 429 --------------------------------------------------- 430 431 -- The serial index numbers for the pattern elements are assigned as 432 -- a pattern is constructed from its constituent elements. Note that there 433 -- is never any sharing of pattern elements between patterns (copies are 434 -- always made), so the serial index numbers are unique to a particular 435 -- pattern as referenced from the P field of a value of type Pattern. 436 437 -- The index numbers meet three separate invariants, which are used for 438 -- various purposes as described in this section. 439 440 -- First, the numbers uniquely identify the pattern elements within a 441 -- pattern. If Num is the number of elements in a given pattern, then 442 -- the serial index numbers for the elements of this pattern will range 443 -- from 1 .. Num, so that each element has a separate value. 444 445 -- The purpose of this assignment is to provide a convenient auxiliary 446 -- data structure mechanism during operations which must traverse a 447 -- pattern (e.g. copy and finalization processing). Once constructed 448 -- patterns are strictly read only. This is necessary to allow sharing 449 -- of patterns between tasks. This means that we cannot go marking the 450 -- pattern (e.g. with a visited bit). Instead we construct a separate 451 -- vector that contains the necessary information indexed by the Index 452 -- values in the pattern elements. For this purpose the only requirement 453 -- is that they be uniquely assigned. 454 455 -- Second, the pattern element referenced directly, i.e. the leading 456 -- pattern element, is always the maximum numbered element and therefore 457 -- indicates the total number of elements in the pattern. More precisely, 458 -- the element referenced by the P field of a pattern value, or the 459 -- element returned by any of the internal pattern construction routines 460 -- in the body (that return a value of type PE_Ptr) always is this 461 -- maximum element, 462 463 -- The purpose of this requirement is to allow an immediate determination 464 -- of the number of pattern elements within a pattern. This is used to 465 -- properly size the vectors used to contain auxiliary information for 466 -- traversal as described above. 467 468 -- Third, as compound pattern structures are constructed, the way in which 469 -- constituent parts of the pattern are constructed is stylized. This is 470 -- an automatic consequence of the way that these compound structures 471 -- are constructed, and basically what we are doing is simply documenting 472 -- and specifying the natural result of the pattern construction. The 473 -- section describing compound pattern structures gives details of the 474 -- numbering of each compound pattern structure. 475 476 -- The purpose of specifying the stylized numbering structures for the 477 -- compound patterns is to help simplify the processing in the Image 478 -- function, since it eases the task of retrieving the original recursive 479 -- structure of the pattern from the flat graph structure of elements. 480 -- This use in the Image function is the only point at which the code 481 -- makes use of the stylized structures. 482 483 type Ref_Array is array (IndexT range <>) of PE_Ptr; 484 -- This type is used to build an array whose N'th entry references the 485 -- element in a pattern whose Index value is N. See Build_Ref_Array. 486 487 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array); 488 -- Given a pattern element which is the leading element of a pattern 489 -- structure, and a Ref_Array with bounds 1 .. E.Index, fills in the 490 -- Ref_Array so that its N'th entry references the element of the 491 -- referenced pattern whose Index value is N. 492 493 ------------------------------- 494 -- Recursive Pattern Matches -- 495 ------------------------------- 496 497 -- The pattern primitive (+P) where P is a Pattern_Ptr or Pattern_Func 498 -- causes a recursive pattern match. This cannot be handled by an actual 499 -- recursive call to the outer level Match routine, since this would not 500 -- allow for possible backtracking into the region matched by the inner 501 -- pattern. Indeed this is the classical clash between recursion and 502 -- backtracking, and a simple recursive stack structure does not suffice. 503 504 -- This section describes how this recursion and the possible associated 505 -- backtracking is handled. We still use a single stack, but we establish 506 -- the concept of nested regions on this stack, each of which has a stack 507 -- base value pointing to the deepest stack entry of the region. The base 508 -- value for the outer level is zero. 509 510 -- When a recursive match is established, two special stack entries are 511 -- made. The first entry is used to save the original node that starts 512 -- the recursive match. This is saved so that the successor field of 513 -- this node is accessible at the end of the match, but it is never 514 -- popped and executed. 515 516 -- The second entry corresponds to a standard new region action. A 517 -- PC_R_Remove node is stacked, whose cursor field is used to store 518 -- the outer stack base, and the stack base is reset to point to 519 -- this PC_R_Remove node. Then the recursive pattern is matched and 520 -- it can make history stack entries in the normal matter, so now 521 -- the stack looks like: 522 523 -- (stack entries made by outer level) 524 525 -- (Special entry, node is (+P) successor 526 -- cursor entry is not used) 527 528 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack base 529 -- saved base value for the enclosing region) 530 531 -- (stack entries made by inner level) 532 533 -- If a subsequent failure occurs and pops the PC_R_Remove node, it 534 -- removes itself and the special entry immediately underneath it, 535 -- restores the stack base value for the enclosing region, and then 536 -- again signals failure to look for alternatives that were stacked 537 -- before the recursion was initiated. 538 539 -- Now we need to consider what happens if the inner pattern succeeds, as 540 -- signalled by accessing the special PC_EOP pattern primitive. First we 541 -- recognize the nested case by looking at the Base value. If this Base 542 -- value is Stack'First, then the entire match has succeeded, but if the 543 -- base value is greater than Stack'First, then we have successfully 544 -- matched an inner pattern, and processing continues at the outer level. 545 546 -- There are two cases. The simple case is when the inner pattern has made 547 -- no stack entries, as recognized by the fact that the current stack 548 -- pointer is equal to the current base value. In this case it is fine to 549 -- remove all trace of the recursion by restoring the outer base value and 550 -- using the special entry to find the appropriate successor node. 551 552 -- The more complex case arises when the inner match does make stack 553 -- entries. In this case, the PC_EOP processing stacks a special entry 554 -- whose cursor value saves the saved inner base value (the one that 555 -- references the corresponding PC_R_Remove value), and whose node 556 -- pointer references a PC_R_Restore node, so the stack looks like: 557 558 -- (stack entries made by outer level) 559 560 -- (Special entry, node is (+P) successor, 561 -- cursor entry is not used) 562 563 -- (PC_R_Remove entry, "cursor" value is (negative) 564 -- saved base value for the enclosing region) 565 566 -- (stack entries made by inner level) 567 568 -- (PC_Region_Replace entry, "cursor" value is (negative) 569 -- stack pointer value referencing the PC_R_Remove entry). 570 571 -- If the entire match succeeds, then these stack entries are, as usual, 572 -- ignored and abandoned. If on the other hand a subsequent failure 573 -- causes the PC_Region_Replace entry to be popped, it restores the 574 -- inner base value from its saved "cursor" value and then fails again. 575 -- Note that it is OK that the cursor is temporarily clobbered by this 576 -- pop, since the second failure will reestablish a proper cursor value. 577 578 --------------------------------- 579 -- Compound Pattern Structures -- 580 --------------------------------- 581 582 -- This section discusses the compound structures used to represent 583 -- constructed patterns. It shows the graph structures of pattern 584 -- elements that are constructed, and in the case of patterns that 585 -- provide backtracking possibilities, describes how the history 586 -- stack is used to control the backtracking. Finally, it notes the 587 -- way in which the Index numbers are assigned to the structure. 588 589 -- In all diagrams, solid lines (built with minus signs or vertical 590 -- bars, represent successor pointers (Pthen fields) with > or V used 591 -- to indicate the direction of the pointer. The initial node of the 592 -- structure is in the upper left of the diagram. A dotted line is an 593 -- alternative pointer from the element above it to the element below 594 -- it. See individual sections for details on how alternatives are used. 595 596 ------------------- 597 -- Concatenation -- 598 ------------------- 599 600 -- In the pattern structures listed in this section, a line that looks 601 -- like ----> with nothing to the right indicates an end of pattern 602 -- (EOP) pointer that represents the end of the match. 603 604 -- When a pattern concatenation (L & R) occurs, the resulting structure 605 -- is obtained by finding all such EOP pointers in L, and replacing 606 -- them to point to R. This is the most important flattening that 607 -- occurs in constructing a pattern, and it means that the pattern 608 -- matching circuitry does not have to keep track of the structure 609 -- of a pattern with respect to concatenation, since the appropriate 610 -- successor is always at hand. 611 612 -- Concatenation itself generates no additional possibilities for 613 -- backtracking, but the constituent patterns of the concatenated 614 -- structure will make stack entries as usual. The maximum amount 615 -- of stack required by the structure is thus simply the sum of the 616 -- maximums required by L and R. 617 618 -- The index numbering of a concatenation structure works by leaving 619 -- the numbering of the right hand pattern, R, unchanged and adjusting 620 -- the numbers in the left hand pattern, L up by the count of elements 621 -- in R. This ensures that the maximum numbered element is the leading 622 -- element as required (given that it was the leading element in L). 623 624 ----------------- 625 -- Alternation -- 626 ----------------- 627 628 -- A pattern (L or R) constructs the structure: 629 630 -- +---+ +---+ 631 -- | A |---->| L |----> 632 -- +---+ +---+ 633 -- . 634 -- . 635 -- +---+ 636 -- | R |----> 637 -- +---+ 638 639 -- The A element here is a PC_Alt node, and the dotted line represents 640 -- the contents of the Alt field. When the PC_Alt element is matched, 641 -- it stacks a pointer to the leading element of R on the history stack 642 -- so that on subsequent failure, a match of R is attempted. 643 644 -- The A node is the highest numbered element in the pattern. The 645 -- original index numbers of R are unchanged, but the index numbers 646 -- of the L pattern are adjusted up by the count of elements in R. 647 648 -- Note that the difference between the index of the L leading element 649 -- the index of the R leading element (after building the alt structure) 650 -- indicates the number of nodes in L, and this is true even after the 651 -- structure is incorporated into some larger structure. For example, 652 -- if the A node has index 16, and L has index 15 and R has index 653 -- 5, then we know that L has 10 (15-5) elements in it. 654 655 -- Suppose that we now concatenate this structure to another pattern 656 -- with 9 elements in it. We will now have the A node with an index 657 -- of 25, L with an index of 24 and R with an index of 14. We still 658 -- know that L has 10 (24-14) elements in it, numbered 15-24, and 659 -- consequently the successor of the alternation structure has an 660 -- index with a value less than 15. This is used in Image to figure 661 -- out the original recursive structure of a pattern. 662 663 -- To clarify the interaction of the alternation and concatenation 664 -- structures, here is a more complex example of the structure built 665 -- for the pattern: 666 667 -- (V or W or X) (Y or Z) 668 669 -- where A,B,C,D,E are all single element patterns: 670 671 -- +---+ +---+ +---+ +---+ 672 -- I A I---->I V I---+-->I A I---->I Y I----> 673 -- +---+ +---+ I +---+ +---+ 674 -- . I . 675 -- . I . 676 -- +---+ +---+ I +---+ 677 -- I A I---->I W I-->I I Z I----> 678 -- +---+ +---+ I +---+ 679 -- . I 680 -- . I 681 -- +---+ I 682 -- I X I------------>+ 683 -- +---+ 684 685 -- The numbering of the nodes would be as follows: 686 687 -- +---+ +---+ +---+ +---+ 688 -- I 8 I---->I 7 I---+-->I 3 I---->I 2 I----> 689 -- +---+ +---+ I +---+ +---+ 690 -- . I . 691 -- . I . 692 -- +---+ +---+ I +---+ 693 -- I 6 I---->I 5 I-->I I 1 I----> 694 -- +---+ +---+ I +---+ 695 -- . I 696 -- . I 697 -- +---+ I 698 -- I 4 I------------>+ 699 -- +---+ 700 701 -- Note: The above structure actually corresponds to 702 703 -- (A or (B or C)) (D or E) 704 705 -- rather than 706 707 -- ((A or B) or C) (D or E) 708 709 -- which is the more natural interpretation, but in fact alternation 710 -- is associative, and the construction of an alternative changes the 711 -- left grouped pattern to the right grouped pattern in any case, so 712 -- that the Image function produces a more natural looking output. 713 714 --------- 715 -- Arb -- 716 --------- 717 718 -- An Arb pattern builds the structure 719 720 -- +---+ 721 -- | X |----> 722 -- +---+ 723 -- . 724 -- . 725 -- +---+ 726 -- | Y |----> 727 -- +---+ 728 729 -- The X node is a PC_Arb_X node, which matches null, and stacks a 730 -- pointer to Y node, which is the PC_Arb_Y node that matches one 731 -- extra character and restacks itself. 732 733 -- The PC_Arb_X node is numbered 2, and the PC_Arb_Y node is 1 734 735 ------------------------- 736 -- Arbno (simple case) -- 737 ------------------------- 738 739 -- The simple form of Arbno can be used where the pattern always 740 -- matches at least one character if it succeeds, and it is known 741 -- not to make any history stack entries. In this case, Arbno (P) 742 -- can construct the following structure: 743 744 -- +-------------+ 745 -- | ^ 746 -- V | 747 -- +---+ | 748 -- | S |----> | 749 -- +---+ | 750 -- . | 751 -- . | 752 -- +---+ | 753 -- | P |---------->+ 754 -- +---+ 755 756 -- The S (PC_Arbno_S) node matches null stacking a pointer to the 757 -- pattern P. If a subsequent failure causes P to be matched and 758 -- this match succeeds, then node A gets restacked to try another 759 -- instance if needed by a subsequent failure. 760 761 -- The node numbering of the constituent pattern P is not affected. 762 -- The S node has a node number of P.Index + 1. 763 764 -------------------------- 765 -- Arbno (complex case) -- 766 -------------------------- 767 768 -- A call to Arbno (P), where P can match null (or at least is not 769 -- known to require a non-null string) and/or P requires pattern stack 770 -- entries, constructs the following structure: 771 772 -- +--------------------------+ 773 -- | ^ 774 -- V | 775 -- +---+ | 776 -- | X |----> | 777 -- +---+ | 778 -- . | 779 -- . | 780 -- +---+ +---+ +---+ | 781 -- | E |---->| P |---->| Y |--->+ 782 -- +---+ +---+ +---+ 783 784 -- The node X (PC_Arbno_X) matches null, stacking a pointer to the 785 -- E-P-X structure used to match one Arbno instance. 786 787 -- Here E is the PC_R_Enter node which matches null and creates two 788 -- stack entries. The first is a special entry whose node field is 789 -- not used at all, and whose cursor field has the initial cursor. 790 791 -- The second entry corresponds to a standard new region action. A 792 -- PC_R_Remove node is stacked, whose cursor field is used to store 793 -- the outer stack base, and the stack base is reset to point to 794 -- this PC_R_Remove node. Then the pattern P is matched, and it can 795 -- make history stack entries in the normal manner, so now the stack 796 -- looks like: 797 798 -- (stack entries made before assign pattern) 799 800 -- (Special entry, node field not used, 801 -- used only to save initial cursor) 802 803 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 804 -- saved base value for the enclosing region) 805 806 -- (stack entries made by matching P) 807 808 -- If the match of P fails, then the PC_R_Remove entry is popped and 809 -- it removes both itself and the special entry underneath it, 810 -- restores the outer stack base, and signals failure. 811 812 -- If the match of P succeeds, then node Y, the PC_Arbno_Y node, pops 813 -- the inner region. There are two possibilities. If matching P left 814 -- no stack entries, then all traces of the inner region can be removed. 815 -- If there are stack entries, then we push an PC_Region_Replace stack 816 -- entry whose "cursor" value is the inner stack base value, and then 817 -- restore the outer stack base value, so the stack looks like: 818 819 -- (stack entries made before assign pattern) 820 821 -- (Special entry, node field not used, 822 -- used only to save initial cursor) 823 824 -- (PC_R_Remove entry, "cursor" value is (negative) 825 -- saved base value for the enclosing region) 826 827 -- (stack entries made by matching P) 828 829 -- (PC_Region_Replace entry, "cursor" value is (negative) 830 -- stack pointer value referencing the PC_R_Remove entry). 831 832 -- Now that we have matched another instance of the Arbno pattern, 833 -- we need to move to the successor. There are two cases. If the 834 -- Arbno pattern matched null, then there is no point in seeking 835 -- alternatives, since we would just match a whole bunch of nulls. 836 -- In this case we look through the alternative node, and move 837 -- directly to its successor (i.e. the successor of the Arbno 838 -- pattern). If on the other hand a non-null string was matched, 839 -- we simply follow the successor to the alternative node, which 840 -- sets up for another possible match of the Arbno pattern. 841 842 -- As noted in the section on stack checking, the stack count (and 843 -- hence the stack check) for a pattern includes only one iteration 844 -- of the Arbno pattern. To make sure that multiple iterations do not 845 -- overflow the stack, the Arbno node saves the stack count required 846 -- by a single iteration, and the Concat function increments this to 847 -- include stack entries required by any successor. The PC_Arbno_Y 848 -- node uses this count to ensure that sufficient stack remains 849 -- before proceeding after matching each new instance. 850 851 -- The node numbering of the constituent pattern P is not affected. 852 -- Where N is the number of nodes in P, the Y node is numbered N + 1, 853 -- the E node is N + 2, and the X node is N + 3. 854 855 ---------------------- 856 -- Assign Immediate -- 857 ---------------------- 858 859 -- Immediate assignment (P * V) constructs the following structure 860 861 -- +---+ +---+ +---+ 862 -- | E |---->| P |---->| A |----> 863 -- +---+ +---+ +---+ 864 865 -- Here E is the PC_R_Enter node which matches null and creates two 866 -- stack entries. The first is a special entry whose node field is 867 -- not used at all, and whose cursor field has the initial cursor. 868 869 -- The second entry corresponds to a standard new region action. A 870 -- PC_R_Remove node is stacked, whose cursor field is used to store 871 -- the outer stack base, and the stack base is reset to point to 872 -- this PC_R_Remove node. Then the pattern P is matched, and it can 873 -- make history stack entries in the normal manner, so now the stack 874 -- looks like: 875 876 -- (stack entries made before assign pattern) 877 878 -- (Special entry, node field not used, 879 -- used only to save initial cursor) 880 881 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 882 -- saved base value for the enclosing region) 883 884 -- (stack entries made by matching P) 885 886 -- If the match of P fails, then the PC_R_Remove entry is popped 887 -- and it removes both itself and the special entry underneath it, 888 -- restores the outer stack base, and signals failure. 889 890 -- If the match of P succeeds, then node A, which is the actual 891 -- PC_Assign_Imm node, executes the assignment (using the stack 892 -- base to locate the entry with the saved starting cursor value), 893 -- and the pops the inner region. There are two possibilities, if 894 -- matching P left no stack entries, then all traces of the inner 895 -- region can be removed. If there are stack entries, then we push 896 -- an PC_Region_Replace stack entry whose "cursor" value is the 897 -- inner stack base value, and then restore the outer stack base 898 -- value, so the stack looks like: 899 900 -- (stack entries made before assign pattern) 901 902 -- (Special entry, node field not used, 903 -- used only to save initial cursor) 904 905 -- (PC_R_Remove entry, "cursor" value is (negative) 906 -- saved base value for the enclosing region) 907 908 -- (stack entries made by matching P) 909 910 -- (PC_Region_Replace entry, "cursor" value is the (negative) 911 -- stack pointer value referencing the PC_R_Remove entry). 912 913 -- If a subsequent failure occurs, the PC_Region_Replace node restores 914 -- the inner stack base value and signals failure to explore rematches 915 -- of the pattern P. 916 917 -- The node numbering of the constituent pattern P is not affected. 918 -- Where N is the number of nodes in P, the A node is numbered N + 1, 919 -- and the E node is N + 2. 920 921 --------------------- 922 -- Assign On Match -- 923 --------------------- 924 925 -- The assign on match (**) pattern is quite similar to the assign 926 -- immediate pattern, except that the actual assignment has to be 927 -- delayed. The following structure is constructed: 928 929 -- +---+ +---+ +---+ 930 -- | E |---->| P |---->| A |----> 931 -- +---+ +---+ +---+ 932 933 -- The operation of this pattern is identical to that described above 934 -- for deferred assignment, up to the point where P has been matched. 935 936 -- The A node, which is the PC_Assign_OnM node first pushes a 937 -- PC_Assign node onto the history stack. This node saves the ending 938 -- cursor and acts as a flag for the final assignment, as further 939 -- described below. 940 941 -- It then stores a pointer to itself in the special entry node field. 942 -- This was otherwise unused, and is now used to retrieve the address 943 -- of the variable to be assigned at the end of the pattern. 944 945 -- After that the inner region is terminated in the usual manner, 946 -- by stacking a PC_R_Restore entry as described for the assign 947 -- immediate case. Note that the optimization of completely 948 -- removing the inner region does not happen in this case, since 949 -- we have at least one stack entry (the PC_Assign one we just made). 950 -- The stack now looks like: 951 952 -- (stack entries made before assign pattern) 953 954 -- (Special entry, node points to copy of 955 -- the PC_Assign_OnM node, and the 956 -- cursor field saves the initial cursor). 957 958 -- (PC_R_Remove entry, "cursor" value is (negative) 959 -- saved base value for the enclosing region) 960 961 -- (stack entries made by matching P) 962 963 -- (PC_Assign entry, saves final cursor) 964 965 -- (PC_Region_Replace entry, "cursor" value is (negative) 966 -- stack pointer value referencing the PC_R_Remove entry). 967 968 -- If a subsequent failure causes the PC_Assign node to execute it 969 -- simply removes itself and propagates the failure. 970 971 -- If the match succeeds, then the history stack is scanned for 972 -- PC_Assign nodes, and the assignments are executed (examination 973 -- of the above diagram will show that all the necessary data is 974 -- at hand for the assignment). 975 976 -- To optimize the common case where no assign-on-match operations 977 -- are present, a global flag Assign_OnM is maintained which is 978 -- initialize to False, and gets set True as part of the execution 979 -- of the PC_Assign_OnM node. The scan of the history stack for 980 -- PC_Assign entries is done only if this flag is set. 981 982 -- The node numbering of the constituent pattern P is not affected. 983 -- Where N is the number of nodes in P, the A node is numbered N + 1, 984 -- and the E node is N + 2. 985 986 --------- 987 -- Bal -- 988 --------- 989 990 -- Bal builds a single node: 991 992 -- +---+ 993 -- | B |----> 994 -- +---+ 995 996 -- The node B is the PC_Bal node which matches a parentheses balanced 997 -- string, starting at the current cursor position. It then updates 998 -- the cursor past this matched string, and stacks a pointer to itself 999 -- with this updated cursor value on the history stack, to extend the 1000 -- matched string on a subsequent failure. 1001 1002 -- Since this is a single node it is numbered 1 (the reason we include 1003 -- it in the compound patterns section is that it backtracks). 1004 1005 ------------ 1006 -- BreakX -- 1007 ------------ 1008 1009 -- BreakX builds the structure 1010 1011 -- +---+ +---+ 1012 -- | B |---->| A |----> 1013 -- +---+ +---+ 1014 -- ^ . 1015 -- | . 1016 -- | +---+ 1017 -- +<------| X | 1018 -- +---+ 1019 1020 -- Here the B node is the BreakX_xx node that performs a normal Break 1021 -- function. The A node is an alternative (PC_Alt) node that matches 1022 -- null, but stacks a pointer to node X (the PC_BreakX_X node) which 1023 -- extends the match one character (to eat up the previously detected 1024 -- break character), and then rematches the break. 1025 1026 -- The B node is numbered 3, the alternative node is 1, and the X 1027 -- node is 2. 1028 1029 ----------- 1030 -- Fence -- 1031 ----------- 1032 1033 -- Fence builds a single node: 1034 1035 -- +---+ 1036 -- | F |----> 1037 -- +---+ 1038 1039 -- The element F, PC_Fence, matches null, and stacks a pointer to a 1040 -- PC_Cancel element which will abort the match on a subsequent failure. 1041 1042 -- Since this is a single element it is numbered 1 (the reason we 1043 -- include it in the compound patterns section is that it backtracks). 1044 1045 -------------------- 1046 -- Fence Function -- 1047 -------------------- 1048 1049 -- A call to the Fence function builds the structure: 1050 1051 -- +---+ +---+ +---+ 1052 -- | E |---->| P |---->| X |----> 1053 -- +---+ +---+ +---+ 1054 1055 -- Here E is the PC_R_Enter node which matches null and creates two 1056 -- stack entries. The first is a special entry which is not used at 1057 -- all in the fence case (it is present merely for uniformity with 1058 -- other cases of region enter operations). 1059 1060 -- The second entry corresponds to a standard new region action. A 1061 -- PC_R_Remove node is stacked, whose cursor field is used to store 1062 -- the outer stack base, and the stack base is reset to point to 1063 -- this PC_R_Remove node. Then the pattern P is matched, and it can 1064 -- make history stack entries in the normal manner, so now the stack 1065 -- looks like: 1066 1067 -- (stack entries made before fence pattern) 1068 1069 -- (Special entry, not used at all) 1070 1071 -- (PC_R_Remove entry, "cursor" value is (negative) <-- Stack Base 1072 -- saved base value for the enclosing region) 1073 1074 -- (stack entries made by matching P) 1075 1076 -- If the match of P fails, then the PC_R_Remove entry is popped 1077 -- and it removes both itself and the special entry underneath it, 1078 -- restores the outer stack base, and signals failure. 1079 1080 -- If the match of P succeeds, then node X, the PC_Fence_X node, gets 1081 -- control. One might be tempted to think that at this point, the 1082 -- history stack entries made by matching P can just be removed since 1083 -- they certainly are not going to be used for rematching (that is 1084 -- whole point of Fence after all). However, this is wrong, because 1085 -- it would result in the loss of possible assign-on-match entries 1086 -- for deferred pattern assignments. 1087 1088 -- Instead what we do is to make a special entry whose node references 1089 -- PC_Fence_Y, and whose cursor saves the inner stack base value, i.e. 1090 -- the pointer to the PC_R_Remove entry. Then the outer stack base 1091 -- pointer is restored, so the stack looks like: 1092 1093 -- (stack entries made before assign pattern) 1094 1095 -- (Special entry, not used at all) 1096 1097 -- (PC_R_Remove entry, "cursor" value is (negative) 1098 -- saved base value for the enclosing region) 1099 1100 -- (stack entries made by matching P) 1101 1102 -- (PC_Fence_Y entry, "cursor" value is (negative) stack 1103 -- pointer value referencing the PC_R_Remove entry). 1104 1105 -- If a subsequent failure occurs, then the PC_Fence_Y entry removes 1106 -- the entire inner region, including all entries made by matching P, 1107 -- and alternatives prior to the Fence pattern are sought. 1108 1109 -- The node numbering of the constituent pattern P is not affected. 1110 -- Where N is the number of nodes in P, the X node is numbered N + 1, 1111 -- and the E node is N + 2. 1112 1113 ------------- 1114 -- Succeed -- 1115 ------------- 1116 1117 -- Succeed builds a single node: 1118 1119 -- +---+ 1120 -- | S |----> 1121 -- +---+ 1122 1123 -- The node S is the PC_Succeed node which matches null, and stacks 1124 -- a pointer to itself on the history stack, so that a subsequent 1125 -- failure repeats the same match. 1126 1127 -- Since this is a single node it is numbered 1 (the reason we include 1128 -- it in the compound patterns section is that it backtracks). 1129 1130 --------------------- 1131 -- Write Immediate -- 1132 --------------------- 1133 1134 -- The structure built for a write immediate operation (P * F, where 1135 -- F is a file access value) is: 1136 1137 -- +---+ +---+ +---+ 1138 -- | E |---->| P |---->| W |----> 1139 -- +---+ +---+ +---+ 1140 1141 -- Here E is the PC_R_Enter node and W is the PC_Write_Imm node. The 1142 -- handling is identical to that described above for Assign Immediate, 1143 -- except that at the point where a successful match occurs, the matched 1144 -- substring is written to the referenced file. 1145 1146 -- The node numbering of the constituent pattern P is not affected. 1147 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1148 -- and the E node is N + 2. 1149 1150 -------------------- 1151 -- Write On Match -- 1152 -------------------- 1153 1154 -- The structure built for a write on match operation (P ** F, where 1155 -- F is a file access value) is: 1156 1157 -- +---+ +---+ +---+ 1158 -- | E |---->| P |---->| W |----> 1159 -- +---+ +---+ +---+ 1160 1161 -- Here E is the PC_R_Enter node and W is the PC_Write_OnM node. The 1162 -- handling is identical to that described above for Assign On Match, 1163 -- except that at the point where a successful match has completed, 1164 -- the matched substring is written to the referenced file. 1165 1166 -- The node numbering of the constituent pattern P is not affected. 1167 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1168 -- and the E node is N + 2. 1169 ----------------------- 1170 -- Constant Patterns -- 1171 ----------------------- 1172 1173 -- The following pattern elements are referenced only from the pattern 1174 -- history stack. In each case the processing for the pattern element 1175 -- results in pattern match abort, or further failure, so there is no 1176 -- need for a successor and no need for a node number 1177 1178 CP_Assign : aliased PE := (PC_Assign, 0, N); 1179 CP_Cancel : aliased PE := (PC_Cancel, 0, N); 1180 CP_Fence_Y : aliased PE := (PC_Fence_Y, 0, N); 1181 CP_R_Remove : aliased PE := (PC_R_Remove, 0, N); 1182 CP_R_Restore : aliased PE := (PC_R_Restore, 0, N); 1183 1184 ----------------------- 1185 -- Local Subprograms -- 1186 ----------------------- 1187 1188 function Alternate (L, R : PE_Ptr) return PE_Ptr; 1189 function "or" (L, R : PE_Ptr) return PE_Ptr renames Alternate; 1190 -- Build pattern structure corresponding to the alternation of L, R. 1191 -- (i.e. try to match L, and if that fails, try to match R). 1192 1193 function Arbno_Simple (P : PE_Ptr) return PE_Ptr; 1194 -- Build simple Arbno pattern, P is a pattern that is guaranteed to 1195 -- match at least one character if it succeeds and to require no 1196 -- stack entries under all circumstances. The result returned is 1197 -- a simple Arbno structure as previously described. 1198 1199 function Bracket (E, P, A : PE_Ptr) return PE_Ptr; 1200 -- Given two single node pattern elements E and A, and a (possible 1201 -- complex) pattern P, construct the concatenation E-->P-->A and 1202 -- return a pointer to E. The concatenation does not affect the 1203 -- node numbering in P. A has a number one higher than the maximum 1204 -- number in P, and E has a number two higher than the maximum 1205 -- number in P (see for example the Assign_Immediate structure to 1206 -- understand a typical use of this function). 1207 1208 function BreakX_Make (B : PE_Ptr) return Pattern; 1209 -- Given a pattern element for a Break pattern, returns the 1210 -- corresponding BreakX compound pattern structure. 1211 1212 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr; 1213 -- Creates a pattern element that represents a concatenation of the 1214 -- two given pattern elements (i.e. the pattern L followed by R). 1215 -- The result returned is always the same as L, but the pattern 1216 -- referenced by L is modified to have R as a successor. This 1217 -- procedure does not copy L or R, so if a copy is required, it 1218 -- is the responsibility of the caller. The Incr parameter is an 1219 -- amount to be added to the Nat field of any P_Arbno_Y node that is 1220 -- in the left operand, it represents the additional stack space 1221 -- required by the right operand. 1222 1223 function C_To_PE (C : PChar) return PE_Ptr; 1224 -- Given a character, constructs a pattern element that matches 1225 -- the single character. 1226 1227 function Copy (P : PE_Ptr) return PE_Ptr; 1228 -- Creates a copy of the pattern element referenced by the given 1229 -- pattern element reference. This is a deep copy, which means that 1230 -- it follows the Next and Alt pointers. 1231 1232 function Image (P : PE_Ptr) return String; 1233 -- Returns the image of the address of the referenced pattern element. 1234 -- This is equivalent to Image (To_Address (P)); 1235 1236 function Is_In (C : Character; Str : String) return Boolean; 1237 pragma Inline (Is_In); 1238 -- Determines if the character C is in string Str 1239 1240 procedure Logic_Error; 1241 -- Called to raise Program_Error with an appropriate message if an 1242 -- internal logic error is detected. 1243 1244 function Str_BF (A : Boolean_Func) return String; 1245 function Str_FP (A : File_Ptr) return String; 1246 function Str_NF (A : Natural_Func) return String; 1247 function Str_NP (A : Natural_Ptr) return String; 1248 function Str_PP (A : Pattern_Ptr) return String; 1249 function Str_VF (A : VString_Func) return String; 1250 function Str_VP (A : VString_Ptr) return String; 1251 -- These are debugging routines, which return a representation of the 1252 -- given access value (they are called only by Image and Dump) 1253 1254 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr); 1255 -- Adjusts all EOP pointers in Pat to point to Succ. No other changes 1256 -- are made. In particular, Succ is unchanged, and no index numbers 1257 -- are modified. Note that Pat may not be equal to EOP on entry. 1258 1259 function S_To_PE (Str : PString) return PE_Ptr; 1260 -- Given a string, constructs a pattern element that matches the string 1261 1262 procedure Uninitialized_Pattern; 1263 pragma No_Return (Uninitialized_Pattern); 1264 -- Called to raise Program_Error with an appropriate error message if 1265 -- an uninitialized pattern is used in any pattern construction or 1266 -- pattern matching operation. 1267 1268 procedure XMatch 1269 (Subject : String; 1270 Pat_P : PE_Ptr; 1271 Pat_S : Natural; 1272 Start : out Natural; 1273 Stop : out Natural); 1274 -- This is the common pattern match routine. It is passed a string and 1275 -- a pattern, and it indicates success or failure, and on success the 1276 -- section of the string matched. It does not perform any assignments 1277 -- to the subject string, so pattern replacement is for the caller. 1278 -- 1279 -- Subject The subject string. The lower bound is always one. In the 1280 -- Match procedures, it is fine to use strings whose lower bound 1281 -- is not one, but we perform a one time conversion before the 1282 -- call to XMatch, so that XMatch does not have to be bothered 1283 -- with strange lower bounds. 1284 -- 1285 -- Pat_P Points to initial pattern element of pattern to be matched 1286 -- 1287 -- Pat_S Maximum required stack entries for pattern to be matched 1288 -- 1289 -- Start If match is successful, starting index of matched section. 1290 -- This value is always non-zero. A value of zero is used to 1291 -- indicate a failed match. 1292 -- 1293 -- Stop If match is successful, ending index of matched section. 1294 -- This can be zero if we match the null string at the start, 1295 -- in which case Start is set to zero, and Stop to one. If the 1296 -- Match fails, then the contents of Stop is undefined. 1297 1298 procedure XMatchD 1299 (Subject : String; 1300 Pat_P : PE_Ptr; 1301 Pat_S : Natural; 1302 Start : out Natural; 1303 Stop : out Natural); 1304 -- Identical in all respects to XMatch, except that trace information is 1305 -- output on Standard_Output during execution of the match. This is the 1306 -- version that is called if the original Match call has Debug => True. 1307 1308 --------- 1309 -- "&" -- 1310 --------- 1311 1312 function "&" (L : PString; R : Pattern) return Pattern is 1313 begin 1314 return (AFC with R.Stk, Concat (S_To_PE (L), Copy (R.P), R.Stk)); 1315 end "&"; 1316 1317 function "&" (L : Pattern; R : PString) return Pattern is 1318 begin 1319 return (AFC with L.Stk, Concat (Copy (L.P), S_To_PE (R), 0)); 1320 end "&"; 1321 1322 function "&" (L : PChar; R : Pattern) return Pattern is 1323 begin 1324 return (AFC with R.Stk, Concat (C_To_PE (L), Copy (R.P), R.Stk)); 1325 end "&"; 1326 1327 function "&" (L : Pattern; R : PChar) return Pattern is 1328 begin 1329 return (AFC with L.Stk, Concat (Copy (L.P), C_To_PE (R), 0)); 1330 end "&"; 1331 1332 function "&" (L : Pattern; R : Pattern) return Pattern is 1333 begin 1334 return (AFC with L.Stk + R.Stk, Concat (Copy (L.P), Copy (R.P), R.Stk)); 1335 end "&"; 1336 1337 --------- 1338 -- "*" -- 1339 --------- 1340 1341 -- Assign immediate 1342 1343 -- +---+ +---+ +---+ 1344 -- | E |---->| P |---->| A |----> 1345 -- +---+ +---+ +---+ 1346 1347 -- The node numbering of the constituent pattern P is not affected. 1348 -- Where N is the number of nodes in P, the A node is numbered N + 1, 1349 -- and the E node is N + 2. 1350 1351 function "*" (P : Pattern; Var : VString_Var) return Pattern is 1352 Pat : constant PE_Ptr := Copy (P.P); 1353 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1354 A : constant PE_Ptr := 1355 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1356 begin 1357 return (AFC with P.Stk + 3, Bracket (E, Pat, A)); 1358 end "*"; 1359 1360 function "*" (P : PString; Var : VString_Var) return Pattern is 1361 Pat : constant PE_Ptr := S_To_PE (P); 1362 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1363 A : constant PE_Ptr := 1364 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1365 begin 1366 return (AFC with 3, Bracket (E, Pat, A)); 1367 end "*"; 1368 1369 function "*" (P : PChar; Var : VString_Var) return Pattern is 1370 Pat : constant PE_Ptr := C_To_PE (P); 1371 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1372 A : constant PE_Ptr := 1373 new PE'(PC_Assign_Imm, 0, EOP, Var'Unrestricted_Access); 1374 begin 1375 return (AFC with 3, Bracket (E, Pat, A)); 1376 end "*"; 1377 1378 -- Write immediate 1379 1380 -- +---+ +---+ +---+ 1381 -- | E |---->| P |---->| W |----> 1382 -- +---+ +---+ +---+ 1383 1384 -- The node numbering of the constituent pattern P is not affected. 1385 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1386 -- and the E node is N + 2. 1387 1388 function "*" (P : Pattern; Fil : File_Access) return Pattern is 1389 Pat : constant PE_Ptr := Copy (P.P); 1390 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1391 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1392 begin 1393 return (AFC with 3, Bracket (E, Pat, W)); 1394 end "*"; 1395 1396 function "*" (P : PString; Fil : File_Access) return Pattern is 1397 Pat : constant PE_Ptr := S_To_PE (P); 1398 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1399 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1400 begin 1401 return (AFC with 3, Bracket (E, Pat, W)); 1402 end "*"; 1403 1404 function "*" (P : PChar; Fil : File_Access) return Pattern is 1405 Pat : constant PE_Ptr := C_To_PE (P); 1406 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1407 W : constant PE_Ptr := new PE'(PC_Write_Imm, 0, EOP, Fil); 1408 begin 1409 return (AFC with 3, Bracket (E, Pat, W)); 1410 end "*"; 1411 1412 ---------- 1413 -- "**" -- 1414 ---------- 1415 1416 -- Assign on match 1417 1418 -- +---+ +---+ +---+ 1419 -- | E |---->| P |---->| A |----> 1420 -- +---+ +---+ +---+ 1421 1422 -- The node numbering of the constituent pattern P is not affected. 1423 -- Where N is the number of nodes in P, the A node is numbered N + 1, 1424 -- and the E node is N + 2. 1425 1426 function "**" (P : Pattern; Var : VString_Var) return Pattern is 1427 Pat : constant PE_Ptr := Copy (P.P); 1428 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1429 A : constant PE_Ptr := 1430 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1431 begin 1432 return (AFC with P.Stk + 3, Bracket (E, Pat, A)); 1433 end "**"; 1434 1435 function "**" (P : PString; Var : VString_Var) return Pattern is 1436 Pat : constant PE_Ptr := S_To_PE (P); 1437 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1438 A : constant PE_Ptr := 1439 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1440 begin 1441 return (AFC with 3, Bracket (E, Pat, A)); 1442 end "**"; 1443 1444 function "**" (P : PChar; Var : VString_Var) return Pattern is 1445 Pat : constant PE_Ptr := C_To_PE (P); 1446 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1447 A : constant PE_Ptr := 1448 new PE'(PC_Assign_OnM, 0, EOP, Var'Unrestricted_Access); 1449 begin 1450 return (AFC with 3, Bracket (E, Pat, A)); 1451 end "**"; 1452 1453 -- Write on match 1454 1455 -- +---+ +---+ +---+ 1456 -- | E |---->| P |---->| W |----> 1457 -- +---+ +---+ +---+ 1458 1459 -- The node numbering of the constituent pattern P is not affected. 1460 -- Where N is the number of nodes in P, the W node is numbered N + 1, 1461 -- and the E node is N + 2. 1462 1463 function "**" (P : Pattern; Fil : File_Access) return Pattern is 1464 Pat : constant PE_Ptr := Copy (P.P); 1465 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1466 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1467 begin 1468 return (AFC with P.Stk + 3, Bracket (E, Pat, W)); 1469 end "**"; 1470 1471 function "**" (P : PString; Fil : File_Access) return Pattern is 1472 Pat : constant PE_Ptr := S_To_PE (P); 1473 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1474 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1475 begin 1476 return (AFC with 3, Bracket (E, Pat, W)); 1477 end "**"; 1478 1479 function "**" (P : PChar; Fil : File_Access) return Pattern is 1480 Pat : constant PE_Ptr := C_To_PE (P); 1481 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1482 W : constant PE_Ptr := new PE'(PC_Write_OnM, 0, EOP, Fil); 1483 begin 1484 return (AFC with 3, Bracket (E, Pat, W)); 1485 end "**"; 1486 1487 --------- 1488 -- "+" -- 1489 --------- 1490 1491 function "+" (Str : VString_Var) return Pattern is 1492 begin 1493 return 1494 (AFC with 0, 1495 new PE'(PC_String_VP, 1, EOP, Str'Unrestricted_Access)); 1496 end "+"; 1497 1498 function "+" (Str : VString_Func) return Pattern is 1499 begin 1500 return (AFC with 0, new PE'(PC_String_VF, 1, EOP, Str)); 1501 end "+"; 1502 1503 function "+" (P : Pattern_Var) return Pattern is 1504 begin 1505 return 1506 (AFC with 3, 1507 new PE'(PC_Rpat, 1, EOP, P'Unrestricted_Access)); 1508 end "+"; 1509 1510 function "+" (P : Boolean_Func) return Pattern is 1511 begin 1512 return (AFC with 3, new PE'(PC_Pred_Func, 1, EOP, P)); 1513 end "+"; 1514 1515 ---------- 1516 -- "or" -- 1517 ---------- 1518 1519 function "or" (L : PString; R : Pattern) return Pattern is 1520 begin 1521 return (AFC with R.Stk + 1, S_To_PE (L) or Copy (R.P)); 1522 end "or"; 1523 1524 function "or" (L : Pattern; R : PString) return Pattern is 1525 begin 1526 return (AFC with L.Stk + 1, Copy (L.P) or S_To_PE (R)); 1527 end "or"; 1528 1529 function "or" (L : PString; R : PString) return Pattern is 1530 begin 1531 return (AFC with 1, S_To_PE (L) or S_To_PE (R)); 1532 end "or"; 1533 1534 function "or" (L : Pattern; R : Pattern) return Pattern is 1535 begin 1536 return (AFC with 1537 Natural'Max (L.Stk, R.Stk) + 1, Copy (L.P) or Copy (R.P)); 1538 end "or"; 1539 1540 function "or" (L : PChar; R : Pattern) return Pattern is 1541 begin 1542 return (AFC with 1, C_To_PE (L) or Copy (R.P)); 1543 end "or"; 1544 1545 function "or" (L : Pattern; R : PChar) return Pattern is 1546 begin 1547 return (AFC with 1, Copy (L.P) or C_To_PE (R)); 1548 end "or"; 1549 1550 function "or" (L : PChar; R : PChar) return Pattern is 1551 begin 1552 return (AFC with 1, C_To_PE (L) or C_To_PE (R)); 1553 end "or"; 1554 1555 function "or" (L : PString; R : PChar) return Pattern is 1556 begin 1557 return (AFC with 1, S_To_PE (L) or C_To_PE (R)); 1558 end "or"; 1559 1560 function "or" (L : PChar; R : PString) return Pattern is 1561 begin 1562 return (AFC with 1, C_To_PE (L) or S_To_PE (R)); 1563 end "or"; 1564 1565 ------------ 1566 -- Adjust -- 1567 ------------ 1568 1569 -- No two patterns share the same pattern elements, so the adjust 1570 -- procedure for a Pattern assignment must do a deep copy of the 1571 -- pattern element structure. 1572 1573 procedure Adjust (Object : in out Pattern) is 1574 begin 1575 Object.P := Copy (Object.P); 1576 end Adjust; 1577 1578 --------------- 1579 -- Alternate -- 1580 --------------- 1581 1582 function Alternate (L, R : PE_Ptr) return PE_Ptr is 1583 begin 1584 -- If the left pattern is null, then we just add the alternation 1585 -- node with an index one greater than the right hand pattern. 1586 1587 if L = EOP then 1588 return new PE'(PC_Alt, R.Index + 1, EOP, R); 1589 1590 -- If the left pattern is non-null, then build a reference vector 1591 -- for its elements, and adjust their index values to accommodate 1592 -- the right hand elements. Then add the alternation node. 1593 1594 else 1595 declare 1596 Refs : Ref_Array (1 .. L.Index); 1597 1598 begin 1599 Build_Ref_Array (L, Refs); 1600 1601 for J in Refs'Range loop 1602 Refs (J).Index := Refs (J).Index + R.Index; 1603 end loop; 1604 end; 1605 1606 return new PE'(PC_Alt, L.Index + 1, L, R); 1607 end if; 1608 end Alternate; 1609 1610 --------- 1611 -- Any -- 1612 --------- 1613 1614 function Any (Str : String) return Pattern is 1615 begin 1616 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, To_Set (Str))); 1617 end Any; 1618 1619 function Any (Str : VString) return Pattern is 1620 begin 1621 return Any (S (Str)); 1622 end Any; 1623 1624 function Any (Str : Character) return Pattern is 1625 begin 1626 return (AFC with 0, new PE'(PC_Any_CH, 1, EOP, Str)); 1627 end Any; 1628 1629 function Any (Str : Character_Set) return Pattern is 1630 begin 1631 return (AFC with 0, new PE'(PC_Any_CS, 1, EOP, Str)); 1632 end Any; 1633 1634 function Any (Str : not null access VString) return Pattern is 1635 begin 1636 return (AFC with 0, new PE'(PC_Any_VP, 1, EOP, VString_Ptr (Str))); 1637 end Any; 1638 1639 function Any (Str : VString_Func) return Pattern is 1640 begin 1641 return (AFC with 0, new PE'(PC_Any_VF, 1, EOP, Str)); 1642 end Any; 1643 1644 --------- 1645 -- Arb -- 1646 --------- 1647 1648 -- +---+ 1649 -- | X |----> 1650 -- +---+ 1651 -- . 1652 -- . 1653 -- +---+ 1654 -- | Y |----> 1655 -- +---+ 1656 1657 -- The PC_Arb_X element is numbered 2, and the PC_Arb_Y element is 1 1658 1659 function Arb return Pattern is 1660 Y : constant PE_Ptr := new PE'(PC_Arb_Y, 1, EOP); 1661 X : constant PE_Ptr := new PE'(PC_Arb_X, 2, EOP, Y); 1662 begin 1663 return (AFC with 1, X); 1664 end Arb; 1665 1666 ----------- 1667 -- Arbno -- 1668 ----------- 1669 1670 function Arbno (P : PString) return Pattern is 1671 begin 1672 if P'Length = 0 then 1673 return (AFC with 0, EOP); 1674 else 1675 return (AFC with 0, Arbno_Simple (S_To_PE (P))); 1676 end if; 1677 end Arbno; 1678 1679 function Arbno (P : PChar) return Pattern is 1680 begin 1681 return (AFC with 0, Arbno_Simple (C_To_PE (P))); 1682 end Arbno; 1683 1684 function Arbno (P : Pattern) return Pattern is 1685 Pat : constant PE_Ptr := Copy (P.P); 1686 1687 begin 1688 if P.Stk = 0 1689 and then OK_For_Simple_Arbno (Pat.Pcode) 1690 then 1691 return (AFC with 0, Arbno_Simple (Pat)); 1692 end if; 1693 1694 -- This is the complex case, either the pattern makes stack entries 1695 -- or it is possible for the pattern to match the null string (more 1696 -- accurately, we don't know that this is not the case). 1697 1698 -- +--------------------------+ 1699 -- | ^ 1700 -- V | 1701 -- +---+ | 1702 -- | X |----> | 1703 -- +---+ | 1704 -- . | 1705 -- . | 1706 -- +---+ +---+ +---+ | 1707 -- | E |---->| P |---->| Y |--->+ 1708 -- +---+ +---+ +---+ 1709 1710 -- The node numbering of the constituent pattern P is not affected. 1711 -- Where N is the number of nodes in P, the Y node is numbered N + 1, 1712 -- the E node is N + 2, and the X node is N + 3. 1713 1714 declare 1715 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 1716 X : constant PE_Ptr := new PE'(PC_Arbno_X, 0, EOP, E); 1717 Y : constant PE_Ptr := new PE'(PC_Arbno_Y, 0, X, P.Stk + 3); 1718 EPY : constant PE_Ptr := Bracket (E, Pat, Y); 1719 begin 1720 X.Alt := EPY; 1721 X.Index := EPY.Index + 1; 1722 return (AFC with P.Stk + 3, X); 1723 end; 1724 end Arbno; 1725 1726 ------------------ 1727 -- Arbno_Simple -- 1728 ------------------ 1729 1730 -- +-------------+ 1731 -- | ^ 1732 -- V | 1733 -- +---+ | 1734 -- | S |----> | 1735 -- +---+ | 1736 -- . | 1737 -- . | 1738 -- +---+ | 1739 -- | P |---------->+ 1740 -- +---+ 1741 1742 -- The node numbering of the constituent pattern P is not affected. 1743 -- The S node has a node number of P.Index + 1. 1744 1745 -- Note that we know that P cannot be EOP, because a null pattern 1746 -- does not meet the requirements for simple Arbno. 1747 1748 function Arbno_Simple (P : PE_Ptr) return PE_Ptr is 1749 S : constant PE_Ptr := new PE'(PC_Arbno_S, P.Index + 1, EOP, P); 1750 begin 1751 Set_Successor (P, S); 1752 return S; 1753 end Arbno_Simple; 1754 1755 --------- 1756 -- Bal -- 1757 --------- 1758 1759 function Bal return Pattern is 1760 begin 1761 return (AFC with 1, new PE'(PC_Bal, 1, EOP)); 1762 end Bal; 1763 1764 ------------- 1765 -- Bracket -- 1766 ------------- 1767 1768 function Bracket (E, P, A : PE_Ptr) return PE_Ptr is 1769 begin 1770 if P = EOP then 1771 E.Pthen := A; 1772 E.Index := 2; 1773 A.Index := 1; 1774 1775 else 1776 E.Pthen := P; 1777 Set_Successor (P, A); 1778 E.Index := P.Index + 2; 1779 A.Index := P.Index + 1; 1780 end if; 1781 1782 return E; 1783 end Bracket; 1784 1785 ----------- 1786 -- Break -- 1787 ----------- 1788 1789 function Break (Str : String) return Pattern is 1790 begin 1791 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, To_Set (Str))); 1792 end Break; 1793 1794 function Break (Str : VString) return Pattern is 1795 begin 1796 return Break (S (Str)); 1797 end Break; 1798 1799 function Break (Str : Character) return Pattern is 1800 begin 1801 return (AFC with 0, new PE'(PC_Break_CH, 1, EOP, Str)); 1802 end Break; 1803 1804 function Break (Str : Character_Set) return Pattern is 1805 begin 1806 return (AFC with 0, new PE'(PC_Break_CS, 1, EOP, Str)); 1807 end Break; 1808 1809 function Break (Str : not null access VString) return Pattern is 1810 begin 1811 return (AFC with 0, 1812 new PE'(PC_Break_VP, 1, EOP, Str.all'Unchecked_Access)); 1813 end Break; 1814 1815 function Break (Str : VString_Func) return Pattern is 1816 begin 1817 return (AFC with 0, new PE'(PC_Break_VF, 1, EOP, Str)); 1818 end Break; 1819 1820 ------------ 1821 -- BreakX -- 1822 ------------ 1823 1824 function BreakX (Str : String) return Pattern is 1825 begin 1826 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, To_Set (Str))); 1827 end BreakX; 1828 1829 function BreakX (Str : VString) return Pattern is 1830 begin 1831 return BreakX (S (Str)); 1832 end BreakX; 1833 1834 function BreakX (Str : Character) return Pattern is 1835 begin 1836 return BreakX_Make (new PE'(PC_BreakX_CH, 3, N, Str)); 1837 end BreakX; 1838 1839 function BreakX (Str : Character_Set) return Pattern is 1840 begin 1841 return BreakX_Make (new PE'(PC_BreakX_CS, 3, N, Str)); 1842 end BreakX; 1843 1844 function BreakX (Str : not null access VString) return Pattern is 1845 begin 1846 return BreakX_Make (new PE'(PC_BreakX_VP, 3, N, VString_Ptr (Str))); 1847 end BreakX; 1848 1849 function BreakX (Str : VString_Func) return Pattern is 1850 begin 1851 return BreakX_Make (new PE'(PC_BreakX_VF, 3, N, Str)); 1852 end BreakX; 1853 1854 ----------------- 1855 -- BreakX_Make -- 1856 ----------------- 1857 1858 -- +---+ +---+ 1859 -- | B |---->| A |----> 1860 -- +---+ +---+ 1861 -- ^ . 1862 -- | . 1863 -- | +---+ 1864 -- +<------| X | 1865 -- +---+ 1866 1867 -- The B node is numbered 3, the alternative node is 1, and the X 1868 -- node is 2. 1869 1870 function BreakX_Make (B : PE_Ptr) return Pattern is 1871 X : constant PE_Ptr := new PE'(PC_BreakX_X, 2, B); 1872 A : constant PE_Ptr := new PE'(PC_Alt, 1, EOP, X); 1873 begin 1874 B.Pthen := A; 1875 return (AFC with 2, B); 1876 end BreakX_Make; 1877 1878 --------------------- 1879 -- Build_Ref_Array -- 1880 --------------------- 1881 1882 procedure Build_Ref_Array (E : PE_Ptr; RA : out Ref_Array) is 1883 1884 procedure Record_PE (E : PE_Ptr); 1885 -- Record given pattern element if not already recorded in RA, 1886 -- and also record any referenced pattern elements recursively. 1887 1888 --------------- 1889 -- Record_PE -- 1890 --------------- 1891 1892 procedure Record_PE (E : PE_Ptr) is 1893 begin 1894 PutD (" Record_PE called with PE_Ptr = " & Image (E)); 1895 1896 if E = EOP or else RA (E.Index) /= null then 1897 Put_LineD (", nothing to do"); 1898 return; 1899 1900 else 1901 Put_LineD (", recording" & IndexT'Image (E.Index)); 1902 RA (E.Index) := E; 1903 Record_PE (E.Pthen); 1904 1905 if E.Pcode in PC_Has_Alt then 1906 Record_PE (E.Alt); 1907 end if; 1908 end if; 1909 end Record_PE; 1910 1911 -- Start of processing for Build_Ref_Array 1912 1913 begin 1914 New_LineD; 1915 Put_LineD ("Entering Build_Ref_Array"); 1916 Record_PE (E); 1917 New_LineD; 1918 end Build_Ref_Array; 1919 1920 ------------- 1921 -- C_To_PE -- 1922 ------------- 1923 1924 function C_To_PE (C : PChar) return PE_Ptr is 1925 begin 1926 return new PE'(PC_Char, 1, EOP, C); 1927 end C_To_PE; 1928 1929 ------------ 1930 -- Cancel -- 1931 ------------ 1932 1933 function Cancel return Pattern is 1934 begin 1935 return (AFC with 0, new PE'(PC_Cancel, 1, EOP)); 1936 end Cancel; 1937 1938 ------------ 1939 -- Concat -- 1940 ------------ 1941 1942 -- Concat needs to traverse the left operand performing the following 1943 -- set of fixups: 1944 1945 -- a) Any successor pointers (Pthen fields) that are set to EOP are 1946 -- reset to point to the second operand. 1947 1948 -- b) Any PC_Arbno_Y node has its stack count field incremented 1949 -- by the parameter Incr provided for this purpose. 1950 1951 -- d) Num fields of all pattern elements in the left operand are 1952 -- adjusted to include the elements of the right operand. 1953 1954 -- Note: we do not use Set_Successor in the processing for Concat, since 1955 -- there is no point in doing two traversals, we may as well do everything 1956 -- at the same time. 1957 1958 function Concat (L, R : PE_Ptr; Incr : Natural) return PE_Ptr is 1959 begin 1960 if L = EOP then 1961 return R; 1962 1963 elsif R = EOP then 1964 return L; 1965 1966 else 1967 declare 1968 Refs : Ref_Array (1 .. L.Index); 1969 -- We build a reference array for L whose N'th element points to 1970 -- the pattern element of L whose original Index value is N. 1971 1972 P : PE_Ptr; 1973 1974 begin 1975 Build_Ref_Array (L, Refs); 1976 1977 for J in Refs'Range loop 1978 P := Refs (J); 1979 1980 P.Index := P.Index + R.Index; 1981 1982 if P.Pcode = PC_Arbno_Y then 1983 P.Nat := P.Nat + Incr; 1984 end if; 1985 1986 if P.Pthen = EOP then 1987 P.Pthen := R; 1988 end if; 1989 1990 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then 1991 P.Alt := R; 1992 end if; 1993 end loop; 1994 end; 1995 1996 return L; 1997 end if; 1998 end Concat; 1999 2000 ---------- 2001 -- Copy -- 2002 ---------- 2003 2004 function Copy (P : PE_Ptr) return PE_Ptr is 2005 begin 2006 if P = null then 2007 Uninitialized_Pattern; 2008 2009 else 2010 declare 2011 Refs : Ref_Array (1 .. P.Index); 2012 -- References to elements in P, indexed by Index field 2013 2014 Copy : Ref_Array (1 .. P.Index); 2015 -- Holds copies of elements of P, indexed by Index field 2016 2017 E : PE_Ptr; 2018 2019 begin 2020 Build_Ref_Array (P, Refs); 2021 2022 -- Now copy all nodes 2023 2024 for J in Refs'Range loop 2025 Copy (J) := new PE'(Refs (J).all); 2026 end loop; 2027 2028 -- Adjust all internal references 2029 2030 for J in Copy'Range loop 2031 E := Copy (J); 2032 2033 -- Adjust successor pointer to point to copy 2034 2035 if E.Pthen /= EOP then 2036 E.Pthen := Copy (E.Pthen.Index); 2037 end if; 2038 2039 -- Adjust Alt pointer if there is one to point to copy 2040 2041 if E.Pcode in PC_Has_Alt and then E.Alt /= EOP then 2042 E.Alt := Copy (E.Alt.Index); 2043 end if; 2044 2045 -- Copy referenced string 2046 2047 if E.Pcode = PC_String then 2048 E.Str := new String'(E.Str.all); 2049 end if; 2050 end loop; 2051 2052 return Copy (P.Index); 2053 end; 2054 end if; 2055 end Copy; 2056 2057 ---------- 2058 -- Dump -- 2059 ---------- 2060 2061 procedure Dump (P : Pattern) is 2062 2063 subtype Count is Ada.Text_IO.Count; 2064 Scol : Count; 2065 -- Used to keep track of column in dump output 2066 2067 Refs : Ref_Array (1 .. P.P.Index); 2068 -- We build a reference array whose N'th element points to the 2069 -- pattern element whose Index value is N. 2070 2071 Cols : Natural := 2; 2072 -- Number of columns used for pattern numbers, minimum is 2 2073 2074 E : PE_Ptr; 2075 2076 procedure Write_Node_Id (E : PE_Ptr); 2077 -- Writes out a string identifying the given pattern element 2078 2079 ------------------- 2080 -- Write_Node_Id -- 2081 ------------------- 2082 2083 procedure Write_Node_Id (E : PE_Ptr) is 2084 begin 2085 if E = EOP then 2086 Put ("EOP"); 2087 2088 for J in 4 .. Cols loop 2089 Put (' '); 2090 end loop; 2091 2092 else 2093 declare 2094 Str : String (1 .. Cols); 2095 N : Natural := Natural (E.Index); 2096 2097 begin 2098 Put ("#"); 2099 2100 for J in reverse Str'Range loop 2101 Str (J) := Character'Val (48 + N mod 10); 2102 N := N / 10; 2103 end loop; 2104 2105 Put (Str); 2106 end; 2107 end if; 2108 end Write_Node_Id; 2109 2110 -- Start of processing for Dump 2111 2112 begin 2113 New_Line; 2114 Put ("Pattern Dump Output (pattern at " & 2115 Image (P'Address) & 2116 ", S = " & Natural'Image (P.Stk) & ')'); 2117 2118 Scol := Col; 2119 New_Line; 2120 2121 while Col < Scol loop 2122 Put ('-'); 2123 end loop; 2124 2125 New_Line; 2126 2127 -- If uninitialized pattern, dump line and we are done 2128 2129 if P.P = null then 2130 Put_Line ("Uninitialized pattern value"); 2131 return; 2132 end if; 2133 2134 -- If null pattern, just dump it and we are all done 2135 2136 if P.P = EOP then 2137 Put_Line ("EOP (null pattern)"); 2138 return; 2139 end if; 2140 2141 Build_Ref_Array (P.P, Refs); 2142 2143 -- Set number of columns required for node numbers 2144 2145 while 10 ** Cols - 1 < Integer (P.P.Index) loop 2146 Cols := Cols + 1; 2147 end loop; 2148 2149 -- Now dump the nodes in reverse sequence. We output them in reverse 2150 -- sequence since this corresponds to the natural order used to 2151 -- construct the patterns. 2152 2153 for J in reverse Refs'Range loop 2154 E := Refs (J); 2155 Write_Node_Id (E); 2156 Set_Col (Count (Cols) + 4); 2157 Put (Image (E)); 2158 Put (" "); 2159 Put (Pattern_Code'Image (E.Pcode)); 2160 Put (" "); 2161 Set_Col (21 + Count (Cols) + Address_Image_Length); 2162 Write_Node_Id (E.Pthen); 2163 Set_Col (24 + 2 * Count (Cols) + Address_Image_Length); 2164 2165 case E.Pcode is 2166 2167 when PC_Alt | 2168 PC_Arb_X | 2169 PC_Arbno_S | 2170 PC_Arbno_X => 2171 Write_Node_Id (E.Alt); 2172 2173 when PC_Rpat => 2174 Put (Str_PP (E.PP)); 2175 2176 when PC_Pred_Func => 2177 Put (Str_BF (E.BF)); 2178 2179 when PC_Assign_Imm | 2180 PC_Assign_OnM | 2181 PC_Any_VP | 2182 PC_Break_VP | 2183 PC_BreakX_VP | 2184 PC_NotAny_VP | 2185 PC_NSpan_VP | 2186 PC_Span_VP | 2187 PC_String_VP => 2188 Put (Str_VP (E.VP)); 2189 2190 when PC_Write_Imm | 2191 PC_Write_OnM => 2192 Put (Str_FP (E.FP)); 2193 2194 when PC_String => 2195 Put (Image (E.Str.all)); 2196 2197 when PC_String_2 => 2198 Put (Image (E.Str2)); 2199 2200 when PC_String_3 => 2201 Put (Image (E.Str3)); 2202 2203 when PC_String_4 => 2204 Put (Image (E.Str4)); 2205 2206 when PC_String_5 => 2207 Put (Image (E.Str5)); 2208 2209 when PC_String_6 => 2210 Put (Image (E.Str6)); 2211 2212 when PC_Setcur => 2213 Put (Str_NP (E.Var)); 2214 2215 when PC_Any_CH | 2216 PC_Break_CH | 2217 PC_BreakX_CH | 2218 PC_Char | 2219 PC_NotAny_CH | 2220 PC_NSpan_CH | 2221 PC_Span_CH => 2222 Put (''' & E.Char & '''); 2223 2224 when PC_Any_CS | 2225 PC_Break_CS | 2226 PC_BreakX_CS | 2227 PC_NotAny_CS | 2228 PC_NSpan_CS | 2229 PC_Span_CS => 2230 Put ('"' & To_Sequence (E.CS) & '"'); 2231 2232 when PC_Arbno_Y | 2233 PC_Len_Nat | 2234 PC_Pos_Nat | 2235 PC_RPos_Nat | 2236 PC_RTab_Nat | 2237 PC_Tab_Nat => 2238 Put (S (E.Nat)); 2239 2240 when PC_Pos_NF | 2241 PC_Len_NF | 2242 PC_RPos_NF | 2243 PC_RTab_NF | 2244 PC_Tab_NF => 2245 Put (Str_NF (E.NF)); 2246 2247 when PC_Pos_NP | 2248 PC_Len_NP | 2249 PC_RPos_NP | 2250 PC_RTab_NP | 2251 PC_Tab_NP => 2252 Put (Str_NP (E.NP)); 2253 2254 when PC_Any_VF | 2255 PC_Break_VF | 2256 PC_BreakX_VF | 2257 PC_NotAny_VF | 2258 PC_NSpan_VF | 2259 PC_Span_VF | 2260 PC_String_VF => 2261 Put (Str_VF (E.VF)); 2262 2263 when others => null; 2264 2265 end case; 2266 2267 New_Line; 2268 end loop; 2269 2270 New_Line; 2271 end Dump; 2272 2273 ---------- 2274 -- Fail -- 2275 ---------- 2276 2277 function Fail return Pattern is 2278 begin 2279 return (AFC with 0, new PE'(PC_Fail, 1, EOP)); 2280 end Fail; 2281 2282 ----------- 2283 -- Fence -- 2284 ----------- 2285 2286 -- Simple case 2287 2288 function Fence return Pattern is 2289 begin 2290 return (AFC with 1, new PE'(PC_Fence, 1, EOP)); 2291 end Fence; 2292 2293 -- Function case 2294 2295 -- +---+ +---+ +---+ 2296 -- | E |---->| P |---->| X |----> 2297 -- +---+ +---+ +---+ 2298 2299 -- The node numbering of the constituent pattern P is not affected. 2300 -- Where N is the number of nodes in P, the X node is numbered N + 1, 2301 -- and the E node is N + 2. 2302 2303 function Fence (P : Pattern) return Pattern is 2304 Pat : constant PE_Ptr := Copy (P.P); 2305 E : constant PE_Ptr := new PE'(PC_R_Enter, 0, EOP); 2306 X : constant PE_Ptr := new PE'(PC_Fence_X, 0, EOP); 2307 begin 2308 return (AFC with P.Stk + 1, Bracket (E, Pat, X)); 2309 end Fence; 2310 2311 -------------- 2312 -- Finalize -- 2313 -------------- 2314 2315 procedure Finalize (Object : in out Pattern) is 2316 2317 procedure Free is new Ada.Unchecked_Deallocation (PE, PE_Ptr); 2318 procedure Free is new Ada.Unchecked_Deallocation (String, String_Ptr); 2319 2320 begin 2321 -- Nothing to do if already freed 2322 2323 if Object.P = null then 2324 return; 2325 2326 -- Otherwise we must free all elements 2327 2328 else 2329 declare 2330 Refs : Ref_Array (1 .. Object.P.Index); 2331 -- References to elements in pattern to be finalized 2332 2333 begin 2334 Build_Ref_Array (Object.P, Refs); 2335 2336 for J in Refs'Range loop 2337 if Refs (J).Pcode = PC_String then 2338 Free (Refs (J).Str); 2339 end if; 2340 2341 Free (Refs (J)); 2342 end loop; 2343 2344 Object.P := null; 2345 end; 2346 end if; 2347 end Finalize; 2348 2349 ----------- 2350 -- Image -- 2351 ----------- 2352 2353 function Image (P : PE_Ptr) return String is 2354 begin 2355 return Image (To_Address (P)); 2356 end Image; 2357 2358 function Image (P : Pattern) return String is 2359 begin 2360 return S (Image (P)); 2361 end Image; 2362 2363 function Image (P : Pattern) return VString is 2364 2365 Kill_Ampersand : Boolean := False; 2366 -- Set True to delete next & to be output to Result 2367 2368 Result : VString := Nul; 2369 -- The result is accumulated here, using Append 2370 2371 Refs : Ref_Array (1 .. P.P.Index); 2372 -- We build a reference array whose N'th element points to the 2373 -- pattern element whose Index value is N. 2374 2375 procedure Delete_Ampersand; 2376 -- Deletes the ampersand at the end of Result 2377 2378 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean); 2379 -- E refers to a pattern structure whose successor is given by Succ. 2380 -- This procedure appends to Result a representation of this pattern. 2381 -- The Paren parameter indicates whether parentheses are required if 2382 -- the output is more than one element. 2383 2384 procedure Image_One (E : in out PE_Ptr); 2385 -- E refers to a pattern structure. This procedure appends to Result 2386 -- a representation of the single simple or compound pattern structure 2387 -- at the start of E and updates E to point to its successor. 2388 2389 ---------------------- 2390 -- Delete_Ampersand -- 2391 ---------------------- 2392 2393 procedure Delete_Ampersand is 2394 L : constant Natural := Length (Result); 2395 begin 2396 if L > 2 then 2397 Delete (Result, L - 1, L); 2398 end if; 2399 end Delete_Ampersand; 2400 2401 --------------- 2402 -- Image_One -- 2403 --------------- 2404 2405 procedure Image_One (E : in out PE_Ptr) is 2406 2407 ER : PE_Ptr := E.Pthen; 2408 -- Successor set as result in E unless reset 2409 2410 begin 2411 case E.Pcode is 2412 2413 when PC_Cancel => 2414 Append (Result, "Cancel"); 2415 2416 when PC_Alt => Alt : declare 2417 2418 Elmts_In_L : constant IndexT := E.Pthen.Index - E.Alt.Index; 2419 -- Number of elements in left pattern of alternation 2420 2421 Lowest_In_L : constant IndexT := E.Index - Elmts_In_L; 2422 -- Number of lowest index in elements of left pattern 2423 2424 E1 : PE_Ptr; 2425 2426 begin 2427 -- The successor of the alternation node must have a lower 2428 -- index than any node that is in the left pattern or a 2429 -- higher index than the alternation node itself. 2430 2431 while ER /= EOP 2432 and then ER.Index >= Lowest_In_L 2433 and then ER.Index < E.Index 2434 loop 2435 ER := ER.Pthen; 2436 end loop; 2437 2438 Append (Result, '('); 2439 2440 E1 := E; 2441 loop 2442 Image_Seq (E1.Pthen, ER, False); 2443 Append (Result, " or "); 2444 E1 := E1.Alt; 2445 exit when E1.Pcode /= PC_Alt; 2446 end loop; 2447 2448 Image_Seq (E1, ER, False); 2449 Append (Result, ')'); 2450 end Alt; 2451 2452 when PC_Any_CS => 2453 Append (Result, "Any (" & Image (To_Sequence (E.CS)) & ')'); 2454 2455 when PC_Any_VF => 2456 Append (Result, "Any (" & Str_VF (E.VF) & ')'); 2457 2458 when PC_Any_VP => 2459 Append (Result, "Any (" & Str_VP (E.VP) & ')'); 2460 2461 when PC_Arb_X => 2462 Append (Result, "Arb"); 2463 2464 when PC_Arbno_S => 2465 Append (Result, "Arbno ("); 2466 Image_Seq (E.Alt, E, False); 2467 Append (Result, ')'); 2468 2469 when PC_Arbno_X => 2470 Append (Result, "Arbno ("); 2471 Image_Seq (E.Alt.Pthen, Refs (E.Index - 2), False); 2472 Append (Result, ')'); 2473 2474 when PC_Assign_Imm => 2475 Delete_Ampersand; 2476 Append (Result, "* " & Str_VP (Refs (E.Index).VP)); 2477 2478 when PC_Assign_OnM => 2479 Delete_Ampersand; 2480 Append (Result, "** " & Str_VP (Refs (E.Index).VP)); 2481 2482 when PC_Any_CH => 2483 Append (Result, "Any ('" & E.Char & "')"); 2484 2485 when PC_Bal => 2486 Append (Result, "Bal"); 2487 2488 when PC_Break_CH => 2489 Append (Result, "Break ('" & E.Char & "')"); 2490 2491 when PC_Break_CS => 2492 Append (Result, "Break (" & Image (To_Sequence (E.CS)) & ')'); 2493 2494 when PC_Break_VF => 2495 Append (Result, "Break (" & Str_VF (E.VF) & ')'); 2496 2497 when PC_Break_VP => 2498 Append (Result, "Break (" & Str_VP (E.VP) & ')'); 2499 2500 when PC_BreakX_CH => 2501 Append (Result, "BreakX ('" & E.Char & "')"); 2502 ER := ER.Pthen; 2503 2504 when PC_BreakX_CS => 2505 Append (Result, "BreakX (" & Image (To_Sequence (E.CS)) & ')'); 2506 ER := ER.Pthen; 2507 2508 when PC_BreakX_VF => 2509 Append (Result, "BreakX (" & Str_VF (E.VF) & ')'); 2510 ER := ER.Pthen; 2511 2512 when PC_BreakX_VP => 2513 Append (Result, "BreakX (" & Str_VP (E.VP) & ')'); 2514 ER := ER.Pthen; 2515 2516 when PC_Char => 2517 Append (Result, ''' & E.Char & '''); 2518 2519 when PC_Fail => 2520 Append (Result, "Fail"); 2521 2522 when PC_Fence => 2523 Append (Result, "Fence"); 2524 2525 when PC_Fence_X => 2526 Append (Result, "Fence ("); 2527 Image_Seq (E.Pthen, Refs (E.Index - 1), False); 2528 Append (Result, ")"); 2529 ER := Refs (E.Index - 1).Pthen; 2530 2531 when PC_Len_Nat => 2532 Append (Result, "Len (" & E.Nat & ')'); 2533 2534 when PC_Len_NF => 2535 Append (Result, "Len (" & Str_NF (E.NF) & ')'); 2536 2537 when PC_Len_NP => 2538 Append (Result, "Len (" & Str_NP (E.NP) & ')'); 2539 2540 when PC_NotAny_CH => 2541 Append (Result, "NotAny ('" & E.Char & "')"); 2542 2543 when PC_NotAny_CS => 2544 Append (Result, "NotAny (" & Image (To_Sequence (E.CS)) & ')'); 2545 2546 when PC_NotAny_VF => 2547 Append (Result, "NotAny (" & Str_VF (E.VF) & ')'); 2548 2549 when PC_NotAny_VP => 2550 Append (Result, "NotAny (" & Str_VP (E.VP) & ')'); 2551 2552 when PC_NSpan_CH => 2553 Append (Result, "NSpan ('" & E.Char & "')"); 2554 2555 when PC_NSpan_CS => 2556 Append (Result, "NSpan (" & Image (To_Sequence (E.CS)) & ')'); 2557 2558 when PC_NSpan_VF => 2559 Append (Result, "NSpan (" & Str_VF (E.VF) & ')'); 2560 2561 when PC_NSpan_VP => 2562 Append (Result, "NSpan (" & Str_VP (E.VP) & ')'); 2563 2564 when PC_Null => 2565 Append (Result, """"""); 2566 2567 when PC_Pos_Nat => 2568 Append (Result, "Pos (" & E.Nat & ')'); 2569 2570 when PC_Pos_NF => 2571 Append (Result, "Pos (" & Str_NF (E.NF) & ')'); 2572 2573 when PC_Pos_NP => 2574 Append (Result, "Pos (" & Str_NP (E.NP) & ')'); 2575 2576 when PC_R_Enter => 2577 Kill_Ampersand := True; 2578 2579 when PC_Rest => 2580 Append (Result, "Rest"); 2581 2582 when PC_Rpat => 2583 Append (Result, "(+ " & Str_PP (E.PP) & ')'); 2584 2585 when PC_Pred_Func => 2586 Append (Result, "(+ " & Str_BF (E.BF) & ')'); 2587 2588 when PC_RPos_Nat => 2589 Append (Result, "RPos (" & E.Nat & ')'); 2590 2591 when PC_RPos_NF => 2592 Append (Result, "RPos (" & Str_NF (E.NF) & ')'); 2593 2594 when PC_RPos_NP => 2595 Append (Result, "RPos (" & Str_NP (E.NP) & ')'); 2596 2597 when PC_RTab_Nat => 2598 Append (Result, "RTab (" & E.Nat & ')'); 2599 2600 when PC_RTab_NF => 2601 Append (Result, "RTab (" & Str_NF (E.NF) & ')'); 2602 2603 when PC_RTab_NP => 2604 Append (Result, "RTab (" & Str_NP (E.NP) & ')'); 2605 2606 when PC_Setcur => 2607 Append (Result, "Setcur (" & Str_NP (E.Var) & ')'); 2608 2609 when PC_Span_CH => 2610 Append (Result, "Span ('" & E.Char & "')"); 2611 2612 when PC_Span_CS => 2613 Append (Result, "Span (" & Image (To_Sequence (E.CS)) & ')'); 2614 2615 when PC_Span_VF => 2616 Append (Result, "Span (" & Str_VF (E.VF) & ')'); 2617 2618 when PC_Span_VP => 2619 Append (Result, "Span (" & Str_VP (E.VP) & ')'); 2620 2621 when PC_String => 2622 Append (Result, Image (E.Str.all)); 2623 2624 when PC_String_2 => 2625 Append (Result, Image (E.Str2)); 2626 2627 when PC_String_3 => 2628 Append (Result, Image (E.Str3)); 2629 2630 when PC_String_4 => 2631 Append (Result, Image (E.Str4)); 2632 2633 when PC_String_5 => 2634 Append (Result, Image (E.Str5)); 2635 2636 when PC_String_6 => 2637 Append (Result, Image (E.Str6)); 2638 2639 when PC_String_VF => 2640 Append (Result, "(+" & Str_VF (E.VF) & ')'); 2641 2642 when PC_String_VP => 2643 Append (Result, "(+" & Str_VP (E.VP) & ')'); 2644 2645 when PC_Succeed => 2646 Append (Result, "Succeed"); 2647 2648 when PC_Tab_Nat => 2649 Append (Result, "Tab (" & E.Nat & ')'); 2650 2651 when PC_Tab_NF => 2652 Append (Result, "Tab (" & Str_NF (E.NF) & ')'); 2653 2654 when PC_Tab_NP => 2655 Append (Result, "Tab (" & Str_NP (E.NP) & ')'); 2656 2657 when PC_Write_Imm => 2658 Append (Result, '('); 2659 Image_Seq (E, Refs (E.Index - 1), True); 2660 Append (Result, " * " & Str_FP (Refs (E.Index - 1).FP)); 2661 ER := Refs (E.Index - 1).Pthen; 2662 2663 when PC_Write_OnM => 2664 Append (Result, '('); 2665 Image_Seq (E.Pthen, Refs (E.Index - 1), True); 2666 Append (Result, " ** " & Str_FP (Refs (E.Index - 1).FP)); 2667 ER := Refs (E.Index - 1).Pthen; 2668 2669 -- Other pattern codes should not appear as leading elements 2670 2671 when PC_Arb_Y | 2672 PC_Arbno_Y | 2673 PC_Assign | 2674 PC_BreakX_X | 2675 PC_EOP | 2676 PC_Fence_Y | 2677 PC_R_Remove | 2678 PC_R_Restore | 2679 PC_Unanchored => 2680 Append (Result, "???"); 2681 2682 end case; 2683 2684 E := ER; 2685 end Image_One; 2686 2687 --------------- 2688 -- Image_Seq -- 2689 --------------- 2690 2691 procedure Image_Seq (E : PE_Ptr; Succ : PE_Ptr; Paren : Boolean) is 2692 Indx : constant Natural := Length (Result); 2693 E1 : PE_Ptr := E; 2694 Mult : Boolean := False; 2695 2696 begin 2697 -- The image of EOP is "" (the null string) 2698 2699 if E = EOP then 2700 Append (Result, """"""); 2701 2702 -- Else generate appropriate concatenation sequence 2703 2704 else 2705 loop 2706 Image_One (E1); 2707 exit when E1 = Succ; 2708 exit when E1 = EOP; 2709 Mult := True; 2710 2711 if Kill_Ampersand then 2712 Kill_Ampersand := False; 2713 else 2714 Append (Result, " & "); 2715 end if; 2716 end loop; 2717 end if; 2718 2719 if Mult and Paren then 2720 Insert (Result, Indx + 1, "("); 2721 Append (Result, ")"); 2722 end if; 2723 end Image_Seq; 2724 2725 -- Start of processing for Image 2726 2727 begin 2728 Build_Ref_Array (P.P, Refs); 2729 Image_Seq (P.P, EOP, False); 2730 return Result; 2731 end Image; 2732 2733 ----------- 2734 -- Is_In -- 2735 ----------- 2736 2737 function Is_In (C : Character; Str : String) return Boolean is 2738 begin 2739 for J in Str'Range loop 2740 if Str (J) = C then 2741 return True; 2742 end if; 2743 end loop; 2744 2745 return False; 2746 end Is_In; 2747 2748 --------- 2749 -- Len -- 2750 --------- 2751 2752 function Len (Count : Natural) return Pattern is 2753 begin 2754 -- Note, the following is not just an optimization, it is needed 2755 -- to ensure that Arbno (Len (0)) does not generate an infinite 2756 -- matching loop (since PC_Len_Nat is OK_For_Simple_Arbno). 2757 2758 if Count = 0 then 2759 return (AFC with 0, new PE'(PC_Null, 1, EOP)); 2760 2761 else 2762 return (AFC with 0, new PE'(PC_Len_Nat, 1, EOP, Count)); 2763 end if; 2764 end Len; 2765 2766 function Len (Count : Natural_Func) return Pattern is 2767 begin 2768 return (AFC with 0, new PE'(PC_Len_NF, 1, EOP, Count)); 2769 end Len; 2770 2771 function Len (Count : not null access Natural) return Pattern is 2772 begin 2773 return (AFC with 0, new PE'(PC_Len_NP, 1, EOP, Natural_Ptr (Count))); 2774 end Len; 2775 2776 ----------------- 2777 -- Logic_Error -- 2778 ----------------- 2779 2780 procedure Logic_Error is 2781 begin 2782 raise Program_Error with 2783 "Internal logic error in GNAT.Spitbol.Patterns"; 2784 end Logic_Error; 2785 2786 ----------- 2787 -- Match -- 2788 ----------- 2789 2790 function Match 2791 (Subject : VString; 2792 Pat : Pattern) return Boolean 2793 is 2794 S : Big_String_Access; 2795 L : Natural; 2796 Start : Natural; 2797 Stop : Natural; 2798 pragma Unreferenced (Stop); 2799 2800 begin 2801 Get_String (Subject, S, L); 2802 2803 if Debug_Mode then 2804 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2805 else 2806 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2807 end if; 2808 2809 return Start /= 0; 2810 end Match; 2811 2812 function Match 2813 (Subject : String; 2814 Pat : Pattern) return Boolean 2815 is 2816 Start, Stop : Natural; 2817 pragma Unreferenced (Stop); 2818 2819 subtype String1 is String (1 .. Subject'Length); 2820 2821 begin 2822 if Debug_Mode then 2823 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2824 else 2825 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2826 end if; 2827 2828 return Start /= 0; 2829 end Match; 2830 2831 function Match 2832 (Subject : VString_Var; 2833 Pat : Pattern; 2834 Replace : VString) return Boolean 2835 is 2836 Start : Natural; 2837 Stop : Natural; 2838 S : Big_String_Access; 2839 L : Natural; 2840 2841 begin 2842 Get_String (Subject, S, L); 2843 2844 if Debug_Mode then 2845 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2846 else 2847 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2848 end if; 2849 2850 if Start = 0 then 2851 return False; 2852 else 2853 Get_String (Replace, S, L); 2854 Replace_Slice 2855 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); 2856 return True; 2857 end if; 2858 end Match; 2859 2860 function Match 2861 (Subject : VString_Var; 2862 Pat : Pattern; 2863 Replace : String) return Boolean 2864 is 2865 Start : Natural; 2866 Stop : Natural; 2867 S : Big_String_Access; 2868 L : Natural; 2869 2870 begin 2871 Get_String (Subject, S, L); 2872 2873 if Debug_Mode then 2874 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2875 else 2876 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2877 end if; 2878 2879 if Start = 0 then 2880 return False; 2881 else 2882 Replace_Slice 2883 (Subject'Unrestricted_Access.all, Start, Stop, Replace); 2884 return True; 2885 end if; 2886 end Match; 2887 2888 procedure Match 2889 (Subject : VString; 2890 Pat : Pattern) 2891 is 2892 S : Big_String_Access; 2893 L : Natural; 2894 2895 Start : Natural; 2896 Stop : Natural; 2897 pragma Unreferenced (Start, Stop); 2898 2899 begin 2900 Get_String (Subject, S, L); 2901 2902 if Debug_Mode then 2903 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2904 else 2905 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2906 end if; 2907 end Match; 2908 2909 procedure Match 2910 (Subject : String; 2911 Pat : Pattern) 2912 is 2913 Start, Stop : Natural; 2914 pragma Unreferenced (Start, Stop); 2915 2916 subtype String1 is String (1 .. Subject'Length); 2917 2918 begin 2919 if Debug_Mode then 2920 XMatchD (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2921 else 2922 XMatch (String1 (Subject), Pat.P, Pat.Stk, Start, Stop); 2923 end if; 2924 end Match; 2925 2926 procedure Match 2927 (Subject : in out VString; 2928 Pat : Pattern; 2929 Replace : VString) 2930 is 2931 Start : Natural; 2932 Stop : Natural; 2933 S : Big_String_Access; 2934 L : Natural; 2935 2936 begin 2937 Get_String (Subject, S, L); 2938 2939 if Debug_Mode then 2940 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2941 else 2942 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2943 end if; 2944 2945 if Start /= 0 then 2946 Get_String (Replace, S, L); 2947 Replace_Slice (Subject, Start, Stop, S (1 .. L)); 2948 end if; 2949 end Match; 2950 2951 procedure Match 2952 (Subject : in out VString; 2953 Pat : Pattern; 2954 Replace : String) 2955 is 2956 Start : Natural; 2957 Stop : Natural; 2958 S : Big_String_Access; 2959 L : Natural; 2960 2961 begin 2962 Get_String (Subject, S, L); 2963 2964 if Debug_Mode then 2965 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2966 else 2967 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 2968 end if; 2969 2970 if Start /= 0 then 2971 Replace_Slice (Subject, Start, Stop, Replace); 2972 end if; 2973 end Match; 2974 2975 function Match 2976 (Subject : VString; 2977 Pat : PString) return Boolean 2978 is 2979 Pat_Len : constant Natural := Pat'Length; 2980 S : Big_String_Access; 2981 L : Natural; 2982 2983 begin 2984 Get_String (Subject, S, L); 2985 2986 if Anchored_Mode then 2987 if Pat_Len > L then 2988 return False; 2989 else 2990 return Pat = S (1 .. Pat_Len); 2991 end if; 2992 2993 else 2994 for J in 1 .. L - Pat_Len + 1 loop 2995 if Pat = S (J .. J + (Pat_Len - 1)) then 2996 return True; 2997 end if; 2998 end loop; 2999 3000 return False; 3001 end if; 3002 end Match; 3003 3004 function Match 3005 (Subject : String; 3006 Pat : PString) return Boolean 3007 is 3008 Pat_Len : constant Natural := Pat'Length; 3009 Sub_Len : constant Natural := Subject'Length; 3010 SFirst : constant Natural := Subject'First; 3011 3012 begin 3013 if Anchored_Mode then 3014 if Pat_Len > Sub_Len then 3015 return False; 3016 else 3017 return Pat = Subject (SFirst .. SFirst + Pat_Len - 1); 3018 end if; 3019 3020 else 3021 for J in SFirst .. SFirst + Sub_Len - Pat_Len loop 3022 if Pat = Subject (J .. J + (Pat_Len - 1)) then 3023 return True; 3024 end if; 3025 end loop; 3026 3027 return False; 3028 end if; 3029 end Match; 3030 3031 function Match 3032 (Subject : VString_Var; 3033 Pat : PString; 3034 Replace : VString) return Boolean 3035 is 3036 Start : Natural; 3037 Stop : Natural; 3038 S : Big_String_Access; 3039 L : Natural; 3040 3041 begin 3042 Get_String (Subject, S, L); 3043 3044 if Debug_Mode then 3045 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3046 else 3047 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3048 end if; 3049 3050 if Start = 0 then 3051 return False; 3052 else 3053 Get_String (Replace, S, L); 3054 Replace_Slice 3055 (Subject'Unrestricted_Access.all, Start, Stop, S (1 .. L)); 3056 return True; 3057 end if; 3058 end Match; 3059 3060 function Match 3061 (Subject : VString_Var; 3062 Pat : PString; 3063 Replace : String) return Boolean 3064 is 3065 Start : Natural; 3066 Stop : Natural; 3067 S : Big_String_Access; 3068 L : Natural; 3069 3070 begin 3071 Get_String (Subject, S, L); 3072 3073 if Debug_Mode then 3074 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3075 else 3076 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3077 end if; 3078 3079 if Start = 0 then 3080 return False; 3081 else 3082 Replace_Slice 3083 (Subject'Unrestricted_Access.all, Start, Stop, Replace); 3084 return True; 3085 end if; 3086 end Match; 3087 3088 procedure Match 3089 (Subject : VString; 3090 Pat : PString) 3091 is 3092 S : Big_String_Access; 3093 L : Natural; 3094 3095 Start : Natural; 3096 Stop : Natural; 3097 pragma Unreferenced (Start, Stop); 3098 3099 begin 3100 Get_String (Subject, S, L); 3101 3102 if Debug_Mode then 3103 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3104 else 3105 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3106 end if; 3107 end Match; 3108 3109 procedure Match 3110 (Subject : String; 3111 Pat : PString) 3112 is 3113 Start, Stop : Natural; 3114 pragma Unreferenced (Start, Stop); 3115 3116 subtype String1 is String (1 .. Subject'Length); 3117 3118 begin 3119 if Debug_Mode then 3120 XMatchD (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); 3121 else 3122 XMatch (String1 (Subject), S_To_PE (Pat), 0, Start, Stop); 3123 end if; 3124 end Match; 3125 3126 procedure Match 3127 (Subject : in out VString; 3128 Pat : PString; 3129 Replace : VString) 3130 is 3131 Start : Natural; 3132 Stop : Natural; 3133 S : Big_String_Access; 3134 L : Natural; 3135 3136 begin 3137 Get_String (Subject, S, L); 3138 3139 if Debug_Mode then 3140 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3141 else 3142 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3143 end if; 3144 3145 if Start /= 0 then 3146 Get_String (Replace, S, L); 3147 Replace_Slice (Subject, Start, Stop, S (1 .. L)); 3148 end if; 3149 end Match; 3150 3151 procedure Match 3152 (Subject : in out VString; 3153 Pat : PString; 3154 Replace : String) 3155 is 3156 Start : Natural; 3157 Stop : Natural; 3158 S : Big_String_Access; 3159 L : Natural; 3160 3161 begin 3162 Get_String (Subject, S, L); 3163 3164 if Debug_Mode then 3165 XMatchD (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3166 else 3167 XMatch (S (1 .. L), S_To_PE (Pat), 0, Start, Stop); 3168 end if; 3169 3170 if Start /= 0 then 3171 Replace_Slice (Subject, Start, Stop, Replace); 3172 end if; 3173 end Match; 3174 3175 function Match 3176 (Subject : VString_Var; 3177 Pat : Pattern; 3178 Result : Match_Result_Var) return Boolean 3179 is 3180 Start : Natural; 3181 Stop : Natural; 3182 S : Big_String_Access; 3183 L : Natural; 3184 3185 begin 3186 Get_String (Subject, S, L); 3187 3188 if Debug_Mode then 3189 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3190 else 3191 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3192 end if; 3193 3194 if Start = 0 then 3195 Result'Unrestricted_Access.all.Var := null; 3196 return False; 3197 3198 else 3199 Result'Unrestricted_Access.all.Var := Subject'Unrestricted_Access; 3200 Result'Unrestricted_Access.all.Start := Start; 3201 Result'Unrestricted_Access.all.Stop := Stop; 3202 return True; 3203 end if; 3204 end Match; 3205 3206 procedure Match 3207 (Subject : in out VString; 3208 Pat : Pattern; 3209 Result : out Match_Result) 3210 is 3211 Start : Natural; 3212 Stop : Natural; 3213 S : Big_String_Access; 3214 L : Natural; 3215 3216 begin 3217 Get_String (Subject, S, L); 3218 3219 if Debug_Mode then 3220 XMatchD (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3221 else 3222 XMatch (S (1 .. L), Pat.P, Pat.Stk, Start, Stop); 3223 end if; 3224 3225 if Start = 0 then 3226 Result.Var := null; 3227 else 3228 Result.Var := Subject'Unrestricted_Access; 3229 Result.Start := Start; 3230 Result.Stop := Stop; 3231 end if; 3232 end Match; 3233 3234 --------------- 3235 -- New_LineD -- 3236 --------------- 3237 3238 procedure New_LineD is 3239 begin 3240 if Internal_Debug then 3241 New_Line; 3242 end if; 3243 end New_LineD; 3244 3245 ------------ 3246 -- NotAny -- 3247 ------------ 3248 3249 function NotAny (Str : String) return Pattern is 3250 begin 3251 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, To_Set (Str))); 3252 end NotAny; 3253 3254 function NotAny (Str : VString) return Pattern is 3255 begin 3256 return NotAny (S (Str)); 3257 end NotAny; 3258 3259 function NotAny (Str : Character) return Pattern is 3260 begin 3261 return (AFC with 0, new PE'(PC_NotAny_CH, 1, EOP, Str)); 3262 end NotAny; 3263 3264 function NotAny (Str : Character_Set) return Pattern is 3265 begin 3266 return (AFC with 0, new PE'(PC_NotAny_CS, 1, EOP, Str)); 3267 end NotAny; 3268 3269 function NotAny (Str : not null access VString) return Pattern is 3270 begin 3271 return (AFC with 0, new PE'(PC_NotAny_VP, 1, EOP, VString_Ptr (Str))); 3272 end NotAny; 3273 3274 function NotAny (Str : VString_Func) return Pattern is 3275 begin 3276 return (AFC with 0, new PE'(PC_NotAny_VF, 1, EOP, Str)); 3277 end NotAny; 3278 3279 ----------- 3280 -- NSpan -- 3281 ----------- 3282 3283 function NSpan (Str : String) return Pattern is 3284 begin 3285 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, To_Set (Str))); 3286 end NSpan; 3287 3288 function NSpan (Str : VString) return Pattern is 3289 begin 3290 return NSpan (S (Str)); 3291 end NSpan; 3292 3293 function NSpan (Str : Character) return Pattern is 3294 begin 3295 return (AFC with 0, new PE'(PC_NSpan_CH, 1, EOP, Str)); 3296 end NSpan; 3297 3298 function NSpan (Str : Character_Set) return Pattern is 3299 begin 3300 return (AFC with 0, new PE'(PC_NSpan_CS, 1, EOP, Str)); 3301 end NSpan; 3302 3303 function NSpan (Str : not null access VString) return Pattern is 3304 begin 3305 return (AFC with 0, new PE'(PC_NSpan_VP, 1, EOP, VString_Ptr (Str))); 3306 end NSpan; 3307 3308 function NSpan (Str : VString_Func) return Pattern is 3309 begin 3310 return (AFC with 0, new PE'(PC_NSpan_VF, 1, EOP, Str)); 3311 end NSpan; 3312 3313 --------- 3314 -- Pos -- 3315 --------- 3316 3317 function Pos (Count : Natural) return Pattern is 3318 begin 3319 return (AFC with 0, new PE'(PC_Pos_Nat, 1, EOP, Count)); 3320 end Pos; 3321 3322 function Pos (Count : Natural_Func) return Pattern is 3323 begin 3324 return (AFC with 0, new PE'(PC_Pos_NF, 1, EOP, Count)); 3325 end Pos; 3326 3327 function Pos (Count : not null access Natural) return Pattern is 3328 begin 3329 return (AFC with 0, new PE'(PC_Pos_NP, 1, EOP, Natural_Ptr (Count))); 3330 end Pos; 3331 3332 ---------- 3333 -- PutD -- 3334 ---------- 3335 3336 procedure PutD (Str : String) is 3337 begin 3338 if Internal_Debug then 3339 Put (Str); 3340 end if; 3341 end PutD; 3342 3343 --------------- 3344 -- Put_LineD -- 3345 --------------- 3346 3347 procedure Put_LineD (Str : String) is 3348 begin 3349 if Internal_Debug then 3350 Put_Line (Str); 3351 end if; 3352 end Put_LineD; 3353 3354 ------------- 3355 -- Replace -- 3356 ------------- 3357 3358 procedure Replace 3359 (Result : in out Match_Result; 3360 Replace : VString) 3361 is 3362 S : Big_String_Access; 3363 L : Natural; 3364 3365 begin 3366 Get_String (Replace, S, L); 3367 3368 if Result.Var /= null then 3369 Replace_Slice (Result.Var.all, Result.Start, Result.Stop, S (1 .. L)); 3370 Result.Var := null; 3371 end if; 3372 end Replace; 3373 3374 ---------- 3375 -- Rest -- 3376 ---------- 3377 3378 function Rest return Pattern is 3379 begin 3380 return (AFC with 0, new PE'(PC_Rest, 1, EOP)); 3381 end Rest; 3382 3383 ---------- 3384 -- Rpos -- 3385 ---------- 3386 3387 function Rpos (Count : Natural) return Pattern is 3388 begin 3389 return (AFC with 0, new PE'(PC_RPos_Nat, 1, EOP, Count)); 3390 end Rpos; 3391 3392 function Rpos (Count : Natural_Func) return Pattern is 3393 begin 3394 return (AFC with 0, new PE'(PC_RPos_NF, 1, EOP, Count)); 3395 end Rpos; 3396 3397 function Rpos (Count : not null access Natural) return Pattern is 3398 begin 3399 return (AFC with 0, new PE'(PC_RPos_NP, 1, EOP, Natural_Ptr (Count))); 3400 end Rpos; 3401 3402 ---------- 3403 -- Rtab -- 3404 ---------- 3405 3406 function Rtab (Count : Natural) return Pattern is 3407 begin 3408 return (AFC with 0, new PE'(PC_RTab_Nat, 1, EOP, Count)); 3409 end Rtab; 3410 3411 function Rtab (Count : Natural_Func) return Pattern is 3412 begin 3413 return (AFC with 0, new PE'(PC_RTab_NF, 1, EOP, Count)); 3414 end Rtab; 3415 3416 function Rtab (Count : not null access Natural) return Pattern is 3417 begin 3418 return (AFC with 0, new PE'(PC_RTab_NP, 1, EOP, Natural_Ptr (Count))); 3419 end Rtab; 3420 3421 ------------- 3422 -- S_To_PE -- 3423 ------------- 3424 3425 function S_To_PE (Str : PString) return PE_Ptr is 3426 Len : constant Natural := Str'Length; 3427 3428 begin 3429 case Len is 3430 when 0 => 3431 return new PE'(PC_Null, 1, EOP); 3432 3433 when 1 => 3434 return new PE'(PC_Char, 1, EOP, Str (Str'First)); 3435 3436 when 2 => 3437 return new PE'(PC_String_2, 1, EOP, Str); 3438 3439 when 3 => 3440 return new PE'(PC_String_3, 1, EOP, Str); 3441 3442 when 4 => 3443 return new PE'(PC_String_4, 1, EOP, Str); 3444 3445 when 5 => 3446 return new PE'(PC_String_5, 1, EOP, Str); 3447 3448 when 6 => 3449 return new PE'(PC_String_6, 1, EOP, Str); 3450 3451 when others => 3452 return new PE'(PC_String, 1, EOP, new String'(Str)); 3453 3454 end case; 3455 end S_To_PE; 3456 3457 ------------------- 3458 -- Set_Successor -- 3459 ------------------- 3460 3461 -- Note: this procedure is not used by the normal concatenation circuit, 3462 -- since other fixups are required on the left operand in this case, and 3463 -- they might as well be done all together. 3464 3465 procedure Set_Successor (Pat : PE_Ptr; Succ : PE_Ptr) is 3466 begin 3467 if Pat = null then 3468 Uninitialized_Pattern; 3469 3470 elsif Pat = EOP then 3471 Logic_Error; 3472 3473 else 3474 declare 3475 Refs : Ref_Array (1 .. Pat.Index); 3476 -- We build a reference array for L whose N'th element points to 3477 -- the pattern element of L whose original Index value is N. 3478 3479 P : PE_Ptr; 3480 3481 begin 3482 Build_Ref_Array (Pat, Refs); 3483 3484 for J in Refs'Range loop 3485 P := Refs (J); 3486 3487 if P.Pthen = EOP then 3488 P.Pthen := Succ; 3489 end if; 3490 3491 if P.Pcode in PC_Has_Alt and then P.Alt = EOP then 3492 P.Alt := Succ; 3493 end if; 3494 end loop; 3495 end; 3496 end if; 3497 end Set_Successor; 3498 3499 ------------ 3500 -- Setcur -- 3501 ------------ 3502 3503 function Setcur (Var : not null access Natural) return Pattern is 3504 begin 3505 return (AFC with 0, new PE'(PC_Setcur, 1, EOP, Natural_Ptr (Var))); 3506 end Setcur; 3507 3508 ---------- 3509 -- Span -- 3510 ---------- 3511 3512 function Span (Str : String) return Pattern is 3513 begin 3514 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, To_Set (Str))); 3515 end Span; 3516 3517 function Span (Str : VString) return Pattern is 3518 begin 3519 return Span (S (Str)); 3520 end Span; 3521 3522 function Span (Str : Character) return Pattern is 3523 begin 3524 return (AFC with 0, new PE'(PC_Span_CH, 1, EOP, Str)); 3525 end Span; 3526 3527 function Span (Str : Character_Set) return Pattern is 3528 begin 3529 return (AFC with 0, new PE'(PC_Span_CS, 1, EOP, Str)); 3530 end Span; 3531 3532 function Span (Str : not null access VString) return Pattern is 3533 begin 3534 return (AFC with 0, new PE'(PC_Span_VP, 1, EOP, VString_Ptr (Str))); 3535 end Span; 3536 3537 function Span (Str : VString_Func) return Pattern is 3538 begin 3539 return (AFC with 0, new PE'(PC_Span_VF, 1, EOP, Str)); 3540 end Span; 3541 3542 ------------ 3543 -- Str_BF -- 3544 ------------ 3545 3546 function Str_BF (A : Boolean_Func) return String is 3547 function To_A is new Ada.Unchecked_Conversion (Boolean_Func, Address); 3548 begin 3549 return "BF(" & Image (To_A (A)) & ')'; 3550 end Str_BF; 3551 3552 ------------ 3553 -- Str_FP -- 3554 ------------ 3555 3556 function Str_FP (A : File_Ptr) return String is 3557 begin 3558 return "FP(" & Image (A.all'Address) & ')'; 3559 end Str_FP; 3560 3561 ------------ 3562 -- Str_NF -- 3563 ------------ 3564 3565 function Str_NF (A : Natural_Func) return String is 3566 function To_A is new Ada.Unchecked_Conversion (Natural_Func, Address); 3567 begin 3568 return "NF(" & Image (To_A (A)) & ')'; 3569 end Str_NF; 3570 3571 ------------ 3572 -- Str_NP -- 3573 ------------ 3574 3575 function Str_NP (A : Natural_Ptr) return String is 3576 begin 3577 return "NP(" & Image (A.all'Address) & ')'; 3578 end Str_NP; 3579 3580 ------------ 3581 -- Str_PP -- 3582 ------------ 3583 3584 function Str_PP (A : Pattern_Ptr) return String is 3585 begin 3586 return "PP(" & Image (A.all'Address) & ')'; 3587 end Str_PP; 3588 3589 ------------ 3590 -- Str_VF -- 3591 ------------ 3592 3593 function Str_VF (A : VString_Func) return String is 3594 function To_A is new Ada.Unchecked_Conversion (VString_Func, Address); 3595 begin 3596 return "VF(" & Image (To_A (A)) & ')'; 3597 end Str_VF; 3598 3599 ------------ 3600 -- Str_VP -- 3601 ------------ 3602 3603 function Str_VP (A : VString_Ptr) return String is 3604 begin 3605 return "VP(" & Image (A.all'Address) & ')'; 3606 end Str_VP; 3607 3608 ------------- 3609 -- Succeed -- 3610 ------------- 3611 3612 function Succeed return Pattern is 3613 begin 3614 return (AFC with 1, new PE'(PC_Succeed, 1, EOP)); 3615 end Succeed; 3616 3617 --------- 3618 -- Tab -- 3619 --------- 3620 3621 function Tab (Count : Natural) return Pattern is 3622 begin 3623 return (AFC with 0, new PE'(PC_Tab_Nat, 1, EOP, Count)); 3624 end Tab; 3625 3626 function Tab (Count : Natural_Func) return Pattern is 3627 begin 3628 return (AFC with 0, new PE'(PC_Tab_NF, 1, EOP, Count)); 3629 end Tab; 3630 3631 function Tab (Count : not null access Natural) return Pattern is 3632 begin 3633 return (AFC with 0, new PE'(PC_Tab_NP, 1, EOP, Natural_Ptr (Count))); 3634 end Tab; 3635 3636 --------------------------- 3637 -- Uninitialized_Pattern -- 3638 --------------------------- 3639 3640 procedure Uninitialized_Pattern is 3641 begin 3642 raise Program_Error with 3643 "uninitialized value of type GNAT.Spitbol.Patterns.Pattern"; 3644 end Uninitialized_Pattern; 3645 3646 ------------ 3647 -- XMatch -- 3648 ------------ 3649 3650 procedure XMatch 3651 (Subject : String; 3652 Pat_P : PE_Ptr; 3653 Pat_S : Natural; 3654 Start : out Natural; 3655 Stop : out Natural) 3656 is 3657 Node : PE_Ptr; 3658 -- Pointer to current pattern node. Initialized from Pat_P, and then 3659 -- updated as the match proceeds through its constituent elements. 3660 3661 Length : constant Natural := Subject'Length; 3662 -- Length of string (= Subject'Last, since Subject'First is always 1) 3663 3664 Cursor : Integer := 0; 3665 -- If the value is non-negative, then this value is the index showing 3666 -- the current position of the match in the subject string. The next 3667 -- character to be matched is at Subject (Cursor + 1). Note that since 3668 -- our view of the subject string in XMatch always has a lower bound 3669 -- of one, regardless of original bounds, that this definition exactly 3670 -- corresponds to the cursor value as referenced by functions like Pos. 3671 -- 3672 -- If the value is negative, then this is a saved stack pointer, 3673 -- typically a base pointer of an inner or outer region. Cursor 3674 -- temporarily holds such a value when it is popped from the stack 3675 -- by Fail. In all cases, Cursor is reset to a proper non-negative 3676 -- cursor value before the match proceeds (e.g. by propagating the 3677 -- failure and popping a "real" cursor value from the stack. 3678 3679 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P); 3680 -- Dummy pattern element used in the unanchored case 3681 3682 Stack : Stack_Type; 3683 -- The pattern matching failure stack for this call to Match 3684 3685 Stack_Ptr : Stack_Range; 3686 -- Current stack pointer. This points to the top element of the stack 3687 -- that is currently in use. At the outer level this is the special 3688 -- entry placed on the stack according to the anchor mode. 3689 3690 Stack_Init : constant Stack_Range := Stack'First + 1; 3691 -- This is the initial value of the Stack_Ptr and Stack_Base. The 3692 -- initial (Stack'First) element of the stack is not used so that 3693 -- when we pop the last element off, Stack_Ptr is still in range. 3694 3695 Stack_Base : Stack_Range; 3696 -- This value is the stack base value, i.e. the stack pointer for the 3697 -- first history stack entry in the current stack region. See separate 3698 -- section on handling of recursive pattern matches. 3699 3700 Assign_OnM : Boolean := False; 3701 -- Set True if assign-on-match or write-on-match operations may be 3702 -- present in the history stack, which must then be scanned on a 3703 -- successful match. 3704 3705 procedure Pop_Region; 3706 pragma Inline (Pop_Region); 3707 -- Used at the end of processing of an inner region. If the inner 3708 -- region left no stack entries, then all trace of it is removed. 3709 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper 3710 -- handling of alternatives in the inner region. 3711 3712 procedure Push (Node : PE_Ptr); 3713 pragma Inline (Push); 3714 -- Make entry in pattern matching stack with current cursor value 3715 3716 procedure Push_Region; 3717 pragma Inline (Push_Region); 3718 -- This procedure makes a new region on the history stack. The 3719 -- caller first establishes the special entry on the stack, but 3720 -- does not push the stack pointer. Then this call stacks a 3721 -- PC_Remove_Region node, on top of this entry, using the cursor 3722 -- field of the PC_Remove_Region entry to save the outer level 3723 -- stack base value, and resets the stack base to point to this 3724 -- PC_Remove_Region node. 3725 3726 ---------------- 3727 -- Pop_Region -- 3728 ---------------- 3729 3730 procedure Pop_Region is 3731 begin 3732 -- If nothing was pushed in the inner region, we can just get 3733 -- rid of it entirely, leaving no traces that it was ever there 3734 3735 if Stack_Ptr = Stack_Base then 3736 Stack_Ptr := Stack_Base - 2; 3737 Stack_Base := Stack (Stack_Ptr + 2).Cursor; 3738 3739 -- If stuff was pushed in the inner region, then we have to 3740 -- push a PC_R_Restore node so that we properly handle possible 3741 -- rematches within the region. 3742 3743 else 3744 Stack_Ptr := Stack_Ptr + 1; 3745 Stack (Stack_Ptr).Cursor := Stack_Base; 3746 Stack (Stack_Ptr).Node := CP_R_Restore'Access; 3747 Stack_Base := Stack (Stack_Base).Cursor; 3748 end if; 3749 end Pop_Region; 3750 3751 ---------- 3752 -- Push -- 3753 ---------- 3754 3755 procedure Push (Node : PE_Ptr) is 3756 begin 3757 Stack_Ptr := Stack_Ptr + 1; 3758 Stack (Stack_Ptr).Cursor := Cursor; 3759 Stack (Stack_Ptr).Node := Node; 3760 end Push; 3761 3762 ----------------- 3763 -- Push_Region -- 3764 ----------------- 3765 3766 procedure Push_Region is 3767 begin 3768 Stack_Ptr := Stack_Ptr + 2; 3769 Stack (Stack_Ptr).Cursor := Stack_Base; 3770 Stack (Stack_Ptr).Node := CP_R_Remove'Access; 3771 Stack_Base := Stack_Ptr; 3772 end Push_Region; 3773 3774 -- Start of processing for XMatch 3775 3776 begin 3777 if Pat_P = null then 3778 Uninitialized_Pattern; 3779 end if; 3780 3781 -- Check we have enough stack for this pattern. This check deals with 3782 -- every possibility except a match of a recursive pattern, where we 3783 -- make a check at each recursion level. 3784 3785 if Pat_S >= Stack_Size - 1 then 3786 raise Pattern_Stack_Overflow; 3787 end if; 3788 3789 -- In anchored mode, the bottom entry on the stack is an abort entry 3790 3791 if Anchored_Mode then 3792 Stack (Stack_Init).Node := CP_Cancel'Access; 3793 Stack (Stack_Init).Cursor := 0; 3794 3795 -- In unanchored more, the bottom entry on the stack references 3796 -- the special pattern element PE_Unanchored, whose Pthen field 3797 -- points to the initial pattern element. The cursor value in this 3798 -- entry is the number of anchor moves so far. 3799 3800 else 3801 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access; 3802 Stack (Stack_Init).Cursor := 0; 3803 end if; 3804 3805 Stack_Ptr := Stack_Init; 3806 Stack_Base := Stack_Ptr; 3807 Cursor := 0; 3808 Node := Pat_P; 3809 goto Match; 3810 3811 ----------------------------------------- 3812 -- Main Pattern Matching State Control -- 3813 ----------------------------------------- 3814 3815 -- This is a state machine which uses gotos to change state. The 3816 -- initial state is Match, to initiate the matching of the first 3817 -- element, so the goto Match above starts the match. In the 3818 -- following descriptions, we indicate the global values that 3819 -- are relevant for the state transition. 3820 3821 -- Come here if entire match fails 3822 3823 <<Match_Fail>> 3824 Start := 0; 3825 Stop := 0; 3826 return; 3827 3828 -- Come here if entire match succeeds 3829 3830 -- Cursor current position in subject string 3831 3832 <<Match_Succeed>> 3833 Start := Stack (Stack_Init).Cursor + 1; 3834 Stop := Cursor; 3835 3836 -- Scan history stack for deferred assignments or writes 3837 3838 if Assign_OnM then 3839 for S in Stack_Init .. Stack_Ptr loop 3840 if Stack (S).Node = CP_Assign'Access then 3841 declare 3842 Inner_Base : constant Stack_Range := 3843 Stack (S + 1).Cursor; 3844 Special_Entry : constant Stack_Range := 3845 Inner_Base - 1; 3846 Node_OnM : constant PE_Ptr := 3847 Stack (Special_Entry).Node; 3848 Start : constant Natural := 3849 Stack (Special_Entry).Cursor + 1; 3850 Stop : constant Natural := Stack (S).Cursor; 3851 3852 begin 3853 if Node_OnM.Pcode = PC_Assign_OnM then 3854 Set_Unbounded_String 3855 (Node_OnM.VP.all, Subject (Start .. Stop)); 3856 3857 elsif Node_OnM.Pcode = PC_Write_OnM then 3858 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop)); 3859 3860 else 3861 Logic_Error; 3862 end if; 3863 end; 3864 end if; 3865 end loop; 3866 end if; 3867 3868 return; 3869 3870 -- Come here if attempt to match current element fails 3871 3872 -- Stack_Base current stack base 3873 -- Stack_Ptr current stack pointer 3874 3875 <<Fail>> 3876 Cursor := Stack (Stack_Ptr).Cursor; 3877 Node := Stack (Stack_Ptr).Node; 3878 Stack_Ptr := Stack_Ptr - 1; 3879 goto Match; 3880 3881 -- Come here if attempt to match current element succeeds 3882 3883 -- Cursor current position in subject string 3884 -- Node pointer to node successfully matched 3885 -- Stack_Base current stack base 3886 -- Stack_Ptr current stack pointer 3887 3888 <<Succeed>> 3889 Node := Node.Pthen; 3890 3891 -- Come here to match the next pattern element 3892 3893 -- Cursor current position in subject string 3894 -- Node pointer to node to be matched 3895 -- Stack_Base current stack base 3896 -- Stack_Ptr current stack pointer 3897 3898 <<Match>> 3899 3900 -------------------------------------------------- 3901 -- Main Pattern Match Element Matching Routines -- 3902 -------------------------------------------------- 3903 3904 -- Here is the case statement that processes the current node. The 3905 -- processing for each element does one of five things: 3906 3907 -- goto Succeed to move to the successor 3908 -- goto Match_Succeed if the entire match succeeds 3909 -- goto Match_Fail if the entire match fails 3910 -- goto Fail to signal failure of current match 3911 3912 -- Processing is NOT allowed to fall through 3913 3914 case Node.Pcode is 3915 3916 -- Cancel 3917 3918 when PC_Cancel => 3919 goto Match_Fail; 3920 3921 -- Alternation 3922 3923 when PC_Alt => 3924 Push (Node.Alt); 3925 Node := Node.Pthen; 3926 goto Match; 3927 3928 -- Any (one character case) 3929 3930 when PC_Any_CH => 3931 if Cursor < Length 3932 and then Subject (Cursor + 1) = Node.Char 3933 then 3934 Cursor := Cursor + 1; 3935 goto Succeed; 3936 else 3937 goto Fail; 3938 end if; 3939 3940 -- Any (character set case) 3941 3942 when PC_Any_CS => 3943 if Cursor < Length 3944 and then Is_In (Subject (Cursor + 1), Node.CS) 3945 then 3946 Cursor := Cursor + 1; 3947 goto Succeed; 3948 else 3949 goto Fail; 3950 end if; 3951 3952 -- Any (string function case) 3953 3954 when PC_Any_VF => declare 3955 U : constant VString := Node.VF.all; 3956 S : Big_String_Access; 3957 L : Natural; 3958 3959 begin 3960 Get_String (U, S, L); 3961 3962 if Cursor < Length 3963 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 3964 then 3965 Cursor := Cursor + 1; 3966 goto Succeed; 3967 else 3968 goto Fail; 3969 end if; 3970 end; 3971 3972 -- Any (string pointer case) 3973 3974 when PC_Any_VP => declare 3975 U : constant VString := Node.VP.all; 3976 S : Big_String_Access; 3977 L : Natural; 3978 3979 begin 3980 Get_String (U, S, L); 3981 3982 if Cursor < Length 3983 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 3984 then 3985 Cursor := Cursor + 1; 3986 goto Succeed; 3987 else 3988 goto Fail; 3989 end if; 3990 end; 3991 3992 -- Arb (initial match) 3993 3994 when PC_Arb_X => 3995 Push (Node.Alt); 3996 Node := Node.Pthen; 3997 goto Match; 3998 3999 -- Arb (extension) 4000 4001 when PC_Arb_Y => 4002 if Cursor < Length then 4003 Cursor := Cursor + 1; 4004 Push (Node); 4005 goto Succeed; 4006 else 4007 goto Fail; 4008 end if; 4009 4010 -- Arbno_S (simple Arbno initialize). This is the node that 4011 -- initiates the match of a simple Arbno structure. 4012 4013 when PC_Arbno_S => 4014 Push (Node.Alt); 4015 Node := Node.Pthen; 4016 goto Match; 4017 4018 -- Arbno_X (Arbno initialize). This is the node that initiates 4019 -- the match of a complex Arbno structure. 4020 4021 when PC_Arbno_X => 4022 Push (Node.Alt); 4023 Node := Node.Pthen; 4024 goto Match; 4025 4026 -- Arbno_Y (Arbno rematch). This is the node that is executed 4027 -- following successful matching of one instance of a complex 4028 -- Arbno pattern. 4029 4030 when PC_Arbno_Y => declare 4031 Null_Match : constant Boolean := 4032 Cursor = Stack (Stack_Base - 1).Cursor; 4033 4034 begin 4035 Pop_Region; 4036 4037 -- If arbno extension matched null, then immediately fail 4038 4039 if Null_Match then 4040 goto Fail; 4041 end if; 4042 4043 -- Here we must do a stack check to make sure enough stack 4044 -- is left. This check will happen once for each instance of 4045 -- the Arbno pattern that is matched. The Nat field of a 4046 -- PC_Arbno pattern contains the maximum stack entries needed 4047 -- for the Arbno with one instance and the successor pattern 4048 4049 if Stack_Ptr + Node.Nat >= Stack'Last then 4050 raise Pattern_Stack_Overflow; 4051 end if; 4052 4053 goto Succeed; 4054 end; 4055 4056 -- Assign. If this node is executed, it means the assign-on-match 4057 -- or write-on-match operation will not happen after all, so we 4058 -- is propagate the failure, removing the PC_Assign node. 4059 4060 when PC_Assign => 4061 goto Fail; 4062 4063 -- Assign immediate. This node performs the actual assignment 4064 4065 when PC_Assign_Imm => 4066 Set_Unbounded_String 4067 (Node.VP.all, 4068 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 4069 Pop_Region; 4070 goto Succeed; 4071 4072 -- Assign on match. This node sets up for the eventual assignment 4073 4074 when PC_Assign_OnM => 4075 Stack (Stack_Base - 1).Node := Node; 4076 Push (CP_Assign'Access); 4077 Pop_Region; 4078 Assign_OnM := True; 4079 goto Succeed; 4080 4081 -- Bal 4082 4083 when PC_Bal => 4084 if Cursor >= Length or else Subject (Cursor + 1) = ')' then 4085 goto Fail; 4086 4087 elsif Subject (Cursor + 1) = '(' then 4088 declare 4089 Paren_Count : Natural := 1; 4090 4091 begin 4092 loop 4093 Cursor := Cursor + 1; 4094 4095 if Cursor >= Length then 4096 goto Fail; 4097 4098 elsif Subject (Cursor + 1) = '(' then 4099 Paren_Count := Paren_Count + 1; 4100 4101 elsif Subject (Cursor + 1) = ')' then 4102 Paren_Count := Paren_Count - 1; 4103 exit when Paren_Count = 0; 4104 end if; 4105 end loop; 4106 end; 4107 end if; 4108 4109 Cursor := Cursor + 1; 4110 Push (Node); 4111 goto Succeed; 4112 4113 -- Break (one character case) 4114 4115 when PC_Break_CH => 4116 while Cursor < Length loop 4117 if Subject (Cursor + 1) = Node.Char then 4118 goto Succeed; 4119 else 4120 Cursor := Cursor + 1; 4121 end if; 4122 end loop; 4123 4124 goto Fail; 4125 4126 -- Break (character set case) 4127 4128 when PC_Break_CS => 4129 while Cursor < Length loop 4130 if Is_In (Subject (Cursor + 1), Node.CS) then 4131 goto Succeed; 4132 else 4133 Cursor := Cursor + 1; 4134 end if; 4135 end loop; 4136 4137 goto Fail; 4138 4139 -- Break (string function case) 4140 4141 when PC_Break_VF => declare 4142 U : constant VString := Node.VF.all; 4143 S : Big_String_Access; 4144 L : Natural; 4145 4146 begin 4147 Get_String (U, S, L); 4148 4149 while Cursor < Length loop 4150 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4151 goto Succeed; 4152 else 4153 Cursor := Cursor + 1; 4154 end if; 4155 end loop; 4156 4157 goto Fail; 4158 end; 4159 4160 -- Break (string pointer case) 4161 4162 when PC_Break_VP => declare 4163 U : constant VString := Node.VP.all; 4164 S : Big_String_Access; 4165 L : Natural; 4166 4167 begin 4168 Get_String (U, S, L); 4169 4170 while Cursor < Length loop 4171 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4172 goto Succeed; 4173 else 4174 Cursor := Cursor + 1; 4175 end if; 4176 end loop; 4177 4178 goto Fail; 4179 end; 4180 4181 -- BreakX (one character case) 4182 4183 when PC_BreakX_CH => 4184 while Cursor < Length loop 4185 if Subject (Cursor + 1) = Node.Char then 4186 goto Succeed; 4187 else 4188 Cursor := Cursor + 1; 4189 end if; 4190 end loop; 4191 4192 goto Fail; 4193 4194 -- BreakX (character set case) 4195 4196 when PC_BreakX_CS => 4197 while Cursor < Length loop 4198 if Is_In (Subject (Cursor + 1), Node.CS) then 4199 goto Succeed; 4200 else 4201 Cursor := Cursor + 1; 4202 end if; 4203 end loop; 4204 4205 goto Fail; 4206 4207 -- BreakX (string function case) 4208 4209 when PC_BreakX_VF => declare 4210 U : constant VString := Node.VF.all; 4211 S : Big_String_Access; 4212 L : Natural; 4213 4214 begin 4215 Get_String (U, S, L); 4216 4217 while Cursor < Length loop 4218 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4219 goto Succeed; 4220 else 4221 Cursor := Cursor + 1; 4222 end if; 4223 end loop; 4224 4225 goto Fail; 4226 end; 4227 4228 -- BreakX (string pointer case) 4229 4230 when PC_BreakX_VP => declare 4231 U : constant VString := Node.VP.all; 4232 S : Big_String_Access; 4233 L : Natural; 4234 4235 begin 4236 Get_String (U, S, L); 4237 4238 while Cursor < Length loop 4239 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 4240 goto Succeed; 4241 else 4242 Cursor := Cursor + 1; 4243 end if; 4244 end loop; 4245 4246 goto Fail; 4247 end; 4248 4249 -- BreakX_X (BreakX extension). See section on "Compound Pattern 4250 -- Structures". This node is the alternative that is stacked to 4251 -- skip past the break character and extend the break. 4252 4253 when PC_BreakX_X => 4254 Cursor := Cursor + 1; 4255 goto Succeed; 4256 4257 -- Character (one character string) 4258 4259 when PC_Char => 4260 if Cursor < Length 4261 and then Subject (Cursor + 1) = Node.Char 4262 then 4263 Cursor := Cursor + 1; 4264 goto Succeed; 4265 else 4266 goto Fail; 4267 end if; 4268 4269 -- End of Pattern 4270 4271 when PC_EOP => 4272 if Stack_Base = Stack_Init then 4273 goto Match_Succeed; 4274 4275 -- End of recursive inner match. See separate section on 4276 -- handing of recursive pattern matches for details. 4277 4278 else 4279 Node := Stack (Stack_Base - 1).Node; 4280 Pop_Region; 4281 goto Match; 4282 end if; 4283 4284 -- Fail 4285 4286 when PC_Fail => 4287 goto Fail; 4288 4289 -- Fence (built in pattern) 4290 4291 when PC_Fence => 4292 Push (CP_Cancel'Access); 4293 goto Succeed; 4294 4295 -- Fence function node X. This is the node that gets control 4296 -- after a successful match of the fenced pattern. 4297 4298 when PC_Fence_X => 4299 Stack_Ptr := Stack_Ptr + 1; 4300 Stack (Stack_Ptr).Cursor := Stack_Base; 4301 Stack (Stack_Ptr).Node := CP_Fence_Y'Access; 4302 Stack_Base := Stack (Stack_Base).Cursor; 4303 goto Succeed; 4304 4305 -- Fence function node Y. This is the node that gets control on 4306 -- a failure that occurs after the fenced pattern has matched. 4307 4308 -- Note: the Cursor at this stage is actually the inner stack 4309 -- base value. We don't reset this, but we do use it to strip 4310 -- off all the entries made by the fenced pattern. 4311 4312 when PC_Fence_Y => 4313 Stack_Ptr := Cursor - 2; 4314 goto Fail; 4315 4316 -- Len (integer case) 4317 4318 when PC_Len_Nat => 4319 if Cursor + Node.Nat > Length then 4320 goto Fail; 4321 else 4322 Cursor := Cursor + Node.Nat; 4323 goto Succeed; 4324 end if; 4325 4326 -- Len (Integer function case) 4327 4328 when PC_Len_NF => declare 4329 N : constant Natural := Node.NF.all; 4330 begin 4331 if Cursor + N > Length then 4332 goto Fail; 4333 else 4334 Cursor := Cursor + N; 4335 goto Succeed; 4336 end if; 4337 end; 4338 4339 -- Len (integer pointer case) 4340 4341 when PC_Len_NP => 4342 if Cursor + Node.NP.all > Length then 4343 goto Fail; 4344 else 4345 Cursor := Cursor + Node.NP.all; 4346 goto Succeed; 4347 end if; 4348 4349 -- NotAny (one character case) 4350 4351 when PC_NotAny_CH => 4352 if Cursor < Length 4353 and then Subject (Cursor + 1) /= Node.Char 4354 then 4355 Cursor := Cursor + 1; 4356 goto Succeed; 4357 else 4358 goto Fail; 4359 end if; 4360 4361 -- NotAny (character set case) 4362 4363 when PC_NotAny_CS => 4364 if Cursor < Length 4365 and then not Is_In (Subject (Cursor + 1), Node.CS) 4366 then 4367 Cursor := Cursor + 1; 4368 goto Succeed; 4369 else 4370 goto Fail; 4371 end if; 4372 4373 -- NotAny (string function case) 4374 4375 when PC_NotAny_VF => declare 4376 U : constant VString := Node.VF.all; 4377 S : Big_String_Access; 4378 L : Natural; 4379 4380 begin 4381 Get_String (U, S, L); 4382 4383 if Cursor < Length 4384 and then 4385 not Is_In (Subject (Cursor + 1), S (1 .. L)) 4386 then 4387 Cursor := Cursor + 1; 4388 goto Succeed; 4389 else 4390 goto Fail; 4391 end if; 4392 end; 4393 4394 -- NotAny (string pointer case) 4395 4396 when PC_NotAny_VP => declare 4397 U : constant VString := Node.VP.all; 4398 S : Big_String_Access; 4399 L : Natural; 4400 4401 begin 4402 Get_String (U, S, L); 4403 4404 if Cursor < Length 4405 and then 4406 not Is_In (Subject (Cursor + 1), S (1 .. L)) 4407 then 4408 Cursor := Cursor + 1; 4409 goto Succeed; 4410 else 4411 goto Fail; 4412 end if; 4413 end; 4414 4415 -- NSpan (one character case) 4416 4417 when PC_NSpan_CH => 4418 while Cursor < Length 4419 and then Subject (Cursor + 1) = Node.Char 4420 loop 4421 Cursor := Cursor + 1; 4422 end loop; 4423 4424 goto Succeed; 4425 4426 -- NSpan (character set case) 4427 4428 when PC_NSpan_CS => 4429 while Cursor < Length 4430 and then Is_In (Subject (Cursor + 1), Node.CS) 4431 loop 4432 Cursor := Cursor + 1; 4433 end loop; 4434 4435 goto Succeed; 4436 4437 -- NSpan (string function case) 4438 4439 when PC_NSpan_VF => declare 4440 U : constant VString := Node.VF.all; 4441 S : Big_String_Access; 4442 L : Natural; 4443 4444 begin 4445 Get_String (U, S, L); 4446 4447 while Cursor < Length 4448 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4449 loop 4450 Cursor := Cursor + 1; 4451 end loop; 4452 4453 goto Succeed; 4454 end; 4455 4456 -- NSpan (string pointer case) 4457 4458 when PC_NSpan_VP => declare 4459 U : constant VString := Node.VP.all; 4460 S : Big_String_Access; 4461 L : Natural; 4462 4463 begin 4464 Get_String (U, S, L); 4465 4466 while Cursor < Length 4467 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 4468 loop 4469 Cursor := Cursor + 1; 4470 end loop; 4471 4472 goto Succeed; 4473 end; 4474 4475 -- Null string 4476 4477 when PC_Null => 4478 goto Succeed; 4479 4480 -- Pos (integer case) 4481 4482 when PC_Pos_Nat => 4483 if Cursor = Node.Nat then 4484 goto Succeed; 4485 else 4486 goto Fail; 4487 end if; 4488 4489 -- Pos (Integer function case) 4490 4491 when PC_Pos_NF => declare 4492 N : constant Natural := Node.NF.all; 4493 begin 4494 if Cursor = N then 4495 goto Succeed; 4496 else 4497 goto Fail; 4498 end if; 4499 end; 4500 4501 -- Pos (integer pointer case) 4502 4503 when PC_Pos_NP => 4504 if Cursor = Node.NP.all then 4505 goto Succeed; 4506 else 4507 goto Fail; 4508 end if; 4509 4510 -- Predicate function 4511 4512 when PC_Pred_Func => 4513 if Node.BF.all then 4514 goto Succeed; 4515 else 4516 goto Fail; 4517 end if; 4518 4519 -- Region Enter. Initiate new pattern history stack region 4520 4521 when PC_R_Enter => 4522 Stack (Stack_Ptr + 1).Cursor := Cursor; 4523 Push_Region; 4524 goto Succeed; 4525 4526 -- Region Remove node. This is the node stacked by an R_Enter. 4527 -- It removes the special format stack entry right underneath, and 4528 -- then restores the outer level stack base and signals failure. 4529 4530 -- Note: the cursor value at this stage is actually the (negative) 4531 -- stack base value for the outer level. 4532 4533 when PC_R_Remove => 4534 Stack_Base := Cursor; 4535 Stack_Ptr := Stack_Ptr - 1; 4536 goto Fail; 4537 4538 -- Region restore node. This is the node stacked at the end of an 4539 -- inner level match. Its function is to restore the inner level 4540 -- region, so that alternatives in this region can be sought. 4541 4542 -- Note: the Cursor at this stage is actually the negative of the 4543 -- inner stack base value, which we use to restore the inner region. 4544 4545 when PC_R_Restore => 4546 Stack_Base := Cursor; 4547 goto Fail; 4548 4549 -- Rest 4550 4551 when PC_Rest => 4552 Cursor := Length; 4553 goto Succeed; 4554 4555 -- Initiate recursive match (pattern pointer case) 4556 4557 when PC_Rpat => 4558 Stack (Stack_Ptr + 1).Node := Node.Pthen; 4559 Push_Region; 4560 4561 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then 4562 raise Pattern_Stack_Overflow; 4563 else 4564 Node := Node.PP.all.P; 4565 goto Match; 4566 end if; 4567 4568 -- RPos (integer case) 4569 4570 when PC_RPos_Nat => 4571 if Cursor = (Length - Node.Nat) then 4572 goto Succeed; 4573 else 4574 goto Fail; 4575 end if; 4576 4577 -- RPos (integer function case) 4578 4579 when PC_RPos_NF => declare 4580 N : constant Natural := Node.NF.all; 4581 begin 4582 if Length - Cursor = N then 4583 goto Succeed; 4584 else 4585 goto Fail; 4586 end if; 4587 end; 4588 4589 -- RPos (integer pointer case) 4590 4591 when PC_RPos_NP => 4592 if Cursor = (Length - Node.NP.all) then 4593 goto Succeed; 4594 else 4595 goto Fail; 4596 end if; 4597 4598 -- RTab (integer case) 4599 4600 when PC_RTab_Nat => 4601 if Cursor <= (Length - Node.Nat) then 4602 Cursor := Length - Node.Nat; 4603 goto Succeed; 4604 else 4605 goto Fail; 4606 end if; 4607 4608 -- RTab (integer function case) 4609 4610 when PC_RTab_NF => declare 4611 N : constant Natural := Node.NF.all; 4612 begin 4613 if Length - Cursor >= N then 4614 Cursor := Length - N; 4615 goto Succeed; 4616 else 4617 goto Fail; 4618 end if; 4619 end; 4620 4621 -- RTab (integer pointer case) 4622 4623 when PC_RTab_NP => 4624 if Cursor <= (Length - Node.NP.all) then 4625 Cursor := Length - Node.NP.all; 4626 goto Succeed; 4627 else 4628 goto Fail; 4629 end if; 4630 4631 -- Cursor assignment 4632 4633 when PC_Setcur => 4634 Node.Var.all := Cursor; 4635 goto Succeed; 4636 4637 -- Span (one character case) 4638 4639 when PC_Span_CH => declare 4640 P : Natural; 4641 4642 begin 4643 P := Cursor; 4644 while P < Length 4645 and then Subject (P + 1) = Node.Char 4646 loop 4647 P := P + 1; 4648 end loop; 4649 4650 if P /= Cursor then 4651 Cursor := P; 4652 goto Succeed; 4653 else 4654 goto Fail; 4655 end if; 4656 end; 4657 4658 -- Span (character set case) 4659 4660 when PC_Span_CS => declare 4661 P : Natural; 4662 4663 begin 4664 P := Cursor; 4665 while P < Length 4666 and then Is_In (Subject (P + 1), Node.CS) 4667 loop 4668 P := P + 1; 4669 end loop; 4670 4671 if P /= Cursor then 4672 Cursor := P; 4673 goto Succeed; 4674 else 4675 goto Fail; 4676 end if; 4677 end; 4678 4679 -- Span (string function case) 4680 4681 when PC_Span_VF => declare 4682 U : constant VString := Node.VF.all; 4683 S : Big_String_Access; 4684 L : Natural; 4685 P : Natural; 4686 4687 begin 4688 Get_String (U, S, L); 4689 4690 P := Cursor; 4691 while P < Length 4692 and then Is_In (Subject (P + 1), S (1 .. L)) 4693 loop 4694 P := P + 1; 4695 end loop; 4696 4697 if P /= Cursor then 4698 Cursor := P; 4699 goto Succeed; 4700 else 4701 goto Fail; 4702 end if; 4703 end; 4704 4705 -- Span (string pointer case) 4706 4707 when PC_Span_VP => declare 4708 U : constant VString := Node.VP.all; 4709 S : Big_String_Access; 4710 L : Natural; 4711 P : Natural; 4712 4713 begin 4714 Get_String (U, S, L); 4715 4716 P := Cursor; 4717 while P < Length 4718 and then Is_In (Subject (P + 1), S (1 .. L)) 4719 loop 4720 P := P + 1; 4721 end loop; 4722 4723 if P /= Cursor then 4724 Cursor := P; 4725 goto Succeed; 4726 else 4727 goto Fail; 4728 end if; 4729 end; 4730 4731 -- String (two character case) 4732 4733 when PC_String_2 => 4734 if (Length - Cursor) >= 2 4735 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2 4736 then 4737 Cursor := Cursor + 2; 4738 goto Succeed; 4739 else 4740 goto Fail; 4741 end if; 4742 4743 -- String (three character case) 4744 4745 when PC_String_3 => 4746 if (Length - Cursor) >= 3 4747 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3 4748 then 4749 Cursor := Cursor + 3; 4750 goto Succeed; 4751 else 4752 goto Fail; 4753 end if; 4754 4755 -- String (four character case) 4756 4757 when PC_String_4 => 4758 if (Length - Cursor) >= 4 4759 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4 4760 then 4761 Cursor := Cursor + 4; 4762 goto Succeed; 4763 else 4764 goto Fail; 4765 end if; 4766 4767 -- String (five character case) 4768 4769 when PC_String_5 => 4770 if (Length - Cursor) >= 5 4771 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5 4772 then 4773 Cursor := Cursor + 5; 4774 goto Succeed; 4775 else 4776 goto Fail; 4777 end if; 4778 4779 -- String (six character case) 4780 4781 when PC_String_6 => 4782 if (Length - Cursor) >= 6 4783 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6 4784 then 4785 Cursor := Cursor + 6; 4786 goto Succeed; 4787 else 4788 goto Fail; 4789 end if; 4790 4791 -- String (case of more than six characters) 4792 4793 when PC_String => declare 4794 Len : constant Natural := Node.Str'Length; 4795 begin 4796 if (Length - Cursor) >= Len 4797 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len) 4798 then 4799 Cursor := Cursor + Len; 4800 goto Succeed; 4801 else 4802 goto Fail; 4803 end if; 4804 end; 4805 4806 -- String (function case) 4807 4808 when PC_String_VF => declare 4809 U : constant VString := Node.VF.all; 4810 S : Big_String_Access; 4811 L : Natural; 4812 4813 begin 4814 Get_String (U, S, L); 4815 4816 if (Length - Cursor) >= L 4817 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 4818 then 4819 Cursor := Cursor + L; 4820 goto Succeed; 4821 else 4822 goto Fail; 4823 end if; 4824 end; 4825 4826 -- String (pointer case) 4827 4828 when PC_String_VP => declare 4829 U : constant VString := Node.VP.all; 4830 S : Big_String_Access; 4831 L : Natural; 4832 4833 begin 4834 Get_String (U, S, L); 4835 4836 if (Length - Cursor) >= L 4837 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 4838 then 4839 Cursor := Cursor + L; 4840 goto Succeed; 4841 else 4842 goto Fail; 4843 end if; 4844 end; 4845 4846 -- Succeed 4847 4848 when PC_Succeed => 4849 Push (Node); 4850 goto Succeed; 4851 4852 -- Tab (integer case) 4853 4854 when PC_Tab_Nat => 4855 if Cursor <= Node.Nat then 4856 Cursor := Node.Nat; 4857 goto Succeed; 4858 else 4859 goto Fail; 4860 end if; 4861 4862 -- Tab (integer function case) 4863 4864 when PC_Tab_NF => declare 4865 N : constant Natural := Node.NF.all; 4866 begin 4867 if Cursor <= N then 4868 Cursor := N; 4869 goto Succeed; 4870 else 4871 goto Fail; 4872 end if; 4873 end; 4874 4875 -- Tab (integer pointer case) 4876 4877 when PC_Tab_NP => 4878 if Cursor <= Node.NP.all then 4879 Cursor := Node.NP.all; 4880 goto Succeed; 4881 else 4882 goto Fail; 4883 end if; 4884 4885 -- Unanchored movement 4886 4887 when PC_Unanchored => 4888 4889 -- All done if we tried every position 4890 4891 if Cursor > Length then 4892 goto Match_Fail; 4893 4894 -- Otherwise extend the anchor point, and restack ourself 4895 4896 else 4897 Cursor := Cursor + 1; 4898 Push (Node); 4899 goto Succeed; 4900 end if; 4901 4902 -- Write immediate. This node performs the actual write 4903 4904 when PC_Write_Imm => 4905 Put_Line 4906 (Node.FP.all, 4907 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 4908 Pop_Region; 4909 goto Succeed; 4910 4911 -- Write on match. This node sets up for the eventual write 4912 4913 when PC_Write_OnM => 4914 Stack (Stack_Base - 1).Node := Node; 4915 Push (CP_Assign'Access); 4916 Pop_Region; 4917 Assign_OnM := True; 4918 goto Succeed; 4919 4920 end case; 4921 4922 -- We are NOT allowed to fall though this case statement, since every 4923 -- match routine must end by executing a goto to the appropriate point 4924 -- in the finite state machine model. 4925 4926 pragma Warnings (Off); 4927 Logic_Error; 4928 pragma Warnings (On); 4929 end XMatch; 4930 4931 ------------- 4932 -- XMatchD -- 4933 ------------- 4934 4935 -- Maintenance note: There is a LOT of code duplication between XMatch 4936 -- and XMatchD. This is quite intentional, the point is to avoid any 4937 -- unnecessary debugging overhead in the XMatch case, but this does mean 4938 -- that any changes to XMatchD must be mirrored in XMatch. In case of 4939 -- any major changes, the proper approach is to delete XMatch, make the 4940 -- changes to XMatchD, and then make a copy of XMatchD, removing all 4941 -- calls to Dout, and all Put and Put_Line operations. This copy becomes 4942 -- the new XMatch. 4943 4944 procedure XMatchD 4945 (Subject : String; 4946 Pat_P : PE_Ptr; 4947 Pat_S : Natural; 4948 Start : out Natural; 4949 Stop : out Natural) 4950 is 4951 Node : PE_Ptr; 4952 -- Pointer to current pattern node. Initialized from Pat_P, and then 4953 -- updated as the match proceeds through its constituent elements. 4954 4955 Length : constant Natural := Subject'Length; 4956 -- Length of string (= Subject'Last, since Subject'First is always 1) 4957 4958 Cursor : Integer := 0; 4959 -- If the value is non-negative, then this value is the index showing 4960 -- the current position of the match in the subject string. The next 4961 -- character to be matched is at Subject (Cursor + 1). Note that since 4962 -- our view of the subject string in XMatch always has a lower bound 4963 -- of one, regardless of original bounds, that this definition exactly 4964 -- corresponds to the cursor value as referenced by functions like Pos. 4965 -- 4966 -- If the value is negative, then this is a saved stack pointer, 4967 -- typically a base pointer of an inner or outer region. Cursor 4968 -- temporarily holds such a value when it is popped from the stack 4969 -- by Fail. In all cases, Cursor is reset to a proper non-negative 4970 -- cursor value before the match proceeds (e.g. by propagating the 4971 -- failure and popping a "real" cursor value from the stack. 4972 4973 PE_Unanchored : aliased PE := (PC_Unanchored, 0, Pat_P); 4974 -- Dummy pattern element used in the unanchored case 4975 4976 Region_Level : Natural := 0; 4977 -- Keeps track of recursive region level. This is used only for 4978 -- debugging, it is the number of saved history stack base values. 4979 4980 Stack : Stack_Type; 4981 -- The pattern matching failure stack for this call to Match 4982 4983 Stack_Ptr : Stack_Range; 4984 -- Current stack pointer. This points to the top element of the stack 4985 -- that is currently in use. At the outer level this is the special 4986 -- entry placed on the stack according to the anchor mode. 4987 4988 Stack_Init : constant Stack_Range := Stack'First + 1; 4989 -- This is the initial value of the Stack_Ptr and Stack_Base. The 4990 -- initial (Stack'First) element of the stack is not used so that 4991 -- when we pop the last element off, Stack_Ptr is still in range. 4992 4993 Stack_Base : Stack_Range; 4994 -- This value is the stack base value, i.e. the stack pointer for the 4995 -- first history stack entry in the current stack region. See separate 4996 -- section on handling of recursive pattern matches. 4997 4998 Assign_OnM : Boolean := False; 4999 -- Set True if assign-on-match or write-on-match operations may be 5000 -- present in the history stack, which must then be scanned on a 5001 -- successful match. 5002 5003 procedure Dout (Str : String); 5004 -- Output string to standard error with bars indicating region level 5005 5006 procedure Dout (Str : String; A : Character); 5007 -- Calls Dout with the string S ('A') 5008 5009 procedure Dout (Str : String; A : Character_Set); 5010 -- Calls Dout with the string S ("A") 5011 5012 procedure Dout (Str : String; A : Natural); 5013 -- Calls Dout with the string S (A) 5014 5015 procedure Dout (Str : String; A : String); 5016 -- Calls Dout with the string S ("A") 5017 5018 function Img (P : PE_Ptr) return String; 5019 -- Returns a string of the form #nnn where nnn is P.Index 5020 5021 procedure Pop_Region; 5022 pragma Inline (Pop_Region); 5023 -- Used at the end of processing of an inner region. If the inner 5024 -- region left no stack entries, then all trace of it is removed. 5025 -- Otherwise a PC_Restore_Region entry is pushed to ensure proper 5026 -- handling of alternatives in the inner region. 5027 5028 procedure Push (Node : PE_Ptr); 5029 pragma Inline (Push); 5030 -- Make entry in pattern matching stack with current cursor value 5031 5032 procedure Push_Region; 5033 pragma Inline (Push_Region); 5034 -- This procedure makes a new region on the history stack. The 5035 -- caller first establishes the special entry on the stack, but 5036 -- does not push the stack pointer. Then this call stacks a 5037 -- PC_Remove_Region node, on top of this entry, using the cursor 5038 -- field of the PC_Remove_Region entry to save the outer level 5039 -- stack base value, and resets the stack base to point to this 5040 -- PC_Remove_Region node. 5041 5042 ---------- 5043 -- Dout -- 5044 ---------- 5045 5046 procedure Dout (Str : String) is 5047 begin 5048 for J in 1 .. Region_Level loop 5049 Put ("| "); 5050 end loop; 5051 5052 Put_Line (Str); 5053 end Dout; 5054 5055 procedure Dout (Str : String; A : Character) is 5056 begin 5057 Dout (Str & " ('" & A & "')"); 5058 end Dout; 5059 5060 procedure Dout (Str : String; A : Character_Set) is 5061 begin 5062 Dout (Str & " (" & Image (To_Sequence (A)) & ')'); 5063 end Dout; 5064 5065 procedure Dout (Str : String; A : Natural) is 5066 begin 5067 Dout (Str & " (" & A & ')'); 5068 end Dout; 5069 5070 procedure Dout (Str : String; A : String) is 5071 begin 5072 Dout (Str & " (" & Image (A) & ')'); 5073 end Dout; 5074 5075 --------- 5076 -- Img -- 5077 --------- 5078 5079 function Img (P : PE_Ptr) return String is 5080 begin 5081 return "#" & Integer (P.Index) & " "; 5082 end Img; 5083 5084 ---------------- 5085 -- Pop_Region -- 5086 ---------------- 5087 5088 procedure Pop_Region is 5089 begin 5090 Region_Level := Region_Level - 1; 5091 5092 -- If nothing was pushed in the inner region, we can just get 5093 -- rid of it entirely, leaving no traces that it was ever there 5094 5095 if Stack_Ptr = Stack_Base then 5096 Stack_Ptr := Stack_Base - 2; 5097 Stack_Base := Stack (Stack_Ptr + 2).Cursor; 5098 5099 -- If stuff was pushed in the inner region, then we have to 5100 -- push a PC_R_Restore node so that we properly handle possible 5101 -- rematches within the region. 5102 5103 else 5104 Stack_Ptr := Stack_Ptr + 1; 5105 Stack (Stack_Ptr).Cursor := Stack_Base; 5106 Stack (Stack_Ptr).Node := CP_R_Restore'Access; 5107 Stack_Base := Stack (Stack_Base).Cursor; 5108 end if; 5109 end Pop_Region; 5110 5111 ---------- 5112 -- Push -- 5113 ---------- 5114 5115 procedure Push (Node : PE_Ptr) is 5116 begin 5117 Stack_Ptr := Stack_Ptr + 1; 5118 Stack (Stack_Ptr).Cursor := Cursor; 5119 Stack (Stack_Ptr).Node := Node; 5120 end Push; 5121 5122 ----------------- 5123 -- Push_Region -- 5124 ----------------- 5125 5126 procedure Push_Region is 5127 begin 5128 Region_Level := Region_Level + 1; 5129 Stack_Ptr := Stack_Ptr + 2; 5130 Stack (Stack_Ptr).Cursor := Stack_Base; 5131 Stack (Stack_Ptr).Node := CP_R_Remove'Access; 5132 Stack_Base := Stack_Ptr; 5133 end Push_Region; 5134 5135 -- Start of processing for XMatchD 5136 5137 begin 5138 New_Line; 5139 Put_Line ("Initiating pattern match, subject = " & Image (Subject)); 5140 Put ("--------------------------------------"); 5141 5142 for J in 1 .. Length loop 5143 Put ('-'); 5144 end loop; 5145 5146 New_Line; 5147 Put_Line ("subject length = " & Length); 5148 5149 if Pat_P = null then 5150 Uninitialized_Pattern; 5151 end if; 5152 5153 -- Check we have enough stack for this pattern. This check deals with 5154 -- every possibility except a match of a recursive pattern, where we 5155 -- make a check at each recursion level. 5156 5157 if Pat_S >= Stack_Size - 1 then 5158 raise Pattern_Stack_Overflow; 5159 end if; 5160 5161 -- In anchored mode, the bottom entry on the stack is an abort entry 5162 5163 if Anchored_Mode then 5164 Stack (Stack_Init).Node := CP_Cancel'Access; 5165 Stack (Stack_Init).Cursor := 0; 5166 5167 -- In unanchored more, the bottom entry on the stack references 5168 -- the special pattern element PE_Unanchored, whose Pthen field 5169 -- points to the initial pattern element. The cursor value in this 5170 -- entry is the number of anchor moves so far. 5171 5172 else 5173 Stack (Stack_Init).Node := PE_Unanchored'Unchecked_Access; 5174 Stack (Stack_Init).Cursor := 0; 5175 end if; 5176 5177 Stack_Ptr := Stack_Init; 5178 Stack_Base := Stack_Ptr; 5179 Cursor := 0; 5180 Node := Pat_P; 5181 goto Match; 5182 5183 ----------------------------------------- 5184 -- Main Pattern Matching State Control -- 5185 ----------------------------------------- 5186 5187 -- This is a state machine which uses gotos to change state. The 5188 -- initial state is Match, to initiate the matching of the first 5189 -- element, so the goto Match above starts the match. In the 5190 -- following descriptions, we indicate the global values that 5191 -- are relevant for the state transition. 5192 5193 -- Come here if entire match fails 5194 5195 <<Match_Fail>> 5196 Dout ("match fails"); 5197 New_Line; 5198 Start := 0; 5199 Stop := 0; 5200 return; 5201 5202 -- Come here if entire match succeeds 5203 5204 -- Cursor current position in subject string 5205 5206 <<Match_Succeed>> 5207 Dout ("match succeeds"); 5208 Start := Stack (Stack_Init).Cursor + 1; 5209 Stop := Cursor; 5210 Dout ("first matched character index = " & Start); 5211 Dout ("last matched character index = " & Stop); 5212 Dout ("matched substring = " & Image (Subject (Start .. Stop))); 5213 5214 -- Scan history stack for deferred assignments or writes 5215 5216 if Assign_OnM then 5217 for S in Stack'First .. Stack_Ptr loop 5218 if Stack (S).Node = CP_Assign'Access then 5219 declare 5220 Inner_Base : constant Stack_Range := 5221 Stack (S + 1).Cursor; 5222 Special_Entry : constant Stack_Range := 5223 Inner_Base - 1; 5224 Node_OnM : constant PE_Ptr := 5225 Stack (Special_Entry).Node; 5226 Start : constant Natural := 5227 Stack (Special_Entry).Cursor + 1; 5228 Stop : constant Natural := Stack (S).Cursor; 5229 5230 begin 5231 if Node_OnM.Pcode = PC_Assign_OnM then 5232 Set_Unbounded_String 5233 (Node_OnM.VP.all, Subject (Start .. Stop)); 5234 Dout 5235 (Img (Stack (S).Node) & 5236 "deferred assignment of " & 5237 Image (Subject (Start .. Stop))); 5238 5239 elsif Node_OnM.Pcode = PC_Write_OnM then 5240 Put_Line (Node_OnM.FP.all, Subject (Start .. Stop)); 5241 Dout 5242 (Img (Stack (S).Node) & 5243 "deferred write of " & 5244 Image (Subject (Start .. Stop))); 5245 5246 else 5247 Logic_Error; 5248 end if; 5249 end; 5250 end if; 5251 end loop; 5252 end if; 5253 5254 New_Line; 5255 return; 5256 5257 -- Come here if attempt to match current element fails 5258 5259 -- Stack_Base current stack base 5260 -- Stack_Ptr current stack pointer 5261 5262 <<Fail>> 5263 Cursor := Stack (Stack_Ptr).Cursor; 5264 Node := Stack (Stack_Ptr).Node; 5265 Stack_Ptr := Stack_Ptr - 1; 5266 5267 if Cursor >= 0 then 5268 Dout ("failure, cursor reset to " & Cursor); 5269 end if; 5270 5271 goto Match; 5272 5273 -- Come here if attempt to match current element succeeds 5274 5275 -- Cursor current position in subject string 5276 -- Node pointer to node successfully matched 5277 -- Stack_Base current stack base 5278 -- Stack_Ptr current stack pointer 5279 5280 <<Succeed>> 5281 Dout ("success, cursor = " & Cursor); 5282 Node := Node.Pthen; 5283 5284 -- Come here to match the next pattern element 5285 5286 -- Cursor current position in subject string 5287 -- Node pointer to node to be matched 5288 -- Stack_Base current stack base 5289 -- Stack_Ptr current stack pointer 5290 5291 <<Match>> 5292 5293 -------------------------------------------------- 5294 -- Main Pattern Match Element Matching Routines -- 5295 -------------------------------------------------- 5296 5297 -- Here is the case statement that processes the current node. The 5298 -- processing for each element does one of five things: 5299 5300 -- goto Succeed to move to the successor 5301 -- goto Match_Succeed if the entire match succeeds 5302 -- goto Match_Fail if the entire match fails 5303 -- goto Fail to signal failure of current match 5304 5305 -- Processing is NOT allowed to fall through 5306 5307 case Node.Pcode is 5308 5309 -- Cancel 5310 5311 when PC_Cancel => 5312 Dout (Img (Node) & "matching Cancel"); 5313 goto Match_Fail; 5314 5315 -- Alternation 5316 5317 when PC_Alt => 5318 Dout 5319 (Img (Node) & "setting up alternative " & Img (Node.Alt)); 5320 Push (Node.Alt); 5321 Node := Node.Pthen; 5322 goto Match; 5323 5324 -- Any (one character case) 5325 5326 when PC_Any_CH => 5327 Dout (Img (Node) & "matching Any", Node.Char); 5328 5329 if Cursor < Length 5330 and then Subject (Cursor + 1) = Node.Char 5331 then 5332 Cursor := Cursor + 1; 5333 goto Succeed; 5334 else 5335 goto Fail; 5336 end if; 5337 5338 -- Any (character set case) 5339 5340 when PC_Any_CS => 5341 Dout (Img (Node) & "matching Any", Node.CS); 5342 5343 if Cursor < Length 5344 and then Is_In (Subject (Cursor + 1), Node.CS) 5345 then 5346 Cursor := Cursor + 1; 5347 goto Succeed; 5348 else 5349 goto Fail; 5350 end if; 5351 5352 -- Any (string function case) 5353 5354 when PC_Any_VF => declare 5355 U : constant VString := Node.VF.all; 5356 S : Big_String_Access; 5357 L : Natural; 5358 5359 begin 5360 Get_String (U, S, L); 5361 5362 Dout (Img (Node) & "matching Any", S (1 .. L)); 5363 5364 if Cursor < Length 5365 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5366 then 5367 Cursor := Cursor + 1; 5368 goto Succeed; 5369 else 5370 goto Fail; 5371 end if; 5372 end; 5373 5374 -- Any (string pointer case) 5375 5376 when PC_Any_VP => declare 5377 U : constant VString := Node.VP.all; 5378 S : Big_String_Access; 5379 L : Natural; 5380 5381 begin 5382 Get_String (U, S, L); 5383 Dout (Img (Node) & "matching Any", S (1 .. L)); 5384 5385 if Cursor < Length 5386 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5387 then 5388 Cursor := Cursor + 1; 5389 goto Succeed; 5390 else 5391 goto Fail; 5392 end if; 5393 end; 5394 5395 -- Arb (initial match) 5396 5397 when PC_Arb_X => 5398 Dout (Img (Node) & "matching Arb"); 5399 Push (Node.Alt); 5400 Node := Node.Pthen; 5401 goto Match; 5402 5403 -- Arb (extension) 5404 5405 when PC_Arb_Y => 5406 Dout (Img (Node) & "extending Arb"); 5407 5408 if Cursor < Length then 5409 Cursor := Cursor + 1; 5410 Push (Node); 5411 goto Succeed; 5412 else 5413 goto Fail; 5414 end if; 5415 5416 -- Arbno_S (simple Arbno initialize). This is the node that 5417 -- initiates the match of a simple Arbno structure. 5418 5419 when PC_Arbno_S => 5420 Dout (Img (Node) & 5421 "setting up Arbno alternative " & Img (Node.Alt)); 5422 Push (Node.Alt); 5423 Node := Node.Pthen; 5424 goto Match; 5425 5426 -- Arbno_X (Arbno initialize). This is the node that initiates 5427 -- the match of a complex Arbno structure. 5428 5429 when PC_Arbno_X => 5430 Dout (Img (Node) & 5431 "setting up Arbno alternative " & Img (Node.Alt)); 5432 Push (Node.Alt); 5433 Node := Node.Pthen; 5434 goto Match; 5435 5436 -- Arbno_Y (Arbno rematch). This is the node that is executed 5437 -- following successful matching of one instance of a complex 5438 -- Arbno pattern. 5439 5440 when PC_Arbno_Y => declare 5441 Null_Match : constant Boolean := 5442 Cursor = Stack (Stack_Base - 1).Cursor; 5443 5444 begin 5445 Dout (Img (Node) & "extending Arbno"); 5446 Pop_Region; 5447 5448 -- If arbno extension matched null, then immediately fail 5449 5450 if Null_Match then 5451 Dout ("Arbno extension matched null, so fails"); 5452 goto Fail; 5453 end if; 5454 5455 -- Here we must do a stack check to make sure enough stack 5456 -- is left. This check will happen once for each instance of 5457 -- the Arbno pattern that is matched. The Nat field of a 5458 -- PC_Arbno pattern contains the maximum stack entries needed 5459 -- for the Arbno with one instance and the successor pattern 5460 5461 if Stack_Ptr + Node.Nat >= Stack'Last then 5462 raise Pattern_Stack_Overflow; 5463 end if; 5464 5465 goto Succeed; 5466 end; 5467 5468 -- Assign. If this node is executed, it means the assign-on-match 5469 -- or write-on-match operation will not happen after all, so we 5470 -- is propagate the failure, removing the PC_Assign node. 5471 5472 when PC_Assign => 5473 Dout (Img (Node) & "deferred assign/write cancelled"); 5474 goto Fail; 5475 5476 -- Assign immediate. This node performs the actual assignment 5477 5478 when PC_Assign_Imm => 5479 Dout 5480 (Img (Node) & "executing immediate assignment of " & 5481 Image (Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor))); 5482 Set_Unbounded_String 5483 (Node.VP.all, 5484 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 5485 Pop_Region; 5486 goto Succeed; 5487 5488 -- Assign on match. This node sets up for the eventual assignment 5489 5490 when PC_Assign_OnM => 5491 Dout (Img (Node) & "registering deferred assignment"); 5492 Stack (Stack_Base - 1).Node := Node; 5493 Push (CP_Assign'Access); 5494 Pop_Region; 5495 Assign_OnM := True; 5496 goto Succeed; 5497 5498 -- Bal 5499 5500 when PC_Bal => 5501 Dout (Img (Node) & "matching or extending Bal"); 5502 if Cursor >= Length or else Subject (Cursor + 1) = ')' then 5503 goto Fail; 5504 5505 elsif Subject (Cursor + 1) = '(' then 5506 declare 5507 Paren_Count : Natural := 1; 5508 5509 begin 5510 loop 5511 Cursor := Cursor + 1; 5512 5513 if Cursor >= Length then 5514 goto Fail; 5515 5516 elsif Subject (Cursor + 1) = '(' then 5517 Paren_Count := Paren_Count + 1; 5518 5519 elsif Subject (Cursor + 1) = ')' then 5520 Paren_Count := Paren_Count - 1; 5521 exit when Paren_Count = 0; 5522 end if; 5523 end loop; 5524 end; 5525 end if; 5526 5527 Cursor := Cursor + 1; 5528 Push (Node); 5529 goto Succeed; 5530 5531 -- Break (one character case) 5532 5533 when PC_Break_CH => 5534 Dout (Img (Node) & "matching Break", Node.Char); 5535 5536 while Cursor < Length loop 5537 if Subject (Cursor + 1) = Node.Char then 5538 goto Succeed; 5539 else 5540 Cursor := Cursor + 1; 5541 end if; 5542 end loop; 5543 5544 goto Fail; 5545 5546 -- Break (character set case) 5547 5548 when PC_Break_CS => 5549 Dout (Img (Node) & "matching Break", Node.CS); 5550 5551 while Cursor < Length loop 5552 if Is_In (Subject (Cursor + 1), Node.CS) then 5553 goto Succeed; 5554 else 5555 Cursor := Cursor + 1; 5556 end if; 5557 end loop; 5558 5559 goto Fail; 5560 5561 -- Break (string function case) 5562 5563 when PC_Break_VF => declare 5564 U : constant VString := Node.VF.all; 5565 S : Big_String_Access; 5566 L : Natural; 5567 5568 begin 5569 Get_String (U, S, L); 5570 Dout (Img (Node) & "matching Break", S (1 .. L)); 5571 5572 while Cursor < Length loop 5573 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5574 goto Succeed; 5575 else 5576 Cursor := Cursor + 1; 5577 end if; 5578 end loop; 5579 5580 goto Fail; 5581 end; 5582 5583 -- Break (string pointer case) 5584 5585 when PC_Break_VP => declare 5586 U : constant VString := Node.VP.all; 5587 S : Big_String_Access; 5588 L : Natural; 5589 5590 begin 5591 Get_String (U, S, L); 5592 Dout (Img (Node) & "matching Break", S (1 .. L)); 5593 5594 while Cursor < Length loop 5595 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5596 goto Succeed; 5597 else 5598 Cursor := Cursor + 1; 5599 end if; 5600 end loop; 5601 5602 goto Fail; 5603 end; 5604 5605 -- BreakX (one character case) 5606 5607 when PC_BreakX_CH => 5608 Dout (Img (Node) & "matching BreakX", Node.Char); 5609 5610 while Cursor < Length loop 5611 if Subject (Cursor + 1) = Node.Char then 5612 goto Succeed; 5613 else 5614 Cursor := Cursor + 1; 5615 end if; 5616 end loop; 5617 5618 goto Fail; 5619 5620 -- BreakX (character set case) 5621 5622 when PC_BreakX_CS => 5623 Dout (Img (Node) & "matching BreakX", Node.CS); 5624 5625 while Cursor < Length loop 5626 if Is_In (Subject (Cursor + 1), Node.CS) then 5627 goto Succeed; 5628 else 5629 Cursor := Cursor + 1; 5630 end if; 5631 end loop; 5632 5633 goto Fail; 5634 5635 -- BreakX (string function case) 5636 5637 when PC_BreakX_VF => declare 5638 U : constant VString := Node.VF.all; 5639 S : Big_String_Access; 5640 L : Natural; 5641 5642 begin 5643 Get_String (U, S, L); 5644 Dout (Img (Node) & "matching BreakX", S (1 .. L)); 5645 5646 while Cursor < Length loop 5647 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5648 goto Succeed; 5649 else 5650 Cursor := Cursor + 1; 5651 end if; 5652 end loop; 5653 5654 goto Fail; 5655 end; 5656 5657 -- BreakX (string pointer case) 5658 5659 when PC_BreakX_VP => declare 5660 U : constant VString := Node.VP.all; 5661 S : Big_String_Access; 5662 L : Natural; 5663 5664 begin 5665 Get_String (U, S, L); 5666 Dout (Img (Node) & "matching BreakX", S (1 .. L)); 5667 5668 while Cursor < Length loop 5669 if Is_In (Subject (Cursor + 1), S (1 .. L)) then 5670 goto Succeed; 5671 else 5672 Cursor := Cursor + 1; 5673 end if; 5674 end loop; 5675 5676 goto Fail; 5677 end; 5678 5679 -- BreakX_X (BreakX extension). See section on "Compound Pattern 5680 -- Structures". This node is the alternative that is stacked 5681 -- to skip past the break character and extend the break. 5682 5683 when PC_BreakX_X => 5684 Dout (Img (Node) & "extending BreakX"); 5685 Cursor := Cursor + 1; 5686 goto Succeed; 5687 5688 -- Character (one character string) 5689 5690 when PC_Char => 5691 Dout (Img (Node) & "matching '" & Node.Char & '''); 5692 5693 if Cursor < Length 5694 and then Subject (Cursor + 1) = Node.Char 5695 then 5696 Cursor := Cursor + 1; 5697 goto Succeed; 5698 else 5699 goto Fail; 5700 end if; 5701 5702 -- End of Pattern 5703 5704 when PC_EOP => 5705 if Stack_Base = Stack_Init then 5706 Dout ("end of pattern"); 5707 goto Match_Succeed; 5708 5709 -- End of recursive inner match. See separate section on 5710 -- handing of recursive pattern matches for details. 5711 5712 else 5713 Dout ("terminating recursive match"); 5714 Node := Stack (Stack_Base - 1).Node; 5715 Pop_Region; 5716 goto Match; 5717 end if; 5718 5719 -- Fail 5720 5721 when PC_Fail => 5722 Dout (Img (Node) & "matching Fail"); 5723 goto Fail; 5724 5725 -- Fence (built in pattern) 5726 5727 when PC_Fence => 5728 Dout (Img (Node) & "matching Fence"); 5729 Push (CP_Cancel'Access); 5730 goto Succeed; 5731 5732 -- Fence function node X. This is the node that gets control 5733 -- after a successful match of the fenced pattern. 5734 5735 when PC_Fence_X => 5736 Dout (Img (Node) & "matching Fence function"); 5737 Stack_Ptr := Stack_Ptr + 1; 5738 Stack (Stack_Ptr).Cursor := Stack_Base; 5739 Stack (Stack_Ptr).Node := CP_Fence_Y'Access; 5740 Stack_Base := Stack (Stack_Base).Cursor; 5741 Region_Level := Region_Level - 1; 5742 goto Succeed; 5743 5744 -- Fence function node Y. This is the node that gets control on 5745 -- a failure that occurs after the fenced pattern has matched. 5746 5747 -- Note: the Cursor at this stage is actually the inner stack 5748 -- base value. We don't reset this, but we do use it to strip 5749 -- off all the entries made by the fenced pattern. 5750 5751 when PC_Fence_Y => 5752 Dout (Img (Node) & "pattern matched by Fence caused failure"); 5753 Stack_Ptr := Cursor - 2; 5754 goto Fail; 5755 5756 -- Len (integer case) 5757 5758 when PC_Len_Nat => 5759 Dout (Img (Node) & "matching Len", Node.Nat); 5760 5761 if Cursor + Node.Nat > Length then 5762 goto Fail; 5763 else 5764 Cursor := Cursor + Node.Nat; 5765 goto Succeed; 5766 end if; 5767 5768 -- Len (Integer function case) 5769 5770 when PC_Len_NF => declare 5771 N : constant Natural := Node.NF.all; 5772 5773 begin 5774 Dout (Img (Node) & "matching Len", N); 5775 5776 if Cursor + N > Length then 5777 goto Fail; 5778 else 5779 Cursor := Cursor + N; 5780 goto Succeed; 5781 end if; 5782 end; 5783 5784 -- Len (integer pointer case) 5785 5786 when PC_Len_NP => 5787 Dout (Img (Node) & "matching Len", Node.NP.all); 5788 5789 if Cursor + Node.NP.all > Length then 5790 goto Fail; 5791 else 5792 Cursor := Cursor + Node.NP.all; 5793 goto Succeed; 5794 end if; 5795 5796 -- NotAny (one character case) 5797 5798 when PC_NotAny_CH => 5799 Dout (Img (Node) & "matching NotAny", Node.Char); 5800 5801 if Cursor < Length 5802 and then Subject (Cursor + 1) /= Node.Char 5803 then 5804 Cursor := Cursor + 1; 5805 goto Succeed; 5806 else 5807 goto Fail; 5808 end if; 5809 5810 -- NotAny (character set case) 5811 5812 when PC_NotAny_CS => 5813 Dout (Img (Node) & "matching NotAny", Node.CS); 5814 5815 if Cursor < Length 5816 and then not Is_In (Subject (Cursor + 1), Node.CS) 5817 then 5818 Cursor := Cursor + 1; 5819 goto Succeed; 5820 else 5821 goto Fail; 5822 end if; 5823 5824 -- NotAny (string function case) 5825 5826 when PC_NotAny_VF => declare 5827 U : constant VString := Node.VF.all; 5828 S : Big_String_Access; 5829 L : Natural; 5830 5831 begin 5832 Get_String (U, S, L); 5833 Dout (Img (Node) & "matching NotAny", S (1 .. L)); 5834 5835 if Cursor < Length 5836 and then 5837 not Is_In (Subject (Cursor + 1), S (1 .. L)) 5838 then 5839 Cursor := Cursor + 1; 5840 goto Succeed; 5841 else 5842 goto Fail; 5843 end if; 5844 end; 5845 5846 -- NotAny (string pointer case) 5847 5848 when PC_NotAny_VP => declare 5849 U : constant VString := Node.VP.all; 5850 S : Big_String_Access; 5851 L : Natural; 5852 5853 begin 5854 Get_String (U, S, L); 5855 Dout (Img (Node) & "matching NotAny", S (1 .. L)); 5856 5857 if Cursor < Length 5858 and then 5859 not Is_In (Subject (Cursor + 1), S (1 .. L)) 5860 then 5861 Cursor := Cursor + 1; 5862 goto Succeed; 5863 else 5864 goto Fail; 5865 end if; 5866 end; 5867 5868 -- NSpan (one character case) 5869 5870 when PC_NSpan_CH => 5871 Dout (Img (Node) & "matching NSpan", Node.Char); 5872 5873 while Cursor < Length 5874 and then Subject (Cursor + 1) = Node.Char 5875 loop 5876 Cursor := Cursor + 1; 5877 end loop; 5878 5879 goto Succeed; 5880 5881 -- NSpan (character set case) 5882 5883 when PC_NSpan_CS => 5884 Dout (Img (Node) & "matching NSpan", Node.CS); 5885 5886 while Cursor < Length 5887 and then Is_In (Subject (Cursor + 1), Node.CS) 5888 loop 5889 Cursor := Cursor + 1; 5890 end loop; 5891 5892 goto Succeed; 5893 5894 -- NSpan (string function case) 5895 5896 when PC_NSpan_VF => declare 5897 U : constant VString := Node.VF.all; 5898 S : Big_String_Access; 5899 L : Natural; 5900 5901 begin 5902 Get_String (U, S, L); 5903 Dout (Img (Node) & "matching NSpan", S (1 .. L)); 5904 5905 while Cursor < Length 5906 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5907 loop 5908 Cursor := Cursor + 1; 5909 end loop; 5910 5911 goto Succeed; 5912 end; 5913 5914 -- NSpan (string pointer case) 5915 5916 when PC_NSpan_VP => declare 5917 U : constant VString := Node.VP.all; 5918 S : Big_String_Access; 5919 L : Natural; 5920 5921 begin 5922 Get_String (U, S, L); 5923 Dout (Img (Node) & "matching NSpan", S (1 .. L)); 5924 5925 while Cursor < Length 5926 and then Is_In (Subject (Cursor + 1), S (1 .. L)) 5927 loop 5928 Cursor := Cursor + 1; 5929 end loop; 5930 5931 goto Succeed; 5932 end; 5933 5934 when PC_Null => 5935 Dout (Img (Node) & "matching null"); 5936 goto Succeed; 5937 5938 -- Pos (integer case) 5939 5940 when PC_Pos_Nat => 5941 Dout (Img (Node) & "matching Pos", Node.Nat); 5942 5943 if Cursor = Node.Nat then 5944 goto Succeed; 5945 else 5946 goto Fail; 5947 end if; 5948 5949 -- Pos (Integer function case) 5950 5951 when PC_Pos_NF => declare 5952 N : constant Natural := Node.NF.all; 5953 5954 begin 5955 Dout (Img (Node) & "matching Pos", N); 5956 5957 if Cursor = N then 5958 goto Succeed; 5959 else 5960 goto Fail; 5961 end if; 5962 end; 5963 5964 -- Pos (integer pointer case) 5965 5966 when PC_Pos_NP => 5967 Dout (Img (Node) & "matching Pos", Node.NP.all); 5968 5969 if Cursor = Node.NP.all then 5970 goto Succeed; 5971 else 5972 goto Fail; 5973 end if; 5974 5975 -- Predicate function 5976 5977 when PC_Pred_Func => 5978 Dout (Img (Node) & "matching predicate function"); 5979 5980 if Node.BF.all then 5981 goto Succeed; 5982 else 5983 goto Fail; 5984 end if; 5985 5986 -- Region Enter. Initiate new pattern history stack region 5987 5988 when PC_R_Enter => 5989 Dout (Img (Node) & "starting match of nested pattern"); 5990 Stack (Stack_Ptr + 1).Cursor := Cursor; 5991 Push_Region; 5992 goto Succeed; 5993 5994 -- Region Remove node. This is the node stacked by an R_Enter. 5995 -- It removes the special format stack entry right underneath, and 5996 -- then restores the outer level stack base and signals failure. 5997 5998 -- Note: the cursor value at this stage is actually the (negative) 5999 -- stack base value for the outer level. 6000 6001 when PC_R_Remove => 6002 Dout ("failure, match of nested pattern terminated"); 6003 Stack_Base := Cursor; 6004 Region_Level := Region_Level - 1; 6005 Stack_Ptr := Stack_Ptr - 1; 6006 goto Fail; 6007 6008 -- Region restore node. This is the node stacked at the end of an 6009 -- inner level match. Its function is to restore the inner level 6010 -- region, so that alternatives in this region can be sought. 6011 6012 -- Note: the Cursor at this stage is actually the negative of the 6013 -- inner stack base value, which we use to restore the inner region. 6014 6015 when PC_R_Restore => 6016 Dout ("failure, search for alternatives in nested pattern"); 6017 Region_Level := Region_Level + 1; 6018 Stack_Base := Cursor; 6019 goto Fail; 6020 6021 -- Rest 6022 6023 when PC_Rest => 6024 Dout (Img (Node) & "matching Rest"); 6025 Cursor := Length; 6026 goto Succeed; 6027 6028 -- Initiate recursive match (pattern pointer case) 6029 6030 when PC_Rpat => 6031 Stack (Stack_Ptr + 1).Node := Node.Pthen; 6032 Push_Region; 6033 Dout (Img (Node) & "initiating recursive match"); 6034 6035 if Stack_Ptr + Node.PP.all.Stk >= Stack_Size then 6036 raise Pattern_Stack_Overflow; 6037 else 6038 Node := Node.PP.all.P; 6039 goto Match; 6040 end if; 6041 6042 -- RPos (integer case) 6043 6044 when PC_RPos_Nat => 6045 Dout (Img (Node) & "matching RPos", Node.Nat); 6046 6047 if Cursor = (Length - Node.Nat) then 6048 goto Succeed; 6049 else 6050 goto Fail; 6051 end if; 6052 6053 -- RPos (integer function case) 6054 6055 when PC_RPos_NF => declare 6056 N : constant Natural := Node.NF.all; 6057 6058 begin 6059 Dout (Img (Node) & "matching RPos", N); 6060 6061 if Length - Cursor = N then 6062 goto Succeed; 6063 else 6064 goto Fail; 6065 end if; 6066 end; 6067 6068 -- RPos (integer pointer case) 6069 6070 when PC_RPos_NP => 6071 Dout (Img (Node) & "matching RPos", Node.NP.all); 6072 6073 if Cursor = (Length - Node.NP.all) then 6074 goto Succeed; 6075 else 6076 goto Fail; 6077 end if; 6078 6079 -- RTab (integer case) 6080 6081 when PC_RTab_Nat => 6082 Dout (Img (Node) & "matching RTab", Node.Nat); 6083 6084 if Cursor <= (Length - Node.Nat) then 6085 Cursor := Length - Node.Nat; 6086 goto Succeed; 6087 else 6088 goto Fail; 6089 end if; 6090 6091 -- RTab (integer function case) 6092 6093 when PC_RTab_NF => declare 6094 N : constant Natural := Node.NF.all; 6095 6096 begin 6097 Dout (Img (Node) & "matching RPos", N); 6098 6099 if Length - Cursor >= N then 6100 Cursor := Length - N; 6101 goto Succeed; 6102 else 6103 goto Fail; 6104 end if; 6105 end; 6106 6107 -- RTab (integer pointer case) 6108 6109 when PC_RTab_NP => 6110 Dout (Img (Node) & "matching RPos", Node.NP.all); 6111 6112 if Cursor <= (Length - Node.NP.all) then 6113 Cursor := Length - Node.NP.all; 6114 goto Succeed; 6115 else 6116 goto Fail; 6117 end if; 6118 6119 -- Cursor assignment 6120 6121 when PC_Setcur => 6122 Dout (Img (Node) & "matching Setcur"); 6123 Node.Var.all := Cursor; 6124 goto Succeed; 6125 6126 -- Span (one character case) 6127 6128 when PC_Span_CH => declare 6129 P : Natural := Cursor; 6130 6131 begin 6132 Dout (Img (Node) & "matching Span", Node.Char); 6133 6134 while P < Length 6135 and then Subject (P + 1) = Node.Char 6136 loop 6137 P := P + 1; 6138 end loop; 6139 6140 if P /= Cursor then 6141 Cursor := P; 6142 goto Succeed; 6143 else 6144 goto Fail; 6145 end if; 6146 end; 6147 6148 -- Span (character set case) 6149 6150 when PC_Span_CS => declare 6151 P : Natural := Cursor; 6152 6153 begin 6154 Dout (Img (Node) & "matching Span", Node.CS); 6155 6156 while P < Length 6157 and then Is_In (Subject (P + 1), Node.CS) 6158 loop 6159 P := P + 1; 6160 end loop; 6161 6162 if P /= Cursor then 6163 Cursor := P; 6164 goto Succeed; 6165 else 6166 goto Fail; 6167 end if; 6168 end; 6169 6170 -- Span (string function case) 6171 6172 when PC_Span_VF => declare 6173 U : constant VString := Node.VF.all; 6174 S : Big_String_Access; 6175 L : Natural; 6176 P : Natural; 6177 6178 begin 6179 Get_String (U, S, L); 6180 Dout (Img (Node) & "matching Span", S (1 .. L)); 6181 6182 P := Cursor; 6183 while P < Length 6184 and then Is_In (Subject (P + 1), S (1 .. L)) 6185 loop 6186 P := P + 1; 6187 end loop; 6188 6189 if P /= Cursor then 6190 Cursor := P; 6191 goto Succeed; 6192 else 6193 goto Fail; 6194 end if; 6195 end; 6196 6197 -- Span (string pointer case) 6198 6199 when PC_Span_VP => declare 6200 U : constant VString := Node.VP.all; 6201 S : Big_String_Access; 6202 L : Natural; 6203 P : Natural; 6204 6205 begin 6206 Get_String (U, S, L); 6207 Dout (Img (Node) & "matching Span", S (1 .. L)); 6208 6209 P := Cursor; 6210 while P < Length 6211 and then Is_In (Subject (P + 1), S (1 .. L)) 6212 loop 6213 P := P + 1; 6214 end loop; 6215 6216 if P /= Cursor then 6217 Cursor := P; 6218 goto Succeed; 6219 else 6220 goto Fail; 6221 end if; 6222 end; 6223 6224 -- String (two character case) 6225 6226 when PC_String_2 => 6227 Dout (Img (Node) & "matching " & Image (Node.Str2)); 6228 6229 if (Length - Cursor) >= 2 6230 and then Subject (Cursor + 1 .. Cursor + 2) = Node.Str2 6231 then 6232 Cursor := Cursor + 2; 6233 goto Succeed; 6234 else 6235 goto Fail; 6236 end if; 6237 6238 -- String (three character case) 6239 6240 when PC_String_3 => 6241 Dout (Img (Node) & "matching " & Image (Node.Str3)); 6242 6243 if (Length - Cursor) >= 3 6244 and then Subject (Cursor + 1 .. Cursor + 3) = Node.Str3 6245 then 6246 Cursor := Cursor + 3; 6247 goto Succeed; 6248 else 6249 goto Fail; 6250 end if; 6251 6252 -- String (four character case) 6253 6254 when PC_String_4 => 6255 Dout (Img (Node) & "matching " & Image (Node.Str4)); 6256 6257 if (Length - Cursor) >= 4 6258 and then Subject (Cursor + 1 .. Cursor + 4) = Node.Str4 6259 then 6260 Cursor := Cursor + 4; 6261 goto Succeed; 6262 else 6263 goto Fail; 6264 end if; 6265 6266 -- String (five character case) 6267 6268 when PC_String_5 => 6269 Dout (Img (Node) & "matching " & Image (Node.Str5)); 6270 6271 if (Length - Cursor) >= 5 6272 and then Subject (Cursor + 1 .. Cursor + 5) = Node.Str5 6273 then 6274 Cursor := Cursor + 5; 6275 goto Succeed; 6276 else 6277 goto Fail; 6278 end if; 6279 6280 -- String (six character case) 6281 6282 when PC_String_6 => 6283 Dout (Img (Node) & "matching " & Image (Node.Str6)); 6284 6285 if (Length - Cursor) >= 6 6286 and then Subject (Cursor + 1 .. Cursor + 6) = Node.Str6 6287 then 6288 Cursor := Cursor + 6; 6289 goto Succeed; 6290 else 6291 goto Fail; 6292 end if; 6293 6294 -- String (case of more than six characters) 6295 6296 when PC_String => declare 6297 Len : constant Natural := Node.Str'Length; 6298 6299 begin 6300 Dout (Img (Node) & "matching " & Image (Node.Str.all)); 6301 6302 if (Length - Cursor) >= Len 6303 and then Node.Str.all = Subject (Cursor + 1 .. Cursor + Len) 6304 then 6305 Cursor := Cursor + Len; 6306 goto Succeed; 6307 else 6308 goto Fail; 6309 end if; 6310 end; 6311 6312 -- String (function case) 6313 6314 when PC_String_VF => declare 6315 U : constant VString := Node.VF.all; 6316 S : Big_String_Access; 6317 L : Natural; 6318 6319 begin 6320 Get_String (U, S, L); 6321 Dout (Img (Node) & "matching " & Image (S (1 .. L))); 6322 6323 if (Length - Cursor) >= L 6324 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 6325 then 6326 Cursor := Cursor + L; 6327 goto Succeed; 6328 else 6329 goto Fail; 6330 end if; 6331 end; 6332 6333 -- String (vstring pointer case) 6334 6335 when PC_String_VP => declare 6336 U : constant VString := Node.VP.all; 6337 S : Big_String_Access; 6338 L : Natural; 6339 6340 begin 6341 Get_String (U, S, L); 6342 Dout (Img (Node) & "matching " & Image (S (1 .. L))); 6343 6344 if (Length - Cursor) >= L 6345 and then S (1 .. L) = Subject (Cursor + 1 .. Cursor + L) 6346 then 6347 Cursor := Cursor + L; 6348 goto Succeed; 6349 else 6350 goto Fail; 6351 end if; 6352 end; 6353 6354 -- Succeed 6355 6356 when PC_Succeed => 6357 Dout (Img (Node) & "matching Succeed"); 6358 Push (Node); 6359 goto Succeed; 6360 6361 -- Tab (integer case) 6362 6363 when PC_Tab_Nat => 6364 Dout (Img (Node) & "matching Tab", Node.Nat); 6365 6366 if Cursor <= Node.Nat then 6367 Cursor := Node.Nat; 6368 goto Succeed; 6369 else 6370 goto Fail; 6371 end if; 6372 6373 -- Tab (integer function case) 6374 6375 when PC_Tab_NF => declare 6376 N : constant Natural := Node.NF.all; 6377 6378 begin 6379 Dout (Img (Node) & "matching Tab ", N); 6380 6381 if Cursor <= N then 6382 Cursor := N; 6383 goto Succeed; 6384 else 6385 goto Fail; 6386 end if; 6387 end; 6388 6389 -- Tab (integer pointer case) 6390 6391 when PC_Tab_NP => 6392 Dout (Img (Node) & "matching Tab ", Node.NP.all); 6393 6394 if Cursor <= Node.NP.all then 6395 Cursor := Node.NP.all; 6396 goto Succeed; 6397 else 6398 goto Fail; 6399 end if; 6400 6401 -- Unanchored movement 6402 6403 when PC_Unanchored => 6404 Dout ("attempting to move anchor point"); 6405 6406 -- All done if we tried every position 6407 6408 if Cursor > Length then 6409 goto Match_Fail; 6410 6411 -- Otherwise extend the anchor point, and restack ourself 6412 6413 else 6414 Cursor := Cursor + 1; 6415 Push (Node); 6416 goto Succeed; 6417 end if; 6418 6419 -- Write immediate. This node performs the actual write 6420 6421 when PC_Write_Imm => 6422 Dout (Img (Node) & "executing immediate write of " & 6423 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 6424 6425 Put_Line 6426 (Node.FP.all, 6427 Subject (Stack (Stack_Base - 1).Cursor + 1 .. Cursor)); 6428 Pop_Region; 6429 goto Succeed; 6430 6431 -- Write on match. This node sets up for the eventual write 6432 6433 when PC_Write_OnM => 6434 Dout (Img (Node) & "registering deferred write"); 6435 Stack (Stack_Base - 1).Node := Node; 6436 Push (CP_Assign'Access); 6437 Pop_Region; 6438 Assign_OnM := True; 6439 goto Succeed; 6440 6441 end case; 6442 6443 -- We are NOT allowed to fall though this case statement, since every 6444 -- match routine must end by executing a goto to the appropriate point 6445 -- in the finite state machine model. 6446 6447 pragma Warnings (Off); 6448 Logic_Error; 6449 pragma Warnings (On); 6450 end XMatchD; 6451 6452end GNAT.Spitbol.Patterns; 6453