1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- G N A T . R E G E X P -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1999-2002 Ada Core Technologies, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 2, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING. If not, write -- 19-- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- 20-- MA 02111-1307, USA. -- 21-- -- 22-- As a special exception, if other files instantiate generics from this -- 23-- unit, or you link this unit with other files to produce an executable, -- 24-- this unit does not by itself cause the resulting executable to be -- 25-- covered by the GNU General Public License. This exception does not -- 26-- however invalidate any other reasons why the executable file might be -- 27-- covered by the GNU Public License. -- 28-- -- 29-- GNAT was originally developed by the GNAT team at New York University. -- 30-- Extensive contributions were provided by Ada Core Technologies Inc. -- 31-- -- 32------------------------------------------------------------------------------ 33 34with Unchecked_Deallocation; 35with Ada.Exceptions; 36with GNAT.Case_Util; 37 38package body GNAT.Regexp is 39 40 Open_Paren : constant Character := '('; 41 Close_Paren : constant Character := ')'; 42 Open_Bracket : constant Character := '['; 43 Close_Bracket : constant Character := ']'; 44 45 type State_Index is new Natural; 46 type Column_Index is new Natural; 47 48 type Regexp_Array is array 49 (State_Index range <>, Column_Index range <>) of State_Index; 50 -- First index is for the state number 51 -- Second index is for the character type 52 -- Contents is the new State 53 54 type Regexp_Array_Access is access Regexp_Array; 55 -- Use this type through the functions Set below, so that it 56 -- can grow dynamically depending on the needs. 57 58 type Mapping is array (Character'Range) of Column_Index; 59 -- Mapping between characters and column in the Regexp_Array 60 61 type Boolean_Array is array (State_Index range <>) of Boolean; 62 63 type Regexp_Value 64 (Alphabet_Size : Column_Index; 65 Num_States : State_Index) is 66 record 67 Map : Mapping; 68 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size); 69 Is_Final : Boolean_Array (1 .. Num_States); 70 Case_Sensitive : Boolean; 71 end record; 72 -- Deterministic finite-state machine 73 74 ----------------------- 75 -- Local Subprograms -- 76 ----------------------- 77 78 procedure Set 79 (Table : in out Regexp_Array_Access; 80 State : State_Index; 81 Column : Column_Index; 82 Value : State_Index); 83 -- Sets a value in the table. If the table is too small, reallocate it 84 -- dynamically so that (State, Column) is a valid index in it. 85 86 function Get 87 (Table : Regexp_Array_Access; 88 State : State_Index; 89 Column : Column_Index) 90 return State_Index; 91 -- Returns the value in the table at (State, Column). 92 -- If this index does not exist in the table, returns 0 93 94 procedure Free is new Unchecked_Deallocation 95 (Regexp_Array, Regexp_Array_Access); 96 97 ------------ 98 -- Adjust -- 99 ------------ 100 101 procedure Adjust (R : in out Regexp) is 102 Tmp : Regexp_Access; 103 104 begin 105 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size, 106 Num_States => R.R.Num_States); 107 Tmp.all := R.R.all; 108 R.R := Tmp; 109 end Adjust; 110 111 ------------- 112 -- Compile -- 113 ------------- 114 115 function Compile 116 (Pattern : String; 117 Glob : Boolean := False; 118 Case_Sensitive : Boolean := True) 119 return Regexp 120 is 121 S : String := Pattern; 122 -- The pattern which is really compiled (when the pattern is case 123 -- insensitive, we convert this string to lower-cases 124 125 Map : Mapping := (others => 0); 126 -- Mapping between characters and columns in the tables 127 128 Alphabet_Size : Column_Index := 0; 129 -- Number of significant characters in the regular expression. 130 -- This total does not include special operators, such as *, (, ... 131 132 procedure Create_Mapping; 133 -- Creates a mapping between characters in the regexp and columns 134 -- in the tables representing the regexp. Test that the regexp is 135 -- well-formed Modifies Alphabet_Size and Map 136 137 procedure Create_Primary_Table 138 (Table : out Regexp_Array_Access; 139 Num_States : out State_Index; 140 Start_State : out State_Index; 141 End_State : out State_Index); 142 -- Creates the first version of the regexp (this is a non determinist 143 -- finite state machine, which is unadapted for a fast pattern 144 -- matching algorithm). We use a recursive algorithm to process the 145 -- parenthesis sub-expressions. 146 -- 147 -- Table : at the end of the procedure : Column 0 is for any character 148 -- ('.') and the last columns are for no character (closure) 149 -- Num_States is set to the number of states in the table 150 -- Start_State is the number of the starting state in the regexp 151 -- End_State is the number of the final state when the regexp matches 152 153 procedure Create_Primary_Table_Glob 154 (Table : out Regexp_Array_Access; 155 Num_States : out State_Index; 156 Start_State : out State_Index; 157 End_State : out State_Index); 158 -- Same function as above, but it deals with the second possible 159 -- grammar for 'globbing pattern', which is a kind of subset of the 160 -- whole regular expression grammar. 161 162 function Create_Secondary_Table 163 (First_Table : Regexp_Array_Access; 164 Num_States : State_Index; 165 Start_State : State_Index; 166 End_State : State_Index) 167 return Regexp; 168 -- Creates the definitive table representing the regular expression 169 -- This is actually a transformation of the primary table First_Table, 170 -- where every state is grouped with the states in its 'no-character' 171 -- columns. The transitions between the new states are then recalculated 172 -- and if necessary some new states are created. 173 -- 174 -- Note that the resulting finite-state machine is not optimized in 175 -- terms of the number of states : it would be more time-consuming to 176 -- add a third pass to reduce the number of states in the machine, with 177 -- no speed improvement... 178 179 procedure Raise_Exception 180 (M : String; 181 Index : Integer); 182 pragma No_Return (Raise_Exception); 183 -- Raise an exception, indicating an error at character Index in S. 184 185 -------------------- 186 -- Create_Mapping -- 187 -------------------- 188 189 procedure Create_Mapping is 190 191 procedure Add_In_Map (C : Character); 192 -- Add a character in the mapping, if it is not already defined 193 194 ----------------- 195 -- Add_In_Map -- 196 ----------------- 197 198 procedure Add_In_Map (C : Character) is 199 begin 200 if Map (C) = 0 then 201 Alphabet_Size := Alphabet_Size + 1; 202 Map (C) := Alphabet_Size; 203 end if; 204 end Add_In_Map; 205 206 J : Integer := S'First; 207 Parenthesis_Level : Integer := 0; 208 Curly_Level : Integer := 0; 209 210 -- Start of processing for Create_Mapping 211 212 begin 213 while J <= S'Last loop 214 case S (J) is 215 when Open_Bracket => 216 J := J + 1; 217 218 if S (J) = '^' then 219 J := J + 1; 220 end if; 221 222 if S (J) = ']' or S (J) = '-' then 223 J := J + 1; 224 end if; 225 226 -- The first character never has a special meaning 227 228 loop 229 if J > S'Last then 230 Raise_Exception 231 ("Ran out of characters while parsing ", J); 232 end if; 233 234 exit when S (J) = Close_Bracket; 235 236 if S (J) = '-' 237 and then S (J + 1) /= Close_Bracket 238 then 239 declare 240 Start : constant Integer := J - 1; 241 242 begin 243 J := J + 1; 244 245 if S (J) = '\' then 246 J := J + 1; 247 end if; 248 249 for Char in S (Start) .. S (J) loop 250 Add_In_Map (Char); 251 end loop; 252 end; 253 else 254 if S (J) = '\' then 255 J := J + 1; 256 end if; 257 258 Add_In_Map (S (J)); 259 end if; 260 261 J := J + 1; 262 end loop; 263 264 -- A close bracket must follow a open_bracket, 265 -- and cannot be found alone on the line 266 267 when Close_Bracket => 268 Raise_Exception 269 ("Incorrect character ']' in regular expression", J); 270 271 when '\' => 272 if J < S'Last then 273 J := J + 1; 274 Add_In_Map (S (J)); 275 276 else 277 -- \ not allowed at the end of the regexp 278 279 Raise_Exception 280 ("Incorrect character '\' in regular expression", J); 281 end if; 282 283 when Open_Paren => 284 if not Glob then 285 Parenthesis_Level := Parenthesis_Level + 1; 286 else 287 Add_In_Map (Open_Paren); 288 end if; 289 290 when Close_Paren => 291 if not Glob then 292 Parenthesis_Level := Parenthesis_Level - 1; 293 294 if Parenthesis_Level < 0 then 295 Raise_Exception 296 ("')' is not associated with '(' in regular " 297 & "expression", J); 298 end if; 299 300 if S (J - 1) = Open_Paren then 301 Raise_Exception 302 ("Empty parenthesis not allowed in regular " 303 & "expression", J); 304 end if; 305 306 else 307 Add_In_Map (Close_Paren); 308 end if; 309 310 when '.' => 311 if Glob then 312 Add_In_Map ('.'); 313 end if; 314 315 when '{' => 316 if not Glob then 317 Add_In_Map (S (J)); 318 else 319 Curly_Level := Curly_Level + 1; 320 end if; 321 322 when '}' => 323 if not Glob then 324 Add_In_Map (S (J)); 325 else 326 Curly_Level := Curly_Level - 1; 327 end if; 328 329 when '*' | '?' => 330 if not Glob then 331 if J = S'First then 332 Raise_Exception 333 ("'*', '+', '?' and '|' operators can not be in " 334 & "first position in regular expression", J); 335 end if; 336 end if; 337 338 when '|' | '+' => 339 if not Glob then 340 if J = S'First then 341 342 -- These operators must apply to a sub-expression, 343 -- and cannot be found at the beginning of the line 344 345 Raise_Exception 346 ("'*', '+', '?' and '|' operators can not be in " 347 & "first position in regular expression", J); 348 end if; 349 350 else 351 Add_In_Map (S (J)); 352 end if; 353 354 when others => 355 Add_In_Map (S (J)); 356 end case; 357 358 J := J + 1; 359 end loop; 360 361 -- A closing parenthesis must follow an open parenthesis 362 363 if Parenthesis_Level /= 0 then 364 Raise_Exception 365 ("'(' must always be associated with a ')'", J); 366 end if; 367 368 if Curly_Level /= 0 then 369 Raise_Exception 370 ("'{' must always be associated with a '}'", J); 371 end if; 372 end Create_Mapping; 373 374 -------------------------- 375 -- Create_Primary_Table -- 376 -------------------------- 377 378 procedure Create_Primary_Table 379 (Table : out Regexp_Array_Access; 380 Num_States : out State_Index; 381 Start_State : out State_Index; 382 End_State : out State_Index) 383 is 384 Empty_Char : constant Column_Index := Alphabet_Size + 1; 385 386 Current_State : State_Index := 0; 387 -- Index of the last created state 388 389 procedure Add_Empty_Char 390 (State : State_Index; 391 To_State : State_Index); 392 -- Add a empty-character transition from State to To_State. 393 394 procedure Create_Repetition 395 (Repetition : Character; 396 Start_Prev : State_Index; 397 End_Prev : State_Index; 398 New_Start : out State_Index; 399 New_End : in out State_Index); 400 -- Create the table in case we have a '*', '+' or '?'. 401 -- Start_Prev .. End_Prev should indicate respectively the start and 402 -- end index of the previous expression, to which '*', '+' or '?' is 403 -- applied. 404 405 procedure Create_Simple 406 (Start_Index : Integer; 407 End_Index : Integer; 408 Start_State : out State_Index; 409 End_State : out State_Index); 410 -- Fill the table for the regexp Simple. 411 -- This is the recursive procedure called to handle () expressions 412 -- If End_State = 0, then the call to Create_Simple creates an 413 -- independent regexp, not a concatenation 414 -- Start_Index .. End_Index is the starting index in the string S. 415 -- 416 -- Warning: it may look like we are creating too many empty-string 417 -- transitions, but they are needed to get the correct regexp. 418 -- The table is filled as follow ( s means start-state, e means 419 -- end-state) : 420 -- 421 -- regexp state_num | a b * empty_string 422 -- ------- --------------------------------------- 423 -- a 1 (s) | 2 - - - 424 -- 2 (e) | - - - - 425 -- 426 -- ab 1 (s) | 2 - - - 427 -- 2 | - - - 3 428 -- 3 | - 4 - - 429 -- 4 (e) | - - - - 430 -- 431 -- a|b 1 | 2 - - - 432 -- 2 | - - - 6 433 -- 3 | - 4 - - 434 -- 4 | - - - 6 435 -- 5 (s) | - - - 1,3 436 -- 6 (e) | - - - - 437 -- 438 -- a* 1 | 2 - - - 439 -- 2 | - - - 4 440 -- 3 (s) | - - - 1,4 441 -- 4 (e) | - - - 3 442 -- 443 -- (a) 1 (s) | 2 - - - 444 -- 2 (e) | - - - - 445 -- 446 -- a+ 1 | 2 - - - 447 -- 2 | - - - 4 448 -- 3 (s) | - - - 1 449 -- 4 (e) | - - - 3 450 -- 451 -- a? 1 | 2 - - - 452 -- 2 | - - - 4 453 -- 3 (s) | - - - 1,4 454 -- 4 (e) | - - - - 455 -- 456 -- . 1 (s) | 2 2 2 - 457 -- 2 (e) | - - - - 458 459 function Next_Sub_Expression 460 (Start_Index : Integer; 461 End_Index : Integer) 462 return Integer; 463 -- Returns the index of the last character of the next sub-expression 464 -- in Simple. Index can not be greater than End_Index 465 466 -------------------- 467 -- Add_Empty_Char -- 468 -------------------- 469 470 procedure Add_Empty_Char 471 (State : State_Index; 472 To_State : State_Index) 473 is 474 J : Column_Index := Empty_Char; 475 476 begin 477 while Get (Table, State, J) /= 0 loop 478 J := J + 1; 479 end loop; 480 481 Set (Table, State, J, To_State); 482 end Add_Empty_Char; 483 484 ----------------------- 485 -- Create_Repetition -- 486 ----------------------- 487 488 procedure Create_Repetition 489 (Repetition : Character; 490 Start_Prev : State_Index; 491 End_Prev : State_Index; 492 New_Start : out State_Index; 493 New_End : in out State_Index) 494 is 495 begin 496 New_Start := Current_State + 1; 497 498 if New_End /= 0 then 499 Add_Empty_Char (New_End, New_Start); 500 end if; 501 502 Current_State := Current_State + 2; 503 New_End := Current_State; 504 505 Add_Empty_Char (End_Prev, New_End); 506 Add_Empty_Char (New_Start, Start_Prev); 507 508 if Repetition /= '+' then 509 Add_Empty_Char (New_Start, New_End); 510 end if; 511 512 if Repetition /= '?' then 513 Add_Empty_Char (New_End, New_Start); 514 end if; 515 end Create_Repetition; 516 517 ------------------- 518 -- Create_Simple -- 519 ------------------- 520 521 procedure Create_Simple 522 (Start_Index : Integer; 523 End_Index : Integer; 524 Start_State : out State_Index; 525 End_State : out State_Index) 526 is 527 J : Integer := Start_Index; 528 Last_Start : State_Index := 0; 529 530 begin 531 Start_State := 0; 532 End_State := 0; 533 while J <= End_Index loop 534 case S (J) is 535 when Open_Paren => 536 declare 537 J_Start : constant Integer := J + 1; 538 Next_Start : State_Index; 539 Next_End : State_Index; 540 541 begin 542 J := Next_Sub_Expression (J, End_Index); 543 Create_Simple (J_Start, J - 1, Next_Start, Next_End); 544 545 if J < End_Index 546 and then (S (J + 1) = '*' or else 547 S (J + 1) = '+' or else 548 S (J + 1) = '?') 549 then 550 J := J + 1; 551 Create_Repetition 552 (S (J), 553 Next_Start, 554 Next_End, 555 Last_Start, 556 End_State); 557 558 else 559 Last_Start := Next_Start; 560 561 if End_State /= 0 then 562 Add_Empty_Char (End_State, Last_Start); 563 end if; 564 565 End_State := Next_End; 566 end if; 567 end; 568 569 when '|' => 570 declare 571 Start_Prev : constant State_Index := Start_State; 572 End_Prev : constant State_Index := End_State; 573 Start_J : constant Integer := J + 1; 574 Start_Next : State_Index := 0; 575 End_Next : State_Index := 0; 576 577 begin 578 J := Next_Sub_Expression (J, End_Index); 579 580 -- Create a new state for the start of the alternative 581 582 Current_State := Current_State + 1; 583 Last_Start := Current_State; 584 Start_State := Last_Start; 585 586 -- Create the tree for the second part of alternative 587 588 Create_Simple (Start_J, J, Start_Next, End_Next); 589 590 -- Create the end state 591 592 Add_Empty_Char (Last_Start, Start_Next); 593 Add_Empty_Char (Last_Start, Start_Prev); 594 Current_State := Current_State + 1; 595 End_State := Current_State; 596 Add_Empty_Char (End_Prev, End_State); 597 Add_Empty_Char (End_Next, End_State); 598 end; 599 600 when Open_Bracket => 601 Current_State := Current_State + 1; 602 603 declare 604 Next_State : State_Index := Current_State + 1; 605 606 begin 607 J := J + 1; 608 609 if S (J) = '^' then 610 J := J + 1; 611 612 Next_State := 0; 613 614 for Column in 0 .. Alphabet_Size loop 615 Set (Table, Current_State, Column, 616 Value => Current_State + 1); 617 end loop; 618 end if; 619 620 -- Automatically add the first character 621 622 if S (J) = '-' or S (J) = ']' then 623 Set (Table, Current_State, Map (S (J)), 624 Value => Next_State); 625 J := J + 1; 626 end if; 627 628 -- Loop till closing bracket found 629 630 loop 631 exit when S (J) = Close_Bracket; 632 633 if S (J) = '-' 634 and then S (J + 1) /= ']' 635 then 636 declare 637 Start : constant Integer := J - 1; 638 639 begin 640 J := J + 1; 641 642 if S (J) = '\' then 643 J := J + 1; 644 end if; 645 646 for Char in S (Start) .. S (J) loop 647 Set (Table, Current_State, Map (Char), 648 Value => Next_State); 649 end loop; 650 end; 651 652 else 653 if S (J) = '\' then 654 J := J + 1; 655 end if; 656 657 Set (Table, Current_State, Map (S (J)), 658 Value => Next_State); 659 end if; 660 J := J + 1; 661 end loop; 662 end; 663 664 Current_State := Current_State + 1; 665 666 -- If the next symbol is a special symbol 667 668 if J < End_Index 669 and then (S (J + 1) = '*' or else 670 S (J + 1) = '+' or else 671 S (J + 1) = '?') 672 then 673 J := J + 1; 674 Create_Repetition 675 (S (J), 676 Current_State - 1, 677 Current_State, 678 Last_Start, 679 End_State); 680 681 else 682 Last_Start := Current_State - 1; 683 684 if End_State /= 0 then 685 Add_Empty_Char (End_State, Last_Start); 686 end if; 687 688 End_State := Current_State; 689 end if; 690 691 when '*' | '+' | '?' | Close_Paren | Close_Bracket => 692 Raise_Exception 693 ("Incorrect character in regular expression :", J); 694 695 when others => 696 Current_State := Current_State + 1; 697 698 -- Create the state for the symbol S (J) 699 700 if S (J) = '.' then 701 for K in 0 .. Alphabet_Size loop 702 Set (Table, Current_State, K, 703 Value => Current_State + 1); 704 end loop; 705 706 else 707 if S (J) = '\' then 708 J := J + 1; 709 end if; 710 711 Set (Table, Current_State, Map (S (J)), 712 Value => Current_State + 1); 713 end if; 714 715 Current_State := Current_State + 1; 716 717 -- If the next symbol is a special symbol 718 719 if J < End_Index 720 and then (S (J + 1) = '*' or else 721 S (J + 1) = '+' or else 722 S (J + 1) = '?') 723 then 724 J := J + 1; 725 Create_Repetition 726 (S (J), 727 Current_State - 1, 728 Current_State, 729 Last_Start, 730 End_State); 731 732 else 733 Last_Start := Current_State - 1; 734 735 if End_State /= 0 then 736 Add_Empty_Char (End_State, Last_Start); 737 end if; 738 739 End_State := Current_State; 740 end if; 741 742 end case; 743 744 if Start_State = 0 then 745 Start_State := Last_Start; 746 end if; 747 748 J := J + 1; 749 end loop; 750 end Create_Simple; 751 752 ------------------------- 753 -- Next_Sub_Expression -- 754 ------------------------- 755 756 function Next_Sub_Expression 757 (Start_Index : Integer; 758 End_Index : Integer) 759 return Integer 760 is 761 J : Integer := Start_Index; 762 Start_On_Alter : Boolean := False; 763 764 begin 765 if S (J) = '|' then 766 Start_On_Alter := True; 767 end if; 768 769 loop 770 exit when J = End_Index; 771 J := J + 1; 772 773 case S (J) is 774 when '\' => 775 J := J + 1; 776 777 when Open_Bracket => 778 loop 779 J := J + 1; 780 exit when S (J) = Close_Bracket; 781 782 if S (J) = '\' then 783 J := J + 1; 784 end if; 785 end loop; 786 787 when Open_Paren => 788 J := Next_Sub_Expression (J, End_Index); 789 790 when Close_Paren => 791 return J; 792 793 when '|' => 794 if Start_On_Alter then 795 return J - 1; 796 end if; 797 798 when others => 799 null; 800 end case; 801 end loop; 802 803 return J; 804 end Next_Sub_Expression; 805 806 -- Start of Create_Primary_Table 807 808 begin 809 Table.all := (others => (others => 0)); 810 Create_Simple (S'First, S'Last, Start_State, End_State); 811 Num_States := Current_State; 812 end Create_Primary_Table; 813 814 ------------------------------- 815 -- Create_Primary_Table_Glob -- 816 ------------------------------- 817 818 procedure Create_Primary_Table_Glob 819 (Table : out Regexp_Array_Access; 820 Num_States : out State_Index; 821 Start_State : out State_Index; 822 End_State : out State_Index) 823 is 824 Empty_Char : constant Column_Index := Alphabet_Size + 1; 825 826 Current_State : State_Index := 0; 827 -- Index of the last created state 828 829 procedure Add_Empty_Char 830 (State : State_Index; 831 To_State : State_Index); 832 -- Add a empty-character transition from State to To_State. 833 834 procedure Create_Simple 835 (Start_Index : Integer; 836 End_Index : Integer; 837 Start_State : out State_Index; 838 End_State : out State_Index); 839 -- Fill the table for the S (Start_Index .. End_Index). 840 -- This is the recursive procedure called to handle () expressions 841 842 -------------------- 843 -- Add_Empty_Char -- 844 -------------------- 845 846 procedure Add_Empty_Char 847 (State : State_Index; 848 To_State : State_Index) 849 is 850 J : Column_Index := Empty_Char; 851 852 begin 853 while Get (Table, State, J) /= 0 loop 854 J := J + 1; 855 end loop; 856 857 Set (Table, State, J, 858 Value => To_State); 859 end Add_Empty_Char; 860 861 ------------------- 862 -- Create_Simple -- 863 ------------------- 864 865 procedure Create_Simple 866 (Start_Index : Integer; 867 End_Index : Integer; 868 Start_State : out State_Index; 869 End_State : out State_Index) 870 is 871 J : Integer := Start_Index; 872 Last_Start : State_Index := 0; 873 874 begin 875 Start_State := 0; 876 End_State := 0; 877 878 while J <= End_Index loop 879 case S (J) is 880 881 when Open_Bracket => 882 Current_State := Current_State + 1; 883 884 declare 885 Next_State : State_Index := Current_State + 1; 886 887 begin 888 J := J + 1; 889 890 if S (J) = '^' then 891 J := J + 1; 892 Next_State := 0; 893 894 for Column in 0 .. Alphabet_Size loop 895 Set (Table, Current_State, Column, 896 Value => Current_State + 1); 897 end loop; 898 end if; 899 900 -- Automatically add the first character 901 902 if S (J) = '-' or S (J) = ']' then 903 Set (Table, Current_State, Map (S (J)), 904 Value => Current_State); 905 J := J + 1; 906 end if; 907 908 -- Loop till closing bracket found 909 910 loop 911 exit when S (J) = Close_Bracket; 912 913 if S (J) = '-' 914 and then S (J + 1) /= ']' 915 then 916 declare 917 Start : constant Integer := J - 1; 918 begin 919 J := J + 1; 920 921 if S (J) = '\' then 922 J := J + 1; 923 end if; 924 925 for Char in S (Start) .. S (J) loop 926 Set (Table, Current_State, Map (Char), 927 Value => Next_State); 928 end loop; 929 end; 930 931 else 932 if S (J) = '\' then 933 J := J + 1; 934 end if; 935 936 Set (Table, Current_State, Map (S (J)), 937 Value => Next_State); 938 end if; 939 J := J + 1; 940 end loop; 941 end; 942 943 Last_Start := Current_State; 944 Current_State := Current_State + 1; 945 946 if End_State /= 0 then 947 Add_Empty_Char (End_State, Last_Start); 948 end if; 949 950 End_State := Current_State; 951 952 when '{' => 953 declare 954 End_Sub : Integer; 955 Start_Regexp_Sub : State_Index; 956 End_Regexp_Sub : State_Index; 957 Create_Start : State_Index := 0; 958 959 Create_End : State_Index := 0; 960 -- Initialized to avoid junk warning 961 962 begin 963 while S (J) /= '}' loop 964 965 -- First step : find sub pattern 966 967 End_Sub := J + 1; 968 while S (End_Sub) /= ',' 969 and then S (End_Sub) /= '}' 970 loop 971 End_Sub := End_Sub + 1; 972 end loop; 973 974 -- Second step : create a sub pattern 975 976 Create_Simple 977 (J + 1, 978 End_Sub - 1, 979 Start_Regexp_Sub, 980 End_Regexp_Sub); 981 982 J := End_Sub; 983 984 -- Third step : create an alternative 985 986 if Create_Start = 0 then 987 Current_State := Current_State + 1; 988 Create_Start := Current_State; 989 Add_Empty_Char (Create_Start, Start_Regexp_Sub); 990 Current_State := Current_State + 1; 991 Create_End := Current_State; 992 Add_Empty_Char (End_Regexp_Sub, Create_End); 993 994 else 995 Current_State := Current_State + 1; 996 Add_Empty_Char (Current_State, Create_Start); 997 Create_Start := Current_State; 998 Add_Empty_Char (Create_Start, Start_Regexp_Sub); 999 Add_Empty_Char (End_Regexp_Sub, Create_End); 1000 end if; 1001 end loop; 1002 1003 if End_State /= 0 then 1004 Add_Empty_Char (End_State, Create_Start); 1005 end if; 1006 1007 End_State := Create_End; 1008 Last_Start := Create_Start; 1009 end; 1010 1011 when '*' => 1012 Current_State := Current_State + 1; 1013 1014 if End_State /= 0 then 1015 Add_Empty_Char (End_State, Current_State); 1016 end if; 1017 1018 Add_Empty_Char (Current_State, Current_State + 1); 1019 Add_Empty_Char (Current_State, Current_State + 3); 1020 Last_Start := Current_State; 1021 1022 Current_State := Current_State + 1; 1023 1024 for K in 0 .. Alphabet_Size loop 1025 Set (Table, Current_State, K, 1026 Value => Current_State + 1); 1027 end loop; 1028 1029 Current_State := Current_State + 1; 1030 Add_Empty_Char (Current_State, Current_State + 1); 1031 1032 Current_State := Current_State + 1; 1033 Add_Empty_Char (Current_State, Last_Start); 1034 End_State := Current_State; 1035 1036 when others => 1037 Current_State := Current_State + 1; 1038 1039 if S (J) = '?' then 1040 for K in 0 .. Alphabet_Size loop 1041 Set (Table, Current_State, K, 1042 Value => Current_State + 1); 1043 end loop; 1044 1045 else 1046 if S (J) = '\' then 1047 J := J + 1; 1048 end if; 1049 1050 -- Create the state for the symbol S (J) 1051 1052 Set (Table, Current_State, Map (S (J)), 1053 Value => Current_State + 1); 1054 end if; 1055 1056 Last_Start := Current_State; 1057 Current_State := Current_State + 1; 1058 1059 if End_State /= 0 then 1060 Add_Empty_Char (End_State, Last_Start); 1061 end if; 1062 1063 End_State := Current_State; 1064 1065 end case; 1066 1067 if Start_State = 0 then 1068 Start_State := Last_Start; 1069 end if; 1070 1071 J := J + 1; 1072 end loop; 1073 end Create_Simple; 1074 1075 -- Start of processing for Create_Primary_Table_Glob 1076 1077 begin 1078 Table.all := (others => (others => 0)); 1079 Create_Simple (S'First, S'Last, Start_State, End_State); 1080 Num_States := Current_State; 1081 end Create_Primary_Table_Glob; 1082 1083 ---------------------------- 1084 -- Create_Secondary_Table -- 1085 ---------------------------- 1086 1087 function Create_Secondary_Table 1088 (First_Table : Regexp_Array_Access; 1089 Num_States : State_Index; 1090 Start_State : State_Index; 1091 End_State : State_Index) 1092 return Regexp 1093 is 1094 pragma Warnings (Off, Num_States); 1095 1096 Last_Index : constant State_Index := First_Table'Last (1); 1097 type Meta_State is array (1 .. Last_Index) of Boolean; 1098 1099 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) := 1100 (others => (others => 0)); 1101 1102 Meta_States : array (1 .. Last_Index + 1) of Meta_State := 1103 (others => (others => False)); 1104 1105 Temp_State_Not_Null : Boolean; 1106 1107 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False); 1108 1109 Current_State : State_Index := 1; 1110 Nb_State : State_Index := 1; 1111 1112 procedure Closure 1113 (State : in out Meta_State; 1114 Item : State_Index); 1115 -- Compute the closure of the state (that is every other state which 1116 -- has a empty-character transition) and add it to the state 1117 1118 ------------- 1119 -- Closure -- 1120 ------------- 1121 1122 procedure Closure 1123 (State : in out Meta_State; 1124 Item : State_Index) 1125 is 1126 begin 1127 if State (Item) then 1128 return; 1129 end if; 1130 1131 State (Item) := True; 1132 1133 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop 1134 if First_Table (Item, Column) = 0 then 1135 return; 1136 end if; 1137 1138 Closure (State, First_Table (Item, Column)); 1139 end loop; 1140 end Closure; 1141 1142 -- Start of procesing for Create_Secondary_Table 1143 1144 begin 1145 -- Create a new state 1146 1147 Closure (Meta_States (Current_State), Start_State); 1148 1149 while Current_State <= Nb_State loop 1150 1151 -- If this new meta-state includes the primary table end state, 1152 -- then this meta-state will be a final state in the regexp 1153 1154 if Meta_States (Current_State)(End_State) then 1155 Is_Final (Current_State) := True; 1156 end if; 1157 1158 -- For every character in the regexp, calculate the possible 1159 -- transitions from Current_State 1160 1161 for Column in 0 .. Alphabet_Size loop 1162 Meta_States (Nb_State + 1) := (others => False); 1163 Temp_State_Not_Null := False; 1164 1165 for K in Meta_States (Current_State)'Range loop 1166 if Meta_States (Current_State)(K) 1167 and then First_Table (K, Column) /= 0 1168 then 1169 Closure 1170 (Meta_States (Nb_State + 1), First_Table (K, Column)); 1171 Temp_State_Not_Null := True; 1172 end if; 1173 end loop; 1174 1175 -- If at least one transition existed 1176 1177 if Temp_State_Not_Null then 1178 1179 -- Check if this new state corresponds to an old one 1180 1181 for K in 1 .. Nb_State loop 1182 if Meta_States (K) = Meta_States (Nb_State + 1) then 1183 Table (Current_State, Column) := K; 1184 exit; 1185 end if; 1186 end loop; 1187 1188 -- If not, create a new state 1189 1190 if Table (Current_State, Column) = 0 then 1191 Nb_State := Nb_State + 1; 1192 Table (Current_State, Column) := Nb_State; 1193 end if; 1194 end if; 1195 end loop; 1196 1197 Current_State := Current_State + 1; 1198 end loop; 1199 1200 -- Returns the regexp 1201 1202 declare 1203 R : Regexp_Access; 1204 1205 begin 1206 R := new Regexp_Value (Alphabet_Size => Alphabet_Size, 1207 Num_States => Nb_State); 1208 R.Map := Map; 1209 R.Is_Final := Is_Final (1 .. Nb_State); 1210 R.Case_Sensitive := Case_Sensitive; 1211 1212 for State in 1 .. Nb_State loop 1213 for K in 0 .. Alphabet_Size loop 1214 R.States (State, K) := Table (State, K); 1215 end loop; 1216 end loop; 1217 1218 return (Ada.Finalization.Controlled with R => R); 1219 end; 1220 end Create_Secondary_Table; 1221 1222 --------------------- 1223 -- Raise_Exception -- 1224 --------------------- 1225 1226 procedure Raise_Exception 1227 (M : String; 1228 Index : Integer) 1229 is 1230 begin 1231 Ada.Exceptions.Raise_Exception 1232 (Error_In_Regexp'Identity, M & " at offset " & Index'Img); 1233 end Raise_Exception; 1234 1235 -- Start of processing for Compile 1236 1237 begin 1238 -- Special case for the empty string: it always matches, and the 1239 -- following processing would fail on it. 1240 if S = "" then 1241 return (Ada.Finalization.Controlled with 1242 R => new Regexp_Value' 1243 (Alphabet_Size => 0, 1244 Num_States => 1, 1245 Map => (others => 0), 1246 States => (others => (others => 1)), 1247 Is_Final => (others => True), 1248 Case_Sensitive => True)); 1249 end if; 1250 1251 if not Case_Sensitive then 1252 GNAT.Case_Util.To_Lower (S); 1253 end if; 1254 1255 Create_Mapping; 1256 1257 -- Creates the primary table 1258 1259 declare 1260 Table : Regexp_Array_Access; 1261 Num_States : State_Index; 1262 Start_State : State_Index; 1263 End_State : State_Index; 1264 R : Regexp; 1265 1266 begin 1267 Table := new Regexp_Array (1 .. 100, 1268 0 .. Alphabet_Size + 10); 1269 if not Glob then 1270 Create_Primary_Table (Table, Num_States, Start_State, End_State); 1271 else 1272 Create_Primary_Table_Glob 1273 (Table, Num_States, Start_State, End_State); 1274 end if; 1275 1276 -- Creates the secondary table 1277 1278 R := Create_Secondary_Table 1279 (Table, Num_States, Start_State, End_State); 1280 Free (Table); 1281 return R; 1282 end; 1283 end Compile; 1284 1285 -------------- 1286 -- Finalize -- 1287 -------------- 1288 1289 procedure Finalize (R : in out Regexp) is 1290 procedure Free is new 1291 Unchecked_Deallocation (Regexp_Value, Regexp_Access); 1292 1293 begin 1294 Free (R.R); 1295 end Finalize; 1296 1297 --------- 1298 -- Get -- 1299 --------- 1300 1301 function Get 1302 (Table : Regexp_Array_Access; 1303 State : State_Index; 1304 Column : Column_Index) 1305 return State_Index 1306 is 1307 begin 1308 if State <= Table'Last (1) 1309 and then Column <= Table'Last (2) 1310 then 1311 return Table (State, Column); 1312 else 1313 return 0; 1314 end if; 1315 end Get; 1316 1317 ----------- 1318 -- Match -- 1319 ----------- 1320 1321 function Match (S : String; R : Regexp) return Boolean is 1322 Current_State : State_Index := 1; 1323 1324 begin 1325 if R.R = null then 1326 raise Constraint_Error; 1327 end if; 1328 1329 for Char in S'Range loop 1330 1331 if R.R.Case_Sensitive then 1332 Current_State := R.R.States (Current_State, R.R.Map (S (Char))); 1333 else 1334 Current_State := 1335 R.R.States (Current_State, 1336 R.R.Map (GNAT.Case_Util.To_Lower (S (Char)))); 1337 end if; 1338 1339 if Current_State = 0 then 1340 return False; 1341 end if; 1342 1343 end loop; 1344 1345 return R.R.Is_Final (Current_State); 1346 end Match; 1347 1348 --------- 1349 -- Set -- 1350 --------- 1351 1352 procedure Set 1353 (Table : in out Regexp_Array_Access; 1354 State : State_Index; 1355 Column : Column_Index; 1356 Value : State_Index) 1357 is 1358 New_Lines : State_Index; 1359 New_Columns : Column_Index; 1360 New_Table : Regexp_Array_Access; 1361 1362 begin 1363 if State <= Table'Last (1) 1364 and then Column <= Table'Last (2) 1365 then 1366 Table (State, Column) := Value; 1367 else 1368 -- Doubles the size of the table until it is big enough that 1369 -- (State, Column) is a valid index 1370 1371 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1); 1372 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1); 1373 New_Table := new Regexp_Array (Table'First (1) .. New_Lines, 1374 Table'First (2) .. New_Columns); 1375 New_Table.all := (others => (others => 0)); 1376 1377 for J in Table'Range (1) loop 1378 for K in Table'Range (2) loop 1379 New_Table (J, K) := Table (J, K); 1380 end loop; 1381 end loop; 1382 1383 Free (Table); 1384 Table := New_Table; 1385 Table (State, Column) := Value; 1386 end if; 1387 end Set; 1388 1389end GNAT.Regexp; 1390