1------------------------------------------------------------------------------ 2-- -- 3-- Matreshka Project -- 4-- -- 5-- Localization, Internationalization, Globalization for Ada -- 6-- -- 7-- Runtime Library Component -- 8-- -- 9------------------------------------------------------------------------------ 10-- -- 11-- Copyright © 2011-2015, Vadim Godunko <vgodunko@gmail.com> -- 12-- All rights reserved. -- 13-- -- 14-- Redistribution and use in source and binary forms, with or without -- 15-- modification, are permitted provided that the following conditions -- 16-- are met: -- 17-- -- 18-- * Redistributions of source code must retain the above copyright -- 19-- notice, this list of conditions and the following disclaimer. -- 20-- -- 21-- * Redistributions in binary form must reproduce the above copyright -- 22-- notice, this list of conditions and the following disclaimer in the -- 23-- documentation and/or other materials provided with the distribution. -- 24-- -- 25-- * Neither the name of the Vadim Godunko, IE nor the names of its -- 26-- contributors may be used to endorse or promote products derived from -- 27-- this software without specific prior written permission. -- 28-- -- 29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- 30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- 31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -- 32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -- 33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -- 34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- 35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- 36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- 37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- 38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- 39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- 40-- -- 41------------------------------------------------------------------------------ 42-- $Revision: 5259 $ $Date: 2015-05-06 17:32:50 +0300 (Wed, 06 May 2015) $ 43------------------------------------------------------------------------------ 44with League.Character_Sets.Internals; 45with League.Strings.Internals; 46with Matreshka.Internals.Regexps.Compiler; 47 48package body Matreshka.Internals.Finite_Automatons is 49 50 package Compiler renames Matreshka.Internals.Regexps.Compiler; 51 52 type Position is new Natural; 53 -- Position is index of a literal element of regexp 54 -- for example: (a|b)*abb 55 -- 1 2 345 56 57 -- Map each literal to corresponding character set 58 type Character_Set_Map is array (Position range <>) of 59 League.Character_Sets.Universal_Character_Set; 60 61 function To_Character_Set 62 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 63 Node : Positive) return League.Character_Sets.Universal_Character_Set; 64 -- Return character set corresponding to given regexp element 65 -- Raise Constraint_Error if element is not literal. 66 67 function Count_Positions 68 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 69 Root : Positive) 70 return Position; 71 -- Return count of literal elements in given regexp subexpression 72 73 function Count_Positions_In_List 74 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 75 Head : Positive) 76 return Position; 77 -- Return count of literal elements in given regexp subexpression sequence 78 79 function Count_Positions_In_Array 80 (List : Shared_Pattern_Array) 81 return Position; 82 83 function Nullable 84 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 85 Root : Positive) return Boolean; 86 -- Check if given regexp subexpression can match empty string 87 88 function Nullable_List 89 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 90 Head : Positive) return Boolean; 91 -- Check if given regexp subexpression sequence can match empty string 92 93 procedure Check 94 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 95 Head : Positive); 96 97 ----------- 98 -- Check -- 99 ----------- 100 101 procedure Check 102 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 103 Head : Positive) 104 is 105 procedure Walk (Root : Positive); 106 107 procedure Walk_List (Head : Positive); 108 109 procedure Walk (Root : Positive) is 110 Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root); 111 begin 112 case Node.Kind is 113 when Matreshka.Internals.Regexps.N_None => 114 raise Constraint_Error with "'None' unsupported"; 115 116 when Matreshka.Internals.Regexps.N_Subexpression => 117 Walk_List (Compiler.Get_Expression (AST, Root)); 118 119 when Matreshka.Internals.Regexps.N_Match_Any | 120 Matreshka.Internals.Regexps.N_Match_Code | 121 Matreshka.Internals.Regexps.N_Match_Property | 122 Matreshka.Internals.Regexps.N_Character_Class | 123 Matreshka.Internals.Regexps.N_Member_Code | 124 Matreshka.Internals.Regexps.N_Member_Property | 125 Matreshka.Internals.Regexps.N_Member_Range => 126 null; 127 128 when Matreshka.Internals.Regexps.N_Anchor => 129 raise Constraint_Error with "'Anchor' unsupported"; 130 131 when Matreshka.Internals.Regexps.N_Multiplicity => 132 if not Node.Greedy then 133 raise Constraint_Error with "'Lazy' unsupported"; 134 elsif Node.Lower > 1 then 135 raise Constraint_Error with 136 "'Lower not 0 or 1' unsupported"; 137 elsif not (Node.Upper = Natural'Last or 138 (Node.Upper = 1 and Node.Lower = 0)) 139 then 140 raise Constraint_Error with 141 "'Upper not *' unsupported"; 142 end if; 143 144 Walk_List (Compiler.Get_Expression (AST, Root)); 145 146 when Matreshka.Internals.Regexps.N_Alternation => 147 Walk_List (Compiler.Get_Preferred (AST, Root)); 148 Walk_List (Compiler.Get_Fallback (AST, Root)); 149 end case; 150 end Walk; 151 152 procedure Walk_List (Head : Positive) is 153 Pos : Natural := Head; 154 begin 155 while Pos > 0 loop 156 Walk (Pos); 157 Pos := Compiler.Get_Next_Sibling (AST, Pos); 158 end loop; 159 end Walk_List; 160 begin 161 Walk_List (Head); 162 end Check; 163 164 ------------- 165 -- Compile -- 166 ------------- 167 168 procedure Compile 169 (Self : in out DFA_Constructor; 170 Start : League.Strings.Universal_String; 171 List : League.String_Vectors.Universal_String_Vector; 172 Actions : Rule_Index_Array) 173 is 174 Data : Shared_Pattern_Array (1 .. List.Length); 175 begin 176 if Data'Length = 0 then 177 return; 178 end if; 179 180 for J in Data'Range loop 181 Data (J) := Compiler.Compile 182 (League.Strings.Internals.Internal (List.Element (J))); 183 end loop; 184 185 Compile (Self, Start, Data, Actions); 186 end Compile; 187 188 ------------- 189 -- Compile -- 190 ------------- 191 192 procedure Compile 193 (Self : in out DFA_Constructor; 194 Start : League.Strings.Universal_String; 195 List : Shared_Pattern_Array; 196 Actions : Rule_Index_Array) 197 is 198 Max_Pos : constant Position := Count_Positions_In_Array (List); 199 type Position_Set is array (1 .. Max_Pos) of Boolean; 200 -- pragma Pack (Position_Set); 201 202 Empty : constant Position_Set := (others => False); 203 204 subtype Finish_Position is Position range 1 .. List'Length; 205 206 type Position_Set_Array is array (1 .. Max_Pos) of Position_Set; 207 208 Follow : Position_Set_Array := (others => Empty); 209 Chars : Character_Set_Map (1 .. Max_Pos); 210 211 function Head (Index : Positive) return Positive; 212 -- Return Head for List (Index) 213 214 procedure Add_To_Follow 215 (First : Position_Set; 216 Last : Position_Set); 217 -- Update Follow array according to First and Last position sets 218 219 procedure Walk 220 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 221 Root : Positive; 222 Pos : in out Position; 223 First : in out Position_Set; 224 Last : in out Position_Set); 225 -- Walk regexp subexpression and update Follow array for each literal 226 227 procedure Walk_List 228 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 229 Head : Positive; 230 Pos : in out Position; 231 First : in out Position_Set; 232 Last : in out Position_Set); 233 -- Walk regexp subexpressions and update Follow array for each literal 234 235 procedure Walk_Array 236 (List : Shared_Pattern_Array; 237 First : in out Position_Set); 238 -- Walk regexp array and add fictive symbols in final positions 239 240 function Get_Follows 241 (Set : Position_Set; 242 Map : Character_Set_Map; 243 Char : League.Character_Sets.Universal_Character_Set) 244 return Position_Set; 245 -- Get positions set reachable from Set on input belong to Char 246 247 procedure Split_To_Distinct_Sets 248 (Set : Position_Set; 249 Map : Character_Set_Map; 250 List : out Vectors.Vector); 251 -- Fill character set List with non-intersected subsets of 252 -- characters in Map (Set) 253 254 procedure Make_DFA 255 (Graph : in out Matreshka.Internals.Graphs.Constructor.Graph; 256 Start : out State; 257 Edges : in out Vectors.Vector; 258 Final : in out State_Maps.Map; 259 First : Position_Set; 260 Map : Character_Set_Map); 261 262 ------------------- 263 -- Add_To_Follow -- 264 ------------------- 265 266 procedure Add_To_Follow 267 (First : Position_Set; 268 Last : Position_Set) is 269 begin 270 for J in Last'Range loop 271 if Last (J) then 272 Follow (J) := Follow (J) or First; 273 end if; 274 end loop; 275 end Add_To_Follow; 276 277 ----------------- 278 -- Get_Follows -- 279 ----------------- 280 281 function Get_Follows 282 (Set : Position_Set; 283 Map : Character_Set_Map; 284 Char : League.Character_Sets.Universal_Character_Set) 285 return Position_Set 286 is 287 Result : Position_Set := Empty; 288 begin 289 for J in Set'Range loop 290 if Set (J) and then Char.Is_Subset (Map (J)) then 291 Result := Result or Follow (J); 292 end if; 293 end loop; 294 295 return Result; 296 end Get_Follows; 297 298 ---------- 299 -- Head -- 300 ---------- 301 302 function Head (Index : Positive) return Positive is 303 begin 304 return List (Index).List (List (Index).Start).Head; 305 end Head; 306 307 -------------- 308 -- Make_DFA -- 309 -------------- 310 311 procedure Make_DFA 312 (Graph : in out Matreshka.Internals.Graphs.Constructor.Graph; 313 Start : out State; 314 Edges : in out Vectors.Vector; 315 Final : in out State_Maps.Map; 316 First : Position_Set; 317 Map : Character_Set_Map) 318 is 319 use Matreshka.Internals.Graphs.Constructor; 320 321 function New_Node (Set : Position_Set) return Node; 322 -- Allocate new state/node, add it to Final if needed 323 324 package Maps is new Ada.Containers.Ordered_Maps (Position_Set, Node); 325 326 -------------- 327 -- New_Node -- 328 -------------- 329 330 function New_Node (Set : Position_Set) return Node is 331 Result : constant Node := Graph.New_Node; 332 Index : Rule_Index; 333 begin 334 if Set (Finish_Position) /= (Finish_Position => False) then 335 for J in Finish_Position loop 336 if Set (J) then 337 Index := Actions (Positive (J)); 338 exit; 339 end if; 340 end loop; 341 342 Final.Insert (Result.Index, Index); 343 end if; 344 345 return Result; 346 end New_Node; 347 348 Marked : Maps.Map; 349 Not_Marked : Maps.Map; 350 begin 351 declare 352 First_Node : constant Node := New_Node (First); 353 begin 354 Start := First_Node.Index; 355 Not_Marked.Insert (First, First_Node); 356 end; 357 358 while not Not_Marked.Is_Empty loop 359 declare 360 Source : constant Node := Not_Marked.First_Element; 361 Set : constant Position_Set := Not_Marked.First_Key; 362 List : Vectors.Vector; 363 begin 364 Not_Marked.Delete_First; 365 Marked.Insert (Set, Source); 366 Split_To_Distinct_Sets (Set, Map, List); 367 368 for J in List.First_Index .. List.Last_Index loop 369 declare 370 use type Ada.Containers.Count_Type; 371 372 Target : Node; 373 Cursor : Maps.Cursor; 374 Next : constant Position_Set := 375 Get_Follows (Set, Map, List.Element (J)); 376 begin 377 if Next /= Empty then 378 Cursor := Marked.Find (Next); 379 380 if Maps.Has_Element (Cursor) then 381 Target := Maps.Element (Cursor); 382 else 383 Cursor := Not_Marked.Find (Next); 384 385 if Maps.Has_Element (Cursor) then 386 Target := Maps.Element (Cursor); 387 else 388 Target := New_Node (Next); 389 Not_Marked.Insert (Next, Target); 390 end if; 391 end if; 392 393 -- Let's suppose edge allocation in sequent order 394 Edges.Set_Length (Edges.Length + 1); 395 396 Edges.Replace_Element 397 (Index => Source.New_Edge (Target), 398 New_Item => List.Element (J)); 399 400 end if; 401 end; 402 end loop; 403 end; 404 end loop; 405 end Make_DFA; 406 407 ---------------------------- 408 -- Split_To_Distinct_Sets -- 409 ---------------------------- 410 411 procedure Split_To_Distinct_Sets 412 (Set : Position_Set; 413 Map : Character_Set_Map; 414 List : out Vectors.Vector) is 415 begin 416 for J in Set'Range loop 417 if Set (J) then 418 declare 419 use League.Character_Sets; 420 Rest : Universal_Character_Set := Map (J); 421 begin 422 for K in List.First_Index .. List.Last_Index loop 423 declare 424 Item : constant Universal_Character_Set := 425 List.Element (K); 426 Intersection : constant Universal_Character_Set := 427 Item and Rest; 428 begin 429 if not Intersection.Is_Empty then 430 declare 431 Extra : constant Universal_Character_Set := 432 Item - Rest; 433 begin 434 if not Extra.Is_Empty then 435 List.Append (Extra); 436 end if; 437 438 Rest := Rest - Item; 439 List.Replace_Element (K, Intersection); 440 end; 441 end if; 442 end; 443 end loop; 444 445 if not Rest.Is_Empty then 446 List.Append (Rest); 447 end if; 448 end; 449 end if; 450 end loop; 451 end Split_To_Distinct_Sets; 452 453 ---------- 454 -- Walk -- 455 ---------- 456 457 procedure Walk 458 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 459 Root : Positive; 460 Pos : in out Position; 461 First : in out Position_Set; 462 Last : in out Position_Set) 463 is 464 Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root); 465 466 begin 467 case Node.Kind is 468 when Matreshka.Internals.Regexps.N_None => 469 raise Constraint_Error; 470 471 when Matreshka.Internals.Regexps.N_Subexpression => 472 Walk_List 473 (AST, Compiler.Get_Expression (AST, Root), Pos, First, Last); 474 475 when Matreshka.Internals.Regexps.N_Match_Any | 476 Matreshka.Internals.Regexps.N_Match_Code | 477 Matreshka.Internals.Regexps.N_Match_Property | 478 Matreshka.Internals.Regexps.N_Character_Class | 479 Matreshka.Internals.Regexps.N_Anchor => 480 481 Chars (Pos) := To_Character_Set (AST, Root); 482 First (Pos) := True; 483 Last (Pos) := True; 484 Pos := Pos + 1; 485 486 when Matreshka.Internals.Regexps.N_Member_Code | 487 Matreshka.Internals.Regexps.N_Member_Property | 488 Matreshka.Internals.Regexps.N_Member_Range => 489 490 raise Constraint_Error; 491 492 when Matreshka.Internals.Regexps.N_Multiplicity => 493 declare 494 Result_First : Position_Set := Empty; 495 Result_Last : Position_Set := Empty; 496 begin 497 Walk_List 498 (AST, 499 Compiler.Get_Expression (AST, Root), 500 Pos, 501 Result_First, 502 Result_Last); 503 504 Add_To_Follow (Result_First, Result_Last); 505 First := First or Result_First; 506 Last := Last or Result_Last; 507 end; 508 509 when Matreshka.Internals.Regexps.N_Alternation => 510 Walk_List 511 (AST, 512 Compiler.Get_Preferred (AST, Root), 513 Pos, 514 First, 515 Last); 516 517 Walk_List 518 (AST, 519 Compiler.Get_Fallback (AST, Root), 520 Pos, 521 First, 522 Last); 523 end case; 524 end Walk; 525 526 --------------- 527 -- Walk_List -- 528 --------------- 529 530 procedure Walk_List 531 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 532 Head : Positive; 533 Pos : in out Position; 534 First : in out Position_Set; 535 Last : in out Position_Set) 536 is 537 Next : constant Natural := Compiler.Get_Next_Sibling (AST, Head); 538 begin 539 if Next = 0 then 540 Walk (AST, Head, Pos, First, Last); 541 else 542 declare 543 Result_First : Position_Set := Empty; 544 Result_Last : Position_Set := Empty; 545 begin 546 Walk (AST, Head, Pos, First, Result_Last); 547 Walk_List (AST, Next, Pos, Result_First, Last); 548 Add_To_Follow (Result_First, Result_Last); 549 550 if Nullable (AST, Head) then 551 First := First or Result_First; 552 end if; 553 554 if Nullable_List (AST, Next) then 555 Last := Last or Result_Last; 556 end if; 557 end; 558 end if; 559 end Walk_List; 560 561 ---------------- 562 -- Walk_Array -- 563 ---------------- 564 565 procedure Walk_Array 566 (List : Shared_Pattern_Array; 567 First : in out Position_Set) 568 is 569 Pos : Position := List'Length + 1; 570 Result_First : Position_Set; 571 Result_Last : Position_Set; 572 begin 573 for J in List'Range loop 574 Check (List (J), Head (J)); 575 Result_First := Empty; 576 Result_Last := Empty; 577 Walk_List 578 (List (J), 579 Head (J), 580 Pos, 581 First, 582 Result_Last); 583 -- Walk (Next, Pos, Result_First, Last); 584 -- Fictive termination symbol: 585 Result_First (Finish_Position (J)) := True; 586 Add_To_Follow (Result_First, Result_Last); 587 588 if Nullable_List (List (J), Head (J)) then 589 First := First or Result_First; 590 end if; 591 end loop; 592 end Walk_Array; 593 594 First : Position_Set := Empty; 595 Result : State; 596 begin 597 Walk_Array (List, First); 598 599 Make_DFA 600 (Self.Graph, 601 Result, 602 Self.Edge_Char_Set, 603 Self.Final, 604 First, 605 Chars); 606 607 Self.Start.Insert (Start, Result); 608 end Compile; 609 610 -------------- 611 -- Complete -- 612 -------------- 613 614 procedure Complete 615 (Input : in out DFA_Constructor; 616 Output : out DFA) is 617 begin 618 Output.Start := Input.Start; 619 Input.Graph.Complete (Output => Output.Graph); 620 Output.Edge_Char_Set := Input.Edge_Char_Set; 621 Output.Final := Input.Final; 622 end Complete; 623 624 --------------------- 625 -- Count_Positions -- 626 --------------------- 627 628 function Count_Positions 629 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 630 Root : Positive) 631 return Position 632 is 633 Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root); 634 begin 635 case Node.Kind is 636 when Matreshka.Internals.Regexps.N_None => 637 raise Constraint_Error; 638 639 when Matreshka.Internals.Regexps.N_Subexpression => 640 return Count_Positions_In_List 641 (AST, Compiler.Get_Expression (AST, Root)); 642 643 when Matreshka.Internals.Regexps.N_Match_Any | 644 Matreshka.Internals.Regexps.N_Match_Code | 645 Matreshka.Internals.Regexps.N_Match_Property | 646 Matreshka.Internals.Regexps.N_Character_Class => 647 return 1; 648 when Matreshka.Internals.Regexps.N_Member_Code => 649 raise Constraint_Error; 650 when Matreshka.Internals.Regexps.N_Member_Property => 651 raise Constraint_Error; 652 when Matreshka.Internals.Regexps.N_Member_Range => 653 raise Constraint_Error; 654 when Matreshka.Internals.Regexps.N_Multiplicity => 655 return Count_Positions_In_List 656 (AST, Compiler.Get_Expression (AST, Root)); 657 658 when Matreshka.Internals.Regexps.N_Alternation => 659 return 660 Count_Positions_In_List (AST, Compiler.Get_Preferred (AST, Root)) 661 + 662 Count_Positions_In_List (AST, Compiler.Get_Fallback (AST, Root)); 663 664 when Matreshka.Internals.Regexps.N_Anchor => 665 return 1; 666 end case; 667 end Count_Positions; 668 669 ------------------------------ 670 -- Count_Positions_In_Array -- 671 ------------------------------ 672 673 function Count_Positions_In_Array 674 (List : Shared_Pattern_Array) 675 return Position 676 is 677 -- Terminate each regexp with fictive symbol 678 Result : Position := Position (List'Length); 679 begin 680 for J in List'Range loop 681 Result := Result + Count_Positions_In_List 682 (List (J), List (J).List (List (J).Start).Head); 683 end loop; 684 685 return Result; 686 end Count_Positions_In_Array; 687 688 ----------------------------- 689 -- Count_Positions_In_List -- 690 ----------------------------- 691 692 function Count_Positions_In_List 693 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 694 Head : Positive) 695 return Position 696 is 697 Result : Position := 0; 698 Pos : Natural := Head; 699 begin 700 while Pos > 0 loop 701 Result := Result + Count_Positions (AST, Pos); 702 Pos := Compiler.Get_Next_Sibling (AST, Pos); 703 end loop; 704 705 return Result; 706 end Count_Positions_In_List; 707 708 -------------- 709 -- Minimize -- 710 -------------- 711 712 procedure Minimize (Self : in out DFA) is 713 714 package Graphs renames Matreshka.Internals.Graphs; 715 716 function Check_Equive_Class (X, Y : State) return Boolean; 717 718 type State_Pair is array (1 .. 2) of State; 719 720 use type Matreshka.Internals.Graphs.Edge_Identifier; 721 722 package State_Pair_Maps is new Ada.Containers.Ordered_Maps 723 (State_Pair, Matreshka.Internals.Graphs.Edge_Identifier); 724 725 Last : constant State := Self.Graph.Node_Count; 726 Error_State : constant State := Last + 1; 727 728 type Equive_Array is array (1 .. Error_State) of State; 729 Equive : Equive_Array := (others => 1); 730 Next_Equive : Equive_Array := (others => 1); 731 732 function Check_Equive_Class (X, Y : State) return Boolean is 733 Node_X : constant Graphs.Node := Self.Graph.Get_Node (X); 734 Node_Y : constant Graphs.Node := Self.Graph.Get_Node (Y); 735 begin 736 for I in Node_X.First_Edge_Index .. Node_X.Last_Edge_Index loop 737 declare 738 use type League.Character_Sets.Universal_Character_Set; 739 740 Edge_X : constant Graphs.Edge := Self.Graph.Get_Edge (I); 741 Jump_X : constant State := Edge_X.Target_Node.Index; 742 Sym_X : League.Character_Sets.Universal_Character_Set := 743 Self.Edge_Char_Set.Element (Edge_X.Edge_Id); 744 begin 745 for J in Node_Y.First_Edge_Index .. Node_Y.Last_Edge_Index loop 746 declare 747 Edge_Y : constant Graphs.Edge := Self.Graph.Get_Edge (J); 748 Sym_Y : constant League.Character_Sets 749 .Universal_Character_Set := 750 Self.Edge_Char_Set.Element (Edge_Y.Edge_Id); 751 Jump_Y : constant State := Edge_Y.Target_Node.Index; 752 begin 753 if not 754 League.Character_Sets.Is_Empty (Sym_X and Sym_Y) 755 then 756 if Equive (Jump_X) /= Equive (Jump_Y) then 757 return False; 758 else 759 Sym_X := Sym_X - Sym_Y; 760 end if; 761 end if; 762 end; 763 end loop; 764 765 if not Sym_X.Is_Empty 766 and Equive (Jump_X) /= Equive (Error_State) 767 then 768 return False; 769 end if; 770 end; 771 end loop; 772 773 return True; 774 end Check_Equive_Class; 775 776 Current_Equive_Class : State'Base; 777 Prev_Equive_Class : State := 1; 778 Found : Boolean; 779 780 begin 781 Init_Equive_Classes : 782 for J in 1 .. Last loop 783 if Self.Final.Contains (J) then 784 Equive (J) := State (Self.Final.Element (J) + 1); 785 Prev_Equive_Class := State'Max (Prev_Equive_Class, Equive (J)); 786 end if; 787 end loop Init_Equive_Classes; 788 789 Try_Split_Equive_Classes : 790 loop 791 Current_Equive_Class := 0; 792 793 Set_Equive_Classes : 794 for I in 1 .. Last loop 795 Found := False; 796 797 Find_Existent_Class : 798 for J in 1 .. I - 1 loop 799 if Equive (I) = Equive (J) 800 and then 801 Self.Final.Contains (I) = Self.Final.Contains (J) 802 then 803 Found := Check_Equive_Class (I, J) 804 and then Check_Equive_Class (J, I); 805 806 if Found then 807 Next_Equive (I) := Next_Equive (J); 808 exit Find_Existent_Class; 809 end if; 810 end if; 811 end loop Find_Existent_Class; 812 813 if not Found then 814 Current_Equive_Class := Current_Equive_Class + 1; 815 Next_Equive (I) := Current_Equive_Class; 816 end if; 817 end loop Set_Equive_Classes; 818 819 Current_Equive_Class := Current_Equive_Class + 1; 820 Next_Equive (Error_State) := Current_Equive_Class; 821 822 exit Try_Split_Equive_Classes 823 when Prev_Equive_Class = Current_Equive_Class; 824 825 Prev_Equive_Class := Current_Equive_Class; 826 Equive := Next_Equive; 827 end loop Try_Split_Equive_Classes; 828 829 -- Create_DFA 830 831 declare 832 procedure Each_Start (Cursor : Start_Maps.Cursor); 833 834 use Matreshka.Internals.Graphs.Constructor; 835 Result : Graph; 836 Edges : Vectors.Vector; 837 Map : State_Pair_Maps.Map; 838 Final : State_Maps.Map; 839 Nodes : array (1 .. Current_Equive_Class - 1) of Node; 840 841 ---------------- 842 -- Each_Start -- 843 ---------------- 844 845 procedure Each_Start (Cursor : Start_Maps.Cursor) is 846 Old : constant State := Start_Maps.Element (Cursor); 847 begin 848 Self.Start.Replace_Element 849 (Cursor, Nodes (Equive (Old)).Index); 850 end Each_Start; 851 852 begin 853 for K in Nodes'Range loop 854 Nodes (K) := Result.New_Node; 855 end loop; 856 857 for I in 1 .. Last loop 858 declare 859 use type Ada.Containers.Count_Type; 860 861 procedure Append_Chars 862 (X : in out League.Character_Sets.Universal_Character_Set); 863 864 Edge_J : Graphs.Edge; 865 866 ------------------ 867 -- Append_Chars -- 868 ------------------ 869 870 procedure Append_Chars 871 (X : in out League.Character_Sets.Universal_Character_Set) 872 is 873 use type League.Character_Sets.Universal_Character_Set; 874 begin 875 X := X or Self.Edge_Char_Set.Element (Edge_J.Edge_Id); 876 end Append_Chars; 877 878 Node_X : constant Graphs.Node := Self.Graph.Get_Node (I); 879 Edge : Graphs.Edge_Identifier; 880 Pair : State_Pair; 881 Cursor : State_Pair_Maps.Cursor; 882 begin 883 for J in Node_X.First_Edge_Index .. Node_X.Last_Edge_Index loop 884 Edge_J := Self.Graph.Get_Edge (J); 885 Pair (1) := Equive (I); 886 Pair (2) := Equive (Edge_J.Target_Node.Index); 887 Cursor := Map.Find (Pair); 888 889 if State_Pair_Maps.Has_Element (Cursor) then 890 Edges.Update_Element 891 (State_Pair_Maps.Element (Cursor), 892 Append_Chars'Access); 893 else 894 Edge := Nodes (Pair (1)).New_Edge (Nodes (Pair (2))); 895 Map.Insert (Pair, Edge); 896 Edges.Set_Length (Edges.Length + 1); 897 898 Edges.Replace_Element 899 (Edge, 900 Self.Edge_Char_Set.Element (Edge_J.Edge_Id)); 901 end if; 902 end loop; 903 904 if Self.Final.Contains (I) then 905 Final.Include 906 (Nodes (Equive (I)).Index, 907 Self.Final.Element (I)); 908 end if; 909 end; 910 end loop; 911 912 Self.Start.Iterate (Each_Start'Access); 913 Self.Graph.Clear; 914 Result.Complete (Output => Self.Graph); 915 Self.Edge_Char_Set := Edges; 916 Self.Final := Final; 917 end; 918 end Minimize; 919 920 -------------- 921 -- Nullable -- 922 -------------- 923 924 function Nullable 925 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 926 Root : Positive) return Boolean 927 is 928 Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root); 929 930 begin 931 case Node.Kind is 932 when Matreshka.Internals.Regexps.N_None => 933 raise Constraint_Error; 934 935 when Matreshka.Internals.Regexps.N_Subexpression => 936 return Nullable_List (AST, Compiler.Get_Expression (AST, Root)); 937 938 when Matreshka.Internals.Regexps.N_Match_Any | 939 Matreshka.Internals.Regexps.N_Match_Code | 940 Matreshka.Internals.Regexps.N_Match_Property | 941 Matreshka.Internals.Regexps.N_Character_Class | 942 Matreshka.Internals.Regexps.N_Anchor => 943 944 return False; 945 946 when Matreshka.Internals.Regexps.N_Member_Code | 947 Matreshka.Internals.Regexps.N_Member_Property | 948 Matreshka.Internals.Regexps.N_Member_Range => 949 950 raise Constraint_Error; 951 952 when Matreshka.Internals.Regexps.N_Multiplicity => 953 return Node.Lower = 0 or else 954 Nullable_List (AST, Compiler.Get_Expression (AST, Root)); 955 956 when Matreshka.Internals.Regexps.N_Alternation => 957 return Nullable_List (AST, Compiler.Get_Preferred (AST, Root)) 958 or else Nullable_List (AST, Compiler.Get_Fallback (AST, Root)); 959 end case; 960 end Nullable; 961 962 ------------------- 963 -- Nullable_List -- 964 ------------------- 965 966 function Nullable_List 967 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 968 Head : Positive) 969 return Boolean 970 is 971 Pos : Natural := Head; 972 begin 973 while Pos > 0 loop 974 if not Nullable (AST, Pos) then 975 return False; 976 end if; 977 978 Pos := Compiler.Get_Next_Sibling (AST, Pos); 979 end loop; 980 981 return True; 982 end Nullable_List; 983 984 ---------------------- 985 -- To_Character_Set -- 986 ---------------------- 987 988 function To_Character_Set 989 (AST : Matreshka.Internals.Regexps.Shared_Pattern_Access; 990 Node : Positive) return League.Character_Sets.Universal_Character_Set 991 is 992 use type League.Character_Sets.Universal_Character_Set; 993 begin 994 case AST.AST (Node).Kind is 995 when Matreshka.Internals.Regexps.N_None => 996 return League.Character_Sets.Empty_Universal_Character_Set; 997 998 when Matreshka.Internals.Regexps.N_Match_Any => 999 return not League.Character_Sets.Empty_Universal_Character_Set; 1000 1001 when Matreshka.Internals.Regexps.N_Member_Code | 1002 Matreshka.Internals.Regexps.N_Match_Code => 1003 return League.Character_Sets.To_Set 1004 ((1 => Wide_Wide_Character'Val (AST.AST (Node).Code))); 1005 1006 when Matreshka.Internals.Regexps.N_Match_Property | 1007 Matreshka.Internals.Regexps.N_Member_Property => 1008 declare 1009 Result : League.Character_Sets.Universal_Character_Set; 1010 begin 1011 case AST.AST (Node).Value.Kind is 1012 when Matreshka.Internals.Regexps.None => 1013 raise Constraint_Error; 1014 1015 when Matreshka.Internals.Regexps.General_Category => 1016 Result := League.Character_Sets.Internals.To_Set 1017 (AST.AST (Node).Value.GC_Flags); 1018 1019 when Matreshka.Internals.Regexps.Binary => 1020 Result := League.Character_Sets.Internals.To_Set 1021 (AST.AST (Node).Value.Property); 1022 end case; 1023 1024 if AST.AST (Node).Negative then 1025 return not Result; 1026 else 1027 return Result; 1028 end if; 1029 end; 1030 1031 when Matreshka.Internals.Regexps.N_Member_Range => 1032 return League.Character_Sets.To_Set 1033 (Low => Wide_Wide_Character'Val (AST.AST (Node).Low), 1034 High => Wide_Wide_Character'Val (AST.AST (Node).High)); 1035 1036 when Matreshka.Internals.Regexps.N_Character_Class => 1037 declare 1038 1039 Index : Natural := 1040 AST.List (AST.AST (Node).Members).Head; 1041 Result : League.Character_Sets.Universal_Character_Set; 1042 begin 1043 while Index > 0 loop 1044 Result := Result or To_Character_Set (AST, Index); 1045 Index := AST.AST (Index).Next; 1046 end loop; 1047 1048 if AST.AST (Node).Negated then 1049 return not Result; 1050 else 1051 return Result; 1052 end if; 1053 end; 1054 1055 when others => 1056 raise Constraint_Error; 1057 end case; 1058 end To_Character_Set; 1059 1060end Matreshka.Internals.Finite_Automatons; 1061