1------------------------------------------------------------------------------
2--                                                                          --
3--                            Matreshka Project                             --
4--                                                                          --
5--         Localization, Internationalization, Globalization for Ada        --
6--                                                                          --
7--                        Runtime Library Component                         --
8--                                                                          --
9------------------------------------------------------------------------------
10--                                                                          --
11-- Copyright © 2011-2015, Vadim Godunko <vgodunko@gmail.com>                --
12-- All rights reserved.                                                     --
13--                                                                          --
14-- Redistribution and use in source and binary forms, with or without       --
15-- modification, are permitted provided that the following conditions       --
16-- are met:                                                                 --
17--                                                                          --
18--  * Redistributions of source code must retain the above copyright        --
19--    notice, this list of conditions and the following disclaimer.         --
20--                                                                          --
21--  * Redistributions in binary form must reproduce the above copyright     --
22--    notice, this list of conditions and the following disclaimer in the   --
23--    documentation and/or other materials provided with the distribution.  --
24--                                                                          --
25--  * Neither the name of the Vadim Godunko, IE nor the names of its        --
26--    contributors may be used to endorse or promote products derived from  --
27--    this software without specific prior written permission.              --
28--                                                                          --
29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS      --
30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT        --
31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR    --
32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT     --
33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,   --
34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED --
35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR   --
36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF   --
37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     --
38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS       --
39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.             --
40--                                                                          --
41------------------------------------------------------------------------------
42--  $Revision: 5259 $ $Date: 2015-05-06 17:32:50 +0300 (Wed, 06 May 2015) $
43------------------------------------------------------------------------------
44with League.Character_Sets.Internals;
45with League.Strings.Internals;
46with Matreshka.Internals.Regexps.Compiler;
47
48package body Matreshka.Internals.Finite_Automatons is
49
50   package Compiler renames Matreshka.Internals.Regexps.Compiler;
51
52   type Position is new Natural;
53   --  Position is index of a literal element of regexp
54   --  for example: (a|b)*abb
55   --                1 2  345
56
57   --  Map each literal to corresponding character set
58   type Character_Set_Map is array (Position range <>) of
59     League.Character_Sets.Universal_Character_Set;
60
61   function To_Character_Set
62     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
63      Node : Positive) return League.Character_Sets.Universal_Character_Set;
64   --  Return character set corresponding to given regexp element
65   --  Raise Constraint_Error if element is not literal.
66
67   function Count_Positions
68     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
69      Root : Positive)
70     return Position;
71   --  Return count of literal elements in given regexp subexpression
72
73   function Count_Positions_In_List
74     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
75      Head : Positive)
76     return Position;
77   --  Return count of literal elements in given regexp subexpression sequence
78
79   function Count_Positions_In_Array
80     (List  : Shared_Pattern_Array)
81     return Position;
82
83   function Nullable
84     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
85      Root : Positive) return Boolean;
86   --  Check if given regexp subexpression can match empty string
87
88   function Nullable_List
89     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
90      Head : Positive) return Boolean;
91   --  Check if given regexp subexpression sequence can match empty string
92
93   procedure Check
94     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
95      Head : Positive);
96
97   -----------
98   -- Check --
99   -----------
100
101   procedure Check
102     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
103      Head : Positive)
104   is
105      procedure Walk (Root  : Positive);
106
107      procedure Walk_List (Head  : Positive);
108
109      procedure Walk (Root  : Positive) is
110         Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root);
111      begin
112         case Node.Kind is
113            when Matreshka.Internals.Regexps.N_None =>
114               raise Constraint_Error with "'None' unsupported";
115
116            when Matreshka.Internals.Regexps.N_Subexpression =>
117               Walk_List (Compiler.Get_Expression (AST, Root));
118
119            when Matreshka.Internals.Regexps.N_Match_Any |
120              Matreshka.Internals.Regexps.N_Match_Code |
121              Matreshka.Internals.Regexps.N_Match_Property |
122              Matreshka.Internals.Regexps.N_Character_Class |
123              Matreshka.Internals.Regexps.N_Member_Code |
124              Matreshka.Internals.Regexps.N_Member_Property |
125              Matreshka.Internals.Regexps.N_Member_Range =>
126               null;
127
128            when Matreshka.Internals.Regexps.N_Anchor =>
129               raise Constraint_Error with "'Anchor' unsupported";
130
131            when Matreshka.Internals.Regexps.N_Multiplicity =>
132               if not Node.Greedy then
133                  raise Constraint_Error with "'Lazy' unsupported";
134               elsif Node.Lower > 1 then
135                  raise Constraint_Error with
136                    "'Lower not 0 or 1' unsupported";
137               elsif not (Node.Upper = Natural'Last or
138                            (Node.Upper = 1 and Node.Lower = 0))
139               then
140                  raise Constraint_Error with
141                    "'Upper not *' unsupported";
142               end if;
143
144               Walk_List (Compiler.Get_Expression (AST, Root));
145
146            when Matreshka.Internals.Regexps.N_Alternation =>
147               Walk_List (Compiler.Get_Preferred (AST, Root));
148               Walk_List (Compiler.Get_Fallback (AST, Root));
149         end case;
150      end Walk;
151
152      procedure Walk_List (Head  : Positive) is
153         Pos    : Natural := Head;
154      begin
155         while Pos > 0 loop
156            Walk (Pos);
157            Pos := Compiler.Get_Next_Sibling (AST, Pos);
158         end loop;
159      end Walk_List;
160   begin
161      Walk_List (Head);
162   end Check;
163
164   -------------
165   -- Compile --
166   -------------
167
168   procedure Compile
169     (Self    : in out DFA_Constructor;
170      Start   : League.Strings.Universal_String;
171      List    : League.String_Vectors.Universal_String_Vector;
172      Actions : Rule_Index_Array)
173   is
174      Data : Shared_Pattern_Array (1 .. List.Length);
175   begin
176      if Data'Length = 0 then
177         return;
178      end if;
179
180      for J in Data'Range loop
181         Data (J) := Compiler.Compile
182           (League.Strings.Internals.Internal (List.Element (J)));
183      end loop;
184
185      Compile (Self, Start, Data, Actions);
186   end Compile;
187
188   -------------
189   -- Compile --
190   -------------
191
192   procedure Compile
193     (Self    : in out DFA_Constructor;
194      Start   : League.Strings.Universal_String;
195      List    : Shared_Pattern_Array;
196      Actions : Rule_Index_Array)
197   is
198      Max_Pos : constant Position := Count_Positions_In_Array (List);
199      type Position_Set is array (1 .. Max_Pos) of Boolean;
200      --  pragma Pack (Position_Set);
201
202      Empty  : constant Position_Set := (others => False);
203
204      subtype Finish_Position is Position range 1 .. List'Length;
205
206      type Position_Set_Array is array (1 .. Max_Pos) of Position_Set;
207
208      Follow  : Position_Set_Array := (others => Empty);
209      Chars   : Character_Set_Map (1 .. Max_Pos);
210
211      function Head (Index : Positive) return Positive;
212      --  Return Head for List (Index)
213
214      procedure Add_To_Follow
215        (First : Position_Set;
216         Last  : Position_Set);
217      --  Update Follow array according to First and Last position sets
218
219      procedure Walk
220        (AST   : Matreshka.Internals.Regexps.Shared_Pattern_Access;
221         Root  : Positive;
222         Pos   : in out Position;
223         First : in out Position_Set;
224         Last  : in out Position_Set);
225      --  Walk regexp subexpression and update Follow array for each literal
226
227      procedure Walk_List
228        (AST   : Matreshka.Internals.Regexps.Shared_Pattern_Access;
229         Head  : Positive;
230         Pos   : in out Position;
231         First : in out Position_Set;
232         Last  : in out Position_Set);
233      --  Walk regexp subexpressions and update Follow array for each literal
234
235      procedure Walk_Array
236        (List  : Shared_Pattern_Array;
237         First : in out Position_Set);
238      --  Walk regexp array and add fictive symbols in final positions
239
240      function Get_Follows
241        (Set  : Position_Set;
242         Map  : Character_Set_Map;
243         Char : League.Character_Sets.Universal_Character_Set)
244        return Position_Set;
245      --  Get positions set reachable from Set on input belong to Char
246
247      procedure Split_To_Distinct_Sets
248        (Set  : Position_Set;
249         Map  : Character_Set_Map;
250         List : out Vectors.Vector);
251      --  Fill character set List with non-intersected subsets of
252      --  characters in Map (Set)
253
254      procedure Make_DFA
255        (Graph : in out Matreshka.Internals.Graphs.Constructor.Graph;
256         Start : out State;
257         Edges : in out Vectors.Vector;
258         Final : in out State_Maps.Map;
259         First : Position_Set;
260         Map   : Character_Set_Map);
261
262      -------------------
263      -- Add_To_Follow --
264      -------------------
265
266      procedure Add_To_Follow
267        (First : Position_Set;
268         Last  : Position_Set) is
269      begin
270         for J in Last'Range loop
271            if Last (J) then
272               Follow (J) := Follow (J) or First;
273            end if;
274         end loop;
275      end Add_To_Follow;
276
277      -----------------
278      -- Get_Follows --
279      -----------------
280
281      function Get_Follows
282        (Set  : Position_Set;
283         Map  : Character_Set_Map;
284         Char : League.Character_Sets.Universal_Character_Set)
285        return Position_Set
286      is
287         Result : Position_Set := Empty;
288      begin
289         for J in Set'Range loop
290            if Set (J) and then Char.Is_Subset (Map (J)) then
291               Result := Result or Follow (J);
292            end if;
293         end loop;
294
295         return Result;
296      end Get_Follows;
297
298      ----------
299      -- Head --
300      ----------
301
302      function Head (Index : Positive) return Positive is
303      begin
304         return List (Index).List (List (Index).Start).Head;
305      end Head;
306
307      --------------
308      -- Make_DFA --
309      --------------
310
311      procedure Make_DFA
312        (Graph : in out Matreshka.Internals.Graphs.Constructor.Graph;
313         Start : out State;
314         Edges : in out Vectors.Vector;
315         Final : in out State_Maps.Map;
316         First : Position_Set;
317         Map   : Character_Set_Map)
318      is
319         use Matreshka.Internals.Graphs.Constructor;
320
321         function New_Node (Set : Position_Set) return Node;
322         --  Allocate new state/node, add it to Final if needed
323
324         package Maps is new Ada.Containers.Ordered_Maps (Position_Set, Node);
325
326         --------------
327         -- New_Node --
328         --------------
329
330         function New_Node (Set : Position_Set) return Node is
331            Result : constant Node := Graph.New_Node;
332            Index  : Rule_Index;
333         begin
334            if Set (Finish_Position) /= (Finish_Position => False) then
335               for J in Finish_Position loop
336                  if Set (J) then
337                     Index := Actions (Positive (J));
338                     exit;
339                  end if;
340               end loop;
341
342               Final.Insert (Result.Index, Index);
343            end if;
344
345            return Result;
346         end New_Node;
347
348         Marked     : Maps.Map;
349         Not_Marked : Maps.Map;
350      begin
351         declare
352            First_Node : constant Node := New_Node (First);
353         begin
354            Start := First_Node.Index;
355            Not_Marked.Insert (First, First_Node);
356         end;
357
358         while not Not_Marked.Is_Empty loop
359            declare
360               Source : constant Node := Not_Marked.First_Element;
361               Set    : constant Position_Set := Not_Marked.First_Key;
362               List   : Vectors.Vector;
363            begin
364               Not_Marked.Delete_First;
365               Marked.Insert (Set, Source);
366               Split_To_Distinct_Sets (Set, Map, List);
367
368               for J in List.First_Index .. List.Last_Index loop
369                  declare
370                     use type Ada.Containers.Count_Type;
371
372                     Target : Node;
373                     Cursor : Maps.Cursor;
374                     Next   : constant Position_Set :=
375                       Get_Follows (Set, Map, List.Element (J));
376                  begin
377                     if Next /= Empty then
378                        Cursor := Marked.Find (Next);
379
380                        if Maps.Has_Element (Cursor) then
381                           Target := Maps.Element (Cursor);
382                        else
383                           Cursor := Not_Marked.Find (Next);
384
385                           if Maps.Has_Element (Cursor) then
386                              Target := Maps.Element (Cursor);
387                           else
388                              Target := New_Node (Next);
389                              Not_Marked.Insert (Next, Target);
390                           end if;
391                        end if;
392
393                        --  Let's suppose edge allocation in sequent order
394                        Edges.Set_Length (Edges.Length + 1);
395
396                        Edges.Replace_Element
397                          (Index    => Source.New_Edge (Target),
398                           New_Item => List.Element (J));
399
400                     end if;
401                  end;
402               end loop;
403            end;
404         end loop;
405      end Make_DFA;
406
407      ----------------------------
408      -- Split_To_Distinct_Sets --
409      ----------------------------
410
411      procedure Split_To_Distinct_Sets
412        (Set  : Position_Set;
413         Map  : Character_Set_Map;
414         List : out Vectors.Vector) is
415      begin
416         for J in Set'Range loop
417            if Set (J) then
418               declare
419                  use League.Character_Sets;
420                  Rest : Universal_Character_Set := Map (J);
421               begin
422                  for K in List.First_Index .. List.Last_Index loop
423                     declare
424                        Item : constant Universal_Character_Set :=
425                          List.Element (K);
426                        Intersection : constant Universal_Character_Set :=
427                          Item and Rest;
428                     begin
429                        if not Intersection.Is_Empty then
430                           declare
431                              Extra : constant Universal_Character_Set :=
432                                Item - Rest;
433                           begin
434                              if not Extra.Is_Empty then
435                                 List.Append (Extra);
436                              end if;
437
438                              Rest := Rest - Item;
439                              List.Replace_Element (K, Intersection);
440                           end;
441                        end if;
442                     end;
443                  end loop;
444
445                  if not Rest.Is_Empty then
446                     List.Append (Rest);
447                  end if;
448               end;
449            end if;
450         end loop;
451      end Split_To_Distinct_Sets;
452
453      ----------
454      -- Walk --
455      ----------
456
457      procedure Walk
458        (AST   : Matreshka.Internals.Regexps.Shared_Pattern_Access;
459         Root  : Positive;
460         Pos   : in out Position;
461         First : in out Position_Set;
462         Last  : in out Position_Set)
463      is
464         Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root);
465
466      begin
467         case Node.Kind is
468            when Matreshka.Internals.Regexps.N_None =>
469               raise Constraint_Error;
470
471            when Matreshka.Internals.Regexps.N_Subexpression =>
472               Walk_List
473                 (AST, Compiler.Get_Expression (AST, Root), Pos, First, Last);
474
475            when Matreshka.Internals.Regexps.N_Match_Any |
476              Matreshka.Internals.Regexps.N_Match_Code |
477              Matreshka.Internals.Regexps.N_Match_Property |
478              Matreshka.Internals.Regexps.N_Character_Class |
479              Matreshka.Internals.Regexps.N_Anchor =>
480
481               Chars (Pos) := To_Character_Set (AST, Root);
482               First (Pos) := True;
483               Last (Pos) := True;
484               Pos := Pos + 1;
485
486            when Matreshka.Internals.Regexps.N_Member_Code |
487              Matreshka.Internals.Regexps.N_Member_Property |
488              Matreshka.Internals.Regexps.N_Member_Range =>
489
490               raise Constraint_Error;
491
492            when Matreshka.Internals.Regexps.N_Multiplicity =>
493               declare
494                  Result_First : Position_Set := Empty;
495                  Result_Last  : Position_Set := Empty;
496               begin
497                  Walk_List
498                    (AST,
499                     Compiler.Get_Expression (AST, Root),
500                     Pos,
501                     Result_First,
502                     Result_Last);
503
504                  Add_To_Follow (Result_First, Result_Last);
505                  First := First or Result_First;
506                  Last := Last or Result_Last;
507               end;
508
509            when Matreshka.Internals.Regexps.N_Alternation =>
510               Walk_List
511                 (AST,
512                  Compiler.Get_Preferred (AST, Root),
513                  Pos,
514                  First,
515                  Last);
516
517               Walk_List
518                 (AST,
519                  Compiler.Get_Fallback (AST, Root),
520                  Pos,
521                  First,
522                  Last);
523         end case;
524      end Walk;
525
526      ---------------
527      -- Walk_List --
528      ---------------
529
530      procedure Walk_List
531        (AST   : Matreshka.Internals.Regexps.Shared_Pattern_Access;
532         Head  : Positive;
533         Pos   : in out Position;
534         First : in out Position_Set;
535         Last  : in out Position_Set)
536      is
537         Next : constant Natural := Compiler.Get_Next_Sibling (AST, Head);
538      begin
539         if Next = 0 then
540            Walk (AST, Head, Pos, First, Last);
541         else
542            declare
543               Result_First : Position_Set := Empty;
544               Result_Last  : Position_Set := Empty;
545            begin
546               Walk (AST, Head, Pos, First, Result_Last);
547               Walk_List (AST, Next, Pos, Result_First, Last);
548               Add_To_Follow (Result_First, Result_Last);
549
550               if Nullable (AST, Head) then
551                  First := First or Result_First;
552               end if;
553
554               if Nullable_List (AST, Next) then
555                  Last := Last or Result_Last;
556               end if;
557            end;
558         end if;
559      end Walk_List;
560
561      ----------------
562      -- Walk_Array --
563      ----------------
564
565      procedure Walk_Array
566        (List  : Shared_Pattern_Array;
567         First : in out Position_Set)
568      is
569         Pos          : Position := List'Length + 1;
570         Result_First : Position_Set;
571         Result_Last  : Position_Set;
572      begin
573         for J in List'Range loop
574            Check (List (J), Head (J));
575            Result_First := Empty;
576            Result_Last  := Empty;
577            Walk_List
578              (List (J),
579               Head (J),
580               Pos,
581               First,
582               Result_Last);
583            --  Walk (Next, Pos, Result_First, Last);
584            --  Fictive termination symbol:
585            Result_First (Finish_Position (J)) := True;
586            Add_To_Follow (Result_First, Result_Last);
587
588            if Nullable_List (List (J), Head (J)) then
589               First := First or Result_First;
590            end if;
591         end loop;
592      end Walk_Array;
593
594      First  : Position_Set := Empty;
595      Result : State;
596   begin
597      Walk_Array (List, First);
598
599      Make_DFA
600        (Self.Graph,
601         Result,
602         Self.Edge_Char_Set,
603         Self.Final,
604         First,
605         Chars);
606
607      Self.Start.Insert (Start, Result);
608   end Compile;
609
610   --------------
611   -- Complete --
612   --------------
613
614   procedure Complete
615     (Input  : in out DFA_Constructor;
616      Output : out DFA) is
617   begin
618      Output.Start := Input.Start;
619      Input.Graph.Complete (Output => Output.Graph);
620      Output.Edge_Char_Set := Input.Edge_Char_Set;
621      Output.Final := Input.Final;
622   end Complete;
623
624   ---------------------
625   -- Count_Positions --
626   ---------------------
627
628   function Count_Positions
629     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
630      Root : Positive)
631     return Position
632   is
633      Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root);
634   begin
635      case Node.Kind is
636         when Matreshka.Internals.Regexps.N_None =>
637            raise Constraint_Error;
638
639         when Matreshka.Internals.Regexps.N_Subexpression =>
640            return Count_Positions_In_List
641              (AST, Compiler.Get_Expression (AST, Root));
642
643         when Matreshka.Internals.Regexps.N_Match_Any |
644          Matreshka.Internals.Regexps.N_Match_Code |
645          Matreshka.Internals.Regexps.N_Match_Property |
646          Matreshka.Internals.Regexps.N_Character_Class =>
647            return 1;
648         when Matreshka.Internals.Regexps.N_Member_Code =>
649            raise Constraint_Error;
650         when Matreshka.Internals.Regexps.N_Member_Property =>
651            raise Constraint_Error;
652         when Matreshka.Internals.Regexps.N_Member_Range =>
653            raise Constraint_Error;
654         when Matreshka.Internals.Regexps.N_Multiplicity =>
655            return Count_Positions_In_List
656              (AST, Compiler.Get_Expression (AST, Root));
657
658         when Matreshka.Internals.Regexps.N_Alternation =>
659            return
660              Count_Positions_In_List (AST, Compiler.Get_Preferred (AST, Root))
661              +
662              Count_Positions_In_List (AST, Compiler.Get_Fallback (AST, Root));
663
664         when Matreshka.Internals.Regexps.N_Anchor =>
665            return 1;
666      end case;
667   end Count_Positions;
668
669   ------------------------------
670   -- Count_Positions_In_Array --
671   ------------------------------
672
673   function Count_Positions_In_Array
674     (List  : Shared_Pattern_Array)
675     return Position
676   is
677      --  Terminate each regexp with fictive symbol
678      Result : Position := Position (List'Length);
679   begin
680      for J in List'Range loop
681         Result := Result + Count_Positions_In_List
682           (List (J), List (J).List (List (J).Start).Head);
683      end loop;
684
685      return Result;
686   end Count_Positions_In_Array;
687
688   -----------------------------
689   -- Count_Positions_In_List --
690   -----------------------------
691
692   function Count_Positions_In_List
693     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
694      Head : Positive)
695     return Position
696   is
697      Result : Position := 0;
698      Pos    : Natural := Head;
699   begin
700      while Pos > 0 loop
701         Result := Result + Count_Positions (AST, Pos);
702         Pos := Compiler.Get_Next_Sibling (AST, Pos);
703      end loop;
704
705      return Result;
706   end Count_Positions_In_List;
707
708   --------------
709   -- Minimize --
710   --------------
711
712   procedure Minimize (Self : in out DFA) is
713
714      package Graphs renames Matreshka.Internals.Graphs;
715
716      function Check_Equive_Class (X, Y : State) return Boolean;
717
718      type State_Pair is array (1 .. 2) of State;
719
720      use type Matreshka.Internals.Graphs.Edge_Identifier;
721
722      package State_Pair_Maps is new Ada.Containers.Ordered_Maps
723        (State_Pair, Matreshka.Internals.Graphs.Edge_Identifier);
724
725      Last         : constant State := Self.Graph.Node_Count;
726      Error_State  : constant State := Last + 1;
727
728      type Equive_Array is array (1 .. Error_State) of State;
729      Equive       : Equive_Array := (others => 1);
730      Next_Equive  : Equive_Array := (others => 1);
731
732      function Check_Equive_Class (X, Y : State) return Boolean is
733         Node_X : constant Graphs.Node := Self.Graph.Get_Node (X);
734         Node_Y : constant Graphs.Node := Self.Graph.Get_Node (Y);
735      begin
736         for I in Node_X.First_Edge_Index .. Node_X.Last_Edge_Index loop
737            declare
738               use type League.Character_Sets.Universal_Character_Set;
739
740               Edge_X : constant Graphs.Edge := Self.Graph.Get_Edge (I);
741               Jump_X : constant State := Edge_X.Target_Node.Index;
742               Sym_X  : League.Character_Sets.Universal_Character_Set :=
743                 Self.Edge_Char_Set.Element (Edge_X.Edge_Id);
744            begin
745               for J in Node_Y.First_Edge_Index .. Node_Y.Last_Edge_Index loop
746                  declare
747                     Edge_Y : constant Graphs.Edge := Self.Graph.Get_Edge (J);
748                     Sym_Y  : constant League.Character_Sets
749                       .Universal_Character_Set :=
750                         Self.Edge_Char_Set.Element (Edge_Y.Edge_Id);
751                     Jump_Y : constant State := Edge_Y.Target_Node.Index;
752                  begin
753                     if not
754                       League.Character_Sets.Is_Empty (Sym_X and Sym_Y)
755                     then
756                        if Equive (Jump_X) /= Equive (Jump_Y) then
757                           return False;
758                        else
759                           Sym_X := Sym_X - Sym_Y;
760                        end if;
761                     end if;
762                  end;
763               end loop;
764
765               if not Sym_X.Is_Empty
766                 and Equive (Jump_X) /= Equive (Error_State)
767               then
768                  return False;
769               end if;
770            end;
771         end loop;
772
773         return True;
774      end Check_Equive_Class;
775
776      Current_Equive_Class : State'Base;
777      Prev_Equive_Class    : State := 1;
778      Found                : Boolean;
779
780   begin
781      Init_Equive_Classes :
782         for J in 1 .. Last loop
783            if Self.Final.Contains (J) then
784               Equive (J) := State (Self.Final.Element (J) + 1);
785               Prev_Equive_Class := State'Max (Prev_Equive_Class, Equive (J));
786            end if;
787         end loop Init_Equive_Classes;
788
789      Try_Split_Equive_Classes :
790         loop
791            Current_Equive_Class := 0;
792
793            Set_Equive_Classes :
794               for I in 1 .. Last loop
795                  Found := False;
796
797                  Find_Existent_Class :
798                     for J in 1 .. I - 1 loop
799                        if Equive (I) = Equive (J)
800                          and then
801                          Self.Final.Contains (I) = Self.Final.Contains (J)
802                        then
803                           Found := Check_Equive_Class (I, J)
804                             and then Check_Equive_Class (J, I);
805
806                           if Found then
807                              Next_Equive (I) := Next_Equive (J);
808                              exit Find_Existent_Class;
809                           end if;
810                        end if;
811                     end loop Find_Existent_Class;
812
813                  if not Found then
814                     Current_Equive_Class := Current_Equive_Class + 1;
815                     Next_Equive (I) := Current_Equive_Class;
816                  end if;
817               end loop Set_Equive_Classes;
818
819            Current_Equive_Class := Current_Equive_Class + 1;
820            Next_Equive (Error_State) := Current_Equive_Class;
821
822            exit Try_Split_Equive_Classes
823              when Prev_Equive_Class = Current_Equive_Class;
824
825            Prev_Equive_Class := Current_Equive_Class;
826            Equive := Next_Equive;
827         end loop Try_Split_Equive_Classes;
828
829      --  Create_DFA
830
831      declare
832         procedure Each_Start (Cursor : Start_Maps.Cursor);
833
834         use Matreshka.Internals.Graphs.Constructor;
835         Result : Graph;
836         Edges  : Vectors.Vector;
837         Map    : State_Pair_Maps.Map;
838         Final  : State_Maps.Map;
839         Nodes  : array (1 .. Current_Equive_Class - 1) of Node;
840
841         ----------------
842         -- Each_Start --
843         ----------------
844
845         procedure Each_Start (Cursor : Start_Maps.Cursor) is
846            Old : constant State := Start_Maps.Element (Cursor);
847         begin
848            Self.Start.Replace_Element
849              (Cursor, Nodes (Equive (Old)).Index);
850         end Each_Start;
851
852      begin
853         for K in Nodes'Range loop
854            Nodes (K) := Result.New_Node;
855         end loop;
856
857         for I in 1 .. Last loop
858            declare
859               use type Ada.Containers.Count_Type;
860
861               procedure Append_Chars
862                 (X : in out League.Character_Sets.Universal_Character_Set);
863
864               Edge_J : Graphs.Edge;
865
866               ------------------
867               -- Append_Chars --
868               ------------------
869
870               procedure Append_Chars
871                 (X : in out League.Character_Sets.Universal_Character_Set)
872               is
873                  use type League.Character_Sets.Universal_Character_Set;
874               begin
875                  X := X or Self.Edge_Char_Set.Element (Edge_J.Edge_Id);
876               end Append_Chars;
877
878               Node_X : constant Graphs.Node := Self.Graph.Get_Node (I);
879               Edge   : Graphs.Edge_Identifier;
880               Pair   : State_Pair;
881               Cursor : State_Pair_Maps.Cursor;
882            begin
883               for J in Node_X.First_Edge_Index .. Node_X.Last_Edge_Index loop
884                  Edge_J := Self.Graph.Get_Edge (J);
885                  Pair (1) := Equive (I);
886                  Pair (2) := Equive (Edge_J.Target_Node.Index);
887                  Cursor := Map.Find (Pair);
888
889                  if State_Pair_Maps.Has_Element (Cursor) then
890                     Edges.Update_Element
891                       (State_Pair_Maps.Element (Cursor),
892                        Append_Chars'Access);
893                  else
894                     Edge := Nodes (Pair (1)).New_Edge (Nodes (Pair (2)));
895                     Map.Insert (Pair, Edge);
896                     Edges.Set_Length (Edges.Length + 1);
897
898                     Edges.Replace_Element
899                       (Edge,
900                        Self.Edge_Char_Set.Element (Edge_J.Edge_Id));
901                  end if;
902               end loop;
903
904               if Self.Final.Contains (I) then
905                  Final.Include
906                    (Nodes (Equive (I)).Index,
907                     Self.Final.Element (I));
908               end if;
909            end;
910         end loop;
911
912         Self.Start.Iterate (Each_Start'Access);
913         Self.Graph.Clear;
914         Result.Complete (Output => Self.Graph);
915         Self.Edge_Char_Set := Edges;
916         Self.Final := Final;
917      end;
918   end Minimize;
919
920   --------------
921   -- Nullable --
922   --------------
923
924   function Nullable
925     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
926      Root : Positive) return Boolean
927   is
928      Node : Matreshka.Internals.Regexps.Node renames AST.AST (Root);
929
930   begin
931      case Node.Kind is
932         when Matreshka.Internals.Regexps.N_None =>
933            raise Constraint_Error;
934
935         when Matreshka.Internals.Regexps.N_Subexpression =>
936            return Nullable_List (AST, Compiler.Get_Expression (AST, Root));
937
938         when Matreshka.Internals.Regexps.N_Match_Any |
939           Matreshka.Internals.Regexps.N_Match_Code |
940           Matreshka.Internals.Regexps.N_Match_Property |
941           Matreshka.Internals.Regexps.N_Character_Class |
942           Matreshka.Internals.Regexps.N_Anchor =>
943
944            return False;
945
946         when Matreshka.Internals.Regexps.N_Member_Code |
947           Matreshka.Internals.Regexps.N_Member_Property |
948           Matreshka.Internals.Regexps.N_Member_Range =>
949
950            raise Constraint_Error;
951
952         when Matreshka.Internals.Regexps.N_Multiplicity =>
953            return Node.Lower = 0 or else
954              Nullable_List (AST, Compiler.Get_Expression (AST, Root));
955
956         when Matreshka.Internals.Regexps.N_Alternation =>
957            return Nullable_List (AST, Compiler.Get_Preferred (AST, Root))
958              or else Nullable_List (AST, Compiler.Get_Fallback (AST, Root));
959      end case;
960   end Nullable;
961
962   -------------------
963   -- Nullable_List --
964   -------------------
965
966   function Nullable_List
967     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
968      Head : Positive)
969     return Boolean
970   is
971      Pos : Natural := Head;
972   begin
973      while Pos > 0 loop
974         if not Nullable (AST, Pos) then
975            return False;
976         end if;
977
978         Pos := Compiler.Get_Next_Sibling (AST, Pos);
979      end loop;
980
981      return True;
982   end Nullable_List;
983
984   ----------------------
985   -- To_Character_Set --
986   ----------------------
987
988   function To_Character_Set
989     (AST  : Matreshka.Internals.Regexps.Shared_Pattern_Access;
990      Node : Positive) return League.Character_Sets.Universal_Character_Set
991   is
992      use type League.Character_Sets.Universal_Character_Set;
993   begin
994      case AST.AST (Node).Kind is
995         when Matreshka.Internals.Regexps.N_None =>
996            return League.Character_Sets.Empty_Universal_Character_Set;
997
998         when Matreshka.Internals.Regexps.N_Match_Any =>
999            return not League.Character_Sets.Empty_Universal_Character_Set;
1000
1001         when Matreshka.Internals.Regexps.N_Member_Code |
1002           Matreshka.Internals.Regexps.N_Match_Code =>
1003            return League.Character_Sets.To_Set
1004              ((1 => Wide_Wide_Character'Val (AST.AST (Node).Code)));
1005
1006         when Matreshka.Internals.Regexps.N_Match_Property |
1007           Matreshka.Internals.Regexps.N_Member_Property =>
1008            declare
1009               Result : League.Character_Sets.Universal_Character_Set;
1010            begin
1011               case AST.AST (Node).Value.Kind is
1012                  when Matreshka.Internals.Regexps.None =>
1013                     raise Constraint_Error;
1014
1015                  when Matreshka.Internals.Regexps.General_Category =>
1016                     Result := League.Character_Sets.Internals.To_Set
1017                       (AST.AST (Node).Value.GC_Flags);
1018
1019                  when Matreshka.Internals.Regexps.Binary =>
1020                     Result := League.Character_Sets.Internals.To_Set
1021                       (AST.AST (Node).Value.Property);
1022               end case;
1023
1024               if AST.AST (Node).Negative then
1025                  return not Result;
1026               else
1027                  return Result;
1028               end if;
1029            end;
1030
1031         when Matreshka.Internals.Regexps.N_Member_Range =>
1032            return League.Character_Sets.To_Set
1033              (Low  => Wide_Wide_Character'Val (AST.AST (Node).Low),
1034               High => Wide_Wide_Character'Val (AST.AST (Node).High));
1035
1036         when Matreshka.Internals.Regexps.N_Character_Class =>
1037            declare
1038
1039               Index  : Natural :=
1040                 AST.List (AST.AST (Node).Members).Head;
1041               Result : League.Character_Sets.Universal_Character_Set;
1042            begin
1043               while Index > 0 loop
1044                  Result := Result or To_Character_Set (AST, Index);
1045                  Index := AST.AST (Index).Next;
1046               end loop;
1047
1048               if AST.AST (Node).Negated then
1049                  return not Result;
1050               else
1051                  return Result;
1052               end if;
1053            end;
1054
1055         when others =>
1056            raise Constraint_Error;
1057      end case;
1058   end To_Character_Set;
1059
1060end Matreshka.Internals.Finite_Automatons;
1061