1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                        S Y S T E M . R E G E X P                         --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--                     Copyright (C) 1999-2021, AdaCore                     --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32with Ada.Unchecked_Deallocation;
33with System.Case_Util;
34
35package body System.Regexp is
36
37   Initial_Max_States_In_Primary_Table : constant := 100;
38   --  Initial size for the number of states in the indefinite state
39   --  machine. The number of states will be increased as needed.
40   --
41   --  This is also used as the maximal number of meta states (groups of
42   --  states) in the secondary table.
43
44   Open_Paren    : constant Character := '(';
45   Close_Paren   : constant Character := ')';
46   Open_Bracket  : constant Character := '[';
47   Close_Bracket : constant Character := ']';
48
49   type State_Index is new Natural;
50   type Column_Index is new Natural;
51
52   type Regexp_Array is array
53     (State_Index range <>, Column_Index range <>) of State_Index;
54   --  First index is for the state number. Second index is for the character
55   --  type. Contents is the new State.
56
57   type Regexp_Array_Access is access Regexp_Array;
58   --  Use this type through the functions Set below, so that it can grow
59   --  dynamically depending on the needs.
60
61   type Mapping is array (Character'Range) of Column_Index;
62   --  Mapping between characters and column in the Regexp_Array
63
64   type Boolean_Array is array (State_Index range <>) of Boolean;
65
66   type Regexp_Value
67     (Alphabet_Size : Column_Index;
68      Num_States    : State_Index) is
69   record
70      Map            : Mapping;
71      Case_Sensitive : Boolean;
72      States         : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
73      Is_Final       : Boolean_Array (1 .. Num_States);
74   end record;
75   --  Deterministic finite-state machine
76
77   -----------------------
78   -- Local Subprograms --
79   -----------------------
80
81   procedure Set
82     (Table  : in out Regexp_Array_Access;
83      State  : State_Index;
84      Column : Column_Index;
85      Value  : State_Index);
86   --  Sets a value in the table. If the table is too small, reallocate it
87   --  dynamically so that (State, Column) is a valid index in it.
88
89   function Get
90     (Table  : Regexp_Array_Access;
91      State  : State_Index;
92      Column : Column_Index) return State_Index;
93   --  Returns the value in the table at (State, Column). If this index does
94   --  not exist in the table, returns zero.
95
96   procedure Free is new Ada.Unchecked_Deallocation
97     (Regexp_Array, Regexp_Array_Access);
98
99   ------------
100   -- Adjust --
101   ------------
102
103   procedure Adjust (R : in out Regexp) is
104      Tmp : Regexp_Access;
105   begin
106      if R.R /= null then
107         Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
108                                  Num_States    => R.R.Num_States);
109         Tmp.all := R.R.all;
110         R.R := Tmp;
111      end if;
112   end Adjust;
113
114   -------------
115   -- Compile --
116   -------------
117
118   function Compile
119     (Pattern        : String;
120      Glob           : Boolean := False;
121      Case_Sensitive : Boolean := True) return Regexp
122   is
123      S : String := Pattern;
124      --  The pattern which is really compiled (when the pattern is case
125      --  insensitive, we convert this string to lower-cases).
126
127      Map : Mapping := [others => 0];
128      --  Mapping between characters and columns in the tables
129
130      Alphabet_Size : Column_Index := 0;
131      --  Number of significant characters in the regular expression.
132      --  This total does not include special operators, such as *, (, ...
133
134      procedure Check_Well_Formed_Pattern;
135      --  Check that the pattern to compile is well-formed, so that subsequent
136      --  code can rely on this without performing each time the checks to
137      --  avoid accessing the pattern outside its bounds. However, not all
138      --  well-formedness rules are checked. In particular, rules about special
139      --  characters not being treated as regular characters are not checked.
140
141      procedure Create_Mapping;
142      --  Creates a mapping between characters in the regexp and columns
143      --  in the tables representing the regexp. Test that the regexp is
144      --  well-formed Modifies Alphabet_Size and Map
145
146      procedure Create_Primary_Table
147        (Table       : out Regexp_Array_Access;
148         Num_States  : out State_Index;
149         Start_State : out State_Index;
150         End_State   : out State_Index);
151      --  Creates the first version of the regexp (this is a non deterministic
152      --  finite state machine, which is unadapted for a fast pattern
153      --  matching algorithm). We use a recursive algorithm to process the
154      --  parenthesis sub-expressions.
155      --
156      --  Table : at the end of the procedure : Column 0 is for any character
157      --  ('.') and the last columns are for no character (closure). Num_States
158      --  is set to the number of states in the table Start_State is the number
159      --  of the starting state in the regexp End_State is the number of the
160      --  final state when the regexp matches.
161
162      procedure Create_Primary_Table_Glob
163        (Table       : out Regexp_Array_Access;
164         Num_States  : out State_Index;
165         Start_State : out State_Index;
166         End_State   : out State_Index);
167      --  Same function as above, but it deals with the second possible
168      --  grammar for 'globbing pattern', which is a kind of subset of the
169      --  whole regular expression grammar.
170
171      function Create_Secondary_Table
172        (First_Table : Regexp_Array_Access;
173         Start_State : State_Index;
174         End_State   : State_Index) return Regexp;
175      --  Creates the definitive table representing the regular expression
176      --  This is actually a transformation of the primary table First_Table,
177      --  where every state is grouped with the states in its 'no-character'
178      --  columns. The transitions between the new states are then recalculated
179      --  and if necessary some new states are created.
180      --
181      --  Note that the resulting finite-state machine is not optimized in
182      --  terms of the number of states : it would be more time-consuming to
183      --  add a third pass to reduce the number of states in the machine, with
184      --  no speed improvement...
185
186      procedure Raise_Exception (M : String; Index : Integer);
187      pragma No_Return (Raise_Exception);
188      --  Raise an exception, indicating an error at character Index in S
189
190      -------------------------------
191      -- Check_Well_Formed_Pattern --
192      -------------------------------
193
194      procedure Check_Well_Formed_Pattern is
195         J : Integer;
196
197         Past_Elmt : Boolean := False;
198         --  Set to True everywhere an elmt has been parsed, if Glob=False,
199         --  meaning there can be now an occurrence of '*', '+' and '?'.
200
201         Past_Term : Boolean := False;
202         --  Set to True everywhere a term has been parsed, if Glob=False,
203         --  meaning there can be now an occurrence of '|'.
204
205         Parenthesis_Level : Integer := 0;
206         Curly_Level       : Integer := 0;
207
208         Last_Open : Integer := S'First - 1;
209         --  The last occurrence of an opening parenthesis, if Glob=False,
210         --  or the last occurrence of an opening curly brace, if Glob=True.
211
212         procedure Find_Close_Bracket;
213         --  Go through the pattern to find a closing bracket. Raise an
214         --  exception if none is found.
215
216         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
217         --  If J + K > S'Last then call Raise_Exception
218
219         ------------------------
220         -- Find_Close_Bracket --
221         ------------------------
222
223         procedure Find_Close_Bracket is
224            Possible_Range_Start : Boolean := True;
225            --  Set True everywhere a range character '-' can occur
226
227         begin
228            loop
229               exit when S (J) = Close_Bracket;
230
231               Raise_Exception_If_No_More_Chars (1);
232               --  The current character is not a close_bracket, thus it should
233               --  be followed by at least one more char. If not, no close
234               --  bracket is present and the pattern is ill-formed.
235
236               if S (J) = '-' and then S (J + 1) /= Close_Bracket then
237                  if not Possible_Range_Start then
238                     Raise_Exception
239                        ("No mix of ranges is allowed in "
240                        & "regular expression", J);
241                  end if;
242
243                  J := J + 1;
244                  Raise_Exception_If_No_More_Chars (1);
245
246                  Possible_Range_Start := False;
247                  --  Range cannot be followed by '-' character,
248                  --  except as last character in the set.
249
250               else
251                  Possible_Range_Start := True;
252               end if;
253
254               if S (J) = '\' then
255                  J := J + 1;
256                  Raise_Exception_If_No_More_Chars (1);
257                  --  We ignore the next character and need to check we have
258                  --  one more available character. This is necessary for
259                  --  the erroneous [\] pattern which stands for [\]] or [\\].
260               end if;
261
262               J := J + 1;
263            end loop;
264         end Find_Close_Bracket;
265
266         --------------------------------------
267         -- Raise_Exception_If_No_More_Chars --
268         --------------------------------------
269
270         procedure Raise_Exception_If_No_More_Chars (K : Integer := 0) is
271         begin
272            if J + K > S'Last then
273               Raise_Exception ("Ill-formed pattern while parsing", J);
274            end if;
275         end Raise_Exception_If_No_More_Chars;
276
277      --  Start of processing for Check_Well_Formed_Pattern
278
279      begin
280         J := S'First;
281         while J <= S'Last loop
282            case S (J) is
283               when Open_Bracket =>
284                  J := J + 1;
285                  Raise_Exception_If_No_More_Chars;
286
287                  if not Glob then
288                     if S (J) = '^' then
289                        J := J + 1;
290                        Raise_Exception_If_No_More_Chars;
291                     end if;
292                  end if;
293
294                  --  Characters ']' and '-' are meant as literals when first
295                  --  in the list.  As such, they have no special meaning and
296                  --  we pass them.
297                  if S (J) = ']' or else S (J) = '-' then
298                     J := J + 1;
299                     Raise_Exception_If_No_More_Chars;
300                  end if;
301
302                  if S (J) = ']' then
303                     --  ??? This message is misleading since the check forbids
304                     --  the sets []] and [-] but not the empty set [].
305                     Raise_Exception
306                       ("Set of characters cannot be empty in regular "
307                          & "expression", J);
308                  end if;
309
310                  Find_Close_Bracket;
311
312                  --  A closing bracket can end an elmt or term
313
314                  Past_Elmt := True;
315                  Past_Term := True;
316
317               when Close_Bracket =>
318
319                  --  A close bracket must follow a open_bracket, and cannot be
320                  --  found alone on the line.
321
322                  Raise_Exception
323                    ("Incorrect character ']' in regular expression", J);
324
325               when '\' =>
326                  if J < S'Last then
327                     J := J + 1;
328
329                     --  Any character can be an elmt or a term
330
331                     Past_Elmt := True;
332                     Past_Term := True;
333
334                  else
335                     --  \ not allowed at the end of the regexp
336
337                     Raise_Exception
338                       ("Incorrect character '\' in regular expression", J);
339                  end if;
340
341               when Open_Paren =>
342                  if not Glob then
343                     Parenthesis_Level := Parenthesis_Level + 1;
344                     Last_Open := J;
345
346                     --  An open parenthesis does not end an elmt or term
347
348                     Past_Elmt := False;
349                     Past_Term := False;
350                  end if;
351
352               when Close_Paren =>
353                  if not Glob then
354                     Parenthesis_Level := Parenthesis_Level - 1;
355
356                     if Parenthesis_Level < 0 then
357                        Raise_Exception
358                          ("')' is not associated with '(' in regular "
359                           & "expression", J);
360                     end if;
361
362                     if J = Last_Open + 1 then
363                        Raise_Exception
364                          ("Empty parentheses not allowed in regular "
365                           & "expression", J);
366                     end if;
367
368                     if not Past_Term then
369                        Raise_Exception
370                          ("Closing parenthesis not allowed here in regular "
371                             & "expression", J);
372                     end if;
373
374                     --  A closing parenthesis can end an elmt or term
375
376                     Past_Elmt := True;
377                     Past_Term := True;
378                  end if;
379
380               when '{' =>
381                  if Glob then
382                     Curly_Level := Curly_Level + 1;
383                     Last_Open := J;
384
385                  else
386                     --  Any character can be an elmt or a term
387
388                     Past_Elmt := True;
389                     Past_Term := True;
390                  end if;
391
392                  --  No need to check for ',' as the code always accepts them
393
394               when '}' =>
395                  if Glob then
396                     Curly_Level := Curly_Level - 1;
397
398                     if Curly_Level < 0 then
399                        Raise_Exception
400                          ("'}' is not associated with '{' in regular "
401                           & "expression", J);
402                     end if;
403
404                     if J = Last_Open + 1 then
405                        Raise_Exception
406                          ("Empty curly braces not allowed in regular "
407                           & "expression", J);
408                     end if;
409
410                  else
411                     --  Any character can be an elmt or a term
412
413                     Past_Elmt := True;
414                     Past_Term := True;
415                  end if;
416
417               when '*' | '?' | '+' =>
418                  if not Glob then
419
420                     --  These operators must apply to an elmt sub-expression,
421                     --  and cannot be found if one has not just been parsed.
422
423                     if not Past_Elmt then
424                        Raise_Exception
425                          ("'*', '+' and '?' operators must be "
426                           & "applied to an element in regular expression", J);
427                     end if;
428
429                     Past_Elmt := False;
430                     Past_Term := True;
431                  end if;
432
433               when '|' =>
434                  if not Glob then
435
436                     --  This operator must apply to a term sub-expression,
437                     --  and cannot be found if one has not just been parsed.
438
439                     if not Past_Term then
440                        Raise_Exception
441                          ("'|' operator must be "
442                           & "applied to a term in regular expression", J);
443                     end if;
444
445                     --  A second term must follow
446                     Raise_Exception_If_No_More_Chars (K => 1);
447
448                     Past_Elmt := False;
449                     Past_Term := False;
450                  end if;
451
452               when others =>
453                  if not Glob then
454
455                     --  Any character can be an elmt or a term
456
457                     Past_Elmt := True;
458                     Past_Term := True;
459                  end if;
460            end case;
461
462            J := J + 1;
463         end loop;
464
465         --  A closing parenthesis must follow an open parenthesis
466
467         if Parenthesis_Level /= 0 then
468            Raise_Exception
469              ("'(' must always be associated with a ')'", J);
470         end if;
471
472         --  A closing curly brace must follow an open curly brace
473
474         if Curly_Level /= 0 then
475            Raise_Exception
476              ("'{' must always be associated with a '}'", J);
477         end if;
478      end Check_Well_Formed_Pattern;
479
480      --------------------
481      -- Create_Mapping --
482      --------------------
483
484      procedure Create_Mapping is
485
486         procedure Add_In_Map (C : Character);
487         --  Add a character in the mapping, if it is not already defined
488
489         ----------------
490         -- Add_In_Map --
491         ----------------
492
493         procedure Add_In_Map (C : Character) is
494         begin
495            if Map (C) = 0 then
496               Alphabet_Size := Alphabet_Size + 1;
497               Map (C) := Alphabet_Size;
498            end if;
499         end Add_In_Map;
500
501         J                 : Integer := S'First;
502         Parenthesis_Level : Integer := 0;
503         Curly_Level       : Integer := 0;
504         Last_Open         : Integer := S'First - 1;
505
506      --  Start of processing for Create_Mapping
507
508      begin
509         while J <= S'Last loop
510            case S (J) is
511               when Open_Bracket =>
512                  J := J + 1;
513
514                  if S (J) = '^' then
515                     J := J + 1;
516                  end if;
517
518                  if S (J) = ']' or else S (J) = '-' then
519                     J := J + 1;
520                  end if;
521
522                  --  The first character never has a special meaning
523
524                  loop
525                     if J > S'Last then
526                        Raise_Exception
527                          ("Ran out of characters while parsing ", J);
528                     end if;
529
530                     exit when S (J) = Close_Bracket;
531
532                     if S (J) = '-'
533                       and then S (J + 1) /= Close_Bracket
534                     then
535                        declare
536                           Start : constant Integer := J - 1;
537
538                        begin
539                           J := J + 1;
540
541                           if S (J) = '\' then
542                              J := J + 1;
543                           end if;
544
545                           for Char in S (Start) .. S (J) loop
546                              Add_In_Map (Char);
547                           end loop;
548                        end;
549                     else
550                        if S (J) = '\' then
551                           J := J + 1;
552                        end if;
553
554                        Add_In_Map (S (J));
555                     end if;
556
557                     J := J + 1;
558                  end loop;
559
560                  --  A close bracket must follow a open_bracket and cannot be
561                  --  found alone on the line
562
563               when Close_Bracket =>
564                  Raise_Exception
565                    ("Incorrect character ']' in regular expression", J);
566
567               when '\' =>
568                  if J < S'Last then
569                     J := J + 1;
570                     Add_In_Map (S (J));
571
572                  else
573                     --  Back slash \ not allowed at the end of the regexp
574
575                     Raise_Exception
576                       ("Incorrect character '\' in regular expression", J);
577                  end if;
578
579               when Open_Paren =>
580                  if not Glob then
581                     Parenthesis_Level := Parenthesis_Level + 1;
582                     Last_Open := J;
583                  else
584                     Add_In_Map (Open_Paren);
585                  end if;
586
587               when Close_Paren =>
588                  if not Glob then
589                     Parenthesis_Level := Parenthesis_Level - 1;
590
591                     if Parenthesis_Level < 0 then
592                        Raise_Exception
593                          ("')' is not associated with '(' in regular "
594                           & "expression", J);
595                     end if;
596
597                     if J = Last_Open + 1 then
598                        Raise_Exception
599                          ("Empty parenthesis not allowed in regular "
600                           & "expression", J);
601                     end if;
602
603                  else
604                     Add_In_Map (Close_Paren);
605                  end if;
606
607               when '.' =>
608                  if Glob then
609                     Add_In_Map ('.');
610                  end if;
611
612               when '{' =>
613                  if not Glob then
614                     Add_In_Map (S (J));
615                  else
616                     Curly_Level := Curly_Level + 1;
617                  end if;
618
619               when '}' =>
620                  if not Glob then
621                     Add_In_Map (S (J));
622                  else
623                     Curly_Level := Curly_Level - 1;
624                  end if;
625
626               when '*' | '?' =>
627                  if not Glob then
628                     if J = S'First then
629                        Raise_Exception
630                          ("'*', '+', '?' and '|' operators cannot be in "
631                           & "first position in regular expression", J);
632                     end if;
633                  end if;
634
635               when '|' | '+' =>
636                  if not Glob then
637                     if J = S'First then
638
639                        --  These operators must apply to a sub-expression,
640                        --  and cannot be found at the beginning of the line
641
642                        Raise_Exception
643                          ("'*', '+', '?' and '|' operators cannot be in "
644                           & "first position in regular expression", J);
645                     end if;
646
647                  else
648                     Add_In_Map (S (J));
649                  end if;
650
651               when others =>
652                  Add_In_Map (S (J));
653            end case;
654
655            J := J + 1;
656         end loop;
657
658         --  A closing parenthesis must follow an open parenthesis
659
660         if Parenthesis_Level /= 0 then
661            Raise_Exception
662              ("'(' must always be associated with a ')'", J);
663         end if;
664
665         if Curly_Level /= 0 then
666            Raise_Exception
667              ("'{' must always be associated with a '}'", J);
668         end if;
669      end Create_Mapping;
670
671      --------------------------
672      -- Create_Primary_Table --
673      --------------------------
674
675      procedure Create_Primary_Table
676        (Table       : out Regexp_Array_Access;
677         Num_States  : out State_Index;
678         Start_State : out State_Index;
679         End_State   : out State_Index)
680      is
681         Empty_Char : constant Column_Index := Alphabet_Size + 1;
682
683         Current_State : State_Index := 0;
684         --  Index of the last created state
685
686         procedure Add_Empty_Char
687           (State    : State_Index;
688            To_State : State_Index);
689         --  Add a empty-character transition from State to To_State
690
691         procedure Create_Repetition
692           (Repetition : Character;
693            Start_Prev : State_Index;
694            End_Prev   : State_Index;
695            New_Start  : out State_Index;
696            New_End    : in out State_Index);
697         --  Create the table in case we have a '*', '+' or '?'.
698         --  Start_Prev .. End_Prev should indicate respectively the start and
699         --  end index of the previous expression, to which '*', '+' or '?' is
700         --  applied.
701
702         procedure Create_Simple
703           (Start_Index : Integer;
704            End_Index   : Integer;
705            Start_State : out State_Index;
706            End_State   : out State_Index);
707         --  Fill the table for the regexp Simple. This is the recursive
708         --  procedure called to handle () expressions If End_State = 0, then
709         --  the call to Create_Simple creates an independent regexp, not a
710         --  concatenation Start_Index .. End_Index is the starting index in
711         --  the string S.
712         --
713         --  Warning: it may look like we are creating too many empty-string
714         --  transitions, but they are needed to get the correct regexp.
715         --  The table is filled as follow ( s means start-state, e means
716         --  end-state) :
717         --
718         --  regexp   state_num | a b * empty_string
719         --  -------  ------------------------------
720         --    a          1 (s) | 2 - - -
721         --               2 (e) | - - - -
722         --
723         --    ab         1 (s) | 2 - - -
724         --               2     | - - - 3
725         --               3     | - 4 - -
726         --               4 (e) | - - - -
727         --
728         --    a|b        1     | 2 - - -
729         --               2     | - - - 6
730         --               3     | - 4 - -
731         --               4     | - - - 6
732         --               5 (s) | - - - 1,3
733         --               6 (e) | - - - -
734         --
735         --    a*         1     | 2 - - -
736         --               2     | - - - 4
737         --               3 (s) | - - - 1,4
738         --               4 (e) | - - - 3
739         --
740         --    (a)        1 (s) | 2 - - -
741         --               2 (e) | - - - -
742         --
743         --    a+         1     | 2 - - -
744         --               2     | - - - 4
745         --               3 (s) | - - - 1
746         --               4 (e) | - - - 3
747         --
748         --    a?         1     | 2 - - -
749         --               2     | - - - 4
750         --               3 (s) | - - - 1,4
751         --               4 (e) | - - - -
752         --
753         --    .          1 (s) | 2 2 2 -
754         --               2 (e) | - - - -
755
756         function Next_Sub_Expression
757           (Start_Index : Integer;
758            End_Index   : Integer) return Integer;
759         --  Returns the index of the last character of the next sub-expression
760         --  in Simple. Index cannot be greater than End_Index.
761
762         --------------------
763         -- Add_Empty_Char --
764         --------------------
765
766         procedure Add_Empty_Char
767           (State    : State_Index;
768            To_State : State_Index)
769         is
770            J : Column_Index := Empty_Char;
771
772         begin
773            while Get (Table, State, J) /= 0 loop
774               J := J + 1;
775            end loop;
776
777            Set (Table, State, J, To_State);
778         end Add_Empty_Char;
779
780         -----------------------
781         -- Create_Repetition --
782         -----------------------
783
784         procedure Create_Repetition
785           (Repetition : Character;
786            Start_Prev : State_Index;
787            End_Prev   : State_Index;
788            New_Start  : out State_Index;
789            New_End    : in out State_Index)
790         is
791         begin
792            New_Start := Current_State + 1;
793
794            if New_End /= 0 then
795               Add_Empty_Char (New_End, New_Start);
796            end if;
797
798            Current_State := Current_State + 2;
799            New_End   := Current_State;
800
801            Add_Empty_Char (End_Prev, New_End);
802            Add_Empty_Char (New_Start, Start_Prev);
803
804            if Repetition /= '+' then
805               Add_Empty_Char (New_Start, New_End);
806            end if;
807
808            if Repetition /= '?' then
809               Add_Empty_Char (New_End, New_Start);
810            end if;
811         end Create_Repetition;
812
813         -------------------
814         -- Create_Simple --
815         -------------------
816
817         procedure Create_Simple
818           (Start_Index : Integer;
819            End_Index   : Integer;
820            Start_State : out State_Index;
821            End_State   : out State_Index)
822         is
823            J          : Integer := Start_Index;
824            Last_Start : State_Index := 0;
825
826         begin
827            Start_State := 0;
828            End_State   := 0;
829            while J <= End_Index loop
830               case S (J) is
831                  when Open_Paren =>
832                     declare
833                        J_Start    : constant Integer := J + 1;
834                        Next_Start : State_Index;
835                        Next_End   : State_Index;
836
837                     begin
838                        J := Next_Sub_Expression (J, End_Index);
839                        Create_Simple (J_Start, J - 1, Next_Start, Next_End);
840
841                        if J < End_Index
842                          and then (S (J + 1) = '*' or else
843                                    S (J + 1) = '+' or else
844                                    S (J + 1) = '?')
845                        then
846                           J := J + 1;
847                           Create_Repetition
848                             (S (J),
849                              Next_Start,
850                              Next_End,
851                              Last_Start,
852                              End_State);
853
854                        else
855                           Last_Start := Next_Start;
856
857                           if End_State /= 0 then
858                              Add_Empty_Char (End_State, Last_Start);
859                           end if;
860
861                           End_State := Next_End;
862                        end if;
863                     end;
864
865                  when '|' =>
866                     declare
867                        Start_Prev : constant State_Index := Start_State;
868                        End_Prev   : constant State_Index := End_State;
869                        Start_J    : constant Integer     := J + 1;
870                        Start_Next : State_Index := 0;
871                        End_Next   : State_Index := 0;
872
873                     begin
874                        J := Next_Sub_Expression (J, End_Index);
875
876                        --  Create a new state for the start of the alternative
877
878                        Current_State := Current_State + 1;
879                        Last_Start := Current_State;
880                        Start_State := Last_Start;
881
882                        --  Create the tree for the second part of alternative
883
884                        Create_Simple (Start_J, J, Start_Next, End_Next);
885
886                        --  Create the end state
887
888                        Add_Empty_Char (Last_Start, Start_Next);
889                        Add_Empty_Char (Last_Start, Start_Prev);
890                        Current_State := Current_State + 1;
891                        End_State := Current_State;
892                        Add_Empty_Char (End_Prev, End_State);
893                        Add_Empty_Char (End_Next, End_State);
894                     end;
895
896                  when Open_Bracket =>
897                     Current_State := Current_State + 1;
898
899                     declare
900                        Next_State : State_Index := Current_State + 1;
901
902                     begin
903                        J := J + 1;
904
905                        if S (J) = '^' then
906                           J := J + 1;
907
908                           Next_State := 0;
909
910                           for Column in 0 .. Alphabet_Size loop
911                              Set (Table, Current_State, Column,
912                                   Value => Current_State + 1);
913                           end loop;
914                        end if;
915
916                        --  Automatically add the first character
917
918                        if S (J) = '-' or else S (J) = ']' then
919                           Set (Table, Current_State, Map (S (J)),
920                                Value => Next_State);
921                           J := J + 1;
922                        end if;
923
924                        --  Loop till closing bracket found
925
926                        loop
927                           exit when S (J) = Close_Bracket;
928
929                           if S (J) = '-'
930                             and then S (J + 1) /= ']'
931                           then
932                              declare
933                                 Start : constant Integer := J - 1;
934
935                              begin
936                                 J := J + 1;
937
938                                 if S (J) = '\' then
939                                    J := J + 1;
940                                 end if;
941
942                                 for Char in S (Start) .. S (J) loop
943                                    Set (Table, Current_State, Map (Char),
944                                         Value => Next_State);
945                                 end loop;
946                              end;
947
948                           else
949                              if S (J) = '\' then
950                                 J := J + 1;
951                              end if;
952
953                              Set (Table, Current_State, Map (S (J)),
954                                   Value => Next_State);
955                           end if;
956                           J := J + 1;
957                        end loop;
958                     end;
959
960                     Current_State := Current_State + 1;
961
962                     --  If the next symbol is a special symbol
963
964                     if J < End_Index
965                       and then (S (J + 1) = '*' or else
966                                 S (J + 1) = '+' or else
967                                 S (J + 1) = '?')
968                     then
969                        J := J + 1;
970                        Create_Repetition
971                          (S (J),
972                           Current_State - 1,
973                           Current_State,
974                           Last_Start,
975                           End_State);
976
977                     else
978                        Last_Start := Current_State - 1;
979
980                        if End_State /= 0 then
981                           Add_Empty_Char (End_State, Last_Start);
982                        end if;
983
984                        End_State := Current_State;
985                     end if;
986
987                  when Close_Bracket
988                     | Close_Paren
989                     | '*' | '+' | '?'
990                  =>
991                     Raise_Exception
992                       ("Incorrect character in regular expression :", J);
993
994                  when others =>
995                     Current_State := Current_State + 1;
996
997                     --  Create the state for the symbol S (J)
998
999                     if S (J) = '.' then
1000                        for K in 0 .. Alphabet_Size loop
1001                           Set (Table, Current_State, K,
1002                                Value => Current_State + 1);
1003                        end loop;
1004
1005                     else
1006                        if S (J) = '\' then
1007                           J := J + 1;
1008                        end if;
1009
1010                        Set (Table, Current_State, Map (S (J)),
1011                             Value => Current_State + 1);
1012                     end if;
1013
1014                     Current_State := Current_State + 1;
1015
1016                     --  If the next symbol is a special symbol
1017
1018                     if J < End_Index
1019                       and then (S (J + 1) = '*' or else
1020                                 S (J + 1) = '+' or else
1021                                 S (J + 1) = '?')
1022                     then
1023                        J := J + 1;
1024                        Create_Repetition
1025                          (S (J),
1026                           Current_State - 1,
1027                           Current_State,
1028                           Last_Start,
1029                           End_State);
1030
1031                     else
1032                        Last_Start := Current_State - 1;
1033
1034                        if End_State /= 0 then
1035                           Add_Empty_Char (End_State, Last_Start);
1036                        end if;
1037
1038                        End_State := Current_State;
1039                     end if;
1040               end case;
1041
1042               if Start_State = 0 then
1043                  Start_State := Last_Start;
1044               end if;
1045
1046               J := J + 1;
1047            end loop;
1048         end Create_Simple;
1049
1050         -------------------------
1051         -- Next_Sub_Expression --
1052         -------------------------
1053
1054         function Next_Sub_Expression
1055           (Start_Index : Integer;
1056            End_Index   : Integer) return Integer
1057         is
1058            J              : Integer := Start_Index;
1059            Start_On_Alter : Boolean := False;
1060
1061         begin
1062            if S (J) = '|' then
1063               Start_On_Alter := True;
1064            end if;
1065
1066            loop
1067               exit when J = End_Index;
1068               J := J + 1;
1069
1070               case S (J) is
1071                  when '\' =>
1072                     J := J + 1;
1073
1074                  when Open_Bracket =>
1075                     loop
1076                        J := J + 1;
1077                        exit when S (J) = Close_Bracket;
1078
1079                        if S (J) = '\' then
1080                           J := J + 1;
1081                        end if;
1082                     end loop;
1083
1084                  when Open_Paren =>
1085                     J := Next_Sub_Expression (J, End_Index);
1086
1087                  when Close_Paren =>
1088                     return J;
1089
1090                  when '|' =>
1091                     if Start_On_Alter then
1092                        return J - 1;
1093                     end if;
1094
1095                  when others =>
1096                     null;
1097               end case;
1098            end loop;
1099
1100            return J;
1101         end Next_Sub_Expression;
1102
1103      --  Start of processing for Create_Primary_Table
1104
1105      begin
1106         Table.all := [others => [others => 0]];
1107         Create_Simple (S'First, S'Last, Start_State, End_State);
1108         Num_States := Current_State;
1109      end Create_Primary_Table;
1110
1111      -------------------------------
1112      -- Create_Primary_Table_Glob --
1113      -------------------------------
1114
1115      procedure Create_Primary_Table_Glob
1116        (Table       : out Regexp_Array_Access;
1117         Num_States  : out State_Index;
1118         Start_State : out State_Index;
1119         End_State   : out State_Index)
1120      is
1121         Empty_Char : constant Column_Index := Alphabet_Size + 1;
1122
1123         Current_State : State_Index := 0;
1124         --  Index of the last created state
1125
1126         procedure Add_Empty_Char
1127           (State    : State_Index;
1128            To_State : State_Index);
1129         --  Add a empty-character transition from State to To_State
1130
1131         procedure Create_Simple
1132           (Start_Index : Integer;
1133            End_Index   : Integer;
1134            Start_State : out State_Index;
1135            End_State   : out State_Index);
1136         --  Fill the table for the S (Start_Index .. End_Index).
1137         --  This is the recursive procedure called to handle () expressions
1138
1139         --------------------
1140         -- Add_Empty_Char --
1141         --------------------
1142
1143         procedure Add_Empty_Char
1144           (State    : State_Index;
1145            To_State : State_Index)
1146         is
1147            J : Column_Index;
1148
1149         begin
1150            J := Empty_Char;
1151            while Get (Table, State, J) /= 0 loop
1152               J := J + 1;
1153            end loop;
1154
1155            Set (Table, State, J, Value => To_State);
1156         end Add_Empty_Char;
1157
1158         -------------------
1159         -- Create_Simple --
1160         -------------------
1161
1162         procedure Create_Simple
1163           (Start_Index : Integer;
1164            End_Index   : Integer;
1165            Start_State : out State_Index;
1166            End_State   : out State_Index)
1167         is
1168            J          : Integer;
1169            Last_Start : State_Index := 0;
1170
1171         begin
1172            Start_State := 0;
1173            End_State   := 0;
1174
1175            J := Start_Index;
1176            while J <= End_Index loop
1177               case S (J) is
1178                  when Open_Bracket =>
1179                     Current_State := Current_State + 1;
1180
1181                     declare
1182                        Next_State : State_Index := Current_State + 1;
1183
1184                     begin
1185                        J := J + 1;
1186
1187                        if S (J) = '^' then
1188                           J := J + 1;
1189                           Next_State := 0;
1190
1191                           for Column in 0 .. Alphabet_Size loop
1192                              Set (Table, Current_State, Column,
1193                                   Value => Current_State + 1);
1194                           end loop;
1195                        end if;
1196
1197                        --  Automatically add the first character
1198
1199                        if S (J) = '-' or else S (J) = ']' then
1200                           Set (Table, Current_State, Map (S (J)),
1201                                Value => Current_State);
1202                           J := J + 1;
1203                        end if;
1204
1205                        --  Loop till closing bracket found
1206
1207                        loop
1208                           exit when S (J) = Close_Bracket;
1209
1210                           if S (J) = '-'
1211                             and then S (J + 1) /= ']'
1212                           then
1213                              declare
1214                                 Start : constant Integer := J - 1;
1215
1216                              begin
1217                                 J := J + 1;
1218
1219                                 if S (J) = '\' then
1220                                    J := J + 1;
1221                                 end if;
1222
1223                                 for Char in S (Start) .. S (J) loop
1224                                    Set (Table, Current_State, Map (Char),
1225                                         Value => Next_State);
1226                                 end loop;
1227                              end;
1228
1229                           else
1230                              if S (J) = '\' then
1231                                 J := J + 1;
1232                              end if;
1233
1234                              Set (Table, Current_State, Map (S (J)),
1235                                   Value => Next_State);
1236                           end if;
1237                           J := J + 1;
1238                        end loop;
1239                     end;
1240
1241                     Last_Start := Current_State;
1242                     Current_State := Current_State + 1;
1243
1244                     if End_State /= 0 then
1245                        Add_Empty_Char (End_State, Last_Start);
1246                     end if;
1247
1248                     End_State := Current_State;
1249
1250                  when '{' =>
1251                     declare
1252                        End_Sub          : Integer;
1253                        Start_Regexp_Sub : State_Index;
1254                        End_Regexp_Sub   : State_Index;
1255                        Create_Start     : State_Index := 0;
1256
1257                        Create_End : State_Index := 0;
1258                        --  Initialized to avoid junk warning
1259
1260                     begin
1261                        while S (J) /= '}' loop
1262
1263                           --  First step : find sub pattern
1264
1265                           End_Sub := J + 1;
1266                           while S (End_Sub) /= ','
1267                             and then S (End_Sub) /= '}'
1268                           loop
1269                              End_Sub := End_Sub + 1;
1270                           end loop;
1271
1272                           --  Second step : create a sub pattern
1273
1274                           Create_Simple
1275                             (J + 1,
1276                              End_Sub - 1,
1277                              Start_Regexp_Sub,
1278                              End_Regexp_Sub);
1279
1280                           J := End_Sub;
1281
1282                           --  Third step : create an alternative
1283
1284                           if Create_Start = 0 then
1285                              Current_State := Current_State + 1;
1286                              Create_Start := Current_State;
1287                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1288                              Current_State := Current_State + 1;
1289                              Create_End := Current_State;
1290                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1291
1292                           else
1293                              Current_State := Current_State + 1;
1294                              Add_Empty_Char (Current_State, Create_Start);
1295                              Create_Start := Current_State;
1296                              Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1297                              Add_Empty_Char (End_Regexp_Sub, Create_End);
1298                           end if;
1299                        end loop;
1300
1301                        if End_State /= 0 then
1302                           Add_Empty_Char (End_State, Create_Start);
1303                        end if;
1304
1305                        End_State := Create_End;
1306                        Last_Start := Create_Start;
1307                     end;
1308
1309                  when '*' =>
1310                     Current_State := Current_State + 1;
1311
1312                     if End_State /= 0 then
1313                        Add_Empty_Char (End_State, Current_State);
1314                     end if;
1315
1316                     Add_Empty_Char (Current_State, Current_State + 1);
1317                     Add_Empty_Char (Current_State, Current_State + 3);
1318                     Last_Start := Current_State;
1319
1320                     Current_State := Current_State + 1;
1321
1322                     for K in 0 .. Alphabet_Size loop
1323                        Set (Table, Current_State, K,
1324                             Value => Current_State + 1);
1325                     end loop;
1326
1327                     Current_State := Current_State + 1;
1328                     Add_Empty_Char (Current_State, Current_State + 1);
1329
1330                     Current_State := Current_State + 1;
1331                     Add_Empty_Char (Current_State,  Last_Start);
1332                     End_State := Current_State;
1333
1334                  when others =>
1335                     Current_State := Current_State + 1;
1336
1337                     if S (J) = '?' then
1338                        for K in 0 .. Alphabet_Size loop
1339                           Set (Table, Current_State, K,
1340                                Value => Current_State + 1);
1341                        end loop;
1342
1343                     else
1344                        if S (J) = '\' then
1345                           J := J + 1;
1346                        end if;
1347
1348                        --  Create the state for the symbol S (J)
1349
1350                        Set (Table, Current_State, Map (S (J)),
1351                             Value => Current_State + 1);
1352                     end if;
1353
1354                     Last_Start := Current_State;
1355                     Current_State := Current_State + 1;
1356
1357                     if End_State /= 0 then
1358                        Add_Empty_Char (End_State, Last_Start);
1359                     end if;
1360
1361                     End_State := Current_State;
1362               end case;
1363
1364               if Start_State = 0 then
1365                  Start_State := Last_Start;
1366               end if;
1367
1368               J := J + 1;
1369            end loop;
1370         end Create_Simple;
1371
1372      --  Start of processing for Create_Primary_Table_Glob
1373
1374      begin
1375         Table.all := [others => [others => 0]];
1376         Create_Simple (S'First, S'Last, Start_State, End_State);
1377         Num_States := Current_State;
1378      end Create_Primary_Table_Glob;
1379
1380      ----------------------------
1381      -- Create_Secondary_Table --
1382      ----------------------------
1383
1384      function Create_Secondary_Table
1385        (First_Table : Regexp_Array_Access;
1386         Start_State : State_Index;
1387         End_State   : State_Index) return Regexp
1388      is
1389         Last_Index : constant State_Index := First_Table'Last (1);
1390
1391         type Meta_State is array (0 .. Last_Index) of Boolean;
1392         pragma Pack (Meta_State);
1393         --  Whether a state from first_table belongs to a metastate.
1394
1395         No_States : constant Meta_State := [others => False];
1396
1397         type Meta_States_Array is array (State_Index range <>) of Meta_State;
1398         type Meta_States_List is access all Meta_States_Array;
1399         procedure Unchecked_Free is new Ada.Unchecked_Deallocation
1400            (Meta_States_Array, Meta_States_List);
1401         Meta_States : Meta_States_List;
1402         --  Components of meta-states. A given state might belong to
1403         --  several meta-states.
1404         --  This array grows dynamically.
1405
1406         type Char_To_State is array (0 .. Alphabet_Size) of State_Index;
1407         type Meta_States_Transition_Arr is
1408            array (State_Index range <>) of Char_To_State;
1409         type Meta_States_Transition is access all Meta_States_Transition_Arr;
1410         procedure Unchecked_Free is new Ada.Unchecked_Deallocation
1411           (Meta_States_Transition_Arr, Meta_States_Transition);
1412         Table : Meta_States_Transition;
1413         --  Documents the transitions between each meta-state. The
1414         --  first index is the meta-state, the second column is the
1415         --  character seen in the input, the value is the new meta-state.
1416
1417         Temp_State_Not_Null : Boolean;
1418
1419         Current_State       : State_Index := 1;
1420         --  The current meta-state we are creating
1421
1422         Nb_State            : State_Index := 1;
1423         --  The total number of meta-states created so far.
1424
1425         procedure Closure
1426           (Meta_State : State_Index;
1427            State      : State_Index);
1428         --  Compute the closure of the state (that is every other state which
1429         --  has a empty-character transition) and add it to the state
1430
1431         procedure Ensure_Meta_State (Meta : State_Index);
1432         --  grows the Meta_States array as needed to make sure that there
1433         --  is enough space to store the new meta state.
1434
1435         -----------------------
1436         -- Ensure_Meta_State --
1437         -----------------------
1438
1439         procedure Ensure_Meta_State (Meta : State_Index) is
1440            Tmp  : Meta_States_List       := Meta_States;
1441            Tmp2 : Meta_States_Transition := Table;
1442
1443         begin
1444            if Meta_States = null then
1445               Meta_States := new Meta_States_Array
1446                  (1 .. State_Index'Max (Last_Index, Meta) + 1);
1447               Meta_States (Meta_States'Range) := [others => No_States];
1448
1449               Table := new Meta_States_Transition_Arr
1450                  (1 .. State_Index'Max (Last_Index, Meta) + 1);
1451               Table.all := [others => [others => 0]];
1452
1453            elsif Meta > Meta_States'Last then
1454               Meta_States := new Meta_States_Array
1455                  (1 .. State_Index'Max (2 * Tmp'Last, Meta));
1456               Meta_States (Tmp'Range) := Tmp.all;
1457               Meta_States (Tmp'Last + 1 .. Meta_States'Last) :=
1458                  [others => No_States];
1459               Unchecked_Free (Tmp);
1460
1461               Table := new Meta_States_Transition_Arr
1462                  (1 .. State_Index'Max (2 * Tmp2'Last, Meta) + 1);
1463               Table (Tmp2'Range) := Tmp2.all;
1464               Table (Tmp2'Last + 1 .. Table'Last) :=
1465                  [others => [others => 0]];
1466               Unchecked_Free (Tmp2);
1467            end if;
1468         end Ensure_Meta_State;
1469
1470         -------------
1471         -- Closure --
1472         -------------
1473
1474         procedure Closure
1475           (Meta_State : State_Index;
1476            State      : State_Index)
1477         is
1478         begin
1479            if not Meta_States (Meta_State)(State) then
1480               Meta_States (Meta_State)(State) := True;
1481
1482               --  For each transition on empty-character
1483
1484               for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1485                  exit when First_Table (State, Column) = 0;
1486                  Closure (Meta_State, First_Table (State, Column));
1487               end loop;
1488            end if;
1489         end Closure;
1490
1491      --  Start of processing for Create_Secondary_Table
1492
1493      begin
1494         --  Create a new state
1495
1496         Ensure_Meta_State (Current_State);
1497         Closure (Current_State, Start_State);
1498
1499         while Current_State <= Nb_State loop
1500
1501            --  We will be trying, below, to create the next meta-state
1502
1503            Ensure_Meta_State (Nb_State + 1);
1504
1505            --  For every character in the regexp, calculate the possible
1506            --  transitions from Current_State.
1507
1508            for Column in 0 .. Alphabet_Size loop
1509               Temp_State_Not_Null := False;
1510
1511               for K in Meta_States (Current_State)'Range loop
1512                  if Meta_States (Current_State)(K)
1513                    and then First_Table (K, Column) /= 0
1514                  then
1515                     Closure (Nb_State + 1, First_Table (K, Column));
1516                     Temp_State_Not_Null := True;
1517                  end if;
1518               end loop;
1519
1520               --  If at least one transition existed
1521
1522               if Temp_State_Not_Null then
1523
1524                  --  Check if this new state corresponds to an old one
1525
1526                  for K in 1 .. Nb_State loop
1527                     if Meta_States (K) = Meta_States (Nb_State + 1) then
1528                        Table (Current_State)(Column) := K;
1529
1530                        --  Reset data, for the next time we try that state
1531
1532                        Meta_States (Nb_State + 1) := No_States;
1533                        exit;
1534                     end if;
1535                  end loop;
1536
1537                  --  If not, create a new state
1538
1539                  if Table (Current_State)(Column) = 0 then
1540                     Nb_State := Nb_State + 1;
1541                     Ensure_Meta_State (Nb_State + 1);
1542                     Table (Current_State)(Column) := Nb_State;
1543                  end if;
1544               end if;
1545            end loop;
1546
1547            Current_State := Current_State + 1;
1548         end loop;
1549
1550         --  Returns the regexp
1551
1552         declare
1553            R : Regexp_Access;
1554
1555         begin
1556            R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1557                                   Num_States    => Nb_State);
1558            R.Map            := Map;
1559            R.Case_Sensitive := Case_Sensitive;
1560
1561            for S in 1 .. Nb_State loop
1562               R.Is_Final (S) := Meta_States (S)(End_State);
1563            end loop;
1564
1565            for State in 1 .. Nb_State loop
1566               for K in 0 .. Alphabet_Size loop
1567                  R.States (State, K) := Table (State)(K);
1568               end loop;
1569            end loop;
1570
1571            Unchecked_Free (Meta_States);
1572            Unchecked_Free (Table);
1573
1574            return (Ada.Finalization.Controlled with R => R);
1575         end;
1576      end Create_Secondary_Table;
1577
1578      ---------------------
1579      -- Raise_Exception --
1580      ---------------------
1581
1582      procedure Raise_Exception (M : String; Index : Integer) is
1583      begin
1584         raise Error_In_Regexp with M & " at offset" & Index'Img;
1585      end Raise_Exception;
1586
1587   --  Start of processing for Compile
1588
1589   begin
1590      --  Special case for the empty string: it always matches, and the
1591      --  following processing would fail on it.
1592
1593      if S = "" then
1594         return (Ada.Finalization.Controlled with
1595                 R => new Regexp_Value'
1596                      (Alphabet_Size => 0,
1597                       Num_States    => 1,
1598                       Map           => [others => 0],
1599                       States        => [others => [others => 1]],
1600                       Is_Final      => [others => True],
1601                       Case_Sensitive => True));
1602      end if;
1603
1604      if not Case_Sensitive then
1605         System.Case_Util.To_Lower (S);
1606      end if;
1607
1608      --  Check the pattern is well-formed before any treatment
1609
1610      Check_Well_Formed_Pattern;
1611
1612      Create_Mapping;
1613
1614      --  Creates the primary table
1615
1616      declare
1617         Table       : Regexp_Array_Access;
1618         Num_States  : State_Index;
1619         Start_State : State_Index;
1620         End_State   : State_Index;
1621         R           : Regexp;
1622
1623      begin
1624         Table := new Regexp_Array (1 .. Initial_Max_States_In_Primary_Table,
1625                                    0 .. Alphabet_Size + 10);
1626         if not Glob then
1627            Create_Primary_Table (Table, Num_States, Start_State, End_State);
1628         else
1629            Create_Primary_Table_Glob
1630              (Table, Num_States, Start_State, End_State);
1631         end if;
1632
1633         --  Creates the secondary table
1634
1635         R := Create_Secondary_Table (Table, Start_State, End_State);
1636         Free (Table);
1637         return R;
1638      end;
1639   end Compile;
1640
1641   --------------
1642   -- Finalize --
1643   --------------
1644
1645   procedure Finalize (R : in out Regexp) is
1646      procedure Free is new
1647        Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1648   begin
1649      Free (R.R);
1650   end Finalize;
1651
1652   ---------
1653   -- Get --
1654   ---------
1655
1656   function Get
1657     (Table  : Regexp_Array_Access;
1658      State  : State_Index;
1659      Column : Column_Index) return State_Index
1660   is
1661   begin
1662      if State <= Table'Last (1)
1663        and then Column <= Table'Last (2)
1664      then
1665         return Table (State, Column);
1666      else
1667         return 0;
1668      end if;
1669   end Get;
1670
1671   -----------
1672   -- Match --
1673   -----------
1674
1675   function Match (S : String; R : Regexp) return Boolean is
1676      Current_State : State_Index := 1;
1677
1678   begin
1679      if R.R = null then
1680         raise Constraint_Error;
1681      end if;
1682
1683      for Char in S'Range loop
1684
1685         if R.R.Case_Sensitive then
1686            Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1687         else
1688            Current_State :=
1689              R.R.States (Current_State,
1690                          R.R.Map (System.Case_Util.To_Lower (S (Char))));
1691         end if;
1692
1693         if Current_State = 0 then
1694            return False;
1695         end if;
1696
1697      end loop;
1698
1699      return R.R.Is_Final (Current_State);
1700   end Match;
1701
1702   ---------
1703   -- Set --
1704   ---------
1705
1706   procedure Set
1707     (Table  : in out Regexp_Array_Access;
1708      State  : State_Index;
1709      Column : Column_Index;
1710      Value  : State_Index)
1711   is
1712      New_Lines   : State_Index;
1713      New_Columns : Column_Index;
1714      New_Table   : Regexp_Array_Access;
1715
1716   begin
1717      if State <= Table'Last (1)
1718        and then Column <= Table'Last (2)
1719      then
1720         Table (State, Column) := Value;
1721      else
1722         --  Doubles the size of the table until it is big enough that
1723         --  (State, Column) is a valid index.
1724
1725         New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1726         New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1727         New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1728                                        Table'First (2) .. New_Columns);
1729         New_Table.all := [others => [others => 0]];
1730
1731         for J in Table'Range (1) loop
1732            for K in Table'Range (2) loop
1733               New_Table (J, K) := Table (J, K);
1734            end loop;
1735         end loop;
1736
1737         Free (Table);
1738         Table := New_Table;
1739         Table (State, Column) := Value;
1740      end if;
1741   end Set;
1742
1743end System.Regexp;
1744