1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                             S F N _ S C A N                              --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 2000-2020, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
19-- http://www.gnu.org/licenses for a complete copy of the license.          --
20--                                                                          --
21-- GNAT was originally developed  by the GNAT team at  New York University. --
22-- Extensive contributions were provided by Ada Core Technologies Inc.      --
23--                                                                          --
24------------------------------------------------------------------------------
25
26with Ada.Exceptions; use Ada.Exceptions;
27
28package body SFN_Scan is
29
30   use ASCII;
31   --  Allow easy access to control character definitions
32
33   EOF : constant Character := ASCII.SUB;
34   --  The character SUB (16#1A#) is used in DOS-derived systems, such as
35   --  Windows to signal the end of a text file. If this character appears as
36   --  the last character of a file scanned by a call to Scan_SFN_Pragmas, then
37   --  it is ignored, otherwise it is treated as an illegal character.
38
39   type String_Ptr is access String;
40
41   S : String_Ptr;
42   --  Points to the gnat.adc input file
43
44   P : Natural;
45   --  Subscript of next character to process in S
46
47   Line_Num : Natural;
48   --  Current line number
49
50   Start_Of_Line : Natural;
51   --  Subscript of first character at start of current line
52
53   ----------------------
54   -- Local Procedures --
55   ----------------------
56
57   function Acquire_Integer return Natural;
58   --  This function skips white space, and then scans and returns
59   --  an unsigned integer. Raises Error if no integer is present
60   --  or if the integer is greater than 999.
61
62   function Acquire_String (B : Natural; E : Natural) return String;
63   --  This function takes a string scanned out by Scan_String, strips
64   --  the enclosing quote characters and any internal doubled quote
65   --  characters, and returns the result as a String. The arguments
66   --  B and E are as returned from a call to Scan_String. The lower
67   --  bound of the string returned is always 1.
68
69   function Acquire_Unit_Name return String;
70   --  Skips white space, and then scans and returns a unit name. The
71   --  unit name is cased exactly as it appears in the source file.
72   --  The terminating character must be white space, or a comma or
73   --  a right parenthesis or end of file.
74
75   function At_EOF return Boolean;
76   pragma Inline (At_EOF);
77   --  Returns True if at end of file, False if not. Note that this
78   --  function does NOT skip white space, so P is always unchanged.
79
80   procedure Check_Not_At_EOF;
81   pragma Inline (Check_Not_At_EOF);
82   --  Skips past white space if any, and then raises Error if at
83   --  end of file. Otherwise returns with P skipped past whitespace.
84
85   function Check_File_Type return Character;
86   --  Skips white space if any, and then looks for any of the tokens
87   --  Spec_File_Name, Body_File_Name, or Subunit_File_Name. If one
88   --  of these is found then the value returned is 's', 'b' or 'u'
89   --  respectively, and P is bumped past the token. If none of
90   --  these tokens is found, then P is unchanged (except for
91   --  possible skip of white space), and a space is returned.
92
93   function Check_Token (T : String) return Boolean;
94   --  Skips white space if any, and then checks if the string at the
95   --  current location matches the given string T, and the character
96   --  immediately following is non-alphabetic, non-numeric. If so,
97   --  P is stepped past the token, and True is returned. If not,
98   --  P is unchanged (except for possibly skipping past whitespace),
99   --  and False is returned. T may contain only lower-case letters
100   --  ('a' .. 'z').
101
102   procedure Error (Err : String);
103   pragma No_Return (Error);
104   --  Called if an error is detected. Raises Syntax_Error_In_GNAT_ADC
105   --  with a message of the form gnat.adc:line:col: xxx, where xxx is
106   --  the string Err passed as a parameter.
107
108   procedure Require_Token (T : String);
109   --  Skips white space if any, and then requires the given string
110   --  to be present. If it is, the P is stepped past it, otherwise
111   --  Error is raised, since this is a syntax error. Require_Token
112   --  is used only for sequences of special characters, so there
113   --  is no issue of terminators, or casing of letters.
114
115   procedure Scan_String (B : out Natural; E : out Natural);
116   --  Skips white space if any, then requires that a double quote
117   --  or percent be present (start of string). Raises error if
118   --  neither of these two characters is found. Otherwise scans
119   --  out the string, and returns with P pointing past the
120   --  closing quote and S (B .. E) contains the characters of the
121   --  string (including the enclosing quotes, with internal quotes
122   --  still doubled). Raises Error if the string is malformed.
123
124   procedure Skip_WS;
125   --  Skips P past any white space characters (end of line
126   --  characters, spaces, comments, horizontal tab characters).
127
128   ---------------------
129   -- Acquire_Integer --
130   ---------------------
131
132   function Acquire_Integer return Natural is
133      N : Natural := 0;
134
135   begin
136      Skip_WS;
137
138      if S (P) not in '0' .. '9' then
139         Error ("missing index parameter");
140      end if;
141
142      while S (P) in '0' .. '9' loop
143         N := N * 10 + Character'Pos (S (P)) - Character'Pos ('0');
144
145         if N > 999 then
146            Error ("index value greater than 999");
147         end if;
148
149         P := P + 1;
150      end loop;
151
152      return N;
153   end Acquire_Integer;
154
155   --------------------
156   -- Acquire_String --
157   --------------------
158
159   function Acquire_String (B : Natural; E : Natural) return String is
160      Str : String (1 .. E - B - 1);
161      Q   : constant Character := S (B);
162      J   : Natural;
163      Ptr : Natural;
164
165   begin
166      Ptr := B + 1;
167      J := 0;
168      while Ptr < E loop
169         J := J + 1;
170         Str (J) := S (Ptr);
171
172         if S (Ptr) = Q and then S (Ptr + 1) = Q then
173            Ptr := Ptr + 2;
174         else
175            Ptr := Ptr + 1;
176         end if;
177      end loop;
178
179      return Str (1 .. J);
180   end Acquire_String;
181
182   -----------------------
183   -- Acquire_Unit_Name --
184   -----------------------
185
186   function Acquire_Unit_Name return String is
187      B : Natural;
188
189   begin
190      Check_Not_At_EOF;
191      B := P;
192
193      while not At_EOF loop
194         exit when S (P) not in '0' .. '9'
195           and then S (P) /= '.'
196           and then S (P) /= '_'
197           and then not (S (P) = '[' and then S (P + 1) = '"')
198           and then not (S (P) = '"' and then S (P - 1) = '[')
199           and then not (S (P) = '"' and then S (P + 1) = ']')
200           and then not (S (P) = ']' and then S (P - 1) = '"')
201           and then S (P) < 'A';
202         P := P + 1;
203      end loop;
204
205      if P = B then
206         Error ("null unit name");
207      end if;
208
209      return S (B .. P - 1);
210   end Acquire_Unit_Name;
211
212   ------------
213   -- At_EOF --
214   ------------
215
216   function At_EOF return Boolean is
217   begin
218      --  Immediate return (False) if before last character of file
219
220      if P < S'Last then
221         return False;
222
223      --  Special case: DOS EOF character as last character of file is
224      --  allowed and treated as an end of file.
225
226      elsif P = S'Last then
227         return S (P) = EOF;
228
229      --  If beyond last character of file, then definitely at EOF
230
231      else
232         return True;
233      end if;
234   end At_EOF;
235
236   ---------------------
237   -- Check_File_Type --
238   ---------------------
239
240   function Check_File_Type return Character is
241   begin
242      if Check_Token ("spec_file_name") then
243         return 's';
244      elsif Check_Token ("body_file_name") then
245         return 'b';
246      elsif Check_Token ("subunit_file_name") then
247         return 'u';
248      else
249         return ' ';
250      end if;
251   end Check_File_Type;
252
253   ----------------------
254   -- Check_Not_At_EOF --
255   ----------------------
256
257   procedure Check_Not_At_EOF is
258   begin
259      Skip_WS;
260
261      if At_EOF then
262         Error ("unexpected end of file");
263      end if;
264
265      return;
266   end Check_Not_At_EOF;
267
268   -----------------
269   -- Check_Token --
270   -----------------
271
272   function Check_Token (T : String) return Boolean is
273      Save_P : Natural;
274      C : Character;
275
276   begin
277      Skip_WS;
278      Save_P := P;
279
280      for K in T'Range loop
281         if At_EOF then
282            P := Save_P;
283            return False;
284         end if;
285
286         C := S (P);
287
288         if C in 'A' .. 'Z' then
289            C := Character'Val (Character'Pos (C) +
290                                 (Character'Pos ('a') - Character'Pos ('A')));
291         end if;
292
293         if C /= T (K) then
294            P := Save_P;
295            return False;
296         end if;
297
298         P := P + 1;
299      end loop;
300
301      if At_EOF then
302         return True;
303      end if;
304
305      C := S (P);
306
307      if C in '0' .. '9'
308        or else C in 'a' .. 'z'
309        or else C in 'A' .. 'Z'
310        or else C > Character'Val (127)
311      then
312         P := Save_P;
313         return False;
314
315      else
316         return True;
317      end if;
318   end Check_Token;
319
320   -----------
321   -- Error --
322   -----------
323
324   procedure Error (Err : String) is
325      C : Natural := 0;
326      --  Column number
327
328      M : String (1 .. 80);
329      --  Buffer used to build resulting error msg
330
331      LM : Natural := 0;
332      --  Pointer to last set location in M
333
334      procedure Add_Nat (N : Natural);
335      --  Add chars of integer to error msg buffer
336
337      -------------
338      -- Add_Nat --
339      -------------
340
341      procedure Add_Nat (N : Natural) is
342      begin
343         if N > 9 then
344            Add_Nat (N / 10);
345         end if;
346
347         LM := LM + 1;
348         M (LM) := Character'Val (N mod 10 + Character'Pos ('0'));
349      end Add_Nat;
350
351   --  Start of processing for Error
352
353   begin
354      M (1 .. 9) := "gnat.adc:";
355      LM := 9;
356      Add_Nat (Line_Num);
357      LM := LM + 1;
358      M (LM) := ':';
359
360      --  Determine column number
361
362      for X in Start_Of_Line .. P loop
363         C := C + 1;
364
365         if S (X) = HT then
366            C := (C + 7) / 8 * 8;
367         end if;
368      end loop;
369
370      Add_Nat (C);
371      M (LM + 1) := ':';
372      LM := LM + 1;
373      M (LM + 1) := ' ';
374      LM := LM + 1;
375
376      M (LM + 1 .. LM + Err'Length) := Err;
377      LM := LM + Err'Length;
378
379      Raise_Exception (Syntax_Error_In_GNAT_ADC'Identity, M (1 .. LM));
380   end Error;
381
382   -------------------
383   -- Require_Token --
384   -------------------
385
386   procedure Require_Token (T : String) is
387      SaveP : Natural;
388
389   begin
390      Skip_WS;
391      SaveP := P;
392
393      for J in T'Range loop
394
395         if At_EOF or else S (P) /= T (J) then
396            declare
397               S : String (1 .. T'Length + 10);
398
399            begin
400               S (1 .. 9) := "missing """;
401               S (10 .. T'Length + 9) := T;
402               S (T'Length + 10) := '"';
403               P := SaveP;
404               Error (S);
405            end;
406
407         else
408            P := P + 1;
409         end if;
410      end loop;
411   end Require_Token;
412
413   ----------------------
414   -- Scan_SFN_Pragmas --
415   ----------------------
416
417   procedure Scan_SFN_Pragmas
418     (Source   : String;
419      SFN_Ptr  : Set_File_Name_Ptr;
420      SFNP_Ptr : Set_File_Name_Pattern_Ptr)
421   is
422      B, E : Natural;
423      Typ  : Character;
424      Cas  : Character;
425
426   begin
427      Line_Num := 1;
428      S := Source'Unrestricted_Access;
429      P := Source'First;
430      Start_Of_Line := P;
431
432      --  Loop through pragmas in file
433
434      Main_Scan_Loop : loop
435         Skip_WS;
436         exit Main_Scan_Loop when At_EOF;
437
438         --  Error if something other than pragma
439
440         if not Check_Token ("pragma") then
441            Error ("non pragma encountered");
442         end if;
443
444         --  Source_File_Name pragma case
445
446         if Check_Token ("source_file_name")
447              or else
448             Check_Token ("source_file_name_project")
449         then
450            Require_Token ("(");
451
452            Typ := Check_File_Type;
453
454            --  First format, with unit name first
455
456            if Typ = ' ' then
457               if Check_Token ("unit_name") then
458                  Require_Token ("=>");
459               end if;
460
461               declare
462                  U : constant String := Acquire_Unit_Name;
463
464               begin
465                  Require_Token (",");
466                  Typ := Check_File_Type;
467
468                  if Typ /= 's' and then Typ /= 'b' then
469                     Error ("bad pragma");
470                  end if;
471
472                  Require_Token ("=>");
473                  Scan_String (B, E);
474
475                  declare
476                     F : constant String := Acquire_String (B, E);
477                     X : Natural;
478
479                  begin
480                     --  Scan Index parameter if present
481
482                     if Check_Token (",") then
483                        if Check_Token ("index") then
484                           Require_Token ("=>");
485                        end if;
486
487                        X := Acquire_Integer;
488                     else
489                        X := 0;
490                     end if;
491
492                     Require_Token (")");
493                     Require_Token (";");
494                     SFN_Ptr.all (Typ, U, F, X);
495                  end;
496               end;
497
498            --  Second format with pattern string
499
500            else
501               Require_Token ("=>");
502               Scan_String (B, E);
503
504               declare
505                  Pat : constant String := Acquire_String (B, E);
506                  Nas : Natural := 0;
507
508               begin
509                  --  Check exactly one asterisk
510
511                  for J in Pat'Range loop
512                     if Pat (J) = '*' then
513                        Nas := Nas + 1;
514                     end if;
515                  end loop;
516
517                  if Nas /= 1 then
518                     Error ("** not allowed");
519                  end if;
520
521                  B := 0;
522                  E := 0;
523                  Cas := ' ';
524
525                  --  Loop to scan out Casing or Dot_Replacement parameters
526
527                  loop
528                     Check_Not_At_EOF;
529                     exit when S (P) = ')';
530                     Require_Token (",");
531
532                     if Check_Token ("casing") then
533                        Require_Token ("=>");
534
535                        if Cas /= ' ' then
536                           Error ("duplicate casing argument");
537                        elsif Check_Token ("lowercase") then
538                           Cas := 'l';
539                        elsif Check_Token ("uppercase") then
540                           Cas := 'u';
541                        elsif Check_Token ("mixedcase") then
542                           Cas := 'm';
543                        else
544                           Error ("invalid casing argument");
545                        end if;
546
547                     elsif Check_Token ("dot_replacement") then
548                        Require_Token ("=>");
549
550                        if E /= 0 then
551                           Error ("duplicate dot_replacement");
552                        else
553                           Scan_String (B, E);
554                        end if;
555
556                     else
557                        Error ("invalid argument");
558                     end if;
559                  end loop;
560
561                  Require_Token (")");
562                  Require_Token (";");
563
564                  if Cas = ' ' then
565                     Cas := 'l';
566                  end if;
567
568                  if E = 0 then
569                     SFNP_Ptr.all (Pat, Typ, ".", Cas);
570
571                  else
572                     declare
573                        Dot : constant String := Acquire_String (B, E);
574
575                     begin
576                        SFNP_Ptr.all (Pat, Typ, Dot, Cas);
577                     end;
578                  end if;
579               end;
580            end if;
581
582         --  Some other pragma, scan to semicolon at end of pragma
583
584         else
585            Skip_Loop : loop
586               exit Main_Scan_Loop when At_EOF;
587               exit Skip_Loop when S (P) = ';';
588
589               if S (P) = '"' or else S (P) = '%' then
590                  Scan_String (B, E);
591               else
592                  P := P + 1;
593               end if;
594            end loop Skip_Loop;
595
596            --  We successfully skipped to semicolon, so skip past it
597
598            P := P + 1;
599         end if;
600      end loop Main_Scan_Loop;
601
602   exception
603      when others =>
604         pragma Assert (P'Valid);
605         Cursor := P - S'First + 1;
606         raise;
607   end Scan_SFN_Pragmas;
608
609   -----------------
610   -- Scan_String --
611   -----------------
612
613   procedure Scan_String (B : out Natural; E : out Natural) is
614      Q : Character;
615
616   begin
617      Check_Not_At_EOF;
618
619      if S (P) = '"' then
620         Q := '"';
621      elsif S (P) = '%' then
622         Q := '%';
623      else
624         Error ("bad string");
625         Q := '"';
626      end if;
627
628      --  Scan out the string, B points to first char
629
630      B := P;
631      P := P + 1;
632
633      loop
634         if At_EOF or else S (P) = LF or else S (P) = CR then
635            Error -- CODEFIX
636              ("missing string quote");
637
638         elsif S (P) = HT then
639            Error ("tab character in string");
640
641         elsif S (P) /= Q then
642            P := P + 1;
643
644         --  We have a quote
645
646         else
647            P := P + 1;
648
649            --  Check for doubled quote
650
651            if not At_EOF and then S (P) = Q then
652               P := P + 1;
653
654            --  Otherwise this is the terminating quote
655
656            else
657               E := P - 1;
658               return;
659            end if;
660         end if;
661      end loop;
662   end Scan_String;
663
664   -------------
665   -- Skip_WS --
666   -------------
667
668   procedure Skip_WS is
669   begin
670      WS_Scan : while not At_EOF loop
671         case S (P) is
672
673            --  End of physical line
674
675            when CR | LF =>
676               Line_Num := Line_Num + 1;
677               P := P + 1;
678
679               while not At_EOF
680                 and then (S (P) = CR or else S (P) = LF)
681               loop
682                  Line_Num := Line_Num + 1;
683                  P := P + 1;
684               end loop;
685
686               Start_Of_Line := P;
687
688            --  All other cases of white space characters
689
690            when ' ' | FF | VT | HT =>
691               P := P + 1;
692
693            --  Comment
694
695            when '-' =>
696               P := P + 1;
697
698               if At_EOF then
699                  Error ("bad comment");
700
701               elsif S (P) = '-' then
702                  P := P + 1;
703
704                  while not At_EOF loop
705                     case S (P) is
706                        when CR | LF | FF | VT =>
707                           exit;
708                        when others =>
709                           P := P + 1;
710                     end case;
711                  end loop;
712
713               else
714                  P := P - 1;
715                  exit WS_Scan;
716               end if;
717
718            when others =>
719               exit WS_Scan;
720
721         end case;
722      end loop WS_Scan;
723   end Skip_WS;
724
725end SFN_Scan;
726