1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                             S F N _ S C A N                              --
6--                                                                          --
7--                                 B o d y                                  --
8--                                                                          --
9--          Copyright (C) 2000-2002 Free Software Foundation, Inc.          --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
20-- MA 02111-1307, USA.                                                      --
21--                                                                          --
22-- As a special exception,  if other files  instantiate  generics from this --
23-- unit, or you link  this unit with other files  to produce an executable, --
24-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25-- covered  by the  GNU  General  Public  License.  This exception does not --
26-- however invalidate  any other reasons why  the executable file  might be --
27-- covered by the  GNU Public License.                                      --
28--                                                                          --
29-- GNAT was originally developed  by the GNAT team at  New York University. --
30-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31--                                                                          --
32------------------------------------------------------------------------------
33
34with Ada.Exceptions; use Ada.Exceptions;
35
36package body SFN_Scan is
37
38   use ASCII;
39   --  Allow easy access to control character definitions
40
41   EOF : constant Character := ASCII.SUB;
42   --  The character SUB (16#1A#) is used in DOS and other systems derived
43   --  from DOS (OS/2, NT etc) to signal the end of a text file. If this
44   --  character appears as the last character of a file scanned by a call
45   --  to Scan_SFN_Pragmas, then it is ignored, otherwise it is treated as
46   --  an illegal character.
47
48   type String_Ptr is access String;
49
50   S : String_Ptr;
51   --  Points to the gnat.adc input file
52
53   P : Natural;
54   --  Subscript of next character to process in S
55
56   Line_Num : Natural;
57   --  Current line number
58
59   Start_Of_Line : Natural;
60   --  Subscript of first character at start of current line
61
62   ----------------------
63   -- Local Procedures --
64   ----------------------
65
66   function Acquire_String (B : Natural; E : Natural) return String;
67   --  This function takes a string scanned out by Scan_String, strips
68   --  the enclosing quote characters and any internal doubled quote
69   --  characters, and returns the result as a String. The arguments
70   --  B and E are as returned from a call to Scan_String. The lower
71   --  bound of the string returned is always 1.
72
73   function Acquire_Unit_Name return String;
74   --  Skips white space, and then scans and returns a unit name. The
75   --  unit name is cased exactly as it appears in the source file.
76   --  The terminating character must be white space, or a comma or
77   --  a right parenthesis or end of file.
78
79   function At_EOF return Boolean;
80   pragma Inline (At_EOF);
81   --  Returns True if at end of file, False if not. Note that this
82   --  function does NOT skip white space, so P is always unchanged.
83
84   procedure Check_Not_At_EOF;
85   pragma Inline (Check_Not_At_EOF);
86   --  Skips past white space if any, and then raises Error if at
87   --  end of file. Otherwise returns with P skipped past whitespace.
88
89   function Check_File_Type return Character;
90   --  Skips white space if any, and then looks for any of the tokens
91   --  Spec_File_Name, Body_File_Name, or Subunit_File_Name. If one
92   --  of these is found then the value returned is 's', 'b' or 'u'
93   --  respectively, and P is bumped past the token. If none of
94   --  these tokens is found, then P is unchanged (except for
95   --  possible skip of white space), and a space is returned.
96
97   function Check_Token (T : String) return Boolean;
98   --  Skips white space if any, and then checks if the string at the
99   --  current location matches the given string T, and the character
100   --  immediately following is non-alphabetic, non-numeric. If so,
101   --  P is stepped past the token, and True is returned. If not,
102   --  P is unchanged (except for possibly skipping past whitespace),
103   --  and False is returned. S may contain only lower-case letters
104   --  ('a' .. 'z').
105
106   procedure Error (Err : String);
107   --  Called if an error is detected. Raises Syntax_Error_In_GNAT_ADC
108   --  with a message of the form gnat.adc:line:col: xxx, where xxx is
109   --  the string Err passed as a parameter.
110
111   procedure Require_Token (T : String);
112   --  Skips white space if any, and then requires the given string
113   --  to be present. If it is, the P is stepped past it, otherwise
114   --  Error is raised, since this is a syntax error. Require_Token
115   --  is used only for sequences of special characters, so there
116   --  is no issue of terminators, or casing of letters.
117
118   procedure Scan_String (B : out Natural; E : out Natural);
119   --  Skips white space if any, then requires that a double quote
120   --  or percent be present (start of string). Raises error if
121   --  neither of these two characters is found. Otherwise scans
122   --  out the string, and returns with P pointing past the
123   --  closing quote and S (B .. E) contains the characters of the
124   --  string (including the enclosing quotes, with internal quotes
125   --  still doubled). Raises Error if the string is malformed.
126
127   procedure Skip_WS;
128   --  Skips P past any white space characters (end of line
129   --  characters, spaces, comments, horizontal tab characters).
130
131   --------------------
132   -- Acquire_String --
133   --------------------
134
135   function Acquire_String (B : Natural; E : Natural) return String is
136      Str : String (1 .. E - B - 1);
137      Q   : constant Character := S (B);
138      J   : Natural;
139      Ptr : Natural;
140
141   begin
142      Ptr := B + 1;
143      J := 0;
144      while Ptr < E loop
145         J := J + 1;
146         Str (J) := S (Ptr);
147
148         if S (Ptr) = Q and then S (Ptr + 1) = Q then
149            Ptr := Ptr + 2;
150         else
151            Ptr := Ptr + 1;
152         end if;
153      end loop;
154
155      return Str (1 .. J);
156   end Acquire_String;
157
158   -----------------------
159   -- Acquire_Unit_Name --
160   -----------------------
161
162   function Acquire_Unit_Name return String is
163      B : Natural;
164
165   begin
166      Check_Not_At_EOF;
167      B := P;
168
169      while not At_EOF loop
170         exit when S (P) not in '0' .. '9'
171           and then S (P) /= '.'
172           and then S (P) /= '_'
173           and then not (S (P) = '[' and then S (P + 1) = '"')
174           and then not (S (P) = '"' and then S (P - 1) = '[')
175           and then not (S (P) = '"' and then S (P + 1) = ']')
176           and then not (S (P) = ']' and then S (P - 1) = '"')
177           and then S (P) < 'A';
178         P := P + 1;
179      end loop;
180
181      if P = B then
182         Error ("null unit name");
183      end if;
184
185      return S (B .. P - 1);
186   end Acquire_Unit_Name;
187
188   ------------
189   -- At_EOF --
190   ------------
191
192   function At_EOF return Boolean is
193   begin
194      --  Immediate return (False) if before last character of file
195
196      if P < S'Last then
197         return False;
198
199      --  Special case: DOS EOF character as last character of file is
200      --  allowed and treated as an end of file.
201
202      elsif P = S'Last then
203         return S (P) = EOF;
204
205      --  If beyond last character of file, then definitely at EOF
206
207      else
208         return True;
209      end if;
210   end At_EOF;
211
212   ---------------------
213   -- Check_File_Type --
214   ---------------------
215
216   function Check_File_Type return Character is
217   begin
218      if Check_Token ("spec_file_name") then
219         return 's';
220      elsif Check_Token ("body_file_name") then
221         return 'b';
222      elsif Check_Token ("subunit_file_name") then
223         return 'u';
224      else
225         return ' ';
226      end if;
227   end Check_File_Type;
228
229   ----------------------
230   -- Check_Not_At_EOF --
231   ----------------------
232
233   procedure Check_Not_At_EOF is
234   begin
235      Skip_WS;
236
237      if At_EOF then
238         Error ("unexpected end of file");
239      end if;
240
241      return;
242   end Check_Not_At_EOF;
243
244   -----------------
245   -- Check_Token --
246   -----------------
247
248   function Check_Token (T : String) return Boolean is
249      Save_P : Natural;
250      C : Character;
251
252   begin
253      Skip_WS;
254      Save_P := P;
255
256      for K in T'Range loop
257         if At_EOF then
258            P := Save_P;
259            return False;
260         end if;
261
262         C := S (P);
263
264         if C in 'A' .. 'Z' then
265            C := Character'Val (Character'Pos (C) +
266                                 (Character'Pos ('a') - Character'Pos ('A')));
267         end if;
268
269         if C /= T (K) then
270            P := Save_P;
271            return False;
272         end if;
273
274         P := P + 1;
275      end loop;
276
277      if At_EOF then
278         return True;
279      end if;
280
281      C := S (P);
282
283      if C in '0' .. '9'
284        or else C in 'a' .. 'z'
285        or else C in 'A' .. 'Z'
286        or else C > Character'Val (127)
287      then
288         P := Save_P;
289         return False;
290
291      else
292         return True;
293      end if;
294   end Check_Token;
295
296   -----------
297   -- Error --
298   -----------
299
300   procedure Error (Err : String) is
301      C : Natural := 0;
302      --  Column number
303
304      M : String (1 .. 80);
305      --  Buffer used to build resulting error msg
306
307      LM : Natural := 0;
308      --  Pointer to last set location in M
309
310      procedure Add_Nat (N : Natural);
311      --  Add chars of integer to error msg buffer
312
313      procedure Add_Nat (N : Natural) is
314      begin
315         if N > 9 then
316            Add_Nat (N / 10);
317         end if;
318
319         LM := LM + 1;
320         M (LM) := Character'Val (N mod 10 + Character'Pos ('0'));
321      end Add_Nat;
322
323   --  Start of processing for Error
324
325   begin
326      M (1 .. 9) := "gnat.adc:";
327      LM := 9;
328      Add_Nat (Line_Num);
329      LM := LM + 1;
330      M (LM) := ':';
331
332      --  Determine column number
333
334      for X in Start_Of_Line .. P loop
335         C := C + 1;
336
337         if S (X) = HT then
338            C := (C + 7) / 8 * 8;
339         end if;
340      end loop;
341
342      Add_Nat (C);
343      M (LM + 1) := ':';
344      LM := LM + 1;
345      M (LM + 1) := ' ';
346      LM := LM + 1;
347
348      M (LM + 1 .. LM + Err'Length) := Err;
349      LM := LM + Err'Length;
350
351      Raise_Exception (Syntax_Error_In_GNAT_ADC'Identity, M (1 .. LM));
352   end Error;
353
354   -------------------
355   -- Require_Token --
356   -------------------
357
358   procedure Require_Token (T : String) is
359      SaveP : Natural;
360
361   begin
362      Skip_WS;
363      SaveP := P;
364
365      for J in T'Range loop
366
367         if At_EOF or else S (P) /= T (J) then
368            declare
369               S : String (1 .. T'Length + 10);
370
371            begin
372               S (1 .. 9) := "missing """;
373               S (10 .. T'Length + 9) := T;
374               S (T'Length + 10) := '"';
375               P := SaveP;
376               Error (S);
377            end;
378
379         else
380            P := P + 1;
381         end if;
382      end loop;
383   end Require_Token;
384
385   ----------------------
386   -- Scan_SFN_Pragmas --
387   ----------------------
388
389   procedure Scan_SFN_Pragmas
390     (Source   : String;
391      SFN_Ptr  : Set_File_Name_Ptr;
392      SFNP_Ptr : Set_File_Name_Pattern_Ptr)
393   is
394      B, E : Natural;
395      Typ  : Character;
396      Cas  : Character;
397
398   begin
399      Line_Num := 1;
400      S := Source'Unrestricted_Access;
401      P := Source'First;
402      Start_Of_Line := P;
403
404      --  Loop through pragmas in file
405
406      Main_Scan_Loop : loop
407         Skip_WS;
408         exit Main_Scan_Loop when At_EOF;
409
410         --  Error if something other than pragma
411
412         if not Check_Token ("pragma") then
413            Error ("non pragma encountered");
414         end if;
415
416         --  Source_File_Name pragma case
417
418         if Check_Token ("source_file_name") then
419            Require_Token ("(");
420
421            Typ := Check_File_Type;
422
423            --  First format, with unit name first
424
425            if Typ = ' ' then
426               if Check_Token ("unit_name") then
427                  Require_Token ("=>");
428               end if;
429
430               declare
431                  U : constant String := Acquire_Unit_Name;
432
433               begin
434                  Require_Token (",");
435                  Typ := Check_File_Type;
436
437                  if Typ /= 's' and then Typ /= 'b' then
438                     Error ("bad pragma");
439                  end if;
440
441                  Require_Token ("=>");
442                  Scan_String (B, E);
443
444                  declare
445                     F : constant String := Acquire_String (B, E);
446
447                  begin
448                     Require_Token (")");
449                     Require_Token (";");
450                     SFN_Ptr.all (Typ, U, F);
451                  end;
452               end;
453
454            --  Second format with pattern string
455
456            else
457               Require_Token ("=>");
458               Scan_String (B, E);
459
460               declare
461                  Pat : constant String := Acquire_String (B, E);
462                  Nas : Natural := 0;
463
464               begin
465                  --  Check exactly one asterisk
466
467                  for J in Pat'Range loop
468                     if Pat (J) = '*' then
469                        Nas := Nas + 1;
470                     end if;
471                  end loop;
472
473                  if Nas /= 1 then
474                     Error ("** not allowed");
475                  end if;
476
477                  B := 0;
478                  E := 0;
479                  Cas := ' ';
480
481                  --  Loop to scan out Casing or Dot_Replacement parameters
482
483                  loop
484                     Check_Not_At_EOF;
485                     exit when S (P) = ')';
486                     Require_Token (",");
487
488                     if Check_Token ("casing") then
489                        Require_Token ("=>");
490
491                        if Cas /= ' ' then
492                           Error ("duplicate casing argument");
493                        elsif Check_Token ("lowercase") then
494                           Cas := 'l';
495                        elsif Check_Token ("uppercase") then
496                           Cas := 'u';
497                        elsif Check_Token ("mixedcase") then
498                           Cas := 'm';
499                        else
500                           Error ("invalid casing argument");
501                        end if;
502
503                     elsif Check_Token ("dot_replacement") then
504                        Require_Token ("=>");
505
506                        if E /= 0 then
507                           Error ("duplicate dot_replacement");
508                        else
509                           Scan_String (B, E);
510                        end if;
511
512                     else
513                        Error ("invalid argument");
514                     end if;
515                  end loop;
516
517                  Require_Token (")");
518                  Require_Token (";");
519
520                  if Cas = ' ' then
521                     Cas := 'l';
522                  end if;
523
524                  if E = 0 then
525                     SFNP_Ptr.all (Pat, Typ, ".", Cas);
526
527                  else
528                     declare
529                        Dot : constant String := Acquire_String (B, E);
530
531                     begin
532                        SFNP_Ptr.all (Pat, Typ, Dot, Cas);
533                     end;
534                  end if;
535               end;
536            end if;
537
538         --  Some other pragma, scan to semicolon at end of pragma
539
540         else
541            Skip_Loop : loop
542               exit Main_Scan_Loop when At_EOF;
543               exit Skip_Loop when S (P) = ';';
544
545               if S (P) = '"' or else S (P) = '%' then
546                  Scan_String (B, E);
547               else
548                  P := P + 1;
549               end if;
550            end loop Skip_Loop;
551
552            --  We successfuly skipped to semicolon, so skip past it
553
554            P := P + 1;
555         end if;
556      end loop Main_Scan_Loop;
557
558   exception
559      when others =>
560         Cursor := P - S'First + 1;
561         raise;
562   end Scan_SFN_Pragmas;
563
564   -----------------
565   -- Scan_String --
566   -----------------
567
568   procedure Scan_String (B : out Natural; E : out Natural) is
569      Q : Character;
570
571   begin
572      Check_Not_At_EOF;
573
574      if S (P) = '"' then
575         Q := '"';
576      elsif S (P) = '%' then
577         Q := '%';
578      else
579         Error ("bad string");
580         Q := '"';
581      end if;
582
583      --  Scan out the string, B points to first char
584
585      B := P;
586      P := P + 1;
587
588      loop
589         if At_EOF or else S (P) = LF or else S (P) = CR then
590            Error ("missing string quote");
591
592         elsif S (P) = HT then
593            Error ("tab character in string");
594
595         elsif S (P) /= Q then
596            P := P + 1;
597
598         --  We have a quote
599
600         else
601            P := P + 1;
602
603            --  Check for doubled quote
604
605            if not At_EOF and then S (P) = Q then
606               P := P + 1;
607
608            --  Otherwise this is the terminating quote
609
610            else
611               E := P - 1;
612               return;
613            end if;
614         end if;
615      end loop;
616   end Scan_String;
617
618   -------------
619   -- Skip_WS --
620   -------------
621
622   procedure Skip_WS is
623   begin
624      WS_Scan : while not At_EOF loop
625         case S (P) is
626
627            --  End of physical line
628
629            when CR | LF =>
630               Line_Num := Line_Num + 1;
631               P := P + 1;
632
633               while not At_EOF
634                 and then (S (P) = CR or else S (P) = LF)
635               loop
636                  Line_Num := Line_Num + 1;
637                  P := P + 1;
638               end loop;
639
640               Start_Of_Line := P;
641
642            --  All other cases of white space characters
643
644            when ' ' | FF | VT | HT =>
645               P := P + 1;
646
647            --  Comment
648
649            when '-' =>
650               P := P + 1;
651
652               if At_EOF then
653                  Error ("bad comment");
654
655               elsif S (P) = '-' then
656                  P := P + 1;
657
658                  while not At_EOF loop
659                     case S (P) is
660                        when CR | LF | FF | VT =>
661                           exit;
662                        when others =>
663                           P := P + 1;
664                     end case;
665                  end loop;
666
667               else
668                  P := P - 1;
669                  exit WS_Scan;
670               end if;
671
672            when others =>
673               exit WS_Scan;
674
675         end case;
676      end loop WS_Scan;
677   end Skip_WS;
678
679end SFN_Scan;
680