1-------------------------------------------------------------------------------
2--
3--  This file is part of AdaBrowse.
4--
5-- <STRONG>Copyright (c) 2002 by Thomas Wolf.</STRONG>
6-- <BLOCKQUOTE>
7--    AdaBrowse is free software; you can redistribute it and/or modify it
8--    under the terms of the  GNU General Public License as published by the
9--    Free Software  Foundation; either version 2, or (at your option) any
10--    later version. AdaBrowse is distributed in the hope that it will be
11--    useful, but <EM>without any warranty</EM>; without even the implied
12--    warranty of <EM>merchantability or fitness for a particular purpose.</EM>
13--    See the GNU General Public License for  more details. You should have
14--    received a copy of the GNU General Public License with this distribution,
15--    see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
16--    Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
17--    USA.
18-- </BLOCKQUOTE>
19--
20-- <DL><DT><STRONG>
21-- Author:</STRONG><DD>
22--   Thomas Wolf  (TW)
23--   <ADDRESS><A HREF="mailto:twolf@acm.org">twolf@acm.org</A></ADDRESS></DL>
24--
25-- <DL><DT><STRONG>
26-- Purpose:</STRONG><DD>
27--   Simplified Ada 95 parser. Parses the source until it finds the
28--   name of the library unit declaration. Note: this parser (and its
29--   scanner!) doesn't need to be hyper-fast, it'll only be used for
30--   krunched file names, and then parse the file only up to the
31--   unit name.</DL>
32--
33-- <!--
34-- Revision History
35--
36--   26-MAR-2002   TW  Initial version.
37--   21-JUN-2002   TW  Uses Util.Text now instead of Ada.Strings.Unbounded.
38-- -->
39-------------------------------------------------------------------------------
40
41pragma License (GPL);
42
43with Ada.Strings.Maps;
44with Ada.Text_IO;
45
46with Util.Files.Text_IO;
47with Util.Strings;
48with Util.Text.Internal;
49
50pragma Elaborate_All (Util.Files.Text_IO);
51pragma Elaborate_All (Util.Text);
52
53package body AD.Parse is
54
55   package UT renames Util.Text;
56
57   ----------------------------------------------------------------------------
58   --  Scanning routines. This is a very simple, line-based scanner. Not
59   --  particularly efficient, but does the job nicely.
60
61   package Scanner is
62
63      type Token is
64        (Other_Token,
65         Left_Paren_Token, Right_Paren_Token, Semicolon_Token, Period_Token,
66         With_Token, Use_Token, Pragma_Token, Type_Token, Package_Token,
67         Procedure_Token, Function_Token, Is_Token, New_Token, Return_Token,
68         Private_Token, Generic_Token, Name_Token, String_Token);
69
70      procedure Init (File_Name : in String);
71
72      procedure Advance;
73
74      function Current_Token return Token;
75
76      function Image   return UT.Unbounded_String;
77
78      procedure Close;
79
80      Scan_Error : exception;
81
82   private
83
84      pragma Inline (Current_Token);
85
86   end Scanner;
87
88   package body Scanner is
89
90      use Util.Strings;
91
92      F : Ada.Text_IO.File_Type;
93
94      function Ada_Skip_String
95        (S     : in String;
96         Delim : in Character)
97        return Natural
98      is
99      begin
100         return Skip_String (S, Delim, Delim);
101      end Ada_Skip_String;
102
103      function Get_Line is
104         new Util.Files.Text_IO.Next_Line
105               (Line_Continuation => "",
106                Comment_Start     => "--",
107                Delimiters        => Ada.Strings.Maps.To_Set ('"'),
108                Strings           => Ada_Skip_String);
109      --  Note: we only need to handle the double quote as a string delimiter,
110      --  for "--" can only occur in strings, but never in character literals.
111      --  Hence it isn't necessary to handle the single quote at all here.
112
113      Curr_Line : UT.Unbounded_String;
114      Curr      : UT.String_Access;
115      Curr_Idx  : Natural;
116
117      Curr_Token  : Token := Other_Token;
118      Token_Image : UT.Unbounded_String;
119      Token_Ptr   : UT.String_Access;
120      --  Set for 'Name_Token' and 'String_Token'; in the latter case, it
121      --  also contains the delimiting double quotes.
122
123      procedure Load_Line
124      is
125      begin
126         UT.Set (Curr_Line, Get_Line (F));
127         Curr      := UT.Internal.Get_Ptr (Curr_Line);
128         Curr_Idx  := 1;
129         if Curr_Idx > Curr'Last then
130            raise Scan_Error;
131         end if;
132      end Load_Line;
133
134      function Find_Token
135        return Token
136      is
137      begin
138         case Token_Ptr (Token_Ptr'First) is
139            when 'f' | 'F' =>
140               if To_Lower (Token_Ptr.all) = "function" then
141                  return Function_Token;
142               end if;
143            when 'g' | 'G' =>
144               if To_Lower (Token_Ptr.all) = "generic" then
145                  return Generic_Token;
146               end if;
147            when 'i' | 'I' =>
148               if To_Lower (Token_Ptr.all) = "is" then
149                  return Is_Token;
150               end if;
151            when 'n' | 'N' =>
152               if To_Lower (Token_Ptr.all) = "new" then
153                  return New_Token;
154               end if;
155            when 'p' | 'P' =>
156               declare
157                  S : constant String := To_Lower (Token_Ptr.all);
158               begin
159                  if S = "package" then
160                     return Package_Token;
161                  elsif S = "pragma" then
162                     return Pragma_Token;
163                  elsif S = "private" then
164                     return Private_Token;
165                  elsif S = "procedure" then
166                     return Procedure_Token;
167                  end if;
168               end;
169            when 'r' | 'R' =>
170               if To_Lower (Token_Ptr.all) = "return" then
171                  return Return_Token;
172               end if;
173            when 't' | 'T' =>
174               if To_Lower (Token_Ptr.all) = "type" then
175                  return Type_Token;
176               end if;
177            when 'u' | 'U' =>
178               if To_Lower (Token_Ptr.all) = "use" then
179                  return Use_Token;
180               end if;
181            when 'w' | 'W' =>
182               if To_Lower (Token_Ptr.all) = "with" then
183                  return With_Token;
184               end if;
185            when others =>
186               null;
187         end case;
188         return Name_Token;
189      end Find_Token;
190
191      Numeral          : constant Ada.Strings.Maps.Character_Set :=
192        Ada.Strings.Maps.To_Set ("0123456789_");
193
194      Based_Numeral    : constant Ada.Strings.Maps.Character_Set :=
195        Ada.Strings.Maps.To_Set ("0123456789_ABCDEFabcdef");
196
197      procedure Advance
198      is
199      begin
200         if Curr_Idx > Curr'Last then Load_Line; end if;
201         declare
202            Ch : Character := Curr (Curr_Idx);
203         begin
204            while Is_Blank (Ch) loop
205               Curr_Idx := Curr_Idx + 1;
206               if Curr_Idx > Curr'Last then
207                  Load_Line; Curr_Idx := 1;
208               end if;
209               Ch := Curr (Curr_Idx);
210            end loop;
211            case Ch is
212               when '(' =>
213                  Curr_Token := Left_Paren_Token;
214
215               when ')' =>
216                  Curr_Token := Right_Paren_Token;
217
218               when ';' =>
219                  Curr_Token := Semicolon_Token;
220
221               when '.' =>
222                  Curr_Token := Period_Token;
223
224               when 'A' .. 'Z' | 'a' .. 'z' =>
225                  --  Parse a name: any sequence of characters, digits, and
226                  --  underscores.
227                  declare
228                     Stop_Idx : constant Natural :=
229                       Identifier (Curr (Curr_Idx .. Curr'Last));
230                  begin
231                     UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
232                     Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
233                     Curr_Idx := Stop_Idx;
234                  end;
235                  Curr_Token := Find_Token;
236
237               when ''' =>
238                  if Curr_Idx + 2 <= Curr'Last and then
239                     Curr (Curr_Idx + 2) = '''
240                  then
241                     Curr_Idx := Curr_Idx + 2;
242                  end if;
243                  Curr_Token := Other_Token;
244
245               when '"' =>
246                  --  Skip a string.
247                  declare
248                     Stop_Idx : constant Natural :=
249                       Ada_Skip_String (Curr (Curr_Idx .. Curr'Last), '"');
250                  begin
251                     if Stop_Idx = 0 then
252                        raise Scan_Error;
253                     end if;
254                     UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx));
255                     Token_Ptr := UT.Internal.Get_Ptr (Token_Image);
256                     Curr_Idx := Stop_Idx;
257                     Curr_Token := String_Token;
258                  end;
259
260               when '0' .. '9' =>
261                  --  Skip a number. Note: use a simplified syntax!
262                  declare
263                     Stop_Idx : Natural := Curr_Idx;
264                  begin
265                     while Stop_Idx <= Curr'Last and then
266                           Is_In (Numeral, Curr (Stop_Idx))
267                     loop
268                        Stop_Idx := Stop_Idx + 1;
269                     end loop;
270                     if Stop_Idx <= Curr'Last then
271                        if Curr (Stop_Idx) = '#' then
272                           Stop_Idx := Stop_Idx + 1;
273                           --  Actually, there must be at least one digit, and
274                           --  at most one period.
275                           while Stop_Idx <= Curr'Last and then
276                                 Is_In (Based_Numeral, Curr (Stop_Idx))
277                           loop
278                              Stop_Idx := Stop_Idx + 1;
279                           end loop;
280                           if Stop_Idx <= Curr'Last and then
281                              Curr (Stop_Idx) = '#'
282                           then
283                              Stop_Idx := Stop_Idx + 1;
284                           else
285                              raise Scan_Error;
286                           end if;
287                        elsif Curr (Stop_Idx) = '.' then
288                           Stop_Idx := Stop_Idx + 1;
289                           --  Actually, there must be at least one digit.
290                           while Stop_Idx <= Curr'Last and then
291                                 Is_In (Numeral, Curr (Stop_Idx))
292                           loop
293                              Stop_Idx := Stop_Idx + 1;
294                           end loop;
295                        end if; --  Fraction or Based
296                     end if;
297                     if Stop_Idx <= Curr'Last and then
298                        Curr (Stop_Idx) = 'E'
299                     then
300                        Stop_Idx := Stop_Idx + 1;
301                        if Stop_Idx > Curr'Last then raise Scan_Error; end if;
302                        case Curr (Stop_Idx) is
303                           when '0' .. '9' =>
304                              null;
305                           when '+' | '-' =>
306                              Stop_Idx := Stop_Idx + 1;
307                              if Stop_Idx > Curr'Last then
308                                 raise Scan_Error;
309                              end if;
310                           when others =>
311                              raise Scan_Error;
312                        end case;
313                        --  Actually, there must be at least one digit now.
314                        while Stop_Idx <= Curr'Last and then
315                              Is_In (Numeral, Curr (Stop_Idx))
316                        loop
317                           Stop_Idx := Stop_Idx + 1;
318                        end loop;
319                     end if; --  Exponent
320                     Curr_Idx := Stop_Idx - 1;
321                  end;
322                  Curr_Token := Other_Token;
323
324               when others =>
325                  Curr_Token := Other_Token;
326
327            end case;
328            Curr_Idx := Curr_Idx + 1;
329         end;
330      end Advance;
331
332      function Current_Token
333        return Token
334      is
335      begin
336         return Curr_Token;
337      end Current_Token;
338
339      function Image
340        return UT.Unbounded_String
341      is
342      begin
343         if Curr_Token = Name_Token or else
344            Curr_Token = String_Token
345         then
346            return Token_Image;
347         else
348            return UT.Null_Unbounded_String;
349         end if;
350      end Image;
351
352      procedure Init
353        (File_Name : in String)
354      is
355      begin
356         Ada.Text_IO.Open (F, Ada.Text_IO.In_File, File_Name);
357         Load_Line;
358         Advance;
359      end Init;
360
361      procedure Close
362      is
363      begin
364         if Ada.Text_IO.Is_Open (F) then
365            Ada.Text_IO.Close (F);
366         end if;
367      end Close;
368
369   end Scanner;
370
371   ----------------------------------------------------------------------------
372   --  Parsing routines. This is a very simple recursive descent parser, yet
373   --  it recognizes syntactically correct Ada 95 library unit headers up
374   --  to the library unit name. It doesn't do any error recovery, and it
375   --  skips source chunks that are not interesting. The sole purpose of this
376   --  is to get the name of the library unit, not any syntax or semantics
377   --  checking.
378
379   package Parser is
380
381      function Library_Unit
382        return String;
383
384      Parse_Error : exception;
385
386   end Parser;
387
388   package body Parser is
389
390      use Scanner;
391
392      procedure Skip_Parentheses
393      is
394         Level   : Natural := 0;
395      begin
396         loop
397            case Current_Token is
398               when Left_Paren_Token =>
399                  Level := Level + 1;
400
401               when Right_Paren_Token =>
402                  Level := Level - 1;
403
404               when others =>
405                  null;
406
407            end case;
408            Advance;
409            exit when Level = 0;
410         end loop;
411      end Skip_Parentheses;
412
413      procedure Skip_To_Semicolon
414      is
415      begin
416         while Current_Token /= Semicolon_Token loop
417            Advance;
418         end loop;
419      end Skip_To_Semicolon;
420
421      procedure Skip_To_Semicolon_Nested
422      is
423      begin
424         while Current_Token /= Semicolon_Token loop
425            if Current_Token = Left_Paren_Token then
426               Skip_Parentheses;
427            else
428               Advance;
429            end if;
430         end loop;
431      end Skip_To_Semicolon_Nested;
432
433      procedure Context_Clauses
434      is
435      begin
436         loop
437            case Current_Token is
438               when With_Token | Use_Token =>
439                  Skip_To_Semicolon;
440
441               when Pragma_Token =>
442                  Skip_To_Semicolon_Nested;
443
444               when others =>
445                  exit;
446
447            end case;
448            --  Skip the semicolon.
449            Advance;
450         end loop;
451      end Context_Clauses;
452
453      procedure Generic_Formals
454      is
455      begin
456         loop
457            case Current_Token is
458               when Pragma_Token =>
459                  --  Just to be on the safe side: allow pragmas in the generic
460                  --  formal part.
461                  Skip_To_Semicolon_Nested;
462
463               when Use_Token =>
464                  Skip_To_Semicolon;
465
466               when Type_Token =>
467                  --  Generic formal type.
468                  Advance;
469                  if Current_Token /= Name_Token then
470                     raise Parse_Error;
471                  end if;
472                  Advance;
473                  if Current_Token = Left_Paren_Token then
474                     --  Discriminants.
475                     Skip_Parentheses;
476                  end if;
477                  if Current_Token /= Is_Token then
478                     raise Parse_Error;
479                  end if;
480                  Skip_To_Semicolon;
481
482               when With_Token =>
483                  --  Generic formal subprogram or formal package.
484                  Advance;
485                  case Current_Token is
486                     when Package_Token =>
487                        Advance;
488                        if Current_Token /= Name_Token then
489                           raise Parse_Error;
490                        end if;
491                        Advance;
492                        if Current_Token /= Is_Token then
493                           raise Parse_Error;
494                        end if;
495                        Advance;
496                        if Current_Token /= New_Token then
497                           raise Parse_Error;
498                        end if;
499                        Advance;
500                        if Current_Token /= Name_Token then
501                           raise Parse_Error;
502                        end if;
503                        Advance;
504                        --  It may be an expanded name (Package.Name).
505                        while Current_Token = Period_Token loop
506                           Advance;
507                           if Current_Token /= Name_Token then
508                              raise Parse_Error;
509                           end if;
510                           Advance;
511                        end loop;
512                        if Current_Token = Left_Paren_Token then
513                           --  Generic actual part.
514                           Skip_Parentheses;
515                        end if;
516                        Skip_To_Semicolon;
517
518                     when Procedure_Token | Function_Token =>
519                        declare
520                           Initial : constant Token := Current_Token;
521                        begin
522                           Advance;
523                           if Current_Token /= Name_Token and then
524                              (Initial /= Function_Token or else
525                               Current_Token /= String_Token)
526                           then
527                              raise Parse_Error;
528                           end if;
529                           Advance;
530                           if Current_Token = Left_Paren_Token then
531                              --  Parameter specifications.
532                              Skip_Parentheses;
533                           end if;
534                           if Initial = Function_Token then
535                              --  Return type
536                              if Current_Token /= Return_Token then
537                                 raise Parse_Error;
538                              end if;
539                              Advance;
540                              if Current_Token /= Name_Token then
541                                 raise Parse_Error;
542                              end if;
543                              Advance;
544                           end if;
545                           Skip_To_Semicolon;
546                        end;
547
548                     when others =>
549                        raise Parse_Error;
550
551                  end case;
552
553               when Name_Token =>
554                  --  Generic formal object. Skip to first semicolon not within
555                  --  parentheses.
556                  Skip_To_Semicolon_Nested;
557
558               when Package_Token | Procedure_Token | Function_Token =>
559                  exit;
560
561               when others =>
562                  raise Parse_Error;
563
564            end case;
565            if Current_Token /= Semicolon_Token then
566               raise Parse_Error;
567            end if;
568            --  Skip the semicolon.
569            Advance;
570         end loop;
571      end Generic_Formals;
572
573      function Library_Unit
574        return String
575      is
576      begin
577         Context_Clauses;
578         if Current_Token = Private_Token then Advance; end if;
579         if Current_Token = Generic_Token then
580            Advance;
581            Generic_Formals;
582         end if;
583         case Current_Token is
584            when Package_Token | Procedure_Token | Function_Token =>
585               declare
586                  Initial   : constant Token := Current_Token;
587                  Unit_Name : UT.Unbounded_String;
588               begin
589                  --  Next one must be the unit name.
590                  Advance;
591                  if Current_Token = Name_Token or else
592                     (Initial = Function_Token and then
593                      Current_Token = String_Token)
594                  then
595                     Unit_Name := Image;
596                     declare
597                        Last_Token : Token := Current_Token;
598                     begin
599                        Advance;
600                        while Current_Token = Period_Token loop
601                           Advance;
602                           if Last_Token /= Name_Token then
603                              raise Parse_Error;
604                           end if;
605                           if Current_Token = Name_Token or else
606                              (Initial = Function_Token and then
607                               Current_Token = String_Token)
608                           then
609                              UT.Append (Unit_Name, '.');
610                              UT.Append (Unit_Name, Image);
611                              Last_Token := Current_Token;
612                              Advance;
613                           else
614                              raise Parse_Error;
615                           end if;
616                        end loop;
617                     end;
618                  else
619                     raise Parse_Error;
620                  end if;
621                  return UT.To_String (Unit_Name);
622               end;
623
624            when others =>
625               null;
626
627         end case;
628         return "";
629      end Library_Unit;
630
631   end Parser;
632
633   ----------------------------------------------------------------------------
634   --  Exported routines.
635
636   function Get_Unit_Name
637     (File_Name : in String)
638     return String
639   is
640   begin
641      Scanner.Init (File_Name);
642      declare
643         Unit_Name : constant String := Parser.Library_Unit;
644      begin
645         Scanner.Close;
646         return Unit_Name;
647      end;
648   exception
649      when others =>
650         Scanner.Close;
651         return "";
652   end Get_Unit_Name;
653
654end AD.Parse;
655