1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                               S I N P U T                                --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--          Copyright (C) 1992-2018, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32--  This package contains the input routines used for reading the
33--  input source file. The actual I/O routines are in OS_Interface,
34--  with this module containing only the system independent processing.
35
36--  General Note: throughout the compiler, we use the term line or source
37--  line to refer to a physical line in the source, terminated by the end of
38--  physical line sequence.
39
40--  There are two distinct concepts of line terminator in GNAT:
41
42--    A logical line terminator is what corresponds to the "end of a line" as
43--    described in RM 2.2 (13). Any of the characters FF, LF, CR or VT or any
44--    wide character that is a Line or Paragraph Separator acts as an end of
45--    logical line in this sense, and it is essentially irrelevant whether one
46--    or more appears in sequence (since if a sequence of such characters is
47--    regarded as separate ends of line, then the intervening logical lines
48--    are null in any case).
49
50--    A physical line terminator is a sequence of format effectors that is
51--    treated as ending a physical line. Physical lines have no Ada semantic
52--    significance, but they are significant for error reporting purposes,
53--    since errors are identified by line and column location.
54
55--  In GNAT, a physical line is ended by any of the sequences LF, CR/LF, or
56--  CR. LF is used in typical Unix systems, CR/LF in DOS systems, and CR
57--  alone in System 7. In addition, we recognize any of these sequences in
58--  any of the operating systems, for better behavior in treating foreign
59--  files (e.g. a Unix file with LF terminators transferred to a DOS system).
60--  Finally, wide character codes in categories Separator, Line and Separator,
61--  Paragraph are considered to be physical line terminators.
62
63with Alloc;
64with Casing; use Casing;
65with Namet;  use Namet;
66with System;
67with Table;
68with Types;  use Types;
69
70package Sinput is
71
72   type Type_Of_File is (
73   --  Indicates the kind of file being read. This is the type of the
   --  File_Type field of a source file table entry.
74
75      Src,
76      --  Normal Ada source file
77
78      Config,
79      --  Configuration pragmas file
80
81      Def,
82      --  Preprocessing definition file
83
84      Preproc);
85      --  Source file with preprocessing commands to be preprocessed, i.e.
      --  a preprocessor input file.
86
87   type Instance_Id is new Nat;
88   No_Instance_Id : constant Instance_Id;
   --  Instance_Id is the type of the Instance field of a source file table
   --  entry, where it identifies a generic instantiation. No_Instance_Id is
   --  the value used for a normal entry that does not correspond to an
   --  instantiation. It is a deferred constant, so its value is not given
   --  at this point in the spec.
89
90   ----------------------------
91   -- Source License Control --
92   ----------------------------
93
94   --  The following type indicates the license state of a source if it
95   --  is known.
96
97   type License_Type is
98     (Unknown,
99      --  Licensing status of this source unit is unknown
100
101      Restricted,
102      --  This is a non-GPL'ed unit that is restricted from depending
103      --  on GPL'ed units (e.g. proprietary code is in this category)
104
105      GPL,
106      --  This file is licensed under the unmodified GPL. It is not allowed
107      --  to depend on Non_GPL units, and Non_GPL units may not depend on
108      --  this source unit.
109
110      Modified_GPL,
111      --  This file is licensed under the GNAT modified GPL (see header of
112      --  this file for wording of the modification). It may depend on other
113      --  Modified_GPL units or on Unrestricted units.
114
115      Unrestricted);
116      --  The license on this file is permitted to depend on any other
117      --  units, or have other units depend on it, without violating the
118      --  license of this unit. Examples are public domain units, and
119      --  units defined in the RM.
120
121   --  The above license status is checked when the appropriate check is
122   --  activated and one source depends on another, and the licensing state
123   --  of both files is known:
124
125   --  The prohibited combinations are:
126
127   --    Restricted file may not depend on GPL file
128
129   --    GPL file may not depend on Restricted file
130
131   --    Modified_GPL file may not depend on Restricted file
132   --    Modified_GPL file may not depend on GPL file
133
134   --  The reason for the last restriction here is that a client depending
135   --  on a modified GPL file must be sure that the license condition is
136   --  correct when considered transitively.
137
138   --  The licensing status is determined either by the presence of a
139   --  specific pragma License, or by scanning the header for a predefined
140   --  statement, or any file if compiling in -gnatg mode.
141
142   -----------------------
143   -- Source File Table --
144   -----------------------
145
146   --  The source file table has an entry for each source file read in for
147   --  this run of the compiler. This table is (default) initialized when
148   --  the compiler is loaded, and simply accumulates entries as compilation
149   --  proceeds and various routines in Sinput and its child packages are
150   --  called to load required source files.
151
152   --  Virtual entries are also created for generic templates when they are
153   --  instantiated, as described in a separate section later on.
154
155   --  In the case where there are multiple main units (e.g. in the case of
156   --  the cross-reference tool), this table is not reset between these units,
157   --  so that a given source file is only read once if it is used by two
158   --  separate main units.
159
160   --  The entries in the table are accessed using a Source_File_Index that
161   --  ranges from 1 to Last_Source_File. Each entry has the following fields.
162
163   --  Note: fields marked read-only are set by Sinput or one of its child
164   --  packages when a source file table entry is created, and cannot be
165   --  subsequently modified, or alternatively are set only by very special
166   --  circumstances, documented in the comments.
167
168   --  File_Name : File_Name_Type (read-only)
169   --    Name of the source file (simple name with no directory information)
170
171   --  Full_File_Name : File_Name_Type (read-only)
172   --    Full file name (full name with directory info), used for generation
173   --    of error messages, etc.
174
175   --  File_Type : Type_Of_File (read-only)
176   --    Indicates type of file (source file, configuration pragmas file,
177   --    preprocessor definition file, preprocessor input file).
178
179   --  Reference_Name : File_Name_Type (read-only)
180   --    Name to be used for source file references in error messages where
181   --    only the simple name of the file is required. Identical to File_Name
182   --    unless pragma Source_Reference is used to change it. Only processing
183   --    for the Source_Reference pragma circuit may set this field.
184
185   --  Full_Ref_Name : File_Name_Type (read-only)
186   --    Name to be used for source file references in error messages where
187   --    the full name of the file is required. Identical to Full_File_Name
188   --    unless pragma Source_Reference is used to change it. Only processing
189   --    for the Source_Reference pragma may set this field.
190
191   --  Debug_Source_Name : File_Name_Type (read-only)
192   --    Name to be used for source file references in debugging information
193   --    where only the simple name of the file is required. Identical to
194   --    Reference_Name unless the -gnatD (debug source file) switch is used.
195   --    Only processing in Sprint that generates this file is permitted to
196   --    set this field.
197
198   --  Full_Debug_Name : File_Name_Type (read-only)
199   --    Name to be used for source file references in debugging information
200   --    where the full name of the file is required. This is identical to
201   --    Full_Ref_Name unless the -gnatD (debug source file) switch is used.
202   --    Only processing in Sprint that generates this file is permitted to
203   --    set this field.
204
205   --  Instance : Instance_Id (read-only)
206   --    For entries corresponding to a generic instantiation, unique
207   --    identifier denoting the full chain of nested instantiations. Set to
208   --    No_Instance_Id for the case of a normal, non-instantiation entry.
209   --    See below for details on the handling of generic instantiations.
210
211   --  License : License_Type;
212   --    License status of source file
213
214   --  Num_SRef_Pragmas : Nat;
215   --    Number of source reference pragmas present in source file
216
217   --  First_Mapped_Line : Logical_Line_Number;
218   --    This field stores logical line number of the first line in the
219   --    file that is not a Source_Reference pragma. If no source reference
220   --    pragmas are used, then the value is set to No_Line_Number.
221
222   --  Source_Text : Source_Buffer_Ptr (read-only)
223   --    Text of source file. Every source file has a distinct set of
224   --    nonoverlapping bounds, so it is possible to determine which
225   --    file is referenced from a given subscript (Source_Ptr) value.
226
227   --  Source_First : Source_Ptr; (read-only)
228   --    This is always equal to Source_Text'First, except during
229   --    construction of a debug output file (*.dg), when Source_Text = null,
230   --    and Source_First is the size so far. Likewise for Last.
231
232   --  Source_Last : Source_Ptr; (read-only)
233   --    Same idea as Source_First, but for Last
234
235   --  Time_Stamp : Time_Stamp_Type; (read-only)
236   --    Time stamp of the source file
237
238   --  Source_Checksum : Word;
239   --    Computed checksum for contents of source file. See separate section
240   --    later on in this spec for a description of the checksum algorithm.
241
242   --  Last_Source_Line : Physical_Line_Number;
243   --    Physical line number of last source line. While a file is being
244   --    read, this refers to the last line scanned. Once a file has been
245   --    completely scanned, it is the number of the last line in the file,
246   --    and hence also gives the number of source lines in the file.
247
248   --  Keyword_Casing : Casing_Type;
249   --    Casing style used in file for keyword casing. This is initialized
250   --    to Unknown, and then set from the first occurrence of a keyword.
251   --    This value is used only for formatting of error messages.
252
253   --  Identifier_Casing : Casing_Type;
254   --    Casing style used in file for identifier casing. This is initialized
255   --    to Unknown, and then set from an identifier in the program as soon as
256   --    one is found whose casing is sufficiently clear to make a decision.
257   --    This value is used for formatting of error messages, and also is used
258   --    in the detection of keywords misused as identifiers.
259
260   --  Inlined_Call : Source_Ptr;
261   --    Source file location of the subprogram call if this source file entry
262   --    represents an inlined body or an inherited pragma. Set to No_Location
263   --    otherwise. This field is read-only for clients.
264
265   --  Inlined_Body : Boolean;
266   --    This can only be set True if Instantiation has a value other than
267   --    No_Location. If true it indicates that the instantiation is actually
268   --    an instance of an inlined body.
269
270   --  Inherited_Pragma : Boolean;
271   --    This can only be set True if Instantiation has a value other than
272   --    No_Location. If true it indicates that the instantiation is actually
273   --    an inherited class-wide pre- or postcondition.
274
275   --  Template : Source_File_Index; (read-only)
276   --    Source file index of the source file containing the template if this
277   --    is a generic instantiation. Set to No_Source_File for the normal case
278   --    of a non-instantiation entry. See Sinput-L for details.
279
280   --  Unit : Unit_Number_Type;
281   --    Identifies the unit contained in this source file. Set by
282   --    Initialize_Scanner, must not be subsequently altered.
283
284   --  The source file table is accessed by clients using the following
285   --  subprogram interface:
286
287   subtype SFI is Source_File_Index;
   --  Short synonym for Source_File_Index, used to keep the profiles of the
   --  source file table subprograms declared in this spec compact.
288
289   System_Source_File_Index : SFI;
290   --  The file system.ads is always read by the compiler to determine the
291   --  settings of the target parameters in the private part of System. This
292   --  variable records the source file index of system.ads. Typically this
293   --  will be 1 since system.ads is read first.
294
295   function Debug_Source_Name (S : SFI) return File_Name_Type;
296   function File_Name         (S : SFI) return File_Name_Type;
297   function File_Type         (S : SFI) return Type_Of_File;
298   function First_Mapped_Line (S : SFI) return Logical_Line_Number;
299   function Full_Debug_Name   (S : SFI) return File_Name_Type;
300   function Full_File_Name    (S : SFI) return File_Name_Type;
301   function Full_Ref_Name     (S : SFI) return File_Name_Type;
302   function Identifier_Casing (S : SFI) return Casing_Type;
303   function Inlined_Body      (S : SFI) return Boolean;
304   function Inherited_Pragma  (S : SFI) return Boolean;
305   function Inlined_Call      (S : SFI) return Source_Ptr;
306   function Instance          (S : SFI) return Instance_Id;
307   function Keyword_Casing    (S : SFI) return Casing_Type;
308   function Last_Source_Line  (S : SFI) return Physical_Line_Number;
309   function License           (S : SFI) return License_Type;
310   function Num_SRef_Pragmas  (S : SFI) return Nat;
311   function Reference_Name    (S : SFI) return File_Name_Type;
312   function Source_Checksum   (S : SFI) return Word;
313   function Source_First      (S : SFI) return Source_Ptr;
314   function Source_Last       (S : SFI) return Source_Ptr;
315   function Source_Text       (S : SFI) return Source_Buffer_Ptr;
316   function Template          (S : SFI) return Source_File_Index;
317   function Unit              (S : SFI) return Unit_Number_Type;
318   function Time_Stamp        (S : SFI) return Time_Stamp_Type;
   --  Each of the above functions takes the index S of a source file table
   --  entry and has the same name and result type as one of the entry fields
   --  documented earlier in this spec. Presumably each returns the value of
   --  that field for entry S (the bodies are not visible in this spec).
319
320   procedure Set_Keyword_Casing    (S : SFI; C : Casing_Type);
321   procedure Set_Identifier_Casing (S : SFI; C : Casing_Type);
322   procedure Set_License           (S : SFI; L : License_Type);
323   procedure Set_Unit              (S : SFI; U : Unit_Number_Type);
   --  Procedures for updating fields of source file table entry S. Going by
   --  the names and parameter types, these presumably set the Keyword_Casing,
   --  Identifier_Casing, License and Unit fields respectively to the value
   --  given by the second parameter (the bodies are not visible here).
324
325   function Last_Source_File return Source_File_Index;
326   --  Index of last source file table entry. Entries in the table are
   --  accessed using index values ranging from 1 to this value.
327
328   function Num_Source_Files return Nat;
329   --  Number of source file table entries
330
331   procedure Initialize;
332   --  Initialize internal tables
333
334   procedure Lock;
335   --  Lock internal tables
336
337   procedure Unlock;
338   --  Unlock internal tables
339
340   Main_Source_File : Source_File_Index := No_Source_File;
341   --  This is set to the source file index of the main unit. The initial
   --  value, before it is set, is No_Source_File.
342
343   -----------------------
344   -- Checksum Handling --
345   -----------------------
346
347   --  As a source file is scanned, a checksum is computed by taking all the
348   --  non-blank characters in the file, excluding comment characters, the
349   --  minus-minus sequence starting a comment, and all control characters
350   --  except ESC.
351
352   --  The checksum algorithm used is the standard CRC-32 algorithm, as
353   --  implemented by System.CRC32, except that we do not bother with the
354   --  final XOR with all 1 bits.
355
356   --  This algorithm ensures that the checksum includes all semantically
357   --  significant aspects of the program represented by the source file,
358   --  but is insensitive to layout, presence or contents of comments, wide
359   --  character representation method, or casing conventions outside strings.
360
361   --  Scans.Checksum is initialized appropriately at the start of scanning
362   --  a file, and copied into the Source_Checksum field of the file table
363   --  entry when the end of file is encountered.
364
365   -------------------------------------
366   -- Handling Generic Instantiations --
367   -------------------------------------
368
369   --  As described in Sem_Ch12, a generic instantiation involves making a
370   --  copy of the tree of the generic template. The source locations in
371   --  this tree directly reference the source of the template. However, it
372   --  is also possible to find the location of the instantiation.
373
374   --  This is achieved as follows. When an instantiation occurs, a new entry
375   --  is made in the source file table. The Source_Text of the instantiation
376   --  points to the same Source_Buffer as the Source_Text of the template, but
377   --  with different bounds. The separate range of Sloc values avoids
378   --  confusion, and means that the Sloc values can still be used to uniquely
379   --  identify the source file table entry. See Set_Dope below for the
380   --  low-level trickery that allows two different pointers to point at the
381   --  same array, but with different bounds.
382
383   --  The Instance field of this source file table entry, set
384   --  to No_Instance_Id for normal entries, instead contains a value that
385   --  uniquely identifies a particular instantiation, and the associated
386   --  entry in the Instances table. The source location of the instantiation
387   --  can be retrieved using function Instantiation below. In the case of
388   --  nested instantiations, the Instances table can be used to trace the
389   --  complete chain of nested instantiations.
390
391   --  Two routines are used to build the special instance entries in the
392   --  source file table. Create_Instantiation_Source is first called to build
393   --  the virtual source table entry for the instantiation, and then the
394   --  Sloc values in the copy are adjusted using Adjust_Instantiation_Sloc.
395   --  See child unit Sinput.L for details on these two routines.
396
397   generic
398      with procedure Process (Id : Instance_Id; Inst_Sloc : Source_Ptr);
399   procedure Iterate_On_Instances;
400   --  Execute Process for each entry in the instance table. Process is the
   --  generic formal procedure supplied at instantiation; it receives the
   --  Instance_Id of the entry as Id. Inst_Sloc is presumably the source
   --  location of the corresponding instantiation (the body is not visible
   --  in this spec).
401
402   function Instantiation (S : SFI) return Source_Ptr;
403   --  Returns a source location that depends on what the source file table
404   --  entry S represents:
405   --    Inlined body: source location of the inlined call
406   --    Inherited pragma: source location of the declaration to which the
407   --      overriding subprogram for the inherited pragma is attached
408   --    Otherwise, generic instantiation: source location of the instantiation
   --  Returns No_Location in all other cases.
409
410   -----------------
411   -- Global Data --
412   -----------------
413
414   Current_Source_File : Source_File_Index := No_Source_File;
415   --  Source_File table index of source file currently being scanned.
416   --  Initialized so that some tools (such as gprbuild) can be built with
417   --  -gnatVa and pragma Initialize_Scalars without problems.
418
419   Current_Source_Unit : Unit_Number_Type;
420   --  Unit number of source file currently being scanned. The special value
421   --  of No_Unit indicates that the configuration pragma file is currently
422   --  being scanned (this has no entry in the unit table).
423
424   Source_gnat_adc : Source_File_Index := No_Source_File;
425   --  This is set to reference the gnat.adc file if such a file is present.
   --  The initial value is No_Source_File.
426
427   Source : Source_Buffer_Ptr;
428   --  Current source (copy of Source_File.Table (Current_Source_Unit).Source)
   --  NOTE(review): the source file table is documented in this spec as
   --  being indexed by Source_File_Index, with the text held in the field
   --  Source_Text, so this presumably means the Source_Text field of the
   --  entry for Current_Source_File - to be confirmed against the body.
429
430   -----------------------------------------
431   -- Handling of Source Line Terminators --
432   -----------------------------------------
433
434   --  In this section we discuss in detail the issue of terminators used to
435   --  terminate source lines. The RM says that one or more format effectors
436   --  (other than horizontal tab) end a source line, and defines the set of
437   --  such format effectors, but does not talk about exactly how they are
438   --  represented in the source program (since in general the RM is not in
439   --  the business of specifying source program formats).
440
441   --  The type Types.Line_Terminator is defined as a subtype of Character
442   --  that includes CR/LF/VT/FF. The most common line enders in practice
443   --  are CR (some MAC systems), LF (Unix systems), and CR/LF (DOS/Windows
444   --  systems). Any of these sequences is recognized as ending a physical
445   --  source line, and if multiple such terminators appear (e.g. LF/LF),
446   --  then we consider we have an extra blank line.
447
448   --  VT and FF are recognized as terminating source lines, but they are
449   --  considered to end a logical line instead of a physical line, so that
450   --  the line numbering ignores such terminators. The use of VT and FF is
451   --  mandated by the standard, and correctly handled in a conforming manner
452   --  by GNAT, but their use is not recommended.
453
454   --  In addition to the set of characters defined by the type in Types, when
455   --  wide character encoding is used, the codes returning True for a call to
456   --  System.UTF_32.Is_UTF_32_Line_Terminator are also recognized as ending a
457   --  source line. This includes the standard codes defined above in addition
458   --  to NEL (NEXT LINE), LINE SEPARATOR and PARAGRAPH SEPARATOR. Again, as in
459   --  the case of VT and FF, the standard requires we recognize these as line
460   --  terminators, but we consider them to be logical line terminators. The
461   --  only physical line terminators recognized are the standard ones (CR,
462   --  LF, or CR/LF).
463
464   --  However, we do not recognize the NEL (16#85#) character as having the
465   --  significance of an end of line character when operating in normal 8-bit
466   --  Latin-n input mode for the compiler. Instead the rule in this mode is
467   --  that all upper half control codes (16#80# .. 16#9F#) are illegal if they
468   --  occur in program text, and are ignored if they appear in comments.
469
470   --  First, note that this behavior is fully conforming with the standard.
471   --  The standard has nothing whatever to say about source representation
472   --  and implementations are completely free to make their own rules. In
473   --  this case, in 8-bit mode, GNAT decides that the 16#0085# character is
474   --  not a representation of the NEL character, even though it looks like it.
475   --  If you have NEL's in your program, which you expect to be treated as
476   --  end of line characters, you must use a wide character encoding such as
477   --  UTF-8 for this code to be recognized.
478
479   --  Second, an explanation of why we take this slightly surprising choice.
480   --  We have never encountered anyone actually using the NEL character to
481   --  end lines. One user raised the issue as a result of some experiments,
482   --  but no one has ever submitted a program encoded this way, in any of
483   --  the possible encodings. It seems that even when using wide character
484   --  codes extensively, the normal approach is to use standard line enders
485   --  (LF or CR/LF). So the failure to recognize NEL in this mode seems to
486   --  have no practical downside.
487
488   --  Moreover, what we have seen in a significant number of programs from
489   --  multiple sources is the practice of writing all program text in lower
490   --  half (ASCII) form, but using UTF-8 encoded wide characters freely in
491   --  comments, where the comments are terminated by normal line endings
492   --  (LF or CR/LF). The comments do not contain NEL codes, but they can and
493   --  do contain other UTF-8 encoding sequences where one of the bytes is the
494   --  NEL code. Now such programs can of course be compiled in UTF-8 mode,
495   --  but in practice they also compile fine in standard 8-bit mode without
496   --  specifying a character encoding. Since this is common practice, it would
497   --  be a significant upwards incompatibility to recognize NEL in 8-bit mode.
498
499   -----------------
500   -- Subprograms --
501   -----------------
502
503   procedure Backup_Line (P : in out Source_Ptr);
504   --  Back up the argument pointer to the start of the previous line. On
505   --  entry, P points to the start of a physical line in the source buffer.
506   --  On return, P is updated to point to the start of the previous line.
507   --  Precondition: the caller has checked that a Line_Terminator character
508   --  precedes P, so that there definitely is a previous line in the source
   --  buffer.
509
510   procedure Build_Location_String
511     (Buf : in out Bounded_String;
512      Loc : Source_Ptr);
513   --  This procedure builds a string of the form "name:line", where
514   --  name is the file name corresponding to Loc, and line is the line number.
515   --  If instantiations are involved, additional suffixes of the same form are
516   --  appended after the separating string " instantiated at ". The resulting
517   --  string is appended to Buf.
518
519   function Build_Location_String (Loc : Source_Ptr) return String;
520   --  Functional form of the above procedure, returning the location string
   --  for Loc as a String rather than appending it to a Bounded_String.
521
522   procedure Check_For_BOM;
523   --  Check if the current source starts with a BOM (byte order mark).
524   --  Scan_Ptr needs to be at the start of the current source on entry. If
525   --  the current source starts with a recognized BOM, then some flags such
526   --  as Wide_Character_Encoding_Method are set accordingly, and the Scan_Ptr
527   --  on return points past this BOM. An error message is output and
528   --  Unrecoverable_Error raised if an unrecognized BOM is detected. The call
   --  has no effect if no BOM is found.
529
530   function Get_Column_Number (P : Source_Ptr) return Column_Number;
531   --  The ones-origin column number of the specified Source_Ptr value is
532   --  determined and returned. Tab characters if present are assumed to
533   --  represent the standard 1,9,17.. spacing pattern, i.e. tab stops every
   --  eight columns.
534
535   function Get_Logical_Line_Number
536     (P : Source_Ptr) return Logical_Line_Number;
537   --  The line number of the specified source position is obtained by
538   --  doing a binary search on the source positions in the lines table
539   --  for the unit containing the given source position. The returned
540   --  value is the logical line number, already adjusted for the effect
541   --  of source reference pragmas. If P refers to the line of a source
542   --  reference pragma itself, then No_Line_Number is returned. If no
543   --  source reference pragmas have been encountered, the value returned
544   --  is the same as the physical line number.
545
546   function Get_Logical_Line_Number_Img
547     (P : Source_Ptr) return String;
548   --  Same as Get_Logical_Line_Number, but returns the line number as a
549   --  string of decimal digits, with no leading space. Destroys Name_Buffer.
550
551   function Get_Physical_Line_Number
552     (P : Source_Ptr) return Physical_Line_Number;
553   --  The line number of the specified source position P is obtained by
554   --  doing a binary search on the source positions in the lines table
555   --  for the unit containing the given source position. The returned
556   --  value is the physical line number in the source being compiled
   --  (presumably with no adjustment for source reference pragmas, in
   --  contrast to the logical line number).
557
558   function Get_Source_File_Index (S : Source_Ptr) return Source_File_Index;
559   pragma Inline (Get_Source_File_Index);
560   --  Return source file table index of file identified by given source
561   --  pointer value S. This call must always succeed, since any valid source
562   --  pointer value belongs to some previously loaded source file.
563
564   function Instantiation_Depth (S : Source_Ptr) return Nat;
565   --  Determine instantiation depth for given Sloc value S. A result of
566   --  zero means that the given Sloc is not in an instantiation.
567
568   function Line_Start (P : Source_Ptr) return Source_Ptr;
569   --  Finds the source position of the start of the line containing the
570   --  given source location P.
571
572   function Line_Start
573     (L : Physical_Line_Number;
574      S : Source_File_Index) return Source_Ptr;
575   --  Finds the source position of the start of line L in source file S,
576   --  where L is a physical line number identifying the line.
577
578   function Num_Source_Lines (S : Source_File_Index) return Nat;
579   --  Returns the number of source lines in source file S (this is
580   --  equivalent to reading the value of Last_Source_Line, but returns Nat
581   --  rather than a physical line number).
582
583   procedure Register_Source_Ref_Pragma
584     (File_Name          : File_Name_Type;
585      Stripped_File_Name : File_Name_Type;
586      Mapped_Line        : Nat;
587      Line_After_Pragma  : Physical_Line_Number);
588   --  Register a source reference pragma. The parameter File_Name is the
589   --  file name from the pragma, and Stripped_File_Name is this name with
590   --  the directory information stripped. Both these parameters are set
591   --  to No_Name if no file name parameter was given in the pragma
592   --  (which can only happen for the second and subsequent pragmas).
593   --  Mapped_Line is the line number parameter from the pragma, and
594   --  Line_After_Pragma is the physical line number of the line that
595   --  follows the line containing the Source_Reference pragma.
596
597   function Original_Location (S : Source_Ptr) return Source_Ptr;
598   --  Given a source pointer S, returns the corresponding source pointer
599   --  value ignoring instantiation copies. For locations that do not
600   --  correspond to instantiation copies of templates, S is returned
601   --  unchanged. For locations that do correspond to copies of
602   --  templates from instantiations, the location within the original
603   --  template is returned. This is useful in canonicalizing locations.
604
605   function Instantiation_Location (S : Source_Ptr) return Source_Ptr;
606   pragma Inline (Instantiation_Location);
607   --  Given a source pointer S, returns the corresponding source pointer
608   --  value of the instantiation if this location is within an instance.
609   --  If S is not within an instance, then this returns No_Location.
610
611   function Comes_From_Inlined_Body (S : Source_Ptr) return Boolean;
612   pragma Inline (Comes_From_Inlined_Body);
613   --  Given a source pointer S, returns whether it comes from an inlined body.
614   --  This allows distinguishing these source pointers from those that come
615   --  from instantiation of generics, since Instantiation_Location returns a
616   --  valid location in both cases.
617
618   function Comes_From_Inherited_Pragma (S : Source_Ptr) return Boolean;
619   pragma Inline (Comes_From_Inherited_Pragma);
620   --  Given a source pointer S, returns whether it comes from an inherited
621   --  pragma. This allows distinguishing these source pointers from those
622   --  that come from instantiation of generics, since Instantiation_Location
623   --  returns a valid location in both cases.
624
625   function Top_Level_Location (S : Source_Ptr) return Source_Ptr;
626   --  Given a source pointer S, returns the argument unchanged if it is
627   --  not in an instantiation. If S is in an instantiation, then it returns
628   --  the location of the top level instantiation, i.e. the outer level
629   --  instantiation in the nested case.
630
631   function Physical_To_Logical
632     (Line : Physical_Line_Number;
633      S    : Source_File_Index) return Logical_Line_Number;
634   --  Given a physical line number in source file whose source index is S,
635   --  return the corresponding logical line number. If the physical line
636   --  number is one containing a Source_Reference pragma, the result will
637   --  be No_Line_Number.
638
639   procedure Skip_Line_Terminators
640     (P        : in out Source_Ptr;
641      Physical : out Boolean);
642   --  On entry, P points to a line terminator that has been encountered,
643   --  which is one of FF,LF,VT,CR or a wide character sequence whose value is
644   --  in category Separator,Line or Separator,Paragraph. P points just past
645   --  the character that was scanned. The purpose of this routine is to
646   --  distinguish physical and logical line endings. A physical line ending
647   --  is one of:
648   --
649   --     CR on its own (MAC System 7)
650   --     LF on its own (Unix and unix-like systems)
651   --     CR/LF (DOS, Windows)
652   --     Wide character in Separator,Line or Separator,Paragraph category
653   --
654   --     Note: we no longer recognize LF/CR (which we did in some earlier
655   --     versions of GNAT. The reason for this is that this sequence is not
656   --     used and recognizing it generated confusion. For example given the
657   --     sequence LF/CR/LF we were interpreting that as (LF/CR) ending the
658   --     first line and a blank line ending with CR following, but it is
659   --     clearly better to interpret this as LF, with a blank line terminated
660   --     by CR/LF, given that LF and CR/LF are both in common use, but no
661   --     system we know of uses LF/CR.
662   --
663   --  A logical line ending (that is not a physical line ending) is one of:
664   --
665   --     VT on its own
666   --     FF on its own
667   --
668   --  On return, P is bumped past the line ending sequence (one of the above
669   --  seven possibilities). Physical is set to True to indicate that a
670   --  physical end of line was encountered, in which case this routine also
671   --  makes sure that the lines table for the current source file has an
672   --  appropriate entry for the start of the new physical line.
673
674   procedure Sloc_Range (N : Node_Id; Min, Max : out Source_Ptr);
675   --  Given a node, returns the minimum and maximum source locations of any
676   --  node in the syntactic subtree for the node. This is not quite the same
677   --  as the locations of the first and last token in the node construct
678   --  because parentheses at the outer level do not have a recorded Sloc.
679   --
680   --  Note: At each step of the tree traversal, we make sure to go back to
681   --  the Original_Node, since this function is concerned about original
682   --  (source) locations.
683   --
684   --  Note: if the tree for the expression contains no "real" Sloc values,
685   --  i.e. values > No_Location, then both Min and Max are set to
686   --  Sloc (Original_Node (N)).
687
688   function Source_Offset (S : Source_Ptr) return Nat;
689   --  Returns the zero-origin offset of the given source location from the
690   --  start of its corresponding unit. This is used for creating canonical
691   --  names in some situations.
692
693   procedure Write_Location (P : Source_Ptr);
694   --  Writes out a string of the form fff:nn:cc, where fff, nn, cc are the
695   --  file name, line number and column corresponding to the given source
696   --  location. No_Location and Standard_Location appear as the strings
697   --  <no location> and <standard location>. If the location is within an
698   --  instantiation, then the instance location is appended, enclosed in
699   --  square brackets (which can nest if necessary). Note that this routine
700   --  is used only for internal compiler debugging output purposes (which
701   --  is why the somewhat cryptic use of brackets is acceptable).
702
   procedure wl (P : Source_Ptr);
   pragma Export (Ada, wl);
   --  Equivalent to Write_Location (P); Write_Eol; for calls from GDB.
   --  NOTE(review): the short name and the Export pragma are presumably
   --  there to make the routine easy to invoke from the debugger - confirm.
706
707   procedure Write_Time_Stamp (S : Source_File_Index);
708   --  Writes time stamp of specified file in YY-MM-DD HH:MM.SS format
709
   procedure Tree_Read;
   --  Initializes internal tables from current tree file using the relevant
   --  Table.Tree_Read routines. This is the reading counterpart of
   --  Tree_Write.
713
   procedure Tree_Write;
   --  Writes out internal tables to current tree file using the relevant
   --  Table.Tree_Write routines. This is the writing counterpart of
   --  Tree_Read.
717
   procedure Clear_Source_File_Table;
   --  This procedure frees memory allocated in the Source_File table (in
   --  the private part). It should only be used when it is guaranteed that
   --  all source files that have been loaded so far will not be accessed
   --  before being reloaded. It is intended for tools that parse sources
   --  several times, to avoid memory leaks.
724
725private
726   pragma Inline (File_Name);
727   pragma Inline (Full_File_Name);
728   pragma Inline (File_Type);
729   pragma Inline (Reference_Name);
730   pragma Inline (Full_Ref_Name);
731   pragma Inline (Debug_Source_Name);
732   pragma Inline (Full_Debug_Name);
733   pragma Inline (Instance);
734   pragma Inline (License);
735   pragma Inline (Num_SRef_Pragmas);
736   pragma Inline (First_Mapped_Line);
737   pragma Inline (Source_Text);
738   pragma Inline (Source_First);
739   pragma Inline (Source_Last);
740   pragma Inline (Time_Stamp);
741   pragma Inline (Source_Checksum);
742   pragma Inline (Last_Source_Line);
743   pragma Inline (Keyword_Casing);
744   pragma Inline (Identifier_Casing);
745   pragma Inline (Inlined_Call);
746   pragma Inline (Inlined_Body);
747   pragma Inline (Inherited_Pragma);
748   pragma Inline (Template);
749   pragma Inline (Unit);
750
751   pragma Inline (Set_Keyword_Casing);
752   pragma Inline (Set_Identifier_Casing);
753
754   pragma Inline (Last_Source_File);
755   pragma Inline (Num_Source_Files);
756   pragma Inline (Num_Source_Lines);
757
758   pragma Inline (Line_Start);
759
   No_Instance_Id : constant Instance_Id := 0;
   --  Instance_Id value zero. NOTE(review): presumably this denotes code
   --  that does not come from an instance, matching the use of index 0 in
   --  the Instances table - confirm.
761
762   -------------------------
763   -- Source_Lines Tables --
764   -------------------------
765
766   type Lines_Table_Type is
767     array (Physical_Line_Number) of Source_Ptr;
768   --  Type used for lines table. The entries are indexed by physical line
769   --  numbers. The values are the starting Source_Ptr values for the start
770   --  of the corresponding physical line. Note that we make this a bogus
771   --  big array, sized as required, so that we avoid the use of fat pointers.
772
773   type Lines_Table_Ptr is access all Lines_Table_Type;
774   --  Type used for pointers to line tables
775
776   type Logical_Lines_Table_Type is
777     array (Physical_Line_Number) of Logical_Line_Number;
778   --  Type used for logical lines table. This table is used if a source
779   --  reference pragma is present. It is indexed by physical line numbers,
780   --  and contains the corresponding logical line numbers. An entry that
781   --  corresponds to a source reference pragma is set to No_Line_Number.
782   --  Note that we make this a bogus big array, sized as required, so that
783   --  we avoid the use of fat pointers.
784
785   type Logical_Lines_Table_Ptr is access all Logical_Lines_Table_Type;
786   --  Type used for pointers to logical line tables
787
788   -----------------------
789   -- Source_File Table --
790   -----------------------
791
   --  See earlier descriptions for meanings of public fields. The storage
   --  layout of this record is fixed explicitly by the representation
   --  clause given for it in this private part.

   type Source_File_Record is record
      File_Name         : File_Name_Type;
      Reference_Name    : File_Name_Type;
      Debug_Source_Name : File_Name_Type;
      Full_Debug_Name   : File_Name_Type;
      Full_File_Name    : File_Name_Type;
      Full_Ref_Name     : File_Name_Type;
      Instance          : Instance_Id;
      Num_SRef_Pragmas  : Nat;
      First_Mapped_Line : Logical_Line_Number;
      Source_Text       : Source_Buffer_Ptr;
      Source_First      : Source_Ptr;
      Source_Last       : Source_Ptr;
      Source_Checksum   : Word;
      Last_Source_Line  : Physical_Line_Number;
      Template          : Source_File_Index;
      Unit              : Unit_Number_Type;
      Time_Stamp        : Time_Stamp_Type;
      File_Type         : Type_Of_File;
      Inlined_Call      : Source_Ptr;
      Inlined_Body      : Boolean;
      Inherited_Pragma  : Boolean;
      License           : License_Type;
      Keyword_Casing    : Casing_Type;
      Identifier_Casing : Casing_Type;

      --  The following fields are for internal use only (i.e. only in the
      --  body of Sinput or its children, with no direct access by clients).

      Sloc_Adjust : Source_Ptr;
      --  A value to be added to Sloc values for this file to reference the
      --  corresponding lines table. This is zero for the non-instantiation
      --  case, and set so that the addition references the ultimate template
      --  for the instantiation case. See Sinput-L for further details.

      Lines_Table : Lines_Table_Ptr;
      --  Pointer to lines table for this source. Updated as additional
      --  lines are accessed using the Skip_Line_Terminators procedure.
      --  Note: the lines table for an instantiation entry refers to the
      --  original line numbers of the template; see Sinput-L for details.

      Logical_Lines_Table : Logical_Lines_Table_Ptr;
      --  Pointer to logical lines table for this source. Non-null only if
      --  a source reference pragma has been processed. Updated as lines
      --  are accessed using the Skip_Line_Terminators procedure.

      Lines_Table_Max : Physical_Line_Number;
      --  Maximum subscript values for currently allocated Lines_Table
      --  and (if present) the allocated Logical_Lines_Table. The value
      --  Max_Source_Line gives the maximum used value, this gives the
      --  maximum allocated value.
      --  NOTE(review): no Max_Source_Line component is visible in this
      --  record; presumably Last_Source_Line is meant - confirm.

      Index : Source_File_Index := 123456789; -- for debugging
   end record;
848
   --  The following representation clause ensures that the above record
   --  has no holes. We do this so that when instances of this record are
   --  written by Tree_Gen, we do not write uninitialized values to the file.
   --
   --  The component clauses follow the record declaration order (except
   --  that the three pointer components are grouped at the end), not the
   --  offset order: Instance (at 48) and Inlined_Call (at 88) appear out
   --  of offset sequence relative to their neighbours in the list.
   --
   --  NOTE(review): Inlined_Body and Inherited_Pragma occupy only bits 0
   --  and 1 of the storage unit at offset 75, and no component clause
   --  covers bits 2 .. 7 of that unit. Confirm that this padding is
   --  acceptable given the "no holes" intent stated above.

   AS : constant Pos := Standard'Address_Size;
   --  Pointer size, used below to position and size the pointer components

   for Source_File_Record use record
      File_Name           at  0 range 0 .. 31;
      Reference_Name      at  4 range 0 .. 31;
      Debug_Source_Name   at  8 range 0 .. 31;
      Full_Debug_Name     at 12 range 0 .. 31;
      Full_File_Name      at 16 range 0 .. 31;
      Full_Ref_Name       at 20 range 0 .. 31;
      Instance            at 48 range 0 .. 31;
      Num_SRef_Pragmas    at 24 range 0 .. 31;
      First_Mapped_Line   at 28 range 0 .. 31;
      Source_First        at 32 range 0 .. 31;
      Source_Last         at 36 range 0 .. 31;
      Source_Checksum     at 40 range 0 .. 31;
      Last_Source_Line    at 44 range 0 .. 31;
      Template            at 52 range 0 .. 31;
      Unit                at 56 range 0 .. 31;
      Time_Stamp          at 60 range 0 .. 8 * Time_Stamp_Length - 1;
      File_Type           at 74 range 0 .. 7;
      Inlined_Call        at 88 range 0 .. 31;
      Inlined_Body        at 75 range 0 .. 0;
      Inherited_Pragma    at 75 range 1 .. 1;
      License             at 76 range 0 .. 7;
      Keyword_Casing      at 77 range 0 .. 7;
      Identifier_Casing   at 78 range 0 .. 15;
      Sloc_Adjust         at 80 range 0 .. 31;
      Lines_Table_Max     at 84 range 0 .. 31;
      Index               at 92 range 0 .. 31;

      --  The following fields are pointers, so we have to specialize their
      --  lengths using pointer size, obtained above as Standard'Address_Size.
      --  Note that Source_Text is a fat pointer, so it has size = AS*2.

      Source_Text         at 96 range 0      .. AS * 2 - 1;
      Lines_Table         at 96 range AS * 2 .. AS * 3 - 1;
      Logical_Lines_Table at 96 range AS * 3 .. AS * 4 - 1;
   end record; -- Source_File_Record

   for Source_File_Record'Size use 96 * 8 + AS * 4;
   --  This ensures that we did not leave out any fields. The size is the
   --  96 storage units that precede the pointer components, expressed in
   --  bits, plus AS * 4 bits for the pointer components (AS * 2 for the
   --  fat pointer Source_Text and AS for each of the other two).
894
   --  Table of Source_File_Record entries, indexed by Source_File_Index
   --  with a low bound of 1. The initial size and the increment are taken
   --  from package Alloc.

   package Source_File is new Table.Table
     (Table_Component_Type => Source_File_Record,
      Table_Index_Type     => Source_File_Index,
      Table_Low_Bound      => 1,
      Table_Initial        => Alloc.Source_File_Initial,
      Table_Increment      => Alloc.Source_File_Increment,
      Table_Name           => "Source_File");
902
   --  Auxiliary table containing source location of instantiations. Index 0
   --  is used for code that does not come from an instance. The table is
   --  indexed by Instance_Id, and uses the same Alloc settings for initial
   --  size and increment as the Source_File table.

   package Instances is new Table.Table
     (Table_Component_Type => Source_Ptr,
      Table_Index_Type     => Instance_Id,
      Table_Low_Bound      => 0,
      Table_Initial        => Alloc.Source_File_Initial,
      Table_Increment      => Alloc.Source_File_Increment,
      Table_Name           => "Instances");
913
914   -----------------
915   -- Subprograms --
916   -----------------
917
918   procedure Alloc_Line_Tables
919     (S       : in out Source_File_Record;
920      New_Max : Nat);
921   --  Allocate or reallocate the lines table for the given source file so
922   --  that it can accommodate at least New_Max lines. Also allocates or
923   --  reallocates logical lines table if source ref pragmas are present.
924
925   procedure Add_Line_Tables_Entry
926     (S : in out Source_File_Record;
927      P : Source_Ptr);
928   --  Increment line table size by one (reallocating the lines table if
929   --  needed) and set the new entry to contain the value P. Also bumps
930   --  the Source_Line_Count field. If source reference pragmas are
931   --  present, also increments logical lines table size by one, and
932   --  sets new entry.
933
934   procedure Trim_Lines_Table (S : Source_File_Index);
935   --  Set lines table size for entry S in the source file table to
936   --  correspond to the current value of Num_Source_Lines, releasing
937   --  any unused storage. This is used by Sinput.L and Sinput.D.
938
939   procedure Set_Source_File_Index_Table (Xnew : Source_File_Index);
940   --  Sets entries in the Source_File_Index_Table for the newly created
941   --  Source_File table entry whose index is Xnew. The Source_First and
942   --  Source_Last fields of this entry must be set before the call.
943   --  See package body for details.
944
   type Dope_Rec is record
      First, Last : Source_Ptr'Base;
   end record;
   Dope_Rec_Size : constant := 2 * Source_Ptr'Base'Size;
   for Dope_Rec'Size use Dope_Rec_Size;
   for Dope_Rec'Alignment use Dope_Rec_Size / 8;
   type Dope_Ptr is access all Dope_Rec;
   --  Dope_Rec holds two values of Source_Ptr'Base, First and Last. Its
   --  size is fixed at exactly twice Source_Ptr'Base'Size bits, and its
   --  alignment at that size divided by 8. Dope_Ptr designates such a
   --  record and is the type of the New_Dope parameter of Set_Dope.
   --  NOTE(review): presumably First and Last are the bounds stored in
   --  the dope part of a Source_Buffer_Ptr fat pointer - confirm.
952
953   procedure Set_Dope
954     (Src : System.Address; New_Dope : Dope_Ptr);
955   --  Src is the address of a variable of type Source_Buffer_Ptr, which is a
956   --  fat pointer. This sets the dope part of the fat pointer to point to the
957   --  specified New_Dope. This low-level processing is used to make the
958   --  Source_Text of an instance point to the same text as the template, but
959   --  with different bounds.
960
961   procedure Free_Dope (Src : System.Address);
962   --  Calls Unchecked_Deallocation on the dope part of the fat pointer Src
963
964   procedure Free_Source_Buffer (Src : in out Source_Buffer_Ptr);
965   --  Deallocates the source buffer
966
967end Sinput;
968