1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                                N A M E T                                 --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--          Copyright (C) 1992-2012, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
17--                                                                          --
18-- As a special exception under Section 7 of GPL version 3, you are granted --
19-- additional permissions described in the GCC Runtime Library Exception,   --
20-- version 3.1, as published by the Free Software Foundation.               --
21--                                                                          --
22-- You should have received a copy of the GNU General Public License and    --
23-- a copy of the GCC Runtime Library Exception along with this program;     --
24-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
25-- <http://www.gnu.org/licenses/>.                                          --
26--                                                                          --
27-- GNAT was originally developed  by the GNAT team at  New York University. --
28-- Extensive contributions were provided by Ada Core Technologies Inc.      --
29--                                                                          --
30------------------------------------------------------------------------------
31
32with Alloc;
33with Table;
34with Hostparm; use Hostparm;
35with System;   use System;
36with Types;    use Types;
37
38package Namet is
39
40--  WARNING: There is a C version of this package. Any changes to this
41--  source file must be properly reflected in the C header file namet.h
42--  which is created manually from namet.ads and namet.adb.
43
44--  This package contains routines for handling the names table. The table
45--  is used to store character strings for identifiers and operator symbols,
46--  as well as other string values such as unit names and file names.
47
48--  The forms of the entries are as follows:
49
50--    Identifiers        Stored with upper case letters folded to lower case.
51--                       Upper half (16#80# bit set) and wide characters are
52--                       stored in an encoded form (Uhh for upper half char,
53--                       Whhhh for wide characters, WWhhhhhhhh as provided by
54--                       the routine Store_Encoded_Character, where hh are hex
55--                       digits for the character code using lower case a-f).
56--                       Normally the use of U or W in other internal names is
57--                       avoided, but these letters may be used in internal
58--                       names (without this special meaning), if they appear
59--                       as the last character of the name, or they are
60--                       followed by an upper case letter (other than the WW
61--                       sequence), or an underscore.
62
63--    Operator symbols   Stored with an initial letter O, and the remainder
64--                       of the name is the lower case characters XXX where
65--                       the name is Name_Op_XXX, see Snames spec for a full
66--                       list of the operator names. Normally the use of O
67--                       in other internal names is avoided, but it may be
68--                       used in internal names (without this special meaning)
69--                       if it is the last character of the name, or if it is
70--                       followed by an upper case letter or an underscore.
71
72--    Character literals Character literals have names that are used only for
73--                       debugging and error message purposes. The form is an
74--                       upper case Q followed by a single lower case letter,
75--                       or by a Uxx/Wxxxx/WWxxxxxxxx encoding as described for
76--                       identifiers. The Set_Character_Literal_Name procedure
77--                       should be used to construct these encodings. Normally
78--                       the use of Q in other internal names is avoided, but
79--                       it may be used in internal names (without this special
80--                       meaning) if it is the last character of the name, or
81--                       if it is followed by an upper case letter or an
82--                       underscore.
83
84--    Unit names         Stored with upper case letters folded to lower case,
85--                       using Uhh/Whhhh/WWhhhhhhhh encoding as described for
86--                       identifiers, and a %s or %b suffix for specs/bodies.
87--                       See package Uname for further details.
88
89--    File names         Are stored in the form provided by Osint. Typically
90--                       they may include wide character escape sequences and
91--                       upper case characters (in non-encoded form). Casing
92--                       is also derived from the external environment. Note
93--                       that file names provided by Osint must generally be
94--                       consistent with the names from Fname.Get_File_Name.
95
96--    Other strings      The names table is also used as a convenient storage
97--                       location for other variable length strings such as
98--                       error messages etc. There are no restrictions on what
99--                       characters may appear for such entries.
100
101--  Note: the encodings Uhh (upper half characters), Whhhh (wide characters),
102--  WWhhhhhhhh (wide wide characters) and Qx (character literal names) are
103--  described in the spec, since they are visible throughout the system (e.g.
104--  in debugging output). However, no code should depend on these particular
105--  encodings, so it should be possible to change the encodings by making
106--  changes only to the Namet specification (to change these comments) and the
107--  body (which actually implements the encodings).
108
109--  The names are hashed so that a given name appears only once in the table,
110--  except that names entered with Name_Enter as opposed to Name_Find are
111--  omitted from the hash table.
112
113--  The first 26 entries in the names table (with Name_Id values in the range
114--  First_Name_Id .. First_Name_Id + 25) represent names which are the one
115--  character lower case letters in the range a-z, and these names are created
116--  when the names table is initialized (see Initialize and Reinitialize).
117
118--  Two values, one of type Int and one of type Byte, are stored with each
119--  names table entry and subprograms are provided for setting and retrieving
120--  these associated values. The usage of these values is up to the client. In
121--  the compiler, the Int field is used to point to a chain of potentially
122--  visible entities (see Sem.Ch8 for details), and the Byte field is used to
123--  hold the Token_Type value for reserved words (see Sem for details). In the
124--  binder, the Byte field is unused, and the Int field is used in various
125--  ways depending on the name involved (see binder documentation).
126
127   Name_Buffer : String (1 .. 4 * Max_Line_Length);
128   --  This buffer is used to set the name to be stored in the table for the
129   --  Name_Find call, and to retrieve the name for the Get_Name_String call.
130   --  The limit here is intended to be an infinite value that ensures that we
131   --  never overflow the buffer (names this long are too absurd to worry!)
132
133   Name_Len : Natural;
134   --  Length of name stored in Name_Buffer. Used as an input parameter for
135   --  Name_Find, and as an output value by Get_Name_String, or Write_Name.
136
137   -----------------------------
138   -- Types for Namet Package --
139   -----------------------------
140
141   --  Name_Id values are used to identify entries in the names table. Except
142   --  for the special values No_Name and Error_Name, they are subscript values
143   --  for the Names table defined in this package.
144
145   --  Note that with only a few exceptions, which are clearly documented, the
146   --  type Name_Id should be regarded as a private type. In particular it is
147   --  never appropriate to perform arithmetic operations using this type.
148
149   type Name_Id is range Names_Low_Bound .. Names_High_Bound;
150   for Name_Id'Size use 32;
151   --  Type used to identify entries in the names table
152
153   No_Name : constant Name_Id := Names_Low_Bound;
154   --  The special Name_Id value No_Name is used in the parser to indicate
155   --  a situation where no name is present (e.g. on a loop or block).
156
157   Error_Name : constant Name_Id := Names_Low_Bound +  1;
158   --  The special Name_Id value Error_Name is used in the parser to
159   --  indicate that some kind of error was encountered in scanning out
160   --  the relevant name, so it does not have a representable label.
161
162   subtype Error_Name_Or_No_Name is Name_Id range No_Name .. Error_Name;
163   --  Used to test for either error name or no name
164
165   First_Name_Id : constant Name_Id := Names_Low_Bound + 2;
166   --  Subscript of first entry in names table
167
168   -----------------
169   -- Subprograms --
170   -----------------
171
172   procedure Finalize;
173   --  Called at the end of a use of the Namet package (before a subsequent
174   --  call to Initialize). Currently this routine is only used to generate
175   --  debugging output.
176
177   procedure Get_Name_String (Id : Name_Id);
178   --  Get_Name_String is used to retrieve the string associated with an entry
179   --  in the names table. The resulting string is stored in Name_Buffer and
180   --  Name_Len is set. It is an error to call Get_Name_String with one of the
181   --  special name Id values (No_Name or Error_Name).
182
183   function Get_Name_String (Id : Name_Id) return String;
184   --  This functional form returns the result as a string without affecting
185   --  the contents of either Name_Buffer or Name_Len. The lower bound is 1.
186
187   procedure Get_Unqualified_Name_String (Id : Name_Id);
188   --  Similar to the above except that qualification (as defined in unit
189   --  Exp_Dbug) is removed (including both preceding __ delimited names, and
190   --  also the suffixes used to indicate package body entities and to
191   --  distinguish between overloaded entities). Note that names are not
192   --  qualified until just before the call to gigi, so this routine is only
193   --  needed by processing that occurs after gigi has been called. This
194   --  includes all ASIS processing, since ASIS works on the tree written
195   --  after gigi has been called.
196
197   procedure Get_Name_String_And_Append (Id : Name_Id);
198   --  Like Get_Name_String but the resulting characters are appended to the
199   --  current contents of the entry stored in Name_Buffer, and Name_Len is
200   --  incremented to include the added characters.
201
202   procedure Get_Decoded_Name_String (Id : Name_Id);
203   --  Same calling sequence and interface as Get_Name_String, except that the
204   --  result is decoded, so that upper half characters and wide characters
205   --  appear as originally found in the source program text, operators have
206   --  their source forms (special characters and enclosed in quotes), and
207   --  character literals appear surrounded by apostrophes.
208
209   procedure Get_Unqualified_Decoded_Name_String (Id : Name_Id);
210   --  Similar to the above except that qualification (as defined in unit
211   --  Exp_Dbug) is removed (including both preceding __ delimited names, and
212   --  also the suffix used to indicate package body entities). Note that
213   --  names are not qualified until just before the call to gigi, so this
214   --  routine is only needed by processing that occurs after gigi has been
215   --  called. This includes all ASIS processing, since ASIS works on the tree
216   --  written after gigi has been called.
217
218   procedure Get_Decoded_Name_String_With_Brackets (Id : Name_Id);
219   --  This routine is similar to Get_Decoded_Name_String, except that the
220   --  brackets notation (Uhh replaced by ["hh"], Whhhh replaced by ["hhhh"],
221   --  WWhhhhhhhh replaced by ["hhhhhhhh"]) is used for all non-lower half
222   --  characters, regardless of how Opt.Wide_Character_Encoding_Method is
223   --  set, and also in that characters in the range 16#80# .. 16#FF# are
224   --  converted to brackets notation in all cases. This routine can be used
225   --  when there is a requirement for a canonical representation not affected
226   --  by the character set options (e.g. in the binder generation of
227   --  symbols).
228
229   function Get_Name_Table_Byte (Id : Name_Id) return Byte;
230   pragma Inline (Get_Name_Table_Byte);
231   --  Fetches the Byte value associated with the given name
232
233   function Get_Name_Table_Info (Id : Name_Id) return Int;
234   pragma Inline (Get_Name_Table_Info);
235   --  Fetches the Int value associated with the given name
236
237   function Is_Operator_Name (Id : Name_Id) return Boolean;
238   --  Returns True if name given is of the form of an operator (that
239   --  is, it starts with an upper case O).
240
241   procedure Initialize;
242   --  This is a dummy procedure. It is retained for easy compatibility with
243   --  clients who used to call Initialize when this call was required. Now
244   --  initialization is performed automatically during package elaboration.
245   --  Note that this change fixes problems which previously arose when
246   --  Initialize was called more than once. See also Reinitialize which
247   --  allows reinitialization of the tables.
248
249   procedure Lock;
250   --  Lock name tables before calling back end. We reserve some extra space
251   --  before locking to avoid unnecessary inefficiencies when we unlock.
252
253   procedure Reinitialize;
254   --  Clears the name tables and removes all existing entries from the table.
255
256   procedure Unlock;
257   --  Unlocks the name table to allow use of the extra space reserved by the
258   --  call to Lock. See gnat1drv for details of the need for this.
259
260   function Length_Of_Name (Id : Name_Id) return Nat;
261   pragma Inline (Length_Of_Name);
262   --  Returns length of given name in characters. This is the length of the
263   --  encoded name, as stored in the names table. The result is equivalent to
264   --  calling Get_Name_String and reading Name_Len, except that a call to
265   --  Length_Of_Name does not affect the contents of Name_Len and Name_Buffer.
266
267   function Name_Chars_Address return System.Address;
268   --  Return starting address of name characters table (used in Back_End call
269   --  to Gigi).
270
271   function Name_Find return Name_Id;
272   --  Name_Find is called with a string stored in Name_Buffer whose length is
273   --  in Name_Len (i.e. the characters of the name are in subscript positions
274   --  1 to Name_Len in Name_Buffer). It searches the names table to see if
275   --  the string has already been stored. If so the Id of the existing entry
276   --  is returned. Otherwise a new entry is created with its Name_Table_Info
277   --  field set to zero. The contents of Name_Buffer and Name_Len are not
278   --  modified by this call. Note that it is permissible for Name_Len to be
279   --  set to zero to lookup the null name string.
280
281   function Name_Enter return Name_Id;
282   --  Name_Enter has the same calling interface as Name_Find. The difference
283   --  is that it does not search the table for an existing match, and also
284   --  subsequent Name_Find calls using the same name will not locate the
285   --  entry created by this call. Thus multiple calls to Name_Enter with the
286   --  same name will create multiple entries in the name table with different
287   --  Name_Id values. This is useful in the case of created names, which are
288   --  never expected to be looked up. Note: Name_Enter should never be used
289   --  for one character names, since these are efficiently located without
290   --  hashing by Name_Find in any case.
291
292   function Name_Entries_Address return System.Address;
293   --  Return starting address of Names table (used in Back_End call to Gigi)
294
295   function Name_Entries_Count return Nat;
296   --  Return current number of entries in the names table
297
298   function Is_OK_Internal_Letter (C : Character) return Boolean;
299   pragma Inline (Is_OK_Internal_Letter);
300   --  Returns true if C is a suitable character for using as a prefix or a
301   --  suffix of an internally generated name, i.e. it is an upper case letter
302   --  other than one of the ones used for encoding source names (currently
303   --  the set of reserved letters is O, Q, U, W) and also returns False for
304   --  the letter X, which is reserved for debug output (see Exp_Dbug).
305
306   function Is_Internal_Name (Id : Name_Id) return Boolean;
307   --  Returns True if the name is an internal name (i.e. contains a character
308   --  for which Is_OK_Internal_Letter is true, or if the name starts or ends
309   --  with an underscore). This call destroys the value of Name_Len and
310   --  Name_Buffer (it loads these as for Get_Name_String).
311   --
312   --  Note: if the name is qualified (has a double underscore), then only the
313   --  final entity name is considered, not the qualifying names. Consider for
314   --  example that the name:
315   --
316   --    pkg__B_1__xyz
317   --
318   --  is not an internal name, because the B comes from the internal name of
319   --  a qualifying block, but the xyz means that this was indeed a declared
320   --  identifier called "xyz" within this block and there is nothing internal
321   --  about that name.
322
323   function Is_Internal_Name return Boolean;
324   --  Like the form with an Id argument, except that the name to be tested is
325   --  passed in Name_Buffer and Name_Len. Unlike that form, this call leaves
326   --  the contents of Name_Buffer and Name_Len unchanged.
327
328   function Is_Valid_Name (Id : Name_Id) return Boolean;
329   --  True if Id is a valid name -- points to a valid entry in the
330   --  Name_Entries table.
331
332   procedure Reset_Name_Table;
333   --  This procedure is used when there are multiple source files to reset
334   --  the name table info entries associated with current entries in the
335   --  names table. There is no harm in keeping the names entries themselves
336   --  from one compilation to another, but we can't keep the entity info,
337   --  since this refers to tree nodes, which are destroyed between each main
338   --  source file.
339
340   procedure Add_Char_To_Name_Buffer (C : Character);
341   pragma Inline (Add_Char_To_Name_Buffer);
342   --  Add given character to the end of the string currently stored in the
343   --  Name_Buffer, incrementing Name_Len.
344
345   procedure Add_Nat_To_Name_Buffer (V : Nat);
346   --  Add decimal representation of given value to the end of the string
347   --  currently stored in Name_Buffer, incrementing Name_Len as required.
348
349   procedure Add_Str_To_Name_Buffer (S : String);
350   --  Add characters of string S to the end of the string currently stored
351   --  in the Name_Buffer, incrementing Name_Len by the length of the string.
352
353   procedure Insert_Str_In_Name_Buffer (S : String; Index : Positive);
354   --  Inserts given string in name buffer, starting at Index. Any existing
355   --  characters at or past this location get moved beyond the inserted string
356   --  and Name_Len is incremented by the length of the string.
357
358   procedure Set_Character_Literal_Name (C : Char_Code);
359   --  This procedure sets the proper encoded name for the character literal
360   --  for the given character code. On return Name_Buffer and Name_Len are
361   --  set to reflect the stored name.
362
363   procedure Set_Name_Table_Info (Id : Name_Id; Val : Int);
364   pragma Inline (Set_Name_Table_Info);
365   --  Sets the Int value associated with the given name
366
367   procedure Set_Name_Table_Byte (Id : Name_Id; Val : Byte);
368   pragma Inline (Set_Name_Table_Byte);
369   --  Sets the Byte value associated with the given name
370
371   procedure Store_Encoded_Character (C : Char_Code);
372   --  Stores given character code at the end of Name_Buffer, updating the
373   --  value in Name_Len appropriately. Lower case letters and digits are
374   --  stored unchanged. Other 8-bit characters are stored using the Uhh
375   --  encoding (hh = hex code), other 16-bit wide character values are stored
376   --  using the Whhhh (hhhh = hex code) encoding, and other 32-bit wide wide
377   --  character values are stored using the WWhhhhhhhh (hhhhhhhh = hex code).
378   --  Note that this procedure does not fold upper case letters (they are
379   --  stored using the Uhh encoding). If folding is required, it must be done
380   --  by the caller prior to the call.
381
382   procedure Tree_Read;
383   --  Initializes internal tables from current tree file using the relevant
384   --  Table.Tree_Read routines. Note that Initialize should not be called if
385   --  Tree_Read is used. Tree_Read includes all necessary initialization.
386
387   procedure Tree_Write;
388   --  Writes out internal tables to current tree file using the relevant
389   --  Table.Tree_Write routines.
390
391   procedure Get_Last_Two_Chars (N : Name_Id; C1, C2 : out Character);
392   --  Obtains last two characters of a name. C1 is last but one character
393   --  and C2 is last character. If name is less than two characters long,
394   --  then both C1 and C2 are set to ASCII.NUL on return.
395
396   procedure Write_Name (Id : Name_Id);
397   --  Write_Name writes the characters of the specified name using the
398   --  standard output procedures in package Output. No end of line is
399   --  written, just the characters of the name. On return Name_Buffer and
400   --  Name_Len are set as for a call to Get_Name_String. The name is written
401   --  in encoded form (i.e. including Uhh, Whhhh, Qx, _op as they appear in
402   --  the name table). If Id is Error_Name, or No_Name, no text is output.
403
404   procedure Write_Name_Decoded (Id : Name_Id);
405   --  Like Write_Name, except that the name written is the decoded name, as
406   --  described for Get_Decoded_Name_String, and the resulting value stored
407   --  in Name_Len and Name_Buffer is the decoded name.
408
409   ------------------------------
410   -- File and Unit Name Types --
411   ------------------------------
412
413   --  These are defined here in Namet rather than Fname and Uname to avoid
414   --  problems with dependencies, and to avoid dragging in Fname and Uname
415   --  into many more files, but it would be cleaner to move to Fname/Uname.
416
417   type File_Name_Type is new Name_Id;
418   --  File names are stored in the names table and this type is used to
419   --  indicate that a Name_Id value is being used to hold a simple file name
420   --  (which does not include any directory information).
421
422   No_File : constant File_Name_Type := File_Name_Type (No_Name);
423   --  Constant used to indicate no file is present (this is used for example
424   --  when a search for a file indicates that no file of the name exists).
425
426   Error_File_Name : constant File_Name_Type := File_Name_Type (Error_Name);
427   --  The special File_Name_Type value Error_File_Name is used to indicate
428   --  a file name where some previous processing has found an error.
429
430   subtype Error_File_Name_Or_No_File is
431     File_Name_Type range No_File .. Error_File_Name;
432   --  Used to test for either error file name or no file
433
434   type Path_Name_Type is new Name_Id;
435   --  Path names are stored in the names table and this type is used to
436   --  indicate that a Name_Id value is being used to hold a path name (that
437   --  may contain directory information).
438
439   No_Path : constant Path_Name_Type := Path_Name_Type (No_Name);
440   --  Constant used to indicate no path name is present
441
442   type Unit_Name_Type is new Name_Id;
443   --  Unit names are stored in the names table and this type is used to
444   --  indicate that a Name_Id value is being used to hold a unit name, which
445   --  terminates in %b for a body or %s for a spec.
446
447   No_Unit_Name : constant Unit_Name_Type := Unit_Name_Type (No_Name);
448   --  Constant used to indicate no unit name present
449
450   Error_Unit_Name : constant Unit_Name_Type := Unit_Name_Type (Error_Name);
451   --  The special Unit_Name_Type value Error_Unit_Name is used to indicate
452   --  a unit name where some previous processing has found an error.
453
454   subtype Error_Unit_Name_Or_No_Unit_Name is
455     Unit_Name_Type range No_Unit_Name .. Error_Unit_Name;
   --  Used to test for either error unit name or no unit name
456
457   ------------------------
458   -- Debugging Routines --
459   ------------------------
460
461   procedure wn (Id : Name_Id);
462   pragma Export (Ada, wn);
463   --  This routine is intended for debugging use only (i.e. it is intended to
464   --  be called from the debugger). It writes the characters of the specified
465   --  name using the standard output procedures in package Output, followed by
466   --  a new line. The name is written in encoded form (i.e. including Uhh,
467   --  Whhhh, Qx, _op as they appear in the name table). If Id is Error_Name,
468   --  No_Name, or invalid an appropriate string is written (<Error_Name>,
469   --  <No_Name>, <invalid name>). Unlike Write_Name, this call does not affect
470   --  the contents of Name_Buffer or Name_Len.
471
472   ---------------------------
473   -- Table Data Structures --
474   ---------------------------
475
476   --  The following declarations define the data structures used to store
477   --  names. The definitions are in the private part of the package spec,
478   --  rather than the body, since they are referenced directly by gigi.
479
480private
481
482   --  This table stores the actual string names. Although logically there is
483   --  no need for a terminating character (since the length is stored in the
484   --  name entry table), we still store a NUL character at the end of every
485   --  name (for convenience in interfacing to the C world).
486
487   package Name_Chars is new Table.Table (
488     Table_Component_Type => Character,
489     Table_Index_Type     => Int,
490     Table_Low_Bound      => 0,
491     Table_Initial        => Alloc.Name_Chars_Initial,
492     Table_Increment      => Alloc.Name_Chars_Increment,
493     Table_Name           => "Name_Chars");
494
495   type Name_Entry is record
496      Name_Chars_Index : Int;
497      --  Starting location of characters in the Name_Chars table minus one
498      --  (i.e. pointer to character just before first character). The reason
499      --  for the bias of one is that indexes in Name_Buffer are one's origin,
500      --  so this avoids unnecessary adds and subtracts of 1.
501
502      Name_Len : Short;
503      --  Length of this name in characters
504
505      Byte_Info : Byte;
506      --  Byte value associated with this name
507
508      Name_Has_No_Encodings : Boolean;
509      --  This flag is set True if the name entry is known not to contain any
510      --  special character encodings. This is used to speed up repeated calls
511      --  to Get_Decoded_Name_String. A value of False means that it is not
512      --  known whether the name contains any such encodings.
513
514      Hash_Link : Name_Id;
515      --  Link to next entry in names table for same hash code
516
517      Int_Info : Int;
518      --  Int Value associated with this name
519   end record;
520
521   for Name_Entry use record
522      Name_Chars_Index      at  0 range 0 .. 31;
523      Name_Len              at  4 range 0 .. 15;
524      Byte_Info             at  6 range 0 .. 7;
525      Name_Has_No_Encodings at  7 range 0 .. 7;
526      Hash_Link             at  8 range 0 .. 31;
527      Int_Info              at 12 range 0 .. 31;
528   end record;
529
530   for Name_Entry'Size use 16 * 8;
531   --  This ensures that we did not leave out any fields
532
533   --  This is the table that is referenced by Name_Id entries.
534   --  It contains one entry for each unique name in the table.
535
536   package Name_Entries is new Table.Table (
537     Table_Component_Type => Name_Entry,
538     Table_Index_Type     => Name_Id'Base,
539     Table_Low_Bound      => First_Name_Id,
540     Table_Initial        => Alloc.Names_Initial,
541     Table_Increment      => Alloc.Names_Increment,
542     Table_Name           => "Name_Entries");
543
544end Namet;
545