1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                                N A M E T                                 --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--          Copyright (C) 1992-2003 Free Software Foundation, Inc.          --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT;  see file COPYING.  If not, write --
19-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
20-- MA 02111-1307, USA.                                                      --
21--                                                                          --
22-- As a special exception,  if other files  instantiate  generics from this --
23-- unit, or you link  this unit with other files  to produce an executable, --
24-- this  unit  does not  by itself cause  the resulting  executable  to  be --
25-- covered  by the  GNU  General  Public  License.  This exception does not --
26-- however invalidate  any other reasons why  the executable file  might be --
27-- covered by the  GNU Public License.                                      --
28--                                                                          --
29-- GNAT was originally developed  by the GNAT team at  New York University. --
30-- Extensive contributions were provided by Ada Core Technologies Inc.      --
31--                                                                          --
32------------------------------------------------------------------------------
33
34with Alloc;
35with Table;
36with System;   use System;
37with Types;    use Types;
38
39package Namet is
40
41--  WARNING: There is a C version of this package. Any changes to this
42--  source file must be properly reflected in the C header file namet.h
43--  which is created manually from namet.ads and namet.adb.
44
45--  This package contains routines for handling the names table. The table
46--  is used to store character strings for identifiers and operator symbols,
47--  as well as other string values such as unit names and file names.
48
49--  The forms of the entries are as follows:
50
51--    Identifiers        Stored with upper case letters folded to lower case.
52--                       Upper half (16#80# bit set) and wide characters are
53--                       stored in an encoded form (Uhh for upper half and
54--                       Whhhh for wide characters, as provided by the routine
55--                       Store_Encoded_Character, where hh are hex digits for
56--                       the character code using lower case a-f). Normally
57--                       the use of U or W in other internal names is avoided,
58--                       but these letters may be used in internal names
59--                       (without this special meaning), if they appear as
60--                       the last character of the name, or they are followed
61--                       by an upper case letter or an underscore.
62
63
64--    Operator symbols   Stored with an initial letter O, and the remainder
65--                       of the name is the lower case characters XXX where
66--                       the name is Name_Op_XXX, see Snames spec for a full
67--                       list of the operator names. Normally the use of O
68--                       in other internal names is avoided, but it may be
69--                       used in internal names (without this special meaning)
70--                       if it is the last character of the name, or if it is
71--                       followed by an upper case letter or an underscore.
72
73--    Character literals Character literals have names that are used only for
74--                       debugging and error message purposes. The form is an
75--                       upper case Q followed by a single lower case letter,
76--                       or by a Uhh or Whhhh encoding as described for
77--                       identifiers. The Set_Character_Literal_Name procedure
78--                       should be used to construct these encodings. Normally
79--                       the use of Q in other internal names is avoided, but
80--                       it may be used in internal names (without this special
81--                       meaning) if it is the last character of the name, or
82--                       if it is followed by an upper case letter or an
83--                       underscore.
84
85--    Unit names         Stored with upper case letters folded to lower case,
86--                       using Uhh/Whhhh encoding as described for identifiers,
87--                       and a %s or %b suffix for specs/bodies. See package
88--                       Uname for further details.
89
90--    File names         Are stored in the form provided by Osint. Typically
91--                       they may include wide character escape sequences and
92--                       upper case characters (in non-encoded form). Casing
93--                       is also derived from the external environment. Note
94--                       that file names provided by Osint must generally be
95--                       consistent with the names from Fname.Get_File_Name.
96
97--    Other strings      The names table is also used as a convenient storage
98--                       location for other variable length strings such as
99--                       error messages etc. There are no restrictions on what
100--                       characters may appear for such entries.
101
102--  Note: the encodings Uhh (upper half characters), Whhhh (wide characters),
103--  and Qx (character literal names) are described in the spec, since they
104--  are visible throughout the system (e.g. in debugging output). However,
105--  no code should depend on these particular encodings, so it should be
106--  possible to change the encodings by making changes only to the Namet
107--  specification (to change these comments) and the body (which actually
108--  implements the encodings).
109
110--  The names are hashed so that a given name appears only once in the table,
111--  except that names entered with Name_Enter as opposed to Name_Find are
112--  omitted from the hash table.
113
114--  The first 26 entries in the names table (with Name_Id values in the range
115--  First_Name_Id .. First_Name_Id + 25) represent names which are the one
116--  character lower case letters in the range a-z, and these names are created
117--  and initialized by the Initialize procedure.
118
119--  Two values, one of type Int and one of type Byte, are stored with each
120--  names table entry and subprograms are provided for setting and retrieving
121--  these associated values. The usage of these values is up to the client.
122--  In the compiler, the Int field is used to point to a chain of potentially
123--  visible entities (see Sem.Ch8 for details), and the Byte field is used
124--  to hold the Token_Type value for reserved words (see Sem for details).
125--  In the binder, the Byte field is unused, and the Int field is used in
126--  various ways depending on the name involved (see binder documentation).
127
128   Name_Buffer : String (1 .. 16*1024);
129   --  This buffer is used to set the name to be stored in the table for the
130   --  Name_Find call, and to retrieve the name for the Get_Name_String call.
131   --  The 16K here is intended to be an infinite value that ensures that we
132   --  never overflow the buffer (names this long are too absurd to worry!)
133   --  No extra slot is declared for an appended ASCII.NUL; it must fit too.
134
135   Name_Len : Natural;
136   --  Length of name stored in Name_Buffer. Used as an input parameter for
137   --  Name_Find, and as an output value by Get_Name_String, or Write_Name.
138
139   -----------------
140   -- Subprograms --
141   -----------------
142
143   procedure Finalize;
144   --  Called at the end of a use of the Namet package (before a subsequent
145   --  call to Initialize). Currently this routine is only used to generate
146   --  debugging output.
147
148   procedure Get_Name_String (Id : Name_Id);
149   --  Get_Name_String is used to retrieve the string associated with an entry
150   --  in the names table. The resulting string is stored in Name_Buffer
151   --  and Name_Len is set. It is an error to call Get_Name_String with one
152   --  of the special name Id values (No_Name or Error_Name).
153
154   function Get_Name_String (Id : Name_Id) return String;
155   --  This functional form returns the result as a string without affecting
156   --  the contents of either Name_Buffer or Name_Len.
157
158   procedure Get_Unqualified_Name_String (Id : Name_Id);
159   --  Similar to the procedure Get_Name_String, except that qualification
160   --  (as defined in unit Exp_Dbug) is removed (including both preceding __
161   --  delimited names, and also the suffixes used to indicate package body
162   --  entities and to distinguish between overloaded entities). Note that
163   --  names are not qualified until just before the call to gigi, so this
164   --  routine is only needed by processing that occurs after gigi has been
165   --  called. This includes all ASIS processing, since ASIS works on the
166   --  tree written after gigi has been called.
167
168   procedure Get_Name_String_And_Append (Id : Name_Id);
169   --  Like Get_Name_String but the resulting characters are appended to
170   --  the current contents of the entry stored in Name_Buffer, and Name_Len
171   --  is incremented to include the added characters.
172
173   procedure Get_Decoded_Name_String (Id : Name_Id);
174   --  Same calling sequence and interface as Get_Name_String, except that the
175   --  result is decoded, so that upper half characters and wide characters
176   --  appear as originally found in the source program text, operators have
177   --  their source forms (special characters and enclosed in quotes), and
178   --  character literals appear surrounded by apostrophes.
179
180   procedure Get_Unqualified_Decoded_Name_String (Id : Name_Id);
181   --  Similar to the above except that qualification (as defined in unit
182   --  Exp_Dbug) is removed (including both preceding __ delimited names,
183   --  and also the suffix used to indicate package body entities). Note
184   --  that names are not qualified until just before the call to gigi, so
185   --  this routine is only needed by processing that occurs after gigi has
186   --  been called. This includes all ASIS processing, since ASIS works on
187   --  the tree written after gigi has been called.
188
189   procedure Get_Decoded_Name_String_With_Brackets (Id : Name_Id);
190   --  This routine is similar to Get_Decoded_Name_String, except that the
191   --  brackets notation (Uhh replaced by ["hh"], Whhhh replaced by ["hhhh"])
192   --  is used for all non-lower half characters, regardless of the setting
193   --  of Opt.Wide_Character_Encoding_Method, and also in that characters
194   --  in the range 16#80# .. 16#FF# are converted to brackets notation
195   --  in all cases. This routine can be used when there is a requirement
196   --  for a canonical representation not affected by the character set
197   --  options (e.g. in the binder generation of symbols).
198
199   function Get_Name_Table_Byte (Id : Name_Id) return Byte;
200   pragma Inline (Get_Name_Table_Byte);
201   --  Fetches the Byte value associated with the given name
202
203   function Get_Name_Table_Info (Id : Name_Id) return Int;
204   pragma Inline (Get_Name_Table_Info);
205   --  Fetches the Int value associated with the given name
206
207   procedure Initialize;
208   --  Initializes the names table, including initializing the first 26
209   --  entries in the table (for the 1-character lower case names a-z).
210   --  Note that Initialize must not be called if Tree_Read is used.
211
212   procedure Lock;
213   --  Lock name table before calling back end. Space for up to 10 extra
214   --  names and 1000 extra characters is reserved before the table is locked.
215
216   procedure Unlock;
217   --  Unlocks the name table to allow use of the 10 extra names and 1000
218   --  extra characters reserved by the Lock call. See gnat1drv for details
219   --  of the need for this.
220
221   function Length_Of_Name (Id : Name_Id) return Nat;
222   pragma Inline (Length_Of_Name);
223   --  Returns length of given name in characters. This is the length of the
224   --  encoded name, as stored in the names table, the result is equivalent to
225   --  calling Get_Name_String and reading Name_Len, except that a call to
226   --  Length_Of_Name does not affect the contents of Name_Len and Name_Buffer.
227
228   function Name_Chars_Address return System.Address;
229   --  Return starting address of name characters table (used in Back_End
230   --  call to Gigi).
231
232   function Name_Find return Name_Id;
233   --  Name_Find is called with a string stored in Name_Buffer whose length
234   --  is in Name_Len (i.e. the characters of the name are in subscript
235   --  positions 1 to Name_Len in Name_Buffer). It searches the names
236   --  table to see if the string has already been stored. If so the Id of
237   --  the existing entry is returned. Otherwise a new entry is created with
238   --  its Name_Table_Info field set to zero. The contents of Name_Buffer
239   --  and Name_Len are not modified by this call. Note that it is permissible
240   --  for Name_Len to be set to zero to lookup the null name string.
241
242   function Name_Enter return Name_Id;
243   --  Name_Enter has the same calling interface as Name_Find. The difference
244   --  is that it does not search the table for an existing match, and also
245   --  subsequent Name_Find calls using the same name will not locate the
246   --  entry created by this call. Thus multiple calls to Name_Enter with the
247   --  same name will create multiple entries in the name table with different
248   --  Name_Id values. This is useful in the case of created names, which are
249   --  never expected to be looked up. Note: Name_Enter should never be used
250   --  for one character names, since these are efficiently located without
251   --  hashing by Name_Find in any case.
252
253   function Name_Entries_Address return System.Address;
254   --  Return starting address of Names table. Used in Back_End call to Gigi.
255
256   function Name_Entries_Count return Nat;
257   --  Return current number of entries in the names table
258
259   function Is_OK_Internal_Letter (C : Character) return Boolean;
260   pragma Inline (Is_OK_Internal_Letter);
261   --  Returns true if C is a suitable character for using as a prefix or a
262   --  suffix of an internally generated name, i.e. it is an upper case letter
263   --  other than one of the ones used for encoding source names (currently
264   --  the set of reserved letters is O, Q, U, W) and also returns False for
265   --  the letter X, which is reserved for debug output (see Exp_Dbug).
266
267   function Is_Internal_Name (Id : Name_Id) return Boolean;
268   --  Returns True if the name is an internal name (i.e. contains a character
269   --  for which Is_OK_Internal_Letter is true, or if the name starts or ends
270   --  with an underscore). This call destroys the value of Name_Len and
271   --  Name_Buffer (it loads these as for Get_Name_String).
272   --
273   --  Note: if the name is qualified (has a double underscore), then
274   --  only the final entity name is considered, not the qualifying
275   --  names. Consider for example that the name:
276   --
277   --    pkg__B_1__xyz
278   --
279   --  is not an internal name, because the B comes from the internal
280   --  name of a qualifying block, but the xyz means that this was
281   --  indeed a declared identifier called "xyz" within this block
282   --  and there is nothing internal about that name.
283
284   function Is_Internal_Name return Boolean;
285   --  Like the form with an Id argument, except that the name to be tested is
286   --  passed in Name_Buffer and Name_Len, and neither of these is modified by
287   --  the call.
288
289   procedure Reset_Name_Table;
290   --  This procedure is used when there are multiple source files to reset
291   --  the name table info entries associated with current entries in the
292   --  names table. There is no harm in keeping the names entries themselves
293   --  from one compilation to another, but we can't keep the entity info,
294   --  since this refers to tree nodes, which are destroyed between each
295   --  main source file.
296
297   procedure Add_Char_To_Name_Buffer (C : Character);
298   pragma Inline (Add_Char_To_Name_Buffer);
299   --  Add given character to the end of the string currently stored in the
300   --  Name_Buffer, incrementing Name_Len.
301
302   procedure Add_Nat_To_Name_Buffer (V : Nat);
303   --  Add decimal representation of given value to the end of the string
304   --  currently stored in Name_Buffer, incrementing Name_Len as required.
305
306   procedure Add_Str_To_Name_Buffer (S : String);
307   --  Add characters of string S to the end of the string currently stored
308   --  in the Name_Buffer, incrementing Name_Len by the length of the string.
309
310   procedure Set_Character_Literal_Name (C : Char_Code);
311   --  This procedure sets the proper encoded name for the character literal
312   --  for the given character code. On return Name_Buffer and Name_Len are
313   --  set to reflect the stored name.
314
315   procedure Set_Name_Table_Info (Id : Name_Id; Val : Int);
316   pragma Inline (Set_Name_Table_Info);
317   --  Sets the Int value associated with the given name
318
319   procedure Set_Name_Table_Byte (Id : Name_Id; Val : Byte);
320   pragma Inline (Set_Name_Table_Byte);
321   --  Sets the Byte value associated with the given name
322
323   procedure Store_Encoded_Character (C : Char_Code);
324   --  Stores given character code at the end of Name_Buffer, updating the
325   --  value in Name_Len appropriately. Lower case letters and digits are
326   --  stored unchanged. Other 8-bit characters are stored using the Uhh
327   --  encoding (hh = hex code), and other 16-bit wide-character values
328   --  are stored using the Whhhh (hhhh = hex code) encoding. Note that
329   --  this procedure does not fold upper case letters (they are stored
330   --  using the Uhh encoding). If folding is required, it must be done
331   --  by the caller prior to the call.
332
333   procedure Tree_Read;
334   --  Initializes internal tables from current tree file using Tree_Read.
335   --  Note that Initialize should not be called if Tree_Read is used.
336   --  Tree_Read includes all necessary initialization.
337
338   procedure Tree_Write;
339   --  Writes out internal tables to current tree file using Tree_Write
340
341   procedure Get_Last_Two_Chars (N : Name_Id; C1, C2 : out Character);
342   --  Obtains last two characters of a name. C1 is last but one character
343   --  and C2 is last character. If name is less than two characters long,
344   --  then both C1 and C2 are set to ASCII.NUL on return.
345
346   procedure Write_Name (Id : Name_Id);
347   --  Write_Name writes the characters of the specified name using the
348   --  standard output procedures in package Output. No end of line is
349   --  written, just the characters of the name. On return Name_Buffer and
350   --  Name_Len are set as for a call to Get_Name_String. The name is written
351   --  in encoded form (i.e. including Uhh, Whhhh, Qx, _op as they appear in
352   --  the name table). If Id is Error_Name, or No_Name, no text is output.
353
354   procedure wn (Id : Name_Id);
355   pragma Export (Ada, wn);
356   --  Like Write_Name, but includes new line at end. Intended for use
357   --  from the debugger only.
358
359   procedure Write_Name_Decoded (Id : Name_Id);
360   --  Like Write_Name, except that the name written is the decoded name, as
361   --  described for Get_Decoded_Name_String, and the resulting value stored
362   --  in Name_Len and Name_Buffer is the decoded name.
363
364   ---------------------------
365   -- Table Data Structures --
366   ---------------------------
367
368   --  The following declarations define the data structures used to store
369   --  names. The definitions are in the private part of the package spec,
370   --  rather than the body, since they are referenced directly by gigi.
371
372private
373
374   --  This table stores the actual string names. Although logically there
375   --  is no need for a terminating character (since the length is stored
376   --  in the name entry table), we still store a NUL character at the end
377   --  of every name (for convenience in interfacing to the C world).
378
379   package Name_Chars is new Table.Table (
380     Table_Component_Type => Character,
381     Table_Index_Type     => Int,
382     Table_Low_Bound      => 0,
383     Table_Initial        => Alloc.Name_Chars_Initial,
384     Table_Increment      => Alloc.Name_Chars_Increment,
385     Table_Name           => "Name_Chars");
386
387   type Name_Entry is record
388      Name_Chars_Index : Int;
389      --  Starting location of characters in the Name_Chars table minus
390      --  one (i.e. pointer to character just before first character). The
391      --  reason for the bias of one is that indexes in Name_Buffer are
392      --  one's origin, so this avoids unnecessary adds and subtracts of 1.
393
394      Name_Len : Short;
395      --  Length of this name in characters
396
397      Byte_Info : Byte;
398      --  Byte value associated with this name
399
400      Hash_Link : Name_Id;
401      --  Link to next entry in names table for same hash code
402
403      Int_Info : Int;
404      --  Int Value associated with this name
405   end record;
406
407   --  This is the table that is referenced by Name_Id entries. It contains
408   --  one entry per Name_Id; Name_Enter may add repeated entries for a name.
409
410   package Name_Entries is new Table.Table (
411     Table_Component_Type => Name_Entry,
412     Table_Index_Type     => Name_Id,
413     Table_Low_Bound      => First_Name_Id,
414     Table_Initial        => Alloc.Names_Initial,
415     Table_Increment      => Alloc.Names_Increment,
416     Table_Name           => "Name_Entries");
417
418end Namet;
419