1--  VHDL lexical scanner.
2--  Copyright (C) 2002 - 2014 Tristan Gingold
3--
4--  This program is free software: you can redistribute it and/or modify
5--  it under the terms of the GNU General Public License as published by
6--  the Free Software Foundation, either version 2 of the License, or
7--  (at your option) any later version.
8--
9--  This program is distributed in the hope that it will be useful,
10--  but WITHOUT ANY WARRANTY; without even the implied warranty of
11--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12--  GNU General Public License for more details.
13--
14--  You should have received a copy of the GNU General Public License
15--  along with this program.  If not, see <gnu.org/licenses>.
16with Ada.Characters.Latin_1; use Ada.Characters.Latin_1;
17with Errorout; use Errorout;
18with Name_Table;
19with Files_Map; use Files_Map;
20with Std_Names;
21with Str_Table;
22with Flags; use Flags;
23
24package body Vhdl.Scanner is
25
26   -- This classification is a simplification of the categories of LRM93 13.1
27   -- LRM93 13.1
28   -- The only characters allowed in the text of a VHDL description are the
29   -- graphic characters and format effector.
30
31   type Character_Kind_Type is
32      (
33       -- Neither a format effector nor a graphic character.
34       Invalid,
35       Format_Effector,
36       Lower_Case_Letter,
37       Upper_Case_Letter,
38       Digit,
39       Special_Character,
40       Space_Character,
41       Other_Special_Character
42      );
43
44   --  LRM93 13.1
45   --  basic_graphic_character ::=
46   --    upper_case_letter | digit | special_character | space_character
47   --
48   --subtype Basic_Graphic_Character is
49   --  Character_Kind_Type range Upper_Case_Letter .. Space_Character;
50
51   --  LRM93 13.1
52   --  graphic_character ::=
53   --    basic_graphic_character | lower_case_letter | other_special_character
54   --
55   --  Note: There are 191 graphic characters.
56   subtype Graphic_Character is
57     Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character;
58
59   --  letter ::= upper_case_letter | lower_case_letter
60   subtype Letter is
61     Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter;
62
63   -- LRM93 13.1
64   -- The characters included in each of the categories of basic graphic
65   -- characters are defined as follows:
66   type Character_Array is array (Character) of Character_Kind_Type;
67   pragma Suppress_Initialization (Character_Array);
68   Characters_Kind : constant Character_Array :=
69     (NUL .. BS => Invalid,
70
71      -- Format effectors are the ISO (and ASCII) characters called horizontal
72      -- tabulation, vertical tabulation, carriage return, line feed, and form
73      -- feed.
74      HT | LF | VT | FF | CR => Format_Effector,
75
76      SO .. US => Invalid,
77
78      -- 1. upper case letters
79      'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis |
80      UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter,
81
82      -- 2. digits
83      '0' .. '9' => Digit,
84
85      -- 3. special characters
86      '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/'
87        | ':' | ';' | '<' | '=' | '>' | '[' | ']'
88        | '_' | '|' | '*' => Special_Character,
89
90      -- 4. the space characters
91      ' ' | NBSP => Space_Character,
92
93      -- 5. lower case letters
94      'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis |
95      LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter,
96
97      -- 6. other special characters
98      '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~'
99        | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign |
100        Division_Sign => Other_Special_Character,
101
102      --  '¡'    -- INVERTED EXCLAMATION MARK
103      --  '¢'    -- CENT SIGN
104      --  '£'    -- POUND SIGN
105      --  '¤'    -- CURRENCY SIGN
106      --  '¥'    -- YEN SIGN
107      --  '¦'    -- BROKEN BAR
108      --  '§'    -- SECTION SIGN
109      --  '¨'    -- DIAERESIS
110      --  '©'    -- COPYRIGHT SIGN
111      --  'ª'    -- FEMININE ORDINAL INDICATOR
112      --  '«'    -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
113      --  '¬'    -- NOT SIGN
114      --  '­'    -- SOFT HYPHEN
115      --  '®'    -- REGISTERED SIGN
116      --  '¯'    -- MACRON
117      --  '°'    -- DEGREE SIGN
118      --  '±'    -- PLUS-MINUS SIGN
119      --  '²'    -- SUPERSCRIPT TWO
120      --  '³'    -- SUPERSCRIPT THREE
121      --  '´'    -- ACUTE ACCENT
122      --  'µ'    -- MICRO SIGN
123      --  '¶'    -- PILCROW SIGN
124      --  '·'    -- MIDDLE DOT
125      --  '¸'    -- CEDILLA
126      --  '¹'    -- SUPERSCRIPT ONE
127      --  'º'    -- MASCULINE ORDINAL INDICATOR
128      --  '»'    -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
129      --  '¼'    -- VULGAR FRACTION ONE QUARTER
130      --  '½'    -- VULGAR FRACTION ONE HALF
131      --  '¾'    -- VULGAR FRACTION THREE QUARTERS
132      --  '¿'    -- INVERTED QUESTION MARK
133      --  '×'    -- MULTIPLICATION SIGN
134      --  '÷'    -- DIVISION SIGN
135
136      DEL .. APC => Invalid);
137
138   -- The context contains the whole internal state of the scanner, ie
139   -- it can be used to push/pop a lexical analysis, to restart the
140   -- scanner from a context marking a previous point.
141   type Scan_Context is record
142      Source : File_Buffer_Acc;
143      Source_File : Source_File_Entry;
144      Line_Number : Natural;
145      Line_Pos : Source_Ptr;
146      Prev_Pos : Source_Ptr;
147      Token_Pos : Source_Ptr;
148      Pos : Source_Ptr;
149      File_Len : Source_Ptr;
150      Token : Token_Type;
151      Prev_Token : Token_Type;
152
153      --  Tokens are ignored because of 'translate_off'.
154      Translate_Off : Boolean;
155
156      --  Additional values for the current token.
157      Bit_Str_Base : Character;
158      Bit_Str_Sign : Character;
159      Str_Id : String8_Id;
160      Str_Len : Nat32;
161      Identifier: Name_Id;
162      Lit_Int64 : Int64;
163      Lit_Fp64 : Fp64;
164   end record;
165   pragma Suppress_Initialization (Scan_Context);
166
167   -- The current context.
168   -- Default value is an invalid context.
169   Current_Context: Scan_Context := (Source => null,
170                                     Source_File => No_Source_File_Entry,
171                                     Line_Number => 0,
172                                     Line_Pos => 0,
173                                     Pos => 0,
174                                     Prev_Pos => 0,
175                                     Token_Pos => 0,
176                                     File_Len => 0,
177                                     Token => Tok_Invalid,
178                                     Prev_Token => Tok_Invalid,
179                                     Translate_Off => False,
180                                     Identifier => Null_Identifier,
181                                     Bit_Str_Base => ' ',
182                                     Bit_Str_Sign => ' ',
183                                     Str_Id => Null_String8,
184                                     Str_Len => 0,
185                                     Lit_Int64 => 0,
186                                     Lit_Fp64 => 0.0);
187
188   function Get_Current_Coord return Source_Coord_Type is
189   begin
190      return (File => Get_Current_Source_File,
191              Line_Pos => Current_Context.Line_Pos,
192              Line => Get_Current_Line,
193              Offset => Get_Current_Offset);
194   end Get_Current_Coord;
195
196   function Get_Token_Coord return Source_Coord_Type is
197   begin
198      return (File => Get_Current_Source_File,
199              Line_Pos => Current_Context.Line_Pos,
200              Line => Get_Current_Line,
201              Offset => Get_Token_Offset);
202   end Get_Token_Coord;
203
204   -- Disp a message during scan.
205   -- The current location is automatically displayed before the message.
206   -- Disp a message during scan.
207   procedure Error_Msg_Scan (Msg: String) is
208   begin
209      Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg);
210   end Error_Msg_Scan;
211
212   procedure Error_Msg_Scan (Loc : Source_Coord_Type; Msg: String) is
213   begin
214      Report_Msg (Msgid_Error, Scan, Loc, Msg);
215   end Error_Msg_Scan;
216
217   procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is
218   begin
219      Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg, (1 => Arg1));
220   end Error_Msg_Scan;
221
222   -- Disp a message during scan.
223   procedure Warning_Msg_Scan (Id : Msgid_Warnings;
224                               Msg: String;
225                               Arg1 : Earg_Type) is
226   begin
227      Report_Msg (Id, Scan, Get_Current_Coord, Msg, (1 => Arg1));
228   end Warning_Msg_Scan;
229
230   procedure Warning_Msg_Scan (Id : Msgid_Warnings;
231                               Msg: String;
232                               Args : Earg_Arr := No_Eargs) is
233   begin
234      Report_Msg (Id, Scan, Get_Current_Coord, Msg, Args);
235   end Warning_Msg_Scan;
236
237   Source: File_Buffer_Acc renames Current_Context.Source;
238   Pos: Source_Ptr renames Current_Context.Pos;
239
240   -- When CURRENT_TOKEN is an identifier, its name_id is stored into
241   -- this global variable.
242   -- Function current_text can be used to convert it into an iir.
243   function Current_Identifier return Name_Id is
244   begin
245      return Current_Context.Identifier;
246   end Current_Identifier;
247
248   procedure Invalidate_Current_Identifier is
249   begin
250      Current_Context.Identifier := Null_Identifier;
251   end Invalidate_Current_Identifier;
252
253   procedure Invalidate_Current_Token is
254   begin
255      if Current_Token /= Tok_Invalid then
256         Current_Context.Prev_Token := Current_Token;
257         Current_Token := Tok_Invalid;
258      end if;
259   end Invalidate_Current_Token;
260
261   function Current_String_Id return String8_Id is
262   begin
263      return Current_Context.Str_Id;
264   end Current_String_Id;
265
266   function Current_String_Length return Nat32 is
267   begin
268      return Current_Context.Str_Len;
269   end Current_String_Length;
270
271   function Get_Bit_String_Base return Character is
272   begin
273      return Current_Context.Bit_Str_Base;
274   end Get_Bit_String_Base;
275
276   function Get_Bit_String_Sign return Character is
277   begin
278      return Current_Context.Bit_Str_Sign;
279   end Get_Bit_String_Sign;
280
281   function Current_Iir_Int64 return Int64 is
282   begin
283      return Current_Context.Lit_Int64;
284   end Current_Iir_Int64;
285
286   function Current_Iir_Fp64 return Fp64 is
287   begin
288      return Current_Context.Lit_Fp64;
289   end Current_Iir_Fp64;
290
291   function Get_Current_Source_File return Source_File_Entry is
292   begin
293      return Current_Context.Source_File;
294   end Get_Current_Source_File;
295
296   function Get_Current_Line return Natural is
297   begin
298      return Current_Context.Line_Number;
299   end Get_Current_Line;
300
301   function Get_Current_Offset return Natural is
302   begin
303      return Natural (Current_Context.Pos - Current_Context.Line_Pos);
304   end Get_Current_Offset;
305
306   function Get_Token_Offset return Natural is
307   begin
308      return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos);
309   end Get_Token_Offset;
310
311   function Get_Token_Position return Source_Ptr is
312   begin
313      return Current_Context.Token_Pos;
314   end Get_Token_Position;
315
316   function Get_Token_Length return Int32 is
317   begin
318      return Int32 (Current_Context.Pos - Current_Context.Token_Pos);
319   end Get_Token_Length;
320
321   function Get_Position return Source_Ptr is
322   begin
323      return Current_Context.Pos;
324   end Get_Position;
325
326   function Get_Token_Location return Location_Type is
327   begin
328      return File_Pos_To_Location
329        (Current_Context.Source_File, Current_Context.Token_Pos);
330   end Get_Token_Location;
331
332   function Get_Prev_Location return Location_Type is
333   begin
334      return File_Pos_To_Location
335        (Current_Context.Source_File, Current_Context.Prev_Pos);
336   end Get_Prev_Location;
337
338   procedure Set_File (Source_File : Source_File_Entry)
339   is
340      N_Source: File_Buffer_Acc;
341   begin
342      pragma Assert (Current_Context.Source = null);
343      pragma Assert (Source_File /= No_Source_File_Entry);
344      N_Source := Get_File_Source (Source_File);
345      Current_Context := (Source => N_Source,
346                          Source_File => Source_File,
347                          Line_Number => 1,
348                          Line_Pos => 0,
349                          Prev_Pos => N_Source'First,
350                          Pos => N_Source'First,
351                          Token_Pos => 0, -- should be invalid,
352                          File_Len => Get_File_Length (Source_File),
353                          Token => Tok_Invalid,
354                          Prev_Token => Tok_Invalid,
355                          Translate_Off => False,
356                          Identifier => Null_Identifier,
357                          Bit_Str_Base => ' ',
358                          Bit_Str_Sign => ' ',
359                          Str_Id => Null_String8,
360                          Str_Len => 0,
361                          Lit_Int64 => -1,
362                          Lit_Fp64 => 0.0);
363      Current_Token := Tok_Invalid;
364   end Set_File;
365
366   function Detect_Encoding_Errors return Boolean
367   is
368      C : constant Character := Source (Pos);
369   begin
370      --  No need to check further if first character is plain ASCII-7
371      if C >= ' ' and C < Character'Val (127) then
372         return False;
373      end if;
374
375      --  UTF-8 BOM is EF BB BF
376      if Source (Pos + 0) = Character'Val (16#ef#)
377        and then Source (Pos + 1) = Character'Val (16#bb#)
378        and then Source (Pos + 2) = Character'Val (16#bf#)
379      then
380         Error_Msg_Scan
381           ("source encoding must be latin-1 (UTF-8 BOM detected)");
382         return True;
383      end if;
384
385      --  UTF-16 BE BOM is FE FF
386      if Source (Pos + 0) = Character'Val (16#fe#)
387        and then Source (Pos + 1) = Character'Val (16#ff#)
388      then
389         Error_Msg_Scan
390           ("source encoding must be latin-1 (UTF-16 BE BOM detected)");
391         return True;
392      end if;
393
394      --  UTF-16 LE BOM is FF FE
395      if Source (Pos + 0) = Character'Val (16#ff#)
396        and then Source (Pos + 1) = Character'Val (16#fe#)
397      then
398         Error_Msg_Scan
399           ("source encoding must be latin-1 (UTF-16 LE BOM detected)");
400         return True;
401      end if;
402
403      --  Certainly weird, but scanner/parser will catch it.
404      return False;
405   end Detect_Encoding_Errors;
406
407   procedure Set_Current_Position (Position: Source_Ptr)
408   is
409      Loc : Location_Type;
410      Offset: Natural;
411      File_Entry : Source_File_Entry;
412   begin
413      --  Scanner must have been initialized.
414      pragma Assert (Current_Context.Source /= null);
415
416      Current_Token := Tok_Invalid;
417      Current_Context.Pos := Position;
418      Loc := File_Pos_To_Location (Current_Context.Source_File,
419                                   Current_Context.Pos);
420      Location_To_Coord (Loc,
421                         File_Entry, Current_Context.Line_Pos,
422                         Current_Context.Line_Number, Offset);
423   end Set_Current_Position;
424
425   procedure Close_File is
426   begin
427      Current_Context.Source := null;
428   end Close_File;
429
430   -- Emit an error when a character above 128 was found.
431   -- This must be called only in vhdl87.
432   procedure Error_8bit is
433   begin
434      Error_Msg_Scan ("8 bits characters not allowed in vhdl87");
435   end Error_8bit;
436
437   -- Emit an error when a separator is expected.
438   procedure Error_Separator is
439   begin
440      Error_Msg_Scan ("a separator is required here");
441   end Error_Separator;
442
443   -- scan a decimal literal or a based literal.
444   --
445   -- LRM93 13.4.1
446   -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ]
447   -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER
448   --
449   -- LRM93 13.4.2
450   -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT
451   -- BASE ::= INTEGER
452   procedure Scan_Literal is separate;
453
454   --  Scan a string literal.
455   --
456   --  LRM93 13.6 / LRM08 15.7
457   --  A string literal is formed by a sequence of graphic characters
458   --  (possibly none) enclosed between two quotation marks used as string
459   --  brackets.
460   --  STRING_LITERAL ::= " { GRAPHIC_CHARACTER } "
461   --
462   --  IN: for a string, at the call of this procedure, the current character
463   --  must be either '"' or '%'.
464   procedure Scan_String
465   is
466      -- The quotation character (can be " or %).
467      Mark: Character;
468      -- Current character.
469      C : Character;
470      --  Current length.
471      Length : Nat32;
472   begin
473      --  String delimiter.
474      Mark := Source (Pos);
475      pragma Assert (Mark = '"' or else Mark = '%');
476
477      Pos := Pos + 1;
478      Length := 0;
479      Current_Context.Str_Id := Str_Table.Create_String8;
480      loop
481         C := Source (Pos);
482         if C = Mark then
483            --  LRM93 13.6
484            --  If a quotation mark value is to be represented in the sequence
485            --  of character values, then a pair of adjacent quoatation
486            --  characters marks must be written at the corresponding place
487            --  within the string literal.
488            --  LRM93 13.10
489            --  Any pourcent sign within the sequence of characters must then
490            --  be doubled, and each such doubled percent sign is interpreted
491            --  as a single percent sign value.
492            --  The same replacement is allowed for a bit string literal,
493            --  provieded that both bit string brackets are replaced.
494            Pos := Pos + 1;
495            exit when Source (Pos) /= Mark;
496         end if;
497
498         case Characters_Kind (C) is
499            when Format_Effector =>
500               if Mark = '%' then
501                  --  No matching '%' has been found.  Consider '%' was used
502                  --  as the remainder operator, instead of 'rem'.  This will
503                  --  improve the error message.
504                  Error_Msg_Scan
505                    (+Get_Token_Location,
506                     "'%%' is not a vhdl operator, use 'rem'");
507                  Current_Token := Tok_Rem;
508                  Pos := Current_Context.Token_Pos + 1;
509                  return;
510               end if;
511               if C = CR or C = LF then
512                  Error_Msg_Scan
513                    ("string cannot be multi-line, use concatenation");
514               else
515                  Error_Msg_Scan ("format effector not allowed in a string");
516               end if;
517               exit;
518            when Invalid =>
519               if C = Files_Map.EOT
520                 and then Pos >= Current_Context.File_Len
521               then
522                  Error_Msg_Scan ("string not terminated at end of file");
523                  exit;
524               end if;
525
526               Error_Msg_Scan
527                 ("invalid character not allowed, even in a string");
528            when Graphic_Character =>
529               if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then
530                  Error_8bit;
531               end if;
532         end case;
533
534         if C = '"' and Mark = '%' then
535            --  LRM93 13.10
536            --  The quotation marks (") used as string brackets at both ends of
537            --  a string literal can be replaced by percent signs (%), provided
538            --  that the enclosed sequence of characters constains no quotation
539            --  marks, and provided that both string brackets are replaced.
540            Error_Msg_Scan
541              ("'""' cannot be used in a string delimited with '%%'");
542         end if;
543
544         Length := Length + 1;
545         Str_Table.Append_String8 (Character'Pos (C));
546         Pos := Pos + 1;
547      end loop;
548
549      Current_Token := Tok_String;
550      Current_Context.Str_Len := Length;
551   end Scan_String;
552
553   --  Scan a bit string literal.
554   --
555   --  LRM93 13.7
556   --  A bit string literal is formed by a sequence of extended digits
557   --  (possibly none) enclosed between two quotations used as bit string
558   --  brackets, preceded by a base specifier.
559   --  BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] "
560   --  BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT }
561   --
562   --  The current character must be a base specifier, followed by '"' or '%'.
563   --  The base must be valid.
564   procedure Scan_Bit_String (Base_Log : Nat32)
565   is
566      --  Position of character '0'.
567      Pos_0 : constant Nat8 := Character'Pos ('0');
568
569      --  Used for the base.
570      subtype Nat4 is Natural range 1 .. 4;
571      Base : constant Nat32 := 2 ** Nat4 (Base_Log);
572
573      -- The quotation character (can be " or %).
574      Orig_Pos : constant Source_Ptr := Pos;
575      Mark     : constant Character := Source (Orig_Pos);
576      -- Current character.
577      C : Character;
578      --  Current length.
579      Length : Nat32;
580      --  Digit value.
581      V, D : Nat8;
582      --  True if invalid character already found, to avoid duplicate message.
583      Has_Invalid : Boolean;
584   begin
585      pragma Assert (Mark = '"' or else Mark = '%');
586      Pos := Pos + 1;
587      Length := 0;
588      Has_Invalid := False;
589      Current_Context.Str_Id := Str_Table.Create_String8;
590      loop
591         << Again >> null;
592         C := Source (Pos);
593         Pos := Pos + 1;
594         exit when C = Mark;
595
596         -- LRM93 13.7
597         -- If the base specifier is 'B', the extended digits in the bit
598         -- value are restricted to 0 and 1.
599         -- If the base specifier is 'O', the extended digits int the bit
600         -- value are restricted to legal digits in the octal number
601         -- system, ie, the digits 0 through 7.
602         -- If the base specifier is 'X', the extended digits are all digits
603         -- together with the letters A through F.
604         case C is
605            when '0' .. '9' =>
606               V := Character'Pos (C) - Character'Pos ('0');
607            when 'A' .. 'F' =>
608               V := Character'Pos (C) - Character'Pos ('A') + 10;
609            when 'a' .. 'f' =>
610               --  LRM93 13.7
611               --  A letter in a bit string literal (...) can be written either
612               --  in lowercase or in upper case, with the same meaning.
613               V := Character'Pos (C) - Character'Pos ('a') + 10;
614            when '_' =>
615               if Source (Pos) = '_' then
616                  Error_Msg_Scan
617                    ("double underscore not allowed in a bit string");
618               end if;
619               if Source (Pos - 2) = Mark then
620                  Error_Msg_Scan
621                    ("underscore not allowed at the start of a bit string");
622               elsif Source (Pos) = Mark then
623                  Error_Msg_Scan
624                    ("underscore not allowed at the end of a bit string");
625               end if;
626               goto Again;
627            when '"' =>
628               pragma Assert (Mark = '%');
629               Error_Msg_Scan
630                 ("'""' cannot close a bit string opened by '%%'");
631               exit;
632            when '%' =>
633               pragma Assert (Mark = '"');
634               Error_Msg_Scan
635                 ("'%%' cannot close a bit string opened by '""'");
636               exit;
637            when others =>
638               if Characters_Kind (C) in Graphic_Character then
639                  if Vhdl_Std >= Vhdl_08 then
640                     V := Nat8'Last;
641                  else
642                     if not Has_Invalid then
643                        Error_Msg_Scan ("invalid character in bit string");
644                        Has_Invalid := True;
645                     end if;
646                     --  Continue the bit string
647                     V := 0;
648                  end if;
649               else
650                  if Mark = '%' then
651                     Error_Msg_Scan
652                       (+File_Pos_To_Location
653                          (Current_Context.Source_File, Orig_Pos),
654                        "'%%' is not a vhdl operator, use 'rem'");
655                     Current_Token := Tok_Rem;
656                     Pos := Orig_Pos + 1;
657                     return;
658                  else
659                     Error_Msg_Scan ("bit string not terminated");
660                     Pos := Pos - 1;
661                  end if;
662                  exit;
663               end if;
664         end case;
665
666         --  Expand bit value.
667         if Vhdl_Std >= Vhdl_08 and V > Base then
668            --  Expand as graphic character.
669            for I in 1 .. Base_Log loop
670               Str_Table.Append_String8_Char (C);
671            end loop;
672         else
673            --  Expand as extended digits.
674            case Base_Log is
675               when 1 =>
676                  if V > 1 then
677                     Error_Msg_Scan
678                       ("invalid character in a binary bit string");
679                     V := 1;
680                  end if;
681                  Str_Table.Append_String8 (Pos_0 + V);
682               when 3 =>
683                  if V > 7 then
684                     Error_Msg_Scan
685                       ("invalid character in a octal bit string");
686                     V := 7;
687                  end if;
688                  for I in 1 .. 3 loop
689                     D := V / 4;
690                     Str_Table.Append_String8 (Pos_0 + D);
691                     V := (V - 4 * D) * 2;
692                  end loop;
693               when 4 =>
694                  for I in 1 .. 4 loop
695                     D := V / 8;
696                     Str_Table.Append_String8 (Pos_0 + D);
697                     V := (V - 8 * D) * 2;
698                  end loop;
699               when others =>
700                  raise Internal_Error;
701            end case;
702         end if;
703
704         Length := Length + Base_Log;
705      end loop;
706
707      --  Note: the length of the bit string may be 0.
708
709      Current_Token := Tok_Bit_String;
710      Current_Context.Str_Len := Length;
711   end Scan_Bit_String;
712
713   --  Scan a decimal bit string literal.  For base specifier D the algorithm
714   --  is rather different: all the graphic characters shall be digits, and we
715   --  need to use a (not very efficient) arbitrary precision multiplication.
716   procedure Scan_Dec_Bit_String
717   is
718      use Str_Table;
719
720      Id : String8_Id;
721
722      --  Position of character '0'.
723      Pos_0 : constant Nat8 := Character'Pos ('0');
724
725      -- Current character.
726      C : Character;
727      --  Current length.
728      Length : Nat32;
729      --  Digit value.
730      V, D : Nat8;
731
732      type Carries_Type is array (0 .. 3) of Nat8;
733      Carries : Carries_Type;
734      No_Carries : constant Carries_Type := (others => Pos_0);
735
736      --  Shift right carries.  Note the Carries (0) is the LSB.
737      procedure Shr_Carries is
738      begin
739         Carries := (Carries (1), Carries (2), Carries (3), Pos_0);
740      end Shr_Carries;
741
742      procedure Append_Carries is
743      begin
744         --  Expand the bit string.  Note that position 1 of the string8 is
745         --  the MSB.
746         while Carries /= No_Carries loop
747            Append_String8 (Pos_0);
748            Length := Length + 1;
749            for I in reverse 2 .. Length loop
750               Set_Element_String8 (Id, I, Element_String8 (Id, I - 1));
751            end loop;
752            Set_Element_String8 (Id, 1, Carries (0));
753            Shr_Carries;
754         end loop;
755      end Append_Carries;
756
757      --  Add 1 to Carries.  Overflow is not allowed and should be prevented by
758      --  construction.
759      procedure Add_One_To_Carries is
760      begin
761         for I in Carries'Range loop
762            if Carries (I) = Pos_0 then
763               Carries (I) := Pos_0 + 1;
764               --  End of propagation.
765               exit;
766            else
767               Carries (I) := Pos_0;
768               --  Continue propagation.
769               pragma Assert (I < Carries'Last);
770            end if;
771         end loop;
772      end Add_One_To_Carries;
773   begin
774      pragma Assert (Source (Pos) = '"');
775      Pos := Pos + 1;
776      Length := 0;
777      Id := Create_String8;
778      Current_Context.Str_Id := Id;
779      loop
780         << Again >> null;
781         C := Source (Pos);
782         Pos := Pos + 1;
783         exit when C = '"';
784
785         if C in '0' .. '9' then
786            V := Character'Pos (C) - Character'Pos ('0');
787         elsif C = '_' then
788            if Source (Pos) = '_' then
789               Error_Msg_Scan
790                 ("double underscore not allowed in a bit string");
791            end if;
792            if Source (Pos - 2) = '"' then
793               Error_Msg_Scan
794                 ("underscore not allowed at the start of a bit string");
795            elsif Source (Pos) = '"' then
796               Error_Msg_Scan
797                 ("underscore not allowed at the end of a bit string");
798            end if;
799            goto Again;
800         else
801            if Characters_Kind (C) in Graphic_Character then
802               Error_Msg_Scan
803                 ("graphic character not allowed in decimal bit string");
804               --  Continue the bit string
805               V := 0;
806            else
807               Error_Msg_Scan ("bit string not terminated");
808               Pos := Pos - 1;
809               exit;
810            end if;
811         end if;
812
813         --  Multiply by 10.
814         Carries := (others => Pos_0);
815         for I in reverse 1 .. Length loop
816            --  Shift by 1 (*2).
817            D := Element_String8 (Id, I);
818            Set_Element_String8 (Id, I, Carries (0));
819            Shr_Carries;
820            --  Add D and D * 4.
821            if D /= Pos_0 then
822               Add_One_To_Carries;
823               --  Add_Four_To_Carries:
824               for I in 2 .. 3 loop
825                  if Carries (I) = Pos_0 then
826                     Carries (I) := Pos_0 + 1;
827                     --  End of propagation.
828                     exit;
829                  else
830                     Carries (I) := Pos_0;
831                     --  Continue propagation.
832                  end if;
833               end loop;
834            end if;
835         end loop;
836         Append_Carries;
837
838         --  Add V.
839         for I in Carries'Range loop
840            D := V / 2;
841            Carries (I) := Pos_0 + (V - 2 * D);
842            V := D;
843         end loop;
844         for I in reverse 1 .. Length loop
845            D := Element_String8 (Id, I);
846            if D /= Pos_0 then
847               Add_One_To_Carries;
848            end if;
849            Set_Element_String8 (Id, I, Carries (0));
850            Shr_Carries;
851            exit when Carries = No_Carries;
852         end loop;
853         Append_Carries;
854      end loop;
855
856      Current_Token := Tok_Bit_String;
857      Current_Context.Str_Len := Length;
858   end Scan_Dec_Bit_String;
859
860   --  LRM08 15.2 Character set
861   --  For each uppercase letter, there is a corresponding lowercase letter;
862   --  and for each lowercase letter except [y diaeresis] and [german sharp s],
863   --  there is a corresponding uppercase letter.
864   type Character_Map is array (Character) of Character;
865   To_Lower_Map : constant Character_Map :=
866     (
867      --  Uppercase ASCII letters.
868      'A' => 'a',
869      'B' => 'b',
870      'C' => 'c',
871      'D' => 'd',
872      'E' => 'e',
873      'F' => 'f',
874      'G' => 'g',
875      'H' => 'h',
876      'I' => 'i',
877      'J' => 'j',
878      'K' => 'k',
879      'L' => 'l',
880      'M' => 'm',
881      'N' => 'n',
882      'O' => 'o',
883      'P' => 'p',
884      'Q' => 'q',
885      'R' => 'r',
886      'S' => 's',
887      'T' => 't',
888      'U' => 'u',
889      'V' => 'v',
890      'W' => 'w',
891      'X' => 'x',
892      'Y' => 'y',
893      'Z' => 'z',
894
895      --  Lowercase ASCII letters.
896      'a' => 'a',
897      'b' => 'b',
898      'c' => 'c',
899      'd' => 'd',
900      'e' => 'e',
901      'f' => 'f',
902      'g' => 'g',
903      'h' => 'h',
904      'i' => 'i',
905      'j' => 'j',
906      'k' => 'k',
907      'l' => 'l',
908      'm' => 'm',
909      'n' => 'n',
910      'o' => 'o',
911      'p' => 'p',
912      'q' => 'q',
913      'r' => 'r',
914      's' => 's',
915      't' => 't',
916      'u' => 'u',
917      'v' => 'v',
918      'w' => 'w',
919      'x' => 'x',
920      'y' => 'y',
921      'z' => 'z',
922
923      --  Uppercase Latin-1 letters.
924      UC_A_Grave          => LC_A_Grave,
925      UC_A_Acute          => LC_A_Acute,
926      UC_A_Circumflex     => LC_A_Circumflex,
927      UC_A_Tilde          => LC_A_Tilde,
928      UC_A_Diaeresis      => LC_A_Diaeresis,
929      UC_A_Ring           => LC_A_Ring,
930      UC_AE_Diphthong     => LC_AE_Diphthong,
931      UC_C_Cedilla        => LC_C_Cedilla,
932      UC_E_Grave          => LC_E_Grave,
933      UC_E_Acute          => LC_E_Acute,
934      UC_E_Circumflex     => LC_E_Circumflex,
935      UC_E_Diaeresis      => LC_E_Diaeresis,
936      UC_I_Grave          => LC_I_Grave,
937      UC_I_Acute          => LC_I_Acute,
938      UC_I_Circumflex     => LC_I_Circumflex,
939      UC_I_Diaeresis      => LC_I_Diaeresis,
940      UC_Icelandic_Eth    => LC_Icelandic_Eth,
941      UC_N_Tilde          => LC_N_Tilde,
942      UC_O_Grave          => LC_O_Grave,
943      UC_O_Acute          => LC_O_Acute,
944      UC_O_Circumflex     => LC_O_Circumflex,
945      UC_O_Tilde          => LC_O_Tilde,
946      UC_O_Diaeresis      => LC_O_Diaeresis,
947      UC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
948      UC_U_Grave          => LC_U_Grave,
949      UC_U_Acute          => LC_U_Acute,
950      UC_U_Circumflex     => LC_U_Circumflex,
951      UC_U_Diaeresis      => LC_U_Diaeresis,
952      UC_Y_Acute          => LC_Y_Acute,
953      UC_Icelandic_Thorn  => LC_Icelandic_Thorn,
954
955      --  Lowercase Latin-1 letters.
956      LC_A_Grave          => LC_A_Grave,
957      LC_A_Acute          => LC_A_Acute,
958      LC_A_Circumflex     => LC_A_Circumflex,
959      LC_A_Tilde          => LC_A_Tilde,
960      LC_A_Diaeresis      => LC_A_Diaeresis,
961      LC_A_Ring           => LC_A_Ring,
962      LC_AE_Diphthong     => LC_AE_Diphthong,
963      LC_C_Cedilla        => LC_C_Cedilla,
964      LC_E_Grave          => LC_E_Grave,
965      LC_E_Acute          => LC_E_Acute,
966      LC_E_Circumflex     => LC_E_Circumflex,
967      LC_E_Diaeresis      => LC_E_Diaeresis,
968      LC_I_Grave          => LC_I_Grave,
969      LC_I_Acute          => LC_I_Acute,
970      LC_I_Circumflex     => LC_I_Circumflex,
971      LC_I_Diaeresis      => LC_I_Diaeresis,
972      LC_Icelandic_Eth    => LC_Icelandic_Eth,
973      LC_N_Tilde          => LC_N_Tilde,
974      LC_O_Grave          => LC_O_Grave,
975      LC_O_Acute          => LC_O_Acute,
976      LC_O_Circumflex     => LC_O_Circumflex,
977      LC_O_Tilde          => LC_O_Tilde,
978      LC_O_Diaeresis      => LC_O_Diaeresis,
979      LC_O_Oblique_Stroke => LC_O_Oblique_Stroke,
980      LC_U_Grave          => LC_U_Grave,
981      LC_U_Acute          => LC_U_Acute,
982      LC_U_Circumflex     => LC_U_Circumflex,
983      LC_U_Diaeresis      => LC_U_Diaeresis,
984      LC_Y_Acute          => LC_Y_Acute,
985      LC_Icelandic_Thorn  => LC_Icelandic_Thorn,
986
987      --  Lowercase latin-1 characters without corresponding uppercase one.
988      LC_Y_Diaeresis      => LC_Y_Diaeresis,
989      LC_German_Sharp_S   => LC_German_Sharp_S,
990
991      --  Not a letter.
992      others => NUL);
993
994   procedure Error_Too_Long is
995   begin
996      Error_Msg_Scan ("identifier is too long (>"
997                        & Natural'Image (Max_Name_Length - 1) & ")");
998   end Error_Too_Long;
999
1000   -- LRM93 13.3.1
1001   -- Basic Identifiers
1002   -- A basic identifier consists only of letters, digits, and underlines.
1003   -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT }
1004   -- LETTER_OR_DIGIT ::= LETTER | DIGIT
1005   -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER
1006   --
1007   -- NB: At the call of this procedure, the current character must be a legal
1008   -- character for a basic identifier.
1009   procedure Scan_Identifier (Allow_PSL : Boolean)
1010   is
1011      use Name_Table;
1012      --  Local copy for speed-up.
1013      Source : constant File_Buffer_Acc := Current_Context.Source;
1014      P : Source_Ptr;
1015
1016      --  Current and next character.
1017      C : Character;
1018
1019      Buffer : String (1 .. Max_Name_Length);
1020      Len : Natural;
1021   begin
1022      -- This is an identifier or a key word.
1023      Len := 0;
1024      P := Pos;
1025
1026      loop
1027         --  Source (pos) is correct.
1028         --  LRM93 13.3.1
1029         --   All characters if a basic identifier are signifiant, including
1030         --   any underline character inserted between a letter or digit and
1031         --   an adjacent letter or digit.
1032         --   Basic identifiers differing only in the use of the corresponding
1033         --   upper and lower case letters are considered as the same.
1034         --
1035         --  GHDL: This is achieved by converting all upper case letters into
1036         --  equivalent lower case letters.
1037         --  The opposite (converting to upper lower case letters) is not
1038         --  possible because two characters have no upper-case equivalent.
1039         C := Source (P);
1040         case C is
1041            when 'A' .. 'Z' =>
1042               C := Character'Val
1043                 (Character'Pos (C)
1044                    + Character'Pos ('a') - Character'Pos ('A'));
1045            when 'a' .. 'z' | '0' .. '9' =>
1046               null;
1047            when '_' =>
1048               if Source (P + 1) = '_' then
1049                  Error_Msg_Scan ("two underscores can't be consecutive");
1050               end if;
1051            when ' ' | ')' | '.' | ';' | ':' =>
1052               exit;
1053            when others =>
1054               --  Non common case.
1055               case Characters_Kind (C) is
1056                  when Upper_Case_Letter | Lower_Case_Letter =>
1057                     if Vhdl_Std = Vhdl_87 then
1058                        Error_8bit;
1059                     end if;
1060                     C := To_Lower_Map (C);
1061                     pragma Assert (C /= NUL);
1062                  when Digit =>
1063                     raise Internal_Error;
1064                  when others =>
1065                     exit;
1066               end case;
1067         end case;
1068
1069         --  Put character in name buffer.  FIXME: compute the hash at the same
1070         --  time ?
1071         if Len >= Max_Name_Length - 1 then
1072            if Len = Max_Name_Length -1 then
1073               Error_Msg_Scan ("identifier is too long (>"
1074                                 & Natural'Image (Max_Name_Length - 1) & ")");
1075               --  Accept this last one character, so that no error for the
1076               --  following characters.
1077               Len := Len + 1;
1078               Buffer (Len) := C;
1079            end if;
1080         else
1081            Len := Len + 1;
1082            Buffer (Len) := C;
1083         end if;
1084
1085         --  Next character.
1086         P := P + 1;
1087      end loop;
1088
1089      if Source (P - 1) = '_' then
1090         if Allow_PSL then
1091            --  Some PSL reserved words finish with '_'.
1092            P := P - 1;
1093            Len := Len - 1;
1094            C := '_';
1095         else
1096            --  Eat the trailing underscore.
1097            Error_Msg_Scan ("an identifier cannot finish with '_'");
1098         end if;
1099      end if;
1100
1101      --  Update position in the scan context.
1102      Pos := P;
1103
1104      -- LRM93 13.2
1105      -- At least one separator is required between an identifier or an
1106      -- abstract literal and an adjacent identifier or abstract literal.
1107      case Characters_Kind (C) is
1108         when Digit
1109           | Upper_Case_Letter
1110           | Lower_Case_Letter =>
1111            raise Internal_Error;
1112         when Other_Special_Character | Special_Character =>
1113            if (C = '"' or C = '%') and then Len <= 2 then
1114               if C = '%' and Vhdl_Std >= Vhdl_08 then
1115                  Error_Msg_Scan ("'%%' not allowed in vhdl 2008 "
1116                                    & "(was replacement character)");
1117                  --  Continue as a bit string.
1118               end if;
1119
1120               --  Good candidate for bit string.
1121
1122               --  LRM93 13.7
1123               --  BASE_SPECIFIER ::= B | O | X
1124               --
1125               --  A letter in a bit string literal (either an extended digit
1126               --  or the base specifier) can be written either in lower case
1127               --  or in upper case, with the same meaning.
1128               --
1129               --  LRM08 15.8 Bit string literals
1130               --  BASE_SPECICIER ::=
1131               --     B | O | X | UB | UO | UX | SB | SO | SX | D
1132               --
1133               --  An extended digit and the base specifier in a bit string
1134               --  literal can be written either in lowercase or in uppercase,
1135               --  with the same meaning.
1136               declare
1137                  Base : Nat32;
1138                  Cl : constant Character := Buffer (Len);
1139                  Cf : constant Character := Buffer (1);
1140               begin
1141                  Current_Context.Bit_Str_Base := Cl;
1142                  if Cl = 'b' then
1143                     Base := 1;
1144                  elsif Cl = 'o' then
1145                     Base := 3;
1146                  elsif Cl = 'x' then
1147                     Base := 4;
1148                  elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then
1149                     Current_Context.Bit_Str_Sign := ' ';
1150                     Scan_Dec_Bit_String;
1151                     return;
1152                  else
1153                     Base := 0;
1154                  end if;
1155                  if Base > 0 then
1156                     if Len = 1 then
1157                        Current_Context.Bit_Str_Sign := ' ';
1158                        Scan_Bit_String (Base);
1159                        return;
1160                     elsif Vhdl_Std >= Vhdl_08
1161                       and then (Cf = 's' or Cf = 'u')
1162                     then
1163                        Current_Context.Bit_Str_Sign := Cf;
1164                        Scan_Bit_String (Base);
1165                        return;
1166                     end if;
1167                  end if;
1168               end;
1169            elsif Vhdl_Std > Vhdl_87 and then C = '\' then
1170               --  Start of extended identifier.  Cannot follow an identifier.
1171               Error_Separator;
1172            end if;
1173
1174         when Invalid =>
1175            --  Improve error message for use of UTF-8 quote marks.
1176            --  It's possible because in the sequence of UTF-8 bytes for the
1177            --  quote marks, there are invalid character (in the 128-160
1178            --  range).
1179            if C = Character'Val (16#80#)
1180              and then Buffer (Len) = Character'Val (16#e2#)
1181              and then (Source (Pos + 1) = Character'Val (16#98#)
1182                          or else Source (Pos + 1) = Character'Val (16#99#))
1183            then
1184               --  UTF-8 left or right single quote mark.
1185               if Len > 1 then
1186                  --  The first byte (0xe2) is part of the identifier.  An
1187                  --  error will be detected as the next byte (0x80) is
1188                  --  invalid.  Remove the first byte from the identifier, and
1189                  --  let's catch the error later.
1190                  Len := Len - 1;
1191                  Pos := Pos - 1;
1192               else
1193                  Error_Msg_Scan ("invalid use of UTF8 character for '");
1194                  Pos := Pos + 2;
1195
1196                  --  Distinguish between character literal and tick.  Don't
1197                  --  care about possible invalid character literal, as in any
1198                  --  case we have already emitted an error message.
1199                  if Current_Context.Prev_Token /= Tok_Identifier
1200                    and then Current_Context.Prev_Token /= Tok_Character
1201                    and then
1202                    (Source (Pos + 1) = '''
1203                       or else
1204                       (Source (Pos + 1) = Character'Val (16#e2#)
1205                          and then Source (Pos + 2) = Character'Val (16#80#)
1206                          and then Source (Pos + 3) = Character'Val (16#99#)))
1207                  then
1208                     Current_Token := Tok_Character;
1209                     Current_Context.Identifier :=
1210                       Name_Table.Get_Identifier (Source (Pos));
1211                     if Source (Pos + 1) = ''' then
1212                        Pos := Pos + 2;
1213                     else
1214                        Pos := Pos + 4;
1215                     end if;
1216                  else
1217                     Current_Token := Tok_Tick;
1218                  end if;
1219                  return;
1220               end if;
1221            end if;
1222         when Format_Effector
1223           | Space_Character =>
1224            null;
1225      end case;
1226
1227      -- Hash it.
1228      Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
1229      Current_Token := Tok_Identifier;
1230   end Scan_Identifier;
1231
1232   procedure Scan_Psl_Keyword_Em (Tok : Token_Type; Tok_Em : Token_Type) is
1233   begin
1234      if Source (Pos) = '!' then
1235         Pos := Pos + 1;
1236         Current_Token := Tok_Em;
1237      else
1238         Current_Token := Tok;
1239      end if;
1240   end Scan_Psl_Keyword_Em;
1241   pragma Inline (Scan_Psl_Keyword_Em);
1242
1243   procedure Scan_Psl_Keyword_Em_Un
1244     (Tok, Tok_Em, Tok_Un, Tok_Em_Un : Token_Type) is
1245   begin
1246      if Source (Pos) = '!' then
1247         Pos := Pos + 1;
1248         if Source (Pos) = '_' then
1249            Pos := Pos + 1;
1250            Current_Token := Tok_Em_Un;
1251         else
1252            Current_Token := Tok_Em;
1253         end if;
1254      elsif Source (Pos) = '_' then
1255         Pos := Pos + 1;
1256         Current_Token := Tok_Un;
1257      else
1258         Current_Token := Tok;
1259      end if;
1260   end Scan_Psl_Keyword_Em_Un;
1261   pragma Inline (Scan_Psl_Keyword_Em_Un);
1262
1263   procedure Identifier_To_Token
1264   is
1265      use Std_Names;
1266   begin
1267      if Current_Identifier in Name_Id_Keywords then
1268         -- LRM93 13.9
1269         --   The identifiers listed below are called reserved words and are
1270         --   reserved for signifiances in the language.
1271         -- IN: this is also achieved in packages std_names and tokens.
1272         Current_Token := Token_Type'Val
1273           (Token_Type'Pos (Tok_First_Keyword)
1274              + Current_Identifier - Name_First_Keyword);
1275         case Current_Identifier is
1276            when Name_Id_AMS_Reserved_Words =>
1277               if not AMS_Vhdl then
1278                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
1279                     Warning_Msg_Scan
1280                       (Warnid_Reserved_Word,
1281                        "using %i AMS-VHDL reserved word as an identifier",
1282                        +Current_Identifier);
1283                  end if;
1284                  Current_Token := Tok_Identifier;
1285               end if;
1286            when Name_Id_Vhdl08_Reserved_Words =>
1287               if Vhdl_Std < Vhdl_08 then
1288                  --  Some vhdl08 reserved words are PSL keywords.
1289                  if Flag_Psl then
1290                     case Current_Identifier is
1291                        when Name_Prev =>
1292                           Current_Token := Tok_Prev;
1293                        when Name_Stable =>
1294                           Current_Token := Tok_Stable;
1295                        when Name_Rose =>
1296                           Current_Token := Tok_Rose;
1297                        when Name_Fell =>
1298                           Current_Token := Tok_Fell;
1299                        when Name_Sequence =>
1300                           Current_Token := Tok_Sequence;
1301                        when Name_Property =>
1302                           Current_Token := Tok_Property;
1303                        when Name_Assume =>
1304                           Current_Token := Tok_Assume;
1305                        when Name_Cover =>
1306                           Current_Token := Tok_Cover;
1307                        when Name_Default =>
1308                           Current_Token := Tok_Default;
1309                        when Name_Restrict =>
1310                           Current_Token := Tok_Restrict;
1311                        when Name_Restrict_Guarantee =>
1312                           Current_Token := Tok_Restrict_Guarantee;
1313                        when Name_Vmode =>
1314                           Current_Token := Tok_Vmode;
1315                        when Name_Vprop =>
1316                           Current_Token := Tok_Vprop;
1317                        when Name_Vunit =>
1318                           Current_Token := Tok_Vunit;
1319                        when others =>
1320                           Current_Token := Tok_Identifier;
1321                     end case;
1322                  else
1323                     Current_Token := Tok_Identifier;
1324                  end if;
1325                  if Is_Warning_Enabled (Warnid_Reserved_Word)
1326                    and then Current_Token = Tok_Identifier
1327                  then
1328                     Warning_Msg_Scan
1329                       (Warnid_Reserved_Word,
1330                        "using %i vhdl-2008 reserved word as an identifier",
1331                        +Current_Identifier);
1332                  end if;
1333               end if;
1334            when Name_Id_Vhdl00_Reserved_Words =>
1335               if Vhdl_Std < Vhdl_00 then
1336                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
1337                     Warning_Msg_Scan
1338                       (Warnid_Reserved_Word,
1339                        "using %i vhdl-2000 reserved word as an identifier",
1340                        +Current_Identifier);
1341                  end if;
1342                  Current_Token := Tok_Identifier;
1343               end if;
1344            when Name_Id_Vhdl93_Reserved_Words =>
1345               if Vhdl_Std = Vhdl_87 then
1346                  if Is_Warning_Enabled (Warnid_Reserved_Word) then
1347                     Report_Start_Group;
1348                     Warning_Msg_Scan
1349                       (Warnid_Reserved_Word,
1350                        "using %i vhdl93 reserved word as a vhdl87 identifier",
1351                        +Current_Identifier);
1352                     Warning_Msg_Scan
1353                       (Warnid_Reserved_Word,
1354                        "(use option --std=93 to compile as vhdl93)");
1355                     Report_End_Group;
1356                  end if;
1357                  Current_Token := Tok_Identifier;
1358               end if;
1359            when Name_Id_Vhdl87_Reserved_Words =>
1360               if Flag_Psl then
1361                  if Current_Token = Tok_Until then
1362                     Scan_Psl_Keyword_Em_Un (Tok_Until, Tok_Until_Em,
1363                                             Tok_Until_Un, Tok_Until_Em_Un);
1364                  elsif Current_Token = Tok_Next then
1365                     Scan_Psl_Keyword_Em (Tok_Next, Tok_Next_Em);
1366                  end if;
1367               end if;
1368            when others =>
1369               raise Program_Error;
1370         end case;
1371      elsif Flag_Psl then
1372         case Current_Identifier is
1373            when Name_Prev =>
1374               Current_Token := Tok_Prev;
1375            when Name_Stable =>
1376               Current_Token := Tok_Stable;
1377            when Name_Rose =>
1378               Current_Token := Tok_Rose;
1379            when Name_Fell =>
1380               Current_Token := Tok_Fell;
1381            when Name_Clock =>
1382               Current_Token := Tok_Psl_Clock;
1383            when Name_Const =>
1384               Current_Token := Tok_Psl_Const;
1385            when Name_Boolean =>
1386               Current_Token := Tok_Psl_Boolean;
1387            when Name_Sequence =>
1388               Current_Token := Tok_Sequence;
1389            when Name_Property =>
1390               Current_Token := Tok_Property;
1391            when Name_Endpoint =>
1392               Current_Token := Tok_Psl_Endpoint;
1393            when Name_Assume =>
1394               Current_Token := Tok_Assume;
1395            when Name_Cover =>
1396               Current_Token := Tok_Cover;
1397            when Name_Default =>
1398               Current_Token := Tok_Default;
1399            when Name_Restrict =>
1400               Current_Token := Tok_Restrict;
1401            when Name_Restrict_Guarantee =>
1402               Current_Token := Tok_Restrict_Guarantee;
1403            when Name_Inf =>
1404               Current_Token := Tok_Inf;
1405            when Name_Within =>
1406               Current_Token := Tok_Within;
1407            when Name_Abort =>
1408               Current_Token := Tok_Abort;
1409            when Name_Before =>
1410               Scan_Psl_Keyword_Em_Un (Tok_Before, Tok_Before_Em,
1411                                       Tok_Before_Un, Tok_Before_Em_Un);
1412            when Name_Always =>
1413               Current_Token := Tok_Always;
1414            when Name_Never =>
1415               Current_Token := Tok_Never;
1416            when Name_Eventually =>
1417               if Source (Pos) = '!' then
1418                  Pos := Pos + 1;
1419               else
1420                  Error_Msg_Scan ("'!' expected after 'eventually'");
1421               end if;
1422               Current_Token := Tok_Eventually_Em;
1423            when Name_Next_A =>
1424               Scan_Psl_Keyword_Em (Tok_Next_A, Tok_Next_A_Em);
1425            when Name_Next_E =>
1426               Scan_Psl_Keyword_Em (Tok_Next_E, Tok_Next_E_Em);
1427            when Name_Next_Event =>
1428               Scan_Psl_Keyword_Em (Tok_Next_Event, Tok_Next_Event_Em);
1429            when Name_Next_Event_A =>
1430               Scan_Psl_Keyword_Em (Tok_Next_Event_A, Tok_Next_Event_A_Em);
1431            when Name_Next_Event_E =>
1432               Scan_Psl_Keyword_Em (Tok_Next_Event_E, Tok_Next_Event_E_Em);
1433            when Name_Until =>
1434               raise Internal_Error;
1435            when others =>
1436               Current_Token := Tok_Identifier;
1437               if Source (Pos - 1) = '_' then
1438                  Error_Msg_Scan ("identifiers cannot finish with '_'");
1439               end if;
1440         end case;
1441      end if;
1442   end Identifier_To_Token;
1443
1444   --  LRM93 13.3.2
1445   --  EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \
1446   --
1447   --  Create an (extended) indentifier.
1448   --  Extended identifiers are stored as they appear (leading and tailing
1449   --  backslashes, doubling backslashes inside).
1450   procedure Scan_Extended_Identifier
1451   is
1452      use Name_Table;
1453      Buffer : String (1 .. Max_Name_Length);
1454      Len : Natural;
1455      C : Character;
1456   begin
1457      --  LRM93 13.3.2
1458      --  Moreover, every extended identifiers is distinct from any basic
1459      --  identifier.
1460      --  GHDL: This is satisfied by storing '\' in the name table.
1461      Len := 1;
1462      Buffer (1) := '\';
1463      loop
1464         --  Next character.
1465         Pos := Pos + 1;
1466         C := Source (Pos);
1467
1468         if C = '\' then
1469            --  LRM93 13.3.2
1470            --  If a backslash is to be used as one of the graphic characters
1471            --  of an extended literal, it must be doubled.
1472            --  LRM93 13.3.2
1473            --  (a doubled backslash couting as one character)
1474            if Len >= Max_Name_Length - 1 then
1475               if Len = Max_Name_Length - 1 then
1476                  Error_Too_Long;
1477                  --  Accept this last one.
1478                  Len := Len + 1;
1479                  Buffer (Len) := C;
1480               end if;
1481            else
1482               Len := Len + 1;
1483               Buffer (Len) := C;
1484            end if;
1485
1486            Pos := Pos + 1;
1487            C := Source (Pos);
1488
1489            exit when C /= '\';
1490         end if;
1491
1492         case Characters_Kind (C) is
1493            when Format_Effector =>
1494               Error_Msg_Scan ("format effector in extended identifier");
1495               exit;
1496            when Graphic_Character =>
1497               null;
1498            when Invalid =>
1499               if C = Files_Map.EOT
1500                 and then Pos >= Current_Context.File_Len
1501               then
1502                  Error_Msg_Scan
1503                    ("extended identifier not terminated at end of file");
1504               elsif C = LF or C = CR then
1505                  Error_Msg_Scan
1506                    ("extended identifier not terminated at end of line");
1507               else
1508                  Error_Msg_Scan ("invalid character in extended identifier");
1509               end if;
1510               exit;
1511         end case;
1512
1513         --  LRM93 13.3.2
1514         --  Extended identifiers differing only in the use of corresponding
1515         --  upper and lower case letters are distinct.
1516         if Len >= Max_Name_Length - 1 then
1517            if Len = Max_Name_Length - 1 then
1518               Error_Too_Long;
1519               --  Accept this last one.
1520               Len := Len + 1;
1521               Buffer (Len) := C;
1522            end if;
1523         else
1524            Len := Len + 1;
1525            Buffer (Len) := C;
1526         end if;
1527      end loop;
1528
1529      if Len <= 2 then
1530         Error_Msg_Scan ("empty extended identifier is not allowed");
1531      end if;
1532
1533      --  LRM93 13.2
1534      --  At least one separator is required between an identifier or an
1535      --  abstract literal and an adjacent identifier or abstract literal.
1536      case Characters_Kind (C) is
1537         when Digit
1538           | Upper_Case_Letter
1539           | Lower_Case_Letter =>
1540            Error_Separator;
1541         when Invalid
1542           | Format_Effector
1543           | Space_Character
1544           | Special_Character
1545           | Other_Special_Character =>
1546            null;
1547      end case;
1548
1549      -- Hash it.
1550      Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len));
1551      Current_Token := Tok_Identifier;
1552   end Scan_Extended_Identifier;
1553
1554   procedure Convert_Identifier (Str : in out String; Err : out Boolean)
1555   is
1556      F : constant Integer := Str'First;
1557
1558      procedure Error_Bad is
1559      begin
1560         Error_Msg_Option ("bad character in identifier");
1561      end Error_Bad;
1562
1563      procedure Error_8bit is
1564      begin
1565         Error_Msg_Option ("8 bits characters not allowed in vhdl87");
1566      end Error_8bit;
1567
1568      C : Character;
1569   begin
1570      Err := True;
1571
1572      if Str'Length = 0 then
1573         Error_Msg_Option ("identifier required");
1574         return;
1575      end if;
1576
1577      if Str (F) = '\' then
1578         --  Extended identifier.
1579         if Vhdl_Std = Vhdl_87 then
1580            Error_Msg_Option ("extended identifiers not allowed in vhdl87");
1581            return;
1582         end if;
1583
1584         if Str'Last < F + 2 then
1585            Error_Msg_Option ("extended identifier is too short");
1586            return;
1587         end if;
1588         if Str (Str'Last) /= '\' then
1589            Error_Msg_Option ("extended identifier must finish with a '\'");
1590            return;
1591         end if;
1592         for I in F + 1 .. Str'Last - 1 loop
1593            C := Str (I);
1594            case Characters_Kind (C) is
1595               when Format_Effector =>
1596                  Error_Msg_Option ("format effector in extended identifier");
1597                  return;
1598               when Graphic_Character =>
1599                  if C = '\' then
1600                     if Str (I + 1) /= '\'
1601                       or else I = Str'Last - 1
1602                     then
1603                        Error_Msg_Option ("anti-slash must be doubled "
1604                                            & "in extended identifier");
1605                        return;
1606                     end if;
1607                  end if;
1608               when Invalid =>
1609                  Error_Bad;
1610                  return;
1611            end case;
1612         end loop;
1613      else
1614         --  Identifier
1615         for I in F .. Str'Last loop
1616            C := Str (I);
1617            case Characters_Kind (C) is
1618               when Upper_Case_Letter =>
1619                  if Vhdl_Std = Vhdl_87 and C > 'Z' then
1620                     Error_8bit;
1621                     return;
1622                  end if;
1623                  Str (I) := To_Lower_Map (C);
1624               when Lower_Case_Letter | Digit =>
1625                  if Vhdl_Std = Vhdl_87 and C > 'z' then
1626                     Error_8bit;
1627                     return;
1628                  end if;
1629               when Special_Character =>
1630                  -- The current character is legal in an identifier.
1631                  if C = '_' then
1632                     if I = 1 then
1633                        Error_Msg_Option
1634                          ("an identifier cannot start with an underscore");
1635                        return;
1636                     end if;
1637                     if Str (I - 1) = '_' then
1638                        Error_Msg_Option
1639                          ("two underscores can't be consecutive");
1640                        return;
1641                     end if;
1642                     if I = Str'Last then
1643                        Error_Msg_Option
1644                          ("an identifier cannot finish with an underscore");
1645                        return;
1646                     end if;
1647                  else
1648                     Error_Bad;
1649                     return;
1650                  end if;
1651               when others =>
1652                  Error_Bad;
1653                  return;
1654            end case;
1655         end loop;
1656      end if;
1657      Err := False;
1658   end Convert_Identifier;
1659
1660   --  Internal scanner function: return True if C must be considered as a line
1661   --  terminator.  This also includes EOT (which terminates the file or is
1662   --  invalid).
1663   function Is_EOL (C : Character) return Boolean is
1664   begin
1665      case C is
1666         when CR | LF | VT | FF | Files_Map.EOT =>
1667            return True;
1668         when others =>
1669            return False;
1670      end case;
1671   end Is_EOL;
1672
1673   --  Advance scanner till the first non-space character.
1674   procedure Skip_Spaces is
1675   begin
1676      while Source (Pos) = ' ' or Source (Pos) = HT loop
1677         Pos := Pos + 1;
1678      end loop;
1679   end Skip_Spaces;
1680
1681   --  Eat all characters until end-of-line (not included).
1682   procedure Skip_Until_EOL is
1683   begin
1684      while not Is_EOL (Source (Pos)) loop
1685         --  Don't warn about invalid character, it's somewhat out of the
1686         --  scope.
1687         Pos := Pos + 1;
1688      end loop;
1689   end Skip_Until_EOL;
1690
1691   --  Scan an identifier within a comment.  Only lower case letters are
1692   --  allowed.
1693   procedure Scan_Comment_Identifier (Id : out Name_Id; Create : Boolean)
1694   is
1695      use Name_Table;
1696      Buffer : String (1 .. Max_Name_Length);
1697      Len : Natural;
1698      C : Character;
1699   begin
1700      Id := Null_Identifier;
1701      Skip_Spaces;
1702
1703      --  The identifier shall start with a letter.
1704      case Source (Pos) is
1705         when 'a' .. 'z'
1706            | 'A' .. 'Z' =>
1707            null;
1708         when others =>
1709            return;
1710      end case;
1711
1712      --  Scan the identifier.
1713      Len := 0;
1714      loop
1715         C := Source (Pos);
1716         case C is
1717            when 'a' .. 'z' =>
1718               null;
1719            when 'A' .. 'Z' =>
1720               C := Character'Val (Character'Pos (C) + 32);
1721            when '_' =>
1722               null;
1723            when others =>
1724               exit;
1725         end case;
1726         Len := Len + 1;
1727         Buffer (Len) := C;
1728         Pos := Pos + 1;
1729      end loop;
1730
1731      --  Shall be followed by a space or a new line.
1732      if not (C = ' ' or else C = HT or else Is_EOL (C)) then
1733         return;
1734      end if;
1735
1736      if Create then
1737         Id := Get_Identifier (Buffer (1 .. Len));
1738      else
1739         Id := Get_Identifier_No_Create (Buffer (1 .. Len));
1740      end if;
1741   end Scan_Comment_Identifier;
1742
1743   package Directive_Protect is
1744      --  Called to scan a protect tool directive.
1745      procedure Scan_Protect_Directive;
1746   end Directive_Protect;
1747
1748   --  Body is put in a separate file to avoid pollution.
1749   package body Directive_Protect is separate;
1750
1751   --  Called to scan a tool directive.
1752   procedure Scan_Tool_Directive
1753   is
1754      procedure Error_Missing_Directive is
1755      begin
1756         Error_Msg_Scan ("tool directive required after '`'");
1757         Skip_Until_EOL;
1758      end Error_Missing_Directive;
1759
1760      C : Character;
1761   begin
1762      --  The current character is '`'.
1763      Pos := Pos + 1;
1764      Skip_Spaces;
1765
1766      --  Check and scan identifier.
1767      C := Source (Pos);
1768      if Characters_Kind (C) not in Letter then
1769         Error_Missing_Directive;
1770         return;
1771      end if;
1772
1773      Scan_Identifier (False);
1774
1775      if Current_Token /= Tok_Identifier then
1776         Error_Missing_Directive;
1777         return;
1778      end if;
1779
1780      Skip_Spaces;
1781
1782      --  Dispatch according to the identifier.
1783      if Current_Identifier = Std_Names.Name_Protect then
1784         Directive_Protect.Scan_Protect_Directive;
1785      else
1786         Error_Msg_Scan
1787           ("unknown tool directive %i ignored", +Current_Identifier);
1788         Skip_Until_EOL;
1789      end if;
1790   end Scan_Tool_Directive;
1791
1792   --  Skip until new_line after translate_on/translate_off.
1793   procedure Scan_Translate_On_Off (Id : Name_Id) is
1794   begin
1795      --  Expect new line.
1796      Skip_Spaces;
1797
1798      if not Is_EOL (Source (Pos)) then
1799         Warning_Msg_Scan (Warnid_Pragma, "garbage ignored after '%i'", +Id);
1800         loop
1801            Pos := Pos + 1;
1802            exit when Is_EOL (Source (Pos));
1803         end loop;
1804      end if;
1805   end Scan_Translate_On_Off;
1806
1807   procedure Scan_Translate_Off is
1808   begin
1809      if Current_Context.Translate_Off then
1810         Warning_Msg_Scan (Warnid_Pragma, "nested 'translate_off' ignored");
1811         return;
1812      end if;
1813
1814      --  'pragma translate_off' has just been scanned.
1815      Scan_Translate_On_Off (Std_Names.Name_Translate_Off);
1816
1817      Current_Context.Translate_Off := True;
1818
1819      --  Recursive scan until 'translate_on' is scanned.
1820      loop
1821         Scan;
1822         if not Current_Context.Translate_Off then
1823            --  That token is discarded.
1824            pragma Assert (Current_Token = Tok_Line_Comment);
1825            Flag_Comment := False;
1826            exit;
1827         elsif Current_Token = Tok_Eof then
1828            Warning_Msg_Scan (Warnid_Pragma,
1829                              "unterminated 'translate_off'");
1830            Current_Context.Translate_Off := False;
1831            exit;
1832         end if;
1833      end loop;
1834
1835      --  The scanner is now at the EOL of the translate_on or at the EOF.
1836      --  Continue scanning.
1837   end Scan_Translate_Off;
1838
1839   procedure Scan_Translate_On is
1840   begin
1841      if not Current_Context.Translate_Off then
1842         Warning_Msg_Scan
1843           (Warnid_Pragma,
1844            "'translate_on' without coresponding 'translate_off'");
1845         return;
1846      end if;
1847
1848      --  'pragma translate_off' has just been scanned.
1849      Scan_Translate_On_Off (Std_Names.Name_Translate_On);
1850
1851      Current_Context.Translate_Off := False;
1852
1853      --  Return a token that will be discarded.
1854      Flag_Comment := True;
1855   end Scan_Translate_On;
1856
1857   procedure Scan_Comment_Pragma
1858   is
1859      use Std_Names;
1860      Id : Name_Id;
1861   begin
1862      Scan_Comment_Identifier (Id, True);
1863      case Id is
1864         when Null_Identifier =>
1865            Warning_Msg_Scan
1866              (Warnid_Pragma, "incomplete pragma directive ignored");
1867         when Name_Translate =>
1868            Scan_Comment_Identifier (Id, True);
1869            case Id is
1870               when Name_On =>
1871                  Scan_Translate_On;
1872               when Name_Off =>
1873                  Scan_Translate_Off;
1874               when others =>
1875                  Warning_Msg_Scan
1876                    (Warnid_Pragma,
1877                     "pragma translate must be followed by 'on' or 'off'");
1878            end case;
1879         when Name_Translate_Off
1880           |  Name_Synthesis_Off =>
1881            Scan_Translate_Off;
1882         when Name_Translate_On
1883           |  Name_Synthesis_On =>
1884            Scan_Translate_On;
1885         when Name_Label
1886           |  Name_Label_Applies_To
1887           |  Name_Return_Port_Name
1888           |  Name_Map_To_Operator
1889           |  Name_Type_Function
1890           |  Name_Built_In =>
1891            --  Used by synopsys, discarded.
1892            Skip_Until_EOL;
1893         when others =>
1894            Warning_Msg_Scan
1895              (Warnid_Pragma, "unknown pragma %i ignored", +Id);
1896      end case;
1897   end Scan_Comment_Pragma;
1898
1899   --  Scan tokens within a comment.  Return TRUE if Current_Token was set,
1900   --  return FALSE to discard the comment (ie treat it like a real comment).
1901   function Scan_Comment return Boolean
1902   is
1903      use Std_Names;
1904      Id : Name_Id;
1905   begin
1906      Scan_Comment_Identifier (Id, False);
1907
1908      if Id = Null_Identifier then
1909         return False;
1910      end if;
1911
1912      case Id is
1913         when Name_Psl =>
1914            --  Accept tokens after '-- psl'.
1915            if Flag_Psl_Comment then
1916               Flag_Psl := True;
1917               Flag_Scan_In_Comment := True;
1918               return True;
1919            end if;
1920         when Name_Pragma
1921           | Name_Synthesis
1922           | Name_Synopsys =>
1923            if Flag_Pragma_Comment then
1924               Scan_Comment_Pragma;
1925               return False;
1926            end if;
1927         when others =>
1928            null;
1929      end case;
1930      return False;
1931   end Scan_Comment;
1932
1933   --  The Scan_Next_Line procedure must be called after each end-of-line to
1934   --  register to next line number.  This is called by Scan_CR_Newline and
1935   --  Scan_LF_Newline.
1936   procedure Scan_Next_Line is
1937   begin
1938      Files_Map.Skip_Gap (Current_Context.Source_File, Pos);
1939      Current_Context.Line_Number := Current_Context.Line_Number + 1;
1940      Current_Context.Line_Pos := Pos;
1941      File_Add_Line_Number
1942        (Current_Context.Source_File, Current_Context.Line_Number, Pos);
1943   end Scan_Next_Line;
1944
1945   --  Scan a CR end-of-line.
1946   procedure Scan_CR_Newline is
1947   begin
1948      -- Accept CR or CR+LF as line separator.
1949      if Source (Pos + 1) = LF then
1950         Pos := Pos + 2;
1951      else
1952         Pos := Pos + 1;
1953      end if;
1954      Scan_Next_Line;
1955   end Scan_CR_Newline;
1956
1957   --  Scan a LF end-of-line.
1958   procedure Scan_LF_Newline is
1959   begin
1960      -- Accept LF or LF+CR as line separator.
1961      if Source (Pos + 1) = CR then
1962         Pos := Pos + 2;
1963      else
1964         Pos := Pos + 1;
1965      end if;
1966      Scan_Next_Line;
1967   end Scan_LF_Newline;
1968
1969   --  Emit an error message for an invalid character.
1970   procedure Error_Bad_Character is
1971   begin
1972      --  Technically character literals, string literals, extended
1973      --  identifiers and comments.
1974      Error_Msg_Scan ("character %c can only be used in strings or comments",
1975                      +Source (Pos));
1976   end Error_Bad_Character;
1977
1978   procedure Scan_Block_Comment is
1979   begin
1980      Current_Context.Prev_Pos := Pos;
1981      Current_Context.Token_Pos := Pos;
1982
1983      loop
1984         case Source (Pos) is
1985            when '/' =>
1986               --  LRM08 15.9
1987               --  Moreover, an occurrence of a solidus character
1988               --  immediately followed by an asterisk character
1989               --  within a delimited comment is not interpreted as
1990               --  the start of a nested delimited comment.
1991               if Source (Pos + 1) = '*' then
1992                  Warning_Msg_Scan (Warnid_Nested_Comment,
1993                                    "'/*' found within a block comment");
1994               end if;
1995               Pos := Pos + 1;
1996            when '*' =>
1997               if Source (Pos + 1) = '/' then
1998                  if Pos > Current_Context.Token_Pos then
1999                     Current_Token := Tok_Block_Comment_Text;
2000                  else
2001                     Pos := Pos + 2;
2002                     Current_Token := Tok_Block_Comment_End;
2003                  end if;
2004                  return;
2005               else
2006                  Pos := Pos + 1;
2007               end if;
2008            when CR =>
2009               if Pos > Current_Context.Token_Pos then
2010                  Current_Token := Tok_Block_Comment_Text;
2011               else
2012                  Scan_CR_Newline;
2013                  Current_Token := Tok_Newline;
2014               end if;
2015               return;
2016            when LF =>
2017               if Pos > Current_Context.Token_Pos then
2018                  Current_Token := Tok_Block_Comment_Text;
2019               else
2020                  Scan_LF_Newline;
2021                  Current_Token := Tok_Newline;
2022               end if;
2023               return;
2024            when Files_Map.EOT =>
2025               if Pos >= Current_Context.File_Len then
2026                  --  Point at the start of the comment.
2027                  Error_Msg_Scan
2028                    (+Get_Token_Location,
2029                     "block comment not terminated at end of file");
2030                  Current_Token := Tok_Eof;
2031                  return;
2032               end if;
2033               Pos := Pos + 1;
2034            when others =>
2035               Pos := Pos + 1;
2036         end case;
2037      end loop;
2038   end Scan_Block_Comment;
2039
2040   -- Get a new token.
2041   procedure Scan is
2042   begin
2043      if Current_Token /= Tok_Invalid then
2044         Current_Context.Prev_Token := Current_Token;
2045      end if;
2046
2047      Current_Context.Prev_Pos := Pos;
2048
2049      << Again >> null;
2050
2051      --  Skip commonly used separators.
2052      --  (Like Skip_Spaces but manually inlined for speed).
2053      while Source (Pos) = ' ' or Source (Pos) = HT loop
2054         Pos := Pos + 1;
2055      end loop;
2056
2057      Current_Context.Token_Pos := Pos;
2058      Current_Context.Identifier := Null_Identifier;
2059
2060      case Source (Pos) is
2061         when HT | ' ' =>
2062            --  Must have already been skipped just above.
2063            raise Internal_Error;
2064         when NBSP =>
2065            if Vhdl_Std = Vhdl_87 then
2066               Error_Msg_Scan ("NBSP character not allowed in vhdl87");
2067            end if;
2068            Pos := Pos + 1;
2069            goto Again;
2070         when VT | FF =>
2071            Pos := Pos + 1;
2072            goto Again;
2073         when LF =>
2074            Scan_LF_Newline;
2075            if Flag_Newline then
2076               Current_Token := Tok_Newline;
2077               return;
2078            end if;
2079            goto Again;
2080         when CR =>
2081            Scan_CR_Newline;
2082            if Flag_Newline then
2083               Current_Token := Tok_Newline;
2084               return;
2085            end if;
2086            goto Again;
2087         when '-' =>
2088            if Source (Pos + 1) = '-' then
2089               -- This is a comment.
2090               -- LRM93 13.8
2091               --   A comment starts with two adjacent hyphens and extends up
2092               --   to the end of the line.
2093               --   A comment can appear on any line line of a VHDL
2094               --   description.
2095               --   The presence or absence of comments has no influence on
2096               --   whether a description is legal or illegal.
2097               --   Futhermore, comments do not influence the execution of a
2098               --   simulation module; their sole purpose is the enlightenment
2099               --   of the human reader.
2100               -- GHDL note: As a consequence, an obfruscating comment
2101               --  is out of purpose, and a warning could be reported :-)
2102               Pos := Pos + 2;
2103
2104               --  Scan inside a comment.  So we just ignore the two dashes.
2105               if Flag_Scan_In_Comment then
2106                  goto Again;
2107               end if;
2108
2109               --  Handle keywords in comment (PSL).
2110               if Flag_Comment_Keyword and then Scan_Comment then
2111                  goto Again;
2112               end if;
2113
2114               --  LRM93 13.2
2115               --  In any case, a sequence of one or more format
2116               --  effectors other than horizontal tabulation must
2117               --  cause at least one end of line.
2118               while not Is_EOL (Source (Pos)) loop
2119                  --  LRM93 13.1
2120                  --  The only characters allowed in the text of a VHDL
2121                  --  description are the graphic characters and the format
2122                  --  effectors.
2123
2124                  --  LRM02 13.1 Character set
2125                  --  The only characters allowed in the text of a VHDL
2126                  --  description (except within comments -- see 13.8) [...]
2127                  --
2128                  --  LRM02 13.8 Comments
2129                  --  A comment [...] may contain any character except the
2130                  --  format effectors vertical tab, carriage return, line
2131                  --  feed and form feed.
2132                  if not (Flags.Mb_Comment
2133                          or Flags.Flag_Relaxed_Rules
2134                          or Vhdl_Std >= Vhdl_02)
2135                    and then Characters_Kind (Source (Pos)) = Invalid
2136                  then
2137                     Error_Msg_Scan ("invalid character, even in a comment");
2138                  end if;
2139                  Pos := Pos + 1;
2140               end loop;
2141               if Flag_Comment then
2142                  Current_Token := Tok_Line_Comment;
2143                  return;
2144               end if;
2145               goto Again;
2146            elsif Flag_Psl and then Source (Pos + 1) = '>' then
2147               Current_Token := Tok_Minus_Greater;
2148               Pos := Pos + 2;
2149               return;
2150            else
2151               Current_Token := Tok_Minus;
2152               Pos := Pos + 1;
2153               return;
2154            end if;
2155         when '+' =>
2156            Current_Token := Tok_Plus;
2157            Pos := Pos + 1;
2158            return;
2159         when '*' =>
2160            if Source (Pos + 1) = '*' then
2161               Current_Token := Tok_Double_Star;
2162               Pos := Pos + 2;
2163            else
2164               Current_Token := Tok_Star;
2165               Pos := Pos + 1;
2166            end if;
2167            return;
2168         when '/' =>
2169            if Source (Pos + 1) = '=' then
2170               Current_Token := Tok_Not_Equal;
2171               Pos := Pos + 2;
2172            elsif Source (Pos + 1) = '*' then
2173               --  LRM08 15.9 Comments
2174               --  A delimited comment start with a solidus (slash) character
2175               --  immediately followed by an asterisk character and extends up
2176               --  to the first subsequent occurrence of an asterisk character
2177               --  immediately followed by a solidus character.
2178               if Vhdl_Std < Vhdl_08 then
2179                  Error_Msg_Scan
2180                    ("block comment are not allowed before vhdl 2008");
2181               end if;
2182
2183               --  Skip '/*'.
2184               Pos := Pos + 2;
2185
2186               if Flag_Comment then
2187                  Current_Token := Tok_Block_Comment_Start;
2188                  return;
2189               end if;
2190
2191               loop
2192                  Scan_Block_Comment;
2193                  exit when Current_Token = Tok_Block_Comment_End
2194                    or else Current_Token = Tok_Eof;
2195               end loop;
2196               goto Again;
2197            else
2198               Current_Token := Tok_Slash;
2199               Pos := Pos + 1;
2200            end if;
2201            return;
2202         when '(' =>
2203            Current_Token := Tok_Left_Paren;
2204            Pos := Pos + 1;
2205            return;
2206         when ')' =>
2207            Current_Token := Tok_Right_Paren;
2208            Pos := Pos + 1;
2209            return;
2210         when '|' =>
2211            if Flag_Psl then
2212               if Source (Pos + 1) = '|' then
2213                  Current_Token := Tok_Bar_Bar;
2214                  Pos := Pos + 2;
2215               elsif Source (Pos + 1) = '-'
2216                 and then Source (Pos + 2) = '>'
2217               then
2218                  Current_Token := Tok_Bar_Arrow;
2219                  Pos := Pos + 3;
2220               elsif Source (Pos + 1) = '='
2221                 and then Source (Pos + 2) = '>'
2222               then
2223                  Current_Token := Tok_Bar_Double_Arrow;
2224                  Pos := Pos + 3;
2225               else
2226                  Current_Token := Tok_Bar;
2227                  Pos := Pos + 1;
2228               end if;
2229            else
2230               Current_Token := Tok_Bar;
2231               Pos := Pos + 1;
2232            end if;
2233            return;
2234         when '!' =>
2235            if Flag_Psl then
2236               Current_Token := Tok_Exclam_Mark;
2237            else
2238               if Source (Pos + 1) = '=' then
2239                  --  != is not allowed in VHDL, but be friendly with C users.
2240                  Error_Msg_Scan
2241                    (+Get_Token_Location, "Use '/=' for inequality in vhdl");
2242                  Current_Token := Tok_Not_Equal;
2243                  Pos := Pos + 1;
2244               else
2245                  --  LRM93 13.10
2246                  --  A vertical line (|) can be replaced by an exclamation
2247                  --  mark (!) where used as a delimiter.
2248                  Current_Token := Tok_Bar;
2249               end if;
2250            end if;
2251            Pos := Pos + 1;
2252            return;
2253         when ':' =>
2254            if Source (Pos + 1) = '=' then
2255               Current_Token := Tok_Assign;
2256               Pos := Pos + 2;
2257            else
2258               Current_Token := Tok_Colon;
2259               Pos := Pos + 1;
2260            end if;
2261            return;
2262         when ';' =>
2263            Current_Token := Tok_Semi_Colon;
2264            Pos := Pos + 1;
2265            return;
2266         when ',' =>
2267            Current_Token := Tok_Comma;
2268            Pos := Pos + 1;
2269            return;
2270         when '.' =>
2271            if Source (Pos + 1) = '.' then
2272               --  Be Ada friendly...
2273               Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'");
2274               Current_Token := Tok_To;
2275               Pos := Pos + 2;
2276               return;
2277            end if;
2278            Current_Token := Tok_Dot;
2279            Pos := Pos + 1;
2280            return;
2281         when '&' =>
2282            if Flag_Psl and then Source (Pos + 1) = '&' then
2283               Current_Token := Tok_And_And;
2284               Pos := Pos + 2;
2285            else
2286               Current_Token := Tok_Ampersand;
2287               Pos := Pos + 1;
2288            end if;
2289            return;
2290         when '<' =>
2291            case Source (Pos + 1) is
2292               when '=' =>
2293                  Current_Token := Tok_Less_Equal;
2294                  Pos := Pos + 2;
2295               when '>' =>
2296                  Current_Token := Tok_Box;
2297                  Pos := Pos + 2;
2298               when '<' =>
2299                  Current_Token := Tok_Double_Less;
2300                  Pos := Pos + 2;
2301               when '-' =>
2302                  if Flag_Psl and then Source (Pos + 2) = '>' then
2303                     Current_Token := Tok_Equiv_Arrow;
2304                     Pos := Pos + 3;
2305                  else
2306                     Current_Token := Tok_Less;
2307                     Pos := Pos + 1;
2308                  end if;
2309               when others =>
2310                  Current_Token := Tok_Less;
2311                  Pos := Pos + 1;
2312            end case;
2313            return;
2314         when '>' =>
2315            case Source (Pos + 1) is
2316               when '=' =>
2317                  Current_Token := Tok_Greater_Equal;
2318                  Pos := Pos + 2;
2319               when '>' =>
2320                  Current_Token := Tok_Double_Greater;
2321                  Pos := Pos + 2;
2322               when others =>
2323                  Current_Token := Tok_Greater;
2324                  Pos := Pos + 1;
2325            end case;
2326            return;
2327         when '=' =>
2328            if Source (Pos + 1) = '=' then
2329               if AMS_Vhdl then
2330                  Current_Token := Tok_Equal_Equal;
2331               else
2332                  Error_Msg_Scan
2333                    ("'==' is not the vhdl equality, replaced by '='");
2334                  Current_Token := Tok_Equal;
2335               end if;
2336               Pos := Pos + 2;
2337            elsif Source (Pos + 1) = '>' then
2338               Current_Token := Tok_Double_Arrow;
2339               Pos := Pos + 2;
2340            else
2341               Current_Token := Tok_Equal;
2342               Pos := Pos + 1;
2343            end if;
2344            return;
2345         when ''' =>
2346            -- Handle cases such as character'('a')
2347            -- FIXME: what about f ()'length ? or .all'length
2348            if Current_Context.Prev_Token /= Tok_Identifier
2349              and then Current_Context.Prev_Token /= Tok_Character
2350              and then Source (Pos + 2) = '''
2351            then
2352               -- LRM93 13.5
2353               -- A character literal is formed by enclosing one of the 191
2354               -- graphic character (...) between two apostrophe characters.
2355               -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER '
2356               if Characters_Kind (Source (Pos + 1)) not in Graphic_Character
2357               then
2358                  Error_Msg_Scan
2359                    ("a character literal can only be a graphic character");
2360               elsif Vhdl_Std = Vhdl_87
2361                 and then Source (Pos + 1) > Character'Val (127)
2362               then
2363                  Error_8bit;
2364               end if;
2365               Current_Token := Tok_Character;
2366               Current_Context.Identifier :=
2367                 Name_Table.Get_Identifier (Source (Pos + 1));
2368               Pos := Pos + 3;
2369               return;
2370            elsif Source (Pos + 1) = ''' then
2371               Error_Msg_Scan ("empty quote is not allowed in vhdl");
2372               Current_Token := Tok_Character;
2373               Current_Context.Identifier := Name_Table.Get_Identifier (' ');
2374               Pos := Pos + 2;
2375               return;
2376            else
2377               Current_Token := Tok_Tick;
2378               Pos := Pos + 1;
2379            end if;
2380            return;
2381         when '0' .. '9' =>
2382            Scan_Literal;
2383
2384            --  LRM93 13.2
2385            --  At least one separator is required between an identifier or
2386            --  an abstract literal and an adjacent identifier or abstract
2387            --  literal.
2388            case Characters_Kind (Source (Pos)) is
2389               when Digit =>
2390                  --  Happen if d#ddd# is followed by a number.
2391                  Error_Msg_Scan ("space is required between numbers");
2392               when Upper_Case_Letter
2393                 | Lower_Case_Letter =>
2394                  --  Could call Error_Separator, but use a clearer message
2395                  --  for this common case.
2396                  --  Note: the term "unit name" is not correct here, since
2397                  --  it can be any identifier or even a keyword; however it
2398                  --  is probably the most common case (eg 10ns).
2399                  if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer
2400                  then
2401                     Current_Token := Tok_Integer_Letter;
2402                  else
2403                     Error_Msg_Scan
2404                       ("space is required between number and unit name");
2405                  end if;
2406               when Other_Special_Character =>
2407                  if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then
2408                     --  Start of extended identifier.
2409                     Error_Separator;
2410                  end if;
2411               when Invalid
2412                 | Format_Effector
2413                 | Space_Character
2414                 | Special_Character =>
2415                  null;
2416            end case;
2417            return;
2418         when '#' =>
2419            Error_Msg_Scan ("'#' is used for based literals and "
2420                              & "must be preceded by a base");
2421            --  Skip.
2422            Pos := Pos + 1;
2423            goto Again;
2424         when '"' =>
2425            Scan_String;
2426            return;
2427         when '%' =>
2428            if Vhdl_Std >= Vhdl_08 then
2429               Error_Msg_Scan
2430                 ("'%%' not allowed in vhdl 2008 (was replacement character)");
2431               --  Continue as a string.
2432            end if;
2433            Scan_String;
2434            return;
2435         when '[' =>
2436            if Flag_Psl then
2437               if Source (Pos + 1) = '*' then
2438                  Current_Token := Tok_Brack_Star;
2439                  Pos := Pos + 2;
2440               elsif Source (Pos + 1) = '+'
2441                 and then Source (Pos + 2) = ']'
2442               then
2443                  Current_Token := Tok_Brack_Plus_Brack;
2444                  Pos := Pos + 3;
2445               elsif Source (Pos + 1) = '-'
2446                 and then Source (Pos + 2) = '>'
2447               then
2448                  Current_Token := Tok_Brack_Arrow;
2449                  Pos := Pos + 3;
2450               elsif Source (Pos + 1) = '=' then
2451                  Current_Token := Tok_Brack_Equal;
2452                  Pos := Pos + 2;
2453               else
2454                  Current_Token := Tok_Left_Bracket;
2455                  Pos := Pos + 1;
2456               end if;
2457            else
2458               if Vhdl_Std = Vhdl_87 then
2459                  Error_Msg_Scan
2460                    ("'[' is an invalid character in vhdl87, replaced by '('");
2461                  Current_Token := Tok_Left_Paren;
2462               else
2463                  Current_Token := Tok_Left_Bracket;
2464               end if;
2465               Pos := Pos + 1;
2466            end if;
2467            return;
2468         when ']' =>
2469            if Vhdl_Std = Vhdl_87 and not Flag_Psl then
2470               Error_Msg_Scan
2471                 ("']' is an invalid character in vhdl87, replaced by ')'");
2472               Current_Token := Tok_Right_Paren;
2473            else
2474               Current_Token := Tok_Right_Bracket;
2475            end if;
2476            Pos := Pos + 1;
2477            return;
2478         when '{' =>
2479            Current_Token := Tok_Left_Curly;
2480            Pos := Pos + 1;
2481            return;
2482         when '}' =>
2483            Current_Token := Tok_Right_Curly;
2484            Pos := Pos + 1;
2485            return;
2486         when '\' =>
2487            if Vhdl_Std = Vhdl_87 then
2488               Error_Msg_Scan
2489                 ("extended identifiers are not allowed in vhdl87");
2490            end if;
2491            Scan_Extended_Identifier;
2492            return;
2493         when '^' =>
2494            if Vhdl_Std >= Vhdl_08 then
2495               Current_Token := Tok_Caret;
2496            else
2497               Current_Token := Tok_Xor;
2498               Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'");
2499            end if;
2500            Pos := Pos + 1;
2501            return;
2502         when '~' =>
2503            Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'");
2504            Pos := Pos + 1;
2505            Current_Token := Tok_Not;
2506            return;
2507         when '?' =>
2508            if Vhdl_Std < Vhdl_08 then
2509               Error_Bad_Character;
2510               Pos := Pos + 1;
2511               goto Again;
2512            else
2513               if Source (Pos + 1) = '<' then
2514                  if Source (Pos + 2) = '=' then
2515                     Current_Token := Tok_Match_Less_Equal;
2516                     Pos := Pos + 3;
2517                  else
2518                     Current_Token := Tok_Match_Less;
2519                     Pos := Pos + 2;
2520                  end if;
2521               elsif Source (Pos + 1) = '>' then
2522                  if Source (Pos + 2) = '=' then
2523                     Current_Token := Tok_Match_Greater_Equal;
2524                     Pos := Pos + 3;
2525                  else
2526                     Current_Token := Tok_Match_Greater;
2527                     Pos := Pos + 2;
2528                  end if;
2529               elsif Source (Pos + 1) = '?' then
2530                  Current_Token := Tok_Condition;
2531                  Pos := Pos + 2;
2532               elsif Source (Pos + 1) = '=' then
2533                  Current_Token := Tok_Match_Equal;
2534                  Pos := Pos + 2;
2535               elsif Source (Pos + 1) = '/'
2536                 and then Source (Pos + 2) = '='
2537               then
2538                  Current_Token := Tok_Match_Not_Equal;
2539                  Pos := Pos + 3;
2540               else
2541                  Error_Msg_Scan ("unknown matching operator");
2542                  Pos := Pos + 1;
2543                  goto Again;
2544               end if;
2545            end if;
2546            return;
2547         when '`' =>
2548            if Vhdl_Std >= Vhdl_08 then
2549               Scan_Tool_Directive;
2550            else
2551               Error_Bad_Character;
2552               Skip_Until_EOL;
2553            end if;
2554            goto Again;
2555         when '$'
2556           | Inverted_Exclamation .. Inverted_Question
2557           | Multiplication_Sign | Division_Sign =>
2558            Error_Bad_Character;
2559            Pos := Pos + 1;
2560            goto Again;
2561         when '@' =>
2562            if Vhdl_Std >= Vhdl_08 or Flag_Psl then
2563               Current_Token := Tok_Arobase;
2564               Pos := Pos + 1;
2565               return;
2566            else
2567               Error_Bad_Character;
2568               Pos := Pos + 1;
2569               goto Again;
2570            end if;
2571         when '_' =>
2572            Error_Msg_Scan ("an identifier can't start with '_'");
2573            Scan_Identifier (Flag_Psl);
2574            --  Cannot be a reserved word.
2575            return;
2576         when 'A' .. 'Z' | 'a' .. 'z' =>
2577            Scan_Identifier (Flag_Psl);
2578            if Current_Token = Tok_Identifier then
2579               Identifier_To_Token;
2580            end if;
2581            return;
2582         when UC_A_Grave .. UC_O_Diaeresis
2583           | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn
2584           | LC_German_Sharp_S .. LC_O_Diaeresis
2585           | LC_O_Oblique_Stroke .. LC_Y_Diaeresis =>
2586            if Vhdl_Std = Vhdl_87 then
2587               Error_Msg_Scan
2588                 ("non 7-bit latin-1 letters are not allowed in vhdl87");
2589            end if;
2590            Scan_Identifier (False);
2591            --  Not a reserved word.
2592            return;
2593         when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC =>
2594            Error_Msg_Scan
2595              ("control character that is not CR, LF, FF, HT or VT " &
2596               "is not allowed");
2597            Pos := Pos + 1;
2598            goto Again;
2599         when Files_Map.EOT =>
2600            if Pos >= Current_Context.File_Len then
2601               --  FIXME: should conditionnaly emit a warning if the file
2602               --   is not terminated by an end of line.
2603               Current_Token := Tok_Eof;
2604            else
2605               Error_Msg_Scan ("EOT is not allowed inside the file");
2606               Pos := Pos + 1;
2607               goto Again;
2608            end if;
2609            return;
2610      end case;
2611      --  Not reachable: all case should use goto Again or return.
2612   end Scan;
2613
2614   function Is_Whitespace (C : Character) return Boolean is
2615   begin
2616      if C = ' ' then
2617         return True;
2618      elsif Vhdl_Std > Vhdl_87 and C = NBSP then
2619         return True;
2620      else
2621         return False;
2622      end if;
2623   end Is_Whitespace;
2624end Vhdl.Scanner;
2625