1-- VHDL lexical scanner. 2-- Copyright (C) 2002 - 2014 Tristan Gingold 3-- 4-- This program is free software: you can redistribute it and/or modify 5-- it under the terms of the GNU General Public License as published by 6-- the Free Software Foundation, either version 2 of the License, or 7-- (at your option) any later version. 8-- 9-- This program is distributed in the hope that it will be useful, 10-- but WITHOUT ANY WARRANTY; without even the implied warranty of 11-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12-- GNU General Public License for more details. 13-- 14-- You should have received a copy of the GNU General Public License 15-- along with this program. If not, see <gnu.org/licenses>. 16with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; 17with Errorout; use Errorout; 18with Name_Table; 19with Files_Map; use Files_Map; 20with Std_Names; 21with Str_Table; 22with Flags; use Flags; 23 24package body Vhdl.Scanner is 25 26 -- This classification is a simplification of the categories of LRM93 13.1 27 -- LRM93 13.1 28 -- The only characters allowed in the text of a VHDL description are the 29 -- graphic characters and format effector. 30 31 type Character_Kind_Type is 32 ( 33 -- Neither a format effector nor a graphic character. 34 Invalid, 35 Format_Effector, 36 Lower_Case_Letter, 37 Upper_Case_Letter, 38 Digit, 39 Special_Character, 40 Space_Character, 41 Other_Special_Character 42 ); 43 44 -- LRM93 13.1 45 -- basic_graphic_character ::= 46 -- upper_case_letter | digit | special_character | space_character 47 -- 48 --subtype Basic_Graphic_Character is 49 -- Character_Kind_Type range Upper_Case_Letter .. Space_Character; 50 51 -- LRM93 13.1 52 -- graphic_character ::= 53 -- basic_graphic_character | lower_case_letter | other_special_character 54 -- 55 -- Note: There are 191 graphic characters. 56 subtype Graphic_Character is 57 Character_Kind_Type range Lower_Case_Letter .. Other_Special_Character; 58 59 -- letter ::= upper_case_letter | lower_case_letter 60 subtype Letter is 61 Character_Kind_Type range Lower_Case_Letter .. Upper_Case_Letter; 62 63 -- LRM93 13.1 64 -- The characters included in each of the categories of basic graphic 65 -- characters are defined as follows: 66 type Character_Array is array (Character) of Character_Kind_Type; 67 pragma Suppress_Initialization (Character_Array); 68 Characters_Kind : constant Character_Array := 69 (NUL .. BS => Invalid, 70 71 -- Format effectors are the ISO (and ASCII) characters called horizontal 72 -- tabulation, vertical tabulation, carriage return, line feed, and form 73 -- feed. 74 HT | LF | VT | FF | CR => Format_Effector, 75 76 SO .. US => Invalid, 77 78 -- 1. upper case letters 79 'A' .. 'Z' | UC_A_Grave .. UC_O_Diaeresis | 80 UC_O_Oblique_Stroke .. UC_Icelandic_Thorn => Upper_Case_Letter, 81 82 -- 2. digits 83 '0' .. '9' => Digit, 84 85 -- 3. special characters 86 '"' | '#' | '&' | ''' | '(' | ')' | '+' | ',' | '-' | '.' | '/' 87 | ':' | ';' | '<' | '=' | '>' | '[' | ']' 88 | '_' | '|' | '*' => Special_Character, 89 90 -- 4. the space characters 91 ' ' | NBSP => Space_Character, 92 93 -- 5. lower case letters 94 'a' .. 'z' | LC_German_Sharp_S .. LC_O_Diaeresis | 95 LC_O_Oblique_Stroke .. LC_Y_Diaeresis => Lower_Case_Letter, 96 97 -- 6. other special characters 98 '!' | '$' | '%' | '@' | '?' | '\' | '^' | '`' | '{' | '}' | '~' 99 | Inverted_Exclamation .. Inverted_Question | Multiplication_Sign | 100 Division_Sign => Other_Special_Character, 101 102 -- '¡' -- INVERTED EXCLAMATION MARK 103 -- '¢' -- CENT SIGN 104 -- '£' -- POUND SIGN 105 -- '¤' -- CURRENCY SIGN 106 -- '¥' -- YEN SIGN 107 -- '¦' -- BROKEN BAR 108 -- '§' -- SECTION SIGN 109 -- '¨' -- DIAERESIS 110 -- '©' -- COPYRIGHT SIGN 111 -- 'ª' -- FEMININE ORDINAL INDICATOR 112 -- '«' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 113 -- '¬' -- NOT SIGN 114 -- '' -- SOFT HYPHEN 115 -- '®' -- REGISTERED SIGN 116 -- '¯' -- MACRON 117 -- '°' -- DEGREE SIGN 118 -- '±' -- PLUS-MINUS SIGN 119 -- '²' -- SUPERSCRIPT TWO 120 -- '³' -- SUPERSCRIPT THREE 121 -- '´' -- ACUTE ACCENT 122 -- 'µ' -- MICRO SIGN 123 -- '¶' -- PILCROW SIGN 124 -- '·' -- MIDDLE DOT 125 -- '¸' -- CEDILLA 126 -- '¹' -- SUPERSCRIPT ONE 127 -- 'º' -- MASCULINE ORDINAL INDICATOR 128 -- '»' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 129 -- '¼' -- VULGAR FRACTION ONE QUARTER 130 -- '½' -- VULGAR FRACTION ONE HALF 131 -- '¾' -- VULGAR FRACTION THREE QUARTERS 132 -- '¿' -- INVERTED QUESTION MARK 133 -- '×' -- MULTIPLICATION SIGN 134 -- '÷' -- DIVISION SIGN 135 136 DEL .. APC => Invalid); 137 138 -- The context contains the whole internal state of the scanner, ie 139 -- it can be used to push/pop a lexical analysis, to restart the 140 -- scanner from a context marking a previous point. 141 type Scan_Context is record 142 Source : File_Buffer_Acc; 143 Source_File : Source_File_Entry; 144 Line_Number : Natural; 145 Line_Pos : Source_Ptr; 146 Prev_Pos : Source_Ptr; 147 Token_Pos : Source_Ptr; 148 Pos : Source_Ptr; 149 File_Len : Source_Ptr; 150 Token : Token_Type; 151 Prev_Token : Token_Type; 152 153 -- Tokens are ignored because of 'translate_off'. 154 Translate_Off : Boolean; 155 156 -- Additional values for the current token. 157 Bit_Str_Base : Character; 158 Bit_Str_Sign : Character; 159 Str_Id : String8_Id; 160 Str_Len : Nat32; 161 Identifier: Name_Id; 162 Lit_Int64 : Int64; 163 Lit_Fp64 : Fp64; 164 end record; 165 pragma Suppress_Initialization (Scan_Context); 166 167 -- The current context. 168 -- Default value is an invalid context. 169 Current_Context: Scan_Context := (Source => null, 170 Source_File => No_Source_File_Entry, 171 Line_Number => 0, 172 Line_Pos => 0, 173 Pos => 0, 174 Prev_Pos => 0, 175 Token_Pos => 0, 176 File_Len => 0, 177 Token => Tok_Invalid, 178 Prev_Token => Tok_Invalid, 179 Translate_Off => False, 180 Identifier => Null_Identifier, 181 Bit_Str_Base => ' ', 182 Bit_Str_Sign => ' ', 183 Str_Id => Null_String8, 184 Str_Len => 0, 185 Lit_Int64 => 0, 186 Lit_Fp64 => 0.0); 187 188 function Get_Current_Coord return Source_Coord_Type is 189 begin 190 return (File => Get_Current_Source_File, 191 Line_Pos => Current_Context.Line_Pos, 192 Line => Get_Current_Line, 193 Offset => Get_Current_Offset); 194 end Get_Current_Coord; 195 196 function Get_Token_Coord return Source_Coord_Type is 197 begin 198 return (File => Get_Current_Source_File, 199 Line_Pos => Current_Context.Line_Pos, 200 Line => Get_Current_Line, 201 Offset => Get_Token_Offset); 202 end Get_Token_Coord; 203 204 -- Disp a message during scan. 205 -- The current location is automatically displayed before the message. 206 -- Disp a message during scan. 207 procedure Error_Msg_Scan (Msg: String) is 208 begin 209 Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg); 210 end Error_Msg_Scan; 211 212 procedure Error_Msg_Scan (Loc : Source_Coord_Type; Msg: String) is 213 begin 214 Report_Msg (Msgid_Error, Scan, Loc, Msg); 215 end Error_Msg_Scan; 216 217 procedure Error_Msg_Scan (Msg: String; Arg1 : Earg_Type) is 218 begin 219 Report_Msg (Msgid_Error, Scan, Get_Current_Coord, Msg, (1 => Arg1)); 220 end Error_Msg_Scan; 221 222 -- Disp a message during scan. 223 procedure Warning_Msg_Scan (Id : Msgid_Warnings; 224 Msg: String; 225 Arg1 : Earg_Type) is 226 begin 227 Report_Msg (Id, Scan, Get_Current_Coord, Msg, (1 => Arg1)); 228 end Warning_Msg_Scan; 229 230 procedure Warning_Msg_Scan (Id : Msgid_Warnings; 231 Msg: String; 232 Args : Earg_Arr := No_Eargs) is 233 begin 234 Report_Msg (Id, Scan, Get_Current_Coord, Msg, Args); 235 end Warning_Msg_Scan; 236 237 Source: File_Buffer_Acc renames Current_Context.Source; 238 Pos: Source_Ptr renames Current_Context.Pos; 239 240 -- When CURRENT_TOKEN is an identifier, its name_id is stored into 241 -- this global variable. 242 -- Function current_text can be used to convert it into an iir. 243 function Current_Identifier return Name_Id is 244 begin 245 return Current_Context.Identifier; 246 end Current_Identifier; 247 248 procedure Invalidate_Current_Identifier is 249 begin 250 Current_Context.Identifier := Null_Identifier; 251 end Invalidate_Current_Identifier; 252 253 procedure Invalidate_Current_Token is 254 begin 255 if Current_Token /= Tok_Invalid then 256 Current_Context.Prev_Token := Current_Token; 257 Current_Token := Tok_Invalid; 258 end if; 259 end Invalidate_Current_Token; 260 261 function Current_String_Id return String8_Id is 262 begin 263 return Current_Context.Str_Id; 264 end Current_String_Id; 265 266 function Current_String_Length return Nat32 is 267 begin 268 return Current_Context.Str_Len; 269 end Current_String_Length; 270 271 function Get_Bit_String_Base return Character is 272 begin 273 return Current_Context.Bit_Str_Base; 274 end Get_Bit_String_Base; 275 276 function Get_Bit_String_Sign return Character is 277 begin 278 return Current_Context.Bit_Str_Sign; 279 end Get_Bit_String_Sign; 280 281 function Current_Iir_Int64 return Int64 is 282 begin 283 return Current_Context.Lit_Int64; 284 end Current_Iir_Int64; 285 286 function Current_Iir_Fp64 return Fp64 is 287 begin 288 return Current_Context.Lit_Fp64; 289 end Current_Iir_Fp64; 290 291 function Get_Current_Source_File return Source_File_Entry is 292 begin 293 return Current_Context.Source_File; 294 end Get_Current_Source_File; 295 296 function Get_Current_Line return Natural is 297 begin 298 return Current_Context.Line_Number; 299 end Get_Current_Line; 300 301 function Get_Current_Offset return Natural is 302 begin 303 return Natural (Current_Context.Pos - Current_Context.Line_Pos); 304 end Get_Current_Offset; 305 306 function Get_Token_Offset return Natural is 307 begin 308 return Natural (Current_Context.Token_Pos - Current_Context.Line_Pos); 309 end Get_Token_Offset; 310 311 function Get_Token_Position return Source_Ptr is 312 begin 313 return Current_Context.Token_Pos; 314 end Get_Token_Position; 315 316 function Get_Token_Length return Int32 is 317 begin 318 return Int32 (Current_Context.Pos - Current_Context.Token_Pos); 319 end Get_Token_Length; 320 321 function Get_Position return Source_Ptr is 322 begin 323 return Current_Context.Pos; 324 end Get_Position; 325 326 function Get_Token_Location return Location_Type is 327 begin 328 return File_Pos_To_Location 329 (Current_Context.Source_File, Current_Context.Token_Pos); 330 end Get_Token_Location; 331 332 function Get_Prev_Location return Location_Type is 333 begin 334 return File_Pos_To_Location 335 (Current_Context.Source_File, Current_Context.Prev_Pos); 336 end Get_Prev_Location; 337 338 procedure Set_File (Source_File : Source_File_Entry) 339 is 340 N_Source: File_Buffer_Acc; 341 begin 342 pragma Assert (Current_Context.Source = null); 343 pragma Assert (Source_File /= No_Source_File_Entry); 344 N_Source := Get_File_Source (Source_File); 345 Current_Context := (Source => N_Source, 346 Source_File => Source_File, 347 Line_Number => 1, 348 Line_Pos => 0, 349 Prev_Pos => N_Source'First, 350 Pos => N_Source'First, 351 Token_Pos => 0, -- should be invalid, 352 File_Len => Get_File_Length (Source_File), 353 Token => Tok_Invalid, 354 Prev_Token => Tok_Invalid, 355 Translate_Off => False, 356 Identifier => Null_Identifier, 357 Bit_Str_Base => ' ', 358 Bit_Str_Sign => ' ', 359 Str_Id => Null_String8, 360 Str_Len => 0, 361 Lit_Int64 => -1, 362 Lit_Fp64 => 0.0); 363 Current_Token := Tok_Invalid; 364 end Set_File; 365 366 function Detect_Encoding_Errors return Boolean 367 is 368 C : constant Character := Source (Pos); 369 begin 370 -- No need to check further if first character is plain ASCII-7 371 if C >= ' ' and C < Character'Val (127) then 372 return False; 373 end if; 374 375 -- UTF-8 BOM is EF BB BF 376 if Source (Pos + 0) = Character'Val (16#ef#) 377 and then Source (Pos + 1) = Character'Val (16#bb#) 378 and then Source (Pos + 2) = Character'Val (16#bf#) 379 then 380 Error_Msg_Scan 381 ("source encoding must be latin-1 (UTF-8 BOM detected)"); 382 return True; 383 end if; 384 385 -- UTF-16 BE BOM is FE FF 386 if Source (Pos + 0) = Character'Val (16#fe#) 387 and then Source (Pos + 1) = Character'Val (16#ff#) 388 then 389 Error_Msg_Scan 390 ("source encoding must be latin-1 (UTF-16 BE BOM detected)"); 391 return True; 392 end if; 393 394 -- UTF-16 LE BOM is FF FE 395 if Source (Pos + 0) = Character'Val (16#ff#) 396 and then Source (Pos + 1) = Character'Val (16#fe#) 397 then 398 Error_Msg_Scan 399 ("source encoding must be latin-1 (UTF-16 LE BOM detected)"); 400 return True; 401 end if; 402 403 -- Certainly weird, but scanner/parser will catch it. 404 return False; 405 end Detect_Encoding_Errors; 406 407 procedure Set_Current_Position (Position: Source_Ptr) 408 is 409 Loc : Location_Type; 410 Offset: Natural; 411 File_Entry : Source_File_Entry; 412 begin 413 -- Scanner must have been initialized. 414 pragma Assert (Current_Context.Source /= null); 415 416 Current_Token := Tok_Invalid; 417 Current_Context.Pos := Position; 418 Loc := File_Pos_To_Location (Current_Context.Source_File, 419 Current_Context.Pos); 420 Location_To_Coord (Loc, 421 File_Entry, Current_Context.Line_Pos, 422 Current_Context.Line_Number, Offset); 423 end Set_Current_Position; 424 425 procedure Close_File is 426 begin 427 Current_Context.Source := null; 428 end Close_File; 429 430 -- Emit an error when a character above 128 was found. 431 -- This must be called only in vhdl87. 432 procedure Error_8bit is 433 begin 434 Error_Msg_Scan ("8 bits characters not allowed in vhdl87"); 435 end Error_8bit; 436 437 -- Emit an error when a separator is expected. 438 procedure Error_Separator is 439 begin 440 Error_Msg_Scan ("a separator is required here"); 441 end Error_Separator; 442 443 -- scan a decimal literal or a based literal. 444 -- 445 -- LRM93 13.4.1 446 -- DECIMAL_LITERAL ::= INTEGER [ . INTEGER ] [ EXPONENT ] 447 -- EXPONENT ::= E [ + ] INTEGER | E - INTEGER 448 -- 449 -- LRM93 13.4.2 450 -- BASED_LITERAL ::= BASE # BASED_INTEGER [ . BASED_INTEGER ] # EXPONENT 451 -- BASE ::= INTEGER 452 procedure Scan_Literal is separate; 453 454 -- Scan a string literal. 455 -- 456 -- LRM93 13.6 / LRM08 15.7 457 -- A string literal is formed by a sequence of graphic characters 458 -- (possibly none) enclosed between two quotation marks used as string 459 -- brackets. 460 -- STRING_LITERAL ::= " { GRAPHIC_CHARACTER } " 461 -- 462 -- IN: for a string, at the call of this procedure, the current character 463 -- must be either '"' or '%'. 464 procedure Scan_String 465 is 466 -- The quotation character (can be " or %). 467 Mark: Character; 468 -- Current character. 469 C : Character; 470 -- Current length. 471 Length : Nat32; 472 begin 473 -- String delimiter. 474 Mark := Source (Pos); 475 pragma Assert (Mark = '"' or else Mark = '%'); 476 477 Pos := Pos + 1; 478 Length := 0; 479 Current_Context.Str_Id := Str_Table.Create_String8; 480 loop 481 C := Source (Pos); 482 if C = Mark then 483 -- LRM93 13.6 484 -- If a quotation mark value is to be represented in the sequence 485 -- of character values, then a pair of adjacent quoatation 486 -- characters marks must be written at the corresponding place 487 -- within the string literal. 488 -- LRM93 13.10 489 -- Any pourcent sign within the sequence of characters must then 490 -- be doubled, and each such doubled percent sign is interpreted 491 -- as a single percent sign value. 492 -- The same replacement is allowed for a bit string literal, 493 -- provieded that both bit string brackets are replaced. 494 Pos := Pos + 1; 495 exit when Source (Pos) /= Mark; 496 end if; 497 498 case Characters_Kind (C) is 499 when Format_Effector => 500 if Mark = '%' then 501 -- No matching '%' has been found. Consider '%' was used 502 -- as the remainder operator, instead of 'rem'. This will 503 -- improve the error message. 504 Error_Msg_Scan 505 (+Get_Token_Location, 506 "'%%' is not a vhdl operator, use 'rem'"); 507 Current_Token := Tok_Rem; 508 Pos := Current_Context.Token_Pos + 1; 509 return; 510 end if; 511 if C = CR or C = LF then 512 Error_Msg_Scan 513 ("string cannot be multi-line, use concatenation"); 514 else 515 Error_Msg_Scan ("format effector not allowed in a string"); 516 end if; 517 exit; 518 when Invalid => 519 if C = Files_Map.EOT 520 and then Pos >= Current_Context.File_Len 521 then 522 Error_Msg_Scan ("string not terminated at end of file"); 523 exit; 524 end if; 525 526 Error_Msg_Scan 527 ("invalid character not allowed, even in a string"); 528 when Graphic_Character => 529 if Vhdl_Std = Vhdl_87 and then C > Character'Val (127) then 530 Error_8bit; 531 end if; 532 end case; 533 534 if C = '"' and Mark = '%' then 535 -- LRM93 13.10 536 -- The quotation marks (") used as string brackets at both ends of 537 -- a string literal can be replaced by percent signs (%), provided 538 -- that the enclosed sequence of characters constains no quotation 539 -- marks, and provided that both string brackets are replaced. 540 Error_Msg_Scan 541 ("'""' cannot be used in a string delimited with '%%'"); 542 end if; 543 544 Length := Length + 1; 545 Str_Table.Append_String8 (Character'Pos (C)); 546 Pos := Pos + 1; 547 end loop; 548 549 Current_Token := Tok_String; 550 Current_Context.Str_Len := Length; 551 end Scan_String; 552 553 -- Scan a bit string literal. 554 -- 555 -- LRM93 13.7 556 -- A bit string literal is formed by a sequence of extended digits 557 -- (possibly none) enclosed between two quotations used as bit string 558 -- brackets, preceded by a base specifier. 559 -- BIT_STRING_LITERAL ::= BASE_SPECIFIER " [ BIT_VALUE ] " 560 -- BIT_VALUE ::= EXTENDED_DIGIT { [ UNDERLINE ] EXTENDED_DIGIT } 561 -- 562 -- The current character must be a base specifier, followed by '"' or '%'. 563 -- The base must be valid. 564 procedure Scan_Bit_String (Base_Log : Nat32) 565 is 566 -- Position of character '0'. 567 Pos_0 : constant Nat8 := Character'Pos ('0'); 568 569 -- Used for the base. 570 subtype Nat4 is Natural range 1 .. 4; 571 Base : constant Nat32 := 2 ** Nat4 (Base_Log); 572 573 -- The quotation character (can be " or %). 574 Orig_Pos : constant Source_Ptr := Pos; 575 Mark : constant Character := Source (Orig_Pos); 576 -- Current character. 577 C : Character; 578 -- Current length. 579 Length : Nat32; 580 -- Digit value. 581 V, D : Nat8; 582 -- True if invalid character already found, to avoid duplicate message. 583 Has_Invalid : Boolean; 584 begin 585 pragma Assert (Mark = '"' or else Mark = '%'); 586 Pos := Pos + 1; 587 Length := 0; 588 Has_Invalid := False; 589 Current_Context.Str_Id := Str_Table.Create_String8; 590 loop 591 << Again >> null; 592 C := Source (Pos); 593 Pos := Pos + 1; 594 exit when C = Mark; 595 596 -- LRM93 13.7 597 -- If the base specifier is 'B', the extended digits in the bit 598 -- value are restricted to 0 and 1. 599 -- If the base specifier is 'O', the extended digits int the bit 600 -- value are restricted to legal digits in the octal number 601 -- system, ie, the digits 0 through 7. 602 -- If the base specifier is 'X', the extended digits are all digits 603 -- together with the letters A through F. 604 case C is 605 when '0' .. '9' => 606 V := Character'Pos (C) - Character'Pos ('0'); 607 when 'A' .. 'F' => 608 V := Character'Pos (C) - Character'Pos ('A') + 10; 609 when 'a' .. 'f' => 610 -- LRM93 13.7 611 -- A letter in a bit string literal (...) can be written either 612 -- in lowercase or in upper case, with the same meaning. 613 V := Character'Pos (C) - Character'Pos ('a') + 10; 614 when '_' => 615 if Source (Pos) = '_' then 616 Error_Msg_Scan 617 ("double underscore not allowed in a bit string"); 618 end if; 619 if Source (Pos - 2) = Mark then 620 Error_Msg_Scan 621 ("underscore not allowed at the start of a bit string"); 622 elsif Source (Pos) = Mark then 623 Error_Msg_Scan 624 ("underscore not allowed at the end of a bit string"); 625 end if; 626 goto Again; 627 when '"' => 628 pragma Assert (Mark = '%'); 629 Error_Msg_Scan 630 ("'""' cannot close a bit string opened by '%%'"); 631 exit; 632 when '%' => 633 pragma Assert (Mark = '"'); 634 Error_Msg_Scan 635 ("'%%' cannot close a bit string opened by '""'"); 636 exit; 637 when others => 638 if Characters_Kind (C) in Graphic_Character then 639 if Vhdl_Std >= Vhdl_08 then 640 V := Nat8'Last; 641 else 642 if not Has_Invalid then 643 Error_Msg_Scan ("invalid character in bit string"); 644 Has_Invalid := True; 645 end if; 646 -- Continue the bit string 647 V := 0; 648 end if; 649 else 650 if Mark = '%' then 651 Error_Msg_Scan 652 (+File_Pos_To_Location 653 (Current_Context.Source_File, Orig_Pos), 654 "'%%' is not a vhdl operator, use 'rem'"); 655 Current_Token := Tok_Rem; 656 Pos := Orig_Pos + 1; 657 return; 658 else 659 Error_Msg_Scan ("bit string not terminated"); 660 Pos := Pos - 1; 661 end if; 662 exit; 663 end if; 664 end case; 665 666 -- Expand bit value. 667 if Vhdl_Std >= Vhdl_08 and V > Base then 668 -- Expand as graphic character. 669 for I in 1 .. Base_Log loop 670 Str_Table.Append_String8_Char (C); 671 end loop; 672 else 673 -- Expand as extended digits. 674 case Base_Log is 675 when 1 => 676 if V > 1 then 677 Error_Msg_Scan 678 ("invalid character in a binary bit string"); 679 V := 1; 680 end if; 681 Str_Table.Append_String8 (Pos_0 + V); 682 when 3 => 683 if V > 7 then 684 Error_Msg_Scan 685 ("invalid character in a octal bit string"); 686 V := 7; 687 end if; 688 for I in 1 .. 3 loop 689 D := V / 4; 690 Str_Table.Append_String8 (Pos_0 + D); 691 V := (V - 4 * D) * 2; 692 end loop; 693 when 4 => 694 for I in 1 .. 4 loop 695 D := V / 8; 696 Str_Table.Append_String8 (Pos_0 + D); 697 V := (V - 8 * D) * 2; 698 end loop; 699 when others => 700 raise Internal_Error; 701 end case; 702 end if; 703 704 Length := Length + Base_Log; 705 end loop; 706 707 -- Note: the length of the bit string may be 0. 708 709 Current_Token := Tok_Bit_String; 710 Current_Context.Str_Len := Length; 711 end Scan_Bit_String; 712 713 -- Scan a decimal bit string literal. For base specifier D the algorithm 714 -- is rather different: all the graphic characters shall be digits, and we 715 -- need to use a (not very efficient) arbitrary precision multiplication. 716 procedure Scan_Dec_Bit_String 717 is 718 use Str_Table; 719 720 Id : String8_Id; 721 722 -- Position of character '0'. 723 Pos_0 : constant Nat8 := Character'Pos ('0'); 724 725 -- Current character. 726 C : Character; 727 -- Current length. 728 Length : Nat32; 729 -- Digit value. 730 V, D : Nat8; 731 732 type Carries_Type is array (0 .. 3) of Nat8; 733 Carries : Carries_Type; 734 No_Carries : constant Carries_Type := (others => Pos_0); 735 736 -- Shift right carries. Note the Carries (0) is the LSB. 737 procedure Shr_Carries is 738 begin 739 Carries := (Carries (1), Carries (2), Carries (3), Pos_0); 740 end Shr_Carries; 741 742 procedure Append_Carries is 743 begin 744 -- Expand the bit string. Note that position 1 of the string8 is 745 -- the MSB. 746 while Carries /= No_Carries loop 747 Append_String8 (Pos_0); 748 Length := Length + 1; 749 for I in reverse 2 .. Length loop 750 Set_Element_String8 (Id, I, Element_String8 (Id, I - 1)); 751 end loop; 752 Set_Element_String8 (Id, 1, Carries (0)); 753 Shr_Carries; 754 end loop; 755 end Append_Carries; 756 757 -- Add 1 to Carries. Overflow is not allowed and should be prevented by 758 -- construction. 759 procedure Add_One_To_Carries is 760 begin 761 for I in Carries'Range loop 762 if Carries (I) = Pos_0 then 763 Carries (I) := Pos_0 + 1; 764 -- End of propagation. 765 exit; 766 else 767 Carries (I) := Pos_0; 768 -- Continue propagation. 769 pragma Assert (I < Carries'Last); 770 end if; 771 end loop; 772 end Add_One_To_Carries; 773 begin 774 pragma Assert (Source (Pos) = '"'); 775 Pos := Pos + 1; 776 Length := 0; 777 Id := Create_String8; 778 Current_Context.Str_Id := Id; 779 loop 780 << Again >> null; 781 C := Source (Pos); 782 Pos := Pos + 1; 783 exit when C = '"'; 784 785 if C in '0' .. '9' then 786 V := Character'Pos (C) - Character'Pos ('0'); 787 elsif C = '_' then 788 if Source (Pos) = '_' then 789 Error_Msg_Scan 790 ("double underscore not allowed in a bit string"); 791 end if; 792 if Source (Pos - 2) = '"' then 793 Error_Msg_Scan 794 ("underscore not allowed at the start of a bit string"); 795 elsif Source (Pos) = '"' then 796 Error_Msg_Scan 797 ("underscore not allowed at the end of a bit string"); 798 end if; 799 goto Again; 800 else 801 if Characters_Kind (C) in Graphic_Character then 802 Error_Msg_Scan 803 ("graphic character not allowed in decimal bit string"); 804 -- Continue the bit string 805 V := 0; 806 else 807 Error_Msg_Scan ("bit string not terminated"); 808 Pos := Pos - 1; 809 exit; 810 end if; 811 end if; 812 813 -- Multiply by 10. 814 Carries := (others => Pos_0); 815 for I in reverse 1 .. Length loop 816 -- Shift by 1 (*2). 817 D := Element_String8 (Id, I); 818 Set_Element_String8 (Id, I, Carries (0)); 819 Shr_Carries; 820 -- Add D and D * 4. 821 if D /= Pos_0 then 822 Add_One_To_Carries; 823 -- Add_Four_To_Carries: 824 for I in 2 .. 3 loop 825 if Carries (I) = Pos_0 then 826 Carries (I) := Pos_0 + 1; 827 -- End of propagation. 828 exit; 829 else 830 Carries (I) := Pos_0; 831 -- Continue propagation. 832 end if; 833 end loop; 834 end if; 835 end loop; 836 Append_Carries; 837 838 -- Add V. 839 for I in Carries'Range loop 840 D := V / 2; 841 Carries (I) := Pos_0 + (V - 2 * D); 842 V := D; 843 end loop; 844 for I in reverse 1 .. Length loop 845 D := Element_String8 (Id, I); 846 if D /= Pos_0 then 847 Add_One_To_Carries; 848 end if; 849 Set_Element_String8 (Id, I, Carries (0)); 850 Shr_Carries; 851 exit when Carries = No_Carries; 852 end loop; 853 Append_Carries; 854 end loop; 855 856 Current_Token := Tok_Bit_String; 857 Current_Context.Str_Len := Length; 858 end Scan_Dec_Bit_String; 859 860 -- LRM08 15.2 Character set 861 -- For each uppercase letter, there is a corresponding lowercase letter; 862 -- and for each lowercase letter except [y diaeresis] and [german sharp s], 863 -- there is a corresponding uppercase letter. 864 type Character_Map is array (Character) of Character; 865 To_Lower_Map : constant Character_Map := 866 ( 867 -- Uppercase ASCII letters. 868 'A' => 'a', 869 'B' => 'b', 870 'C' => 'c', 871 'D' => 'd', 872 'E' => 'e', 873 'F' => 'f', 874 'G' => 'g', 875 'H' => 'h', 876 'I' => 'i', 877 'J' => 'j', 878 'K' => 'k', 879 'L' => 'l', 880 'M' => 'm', 881 'N' => 'n', 882 'O' => 'o', 883 'P' => 'p', 884 'Q' => 'q', 885 'R' => 'r', 886 'S' => 's', 887 'T' => 't', 888 'U' => 'u', 889 'V' => 'v', 890 'W' => 'w', 891 'X' => 'x', 892 'Y' => 'y', 893 'Z' => 'z', 894 895 -- Lowercase ASCII letters. 896 'a' => 'a', 897 'b' => 'b', 898 'c' => 'c', 899 'd' => 'd', 900 'e' => 'e', 901 'f' => 'f', 902 'g' => 'g', 903 'h' => 'h', 904 'i' => 'i', 905 'j' => 'j', 906 'k' => 'k', 907 'l' => 'l', 908 'm' => 'm', 909 'n' => 'n', 910 'o' => 'o', 911 'p' => 'p', 912 'q' => 'q', 913 'r' => 'r', 914 's' => 's', 915 't' => 't', 916 'u' => 'u', 917 'v' => 'v', 918 'w' => 'w', 919 'x' => 'x', 920 'y' => 'y', 921 'z' => 'z', 922 923 -- Uppercase Latin-1 letters. 924 UC_A_Grave => LC_A_Grave, 925 UC_A_Acute => LC_A_Acute, 926 UC_A_Circumflex => LC_A_Circumflex, 927 UC_A_Tilde => LC_A_Tilde, 928 UC_A_Diaeresis => LC_A_Diaeresis, 929 UC_A_Ring => LC_A_Ring, 930 UC_AE_Diphthong => LC_AE_Diphthong, 931 UC_C_Cedilla => LC_C_Cedilla, 932 UC_E_Grave => LC_E_Grave, 933 UC_E_Acute => LC_E_Acute, 934 UC_E_Circumflex => LC_E_Circumflex, 935 UC_E_Diaeresis => LC_E_Diaeresis, 936 UC_I_Grave => LC_I_Grave, 937 UC_I_Acute => LC_I_Acute, 938 UC_I_Circumflex => LC_I_Circumflex, 939 UC_I_Diaeresis => LC_I_Diaeresis, 940 UC_Icelandic_Eth => LC_Icelandic_Eth, 941 UC_N_Tilde => LC_N_Tilde, 942 UC_O_Grave => LC_O_Grave, 943 UC_O_Acute => LC_O_Acute, 944 UC_O_Circumflex => LC_O_Circumflex, 945 UC_O_Tilde => LC_O_Tilde, 946 UC_O_Diaeresis => LC_O_Diaeresis, 947 UC_O_Oblique_Stroke => LC_O_Oblique_Stroke, 948 UC_U_Grave => LC_U_Grave, 949 UC_U_Acute => LC_U_Acute, 950 UC_U_Circumflex => LC_U_Circumflex, 951 UC_U_Diaeresis => LC_U_Diaeresis, 952 UC_Y_Acute => LC_Y_Acute, 953 UC_Icelandic_Thorn => LC_Icelandic_Thorn, 954 955 -- Lowercase Latin-1 letters. 956 LC_A_Grave => LC_A_Grave, 957 LC_A_Acute => LC_A_Acute, 958 LC_A_Circumflex => LC_A_Circumflex, 959 LC_A_Tilde => LC_A_Tilde, 960 LC_A_Diaeresis => LC_A_Diaeresis, 961 LC_A_Ring => LC_A_Ring, 962 LC_AE_Diphthong => LC_AE_Diphthong, 963 LC_C_Cedilla => LC_C_Cedilla, 964 LC_E_Grave => LC_E_Grave, 965 LC_E_Acute => LC_E_Acute, 966 LC_E_Circumflex => LC_E_Circumflex, 967 LC_E_Diaeresis => LC_E_Diaeresis, 968 LC_I_Grave => LC_I_Grave, 969 LC_I_Acute => LC_I_Acute, 970 LC_I_Circumflex => LC_I_Circumflex, 971 LC_I_Diaeresis => LC_I_Diaeresis, 972 LC_Icelandic_Eth => LC_Icelandic_Eth, 973 LC_N_Tilde => LC_N_Tilde, 974 LC_O_Grave => LC_O_Grave, 975 LC_O_Acute => LC_O_Acute, 976 LC_O_Circumflex => LC_O_Circumflex, 977 LC_O_Tilde => LC_O_Tilde, 978 LC_O_Diaeresis => LC_O_Diaeresis, 979 LC_O_Oblique_Stroke => LC_O_Oblique_Stroke, 980 LC_U_Grave => LC_U_Grave, 981 LC_U_Acute => LC_U_Acute, 982 LC_U_Circumflex => LC_U_Circumflex, 983 LC_U_Diaeresis => LC_U_Diaeresis, 984 LC_Y_Acute => LC_Y_Acute, 985 LC_Icelandic_Thorn => LC_Icelandic_Thorn, 986 987 -- Lowercase latin-1 characters without corresponding uppercase one. 988 LC_Y_Diaeresis => LC_Y_Diaeresis, 989 LC_German_Sharp_S => LC_German_Sharp_S, 990 991 -- Not a letter. 992 others => NUL); 993 994 procedure Error_Too_Long is 995 begin 996 Error_Msg_Scan ("identifier is too long (>" 997 & Natural'Image (Max_Name_Length - 1) & ")"); 998 end Error_Too_Long; 999 1000 -- LRM93 13.3.1 1001 -- Basic Identifiers 1002 -- A basic identifier consists only of letters, digits, and underlines. 1003 -- BASIC_IDENTIFIER ::= LETTER { [ UNDERLINE ] LETTER_OR_DIGIT } 1004 -- LETTER_OR_DIGIT ::= LETTER | DIGIT 1005 -- LETTER ::= UPPER_CASE_LETTER | LOWER_CASE_LETTER 1006 -- 1007 -- NB: At the call of this procedure, the current character must be a legal 1008 -- character for a basic identifier. 1009 procedure Scan_Identifier (Allow_PSL : Boolean) 1010 is 1011 use Name_Table; 1012 -- Local copy for speed-up. 1013 Source : constant File_Buffer_Acc := Current_Context.Source; 1014 P : Source_Ptr; 1015 1016 -- Current and next character. 1017 C : Character; 1018 1019 Buffer : String (1 .. Max_Name_Length); 1020 Len : Natural; 1021 begin 1022 -- This is an identifier or a key word. 1023 Len := 0; 1024 P := Pos; 1025 1026 loop 1027 -- Source (pos) is correct. 1028 -- LRM93 13.3.1 1029 -- All characters if a basic identifier are signifiant, including 1030 -- any underline character inserted between a letter or digit and 1031 -- an adjacent letter or digit. 1032 -- Basic identifiers differing only in the use of the corresponding 1033 -- upper and lower case letters are considered as the same. 1034 -- 1035 -- GHDL: This is achieved by converting all upper case letters into 1036 -- equivalent lower case letters. 1037 -- The opposite (converting to upper lower case letters) is not 1038 -- possible because two characters have no upper-case equivalent. 1039 C := Source (P); 1040 case C is 1041 when 'A' .. 'Z' => 1042 C := Character'Val 1043 (Character'Pos (C) 1044 + Character'Pos ('a') - Character'Pos ('A')); 1045 when 'a' .. 'z' | '0' .. '9' => 1046 null; 1047 when '_' => 1048 if Source (P + 1) = '_' then 1049 Error_Msg_Scan ("two underscores can't be consecutive"); 1050 end if; 1051 when ' ' | ')' | '.' | ';' | ':' => 1052 exit; 1053 when others => 1054 -- Non common case. 1055 case Characters_Kind (C) is 1056 when Upper_Case_Letter | Lower_Case_Letter => 1057 if Vhdl_Std = Vhdl_87 then 1058 Error_8bit; 1059 end if; 1060 C := To_Lower_Map (C); 1061 pragma Assert (C /= NUL); 1062 when Digit => 1063 raise Internal_Error; 1064 when others => 1065 exit; 1066 end case; 1067 end case; 1068 1069 -- Put character in name buffer. FIXME: compute the hash at the same 1070 -- time ? 1071 if Len >= Max_Name_Length - 1 then 1072 if Len = Max_Name_Length -1 then 1073 Error_Msg_Scan ("identifier is too long (>" 1074 & Natural'Image (Max_Name_Length - 1) & ")"); 1075 -- Accept this last one character, so that no error for the 1076 -- following characters. 1077 Len := Len + 1; 1078 Buffer (Len) := C; 1079 end if; 1080 else 1081 Len := Len + 1; 1082 Buffer (Len) := C; 1083 end if; 1084 1085 -- Next character. 1086 P := P + 1; 1087 end loop; 1088 1089 if Source (P - 1) = '_' then 1090 if Allow_PSL then 1091 -- Some PSL reserved words finish with '_'. 1092 P := P - 1; 1093 Len := Len - 1; 1094 C := '_'; 1095 else 1096 -- Eat the trailing underscore. 1097 Error_Msg_Scan ("an identifier cannot finish with '_'"); 1098 end if; 1099 end if; 1100 1101 -- Update position in the scan context. 1102 Pos := P; 1103 1104 -- LRM93 13.2 1105 -- At least one separator is required between an identifier or an 1106 -- abstract literal and an adjacent identifier or abstract literal. 1107 case Characters_Kind (C) is 1108 when Digit 1109 | Upper_Case_Letter 1110 | Lower_Case_Letter => 1111 raise Internal_Error; 1112 when Other_Special_Character | Special_Character => 1113 if (C = '"' or C = '%') and then Len <= 2 then 1114 if C = '%' and Vhdl_Std >= Vhdl_08 then 1115 Error_Msg_Scan ("'%%' not allowed in vhdl 2008 " 1116 & "(was replacement character)"); 1117 -- Continue as a bit string. 1118 end if; 1119 1120 -- Good candidate for bit string. 1121 1122 -- LRM93 13.7 1123 -- BASE_SPECIFIER ::= B | O | X 1124 -- 1125 -- A letter in a bit string literal (either an extended digit 1126 -- or the base specifier) can be written either in lower case 1127 -- or in upper case, with the same meaning. 1128 -- 1129 -- LRM08 15.8 Bit string literals 1130 -- BASE_SPECICIER ::= 1131 -- B | O | X | UB | UO | UX | SB | SO | SX | D 1132 -- 1133 -- An extended digit and the base specifier in a bit string 1134 -- literal can be written either in lowercase or in uppercase, 1135 -- with the same meaning. 1136 declare 1137 Base : Nat32; 1138 Cl : constant Character := Buffer (Len); 1139 Cf : constant Character := Buffer (1); 1140 begin 1141 Current_Context.Bit_Str_Base := Cl; 1142 if Cl = 'b' then 1143 Base := 1; 1144 elsif Cl = 'o' then 1145 Base := 3; 1146 elsif Cl = 'x' then 1147 Base := 4; 1148 elsif Vhdl_Std >= Vhdl_08 and Len = 1 and Cf = 'd' then 1149 Current_Context.Bit_Str_Sign := ' '; 1150 Scan_Dec_Bit_String; 1151 return; 1152 else 1153 Base := 0; 1154 end if; 1155 if Base > 0 then 1156 if Len = 1 then 1157 Current_Context.Bit_Str_Sign := ' '; 1158 Scan_Bit_String (Base); 1159 return; 1160 elsif Vhdl_Std >= Vhdl_08 1161 and then (Cf = 's' or Cf = 'u') 1162 then 1163 Current_Context.Bit_Str_Sign := Cf; 1164 Scan_Bit_String (Base); 1165 return; 1166 end if; 1167 end if; 1168 end; 1169 elsif Vhdl_Std > Vhdl_87 and then C = '\' then 1170 -- Start of extended identifier. Cannot follow an identifier. 1171 Error_Separator; 1172 end if; 1173 1174 when Invalid => 1175 -- Improve error message for use of UTF-8 quote marks. 1176 -- It's possible because in the sequence of UTF-8 bytes for the 1177 -- quote marks, there are invalid character (in the 128-160 1178 -- range). 1179 if C = Character'Val (16#80#) 1180 and then Buffer (Len) = Character'Val (16#e2#) 1181 and then (Source (Pos + 1) = Character'Val (16#98#) 1182 or else Source (Pos + 1) = Character'Val (16#99#)) 1183 then 1184 -- UTF-8 left or right single quote mark. 1185 if Len > 1 then 1186 -- The first byte (0xe2) is part of the identifier. An 1187 -- error will be detected as the next byte (0x80) is 1188 -- invalid. Remove the first byte from the identifier, and 1189 -- let's catch the error later. 1190 Len := Len - 1; 1191 Pos := Pos - 1; 1192 else 1193 Error_Msg_Scan ("invalid use of UTF8 character for '"); 1194 Pos := Pos + 2; 1195 1196 -- Distinguish between character literal and tick. Don't 1197 -- care about possible invalid character literal, as in any 1198 -- case we have already emitted an error message. 1199 if Current_Context.Prev_Token /= Tok_Identifier 1200 and then Current_Context.Prev_Token /= Tok_Character 1201 and then 1202 (Source (Pos + 1) = ''' 1203 or else 1204 (Source (Pos + 1) = Character'Val (16#e2#) 1205 and then Source (Pos + 2) = Character'Val (16#80#) 1206 and then Source (Pos + 3) = Character'Val (16#99#))) 1207 then 1208 Current_Token := Tok_Character; 1209 Current_Context.Identifier := 1210 Name_Table.Get_Identifier (Source (Pos)); 1211 if Source (Pos + 1) = ''' then 1212 Pos := Pos + 2; 1213 else 1214 Pos := Pos + 4; 1215 end if; 1216 else 1217 Current_Token := Tok_Tick; 1218 end if; 1219 return; 1220 end if; 1221 end if; 1222 when Format_Effector 1223 | Space_Character => 1224 null; 1225 end case; 1226 1227 -- Hash it. 1228 Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); 1229 Current_Token := Tok_Identifier; 1230 end Scan_Identifier; 1231 1232 procedure Scan_Psl_Keyword_Em (Tok : Token_Type; Tok_Em : Token_Type) is 1233 begin 1234 if Source (Pos) = '!' then 1235 Pos := Pos + 1; 1236 Current_Token := Tok_Em; 1237 else 1238 Current_Token := Tok; 1239 end if; 1240 end Scan_Psl_Keyword_Em; 1241 pragma Inline (Scan_Psl_Keyword_Em); 1242 1243 procedure Scan_Psl_Keyword_Em_Un 1244 (Tok, Tok_Em, Tok_Un, Tok_Em_Un : Token_Type) is 1245 begin 1246 if Source (Pos) = '!' then 1247 Pos := Pos + 1; 1248 if Source (Pos) = '_' then 1249 Pos := Pos + 1; 1250 Current_Token := Tok_Em_Un; 1251 else 1252 Current_Token := Tok_Em; 1253 end if; 1254 elsif Source (Pos) = '_' then 1255 Pos := Pos + 1; 1256 Current_Token := Tok_Un; 1257 else 1258 Current_Token := Tok; 1259 end if; 1260 end Scan_Psl_Keyword_Em_Un; 1261 pragma Inline (Scan_Psl_Keyword_Em_Un); 1262 1263 procedure Identifier_To_Token 1264 is 1265 use Std_Names; 1266 begin 1267 if Current_Identifier in Name_Id_Keywords then 1268 -- LRM93 13.9 1269 -- The identifiers listed below are called reserved words and are 1270 -- reserved for signifiances in the language. 1271 -- IN: this is also achieved in packages std_names and tokens. 1272 Current_Token := Token_Type'Val 1273 (Token_Type'Pos (Tok_First_Keyword) 1274 + Current_Identifier - Name_First_Keyword); 1275 case Current_Identifier is 1276 when Name_Id_AMS_Reserved_Words => 1277 if not AMS_Vhdl then 1278 if Is_Warning_Enabled (Warnid_Reserved_Word) then 1279 Warning_Msg_Scan 1280 (Warnid_Reserved_Word, 1281 "using %i AMS-VHDL reserved word as an identifier", 1282 +Current_Identifier); 1283 end if; 1284 Current_Token := Tok_Identifier; 1285 end if; 1286 when Name_Id_Vhdl08_Reserved_Words => 1287 if Vhdl_Std < Vhdl_08 then 1288 -- Some vhdl08 reserved words are PSL keywords. 1289 if Flag_Psl then 1290 case Current_Identifier is 1291 when Name_Prev => 1292 Current_Token := Tok_Prev; 1293 when Name_Stable => 1294 Current_Token := Tok_Stable; 1295 when Name_Rose => 1296 Current_Token := Tok_Rose; 1297 when Name_Fell => 1298 Current_Token := Tok_Fell; 1299 when Name_Sequence => 1300 Current_Token := Tok_Sequence; 1301 when Name_Property => 1302 Current_Token := Tok_Property; 1303 when Name_Assume => 1304 Current_Token := Tok_Assume; 1305 when Name_Cover => 1306 Current_Token := Tok_Cover; 1307 when Name_Default => 1308 Current_Token := Tok_Default; 1309 when Name_Restrict => 1310 Current_Token := Tok_Restrict; 1311 when Name_Restrict_Guarantee => 1312 Current_Token := Tok_Restrict_Guarantee; 1313 when Name_Vmode => 1314 Current_Token := Tok_Vmode; 1315 when Name_Vprop => 1316 Current_Token := Tok_Vprop; 1317 when Name_Vunit => 1318 Current_Token := Tok_Vunit; 1319 when others => 1320 Current_Token := Tok_Identifier; 1321 end case; 1322 else 1323 Current_Token := Tok_Identifier; 1324 end if; 1325 if Is_Warning_Enabled (Warnid_Reserved_Word) 1326 and then Current_Token = Tok_Identifier 1327 then 1328 Warning_Msg_Scan 1329 (Warnid_Reserved_Word, 1330 "using %i vhdl-2008 reserved word as an identifier", 1331 +Current_Identifier); 1332 end if; 1333 end if; 1334 when Name_Id_Vhdl00_Reserved_Words => 1335 if Vhdl_Std < Vhdl_00 then 1336 if Is_Warning_Enabled (Warnid_Reserved_Word) then 1337 Warning_Msg_Scan 1338 (Warnid_Reserved_Word, 1339 "using %i vhdl-2000 reserved word as an identifier", 1340 +Current_Identifier); 1341 end if; 1342 Current_Token := Tok_Identifier; 1343 end if; 1344 when Name_Id_Vhdl93_Reserved_Words => 1345 if Vhdl_Std = Vhdl_87 then 1346 if Is_Warning_Enabled (Warnid_Reserved_Word) then 1347 Report_Start_Group; 1348 Warning_Msg_Scan 1349 (Warnid_Reserved_Word, 1350 "using %i vhdl93 reserved word as a vhdl87 identifier", 1351 +Current_Identifier); 1352 Warning_Msg_Scan 1353 (Warnid_Reserved_Word, 1354 "(use option --std=93 to compile as vhdl93)"); 1355 Report_End_Group; 1356 end if; 1357 Current_Token := Tok_Identifier; 1358 end if; 1359 when Name_Id_Vhdl87_Reserved_Words => 1360 if Flag_Psl then 1361 if Current_Token = Tok_Until then 1362 Scan_Psl_Keyword_Em_Un (Tok_Until, Tok_Until_Em, 1363 Tok_Until_Un, Tok_Until_Em_Un); 1364 elsif Current_Token = Tok_Next then 1365 Scan_Psl_Keyword_Em (Tok_Next, Tok_Next_Em); 1366 end if; 1367 end if; 1368 when others => 1369 raise Program_Error; 1370 end case; 1371 elsif Flag_Psl then 1372 case Current_Identifier is 1373 when Name_Prev => 1374 Current_Token := Tok_Prev; 1375 when Name_Stable => 1376 Current_Token := Tok_Stable; 1377 when Name_Rose => 1378 Current_Token := Tok_Rose; 1379 when Name_Fell => 1380 Current_Token := Tok_Fell; 1381 when Name_Clock => 1382 Current_Token := Tok_Psl_Clock; 1383 when Name_Const => 1384 Current_Token := Tok_Psl_Const; 1385 when Name_Boolean => 1386 Current_Token := Tok_Psl_Boolean; 1387 when Name_Sequence => 1388 Current_Token := Tok_Sequence; 1389 when Name_Property => 1390 Current_Token := Tok_Property; 1391 when Name_Endpoint => 1392 Current_Token := Tok_Psl_Endpoint; 1393 when Name_Assume => 1394 Current_Token := Tok_Assume; 1395 when Name_Cover => 1396 Current_Token := Tok_Cover; 1397 when Name_Default => 1398 Current_Token := Tok_Default; 1399 when Name_Restrict => 1400 Current_Token := Tok_Restrict; 1401 when Name_Restrict_Guarantee => 1402 Current_Token := Tok_Restrict_Guarantee; 1403 when Name_Inf => 1404 Current_Token := Tok_Inf; 1405 when Name_Within => 1406 Current_Token := Tok_Within; 1407 when Name_Abort => 1408 Current_Token := Tok_Abort; 1409 when Name_Before => 1410 Scan_Psl_Keyword_Em_Un (Tok_Before, Tok_Before_Em, 1411 Tok_Before_Un, Tok_Before_Em_Un); 1412 when Name_Always => 1413 Current_Token := Tok_Always; 1414 when Name_Never => 1415 Current_Token := Tok_Never; 1416 when Name_Eventually => 1417 if Source (Pos) = '!' then 1418 Pos := Pos + 1; 1419 else 1420 Error_Msg_Scan ("'!' expected after 'eventually'"); 1421 end if; 1422 Current_Token := Tok_Eventually_Em; 1423 when Name_Next_A => 1424 Scan_Psl_Keyword_Em (Tok_Next_A, Tok_Next_A_Em); 1425 when Name_Next_E => 1426 Scan_Psl_Keyword_Em (Tok_Next_E, Tok_Next_E_Em); 1427 when Name_Next_Event => 1428 Scan_Psl_Keyword_Em (Tok_Next_Event, Tok_Next_Event_Em); 1429 when Name_Next_Event_A => 1430 Scan_Psl_Keyword_Em (Tok_Next_Event_A, Tok_Next_Event_A_Em); 1431 when Name_Next_Event_E => 1432 Scan_Psl_Keyword_Em (Tok_Next_Event_E, Tok_Next_Event_E_Em); 1433 when Name_Until => 1434 raise Internal_Error; 1435 when others => 1436 Current_Token := Tok_Identifier; 1437 if Source (Pos - 1) = '_' then 1438 Error_Msg_Scan ("identifiers cannot finish with '_'"); 1439 end if; 1440 end case; 1441 end if; 1442 end Identifier_To_Token; 1443 1444 -- LRM93 13.3.2 1445 -- EXTENDED_IDENTIFIER ::= \ GRAPHIC_CHARACTER { GRAPHIC_CHARACTER } \ 1446 -- 1447 -- Create an (extended) indentifier. 1448 -- Extended identifiers are stored as they appear (leading and tailing 1449 -- backslashes, doubling backslashes inside). 1450 procedure Scan_Extended_Identifier 1451 is 1452 use Name_Table; 1453 Buffer : String (1 .. Max_Name_Length); 1454 Len : Natural; 1455 C : Character; 1456 begin 1457 -- LRM93 13.3.2 1458 -- Moreover, every extended identifiers is distinct from any basic 1459 -- identifier. 1460 -- GHDL: This is satisfied by storing '\' in the name table. 1461 Len := 1; 1462 Buffer (1) := '\'; 1463 loop 1464 -- Next character. 1465 Pos := Pos + 1; 1466 C := Source (Pos); 1467 1468 if C = '\' then 1469 -- LRM93 13.3.2 1470 -- If a backslash is to be used as one of the graphic characters 1471 -- of an extended literal, it must be doubled. 1472 -- LRM93 13.3.2 1473 -- (a doubled backslash couting as one character) 1474 if Len >= Max_Name_Length - 1 then 1475 if Len = Max_Name_Length - 1 then 1476 Error_Too_Long; 1477 -- Accept this last one. 1478 Len := Len + 1; 1479 Buffer (Len) := C; 1480 end if; 1481 else 1482 Len := Len + 1; 1483 Buffer (Len) := C; 1484 end if; 1485 1486 Pos := Pos + 1; 1487 C := Source (Pos); 1488 1489 exit when C /= '\'; 1490 end if; 1491 1492 case Characters_Kind (C) is 1493 when Format_Effector => 1494 Error_Msg_Scan ("format effector in extended identifier"); 1495 exit; 1496 when Graphic_Character => 1497 null; 1498 when Invalid => 1499 if C = Files_Map.EOT 1500 and then Pos >= Current_Context.File_Len 1501 then 1502 Error_Msg_Scan 1503 ("extended identifier not terminated at end of file"); 1504 elsif C = LF or C = CR then 1505 Error_Msg_Scan 1506 ("extended identifier not terminated at end of line"); 1507 else 1508 Error_Msg_Scan ("invalid character in extended identifier"); 1509 end if; 1510 exit; 1511 end case; 1512 1513 -- LRM93 13.3.2 1514 -- Extended identifiers differing only in the use of corresponding 1515 -- upper and lower case letters are distinct. 1516 if Len >= Max_Name_Length - 1 then 1517 if Len = Max_Name_Length - 1 then 1518 Error_Too_Long; 1519 -- Accept this last one. 1520 Len := Len + 1; 1521 Buffer (Len) := C; 1522 end if; 1523 else 1524 Len := Len + 1; 1525 Buffer (Len) := C; 1526 end if; 1527 end loop; 1528 1529 if Len <= 2 then 1530 Error_Msg_Scan ("empty extended identifier is not allowed"); 1531 end if; 1532 1533 -- LRM93 13.2 1534 -- At least one separator is required between an identifier or an 1535 -- abstract literal and an adjacent identifier or abstract literal. 1536 case Characters_Kind (C) is 1537 when Digit 1538 | Upper_Case_Letter 1539 | Lower_Case_Letter => 1540 Error_Separator; 1541 when Invalid 1542 | Format_Effector 1543 | Space_Character 1544 | Special_Character 1545 | Other_Special_Character => 1546 null; 1547 end case; 1548 1549 -- Hash it. 1550 Current_Context.Identifier := Get_Identifier (Buffer (1 .. Len)); 1551 Current_Token := Tok_Identifier; 1552 end Scan_Extended_Identifier; 1553 1554 procedure Convert_Identifier (Str : in out String; Err : out Boolean) 1555 is 1556 F : constant Integer := Str'First; 1557 1558 procedure Error_Bad is 1559 begin 1560 Error_Msg_Option ("bad character in identifier"); 1561 end Error_Bad; 1562 1563 procedure Error_8bit is 1564 begin 1565 Error_Msg_Option ("8 bits characters not allowed in vhdl87"); 1566 end Error_8bit; 1567 1568 C : Character; 1569 begin 1570 Err := True; 1571 1572 if Str'Length = 0 then 1573 Error_Msg_Option ("identifier required"); 1574 return; 1575 end if; 1576 1577 if Str (F) = '\' then 1578 -- Extended identifier. 1579 if Vhdl_Std = Vhdl_87 then 1580 Error_Msg_Option ("extended identifiers not allowed in vhdl87"); 1581 return; 1582 end if; 1583 1584 if Str'Last < F + 2 then 1585 Error_Msg_Option ("extended identifier is too short"); 1586 return; 1587 end if; 1588 if Str (Str'Last) /= '\' then 1589 Error_Msg_Option ("extended identifier must finish with a '\'"); 1590 return; 1591 end if; 1592 for I in F + 1 .. Str'Last - 1 loop 1593 C := Str (I); 1594 case Characters_Kind (C) is 1595 when Format_Effector => 1596 Error_Msg_Option ("format effector in extended identifier"); 1597 return; 1598 when Graphic_Character => 1599 if C = '\' then 1600 if Str (I + 1) /= '\' 1601 or else I = Str'Last - 1 1602 then 1603 Error_Msg_Option ("anti-slash must be doubled " 1604 & "in extended identifier"); 1605 return; 1606 end if; 1607 end if; 1608 when Invalid => 1609 Error_Bad; 1610 return; 1611 end case; 1612 end loop; 1613 else 1614 -- Identifier 1615 for I in F .. Str'Last loop 1616 C := Str (I); 1617 case Characters_Kind (C) is 1618 when Upper_Case_Letter => 1619 if Vhdl_Std = Vhdl_87 and C > 'Z' then 1620 Error_8bit; 1621 return; 1622 end if; 1623 Str (I) := To_Lower_Map (C); 1624 when Lower_Case_Letter | Digit => 1625 if Vhdl_Std = Vhdl_87 and C > 'z' then 1626 Error_8bit; 1627 return; 1628 end if; 1629 when Special_Character => 1630 -- The current character is legal in an identifier. 1631 if C = '_' then 1632 if I = 1 then 1633 Error_Msg_Option 1634 ("an identifier cannot start with an underscore"); 1635 return; 1636 end if; 1637 if Str (I - 1) = '_' then 1638 Error_Msg_Option 1639 ("two underscores can't be consecutive"); 1640 return; 1641 end if; 1642 if I = Str'Last then 1643 Error_Msg_Option 1644 ("an identifier cannot finish with an underscore"); 1645 return; 1646 end if; 1647 else 1648 Error_Bad; 1649 return; 1650 end if; 1651 when others => 1652 Error_Bad; 1653 return; 1654 end case; 1655 end loop; 1656 end if; 1657 Err := False; 1658 end Convert_Identifier; 1659 1660 -- Internal scanner function: return True if C must be considered as a line 1661 -- terminator. This also includes EOT (which terminates the file or is 1662 -- invalid). 1663 function Is_EOL (C : Character) return Boolean is 1664 begin 1665 case C is 1666 when CR | LF | VT | FF | Files_Map.EOT => 1667 return True; 1668 when others => 1669 return False; 1670 end case; 1671 end Is_EOL; 1672 1673 -- Advance scanner till the first non-space character. 1674 procedure Skip_Spaces is 1675 begin 1676 while Source (Pos) = ' ' or Source (Pos) = HT loop 1677 Pos := Pos + 1; 1678 end loop; 1679 end Skip_Spaces; 1680 1681 -- Eat all characters until end-of-line (not included). 1682 procedure Skip_Until_EOL is 1683 begin 1684 while not Is_EOL (Source (Pos)) loop 1685 -- Don't warn about invalid character, it's somewhat out of the 1686 -- scope. 1687 Pos := Pos + 1; 1688 end loop; 1689 end Skip_Until_EOL; 1690 1691 -- Scan an identifier within a comment. Only lower case letters are 1692 -- allowed. 1693 procedure Scan_Comment_Identifier (Id : out Name_Id; Create : Boolean) 1694 is 1695 use Name_Table; 1696 Buffer : String (1 .. Max_Name_Length); 1697 Len : Natural; 1698 C : Character; 1699 begin 1700 Id := Null_Identifier; 1701 Skip_Spaces; 1702 1703 -- The identifier shall start with a letter. 1704 case Source (Pos) is 1705 when 'a' .. 'z' 1706 | 'A' .. 'Z' => 1707 null; 1708 when others => 1709 return; 1710 end case; 1711 1712 -- Scan the identifier. 1713 Len := 0; 1714 loop 1715 C := Source (Pos); 1716 case C is 1717 when 'a' .. 'z' => 1718 null; 1719 when 'A' .. 'Z' => 1720 C := Character'Val (Character'Pos (C) + 32); 1721 when '_' => 1722 null; 1723 when others => 1724 exit; 1725 end case; 1726 Len := Len + 1; 1727 Buffer (Len) := C; 1728 Pos := Pos + 1; 1729 end loop; 1730 1731 -- Shall be followed by a space or a new line. 1732 if not (C = ' ' or else C = HT or else Is_EOL (C)) then 1733 return; 1734 end if; 1735 1736 if Create then 1737 Id := Get_Identifier (Buffer (1 .. Len)); 1738 else 1739 Id := Get_Identifier_No_Create (Buffer (1 .. Len)); 1740 end if; 1741 end Scan_Comment_Identifier; 1742 1743 package Directive_Protect is 1744 -- Called to scan a protect tool directive. 1745 procedure Scan_Protect_Directive; 1746 end Directive_Protect; 1747 1748 -- Body is put in a separate file to avoid pollution. 1749 package body Directive_Protect is separate; 1750 1751 -- Called to scan a tool directive. 1752 procedure Scan_Tool_Directive 1753 is 1754 procedure Error_Missing_Directive is 1755 begin 1756 Error_Msg_Scan ("tool directive required after '`'"); 1757 Skip_Until_EOL; 1758 end Error_Missing_Directive; 1759 1760 C : Character; 1761 begin 1762 -- The current character is '`'. 1763 Pos := Pos + 1; 1764 Skip_Spaces; 1765 1766 -- Check and scan identifier. 1767 C := Source (Pos); 1768 if Characters_Kind (C) not in Letter then 1769 Error_Missing_Directive; 1770 return; 1771 end if; 1772 1773 Scan_Identifier (False); 1774 1775 if Current_Token /= Tok_Identifier then 1776 Error_Missing_Directive; 1777 return; 1778 end if; 1779 1780 Skip_Spaces; 1781 1782 -- Dispatch according to the identifier. 1783 if Current_Identifier = Std_Names.Name_Protect then 1784 Directive_Protect.Scan_Protect_Directive; 1785 else 1786 Error_Msg_Scan 1787 ("unknown tool directive %i ignored", +Current_Identifier); 1788 Skip_Until_EOL; 1789 end if; 1790 end Scan_Tool_Directive; 1791 1792 -- Skip until new_line after translate_on/translate_off. 1793 procedure Scan_Translate_On_Off (Id : Name_Id) is 1794 begin 1795 -- Expect new line. 1796 Skip_Spaces; 1797 1798 if not Is_EOL (Source (Pos)) then 1799 Warning_Msg_Scan (Warnid_Pragma, "garbage ignored after '%i'", +Id); 1800 loop 1801 Pos := Pos + 1; 1802 exit when Is_EOL (Source (Pos)); 1803 end loop; 1804 end if; 1805 end Scan_Translate_On_Off; 1806 1807 procedure Scan_Translate_Off is 1808 begin 1809 if Current_Context.Translate_Off then 1810 Warning_Msg_Scan (Warnid_Pragma, "nested 'translate_off' ignored"); 1811 return; 1812 end if; 1813 1814 -- 'pragma translate_off' has just been scanned. 1815 Scan_Translate_On_Off (Std_Names.Name_Translate_Off); 1816 1817 Current_Context.Translate_Off := True; 1818 1819 -- Recursive scan until 'translate_on' is scanned. 1820 loop 1821 Scan; 1822 if not Current_Context.Translate_Off then 1823 -- That token is discarded. 1824 pragma Assert (Current_Token = Tok_Line_Comment); 1825 Flag_Comment := False; 1826 exit; 1827 elsif Current_Token = Tok_Eof then 1828 Warning_Msg_Scan (Warnid_Pragma, 1829 "unterminated 'translate_off'"); 1830 Current_Context.Translate_Off := False; 1831 exit; 1832 end if; 1833 end loop; 1834 1835 -- The scanner is now at the EOL of the translate_on or at the EOF. 1836 -- Continue scanning. 1837 end Scan_Translate_Off; 1838 1839 procedure Scan_Translate_On is 1840 begin 1841 if not Current_Context.Translate_Off then 1842 Warning_Msg_Scan 1843 (Warnid_Pragma, 1844 "'translate_on' without coresponding 'translate_off'"); 1845 return; 1846 end if; 1847 1848 -- 'pragma translate_off' has just been scanned. 1849 Scan_Translate_On_Off (Std_Names.Name_Translate_On); 1850 1851 Current_Context.Translate_Off := False; 1852 1853 -- Return a token that will be discarded. 1854 Flag_Comment := True; 1855 end Scan_Translate_On; 1856 1857 procedure Scan_Comment_Pragma 1858 is 1859 use Std_Names; 1860 Id : Name_Id; 1861 begin 1862 Scan_Comment_Identifier (Id, True); 1863 case Id is 1864 when Null_Identifier => 1865 Warning_Msg_Scan 1866 (Warnid_Pragma, "incomplete pragma directive ignored"); 1867 when Name_Translate => 1868 Scan_Comment_Identifier (Id, True); 1869 case Id is 1870 when Name_On => 1871 Scan_Translate_On; 1872 when Name_Off => 1873 Scan_Translate_Off; 1874 when others => 1875 Warning_Msg_Scan 1876 (Warnid_Pragma, 1877 "pragma translate must be followed by 'on' or 'off'"); 1878 end case; 1879 when Name_Translate_Off 1880 | Name_Synthesis_Off => 1881 Scan_Translate_Off; 1882 when Name_Translate_On 1883 | Name_Synthesis_On => 1884 Scan_Translate_On; 1885 when Name_Label 1886 | Name_Label_Applies_To 1887 | Name_Return_Port_Name 1888 | Name_Map_To_Operator 1889 | Name_Type_Function 1890 | Name_Built_In => 1891 -- Used by synopsys, discarded. 1892 Skip_Until_EOL; 1893 when others => 1894 Warning_Msg_Scan 1895 (Warnid_Pragma, "unknown pragma %i ignored", +Id); 1896 end case; 1897 end Scan_Comment_Pragma; 1898 1899 -- Scan tokens within a comment. Return TRUE if Current_Token was set, 1900 -- return FALSE to discard the comment (ie treat it like a real comment). 1901 function Scan_Comment return Boolean 1902 is 1903 use Std_Names; 1904 Id : Name_Id; 1905 begin 1906 Scan_Comment_Identifier (Id, False); 1907 1908 if Id = Null_Identifier then 1909 return False; 1910 end if; 1911 1912 case Id is 1913 when Name_Psl => 1914 -- Accept tokens after '-- psl'. 1915 if Flag_Psl_Comment then 1916 Flag_Psl := True; 1917 Flag_Scan_In_Comment := True; 1918 return True; 1919 end if; 1920 when Name_Pragma 1921 | Name_Synthesis 1922 | Name_Synopsys => 1923 if Flag_Pragma_Comment then 1924 Scan_Comment_Pragma; 1925 return False; 1926 end if; 1927 when others => 1928 null; 1929 end case; 1930 return False; 1931 end Scan_Comment; 1932 1933 -- The Scan_Next_Line procedure must be called after each end-of-line to 1934 -- register to next line number. This is called by Scan_CR_Newline and 1935 -- Scan_LF_Newline. 1936 procedure Scan_Next_Line is 1937 begin 1938 Files_Map.Skip_Gap (Current_Context.Source_File, Pos); 1939 Current_Context.Line_Number := Current_Context.Line_Number + 1; 1940 Current_Context.Line_Pos := Pos; 1941 File_Add_Line_Number 1942 (Current_Context.Source_File, Current_Context.Line_Number, Pos); 1943 end Scan_Next_Line; 1944 1945 -- Scan a CR end-of-line. 1946 procedure Scan_CR_Newline is 1947 begin 1948 -- Accept CR or CR+LF as line separator. 1949 if Source (Pos + 1) = LF then 1950 Pos := Pos + 2; 1951 else 1952 Pos := Pos + 1; 1953 end if; 1954 Scan_Next_Line; 1955 end Scan_CR_Newline; 1956 1957 -- Scan a LF end-of-line. 1958 procedure Scan_LF_Newline is 1959 begin 1960 -- Accept LF or LF+CR as line separator. 1961 if Source (Pos + 1) = CR then 1962 Pos := Pos + 2; 1963 else 1964 Pos := Pos + 1; 1965 end if; 1966 Scan_Next_Line; 1967 end Scan_LF_Newline; 1968 1969 -- Emit an error message for an invalid character. 1970 procedure Error_Bad_Character is 1971 begin 1972 -- Technically character literals, string literals, extended 1973 -- identifiers and comments. 1974 Error_Msg_Scan ("character %c can only be used in strings or comments", 1975 +Source (Pos)); 1976 end Error_Bad_Character; 1977 1978 procedure Scan_Block_Comment is 1979 begin 1980 Current_Context.Prev_Pos := Pos; 1981 Current_Context.Token_Pos := Pos; 1982 1983 loop 1984 case Source (Pos) is 1985 when '/' => 1986 -- LRM08 15.9 1987 -- Moreover, an occurrence of a solidus character 1988 -- immediately followed by an asterisk character 1989 -- within a delimited comment is not interpreted as 1990 -- the start of a nested delimited comment. 1991 if Source (Pos + 1) = '*' then 1992 Warning_Msg_Scan (Warnid_Nested_Comment, 1993 "'/*' found within a block comment"); 1994 end if; 1995 Pos := Pos + 1; 1996 when '*' => 1997 if Source (Pos + 1) = '/' then 1998 if Pos > Current_Context.Token_Pos then 1999 Current_Token := Tok_Block_Comment_Text; 2000 else 2001 Pos := Pos + 2; 2002 Current_Token := Tok_Block_Comment_End; 2003 end if; 2004 return; 2005 else 2006 Pos := Pos + 1; 2007 end if; 2008 when CR => 2009 if Pos > Current_Context.Token_Pos then 2010 Current_Token := Tok_Block_Comment_Text; 2011 else 2012 Scan_CR_Newline; 2013 Current_Token := Tok_Newline; 2014 end if; 2015 return; 2016 when LF => 2017 if Pos > Current_Context.Token_Pos then 2018 Current_Token := Tok_Block_Comment_Text; 2019 else 2020 Scan_LF_Newline; 2021 Current_Token := Tok_Newline; 2022 end if; 2023 return; 2024 when Files_Map.EOT => 2025 if Pos >= Current_Context.File_Len then 2026 -- Point at the start of the comment. 2027 Error_Msg_Scan 2028 (+Get_Token_Location, 2029 "block comment not terminated at end of file"); 2030 Current_Token := Tok_Eof; 2031 return; 2032 end if; 2033 Pos := Pos + 1; 2034 when others => 2035 Pos := Pos + 1; 2036 end case; 2037 end loop; 2038 end Scan_Block_Comment; 2039 2040 -- Get a new token. 2041 procedure Scan is 2042 begin 2043 if Current_Token /= Tok_Invalid then 2044 Current_Context.Prev_Token := Current_Token; 2045 end if; 2046 2047 Current_Context.Prev_Pos := Pos; 2048 2049 << Again >> null; 2050 2051 -- Skip commonly used separators. 2052 -- (Like Skip_Spaces but manually inlined for speed). 2053 while Source (Pos) = ' ' or Source (Pos) = HT loop 2054 Pos := Pos + 1; 2055 end loop; 2056 2057 Current_Context.Token_Pos := Pos; 2058 Current_Context.Identifier := Null_Identifier; 2059 2060 case Source (Pos) is 2061 when HT | ' ' => 2062 -- Must have already been skipped just above. 2063 raise Internal_Error; 2064 when NBSP => 2065 if Vhdl_Std = Vhdl_87 then 2066 Error_Msg_Scan ("NBSP character not allowed in vhdl87"); 2067 end if; 2068 Pos := Pos + 1; 2069 goto Again; 2070 when VT | FF => 2071 Pos := Pos + 1; 2072 goto Again; 2073 when LF => 2074 Scan_LF_Newline; 2075 if Flag_Newline then 2076 Current_Token := Tok_Newline; 2077 return; 2078 end if; 2079 goto Again; 2080 when CR => 2081 Scan_CR_Newline; 2082 if Flag_Newline then 2083 Current_Token := Tok_Newline; 2084 return; 2085 end if; 2086 goto Again; 2087 when '-' => 2088 if Source (Pos + 1) = '-' then 2089 -- This is a comment. 2090 -- LRM93 13.8 2091 -- A comment starts with two adjacent hyphens and extends up 2092 -- to the end of the line. 2093 -- A comment can appear on any line line of a VHDL 2094 -- description. 2095 -- The presence or absence of comments has no influence on 2096 -- whether a description is legal or illegal. 2097 -- Futhermore, comments do not influence the execution of a 2098 -- simulation module; their sole purpose is the enlightenment 2099 -- of the human reader. 2100 -- GHDL note: As a consequence, an obfruscating comment 2101 -- is out of purpose, and a warning could be reported :-) 2102 Pos := Pos + 2; 2103 2104 -- Scan inside a comment. So we just ignore the two dashes. 2105 if Flag_Scan_In_Comment then 2106 goto Again; 2107 end if; 2108 2109 -- Handle keywords in comment (PSL). 2110 if Flag_Comment_Keyword and then Scan_Comment then 2111 goto Again; 2112 end if; 2113 2114 -- LRM93 13.2 2115 -- In any case, a sequence of one or more format 2116 -- effectors other than horizontal tabulation must 2117 -- cause at least one end of line. 2118 while not Is_EOL (Source (Pos)) loop 2119 -- LRM93 13.1 2120 -- The only characters allowed in the text of a VHDL 2121 -- description are the graphic characters and the format 2122 -- effectors. 2123 2124 -- LRM02 13.1 Character set 2125 -- The only characters allowed in the text of a VHDL 2126 -- description (except within comments -- see 13.8) [...] 2127 -- 2128 -- LRM02 13.8 Comments 2129 -- A comment [...] may contain any character except the 2130 -- format effectors vertical tab, carriage return, line 2131 -- feed and form feed. 2132 if not (Flags.Mb_Comment 2133 or Flags.Flag_Relaxed_Rules 2134 or Vhdl_Std >= Vhdl_02) 2135 and then Characters_Kind (Source (Pos)) = Invalid 2136 then 2137 Error_Msg_Scan ("invalid character, even in a comment"); 2138 end if; 2139 Pos := Pos + 1; 2140 end loop; 2141 if Flag_Comment then 2142 Current_Token := Tok_Line_Comment; 2143 return; 2144 end if; 2145 goto Again; 2146 elsif Flag_Psl and then Source (Pos + 1) = '>' then 2147 Current_Token := Tok_Minus_Greater; 2148 Pos := Pos + 2; 2149 return; 2150 else 2151 Current_Token := Tok_Minus; 2152 Pos := Pos + 1; 2153 return; 2154 end if; 2155 when '+' => 2156 Current_Token := Tok_Plus; 2157 Pos := Pos + 1; 2158 return; 2159 when '*' => 2160 if Source (Pos + 1) = '*' then 2161 Current_Token := Tok_Double_Star; 2162 Pos := Pos + 2; 2163 else 2164 Current_Token := Tok_Star; 2165 Pos := Pos + 1; 2166 end if; 2167 return; 2168 when '/' => 2169 if Source (Pos + 1) = '=' then 2170 Current_Token := Tok_Not_Equal; 2171 Pos := Pos + 2; 2172 elsif Source (Pos + 1) = '*' then 2173 -- LRM08 15.9 Comments 2174 -- A delimited comment start with a solidus (slash) character 2175 -- immediately followed by an asterisk character and extends up 2176 -- to the first subsequent occurrence of an asterisk character 2177 -- immediately followed by a solidus character. 2178 if Vhdl_Std < Vhdl_08 then 2179 Error_Msg_Scan 2180 ("block comment are not allowed before vhdl 2008"); 2181 end if; 2182 2183 -- Skip '/*'. 2184 Pos := Pos + 2; 2185 2186 if Flag_Comment then 2187 Current_Token := Tok_Block_Comment_Start; 2188 return; 2189 end if; 2190 2191 loop 2192 Scan_Block_Comment; 2193 exit when Current_Token = Tok_Block_Comment_End 2194 or else Current_Token = Tok_Eof; 2195 end loop; 2196 goto Again; 2197 else 2198 Current_Token := Tok_Slash; 2199 Pos := Pos + 1; 2200 end if; 2201 return; 2202 when '(' => 2203 Current_Token := Tok_Left_Paren; 2204 Pos := Pos + 1; 2205 return; 2206 when ')' => 2207 Current_Token := Tok_Right_Paren; 2208 Pos := Pos + 1; 2209 return; 2210 when '|' => 2211 if Flag_Psl then 2212 if Source (Pos + 1) = '|' then 2213 Current_Token := Tok_Bar_Bar; 2214 Pos := Pos + 2; 2215 elsif Source (Pos + 1) = '-' 2216 and then Source (Pos + 2) = '>' 2217 then 2218 Current_Token := Tok_Bar_Arrow; 2219 Pos := Pos + 3; 2220 elsif Source (Pos + 1) = '=' 2221 and then Source (Pos + 2) = '>' 2222 then 2223 Current_Token := Tok_Bar_Double_Arrow; 2224 Pos := Pos + 3; 2225 else 2226 Current_Token := Tok_Bar; 2227 Pos := Pos + 1; 2228 end if; 2229 else 2230 Current_Token := Tok_Bar; 2231 Pos := Pos + 1; 2232 end if; 2233 return; 2234 when '!' => 2235 if Flag_Psl then 2236 Current_Token := Tok_Exclam_Mark; 2237 else 2238 if Source (Pos + 1) = '=' then 2239 -- != is not allowed in VHDL, but be friendly with C users. 2240 Error_Msg_Scan 2241 (+Get_Token_Location, "Use '/=' for inequality in vhdl"); 2242 Current_Token := Tok_Not_Equal; 2243 Pos := Pos + 1; 2244 else 2245 -- LRM93 13.10 2246 -- A vertical line (|) can be replaced by an exclamation 2247 -- mark (!) where used as a delimiter. 2248 Current_Token := Tok_Bar; 2249 end if; 2250 end if; 2251 Pos := Pos + 1; 2252 return; 2253 when ':' => 2254 if Source (Pos + 1) = '=' then 2255 Current_Token := Tok_Assign; 2256 Pos := Pos + 2; 2257 else 2258 Current_Token := Tok_Colon; 2259 Pos := Pos + 1; 2260 end if; 2261 return; 2262 when ';' => 2263 Current_Token := Tok_Semi_Colon; 2264 Pos := Pos + 1; 2265 return; 2266 when ',' => 2267 Current_Token := Tok_Comma; 2268 Pos := Pos + 1; 2269 return; 2270 when '.' => 2271 if Source (Pos + 1) = '.' then 2272 -- Be Ada friendly... 2273 Error_Msg_Scan ("'..' is invalid in vhdl, replaced by 'to'"); 2274 Current_Token := Tok_To; 2275 Pos := Pos + 2; 2276 return; 2277 end if; 2278 Current_Token := Tok_Dot; 2279 Pos := Pos + 1; 2280 return; 2281 when '&' => 2282 if Flag_Psl and then Source (Pos + 1) = '&' then 2283 Current_Token := Tok_And_And; 2284 Pos := Pos + 2; 2285 else 2286 Current_Token := Tok_Ampersand; 2287 Pos := Pos + 1; 2288 end if; 2289 return; 2290 when '<' => 2291 case Source (Pos + 1) is 2292 when '=' => 2293 Current_Token := Tok_Less_Equal; 2294 Pos := Pos + 2; 2295 when '>' => 2296 Current_Token := Tok_Box; 2297 Pos := Pos + 2; 2298 when '<' => 2299 Current_Token := Tok_Double_Less; 2300 Pos := Pos + 2; 2301 when '-' => 2302 if Flag_Psl and then Source (Pos + 2) = '>' then 2303 Current_Token := Tok_Equiv_Arrow; 2304 Pos := Pos + 3; 2305 else 2306 Current_Token := Tok_Less; 2307 Pos := Pos + 1; 2308 end if; 2309 when others => 2310 Current_Token := Tok_Less; 2311 Pos := Pos + 1; 2312 end case; 2313 return; 2314 when '>' => 2315 case Source (Pos + 1) is 2316 when '=' => 2317 Current_Token := Tok_Greater_Equal; 2318 Pos := Pos + 2; 2319 when '>' => 2320 Current_Token := Tok_Double_Greater; 2321 Pos := Pos + 2; 2322 when others => 2323 Current_Token := Tok_Greater; 2324 Pos := Pos + 1; 2325 end case; 2326 return; 2327 when '=' => 2328 if Source (Pos + 1) = '=' then 2329 if AMS_Vhdl then 2330 Current_Token := Tok_Equal_Equal; 2331 else 2332 Error_Msg_Scan 2333 ("'==' is not the vhdl equality, replaced by '='"); 2334 Current_Token := Tok_Equal; 2335 end if; 2336 Pos := Pos + 2; 2337 elsif Source (Pos + 1) = '>' then 2338 Current_Token := Tok_Double_Arrow; 2339 Pos := Pos + 2; 2340 else 2341 Current_Token := Tok_Equal; 2342 Pos := Pos + 1; 2343 end if; 2344 return; 2345 when ''' => 2346 -- Handle cases such as character'('a') 2347 -- FIXME: what about f ()'length ? or .all'length 2348 if Current_Context.Prev_Token /= Tok_Identifier 2349 and then Current_Context.Prev_Token /= Tok_Character 2350 and then Source (Pos + 2) = ''' 2351 then 2352 -- LRM93 13.5 2353 -- A character literal is formed by enclosing one of the 191 2354 -- graphic character (...) between two apostrophe characters. 2355 -- CHARACTER_LITERAL ::= ' GRAPHIC_CHARACTER ' 2356 if Characters_Kind (Source (Pos + 1)) not in Graphic_Character 2357 then 2358 Error_Msg_Scan 2359 ("a character literal can only be a graphic character"); 2360 elsif Vhdl_Std = Vhdl_87 2361 and then Source (Pos + 1) > Character'Val (127) 2362 then 2363 Error_8bit; 2364 end if; 2365 Current_Token := Tok_Character; 2366 Current_Context.Identifier := 2367 Name_Table.Get_Identifier (Source (Pos + 1)); 2368 Pos := Pos + 3; 2369 return; 2370 elsif Source (Pos + 1) = ''' then 2371 Error_Msg_Scan ("empty quote is not allowed in vhdl"); 2372 Current_Token := Tok_Character; 2373 Current_Context.Identifier := Name_Table.Get_Identifier (' '); 2374 Pos := Pos + 2; 2375 return; 2376 else 2377 Current_Token := Tok_Tick; 2378 Pos := Pos + 1; 2379 end if; 2380 return; 2381 when '0' .. '9' => 2382 Scan_Literal; 2383 2384 -- LRM93 13.2 2385 -- At least one separator is required between an identifier or 2386 -- an abstract literal and an adjacent identifier or abstract 2387 -- literal. 2388 case Characters_Kind (Source (Pos)) is 2389 when Digit => 2390 -- Happen if d#ddd# is followed by a number. 2391 Error_Msg_Scan ("space is required between numbers"); 2392 when Upper_Case_Letter 2393 | Lower_Case_Letter => 2394 -- Could call Error_Separator, but use a clearer message 2395 -- for this common case. 2396 -- Note: the term "unit name" is not correct here, since 2397 -- it can be any identifier or even a keyword; however it 2398 -- is probably the most common case (eg 10ns). 2399 if Vhdl_Std >= Vhdl_08 and then Current_Token = Tok_Integer 2400 then 2401 Current_Token := Tok_Integer_Letter; 2402 else 2403 Error_Msg_Scan 2404 ("space is required between number and unit name"); 2405 end if; 2406 when Other_Special_Character => 2407 if Vhdl_Std > Vhdl_87 and then Source (Pos) = '\' then 2408 -- Start of extended identifier. 2409 Error_Separator; 2410 end if; 2411 when Invalid 2412 | Format_Effector 2413 | Space_Character 2414 | Special_Character => 2415 null; 2416 end case; 2417 return; 2418 when '#' => 2419 Error_Msg_Scan ("'#' is used for based literals and " 2420 & "must be preceded by a base"); 2421 -- Skip. 2422 Pos := Pos + 1; 2423 goto Again; 2424 when '"' => 2425 Scan_String; 2426 return; 2427 when '%' => 2428 if Vhdl_Std >= Vhdl_08 then 2429 Error_Msg_Scan 2430 ("'%%' not allowed in vhdl 2008 (was replacement character)"); 2431 -- Continue as a string. 2432 end if; 2433 Scan_String; 2434 return; 2435 when '[' => 2436 if Flag_Psl then 2437 if Source (Pos + 1) = '*' then 2438 Current_Token := Tok_Brack_Star; 2439 Pos := Pos + 2; 2440 elsif Source (Pos + 1) = '+' 2441 and then Source (Pos + 2) = ']' 2442 then 2443 Current_Token := Tok_Brack_Plus_Brack; 2444 Pos := Pos + 3; 2445 elsif Source (Pos + 1) = '-' 2446 and then Source (Pos + 2) = '>' 2447 then 2448 Current_Token := Tok_Brack_Arrow; 2449 Pos := Pos + 3; 2450 elsif Source (Pos + 1) = '=' then 2451 Current_Token := Tok_Brack_Equal; 2452 Pos := Pos + 2; 2453 else 2454 Current_Token := Tok_Left_Bracket; 2455 Pos := Pos + 1; 2456 end if; 2457 else 2458 if Vhdl_Std = Vhdl_87 then 2459 Error_Msg_Scan 2460 ("'[' is an invalid character in vhdl87, replaced by '('"); 2461 Current_Token := Tok_Left_Paren; 2462 else 2463 Current_Token := Tok_Left_Bracket; 2464 end if; 2465 Pos := Pos + 1; 2466 end if; 2467 return; 2468 when ']' => 2469 if Vhdl_Std = Vhdl_87 and not Flag_Psl then 2470 Error_Msg_Scan 2471 ("']' is an invalid character in vhdl87, replaced by ')'"); 2472 Current_Token := Tok_Right_Paren; 2473 else 2474 Current_Token := Tok_Right_Bracket; 2475 end if; 2476 Pos := Pos + 1; 2477 return; 2478 when '{' => 2479 Current_Token := Tok_Left_Curly; 2480 Pos := Pos + 1; 2481 return; 2482 when '}' => 2483 Current_Token := Tok_Right_Curly; 2484 Pos := Pos + 1; 2485 return; 2486 when '\' => 2487 if Vhdl_Std = Vhdl_87 then 2488 Error_Msg_Scan 2489 ("extended identifiers are not allowed in vhdl87"); 2490 end if; 2491 Scan_Extended_Identifier; 2492 return; 2493 when '^' => 2494 if Vhdl_Std >= Vhdl_08 then 2495 Current_Token := Tok_Caret; 2496 else 2497 Current_Token := Tok_Xor; 2498 Error_Msg_Scan ("'^' is not a VHDL operator, use 'xor'"); 2499 end if; 2500 Pos := Pos + 1; 2501 return; 2502 when '~' => 2503 Error_Msg_Scan ("'~' is not a VHDL operator, use 'not'"); 2504 Pos := Pos + 1; 2505 Current_Token := Tok_Not; 2506 return; 2507 when '?' => 2508 if Vhdl_Std < Vhdl_08 then 2509 Error_Bad_Character; 2510 Pos := Pos + 1; 2511 goto Again; 2512 else 2513 if Source (Pos + 1) = '<' then 2514 if Source (Pos + 2) = '=' then 2515 Current_Token := Tok_Match_Less_Equal; 2516 Pos := Pos + 3; 2517 else 2518 Current_Token := Tok_Match_Less; 2519 Pos := Pos + 2; 2520 end if; 2521 elsif Source (Pos + 1) = '>' then 2522 if Source (Pos + 2) = '=' then 2523 Current_Token := Tok_Match_Greater_Equal; 2524 Pos := Pos + 3; 2525 else 2526 Current_Token := Tok_Match_Greater; 2527 Pos := Pos + 2; 2528 end if; 2529 elsif Source (Pos + 1) = '?' then 2530 Current_Token := Tok_Condition; 2531 Pos := Pos + 2; 2532 elsif Source (Pos + 1) = '=' then 2533 Current_Token := Tok_Match_Equal; 2534 Pos := Pos + 2; 2535 elsif Source (Pos + 1) = '/' 2536 and then Source (Pos + 2) = '=' 2537 then 2538 Current_Token := Tok_Match_Not_Equal; 2539 Pos := Pos + 3; 2540 else 2541 Error_Msg_Scan ("unknown matching operator"); 2542 Pos := Pos + 1; 2543 goto Again; 2544 end if; 2545 end if; 2546 return; 2547 when '`' => 2548 if Vhdl_Std >= Vhdl_08 then 2549 Scan_Tool_Directive; 2550 else 2551 Error_Bad_Character; 2552 Skip_Until_EOL; 2553 end if; 2554 goto Again; 2555 when '$' 2556 | Inverted_Exclamation .. Inverted_Question 2557 | Multiplication_Sign | Division_Sign => 2558 Error_Bad_Character; 2559 Pos := Pos + 1; 2560 goto Again; 2561 when '@' => 2562 if Vhdl_Std >= Vhdl_08 or Flag_Psl then 2563 Current_Token := Tok_Arobase; 2564 Pos := Pos + 1; 2565 return; 2566 else 2567 Error_Bad_Character; 2568 Pos := Pos + 1; 2569 goto Again; 2570 end if; 2571 when '_' => 2572 Error_Msg_Scan ("an identifier can't start with '_'"); 2573 Scan_Identifier (Flag_Psl); 2574 -- Cannot be a reserved word. 2575 return; 2576 when 'A' .. 'Z' | 'a' .. 'z' => 2577 Scan_Identifier (Flag_Psl); 2578 if Current_Token = Tok_Identifier then 2579 Identifier_To_Token; 2580 end if; 2581 return; 2582 when UC_A_Grave .. UC_O_Diaeresis 2583 | UC_O_Oblique_Stroke .. UC_Icelandic_Thorn 2584 | LC_German_Sharp_S .. LC_O_Diaeresis 2585 | LC_O_Oblique_Stroke .. LC_Y_Diaeresis => 2586 if Vhdl_Std = Vhdl_87 then 2587 Error_Msg_Scan 2588 ("non 7-bit latin-1 letters are not allowed in vhdl87"); 2589 end if; 2590 Scan_Identifier (False); 2591 -- Not a reserved word. 2592 return; 2593 when NUL .. ETX | ENQ .. BS | SO .. US | DEL .. APC => 2594 Error_Msg_Scan 2595 ("control character that is not CR, LF, FF, HT or VT " & 2596 "is not allowed"); 2597 Pos := Pos + 1; 2598 goto Again; 2599 when Files_Map.EOT => 2600 if Pos >= Current_Context.File_Len then 2601 -- FIXME: should conditionnaly emit a warning if the file 2602 -- is not terminated by an end of line. 2603 Current_Token := Tok_Eof; 2604 else 2605 Error_Msg_Scan ("EOT is not allowed inside the file"); 2606 Pos := Pos + 1; 2607 goto Again; 2608 end if; 2609 return; 2610 end case; 2611 -- Not reachable: all case should use goto Again or return. 2612 end Scan; 2613 2614 function Is_Whitespace (C : Character) return Boolean is 2615 begin 2616 if C = ' ' then 2617 return True; 2618 elsif Vhdl_Std > Vhdl_87 and C = NBSP then 2619 return True; 2620 else 2621 return False; 2622 end if; 2623 end Is_Whitespace; 2624end Vhdl.Scanner; 2625