1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- S C N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2015, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING3. If not, go to -- 19-- http://www.gnu.org/licenses for a complete copy of the license. -- 20-- -- 21-- GNAT was originally developed by the GNAT team at New York University. -- 22-- Extensive contributions were provided by Ada Core Technologies Inc. -- 23-- -- 24------------------------------------------------------------------------------ 25 26with Atree; use Atree; 27with Csets; use Csets; 28with Hostparm; use Hostparm; 29with Namet; use Namet; 30with Opt; use Opt; 31with Restrict; use Restrict; 32with Rident; use Rident; 33with Scans; use Scans; 34with Sinput; use Sinput; 35with Snames; use Snames; 36with Stringt; use Stringt; 37with Stylesw; use Stylesw; 38with Uintp; use Uintp; 39with Urealp; use Urealp; 40with Widechar; use Widechar; 41 42pragma Warnings (Off); 43-- This package is used also by gnatcoll 44with System.CRC32; 45with System.UTF_32; use System.UTF_32; 46with System.WCh_Con; use System.WCh_Con; 47pragma Warnings (On); 48 49package body Scng is 50 51 use ASCII; 52 -- Make control characters visible 53 54 Special_Characters : array (Character) of Boolean := (others => False); 55 -- For characters that are Special token, the value is True 56 57 Comment_Is_Token : Boolean := False; 58 -- True if comments are tokens 59 60 End_Of_Line_Is_Token : Boolean := False; 61 -- True if End_Of_Line is a token 62 63 ----------------------- 64 -- Local Subprograms -- 65 ----------------------- 66 67 procedure Accumulate_Token_Checksum; 68 pragma Inline (Accumulate_Token_Checksum); 69 -- Called after each numeric literal and identifier/keyword. For keywords, 70 -- the token used is Tok_Identifier. This allows detection of additional 71 -- spaces added in sources when using the builder switch -m. 72 73 procedure Accumulate_Token_Checksum_GNAT_6_3; 74 -- Used in place of Accumulate_Token_Checksum for GNAT versions 5.04 to 75 -- 6.3, when Tok_Some was not included in Token_Type and the actual 76 -- Token_Type was used for keywords. This procedure is never used in the 77 -- compiler or gnatmake, only in gprbuild. 78 79 procedure Accumulate_Token_Checksum_GNAT_5_03; 80 -- Used in place of Accumulate_Token_Checksum for GNAT version 5.03, when 81 -- Tok_Interface, Tok_Some, Tok_Synchronized and Tok_Overriding were not 82 -- included in Token_Type and the actual Token_Type was used for keywords. 83 -- This procedure is never used in the compiler or gnatmake, only in 84 -- gprbuild. 85 86 procedure Accumulate_Checksum (C : Character); 87 pragma Inline (Accumulate_Checksum); 88 -- This routine accumulates the checksum given character C. During the 89 -- scanning of a source file, this routine is called with every character 90 -- in the source, excluding blanks, and all control characters (except 91 -- that ESC is included in the checksum). Upper case letters not in string 92 -- literals are folded by the caller. See Sinput spec for the documentation 93 -- of the checksum algorithm. Note: checksum values are only used if we 94 -- generate code, so it is not necessary to worry about making the right 95 -- sequence of calls in any error situation. 96 97 procedure Accumulate_Checksum (C : Char_Code); 98 pragma Inline (Accumulate_Checksum); 99 -- This version is identical, except that the argument, C, is a character 100 -- code value instead of a character. This is used when wide characters 101 -- are scanned. We use the character code rather than the ASCII characters 102 -- so that the checksum is independent of wide character encoding method. 103 104 procedure Initialize_Checksum; 105 pragma Inline (Initialize_Checksum); 106 -- Initialize checksum value 107 108 ------------------------- 109 -- Accumulate_Checksum -- 110 ------------------------- 111 112 procedure Accumulate_Checksum (C : Character) is 113 begin 114 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C); 115 end Accumulate_Checksum; 116 117 procedure Accumulate_Checksum (C : Char_Code) is 118 begin 119 if C > 16#FFFF# then 120 Accumulate_Checksum (Character'Val (C / 2 ** 24)); 121 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256)); 122 Accumulate_Checksum (Character'Val ((C / 256) mod 256)); 123 else 124 Accumulate_Checksum (Character'Val (C / 256)); 125 end if; 126 127 Accumulate_Checksum (Character'Val (C mod 256)); 128 end Accumulate_Checksum; 129 130 ------------------------------- 131 -- Accumulate_Token_Checksum -- 132 ------------------------------- 133 134 procedure Accumulate_Token_Checksum is 135 begin 136 System.CRC32.Update 137 (System.CRC32.CRC32 (Checksum), 138 Character'Val (Token_Type'Pos (Token))); 139 end Accumulate_Token_Checksum; 140 141 ---------------------------------------- 142 -- Accumulate_Token_Checksum_GNAT_6_3 -- 143 ---------------------------------------- 144 145 procedure Accumulate_Token_Checksum_GNAT_6_3 is 146 begin 147 -- Individual values of Token_Type are used, instead of subranges, so 148 -- that additions or suppressions of enumerated values in type 149 -- Token_Type are detected by the compiler. 150 151 case Token is 152 when Tok_Integer_Literal | Tok_Real_Literal | Tok_String_Literal | 153 Tok_Char_Literal | Tok_Operator_Symbol | Tok_Identifier | 154 Tok_Double_Asterisk | Tok_Ampersand | Tok_Minus | Tok_Plus | 155 Tok_Asterisk | Tok_Mod | Tok_Rem | Tok_Slash | Tok_New | 156 Tok_Abs | Tok_Others | Tok_Null | Tok_Dot | Tok_Apostrophe | 157 Tok_Left_Paren | Tok_Delta | Tok_Digits | Tok_Range | 158 Tok_Right_Paren | Tok_Comma | Tok_And | Tok_Or | Tok_Xor | 159 Tok_Less | Tok_Equal | Tok_Greater | Tok_Not_Equal | 160 Tok_Greater_Equal | Tok_Less_Equal | Tok_In | Tok_Not | 161 Tok_Box | Tok_Colon_Equal | Tok_Colon | Tok_Greater_Greater | 162 Tok_Abstract | Tok_Access | Tok_Aliased | Tok_All | Tok_Array | 163 Tok_At | Tok_Body | Tok_Constant | Tok_Do | Tok_Is | 164 Tok_Interface | Tok_Limited | Tok_Of | Tok_Out | Tok_Record | 165 Tok_Renames | Tok_Reverse => 166 167 System.CRC32.Update 168 (System.CRC32.CRC32 (Checksum), 169 Character'Val (Token_Type'Pos (Token))); 170 171 when Tok_Some => 172 173 System.CRC32.Update 174 (System.CRC32.CRC32 (Checksum), 175 Character'Val (Token_Type'Pos (Tok_Identifier))); 176 177 when Tok_Tagged | Tok_Then | Tok_Less_Less | Tok_Abort | Tok_Accept | 178 Tok_Case | Tok_Delay | Tok_Else | Tok_Elsif | Tok_End | 179 Tok_Exception | Tok_Exit | Tok_Goto | Tok_If | Tok_Pragma | 180 Tok_Raise | Tok_Requeue | Tok_Return | Tok_Select | 181 Tok_Terminate | Tok_Until | Tok_When | Tok_Begin | Tok_Declare | 182 Tok_For | Tok_Loop | Tok_While | Tok_Entry | Tok_Protected | 183 Tok_Task | Tok_Type | Tok_Subtype | Tok_Overriding | 184 Tok_Synchronized | Tok_Use | Tok_Function | Tok_Generic | 185 Tok_Package | Tok_Procedure | Tok_Private | Tok_With | 186 Tok_Separate | Tok_EOF | Tok_Semicolon | Tok_Arrow | 187 Tok_Vertical_Bar | Tok_Dot_Dot | Tok_Project | Tok_Extends | 188 Tok_External | Tok_External_As_List | Tok_Comment | 189 Tok_End_Of_Line | Tok_Special | Tok_SPARK_Hide | No_Token => 190 191 System.CRC32.Update 192 (System.CRC32.CRC32 (Checksum), 193 Character'Val (Token_Type'Pos (Token_Type'Pred (Token)))); 194 end case; 195 end Accumulate_Token_Checksum_GNAT_6_3; 196 197 ----------------------------------------- 198 -- Accumulate_Token_Checksum_GNAT_5_03 -- 199 ----------------------------------------- 200 201 procedure Accumulate_Token_Checksum_GNAT_5_03 is 202 begin 203 -- Individual values of Token_Type are used, instead of subranges, so 204 -- that additions or suppressions of enumerated values in type 205 -- Token_Type are detected by the compiler. 206 207 case Token is 208 when Tok_Integer_Literal | Tok_Real_Literal | Tok_String_Literal | 209 Tok_Char_Literal | Tok_Operator_Symbol | Tok_Identifier | 210 Tok_Double_Asterisk | Tok_Ampersand | Tok_Minus | Tok_Plus | 211 Tok_Asterisk | Tok_Mod | Tok_Rem | Tok_Slash | Tok_New | 212 Tok_Abs | Tok_Others | Tok_Null | Tok_Dot | Tok_Apostrophe | 213 Tok_Left_Paren | Tok_Delta | Tok_Digits | Tok_Range | 214 Tok_Right_Paren | Tok_Comma | Tok_And | Tok_Or | Tok_Xor | 215 Tok_Less | Tok_Equal | Tok_Greater | Tok_Not_Equal | 216 Tok_Greater_Equal | Tok_Less_Equal | Tok_In | Tok_Not | 217 Tok_Box | Tok_Colon_Equal | Tok_Colon | Tok_Greater_Greater | 218 Tok_Abstract | Tok_Access | Tok_Aliased | Tok_All | Tok_Array | 219 Tok_At | Tok_Body | Tok_Constant | Tok_Do | Tok_Is => 220 221 System.CRC32.Update 222 (System.CRC32.CRC32 (Checksum), 223 Character'Val (Token_Type'Pos (Token))); 224 225 when Tok_Interface | Tok_Some | Tok_Overriding | Tok_Synchronized => 226 System.CRC32.Update 227 (System.CRC32.CRC32 (Checksum), 228 Character'Val (Token_Type'Pos (Tok_Identifier))); 229 230 when Tok_Limited | Tok_Of | Tok_Out | Tok_Record | 231 Tok_Renames | Tok_Reverse => 232 233 System.CRC32.Update 234 (System.CRC32.CRC32 (Checksum), 235 Character'Val (Token_Type'Pos (Token) - 1)); 236 237 when Tok_Tagged | Tok_Then | Tok_Less_Less | Tok_Abort | Tok_Accept | 238 Tok_Case | Tok_Delay | Tok_Else | Tok_Elsif | Tok_End | 239 Tok_Exception | Tok_Exit | Tok_Goto | Tok_If | Tok_Pragma | 240 Tok_Raise | Tok_Requeue | Tok_Return | Tok_Select | 241 Tok_Terminate | Tok_Until | Tok_When | Tok_Begin | Tok_Declare | 242 Tok_For | Tok_Loop | Tok_While | Tok_Entry | Tok_Protected | 243 Tok_Task | Tok_Type | Tok_Subtype => 244 245 System.CRC32.Update 246 (System.CRC32.CRC32 (Checksum), 247 Character'Val (Token_Type'Pos (Token) - 2)); 248 249 when Tok_Use | Tok_Function | Tok_Generic | 250 Tok_Package | Tok_Procedure | Tok_Private | Tok_With | 251 Tok_Separate | Tok_EOF | Tok_Semicolon | Tok_Arrow | 252 Tok_Vertical_Bar | Tok_Dot_Dot | Tok_Project | Tok_Extends | 253 Tok_External | Tok_External_As_List | Tok_Comment | 254 Tok_End_Of_Line | Tok_Special | Tok_SPARK_Hide | No_Token => 255 256 System.CRC32.Update 257 (System.CRC32.CRC32 (Checksum), 258 Character'Val (Token_Type'Pos (Token) - 4)); 259 end case; 260 end Accumulate_Token_Checksum_GNAT_5_03; 261 262 ----------------------- 263 -- Check_End_Of_Line -- 264 ----------------------- 265 266 procedure Check_End_Of_Line is 267 Len : constant Int := 268 Int (Scan_Ptr) - 269 Int (Current_Line_Start) - 270 Wide_Char_Byte_Count; 271 272 -- Start of processing for Check_End_Of_Line 273 274 begin 275 if Style_Check then 276 Style.Check_Line_Terminator (Len); 277 end if; 278 279 -- Deal with checking maximum line length 280 281 if Style_Check and Style_Check_Max_Line_Length then 282 Style.Check_Line_Max_Length (Len); 283 284 -- If style checking is inactive, check maximum line length against 285 -- standard value. 286 287 elsif Len > Max_Line_Length then 288 Error_Msg 289 ("this line is too long", 290 Current_Line_Start + Source_Ptr (Max_Line_Length)); 291 end if; 292 293 -- Now one more checking circuit. Normally we are only enforcing a limit 294 -- of physical characters, with tabs counting as one character. But if 295 -- after tab expansion we would have a total line length that exceeded 296 -- 32766, that would really cause trouble, because column positions 297 -- would exceed the maximum we allow for a column count. Note: the limit 298 -- is 32766 rather than 32767, since we use a value of 32767 for special 299 -- purposes (see Sinput). Now we really do not want to go messing with 300 -- tabs in the normal case, so what we do is to check for a line that 301 -- has more than 4096 physical characters. Any shorter line could not 302 -- be a problem, even if it was all tabs. 303 304 if Len >= 4096 then 305 declare 306 Col : Natural; 307 Ptr : Source_Ptr; 308 309 begin 310 Col := 1; 311 Ptr := Current_Line_Start; 312 loop 313 exit when Ptr = Scan_Ptr; 314 315 if Source (Ptr) = ASCII.HT then 316 Col := (Col - 1 + 8) / 8 * 8 + 1; 317 else 318 Col := Col + 1; 319 end if; 320 321 if Col > 32766 then 322 Error_Msg 323 ("this line is longer than 32766 characters", 324 Current_Line_Start); 325 raise Unrecoverable_Error; 326 end if; 327 328 Ptr := Ptr + 1; 329 end loop; 330 end; 331 end if; 332 333 -- Reset wide character byte count for next line 334 335 Wide_Char_Byte_Count := 0; 336 end Check_End_Of_Line; 337 338 ---------------------------- 339 -- Determine_Token_Casing -- 340 ---------------------------- 341 342 function Determine_Token_Casing return Casing_Type is 343 begin 344 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1)); 345 end Determine_Token_Casing; 346 347 ------------------------- 348 -- Initialize_Checksum -- 349 ------------------------- 350 351 procedure Initialize_Checksum is 352 begin 353 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum)); 354 end Initialize_Checksum; 355 356 ------------------------ 357 -- Initialize_Scanner -- 358 ------------------------ 359 360 procedure Initialize_Scanner (Index : Source_File_Index) is 361 begin 362 -- Establish reserved words 363 364 Scans.Initialize_Ada_Keywords; 365 366 -- Initialize scan control variables 367 368 Current_Source_File := Index; 369 Source := Source_Text (Current_Source_File); 370 Scan_Ptr := Source_First (Current_Source_File); 371 Token := No_Token; 372 Token_Ptr := Scan_Ptr; 373 Current_Line_Start := Scan_Ptr; 374 Token_Node := Empty; 375 Token_Name := No_Name; 376 Start_Column := Set_Start_Column; 377 First_Non_Blank_Location := Scan_Ptr; 378 379 Initialize_Checksum; 380 Wide_Char_Byte_Count := 0; 381 382 -- Do not call Scan, otherwise the License stuff does not work in Scn 383 384 end Initialize_Scanner; 385 386 ------------------------------ 387 -- Reset_Special_Characters -- 388 ------------------------------ 389 390 procedure Reset_Special_Characters is 391 begin 392 Special_Characters := (others => False); 393 end Reset_Special_Characters; 394 395 ---------- 396 -- Scan -- 397 ---------- 398 399 procedure Scan is 400 401 Start_Of_Comment : Source_Ptr; 402 -- Record start of comment position 403 404 Underline_Found : Boolean; 405 -- During scanning of an identifier, set to True if last character 406 -- scanned was an underline or other punctuation character. This 407 -- is used to flag the error of two underlines/punctuations in a 408 -- row or ending an identifier with a underline/punctuation. Here 409 -- punctuation means any UTF_32 character in the Unicode category 410 -- Punctuation,Connector. 411 412 Wptr : Source_Ptr; 413 -- Used to remember start of last wide character scanned 414 415 function Double_Char_Token (C : Character) return Boolean; 416 -- This function is used for double character tokens like := or <>. It 417 -- checks if the character following Source (Scan_Ptr) is C, and if so 418 -- bumps Scan_Ptr past the pair of characters and returns True. A space 419 -- between the two characters is also recognized with an appropriate 420 -- error message being issued. If C is not present, False is returned. 421 -- Note that Double_Char_Token can only be used for tokens defined in 422 -- the Ada syntax (it's use for error cases like && is not appropriate 423 -- since we do not want a junk message for a case like &-space-&). 424 425 procedure Error_Illegal_Character; 426 -- Give illegal character error, Scan_Ptr points to character. On 427 -- return, Scan_Ptr is bumped past the illegal character. 428 429 procedure Error_Illegal_Wide_Character; 430 -- Give illegal wide character message. On return, Scan_Ptr is bumped 431 -- past the illegal character, which may still leave us pointing to 432 -- junk, not much we can do if the escape sequence is messed up. 433 434 procedure Error_No_Double_Underline; 435 -- Signal error of two underline or punctuation characters in a row. 436 -- Called with Scan_Ptr pointing to second underline/punctuation char. 437 438 procedure Nlit; 439 -- This is the procedure for scanning out numeric literals. On entry, 440 -- Scan_Ptr points to the digit that starts the numeric literal (the 441 -- checksum for this character has not been accumulated yet). On return 442 -- Scan_Ptr points past the last character of the numeric literal, Token 443 -- and Token_Node are set appropriately, and the checksum is updated. 444 445 procedure Slit; 446 -- This is the procedure for scanning out string literals. On entry, 447 -- Scan_Ptr points to the opening string quote (the checksum for this 448 -- character has not been accumulated yet). On return Scan_Ptr points 449 -- past the closing quote of the string literal, Token and Token_Node 450 -- are set appropriately, and the checksum is updated. 451 452 procedure Skip_Other_Format_Characters; 453 -- Skips past any "other format" category characters at the current 454 -- cursor location (does not skip past spaces or any other characters). 455 456 function Start_Of_Wide_Character return Boolean; 457 -- Returns True if the scan pointer is pointing to the start of a wide 458 -- character sequence, does not modify the scan pointer in any case. 459 460 ----------------------- 461 -- Double_Char_Token -- 462 ----------------------- 463 464 function Double_Char_Token (C : Character) return Boolean is 465 begin 466 if Source (Scan_Ptr + 1) = C then 467 Accumulate_Checksum (C); 468 Scan_Ptr := Scan_Ptr + 2; 469 return True; 470 471 elsif Source (Scan_Ptr + 1) = ' ' 472 and then Source (Scan_Ptr + 2) = C 473 then 474 Scan_Ptr := Scan_Ptr + 1; 475 Error_Msg_S -- CODEFIX 476 ("no space allowed here"); 477 Scan_Ptr := Scan_Ptr + 2; 478 return True; 479 480 else 481 return False; 482 end if; 483 end Double_Char_Token; 484 485 ----------------------------- 486 -- Error_Illegal_Character -- 487 ----------------------------- 488 489 procedure Error_Illegal_Character is 490 begin 491 Error_Msg_S ("illegal character"); 492 Scan_Ptr := Scan_Ptr + 1; 493 end Error_Illegal_Character; 494 495 ---------------------------------- 496 -- Error_Illegal_Wide_Character -- 497 ---------------------------------- 498 499 procedure Error_Illegal_Wide_Character is 500 begin 501 Scan_Ptr := Scan_Ptr + 1; 502 Error_Msg ("illegal wide character", Wptr); 503 end Error_Illegal_Wide_Character; 504 505 ------------------------------- 506 -- Error_No_Double_Underline -- 507 ------------------------------- 508 509 procedure Error_No_Double_Underline is 510 begin 511 Underline_Found := False; 512 513 -- There are four cases, and we special case the messages 514 515 if Source (Scan_Ptr) = '_' then 516 if Source (Scan_Ptr - 1) = '_' then 517 Error_Msg_S -- CODEFIX 518 ("two consecutive underlines not permitted"); 519 else 520 Error_Msg_S ("underline cannot follow punctuation character"); 521 end if; 522 523 else 524 if Source (Scan_Ptr - 1) = '_' then 525 Error_Msg_S ("punctuation character cannot follow underline"); 526 else 527 Error_Msg_S 528 ("two consecutive punctuation characters not permitted"); 529 end if; 530 end if; 531 end Error_No_Double_Underline; 532 533 ---------- 534 -- Nlit -- 535 ---------- 536 537 procedure Nlit is 538 539 C : Character; 540 -- Current source program character 541 542 Base_Char : Character; 543 -- Either # or : (character at start of based number) 544 545 Base : Int; 546 -- Value of base 547 548 UI_Base : Uint; 549 -- Value of base in Uint format 550 551 UI_Int_Value : Uint; 552 -- Value of integer scanned by Scan_Integer in Uint format 553 554 UI_Num_Value : Uint; 555 -- Value of integer in numeric value being scanned 556 557 Scale : Int; 558 -- Scale value for real literal 559 560 UI_Scale : Uint; 561 -- Scale in Uint format 562 563 Exponent_Is_Negative : Boolean; 564 -- Set true for negative exponent 565 566 Extended_Digit_Value : Int; 567 -- Extended digit value 568 569 Point_Scanned : Boolean; 570 -- Flag for decimal point scanned in numeric literal 571 572 ----------------------- 573 -- Local Subprograms -- 574 ----------------------- 575 576 procedure Error_Digit_Expected; 577 -- Signal error of bad digit, Scan_Ptr points to the location at 578 -- which the digit was expected on input, and is unchanged on return. 579 580 procedure Scan_Integer; 581 -- Scan integer literal. On entry, Scan_Ptr points to a digit, on 582 -- exit Scan_Ptr points past the last character of the integer. 583 -- 584 -- For each digit encountered, UI_Int_Value is multiplied by 10, and 585 -- the value of the digit added to the result. In addition, the value 586 -- in Scale is decremented by one for each actual digit scanned. 587 588 -------------------------- 589 -- Error_Digit_Expected -- 590 -------------------------- 591 592 procedure Error_Digit_Expected is 593 begin 594 Error_Msg_S ("digit expected"); 595 end Error_Digit_Expected; 596 597 ------------------ 598 -- Scan_Integer -- 599 ------------------ 600 601 procedure Scan_Integer is 602 C : Character; 603 -- Next character scanned 604 605 begin 606 C := Source (Scan_Ptr); 607 608 -- Loop through digits (allowing underlines) 609 610 loop 611 Accumulate_Checksum (C); 612 UI_Int_Value := 613 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0')); 614 Scan_Ptr := Scan_Ptr + 1; 615 Scale := Scale - 1; 616 C := Source (Scan_Ptr); 617 618 -- Case of underline encountered 619 620 if C = '_' then 621 622 -- We do not accumulate the '_' in the checksum, so that 623 -- 1_234 is equivalent to 1234, and does not trigger 624 -- compilation for "minimal recompilation" (gnatmake -m). 625 626 loop 627 Scan_Ptr := Scan_Ptr + 1; 628 C := Source (Scan_Ptr); 629 exit when C /= '_'; 630 Error_No_Double_Underline; 631 end loop; 632 633 if C not in '0' .. '9' then 634 Error_Digit_Expected; 635 exit; 636 end if; 637 638 else 639 exit when C not in '0' .. '9'; 640 end if; 641 end loop; 642 end Scan_Integer; 643 644 -- Start of processing for Nlit 645 646 begin 647 Base := 10; 648 UI_Base := Uint_10; 649 UI_Int_Value := Uint_0; 650 Based_Literal_Uses_Colon := False; 651 Scale := 0; 652 Scan_Integer; 653 Point_Scanned := False; 654 UI_Num_Value := UI_Int_Value; 655 656 -- Various possibilities now for continuing the literal are period, 657 -- E/e (for exponent), or :/# (for based literal). 658 659 Scale := 0; 660 C := Source (Scan_Ptr); 661 662 if C = '.' then 663 664 -- Scan out point, but do not scan past .. which is a range 665 -- sequence, and must not be eaten up scanning a numeric literal. 666 667 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop 668 Accumulate_Checksum ('.'); 669 670 if Point_Scanned then 671 Error_Msg_S ("duplicate point ignored"); 672 end if; 673 674 Point_Scanned := True; 675 Scan_Ptr := Scan_Ptr + 1; 676 C := Source (Scan_Ptr); 677 678 if C not in '0' .. '9' then 679 Error_Msg 680 ("real literal cannot end with point", Scan_Ptr - 1); 681 else 682 Scan_Integer; 683 UI_Num_Value := UI_Int_Value; 684 end if; 685 end loop; 686 687 -- Based literal case. The base is the value we already scanned. 688 -- In the case of colon, we insist that the following character 689 -- is indeed an extended digit or a period. This catches a number 690 -- of common errors, as well as catching the well known tricky 691 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;" 692 693 elsif C = '#' 694 or else (C = ':' and then 695 (Source (Scan_Ptr + 1) = '.' 696 or else 697 Source (Scan_Ptr + 1) in '0' .. '9' 698 or else 699 Source (Scan_Ptr + 1) in 'A' .. 'Z' 700 or else 701 Source (Scan_Ptr + 1) in 'a' .. 'z')) 702 then 703 Accumulate_Checksum (C); 704 Base_Char := C; 705 UI_Base := UI_Int_Value; 706 707 if Base_Char = ':' then 708 Based_Literal_Uses_Colon := True; 709 end if; 710 711 if UI_Base < 2 or else UI_Base > 16 then 712 Error_Msg_SC ("base not 2-16"); 713 UI_Base := Uint_16; 714 end if; 715 716 Base := UI_To_Int (UI_Base); 717 Scan_Ptr := Scan_Ptr + 1; 718 719 -- Scan out extended integer [. integer] 720 721 C := Source (Scan_Ptr); 722 UI_Int_Value := Uint_0; 723 Scale := 0; 724 725 loop 726 if C in '0' .. '9' then 727 Accumulate_Checksum (C); 728 Extended_Digit_Value := 729 Int'(Character'Pos (C)) - Int'(Character'Pos ('0')); 730 731 elsif C in 'A' .. 'F' then 732 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32)); 733 Extended_Digit_Value := 734 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10; 735 736 elsif C in 'a' .. 'f' then 737 Accumulate_Checksum (C); 738 Extended_Digit_Value := 739 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10; 740 741 else 742 Error_Msg_S ("extended digit expected"); 743 exit; 744 end if; 745 746 if Extended_Digit_Value >= Base then 747 Error_Msg_S ("digit '>= base"); 748 end if; 749 750 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value; 751 Scale := Scale - 1; 752 Scan_Ptr := Scan_Ptr + 1; 753 C := Source (Scan_Ptr); 754 755 if C = '_' then 756 loop 757 Accumulate_Checksum ('_'); 758 Scan_Ptr := Scan_Ptr + 1; 759 C := Source (Scan_Ptr); 760 exit when C /= '_'; 761 Error_No_Double_Underline; 762 end loop; 763 764 elsif C = '.' then 765 Accumulate_Checksum ('.'); 766 767 if Point_Scanned then 768 Error_Msg_S ("duplicate point ignored"); 769 end if; 770 771 Scan_Ptr := Scan_Ptr + 1; 772 C := Source (Scan_Ptr); 773 Point_Scanned := True; 774 Scale := 0; 775 776 elsif C = Base_Char then 777 Accumulate_Checksum (C); 778 Scan_Ptr := Scan_Ptr + 1; 779 exit; 780 781 elsif C = '#' or else C = ':' then 782 Error_Msg_S ("based number delimiters must match"); 783 Scan_Ptr := Scan_Ptr + 1; 784 exit; 785 786 elsif not Identifier_Char (C) then 787 if Base_Char = '#' then 788 Error_Msg_S -- CODEFIX 789 ("missing '#"); 790 else 791 Error_Msg_S -- CODEFIX 792 ("missing ':"); 793 end if; 794 795 exit; 796 end if; 797 798 end loop; 799 800 UI_Num_Value := UI_Int_Value; 801 end if; 802 803 -- Scan out exponent 804 805 if not Point_Scanned then 806 Scale := 0; 807 UI_Scale := Uint_0; 808 else 809 UI_Scale := UI_From_Int (Scale); 810 end if; 811 812 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then 813 Accumulate_Checksum ('e'); 814 Scan_Ptr := Scan_Ptr + 1; 815 Exponent_Is_Negative := False; 816 817 if Source (Scan_Ptr) = '+' then 818 Accumulate_Checksum ('+'); 819 Scan_Ptr := Scan_Ptr + 1; 820 821 elsif Source (Scan_Ptr) = '-' then 822 Accumulate_Checksum ('-'); 823 824 if not Point_Scanned then 825 Error_Msg_S 826 ("negative exponent not allowed for integer literal"); 827 else 828 Exponent_Is_Negative := True; 829 end if; 830 831 Scan_Ptr := Scan_Ptr + 1; 832 end if; 833 834 UI_Int_Value := Uint_0; 835 836 if Source (Scan_Ptr) in '0' .. '9' then 837 Scan_Integer; 838 else 839 Error_Digit_Expected; 840 end if; 841 842 if Exponent_Is_Negative then 843 UI_Scale := UI_Scale - UI_Int_Value; 844 else 845 UI_Scale := UI_Scale + UI_Int_Value; 846 end if; 847 end if; 848 849 -- Case of real literal to be returned 850 851 if Point_Scanned then 852 Token := Tok_Real_Literal; 853 Real_Literal_Value := 854 UR_From_Components ( 855 Num => UI_Num_Value, 856 Den => -UI_Scale, 857 Rbase => Base); 858 859 -- Case of integer literal to be returned 860 861 else 862 Token := Tok_Integer_Literal; 863 864 if UI_Scale = 0 then 865 Int_Literal_Value := UI_Num_Value; 866 867 -- Avoid doing possibly expensive calculations in cases like 868 -- parsing 163E800_000# when semantics will not be done anyway. 869 -- This is especially useful when parsing garbled input. 870 871 elsif Operating_Mode /= Check_Syntax 872 and then (Serious_Errors_Detected = 0 or else Try_Semantics) 873 then 874 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale; 875 876 else 877 Int_Literal_Value := No_Uint; 878 end if; 879 end if; 880 881 if Checksum_Accumulate_Token_Checksum then 882 Accumulate_Token_Checksum; 883 end if; 884 885 return; 886 end Nlit; 887 888 ---------- 889 -- Slit -- 890 ---------- 891 892 procedure Slit is 893 894 Delimiter : Character; 895 -- Delimiter (first character of string) 896 897 C : Character; 898 -- Current source program character 899 900 Code : Char_Code; 901 -- Current character code value 902 903 Err : Boolean; 904 -- Error flag for Scan_Wide call 905 906 String_Start : Source_Ptr; 907 -- Point to first character of string 908 909 procedure Error_Bad_String_Char; 910 -- Signal bad character in string/character literal. On entry 911 -- Scan_Ptr points to the improper character encountered during the 912 -- scan. Scan_Ptr is not modified, so it still points to the bad 913 -- character on return. 914 915 procedure Error_Unterminated_String; 916 -- Procedure called if a line terminator character is encountered 917 -- during scanning a string, meaning that the string is not properly 918 -- terminated. 919 920 procedure Set_String; 921 -- Procedure used to distinguish between string and operator symbol. 922 -- On entry the string has been scanned out, and its characters start 923 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token 924 -- is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate, 925 -- and Token_Node is appropriately initialized. In addition, in the 926 -- operator symbol case, Token_Name is appropriately set, and the 927 -- flags [Wide_]Wide_Character_Found are set appropriately. 928 929 --------------------------- 930 -- Error_Bad_String_Char -- 931 --------------------------- 932 933 procedure Error_Bad_String_Char is 934 C : constant Character := Source (Scan_Ptr); 935 936 begin 937 if C = HT then 938 Error_Msg_S ("horizontal tab not allowed in string"); 939 940 elsif C = VT or else C = FF then 941 Error_Msg_S ("format effector not allowed in string"); 942 943 elsif C in Upper_Half_Character then 944 Error_Msg_S ("(Ada 83) upper half character not allowed"); 945 946 else 947 Error_Msg_S ("control character not allowed in string"); 948 end if; 949 end Error_Bad_String_Char; 950 951 ------------------------------- 952 -- Error_Unterminated_String -- 953 ------------------------------- 954 955 procedure Error_Unterminated_String is 956 S : Source_Ptr; 957 958 begin 959 -- An interesting little refinement. Consider the following 960 -- examples: 961 962 -- A := "this is an unterminated string; 963 -- A := "this is an unterminated string & 964 -- P(A, "this is a parameter that didn't get terminated); 965 -- P("this is a parameter that didn't get terminated, A); 966 967 -- We fiddle a little to do slightly better placement in these 968 -- cases also if there is white space at the end of the line we 969 -- place the flag at the start of this white space, not at the 970 -- end. Note that we only have to test for blanks, since tabs 971 -- aren't allowed in strings in the first place and would have 972 -- caused an error message. 973 974 -- Two more cases that we treat specially are: 975 976 -- A := "this string uses the wrong terminator' 977 -- A := "this string uses the wrong terminator' & 978 979 -- In these cases we give a different error message as well 980 981 -- We actually reposition the scan pointer to the point where we 982 -- place the flag in these cases, since it seems a better bet on 983 -- the original intention. 984 985 while Source (Scan_Ptr - 1) = ' ' 986 or else Source (Scan_Ptr - 1) = '&' 987 loop 988 Scan_Ptr := Scan_Ptr - 1; 989 Unstore_String_Char; 990 end loop; 991 992 -- Check for case of incorrect string terminator, but single quote 993 -- is not considered incorrect if the opening terminator misused 994 -- a single quote (error message already given). 995 996 if Delimiter /= ''' 997 and then Source (Scan_Ptr - 1) = ''' 998 then 999 Unstore_String_Char; 1000 Error_Msg 1001 ("incorrect string terminator character", Scan_Ptr - 1); 1002 return; 1003 end if; 1004 1005 -- Backup over semicolon or right-paren/semicolon sequence 1006 1007 if Source (Scan_Ptr - 1) = ';' then 1008 Scan_Ptr := Scan_Ptr - 1; 1009 Unstore_String_Char; 1010 1011 if Source (Scan_Ptr - 1) = ')' then 1012 Scan_Ptr := Scan_Ptr - 1; 1013 Unstore_String_Char; 1014 end if; 1015 end if; 1016 1017 -- See if there is a comma in the string, if so, guess that 1018 -- the first comma terminates the string. 1019 1020 S := String_Start; 1021 while S < Scan_Ptr loop 1022 if Source (S) = ',' then 1023 while Scan_Ptr > S loop 1024 Scan_Ptr := Scan_Ptr - 1; 1025 Unstore_String_Char; 1026 end loop; 1027 1028 exit; 1029 end if; 1030 1031 S := S + 1; 1032 end loop; 1033 1034 -- Now we have adjusted the scan pointer, give message 1035 1036 Error_Msg_S -- CODEFIX 1037 ("missing string quote"); 1038 end Error_Unterminated_String; 1039 1040 ---------------- 1041 -- Set_String -- 1042 ---------------- 1043 1044 procedure Set_String is 1045 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2); 1046 C1 : Character; 1047 C2 : Character; 1048 C3 : Character; 1049 1050 begin 1051 -- Token_Name is currently set to Error_Name. The following 1052 -- section of code resets Token_Name to the proper Name_Op_xx 1053 -- value if the string is a valid operator symbol, otherwise it is 1054 -- left set to Error_Name. 1055 1056 if Slen = 1 then 1057 C1 := Source (Token_Ptr + 1); 1058 1059 case C1 is 1060 when '=' => 1061 Token_Name := Name_Op_Eq; 1062 1063 when '>' => 1064 Token_Name := Name_Op_Gt; 1065 1066 when '<' => 1067 Token_Name := Name_Op_Lt; 1068 1069 when '+' => 1070 Token_Name := Name_Op_Add; 1071 1072 when '-' => 1073 Token_Name := Name_Op_Subtract; 1074 1075 when '&' => 1076 Token_Name := Name_Op_Concat; 1077 1078 when '*' => 1079 Token_Name := Name_Op_Multiply; 1080 1081 when '/' => 1082 Token_Name := Name_Op_Divide; 1083 1084 when others => 1085 null; 1086 end case; 1087 1088 elsif Slen = 2 then 1089 C1 := Source (Token_Ptr + 1); 1090 C2 := Source (Token_Ptr + 2); 1091 1092 if C1 = '*' and then C2 = '*' then 1093 Token_Name := Name_Op_Expon; 1094 1095 elsif C2 = '=' then 1096 1097 if C1 = '/' then 1098 Token_Name := Name_Op_Ne; 1099 elsif C1 = '<' then 1100 Token_Name := Name_Op_Le; 1101 elsif C1 = '>' then 1102 Token_Name := Name_Op_Ge; 1103 end if; 1104 1105 elsif (C1 = 'O' or else C1 = 'o') and then -- OR 1106 (C2 = 'R' or else C2 = 'r') 1107 then 1108 Token_Name := Name_Op_Or; 1109 end if; 1110 1111 elsif Slen = 3 then 1112 C1 := Source (Token_Ptr + 1); 1113 C2 := Source (Token_Ptr + 2); 1114 C3 := Source (Token_Ptr + 3); 1115 1116 if (C1 = 'A' or else C1 = 'a') and then -- AND 1117 (C2 = 'N' or else C2 = 'n') and then 1118 (C3 = 'D' or else C3 = 'd') 1119 then 1120 Token_Name := Name_Op_And; 1121 1122 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS 1123 (C2 = 'B' or else C2 = 'b') and then 1124 (C3 = 'S' or else C3 = 's') 1125 then 1126 Token_Name := Name_Op_Abs; 1127 1128 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD 1129 (C2 = 'O' or else C2 = 'o') and then 1130 (C3 = 'D' or else C3 = 'd') 1131 then 1132 Token_Name := Name_Op_Mod; 1133 1134 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT 1135 (C2 = 'O' or else C2 = 'o') and then 1136 (C3 = 'T' or else C3 = 't') 1137 then 1138 Token_Name := Name_Op_Not; 1139 1140 elsif (C1 = 'R' or else C1 = 'r') and then -- REM 1141 (C2 = 'E' or else C2 = 'e') and then 1142 (C3 = 'M' or else C3 = 'm') 1143 then 1144 Token_Name := Name_Op_Rem; 1145 1146 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR 1147 (C2 = 'O' or else C2 = 'o') and then 1148 (C3 = 'R' or else C3 = 'r') 1149 then 1150 Token_Name := Name_Op_Xor; 1151 end if; 1152 1153 end if; 1154 1155 -- If it is an operator symbol, then Token_Name is set. If it is 1156 -- some other string value, then Token_Name still contains 1157 -- Error_Name. 1158 1159 if Token_Name = Error_Name then 1160 Token := Tok_String_Literal; 1161 1162 else 1163 Token := Tok_Operator_Symbol; 1164 end if; 1165 end Set_String; 1166 1167 -- Start of processing for Slit 1168 1169 begin 1170 -- On entry, Scan_Ptr points to the opening character of the string 1171 -- which is either a percent, double quote, or apostrophe (single 1172 -- quote). The latter case is an error detected by the character 1173 -- literal circuit. 1174 1175 String_Start := Scan_Ptr; 1176 1177 Delimiter := Source (Scan_Ptr); 1178 Accumulate_Checksum (Delimiter); 1179 1180 Start_String; 1181 Wide_Character_Found := False; 1182 Wide_Wide_Character_Found := False; 1183 Scan_Ptr := Scan_Ptr + 1; 1184 1185 -- Loop to scan out characters of string literal 1186 1187 loop 1188 C := Source (Scan_Ptr); 1189 1190 if C = Delimiter then 1191 Accumulate_Checksum (C); 1192 Scan_Ptr := Scan_Ptr + 1; 1193 exit when Source (Scan_Ptr) /= Delimiter; 1194 Code := Get_Char_Code (C); 1195 Accumulate_Checksum (C); 1196 Scan_Ptr := Scan_Ptr + 1; 1197 1198 else 1199 if C = '"' and then Delimiter = '%' then 1200 Error_Msg_S 1201 ("quote not allowed in percent delimited string"); 1202 Code := Get_Char_Code (C); 1203 Scan_Ptr := Scan_Ptr + 1; 1204 1205 elsif Start_Of_Wide_Character then 1206 Wptr := Scan_Ptr; 1207 Scan_Wide (Source, Scan_Ptr, Code, Err); 1208 1209 if Err then 1210 Error_Illegal_Wide_Character; 1211 Code := Get_Char_Code (' '); 1212 end if; 1213 1214 Accumulate_Checksum (Code); 1215 1216 -- In Ada 95 mode we allow any wide characters in a string 1217 -- but in Ada 2005, the set of characters allowed has been 1218 -- restricted to graphic characters. 1219 1220 if Ada_Version >= Ada_2005 1221 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1222 then 1223 Error_Msg 1224 ("(Ada 2005) non-graphic character not permitted " & 1225 "in string literal", Wptr); 1226 end if; 1227 1228 else 1229 Accumulate_Checksum (C); 1230 1231 if C not in Graphic_Character then 1232 if C in Line_Terminator then 1233 Error_Unterminated_String; 1234 exit; 1235 1236 elsif C in Upper_Half_Character then 1237 if Ada_Version = Ada_83 then 1238 Error_Bad_String_Char; 1239 end if; 1240 1241 else 1242 Error_Bad_String_Char; 1243 end if; 1244 end if; 1245 1246 Code := Get_Char_Code (C); 1247 Scan_Ptr := Scan_Ptr + 1; 1248 end if; 1249 end if; 1250 1251 Store_String_Char (Code); 1252 1253 if not In_Character_Range (Code) then 1254 if In_Wide_Character_Range (Code) then 1255 Wide_Character_Found := True; 1256 else 1257 Wide_Wide_Character_Found := True; 1258 end if; 1259 end if; 1260 end loop; 1261 1262 String_Literal_Id := End_String; 1263 Set_String; 1264 return; 1265 end Slit; 1266 1267 ---------------------------------- 1268 -- Skip_Other_Format_Characters -- 1269 ---------------------------------- 1270 1271 procedure Skip_Other_Format_Characters is 1272 P : Source_Ptr; 1273 Code : Char_Code; 1274 Err : Boolean; 1275 1276 begin 1277 while Start_Of_Wide_Character loop 1278 P := Scan_Ptr; 1279 Scan_Wide (Source, Scan_Ptr, Code, Err); 1280 1281 if not Is_UTF_32_Other (UTF_32 (Code)) then 1282 Scan_Ptr := P; 1283 return; 1284 end if; 1285 end loop; 1286 end Skip_Other_Format_Characters; 1287 1288 ----------------------------- 1289 -- Start_Of_Wide_Character -- 1290 ----------------------------- 1291 1292 function Start_Of_Wide_Character return Boolean is 1293 C : constant Character := Source (Scan_Ptr); 1294 1295 begin 1296 -- ESC encoding method with ESC present 1297 1298 if C = ESC 1299 and then Wide_Character_Encoding_Method in WC_ESC_Encoding_Method 1300 then 1301 return True; 1302 1303 -- Upper half character with upper half encoding 1304 1305 elsif C in Upper_Half_Character and then Upper_Half_Encoding then 1306 return True; 1307 1308 -- Brackets encoding 1309 1310 elsif C = '[' 1311 and then Source (Scan_Ptr + 1) = '"' 1312 and then Identifier_Char (Source (Scan_Ptr + 2)) 1313 then 1314 return True; 1315 1316 -- Not the start of a wide character 1317 1318 else 1319 return False; 1320 end if; 1321 end Start_Of_Wide_Character; 1322 1323 -- Start of processing for Scan 1324 1325 begin 1326 Prev_Token := Token; 1327 Prev_Token_Ptr := Token_Ptr; 1328 Token_Name := Error_Name; 1329 1330 -- The following loop runs more than once only if a format effector 1331 -- (tab, vertical tab, form feed, line feed, carriage return) is 1332 -- encountered and skipped, or some error situation, such as an 1333 -- illegal character, is encountered. 1334 1335 <<Scan_Next_Character>> 1336 1337 loop 1338 -- Skip past blanks, loop is opened up for speed 1339 1340 while Source (Scan_Ptr) = ' ' loop 1341 if Source (Scan_Ptr + 1) /= ' ' then 1342 Scan_Ptr := Scan_Ptr + 1; 1343 exit; 1344 end if; 1345 1346 if Source (Scan_Ptr + 2) /= ' ' then 1347 Scan_Ptr := Scan_Ptr + 2; 1348 exit; 1349 end if; 1350 1351 if Source (Scan_Ptr + 3) /= ' ' then 1352 Scan_Ptr := Scan_Ptr + 3; 1353 exit; 1354 end if; 1355 1356 if Source (Scan_Ptr + 4) /= ' ' then 1357 Scan_Ptr := Scan_Ptr + 4; 1358 exit; 1359 end if; 1360 1361 if Source (Scan_Ptr + 5) /= ' ' then 1362 Scan_Ptr := Scan_Ptr + 5; 1363 exit; 1364 end if; 1365 1366 if Source (Scan_Ptr + 6) /= ' ' then 1367 Scan_Ptr := Scan_Ptr + 6; 1368 exit; 1369 end if; 1370 1371 if Source (Scan_Ptr + 7) /= ' ' then 1372 Scan_Ptr := Scan_Ptr + 7; 1373 exit; 1374 end if; 1375 1376 Scan_Ptr := Scan_Ptr + 8; 1377 end loop; 1378 1379 -- We are now at a non-blank character, which is the first character 1380 -- of the token we will scan, and hence the value of Token_Ptr. 1381 1382 Token_Ptr := Scan_Ptr; 1383 1384 -- Here begins the main case statement which transfers control on the 1385 -- basis of the non-blank character we have encountered. 1386 1387 case Source (Scan_Ptr) is 1388 1389 -- Line terminator characters 1390 1391 when CR | LF | FF | VT => 1392 goto Scan_Line_Terminator; 1393 1394 -- Horizontal tab, just skip past it 1395 1396 when HT => 1397 if Style_Check then 1398 Style.Check_HT; 1399 end if; 1400 1401 Scan_Ptr := Scan_Ptr + 1; 1402 1403 -- End of file character, treated as an end of file only if it is 1404 -- the last character in the buffer, otherwise it is ignored. 1405 1406 when EOF => 1407 if Scan_Ptr = Source_Last (Current_Source_File) then 1408 Check_End_Of_Line; 1409 1410 if Style_Check then 1411 Style.Check_EOF; 1412 end if; 1413 1414 Token := Tok_EOF; 1415 return; 1416 else 1417 Scan_Ptr := Scan_Ptr + 1; 1418 end if; 1419 1420 -- Ampersand 1421 1422 when '&' => 1423 Accumulate_Checksum ('&'); 1424 1425 if Source (Scan_Ptr + 1) = '&' then 1426 Error_Msg_S -- CODEFIX 1427 ("'&'& should be `AND THEN`"); 1428 Scan_Ptr := Scan_Ptr + 2; 1429 Token := Tok_And; 1430 return; 1431 1432 else 1433 Scan_Ptr := Scan_Ptr + 1; 1434 Token := Tok_Ampersand; 1435 return; 1436 end if; 1437 1438 -- Asterisk (can be multiplication operator or double asterisk which 1439 -- is the exponentiation compound delimiter). 1440 1441 when '*' => 1442 Accumulate_Checksum ('*'); 1443 1444 if Source (Scan_Ptr + 1) = '*' then 1445 Accumulate_Checksum ('*'); 1446 Scan_Ptr := Scan_Ptr + 2; 1447 Token := Tok_Double_Asterisk; 1448 return; 1449 1450 else 1451 Scan_Ptr := Scan_Ptr + 1; 1452 Token := Tok_Asterisk; 1453 return; 1454 end if; 1455 1456 -- Colon, which can either be an isolated colon, or part of an 1457 -- assignment compound delimiter. 1458 1459 when ':' => 1460 Accumulate_Checksum (':'); 1461 1462 if Double_Char_Token ('=') then 1463 Token := Tok_Colon_Equal; 1464 1465 if Style_Check then 1466 Style.Check_Colon_Equal; 1467 end if; 1468 1469 return; 1470 1471 elsif Source (Scan_Ptr + 1) = '-' 1472 and then Source (Scan_Ptr + 2) /= '-' 1473 then 1474 Token := Tok_Colon_Equal; 1475 Error_Msg -- CODEFIX 1476 (":- should be :=", Scan_Ptr); 1477 Scan_Ptr := Scan_Ptr + 2; 1478 return; 1479 1480 else 1481 Scan_Ptr := Scan_Ptr + 1; 1482 Token := Tok_Colon; 1483 1484 if Style_Check then 1485 Style.Check_Colon; 1486 end if; 1487 1488 return; 1489 end if; 1490 1491 -- Left parenthesis 1492 1493 when '(' => 1494 Accumulate_Checksum ('('); 1495 Scan_Ptr := Scan_Ptr + 1; 1496 Token := Tok_Left_Paren; 1497 1498 if Style_Check then 1499 Style.Check_Left_Paren; 1500 end if; 1501 1502 return; 1503 1504 -- Left bracket 1505 1506 when '[' => 1507 if Source (Scan_Ptr + 1) = '"' then 1508 goto Scan_Wide_Character; 1509 1510 else 1511 Error_Msg_S ("illegal character, replaced by ""("""); 1512 Scan_Ptr := Scan_Ptr + 1; 1513 Token := Tok_Left_Paren; 1514 return; 1515 end if; 1516 1517 -- Left brace 1518 1519 when '{' => 1520 Error_Msg_S ("illegal character, replaced by ""("""); 1521 Scan_Ptr := Scan_Ptr + 1; 1522 Token := Tok_Left_Paren; 1523 return; 1524 1525 -- Comma 1526 1527 when ',' => 1528 Accumulate_Checksum (','); 1529 Scan_Ptr := Scan_Ptr + 1; 1530 Token := Tok_Comma; 1531 1532 if Style_Check then 1533 Style.Check_Comma; 1534 end if; 1535 1536 return; 1537 1538 -- Dot, which is either an isolated period, or part of a double dot 1539 -- compound delimiter sequence. We also check for the case of a 1540 -- digit following the period, to give a better error message. 1541 1542 when '.' => 1543 Accumulate_Checksum ('.'); 1544 1545 if Double_Char_Token ('.') then 1546 Token := Tok_Dot_Dot; 1547 1548 if Style_Check then 1549 Style.Check_Dot_Dot; 1550 end if; 1551 1552 return; 1553 1554 elsif Source (Scan_Ptr + 1) in '0' .. '9' then 1555 Error_Msg_S ("numeric literal cannot start with point"); 1556 Scan_Ptr := Scan_Ptr + 1; 1557 1558 else 1559 Scan_Ptr := Scan_Ptr + 1; 1560 Token := Tok_Dot; 1561 return; 1562 end if; 1563 1564 -- Equal, which can either be an equality operator, or part of the 1565 -- arrow (=>) compound delimiter. 1566 1567 when '=' => 1568 Accumulate_Checksum ('='); 1569 1570 if Double_Char_Token ('>') then 1571 Token := Tok_Arrow; 1572 1573 if Style_Check then 1574 Style.Check_Arrow (Inside_Depends); 1575 end if; 1576 1577 return; 1578 1579 elsif Source (Scan_Ptr + 1) = '=' then 1580 Error_Msg_S -- CODEFIX 1581 ("== should be ="); 1582 Scan_Ptr := Scan_Ptr + 1; 1583 end if; 1584 1585 Scan_Ptr := Scan_Ptr + 1; 1586 Token := Tok_Equal; 1587 return; 1588 1589 -- Greater than, which can be a greater than operator, greater than 1590 -- or equal operator, or first character of a right label bracket. 1591 1592 when '>' => 1593 Accumulate_Checksum ('>'); 1594 1595 if Double_Char_Token ('=') then 1596 Token := Tok_Greater_Equal; 1597 return; 1598 1599 elsif Double_Char_Token ('>') then 1600 Token := Tok_Greater_Greater; 1601 return; 1602 1603 else 1604 Scan_Ptr := Scan_Ptr + 1; 1605 Token := Tok_Greater; 1606 return; 1607 end if; 1608 1609 -- Less than, which can be a less than operator, less than or equal 1610 -- operator, or the first character of a left label bracket, or the 1611 -- first character of a box (<>) compound delimiter. 1612 1613 when '<' => 1614 Accumulate_Checksum ('<'); 1615 1616 if Double_Char_Token ('=') then 1617 Token := Tok_Less_Equal; 1618 return; 1619 1620 elsif Double_Char_Token ('>') then 1621 Token := Tok_Box; 1622 1623 if Style_Check then 1624 Style.Check_Box; 1625 end if; 1626 1627 return; 1628 1629 elsif Double_Char_Token ('<') then 1630 Token := Tok_Less_Less; 1631 return; 1632 1633 else 1634 Scan_Ptr := Scan_Ptr + 1; 1635 Token := Tok_Less; 1636 return; 1637 end if; 1638 1639 -- Minus, which is either a subtraction operator, or the first 1640 -- character of double minus starting a comment 1641 1642 when '-' => Minus_Case : begin 1643 if Source (Scan_Ptr + 1) = '>' then 1644 Error_Msg_S ("invalid token"); 1645 Scan_Ptr := Scan_Ptr + 2; 1646 Token := Tok_Arrow; 1647 return; 1648 1649 elsif Source (Scan_Ptr + 1) /= '-' then 1650 Accumulate_Checksum ('-'); 1651 Scan_Ptr := Scan_Ptr + 1; 1652 Token := Tok_Minus; 1653 return; 1654 1655 -- Comment 1656 1657 else -- Source (Scan_Ptr + 1) = '-' then 1658 if Style_Check then 1659 Style.Check_Comment; 1660 end if; 1661 1662 Scan_Ptr := Scan_Ptr + 2; 1663 1664 -- If we are in preprocessor mode with Replace_In_Comments set, 1665 -- then we return the "--" as a token on its own. 1666 1667 if Replace_In_Comments then 1668 Token := Tok_Comment; 1669 return; 1670 end if; 1671 1672 -- Otherwise scan out the comment 1673 1674 Start_Of_Comment := Scan_Ptr; 1675 1676 -- Loop to scan comment (this loop runs more than once only if 1677 -- a horizontal tab or other non-graphic character is scanned) 1678 1679 loop 1680 -- Scan to non graphic character (opened up for speed) 1681 1682 -- Note that we just eat left brackets, which means that 1683 -- bracket notation cannot be used for end of line 1684 -- characters in comments. This seems a reasonable choice, 1685 -- since no one would ever use brackets notation in a real 1686 -- program in this situation, and if we allow brackets 1687 -- notation, we forbid some valid comments which contain a 1688 -- brackets sequence that happens to match an end of line 1689 -- character. 1690 1691 loop 1692 exit when Source (Scan_Ptr) not in Graphic_Character; 1693 Scan_Ptr := Scan_Ptr + 1; 1694 exit when Source (Scan_Ptr) not in Graphic_Character; 1695 Scan_Ptr := Scan_Ptr + 1; 1696 exit when Source (Scan_Ptr) not in Graphic_Character; 1697 Scan_Ptr := Scan_Ptr + 1; 1698 exit when Source (Scan_Ptr) not in Graphic_Character; 1699 Scan_Ptr := Scan_Ptr + 1; 1700 exit when Source (Scan_Ptr) not in Graphic_Character; 1701 Scan_Ptr := Scan_Ptr + 1; 1702 end loop; 1703 1704 -- Keep going if horizontal tab 1705 1706 if Source (Scan_Ptr) = HT then 1707 if Style_Check then 1708 Style.Check_HT; 1709 end if; 1710 1711 Scan_Ptr := Scan_Ptr + 1; 1712 1713 -- Terminate scan of comment if line terminator 1714 1715 elsif Source (Scan_Ptr) in Line_Terminator then 1716 exit; 1717 1718 -- Terminate scan of comment if end of file encountered 1719 -- (embedded EOF character or real last character in file) 1720 1721 elsif Source (Scan_Ptr) = EOF then 1722 exit; 1723 1724 -- If we have a wide character, we have to scan it out, 1725 -- because it might be a legitimate line terminator 1726 1727 elsif Start_Of_Wide_Character then 1728 declare 1729 Wptr : constant Source_Ptr := Scan_Ptr; 1730 Code : Char_Code; 1731 Err : Boolean; 1732 1733 begin 1734 Scan_Wide (Source, Scan_Ptr, Code, Err); 1735 1736 -- If not well formed wide character, then just skip 1737 -- past it and ignore it. 1738 1739 if Err then 1740 Scan_Ptr := Wptr + 1; 1741 1742 -- If UTF_32 terminator, terminate comment scan 1743 1744 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 1745 Scan_Ptr := Wptr; 1746 exit; 1747 end if; 1748 end; 1749 1750 -- Keep going if character in 80-FF range, or is ESC. These 1751 -- characters are allowed in comments by RM-2.1(1), 2.7(2). 1752 -- They are allowed even in Ada 83 mode according to the 1753 -- approved AI. ESC was added to the AI in June 93. 1754 1755 elsif Source (Scan_Ptr) in Upper_Half_Character 1756 or else Source (Scan_Ptr) = ESC 1757 then 1758 Scan_Ptr := Scan_Ptr + 1; 1759 1760 -- Otherwise we have an illegal comment character, ignore 1761 -- this error in relaxed semantics mode. 1762 1763 else 1764 if Relaxed_RM_Semantics then 1765 Scan_Ptr := Scan_Ptr + 1; 1766 else 1767 Error_Illegal_Character; 1768 end if; 1769 end if; 1770 end loop; 1771 1772 -- Note that, except when comments are tokens, we do NOT 1773 -- execute a return here, instead we fall through to reexecute 1774 -- the scan loop to look for a token. 1775 1776 if Comment_Is_Token then 1777 Name_Len := Integer (Scan_Ptr - Start_Of_Comment); 1778 Name_Buffer (1 .. Name_Len) := 1779 String (Source (Start_Of_Comment .. Scan_Ptr - 1)); 1780 Comment_Id := Name_Find; 1781 Token := Tok_Comment; 1782 return; 1783 end if; 1784 1785 -- If the SPARK restriction is set for this unit, then generate 1786 -- a token Tok_SPARK_Hide for a SPARK HIDE directive. 1787 1788 if Restriction_Check_Required (SPARK_05) 1789 and then Source (Start_Of_Comment) = '#' 1790 then 1791 declare 1792 Scan_SPARK_Ptr : Source_Ptr; 1793 1794 begin 1795 Scan_SPARK_Ptr := Start_Of_Comment + 1; 1796 1797 -- Scan out blanks 1798 1799 while Source (Scan_SPARK_Ptr) = ' ' 1800 or else Source (Scan_SPARK_Ptr) = HT 1801 loop 1802 Scan_SPARK_Ptr := Scan_SPARK_Ptr + 1; 1803 end loop; 1804 1805 -- Recognize HIDE directive. SPARK input cannot be 1806 -- encoded as wide characters, so only deal with 1807 -- lower/upper case. 1808 1809 if (Source (Scan_SPARK_Ptr) = 'h' 1810 or else Source (Scan_SPARK_Ptr) = 'H') 1811 and then (Source (Scan_SPARK_Ptr + 1) = 'i' 1812 or else Source (Scan_SPARK_Ptr + 1) = 'I') 1813 and then (Source (Scan_SPARK_Ptr + 2) = 'd' 1814 or else Source (Scan_SPARK_Ptr + 2) = 'D') 1815 and then (Source (Scan_SPARK_Ptr + 3) = 'e' 1816 or else Source (Scan_SPARK_Ptr + 3) = 'E') 1817 and then (Source (Scan_SPARK_Ptr + 4) = ' ' 1818 or else Source (Scan_SPARK_Ptr + 4) = HT) 1819 then 1820 Token := Tok_SPARK_Hide; 1821 return; 1822 end if; 1823 end; 1824 end if; 1825 end if; 1826 end Minus_Case; 1827 1828 -- Double quote or percent starting a string literal 1829 1830 when '"' | '%' => 1831 Slit; 1832 Post_Scan; 1833 return; 1834 1835 -- Apostrophe. This can either be the start of a character literal, 1836 -- or an isolated apostrophe used in a qualified expression or an 1837 -- attribute. In the following: 1838 1839 -- A := CHARACTER'('A'); 1840 1841 -- the first apostrophe is treated as an isolated apostrophe, and the 1842 -- second one is treated as the start of the character literal 'A'. 1843 -- Note that RM-2.2(7) does not require a separator between "'" and 1844 -- "(" in the above, so we cannot use lookahead to distinguish the 1845 -- cases; we use look-back instead. Analysis of the grammar shows 1846 -- that some tokens can be followed by an apostrophe, and some by a 1847 -- character literal, but none by both. Some cannot be followed by 1848 -- either, so it doesn't matter what we do in those cases, except to 1849 -- get good error behavior. 1850 1851 when ''' => Char_Literal_Case : declare 1852 Code : Char_Code; 1853 Err : Boolean; 1854 1855 begin 1856 Accumulate_Checksum ('''); 1857 Scan_Ptr := Scan_Ptr + 1; 1858 1859 -- Distinguish between apostrophe and character literal. It's an 1860 -- apostrophe if the previous token is one of the following. 1861 -- Reserved words are included for things like A.all'Address and 1862 -- T'Digits'Img. Strings literals are included for things like 1863 -- "abs"'Address. Other literals are included to give better error 1864 -- behavior for illegal cases like 123'Img. 1865 1866 if Prev_Token = Tok_Identifier 1867 or else Prev_Token = Tok_Right_Paren 1868 or else Prev_Token = Tok_All 1869 or else Prev_Token = Tok_Delta 1870 or else Prev_Token = Tok_Digits 1871 or else Prev_Token = Tok_Project 1872 or else Prev_Token in Token_Class_Literal 1873 then 1874 Token := Tok_Apostrophe; 1875 1876 if Style_Check then 1877 Style.Check_Apostrophe; 1878 end if; 1879 1880 return; 1881 1882 -- Otherwise the apostrophe starts a character literal 1883 1884 else 1885 -- Case of wide character literal 1886 1887 if Start_Of_Wide_Character then 1888 Wptr := Scan_Ptr; 1889 Scan_Wide (Source, Scan_Ptr, Code, Err); 1890 Accumulate_Checksum (Code); 1891 1892 if Err then 1893 Error_Illegal_Wide_Character; 1894 Code := Character'Pos (' '); 1895 1896 -- In Ada 95 mode we allow any wide character in a character 1897 -- literal, but in Ada 2005, the set of characters allowed 1898 -- is restricted to graphic characters. 1899 1900 elsif Ada_Version >= Ada_2005 1901 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1902 then 1903 Error_Msg -- CODEFIX???? 1904 ("(Ada 2005) non-graphic character not permitted " & 1905 "in character literal", Wptr); 1906 end if; 1907 1908 if Source (Scan_Ptr) /= ''' then 1909 Error_Msg_S ("missing apostrophe"); 1910 else 1911 Scan_Ptr := Scan_Ptr + 1; 1912 end if; 1913 1914 -- If we do not find a closing quote in the expected place then 1915 -- assume that we have a misguided attempt at a string literal. 1916 1917 -- However, if previous token is RANGE, then we return an 1918 -- apostrophe instead since this gives better error recovery 1919 1920 elsif Source (Scan_Ptr + 1) /= ''' then 1921 if Prev_Token = Tok_Range then 1922 Token := Tok_Apostrophe; 1923 return; 1924 1925 else 1926 Scan_Ptr := Scan_Ptr - 1; 1927 Error_Msg_S 1928 ("strings are delimited by double quote character"); 1929 Slit; 1930 Post_Scan; 1931 return; 1932 end if; 1933 1934 -- Otherwise we have a (non-wide) character literal 1935 1936 else 1937 Accumulate_Checksum (Source (Scan_Ptr)); 1938 1939 if Source (Scan_Ptr) not in Graphic_Character then 1940 if Source (Scan_Ptr) in Upper_Half_Character then 1941 if Ada_Version = Ada_83 then 1942 Error_Illegal_Character; 1943 end if; 1944 1945 else 1946 Error_Illegal_Character; 1947 end if; 1948 end if; 1949 1950 Code := Get_Char_Code (Source (Scan_Ptr)); 1951 Scan_Ptr := Scan_Ptr + 2; 1952 end if; 1953 1954 -- Fall through here with Scan_Ptr updated past the closing 1955 -- quote, and Code set to the Char_Code value for the literal 1956 1957 Accumulate_Checksum ('''); 1958 Token := Tok_Char_Literal; 1959 Set_Character_Literal_Name (Code); 1960 Token_Name := Name_Find; 1961 Character_Code := Code; 1962 Post_Scan; 1963 return; 1964 end if; 1965 end Char_Literal_Case; 1966 1967 -- Right parenthesis 1968 1969 when ')' => 1970 Accumulate_Checksum (')'); 1971 Scan_Ptr := Scan_Ptr + 1; 1972 Token := Tok_Right_Paren; 1973 1974 if Style_Check then 1975 Style.Check_Right_Paren; 1976 end if; 1977 1978 return; 1979 1980 -- Right bracket or right brace, treated as right paren 1981 1982 when ']' | '}' => 1983 Error_Msg_S ("illegal character, replaced by "")"""); 1984 Scan_Ptr := Scan_Ptr + 1; 1985 Token := Tok_Right_Paren; 1986 return; 1987 1988 -- Slash (can be division operator or first character of not equal) 1989 1990 when '/' => 1991 Accumulate_Checksum ('/'); 1992 1993 if Double_Char_Token ('=') then 1994 Token := Tok_Not_Equal; 1995 return; 1996 else 1997 Scan_Ptr := Scan_Ptr + 1; 1998 Token := Tok_Slash; 1999 return; 2000 end if; 2001 2002 -- Semicolon 2003 2004 when ';' => 2005 Accumulate_Checksum (';'); 2006 Scan_Ptr := Scan_Ptr + 1; 2007 Token := Tok_Semicolon; 2008 2009 if Style_Check then 2010 Style.Check_Semicolon; 2011 end if; 2012 2013 return; 2014 2015 -- Vertical bar 2016 2017 when '|' => Vertical_Bar_Case : begin 2018 Accumulate_Checksum ('|'); 2019 2020 -- Special check for || to give nice message 2021 2022 if Source (Scan_Ptr + 1) = '|' then 2023 Error_Msg_S -- CODEFIX 2024 ("""'|'|"" should be `OR ELSE`"); 2025 Scan_Ptr := Scan_Ptr + 2; 2026 Token := Tok_Or; 2027 return; 2028 2029 else 2030 Scan_Ptr := Scan_Ptr + 1; 2031 Token := Tok_Vertical_Bar; 2032 2033 if Style_Check then 2034 Style.Check_Vertical_Bar; 2035 end if; 2036 2037 Post_Scan; 2038 return; 2039 end if; 2040 end Vertical_Bar_Case; 2041 2042 -- Exclamation, replacement character for vertical bar 2043 2044 when '!' => Exclamation_Case : begin 2045 Accumulate_Checksum ('!'); 2046 2047 if Source (Scan_Ptr + 1) = '=' then 2048 Error_Msg_S -- CODEFIX 2049 ("'!= should be /="); 2050 Scan_Ptr := Scan_Ptr + 2; 2051 Token := Tok_Not_Equal; 2052 return; 2053 2054 else 2055 Scan_Ptr := Scan_Ptr + 1; 2056 Token := Tok_Vertical_Bar; 2057 Post_Scan; 2058 return; 2059 end if; 2060 end Exclamation_Case; 2061 2062 -- Plus 2063 2064 when '+' => Plus_Case : begin 2065 Accumulate_Checksum ('+'); 2066 Scan_Ptr := Scan_Ptr + 1; 2067 Token := Tok_Plus; 2068 return; 2069 end Plus_Case; 2070 2071 -- Digits starting a numeric literal 2072 2073 when '0' .. '9' => 2074 2075 -- First a bit of a scan ahead to see if we have a case of an 2076 -- identifier starting with a digit (remembering exponent case). 2077 2078 declare 2079 C : constant Character := Source (Scan_Ptr + 1); 2080 2081 begin 2082 -- OK literal if digit followed by digit or underscore 2083 2084 if C in '0' .. '9' or else C = '_' then 2085 null; 2086 2087 -- OK literal if digit not followed by identifier char 2088 2089 elsif not Identifier_Char (C) then 2090 null; 2091 2092 -- OK literal if digit followed by e/E followed by digit/sign. 2093 -- We also allow underscore after the E, which is an error, but 2094 -- better handled by Nlit than deciding this is an identifier. 2095 2096 elsif (C = 'e' or else C = 'E') 2097 and then (Source (Scan_Ptr + 2) in '0' .. '9' 2098 or else Source (Scan_Ptr + 2) = '+' 2099 or else Source (Scan_Ptr + 2) = '-' 2100 or else Source (Scan_Ptr + 2) = '_') 2101 then 2102 null; 2103 2104 -- Here we have what really looks like an identifier that 2105 -- starts with a digit, so give error msg. 2106 2107 else 2108 Error_Msg_S ("identifier may not start with digit"); 2109 Name_Len := 1; 2110 Underline_Found := False; 2111 Name_Buffer (1) := Source (Scan_Ptr); 2112 Accumulate_Checksum (Name_Buffer (1)); 2113 Scan_Ptr := Scan_Ptr + 1; 2114 goto Scan_Identifier; 2115 end if; 2116 end; 2117 2118 -- Here we have an OK integer literal 2119 2120 Nlit; 2121 2122 -- Check for proper delimiter, ignoring other format characters 2123 2124 Skip_Other_Format_Characters; 2125 2126 if Identifier_Char (Source (Scan_Ptr)) then 2127 Error_Msg_S 2128 ("delimiter required between literal and identifier"); 2129 end if; 2130 2131 Post_Scan; 2132 return; 2133 2134 -- Lower case letters 2135 2136 when 'a' .. 'z' => 2137 Name_Len := 1; 2138 Underline_Found := False; 2139 Name_Buffer (1) := Source (Scan_Ptr); 2140 Accumulate_Checksum (Name_Buffer (1)); 2141 Scan_Ptr := Scan_Ptr + 1; 2142 goto Scan_Identifier; 2143 2144 -- Upper case letters 2145 2146 when 'A' .. 'Z' => 2147 Name_Len := 1; 2148 Underline_Found := False; 2149 Name_Buffer (1) := 2150 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2151 Accumulate_Checksum (Name_Buffer (1)); 2152 Scan_Ptr := Scan_Ptr + 1; 2153 goto Scan_Identifier; 2154 2155 -- Underline character 2156 2157 when '_' => 2158 if Special_Characters ('_') then 2159 Token_Ptr := Scan_Ptr; 2160 Scan_Ptr := Scan_Ptr + 1; 2161 Token := Tok_Special; 2162 Special_Character := '_'; 2163 return; 2164 end if; 2165 2166 Error_Msg_S ("identifier cannot start with underline"); 2167 Name_Len := 1; 2168 Name_Buffer (1) := '_'; 2169 Scan_Ptr := Scan_Ptr + 1; 2170 Underline_Found := False; 2171 goto Scan_Identifier; 2172 2173 -- Space (not possible, because we scanned past blanks) 2174 2175 when ' ' => 2176 raise Program_Error; 2177 2178 -- Characters in top half of ASCII 8-bit chart 2179 2180 when Upper_Half_Character => 2181 2182 -- Wide character case 2183 2184 if Upper_Half_Encoding then 2185 goto Scan_Wide_Character; 2186 2187 -- Otherwise we have OK Latin-1 character 2188 2189 else 2190 -- Upper half characters may possibly be identifier letters 2191 -- but can never be digits, so Identifier_Char can be used to 2192 -- test for a valid start of identifier character. 2193 2194 if Identifier_Char (Source (Scan_Ptr)) then 2195 Name_Len := 0; 2196 Underline_Found := False; 2197 goto Scan_Identifier; 2198 else 2199 Error_Illegal_Character; 2200 end if; 2201 end if; 2202 2203 when ESC => 2204 2205 -- ESC character, possible start of identifier if wide characters 2206 -- using ESC encoding are allowed in identifiers, which we can 2207 -- tell by looking at the Identifier_Char flag for ESC, which is 2208 -- only true if these conditions are met. In Ada 2005 mode, may 2209 -- also be valid UTF_32 space or line terminator character. 2210 2211 if Identifier_Char (ESC) then 2212 Name_Len := 0; 2213 goto Scan_Wide_Character; 2214 else 2215 Error_Illegal_Character; 2216 end if; 2217 2218 -- Invalid control characters 2219 2220 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO | 2221 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | 2222 EM | FS | GS | RS | US | DEL 2223 => 2224 Error_Illegal_Character; 2225 2226 -- Invalid graphic characters 2227 2228 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' => 2229 2230 -- If Set_Special_Character has been called for this character, 2231 -- set Scans.Special_Character and return a Special token. 2232 2233 if Special_Characters (Source (Scan_Ptr)) then 2234 Token_Ptr := Scan_Ptr; 2235 Token := Tok_Special; 2236 Special_Character := Source (Scan_Ptr); 2237 Scan_Ptr := Scan_Ptr + 1; 2238 return; 2239 2240 -- Check for something looking like a preprocessor directive 2241 2242 elsif Source (Scan_Ptr) = '#' 2243 and then (Source (Scan_Ptr + 1 .. Scan_Ptr + 2) = "if" 2244 or else 2245 Source (Scan_Ptr + 1 .. Scan_Ptr + 5) = "elsif" 2246 or else 2247 Source (Scan_Ptr + 1 .. Scan_Ptr + 4) = "else" 2248 or else 2249 Source (Scan_Ptr + 1 .. Scan_Ptr + 3) = "end") 2250 then 2251 Error_Msg_S 2252 ("preprocessor directive ignored, preprocessor not active"); 2253 2254 -- Skip to end of line 2255 2256 loop 2257 if Source (Scan_Ptr) in Graphic_Character 2258 or else 2259 Source (Scan_Ptr) = HT 2260 then 2261 Scan_Ptr := Scan_Ptr + 1; 2262 2263 -- Done if line terminator or EOF 2264 2265 elsif Source (Scan_Ptr) in Line_Terminator 2266 or else 2267 Source (Scan_Ptr) = EOF 2268 then 2269 exit; 2270 2271 -- If we have a wide character, we have to scan it out, 2272 -- because it might be a legitimate line terminator 2273 2274 elsif Start_Of_Wide_Character then 2275 declare 2276 Wptr : constant Source_Ptr := Scan_Ptr; 2277 Code : Char_Code; 2278 Err : Boolean; 2279 2280 begin 2281 Scan_Wide (Source, Scan_Ptr, Code, Err); 2282 2283 -- If not well formed wide character, then just skip 2284 -- past it and ignore it. 2285 2286 if Err then 2287 Scan_Ptr := Wptr + 1; 2288 2289 -- If UTF_32 terminator, terminate comment scan 2290 2291 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2292 Scan_Ptr := Wptr; 2293 exit; 2294 end if; 2295 end; 2296 2297 -- Else keep going (don't worry about bad comment chars 2298 -- in this context, we just want to find the end of line. 2299 2300 else 2301 Scan_Ptr := Scan_Ptr + 1; 2302 end if; 2303 end loop; 2304 2305 -- Otherwise, this is an illegal character 2306 2307 else 2308 Error_Illegal_Character; 2309 end if; 2310 2311 -- End switch on non-blank character 2312 2313 end case; 2314 2315 -- End loop past format effectors. The exit from this loop is by 2316 -- executing a return statement following completion of token scan 2317 -- (control never falls out of this loop to the code which follows) 2318 2319 end loop; 2320 2321 -- Wide_Character scanning routine. On entry we have encountered the 2322 -- initial character of a wide character sequence. 2323 2324 <<Scan_Wide_Character>> 2325 2326 declare 2327 Code : Char_Code; 2328 Cat : Category; 2329 Err : Boolean; 2330 2331 begin 2332 Wptr := Scan_Ptr; 2333 Scan_Wide (Source, Scan_Ptr, Code, Err); 2334 2335 -- If bad wide character, signal error and continue scan 2336 2337 if Err then 2338 Error_Illegal_Wide_Character; 2339 goto Scan_Next_Character; 2340 end if; 2341 2342 Cat := Get_Category (UTF_32 (Code)); 2343 2344 -- If OK letter, reset scan ptr and go scan identifier 2345 2346 if Is_UTF_32_Letter (Cat) then 2347 Scan_Ptr := Wptr; 2348 Name_Len := 0; 2349 Underline_Found := False; 2350 goto Scan_Identifier; 2351 2352 -- If OK wide space, ignore and keep scanning (we do not include 2353 -- any ignored spaces in checksum) 2354 2355 elsif Is_UTF_32_Space (Cat) then 2356 goto Scan_Next_Character; 2357 2358 -- If other format character, ignore and keep scanning (again we 2359 -- do not include in the checksum) (this is for AI-0079). 2360 2361 elsif Is_UTF_32_Other (Cat) then 2362 goto Scan_Next_Character; 2363 2364 -- If OK wide line terminator, terminate current line 2365 2366 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2367 Scan_Ptr := Wptr; 2368 goto Scan_Line_Terminator; 2369 2370 -- Punctuation is an error (at start of identifier) 2371 2372 elsif Is_UTF_32_Punctuation (Cat) then 2373 Error_Msg ("identifier cannot start with punctuation", Wptr); 2374 Scan_Ptr := Wptr; 2375 Name_Len := 0; 2376 Underline_Found := False; 2377 goto Scan_Identifier; 2378 2379 -- Mark character is an error (at start of identifier) 2380 2381 elsif Is_UTF_32_Mark (Cat) then 2382 Error_Msg ("identifier cannot start with mark character", Wptr); 2383 Scan_Ptr := Wptr; 2384 Name_Len := 0; 2385 Underline_Found := False; 2386 goto Scan_Identifier; 2387 2388 -- Extended digit character is an error. Could be bad start of 2389 -- identifier or bad literal. Not worth doing too much to try to 2390 -- distinguish these cases, but we will do a little bit. 2391 2392 elsif Is_UTF_32_Digit (Cat) then 2393 Error_Msg 2394 ("identifier cannot start with digit character", Wptr); 2395 Scan_Ptr := Wptr; 2396 Name_Len := 0; 2397 Underline_Found := False; 2398 goto Scan_Identifier; 2399 2400 -- All other wide characters are illegal here 2401 2402 else 2403 Error_Illegal_Wide_Character; 2404 goto Scan_Next_Character; 2405 end if; 2406 end; 2407 2408 -- Routine to scan line terminator. On entry Scan_Ptr points to a 2409 -- character which is one of FF,LR,CR,VT, or one of the wide characters 2410 -- that is treated as a line terminator. 2411 2412 <<Scan_Line_Terminator>> 2413 2414 -- Check line too long 2415 2416 Check_End_Of_Line; 2417 2418 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is 2419 -- a physical line. 2420 2421 if End_Of_Line_Is_Token then 2422 Token_Ptr := Scan_Ptr; 2423 end if; 2424 2425 declare 2426 Physical : Boolean; 2427 2428 begin 2429 Skip_Line_Terminators (Scan_Ptr, Physical); 2430 2431 -- If we are at start of physical line, update scan pointers to 2432 -- reflect the start of the new line. 2433 2434 if Physical then 2435 Current_Line_Start := Scan_Ptr; 2436 Start_Column := Set_Start_Column; 2437 First_Non_Blank_Location := Scan_Ptr; 2438 2439 -- If End_Of_Line is a token, we return it as it is a 2440 -- physical line. 2441 2442 if End_Of_Line_Is_Token then 2443 Token := Tok_End_Of_Line; 2444 return; 2445 end if; 2446 end if; 2447 end; 2448 2449 goto Scan_Next_Character; 2450 2451 -- Identifier scanning routine. On entry, some initial characters of 2452 -- the identifier may have already been stored in Name_Buffer. If so, 2453 -- Name_Len has the number of characters stored, otherwise Name_Len is 2454 -- set to zero on entry. Underline_Found is also set False on entry. 2455 2456 <<Scan_Identifier>> 2457 2458 -- This loop scans as fast as possible past lower half letters and 2459 -- digits, which we expect to be the most common characters. 2460 2461 loop 2462 if Source (Scan_Ptr) in 'a' .. 'z' 2463 or else Source (Scan_Ptr) in '0' .. '9' 2464 then 2465 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr); 2466 Accumulate_Checksum (Source (Scan_Ptr)); 2467 2468 elsif Source (Scan_Ptr) in 'A' .. 'Z' then 2469 Name_Buffer (Name_Len + 1) := 2470 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2471 Accumulate_Checksum (Name_Buffer (Name_Len + 1)); 2472 2473 else 2474 exit; 2475 end if; 2476 2477 Underline_Found := False; 2478 Scan_Ptr := Scan_Ptr + 1; 2479 Name_Len := Name_Len + 1; 2480 end loop; 2481 2482 -- If we fall through, then we have encountered either an underline 2483 -- character, or an extended identifier character (i.e. one from the 2484 -- upper half), or a wide character, or an identifier terminator. The 2485 -- initial test speeds us up in the most common case where we have 2486 -- an identifier terminator. Note that ESC is an identifier character 2487 -- only if a wide character encoding method that uses ESC encoding 2488 -- is active, so if we find an ESC character we know that we have a 2489 -- wide character. 2490 2491 if Identifier_Char (Source (Scan_Ptr)) 2492 or else (Source (Scan_Ptr) in Upper_Half_Character 2493 and then Upper_Half_Encoding) 2494 then 2495 -- Case of underline 2496 2497 if Source (Scan_Ptr) = '_' then 2498 Accumulate_Checksum ('_'); 2499 2500 if Underline_Found then 2501 Error_No_Double_Underline; 2502 else 2503 Underline_Found := True; 2504 Name_Len := Name_Len + 1; 2505 Name_Buffer (Name_Len) := '_'; 2506 end if; 2507 2508 Scan_Ptr := Scan_Ptr + 1; 2509 goto Scan_Identifier; 2510 2511 -- Upper half character 2512 2513 elsif Source (Scan_Ptr) in Upper_Half_Character 2514 and then not Upper_Half_Encoding 2515 then 2516 Accumulate_Checksum (Source (Scan_Ptr)); 2517 Store_Encoded_Character 2518 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr)))); 2519 Scan_Ptr := Scan_Ptr + 1; 2520 Underline_Found := False; 2521 goto Scan_Identifier; 2522 2523 -- Left bracket not followed by a quote terminates an identifier. 2524 -- This is an error, but we don't want to give a junk error msg 2525 -- about wide characters in this case. 2526 2527 elsif Source (Scan_Ptr) = '[' 2528 and then Source (Scan_Ptr + 1) /= '"' 2529 then 2530 null; 2531 2532 -- We know we have a wide character encoding here (the current 2533 -- character is either ESC, left bracket, or an upper half 2534 -- character depending on the encoding method). 2535 2536 else 2537 -- Scan out the wide character and insert the appropriate 2538 -- encoding into the name table entry for the identifier. 2539 2540 declare 2541 Code : Char_Code; 2542 Err : Boolean; 2543 Chr : Character; 2544 Cat : Category; 2545 2546 begin 2547 Wptr := Scan_Ptr; 2548 Scan_Wide (Source, Scan_Ptr, Code, Err); 2549 2550 -- If error, signal error 2551 2552 if Err then 2553 Error_Illegal_Wide_Character; 2554 2555 -- If the character scanned is a normal identifier 2556 -- character, then we treat it that way. 2557 2558 elsif In_Character_Range (Code) 2559 and then Identifier_Char (Get_Character (Code)) 2560 then 2561 Chr := Get_Character (Code); 2562 Accumulate_Checksum (Chr); 2563 Store_Encoded_Character 2564 (Get_Char_Code (Fold_Lower (Chr))); 2565 Underline_Found := False; 2566 2567 -- Here if not a normal identifier character 2568 2569 else 2570 Cat := Get_Category (UTF_32 (Code)); 2571 2572 -- Wide character in Unicode category "Other, Format" 2573 -- is not accepted in an identifier. This is because it 2574 -- it is considered a security risk (AI-0091). 2575 2576 -- However, it is OK for such a character to appear at 2577 -- the end of an identifier. 2578 2579 if Is_UTF_32_Other (Cat) then 2580 if not Identifier_Char (Source (Scan_Ptr)) then 2581 goto Scan_Identifier_Complete; 2582 else 2583 Error_Msg 2584 ("identifier cannot contain other_format " 2585 & "character", Wptr); 2586 goto Scan_Identifier; 2587 end if; 2588 2589 -- Wide character in category Separator,Space terminates 2590 2591 elsif Is_UTF_32_Space (Cat) then 2592 goto Scan_Identifier_Complete; 2593 end if; 2594 2595 -- Here if wide character is part of the identifier 2596 2597 -- Make sure we are allowing wide characters in 2598 -- identifiers. Note that we allow wide character 2599 -- notation for an OK identifier character. This in 2600 -- particular allows bracket or other notation to be 2601 -- used for upper half letters. 2602 2603 -- Wide characters are always allowed in Ada 2005 2604 2605 if Identifier_Character_Set /= 'w' 2606 and then Ada_Version < Ada_2005 2607 then 2608 Error_Msg 2609 ("wide character not allowed in identifier", Wptr); 2610 end if; 2611 2612 -- If OK letter, store it folding to upper case. Note 2613 -- that we include the folded letter in the checksum. 2614 2615 if Is_UTF_32_Letter (Cat) then 2616 Code := 2617 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code))); 2618 Accumulate_Checksum (Code); 2619 Store_Encoded_Character (Code); 2620 Underline_Found := False; 2621 2622 -- If OK extended digit or mark, then store it 2623 2624 elsif Is_UTF_32_Digit (Cat) 2625 or else Is_UTF_32_Mark (Cat) 2626 then 2627 Accumulate_Checksum (Code); 2628 Store_Encoded_Character (Code); 2629 Underline_Found := False; 2630 2631 -- Wide punctuation is also stored, but counts as an 2632 -- underline character for error checking purposes. 2633 2634 elsif Is_UTF_32_Punctuation (Cat) then 2635 Accumulate_Checksum (Code); 2636 2637 if Underline_Found then 2638 declare 2639 Cend : constant Source_Ptr := Scan_Ptr; 2640 begin 2641 Scan_Ptr := Wptr; 2642 Error_No_Double_Underline; 2643 Scan_Ptr := Cend; 2644 end; 2645 2646 else 2647 Store_Encoded_Character (Code); 2648 Underline_Found := True; 2649 end if; 2650 2651 -- Any other wide character is not acceptable 2652 2653 else 2654 Error_Msg 2655 ("invalid wide character in identifier", Wptr); 2656 end if; 2657 end if; 2658 2659 goto Scan_Identifier; 2660 end; 2661 end if; 2662 end if; 2663 2664 -- Scan of identifier is complete. The identifier is stored in 2665 -- Name_Buffer, and Scan_Ptr points past the last character. 2666 2667 <<Scan_Identifier_Complete>> 2668 Token_Name := Name_Find; 2669 2670 -- Check for identifier ending with underline or punctuation char 2671 2672 if Underline_Found then 2673 Underline_Found := False; 2674 2675 if Source (Scan_Ptr - 1) = '_' then 2676 Error_Msg 2677 ("identifier cannot end with underline", Scan_Ptr - 1); 2678 else 2679 Error_Msg 2680 ("identifier cannot end with punctuation character", Wptr); 2681 end if; 2682 end if; 2683 2684 -- We will assume it is an identifier, not a keyword, so that the 2685 -- checksum is independent of the Ada version. 2686 2687 Token := Tok_Identifier; 2688 2689 -- Here is where we check if it was a keyword 2690 2691 if Is_Keyword_Name (Token_Name) then 2692 if Opt.Checksum_GNAT_6_3 then 2693 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); 2694 2695 if Checksum_Accumulate_Token_Checksum then 2696 if Checksum_GNAT_5_03 then 2697 Accumulate_Token_Checksum_GNAT_5_03; 2698 else 2699 Accumulate_Token_Checksum_GNAT_6_3; 2700 end if; 2701 end if; 2702 2703 else 2704 Accumulate_Token_Checksum; 2705 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); 2706 end if; 2707 2708 -- Keyword style checks 2709 2710 if Style_Check then 2711 2712 -- Deal with possible style check for non-lower case keyword, 2713 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords 2714 -- for this purpose if they appear as attribute designators. 2715 -- Actually we only check the first character for speed. 2716 2717 -- Ada 2005 (AI-284): Do not apply the style check in case of 2718 -- "pragma Interface" 2719 2720 -- Ada 2005 (AI-340): Do not apply the style check in case of 2721 -- MOD attribute. 2722 2723 if Source (Token_Ptr) <= 'Z' 2724 and then (Prev_Token /= Tok_Apostrophe 2725 or else 2726 (Token /= Tok_Access and then 2727 Token /= Tok_Delta and then 2728 Token /= Tok_Digits and then 2729 Token /= Tok_Mod and then 2730 Token /= Tok_Range)) 2731 and then (Token /= Tok_Interface 2732 or else 2733 (Token = Tok_Interface 2734 and then Prev_Token /= Tok_Pragma)) 2735 then 2736 Style.Non_Lower_Case_Keyword; 2737 end if; 2738 2739 -- Check THEN/ELSE style rules. These do not apply to AND THEN 2740 -- or OR ELSE, and do not apply in if expressions. 2741 2742 if (Token = Tok_Then and then Prev_Token /= Tok_And) 2743 or else 2744 (Token = Tok_Else and then Prev_Token /= Tok_Or) 2745 then 2746 if Inside_If_Expression = 0 then 2747 Style.Check_Separate_Stmt_Lines; 2748 end if; 2749 end if; 2750 end if; 2751 2752 -- We must reset Token_Name since this is not an identifier and 2753 -- if we leave Token_Name set, the parser gets confused because 2754 -- it thinks it is dealing with an identifier instead of the 2755 -- corresponding keyword. 2756 2757 Token_Name := No_Name; 2758 return; 2759 2760 -- It is an identifier after all 2761 2762 else 2763 if Checksum_Accumulate_Token_Checksum then 2764 Accumulate_Token_Checksum; 2765 end if; 2766 2767 Post_Scan; 2768 return; 2769 end if; 2770 end Scan; 2771 2772 -------------------------- 2773 -- Set_Comment_As_Token -- 2774 -------------------------- 2775 2776 procedure Set_Comment_As_Token (Value : Boolean) is 2777 begin 2778 Comment_Is_Token := Value; 2779 end Set_Comment_As_Token; 2780 2781 ------------------------------ 2782 -- Set_End_Of_Line_As_Token -- 2783 ------------------------------ 2784 2785 procedure Set_End_Of_Line_As_Token (Value : Boolean) is 2786 begin 2787 End_Of_Line_Is_Token := Value; 2788 end Set_End_Of_Line_As_Token; 2789 2790 --------------------------- 2791 -- Set_Special_Character -- 2792 --------------------------- 2793 2794 procedure Set_Special_Character (C : Character) is 2795 begin 2796 case C is 2797 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' => 2798 Special_Characters (C) := True; 2799 2800 when others => 2801 null; 2802 end case; 2803 end Set_Special_Character; 2804 2805 ---------------------- 2806 -- Set_Start_Column -- 2807 ---------------------- 2808 2809 -- Note: it seems at first glance a little expensive to compute this value 2810 -- for every source line (since it is certainly not used for all source 2811 -- lines). On the other hand, it doesn't take much more work to skip past 2812 -- the initial white space on the line counting the columns than it would 2813 -- to scan past the white space using the standard scanning circuits. 2814 2815 function Set_Start_Column return Column_Number is 2816 Start_Column : Column_Number := 0; 2817 2818 begin 2819 -- Outer loop scans past horizontal tab characters 2820 2821 Tabs_Loop : loop 2822 2823 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr 2824 -- past the blanks and adjusting Start_Column to account for them. 2825 2826 Blanks_Loop : loop 2827 if Source (Scan_Ptr) = ' ' then 2828 if Source (Scan_Ptr + 1) = ' ' then 2829 if Source (Scan_Ptr + 2) = ' ' then 2830 if Source (Scan_Ptr + 3) = ' ' then 2831 if Source (Scan_Ptr + 4) = ' ' then 2832 if Source (Scan_Ptr + 5) = ' ' then 2833 if Source (Scan_Ptr + 6) = ' ' then 2834 Scan_Ptr := Scan_Ptr + 7; 2835 Start_Column := Start_Column + 7; 2836 else 2837 Scan_Ptr := Scan_Ptr + 6; 2838 Start_Column := Start_Column + 6; 2839 exit Blanks_Loop; 2840 end if; 2841 else 2842 Scan_Ptr := Scan_Ptr + 5; 2843 Start_Column := Start_Column + 5; 2844 exit Blanks_Loop; 2845 end if; 2846 else 2847 Scan_Ptr := Scan_Ptr + 4; 2848 Start_Column := Start_Column + 4; 2849 exit Blanks_Loop; 2850 end if; 2851 else 2852 Scan_Ptr := Scan_Ptr + 3; 2853 Start_Column := Start_Column + 3; 2854 exit Blanks_Loop; 2855 end if; 2856 else 2857 Scan_Ptr := Scan_Ptr + 2; 2858 Start_Column := Start_Column + 2; 2859 exit Blanks_Loop; 2860 end if; 2861 else 2862 Scan_Ptr := Scan_Ptr + 1; 2863 Start_Column := Start_Column + 1; 2864 exit Blanks_Loop; 2865 end if; 2866 else 2867 exit Blanks_Loop; 2868 end if; 2869 end loop Blanks_Loop; 2870 2871 -- Outer loop keeps going only if a horizontal tab follows 2872 2873 if Source (Scan_Ptr) = HT then 2874 if Style_Check then 2875 Style.Check_HT; 2876 end if; 2877 2878 Scan_Ptr := Scan_Ptr + 1; 2879 Start_Column := (Start_Column / 8) * 8 + 8; 2880 else 2881 exit Tabs_Loop; 2882 end if; 2883 end loop Tabs_Loop; 2884 2885 return Start_Column; 2886 2887 -- A constraint error can happen only if we have a compiler with checks on 2888 -- and a line with a ludicrous number of tabs or spaces at the start. In 2889 -- such a case, we really don't care if Start_Column is right or not. 2890 2891 exception 2892 when Constraint_Error => 2893 return Start_Column; 2894 end Set_Start_Column; 2895 2896end Scng; 2897