1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- S C N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2012, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING3. If not, go to -- 19-- http://www.gnu.org/licenses for a complete copy of the license. -- 20-- -- 21-- GNAT was originally developed by the GNAT team at New York University. -- 22-- Extensive contributions were provided by Ada Core Technologies Inc. -- 23-- -- 24------------------------------------------------------------------------------ 25 26with Atree; use Atree; 27with Csets; use Csets; 28with Hostparm; use Hostparm; 29with Namet; use Namet; 30with Opt; use Opt; 31with Restrict; use Restrict; 32with Rident; use Rident; 33with Scans; use Scans; 34with Sinput; use Sinput; 35with Snames; use Snames; 36with Stringt; use Stringt; 37with Stylesw; use Stylesw; 38with Uintp; use Uintp; 39with Urealp; use Urealp; 40with Widechar; use Widechar; 41 42pragma Warnings (Off); 43-- This package is used also by gnatcoll 44with System.CRC32; 45with System.UTF_32; use System.UTF_32; 46with System.WCh_Con; use System.WCh_Con; 47pragma Warnings (On); 48 49package body Scng is 50 51 use ASCII; 52 -- Make control characters visible 53 54 Special_Characters : array (Character) of Boolean := (others => False); 55 -- For characters that are Special token, the value is True 56 57 Comment_Is_Token : Boolean := False; 58 -- True if comments are tokens 59 60 End_Of_Line_Is_Token : Boolean := False; 61 -- True if End_Of_Line is a token 62 63 ----------------------- 64 -- Local Subprograms -- 65 ----------------------- 66 67 procedure Accumulate_Token_Checksum; 68 pragma Inline (Accumulate_Token_Checksum); 69 -- Called after each numeric literal and identifier/keyword. For keywords, 70 -- the token used is Tok_Identifier. This allows to detect additional 71 -- spaces added in sources when using the builder switch -m. 72 73 procedure Accumulate_Token_Checksum_GNAT_6_3; 74 -- Used in place of Accumulate_Token_Checksum for GNAT versions 5.04 to 75 -- 6.3, when Tok_Some was not included in Token_Type and the actual 76 -- Token_Type was used for keywords. This procedure is never used in the 77 -- compiler or gnatmake, only in gprbuild. 78 79 procedure Accumulate_Token_Checksum_GNAT_5_03; 80 -- Used in place of Accumulate_Token_Checksum for GNAT version 5.03, when 81 -- Tok_Interface, Tok_Some, Tok_Synchronized and Tok_Overriding were not 82 -- included in Token_Type and the actual Token_Type was used for keywords. 83 -- This procedure is never used in the compiler or gnatmake, only in 84 -- gprbuild. 85 86 procedure Accumulate_Checksum (C : Character); 87 pragma Inline (Accumulate_Checksum); 88 -- This routine accumulates the checksum given character C. During the 89 -- scanning of a source file, this routine is called with every character 90 -- in the source, excluding blanks, and all control characters (except 91 -- that ESC is included in the checksum). Upper case letters not in string 92 -- literals are folded by the caller. See Sinput spec for the documentation 93 -- of the checksum algorithm. Note: checksum values are only used if we 94 -- generate code, so it is not necessary to worry about making the right 95 -- sequence of calls in any error situation. 96 97 procedure Accumulate_Checksum (C : Char_Code); 98 pragma Inline (Accumulate_Checksum); 99 -- This version is identical, except that the argument, C, is a character 100 -- code value instead of a character. This is used when wide characters 101 -- are scanned. We use the character code rather than the ASCII characters 102 -- so that the checksum is independent of wide character encoding method. 103 104 procedure Initialize_Checksum; 105 pragma Inline (Initialize_Checksum); 106 -- Initialize checksum value 107 108 ------------------------- 109 -- Accumulate_Checksum -- 110 ------------------------- 111 112 procedure Accumulate_Checksum (C : Character) is 113 begin 114 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C); 115 end Accumulate_Checksum; 116 117 procedure Accumulate_Checksum (C : Char_Code) is 118 begin 119 if C > 16#FFFF# then 120 Accumulate_Checksum (Character'Val (C / 2 ** 24)); 121 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256)); 122 Accumulate_Checksum (Character'Val ((C / 256) mod 256)); 123 else 124 Accumulate_Checksum (Character'Val (C / 256)); 125 end if; 126 127 Accumulate_Checksum (Character'Val (C mod 256)); 128 end Accumulate_Checksum; 129 130 ------------------------------- 131 -- Accumulate_Token_Checksum -- 132 ------------------------------- 133 134 procedure Accumulate_Token_Checksum is 135 begin 136 System.CRC32.Update 137 (System.CRC32.CRC32 (Checksum), 138 Character'Val (Token_Type'Pos (Token))); 139 end Accumulate_Token_Checksum; 140 141 ---------------------------------------- 142 -- Accumulate_Token_Checksum_GNAT_6_3 -- 143 ---------------------------------------- 144 145 procedure Accumulate_Token_Checksum_GNAT_6_3 is 146 begin 147 -- Individual values of Token_Type are used, instead of subranges, so 148 -- that additions or suppressions of enumerated values in type 149 -- Token_Type are detected by the compiler. 150 151 case Token is 152 when Tok_Integer_Literal | Tok_Real_Literal | Tok_String_Literal | 153 Tok_Char_Literal | Tok_Operator_Symbol | Tok_Identifier | 154 Tok_Double_Asterisk | Tok_Ampersand | Tok_Minus | Tok_Plus | 155 Tok_Asterisk | Tok_Mod | Tok_Rem | Tok_Slash | Tok_New | 156 Tok_Abs | Tok_Others | Tok_Null | Tok_Dot | Tok_Apostrophe | 157 Tok_Left_Paren | Tok_Delta | Tok_Digits | Tok_Range | 158 Tok_Right_Paren | Tok_Comma | Tok_And | Tok_Or | Tok_Xor | 159 Tok_Less | Tok_Equal | Tok_Greater | Tok_Not_Equal | 160 Tok_Greater_Equal | Tok_Less_Equal | Tok_In | Tok_Not | 161 Tok_Box | Tok_Colon_Equal | Tok_Colon | Tok_Greater_Greater | 162 Tok_Abstract | Tok_Access | Tok_Aliased | Tok_All | Tok_Array | 163 Tok_At | Tok_Body | Tok_Constant | Tok_Do | Tok_Is | 164 Tok_Interface | Tok_Limited | Tok_Of | Tok_Out | Tok_Record | 165 Tok_Renames | Tok_Reverse => 166 167 System.CRC32.Update 168 (System.CRC32.CRC32 (Checksum), 169 Character'Val (Token_Type'Pos (Token))); 170 171 when Tok_Some => 172 173 System.CRC32.Update 174 (System.CRC32.CRC32 (Checksum), 175 Character'Val (Token_Type'Pos (Tok_Identifier))); 176 177 when Tok_Tagged | Tok_Then | Tok_Less_Less | Tok_Abort | Tok_Accept | 178 Tok_Case | Tok_Delay | Tok_Else | Tok_Elsif | Tok_End | 179 Tok_Exception | Tok_Exit | Tok_Goto | Tok_If | Tok_Pragma | 180 Tok_Raise | Tok_Requeue | Tok_Return | Tok_Select | 181 Tok_Terminate | Tok_Until | Tok_When | Tok_Begin | Tok_Declare | 182 Tok_For | Tok_Loop | Tok_While | Tok_Entry | Tok_Protected | 183 Tok_Task | Tok_Type | Tok_Subtype | Tok_Overriding | 184 Tok_Synchronized | Tok_Use | Tok_Function | Tok_Generic | 185 Tok_Package | Tok_Procedure | Tok_Private | Tok_With | 186 Tok_Separate | Tok_EOF | Tok_Semicolon | Tok_Arrow | 187 Tok_Vertical_Bar | Tok_Dot_Dot | Tok_Project | Tok_Extends | 188 Tok_External | Tok_External_As_List | Tok_Comment | 189 Tok_End_Of_Line | Tok_Special | Tok_SPARK_Hide | No_Token => 190 191 System.CRC32.Update 192 (System.CRC32.CRC32 (Checksum), 193 Character'Val (Token_Type'Pos (Token_Type'Pred (Token)))); 194 end case; 195 end Accumulate_Token_Checksum_GNAT_6_3; 196 197 ----------------------------------------- 198 -- Accumulate_Token_Checksum_GNAT_5_03 -- 199 ----------------------------------------- 200 201 procedure Accumulate_Token_Checksum_GNAT_5_03 is 202 begin 203 -- Individual values of Token_Type are used, instead of subranges, so 204 -- that additions or suppressions of enumerated values in type 205 -- Token_Type are detected by the compiler. 206 207 case Token is 208 when Tok_Integer_Literal | Tok_Real_Literal | Tok_String_Literal | 209 Tok_Char_Literal | Tok_Operator_Symbol | Tok_Identifier | 210 Tok_Double_Asterisk | Tok_Ampersand | Tok_Minus | Tok_Plus | 211 Tok_Asterisk | Tok_Mod | Tok_Rem | Tok_Slash | Tok_New | 212 Tok_Abs | Tok_Others | Tok_Null | Tok_Dot | Tok_Apostrophe | 213 Tok_Left_Paren | Tok_Delta | Tok_Digits | Tok_Range | 214 Tok_Right_Paren | Tok_Comma | Tok_And | Tok_Or | Tok_Xor | 215 Tok_Less | Tok_Equal | Tok_Greater | Tok_Not_Equal | 216 Tok_Greater_Equal | Tok_Less_Equal | Tok_In | Tok_Not | 217 Tok_Box | Tok_Colon_Equal | Tok_Colon | Tok_Greater_Greater | 218 Tok_Abstract | Tok_Access | Tok_Aliased | Tok_All | Tok_Array | 219 Tok_At | Tok_Body | Tok_Constant | Tok_Do | Tok_Is => 220 221 System.CRC32.Update 222 (System.CRC32.CRC32 (Checksum), 223 Character'Val (Token_Type'Pos (Token))); 224 225 when Tok_Interface | Tok_Some | Tok_Overriding | Tok_Synchronized => 226 System.CRC32.Update 227 (System.CRC32.CRC32 (Checksum), 228 Character'Val (Token_Type'Pos (Tok_Identifier))); 229 230 when Tok_Limited | Tok_Of | Tok_Out | Tok_Record | 231 Tok_Renames | Tok_Reverse => 232 233 System.CRC32.Update 234 (System.CRC32.CRC32 (Checksum), 235 Character'Val (Token_Type'Pos (Token) - 1)); 236 237 when Tok_Tagged | Tok_Then | Tok_Less_Less | Tok_Abort | Tok_Accept | 238 Tok_Case | Tok_Delay | Tok_Else | Tok_Elsif | Tok_End | 239 Tok_Exception | Tok_Exit | Tok_Goto | Tok_If | Tok_Pragma | 240 Tok_Raise | Tok_Requeue | Tok_Return | Tok_Select | 241 Tok_Terminate | Tok_Until | Tok_When | Tok_Begin | Tok_Declare | 242 Tok_For | Tok_Loop | Tok_While | Tok_Entry | Tok_Protected | 243 Tok_Task | Tok_Type | Tok_Subtype => 244 245 System.CRC32.Update 246 (System.CRC32.CRC32 (Checksum), 247 Character'Val (Token_Type'Pos (Token) - 2)); 248 249 when Tok_Use | Tok_Function | Tok_Generic | 250 Tok_Package | Tok_Procedure | Tok_Private | Tok_With | 251 Tok_Separate | Tok_EOF | Tok_Semicolon | Tok_Arrow | 252 Tok_Vertical_Bar | Tok_Dot_Dot | Tok_Project | Tok_Extends | 253 Tok_External | Tok_External_As_List | Tok_Comment | 254 Tok_End_Of_Line | Tok_Special | Tok_SPARK_Hide | No_Token => 255 256 System.CRC32.Update 257 (System.CRC32.CRC32 (Checksum), 258 Character'Val (Token_Type'Pos (Token) - 4)); 259 end case; 260 end Accumulate_Token_Checksum_GNAT_5_03; 261 262 ---------------------------- 263 -- Determine_Token_Casing -- 264 ---------------------------- 265 266 function Determine_Token_Casing return Casing_Type is 267 begin 268 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1)); 269 end Determine_Token_Casing; 270 271 ------------------------- 272 -- Initialize_Checksum -- 273 ------------------------- 274 275 procedure Initialize_Checksum is 276 begin 277 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum)); 278 end Initialize_Checksum; 279 280 ------------------------ 281 -- Initialize_Scanner -- 282 ------------------------ 283 284 procedure Initialize_Scanner (Index : Source_File_Index) is 285 begin 286 -- Establish reserved words 287 288 Scans.Initialize_Ada_Keywords; 289 290 -- Initialize scan control variables 291 292 Current_Source_File := Index; 293 Source := Source_Text (Current_Source_File); 294 Scan_Ptr := Source_First (Current_Source_File); 295 Token := No_Token; 296 Token_Ptr := Scan_Ptr; 297 Current_Line_Start := Scan_Ptr; 298 Token_Node := Empty; 299 Token_Name := No_Name; 300 Start_Column := Set_Start_Column; 301 First_Non_Blank_Location := Scan_Ptr; 302 303 Initialize_Checksum; 304 Wide_Char_Byte_Count := 0; 305 306 -- Do not call Scan, otherwise the License stuff does not work in Scn 307 308 end Initialize_Scanner; 309 310 ------------------------------ 311 -- Reset_Special_Characters -- 312 ------------------------------ 313 314 procedure Reset_Special_Characters is 315 begin 316 Special_Characters := (others => False); 317 end Reset_Special_Characters; 318 319 ---------- 320 -- Scan -- 321 ---------- 322 323 procedure Scan is 324 325 Start_Of_Comment : Source_Ptr; 326 -- Record start of comment position 327 328 Underline_Found : Boolean; 329 -- During scanning of an identifier, set to True if last character 330 -- scanned was an underline or other punctuation character. This 331 -- is used to flag the error of two underlines/punctuations in a 332 -- row or ending an identifier with a underline/punctuation. Here 333 -- punctuation means any UTF_32 character in the Unicode category 334 -- Punctuation,Connector. 335 336 Wptr : Source_Ptr; 337 -- Used to remember start of last wide character scanned 338 339 procedure Check_End_Of_Line; 340 -- Called when end of line encountered. Checks that line is not too 341 -- long, and that other style checks for the end of line are met. 342 343 function Double_Char_Token (C : Character) return Boolean; 344 -- This function is used for double character tokens like := or <>. It 345 -- checks if the character following Source (Scan_Ptr) is C, and if so 346 -- bumps Scan_Ptr past the pair of characters and returns True. A space 347 -- between the two characters is also recognized with an appropriate 348 -- error message being issued. If C is not present, False is returned. 349 -- Note that Double_Char_Token can only be used for tokens defined in 350 -- the Ada syntax (it's use for error cases like && is not appropriate 351 -- since we do not want a junk message for a case like &-space-&). 352 353 procedure Error_Illegal_Character; 354 -- Give illegal character error, Scan_Ptr points to character. On 355 -- return, Scan_Ptr is bumped past the illegal character. 356 357 procedure Error_Illegal_Wide_Character; 358 -- Give illegal wide character message. On return, Scan_Ptr is bumped 359 -- past the illegal character, which may still leave us pointing to 360 -- junk, not much we can do if the escape sequence is messed up! 361 362 procedure Error_Long_Line; 363 -- Signal error of excessively long line 364 365 procedure Error_No_Double_Underline; 366 -- Signal error of two underline or punctuation characters in a row. 367 -- Called with Scan_Ptr pointing to second underline/punctuation char. 368 369 procedure Nlit; 370 -- This is the procedure for scanning out numeric literals. On entry, 371 -- Scan_Ptr points to the digit that starts the numeric literal (the 372 -- checksum for this character has not been accumulated yet). On return 373 -- Scan_Ptr points past the last character of the numeric literal, Token 374 -- and Token_Node are set appropriately, and the checksum is updated. 375 376 procedure Slit; 377 -- This is the procedure for scanning out string literals. On entry, 378 -- Scan_Ptr points to the opening string quote (the checksum for this 379 -- character has not been accumulated yet). On return Scan_Ptr points 380 -- past the closing quote of the string literal, Token and Token_Node 381 -- are set appropriately, and the checksum is updated. 382 383 procedure Skip_Other_Format_Characters; 384 -- Skips past any "other format" category characters at the current 385 -- cursor location (does not skip past spaces or any other characters). 386 387 function Start_Of_Wide_Character return Boolean; 388 -- Returns True if the scan pointer is pointing to the start of a wide 389 -- character sequence, does not modify the scan pointer in any case. 390 391 ----------------------- 392 -- Check_End_Of_Line -- 393 ----------------------- 394 395 procedure Check_End_Of_Line is 396 Len : constant Int := 397 Int (Scan_Ptr) - 398 Int (Current_Line_Start) - 399 Wide_Char_Byte_Count; 400 401 begin 402 if Style_Check then 403 Style.Check_Line_Terminator (Len); 404 end if; 405 406 -- Deal with checking maximum line length 407 408 if Style_Check and Style_Check_Max_Line_Length then 409 Style.Check_Line_Max_Length (Len); 410 411 -- If style checking is inactive, check maximum line length against 412 -- standard value. 413 414 elsif Len > Max_Line_Length then 415 Error_Long_Line; 416 end if; 417 418 -- Now one more checking circuit. Normally we are only enforcing a 419 -- limit of physical characters, with tabs counting as one character. 420 -- But if after tab expansion we would have a total line length that 421 -- exceeded 32766, that would really cause trouble, because column 422 -- positions would exceed the maximum we allow for a column count. 423 -- Note: the limit is 32766 rather than 32767, since we use a value 424 -- of 32767 for special purposes (see Sinput). Now we really do not 425 -- want to go messing with tabs in the normal case, so what we do is 426 -- to check for a line that has more than 4096 physical characters. 427 -- Any shorter line could not be a problem, even if it was all tabs. 428 429 if Len >= 4096 then 430 declare 431 Col : Natural; 432 Ptr : Source_Ptr; 433 434 begin 435 Col := 1; 436 Ptr := Current_Line_Start; 437 loop 438 exit when Ptr = Scan_Ptr; 439 440 if Source (Ptr) = ASCII.HT then 441 Col := (Col - 1 + 8) / 8 * 8 + 1; 442 else 443 Col := Col + 1; 444 end if; 445 446 if Col > 32766 then 447 Error_Msg 448 ("this line is longer than 32766 characters", 449 Current_Line_Start); 450 raise Unrecoverable_Error; 451 end if; 452 453 Ptr := Ptr + 1; 454 end loop; 455 end; 456 end if; 457 458 -- Reset wide character byte count for next line 459 460 Wide_Char_Byte_Count := 0; 461 end Check_End_Of_Line; 462 463 ----------------------- 464 -- Double_Char_Token -- 465 ----------------------- 466 467 function Double_Char_Token (C : Character) return Boolean is 468 begin 469 if Source (Scan_Ptr + 1) = C then 470 Accumulate_Checksum (C); 471 Scan_Ptr := Scan_Ptr + 2; 472 return True; 473 474 elsif Source (Scan_Ptr + 1) = ' ' 475 and then Source (Scan_Ptr + 2) = C 476 then 477 Scan_Ptr := Scan_Ptr + 1; 478 Error_Msg_S -- CODEFIX 479 ("no space allowed here"); 480 Scan_Ptr := Scan_Ptr + 2; 481 return True; 482 483 else 484 return False; 485 end if; 486 end Double_Char_Token; 487 488 ----------------------------- 489 -- Error_Illegal_Character -- 490 ----------------------------- 491 492 procedure Error_Illegal_Character is 493 begin 494 Error_Msg_S ("illegal character"); 495 Scan_Ptr := Scan_Ptr + 1; 496 end Error_Illegal_Character; 497 498 ---------------------------------- 499 -- Error_Illegal_Wide_Character -- 500 ---------------------------------- 501 502 procedure Error_Illegal_Wide_Character is 503 begin 504 Scan_Ptr := Scan_Ptr + 1; 505 Error_Msg ("illegal wide character", Wptr); 506 end Error_Illegal_Wide_Character; 507 508 --------------------- 509 -- Error_Long_Line -- 510 --------------------- 511 512 procedure Error_Long_Line is 513 begin 514 Error_Msg 515 ("this line is too long", 516 Current_Line_Start + Source_Ptr (Max_Line_Length)); 517 end Error_Long_Line; 518 519 ------------------------------- 520 -- Error_No_Double_Underline -- 521 ------------------------------- 522 523 procedure Error_No_Double_Underline is 524 begin 525 Underline_Found := False; 526 527 -- There are four cases, and we special case the messages 528 529 if Source (Scan_Ptr) = '_' then 530 if Source (Scan_Ptr - 1) = '_' then 531 Error_Msg_S -- CODEFIX 532 ("two consecutive underlines not permitted"); 533 else 534 Error_Msg_S ("underline cannot follow punctuation character"); 535 end if; 536 537 else 538 if Source (Scan_Ptr - 1) = '_' then 539 Error_Msg_S ("punctuation character cannot follow underline"); 540 else 541 Error_Msg_S 542 ("two consecutive punctuation characters not permitted"); 543 end if; 544 end if; 545 end Error_No_Double_Underline; 546 547 ---------- 548 -- Nlit -- 549 ---------- 550 551 procedure Nlit is 552 553 C : Character; 554 -- Current source program character 555 556 Base_Char : Character; 557 -- Either # or : (character at start of based number) 558 559 Base : Int; 560 -- Value of base 561 562 UI_Base : Uint; 563 -- Value of base in Uint format 564 565 UI_Int_Value : Uint; 566 -- Value of integer scanned by Scan_Integer in Uint format 567 568 UI_Num_Value : Uint; 569 -- Value of integer in numeric value being scanned 570 571 Scale : Int; 572 -- Scale value for real literal 573 574 UI_Scale : Uint; 575 -- Scale in Uint format 576 577 Exponent_Is_Negative : Boolean; 578 -- Set true for negative exponent 579 580 Extended_Digit_Value : Int; 581 -- Extended digit value 582 583 Point_Scanned : Boolean; 584 -- Flag for decimal point scanned in numeric literal 585 586 ----------------------- 587 -- Local Subprograms -- 588 ----------------------- 589 590 procedure Error_Digit_Expected; 591 -- Signal error of bad digit, Scan_Ptr points to the location at 592 -- which the digit was expected on input, and is unchanged on return. 593 594 procedure Scan_Integer; 595 -- Procedure to scan integer literal. On entry, Scan_Ptr points to a 596 -- digit, on exit Scan_Ptr points past the last character of the 597 -- integer. 598 -- 599 -- For each digit encountered, UI_Int_Value is multiplied by 10, and 600 -- the value of the digit added to the result. In addition, the 601 -- value in Scale is decremented by one for each actual digit 602 -- scanned. 603 604 -------------------------- 605 -- Error_Digit_Expected -- 606 -------------------------- 607 608 procedure Error_Digit_Expected is 609 begin 610 Error_Msg_S ("digit expected"); 611 end Error_Digit_Expected; 612 613 ------------------ 614 -- Scan_Integer -- 615 ------------------ 616 617 procedure Scan_Integer is 618 C : Character; 619 -- Next character scanned 620 621 begin 622 C := Source (Scan_Ptr); 623 624 -- Loop through digits (allowing underlines) 625 626 loop 627 Accumulate_Checksum (C); 628 UI_Int_Value := 629 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0')); 630 Scan_Ptr := Scan_Ptr + 1; 631 Scale := Scale - 1; 632 C := Source (Scan_Ptr); 633 634 -- Case of underline encountered 635 636 if C = '_' then 637 638 -- We do not accumulate the '_' in the checksum, so that 639 -- 1_234 is equivalent to 1234, and does not trigger 640 -- compilation for "minimal recompilation" (gnatmake -m). 641 642 loop 643 Scan_Ptr := Scan_Ptr + 1; 644 C := Source (Scan_Ptr); 645 exit when C /= '_'; 646 Error_No_Double_Underline; 647 end loop; 648 649 if C not in '0' .. '9' then 650 Error_Digit_Expected; 651 exit; 652 end if; 653 654 else 655 exit when C not in '0' .. '9'; 656 end if; 657 end loop; 658 end Scan_Integer; 659 660 -- Start of Processing for Nlit 661 662 begin 663 Base := 10; 664 UI_Base := Uint_10; 665 UI_Int_Value := Uint_0; 666 Based_Literal_Uses_Colon := False; 667 Scale := 0; 668 Scan_Integer; 669 Point_Scanned := False; 670 UI_Num_Value := UI_Int_Value; 671 672 -- Various possibilities now for continuing the literal are period, 673 -- E/e (for exponent), or :/# (for based literal). 674 675 Scale := 0; 676 C := Source (Scan_Ptr); 677 678 if C = '.' then 679 680 -- Scan out point, but do not scan past .. which is a range 681 -- sequence, and must not be eaten up scanning a numeric literal. 682 683 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop 684 Accumulate_Checksum ('.'); 685 686 if Point_Scanned then 687 Error_Msg_S ("duplicate point ignored"); 688 end if; 689 690 Point_Scanned := True; 691 Scan_Ptr := Scan_Ptr + 1; 692 C := Source (Scan_Ptr); 693 694 if C not in '0' .. '9' then 695 Error_Msg 696 ("real literal cannot end with point", Scan_Ptr - 1); 697 else 698 Scan_Integer; 699 UI_Num_Value := UI_Int_Value; 700 end if; 701 end loop; 702 703 -- Based literal case. The base is the value we already scanned. 704 -- In the case of colon, we insist that the following character 705 -- is indeed an extended digit or a period. This catches a number 706 -- of common errors, as well as catching the well known tricky 707 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;" 708 709 elsif C = '#' 710 or else (C = ':' and then 711 (Source (Scan_Ptr + 1) = '.' 712 or else 713 Source (Scan_Ptr + 1) in '0' .. '9' 714 or else 715 Source (Scan_Ptr + 1) in 'A' .. 'Z' 716 or else 717 Source (Scan_Ptr + 1) in 'a' .. 'z')) 718 then 719 Accumulate_Checksum (C); 720 Base_Char := C; 721 UI_Base := UI_Int_Value; 722 723 if Base_Char = ':' then 724 Based_Literal_Uses_Colon := True; 725 end if; 726 727 if UI_Base < 2 or else UI_Base > 16 then 728 Error_Msg_SC ("base not 2-16"); 729 UI_Base := Uint_16; 730 end if; 731 732 Base := UI_To_Int (UI_Base); 733 Scan_Ptr := Scan_Ptr + 1; 734 735 -- Scan out extended integer [. integer] 736 737 C := Source (Scan_Ptr); 738 UI_Int_Value := Uint_0; 739 Scale := 0; 740 741 loop 742 if C in '0' .. '9' then 743 Accumulate_Checksum (C); 744 Extended_Digit_Value := 745 Int'(Character'Pos (C)) - Int'(Character'Pos ('0')); 746 747 elsif C in 'A' .. 'F' then 748 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32)); 749 Extended_Digit_Value := 750 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10; 751 752 elsif C in 'a' .. 'f' then 753 Accumulate_Checksum (C); 754 Extended_Digit_Value := 755 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10; 756 757 else 758 Error_Msg_S ("extended digit expected"); 759 exit; 760 end if; 761 762 if Extended_Digit_Value >= Base then 763 Error_Msg_S ("digit '>= base"); 764 end if; 765 766 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value; 767 Scale := Scale - 1; 768 Scan_Ptr := Scan_Ptr + 1; 769 C := Source (Scan_Ptr); 770 771 if C = '_' then 772 loop 773 Accumulate_Checksum ('_'); 774 Scan_Ptr := Scan_Ptr + 1; 775 C := Source (Scan_Ptr); 776 exit when C /= '_'; 777 Error_No_Double_Underline; 778 end loop; 779 780 elsif C = '.' then 781 Accumulate_Checksum ('.'); 782 783 if Point_Scanned then 784 Error_Msg_S ("duplicate point ignored"); 785 end if; 786 787 Scan_Ptr := Scan_Ptr + 1; 788 C := Source (Scan_Ptr); 789 Point_Scanned := True; 790 Scale := 0; 791 792 elsif C = Base_Char then 793 Accumulate_Checksum (C); 794 Scan_Ptr := Scan_Ptr + 1; 795 exit; 796 797 elsif C = '#' or else C = ':' then 798 Error_Msg_S ("based number delimiters must match"); 799 Scan_Ptr := Scan_Ptr + 1; 800 exit; 801 802 elsif not Identifier_Char (C) then 803 if Base_Char = '#' then 804 Error_Msg_S -- CODEFIX 805 ("missing '#"); 806 else 807 Error_Msg_S -- CODEFIX 808 ("missing ':"); 809 end if; 810 811 exit; 812 end if; 813 814 end loop; 815 816 UI_Num_Value := UI_Int_Value; 817 end if; 818 819 -- Scan out exponent 820 821 if not Point_Scanned then 822 Scale := 0; 823 UI_Scale := Uint_0; 824 else 825 UI_Scale := UI_From_Int (Scale); 826 end if; 827 828 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then 829 Accumulate_Checksum ('e'); 830 Scan_Ptr := Scan_Ptr + 1; 831 Exponent_Is_Negative := False; 832 833 if Source (Scan_Ptr) = '+' then 834 Accumulate_Checksum ('+'); 835 Scan_Ptr := Scan_Ptr + 1; 836 837 elsif Source (Scan_Ptr) = '-' then 838 Accumulate_Checksum ('-'); 839 840 if not Point_Scanned then 841 Error_Msg_S 842 ("negative exponent not allowed for integer literal"); 843 else 844 Exponent_Is_Negative := True; 845 end if; 846 847 Scan_Ptr := Scan_Ptr + 1; 848 end if; 849 850 UI_Int_Value := Uint_0; 851 852 if Source (Scan_Ptr) in '0' .. '9' then 853 Scan_Integer; 854 else 855 Error_Digit_Expected; 856 end if; 857 858 if Exponent_Is_Negative then 859 UI_Scale := UI_Scale - UI_Int_Value; 860 else 861 UI_Scale := UI_Scale + UI_Int_Value; 862 end if; 863 end if; 864 865 -- Case of real literal to be returned 866 867 if Point_Scanned then 868 Token := Tok_Real_Literal; 869 Real_Literal_Value := 870 UR_From_Components ( 871 Num => UI_Num_Value, 872 Den => -UI_Scale, 873 Rbase => Base); 874 875 -- Case of integer literal to be returned 876 877 else 878 Token := Tok_Integer_Literal; 879 880 if UI_Scale = 0 then 881 Int_Literal_Value := UI_Num_Value; 882 883 -- Avoid doing possibly expensive calculations in cases like 884 -- parsing 163E800_000# when semantics will not be done anyway. 885 -- This is especially useful when parsing garbled input. 886 887 elsif Operating_Mode /= Check_Syntax 888 and then (Serious_Errors_Detected = 0 or else Try_Semantics) 889 then 890 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale; 891 892 else 893 Int_Literal_Value := No_Uint; 894 end if; 895 end if; 896 897 if Checksum_Accumulate_Token_Checksum then 898 Accumulate_Token_Checksum; 899 end if; 900 901 return; 902 end Nlit; 903 904 ---------- 905 -- Slit -- 906 ---------- 907 908 procedure Slit is 909 910 Delimiter : Character; 911 -- Delimiter (first character of string) 912 913 C : Character; 914 -- Current source program character 915 916 Code : Char_Code; 917 -- Current character code value 918 919 Err : Boolean; 920 -- Error flag for Scan_Wide call 921 922 String_Start : Source_Ptr; 923 -- Point to first character of string 924 925 procedure Error_Bad_String_Char; 926 -- Signal bad character in string/character literal. On entry 927 -- Scan_Ptr points to the improper character encountered during the 928 -- scan. Scan_Ptr is not modified, so it still points to the bad 929 -- character on return. 930 931 procedure Error_Unterminated_String; 932 -- Procedure called if a line terminator character is encountered 933 -- during scanning a string, meaning that the string is not properly 934 -- terminated. 935 936 procedure Set_String; 937 -- Procedure used to distinguish between string and operator symbol. 938 -- On entry the string has been scanned out, and its characters start 939 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token 940 -- is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate, 941 -- and Token_Node is appropriately initialized. In addition, in the 942 -- operator symbol case, Token_Name is appropriately set, and the 943 -- flags [Wide_]Wide_Character_Found are set appropriately. 944 945 --------------------------- 946 -- Error_Bad_String_Char -- 947 --------------------------- 948 949 procedure Error_Bad_String_Char is 950 C : constant Character := Source (Scan_Ptr); 951 952 begin 953 if C = HT then 954 Error_Msg_S ("horizontal tab not allowed in string"); 955 956 elsif C = VT or else C = FF then 957 Error_Msg_S ("format effector not allowed in string"); 958 959 elsif C in Upper_Half_Character then 960 Error_Msg_S ("(Ada 83) upper half character not allowed"); 961 962 else 963 Error_Msg_S ("control character not allowed in string"); 964 end if; 965 end Error_Bad_String_Char; 966 967 ------------------------------- 968 -- Error_Unterminated_String -- 969 ------------------------------- 970 971 procedure Error_Unterminated_String is 972 S : Source_Ptr; 973 974 begin 975 -- An interesting little refinement. Consider the following 976 -- examples: 977 978 -- A := "this is an unterminated string; 979 -- A := "this is an unterminated string & 980 -- P(A, "this is a parameter that didn't get terminated); 981 -- P("this is a parameter that didn't get terminated, A); 982 983 -- We fiddle a little to do slightly better placement in these 984 -- cases also if there is white space at the end of the line we 985 -- place the flag at the start of this white space, not at the 986 -- end. Note that we only have to test for blanks, since tabs 987 -- aren't allowed in strings in the first place and would have 988 -- caused an error message. 989 990 -- Two more cases that we treat specially are: 991 992 -- A := "this string uses the wrong terminator' 993 -- A := "this string uses the wrong terminator' & 994 995 -- In these cases we give a different error message as well 996 997 -- We actually reposition the scan pointer to the point where we 998 -- place the flag in these cases, since it seems a better bet on 999 -- the original intention. 1000 1001 while Source (Scan_Ptr - 1) = ' ' 1002 or else Source (Scan_Ptr - 1) = '&' 1003 loop 1004 Scan_Ptr := Scan_Ptr - 1; 1005 Unstore_String_Char; 1006 end loop; 1007 1008 -- Check for case of incorrect string terminator, but single quote 1009 -- is not considered incorrect if the opening terminator misused 1010 -- a single quote (error message already given). 1011 1012 if Delimiter /= ''' 1013 and then Source (Scan_Ptr - 1) = ''' 1014 then 1015 Unstore_String_Char; 1016 Error_Msg 1017 ("incorrect string terminator character", Scan_Ptr - 1); 1018 return; 1019 end if; 1020 1021 -- Backup over semicolon or right-paren/semicolon sequence 1022 1023 if Source (Scan_Ptr - 1) = ';' then 1024 Scan_Ptr := Scan_Ptr - 1; 1025 Unstore_String_Char; 1026 1027 if Source (Scan_Ptr - 1) = ')' then 1028 Scan_Ptr := Scan_Ptr - 1; 1029 Unstore_String_Char; 1030 end if; 1031 end if; 1032 1033 -- See if there is a comma in the string, if so, guess that 1034 -- the first comma terminates the string. 1035 1036 S := String_Start; 1037 while S < Scan_Ptr loop 1038 if Source (S) = ',' then 1039 while Scan_Ptr > S loop 1040 Scan_Ptr := Scan_Ptr - 1; 1041 Unstore_String_Char; 1042 end loop; 1043 1044 exit; 1045 end if; 1046 1047 S := S + 1; 1048 end loop; 1049 1050 -- Now we have adjusted the scan pointer, give message 1051 1052 Error_Msg_S -- CODEFIX 1053 ("missing string quote"); 1054 end Error_Unterminated_String; 1055 1056 ---------------- 1057 -- Set_String -- 1058 ---------------- 1059 1060 procedure Set_String is 1061 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2); 1062 C1 : Character; 1063 C2 : Character; 1064 C3 : Character; 1065 1066 begin 1067 -- Token_Name is currently set to Error_Name. The following 1068 -- section of code resets Token_Name to the proper Name_Op_xx 1069 -- value if the string is a valid operator symbol, otherwise it is 1070 -- left set to Error_Name. 1071 1072 if Slen = 1 then 1073 C1 := Source (Token_Ptr + 1); 1074 1075 case C1 is 1076 when '=' => 1077 Token_Name := Name_Op_Eq; 1078 1079 when '>' => 1080 Token_Name := Name_Op_Gt; 1081 1082 when '<' => 1083 Token_Name := Name_Op_Lt; 1084 1085 when '+' => 1086 Token_Name := Name_Op_Add; 1087 1088 when '-' => 1089 Token_Name := Name_Op_Subtract; 1090 1091 when '&' => 1092 Token_Name := Name_Op_Concat; 1093 1094 when '*' => 1095 Token_Name := Name_Op_Multiply; 1096 1097 when '/' => 1098 Token_Name := Name_Op_Divide; 1099 1100 when others => 1101 null; 1102 end case; 1103 1104 elsif Slen = 2 then 1105 C1 := Source (Token_Ptr + 1); 1106 C2 := Source (Token_Ptr + 2); 1107 1108 if C1 = '*' and then C2 = '*' then 1109 Token_Name := Name_Op_Expon; 1110 1111 elsif C2 = '=' then 1112 1113 if C1 = '/' then 1114 Token_Name := Name_Op_Ne; 1115 elsif C1 = '<' then 1116 Token_Name := Name_Op_Le; 1117 elsif C1 = '>' then 1118 Token_Name := Name_Op_Ge; 1119 end if; 1120 1121 elsif (C1 = 'O' or else C1 = 'o') and then -- OR 1122 (C2 = 'R' or else C2 = 'r') 1123 then 1124 Token_Name := Name_Op_Or; 1125 end if; 1126 1127 elsif Slen = 3 then 1128 C1 := Source (Token_Ptr + 1); 1129 C2 := Source (Token_Ptr + 2); 1130 C3 := Source (Token_Ptr + 3); 1131 1132 if (C1 = 'A' or else C1 = 'a') and then -- AND 1133 (C2 = 'N' or else C2 = 'n') and then 1134 (C3 = 'D' or else C3 = 'd') 1135 then 1136 Token_Name := Name_Op_And; 1137 1138 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS 1139 (C2 = 'B' or else C2 = 'b') and then 1140 (C3 = 'S' or else C3 = 's') 1141 then 1142 Token_Name := Name_Op_Abs; 1143 1144 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD 1145 (C2 = 'O' or else C2 = 'o') and then 1146 (C3 = 'D' or else C3 = 'd') 1147 then 1148 Token_Name := Name_Op_Mod; 1149 1150 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT 1151 (C2 = 'O' or else C2 = 'o') and then 1152 (C3 = 'T' or else C3 = 't') 1153 then 1154 Token_Name := Name_Op_Not; 1155 1156 elsif (C1 = 'R' or else C1 = 'r') and then -- REM 1157 (C2 = 'E' or else C2 = 'e') and then 1158 (C3 = 'M' or else C3 = 'm') 1159 then 1160 Token_Name := Name_Op_Rem; 1161 1162 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR 1163 (C2 = 'O' or else C2 = 'o') and then 1164 (C3 = 'R' or else C3 = 'r') 1165 then 1166 Token_Name := Name_Op_Xor; 1167 end if; 1168 1169 end if; 1170 1171 -- If it is an operator symbol, then Token_Name is set. If it is 1172 -- some other string value, then Token_Name still contains 1173 -- Error_Name. 1174 1175 if Token_Name = Error_Name then 1176 Token := Tok_String_Literal; 1177 1178 else 1179 Token := Tok_Operator_Symbol; 1180 end if; 1181 end Set_String; 1182 1183 -- Start of processing for Slit 1184 1185 begin 1186 -- On entry, Scan_Ptr points to the opening character of the string 1187 -- which is either a percent, double quote, or apostrophe (single 1188 -- quote). The latter case is an error detected by the character 1189 -- literal circuit. 1190 1191 String_Start := Scan_Ptr; 1192 1193 Delimiter := Source (Scan_Ptr); 1194 Accumulate_Checksum (Delimiter); 1195 1196 Start_String; 1197 Wide_Character_Found := False; 1198 Wide_Wide_Character_Found := False; 1199 Scan_Ptr := Scan_Ptr + 1; 1200 1201 -- Loop to scan out characters of string literal 1202 1203 loop 1204 C := Source (Scan_Ptr); 1205 1206 if C = Delimiter then 1207 Accumulate_Checksum (C); 1208 Scan_Ptr := Scan_Ptr + 1; 1209 exit when Source (Scan_Ptr) /= Delimiter; 1210 Code := Get_Char_Code (C); 1211 Accumulate_Checksum (C); 1212 Scan_Ptr := Scan_Ptr + 1; 1213 1214 else 1215 if C = '"' and then Delimiter = '%' then 1216 Error_Msg_S 1217 ("quote not allowed in percent delimited string"); 1218 Code := Get_Char_Code (C); 1219 Scan_Ptr := Scan_Ptr + 1; 1220 1221 elsif Start_Of_Wide_Character then 1222 Wptr := Scan_Ptr; 1223 Scan_Wide (Source, Scan_Ptr, Code, Err); 1224 1225 if Err then 1226 Error_Illegal_Wide_Character; 1227 Code := Get_Char_Code (' '); 1228 end if; 1229 1230 Accumulate_Checksum (Code); 1231 1232 -- In Ada 95 mode we allow any wide characters in a string 1233 -- but in Ada 2005, the set of characters allowed has been 1234 -- restricted to graphic characters. 1235 1236 if Ada_Version >= Ada_2005 1237 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1238 then 1239 Error_Msg 1240 ("(Ada 2005) non-graphic character not permitted " & 1241 "in string literal", Wptr); 1242 end if; 1243 1244 else 1245 Accumulate_Checksum (C); 1246 1247 if C not in Graphic_Character then 1248 if C in Line_Terminator then 1249 Error_Unterminated_String; 1250 exit; 1251 1252 elsif C in Upper_Half_Character then 1253 if Ada_Version = Ada_83 then 1254 Error_Bad_String_Char; 1255 end if; 1256 1257 else 1258 Error_Bad_String_Char; 1259 end if; 1260 end if; 1261 1262 Code := Get_Char_Code (C); 1263 Scan_Ptr := Scan_Ptr + 1; 1264 end if; 1265 end if; 1266 1267 Store_String_Char (Code); 1268 1269 if not In_Character_Range (Code) then 1270 if In_Wide_Character_Range (Code) then 1271 Wide_Character_Found := True; 1272 else 1273 Wide_Wide_Character_Found := True; 1274 end if; 1275 end if; 1276 end loop; 1277 1278 String_Literal_Id := End_String; 1279 Set_String; 1280 return; 1281 end Slit; 1282 1283 ---------------------------------- 1284 -- Skip_Other_Format_Characters -- 1285 ---------------------------------- 1286 1287 procedure Skip_Other_Format_Characters is 1288 P : Source_Ptr; 1289 Code : Char_Code; 1290 Err : Boolean; 1291 1292 begin 1293 while Start_Of_Wide_Character loop 1294 P := Scan_Ptr; 1295 Scan_Wide (Source, Scan_Ptr, Code, Err); 1296 1297 if not Is_UTF_32_Other (UTF_32 (Code)) then 1298 Scan_Ptr := P; 1299 return; 1300 end if; 1301 end loop; 1302 end Skip_Other_Format_Characters; 1303 1304 ----------------------------- 1305 -- Start_Of_Wide_Character -- 1306 ----------------------------- 1307 1308 function Start_Of_Wide_Character return Boolean is 1309 C : constant Character := Source (Scan_Ptr); 1310 1311 begin 1312 -- ESC encoding method with ESC present 1313 1314 if C = ESC 1315 and then Wide_Character_Encoding_Method in WC_ESC_Encoding_Method 1316 then 1317 return True; 1318 1319 -- Upper half character with upper half encoding 1320 1321 elsif C in Upper_Half_Character and then Upper_Half_Encoding then 1322 return True; 1323 1324 -- Brackets encoding 1325 1326 elsif C = '[' 1327 and then Source (Scan_Ptr + 1) = '"' 1328 and then Identifier_Char (Source (Scan_Ptr + 2)) 1329 then 1330 return True; 1331 1332 -- Not the start of a wide character 1333 1334 else 1335 return False; 1336 end if; 1337 end Start_Of_Wide_Character; 1338 1339 -- Start of processing for Scan 1340 1341 begin 1342 Prev_Token := Token; 1343 Prev_Token_Ptr := Token_Ptr; 1344 Token_Name := Error_Name; 1345 1346 -- The following loop runs more than once only if a format effector 1347 -- (tab, vertical tab, form feed, line feed, carriage return) is 1348 -- encountered and skipped, or some error situation, such as an 1349 -- illegal character, is encountered. 1350 1351 <<Scan_Next_Character>> 1352 1353 loop 1354 -- Skip past blanks, loop is opened up for speed 1355 1356 while Source (Scan_Ptr) = ' ' loop 1357 if Source (Scan_Ptr + 1) /= ' ' then 1358 Scan_Ptr := Scan_Ptr + 1; 1359 exit; 1360 end if; 1361 1362 if Source (Scan_Ptr + 2) /= ' ' then 1363 Scan_Ptr := Scan_Ptr + 2; 1364 exit; 1365 end if; 1366 1367 if Source (Scan_Ptr + 3) /= ' ' then 1368 Scan_Ptr := Scan_Ptr + 3; 1369 exit; 1370 end if; 1371 1372 if Source (Scan_Ptr + 4) /= ' ' then 1373 Scan_Ptr := Scan_Ptr + 4; 1374 exit; 1375 end if; 1376 1377 if Source (Scan_Ptr + 5) /= ' ' then 1378 Scan_Ptr := Scan_Ptr + 5; 1379 exit; 1380 end if; 1381 1382 if Source (Scan_Ptr + 6) /= ' ' then 1383 Scan_Ptr := Scan_Ptr + 6; 1384 exit; 1385 end if; 1386 1387 if Source (Scan_Ptr + 7) /= ' ' then 1388 Scan_Ptr := Scan_Ptr + 7; 1389 exit; 1390 end if; 1391 1392 Scan_Ptr := Scan_Ptr + 8; 1393 end loop; 1394 1395 -- We are now at a non-blank character, which is the first character 1396 -- of the token we will scan, and hence the value of Token_Ptr. 1397 1398 Token_Ptr := Scan_Ptr; 1399 1400 -- Here begins the main case statement which transfers control on the 1401 -- basis of the non-blank character we have encountered. 1402 1403 case Source (Scan_Ptr) is 1404 1405 -- Line terminator characters 1406 1407 when CR | LF | FF | VT => 1408 goto Scan_Line_Terminator; 1409 1410 -- Horizontal tab, just skip past it 1411 1412 when HT => 1413 if Style_Check then 1414 Style.Check_HT; 1415 end if; 1416 1417 Scan_Ptr := Scan_Ptr + 1; 1418 1419 -- End of file character, treated as an end of file only if it is 1420 -- the last character in the buffer, otherwise it is ignored. 1421 1422 when EOF => 1423 if Scan_Ptr = Source_Last (Current_Source_File) then 1424 Check_End_Of_Line; 1425 1426 if Style_Check then 1427 Style.Check_EOF; 1428 end if; 1429 1430 Token := Tok_EOF; 1431 return; 1432 else 1433 Scan_Ptr := Scan_Ptr + 1; 1434 end if; 1435 1436 -- Ampersand 1437 1438 when '&' => 1439 Accumulate_Checksum ('&'); 1440 1441 if Source (Scan_Ptr + 1) = '&' then 1442 Error_Msg_S -- CODEFIX 1443 ("'&'& should be `AND THEN`"); 1444 Scan_Ptr := Scan_Ptr + 2; 1445 Token := Tok_And; 1446 return; 1447 1448 else 1449 Scan_Ptr := Scan_Ptr + 1; 1450 Token := Tok_Ampersand; 1451 return; 1452 end if; 1453 1454 -- Asterisk (can be multiplication operator or double asterisk which 1455 -- is the exponentiation compound delimiter). 1456 1457 when '*' => 1458 Accumulate_Checksum ('*'); 1459 1460 if Source (Scan_Ptr + 1) = '*' then 1461 Accumulate_Checksum ('*'); 1462 Scan_Ptr := Scan_Ptr + 2; 1463 Token := Tok_Double_Asterisk; 1464 return; 1465 1466 else 1467 Scan_Ptr := Scan_Ptr + 1; 1468 Token := Tok_Asterisk; 1469 return; 1470 end if; 1471 1472 -- Colon, which can either be an isolated colon, or part of an 1473 -- assignment compound delimiter. 1474 1475 when ':' => 1476 Accumulate_Checksum (':'); 1477 1478 if Double_Char_Token ('=') then 1479 Token := Tok_Colon_Equal; 1480 1481 if Style_Check then 1482 Style.Check_Colon_Equal; 1483 end if; 1484 1485 return; 1486 1487 elsif Source (Scan_Ptr + 1) = '-' 1488 and then Source (Scan_Ptr + 2) /= '-' 1489 then 1490 Token := Tok_Colon_Equal; 1491 Error_Msg -- CODEFIX 1492 (":- should be :=", Scan_Ptr); 1493 Scan_Ptr := Scan_Ptr + 2; 1494 return; 1495 1496 else 1497 Scan_Ptr := Scan_Ptr + 1; 1498 Token := Tok_Colon; 1499 1500 if Style_Check then 1501 Style.Check_Colon; 1502 end if; 1503 1504 return; 1505 end if; 1506 1507 -- Left parenthesis 1508 1509 when '(' => 1510 Accumulate_Checksum ('('); 1511 Scan_Ptr := Scan_Ptr + 1; 1512 Token := Tok_Left_Paren; 1513 1514 if Style_Check then 1515 Style.Check_Left_Paren; 1516 end if; 1517 1518 return; 1519 1520 -- Left bracket 1521 1522 when '[' => 1523 if Source (Scan_Ptr + 1) = '"' then 1524 goto Scan_Wide_Character; 1525 1526 else 1527 Error_Msg_S ("illegal character, replaced by ""("""); 1528 Scan_Ptr := Scan_Ptr + 1; 1529 Token := Tok_Left_Paren; 1530 return; 1531 end if; 1532 1533 -- Left brace 1534 1535 when '{' => 1536 Error_Msg_S ("illegal character, replaced by ""("""); 1537 Scan_Ptr := Scan_Ptr + 1; 1538 Token := Tok_Left_Paren; 1539 return; 1540 1541 -- Comma 1542 1543 when ',' => 1544 Accumulate_Checksum (','); 1545 Scan_Ptr := Scan_Ptr + 1; 1546 Token := Tok_Comma; 1547 1548 if Style_Check then 1549 Style.Check_Comma; 1550 end if; 1551 1552 return; 1553 1554 -- Dot, which is either an isolated period, or part of a double dot 1555 -- compound delimiter sequence. We also check for the case of a 1556 -- digit following the period, to give a better error message. 1557 1558 when '.' => 1559 Accumulate_Checksum ('.'); 1560 1561 if Double_Char_Token ('.') then 1562 Token := Tok_Dot_Dot; 1563 1564 if Style_Check then 1565 Style.Check_Dot_Dot; 1566 end if; 1567 1568 return; 1569 1570 elsif Source (Scan_Ptr + 1) in '0' .. '9' then 1571 Error_Msg_S ("numeric literal cannot start with point"); 1572 Scan_Ptr := Scan_Ptr + 1; 1573 1574 else 1575 Scan_Ptr := Scan_Ptr + 1; 1576 Token := Tok_Dot; 1577 return; 1578 end if; 1579 1580 -- Equal, which can either be an equality operator, or part of the 1581 -- arrow (=>) compound delimiter. 1582 1583 when '=' => 1584 Accumulate_Checksum ('='); 1585 1586 if Double_Char_Token ('>') then 1587 Token := Tok_Arrow; 1588 1589 if Style_Check then 1590 Style.Check_Arrow; 1591 end if; 1592 1593 return; 1594 1595 elsif Source (Scan_Ptr + 1) = '=' then 1596 Error_Msg_S -- CODEFIX 1597 ("== should be ="); 1598 Scan_Ptr := Scan_Ptr + 1; 1599 end if; 1600 1601 Scan_Ptr := Scan_Ptr + 1; 1602 Token := Tok_Equal; 1603 return; 1604 1605 -- Greater than, which can be a greater than operator, greater than 1606 -- or equal operator, or first character of a right label bracket. 1607 1608 when '>' => 1609 Accumulate_Checksum ('>'); 1610 1611 if Double_Char_Token ('=') then 1612 Token := Tok_Greater_Equal; 1613 return; 1614 1615 elsif Double_Char_Token ('>') then 1616 Token := Tok_Greater_Greater; 1617 return; 1618 1619 else 1620 Scan_Ptr := Scan_Ptr + 1; 1621 Token := Tok_Greater; 1622 return; 1623 end if; 1624 1625 -- Less than, which can be a less than operator, less than or equal 1626 -- operator, or the first character of a left label bracket, or the 1627 -- first character of a box (<>) compound delimiter. 1628 1629 when '<' => 1630 Accumulate_Checksum ('<'); 1631 1632 if Double_Char_Token ('=') then 1633 Token := Tok_Less_Equal; 1634 return; 1635 1636 elsif Double_Char_Token ('>') then 1637 Token := Tok_Box; 1638 1639 if Style_Check then 1640 Style.Check_Box; 1641 end if; 1642 1643 return; 1644 1645 elsif Double_Char_Token ('<') then 1646 Token := Tok_Less_Less; 1647 return; 1648 1649 else 1650 Scan_Ptr := Scan_Ptr + 1; 1651 Token := Tok_Less; 1652 return; 1653 end if; 1654 1655 -- Minus, which is either a subtraction operator, or the first 1656 -- character of double minus starting a comment 1657 1658 when '-' => Minus_Case : begin 1659 if Source (Scan_Ptr + 1) = '>' then 1660 Error_Msg_S ("invalid token"); 1661 Scan_Ptr := Scan_Ptr + 2; 1662 Token := Tok_Arrow; 1663 return; 1664 1665 elsif Source (Scan_Ptr + 1) /= '-' then 1666 Accumulate_Checksum ('-'); 1667 Scan_Ptr := Scan_Ptr + 1; 1668 Token := Tok_Minus; 1669 return; 1670 1671 -- Comment 1672 1673 else -- Source (Scan_Ptr + 1) = '-' then 1674 if Style_Check then 1675 Style.Check_Comment; 1676 end if; 1677 1678 Scan_Ptr := Scan_Ptr + 2; 1679 1680 -- If we are in preprocessor mode with Replace_In_Comments set, 1681 -- then we return the "--" as a token on its own. 1682 1683 if Replace_In_Comments then 1684 Token := Tok_Comment; 1685 return; 1686 end if; 1687 1688 -- Otherwise scan out the comment 1689 1690 Start_Of_Comment := Scan_Ptr; 1691 1692 -- Loop to scan comment (this loop runs more than once only if 1693 -- a horizontal tab or other non-graphic character is scanned) 1694 1695 loop 1696 -- Scan to non graphic character (opened up for speed) 1697 1698 -- Note that we just eat left brackets, which means that 1699 -- bracket notation cannot be used for end of line 1700 -- characters in comments. This seems a reasonable choice, 1701 -- since no one would ever use brackets notation in a real 1702 -- program in this situation, and if we allow brackets 1703 -- notation, we forbid some valid comments which contain a 1704 -- brackets sequence that happens to match an end of line 1705 -- character. 1706 1707 loop 1708 exit when Source (Scan_Ptr) not in Graphic_Character; 1709 Scan_Ptr := Scan_Ptr + 1; 1710 exit when Source (Scan_Ptr) not in Graphic_Character; 1711 Scan_Ptr := Scan_Ptr + 1; 1712 exit when Source (Scan_Ptr) not in Graphic_Character; 1713 Scan_Ptr := Scan_Ptr + 1; 1714 exit when Source (Scan_Ptr) not in Graphic_Character; 1715 Scan_Ptr := Scan_Ptr + 1; 1716 exit when Source (Scan_Ptr) not in Graphic_Character; 1717 Scan_Ptr := Scan_Ptr + 1; 1718 end loop; 1719 1720 -- Keep going if horizontal tab 1721 1722 if Source (Scan_Ptr) = HT then 1723 if Style_Check then 1724 Style.Check_HT; 1725 end if; 1726 1727 Scan_Ptr := Scan_Ptr + 1; 1728 1729 -- Terminate scan of comment if line terminator 1730 1731 elsif Source (Scan_Ptr) in Line_Terminator then 1732 exit; 1733 1734 -- Terminate scan of comment if end of file encountered 1735 -- (embedded EOF character or real last character in file) 1736 1737 elsif Source (Scan_Ptr) = EOF then 1738 exit; 1739 1740 -- If we have a wide character, we have to scan it out, 1741 -- because it might be a legitimate line terminator 1742 1743 elsif Start_Of_Wide_Character then 1744 declare 1745 Wptr : constant Source_Ptr := Scan_Ptr; 1746 Code : Char_Code; 1747 Err : Boolean; 1748 1749 begin 1750 Scan_Wide (Source, Scan_Ptr, Code, Err); 1751 1752 -- If not well formed wide character, then just skip 1753 -- past it and ignore it. 1754 1755 if Err then 1756 Scan_Ptr := Wptr + 1; 1757 1758 -- If UTF_32 terminator, terminate comment scan 1759 1760 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 1761 Scan_Ptr := Wptr; 1762 exit; 1763 end if; 1764 end; 1765 1766 -- Keep going if character in 80-FF range, or is ESC. These 1767 -- characters are allowed in comments by RM-2.1(1), 2.7(2). 1768 -- They are allowed even in Ada 83 mode according to the 1769 -- approved AI. ESC was added to the AI in June 93. 1770 1771 elsif Source (Scan_Ptr) in Upper_Half_Character 1772 or else Source (Scan_Ptr) = ESC 1773 then 1774 Scan_Ptr := Scan_Ptr + 1; 1775 1776 -- Otherwise we have an illegal comment character 1777 1778 else 1779 Error_Illegal_Character; 1780 end if; 1781 end loop; 1782 1783 -- Note that, except when comments are tokens, we do NOT 1784 -- execute a return here, instead we fall through to reexecute 1785 -- the scan loop to look for a token. 1786 1787 if Comment_Is_Token then 1788 Name_Len := Integer (Scan_Ptr - Start_Of_Comment); 1789 Name_Buffer (1 .. Name_Len) := 1790 String (Source (Start_Of_Comment .. Scan_Ptr - 1)); 1791 Comment_Id := Name_Find; 1792 Token := Tok_Comment; 1793 return; 1794 end if; 1795 1796 -- If the SPARK restriction is set for this unit, then generate 1797 -- a token Tok_SPARK_Hide for a SPARK HIDE directive. 1798 1799 if Restriction_Check_Required (SPARK) 1800 and then Source (Start_Of_Comment) = '#' 1801 then 1802 declare 1803 Scan_SPARK_Ptr : Source_Ptr; 1804 1805 begin 1806 Scan_SPARK_Ptr := Start_Of_Comment + 1; 1807 1808 -- Scan out blanks 1809 1810 while Source (Scan_SPARK_Ptr) = ' ' 1811 or else Source (Scan_SPARK_Ptr) = HT 1812 loop 1813 Scan_SPARK_Ptr := Scan_SPARK_Ptr + 1; 1814 end loop; 1815 1816 -- Recognize HIDE directive. SPARK input cannot be 1817 -- encoded as wide characters, so only deal with 1818 -- lower/upper case. 1819 1820 if (Source (Scan_SPARK_Ptr) = 'h' 1821 or else Source (Scan_SPARK_Ptr) = 'H') 1822 and then (Source (Scan_SPARK_Ptr + 1) = 'i' 1823 or else Source (Scan_SPARK_Ptr + 1) = 'I') 1824 and then (Source (Scan_SPARK_Ptr + 2) = 'd' 1825 or else Source (Scan_SPARK_Ptr + 2) = 'D') 1826 and then (Source (Scan_SPARK_Ptr + 3) = 'e' 1827 or else Source (Scan_SPARK_Ptr + 3) = 'E') 1828 and then (Source (Scan_SPARK_Ptr + 4) = ' ' 1829 or else Source (Scan_SPARK_Ptr + 4) = HT) 1830 then 1831 Token := Tok_SPARK_Hide; 1832 return; 1833 end if; 1834 end; 1835 end if; 1836 end if; 1837 end Minus_Case; 1838 1839 -- Double quote or percent starting a string literal 1840 1841 when '"' | '%' => 1842 Slit; 1843 Post_Scan; 1844 return; 1845 1846 -- Apostrophe. This can either be the start of a character literal, 1847 -- or an isolated apostrophe used in a qualified expression or an 1848 -- attribute. We treat it as a character literal if it does not 1849 -- follow a right parenthesis, identifier, the keyword ALL or 1850 -- a literal. This means that we correctly treat constructs like: 1851 1852 -- A := CHARACTER'('A'); 1853 1854 -- Note that RM-2.2(7) does not require a separator between 1855 -- "CHARACTER" and "'" in the above. 1856 1857 when ''' => Char_Literal_Case : declare 1858 Code : Char_Code; 1859 Err : Boolean; 1860 1861 begin 1862 Accumulate_Checksum ('''); 1863 Scan_Ptr := Scan_Ptr + 1; 1864 1865 -- Here is where we make the test to distinguish the cases. Treat 1866 -- as apostrophe if previous token is an identifier, right paren 1867 -- or the reserved word "all" (latter case as in A.all'Address) 1868 -- (or the reserved word "project" in project files). Also treat 1869 -- it as apostrophe after a literal (this catches some legitimate 1870 -- cases, like A."abs"'Address, and also gives better error 1871 -- behavior for impossible cases like 123'xxx). 1872 1873 if Prev_Token = Tok_Identifier 1874 or else Prev_Token = Tok_Right_Paren 1875 or else Prev_Token = Tok_All 1876 or else Prev_Token = Tok_Project 1877 or else Prev_Token in Token_Class_Literal 1878 then 1879 Token := Tok_Apostrophe; 1880 1881 if Style_Check then 1882 Style.Check_Apostrophe; 1883 end if; 1884 1885 return; 1886 1887 -- Otherwise the apostrophe starts a character literal 1888 1889 else 1890 -- Case of wide character literal 1891 1892 if Start_Of_Wide_Character then 1893 Wptr := Scan_Ptr; 1894 Scan_Wide (Source, Scan_Ptr, Code, Err); 1895 Accumulate_Checksum (Code); 1896 1897 if Err then 1898 Error_Illegal_Wide_Character; 1899 Code := Character'Pos (' '); 1900 1901 -- In Ada 95 mode we allow any wide character in a character 1902 -- literal, but in Ada 2005, the set of characters allowed 1903 -- is restricted to graphic characters. 1904 1905 elsif Ada_Version >= Ada_2005 1906 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1907 then 1908 Error_Msg -- CODEFIX???? 1909 ("(Ada 2005) non-graphic character not permitted " & 1910 "in character literal", Wptr); 1911 end if; 1912 1913 if Source (Scan_Ptr) /= ''' then 1914 Error_Msg_S ("missing apostrophe"); 1915 else 1916 Scan_Ptr := Scan_Ptr + 1; 1917 end if; 1918 1919 -- If we do not find a closing quote in the expected place then 1920 -- assume that we have a misguided attempt at a string literal. 1921 1922 -- However, if previous token is RANGE, then we return an 1923 -- apostrophe instead since this gives better error recovery 1924 1925 elsif Source (Scan_Ptr + 1) /= ''' then 1926 if Prev_Token = Tok_Range then 1927 Token := Tok_Apostrophe; 1928 return; 1929 1930 else 1931 Scan_Ptr := Scan_Ptr - 1; 1932 Error_Msg_S 1933 ("strings are delimited by double quote character"); 1934 Slit; 1935 Post_Scan; 1936 return; 1937 end if; 1938 1939 -- Otherwise we have a (non-wide) character literal 1940 1941 else 1942 Accumulate_Checksum (Source (Scan_Ptr)); 1943 1944 if Source (Scan_Ptr) not in Graphic_Character then 1945 if Source (Scan_Ptr) in Upper_Half_Character then 1946 if Ada_Version = Ada_83 then 1947 Error_Illegal_Character; 1948 end if; 1949 1950 else 1951 Error_Illegal_Character; 1952 end if; 1953 end if; 1954 1955 Code := Get_Char_Code (Source (Scan_Ptr)); 1956 Scan_Ptr := Scan_Ptr + 2; 1957 end if; 1958 1959 -- Fall through here with Scan_Ptr updated past the closing 1960 -- quote, and Code set to the Char_Code value for the literal 1961 1962 Accumulate_Checksum ('''); 1963 Token := Tok_Char_Literal; 1964 Set_Character_Literal_Name (Code); 1965 Token_Name := Name_Find; 1966 Character_Code := Code; 1967 Post_Scan; 1968 return; 1969 end if; 1970 end Char_Literal_Case; 1971 1972 -- Right parenthesis 1973 1974 when ')' => 1975 Accumulate_Checksum (')'); 1976 Scan_Ptr := Scan_Ptr + 1; 1977 Token := Tok_Right_Paren; 1978 1979 if Style_Check then 1980 Style.Check_Right_Paren; 1981 end if; 1982 1983 return; 1984 1985 -- Right bracket or right brace, treated as right paren 1986 1987 when ']' | '}' => 1988 Error_Msg_S ("illegal character, replaced by "")"""); 1989 Scan_Ptr := Scan_Ptr + 1; 1990 Token := Tok_Right_Paren; 1991 return; 1992 1993 -- Slash (can be division operator or first character of not equal) 1994 1995 when '/' => 1996 Accumulate_Checksum ('/'); 1997 1998 if Double_Char_Token ('=') then 1999 Token := Tok_Not_Equal; 2000 return; 2001 else 2002 Scan_Ptr := Scan_Ptr + 1; 2003 Token := Tok_Slash; 2004 return; 2005 end if; 2006 2007 -- Semicolon 2008 2009 when ';' => 2010 Accumulate_Checksum (';'); 2011 Scan_Ptr := Scan_Ptr + 1; 2012 Token := Tok_Semicolon; 2013 2014 if Style_Check then 2015 Style.Check_Semicolon; 2016 end if; 2017 2018 return; 2019 2020 -- Vertical bar 2021 2022 when '|' => Vertical_Bar_Case : begin 2023 Accumulate_Checksum ('|'); 2024 2025 -- Special check for || to give nice message 2026 2027 if Source (Scan_Ptr + 1) = '|' then 2028 Error_Msg_S -- CODEFIX 2029 ("""'|'|"" should be `OR ELSE`"); 2030 Scan_Ptr := Scan_Ptr + 2; 2031 Token := Tok_Or; 2032 return; 2033 2034 else 2035 Scan_Ptr := Scan_Ptr + 1; 2036 Token := Tok_Vertical_Bar; 2037 2038 if Style_Check then 2039 Style.Check_Vertical_Bar; 2040 end if; 2041 2042 Post_Scan; 2043 return; 2044 end if; 2045 end Vertical_Bar_Case; 2046 2047 -- Exclamation, replacement character for vertical bar 2048 2049 when '!' => Exclamation_Case : begin 2050 Accumulate_Checksum ('!'); 2051 2052 if Source (Scan_Ptr + 1) = '=' then 2053 Error_Msg_S -- CODEFIX 2054 ("'!= should be /="); 2055 Scan_Ptr := Scan_Ptr + 2; 2056 Token := Tok_Not_Equal; 2057 return; 2058 2059 else 2060 Scan_Ptr := Scan_Ptr + 1; 2061 Token := Tok_Vertical_Bar; 2062 Post_Scan; 2063 return; 2064 end if; 2065 end Exclamation_Case; 2066 2067 -- Plus 2068 2069 when '+' => Plus_Case : begin 2070 Accumulate_Checksum ('+'); 2071 Scan_Ptr := Scan_Ptr + 1; 2072 Token := Tok_Plus; 2073 return; 2074 end Plus_Case; 2075 2076 -- Digits starting a numeric literal 2077 2078 when '0' .. '9' => 2079 2080 -- First a bit of a scan ahead to see if we have a case of an 2081 -- identifier starting with a digit (remembering exponent case). 2082 2083 declare 2084 C : constant Character := Source (Scan_Ptr + 1); 2085 2086 begin 2087 -- OK literal if digit followed by digit or underscore 2088 2089 if C in '0' .. '9' or else C = '_' then 2090 null; 2091 2092 -- OK literal if digit not followed by identifier char 2093 2094 elsif not Identifier_Char (C) then 2095 null; 2096 2097 -- OK literal if digit followed by e/E followed by digit/sign. 2098 -- We also allow underscore after the E, which is an error, but 2099 -- better handled by Nlit than deciding this is an identifier. 2100 2101 elsif (C = 'e' or else C = 'E') 2102 and then (Source (Scan_Ptr + 2) in '0' .. '9' 2103 or else Source (Scan_Ptr + 2) = '+' 2104 or else Source (Scan_Ptr + 2) = '-' 2105 or else Source (Scan_Ptr + 2) = '_') 2106 then 2107 null; 2108 2109 -- Here we have what really looks like an identifier that 2110 -- starts with a digit, so give error msg. 2111 2112 else 2113 Error_Msg_S ("identifier may not start with digit"); 2114 Name_Len := 1; 2115 Underline_Found := False; 2116 Name_Buffer (1) := Source (Scan_Ptr); 2117 Accumulate_Checksum (Name_Buffer (1)); 2118 Scan_Ptr := Scan_Ptr + 1; 2119 goto Scan_Identifier; 2120 end if; 2121 end; 2122 2123 -- Here we have an OK integer literal 2124 2125 Nlit; 2126 2127 -- Check for proper delimiter, ignoring other format characters 2128 2129 Skip_Other_Format_Characters; 2130 2131 if Identifier_Char (Source (Scan_Ptr)) then 2132 Error_Msg_S 2133 ("delimiter required between literal and identifier"); 2134 end if; 2135 2136 Post_Scan; 2137 return; 2138 2139 -- Lower case letters 2140 2141 when 'a' .. 'z' => 2142 Name_Len := 1; 2143 Underline_Found := False; 2144 Name_Buffer (1) := Source (Scan_Ptr); 2145 Accumulate_Checksum (Name_Buffer (1)); 2146 Scan_Ptr := Scan_Ptr + 1; 2147 goto Scan_Identifier; 2148 2149 -- Upper case letters 2150 2151 when 'A' .. 'Z' => 2152 Name_Len := 1; 2153 Underline_Found := False; 2154 Name_Buffer (1) := 2155 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2156 Accumulate_Checksum (Name_Buffer (1)); 2157 Scan_Ptr := Scan_Ptr + 1; 2158 goto Scan_Identifier; 2159 2160 -- Underline character 2161 2162 when '_' => 2163 if Special_Characters ('_') then 2164 Token_Ptr := Scan_Ptr; 2165 Scan_Ptr := Scan_Ptr + 1; 2166 Token := Tok_Special; 2167 Special_Character := '_'; 2168 return; 2169 end if; 2170 2171 Error_Msg_S ("identifier cannot start with underline"); 2172 Name_Len := 1; 2173 Name_Buffer (1) := '_'; 2174 Scan_Ptr := Scan_Ptr + 1; 2175 Underline_Found := False; 2176 goto Scan_Identifier; 2177 2178 -- Space (not possible, because we scanned past blanks) 2179 2180 when ' ' => 2181 raise Program_Error; 2182 2183 -- Characters in top half of ASCII 8-bit chart 2184 2185 when Upper_Half_Character => 2186 2187 -- Wide character case 2188 2189 if Upper_Half_Encoding then 2190 goto Scan_Wide_Character; 2191 2192 -- Otherwise we have OK Latin-1 character 2193 2194 else 2195 -- Upper half characters may possibly be identifier letters 2196 -- but can never be digits, so Identifier_Char can be used to 2197 -- test for a valid start of identifier character. 2198 2199 if Identifier_Char (Source (Scan_Ptr)) then 2200 Name_Len := 0; 2201 Underline_Found := False; 2202 goto Scan_Identifier; 2203 else 2204 Error_Illegal_Character; 2205 end if; 2206 end if; 2207 2208 when ESC => 2209 2210 -- ESC character, possible start of identifier if wide characters 2211 -- using ESC encoding are allowed in identifiers, which we can 2212 -- tell by looking at the Identifier_Char flag for ESC, which is 2213 -- only true if these conditions are met. In Ada 2005 mode, may 2214 -- also be valid UTF_32 space or line terminator character. 2215 2216 if Identifier_Char (ESC) then 2217 Name_Len := 0; 2218 goto Scan_Wide_Character; 2219 else 2220 Error_Illegal_Character; 2221 end if; 2222 2223 -- Invalid control characters 2224 2225 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO | 2226 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN | 2227 EM | FS | GS | RS | US | DEL 2228 => 2229 Error_Illegal_Character; 2230 2231 -- Invalid graphic characters 2232 2233 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' => 2234 2235 -- If Set_Special_Character has been called for this character, 2236 -- set Scans.Special_Character and return a Special token. 2237 2238 if Special_Characters (Source (Scan_Ptr)) then 2239 Token_Ptr := Scan_Ptr; 2240 Token := Tok_Special; 2241 Special_Character := Source (Scan_Ptr); 2242 Scan_Ptr := Scan_Ptr + 1; 2243 return; 2244 2245 -- Check for something looking like a preprocessor directive 2246 2247 elsif Source (Scan_Ptr) = '#' 2248 and then (Source (Scan_Ptr + 1 .. Scan_Ptr + 2) = "if" 2249 or else 2250 Source (Scan_Ptr + 1 .. Scan_Ptr + 5) = "elsif" 2251 or else 2252 Source (Scan_Ptr + 1 .. Scan_Ptr + 4) = "else" 2253 or else 2254 Source (Scan_Ptr + 1 .. Scan_Ptr + 3) = "end") 2255 then 2256 Error_Msg_S 2257 ("preprocessor directive ignored, preprocessor not active"); 2258 2259 -- Skip to end of line 2260 2261 loop 2262 if Source (Scan_Ptr) in Graphic_Character 2263 or else 2264 Source (Scan_Ptr) = HT 2265 then 2266 Scan_Ptr := Scan_Ptr + 1; 2267 2268 -- Done if line terminator or EOF 2269 2270 elsif Source (Scan_Ptr) in Line_Terminator 2271 or else 2272 Source (Scan_Ptr) = EOF 2273 then 2274 exit; 2275 2276 -- If we have a wide character, we have to scan it out, 2277 -- because it might be a legitimate line terminator 2278 2279 elsif Start_Of_Wide_Character then 2280 declare 2281 Wptr : constant Source_Ptr := Scan_Ptr; 2282 Code : Char_Code; 2283 Err : Boolean; 2284 2285 begin 2286 Scan_Wide (Source, Scan_Ptr, Code, Err); 2287 2288 -- If not well formed wide character, then just skip 2289 -- past it and ignore it. 2290 2291 if Err then 2292 Scan_Ptr := Wptr + 1; 2293 2294 -- If UTF_32 terminator, terminate comment scan 2295 2296 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2297 Scan_Ptr := Wptr; 2298 exit; 2299 end if; 2300 end; 2301 2302 -- Else keep going (don't worry about bad comment chars 2303 -- in this context, we just want to find the end of line. 2304 2305 else 2306 Scan_Ptr := Scan_Ptr + 1; 2307 end if; 2308 end loop; 2309 2310 -- Otherwise, this is an illegal character 2311 2312 else 2313 Error_Illegal_Character; 2314 end if; 2315 2316 -- End switch on non-blank character 2317 2318 end case; 2319 2320 -- End loop past format effectors. The exit from this loop is by 2321 -- executing a return statement following completion of token scan 2322 -- (control never falls out of this loop to the code which follows) 2323 2324 end loop; 2325 2326 -- Wide_Character scanning routine. On entry we have encountered the 2327 -- initial character of a wide character sequence. 2328 2329 <<Scan_Wide_Character>> 2330 2331 declare 2332 Code : Char_Code; 2333 Cat : Category; 2334 Err : Boolean; 2335 2336 begin 2337 Wptr := Scan_Ptr; 2338 Scan_Wide (Source, Scan_Ptr, Code, Err); 2339 2340 -- If bad wide character, signal error and continue scan 2341 2342 if Err then 2343 Error_Illegal_Wide_Character; 2344 goto Scan_Next_Character; 2345 end if; 2346 2347 Cat := Get_Category (UTF_32 (Code)); 2348 2349 -- If OK letter, reset scan ptr and go scan identifier 2350 2351 if Is_UTF_32_Letter (Cat) then 2352 Scan_Ptr := Wptr; 2353 Name_Len := 0; 2354 Underline_Found := False; 2355 goto Scan_Identifier; 2356 2357 -- If OK wide space, ignore and keep scanning (we do not include 2358 -- any ignored spaces in checksum) 2359 2360 elsif Is_UTF_32_Space (Cat) then 2361 goto Scan_Next_Character; 2362 2363 -- If other format character, ignore and keep scanning (again we 2364 -- do not include in the checksum) (this is for AI-0079). 2365 2366 elsif Is_UTF_32_Other (Cat) then 2367 goto Scan_Next_Character; 2368 2369 -- If OK wide line terminator, terminate current line 2370 2371 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2372 Scan_Ptr := Wptr; 2373 goto Scan_Line_Terminator; 2374 2375 -- Punctuation is an error (at start of identifier) 2376 2377 elsif Is_UTF_32_Punctuation (Cat) then 2378 Error_Msg ("identifier cannot start with punctuation", Wptr); 2379 Scan_Ptr := Wptr; 2380 Name_Len := 0; 2381 Underline_Found := False; 2382 goto Scan_Identifier; 2383 2384 -- Mark character is an error (at start of identifier) 2385 2386 elsif Is_UTF_32_Mark (Cat) then 2387 Error_Msg ("identifier cannot start with mark character", Wptr); 2388 Scan_Ptr := Wptr; 2389 Name_Len := 0; 2390 Underline_Found := False; 2391 goto Scan_Identifier; 2392 2393 -- Extended digit character is an error. Could be bad start of 2394 -- identifier or bad literal. Not worth doing too much to try to 2395 -- distinguish these cases, but we will do a little bit. 2396 2397 elsif Is_UTF_32_Digit (Cat) then 2398 Error_Msg 2399 ("identifier cannot start with digit character", Wptr); 2400 Scan_Ptr := Wptr; 2401 Name_Len := 0; 2402 Underline_Found := False; 2403 goto Scan_Identifier; 2404 2405 -- All other wide characters are illegal here 2406 2407 else 2408 Error_Illegal_Wide_Character; 2409 goto Scan_Next_Character; 2410 end if; 2411 end; 2412 2413 -- Routine to scan line terminator. On entry Scan_Ptr points to a 2414 -- character which is one of FF,LR,CR,VT, or one of the wide characters 2415 -- that is treated as a line terminator. 2416 2417 <<Scan_Line_Terminator>> 2418 2419 -- Check line too long 2420 2421 Check_End_Of_Line; 2422 2423 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is 2424 -- a physical line. 2425 2426 if End_Of_Line_Is_Token then 2427 Token_Ptr := Scan_Ptr; 2428 end if; 2429 2430 declare 2431 Physical : Boolean; 2432 2433 begin 2434 Skip_Line_Terminators (Scan_Ptr, Physical); 2435 2436 -- If we are at start of physical line, update scan pointers to 2437 -- reflect the start of the new line. 2438 2439 if Physical then 2440 Current_Line_Start := Scan_Ptr; 2441 Start_Column := Set_Start_Column; 2442 First_Non_Blank_Location := Scan_Ptr; 2443 2444 -- If End_Of_Line is a token, we return it as it is a 2445 -- physical line. 2446 2447 if End_Of_Line_Is_Token then 2448 Token := Tok_End_Of_Line; 2449 return; 2450 end if; 2451 end if; 2452 end; 2453 2454 goto Scan_Next_Character; 2455 2456 -- Identifier scanning routine. On entry, some initial characters of 2457 -- the identifier may have already been stored in Name_Buffer. If so, 2458 -- Name_Len has the number of characters stored, otherwise Name_Len is 2459 -- set to zero on entry. Underline_Found is also set False on entry. 2460 2461 <<Scan_Identifier>> 2462 2463 -- This loop scans as fast as possible past lower half letters and 2464 -- digits, which we expect to be the most common characters. 2465 2466 loop 2467 if Source (Scan_Ptr) in 'a' .. 'z' 2468 or else Source (Scan_Ptr) in '0' .. '9' 2469 then 2470 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr); 2471 Accumulate_Checksum (Source (Scan_Ptr)); 2472 2473 elsif Source (Scan_Ptr) in 'A' .. 'Z' then 2474 Name_Buffer (Name_Len + 1) := 2475 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2476 Accumulate_Checksum (Name_Buffer (Name_Len + 1)); 2477 2478 else 2479 exit; 2480 end if; 2481 2482 Underline_Found := False; 2483 Scan_Ptr := Scan_Ptr + 1; 2484 Name_Len := Name_Len + 1; 2485 end loop; 2486 2487 -- If we fall through, then we have encountered either an underline 2488 -- character, or an extended identifier character (i.e. one from the 2489 -- upper half), or a wide character, or an identifier terminator. The 2490 -- initial test speeds us up in the most common case where we have 2491 -- an identifier terminator. Note that ESC is an identifier character 2492 -- only if a wide character encoding method that uses ESC encoding 2493 -- is active, so if we find an ESC character we know that we have a 2494 -- wide character. 2495 2496 if Identifier_Char (Source (Scan_Ptr)) 2497 or else (Source (Scan_Ptr) in Upper_Half_Character 2498 and then Upper_Half_Encoding) 2499 then 2500 -- Case of underline 2501 2502 if Source (Scan_Ptr) = '_' then 2503 Accumulate_Checksum ('_'); 2504 2505 if Underline_Found then 2506 Error_No_Double_Underline; 2507 else 2508 Underline_Found := True; 2509 Name_Len := Name_Len + 1; 2510 Name_Buffer (Name_Len) := '_'; 2511 end if; 2512 2513 Scan_Ptr := Scan_Ptr + 1; 2514 goto Scan_Identifier; 2515 2516 -- Upper half character 2517 2518 elsif Source (Scan_Ptr) in Upper_Half_Character 2519 and then not Upper_Half_Encoding 2520 then 2521 Accumulate_Checksum (Source (Scan_Ptr)); 2522 Store_Encoded_Character 2523 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr)))); 2524 Scan_Ptr := Scan_Ptr + 1; 2525 Underline_Found := False; 2526 goto Scan_Identifier; 2527 2528 -- Left bracket not followed by a quote terminates an identifier. 2529 -- This is an error, but we don't want to give a junk error msg 2530 -- about wide characters in this case! 2531 2532 elsif Source (Scan_Ptr) = '[' 2533 and then Source (Scan_Ptr + 1) /= '"' 2534 then 2535 null; 2536 2537 -- We know we have a wide character encoding here (the current 2538 -- character is either ESC, left bracket, or an upper half 2539 -- character depending on the encoding method). 2540 2541 else 2542 -- Scan out the wide character and insert the appropriate 2543 -- encoding into the name table entry for the identifier. 2544 2545 declare 2546 Code : Char_Code; 2547 Err : Boolean; 2548 Chr : Character; 2549 Cat : Category; 2550 2551 begin 2552 Wptr := Scan_Ptr; 2553 Scan_Wide (Source, Scan_Ptr, Code, Err); 2554 2555 -- If error, signal error 2556 2557 if Err then 2558 Error_Illegal_Wide_Character; 2559 2560 -- If the character scanned is a normal identifier 2561 -- character, then we treat it that way. 2562 2563 elsif In_Character_Range (Code) 2564 and then Identifier_Char (Get_Character (Code)) 2565 then 2566 Chr := Get_Character (Code); 2567 Accumulate_Checksum (Chr); 2568 Store_Encoded_Character 2569 (Get_Char_Code (Fold_Lower (Chr))); 2570 Underline_Found := False; 2571 2572 -- Here if not a normal identifier character 2573 2574 else 2575 Cat := Get_Category (UTF_32 (Code)); 2576 2577 -- Wide character in Unicode category "Other, Format" 2578 -- is not accepted in an identifier. This is because it 2579 -- it is considered a security risk (AI-0091). 2580 2581 -- However, it is OK for such a character to appear at 2582 -- the end of an identifier. 2583 2584 if Is_UTF_32_Other (Cat) then 2585 if not Identifier_Char (Source (Scan_Ptr)) then 2586 goto Scan_Identifier_Complete; 2587 else 2588 Error_Msg 2589 ("identifier cannot contain other_format " 2590 & "character", Wptr); 2591 goto Scan_Identifier; 2592 end if; 2593 2594 -- Wide character in category Separator,Space terminates 2595 2596 elsif Is_UTF_32_Space (Cat) then 2597 goto Scan_Identifier_Complete; 2598 end if; 2599 2600 -- Here if wide character is part of the identifier 2601 2602 -- Make sure we are allowing wide characters in 2603 -- identifiers. Note that we allow wide character 2604 -- notation for an OK identifier character. This in 2605 -- particular allows bracket or other notation to be 2606 -- used for upper half letters. 2607 2608 -- Wide characters are always allowed in Ada 2005 2609 2610 if Identifier_Character_Set /= 'w' 2611 and then Ada_Version < Ada_2005 2612 then 2613 Error_Msg 2614 ("wide character not allowed in identifier", Wptr); 2615 end if; 2616 2617 -- If OK letter, store it folding to upper case. Note 2618 -- that we include the folded letter in the checksum. 2619 2620 if Is_UTF_32_Letter (Cat) then 2621 Code := 2622 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code))); 2623 Accumulate_Checksum (Code); 2624 Store_Encoded_Character (Code); 2625 Underline_Found := False; 2626 2627 -- If OK extended digit or mark, then store it 2628 2629 elsif Is_UTF_32_Digit (Cat) 2630 or else Is_UTF_32_Mark (Cat) 2631 then 2632 Accumulate_Checksum (Code); 2633 Store_Encoded_Character (Code); 2634 Underline_Found := False; 2635 2636 -- Wide punctuation is also stored, but counts as an 2637 -- underline character for error checking purposes. 2638 2639 elsif Is_UTF_32_Punctuation (Cat) then 2640 Accumulate_Checksum (Code); 2641 2642 if Underline_Found then 2643 declare 2644 Cend : constant Source_Ptr := Scan_Ptr; 2645 begin 2646 Scan_Ptr := Wptr; 2647 Error_No_Double_Underline; 2648 Scan_Ptr := Cend; 2649 end; 2650 2651 else 2652 Store_Encoded_Character (Code); 2653 Underline_Found := True; 2654 end if; 2655 2656 -- Any other wide character is not acceptable 2657 2658 else 2659 Error_Msg 2660 ("invalid wide character in identifier", Wptr); 2661 end if; 2662 end if; 2663 2664 goto Scan_Identifier; 2665 end; 2666 end if; 2667 end if; 2668 2669 -- Scan of identifier is complete. The identifier is stored in 2670 -- Name_Buffer, and Scan_Ptr points past the last character. 2671 2672 <<Scan_Identifier_Complete>> 2673 Token_Name := Name_Find; 2674 2675 -- Check for identifier ending with underline or punctuation char 2676 2677 if Underline_Found then 2678 Underline_Found := False; 2679 2680 if Source (Scan_Ptr - 1) = '_' then 2681 Error_Msg 2682 ("identifier cannot end with underline", Scan_Ptr - 1); 2683 else 2684 Error_Msg 2685 ("identifier cannot end with punctuation character", Wptr); 2686 end if; 2687 end if; 2688 2689 -- We will assume it is an identifier, not a keyword, so that the 2690 -- checksum is independent of the Ada version. 2691 2692 Token := Tok_Identifier; 2693 2694 -- Here is where we check if it was a keyword 2695 2696 if Is_Keyword_Name (Token_Name) then 2697 if Opt.Checksum_GNAT_6_3 then 2698 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); 2699 2700 if Checksum_Accumulate_Token_Checksum then 2701 if Checksum_GNAT_5_03 then 2702 Accumulate_Token_Checksum_GNAT_5_03; 2703 else 2704 Accumulate_Token_Checksum_GNAT_6_3; 2705 end if; 2706 end if; 2707 2708 else 2709 Accumulate_Token_Checksum; 2710 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); 2711 end if; 2712 2713 -- Keyword style checks 2714 2715 if Style_Check then 2716 2717 -- Deal with possible style check for non-lower case keyword, 2718 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords 2719 -- for this purpose if they appear as attribute designators. 2720 -- Actually we only check the first character for speed. 2721 2722 -- Ada 2005 (AI-284): Do not apply the style check in case of 2723 -- "pragma Interface" 2724 2725 -- Ada 2005 (AI-340): Do not apply the style check in case of 2726 -- MOD attribute. 2727 2728 if Source (Token_Ptr) <= 'Z' 2729 and then (Prev_Token /= Tok_Apostrophe 2730 or else 2731 (Token /= Tok_Access and then 2732 Token /= Tok_Delta and then 2733 Token /= Tok_Digits and then 2734 Token /= Tok_Mod and then 2735 Token /= Tok_Range)) 2736 and then (Token /= Tok_Interface 2737 or else 2738 (Token = Tok_Interface 2739 and then Prev_Token /= Tok_Pragma)) 2740 then 2741 Style.Non_Lower_Case_Keyword; 2742 end if; 2743 2744 -- Check THEN/ELSE style rules. These do not apply to AND THEN 2745 -- or OR ELSE, and do not apply in if expressions. 2746 2747 if (Token = Tok_Then and then Prev_Token /= Tok_And) 2748 or else 2749 (Token = Tok_Else and then Prev_Token /= Tok_Or) 2750 then 2751 if Inside_If_Expression = 0 then 2752 Style.Check_Separate_Stmt_Lines; 2753 end if; 2754 end if; 2755 end if; 2756 2757 -- We must reset Token_Name since this is not an identifier and 2758 -- if we leave Token_Name set, the parser gets confused because 2759 -- it thinks it is dealing with an identifier instead of the 2760 -- corresponding keyword. 2761 2762 Token_Name := No_Name; 2763 return; 2764 2765 -- It is an identifier after all 2766 2767 else 2768 if Checksum_Accumulate_Token_Checksum then 2769 Accumulate_Token_Checksum; 2770 end if; 2771 2772 Post_Scan; 2773 return; 2774 end if; 2775 end Scan; 2776 2777 -------------------------- 2778 -- Set_Comment_As_Token -- 2779 -------------------------- 2780 2781 procedure Set_Comment_As_Token (Value : Boolean) is 2782 begin 2783 Comment_Is_Token := Value; 2784 end Set_Comment_As_Token; 2785 2786 ------------------------------ 2787 -- Set_End_Of_Line_As_Token -- 2788 ------------------------------ 2789 2790 procedure Set_End_Of_Line_As_Token (Value : Boolean) is 2791 begin 2792 End_Of_Line_Is_Token := Value; 2793 end Set_End_Of_Line_As_Token; 2794 2795 --------------------------- 2796 -- Set_Special_Character -- 2797 --------------------------- 2798 2799 procedure Set_Special_Character (C : Character) is 2800 begin 2801 case C is 2802 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' => 2803 Special_Characters (C) := True; 2804 2805 when others => 2806 null; 2807 end case; 2808 end Set_Special_Character; 2809 2810 ---------------------- 2811 -- Set_Start_Column -- 2812 ---------------------- 2813 2814 -- Note: it seems at first glance a little expensive to compute this value 2815 -- for every source line (since it is certainly not used for all source 2816 -- lines). On the other hand, it doesn't take much more work to skip past 2817 -- the initial white space on the line counting the columns than it would 2818 -- to scan past the white space using the standard scanning circuits. 2819 2820 function Set_Start_Column return Column_Number is 2821 Start_Column : Column_Number := 0; 2822 2823 begin 2824 -- Outer loop scans past horizontal tab characters 2825 2826 Tabs_Loop : loop 2827 2828 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr 2829 -- past the blanks and adjusting Start_Column to account for them. 2830 2831 Blanks_Loop : loop 2832 if Source (Scan_Ptr) = ' ' then 2833 if Source (Scan_Ptr + 1) = ' ' then 2834 if Source (Scan_Ptr + 2) = ' ' then 2835 if Source (Scan_Ptr + 3) = ' ' then 2836 if Source (Scan_Ptr + 4) = ' ' then 2837 if Source (Scan_Ptr + 5) = ' ' then 2838 if Source (Scan_Ptr + 6) = ' ' then 2839 Scan_Ptr := Scan_Ptr + 7; 2840 Start_Column := Start_Column + 7; 2841 else 2842 Scan_Ptr := Scan_Ptr + 6; 2843 Start_Column := Start_Column + 6; 2844 exit Blanks_Loop; 2845 end if; 2846 else 2847 Scan_Ptr := Scan_Ptr + 5; 2848 Start_Column := Start_Column + 5; 2849 exit Blanks_Loop; 2850 end if; 2851 else 2852 Scan_Ptr := Scan_Ptr + 4; 2853 Start_Column := Start_Column + 4; 2854 exit Blanks_Loop; 2855 end if; 2856 else 2857 Scan_Ptr := Scan_Ptr + 3; 2858 Start_Column := Start_Column + 3; 2859 exit Blanks_Loop; 2860 end if; 2861 else 2862 Scan_Ptr := Scan_Ptr + 2; 2863 Start_Column := Start_Column + 2; 2864 exit Blanks_Loop; 2865 end if; 2866 else 2867 Scan_Ptr := Scan_Ptr + 1; 2868 Start_Column := Start_Column + 1; 2869 exit Blanks_Loop; 2870 end if; 2871 else 2872 exit Blanks_Loop; 2873 end if; 2874 end loop Blanks_Loop; 2875 2876 -- Outer loop keeps going only if a horizontal tab follows 2877 2878 if Source (Scan_Ptr) = HT then 2879 if Style_Check then 2880 Style.Check_HT; 2881 end if; 2882 2883 Scan_Ptr := Scan_Ptr + 1; 2884 Start_Column := (Start_Column / 8) * 8 + 8; 2885 else 2886 exit Tabs_Loop; 2887 end if; 2888 end loop Tabs_Loop; 2889 2890 return Start_Column; 2891 2892 -- A constraint error can happen only if we have a compiler with checks on 2893 -- and a line with a ludicrous number of tabs or spaces at the start. In 2894 -- such a case, we really don't care if Start_Column is right or not. 2895 2896 exception 2897 when Constraint_Error => 2898 return Start_Column; 2899 end Set_Start_Column; 2900 2901end Scng; 2902