1------------------------------------------------------------------------------ 2-- -- 3-- GNAT COMPILER COMPONENTS -- 4-- -- 5-- S C N G -- 6-- -- 7-- B o d y -- 8-- -- 9-- Copyright (C) 1992-2021, Free Software Foundation, Inc. -- 10-- -- 11-- GNAT is free software; you can redistribute it and/or modify it under -- 12-- terms of the GNU General Public License as published by the Free Soft- -- 13-- ware Foundation; either version 3, or (at your option) any later ver- -- 14-- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- 15-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- 16-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 17-- for more details. You should have received a copy of the GNU General -- 18-- Public License distributed with GNAT; see file COPYING3. If not, go to -- 19-- http://www.gnu.org/licenses for a complete copy of the license. -- 20-- -- 21-- GNAT was originally developed by the GNAT team at New York University. -- 22-- Extensive contributions were provided by Ada Core Technologies Inc. -- 23-- -- 24------------------------------------------------------------------------------ 25 26with Atree; use Atree; 27with Csets; use Csets; 28with Errout; use Errout; 29with Hostparm; use Hostparm; 30with Namet; use Namet; 31with Opt; use Opt; 32with Scans; use Scans; 33with Sinput; use Sinput; 34with Snames; use Snames; 35with Stringt; use Stringt; 36with Stylesw; use Stylesw; 37with Uintp; use Uintp; 38with Urealp; use Urealp; 39with Widechar; use Widechar; 40 41pragma Warnings (Off); 42-- This package is used also by gnatcoll 43with System.CRC32; 44with System.UTF_32; use System.UTF_32; 45with System.WCh_Con; use System.WCh_Con; 46pragma Warnings (On); 47 48package body Scng is 49 50 use ASCII; 51 -- Make control characters visible 52 53 Special_Characters : array (Character) of Boolean := (others => False); 54 -- For characters that are Special token, the value is True 55 56 Comment_Is_Token : Boolean := False; 57 -- True if comments are tokens 58 59 End_Of_Line_Is_Token : Boolean := False; 60 -- True if End_Of_Line is a token 61 62 ----------------------- 63 -- Local Subprograms -- 64 ----------------------- 65 66 procedure Accumulate_Token_Checksum; 67 pragma Inline (Accumulate_Token_Checksum); 68 -- Called after each numeric literal and identifier/keyword. For keywords, 69 -- the token used is Tok_Identifier. This allows detection of additional 70 -- spaces added in sources when using the builder switch -m. 71 72 procedure Accumulate_Checksum (C : Character); 73 pragma Inline (Accumulate_Checksum); 74 -- This routine accumulates the checksum given character C. During the 75 -- scanning of a source file, this routine is called with every character 76 -- in the source, excluding blanks, and all control characters (except 77 -- that ESC is included in the checksum). Upper case letters not in string 78 -- literals are folded by the caller. See Sinput spec for the documentation 79 -- of the checksum algorithm. Note: checksum values are only used if we 80 -- generate code, so it is not necessary to worry about making the right 81 -- sequence of calls in any error situation. 82 83 procedure Accumulate_Checksum (C : Char_Code); 84 pragma Inline (Accumulate_Checksum); 85 -- This version is identical, except that the argument, C, is a character 86 -- code value instead of a character. This is used when wide characters 87 -- are scanned. We use the character code rather than the ASCII characters 88 -- so that the checksum is independent of wide character encoding method. 89 90 procedure Initialize_Checksum; 91 pragma Inline (Initialize_Checksum); 92 -- Initialize checksum value 93 94 ------------------------- 95 -- Accumulate_Checksum -- 96 ------------------------- 97 98 procedure Accumulate_Checksum (C : Character) is 99 begin 100 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C); 101 end Accumulate_Checksum; 102 103 procedure Accumulate_Checksum (C : Char_Code) is 104 begin 105 if C > 16#FFFF# then 106 Accumulate_Checksum (Character'Val (C / 2 ** 24)); 107 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256)); 108 Accumulate_Checksum (Character'Val ((C / 256) mod 256)); 109 else 110 Accumulate_Checksum (Character'Val (C / 256)); 111 end if; 112 113 Accumulate_Checksum (Character'Val (C mod 256)); 114 end Accumulate_Checksum; 115 116 ------------------------------- 117 -- Accumulate_Token_Checksum -- 118 ------------------------------- 119 120 procedure Accumulate_Token_Checksum is 121 begin 122 System.CRC32.Update 123 (System.CRC32.CRC32 (Checksum), 124 Character'Val (Token_Type'Pos (Token))); 125 end Accumulate_Token_Checksum; 126 127 ----------------------- 128 -- Check_End_Of_Line -- 129 ----------------------- 130 131 procedure Check_End_Of_Line is 132 Len : constant Int := 133 Int (Scan_Ptr) - 134 Int (Current_Line_Start) - 135 Wide_Char_Byte_Count; 136 137 -- Start of processing for Check_End_Of_Line 138 139 begin 140 if Style_Check then 141 Style.Check_Line_Terminator (Len); 142 end if; 143 144 -- Deal with checking maximum line length 145 146 if Style_Check and Style_Check_Max_Line_Length then 147 Style.Check_Line_Max_Length (Len); 148 149 -- If style checking is inactive, check maximum line length against 150 -- standard value. 151 152 elsif Len > Max_Line_Length then 153 Error_Msg 154 ("this line is too long", 155 Current_Line_Start + Source_Ptr (Max_Line_Length)); 156 end if; 157 158 -- Now one more checking circuit. Normally we are only enforcing a limit 159 -- of physical characters, with tabs counting as one character. But if 160 -- after tab expansion we would have a total line length that exceeded 161 -- 32766, that would really cause trouble, because column positions 162 -- would exceed the maximum we allow for a column count. Note: the limit 163 -- is 32766 rather than 32767, since we use a value of 32767 for special 164 -- purposes (see Sinput). Now we really do not want to go messing with 165 -- tabs in the normal case, so what we do is to check for a line that 166 -- has more than 4096 physical characters. Any shorter line could not 167 -- be a problem, even if it was all tabs. 168 169 if Len >= 4096 then 170 declare 171 Col : Natural; 172 Ptr : Source_Ptr; 173 174 begin 175 Col := 1; 176 Ptr := Current_Line_Start; 177 loop 178 exit when Ptr = Scan_Ptr; 179 180 if Source (Ptr) = ASCII.HT then 181 Col := (Col - 1 + 8) / 8 * 8 + 1; 182 else 183 Col := Col + 1; 184 end if; 185 186 if Col > 32766 then 187 Error_Msg 188 ("this line is longer than 32766 characters", 189 Current_Line_Start); 190 raise Unrecoverable_Error; 191 end if; 192 193 Ptr := Ptr + 1; 194 end loop; 195 end; 196 end if; 197 198 -- Reset wide character byte count for next line 199 200 Wide_Char_Byte_Count := 0; 201 end Check_End_Of_Line; 202 203 ---------------------------- 204 -- Determine_Token_Casing -- 205 ---------------------------- 206 207 function Determine_Token_Casing return Casing_Type is 208 begin 209 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1)); 210 end Determine_Token_Casing; 211 212 ------------------------- 213 -- Initialize_Checksum -- 214 ------------------------- 215 216 procedure Initialize_Checksum is 217 begin 218 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum)); 219 end Initialize_Checksum; 220 221 ------------------------ 222 -- Initialize_Scanner -- 223 ------------------------ 224 225 procedure Initialize_Scanner (Index : Source_File_Index) is 226 begin 227 -- Establish reserved words 228 229 Scans.Initialize_Ada_Keywords; 230 231 -- Initialize scan control variables 232 233 Current_Source_File := Index; 234 Source := Source_Text (Current_Source_File); 235 Scan_Ptr := Source_First (Current_Source_File); 236 Token := No_Token; 237 Token_Ptr := Scan_Ptr; 238 Current_Line_Start := Scan_Ptr; 239 Token_Node := Empty; 240 Token_Name := No_Name; 241 Start_Column := Set_Start_Column; 242 First_Non_Blank_Location := Scan_Ptr; 243 244 Initialize_Checksum; 245 Wide_Char_Byte_Count := 0; 246 247 -- Do not call Scan, otherwise the License stuff does not work in Scn 248 249 end Initialize_Scanner; 250 251 ------------------------------ 252 -- Reset_Special_Characters -- 253 ------------------------------ 254 255 procedure Reset_Special_Characters is 256 begin 257 Special_Characters := (others => False); 258 end Reset_Special_Characters; 259 260 ---------- 261 -- Scan -- 262 ---------- 263 264 procedure Scan is 265 266 Start_Of_Comment : Source_Ptr; 267 -- Record start of comment position 268 269 Underline_Found : Boolean; 270 -- During scanning of an identifier, set to True if last character 271 -- scanned was an underline or other punctuation character. This 272 -- is used to flag the error of two underlines/punctuations in a 273 -- row or ending an identifier with a underline/punctuation. Here 274 -- punctuation means any UTF_32 character in the Unicode category 275 -- Punctuation,Connector. 276 277 Wptr : Source_Ptr; 278 -- Used to remember start of last wide character scanned 279 280 function Double_Char_Token (C : Character) return Boolean; 281 -- This function is used for double character tokens like := or <>. It 282 -- checks if the character following Source (Scan_Ptr) is C, and if so 283 -- bumps Scan_Ptr past the pair of characters and returns True. A space 284 -- between the two characters is also recognized with an appropriate 285 -- error message being issued. If C is not present, False is returned. 286 -- Note that Double_Char_Token can only be used for tokens defined in 287 -- the Ada syntax (it's use for error cases like && is not appropriate 288 -- since we do not want a junk message for a case like &-space-&). 289 290 procedure Error_Illegal_Character; 291 -- Give illegal character error, Scan_Ptr points to character. On 292 -- return, Scan_Ptr is bumped past the illegal character. 293 294 procedure Error_Illegal_Wide_Character; 295 -- Give illegal wide character message. On return, Scan_Ptr is bumped 296 -- past the illegal character, which may still leave us pointing to 297 -- junk, not much we can do if the escape sequence is messed up. 298 299 procedure Error_No_Double_Underline; 300 -- Signal error of two underline or punctuation characters in a row. 301 -- Called with Scan_Ptr pointing to second underline/punctuation char. 302 303 procedure Nlit; 304 -- This is the procedure for scanning out numeric literals. On entry, 305 -- Scan_Ptr points to the digit that starts the numeric literal (the 306 -- checksum for this character has not been accumulated yet). On return 307 -- Scan_Ptr points past the last character of the numeric literal, Token 308 -- and Token_Node are set appropriately, and the checksum is updated. 309 310 procedure Slit; 311 -- This is the procedure for scanning out string literals. On entry, 312 -- Scan_Ptr points to the opening string quote (the checksum for this 313 -- character has not been accumulated yet). On return Scan_Ptr points 314 -- past the closing quote of the string literal, Token and Token_Node 315 -- are set appropriately, and the checksum is updated. 316 317 procedure Skip_Other_Format_Characters; 318 -- Skips past any "other format" category characters at the current 319 -- cursor location (does not skip past spaces or any other characters). 320 321 function Start_Of_Wide_Character return Boolean; 322 -- Returns True if the scan pointer is pointing to the start of a wide 323 -- character sequence, does not modify the scan pointer in any case. 324 325 procedure Check_Bidi (Code : Char_Code); 326 -- Give a warning if Code is a bidirectional character, which can cause 327 -- security vulnerabilities. See the following article: 328 -- 329 -- @article{boucher_trojansource_2021, 330 -- title = {Trojan {Source}: {Invisible} {Vulnerabilities}}, 331 -- author = {Nicholas Boucher and Ross Anderson}, 332 -- year = {2021}, 333 -- journal = {Preprint}, 334 -- eprint = {2111.00169}, 335 -- archivePrefix = {arXiv}, 336 -- primaryClass = {cs.CR}, 337 -- url = {https://arxiv.org/abs/2111.00169} 338 -- } 339 340 ---------------- 341 -- Check_Bidi -- 342 ---------------- 343 344 type Bidi_Characters is 345 (LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI); 346 Bidi_Character_Codes : constant array (Bidi_Characters) of Char_Code := 347 (LRE => 16#202A#, 348 RLE => 16#202B#, 349 LRO => 16#202D#, 350 RLO => 16#202E#, 351 LRI => 16#2066#, 352 RLI => 16#2067#, 353 FSI => 16#2068#, 354 PDF => 16#202C#, 355 PDI => 16#2069#); 356 -- Above are the bidirectional characters, along with their Unicode code 357 -- points. 358 359 procedure Check_Bidi (Code : Char_Code) is 360 begin 361 for Bidi_Code of Bidi_Character_Codes loop 362 if Code = Bidi_Code then 363 Error_Msg ("??bidirectional wide character", Wptr); 364 end if; 365 end loop; 366 end Check_Bidi; 367 368 ----------------------- 369 -- Double_Char_Token -- 370 ----------------------- 371 372 function Double_Char_Token (C : Character) return Boolean is 373 begin 374 if Source (Scan_Ptr + 1) = C then 375 Accumulate_Checksum (C); 376 Scan_Ptr := Scan_Ptr + 2; 377 return True; 378 379 elsif Source (Scan_Ptr + 1) = ' ' 380 and then Source (Scan_Ptr + 2) = C 381 then 382 Scan_Ptr := Scan_Ptr + 1; 383 Error_Msg_S -- CODEFIX 384 ("no space allowed here"); 385 Scan_Ptr := Scan_Ptr + 2; 386 return True; 387 388 else 389 return False; 390 end if; 391 end Double_Char_Token; 392 393 ----------------------------- 394 -- Error_Illegal_Character -- 395 ----------------------------- 396 397 procedure Error_Illegal_Character is 398 begin 399 Error_Msg_S ("illegal character"); 400 Scan_Ptr := Scan_Ptr + 1; 401 end Error_Illegal_Character; 402 403 ---------------------------------- 404 -- Error_Illegal_Wide_Character -- 405 ---------------------------------- 406 407 procedure Error_Illegal_Wide_Character is 408 begin 409 Scan_Ptr := Scan_Ptr + 1; 410 Error_Msg ("illegal wide character", Wptr); 411 end Error_Illegal_Wide_Character; 412 413 ------------------------------- 414 -- Error_No_Double_Underline -- 415 ------------------------------- 416 417 procedure Error_No_Double_Underline is 418 begin 419 Underline_Found := False; 420 421 -- There are four cases, and we special case the messages 422 423 if Source (Scan_Ptr) = '_' then 424 if Source (Scan_Ptr - 1) = '_' then 425 Error_Msg_S -- CODEFIX 426 ("two consecutive underlines not permitted"); 427 else 428 Error_Msg_S ("underline cannot follow punctuation character"); 429 end if; 430 431 else 432 if Source (Scan_Ptr - 1) = '_' then 433 Error_Msg_S ("punctuation character cannot follow underline"); 434 else 435 Error_Msg_S 436 ("two consecutive punctuation characters not permitted"); 437 end if; 438 end if; 439 end Error_No_Double_Underline; 440 441 ---------- 442 -- Nlit -- 443 ---------- 444 445 procedure Nlit is 446 447 C : Character; 448 -- Current source program character 449 450 Base_Char : Character; 451 -- Either # or : (character at start of based number) 452 453 Base : Int; 454 -- Value of base 455 456 UI_Base : Uint; 457 -- Value of base in Uint format 458 459 UI_Int_Value : Uint; 460 -- Value of integer scanned by Scan_Integer in Uint format 461 462 UI_Num_Value : Uint; 463 -- Value of integer in numeric value being scanned 464 465 Scale : Int; 466 -- Scale value for real literal 467 468 UI_Scale : Uint; 469 -- Scale in Uint format 470 471 Exponent_Is_Negative : Boolean; 472 -- Set true for negative exponent 473 474 Extended_Digit_Value : Int; 475 -- Extended digit value 476 477 Point_Scanned : Boolean; 478 -- Flag for decimal point scanned in numeric literal 479 480 ----------------------- 481 -- Local Subprograms -- 482 ----------------------- 483 484 procedure Error_Digit_Expected; 485 -- Signal error of bad digit, Scan_Ptr points to the location at 486 -- which the digit was expected on input, and is unchanged on return. 487 488 procedure Scan_Integer; 489 -- Scan integer literal. On entry, Scan_Ptr points to a digit, on 490 -- exit Scan_Ptr points past the last character of the integer. 491 -- 492 -- For each digit encountered, UI_Int_Value is multiplied by 10, and 493 -- the value of the digit added to the result. In addition, the value 494 -- in Scale is decremented by one for each actual digit scanned. 495 496 -------------------------- 497 -- Error_Digit_Expected -- 498 -------------------------- 499 500 procedure Error_Digit_Expected is 501 begin 502 Error_Msg_S ("digit expected"); 503 end Error_Digit_Expected; 504 505 ------------------ 506 -- Scan_Integer -- 507 ------------------ 508 509 procedure Scan_Integer is 510 C : Character; 511 -- Next character scanned 512 513 begin 514 C := Source (Scan_Ptr); 515 516 -- Loop through digits (allowing underlines) 517 518 loop 519 Accumulate_Checksum (C); 520 UI_Int_Value := 521 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0')); 522 Scan_Ptr := Scan_Ptr + 1; 523 Scale := Scale - 1; 524 C := Source (Scan_Ptr); 525 526 -- Case of underline encountered 527 528 if C = '_' then 529 530 -- We do not accumulate the '_' in the checksum, so that 531 -- 1_234 is equivalent to 1234, and does not trigger 532 -- compilation for "minimal recompilation" (gnatmake -m). 533 534 loop 535 Scan_Ptr := Scan_Ptr + 1; 536 C := Source (Scan_Ptr); 537 exit when C /= '_'; 538 Error_No_Double_Underline; 539 end loop; 540 541 if C not in '0' .. '9' then 542 Error_Digit_Expected; 543 exit; 544 end if; 545 546 else 547 exit when C not in '0' .. '9'; 548 end if; 549 end loop; 550 end Scan_Integer; 551 552 -- Start of processing for Nlit 553 554 begin 555 Base := 10; 556 UI_Base := Uint_10; 557 UI_Int_Value := Uint_0; 558 Based_Literal_Uses_Colon := False; 559 Scale := 0; 560 Scan_Integer; 561 Point_Scanned := False; 562 UI_Num_Value := UI_Int_Value; 563 564 -- Various possibilities now for continuing the literal are period, 565 -- E/e (for exponent), or :/# (for based literal). 566 567 Scale := 0; 568 C := Source (Scan_Ptr); 569 570 if C = '.' then 571 572 -- Scan out point, but do not scan past .. which is a range 573 -- sequence, and must not be eaten up scanning a numeric literal. 574 575 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop 576 Accumulate_Checksum ('.'); 577 578 if Point_Scanned then 579 Error_Msg_S ("duplicate point ignored"); 580 end if; 581 582 Point_Scanned := True; 583 Scan_Ptr := Scan_Ptr + 1; 584 C := Source (Scan_Ptr); 585 586 if C not in '0' .. '9' then 587 Error_Msg 588 ("real literal cannot end with point", Scan_Ptr - 1); 589 else 590 Scan_Integer; 591 UI_Num_Value := UI_Int_Value; 592 end if; 593 end loop; 594 595 -- Based literal case. The base is the value we already scanned. 596 -- In the case of colon, we insist that the following character 597 -- is indeed an extended digit or a period. This catches a number 598 -- of common errors, as well as catching the well known tricky 599 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;" 600 601 elsif C = '#' 602 or else (C = ':' and then 603 (Source (Scan_Ptr + 1) = '.' 604 or else 605 Source (Scan_Ptr + 1) in '0' .. '9' 606 or else 607 Source (Scan_Ptr + 1) in 'A' .. 'Z' 608 or else 609 Source (Scan_Ptr + 1) in 'a' .. 'z')) 610 then 611 Accumulate_Checksum (C); 612 Base_Char := C; 613 UI_Base := UI_Int_Value; 614 615 if Base_Char = ':' then 616 Based_Literal_Uses_Colon := True; 617 end if; 618 619 if UI_Base < 2 or else UI_Base > 16 then 620 Error_Msg_SC ("base not 2-16"); 621 UI_Base := Uint_16; 622 end if; 623 624 Base := UI_To_Int (UI_Base); 625 Scan_Ptr := Scan_Ptr + 1; 626 627 -- Scan out extended integer [. integer] 628 629 C := Source (Scan_Ptr); 630 UI_Int_Value := Uint_0; 631 Scale := 0; 632 633 loop 634 if C in '0' .. '9' then 635 Accumulate_Checksum (C); 636 Extended_Digit_Value := 637 Int'(Character'Pos (C)) - Int'(Character'Pos ('0')); 638 639 elsif C in 'A' .. 'F' then 640 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32)); 641 Extended_Digit_Value := 642 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10; 643 644 elsif C in 'a' .. 'f' then 645 Accumulate_Checksum (C); 646 Extended_Digit_Value := 647 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10; 648 649 else 650 Error_Msg_S ("extended digit expected"); 651 exit; 652 end if; 653 654 if Extended_Digit_Value >= Base then 655 Error_Msg_S ("digit '>= base"); 656 end if; 657 658 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value; 659 Scale := Scale - 1; 660 Scan_Ptr := Scan_Ptr + 1; 661 C := Source (Scan_Ptr); 662 663 if C = '_' then 664 loop 665 Accumulate_Checksum ('_'); 666 Scan_Ptr := Scan_Ptr + 1; 667 C := Source (Scan_Ptr); 668 exit when C /= '_'; 669 Error_No_Double_Underline; 670 end loop; 671 672 elsif C = '.' then 673 Accumulate_Checksum ('.'); 674 675 if Point_Scanned then 676 Error_Msg_S ("duplicate point ignored"); 677 end if; 678 679 Scan_Ptr := Scan_Ptr + 1; 680 C := Source (Scan_Ptr); 681 Point_Scanned := True; 682 Scale := 0; 683 684 elsif C = Base_Char then 685 Accumulate_Checksum (C); 686 Scan_Ptr := Scan_Ptr + 1; 687 exit; 688 689 elsif C = '#' or else C = ':' then 690 Error_Msg_S ("based number delimiters must match"); 691 Scan_Ptr := Scan_Ptr + 1; 692 exit; 693 694 elsif not Identifier_Char (C) then 695 if Base_Char = '#' then 696 Error_Msg_S -- CODEFIX 697 ("missing '#"); 698 else 699 Error_Msg_S -- CODEFIX 700 ("missing ':"); 701 end if; 702 703 exit; 704 end if; 705 706 end loop; 707 708 UI_Num_Value := UI_Int_Value; 709 end if; 710 711 -- Scan out exponent 712 713 if not Point_Scanned then 714 Scale := 0; 715 UI_Scale := Uint_0; 716 else 717 UI_Scale := UI_From_Int (Scale); 718 end if; 719 720 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then 721 Accumulate_Checksum ('e'); 722 Scan_Ptr := Scan_Ptr + 1; 723 Exponent_Is_Negative := False; 724 725 if Source (Scan_Ptr) = '+' then 726 Accumulate_Checksum ('+'); 727 Scan_Ptr := Scan_Ptr + 1; 728 729 elsif Source (Scan_Ptr) = '-' then 730 Accumulate_Checksum ('-'); 731 732 if not Point_Scanned then 733 Error_Msg_S 734 ("negative exponent not allowed for integer literal"); 735 else 736 Exponent_Is_Negative := True; 737 end if; 738 739 Scan_Ptr := Scan_Ptr + 1; 740 end if; 741 742 UI_Int_Value := Uint_0; 743 744 if Source (Scan_Ptr) in '0' .. '9' then 745 Scan_Integer; 746 else 747 Error_Digit_Expected; 748 end if; 749 750 if Exponent_Is_Negative then 751 UI_Scale := UI_Scale - UI_Int_Value; 752 else 753 UI_Scale := UI_Scale + UI_Int_Value; 754 end if; 755 end if; 756 757 -- Case of real literal to be returned 758 759 if Point_Scanned then 760 Token := Tok_Real_Literal; 761 Real_Literal_Value := 762 UR_From_Components ( 763 Num => UI_Num_Value, 764 Den => -UI_Scale, 765 Rbase => Base); 766 767 -- Case of integer literal to be returned 768 769 else 770 Token := Tok_Integer_Literal; 771 772 if UI_Scale = 0 then 773 Int_Literal_Value := UI_Num_Value; 774 775 -- Avoid doing possibly expensive calculations in cases like 776 -- parsing 163E800_000# when semantics will not be done anyway. 777 -- This is especially useful when parsing garbled input. 778 779 elsif Operating_Mode /= Check_Syntax 780 and then (Serious_Errors_Detected = 0 or else Try_Semantics) 781 then 782 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale; 783 784 else 785 Int_Literal_Value := No_Uint; 786 end if; 787 end if; 788 789 Accumulate_Token_Checksum; 790 end Nlit; 791 792 ---------- 793 -- Slit -- 794 ---------- 795 796 procedure Slit is 797 798 Delimiter : Character; 799 -- Delimiter (first character of string) 800 801 C : Character; 802 -- Current source program character 803 804 Code : Char_Code; 805 -- Current character code value 806 807 Err : Boolean; 808 -- Error flag for Scan_Wide call 809 810 String_Start : Source_Ptr; 811 -- Point to first character of string 812 813 procedure Error_Bad_String_Char; 814 -- Signal bad character in string/character literal. On entry 815 -- Scan_Ptr points to the improper character encountered during the 816 -- scan. Scan_Ptr is not modified, so it still points to the bad 817 -- character on return. 818 819 procedure Error_Unterminated_String; 820 -- Procedure called if a line terminator character is encountered 821 -- during scanning a string, meaning that the string is not properly 822 -- terminated. 823 824 procedure Set_String; 825 -- Procedure used to distinguish between string and operator symbol. 826 -- On entry the string has been scanned out, and its characters start 827 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token 828 -- is set to Tok_String_Literal/Tok_Operator_Symbol as appropriate, 829 -- and Token_Node is appropriately initialized. In addition, in the 830 -- operator symbol case, Token_Name is appropriately set, and the 831 -- flags [Wide_]Wide_Character_Found are set appropriately. 832 833 --------------------------- 834 -- Error_Bad_String_Char -- 835 --------------------------- 836 837 procedure Error_Bad_String_Char is 838 C : constant Character := Source (Scan_Ptr); 839 840 begin 841 if C = HT then 842 Error_Msg_S ("horizontal tab not allowed in string"); 843 844 elsif C = VT or else C = FF then 845 Error_Msg_S ("format effector not allowed in string"); 846 847 elsif C in Upper_Half_Character then 848 Error_Msg_S ("(Ada 83) upper half character not allowed"); 849 850 else 851 Error_Msg_S ("control character not allowed in string"); 852 end if; 853 end Error_Bad_String_Char; 854 855 ------------------------------- 856 -- Error_Unterminated_String -- 857 ------------------------------- 858 859 procedure Error_Unterminated_String is 860 S : Source_Ptr; 861 862 begin 863 -- An interesting little refinement. Consider the following 864 -- examples: 865 866 -- A := "this is an unterminated string; 867 -- A := "this is an unterminated string & 868 -- P(A, "this is a parameter that didn't get terminated); 869 -- P("this is a parameter that didn't get terminated, A); 870 871 -- We fiddle a little to do slightly better placement in these 872 -- cases also if there is white space at the end of the line we 873 -- place the flag at the start of this white space, not at the 874 -- end. Note that we only have to test for blanks, since tabs 875 -- aren't allowed in strings in the first place and would have 876 -- caused an error message. 877 878 -- Two more cases that we treat specially are: 879 880 -- A := "this string uses the wrong terminator' 881 -- A := "this string uses the wrong terminator' & 882 883 -- In these cases we give a different error message as well 884 885 -- We actually reposition the scan pointer to the point where we 886 -- place the flag in these cases, since it seems a better bet on 887 -- the original intention. 888 889 while Source (Scan_Ptr - 1) = ' ' 890 or else Source (Scan_Ptr - 1) = '&' 891 loop 892 Scan_Ptr := Scan_Ptr - 1; 893 Unstore_String_Char; 894 end loop; 895 896 -- Check for case of incorrect string terminator, but single quote 897 -- is not considered incorrect if the opening terminator misused 898 -- a single quote (error message already given). 899 900 if Delimiter /= ''' 901 and then Source (Scan_Ptr - 1) = ''' 902 then 903 Unstore_String_Char; 904 Error_Msg 905 ("incorrect string terminator character", Scan_Ptr - 1); 906 return; 907 end if; 908 909 -- Backup over semicolon or right-paren/semicolon sequence 910 911 if Source (Scan_Ptr - 1) = ';' then 912 Scan_Ptr := Scan_Ptr - 1; 913 Unstore_String_Char; 914 915 if Source (Scan_Ptr - 1) = ')' then 916 Scan_Ptr := Scan_Ptr - 1; 917 Unstore_String_Char; 918 end if; 919 end if; 920 921 -- See if there is a comma in the string, if so, guess that 922 -- the first comma terminates the string. 923 924 S := String_Start; 925 while S < Scan_Ptr loop 926 if Source (S) = ',' then 927 while Scan_Ptr > S loop 928 Scan_Ptr := Scan_Ptr - 1; 929 Unstore_String_Char; 930 end loop; 931 932 exit; 933 end if; 934 935 S := S + 1; 936 end loop; 937 938 -- Now we have adjusted the scan pointer, give message 939 940 Error_Msg_S -- CODEFIX 941 ("missing string quote"); 942 end Error_Unterminated_String; 943 944 ---------------- 945 -- Set_String -- 946 ---------------- 947 948 procedure Set_String is 949 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2); 950 C1 : Character; 951 C2 : Character; 952 C3 : Character; 953 954 begin 955 -- Token_Name is currently set to Error_Name. The following 956 -- section of code resets Token_Name to the proper Name_Op_xx 957 -- value if the string is a valid operator symbol, otherwise it is 958 -- left set to Error_Name. 959 960 if Slen = 1 then 961 C1 := Source (Token_Ptr + 1); 962 963 case C1 is 964 when '=' => 965 Token_Name := Name_Op_Eq; 966 967 when '>' => 968 Token_Name := Name_Op_Gt; 969 970 when '<' => 971 Token_Name := Name_Op_Lt; 972 973 when '+' => 974 Token_Name := Name_Op_Add; 975 976 when '-' => 977 Token_Name := Name_Op_Subtract; 978 979 when '&' => 980 Token_Name := Name_Op_Concat; 981 982 when '*' => 983 Token_Name := Name_Op_Multiply; 984 985 when '/' => 986 Token_Name := Name_Op_Divide; 987 988 when others => 989 null; 990 end case; 991 992 elsif Slen = 2 then 993 C1 := Source (Token_Ptr + 1); 994 C2 := Source (Token_Ptr + 2); 995 996 if C1 = '*' and then C2 = '*' then 997 Token_Name := Name_Op_Expon; 998 999 elsif C2 = '=' then 1000 1001 if C1 = '/' then 1002 Token_Name := Name_Op_Ne; 1003 elsif C1 = '<' then 1004 Token_Name := Name_Op_Le; 1005 elsif C1 = '>' then 1006 Token_Name := Name_Op_Ge; 1007 end if; 1008 1009 elsif (C1 = 'O' or else C1 = 'o') and then -- OR 1010 (C2 = 'R' or else C2 = 'r') 1011 then 1012 Token_Name := Name_Op_Or; 1013 end if; 1014 1015 elsif Slen = 3 then 1016 C1 := Source (Token_Ptr + 1); 1017 C2 := Source (Token_Ptr + 2); 1018 C3 := Source (Token_Ptr + 3); 1019 1020 if (C1 = 'A' or else C1 = 'a') and then -- AND 1021 (C2 = 'N' or else C2 = 'n') and then 1022 (C3 = 'D' or else C3 = 'd') 1023 then 1024 Token_Name := Name_Op_And; 1025 1026 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS 1027 (C2 = 'B' or else C2 = 'b') and then 1028 (C3 = 'S' or else C3 = 's') 1029 then 1030 Token_Name := Name_Op_Abs; 1031 1032 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD 1033 (C2 = 'O' or else C2 = 'o') and then 1034 (C3 = 'D' or else C3 = 'd') 1035 then 1036 Token_Name := Name_Op_Mod; 1037 1038 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT 1039 (C2 = 'O' or else C2 = 'o') and then 1040 (C3 = 'T' or else C3 = 't') 1041 then 1042 Token_Name := Name_Op_Not; 1043 1044 elsif (C1 = 'R' or else C1 = 'r') and then -- REM 1045 (C2 = 'E' or else C2 = 'e') and then 1046 (C3 = 'M' or else C3 = 'm') 1047 then 1048 Token_Name := Name_Op_Rem; 1049 1050 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR 1051 (C2 = 'O' or else C2 = 'o') and then 1052 (C3 = 'R' or else C3 = 'r') 1053 then 1054 Token_Name := Name_Op_Xor; 1055 end if; 1056 1057 end if; 1058 1059 -- If it is an operator symbol, then Token_Name is set. If it is 1060 -- some other string value, then Token_Name still contains 1061 -- Error_Name. 1062 1063 if Token_Name = Error_Name then 1064 Token := Tok_String_Literal; 1065 1066 else 1067 Token := Tok_Operator_Symbol; 1068 end if; 1069 end Set_String; 1070 1071 -- Start of processing for Slit 1072 1073 begin 1074 -- On entry, Scan_Ptr points to the opening character of the string 1075 -- which is either a percent, double quote, or apostrophe (single 1076 -- quote). The latter case is an error detected by the character 1077 -- literal circuit. 1078 1079 String_Start := Scan_Ptr; 1080 1081 Delimiter := Source (Scan_Ptr); 1082 Accumulate_Checksum (Delimiter); 1083 1084 Start_String; 1085 Wide_Character_Found := False; 1086 Wide_Wide_Character_Found := False; 1087 Scan_Ptr := Scan_Ptr + 1; 1088 1089 -- Loop to scan out characters of string literal 1090 1091 loop 1092 C := Source (Scan_Ptr); 1093 1094 if C = Delimiter then 1095 Accumulate_Checksum (C); 1096 Scan_Ptr := Scan_Ptr + 1; 1097 exit when Source (Scan_Ptr) /= Delimiter; 1098 Code := Get_Char_Code (C); 1099 Accumulate_Checksum (C); 1100 Scan_Ptr := Scan_Ptr + 1; 1101 1102 else 1103 if C = '"' and then Delimiter = '%' then 1104 Error_Msg_S 1105 ("quote not allowed in percent delimited string"); 1106 Code := Get_Char_Code (C); 1107 Scan_Ptr := Scan_Ptr + 1; 1108 1109 elsif Start_Of_Wide_Character then 1110 Wptr := Scan_Ptr; 1111 Scan_Wide (Source, Scan_Ptr, Code, Err); 1112 1113 if Err then 1114 Error_Illegal_Wide_Character; 1115 Code := Get_Char_Code (' '); 1116 else 1117 Check_Bidi (Code); 1118 end if; 1119 1120 Accumulate_Checksum (Code); 1121 1122 -- In Ada 95 mode we allow any wide characters in a string 1123 -- but in Ada 2005, the set of characters allowed has been 1124 -- restricted to graphic characters. 1125 1126 if Ada_Version >= Ada_2005 1127 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1128 then 1129 Error_Msg 1130 ("(Ada 2005) non-graphic character not permitted " & 1131 "in string literal", Wptr); 1132 end if; 1133 1134 else 1135 Accumulate_Checksum (C); 1136 1137 if C not in Graphic_Character then 1138 if C in Line_Terminator then 1139 Error_Unterminated_String; 1140 exit; 1141 1142 elsif C in Upper_Half_Character then 1143 if Ada_Version = Ada_83 then 1144 Error_Bad_String_Char; 1145 end if; 1146 1147 else 1148 Error_Bad_String_Char; 1149 end if; 1150 end if; 1151 1152 Code := Get_Char_Code (C); 1153 Scan_Ptr := Scan_Ptr + 1; 1154 end if; 1155 end if; 1156 1157 Store_String_Char (Code); 1158 1159 if not In_Character_Range (Code) then 1160 if In_Wide_Character_Range (Code) then 1161 Wide_Character_Found := True; 1162 else 1163 Wide_Wide_Character_Found := True; 1164 end if; 1165 end if; 1166 end loop; 1167 1168 String_Literal_Id := End_String; 1169 Set_String; 1170 return; 1171 end Slit; 1172 1173 ---------------------------------- 1174 -- Skip_Other_Format_Characters -- 1175 ---------------------------------- 1176 1177 procedure Skip_Other_Format_Characters is 1178 P : Source_Ptr; 1179 Code : Char_Code; 1180 Err : Boolean; 1181 1182 begin 1183 while Start_Of_Wide_Character loop 1184 P := Scan_Ptr; 1185 Scan_Wide (Source, Scan_Ptr, Code, Err); 1186 1187 if not Is_UTF_32_Other (UTF_32 (Code)) then 1188 Scan_Ptr := P; 1189 return; 1190 end if; 1191 end loop; 1192 end Skip_Other_Format_Characters; 1193 1194 ----------------------------- 1195 -- Start_Of_Wide_Character -- 1196 ----------------------------- 1197 1198 function Start_Of_Wide_Character return Boolean is 1199 C : constant Character := Source (Scan_Ptr); 1200 1201 begin 1202 -- ESC encoding method with ESC present 1203 1204 if C = ESC 1205 and then Wide_Character_Encoding_Method in WC_ESC_Encoding_Method 1206 then 1207 return True; 1208 1209 -- Upper half character with upper half encoding 1210 1211 elsif C in Upper_Half_Character and then Upper_Half_Encoding then 1212 return True; 1213 1214 -- Brackets encoding 1215 1216 elsif C = '[' 1217 and then Source (Scan_Ptr + 1) = '"' 1218 and then Identifier_Char (Source (Scan_Ptr + 2)) 1219 then 1220 return True; 1221 1222 -- Not the start of a wide character 1223 1224 else 1225 return False; 1226 end if; 1227 end Start_Of_Wide_Character; 1228 1229 Token_Contains_Uppercase : Boolean; 1230 1231 -- Start of processing for Scan 1232 1233 begin 1234 Prev_Token := Token; 1235 Prev_Token_Ptr := Token_Ptr; 1236 Token_Name := Error_Name; 1237 1238 -- The following loop runs more than once only if a format effector 1239 -- (tab, vertical tab, form feed, line feed, carriage return) is 1240 -- encountered and skipped, or some error situation, such as an 1241 -- illegal character, is encountered. 1242 1243 <<Scan_Next_Character>> 1244 1245 loop 1246 -- Skip past blanks, loop is opened up for speed 1247 1248 while Source (Scan_Ptr) = ' ' loop 1249 if Source (Scan_Ptr + 1) /= ' ' then 1250 Scan_Ptr := Scan_Ptr + 1; 1251 exit; 1252 end if; 1253 1254 if Source (Scan_Ptr + 2) /= ' ' then 1255 Scan_Ptr := Scan_Ptr + 2; 1256 exit; 1257 end if; 1258 1259 if Source (Scan_Ptr + 3) /= ' ' then 1260 Scan_Ptr := Scan_Ptr + 3; 1261 exit; 1262 end if; 1263 1264 if Source (Scan_Ptr + 4) /= ' ' then 1265 Scan_Ptr := Scan_Ptr + 4; 1266 exit; 1267 end if; 1268 1269 if Source (Scan_Ptr + 5) /= ' ' then 1270 Scan_Ptr := Scan_Ptr + 5; 1271 exit; 1272 end if; 1273 1274 if Source (Scan_Ptr + 6) /= ' ' then 1275 Scan_Ptr := Scan_Ptr + 6; 1276 exit; 1277 end if; 1278 1279 if Source (Scan_Ptr + 7) /= ' ' then 1280 Scan_Ptr := Scan_Ptr + 7; 1281 exit; 1282 end if; 1283 1284 Scan_Ptr := Scan_Ptr + 8; 1285 end loop; 1286 1287 -- We are now at a non-blank character, which is the first character 1288 -- of the token we will scan, and hence the value of Token_Ptr. 1289 1290 Token_Ptr := Scan_Ptr; 1291 1292 Token_Contains_Uppercase := False; 1293 1294 -- Here begins the main case statement which transfers control on the 1295 -- basis of the non-blank character we have encountered. 1296 1297 case Source (Scan_Ptr) is 1298 1299 -- Line terminator characters 1300 1301 when CR | LF | FF | VT => 1302 goto Scan_Line_Terminator; 1303 1304 -- Horizontal tab, just skip past it 1305 1306 when HT => 1307 if Style_Check then 1308 Style.Check_HT; 1309 end if; 1310 1311 Scan_Ptr := Scan_Ptr + 1; 1312 1313 -- End of file character, treated as an end of file only if it is 1314 -- the last character in the buffer, otherwise it is ignored. 1315 1316 when EOF => 1317 if Scan_Ptr = Source_Last (Current_Source_File) then 1318 Check_End_Of_Line; 1319 1320 if Style_Check then 1321 Style.Check_EOF; 1322 end if; 1323 1324 Token := Tok_EOF; 1325 return; 1326 else 1327 Scan_Ptr := Scan_Ptr + 1; 1328 end if; 1329 1330 -- Ampersand 1331 1332 when '&' => 1333 Accumulate_Checksum ('&'); 1334 1335 if Source (Scan_Ptr + 1) = '&' then 1336 Error_Msg_S -- CODEFIX 1337 ("'&'& should be `AND THEN`"); 1338 Scan_Ptr := Scan_Ptr + 2; 1339 Token := Tok_And; 1340 return; 1341 1342 else 1343 Scan_Ptr := Scan_Ptr + 1; 1344 Token := Tok_Ampersand; 1345 return; 1346 end if; 1347 1348 -- AI12-0125-03 : @ is target_name 1349 1350 when '@' => 1351 Error_Msg_Ada_2022_Feature ("target name", Token_Ptr); 1352 1353 Accumulate_Checksum ('@'); 1354 Scan_Ptr := Scan_Ptr + 1; 1355 Token := Tok_At_Sign; 1356 return; 1357 1358 -- Asterisk (can be multiplication operator or double asterisk which 1359 -- is the exponentiation compound delimiter). 1360 1361 when '*' => 1362 Accumulate_Checksum ('*'); 1363 1364 if Source (Scan_Ptr + 1) = '*' then 1365 Accumulate_Checksum ('*'); 1366 Scan_Ptr := Scan_Ptr + 2; 1367 Token := Tok_Double_Asterisk; 1368 return; 1369 1370 else 1371 Scan_Ptr := Scan_Ptr + 1; 1372 Token := Tok_Asterisk; 1373 return; 1374 end if; 1375 1376 -- Colon, which can either be an isolated colon, or part of an 1377 -- assignment compound delimiter. 1378 1379 when ':' => 1380 Accumulate_Checksum (':'); 1381 1382 if Double_Char_Token ('=') then 1383 Token := Tok_Colon_Equal; 1384 1385 if Style_Check then 1386 Style.Check_Colon_Equal; 1387 end if; 1388 1389 return; 1390 1391 elsif Source (Scan_Ptr + 1) = '-' 1392 and then Source (Scan_Ptr + 2) /= '-' 1393 then 1394 Token := Tok_Colon_Equal; 1395 Error_Msg -- CODEFIX 1396 (":- should be :=", Scan_Ptr); 1397 Scan_Ptr := Scan_Ptr + 2; 1398 return; 1399 1400 else 1401 Scan_Ptr := Scan_Ptr + 1; 1402 Token := Tok_Colon; 1403 1404 if Style_Check then 1405 Style.Check_Colon; 1406 end if; 1407 1408 return; 1409 end if; 1410 1411 -- Left parenthesis 1412 1413 when '(' => 1414 Accumulate_Checksum ('('); 1415 Scan_Ptr := Scan_Ptr + 1; 1416 Token := Tok_Left_Paren; 1417 1418 if Style_Check then 1419 Style.Check_Left_Paren; 1420 end if; 1421 1422 return; 1423 1424 -- Left bracket 1425 1426 when '[' => 1427 1428 -- [] under -gnat2022 is an aggregate notation and the special 1429 -- wide character notation becomes unsupported since the two 1430 -- are ambiguous. 1431 1432 if Ada_Version >= Ada_2022 then 1433 Scan_Ptr := Scan_Ptr + 1; 1434 Token := Tok_Left_Bracket; 1435 return; 1436 1437 elsif Source (Scan_Ptr + 1) = '"' then 1438 goto Scan_Wide_Character; 1439 1440 else 1441 Error_Msg_S ("illegal character, replaced by ""("""); 1442 Scan_Ptr := Scan_Ptr + 1; 1443 Token := Tok_Left_Paren; 1444 return; 1445 end if; 1446 1447 -- Left brace 1448 1449 when '{' => 1450 Error_Msg_S ("illegal character, replaced by ""("""); 1451 Scan_Ptr := Scan_Ptr + 1; 1452 Token := Tok_Left_Paren; 1453 return; 1454 1455 -- Comma 1456 1457 when ',' => 1458 Accumulate_Checksum (','); 1459 Scan_Ptr := Scan_Ptr + 1; 1460 Token := Tok_Comma; 1461 1462 if Style_Check then 1463 Style.Check_Comma; 1464 end if; 1465 1466 return; 1467 1468 -- Dot, which is either an isolated period, or part of a double dot 1469 -- compound delimiter sequence. We also check for the case of a 1470 -- digit following the period, to give a better error message. 1471 1472 when '.' => 1473 Accumulate_Checksum ('.'); 1474 1475 if Double_Char_Token ('.') then 1476 Token := Tok_Dot_Dot; 1477 1478 if Style_Check then 1479 Style.Check_Dot_Dot; 1480 end if; 1481 1482 return; 1483 1484 elsif Source (Scan_Ptr + 1) in '0' .. '9' then 1485 Error_Msg_S ("numeric literal cannot start with point"); 1486 Scan_Ptr := Scan_Ptr + 1; 1487 1488 else 1489 Scan_Ptr := Scan_Ptr + 1; 1490 Token := Tok_Dot; 1491 return; 1492 end if; 1493 1494 -- Equal, which can either be an equality operator, or part of the 1495 -- arrow (=>) compound delimiter. 1496 1497 when '=' => 1498 Accumulate_Checksum ('='); 1499 1500 if Double_Char_Token ('>') then 1501 Token := Tok_Arrow; 1502 1503 if Style_Check then 1504 Style.Check_Arrow (Inside_Depends); 1505 end if; 1506 1507 return; 1508 1509 elsif Source (Scan_Ptr + 1) = '=' then 1510 Error_Msg_S -- CODEFIX 1511 ("== should be ="); 1512 Scan_Ptr := Scan_Ptr + 1; 1513 end if; 1514 1515 Scan_Ptr := Scan_Ptr + 1; 1516 Token := Tok_Equal; 1517 return; 1518 1519 -- Greater than, which can be a greater than operator, greater than 1520 -- or equal operator, or first character of a right label bracket. 1521 1522 when '>' => 1523 Accumulate_Checksum ('>'); 1524 1525 if Double_Char_Token ('=') then 1526 Token := Tok_Greater_Equal; 1527 return; 1528 1529 elsif Double_Char_Token ('>') then 1530 Token := Tok_Greater_Greater; 1531 return; 1532 1533 else 1534 Scan_Ptr := Scan_Ptr + 1; 1535 Token := Tok_Greater; 1536 return; 1537 end if; 1538 1539 -- Less than, which can be a less than operator, less than or equal 1540 -- operator, or the first character of a left label bracket, or the 1541 -- first character of a box (<>) compound delimiter. 1542 1543 when '<' => 1544 Accumulate_Checksum ('<'); 1545 1546 if Double_Char_Token ('=') then 1547 Token := Tok_Less_Equal; 1548 return; 1549 1550 elsif Double_Char_Token ('>') then 1551 Token := Tok_Box; 1552 1553 if Style_Check then 1554 Style.Check_Box; 1555 end if; 1556 1557 return; 1558 1559 elsif Double_Char_Token ('<') then 1560 Token := Tok_Less_Less; 1561 return; 1562 1563 else 1564 Scan_Ptr := Scan_Ptr + 1; 1565 Token := Tok_Less; 1566 return; 1567 end if; 1568 1569 -- Minus, which is either a subtraction operator, or the first 1570 -- character of double minus starting a comment 1571 1572 when '-' => Minus_Case : begin 1573 if Source (Scan_Ptr + 1) = '>' then 1574 Error_Msg_S ("invalid token"); 1575 Scan_Ptr := Scan_Ptr + 2; 1576 Token := Tok_Arrow; 1577 return; 1578 1579 elsif Source (Scan_Ptr + 1) /= '-' then 1580 Accumulate_Checksum ('-'); 1581 Scan_Ptr := Scan_Ptr + 1; 1582 Token := Tok_Minus; 1583 return; 1584 1585 -- Comment 1586 1587 else -- Source (Scan_Ptr + 1) = '-' then 1588 if Style_Check then 1589 Style.Check_Comment; 1590 end if; 1591 1592 Scan_Ptr := Scan_Ptr + 2; 1593 1594 -- If we are in preprocessor mode with Replace_In_Comments set, 1595 -- then we return the "--" as a token on its own. 1596 1597 if Replace_In_Comments then 1598 Token := Tok_Comment; 1599 return; 1600 end if; 1601 1602 -- Otherwise scan out the comment 1603 1604 Start_Of_Comment := Scan_Ptr; 1605 1606 -- Loop to scan comment (this loop runs more than once only if 1607 -- a horizontal tab or other non-graphic character is scanned) 1608 1609 loop 1610 -- Scan to non graphic character (opened up for speed) 1611 1612 -- Note that we just eat left brackets, which means that 1613 -- bracket notation cannot be used for end of line 1614 -- characters in comments. This seems a reasonable choice, 1615 -- since no one would ever use brackets notation in a real 1616 -- program in this situation, and if we allow brackets 1617 -- notation, we forbid some valid comments which contain a 1618 -- brackets sequence that happens to match an end of line 1619 -- character. 1620 1621 loop 1622 exit when Source (Scan_Ptr) not in Graphic_Character; 1623 Scan_Ptr := Scan_Ptr + 1; 1624 exit when Source (Scan_Ptr) not in Graphic_Character; 1625 Scan_Ptr := Scan_Ptr + 1; 1626 exit when Source (Scan_Ptr) not in Graphic_Character; 1627 Scan_Ptr := Scan_Ptr + 1; 1628 exit when Source (Scan_Ptr) not in Graphic_Character; 1629 Scan_Ptr := Scan_Ptr + 1; 1630 exit when Source (Scan_Ptr) not in Graphic_Character; 1631 Scan_Ptr := Scan_Ptr + 1; 1632 end loop; 1633 1634 -- Keep going if horizontal tab 1635 1636 if Source (Scan_Ptr) = HT then 1637 if Style_Check then 1638 Style.Check_HT; 1639 end if; 1640 1641 Scan_Ptr := Scan_Ptr + 1; 1642 1643 -- Terminate scan of comment if line terminator 1644 1645 elsif Source (Scan_Ptr) in Line_Terminator then 1646 exit; 1647 1648 -- Terminate scan of comment if end of file encountered 1649 -- (embedded EOF character or real last character in file) 1650 1651 elsif Source (Scan_Ptr) = EOF then 1652 exit; 1653 1654 -- If we have a wide character, we have to scan it out, 1655 -- because it might be a legitimate line terminator 1656 1657 elsif Start_Of_Wide_Character then 1658 declare 1659 Code : Char_Code; 1660 Err : Boolean; 1661 1662 begin 1663 Wptr := Scan_Ptr; 1664 Scan_Wide (Source, Scan_Ptr, Code, Err); 1665 1666 -- If not well formed wide character, then just skip 1667 -- past it and ignore it. 1668 1669 if Err then 1670 Scan_Ptr := Wptr + 1; 1671 1672 -- If UTF_32 terminator, terminate comment scan 1673 1674 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 1675 Scan_Ptr := Wptr; 1676 exit; 1677 else 1678 Check_Bidi (Code); 1679 end if; 1680 end; 1681 1682 -- Keep going if character in 80-FF range, or is ESC. These 1683 -- characters are allowed in comments by RM-2.1(1), 2.7(2). 1684 -- They are allowed even in Ada 83 mode according to the 1685 -- approved AI. ESC was added to the AI in June 93. 1686 1687 elsif Source (Scan_Ptr) in Upper_Half_Character 1688 or else Source (Scan_Ptr) = ESC 1689 then 1690 Scan_Ptr := Scan_Ptr + 1; 1691 1692 -- Otherwise we have an illegal comment character, ignore 1693 -- this error in relaxed semantics mode. 1694 1695 else 1696 if Relaxed_RM_Semantics then 1697 Scan_Ptr := Scan_Ptr + 1; 1698 else 1699 Error_Illegal_Character; 1700 end if; 1701 end if; 1702 end loop; 1703 1704 -- Note that, except when comments are tokens, we do NOT 1705 -- execute a return here, instead we fall through to reexecute 1706 -- the scan loop to look for a token. 1707 1708 if Comment_Is_Token then 1709 Name_Len := Integer (Scan_Ptr - Start_Of_Comment); 1710 Name_Buffer (1 .. Name_Len) := 1711 String (Source (Start_Of_Comment .. Scan_Ptr - 1)); 1712 Comment_Id := Name_Find; 1713 Token := Tok_Comment; 1714 return; 1715 end if; 1716 end if; 1717 end Minus_Case; 1718 1719 -- Double quote or percent starting a string literal 1720 1721 when '"' | '%' => 1722 Slit; 1723 Post_Scan; 1724 return; 1725 1726 -- Apostrophe. This can either be the start of a character literal, 1727 -- or an isolated apostrophe used in a qualified expression or an 1728 -- attribute. In the following: 1729 1730 -- A := CHARACTER'('A'); 1731 1732 -- the first apostrophe is treated as an isolated apostrophe, and the 1733 -- second one is treated as the start of the character literal 'A'. 1734 -- Note that RM-2.2(7) does not require a separator between "'" and 1735 -- "(" in the above, so we cannot use lookahead to distinguish the 1736 -- cases; we use look-back instead. Analysis of the grammar shows 1737 -- that some tokens can be followed by an apostrophe, and some by a 1738 -- character literal, but none by both. Some cannot be followed by 1739 -- either, so it doesn't matter what we do in those cases, except to 1740 -- get good error behavior. 1741 1742 when ''' => Char_Literal_Case : declare 1743 Code : Char_Code; 1744 Err : Boolean; 1745 1746 begin 1747 Accumulate_Checksum ('''); 1748 Scan_Ptr := Scan_Ptr + 1; 1749 1750 -- Distinguish between apostrophe and character literal. It's an 1751 -- apostrophe if the previous token is one of the following. 1752 -- Reserved words are included for things like A.all'Address and 1753 -- T'Digits'Img. Strings literals are included for things like 1754 -- "abs"'Address. Other literals are included to give better error 1755 -- behavior for illegal cases like 123'Img. 1756 -- 1757 -- In Ada 2022, a target name (i.e. @) is a valid prefix of an 1758 -- attribute, and functions like a name. 1759 1760 if Prev_Token = Tok_All 1761 or else Prev_Token = Tok_At_Sign 1762 or else Prev_Token = Tok_Delta 1763 or else Prev_Token = Tok_Digits 1764 or else Prev_Token = Tok_Identifier 1765 or else Prev_Token = Tok_Project 1766 or else Prev_Token = Tok_Right_Paren 1767 or else Prev_Token = Tok_Right_Bracket 1768 or else Prev_Token in Token_Class_Literal 1769 then 1770 Token := Tok_Apostrophe; 1771 1772 if Style_Check then 1773 Style.Check_Apostrophe; 1774 end if; 1775 1776 return; 1777 1778 -- Otherwise the apostrophe starts a character literal 1779 1780 else 1781 -- Case of wide character literal 1782 1783 if Start_Of_Wide_Character then 1784 Wptr := Scan_Ptr; 1785 Scan_Wide (Source, Scan_Ptr, Code, Err); 1786 1787 if Err then 1788 Error_Illegal_Wide_Character; 1789 Code := Character'Pos (' '); 1790 1791 -- In Ada 95 mode we allow any wide character in a character 1792 -- literal, but in later versions, the set of characters 1793 -- allowed is restricted to graphic characters. 1794 1795 elsif Ada_Version >= Ada_2005 1796 and then Is_UTF_32_Non_Graphic (UTF_32 (Code)) 1797 then 1798 Error_Msg -- CODEFIX 1799 ("(Ada 2005) non-graphic character not permitted " & 1800 "in character literal", Wptr); 1801 else 1802 Check_Bidi (Code); 1803 end if; 1804 1805 Accumulate_Checksum (Code); 1806 1807 if Source (Scan_Ptr) /= ''' then 1808 Error_Msg_S ("missing apostrophe"); 1809 else 1810 Scan_Ptr := Scan_Ptr + 1; 1811 end if; 1812 1813 -- If we do not find a closing quote in the expected place then 1814 -- assume that we have a misguided attempt at a string literal. 1815 1816 -- However, if previous token is RANGE, then we return an 1817 -- apostrophe instead since this gives better error recovery 1818 1819 elsif Source (Scan_Ptr + 1) /= ''' then 1820 if Prev_Token = Tok_Range then 1821 Token := Tok_Apostrophe; 1822 return; 1823 1824 else 1825 Scan_Ptr := Scan_Ptr - 1; 1826 Error_Msg_S 1827 ("strings are delimited by double quote character"); 1828 Slit; 1829 Post_Scan; 1830 return; 1831 end if; 1832 1833 -- Otherwise we have a (non-wide) character literal 1834 1835 else 1836 Accumulate_Checksum (Source (Scan_Ptr)); 1837 1838 if Source (Scan_Ptr) not in Graphic_Character then 1839 if Source (Scan_Ptr) in Upper_Half_Character then 1840 if Ada_Version = Ada_83 then 1841 Error_Illegal_Character; 1842 end if; 1843 1844 else 1845 Error_Illegal_Character; 1846 end if; 1847 end if; 1848 1849 Code := Get_Char_Code (Source (Scan_Ptr)); 1850 Scan_Ptr := Scan_Ptr + 2; 1851 end if; 1852 1853 -- Fall through here with Scan_Ptr updated past the closing 1854 -- quote, and Code set to the Char_Code value for the literal 1855 1856 Accumulate_Checksum ('''); 1857 Token := Tok_Char_Literal; 1858 Set_Character_Literal_Name (Code); 1859 Token_Name := Name_Find; 1860 Character_Code := Code; 1861 Post_Scan; 1862 return; 1863 end if; 1864 end Char_Literal_Case; 1865 1866 -- Right parenthesis 1867 1868 when ')' => 1869 Accumulate_Checksum (')'); 1870 Scan_Ptr := Scan_Ptr + 1; 1871 Token := Tok_Right_Paren; 1872 1873 if Style_Check then 1874 Style.Check_Right_Paren; 1875 end if; 1876 1877 return; 1878 1879 -- Right bracket or right brace, treated as right paren but proper 1880 -- aggregate delimiter in Ada 2022. 1881 1882 when ']' | '}' => 1883 if Ada_Version >= Ada_2022 then 1884 Token := Tok_Right_Bracket; 1885 1886 else 1887 Error_Msg_S ("illegal character, replaced by "")"""); 1888 Token := Tok_Right_Paren; 1889 end if; 1890 1891 Scan_Ptr := Scan_Ptr + 1; 1892 return; 1893 1894 -- Slash (can be division operator or first character of not equal) 1895 1896 when '/' => 1897 Accumulate_Checksum ('/'); 1898 1899 if Double_Char_Token ('=') then 1900 Token := Tok_Not_Equal; 1901 return; 1902 else 1903 Scan_Ptr := Scan_Ptr + 1; 1904 Token := Tok_Slash; 1905 return; 1906 end if; 1907 1908 -- Semicolon 1909 1910 when ';' => 1911 Accumulate_Checksum (';'); 1912 Scan_Ptr := Scan_Ptr + 1; 1913 Token := Tok_Semicolon; 1914 1915 if Style_Check then 1916 Style.Check_Semicolon; 1917 end if; 1918 1919 return; 1920 1921 -- Vertical bar 1922 1923 when '|' => Vertical_Bar_Case : begin 1924 Accumulate_Checksum ('|'); 1925 1926 -- Special check for || to give nice message 1927 1928 if Source (Scan_Ptr + 1) = '|' then 1929 Error_Msg_S -- CODEFIX 1930 ("""'|'|"" should be `OR ELSE`"); 1931 Scan_Ptr := Scan_Ptr + 2; 1932 Token := Tok_Or; 1933 return; 1934 1935 else 1936 Scan_Ptr := Scan_Ptr + 1; 1937 Token := Tok_Vertical_Bar; 1938 1939 if Style_Check then 1940 Style.Check_Vertical_Bar; 1941 end if; 1942 1943 Post_Scan; 1944 return; 1945 end if; 1946 end Vertical_Bar_Case; 1947 1948 -- Exclamation, replacement character for vertical bar 1949 1950 when '!' => Exclamation_Case : begin 1951 Accumulate_Checksum ('!'); 1952 1953 if Source (Scan_Ptr + 1) = '=' then 1954 Error_Msg_S -- CODEFIX 1955 ("'!= should be /="); 1956 Scan_Ptr := Scan_Ptr + 2; 1957 Token := Tok_Not_Equal; 1958 return; 1959 1960 else 1961 Scan_Ptr := Scan_Ptr + 1; 1962 Token := Tok_Vertical_Bar; 1963 Post_Scan; 1964 return; 1965 end if; 1966 end Exclamation_Case; 1967 1968 -- Plus 1969 1970 when '+' => Plus_Case : begin 1971 Accumulate_Checksum ('+'); 1972 Scan_Ptr := Scan_Ptr + 1; 1973 Token := Tok_Plus; 1974 return; 1975 end Plus_Case; 1976 1977 -- Digits starting a numeric literal 1978 1979 when '0' .. '9' => 1980 1981 -- First a bit of a scan ahead to see if we have a case of an 1982 -- identifier starting with a digit (remembering exponent case). 1983 1984 declare 1985 C : constant Character := Source (Scan_Ptr + 1); 1986 1987 begin 1988 -- OK literal if digit followed by digit or underscore 1989 1990 if C in '0' .. '9' or else C = '_' then 1991 null; 1992 1993 -- OK literal if digit not followed by identifier char 1994 1995 elsif not Identifier_Char (C) then 1996 null; 1997 1998 -- OK literal if digit followed by e/E followed by digit/sign. 1999 -- We also allow underscore after the E, which is an error, but 2000 -- better handled by Nlit than deciding this is an identifier. 2001 2002 elsif (C = 'e' or else C = 'E') 2003 and then (Source (Scan_Ptr + 2) in '0' .. '9' 2004 or else Source (Scan_Ptr + 2) = '+' 2005 or else Source (Scan_Ptr + 2) = '-' 2006 or else Source (Scan_Ptr + 2) = '_') 2007 then 2008 null; 2009 2010 -- Here we have what really looks like an identifier that 2011 -- starts with a digit, so give error msg. 2012 2013 else 2014 Error_Msg_S ("identifier may not start with digit"); 2015 Name_Len := 1; 2016 Underline_Found := False; 2017 Name_Buffer (1) := Source (Scan_Ptr); 2018 Accumulate_Checksum (Name_Buffer (1)); 2019 Scan_Ptr := Scan_Ptr + 1; 2020 goto Scan_Identifier; 2021 end if; 2022 end; 2023 2024 -- Here we have an OK integer literal 2025 2026 Nlit; 2027 2028 -- Check for proper delimiter, ignoring other format characters 2029 2030 Skip_Other_Format_Characters; 2031 2032 if Identifier_Char (Source (Scan_Ptr)) then 2033 Error_Msg_S 2034 ("delimiter required between literal and identifier"); 2035 end if; 2036 2037 Post_Scan; 2038 return; 2039 2040 -- Lower case letters 2041 2042 when 'a' .. 'z' => 2043 Name_Len := 1; 2044 Underline_Found := False; 2045 Name_Buffer (1) := Source (Scan_Ptr); 2046 Accumulate_Checksum (Name_Buffer (1)); 2047 Scan_Ptr := Scan_Ptr + 1; 2048 goto Scan_Identifier; 2049 2050 -- Upper case letters 2051 2052 when 'A' .. 'Z' => 2053 Token_Contains_Uppercase := True; 2054 Name_Len := 1; 2055 Underline_Found := False; 2056 Name_Buffer (1) := 2057 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2058 Accumulate_Checksum (Name_Buffer (1)); 2059 Scan_Ptr := Scan_Ptr + 1; 2060 goto Scan_Identifier; 2061 2062 -- Underline character 2063 2064 when '_' => 2065 if Special_Characters ('_') then 2066 Token_Ptr := Scan_Ptr; 2067 Scan_Ptr := Scan_Ptr + 1; 2068 Token := Tok_Special; 2069 Special_Character := '_'; 2070 return; 2071 end if; 2072 2073 Error_Msg_S ("identifier cannot start with underline"); 2074 Name_Len := 1; 2075 Name_Buffer (1) := '_'; 2076 Scan_Ptr := Scan_Ptr + 1; 2077 Underline_Found := False; 2078 goto Scan_Identifier; 2079 2080 -- Space (not possible, because we scanned past blanks) 2081 2082 when ' ' => 2083 raise Program_Error; 2084 2085 -- Characters in top half of ASCII 8-bit chart 2086 2087 when Upper_Half_Character => 2088 2089 -- Wide character case 2090 2091 if Upper_Half_Encoding then 2092 goto Scan_Wide_Character; 2093 2094 -- Otherwise we have OK Latin-1 character 2095 2096 else 2097 -- Upper half characters may possibly be identifier letters 2098 -- but can never be digits, so Identifier_Char can be used to 2099 -- test for a valid start of identifier character. 2100 2101 if Identifier_Char (Source (Scan_Ptr)) then 2102 Name_Len := 0; 2103 Underline_Found := False; 2104 goto Scan_Identifier; 2105 else 2106 Error_Illegal_Character; 2107 end if; 2108 end if; 2109 2110 when ESC => 2111 2112 -- ESC character, possible start of identifier if wide characters 2113 -- using ESC encoding are allowed in identifiers, which we can 2114 -- tell by looking at the Identifier_Char flag for ESC, which is 2115 -- only true if these conditions are met. In Ada 2005 mode, may 2116 -- also be valid UTF_32 space or line terminator character. 2117 2118 if Identifier_Char (ESC) then 2119 Name_Len := 0; 2120 goto Scan_Wide_Character; 2121 else 2122 Error_Illegal_Character; 2123 end if; 2124 2125 -- Invalid control characters 2126 2127 when ACK 2128 | ASCII.SO 2129 | BEL 2130 | BS 2131 | CAN 2132 | DC1 2133 | DC2 2134 | DC3 2135 | DC4 2136 | DEL 2137 | DLE 2138 | EM 2139 | ENQ 2140 | EOT 2141 | ETB 2142 | ETX 2143 | FS 2144 | GS 2145 | NAK 2146 | NUL 2147 | RS 2148 | SI 2149 | SOH 2150 | STX 2151 | SYN 2152 | US 2153 => 2154 Error_Illegal_Character; 2155 2156 -- Invalid graphic characters 2157 -- Note that '@' is handled elsewhere, because following AI12-125 2158 -- it denotes the target_name of an assignment. 2159 2160 when '#' | '$' | '?' | '`' | '\' | '^' | '~' => 2161 2162 -- If Set_Special_Character has been called for this character, 2163 -- set Scans.Special_Character and return a Special token. 2164 2165 if Special_Characters (Source (Scan_Ptr)) then 2166 Token_Ptr := Scan_Ptr; 2167 Token := Tok_Special; 2168 Special_Character := Source (Scan_Ptr); 2169 Scan_Ptr := Scan_Ptr + 1; 2170 return; 2171 2172 -- Check for something looking like a preprocessor directive 2173 2174 elsif Source (Scan_Ptr) = '#' 2175 and then (Source (Scan_Ptr + 1 .. Scan_Ptr + 2) = "if" 2176 or else 2177 Source (Scan_Ptr + 1 .. Scan_Ptr + 5) = "elsif" 2178 or else 2179 Source (Scan_Ptr + 1 .. Scan_Ptr + 4) = "else" 2180 or else 2181 Source (Scan_Ptr + 1 .. Scan_Ptr + 3) = "end") 2182 then 2183 Error_Msg_S 2184 ("preprocessor directive ignored, preprocessor not active"); 2185 2186 -- Skip to end of line 2187 2188 loop 2189 if Source (Scan_Ptr) in Graphic_Character 2190 or else 2191 Source (Scan_Ptr) = HT 2192 then 2193 Scan_Ptr := Scan_Ptr + 1; 2194 2195 -- Done if line terminator or EOF 2196 2197 elsif Source (Scan_Ptr) in Line_Terminator 2198 or else 2199 Source (Scan_Ptr) = EOF 2200 then 2201 exit; 2202 2203 -- If we have a wide character, we have to scan it out, 2204 -- because it might be a legitimate line terminator 2205 2206 elsif Start_Of_Wide_Character then 2207 declare 2208 Wptr : constant Source_Ptr := Scan_Ptr; 2209 Code : Char_Code; 2210 Err : Boolean; 2211 2212 begin 2213 Scan_Wide (Source, Scan_Ptr, Code, Err); 2214 2215 -- If not well formed wide character, then just skip 2216 -- past it and ignore it. 2217 2218 if Err then 2219 Scan_Ptr := Wptr + 1; 2220 2221 -- If UTF_32 terminator, terminate comment scan 2222 2223 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2224 Scan_Ptr := Wptr; 2225 exit; 2226 end if; 2227 end; 2228 2229 -- Else keep going (don't worry about bad comment chars 2230 -- in this context, we just want to find the end of line. 2231 2232 else 2233 Scan_Ptr := Scan_Ptr + 1; 2234 end if; 2235 end loop; 2236 2237 -- Otherwise, this is an illegal character 2238 2239 else 2240 Error_Illegal_Character; 2241 end if; 2242 2243 -- End switch on non-blank character 2244 2245 end case; 2246 2247 -- End loop past format effectors. The exit from this loop is by 2248 -- executing a return statement following completion of token scan 2249 -- (control never falls out of this loop to the code that follows). 2250 2251 end loop; 2252 2253 pragma Assert (False); 2254 2255 -- Wide_Character scanning routine. On entry we have encountered the 2256 -- initial character of a wide character sequence. 2257 2258 <<Scan_Wide_Character>> 2259 declare 2260 Code : Char_Code; 2261 Cat : Category; 2262 Err : Boolean; 2263 2264 begin 2265 Wptr := Scan_Ptr; 2266 Scan_Wide (Source, Scan_Ptr, Code, Err); 2267 2268 -- If bad wide character, signal error and continue scan 2269 2270 if Err then 2271 Error_Illegal_Wide_Character; 2272 goto Scan_Next_Character; 2273 end if; 2274 2275 Cat := Get_Category (UTF_32 (Code)); 2276 2277 -- If OK letter, reset scan ptr and go scan identifier 2278 2279 if Is_UTF_32_Letter (Cat) then 2280 Scan_Ptr := Wptr; 2281 Name_Len := 0; 2282 Underline_Found := False; 2283 goto Scan_Identifier; 2284 2285 -- If OK wide space, ignore and keep scanning (we do not include 2286 -- any ignored spaces in checksum) 2287 2288 elsif Is_UTF_32_Space (Cat) then 2289 goto Scan_Next_Character; 2290 2291 -- If other format character, ignore and keep scanning (again we 2292 -- do not include in the checksum) (this is for AI-0079). 2293 2294 elsif Is_UTF_32_Other (Cat) then 2295 goto Scan_Next_Character; 2296 2297 -- If OK wide line terminator, terminate current line 2298 2299 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then 2300 Scan_Ptr := Wptr; 2301 goto Scan_Line_Terminator; 2302 2303 -- Punctuation is an error (at start of identifier) 2304 2305 elsif Is_UTF_32_Punctuation (Cat) then 2306 Error_Msg ("identifier cannot start with punctuation", Wptr); 2307 Scan_Ptr := Wptr; 2308 Name_Len := 0; 2309 Underline_Found := False; 2310 goto Scan_Identifier; 2311 2312 -- Mark character is an error (at start of identifier) 2313 2314 elsif Is_UTF_32_Mark (Cat) then 2315 Error_Msg ("identifier cannot start with mark character", Wptr); 2316 Scan_Ptr := Wptr; 2317 Name_Len := 0; 2318 Underline_Found := False; 2319 goto Scan_Identifier; 2320 2321 -- Extended digit character is an error. Could be bad start of 2322 -- identifier or bad literal. Not worth doing too much to try to 2323 -- distinguish these cases, but we will do a little bit. 2324 2325 elsif Is_UTF_32_Digit (Cat) then 2326 Error_Msg 2327 ("identifier cannot start with digit character", Wptr); 2328 Scan_Ptr := Wptr; 2329 Name_Len := 0; 2330 Underline_Found := False; 2331 goto Scan_Identifier; 2332 2333 -- All other wide characters are illegal here 2334 2335 else 2336 Error_Illegal_Wide_Character; 2337 goto Scan_Next_Character; 2338 end if; 2339 end; 2340 2341 -- Routine to scan line terminator. On entry Scan_Ptr points to a 2342 -- character which is one of FF,LR,CR,VT, or one of the wide characters 2343 -- that is treated as a line terminator. 2344 2345 <<Scan_Line_Terminator>> 2346 2347 -- Check line too long 2348 2349 Check_End_Of_Line; 2350 2351 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is 2352 -- a physical line. 2353 2354 if End_Of_Line_Is_Token then 2355 Token_Ptr := Scan_Ptr; 2356 end if; 2357 2358 declare 2359 Physical : Boolean; 2360 2361 begin 2362 Skip_Line_Terminators (Scan_Ptr, Physical); 2363 2364 -- If we are at start of physical line, update scan pointers to 2365 -- reflect the start of the new line. 2366 2367 if Physical then 2368 Current_Line_Start := Scan_Ptr; 2369 Start_Column := Set_Start_Column; 2370 First_Non_Blank_Location := Scan_Ptr; 2371 2372 -- If End_Of_Line is a token, we return it as it is a 2373 -- physical line. 2374 2375 if End_Of_Line_Is_Token then 2376 Token := Tok_End_Of_Line; 2377 return; 2378 end if; 2379 end if; 2380 end; 2381 2382 goto Scan_Next_Character; 2383 2384 -- Identifier scanning routine. On entry, some initial characters of 2385 -- the identifier may have already been stored in Name_Buffer. If so, 2386 -- Name_Len has the number of characters stored, otherwise Name_Len is 2387 -- set to zero on entry. Underline_Found is also set False on entry. 2388 2389 <<Scan_Identifier>> 2390 2391 -- This loop scans as fast as possible past lower half letters and 2392 -- digits, which we expect to be the most common characters. 2393 2394 loop 2395 if Source (Scan_Ptr) in 'a' .. 'z' 2396 or else Source (Scan_Ptr) in '0' .. '9' 2397 then 2398 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr); 2399 Accumulate_Checksum (Source (Scan_Ptr)); 2400 2401 elsif Source (Scan_Ptr) in 'A' .. 'Z' then 2402 Token_Contains_Uppercase := True; 2403 2404 Name_Buffer (Name_Len + 1) := 2405 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32); 2406 Accumulate_Checksum (Name_Buffer (Name_Len + 1)); 2407 2408 else 2409 exit; 2410 end if; 2411 2412 Underline_Found := False; 2413 Scan_Ptr := Scan_Ptr + 1; 2414 Name_Len := Name_Len + 1; 2415 end loop; 2416 2417 -- If we fall through, then we have encountered either an underline 2418 -- character, or an extended identifier character (i.e. one from the 2419 -- upper half), or a wide character, or an identifier terminator. The 2420 -- initial test speeds us up in the most common case where we have 2421 -- an identifier terminator. Note that ESC is an identifier character 2422 -- only if a wide character encoding method that uses ESC encoding 2423 -- is active, so if we find an ESC character we know that we have a 2424 -- wide character. 2425 2426 if Identifier_Char (Source (Scan_Ptr)) 2427 or else (Source (Scan_Ptr) in Upper_Half_Character 2428 and then Upper_Half_Encoding) 2429 then 2430 -- Case of underline 2431 2432 if Source (Scan_Ptr) = '_' then 2433 Accumulate_Checksum ('_'); 2434 2435 if Underline_Found then 2436 Error_No_Double_Underline; 2437 else 2438 Underline_Found := True; 2439 Name_Len := Name_Len + 1; 2440 Name_Buffer (Name_Len) := '_'; 2441 end if; 2442 2443 Scan_Ptr := Scan_Ptr + 1; 2444 goto Scan_Identifier; 2445 2446 -- Upper half character 2447 2448 elsif Source (Scan_Ptr) in Upper_Half_Character 2449 and then not Upper_Half_Encoding 2450 then 2451 Accumulate_Checksum (Source (Scan_Ptr)); 2452 Store_Encoded_Character 2453 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr)))); 2454 Scan_Ptr := Scan_Ptr + 1; 2455 Underline_Found := False; 2456 goto Scan_Identifier; 2457 2458 -- Left bracket not followed by a quote terminates an identifier. 2459 -- This is an error, but we don't want to give a junk error msg 2460 -- about wide characters in this case. 2461 2462 elsif Source (Scan_Ptr) = '[' 2463 and then Source (Scan_Ptr + 1) /= '"' 2464 then 2465 null; 2466 2467 -- We know we have a wide character encoding here (the current 2468 -- character is either ESC, left bracket, or an upper half 2469 -- character depending on the encoding method). 2470 2471 else 2472 -- Scan out the wide character and insert the appropriate 2473 -- encoding into the name table entry for the identifier. 2474 2475 declare 2476 Code : Char_Code; 2477 Err : Boolean; 2478 Chr : Character; 2479 Cat : Category; 2480 2481 begin 2482 Wptr := Scan_Ptr; 2483 Scan_Wide (Source, Scan_Ptr, Code, Err); 2484 2485 -- If error, signal error 2486 2487 if Err then 2488 Error_Illegal_Wide_Character; 2489 2490 -- If the character scanned is a normal identifier 2491 -- character, then we treat it that way. 2492 2493 elsif In_Character_Range (Code) 2494 and then Identifier_Char (Get_Character (Code)) 2495 then 2496 Chr := Get_Character (Code); 2497 Accumulate_Checksum (Chr); 2498 Store_Encoded_Character 2499 (Get_Char_Code (Fold_Lower (Chr))); 2500 Underline_Found := False; 2501 2502 -- Here if not a normal identifier character 2503 2504 else 2505 Cat := Get_Category (UTF_32 (Code)); 2506 2507 -- Wide character in Unicode category "Other, Format" 2508 -- is not accepted in an identifier. This is because it 2509 -- it is considered a security risk (AI-0091). 2510 2511 -- However, it is OK for such a character to appear at 2512 -- the end of an identifier. 2513 2514 if Is_UTF_32_Other (Cat) then 2515 if not Identifier_Char (Source (Scan_Ptr)) then 2516 goto Scan_Identifier_Complete; 2517 else 2518 Error_Msg 2519 ("identifier cannot contain other_format " 2520 & "character", Wptr); 2521 goto Scan_Identifier; 2522 end if; 2523 2524 -- Wide character in category Separator,Space terminates 2525 2526 elsif Is_UTF_32_Space (Cat) then 2527 goto Scan_Identifier_Complete; 2528 end if; 2529 2530 -- Here if wide character is part of the identifier 2531 2532 -- Make sure we are allowing wide characters in 2533 -- identifiers. Note that we allow wide character 2534 -- notation for an OK identifier character. This in 2535 -- particular allows bracket or other notation to be 2536 -- used for upper half letters. 2537 2538 -- Wide characters are always allowed in Ada 2005 2539 2540 if Identifier_Character_Set /= 'w' 2541 and then Ada_Version < Ada_2005 2542 then 2543 Error_Msg 2544 ("wide character not allowed in identifier", Wptr); 2545 end if; 2546 2547 -- AI12-0004: An identifier shall only contain characters 2548 -- that may be present in Normalization Form KC. 2549 2550 if not Is_UTF_32_NFKC (UTF_32 (Code)) then 2551 Error_Msg 2552 ("invalid wide character in identifier", Wptr); 2553 2554 -- If OK letter, store it folding to upper case. Note 2555 -- that we include the folded letter in the checksum. 2556 2557 elsif Is_UTF_32_Letter (Cat) then 2558 Code := 2559 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code))); 2560 Accumulate_Checksum (Code); 2561 Store_Encoded_Character (Code); 2562 Underline_Found := False; 2563 2564 -- If OK extended digit or mark, then store it 2565 2566 elsif Is_UTF_32_Digit (Cat) 2567 or else Is_UTF_32_Mark (Cat) 2568 then 2569 Accumulate_Checksum (Code); 2570 Store_Encoded_Character (Code); 2571 Underline_Found := False; 2572 2573 -- Wide punctuation is also stored, but counts as an 2574 -- underline character for error checking purposes. 2575 2576 elsif Is_UTF_32_Punctuation (Cat) then 2577 Accumulate_Checksum (Code); 2578 2579 if Underline_Found then 2580 declare 2581 Cend : constant Source_Ptr := Scan_Ptr; 2582 begin 2583 Scan_Ptr := Wptr; 2584 Error_No_Double_Underline; 2585 Scan_Ptr := Cend; 2586 end; 2587 2588 else 2589 Store_Encoded_Character (Code); 2590 Underline_Found := True; 2591 end if; 2592 2593 -- Any other wide character is not acceptable 2594 2595 else 2596 Error_Msg 2597 ("invalid wide character in identifier", Wptr); 2598 end if; 2599 end if; 2600 2601 goto Scan_Identifier; 2602 end; 2603 end if; 2604 end if; 2605 2606 -- Scan of identifier is complete. The identifier is stored in 2607 -- Name_Buffer, and Scan_Ptr points past the last character. 2608 2609 <<Scan_Identifier_Complete>> 2610 Token_Name := Name_Find; 2611 2612 -- Check for identifier ending with underline or punctuation char 2613 2614 if Underline_Found then 2615 Underline_Found := False; 2616 2617 if Source (Scan_Ptr - 1) = '_' then 2618 Error_Msg 2619 ("identifier cannot end with underline", Scan_Ptr - 1); 2620 else 2621 Error_Msg 2622 ("identifier cannot end with punctuation character", Wptr); 2623 end if; 2624 end if; 2625 2626 -- We will assume it is an identifier, not a keyword, so that the 2627 -- checksum is independent of the Ada version. 2628 2629 Token := Tok_Identifier; 2630 2631 -- Check if it is a keyword 2632 2633 if Is_Keyword_Name (Token_Name) then 2634 Accumulate_Token_Checksum; 2635 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name)); 2636 2637 -- See Exp_Put_Image for documentation of Tagged_Seen 2638 2639 if Token = Tok_Tagged then 2640 Tagged_Seen := True; 2641 end if; 2642 2643 -- Keyword style checks 2644 2645 if Style_Check then 2646 2647 -- Deal with possible style check for non-lower case keyword, 2648 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords 2649 -- for this purpose if they appear as attribute designators. 2650 -- Actually we only check the first character for speed. 2651 2652 -- Ada 2005 (AI-284): Do not apply the style check in case of 2653 -- "pragma Interface" 2654 2655 -- Ada 2005 (AI-340): Do not apply the style check in case of 2656 -- MOD attribute. 2657 2658 if Token_Contains_Uppercase 2659 and then (Prev_Token /= Tok_Apostrophe 2660 or else 2661 (Token /= Tok_Access and then 2662 Token /= Tok_Delta and then 2663 Token /= Tok_Digits and then 2664 Token /= Tok_Mod and then 2665 Token /= Tok_Range)) 2666 and then (Token /= Tok_Interface 2667 or else 2668 (Token = Tok_Interface 2669 and then Prev_Token /= Tok_Pragma)) 2670 then 2671 Style.Non_Lower_Case_Keyword; 2672 end if; 2673 2674 -- Check THEN/ELSE style rules. These do not apply to AND THEN 2675 -- or OR ELSE, and do not apply in if expressions. 2676 2677 if (Token = Tok_Then and then Prev_Token /= Tok_And) 2678 or else 2679 (Token = Tok_Else and then Prev_Token /= Tok_Or) 2680 then 2681 if Inside_If_Expression = 0 then 2682 Style.Check_Separate_Stmt_Lines; 2683 end if; 2684 end if; 2685 end if; 2686 2687 -- We must reset Token_Name since this is not an identifier and 2688 -- if we leave Token_Name set, the parser gets confused because 2689 -- it thinks it is dealing with an identifier instead of the 2690 -- corresponding keyword. 2691 2692 Token_Name := No_Name; 2693 return; 2694 2695 -- It is an identifier after all 2696 2697 else 2698 Accumulate_Token_Checksum; 2699 Post_Scan; 2700 end if; 2701 end Scan; 2702 2703 -------------------------- 2704 -- Set_Comment_As_Token -- 2705 -------------------------- 2706 2707 procedure Set_Comment_As_Token (Value : Boolean) is 2708 begin 2709 Comment_Is_Token := Value; 2710 end Set_Comment_As_Token; 2711 2712 ------------------------------ 2713 -- Set_End_Of_Line_As_Token -- 2714 ------------------------------ 2715 2716 procedure Set_End_Of_Line_As_Token (Value : Boolean) is 2717 begin 2718 End_Of_Line_Is_Token := Value; 2719 end Set_End_Of_Line_As_Token; 2720 2721 --------------------------- 2722 -- Set_Special_Character -- 2723 --------------------------- 2724 2725 procedure Set_Special_Character (C : Character) is 2726 begin 2727 case C is 2728 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' => 2729 Special_Characters (C) := True; 2730 2731 when others => 2732 null; 2733 end case; 2734 end Set_Special_Character; 2735 2736 ---------------------- 2737 -- Set_Start_Column -- 2738 ---------------------- 2739 2740 -- Note: it seems at first glance a little expensive to compute this value 2741 -- for every source line (since it is certainly not used for all source 2742 -- lines). On the other hand, it doesn't take much more work to skip past 2743 -- the initial white space on the line counting the columns than it would 2744 -- to scan past the white space using the standard scanning circuits. 2745 2746 function Set_Start_Column return Column_Number is 2747 Start_Column : Column_Number := 0; 2748 2749 begin 2750 -- Outer loop scans past horizontal tab characters 2751 2752 Tabs_Loop : loop 2753 2754 -- Inner loop scans past blanks, bumping Scan_Ptr past the blanks and 2755 -- adjusting Start_Column to account for them. 2756 2757 Blanks_Loop : 2758 while Source (Scan_Ptr) = ' ' loop 2759 Scan_Ptr := Scan_Ptr + 1; 2760 Start_Column := Start_Column + 1; 2761 end loop Blanks_Loop; 2762 2763 -- Outer loop keeps going only if a horizontal tab follows 2764 2765 if Source (Scan_Ptr) = HT then 2766 if Style_Check then 2767 Style.Check_HT; 2768 end if; 2769 2770 Scan_Ptr := Scan_Ptr + 1; 2771 Start_Column := (Start_Column / 8) * 8 + 8; 2772 else 2773 exit Tabs_Loop; 2774 end if; 2775 end loop Tabs_Loop; 2776 2777 return Start_Column; 2778 2779 -- A constraint error can happen only if we have a compiler with checks on 2780 -- and a line with a ludicrous number of tabs or spaces at the start. In 2781 -- such a case, we really don't care if Start_Column is right or not. 2782 2783 exception 2784 when Constraint_Error => 2785 return Column_Number'Last; 2786 end Set_Start_Column; 2787 2788end Scng; 2789