1---------------------------------------------------------------------- 2-- Framework.Language.Scanner - Package body -- 3-- -- 4-- This software is (c) The European Organisation for the Safety -- 5-- of Air Navigation (EUROCONTROL) and Adalog 2004-2005. The Ada -- 6-- Controller is free software; you can redistribute it and/or -- 7-- modify it under terms of the GNU General Public License as -- 8-- published by the Free Software Foundation; either version 2, or -- 9-- (at your option) any later version. This unit is distributed -- 10-- in the hope that it will be useful, but WITHOUT ANY WARRANTY; -- 11-- without even the implied warranty of MERCHANTABILITY or FITNESS -- 12-- FOR A PARTICULAR PURPOSE. See the GNU General Public License -- 13-- for more details. You should have received a copy of the GNU -- 14-- General Public License distributed with this program; see file -- 15-- COPYING. If not, write to the Free Software Foundation, 59 -- 16-- Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- 17-- -- 18-- As a special exception, if other files instantiate generics -- 19-- from the units of this program, or if you link this unit with -- 20-- other files to produce an executable, this unit does not by -- 21-- itself cause the resulting executable to be covered by the GNU -- 22-- General Public License. This exception does not however -- 23-- invalidate any other reasons why the executable file might be -- 24-- covered by the GNU Public License. -- 25-- -- 26-- This software is distributed in the hope that it will be -- 27-- useful, but WITHOUT ANY WARRANTY; without even the implied -- 28-- warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -- 29-- PURPOSE. -- 30---------------------------------------------------------------------- 31 32with -- Standard Ada units 33 Ada.Characters.Handling, 34 Ada.Strings.Wide_Fixed, 35 Ada.Wide_Text_IO; 36 37with -- Application specific units 38 Utilities; 39package body Framework.Language.Scanner is 40 use Ada.Wide_Text_IO; 41 42 ------------------------------------------------------------------ 43 -- Internal utilities -- 44 ------------------------------------------------------------------ 45 46 -- Invariants: 47 -- The_Token is the current token 48 -- Cur_Char is the next character to process, undefined if 49 -- At_Eol is true meaning that the current character is the 50 -- end of line. 51 -- To get a more natural behaviour in interactive mode, Next_Token 52 -- just marks the token as delayed, actual scanning of token will take 53 -- place at the first call to Current_Token. 54 55 The_Token : Token; 56 Token_Delayed : Boolean := True; 57 String_Token : Boolean; 58 59 Origin_Is_String : Boolean := False; 60 Cur_Char : Wide_Character; 61 At_Eol : Boolean; 62 Source_String : Ada.Strings.Wide_Unbounded.Unbounded_Wide_String; 63 Source_Last : Natural; 64 65 Current_File : Ada.Strings.Wide_Unbounded.Unbounded_Wide_String; 66 Current_Line : Asis.Text.Line_Number; 67 Current_Column : Asis.Text.Character_Position; 68 Current_Prompt : Ada.Strings.Wide_Unbounded.Unbounded_Wide_String; 69 Prompt_Active : Boolean := False; 70 71 --------------- 72 -- Next_Char -- 73 --------------- 74 75 -- Buffer size is arbitrary, make it big enough for (almost) all 76 -- input lines to fit in. Note that the size of input lines is NOT 77 -- limited by the buffer size, it is just a matter of optimization 78 Buffer : Wide_String (1..200); 79 Buf_Inx : Natural := 1; 80 Buf_Last : Natural := 0; 81 82 procedure Next_Char is 83 use Ada.Strings.Wide_Fixed; 84 use type Asis.ASIS_Integer; -- Gela-ASIS compatibility 85 begin 86 if Buf_Inx = Buf_Last and Buf_Last = Buffer'Last then 87 -- Buffer was too short, read next part 88 -- (may read an empty string, but it's OK) 89 if Origin_Is_String then 90 Buf_Last := Integer'Min (Buffer'Length, Length (Source_String) - Source_Last); 91 Buffer (1 .. Buf_Last) := Slice (Source_String, Source_Last + 1, Source_Last + Buf_Last); 92 Source_Last := Source_Last + Buf_Last; 93 else 94 Get_Line (Current_Input, Buffer, Buf_Last); 95 end if; 96 Buf_Inx := 1; 97 Current_Column := Current_Column + 1; 98 99 elsif At_Eol then 100 if Origin_Is_String then 101 -- if From_String, End of Line => End Of File 102 raise End_Error; 103 end if; 104 105 if Current_Prompt /= Null_Unbounded_Wide_String then 106 if Prompt_Active then 107 -- Here, we have fresh new user input 108 -- => Cancel any previous error flag 109 Rule_Error_Occurred := False; 110 Put (Current_Error, To_Wide_String (Current_Prompt) & ": "); 111 else 112 Put (Current_Error, Length (Current_Prompt) * '.' & ": "); 113 end if; 114 end if; 115 116 Get_Line (Current_Input, Buffer, Buf_Last); 117 Buf_Inx := 1; 118 Current_Line := Current_Line + 1; 119 Current_Column := 1; 120 121 else 122 Buf_Inx := Buf_Inx + 1; 123 Current_Column := Current_Column + 1; 124 end if; 125 126 if Buf_Inx > Buf_Last then 127 -- Includes case of empty line 128 At_Eol := True; 129 return; 130 end if; 131 132 At_Eol := False; 133 Prompt_Active := False; 134 Cur_Char := Buffer (Buf_Inx); 135 end Next_Char; 136 137 --------------------- 138 -- Look_Ahead_Char -- 139 --------------------- 140 141 function Look_Ahead_Char return Wide_Character is 142 begin 143 if Buf_Inx = Buf_Last and Buf_Last = Buffer'Last then 144 -- Buffer was too short, read next part 145 -- (may read an empty string, but it's OK) 146 -- Keep current char in buffer to maintain invariant 147 Buffer (1) := Cur_Char; 148 if Origin_Is_String then 149 Buf_Last := Integer'Min (Buffer'Length, Length (Source_String) - Source_Last) - 1; 150 Buffer (2 .. Buf_Last) := Slice (Source_String, Source_Last + 1, Source_Last + Buf_Last); 151 Source_Last := Source_Last + Buf_Last; 152 else 153 Get_Line (Current_Input, Buffer (2 .. Buffer'Last), Buf_Last); 154 end if; 155 Buf_Inx := 1; 156 end if; 157 158 if Buf_Inx = Buffer'Last then 159 -- End of line, pretend there is an extra space 160 return ' '; 161 end if; 162 return Buffer (Buf_Inx + 1); 163 end Look_Ahead_Char; 164 165 166 ----------------------- 167 -- Actual_Next_Token -- 168 ----------------------- 169 170 -- The following declaration ensures that we get an error if we add a Character_Token 171 -- and forget to modify the following elements. 172 Char_Tokens : constant Wide_String (Token_Kind'Pos (Character_Token_Kind'First) .. 173 Token_Kind'Pos (Character_Token_Kind'Last)) 174 := "{}()<>|':;,.="; 175 Char_Token_Values : constant array (Char_Tokens'Range) of Token 176 := ((Kind => Left_Bracket, Position => Null_Location), 177 (Kind => Right_Bracket, Position => Null_Location), 178 (Kind => Left_Parenthesis, Position => Null_Location), 179 (Kind => Right_Parenthesis, Position => Null_Location), 180 (Kind => Left_Angle, Position => Null_Location), 181 (Kind => Right_Angle, Position => Null_Location), 182 (Kind => Vertical_Bar, Position => Null_Location), 183 (Kind => Tick, Position => Null_Location), 184 (Kind => Colon, Position => Null_Location), 185 (Kind => Semi_Colon, Position => Null_Location), 186 (Kind => Comma, Position => Null_Location), 187 (Kind => Period, Position => Null_Location), 188 (Kind => Equal, Position => Null_Location)); 189 190 procedure Actual_Next_Token (Force_String : Boolean := False) is 191 use Ada.Strings.Wide_Fixed; 192 use Thick_Queries, Utilities; 193 194 First_Line : Asis.Text.Line_Number; 195 First_Column : Asis.Text.Character_Position; 196 197 procedure Get_Name (Extended : Boolean) is 198 use Ada.Characters.Handling; 199 begin 200 The_Token := (Kind => Name, 201 Position => (Current_File, First_Line, First_Column), 202 Name_Length => 1, 203 Name_Text => (1 => Cur_Char, others => ' '), 204 Key => Not_A_Key); 205 Next_Char; 206 207 loop --## rule line off simplifiable_statements ## exit OK, since we have several ones 208 exit when At_Eol; 209 210 if Extended then 211 exit when Cur_Char = ';'; 212 else 213 exit when Cur_Char <= ' '; 214 exit when not Is_Letter (To_Character (Cur_Char)) 215 and not Is_Digit (To_Character (Cur_Char)) 216 and Cur_Char /= '_'; 217 end if; 218 219 if The_Token.Name_Length = The_Token.Name_Text'Last then 220 Syntax_Error ("Identifier too long", The_Token.Position); 221 end if; 222 The_Token.Name_Length := The_Token.Name_Length + 1; 223 The_Token.Name_Text (The_Token.Name_Length) := Cur_Char; 224 Next_Char; 225 end loop; 226 end Get_Name; 227 228 procedure Get_String is 229 Quote_Char : constant Wide_Character := Cur_Char; 230 begin 231 The_Token := (Kind => String_Value, 232 Position => (Current_File, First_Line, First_Column), 233 String_Length => 0, 234 String_Text => (others => ' ')); 235 Next_Char; 236 loop 237 if At_Eol then 238 Syntax_Error ("Unterminated quoted string", The_Token.Position); 239 end if; 240 if Cur_Char = Quote_Char then 241 Next_Char; 242 exit when At_Eol or Cur_Char /= Quote_Char; 243 end if; 244 245 if The_Token.String_Length = The_Token.String_Text'Last then 246 Syntax_Error ("String too long", The_Token.Position); 247 end if; 248 The_Token.String_Length := The_Token.String_Length + 1; 249 The_Token.String_Text (The_Token.String_Length) := Cur_Char; 250 Next_Char; 251 end loop; 252 end Get_String; 253 254 function Get_Integer return Biggest_Int is 255 -- Precondition: Cur_Char in '0'..'9' or '-' 256 Result : Biggest_Int; 257 Negative : Boolean := False; 258 Num_Base : Biggest_Int := 10; 259 260 function Get_Numeral (Base : in Biggest_Int; Stop_On_E : Boolean) return Biggest_Int is 261 -- Precondition: Cur_Char in '0'..'9', 'a'..'f', 'A'..'F' 262 Num : Biggest_Int := 0; 263 Digit : Biggest_Int; 264 Prev_Is_US : Boolean := False; 265 begin 266 while not At_Eol loop 267 if Stop_On_E and then (Cur_Char = 'e' or Cur_Char = 'E') then 268 exit; 269 end if; 270 case Cur_Char is 271 when '0' .. '9' => 272 Digit := Wide_Character'Pos (Cur_Char) - Wide_Character'Pos ('0'); 273 Prev_Is_US := False; 274 when 'a' .. 'f' => 275 Digit := Wide_Character'Pos (Cur_Char) - Wide_Character'Pos ('a') + 10; 276 Prev_Is_US := False; 277 when 'A' .. 'F' => 278 Digit := Wide_Character'Pos (Cur_Char) - Wide_Character'Pos ('A') + 10; 279 Prev_Is_US := False; 280 when '_' => 281 if Prev_Is_US then 282 Syntax_Error ("Consecutive underscores not allowed in numbers", 283 (Current_File, Current_Line, Current_Column)); 284 end if; 285 Prev_Is_US := True; 286 when others => 287 exit; 288 end case; 289 Next_Char; 290 291 if Digit >= Base then 292 Syntax_Error ("Invalid character in number", 293 (Current_File, Current_Line, Current_Column)); 294 end if; 295 if not Prev_Is_US then 296 Num := Num * Base + Digit; 297 end if; 298 end loop; 299 if Prev_Is_US then 300 Syntax_Error ("Trailing underscores not allowed in numbers", 301 (Current_File, Current_Line, Current_Column)); 302 end if; 303 return Num; 304 end Get_Numeral; 305 begin -- Get_Integer 306 if Cur_Char = '-' then 307 Negative := True; 308 Next_Char; 309 if Cur_Char not in '0' .. '9' then 310 Syntax_Error ("Invalid character in number", 311 (Current_File, Current_Line, Current_Column)); 312 end if; 313 end if; 314 315 Result := Get_Numeral (Base => 10, Stop_On_E => True); 316 317 if Cur_Char = '#' then 318 Next_Char; 319 Num_Base := Result; 320 if Num_Base not in 2..16 then 321 Syntax_Error ("Invalid value for base", 322 (Current_File, Current_Line, Current_Column)); 323 end if; 324 Result := Get_Numeral (Num_Base, Stop_On_E => False); 325 if Cur_Char /= '#' then 326 Syntax_Error ("Missing closing '#' in based number", 327 (Current_File, Current_Line, Current_Column)); 328 end if; 329 Next_Char; 330 end if; 331 332 if Cur_Char = 'e' or Cur_Char = 'E' then 333 Next_Char; 334 if Cur_Char not in '0' .. '9' then 335 Syntax_Error ("Exponent must be followed by (unsigned) number", 336 (Current_File, Current_Line, Current_Column)); 337 end if; 338 Result := Result * Num_Base ** Integer (Get_Numeral(Base => 10, Stop_On_E => True)); 339 end if; 340 341 if Negative then 342 Result := -Result; 343 end if; 344 return Result; 345 end Get_Integer; 346 347 begin -- Actual_Next_Token 348 Token_Delayed := False; 349 350 if The_Token.Kind = Eof then 351 -- Eof found => stay there 352 return; 353 end if; 354 355 loop 356 if At_Eol then 357 -- Skip empty lines 358 Next_Char; 359 elsif Cur_Char = '#' or else (Cur_Char = '-' and then Look_Ahead_Char = '-') then 360 -- Skip comment 361 while not At_Eol loop 362 Next_Char; 363 end loop; 364 elsif Cur_Char > ' ' then 365 exit; 366 else 367 Next_Char; 368 end if; 369 end loop; 370 371 -- Here we have read a non-blank character 372 First_Line := Current_Line; 373 First_Column := Current_Column; 374 375 if Force_String then 376 Get_Name (Extended => True); 377 378 else 379 case Cur_Char is 380 when '{' | '}' | '(' | ')' | '<' | '>' | '|' | ''' | ':' | ';' | ',' | '.' | '=' => 381 The_Token := Char_Token_Values (Index (Char_Tokens, Cur_Char & "")); 382 The_Token.Position := (Current_File, First_Line, First_Column); 383 Next_Char; 384 385 when '0' .. '9' | '-' => 386 declare 387 Integer_Part : Biggest_Int; 388 Fractional_Part : Float; 389 Exponent_Part : Integer := 0; 390 Exponent_Sign : Integer := +1; 391 begin 392 begin 393 Integer_Part := Get_Integer; 394 exception 395 when Constraint_Error => 396 The_Token := (Kind => Bad_Integer, 397 Position => (Current_File, First_Line, First_Column)); 398 return; 399 end; 400 401 if Cur_Char = '.' then 402 Next_Char; 403 if Cur_Char not in '0' .. '9' then 404 Syntax_Error ("Illegal real value", (Current_File, Current_Line, Current_Column)); 405 end if; 406 407 begin 408 Fractional_Part := Float (Get_Integer); 409 exception 410 when Constraint_Error => 411 The_Token := (Kind => Bad_Float, 412 Position => (Current_File, First_Line, First_Column)); 413 return; 414 end; 415 416 while Fractional_Part >= 1.0 loop 417 Fractional_Part := Fractional_Part / 10.0; 418 end loop; 419 if Integer_Part < 0 then 420 Fractional_Part := -Fractional_Part; 421 end if; 422 423 if Cur_Char = 'e' or Cur_Char = 'E' then 424 Next_Char; 425 case Cur_Char is 426 when '+' => 427 Next_Char; 428 when '-' => 429 Exponent_Sign := -1; 430 Next_Char; 431 when others => 432 null; 433 end case; 434 435 if Cur_Char not in '0' .. '9' then 436 Syntax_Error ("Illegal exponent of real value", 437 (Current_File, Current_Line, Current_Column)); 438 end if; 439 440 Exponent_Part := Integer (Get_Integer); 441 end if; 442 443 The_Token := (Kind => Float_Value, 444 Position => (Current_File, First_Line, First_Column), 445 Fvalue => (Float (Integer_Part) + Fractional_Part) 446 * 10.0 ** (Exponent_Sign * Exponent_Part)); 447 else 448 The_Token := (Kind => Integer_Value, 449 Position => (Current_File, First_Line, First_Column), 450 Value => Integer_Part); 451 end if; 452 end; 453 454 when '~' | '"' => 455 Get_String; 456 457 when 'a' .. 'z' | 'A' .. 'Z' | '_' => -- We allow '_' because of "_anonymous_" 458 Get_Name (Extended => False); 459 460 -- Check for keywords 461 declare 462 To_Check : constant Wide_String 463 := "KEY_" & To_Upper (The_Token.Name_Text (1..The_Token.Name_Length)); 464 begin 465 for Key in Key_Kind range Key_Kind'First .. Key_Kind'Pred (Not_A_Key) loop 466 if To_Check = Key_Kind'Wide_Image (Key) then 467 The_Token.Key := Key; 468 exit; 469 end if; 470 end loop; 471 end; 472 473 when others => 474 declare 475 Bad_Char : constant Wide_Character := Cur_Char; 476 begin 477 Next_Char; 478 Syntax_Error ("Unexpected character: " & Bad_Char, 479 (Current_File, Current_Line, Current_Column)); 480 end; 481 end case; 482 end if; 483 exception 484 when End_Error => 485 The_Token := (Kind => Eof, 486 Position => (Current_File, Current_Line, Current_Column)); 487 when others => 488 The_Token := (Kind => Bad_Token, 489 Position => (Current_File, First_Line, First_Column)); 490 raise; 491 end Actual_Next_Token; 492 493 ------------------------------------------------------------------ 494 -- Exported subprograms -- 495 ------------------------------------------------------------------ 496 497 ------------------- 498 -- Current_Token -- 499 ------------------- 500 501 function Current_Token return Token is 502 begin 503 if Token_Delayed then 504 Actual_Next_Token (String_Token); 505 end if; 506 return The_Token; 507 end Current_Token; 508 509 ---------------- 510 -- Next_Token -- 511 ---------------- 512 513 procedure Next_Token (Force_String : Boolean := False; No_Delay : Boolean := False) is 514 begin 515 if No_Delay then 516 Actual_Next_Token (Force_String); 517 else 518 Token_Delayed := True; 519 String_Token := Force_String; 520 end if; 521 end Next_Token; 522 523 ---------------- 524 -- Set_Prompt -- 525 ---------------- 526 527 procedure Set_Prompt (Prompt : Wide_String) is 528 begin 529 Current_Prompt := To_Unbounded_Wide_String (Prompt); 530 Prompt_Active := True; 531 end Set_Prompt; 532 533 --------------------- 534 -- Activate_Prompt -- 535 --------------------- 536 537 procedure Activate_Prompt is 538 begin 539 Prompt_Active := True; 540 end Activate_Prompt; 541 542 ---------------- 543 -- Start_Scan -- 544 ---------------- 545 546 procedure Start_Scan (From_String : Boolean; Source : Wide_String) is 547 use type Asis.ASIS_Integer; -- Gela-ASIS compatibility 548 begin 549 Origin_Is_String := From_String; 550 Current_Line := 1; 551 Current_Column := 0; 552 553 if Origin_Is_String then 554 Source_String := To_Unbounded_Wide_String (Source); 555 Current_File := Null_Unbounded_Wide_String; 556 557 Buf_Last := Integer'Min (Buffer'Length, Length (Source_String)); 558 Buffer (1 .. Buf_Last) := Slice (Source_String, 1, Buf_Last); 559 Source_Last := Buf_Last; 560 561 else 562 Current_File := To_Unbounded_Wide_String (Source); 563 if Current_Prompt /= Null_Unbounded_Wide_String then 564 Put (Current_Error, To_Wide_String (Current_Prompt) & ": "); 565 end if; 566 567 -- Get a non-empty line: 568 loop 569 Get_Line (Current_Input, Buffer, Buf_Last); 570 exit when Buf_Last >= 1; 571 Current_Line := Current_Line + 1; 572 end loop; 573 end if; 574 575 Buf_Inx := 1; 576 Cur_Char := Buffer (Buf_Inx); 577 At_Eol := Buf_Inx > Buf_Last; -- True for empty input string 578 Prompt_Active := False; 579 The_Token := (Kind => Semi_Colon, Position => The_Token.Position); -- Make sure it is not Eof 580 581 exception 582 when End_Error => 583 The_Token := (Kind => Eof, 584 Position => (Current_File, Current_Line, Current_Column)); 585 end Start_Scan; 586 587 ----------- 588 -- Image -- 589 ----------- 590 591 function Image (T : Token; Quote_String : Boolean := False) return Wide_String is 592 use Thick_Queries; 593 function Double_Quotes (S : Wide_String) return Wide_String is 594 begin 595 for I in S'Range loop 596 if S (I) = '"' then 597 return S (S'First .. I) & '"' & Double_Quotes (S (I + 1 .. S'Last)); 598 end if; 599 end loop; 600 return S; 601 end Double_Quotes; 602 603 begin -- Image 604 case T.Kind is 605 when Name => 606 return T.Name_Text (1 .. T.Name_Length); 607 when Integer_Value => 608 return Biggest_Int_Img (T.Value); 609 when Float_Value => 610 declare 611 Result : constant Wide_String := Float'Wide_Image (T.Fvalue); 612 begin 613 if T.Fvalue < 0.0 then 614 return Result; 615 else 616 return Result (2 .. Result'Length); 617 end if; 618 end; 619 when String_Value => 620 if Quote_String then 621 return '"' & Double_Quotes (T.String_Text (1 .. T.String_Length)) & '"'; 622 else 623 return T.String_Text (1 .. T.String_Length); 624 end if; 625 when Bad_Integer | Bad_Float | Bad_Token => 626 return "#####"; 627 when Character_Token_Kind => 628 return (1 => Char_Tokens (Character_Token_Kind'Pos (T.Kind))); 629 when Eof => 630 Utilities.Failure ("Token image for Eof"); 631 end case; 632 end Image; 633 634 ---------------- 635 -- Save_State -- 636 ---------------- 637 638 procedure Save_State (State : out Scanner_State) is 639 begin 640 State := (The_Token, 641 Token_Delayed, 642 Origin_Is_String, 643 Cur_Char, 644 At_Eol, 645 Source_String, 646 Source_Last, 647 648 Buffer, 649 Buf_Inx, 650 Buf_Last, 651 652 Current_File, 653 Current_Line, 654 Current_Column, 655 Current_Prompt, 656 Prompt_Active); 657 end Save_State; 658 659 ------------------- 660 -- Restore_State -- 661 ------------------- 662 663 procedure Restore_State (State : in Scanner_State) is 664 begin 665 The_Token := State.The_Token; 666 Token_Delayed := State.Token_Delayed; 667 Origin_Is_String := State.Origin_Is_String; 668 Cur_Char := State.Cur_Char; 669 At_Eol := State.At_Eol; 670 Source_String := State.Source_String; 671 Source_Last := State.Source_Last; 672 673 Buffer := State.Buffer; 674 Buf_Inx := State.Buf_Inx; 675 Buf_Last := State.Buf_Last; 676 677 Current_File := State.Current_File; 678 Current_Line := State.Current_Line; 679 Current_Column := State.Current_Column; 680 Current_Prompt := State.Current_Prompt; 681 Prompt_Active := State.Prompt_Active; 682 end Restore_State; 683 684 --------------- 685 -- Is_String -- 686 --------------- 687 688 function Is_String (T : Token; Expected : Wide_String) return Boolean is 689 use Utilities; 690 begin 691 if T.Kind /= Name then 692 return False; 693 end if; 694 695 return To_Upper (T.Name_Text (1 .. T.Name_Length)) = Expected; 696 end Is_String; 697 698 ------------------- 699 -- Reference_Dir -- 700 ------------------- 701 702 function Reference_Dir return Wide_String is 703 F_Name : constant Wide_String := To_Wide_String (Current_File); 704 Last : Natural := F_Name'Last; 705 begin 706 while Last >= 1 and then (F_Name (Last) /= '/' and F_Name (Last) /= '\') loop 707 Last := Last - 1; 708 end loop; 709 return F_Name (1..Last); 710 end Reference_Dir; 711 712end Framework.Language.Scanner; 713 714