1------------------------------------------------------------------------------- 2-- 3-- This file is part of AdaBrowse. 4-- 5-- <STRONG>Copyright (c) 2002 by Thomas Wolf.</STRONG> 6-- <BLOCKQUOTE> 7-- AdaBrowse is free software; you can redistribute it and/or modify it 8-- under the terms of the GNU General Public License as published by the 9-- Free Software Foundation; either version 2, or (at your option) any 10-- later version. AdaBrowse is distributed in the hope that it will be 11-- useful, but <EM>without any warranty</EM>; without even the implied 12-- warranty of <EM>merchantability or fitness for a particular purpose.</EM> 13-- See the GNU General Public License for more details. You should have 14-- received a copy of the GNU General Public License with this distribution, 15-- see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free 16-- Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, 17-- USA. 18-- </BLOCKQUOTE> 19-- 20-- <DL><DT><STRONG> 21-- Author:</STRONG><DD> 22-- Thomas Wolf (TW) 23-- <ADDRESS><A HREF="mailto:twolf@acm.org">twolf@acm.org</A></ADDRESS></DL> 24-- 25-- <DL><DT><STRONG> 26-- Purpose:</STRONG><DD> 27-- Simplified Ada 95 parser. Parses the source until it finds the 28-- name of the library unit declaration. Note: this parser (and its 29-- scanner!) doesn't need to be hyper-fast, it'll only be used for 30-- krunched file names, and then parse the file only up to the 31-- unit name.</DL> 32-- 33-- <!-- 34-- Revision History 35-- 36-- 26-MAR-2002 TW Initial version. 37-- 21-JUN-2002 TW Uses Util.Text now instead of Ada.Strings.Unbounded. 38-- --> 39------------------------------------------------------------------------------- 40 41pragma License (GPL); 42 43with Ada.Strings.Maps; 44with Ada.Text_IO; 45 46with Util.Files.Text_IO; 47with Util.Strings; 48with Util.Text.Internal; 49 50pragma Elaborate_All (Util.Files.Text_IO); 51pragma Elaborate_All (Util.Text); 52 53package body AD.Parse is 54 55 package UT renames Util.Text; 56 57 ---------------------------------------------------------------------------- 58 -- Scanning routines. This is a very simple, line-based scanner. Not 59 -- particularly efficient, but does the job nicely. 60 61 package Scanner is 62 63 type Token is 64 (Other_Token, 65 Left_Paren_Token, Right_Paren_Token, Semicolon_Token, Period_Token, 66 With_Token, Use_Token, Pragma_Token, Type_Token, Package_Token, 67 Procedure_Token, Function_Token, Is_Token, New_Token, Return_Token, 68 Private_Token, Generic_Token, Name_Token, String_Token); 69 70 procedure Init (File_Name : in String); 71 72 procedure Advance; 73 74 function Current_Token return Token; 75 76 function Image return UT.Unbounded_String; 77 78 procedure Close; 79 80 Scan_Error : exception; 81 82 private 83 84 pragma Inline (Current_Token); 85 86 end Scanner; 87 88 package body Scanner is 89 90 use Util.Strings; 91 92 F : Ada.Text_IO.File_Type; 93 94 function Ada_Skip_String 95 (S : in String; 96 Delim : in Character) 97 return Natural 98 is 99 begin 100 return Skip_String (S, Delim, Delim); 101 end Ada_Skip_String; 102 103 function Get_Line is 104 new Util.Files.Text_IO.Next_Line 105 (Line_Continuation => "", 106 Comment_Start => "--", 107 Delimiters => Ada.Strings.Maps.To_Set ('"'), 108 Strings => Ada_Skip_String); 109 -- Note: we only need to handle the double quote as a string delimiter, 110 -- for "--" can only occur in strings, but never in character literals. 111 -- Hence it isn't necessary to handle the single quote at all here. 112 113 Curr_Line : UT.Unbounded_String; 114 Curr : UT.String_Access; 115 Curr_Idx : Natural; 116 117 Curr_Token : Token := Other_Token; 118 Token_Image : UT.Unbounded_String; 119 Token_Ptr : UT.String_Access; 120 -- Set for 'Name_Token' and 'String_Token'; in the latter case, it 121 -- also contains the delimiting double quotes. 122 123 procedure Load_Line 124 is 125 begin 126 UT.Set (Curr_Line, Get_Line (F)); 127 Curr := UT.Internal.Get_Ptr (Curr_Line); 128 Curr_Idx := 1; 129 if Curr_Idx > Curr'Last then 130 raise Scan_Error; 131 end if; 132 end Load_Line; 133 134 function Find_Token 135 return Token 136 is 137 begin 138 case Token_Ptr (Token_Ptr'First) is 139 when 'f' | 'F' => 140 if To_Lower (Token_Ptr.all) = "function" then 141 return Function_Token; 142 end if; 143 when 'g' | 'G' => 144 if To_Lower (Token_Ptr.all) = "generic" then 145 return Generic_Token; 146 end if; 147 when 'i' | 'I' => 148 if To_Lower (Token_Ptr.all) = "is" then 149 return Is_Token; 150 end if; 151 when 'n' | 'N' => 152 if To_Lower (Token_Ptr.all) = "new" then 153 return New_Token; 154 end if; 155 when 'p' | 'P' => 156 declare 157 S : constant String := To_Lower (Token_Ptr.all); 158 begin 159 if S = "package" then 160 return Package_Token; 161 elsif S = "pragma" then 162 return Pragma_Token; 163 elsif S = "private" then 164 return Private_Token; 165 elsif S = "procedure" then 166 return Procedure_Token; 167 end if; 168 end; 169 when 'r' | 'R' => 170 if To_Lower (Token_Ptr.all) = "return" then 171 return Return_Token; 172 end if; 173 when 't' | 'T' => 174 if To_Lower (Token_Ptr.all) = "type" then 175 return Type_Token; 176 end if; 177 when 'u' | 'U' => 178 if To_Lower (Token_Ptr.all) = "use" then 179 return Use_Token; 180 end if; 181 when 'w' | 'W' => 182 if To_Lower (Token_Ptr.all) = "with" then 183 return With_Token; 184 end if; 185 when others => 186 null; 187 end case; 188 return Name_Token; 189 end Find_Token; 190 191 Numeral : constant Ada.Strings.Maps.Character_Set := 192 Ada.Strings.Maps.To_Set ("0123456789_"); 193 194 Based_Numeral : constant Ada.Strings.Maps.Character_Set := 195 Ada.Strings.Maps.To_Set ("0123456789_ABCDEFabcdef"); 196 197 procedure Advance 198 is 199 begin 200 if Curr_Idx > Curr'Last then Load_Line; end if; 201 declare 202 Ch : Character := Curr (Curr_Idx); 203 begin 204 while Is_Blank (Ch) loop 205 Curr_Idx := Curr_Idx + 1; 206 if Curr_Idx > Curr'Last then 207 Load_Line; Curr_Idx := 1; 208 end if; 209 Ch := Curr (Curr_Idx); 210 end loop; 211 case Ch is 212 when '(' => 213 Curr_Token := Left_Paren_Token; 214 215 when ')' => 216 Curr_Token := Right_Paren_Token; 217 218 when ';' => 219 Curr_Token := Semicolon_Token; 220 221 when '.' => 222 Curr_Token := Period_Token; 223 224 when 'A' .. 'Z' | 'a' .. 'z' => 225 -- Parse a name: any sequence of characters, digits, and 226 -- underscores. 227 declare 228 Stop_Idx : constant Natural := 229 Identifier (Curr (Curr_Idx .. Curr'Last)); 230 begin 231 UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx)); 232 Token_Ptr := UT.Internal.Get_Ptr (Token_Image); 233 Curr_Idx := Stop_Idx; 234 end; 235 Curr_Token := Find_Token; 236 237 when ''' => 238 if Curr_Idx + 2 <= Curr'Last and then 239 Curr (Curr_Idx + 2) = ''' 240 then 241 Curr_Idx := Curr_Idx + 2; 242 end if; 243 Curr_Token := Other_Token; 244 245 when '"' => 246 -- Skip a string. 247 declare 248 Stop_Idx : constant Natural := 249 Ada_Skip_String (Curr (Curr_Idx .. Curr'Last), '"'); 250 begin 251 if Stop_Idx = 0 then 252 raise Scan_Error; 253 end if; 254 UT.Set (Token_Image, Curr (Curr_Idx .. Stop_Idx)); 255 Token_Ptr := UT.Internal.Get_Ptr (Token_Image); 256 Curr_Idx := Stop_Idx; 257 Curr_Token := String_Token; 258 end; 259 260 when '0' .. '9' => 261 -- Skip a number. Note: use a simplified syntax! 262 declare 263 Stop_Idx : Natural := Curr_Idx; 264 begin 265 while Stop_Idx <= Curr'Last and then 266 Is_In (Numeral, Curr (Stop_Idx)) 267 loop 268 Stop_Idx := Stop_Idx + 1; 269 end loop; 270 if Stop_Idx <= Curr'Last then 271 if Curr (Stop_Idx) = '#' then 272 Stop_Idx := Stop_Idx + 1; 273 -- Actually, there must be at least one digit, and 274 -- at most one period. 275 while Stop_Idx <= Curr'Last and then 276 Is_In (Based_Numeral, Curr (Stop_Idx)) 277 loop 278 Stop_Idx := Stop_Idx + 1; 279 end loop; 280 if Stop_Idx <= Curr'Last and then 281 Curr (Stop_Idx) = '#' 282 then 283 Stop_Idx := Stop_Idx + 1; 284 else 285 raise Scan_Error; 286 end if; 287 elsif Curr (Stop_Idx) = '.' then 288 Stop_Idx := Stop_Idx + 1; 289 -- Actually, there must be at least one digit. 290 while Stop_Idx <= Curr'Last and then 291 Is_In (Numeral, Curr (Stop_Idx)) 292 loop 293 Stop_Idx := Stop_Idx + 1; 294 end loop; 295 end if; -- Fraction or Based 296 end if; 297 if Stop_Idx <= Curr'Last and then 298 Curr (Stop_Idx) = 'E' 299 then 300 Stop_Idx := Stop_Idx + 1; 301 if Stop_Idx > Curr'Last then raise Scan_Error; end if; 302 case Curr (Stop_Idx) is 303 when '0' .. '9' => 304 null; 305 when '+' | '-' => 306 Stop_Idx := Stop_Idx + 1; 307 if Stop_Idx > Curr'Last then 308 raise Scan_Error; 309 end if; 310 when others => 311 raise Scan_Error; 312 end case; 313 -- Actually, there must be at least one digit now. 314 while Stop_Idx <= Curr'Last and then 315 Is_In (Numeral, Curr (Stop_Idx)) 316 loop 317 Stop_Idx := Stop_Idx + 1; 318 end loop; 319 end if; -- Exponent 320 Curr_Idx := Stop_Idx - 1; 321 end; 322 Curr_Token := Other_Token; 323 324 when others => 325 Curr_Token := Other_Token; 326 327 end case; 328 Curr_Idx := Curr_Idx + 1; 329 end; 330 end Advance; 331 332 function Current_Token 333 return Token 334 is 335 begin 336 return Curr_Token; 337 end Current_Token; 338 339 function Image 340 return UT.Unbounded_String 341 is 342 begin 343 if Curr_Token = Name_Token or else 344 Curr_Token = String_Token 345 then 346 return Token_Image; 347 else 348 return UT.Null_Unbounded_String; 349 end if; 350 end Image; 351 352 procedure Init 353 (File_Name : in String) 354 is 355 begin 356 Ada.Text_IO.Open (F, Ada.Text_IO.In_File, File_Name); 357 Load_Line; 358 Advance; 359 end Init; 360 361 procedure Close 362 is 363 begin 364 if Ada.Text_IO.Is_Open (F) then 365 Ada.Text_IO.Close (F); 366 end if; 367 end Close; 368 369 end Scanner; 370 371 ---------------------------------------------------------------------------- 372 -- Parsing routines. This is a very simple recursive descent parser, yet 373 -- it recognizes syntactically correct Ada 95 library unit headers up 374 -- to the library unit name. It doesn't do any error recovery, and it 375 -- skips source chunks that are not interesting. The sole purpose of this 376 -- is to get the name of the library unit, not any syntax or semantics 377 -- checking. 378 379 package Parser is 380 381 function Library_Unit 382 return String; 383 384 Parse_Error : exception; 385 386 end Parser; 387 388 package body Parser is 389 390 use Scanner; 391 392 procedure Skip_Parentheses 393 is 394 Level : Natural := 0; 395 begin 396 loop 397 case Current_Token is 398 when Left_Paren_Token => 399 Level := Level + 1; 400 401 when Right_Paren_Token => 402 Level := Level - 1; 403 404 when others => 405 null; 406 407 end case; 408 Advance; 409 exit when Level = 0; 410 end loop; 411 end Skip_Parentheses; 412 413 procedure Skip_To_Semicolon 414 is 415 begin 416 while Current_Token /= Semicolon_Token loop 417 Advance; 418 end loop; 419 end Skip_To_Semicolon; 420 421 procedure Skip_To_Semicolon_Nested 422 is 423 begin 424 while Current_Token /= Semicolon_Token loop 425 if Current_Token = Left_Paren_Token then 426 Skip_Parentheses; 427 else 428 Advance; 429 end if; 430 end loop; 431 end Skip_To_Semicolon_Nested; 432 433 procedure Context_Clauses 434 is 435 begin 436 loop 437 case Current_Token is 438 when With_Token | Use_Token => 439 Skip_To_Semicolon; 440 441 when Pragma_Token => 442 Skip_To_Semicolon_Nested; 443 444 when others => 445 exit; 446 447 end case; 448 -- Skip the semicolon. 449 Advance; 450 end loop; 451 end Context_Clauses; 452 453 procedure Generic_Formals 454 is 455 begin 456 loop 457 case Current_Token is 458 when Pragma_Token => 459 -- Just to be on the safe side: allow pragmas in the generic 460 -- formal part. 461 Skip_To_Semicolon_Nested; 462 463 when Use_Token => 464 Skip_To_Semicolon; 465 466 when Type_Token => 467 -- Generic formal type. 468 Advance; 469 if Current_Token /= Name_Token then 470 raise Parse_Error; 471 end if; 472 Advance; 473 if Current_Token = Left_Paren_Token then 474 -- Discriminants. 475 Skip_Parentheses; 476 end if; 477 if Current_Token /= Is_Token then 478 raise Parse_Error; 479 end if; 480 Skip_To_Semicolon; 481 482 when With_Token => 483 -- Generic formal subprogram or formal package. 484 Advance; 485 case Current_Token is 486 when Package_Token => 487 Advance; 488 if Current_Token /= Name_Token then 489 raise Parse_Error; 490 end if; 491 Advance; 492 if Current_Token /= Is_Token then 493 raise Parse_Error; 494 end if; 495 Advance; 496 if Current_Token /= New_Token then 497 raise Parse_Error; 498 end if; 499 Advance; 500 if Current_Token /= Name_Token then 501 raise Parse_Error; 502 end if; 503 Advance; 504 -- It may be an expanded name (Package.Name). 505 while Current_Token = Period_Token loop 506 Advance; 507 if Current_Token /= Name_Token then 508 raise Parse_Error; 509 end if; 510 Advance; 511 end loop; 512 if Current_Token = Left_Paren_Token then 513 -- Generic actual part. 514 Skip_Parentheses; 515 end if; 516 Skip_To_Semicolon; 517 518 when Procedure_Token | Function_Token => 519 declare 520 Initial : constant Token := Current_Token; 521 begin 522 Advance; 523 if Current_Token /= Name_Token and then 524 (Initial /= Function_Token or else 525 Current_Token /= String_Token) 526 then 527 raise Parse_Error; 528 end if; 529 Advance; 530 if Current_Token = Left_Paren_Token then 531 -- Parameter specifications. 532 Skip_Parentheses; 533 end if; 534 if Initial = Function_Token then 535 -- Return type 536 if Current_Token /= Return_Token then 537 raise Parse_Error; 538 end if; 539 Advance; 540 if Current_Token /= Name_Token then 541 raise Parse_Error; 542 end if; 543 Advance; 544 end if; 545 Skip_To_Semicolon; 546 end; 547 548 when others => 549 raise Parse_Error; 550 551 end case; 552 553 when Name_Token => 554 -- Generic formal object. Skip to first semicolon not within 555 -- parentheses. 556 Skip_To_Semicolon_Nested; 557 558 when Package_Token | Procedure_Token | Function_Token => 559 exit; 560 561 when others => 562 raise Parse_Error; 563 564 end case; 565 if Current_Token /= Semicolon_Token then 566 raise Parse_Error; 567 end if; 568 -- Skip the semicolon. 569 Advance; 570 end loop; 571 end Generic_Formals; 572 573 function Library_Unit 574 return String 575 is 576 begin 577 Context_Clauses; 578 if Current_Token = Private_Token then Advance; end if; 579 if Current_Token = Generic_Token then 580 Advance; 581 Generic_Formals; 582 end if; 583 case Current_Token is 584 when Package_Token | Procedure_Token | Function_Token => 585 declare 586 Initial : constant Token := Current_Token; 587 Unit_Name : UT.Unbounded_String; 588 begin 589 -- Next one must be the unit name. 590 Advance; 591 if Current_Token = Name_Token or else 592 (Initial = Function_Token and then 593 Current_Token = String_Token) 594 then 595 Unit_Name := Image; 596 declare 597 Last_Token : Token := Current_Token; 598 begin 599 Advance; 600 while Current_Token = Period_Token loop 601 Advance; 602 if Last_Token /= Name_Token then 603 raise Parse_Error; 604 end if; 605 if Current_Token = Name_Token or else 606 (Initial = Function_Token and then 607 Current_Token = String_Token) 608 then 609 UT.Append (Unit_Name, '.'); 610 UT.Append (Unit_Name, Image); 611 Last_Token := Current_Token; 612 Advance; 613 else 614 raise Parse_Error; 615 end if; 616 end loop; 617 end; 618 else 619 raise Parse_Error; 620 end if; 621 return UT.To_String (Unit_Name); 622 end; 623 624 when others => 625 null; 626 627 end case; 628 return ""; 629 end Library_Unit; 630 631 end Parser; 632 633 ---------------------------------------------------------------------------- 634 -- Exported routines. 635 636 function Get_Unit_Name 637 (File_Name : in String) 638 return String 639 is 640 begin 641 Scanner.Init (File_Name); 642 declare 643 Unit_Name : constant String := Parser.Library_Unit; 644 begin 645 Scanner.Close; 646 return Unit_Name; 647 end; 648 exception 649 when others => 650 Scanner.Close; 651 return ""; 652 end Get_Unit_Name; 653 654end AD.Parse; 655