1------------------------------------------------------------------------------ 2-- XML/Ada - An XML suite for Ada95 -- 3-- -- 4-- Copyright (C) 2001-2020, AdaCore -- 5-- -- 6-- This library is free software; you can redistribute it and/or modify it -- 7-- under terms of the GNU General Public License as published by the Free -- 8-- Software Foundation; either version 3, or (at your option) any later -- 9-- version. This library is distributed in the hope that it will be useful, -- 10-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- -- 11-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. -- 12-- -- 13-- As a special exception under Section 7 of GPL version 3, you are granted -- 14-- additional permissions described in the GCC Runtime Library Exception, -- 15-- version 3.1, as published by the Free Software Foundation. -- 16-- -- 17-- You should have received a copy of the GNU General Public License and -- 18-- a copy of the GCC Runtime Library Exception along with this program; -- 19-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- 20-- <http://www.gnu.org/licenses/>. -- 21-- -- 22------------------------------------------------------------------------------ 23 24pragma Ada_05; 25 26with Ada.Exceptions; use Ada.Exceptions; 27with Ada.Text_IO; use Ada.Text_IO; 28with GNAT.Directory_Operations; use GNAT.Directory_Operations; 29with Input_Sources.File; use Input_Sources.File; 30with Input_Sources.Strings; use Input_Sources.Strings; 31with Input_Sources; use Input_Sources; 32with Interfaces; use Interfaces; 33with Sax.Attributes; use Sax.Attributes; 34with Sax.Encodings; use Sax.Encodings; 35with Sax.Exceptions; use Sax.Exceptions; 36with Sax.Locators; use Sax.Locators; 37with Sax.Models; use Sax.Models; 38with Sax.Symbols; use Sax.Symbols; 39with Unchecked_Deallocation; 40with Unicode.CES; use Unicode.CES; 41with Unicode.CES.Basic_8bit; use Unicode.CES.Basic_8bit; 42with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin; 43with Unicode; use Unicode; 44 45package body Sax.Readers is 46 47 use Entity_Table, Attributes_Table, Notations_Table; 48 use Symbol_Table_Pointers; 49 50 Debug_Lexical : constant Boolean := False; 51 Debug_Input : constant Boolean := False; 52 Debug_Internal : constant Boolean := False; 53 -- Set to True if you want to debug this package 54 55 Initial_Buffer_Length : constant := 10000; 56 -- Initial length of the internal buffer that stores CDATA, tag names,... 57 58 -------------------- 59 -- Error messages -- 60 -------------------- 61 -- The comment indicates the section of the XML or Namespaces specification 62 -- relevant for that error 63 64 Error_Attlist_DefaultDecl : constant String := 65 "Invalid default declaration for the attribute"; -- 3.3.2 66 Error_Attlist_Invalid_Enum : constant String := 67 "Invalid character ',' in ATTLIST enumeration"; -- 3.3.1 68 Error_Attlist_Type : constant String := 69 "Invalid type for attribute"; -- WF 70 Error_Attribute_External_Entity : constant String := 71 "Attribute values cannot reference external entities"; 72 Error_Attribute_Is_Name : constant String := 73 "Attribute must contain Names: "; -- NS 6 and 3.3.1 74 Error_Attribute_Is_Ncname : constant String := 75 "Attribute must contain Names with no colon: "; -- NS 6 and 3.3.1 76 Error_Attribute_Is_Nmtoken : constant String := 77 "Attribute must contain Nmtokens: "; -- 2.3 and 3.3.1 78 Error_Attribute_Less_Than : constant String := 79 "'<' not authorized in attribute values"; -- 2.3 80 Error_Attribute_Less_Than_Suggests : constant String := -- 2.3 81 "'<' not authorized in attribute values. Possible end of value at "; 82 Error_Attribute_Ref_Unparsed_Entity : constant String := 83 "Attribute must reference an existing unparsed entity: "; 84 Error_Cdata_End : constant String := 85 "CDATA sections must end with ']]>'"; -- 2.7 86 Error_Cdata_Unterminated : constant String := 87 "CDATA must be followed immediately by '['"; 88 Error_Charref_Toplevel : constant String := 89 "Character references cannot appear at top-level"; -- 2.1 90 Error_Charref_Invalid_Char : constant String := 91 "Invalid character in character reference: "; -- 4.1 92 Error_Comment_End : constant String := 93 "Comments must end with '-->'"; -- 2.5 94 Error_Comment_Unterminated : constant String := 95 "Unterminated comment in stream"; -- WF 96 Error_Comment_Dash_Dash : constant String := 97 "'--' cannot appear in comments"; -- 2.5 98 Error_Conditional_Location : constant String := -- 3.4 99 "INCLUDE and IGNORE sections only allowed in the external DTD subset"; 100 Error_Conditional_Syntax : constant String := 101 "Conditional sections need '[' after INCLUDE or IGNORE"; -- 3.4 102 Error_Content_Model_Closing_Paren : constant String := 103 "Closing parenthesis must be followed by '*' in mixed content"; -- 3.2.2 104 Error_Content_Model_Empty_List : constant String := 105 "Invalid content model: list of choices cannot be empty"; 106 Error_Content_Model_Expect_Operator : constant String := 107 "Expecting operator in content model"; 108 Error_Content_Model_Invalid : constant String := 109 "Invalid content model"; 110 Error_Content_Model_Invalid_Multiplier : constant String := 111 "Invalid location for '+', '?' or '*' operators"; -- 3.2.1 112 Error_Content_Model_Invalid_Name : constant String := 113 "Invalid name in content model: "; 114 Error_Content_Model_Invalid_Seq : constant String := 115 "Missing content particle in sequence"; -- 3.2.1 116 Error_Content_Model_Invalid_Start : constant String := 117 "Invalid content model, cannot start with #"; 118 Error_Content_Model_Mixing : constant String := 119 "Cannot mix ',' and '|' in content model"; 120 Error_Content_Model_Nested_Groups : constant String := 121 "Nested groups and occurrence operators not allowed in mixed content"; 122 -- 3.3.2 123 Error_Content_Model_Pcdata : constant String := 124 "#PCDATA can only be used with '|' connectors"; -- 3.2.2 125 Error_Content_Model_Pcdata_First : constant String := 126 "#PCDATA must be first in list"; -- 3.2.2 127 Error_Content_Model_Pcdata_Occurrence : constant String := 128 "Occurrence on #PCDATA must be '*'"; -- 3.2.2 129 Error_Entity_Definition : constant String := 130 "Invalid definition for ENTITY"; 131 Error_Entity_Definition_Unterminated : constant String := 132 "Expecting end of ENTITY definition"; 133 Error_Entity_Name : constant String := "Invalid entity name"; -- 4.1 134 Error_Entity_Not_Standalone : constant String := 135 "Entity declared in external subset, but document is standalone"; -- 4.1 136 Error_Entity_Self_Ref : constant String := 137 "Entity cannot reference itself"; -- 4.1 138 Error_Entity_Toplevel : constant String := 139 "Entity references cannot appear at top-level"; -- 2.1 140 Error_Entity_Undefined : constant String := "Undefined entity"; -- 4.1 141 Error_Entityref_Unterminated : constant String := 142 "Entity references must end with ';'." & ASCII.LF 143 & "Did you want to use &?"; -- 4.1 144 Error_Entity_Nested : constant String := 145 "Replacement text for entities must be properly nested"; -- 3.2.1 146 Error_Entity_Self_Contained : constant String := 147 "Entity values must be self-contained"; -- 4.5 or 4.3.2 148 Error_Expecting_Space : constant String := 149 "Expecting a space"; -- WF or 3.3 150 Error_External_Entity_Not_Found : constant String := 151 "External entity not found: "; 152 Error_Invalid_Char : constant String := 153 "Invalid character code:"; -- 2.2 or 4.1 154 Error_Invalid_Declaration : constant String := "Invalid declaration"; 155 Error_Invalid_Encoding : constant String := "Invalid character encoding"; 156 Error_Invalid_Content_Model : constant String := "Invalid content model"; 157 Error_Invalid_Language : constant String := 158 "Invalid language specification"; -- 2.12 159 Error_Invalid_Name : constant String := 160 "Invalid name: "; -- 3.1 161 Error_Invalid_Notation_Decl : constant String := 162 "Invalid notation declaration"; -- WF 163 Error_Invalid_Space : constant String := 164 "Value of xml:space must be (default|preserve)"; -- 2.10 165 Error_Is_Name : constant String := "Expecting a Name"; -- 3.3.1 166 Error_Is_Ncname : constant String := 167 "Expecting a Name with no colon"; -- NS 6 and 3.3.1 168 Error_Missing_Operand : constant String := 169 "Missing operand before this operator"; 170 Error_Mixed_Contents : constant String := 171 "Mixed contents cannot be used in a list or a sequence"; -- 3.2.1 172 Error_Ndata_ParamEntity : constant String := -- 4.2 173 "NDATA annotation not allowed for parameter entities"; 174 Error_Ndata_Space : constant String := -- 4.2.2 175 "Expecting space before NDATA declaration"; 176 Error_Ndata_String : constant String := 177 "Expecting string after NDATA"; 178 Error_ParamEntity_In_Attribute : constant String := 179 "Parameter entities cannot occur in attribute values"; 180 -- WF PE in internal subset 181 Error_Notation_Undeclared : constant String := 182 "Notation must be declared: "; -- VC 4.2.2 or 3.3.1 183 Error_Prefix_Not_Declared : constant String := 184 "Prefix must be declared before its use: "; -- WF 185 Error_Public_String : constant String := 186 "Expecting a string after PUBLIC"; 187 Error_Public_Sysid : constant String := 188 "Expecting SystemID after PUBLIC"; 189 Error_Public_Sysid_Space : constant String := 190 "Require whitespace between public and system IDs"; -- 4.2.2 191 Error_Public_Invalid : constant String := 192 "Invalid PubID character: "; 193 Error_System_String : constant String := 194 "Expecting a string after SYSTEM"; 195 Error_System_URI : constant String := -- 4.2.2 196 "SYSTEM identifiers may not contain URI fragments starting with #"; 197 Error_Unknown_Declaration : constant String := 198 "Unknown declaration in DTD"; -- WF 199 Error_Unexpected_Chars1 : constant String := 200 "Invalid characters '<!-' in stream"; -- WF 201 Error_Unexpected_Chars2 : constant String := 202 "Unexpected characters between ']' and '>' in the DTD"; -- 2.8 203 Error_Unexpected_Chars3 : constant String := 204 "Text may not contain the litteral ']]>'"; -- 2.4 205 Error_Unterminated_String : constant String := 206 "Unterminated string"; -- 2.3 207 Error_Unterminated_String_Suggests : constant String := 208 "Unterminated string, possible end at "; -- 2.3 209 210 ------------ 211 -- Tokens -- 212 ------------ 213 214 type Token_Type is 215 (Double_String_Delimiter, -- " 216 Single_String_Delimiter, -- ' 217 Comment, -- <!--...--> (Data is the comment) 218 Start_Of_Tag, -- < 219 Start_Of_End_Tag, -- </ 220 End_Of_Start_Tag, -- /> 221 Start_Of_PI, -- <? 222 End_Of_PI, -- ?> 223 End_Of_Tag, -- > 224 Equal, -- = (in tags) 225 Colon, -- : (in tags) 226 Open_Paren, -- ( (while parsing content model in ATTLIST) 227 Internal_DTD_Start, -- [ (while in DTD) 228 Internal_DTD_End, -- ] (while in DTD) 229 Include, -- <![INCLUDE[ 230 Ignore, -- <![IGNORE[ 231 Start_Conditional, -- <![ 232 End_Conditional, -- ]]> 233 Space, -- Any number of spaces (Data is the spaces) 234 Text, -- any text (Data is the identifier) 235 Name, -- same as text, but contains only valid 236 -- name characters 237 Char_Ref, -- A character reference. Data is the character 238 Cdata_Section, -- <![CDATA 239 Doctype_Start, -- <!DOCTYPE 240 System, -- SYSTEM (while in DTD) 241 Public, -- PUBLIC (while in DTD) 242 Ndata, -- NDATA (while in DTD) 243 Any, -- ANY (while in DTD) 244 Empty, -- EMPTY (while in DTD) 245 Notation, -- NOTATION (while in DTD or ATTLIST) 246 Entity_Def, -- <!ENTITY (while in DTD) 247 Element_Def, -- <!ELEMENT (while in DTD) 248 Attlist_Def, -- <!ATTLIST (while in DTD) 249 Id_Type, -- ID (while in ATTLIST) Data is "ID" 250 Idref, -- IDREF (while in ATTLIST) Data is "IDREF" 251 Idrefs, -- IDREFS (while in ATTLIST) Data is "IDREFS" 252 Cdata, -- CDATA (while in ATTLIST) Data is "CDATA" 253 Entity, -- ENTITY (while in ATTLIST) Data is "ENTITY" 254 Entities, -- ENTITIES (while in ATTLIST) Data="ENTITIES" 255 Nmtoken, -- NMTOKEN (while in ATTLIST) Data="NMTOKEN" 256 Nmtokens, -- NMTOKENS (while in ATTLIST) Data="NMTOKENS" 257 Required, -- REQUIRED (while in ATTLIST) Data="#REQUIRED" 258 Implied, -- IMPLIED (while in ATTLIST) Data="#IMPLIED" 259 Fixed, -- FIXED (while in ATTLIST) Data="#FIXED" 260 End_Of_Input -- End of input was seen. 261 ); 262 263 type Token is record 264 Typ : Token_Type; 265 First, Last : Natural; -- Indexes in the buffer 266 Location : Sax.Locators.Location; 267 From_Entity : Boolean; -- Whether the characters come from the 268 -- expansion of an entity. 269 end record; 270 271 Null_Token : constant Token := (End_Of_Input, 1, 0, No_Location, False); 272 273 Default_State : constant Parser_State := 274 (Name => "Def", 275 Ignore_Special => False, 276 Detect_End_Of_PI => False, 277 Greater_Special => False, 278 Less_Special => False, 279 Expand_Param_Entities => False, 280 Expand_Entities => True, 281 Report_Character_Ref => False, 282 Expand_Character_Ref => True, 283 In_DTD => False, 284 Recognize_External => False, 285 Handle_Strings => False, 286 In_Tag => False, 287 Report_Parenthesis => False, 288 In_Attlist => False); 289 Attr_Value_State : constant Parser_State := 290 (Name => "Att", 291 Ignore_Special => True, 292 Detect_End_Of_PI => False, 293 Greater_Special => False, 294 Less_Special => True, 295 Expand_Param_Entities => False, 296 Expand_Entities => True, 297 Report_Character_Ref => True, 298 Expand_Character_Ref => False, 299 In_DTD => False, 300 Recognize_External => False, 301 Handle_Strings => True, 302 In_Tag => False, 303 Report_Parenthesis => False, 304 In_Attlist => False); 305 Non_Interpreted_String_State : constant Parser_State := 306 (Name => "Str", 307 Ignore_Special => True, 308 Detect_End_Of_PI => False, 309 Greater_Special => False, 310 Less_Special => False, 311 Expand_Param_Entities => False, 312 Expand_Entities => False, 313 Report_Character_Ref => False, 314 Expand_Character_Ref => False, 315 In_DTD => False, 316 Recognize_External => False, 317 Handle_Strings => True, 318 In_Tag => False, 319 Report_Parenthesis => False, 320 In_Attlist => False); 321 DTD_State : constant Parser_State := 322 (Name => "DTD", 323 Ignore_Special => False, 324 Detect_End_Of_PI => False, 325 Greater_Special => True, 326 Less_Special => False, 327 Expand_Param_Entities => True, 328 Expand_Entities => True, 329 Report_Character_Ref => False, 330 Expand_Character_Ref => True, 331 In_DTD => True, 332 Recognize_External => True, 333 Handle_Strings => True, 334 In_Tag => False, 335 Report_Parenthesis => False, 336 In_Attlist => False); 337 PI_State : constant Parser_State := 338 (Name => "PI ", 339 Ignore_Special => True, 340 Detect_End_Of_PI => True, 341 Greater_Special => False, 342 Less_Special => False, 343 Expand_Param_Entities => False, 344 Expand_Entities => False, 345 Report_Character_Ref => False, 346 Expand_Character_Ref => False, 347 In_DTD => False, 348 Recognize_External => False, 349 Handle_Strings => True, 350 In_Tag => False, 351 Report_Parenthesis => False, 352 In_Attlist => False); 353 Entity_Def_State : constant Parser_State := 354 (Name => "Ent", 355 Ignore_Special => False, 356 Detect_End_Of_PI => False, 357 Greater_Special => True, 358 Less_Special => False, 359 Expand_Param_Entities => False, 360 Expand_Entities => False, 361 Report_Character_Ref => False, 362 Expand_Character_Ref => True, 363 In_DTD => True, 364 Recognize_External => True, 365 Handle_Strings => True, 366 In_Tag => False, 367 Report_Parenthesis => False, 368 In_Attlist => False); 369 Element_Def_State : constant Parser_State := 370 (Name => "Ele", 371 Ignore_Special => False, 372 Detect_End_Of_PI => False, 373 Greater_Special => True, 374 Less_Special => False, 375 Expand_Param_Entities => True, 376 Expand_Entities => False, 377 Report_Character_Ref => False, 378 Expand_Character_Ref => True, 379 In_DTD => True, 380 Recognize_External => True, 381 Handle_Strings => True, 382 In_Tag => True, 383 Report_Parenthesis => True, 384 In_Attlist => False); 385 Attribute_Def_State : constant Parser_State := 386 (Name => "AtD", 387 Ignore_Special => False, 388 Detect_End_Of_PI => False, 389 Greater_Special => True, 390 Less_Special => False, 391 Expand_Param_Entities => True, 392 Expand_Entities => False, 393 Report_Character_Ref => False, 394 Expand_Character_Ref => True, 395 In_DTD => True, 396 Recognize_External => False, 397 Handle_Strings => True, 398 In_Tag => True, 399 Report_Parenthesis => True, 400 In_Attlist => True); 401 Attribute_Def_Name_State : constant Parser_State := 402 (Name => "ADN", 403 Ignore_Special => False, 404 Detect_End_Of_PI => False, 405 Greater_Special => True, 406 Less_Special => False, 407 Expand_Param_Entities => True, 408 Expand_Entities => False, 409 Report_Character_Ref => False, 410 Expand_Character_Ref => True, 411 In_DTD => True, 412 Recognize_External => False, 413 Handle_Strings => True, 414 In_Tag => True, 415 Report_Parenthesis => True, 416 In_Attlist => False); 417 Entity_Str_Def_State : constant Parser_State := 418 (Name => "EtS", 419 Ignore_Special => True, 420 Detect_End_Of_PI => False, 421 Greater_Special => False, 422 Less_Special => False, 423 Expand_Param_Entities => True, 424 Expand_Entities => False, 425 Report_Character_Ref => False, 426 Expand_Character_Ref => True, 427 In_DTD => True, 428 Recognize_External => False, 429 Handle_Strings => True, 430 In_Tag => False, 431 Report_Parenthesis => False, 432 In_Attlist => False); 433 Attlist_Str_Def_State : constant Parser_State := 434 (Name => "AtS", 435 Ignore_Special => True, 436 Detect_End_Of_PI => False, 437 Greater_Special => False, 438 Less_Special => False, 439 Expand_Param_Entities => False, 440 Expand_Entities => True, 441 Report_Character_Ref => False, 442 Expand_Character_Ref => True, 443 In_DTD => True, 444 Recognize_External => False, 445 Handle_Strings => True, 446 In_Tag => False, 447 Report_Parenthesis => False, 448 In_Attlist => False); 449 Tag_State : constant Parser_State := 450 (Name => "Tag", 451 Ignore_Special => False, 452 Greater_Special => True, 453 Less_Special => False, 454 Detect_End_Of_PI => False, 455 Expand_Param_Entities => False, 456 Expand_Entities => False, 457 Report_Character_Ref => False, 458 Expand_Character_Ref => True, 459 In_DTD => False, 460 Recognize_External => False, 461 Handle_Strings => True, 462 In_Tag => True, 463 Report_Parenthesis => False, 464 In_Attlist => False); 465 466 -------------------------- 467 -- Internal subprograms -- 468 -------------------------- 469 470 procedure Unchecked_Free is new Unchecked_Deallocation 471 (Input_Source'Class, Input_Source_Access); 472 procedure Unchecked_Free is new Unchecked_Deallocation 473 (Hook_Data'Class, Hook_Data_Access); 474 procedure Unchecked_Free is new Ada.Unchecked_Deallocation 475 (Sax_Attribute_Array, Sax_Attribute_Array_Access); 476 477 function Debug_Encode (C : Unicode_Char) return Byte_Sequence; 478 -- Return an encoded string matching C (matching Sax.Encodins.Encoding) 479 480 procedure Test_Valid_Char 481 (Parser : in out Sax_Reader'Class; C : Unicode_Char; Loc : Token); 482 -- Raise an error if C is not valid in XML. The error is reported at 483 -- location Loc. 484 485 function Is_Pubid_Char (C : Unicode_Char) return Boolean; 486 -- Return True if C is a valid character for a Public ID (2.3 specs) 487 488 procedure Test_Valid_Lang 489 (Parser : in out Sax_Reader'Class; Lang : Byte_Sequence); 490 -- Return True if Lang matches the rules for languages 491 492 procedure Test_Valid_Space 493 (Parser : in out Sax_Reader'Class; Space : Byte_Sequence); 494 -- Return True if Space matches the rules for the xml:space attribute 495 496 procedure Next_Char 497 (Input : in out Input_Source'Class; 498 Parser : in out Sax_Reader'Class); 499 -- Return the next character, and increments the locators. 500 -- If there are no more characters in the input streams, Parser is setup 501 -- so that End_Of_Stream (Parser) returns True. 502 503 procedure Lookup_Char 504 (Input : in out Input_Source'Class; 505 Parser : in out Sax_Reader'Class; 506 Char : out Unicode_Char); 507 -- Lookup one character, but put it back in the input so that the next call 508 -- to Next_Char will return it again. This does not change 509 -- Parser.Last_Read. 510 511 function End_Of_Stream (Parser : Sax_Reader'Class) return Boolean; 512 pragma Inline (End_Of_Stream); 513 -- Return True if there are no more characters in the parser. 514 -- Note that this indicates that no more character remains to be read, and 515 -- is different from checking Eof on the current input (since for instance 516 -- a new input is open for an entity). 517 518 function Create_Attribute_List 519 (Attrs : Sax_Attribute_List) return Sax.Attributes.Attributes; 520 -- Create the list of attributes from Parser.Attributes. 521 -- This function has the side effect of resetting 522 -- Parser.Attributes_Count to 0, and freeing memory as appropriate 523 524 procedure Put_In_Buffer 525 (Parser : in out Sax_Reader'Class; Char : Unicode_Char); 526 pragma Inline (Put_In_Buffer); 527 528 procedure Put_In_Buffer 529 (Parser : in out Sax_Reader'Class; Str : Byte_Sequence); 530 pragma Inline (Put_In_Buffer); 531 -- Put the last character read in the internal buffer 532 533 procedure Next_Token 534 (Input : in out Input_Sources.Input_Source'Class; 535 Parser : in out Sax_Reader'Class; 536 Id : out Token; 537 Coalesce_Space : Boolean := False); 538 -- Return the next identifier in the input stream. 539 -- Locator is modified accordingly (line and column). 540 -- If Coalesce_Space is True, then all the Name or Text tokens preceded or 541 -- followed by Space tokens are grouped together and returned as a single 542 -- Text token. 543 -- Id.Typ is set to End_Of_Input if there are no more token to be read. 544 545 procedure Next_Token_Skip_Spaces 546 (Input : in out Input_Sources.Input_Source'Class; 547 Parser : in out Sax_Reader'Class; 548 Id : out Token; 549 Must_Have : Boolean := False); 550 -- Same as Next_Token, except it skips spaces. If Must_Have is True, 551 -- then the first token read must be a space, or an error is raised 552 -- Id.Typ is set to End_Of_Input if there are no more token to be read. 553 554 procedure Next_NS_Token_Skip_Spaces 555 (Input : in out Input_Sources.Input_Source'Class; 556 Parser : in out Sax_Reader'Class; 557 NS_Id : out Token; 558 Name_Id : out Token); 559 -- Skip spaces, if any, then read a "ns:name" or "name" token. 560 561 function Find_Symbol (Parser : Sax_Reader'Class; T : Token) return Symbol; 562 function Find_Symbol 563 (Parser : Sax_Reader'Class; First, Last : Token) return Symbol; 564 -- Return the value of the symbol 565 566 procedure Reset_Buffer 567 (Parser : in out Sax_Reader'Class; Id : Token := Null_Token); 568 -- Clears the internal buffer in Parser. 569 -- If Id is not Null_Token, then only the characters starting from 570 -- Id.First are removed 571 572 procedure Set_State 573 (Parser : in out Sax_Reader'Class; State : Parser_State); 574 -- Set the current state for the parser 575 576 function Get_State (Parser : Sax_Reader'Class) return Parser_State; 577 -- Return the current state. 578 579 procedure Close_Namespaces 580 (Parser : in out Sax_Reader'Class; List : XML_NS); 581 -- Close all namespaces in the list, and report appropriate SAX events 582 583 procedure Check_Valid_Name_Or_NCname 584 (Parser : in out Sax_Reader'Class; 585 Name : Token); 586 -- Check that Name is a valid Name (if namespaces are not supported) or 587 -- a NCname if namespaces are supported. 588 589 procedure Check_Attribute_Value 590 (Parser : in out Sax_Reader'Class; 591 Local_Name : Symbol; 592 Typ : Attribute_Type; 593 Value : Symbol; 594 Error_Loc : Token); 595 -- Check Validity Constraints for a single attribute. Only call this 596 -- subprogram for a validating parser 597 598 procedure Syntactic_Parse 599 (Parser : in out Sax_Reader'Class; 600 Input : in out Input_Sources.Input_Source'Class); 601 -- Internal syntactical parser. 602 603 procedure Find_NS 604 (Parser : in out Sax_Reader'Class; 605 Prefix : Token; 606 NS : out XML_NS; 607 Include_Default_NS : Boolean := True); 608 -- Internal version of Find_NS 609 610 function Qname_From_Name 611 (Parser : Sax_Reader'Class; Prefix, Local_Name : Token) 612 return Byte_Sequence; 613 function Qname_From_Name (Prefix, Local_Name : Symbol) return Byte_Sequence; 614 -- Create the qualified name from the namespace URI and the local name. 615 616 procedure Add_Namespace 617 (Parser : in out Sax_Reader'Class; 618 Node : Element_Access; 619 Prefix : Symbol; 620 URI : Symbol; 621 Report_Event : Boolean := True); 622 -- Same as above, with strings 623 624 procedure Add_Namespace_No_Event 625 (Parser : in out Sax_Reader'Class; 626 Prefix, URI : Symbol); 627 -- Create a new default namespace in the parser 628 629 procedure Free (Parser : in out Sax_Reader'Class); 630 -- Free the memory allocated for the parser, including the namespaces, 631 -- entities,... 632 633 procedure Free (Elem : in out Element_Access); 634 -- Free the memory of Elem (and its contents). Note that this doesn't free 635 -- the parent of Elem). 636 -- On Exit, Elem is set to its parent. 637 638 procedure Parse_Element_Model 639 (Input : in out Input_Sources.Input_Source'Class; 640 Parser : in out Sax_Reader'Class; 641 Result : out Element_Model_Ptr; 642 Attlist : Boolean := False; 643 Open_Was_Read : Boolean); 644 -- Parse the following characters in the stream so as to create an 645 -- element or attribute contents model, ie the tree matching an 646 -- expression like "(foo|bar)+". 647 -- Nmtokens should be true if the names in the model should follow the 648 -- Nmtoken rule in XML specifications rather than the Name rule. 649 -- If Open_Was_Read, then the opening parenthesis is considered to have 650 -- been read already and is automatically inserted into the stack. 651 -- Attlist should be set to true if this is the model in <!ELEMENT> 652 653 procedure Fatal_Error 654 (Parser : in out Sax_Reader'Class; 655 Msg : String; 656 Loc : Sax.Locators.Location := No_Location); 657 procedure Fatal_Error 658 (Parser : in out Sax_Reader'Class; 659 Msg : String; 660 Loc : Token); 661 -- Raises a fatal error. 662 -- The error is reported at location Id (or the current parser location 663 -- if Id is Null_Token). 664 -- The user application should not return from this call. Thus, a 665 -- Program_Error is raised if it does return. 666 667 procedure Error 668 (Parser : in out Sax_Reader'Class; 669 Msg : String; 670 Loc : Sax.Locators.Location); 671 procedure Error 672 (Parser : in out Sax_Reader'Class; 673 Msg : String; 674 Id : Token); 675 -- Same as Fatal_Error, but reports an error instead 676 677 procedure Warning 678 (Parser : in out Sax_Reader'Class; 679 Msg : String; 680 Loc : Sax.Locators.Location); 681 procedure Warning 682 (Parser : in out Sax_Reader'Class; 683 Msg : String; 684 Id : Token := Null_Token); 685 -- Same as Fatal_Error, but reports a warning instead 686 687 function Location 688 (Parser : Sax_Reader'Class; 689 Loc : Sax.Locators.Location) return Byte_Sequence; 690 -- Return the location of the start of Id as a string. 691 692 function Resolve_URI 693 (Parser : Sax_Reader'Class; 694 System_Id : Symbol; 695 URI : Symbol) return Symbol; 696 -- Return a fully resolved URI, based on the system identifier set for 697 -- Machine, and URI. 698 -- [System_Id] should be the result of [System_Id (Parser)] at the time the 699 -- URI was found. 700 701 function System_Id (Parser : Sax_Reader'Class) return Symbol; 702 function Public_Id (Parser : Sax_Reader'Class) return Symbol; 703 pragma Inline (System_Id, Public_Id); 704 -- Return the current system id that we are parsing 705 706 procedure Close_Inputs 707 (Parser : in out Sax_Reader'Class; 708 Inputs : in out Entity_Input_Source_Access); 709 -- Close the inputs that have been completely read. This should be 710 -- called every time one starts an entity, so that calls to 711 -- Start_Entity/End_Entity are properly nested, and error messages 712 -- point to the right entity. 713 714 procedure Debug_Print (Parser : Sax_Reader'Class; Id : Token); 715 -- Print the contents of Id 716 717 ----------------- 718 -- Find_Symbol -- 719 ----------------- 720 721 function Find_Symbol 722 (Parser : Sax_Reader'Class; Str : Byte_Sequence) return Symbol is 723 begin 724 return Find (Get (Parser.Symbols), Str); 725 end Find_Symbol; 726 727 ----------------- 728 -- Find_Symbol -- 729 ----------------- 730 731 function Find_Symbol (Parser : Sax_Reader'Class; T : Token) return Symbol is 732 begin 733 return Find (Get (Parser.Symbols), Parser.Buffer (T.First .. T.Last)); 734 end Find_Symbol; 735 736 ----------------- 737 -- Find_Symbol -- 738 ----------------- 739 740 function Find_Symbol 741 (Parser : Sax_Reader'Class; First, Last : Token) return Symbol is 742 begin 743 return Find (Get (Parser.Symbols), 744 Parser.Buffer (First.First .. Last.Last)); 745 end Find_Symbol; 746 747 ------------------- 748 -- End_Of_Stream -- 749 ------------------- 750 751 function End_Of_Stream (Parser : Sax_Reader'Class) return Boolean is 752 begin 753 return not Parser.Last_Read_Is_Valid 754 and Parser.Last_Read = 16#FFFF#; 755 end End_Of_Stream; 756 757 ------------------ 758 -- Debug_Encode -- 759 ------------------ 760 761 function Debug_Encode (C : Unicode_Char) return Byte_Sequence is 762 Buffer : Byte_Sequence (1 .. 20); 763 Index : Natural := Buffer'First - 1; 764 begin 765 Encoding.Encode (C, Buffer, Index); 766 return Buffer (Buffer'First .. Index); 767 end Debug_Encode; 768 769 --------------- 770 -- System_Id -- 771 --------------- 772 773 function System_Id (Parser : Sax_Reader'Class) return Symbol is 774 begin 775 if Parser.Inputs = null then 776 return Parser.System_Id; 777 else 778 return Parser.Inputs.System_Id; 779 end if; 780 end System_Id; 781 782 --------------- 783 -- Public_Id -- 784 --------------- 785 786 function Public_Id (Parser : Sax_Reader'Class) return Symbol is 787 begin 788 if Parser.Inputs = null then 789 return Parser.Public_Id; 790 else 791 return Parser.Inputs.Public_Id; 792 end if; 793 end Public_Id; 794 795 ---------- 796 -- Free -- 797 ---------- 798 799 procedure Free (Elem : in out Element_Access) is 800 procedure Free_Element is new Unchecked_Deallocation 801 (Element, Element_Access); 802 Tmp : constant Element_Access := Elem.Parent; 803 begin 804 Free (Elem.Namespaces); 805 Free_Element (Elem); 806 Elem := Tmp; 807 end Free; 808 809 --------------------------- 810 -- Create_Attribute_List -- 811 --------------------------- 812 813 function Create_Attribute_List 814 (Attrs : Sax_Attribute_List) return Sax.Attributes.Attributes 815 is 816 function Get_Or_Null (S : Symbol) return String; 817 function Get_Or_Null (S : Symbol) return String is 818 begin 819 if S = No_Symbol then 820 return ""; 821 else 822 return Get (S).all; 823 end if; 824 end Get_Or_Null; 825 826 Attributes : Sax.Attributes.Attributes; 827 begin 828 for J in 1 .. Attrs.Count loop 829 Add_Attribute 830 (Attr => Attributes, 831 URI => Get_Or_Null (Attrs.List (J).URI), 832 Local_Name => Get (Attrs.List (J).Local_Name).all, 833 Qname => 834 Qname_From_Name 835 (Prefix => Attrs.List (J).Prefix, 836 Local_Name => Attrs.List (J).Local_Name), 837 Att_Type => Attrs.List (J).Att_Type, 838 Content => Unknown_Model, -- not needed anyway 839 Value => Get (Attrs.List (J).Value).all, 840 Default_Decl => Attrs.List (J).Default_Decl); 841 end loop; 842 843 return Attributes; 844 845 exception 846 when others => 847 Clear (Attributes); 848 raise; 849 end Create_Attribute_List; 850 851 ----------------- 852 -- Resolve_URI -- 853 ----------------- 854 855 function Resolve_URI 856 (Parser : Sax_Reader'Class; 857 System_Id : Symbol; 858 URI : Symbol) return Symbol 859 is 860 C : Unicode_Char; 861 URI_Str : constant Cst_Byte_Sequence_Access := Get (URI); 862 URI_Index : Positive := URI_Str'First; 863 begin 864 pragma Assert (URI /= No_Symbol); 865 866 if URI = Empty_String then 867 return System_Id; 868 end if; 869 870 -- ??? Only resolve paths for now 871 Encoding.Read (URI_Str.all, URI_Index, C); 872 if C = Solidus then 873 return URI; 874 else 875 declare 876 System_Str : constant Cst_Byte_Sequence_Access := Get (System_Id); 877 Index : Natural := System_Str'First; 878 Basename_Start : Natural := System_Str'First; 879 begin 880 while Index <= System_Str'Last loop 881 Encoding.Read (System_Str.all, Index, C); 882 if C = Solidus or else C = Reverse_Solidus then 883 Basename_Start := Index; 884 end if; 885 end loop; 886 return Find_Symbol 887 (Parser, 888 System_Str (System_Str'First .. Basename_Start - 1) 889 & URI_Str.all); 890 end; 891 end if; 892 end Resolve_URI; 893 894 -------------- 895 -- Location -- 896 -------------- 897 898 function Location (Parser : Sax_Reader'Class; Loc : Sax.Locators.Location) 899 return Byte_Sequence 900 is 901 Line : constant Byte_Sequence := Natural'Image (Loc.Line); 902 Col : constant Byte_Sequence := Natural'Image (Loc.Column); 903 begin 904 if Parser.Close_Inputs = null then 905 if Use_Basename_In_Error_Messages (Parser) then 906 return Base_Name (Get (Get_Public_Id (Parser.Locator)).all) & ':' 907 & Line (Line'First + 1 .. Line'Last) 908 & ':' & Col (Col'First + 1 .. Col'Last); 909 else 910 return Get (Get_Public_Id (Parser.Locator)).all & ':' 911 & Line (Line'First + 1 .. Line'Last) 912 & ':' & Col (Col'First + 1 .. Col'Last); 913 end if; 914 else 915 if Use_Basename_In_Error_Messages (Parser) then 916 return Base_Name (Get_Public_Id (Parser.Close_Inputs.Input.all)) 917 & ':' & Line (Line'First + 1 .. Line'Last) 918 & ':' & Col (Col'First + 1 .. Col'Last); 919 else 920 return Get_Public_Id (Parser.Close_Inputs.Input.all) & ':' 921 & Line (Line'First + 1 .. Line'Last) 922 & ':' & Col (Col'First + 1 .. Col'Last); 923 end if; 924 end if; 925 end Location; 926 927 ----------------- 928 -- Fatal_Error -- 929 ----------------- 930 931 procedure Fatal_Error 932 (Parser : in out Sax_Reader'Class; 933 Msg : String; 934 Loc : Sax.Locators.Location := No_Location) 935 is 936 Id2 : Sax.Locators.Location := Loc; 937 begin 938 if Id2 = No_Location then 939 Id2 := Parser.Current_Location; 940 end if; 941 Parser.Buffer_Length := 0; 942 943 -- So that when calling Close_Inputs, we do generate an End_Entity 944 Parser.State.Ignore_Special := True; 945 946 begin 947 -- Must be called before End_Document, as per the SAX standard 948 Fatal_Error 949 (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2)); 950 End_Document (Parser); 951 exception 952 when E : others => 953 begin 954 End_Document (Parser); 955 exception 956 when others => null; 957 end; 958 959 -- Priority is given to the Fatal_Error, whatever 960 -- End_Document raises 961 Reraise_Occurrence (E); 962 end; 963 964 raise Program_Error; 965 end Fatal_Error; 966 967 ----------------- 968 -- Fatal_Error -- 969 ----------------- 970 971 procedure Fatal_Error 972 (Parser : in out Sax_Reader'Class; 973 Msg : String; 974 Loc : Token) is 975 begin 976 Fatal_Error (Parser, Msg, Loc.Location); 977 end Fatal_Error; 978 979 ----------- 980 -- Error -- 981 ----------- 982 983 procedure Error 984 (Parser : in out Sax_Reader'Class; 985 Msg : String; 986 Loc : Sax.Locators.Location) 987 is 988 Id2 : Sax.Locators.Location := Loc; 989 begin 990 if Id2 = No_Location then 991 Id2 := Parser.Current_Location; 992 end if; 993 Error (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2)); 994 end Error; 995 996 procedure Error 997 (Parser : in out Sax_Reader'Class; 998 Msg : String; 999 Id : Token) is 1000 begin 1001 Error (Parser, Msg, Id.Location); 1002 end Error; 1003 1004 ----------- 1005 -- Error -- 1006 ----------- 1007 1008 procedure Error (Parser : in out Sax_Reader'Class; Msg : String) is 1009 begin 1010 Error (Parser, Msg, No_Location); 1011 end Error; 1012 1013 ------------- 1014 -- Warning -- 1015 ------------- 1016 1017 procedure Warning 1018 (Parser : in out Sax_Reader'Class; 1019 Msg : String; 1020 Loc : Sax.Locators.Location) 1021 is 1022 Id2 : Sax.Locators.Location := Loc; 1023 begin 1024 if Id2 = No_Location then 1025 Id2 := Parser.Current_Location; 1026 end if; 1027 Warning (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2)); 1028 end Warning; 1029 1030 procedure Warning 1031 (Parser : in out Sax_Reader'Class; 1032 Msg : String; 1033 Id : Token := Null_Token) 1034 is 1035 Id2 : Sax.Locators.Location := Id.Location; 1036 begin 1037 if Id2 = No_Location then 1038 Id2 := Parser.Current_Location; 1039 end if; 1040 1041 Warning (Parser, Create (Location (Parser, Id2) & ": " & Msg, Id2)); 1042 end Warning; 1043 1044 ----------------- 1045 -- Lookup_Char -- 1046 ----------------- 1047 1048 procedure Lookup_Char 1049 (Input : in out Input_Source'Class; 1050 Parser : in out Sax_Reader'Class; 1051 Char : out Unicode_Char) 1052 is 1053 begin 1054 if Parser.Inputs /= null then 1055 if Eof (Parser.Inputs.Input.all) then 1056 if Debug_Input then 1057 Put_Line ("++Input Lookup_Char: <at end of stream>"); 1058 end if; 1059 Char := Unicode_Char'Last; 1060 else 1061 Input_Sources.Next_Char (Parser.Inputs.Input.all, Char); 1062 end if; 1063 else 1064 if Eof (Input) then 1065 if Debug_Input then 1066 Put_Line ("++Input Lookup_Char 2: <at end of stream>"); 1067 end if; 1068 Char := Unicode_Char'Last; 1069 else 1070 Input_Sources.Next_Char (Input, Char); 1071 end if; 1072 end if; 1073 1074 if Debug_Input then 1075 Put_Line ("++Input Lookup_Char: " & Unicode_Char'Image (Char)); 1076 end if; 1077 1078 Parser.Lookup_Char := Char; 1079 end Lookup_Char; 1080 1081 --------------- 1082 -- Next_Char -- 1083 --------------- 1084 1085 procedure Next_Char 1086 (Input : in out Input_Source'Class; 1087 Parser : in out Sax_Reader'Class) 1088 is 1089 procedure Internal (Stream : in out Input_Source'Class); 1090 pragma Inline (Internal); 1091 1092 -------------- 1093 -- Internal -- 1094 -------------- 1095 1096 procedure Internal (Stream : in out Input_Source'Class) is 1097 C : Unicode_Char; 1098 begin 1099 if Parser.Lookup_Char /= Unicode_Char'Last then 1100 C := Parser.Lookup_Char; 1101 Parser.Lookup_Char := Unicode_Char'Last; 1102 else 1103 Next_Char (Stream, C); 1104 end if; 1105 1106 -- XML specs say that #xD#xA must be converted to one single #xA. 1107 -- A single #xD must be converted to one single #xA 1108 1109 if C = Carriage_Return then 1110 Parser.Previous_Char_Was_CR := True; 1111 1112 -- When expanding an internal entity, do not normalize the 1113 -- character (which has already been normalized when creating the 1114 -- entity, and therefore comes from a character ref 1115 if Parser.Inputs = null 1116 or else Parser.Inputs.External 1117 then 1118 Parser.Last_Read := Line_Feed; 1119 else 1120 Parser.Last_Read := Carriage_Return; 1121 end if; 1122 1123 elsif C = Line_Feed and then Parser.Previous_Char_Was_CR then 1124 Parser.Previous_Char_Was_CR := False; 1125 1126 -- When expanding an internal entity, do not strip the CRLF 1127 -- sequences: since they have already been stripped when the 1128 -- entity was created, the sequences that remain were created 1129 -- through character references and should therefore 1130 -- be kept as is. 1131 if Parser.Inputs = null 1132 or else Parser.Inputs.External 1133 then 1134 Next_Char (Stream, Parser); 1135 end if; 1136 1137 else 1138 Parser.Last_Read := C; 1139 1140 if Parser.Feature_Test_Valid_Chars then 1141 Test_Valid_Char (Parser, Parser.Last_Read, Null_Token); 1142 end if; 1143 end if; 1144 end Internal; 1145 1146 Input_A : Entity_Input_Source_Access; 1147 1148 begin 1149 -- First thing is to take into account location changes due to the 1150 -- previous character. 1151 if Parser.Last_Read_Is_Valid then 1152 if Parser.Last_Read = Line_Feed 1153 and then not Parser.Previous_Char_Was_CR 1154 then 1155 Set_Column_Number (Parser.Locator, 0); 1156 Increase_Line_Number (Parser.Locator); 1157 end if; 1158 1159 elsif Parser.Inputs /= null then 1160 Set_Location (Parser.Locator, Parser.Inputs.Save_Loc); 1161 1162 if Parser.Inputs.External then 1163 Parser.In_External_Entity := False; 1164 -- ??? Should test whether we are still in an external entity. 1165 -- However, this is only used for the <?xml?> PI, and at this 1166 -- point we have already read and discarded it, so it doesn't 1167 -- really matter. 1168 end if; 1169 1170 -- Insert the closed input at the end of the Close_Input list, so 1171 -- that the next call to Next_Token properly closes the entity. 1172 -- This can not be done here, otherwise End_Entity is called too 1173 -- early, and the error messages do not point to the right entity. 1174 if Parser.Close_Inputs = null then 1175 Parser.Close_Inputs := Parser.Inputs; 1176 else 1177 Input_A := Parser.Close_Inputs; 1178 while Input_A.Next /= null loop 1179 Input_A := Input_A.Next; 1180 end loop; 1181 Input_A.Next := Parser.Inputs; 1182 end if; 1183 1184 Input_A := Parser.Inputs; 1185 Parser.Inputs := Parser.Inputs.Next; 1186 Input_A.Next := null; 1187 end if; 1188 1189 -- Read the text of the entity if there is any 1190 1191 if Parser.Inputs /= null then 1192 if Parser.Inputs.Input = null 1193 or else Eof (Parser.Inputs.Input.all) 1194 then 1195 if Debug_Input then 1196 Put_Line ("++Input END OF INPUT"); 1197 end if; 1198 1199 Parser.Last_Read := Unicode_Char'Val (16#00#); 1200 Parser.Last_Read_Is_Valid := False; 1201 return; 1202 end if; 1203 1204 Parser.Last_Read_Is_Valid := True; 1205 Increase_Column_Number (Parser.Locator); 1206 Internal (Parser.Inputs.Input.all); 1207 1208 -- Else read from the initial input stream 1209 elsif Eof (Input) then 1210 if Debug_Input then 1211 Put_Line 1212 ("++Input " & To_String (Parser.Locator) & " END_OF_INPUT"); 1213 end if; 1214 Parser.Last_Read := 16#FFFF#; 1215 Parser.Last_Read_Is_Valid := False; 1216 1217 else 1218 Parser.Last_Read_Is_Valid := True; 1219 Increase_Column_Number (Parser.Locator); 1220 Internal (Input); 1221 end if; 1222 1223 if Debug_Input and then Parser.Last_Read_Is_Valid then 1224 Put ("++Input " & To_String (Parser.Locator) 1225 & "(" & Unicode_Char'Image (Parser.Last_Read) & ")= "); 1226 if Parser.Last_Read /= Line_Feed then 1227 Put_Line (Debug_Encode (Parser.Last_Read)); 1228 else 1229 Put_Line ("Line_Feed"); 1230 end if; 1231 end if; 1232 1233 exception 1234 when Unicode.CES.Invalid_Encoding => 1235 Fatal_Error (Parser, Error_Invalid_Encoding); 1236 end Next_Char; 1237 1238 ------------------- 1239 -- Put_In_Buffer -- 1240 ------------------- 1241 1242 procedure Put_In_Buffer 1243 (Parser : in out Sax_Reader'Class; Char : Unicode_Char) 1244 is 1245 W : constant Natural := Encoding.Width (Char); 1246 Tmp : Byte_Sequence_Access; 1247 begin 1248 -- Loop until we have enough memory to store the string 1249 while Parser.Buffer_Length + W > Parser.Buffer'Last loop 1250 Tmp := Parser.Buffer; 1251 Parser.Buffer := new Byte_Sequence 1252 (1 .. Tmp'Length * 2); 1253 Parser.Buffer (1 .. Tmp'Length) := Tmp.all; 1254 Free (Tmp); 1255 end loop; 1256 1257 Encoding.Encode (Char, Parser.Buffer.all, Parser.Buffer_Length); 1258 end Put_In_Buffer; 1259 1260 ------------------- 1261 -- Put_In_Buffer -- 1262 ------------------- 1263 1264 procedure Put_In_Buffer 1265 (Parser : in out Sax_Reader'Class; Str : Byte_Sequence) 1266 is 1267 Tmp : Byte_Sequence_Access; 1268 begin 1269 -- Loop until we have enough memory to store the string 1270 while Parser.Buffer_Length + Str'Length > Parser.Buffer'Last loop 1271 Tmp := Parser.Buffer; 1272 Parser.Buffer := new Byte_Sequence (1 .. Tmp'Length * 2); 1273 Parser.Buffer (1 .. Tmp'Length) := Tmp.all; 1274 Free (Tmp); 1275 end loop; 1276 1277 Parser.Buffer 1278 (Parser.Buffer_Length + 1 .. Parser.Buffer_Length + Str'Length) := Str; 1279 Parser.Buffer_Length := Parser.Buffer_Length + Str'Length; 1280 end Put_In_Buffer; 1281 1282 --------------------- 1283 -- Test_Valid_Lang -- 1284 --------------------- 1285 1286 procedure Test_Valid_Lang 1287 (Parser : in out Sax_Reader'Class; Lang : Byte_Sequence) is 1288 begin 1289 -- XML Errata 41: An empty xml:lang attribute is valid 1290 if Lang /= "" and then not Is_Valid_Language_Name (Lang) then 1291 Error (Parser, Error_Invalid_Language); 1292 end if; 1293 end Test_Valid_Lang; 1294 1295 ---------------------- 1296 -- Test_Valid_Space -- 1297 ---------------------- 1298 1299 procedure Test_Valid_Space 1300 (Parser : in out Sax_Reader'Class; Space : Byte_Sequence) is 1301 begin 1302 if Space /= Default_Sequence 1303 and then Space /= Preserve_Sequence 1304 then 1305 Error (Parser, Error_Invalid_Space); 1306 end if; 1307 end Test_Valid_Space; 1308 1309 ------------------- 1310 -- Is_Pubid_Char -- 1311 ------------------- 1312 1313 function Is_Pubid_Char (C : Unicode_Char) return Boolean is 1314 begin 1315 return C = Unicode.Names.Basic_Latin.Space 1316 or else C = Line_Feed 1317 or else C in Latin_Small_Letter_A .. Latin_Small_Letter_Z 1318 or else C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z 1319 or else C in Digit_Zero .. Digit_Nine 1320 or else C = Hyphen_Minus 1321 or else C = Apostrophe 1322 or else C = Left_Parenthesis 1323 or else C = Right_Parenthesis 1324 or else C = Plus_Sign 1325 or else C = Comma 1326 or else C = Full_Stop 1327 or else C = Solidus 1328 or else C = Unicode.Names.Basic_Latin.Colon 1329 or else C = Equals_Sign 1330 or else C = Question_Mark 1331 or else C = Semicolon 1332 or else C = Exclamation_Mark 1333 or else C = Asterisk 1334 or else C = Number_Sign 1335 or else C = Commercial_At 1336 or else C = Dollar_Sign 1337 or else C = Low_Line 1338 or else C = Percent_Sign; 1339 end Is_Pubid_Char; 1340 1341 --------------------- 1342 -- Test_Valid_Char -- 1343 --------------------- 1344 1345 procedure Test_Valid_Char 1346 (Parser : in out Sax_Reader'Class; C : Unicode_Char; Loc : Token) 1347 is 1348 Id : Sax.Locators.Location; 1349 begin 1350 if not (C = 16#9# 1351 or else C = 16#A# 1352 or else C = 16#D# 1353 or else C in Unicode.Names.Basic_Latin.Space .. 16#D7FF# 1354 or else C in 16#E000# .. 16#FFFD# 1355 or else C in 16#10000# .. 16#10FFFF#) 1356 then 1357 if Loc /= Null_Token then 1358 Id := Loc.Location; 1359 else 1360 Id := No_Location; 1361 Id.Line := Get_Line_Number (Parser.Locator); 1362 Id.Column := Get_Column_Number (Parser.Locator); 1363 end if; 1364 Fatal_Error (Parser, Error_Invalid_Char & Unicode_Char'Image (C), Id); 1365 end if; 1366 end Test_Valid_Char; 1367 1368 ------------- 1369 -- Find_NS -- 1370 ------------- 1371 1372 procedure Find_NS 1373 (Parser : in out Sax_Reader'Class; 1374 Prefix : Token; 1375 NS : out XML_NS; 1376 Include_Default_NS : Boolean := True) is 1377 begin 1378 Find_NS 1379 (Parser, 1380 Find_Symbol (Parser, Parser.Buffer (Prefix.First .. Prefix.Last)), 1381 NS, Include_Default_NS); 1382 if NS = No_XML_NS then 1383 Fatal_Error 1384 (Parser, Error_Prefix_Not_Declared & 1385 Parser.Buffer (Prefix.First .. Prefix.Last)); 1386 end if; 1387 end Find_NS; 1388 1389 ------------- 1390 -- Find_NS -- 1391 ------------- 1392 1393 procedure Find_NS 1394 (Parser : Sax_Reader'Class; 1395 Prefix : Sax.Symbols.Symbol; 1396 NS : out XML_NS; 1397 Include_Default_NS : Boolean := True) 1398 is 1399 E : Element_Access := Parser.Current_Node; 1400 begin 1401 loop 1402 if E = null then 1403 NS := Find_NS_In_List 1404 (Parser.Default_Namespaces, Prefix, Include_Default_NS, False); 1405 else 1406 NS := Find_NS_In_List 1407 (E.Namespaces, Prefix, Include_Default_NS, True); 1408 end if; 1409 1410 exit when NS /= No_XML_NS or else E = null; 1411 E := E.Parent; 1412 end loop; 1413 end Find_NS; 1414 1415 ---------------------- 1416 -- Find_NS_From_URI -- 1417 ---------------------- 1418 1419 procedure Find_NS_From_URI 1420 (Parser : in out Sax_Reader'Class; 1421 URI : Symbol; 1422 NS : out XML_NS) 1423 is 1424 E : Element_Access := Parser.Current_Node; 1425 begin 1426 loop 1427 -- Search in the default namespaces 1428 if E = null then 1429 NS := Find_NS_From_URI_In_List (Parser.Default_Namespaces, URI); 1430 else 1431 NS := Find_NS_From_URI_In_List (E.Namespaces, URI); 1432 end if; 1433 1434 exit when NS /= No_XML_NS or else E = null; 1435 E := E.Parent; 1436 end loop; 1437 end Find_NS_From_URI; 1438 1439 --------------------- 1440 -- Qname_From_Name -- 1441 --------------------- 1442 1443 function Qname_From_Name 1444 (Parser : Sax_Reader'Class; Prefix, Local_Name : Token) 1445 return Byte_Sequence is 1446 begin 1447 if Prefix = Null_Token then 1448 return Parser.Buffer (Local_Name.First .. Local_Name.Last); 1449 else 1450 return Parser.Buffer (Prefix.First .. Prefix.Last) 1451 & Colon_Sequence 1452 & Parser.Buffer (Local_Name.First .. Local_Name.Last); 1453 end if; 1454 end Qname_From_Name; 1455 1456 --------------------- 1457 -- Qname_From_Name -- 1458 --------------------- 1459 1460 function Qname_From_Name 1461 (Prefix, Local_Name : Symbol) return Byte_Sequence is 1462 begin 1463 if Prefix = No_Symbol or else Prefix = Empty_String then 1464 return Get (Local_Name).all; 1465 else 1466 return Get (Prefix).all & Colon_Sequence & Get (Local_Name).all; 1467 end if; 1468 end Qname_From_Name; 1469 1470 ----------------------- 1471 -- Prefix_From_Qname -- 1472 ----------------------- 1473 1474 function Prefix_From_Qname (Qname : Byte_Sequence) return Byte_Sequence is 1475 Index : Natural := Qname'First; 1476 C : Unicode_Char; 1477 Previous : Natural; 1478 begin 1479 while Index <= Qname'Last loop 1480 Previous := Index; 1481 Encoding.Read (Qname, Index, C); 1482 if C = Unicode.Names.Basic_Latin.Colon then 1483 return Qname (Qname'First .. Previous - 1); 1484 end if; 1485 end loop; 1486 return ""; 1487 end Prefix_From_Qname; 1488 1489 ---------------------------- 1490 -- Add_Namespace_No_Event -- 1491 ---------------------------- 1492 1493 procedure Add_Namespace_No_Event 1494 (Parser : in out Sax_Reader'Class; 1495 Prefix, URI : Symbol) is 1496 begin 1497 Add_Namespace (Parser, null, Prefix, URI, Report_Event => False); 1498 end Add_Namespace_No_Event; 1499 1500 ------------------- 1501 -- Add_Namespace -- 1502 ------------------- 1503 1504 procedure Add_Namespace 1505 (Parser : in out Sax_Reader'Class; 1506 Node : Element_Access; 1507 Prefix : Symbol; 1508 URI : Symbol; 1509 Report_Event : Boolean := True) 1510 is 1511 Same_As : XML_NS := No_XML_NS; 1512 begin 1513 -- Was there a previous definition of this namespace ? 1514 Find_NS_From_URI (Parser, URI, Same_As); 1515 1516 if Node = null then 1517 Add_NS_To_List (Parser.Default_Namespaces, Same_As, Prefix, URI); 1518 else 1519 Add_NS_To_List (Node.Namespaces, Same_As, Prefix, URI); 1520 end if; 1521 1522 if Report_Event then 1523 Start_Prefix_Mapping (Parser, Prefix => Prefix, URI => URI); 1524 end if; 1525 end Add_Namespace; 1526 1527 ------------------ 1528 -- Close_Inputs -- 1529 ------------------ 1530 1531 procedure Close_Inputs 1532 (Parser : in out Sax_Reader'Class; 1533 Inputs : in out Entity_Input_Source_Access) 1534 is 1535 procedure Free is new Unchecked_Deallocation 1536 (Entity_Input_Source, Entity_Input_Source_Access); 1537 Input_A : Entity_Input_Source_Access; 1538 begin 1539 while Inputs /= null loop 1540 -- ??? Could use Input_Sources.Locator.Free 1541 if Inputs.Input /= null then 1542 Close (Inputs.Input.all); 1543 Unchecked_Free (Inputs.Input); 1544 end if; 1545 1546 -- not in string context 1547 if not Parser.State.Ignore_Special then 1548 End_Entity (Parser, Inputs.Name); 1549 end if; 1550 1551 Input_A := Inputs; 1552 Inputs := Inputs.Next; 1553 Free (Input_A); 1554 end loop; 1555 end Close_Inputs; 1556 1557 ----------------- 1558 -- Debug_Print -- 1559 ----------------- 1560 1561 procedure Debug_Print (Parser : Sax_Reader'Class; Id : Token) is 1562 begin 1563 Put ("++Lex (" & Parser.State.Name & ") at " 1564 & To_String (Parser.Locator) 1565 & " (" & Token_Type'Image (Id.Typ) & ") at " 1566 & To_String (Id.Location)); 1567 if Parser.State.Ignore_Special then 1568 Put (" (in string)"); 1569 end if; 1570 1571 if Id.Typ = Space then 1572 declare 1573 J : Natural := Id.First; 1574 C : Unicode_Char; 1575 begin 1576 Put (" --"); 1577 while J <= Id.Last loop 1578 Encoding.Read (Parser.Buffer.all, J, C); 1579 Put (Unicode_Char'Image (C)); 1580 end loop; 1581 Put ("--"); 1582 end; 1583 1584 elsif Id.Last >= Id.First then 1585 Put (" --" & Parser.Buffer (Id.First .. Id.Last) & "--"); 1586 end if; 1587 1588 Put_Line 1589 (" buffer=" 1590 & Parser.Buffer (Parser.Buffer'First .. Parser.Buffer_Length) 1591 & "--"); 1592 end Debug_Print; 1593 1594 ---------------- 1595 -- Next_Token -- 1596 ---------------- 1597 1598 procedure Next_Token 1599 (Input : in out Input_Source'Class; 1600 Parser : in out Sax_Reader'Class; 1601 Id : out Token; 1602 Coalesce_Space : Boolean := False) 1603 is 1604 function Looking_At (Str : Byte_Sequence) return Boolean; 1605 -- True if the next characters read (including the current one) in the 1606 -- stream match Str. Characters read are stored in the buffer 1607 1608 procedure Handle_Comments; 1609 -- <!- has been seen in the buffer, check if this is a comment and 1610 -- handle it appropriately. The first character after '<!-' has 1611 -- already been read on calling this subprogram. 1612 -- Raise an error message when the end of the input stream is seen. 1613 1614 procedure Handle_Character_Ref; 1615 -- '&#' has been seen in the buffer, check if this is a character 1616 -- entity reference and handle it appropriately 1617 1618 procedure Handle_Less_Than_Sign; 1619 -- Handle '<', '<!', '<!--', '<![',... sequences 1620 1621 procedure Handle_Entity_Ref; 1622 -- '&' has been read (as well as the following character). Skips till 1623 -- the end of the entity, ie ';'. Saves the name of the entity in the 1624 -- buffer. 1625 -- Parser.Last_Read is left to ';', but it is not put in the buffer. 1626 1627 ---------------- 1628 -- Looking_At -- 1629 ---------------- 1630 1631 function Looking_At (Str : Byte_Sequence) return Boolean is 1632 C : Unicode_Char; 1633 Index : Natural := Str'First; 1634 begin 1635 while Index <= Str'Last loop 1636 Encoding.Read (Str, Index, C); 1637 1638 if C /= Parser.Last_Read 1639 or else not Parser.Last_Read_Is_Valid 1640 then 1641 return False; 1642 end if; 1643 Put_In_Buffer (Parser, Parser.Last_Read); 1644 Next_Char (Input, Parser); 1645 end loop; 1646 return True; 1647 end Looking_At; 1648 1649 --------------------- 1650 -- Handle_Comments -- 1651 --------------------- 1652 1653 procedure Handle_Comments is 1654 begin 1655 if not Eof (Input) then 1656 Next_Char (Input, Parser); 1657 if Parser.Last_Read = Hyphen_Minus then 1658 Id.Typ := Comment; -- In case we reach the eof in the loop 1659 -- Note that if the file ends exactly with '<!--', we get 1660 -- an empty text. But at least we will detect the error. 1661 -- It also fails if we have a non-terminated comment and the 1662 -- last character in the file is '-'. Doesn't seem worth 1663 -- paying the cost for some extra tests to handle this. 1664 loop 1665 Next_Char (Input, Parser); 1666 if End_Of_Stream (Parser) then 1667 Fatal_Error (Parser, Error_Comment_End, Id); 1668 Id.Typ := End_Of_Input; 1669 return; 1670 1671 elsif Parser.Last_Read = Hyphen_Minus then 1672 Next_Char (Input, Parser); 1673 if End_Of_Stream (Parser) then 1674 Fatal_Error (Parser, Error_Comment_Unterminated); 1675 Id.Typ := End_Of_Input; 1676 return; 1677 1678 elsif Parser.Last_Read = Hyphen_Minus then 1679 if Parser.Last_Read_Is_Valid then 1680 Next_Char (Input, Parser); 1681 if Parser.Last_Read = Greater_Than_Sign then 1682 exit; 1683 end if; 1684 end if; 1685 Parser.Buffer_Length := Id.First - 1; 1686 Id.Location.Line := Get_Line_Number (Parser.Locator); 1687 Id.Location.Column := 1688 Get_Column_Number (Parser.Locator) - 2; 1689 -- 2 = 2 * Hyphen_Minus 1690 Fatal_Error (Parser, Error_Comment_Dash_Dash, Id); 1691 else 1692 Put_In_Buffer (Parser, Hyphen_Minus); 1693 Put_In_Buffer (Parser, Parser.Last_Read); 1694 end if; 1695 else 1696 Put_In_Buffer (Parser, Parser.Last_Read); 1697 end if; 1698 end loop; 1699 1700 if Parser.Feature_Validation 1701 and then System_Id (Parser) /= Id.Location.System_Id 1702 then 1703 Error (Parser, Error_Entity_Self_Contained, Id); 1704 end if; 1705 1706 Next_Char (Input, Parser); 1707 return; 1708 end if; 1709 end if; 1710 Fatal_Error (Parser, Error_Unexpected_Chars1); 1711 Id.Typ := End_Of_Input; 1712 end Handle_Comments; 1713 1714 -------------------------- 1715 -- Handle_Character_Ref -- 1716 -------------------------- 1717 1718 procedure Handle_Character_Ref is 1719 Val : Unicode_Char := 0; 1720 begin 1721 if Parser.State.Expand_Character_Ref then 1722 Id.Typ := Text; 1723 else 1724 Id.Typ := Char_Ref; 1725 end if; 1726 1727 if Parser.Current_Node = null 1728 and then Parser.State.Name = Default_State.Name 1729 then 1730 Fatal_Error (Parser, Error_Charref_Toplevel, Id); 1731 end if; 1732 1733 Next_Char (Input, Parser); 1734 if Parser.Last_Read = Latin_Small_Letter_X then 1735 Next_Char (Input, Parser); 1736 1737 while Parser.Last_Read_Is_Valid 1738 and then Parser.Last_Read /= Semicolon 1739 loop 1740 if Parser.Last_Read in Digit_Zero .. Digit_Nine then 1741 Val := Val * 16 + Parser.Last_Read - Digit_Zero; 1742 1743 elsif Parser.Last_Read in 1744 Latin_Capital_Letter_A .. Latin_Capital_Letter_F 1745 then 1746 Val := Val * 16 + Parser.Last_Read - Latin_Capital_Letter_A 1747 + 10; 1748 1749 elsif Parser.Last_Read in 1750 Latin_Small_Letter_A .. Latin_Small_Letter_F 1751 then 1752 Val := Val * 16 + Parser.Last_Read - Latin_Small_Letter_A 1753 + 10; 1754 1755 else 1756 Id.Location.Line := Get_Line_Number (Parser.Locator); 1757 Id.Location.Column := Get_Column_Number (Parser.Locator); 1758 Fatal_Error 1759 (Parser, Error_Charref_Invalid_Char 1760 & Debug_Encode (Parser.Last_Read), Id); 1761 end if; 1762 Next_Char (Input, Parser); 1763 end loop; 1764 else 1765 while Parser.Last_Read_Is_Valid 1766 and then Parser.Last_Read /= Semicolon 1767 loop 1768 if Parser.Last_Read in Digit_Zero .. Digit_Nine then 1769 Val := Val * 10 + Parser.Last_Read - Digit_Zero; 1770 else 1771 Id.Location.Line := Get_Line_Number (Parser.Locator); 1772 Id.Location.Column := Get_Column_Number (Parser.Locator); 1773 Fatal_Error 1774 (Parser, Error_Charref_Invalid_Char 1775 & Debug_Encode (Parser.Last_Read), Id); 1776 end if; 1777 Next_Char (Input, Parser); 1778 end loop; 1779 end if; 1780 1781 if Parser.Feature_Test_Valid_Chars then 1782 Test_Valid_Char (Parser, Val, Id); 1783 end if; 1784 Put_In_Buffer (Parser, Val); 1785 Next_Char (Input, Parser); 1786 Id.From_Entity := True; 1787 end Handle_Character_Ref; 1788 1789 --------------------------- 1790 -- Handle_Less_Than_Sign -- 1791 --------------------------- 1792 1793 procedure Handle_Less_Than_Sign is 1794 Num_Closing_Bracket : Natural; 1795 Id2 : Token; 1796 begin 1797 Id.Typ := Start_Of_Tag; 1798 Next_Char (Input, Parser); 1799 case Parser.Last_Read is 1800 when Solidus => 1801 Id.Typ := Start_Of_End_Tag; 1802 Next_Char (Input, Parser); 1803 1804 when Exclamation_Mark => 1805 Next_Char (Input, Parser); 1806 if Parser.Last_Read = Hyphen_Minus then 1807 Handle_Comments; 1808 1809 elsif Looking_At (Doctype_Sequence) then 1810 Reset_Buffer (Parser, Id); 1811 Id.Typ := Doctype_Start; 1812 1813 elsif Parser.Last_Read = Left_Square_Bracket then 1814 Next_Char (Input, Parser); 1815 1816 if Parser.Last_Read = Latin_Capital_Letter_C then 1817 1818 if not Looking_At (Cdata_Sequence) then 1819 Fatal_Error (Parser, Error_Invalid_Declaration, Id); 1820 end if; 1821 1822 if Parser.Last_Read /= Left_Square_Bracket then 1823 Fatal_Error (Parser, Error_Cdata_Unterminated, Id); 1824 end if; 1825 1826 Reset_Buffer (Parser, Id); 1827 Id.Typ := Cdata_Section; 1828 Num_Closing_Bracket := 1; 1829 loop 1830 Next_Char (Input, Parser); 1831 1832 if End_Of_Stream (Parser) then 1833 Id.Typ := End_Of_Input; 1834 Fatal_Error (Parser, Error_Cdata_End, Id); 1835 return; 1836 1837 elsif Parser.Last_Read_Is_Valid then 1838 Put_In_Buffer (Parser, Parser.Last_Read); 1839 1840 if Parser.Last_Read = Right_Square_Bracket then 1841 Num_Closing_Bracket := Num_Closing_Bracket + 1; 1842 1843 elsif Parser.Last_Read = Greater_Than_Sign 1844 and then Num_Closing_Bracket >= 2 1845 then 1846 Parser.Buffer_Length := Parser.Buffer_Length 1847 - 2 * Encoding.Width (Right_Square_Bracket) 1848 - Encoding.Width (Greater_Than_Sign); 1849 exit; 1850 1851 else 1852 Num_Closing_Bracket := 0; 1853 end if; 1854 end if; 1855 end loop; 1856 1857 if Id.Location.System_Id /= System_Id (Parser) then 1858 Fatal_Error (Parser, Error_Entity_Self_Contained, Id); 1859 end if; 1860 1861 if not Eof (Input) then 1862 Next_Char (Input, Parser); 1863 else 1864 Parser.Last_Read := 16#FFFF#; 1865 end if; 1866 1867 else 1868 while Is_White_Space (Parser.Last_Read) loop 1869 Next_Char (Input, Parser); 1870 end loop; 1871 1872 if Parser.Last_Read = Latin_Capital_Letter_I 1873 or else Parser.Last_Read = Percent_Sign 1874 then 1875 -- Skip spaces: if we are expending a parameter 1876 -- entity, it must start with spaces (4.4.8) 1877 Next_Token_Skip_Spaces (Input, Parser, Id2); 1878 if Parser.Buffer (Id2.First .. Id2.Last) = 1879 Include_Sequence 1880 then 1881 Reset_Buffer (Parser, Id2); 1882 Id.Typ := Include; 1883 elsif Parser.Buffer (Id2.First .. Id2.Last) = 1884 Ignore_Sequence 1885 then 1886 Reset_Buffer (Parser, Id2); 1887 Id.Typ := Ignore; 1888 else 1889 Fatal_Error (Parser, Error_Invalid_Declaration, Id); 1890 end if; 1891 1892 if not Parser.State.In_DTD 1893 or else not Parser.In_External_Entity 1894 then 1895 Fatal_Error 1896 (Parser, Error_Conditional_Location, Id); 1897 end if; 1898 1899 Next_Token_Skip_Spaces (Input, Parser, Id2); 1900 if Id2.Typ /= Internal_DTD_Start then 1901 Fatal_Error (Parser, Error_Conditional_Syntax, Id2); 1902 end if; 1903 1904 elsif Parser.State.In_DTD then 1905 Id.Typ := Start_Conditional; 1906 else 1907 Fatal_Error (Parser, Error_Unexpected_Chars1, Id); 1908 end if; 1909 end if; 1910 1911 elsif not Parser.State.In_DTD then 1912 Fatal_Error (Parser, Error_Unexpected_Chars1, Id); 1913 1914 elsif Looking_At (Attlist_Sequence) 1915 -- Since parameter entities are expanded with spaces, we can 1916 -- have one following ATTLIST immediately 1917 and then (Is_White_Space (Parser.Last_Read) 1918 or else Parser.Last_Read = Percent_Sign) 1919 then 1920 Reset_Buffer (Parser, Id); 1921 Id.Typ := Attlist_Def; 1922 1923 elsif Parser.Last_Read = Latin_Capital_Letter_E then 1924 Next_Char (Input, Parser); 1925 if Looking_At (Ntity_Sequence) then 1926 Reset_Buffer (Parser, Id); 1927 Id.Typ := Entity_Def; 1928 1929 elsif Looking_At (Element_Sequence) then 1930 Reset_Buffer (Parser, Id); 1931 Id.Typ := Element_Def; 1932 1933 else 1934 Fatal_Error (Parser, Error_Unknown_Declaration); 1935 end if; 1936 1937 elsif Looking_At (Notation_Sequence) 1938 -- Since parameter entities are expanded with spaces, we can 1939 -- have one following NOTATION immediately 1940 and then (Is_White_Space (Parser.Last_Read) 1941 or else Parser.Last_Read = Percent_Sign) 1942 then 1943 Reset_Buffer (Parser, Id); 1944 Id.Typ := Notation; 1945 1946 else 1947 Put_In_Buffer (Parser, Less_Than_Sign); 1948 Put_In_Buffer (Parser, Exclamation_Mark); 1949 Id.Typ := Text; 1950 end if; 1951 1952 when Question_Mark => 1953 Id.Typ := Start_Of_PI; 1954 Next_Char (Input, Parser); 1955 1956 when others => null; 1957 end case; 1958 end Handle_Less_Than_Sign; 1959 1960 ----------------------- 1961 -- Handle_Entity_Ref -- 1962 ----------------------- 1963 1964 procedure Handle_Entity_Ref is 1965 begin 1966 if not Parser.Last_Read_Is_Valid 1967 or else Is_Valid_Name_Startchar 1968 (Parser.Last_Read, Parser.XML_Version) 1969 then 1970 while Parser.Last_Read_Is_Valid 1971 and then Parser.Last_Read /= Semicolon 1972 and then Is_Valid_Name_Char 1973 (Parser.Last_Read, Parser.XML_Version) 1974 loop 1975 Put_In_Buffer (Parser, Parser.Last_Read); 1976 Next_Char (Input, Parser); 1977 end loop; 1978 1979 if not Parser.Last_Read_Is_Valid 1980 or else System_Id (Parser) /= Id.Location.System_Id 1981 then 1982 Fatal_Error (Parser, Error_Entity_Self_Contained, Id); 1983 end if; 1984 1985 if Parser.Last_Read /= Semicolon then 1986 Fatal_Error (Parser, Error_Entityref_Unterminated, Id); 1987 end if; 1988 1989 Id.From_Entity := True; 1990 1991 else 1992 Fatal_Error (Parser, Error_Entity_Name, Id); 1993 end if; 1994 end Handle_Entity_Ref; 1995 1996 type Entity_Ref is (None, Entity, Param_Entity); 1997 Is_Entity_Ref : Entity_Ref := None; 1998 Old_System_Id : Symbol; 1999 begin 2000 if not Parser.Last_Read_Is_Valid then 2001 Next_Char (Input, Parser); 2002 end if; 2003 2004 Id.First := Parser.Buffer_Length + 1; 2005 Id.Last := Parser.Buffer_Length; 2006 Id.Typ := End_Of_Input; 2007 Id.Location.System_Id := System_Id (Parser); 2008 Id.Location.Public_Id := Public_Id (Parser); 2009 Id.Location.Line := Get_Line_Number (Parser.Locator); 2010 Id.Location.Column := Get_Column_Number (Parser.Locator); 2011 Id.From_Entity := False; 2012 2013 Close_Inputs (Parser, Parser.Close_Inputs); 2014 2015 if Eof (Input) and then Parser.Last_Read = 16#FFFF# then 2016 Id.Location.Column := Id.Location.Column + 1; 2017 return; 2018 end if; 2019 2020 if Is_White_Space (Parser.Last_Read) then 2021 Id.Typ := Space; 2022 loop 2023 Put_In_Buffer (Parser, Parser.Last_Read); 2024 Next_Char (Input, Parser); 2025 exit when not Is_White_Space (Parser.Last_Read); 2026 end loop; 2027 2028 -- If we are ignoring special characters 2029 elsif Id.Typ = End_Of_Input 2030 and then (Parser.Ignore_State_Special 2031 or else Parser.State.Ignore_Special) 2032 and then not Parser.State.Detect_End_Of_PI 2033 then 2034 Id.Typ := Text; 2035 Parser.Ignore_State_Special := True; 2036 while Parser.Last_Read_Is_Valid loop 2037 exit when Parser.Last_Read = Ampersand 2038 and then (Parser.State.Expand_Entities 2039 or else Parser.State.Expand_Character_Ref); 2040 exit when Parser.Last_Read = Percent_Sign 2041 and then Parser.State.Expand_Param_Entities; 2042 exit when (Parser.Last_Read = Apostrophe 2043 or else Parser.Last_Read = Quotation_Mark) 2044 and then Parser.State.Handle_Strings 2045 and then (Parser.Inputs = null 2046 or else Parser.Inputs.Handle_Strings); 2047 exit when Parser.Last_Read = Less_Than_Sign 2048 and then Parser.State.Less_Special; 2049 Put_In_Buffer (Parser, Parser.Last_Read); 2050 Next_Char (Input, Parser); 2051 end loop; 2052 end if; 2053 2054 -- If we haven't found a non-empty token yet 2055 if Id.Typ = End_Of_Input 2056 or else Id.First > Parser.Buffer_Length 2057 then 2058 case Parser.Last_Read is 2059 when Less_Than_Sign => 2060 if Parser.State.Less_Special then 2061 Id.Typ := Start_Of_Tag; 2062 Next_Char (Input, Parser); 2063 elsif Parser.State.Detect_End_Of_PI then 2064 Put_In_Buffer (Parser, Parser.Last_Read); 2065 Id.Typ := Text; 2066 Next_Char (Input, Parser); 2067 else 2068 Handle_Less_Than_Sign; 2069 end if; 2070 2071 when Question_Mark => 2072 if Eof (Input) then 2073 Put_In_Buffer (Parser, Parser.Last_Read); 2074 Id.Typ := Text; 2075 else 2076 Next_Char (Input, Parser); 2077 if Parser.Last_Read = Greater_Than_Sign then 2078 Id.Typ := End_Of_PI; 2079 Next_Char (Input, Parser); 2080 elsif Parser.Last_Read = Question_Mark then 2081 Put_In_Buffer (Parser, Question_Mark); 2082 Id.Typ := Text; 2083 else 2084 Put_In_Buffer (Parser, Question_Mark); 2085 Id.Typ := Text; 2086 end if; 2087 end if; 2088 2089 when Greater_Than_Sign => 2090 if Parser.State.Greater_Special then 2091 Id.Typ := End_Of_Tag; 2092 else 2093 Put_In_Buffer (Parser, Parser.Last_Read); 2094 Id.Typ := Text; 2095 end if; 2096 Next_Char (Input, Parser); 2097 2098 when Equals_Sign => 2099 if Parser.State.In_Tag then 2100 Id.Typ := Equal; 2101 else 2102 Put_In_Buffer (Parser, Parser.Last_Read); 2103 Id.Typ := Text; 2104 end if; 2105 Next_Char (Input, Parser); 2106 2107 when Unicode.Names.Basic_Latin.Colon => 2108 if Parser.State.In_Tag then 2109 if Parser.Feature_Namespace then 2110 Id.Typ := Colon; 2111 else 2112 Put_In_Buffer (Parser, Parser.Last_Read); 2113 Id.Typ := Name; 2114 end if; 2115 else 2116 Put_In_Buffer (Parser, Parser.Last_Read); 2117 Id.Typ := Text; 2118 end if; 2119 Next_Char (Input, Parser); 2120 2121 when Ampersand => 2122 Id.Typ := Text; -- So that eof would at least report an error 2123 if Eof (Input) 2124 and then Parser.State.Expand_Entities 2125 then 2126 Fatal_Error (Parser, Error_Entityref_Unterminated, Id); 2127 end if; 2128 2129 Next_Char (Input, Parser); 2130 if Parser.Last_Read = Number_Sign 2131 and then (Parser.State.Expand_Character_Ref 2132 or Parser.State.Report_Character_Ref) 2133 then 2134 Handle_Character_Ref; 2135 if System_Id (Parser) /= Id.Location.System_Id then 2136 Fatal_Error (Parser, Error_Entity_Self_Contained, Id); 2137 end if; 2138 2139 elsif Parser.Last_Read /= Number_Sign 2140 and then Parser.State.Expand_Entities 2141 then 2142 Handle_Entity_Ref; 2143 Is_Entity_Ref := Entity; 2144 2145 elsif Parser.Last_Read /= Number_Sign 2146 and then Parser.State.Ignore_Special -- string context 2147 and then not Parser.State.Detect_End_Of_PI -- not in PI 2148 then 2149 -- Inside a string (entity value), we still need to check 2150 -- that the '&' marks the beginning of an entity reference. 2151 Put_In_Buffer (Parser, Ampersand); 2152 Handle_Entity_Ref; 2153 Put_In_Buffer (Parser, Parser.Last_Read); 2154 Next_Char (Input, Parser); 2155 2156 else 2157 Put_In_Buffer (Parser, Ampersand); 2158 end if; 2159 2160 when Percent_Sign => 2161 Put_In_Buffer (Parser, Parser.Last_Read); 2162 Id.Typ := Text; 2163 2164 Next_Char (Input, Parser); 2165 if Parser.State.Expand_Param_Entities then 2166 while Parser.Last_Read /= Semicolon 2167 and then Is_Valid_Name_Char 2168 (Parser.Last_Read, Parser.XML_Version) 2169 loop 2170 Put_In_Buffer (Parser, Parser.Last_Read); 2171 Next_Char (Input, Parser); 2172 end loop; 2173 2174 if Parser.Last_Read /= Semicolon then 2175 Fatal_Error (Parser, Error_Entityref_Unterminated); 2176 end if; 2177 Is_Entity_Ref := Param_Entity; 2178 end if; 2179 2180 when Quotation_Mark => 2181 if Parser.State.Handle_Strings then 2182 Id.Typ := Double_String_Delimiter; 2183 Next_Char (Input, Parser); 2184 else 2185 Id.Typ := Text; 2186 Put_In_Buffer (Parser, Parser.Last_Read); 2187 Next_Char (Input, Parser); 2188 end if; 2189 2190 when Apostrophe => 2191 if Parser.State.Handle_Strings then 2192 Id.Typ := Single_String_Delimiter; 2193 Next_Char (Input, Parser); 2194 else 2195 Id.Typ := Text; 2196 Put_In_Buffer (Parser, Parser.Last_Read); 2197 Next_Char (Input, Parser); 2198 end if; 2199 2200 when Left_Square_Bracket => 2201 if Parser.State.In_DTD then 2202 Id.Typ := Internal_DTD_Start; 2203 else 2204 Put_In_Buffer (Parser, Parser.Last_Read); 2205 Id.Typ := Text; 2206 end if; 2207 Next_Char (Input, Parser); 2208 2209 when Right_Square_Bracket => 2210 if Parser.State.In_DTD 2211 and then not Parser.In_External_Entity 2212 then 2213 Id.Typ := Internal_DTD_End; 2214 loop 2215 Next_Char (Input, Parser); 2216 exit when Parser.Last_Read = Greater_Than_Sign; 2217 2218 if Parser.Last_Read_Is_Valid 2219 and then not Is_White_Space (Parser.Last_Read) 2220 then 2221 Fatal_Error (Parser, Error_Unexpected_Chars2, Id); 2222 end if; 2223 end loop; 2224 Next_Char (Input, Parser); 2225 2226 -- In string context ? 2227 elsif Parser.State.Ignore_Special then 2228 Id.Typ := Text; 2229 Put_In_Buffer (Parser, Parser.Last_Read); 2230 Next_Char (Input, Parser); 2231 2232 else 2233 declare 2234 Num_Bracket : Natural := 1; 2235 begin 2236 Id.Typ := Text; 2237 2238 loop 2239 Put_In_Buffer (Parser, Parser.Last_Read); 2240 Next_Char (Input, Parser); 2241 2242 if Parser.Last_Read = Right_Square_Bracket then 2243 Num_Bracket := Num_Bracket + 1; 2244 2245 elsif Num_Bracket >= 2 2246 and Parser.Last_Read = Greater_Than_Sign 2247 then 2248 if Parser.State.In_DTD 2249 and then Parser.In_External_Entity 2250 then 2251 Id.Typ := End_Conditional; 2252 Reset_Buffer (Parser, Id); 2253 Next_Char (Input, Parser); 2254 exit; 2255 else 2256 Id.Location.Column := 2257 Id.Location.Column + Num_Bracket - 2; 2258 Fatal_Error 2259 (Parser, Error_Unexpected_Chars3, Id); 2260 end if; 2261 else 2262 exit; 2263 end if; 2264 end loop; 2265 end; 2266 end if; 2267 2268 when Solidus => 2269 Id.Typ := Text; 2270 Next_Char (Input, Parser); 2271 if Parser.State.Greater_Special 2272 and then Parser.Last_Read = Greater_Than_Sign 2273 then 2274 Id.Typ := End_Of_Start_Tag; 2275 Next_Char (Input, Parser); 2276 else 2277 Put_In_Buffer (Parser, Solidus); 2278 end if; 2279 2280 when others => 2281 if Parser.State.Recognize_External then 2282 2283 if Parser.Last_Read = Latin_Capital_Letter_A then 2284 if Looking_At (Any_Sequence) then 2285 Reset_Buffer (Parser, Id); 2286 Id.Typ := Any; 2287 else 2288 Id.Typ := Name; 2289 end if; 2290 2291 elsif Parser.Last_Read = Latin_Capital_Letter_E then 2292 if Looking_At (Empty_Sequence) then 2293 Reset_Buffer (Parser, Id); 2294 Id.Typ := Empty; 2295 else 2296 Id.Typ := Name; 2297 end if; 2298 2299 elsif Parser.Last_Read = Latin_Capital_Letter_N then 2300 if Looking_At (Ndata_Sequence) then 2301 Reset_Buffer (Parser, Id); 2302 Id.Typ := Ndata; 2303 else 2304 Id.Typ := Name; 2305 end if; 2306 2307 elsif Parser.Last_Read = Latin_Capital_Letter_P then 2308 if Looking_At (Public_Sequence) then 2309 Reset_Buffer (Parser, Id); 2310 Id.Typ := Public; 2311 else 2312 Id.Typ := Name; 2313 end if; 2314 2315 elsif Parser.Last_Read = Latin_Capital_Letter_S then 2316 if Looking_At (System_Sequence) then 2317 Reset_Buffer (Parser, Id); 2318 Id.Typ := System; 2319 else 2320 Id.Typ := Name; 2321 end if; 2322 end if; 2323 end if; 2324 2325 if Parser.State.Report_Parenthesis 2326 and then Parser.Last_Read = Left_Parenthesis 2327 then 2328 Reset_Buffer (Parser, Id); 2329 Id.Typ := Open_Paren; 2330 Next_Char (Input, Parser); 2331 return; 2332 end if; 2333 2334 if Parser.State.In_Attlist then 2335 if Parser.Last_Read = Latin_Capital_Letter_C then 2336 if Looking_At (Cdata_Sequence) then 2337 Id.Typ := Cdata; 2338 else 2339 Id.Typ := Name; 2340 end if; 2341 2342 elsif Parser.Last_Read = Latin_Capital_Letter_E 2343 and then Looking_At (Entit_Sequence) 2344 then 2345 if Looking_At (Ies_Sequence) then 2346 Id.Typ := Entities; 2347 elsif Parser.Last_Read = Latin_Capital_Letter_Y then 2348 Id.Typ := Entity; 2349 Put_In_Buffer (Parser, Parser.Last_Read); 2350 Next_Char (Input, Parser); 2351 else 2352 Fatal_Error (Parser, Error_Attlist_Type); 2353 end if; 2354 2355 elsif Parser.Last_Read = Latin_Capital_Letter_I 2356 and then Looking_At (Id_Sequence) 2357 then 2358 if Looking_At (Ref_Sequence) then 2359 if Parser.Last_Read = Latin_Capital_Letter_S then 2360 Id.Typ := Idrefs; 2361 Put_In_Buffer (Parser, Parser.Last_Read); 2362 Next_Char (Input, Parser); 2363 else 2364 Id.Typ := Idref; 2365 end if; 2366 else 2367 Id.Typ := Id_Type; 2368 end if; 2369 2370 elsif Parser.Last_Read = Latin_Capital_Letter_N then 2371 Next_Char (Input, Parser); 2372 if Looking_At (Mtoken_Sequence) then 2373 if Parser.Last_Read = Latin_Capital_Letter_S then 2374 Id.Typ := Nmtokens; 2375 Next_Char (Input, Parser); 2376 else 2377 Id.Typ := Nmtoken; 2378 end if; 2379 elsif Looking_At (Otation_Sequence) then 2380 Id.Typ := Notation; 2381 else 2382 Fatal_Error (Parser, Error_Attlist_Type); 2383 end if; 2384 2385 elsif Parser.Last_Read = Number_Sign then 2386 Put_In_Buffer (Parser, Parser.Last_Read); 2387 Next_Char (Input, Parser); 2388 if Looking_At (Implied_Sequence) then 2389 Id.Typ := Implied; 2390 elsif Looking_At (Required_Sequence) then 2391 Id.Typ := Required; 2392 elsif Looking_At (Fixed_Sequence) then 2393 Id.Typ := Fixed; 2394 else 2395 Fatal_Error (Parser, Error_Attlist_DefaultDecl); 2396 end if; 2397 end if; 2398 end if; 2399 end case; 2400 2401 -- try to coalesce as many things as possible into a single 2402 -- text event 2403 if Id.Typ = End_Of_Input then 2404 if Is_Valid_Name_Startchar (Parser.Last_Read, Parser.XML_Version) 2405 or else Parser.Last_Read = Low_Line 2406 then 2407 Id.Typ := Name; 2408 Put_In_Buffer (Parser, Parser.Last_Read); 2409 Next_Char (Input, Parser); 2410 else 2411 Id.Typ := Text; 2412 end if; 2413 end if; 2414 2415 if Id.Typ = Name and then not Coalesce_Space then 2416 while 2417 (Parser.Last_Read /= Unicode.Names.Basic_Latin.Colon 2418 or else not Parser.Feature_Namespace) 2419 and then 2420 Is_Valid_NCname_Char (Parser.Last_Read, Parser.XML_Version) 2421 loop 2422 Put_In_Buffer (Parser, Parser.Last_Read); 2423 Next_Char (Input, Parser); 2424 end loop; 2425 2426 elsif Is_Entity_Ref = None 2427 and then (Id.Typ = Text 2428 or else (Coalesce_Space and then Id.Typ = Name)) 2429 then 2430 if not Parser.Last_Read_Is_Valid then 2431 Next_Char (Input, Parser); 2432 2433 else 2434 loop 2435 if Is_White_Space (Parser.Last_Read) then 2436 exit when not Coalesce_Space; 2437 2438 else 2439 case Parser.Last_Read is 2440 when Greater_Than_Sign => 2441 exit when Parser.State.Greater_Special; 2442 2443 when Less_Than_Sign -- Start of new tag 2444 | Ampersand -- for Entities 2445 | Right_Square_Bracket -- for CData ]]> 2446 | Quotation_Mark -- for attributes a="..." 2447 | Apostrophe -- for attributes a='...' 2448 | Equals_Sign => -- for attributes 2449 exit; 2450 2451 when Solidus => -- For <NODE/> 2452 declare 2453 C : Unicode_Char; 2454 begin 2455 Lookup_Char (Input, Parser, C); 2456 exit when C = Greater_Than_Sign 2457 or else Id.Typ = Name; 2458 end; 2459 2460 when Percent_Sign => 2461 exit when Parser.State.Expand_Param_Entities; 2462 2463 when Question_Mark => 2464 exit when Parser.State.Detect_End_Of_PI; 2465 2466 when others => 2467 null; 2468 end case; 2469 end if; 2470 2471 Put_In_Buffer (Parser, Parser.Last_Read); 2472 Next_Char (Input, Parser); 2473 exit when not Parser.Last_Read_Is_Valid; 2474 end loop; 2475 end if; 2476 end if; 2477 2478 Parser.Ignore_State_Special := False; 2479 end if; 2480 2481 if Coalesce_Space and then Id.Typ = Space then 2482 -- First character is necessarily not a space, so we'll change the 2483 -- type of the token to text 2484 declare 2485 Save_Length : constant Natural := Parser.Buffer_Length; 2486 begin 2487 while Parser.Last_Read_Is_Valid 2488 and then (not Parser.State.Greater_Special 2489 or else Parser.Last_Read /= Greater_Than_Sign) 2490 and then Parser.Last_Read /= Less_Than_Sign 2491 and then Parser.Last_Read /= Ampersand 2492 and then (not Parser.State.Expand_Param_Entities 2493 or else Parser.Last_Read /= Percent_Sign) 2494 and then Parser.Last_Read /= Equals_Sign 2495 and then Parser.Last_Read /= Quotation_Mark 2496 and then Parser.Last_Read /= Right_Square_Bracket 2497 and then Parser.Last_Read /= Apostrophe 2498 and then Parser.Last_Read /= Solidus 2499 and then (Parser.Last_Read /= Question_Mark 2500 or else not Parser.State.Detect_End_Of_PI) 2501 loop 2502 Put_In_Buffer (Parser, Parser.Last_Read); 2503 Next_Char (Input, Parser); 2504 end loop; 2505 2506 -- Special case for ']': since the parser needs to detect whether 2507 -- this is the beginning of ']]>', this will be done in the next 2508 -- call to Next_Token. However, we shouldn't report the spaces as 2509 -- Ignorable_Whitespace in this case. 2510 2511 if Parser.Last_Read = Right_Square_Bracket 2512 or else Parser.Buffer_Length /= Save_Length 2513 then 2514 Id.Typ := Text; 2515 end if; 2516 end; 2517 end if; 2518 2519 Id.Last := Parser.Buffer_Length; 2520 2521 if Debug_Lexical then 2522 Debug_Print (Parser, Id); 2523 end if; 2524 2525 -- Internal entities should be processes inline 2526 2527 if Is_Entity_Ref /= None then 2528 declare 2529 N : constant Symbol := Find_Symbol (Parser, Id); 2530 V : constant Entity_Entry_Access := Get (Parser.Entities, N); 2531 begin 2532 Reset_Buffer (Parser, Id); 2533 if N = Parser.Lt_Sequence then 2534 Put_In_Buffer (Parser, Less_Than_Sign); 2535 Id.Typ := Text; 2536 Id.Last := Parser.Buffer_Length; 2537 Next_Char (Input, Parser); 2538 2539 elsif N = Parser.Gt_Sequence then 2540 Put_In_Buffer (Parser, Greater_Than_Sign); 2541 Id.Typ := Text; 2542 Id.Last := Parser.Buffer_Length; 2543 Next_Char (Input, Parser); 2544 2545 elsif N = Parser.Amp_Sequence then 2546 Put_In_Buffer (Parser, Ampersand); 2547 Id.Typ := Text; 2548 Id.Last := Parser.Buffer_Length; 2549 Next_Char (Input, Parser); 2550 2551 elsif N = Parser.Apos_Sequence then 2552 Put_In_Buffer (Parser, Apostrophe); 2553 Id.Typ := Text; 2554 Id.Last := Parser.Buffer_Length; 2555 Next_Char (Input, Parser); 2556 2557 elsif N = Parser.Quot_Sequence then 2558 Put_In_Buffer (Parser, Quotation_Mark); 2559 Id.Typ := Text; 2560 Id.Last := Parser.Buffer_Length; 2561 Next_Char (Input, Parser); 2562 2563 elsif V = null then 2564 declare 2565 Sym : constant Cst_Byte_Sequence_Access := Get (N); 2566 begin 2567 Skipped_Entity (Parser, N); 2568 if N = Parser.Symbol_Ampersand 2569 or else N = Parser.Symbol_Percent 2570 then 2571 Fatal_Error (Parser, Error_Entity_Name & " '" 2572 & Sym.all & "'", Id); 2573 2574 elsif Sym (Sym'First) = '%' then 2575 Error (Parser, Error_Entity_Undefined & " '" 2576 & Sym.all & "'", Id); 2577 2578 elsif not Parser.In_External_Entity then 2579 -- WF Entity Declared 2580 Fatal_Error 2581 (Parser, Error_Entity_Undefined & " '" 2582 & Sym.all & ''', Id); 2583 2584 else 2585 -- if Parser.Feature_Validation then 2586 -- VC Entity Declared 2587 Error 2588 (Parser, Error_Entity_Undefined & " '" 2589 & Sym.all & ''', Id); 2590 end if; 2591 end; 2592 2593 Id.Typ := Text; 2594 Id.Last := Id.First - 1; 2595 Next_Char (Input, Parser); 2596 2597 else 2598 if Parser.Standalone_Document 2599 and then V.External_Declaration 2600 then 2601 -- 4.1 WF Entity Declared 2602 Fatal_Error 2603 (Parser, Error_Entity_Not_Standalone, Id); 2604 end if; 2605 2606 if Is_Entity_Ref = Entity 2607 and then Parser.Current_Node = null 2608 and then not Parser.State.In_DTD 2609 then 2610 Fatal_Error (Parser, Error_Entity_Toplevel, Id); 2611 2612 -- Else if we are in the internal subset of the DTD, and in 2613 -- a context other than a declaration 2614 elsif Is_Entity_Ref = Param_Entity 2615 and then not Parser.In_External_Entity 2616 and then Parser.State.Name /= DTD_State.Name 2617 then 2618 Fatal_Error (Parser, Error_ParamEntity_In_Attribute, Id); 2619 end if; 2620 2621 Close_Inputs (Parser, Parser.Close_Inputs); 2622 2623 -- not in string context 2624 if not Parser.State.Ignore_Special then 2625 Start_Entity (Parser, N); 2626 end if; 2627 2628 if V.Already_Read then 2629 Fatal_Error (Parser, Error_Entity_Self_Ref, Id); 2630 end if; 2631 2632 V.Already_Read := True; 2633 2634 Parser.Element_Id := Parser.Element_Id + 1; 2635 2636 if Debug_Internal then 2637 Put_Line ("Expanding entity " & Get (N).all & " External=" 2638 & V.External'Img 2639 & " Value=" & Get (V.Value).all); 2640 end if; 2641 2642 Old_System_Id := Get_System_Id (Parser.Locator); 2643 2644 Parser.Inputs := new Entity_Input_Source' 2645 (External => V.External, 2646 Name => N, 2647 Input => null, 2648 Save_Loc => Get_Location (Parser.Locator), 2649 System_Id => Find_Symbol 2650 (Parser, Get (System_Id (Parser)).all & '#' & Get (N).all), 2651 Public_Id => Find_Symbol 2652 (Parser, Get (Public_Id (Parser)).all & '#' & Get (N).all), 2653 Handle_Strings => not Parser.State.Ignore_Special, 2654 Next => Parser.Inputs); 2655 2656 if V.External then 2657 if Parser.State.Name = Attlist_Str_Def_State.Name 2658 or else Parser.State.Name = Attr_Value_State.Name 2659 then 2660 Fatal_Error (Parser, Error_Attribute_External_Entity, Id); 2661 end if; 2662 2663 declare 2664 URI : constant Symbol := 2665 Resolve_URI (Parser, Old_System_Id, V.Value); 2666 begin 2667 Parser.Inputs.Input := Resolve_Entity 2668 (Parser, 2669 Public_Id => Get (V.Public).all, 2670 System_Id => Get (URI).all); 2671 2672 -- If either there is no entity resolver or if the 2673 -- standard algorithm should be used 2674 2675 if Parser.Inputs.Input = null then 2676 Parser.Inputs.Input := new File_Input; 2677 Open (Get (URI).all, 2678 File_Input (Parser.Inputs.Input.all)); 2679 Set_Public_Id 2680 (Parser.Inputs.Input.all, Get (V.Value).all); 2681 Set_System_Id (Parser.Inputs.Input.all, Get (URI).all); 2682 end if; 2683 2684 Parser.Inputs.Name := Find_Symbol 2685 (Parser, Get_System_Id (Parser.Inputs.Input.all)); 2686 2687 Set_System_Id (Parser.Locator, URI); 2688 Set_Public_Id (Parser.Locator, V.Value); 2689 2690 exception 2691 when Name_Error => 2692 Error 2693 (Parser, Error_External_Entity_Not_Found 2694 & Get (URI).all, Id); 2695 Unchecked_Free (Parser.Inputs.Input); 2696 when E : Mismatching_BOM => 2697 Error (Parser, Exception_Message (E)); 2698 Unchecked_Free (Parser.Inputs.Input); 2699 end; 2700 2701 Parser.In_External_Entity := True; 2702 else 2703 Parser.Inputs.Input := new String_Input; 2704 2705 -- 4.4.8: Expansion of parameter entities must include 2706 -- a leading and trailing space, unless we are within an 2707 -- entity value. 2708 if Is_Entity_Ref = Param_Entity 2709 and then not Parser.State.Ignore_Special 2710 then 2711 Open (' ' & Get (V.Value).all & ' ', 2712 Encoding, 2713 String_Input (Parser.Inputs.Input.all)); 2714 else 2715 Open (Get (V.Value).all, Encoding, 2716 String_Input (Parser.Inputs.Input.all)); 2717 end if; 2718 Set_Public_Id 2719 (Parser.Locator, 2720 Find_Symbol (Parser, "entity " & Get (N).all)); 2721 Set_Public_Id 2722 (Parser.Inputs.Input.all, 2723 Get (Get_Public_Id (Parser.Locator)).all); 2724 end if; 2725 2726 if Parser.Inputs.Input = null then 2727 Skipped_Entity (Parser, V.Name); 2728 Next_Char (Input, Parser); 2729 Next_Token (Input, Parser, Id); 2730 2731 else 2732 Set_Line_Number (Parser.Locator, 1); 2733 Set_Column_Number 2734 (Parser.Locator, 2735 Prolog_Size (Parser.Inputs.Input.all)); 2736 2737 Next_Char (Input, Parser); 2738 Next_Token (Input, Parser, Id); 2739 2740 V.Already_Read := False; 2741 end if; 2742 end if; 2743 end; 2744 end if; 2745 end Next_Token; 2746 2747 ---------------------------- 2748 -- Next_Token_Skip_Spaces -- 2749 ---------------------------- 2750 2751 procedure Next_Token_Skip_Spaces 2752 (Input : in out Input_Sources.Input_Source'Class; 2753 Parser : in out Sax_Reader'Class; 2754 Id : out Token; 2755 Must_Have : Boolean := False) is 2756 begin 2757 Next_Token (Input, Parser, Id); 2758 if Must_Have and then Id.Typ /= Space then 2759 Fatal_Error (Parser, Error_Expecting_Space, Id); 2760 end if; 2761 while Id.Typ = Space loop 2762 Reset_Buffer (Parser, Id); 2763 Next_Token (Input, Parser, Id); 2764 end loop; 2765 end Next_Token_Skip_Spaces; 2766 2767 ------------------------------- 2768 -- Next_NS_Token_Skip_Spaces -- 2769 ------------------------------- 2770 2771 procedure Next_NS_Token_Skip_Spaces 2772 (Input : in out Input_Sources.Input_Source'Class; 2773 Parser : in out Sax_Reader'Class; 2774 NS_Id : out Token; 2775 Name_Id : out Token) 2776 is 2777 Id : Token; 2778 Saved_In_Tag : constant Boolean := Parser.State.In_Tag; 2779 begin 2780 NS_Id := Null_Token; 2781 Next_Token (Input, Parser, Id); 2782 while Id.Typ = Space loop 2783 Reset_Buffer (Parser, Id); 2784 Next_Token (Input, Parser, Id); 2785 end loop; 2786 Name_Id := Id; 2787 2788 if Name_Id.Typ = Colon then 2789 -- An empty namespace, used in the XML testsuite ? 2790 NS_Id := Null_Token; 2791 Reset_Buffer (Parser, Id); 2792 Next_Token (Input, Parser, Name_Id); 2793 2794 elsif Name_Id.Typ = Name then 2795 if Parser.Last_Read_Is_Valid 2796 and then Parser.Last_Read = Unicode.Names.Basic_Latin.Colon 2797 and then Parser.Feature_Namespace 2798 then 2799 Parser.State.In_Tag := True; -- Get COLON on its own 2800 Next_Token (Input, Parser, Id); 2801 Parser.State.In_Tag := Saved_In_Tag; 2802 2803 NS_Id := Name_Id; 2804 Reset_Buffer (Parser, Id); 2805 Next_Token (Input, Parser, Name_Id); 2806 end if; 2807 end if; 2808 end Next_NS_Token_Skip_Spaces; 2809 2810 ------------------ 2811 -- Reset_Buffer -- 2812 ------------------ 2813 2814 procedure Reset_Buffer 2815 (Parser : in out Sax_Reader'Class; Id : Token := Null_Token) is 2816 begin 2817 Parser.Buffer_Length := Id.First - 1; 2818 end Reset_Buffer; 2819 2820 --------------- 2821 -- Set_State -- 2822 --------------- 2823 2824 procedure Set_State 2825 (Parser : in out Sax_Reader'Class; State : Parser_State) is 2826 begin 2827 Parser.State := State; 2828 end Set_State; 2829 2830 --------------- 2831 -- Get_State -- 2832 --------------- 2833 2834 function Get_State (Parser : Sax_Reader'Class) return Parser_State is 2835 begin 2836 return Parser.State; 2837 end Get_State; 2838 2839 ------------------------- 2840 -- Parse_Element_Model -- 2841 ------------------------- 2842 2843 procedure Parse_Element_Model 2844 (Input : in out Input_Source'Class; 2845 Parser : in out Sax_Reader'Class; 2846 Result : out Element_Model_Ptr; 2847 Attlist : Boolean := False; 2848 Open_Was_Read : Boolean) 2849 is 2850 -- ??? Would be nice to get rid of this hard-coded limitation in stacks 2851 Stack_Size : constant Natural := 1024; 2852 Operand_Stack : Element_Model_Array (1 .. Stack_Size); 2853 Operand_Index : Natural := Operand_Stack'First; 2854 Operator_Stack : array (1 .. Stack_Size) of Unicode_Char; 2855 Operator_Index : Natural := Operator_Stack'First; 2856 Expect_Operator : Boolean := not Open_Was_Read; 2857 2858 procedure Parse_Element_Model_From_Entity (Name : Symbol); 2859 -- Parse the element model defined in the entity Name, and leave the 2860 -- contents on the stacks. 2861 2862 procedure Parse 2863 (Input : in out Input_Source'Class; 2864 Result : out Element_Model_Ptr; 2865 Open_Was_Read : Boolean; 2866 Is_Recursive_Call : Boolean); 2867 -- Parse the content model read in Input 2868 -- Is_Recursive_Call should be true when called from itself or from 2869 -- Parse_Element_Model_From_Entity. 2870 2871 ------------------------------------- 2872 -- Parse_Element_Model_From_Entity -- 2873 ------------------------------------- 2874 2875 procedure Parse_Element_Model_From_Entity (Name : Symbol) is 2876 Loc : Sax.Locators.Location; 2877 Last : constant Unicode_Char := Parser.Last_Read; 2878 Input_S : String_Input; 2879 Val : constant Entity_Entry_Access := Get (Parser.Entities, Name); 2880 M : Element_Model_Ptr; 2881 begin 2882 if Val = null then 2883 Fatal_Error 2884 (Parser, 2885 Error_Entity_Undefined & ' ' & Get (Name).all); 2886 2887 elsif Val.Value = Empty_String then 2888 return; 2889 2890 else 2891 Loc := Get_Location (Parser.Locator); 2892 Set_Line_Number (Parser.Locator, 1); 2893 Set_Column_Number (Parser.Locator, 1); 2894 Set_Public_Id 2895 (Parser.Locator, 2896 Find_Symbol (Parser, "entity " & Get (Name).all)); 2897 2898 Open (Get (Val.Value).all, Encoding, Input_S); 2899 Next_Char (Input_S, Parser); 2900 Parse (Input_S, M, False, True); 2901 -- Parse_Element_Model (Input_S, Parser, M, Attlist, False); 2902 Close (Input_S); 2903 2904 Set_Location (Parser.Locator, Loc); 2905 Parser.Last_Read := Last; 2906 end if; 2907 end Parse_Element_Model_From_Entity; 2908 2909 ----------- 2910 -- Parse -- 2911 ----------- 2912 2913 procedure Parse 2914 (Input : in out Input_Source'Class; 2915 Result : out Element_Model_Ptr; 2916 Open_Was_Read : Boolean; 2917 Is_Recursive_Call : Boolean) 2918 is 2919 Num_Items : Positive; 2920 Current_Item, Current_Operand : Natural; 2921 Start_Sub : Natural := Parser.Buffer_Length + 1; 2922 M : Element_Model_Ptr; 2923 Found : Boolean; 2924 Start_Id : constant Symbol := System_Id (Parser); 2925 Start_Token : Token; 2926 Test_Multiplier : Boolean; 2927 Can_Be_Mixed : Boolean; 2928 Num_Parenthesis : Integer := 0; 2929 Already_Displayed_Self_Contained_Error : Boolean := False; 2930 2931 begin 2932 Start_Token := Null_Token; 2933 Start_Token.Location.Line := Get_Line_Number (Parser.Locator); 2934 Start_Token.Location.Column := Get_Column_Number (Parser.Locator); 2935 2936 if Open_Was_Read then 2937 Start_Token.Location.Column := Start_Token.Location.Column - 1; 2938 end if; 2939 2940 while Is_White_Space (Parser.Last_Read) loop 2941 Next_Char (Input, Parser); 2942 end loop; 2943 2944 loop 2945 if End_Of_Stream (Parser) then 2946 if not Is_Recursive_Call then 2947 for J in Operand_Stack'First .. Operand_Index - 1 loop 2948 Free (Operand_Stack (J)); 2949 end loop; 2950 2951 elsif Num_Parenthesis /= 0 then 2952 Fatal_Error (Parser, Error_Entity_Nested, Start_Token); 2953 2954 elsif Parser.Buffer_Length >= Start_Sub then 2955 Operand_Stack (Operand_Index) := 2956 new Element_Model (Element_Ref); 2957 Operand_Stack (Operand_Index).Name := Find_Symbol 2958 (Parser, 2959 Parser.Buffer (Start_Sub .. Parser.Buffer_Length)); 2960 Operand_Index := Operand_Index + 1; 2961 Parser.Buffer_Length := Start_Sub - 1; 2962 end if; 2963 2964 exit; 2965 end if; 2966 2967 if Parser.Feature_Validation 2968 and then (not Parser.Last_Read_Is_Valid 2969 or else System_Id (Parser) /= Start_Id) 2970 and then not Already_Displayed_Self_Contained_Error 2971 then 2972 Already_Displayed_Self_Contained_Error := True; 2973 Error (Parser, Error_Entity_Self_Contained, Start_Token); 2974 end if; 2975 2976 Test_Multiplier := False; 2977 2978 -- Process the operator 2979 case Parser.Last_Read is 2980 when Left_Parenthesis => 2981 Operator_Stack (Operator_Index) := Parser.Last_Read; 2982 Operator_Index := Operator_Index + 1; 2983 Expect_Operator := False; 2984 Next_Char (Input, Parser); 2985 Num_Parenthesis := Num_Parenthesis + 1; 2986 2987 when Right_Parenthesis => 2988 Num_Parenthesis := Num_Parenthesis - 1; 2989 Num_Items := 1; 2990 Current_Item := Operator_Index - 1; 2991 Current_Operand := Operand_Index - 1; 2992 Can_Be_Mixed := Current_Operand >= Operand_Stack'First 2993 and then 2994 (Operand_Stack (Current_Operand).Content = Character_Data 2995 or else Operand_Stack (Current_Operand).Content 2996 = Element_Ref); 2997 2998 if Current_Operand >= Operand_Stack'First 2999 and then Is_Mixed (Operand_Stack (Current_Operand)) 3000 then 3001 Fatal_Error (Parser, Error_Mixed_Contents); 3002 end if; 3003 3004 while Current_Item >= Operator_Stack'First 3005 and then 3006 Operator_Stack (Current_Item) /= Left_Parenthesis 3007 loop 3008 if Operator_Stack (Current_Item) /= Comma 3009 and then Operator_Stack (Current_Item) /= Vertical_Line 3010 then 3011 Fatal_Error 3012 (Parser, Error_Invalid_Content_Model, Start_Token); 3013 end if; 3014 3015 if Current_Operand = 0 then 3016 Fatal_Error 3017 (Parser, Error_Missing_Operand, Start_Token); 3018 end if; 3019 3020 Current_Operand := Current_Operand - 1; 3021 3022 if Current_Operand < Operand_Stack'First then 3023 Fatal_Error 3024 (Parser, Error_Invalid_Content_Model, Start_Token); 3025 end if; 3026 3027 if Operand_Stack (Current_Operand).Content 3028 /= Character_Data and then 3029 Operand_Stack (Current_Operand).Content /= Element_Ref 3030 then 3031 Can_Be_Mixed := False; 3032 end if; 3033 3034 if Is_Mixed (Operand_Stack (Current_Operand)) then 3035 Fatal_Error (Parser, Error_Mixed_Contents); 3036 end if; 3037 3038 Num_Items := Num_Items + 1; 3039 Current_Item := Current_Item - 1; 3040 end loop; 3041 3042 if Current_Item < Operator_Stack'First then 3043 Fatal_Error 3044 (Parser, Error_Invalid_Content_Model, Start_Token); 3045 end if; 3046 3047 if Current_Operand < Operand_Stack'First then 3048 Fatal_Error 3049 (Parser, Error_Content_Model_Empty_List, Start_Token); 3050 end if; 3051 3052 if Operator_Stack (Operator_Index - 1) = Comma then 3053 M := new Element_Model (Sequence); 3054 else 3055 if not Can_Be_Mixed 3056 and then Operand_Stack (Current_Operand).Content 3057 = Character_Data 3058 then 3059 Fatal_Error 3060 (Parser, Error_Content_Model_Nested_Groups); 3061 end if; 3062 3063 M := new Element_Model (Any_Of); 3064 end if; 3065 M.List := new Element_Model_Array (1 .. Num_Items); 3066 for J in Current_Operand .. Operand_Index - 1 loop 3067 M.List (J - Current_Operand + 1) := Operand_Stack (J); 3068 end loop; 3069 Operand_Index := Current_Operand + 1; 3070 Operand_Stack (Current_Operand) := M; 3071 Operator_Index := Current_Item; 3072 Expect_Operator := False; 3073 Test_Multiplier := True; 3074 Next_Char (Input, Parser); 3075 3076 if not End_Of_Stream (Parser) 3077 and then Current_Operand >= Operand_Stack'First 3078 and then Is_Mixed (Operand_Stack (Current_Operand)) 3079 and then Operand_Stack (Current_Operand).List'Length >= 2 3080 and then Parser.Last_Read /= Asterisk 3081 then 3082 Fatal_Error 3083 (Parser, Error_Content_Model_Closing_Paren); 3084 end if; 3085 3086 when Comma | Vertical_Line => 3087 if Attlist and then Parser.Last_Read = Comma then 3088 Fatal_Error (Parser, Error_Attlist_Invalid_Enum); 3089 end if; 3090 3091 if Parser.Last_Read = Comma 3092 and then Operand_Index - 1 < Operand_Stack'First 3093 then 3094 Fatal_Error (Parser, Error_Content_Model_Invalid_Seq); 3095 end if; 3096 3097 if Parser.Last_Read = Comma 3098 and then Operator_Stack (Operator_Index - 1) 3099 = Left_Parenthesis 3100 and then Operand_Stack (Operand_Index - 1).Content 3101 = Character_Data 3102 then 3103 Fatal_Error (Parser, Error_Content_Model_Pcdata); 3104 end if; 3105 3106 if Operator_Index = Operator_Stack'First 3107 or else 3108 (Operator_Stack (Operator_Index - 1) /= Parser.Last_Read 3109 and then 3110 Operator_Stack (Operator_Index - 1) /= 3111 Left_Parenthesis) 3112 then 3113 Fatal_Error (Parser, Error_Content_Model_Mixing); 3114 end if; 3115 Operator_Stack (Operator_Index) := Parser.Last_Read; 3116 Operator_Index := Operator_Index + 1; 3117 Expect_Operator := False; 3118 Next_Char (Input, Parser); 3119 3120 when Asterisk | Question_Mark | Plus_Sign => 3121 Fatal_Error 3122 (Parser, Error_Content_Model_Invalid_Multiplier, 3123 Start_Token); 3124 3125 when Number_Sign => 3126 if Expect_Operator then 3127 Fatal_Error 3128 (Parser, Error_Content_Model_Invalid_Start, 3129 Start_Token); 3130 end if; 3131 Expect_Operator := True; 3132 3133 -- #PCDATA can only be the first element of a choice list 3134 -- ??? Note that in that case the Choice model can only be a 3135 -- list of names, not a parenthesis expression. 3136 Start_Sub := Parser.Buffer_Length + 1; 3137 3138 Next_Char (Input, Parser); 3139 Found := (Parser.Last_Read = Latin_Capital_Letter_P); 3140 if Found then 3141 Next_Char (Input, Parser); 3142 Found := (Parser.Last_Read = Latin_Capital_Letter_C); 3143 if Found then 3144 Next_Char (Input, Parser); 3145 Found := (Parser.Last_Read = Latin_Capital_Letter_D); 3146 if Found then 3147 Next_Char (Input, Parser); 3148 Found := Parser.Last_Read = Latin_Capital_Letter_A; 3149 if Found then 3150 Next_Char (Input, Parser); 3151 Found := 3152 (Parser.Last_Read = Latin_Capital_Letter_T); 3153 if Found then 3154 Next_Char (Input, Parser); 3155 Found := 3156 (Parser.Last_Read = Latin_Capital_Letter_A); 3157 end if; 3158 end if; 3159 end if; 3160 end if; 3161 end if; 3162 3163 if not Found then 3164 Fatal_Error 3165 (Parser, Error_Content_Model_Invalid_Seq, Start_Token); 3166 end if; 3167 3168 if Operator_Stack (Operator_Index - 1) 3169 /= Left_Parenthesis 3170 then 3171 Fatal_Error (Parser, Error_Content_Model_Pcdata_First); 3172 end if; 3173 3174 Operand_Stack (Operand_Index) := 3175 new Element_Model (Character_Data); 3176 Operand_Index := Operand_Index + 1; 3177 Parser.Buffer_Length := Start_Sub - 1; 3178 Next_Char (Input, Parser); 3179 3180 when Percent_Sign => 3181 if not Parser.In_External_Entity 3182 and then Parser.State.Name /= DTD_State.Name 3183 then 3184 Fatal_Error (Parser, Error_ParamEntity_In_Attribute); 3185 end if; 3186 3187 Start_Sub := Parser.Buffer_Length + 1; 3188 3189 while Parser.Last_Read_Is_Valid 3190 and then Parser.Last_Read /= Semicolon 3191 loop 3192 Put_In_Buffer (Parser, Parser.Last_Read); 3193 Next_Char (Input, Parser); 3194 end loop; 3195 3196 Parse_Element_Model_From_Entity 3197 (Find_Symbol 3198 (Parser, 3199 Parser.Buffer (Start_Sub .. Parser.Buffer_Length))); 3200 Parser.Buffer_Length := Start_Sub - 1; 3201 Next_Char (Input, Parser); 3202 3203 when others => 3204 if Parser.Last_Read_Is_Valid then 3205 if Expect_Operator then 3206 Fatal_Error 3207 (Parser, Error_Content_Model_Expect_Operator); 3208 end if; 3209 Expect_Operator := True; 3210 3211 -- ??? Should test Is_Nmtoken 3212 Start_Sub := Parser.Buffer_Length + 1; 3213 3214 while Parser.Last_Read = Unicode.Names.Basic_Latin.Colon 3215 or else Is_Valid_Name_Char 3216 (Parser.Last_Read, Parser.XML_Version) 3217 loop 3218 Put_In_Buffer (Parser, Parser.Last_Read); 3219 Next_Char (Input, Parser); 3220 end loop; 3221 3222 if Start_Sub > Parser.Buffer_Length then 3223 Error (Parser, Error_Content_Model_Invalid_Name 3224 & Debug_Encode (Parser.Last_Read), 3225 Start_Token); 3226 end if; 3227 3228 Operand_Stack (Operand_Index) := 3229 new Element_Model (Element_Ref); 3230 Operand_Stack (Operand_Index).Name := Find_Symbol 3231 (Parser, 3232 Parser.Buffer (Start_Sub .. Parser.Buffer_Length)); 3233 Operand_Index := Operand_Index + 1; 3234 Parser.Buffer_Length := Start_Sub - 1; 3235 Test_Multiplier := True; 3236 3237 else 3238 -- Could happen with improper entity nesting 3239 Next_Char (Input, Parser); 3240 end if; 3241 3242 end case; 3243 3244 if Test_Multiplier then 3245 case Parser.Last_Read is 3246 when Asterisk => 3247 if Operand_Index = Operand_Stack'First then 3248 Fatal_Error 3249 (Parser, Error_Content_Model_Invalid_Multiplier); 3250 end if; 3251 Operand_Stack (Operand_Index - 1) := new Element_Model' 3252 (Repeat, 0, Positive'Last, 3253 Operand_Stack (Operand_Index - 1)); 3254 Expect_Operator := True; 3255 Next_Char (Input, Parser); 3256 3257 when Plus_Sign => 3258 if Operand_Index = Operand_Stack'First then 3259 Fatal_Error 3260 (Parser, Error_Content_Model_Invalid_Multiplier); 3261 end if; 3262 if Is_Mixed (Operand_Stack (Operand_Index - 1)) then 3263 Fatal_Error 3264 (Parser, Error_Content_Model_Pcdata_Occurrence); 3265 end if; 3266 3267 Operand_Stack (Operand_Index - 1) := new Element_Model' 3268 (Repeat, 1, 3269 Positive'Last, Operand_Stack (Operand_Index - 1)); 3270 Expect_Operator := True; 3271 Next_Char (Input, Parser); 3272 3273 when Question_Mark => 3274 if Operand_Index = Operand_Stack'First then 3275 Fatal_Error 3276 (Parser, Error_Content_Model_Invalid_Multiplier); 3277 end if; 3278 if Is_Mixed (Operand_Stack (Operand_Index - 1)) then 3279 Fatal_Error 3280 (Parser, Error_Content_Model_Pcdata_Occurrence); 3281 end if; 3282 Operand_Stack (Operand_Index - 1) := new Element_Model' 3283 (Repeat, 0, 1, Operand_Stack (Operand_Index - 1)); 3284 Expect_Operator := True; 3285 Next_Char (Input, Parser); 3286 3287 when others => null; 3288 end case; 3289 end if; 3290 3291 exit when Operator_Index = Operator_Stack'First 3292 and then Operand_Index = Operand_Stack'First + 1; 3293 3294 while Is_White_Space (Parser.Last_Read) loop 3295 Next_Char (Input, Parser); 3296 end loop; 3297 end loop; 3298 3299 if not Is_Recursive_Call then 3300 if Operator_Index /= Operator_Stack'First 3301 or else Operand_Index /= Operand_Stack'First + 1 3302 then 3303 Error 3304 (Parser, Error_Content_Model_Invalid, Start_Token); 3305 end if; 3306 3307 Result := Operand_Stack (Operand_Stack'First); 3308 3309 elsif Num_Parenthesis /= 0 then 3310 Error (Parser, Error_Entity_Nested, Start_Token); 3311 end if; 3312 3313 exception 3314 when others => 3315 if not Is_Recursive_Call then 3316 for J in Operand_Stack'First .. Operand_Index - 1 loop 3317 Free (Operand_Stack (J)); 3318 end loop; 3319 end if; 3320 raise; 3321 end Parse; 3322 3323 begin 3324 if Open_Was_Read then 3325 -- Insert the opening parenthesis into the operators stack 3326 Operator_Stack (Operator_Stack'First) := Left_Parenthesis; 3327 Operator_Index := Operator_Index + 1; 3328 end if; 3329 3330 Parse (Input, Result, Open_Was_Read, False); 3331 end Parse_Element_Model; 3332 3333 -------------------------------- 3334 -- Check_Valid_Name_Or_NCname -- 3335 -------------------------------- 3336 3337 procedure Check_Valid_Name_Or_NCname 3338 (Parser : in out Sax_Reader'Class; 3339 Name : Token) 3340 is 3341 begin 3342 if Parser.Feature_Namespace then 3343 if not Is_Valid_NCname 3344 (Parser.Buffer (Name.First .. Name.Last), Parser.XML_Version) 3345 then 3346 Fatal_Error (Parser, Error_Is_Ncname, Name); 3347 end if; 3348 else 3349 if not Is_Valid_Name 3350 (Parser.Buffer (Name.First .. Name.Last), Parser.XML_Version) 3351 then 3352 Fatal_Error (Parser, Error_Is_Name, Name); 3353 end if; 3354 end if; 3355 end Check_Valid_Name_Or_NCname; 3356 3357 --------------------------- 3358 -- Check_Attribute_Value -- 3359 --------------------------- 3360 3361 procedure Check_Attribute_Value 3362 (Parser : in out Sax_Reader'Class; 3363 Local_Name : Symbol; 3364 Typ : Attribute_Type; 3365 Value : Symbol; 3366 Error_Loc : Token) 3367 is 3368 Ent : Entity_Entry_Access; 3369 Val : constant Cst_Byte_Sequence_Access := Get (Value); 3370 begin 3371 case Typ is 3372 when Id | Idref => 3373 if Parser.Feature_Namespace then 3374 if not Is_Valid_NCname (Val.all, Parser.XML_Version) then 3375 -- Always a non-fatal error, since we are dealing with 3376 -- namespaces 3377 Error (Parser, Error_Attribute_Is_Ncname 3378 & Get (Local_Name).all, Error_Loc); 3379 end if; 3380 else 3381 if not Is_Valid_Name (Val.all, Parser.XML_Version) then 3382 Error (Parser, Error_Attribute_Is_Name 3383 & Get (Local_Name).all, Error_Loc); 3384 end if; 3385 end if; 3386 3387 when Idrefs => 3388 if Parser.Feature_Namespace then 3389 if not Is_Valid_NCnames (Val.all, Parser.XML_Version) then 3390 Error (Parser, Error_Attribute_Is_Ncname 3391 & Get (Local_Name).all, Error_Loc); 3392 end if; 3393 else 3394 if not Is_Valid_Names (Val.all, Parser.XML_Version) then 3395 Error (Parser, Error_Attribute_Is_Name 3396 & Get (Local_Name).all, Error_Loc); 3397 end if; 3398 end if; 3399 3400 when Nmtoken => 3401 if not Is_Valid_Nmtoken (Val.all, Parser.XML_Version) then 3402 Error (Parser, Error_Attribute_Is_Nmtoken 3403 & Get (Local_Name).all, Error_Loc); 3404 end if; 3405 3406 when Nmtokens => 3407 if not Is_Valid_Nmtokens (Val.all, Parser.XML_Version) then 3408 Error (Parser, Error_Attribute_Is_Nmtoken 3409 & Get (Local_Name).all, Error_Loc); 3410 end if; 3411 3412 when Entity => 3413 if not Is_Valid_Name (Val.all, Parser.XML_Version) then 3414 Error (Parser, Error_Attribute_Is_Name 3415 & Get (Local_Name).all, Error_Loc); 3416 end if; 3417 3418 Ent := Get (Parser.Entities, Value); 3419 if Ent = null or else not Ent.Unparsed then 3420 Error (Parser, Error_Attribute_Ref_Unparsed_Entity 3421 & Get (Local_Name).all, Error_Loc); 3422 end if; 3423 3424 when Entities => 3425 declare 3426 Index : Integer := Val'First; 3427 Last, Previous : Integer; 3428 C : Unicode_Char; 3429 begin 3430 Last := Index; 3431 while Last <= Val'Last loop 3432 Previous := Last; 3433 Encoding.Read (Val.all, Last, C); 3434 if C = Unicode.Names.Basic_Latin.Space 3435 or else Last > Val'Last 3436 then 3437 if not Is_Valid_Name (Val (Index .. Previous), 3438 Parser.XML_Version) 3439 then 3440 Error (Parser, Error_Attribute_Is_Name 3441 & Get (Local_Name).all, 3442 Error_Loc); 3443 end if; 3444 3445 Ent := Get 3446 (Parser.Entities, 3447 Find_Symbol (Parser, Val (Index .. Previous))); 3448 if Ent = null or else not Ent.Unparsed then 3449 Error (Parser, Error_Attribute_Ref_Unparsed_Entity 3450 & Get (Local_Name).all, 3451 Error_Loc); 3452 end if; 3453 Index := Last; 3454 end if; 3455 end loop; 3456 end; 3457 3458 when others => 3459 null; 3460 end case; 3461 end Check_Attribute_Value; 3462 3463 ------------ 3464 -- Append -- 3465 ------------ 3466 3467 procedure Append 3468 (List : in out Sax_Attribute_List; 3469 Local_Name : Sax.Symbols.Symbol; 3470 Prefix : Sax.Symbols.Symbol; 3471 Att_Type : Attribute_Type := Cdata; 3472 URI : Sax.Symbols.Symbol := No_Symbol; 3473 Value : Sax.Symbols.Symbol; 3474 Location : Sax.Locators.Location; 3475 Default_Decl : Default_Declaration := Default; 3476 If_Unique : Boolean := False) 3477 is 3478 Tmp : Sax_Attribute_Array_Access; 3479 begin 3480 if If_Unique then 3481 for A in 1 .. List.Count loop 3482 if List.List (A).Local_Name = Local_Name 3483 and then List.List (A).Prefix = Prefix 3484 then 3485 return; 3486 end if; 3487 end loop; 3488 end if; 3489 3490 if List.List = null or else List.Count = List.List'Last then 3491 Tmp := List.List; 3492 if Tmp /= null then 3493 List.List := new Sax_Attribute_Array (Tmp'First .. Tmp'Last + 1); 3494 List.List (Tmp'Range) := Tmp.all; 3495 Unchecked_Free (Tmp); 3496 else 3497 List.List := new Sax_Attribute_Array (1 .. 1); 3498 List.Count := 0; 3499 end if; 3500 end if; 3501 3502 -- The URI cannot be resolved at this point, since it will 3503 -- depend on the contents of the document at the place where 3504 -- the attribute is used. 3505 3506 List.Count := List.Count + 1; 3507 List.List (List.Count) := Sax_Attribute' 3508 (Prefix => Prefix, 3509 Local_Name => Local_Name, 3510 Value => Value, 3511 Non_Normalized_Value => Value, 3512 Att_Type => Att_Type, 3513 URI => URI, 3514 Default_Decl => Default_Decl, 3515 Location => Location); 3516 end Append; 3517 3518 --------------------- 3519 -- Syntactic_Parse -- 3520 --------------------- 3521 3522 procedure Syntactic_Parse 3523 (Parser : in out Sax_Reader'Class; 3524 Input : in out Input_Sources.Input_Source'Class) 3525 is 3526 Id : Token := Null_Token; 3527 3528 procedure Parse_Start_Tag; 3529 -- Process an element start and its attributes <!name name="value"..> 3530 3531 procedure Parse_Attributes 3532 (Elem_NS_Id, Elem_Name_Id : Token; Id : in out Token); 3533 -- Process the list of attributes in a start tag, and store them in 3534 -- Parser.Attributes. 3535 -- Id should have been initialized to the first token in the attributes 3536 -- list, and will be left on the first token after it. 3537 -- Return the list of attributes for this element 3538 -- On exit, NS_Count is set to the number of references to Elem_NS_Id 3539 -- among the attributes. The count for other XML_NS that the one of the 3540 -- element is directly increment in the corresponding XML_NS, but for 3541 -- the element we want to keep it virgin until we have called the 3542 -- validation hook. 3543 3544 procedure Resolve_Attribute_Namespaces; 3545 -- For each attributes defined in Parser.Attributes, set its URI for 3546 -- the namespace 3547 3548 procedure Check_And_Define_Namespace 3549 (Prefix, URI : Symbol; Location : Sax.Locators.Location); 3550 -- An attribute defining a namespace was found. Check that the values 3551 -- are valid, and register the new namespace. If Prefix is Null_Token, 3552 -- the default namespace is defined 3553 3554 function Get_String (Str : Token) return String; 3555 function Get_String (First, Last : Token) return String; 3556 pragma Inline (Get_String); 3557 -- Return the string pointed to by the token 3558 3559 procedure Add_Default_Attributes (DTD_Attr : Sax_Attribute_Array_Access); 3560 -- Add all DEFAULT attributes declared in the DTD into the attributes of 3561 -- the current element, if they weren't overriden by the user 3562 3563 procedure Parse_End_Tag; 3564 -- Process an element end </name> 3565 3566 procedure Parse_Doctype; 3567 -- Process the DTD declaration 3568 3569 procedure Parse_Doctype_Contents; 3570 -- Process the DTD's contents 3571 3572 procedure Parse_Entity_Def (Id : in out Token); 3573 -- Parse an <!ENTITY declaration 3574 3575 procedure Parse_Element_Def (Id : in out Token); 3576 -- Parse an <!ELEMENT declaration 3577 3578 procedure Parse_Notation_Def (Id : in out Token); 3579 -- Parse an <!NOTATION declaration 3580 3581 procedure Parse_Attlist_Def (Id : in out Token); 3582 -- Parse an <!ATTLIST declaration 3583 3584 procedure Parse_PI (Id : in out Token); 3585 -- Parse a <?...?> processing instruction 3586 3587 procedure End_Element; 3588 -- End the current element. Its namespace prefix and local_name are 3589 -- given in the parameters. 3590 3591 procedure Get_String 3592 (Id : in out Token; 3593 State : Parser_State; 3594 Str_Start, Str_End : out Token; 3595 Normalize : Boolean := False; 3596 Collapse_Spaces : Boolean := False); 3597 -- Get all the character till the end of the string. Id should contain 3598 -- the initial quote that starts the string. 3599 -- On exit, Str_Start is set to the first token of the string, and 3600 -- Str_End to the last token. 3601 -- If Normalize is True, then all space characters are converted to 3602 -- ' '. 3603 -- If Collapse_Spaces is True, then all duplicate spaces sequences are 3604 -- collapsed into a single space character. Leading and trailing spaces 3605 -- are also removed. 3606 3607 procedure Get_Name_NS (Id : in out Token; NS_Id, Name_Id : out Token); 3608 -- Read the next tokens so as to match either a single name or 3609 -- a "ns:name" name. 3610 -- Id should initially point to the candidate token for the name, and 3611 -- will be left on the token following that name. 3612 -- An error is raised if we can't even match a Name. 3613 3614 procedure Get_External 3615 (Id : in out Token; 3616 System_Start, System_End, Public_Start, Public_End : out Token; 3617 Allow_Publicid : Boolean := False); 3618 -- Parse a PUBLIC or SYSTEM definition and its arguments. 3619 -- Id should initially point to the keyword itself, and will be set to 3620 -- the first identifier following the full definition 3621 -- If Allow_Publicid is True, then PUBLIC might be followed by a single 3622 -- string, as in rule [83] of the XML specifications. 3623 3624 procedure Check_Standalone_Value (Id : in out Token); 3625 procedure Check_Encoding_Value (Id : in out Token); 3626 procedure Check_Version_Value (Id : in out Token); 3627 -- Check the arguments for the <?xml?> processing instruction. 3628 -- Each of this procedures gets the arguments from Next_Token, up to, 3629 -- and including, the following space or End_Of_PI character. 3630 -- They raise errors appropriately 3631 3632 procedure Check_Model; 3633 -- Check that the last element inserted matches the model. This 3634 -- procedure should not be called for the root element. 3635 3636 ---------------- 3637 -- Get_String -- 3638 ---------------- 3639 3640 procedure Get_String 3641 (Id : in out Token; 3642 State : Parser_State; 3643 Str_Start, Str_End : out Token; 3644 Normalize : Boolean := False; 3645 Collapse_Spaces : Boolean := False) 3646 is 3647 T : constant Token := Id; 3648 Saved_State : constant Parser_State := Get_State (Parser); 3649 Possible_End : Token := Null_Token; 3650 C : Unicode_Char; 3651 Index : Natural; 3652 Last_Space : Natural := 0; 3653 Had_Space : Boolean := Collapse_Spaces; -- Avoid leading spaces 3654 3655 begin 3656 if Debug_Internal then 3657 Put_Line ("Get_String Normalize=" 3658 & Boolean'Image (Normalize) 3659 & " Collapse_Spaces=" 3660 & Boolean'Image (Collapse_Spaces)); 3661 end if; 3662 Set_State (Parser, State); 3663 Next_Token (Input, Parser, Id); 3664 Str_Start := Id; 3665 Str_End := Id; 3666 3667 while Id.Typ /= T.Typ and then Id.Typ /= End_Of_Input loop 3668 Str_End := Id; 3669 case Id.Typ is 3670 when Double_String_Delimiter => 3671 Str_End.First := Parser.Buffer_Length + 1; 3672 Put_In_Buffer (Parser, Quotation_Mark); 3673 Str_End.Last := Parser.Buffer_Length; 3674 Possible_End := Str_End; 3675 Had_Space := False; 3676 when Single_String_Delimiter => 3677 Str_End.First := Parser.Buffer_Length + 1; 3678 Put_In_Buffer (Parser, Apostrophe); 3679 Str_End.Last := Parser.Buffer_Length; 3680 Possible_End := Str_End; 3681 Had_Space := False; 3682 when Start_Of_Tag => 3683 if Possible_End = Null_Token then 3684 Fatal_Error (Parser, Error_Attribute_Less_Than, Id); 3685 else 3686 Fatal_Error 3687 (Parser, Error_Attribute_Less_Than_Suggests 3688 & Location (Parser, Possible_End.Location), Id); 3689 end if; 3690 when Char_Ref => 3691 -- 3.3.3 item 3: character references are kept as is 3692 if Get_String (Id) = Space_Sequence then 3693 if Collapse_Spaces and Had_Space then 3694 Reset_Buffer (Parser, Id); 3695 end if; 3696 Had_Space := True; 3697 Last_Space := Parser.Buffer_Length; 3698 else 3699 Had_Space := False; 3700 end if; 3701 3702 when others => 3703 if Normalize or Collapse_Spaces then 3704 declare 3705 Str : constant Byte_Sequence := 3706 Parser.Buffer (Id.First .. Id.Last); 3707 begin 3708 Reset_Buffer (Parser, Id); 3709 Index := Str'First; 3710 while Index <= Str'Last loop 3711 Encoding.Read (Str, Index, C); 3712 3713 -- ??? If we have a character reference, we must 3714 -- replace the character it represents, and not do 3715 -- entity replacement. How to do that, we have lost 3716 -- that information 3717 3718 -- When parsing an attribute value, we should still 3719 -- process white spaces, therefore the test for 3720 -- Ignore_Special 3721 if Is_White_Space (C) then 3722 if not Collapse_Spaces or not Had_Space then 3723 Put_In_Buffer 3724 (Parser, Unicode.Names.Basic_Latin.Space); 3725 end if; 3726 Had_Space := True; 3727 Last_Space := Parser.Buffer_Length; 3728 else 3729 Had_Space := False; 3730 Put_In_Buffer (Parser, C); 3731 end if; 3732 end loop; 3733 end; 3734 Str_End.Last := Parser.Buffer_Length; 3735 end if; 3736 end case; 3737 Next_Token (Input, Parser, Id); 3738 end loop; 3739 3740 if Collapse_Spaces and then Had_Space and then Last_Space /= 0 then 3741 Str_End.Last := Last_Space - 1; 3742 end if; 3743 3744 if Id.Typ = End_Of_Input then 3745 if Possible_End = Null_Token then 3746 Fatal_Error (Parser, Error_Unterminated_String); 3747 else 3748 Fatal_Error (Parser, Error_Unterminated_String_Suggests 3749 & Location (Parser, Possible_End.Location), T); 3750 end if; 3751 end if; 3752 Set_State (Parser, Saved_State); 3753 end Get_String; 3754 3755 ------------------ 3756 -- Get_External -- 3757 ------------------ 3758 3759 procedure Get_External 3760 (Id : in out Token; 3761 System_Start, System_End, Public_Start, Public_End : out Token; 3762 Allow_Publicid : Boolean := False) 3763 is 3764 Had_Space : Boolean; 3765 C : Unicode_Char; 3766 Index : Natural; 3767 begin 3768 System_Start := Null_Token; 3769 System_End := Null_Token; 3770 Public_Start := Null_Token; 3771 Public_End := Null_Token; 3772 3773 -- Check the arguments for PUBLIC 3774 if Id.Typ = Public then 3775 Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True); 3776 if Id.Typ /= Double_String_Delimiter 3777 and then Id.Typ /= Single_String_Delimiter 3778 then 3779 Fatal_Error (Parser, Error_Public_String); 3780 else 3781 Get_String 3782 (Id, Non_Interpreted_String_State, Public_Start, Public_End); 3783 3784 Index := Public_Start.First; 3785 while Index <= Public_End.Last loop 3786 Encoding.Read (Parser.Buffer.all, Index, C); 3787 3788 if not Is_Pubid_Char (C) then 3789 Fatal_Error 3790 (Parser, Error_Public_Invalid & "'" 3791 & Debug_Encode (C) & "'", Public_Start); 3792 end if; 3793 end loop; 3794 end if; 3795 3796 Next_Token (Input, Parser, Id); 3797 Had_Space := (Id.Typ = Space); 3798 if Had_Space then 3799 Next_Token (Input, Parser, Id); 3800 elsif Allow_Publicid then 3801 return; 3802 end if; 3803 3804 if Id.Typ /= Double_String_Delimiter 3805 and then Id.Typ /= Single_String_Delimiter 3806 then 3807 if not Allow_Publicid then 3808 Fatal_Error (Parser, Error_Public_Sysid); 3809 end if; 3810 else 3811 if not Had_Space then 3812 Fatal_Error (Parser, Error_Public_Sysid_Space, Id); 3813 end if; 3814 Get_String 3815 (Id, Non_Interpreted_String_State, System_Start, System_End); 3816 Next_Token (Input, Parser, Id); 3817 end if; 3818 3819 -- Check the arguments for SYSTEM 3820 elsif Id.Typ = System then 3821 Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True); 3822 if Id.Typ /= Double_String_Delimiter 3823 and then Id.Typ /= Single_String_Delimiter 3824 then 3825 Fatal_Error (Parser, Error_System_String); 3826 else 3827 Get_String 3828 (Id, Non_Interpreted_String_State, System_Start, System_End); 3829 Next_Token (Input, Parser, Id); 3830 end if; 3831 end if; 3832 end Get_External; 3833 3834 ----------------- 3835 -- Get_Name_NS -- 3836 ----------------- 3837 3838 procedure Get_Name_NS (Id : in out Token; NS_Id, Name_Id : out Token) is 3839 begin 3840 Name_Id := Id; 3841 3842 if Id.Typ = Text then 3843 Fatal_Error 3844 (Parser, Error_Invalid_Name & "'" 3845 & Parser.Buffer (Id.First .. Id.Last) & "'", Id); 3846 -- An empty namespace ? This seems to be useful only for the XML 3847 -- conformance suite, so we only handle the case of a single ':' 3848 -- to mean both an empty prefix and empty local name. 3849 elsif Name_Id.Typ = Colon then 3850 Name_Id.Typ := Text; 3851 NS_Id := Name_Id; 3852 Next_Token (Input, Parser, Id); 3853 3854 elsif Id.Typ /= Name then 3855 Fatal_Error (Parser, Error_Is_Name, Id); 3856 3857 else 3858 Next_Token (Input, Parser, Id); 3859 if Id.Typ = Colon then 3860 NS_Id := Name_Id; 3861 Next_Token (Input, Parser, Name_Id); 3862 if Name_Id.Typ /= Name then 3863 Fatal_Error (Parser, Error_Is_Name); 3864 end if; 3865 Next_Token (Input, Parser, Id); 3866 else 3867 NS_Id := Null_Token; 3868 end if; 3869 end if; 3870 end Get_Name_NS; 3871 3872 ---------------------- 3873 -- Parse_Entity_Def -- 3874 ---------------------- 3875 3876 procedure Parse_Entity_Def (Id : in out Token) is 3877 Is_Parameter : Token := Null_Token; 3878 Name_Id : Token; 3879 Def_Start, Def_End : Token := Null_Token; 3880 Ndata_Id : Token := Null_Token; 3881 Public_Start, Public_End : Token := Null_Token; 3882 System_Start, System_End : Token := Null_Token; 3883 Had_Space : Boolean; 3884 Sym : Symbol; 3885 begin 3886 Set_State (Parser, Entity_Def_State); 3887 Next_Token_Skip_Spaces (Input, Parser, Name_Id, True); 3888 3889 if Debug_Internal then 3890 Put_Line ("Parsing entity definition " 3891 & Parser.Buffer (Name_Id.First .. Name_Id.Last)); 3892 end if; 3893 3894 if Name_Id.Typ = Text 3895 and then Parser.Buffer (Name_Id.First .. Name_Id.Last) = 3896 Percent_Sign_Sequence 3897 then 3898 Is_Parameter := Name_Id; 3899 Next_Token_Skip_Spaces (Input, Parser, Name_Id); 3900 end if; 3901 3902 if Name_Id.Typ /= Name then 3903 Fatal_Error (Parser, Error_Is_Name); 3904 end if; 3905 3906 Check_Valid_Name_Or_NCname (Parser, Name_Id); 3907 3908 Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True); 3909 3910 if Id.Typ = Public or else Id.Typ = System then 3911 Get_External 3912 (Id, System_Start, System_End, Public_Start, Public_End); 3913 3914 if Contains_URI_Fragment 3915 (Parser.Buffer (System_Start.First .. System_End.Last)) 3916 then 3917 Error (Parser, Error_System_URI, Id); 3918 end if; 3919 3920 Had_Space := (Id.Typ = Space); 3921 if Had_Space then 3922 Next_Token (Input, Parser, Id); 3923 end if; 3924 3925 if Id.Typ = Ndata then 3926 if not Had_Space then 3927 Fatal_Error (Parser, Error_Ndata_Space, Id); 3928 end if; 3929 3930 if Is_Parameter /= Null_Token then 3931 Fatal_Error (Parser, Error_Ndata_ParamEntity, Id); 3932 end if; 3933 Next_Token_Skip_Spaces (Input, Parser, Ndata_Id, True); 3934 3935 if Ndata_Id.Typ /= Text and then Ndata_Id.Typ /= Name then 3936 Fatal_Error (Parser, Error_Ndata_String); 3937 else 3938 Sym := Find_Symbol (Parser, Ndata_Id); 3939 3940 if Parser.Feature_Validation 3941 and then Get (Parser.Notations, Sym) = Null_Notation 3942 then 3943 -- The notation might be declared later in the same DTD 3944 Set (Parser.Notations, 3945 (Name => Sym, 3946 Declaration_Seen => False)); 3947 end if; 3948 3949 Next_Token_Skip_Spaces (Input, Parser, Id); 3950 end if; 3951 end if; 3952 3953 elsif Id.Typ = Double_String_Delimiter 3954 or else Id.Typ = Single_String_Delimiter 3955 then 3956 Get_String (Id, Entity_Str_Def_State, Def_Start, Def_End); 3957 Next_Token_Skip_Spaces (Input, Parser, Id); 3958 else 3959 Fatal_Error (Parser, Error_Entity_Definition); 3960 end if; 3961 3962 if Id.Typ /= End_Of_Tag then 3963 Fatal_Error (Parser, Error_Entity_Definition_Unterminated); 3964 end if; 3965 3966 -- Only report the first definition 3967 3968 Sym := Find_Symbol 3969 (Parser, 3970 Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 3971 & Parser.Buffer (Name_Id.First .. Name_Id.Last)); 3972 3973 if Get (Parser.Entities, Sym) /= null then 3974 null; 3975 3976 elsif Def_End /= Null_Token then 3977 Set (Parser.Entities, 3978 new Entity_Entry' 3979 (Name => Find_Symbol 3980 (Parser, 3981 Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 3982 & Parser.Buffer (Name_Id.First .. Name_Id.Last)), 3983 Value => Find_Symbol 3984 (Parser, 3985 Parser.Buffer (Def_Start.First .. Def_End.Last)), 3986 Public => No_Symbol, 3987 Unparsed => False, 3988 External_Declaration => (Parser.Inputs /= null 3989 and then Parser.Inputs.External) 3990 or else Parser.In_External_Entity, 3991 External => False, 3992 Already_Read => False)); 3993 if Debug_Internal then 3994 Put_Line ("Internal_Entity_Decl: " 3995 & Parser.Buffer (Name_Id.First .. Name_Id.Last) & "=" 3996 & Parser.Buffer (Def_Start.First .. Def_End.Last) 3997 & " length=" 3998 & Integer'Image (Def_End.Last - Def_Start.First + 1)); 3999 end if; 4000 Internal_Entity_Decl 4001 (Parser, 4002 Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 4003 & Parser.Buffer (Name_Id.First .. Name_Id.Last), 4004 Value => Parser.Buffer (Def_Start.First .. Def_End.Last)); 4005 4006 elsif Ndata_Id /= Null_Token then 4007 Set (Parser.Entities, 4008 new Entity_Entry' 4009 (Name => Find_Symbol 4010 (Parser, 4011 Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 4012 & Parser.Buffer (Name_Id.First .. Name_Id.Last)), 4013 Value => No_Symbol, 4014 Public => No_Symbol, 4015 Unparsed => True, 4016 External_Declaration => (Parser.Inputs /= null 4017 and then Parser.Inputs.External) 4018 or else Parser.In_External_Entity, 4019 External => False, 4020 Already_Read => True)); 4021 Unparsed_Entity_Decl 4022 (Parser, 4023 Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 4024 & Parser.Buffer (Name_Id.First .. Name_Id.Last), 4025 System_Id => 4026 Parser.Buffer (System_Start.First .. System_End.Last), 4027 Notation_Name => 4028 Parser.Buffer (Ndata_Id.First .. Ndata_Id.Last)); 4029 4030 else 4031 Set 4032 (Parser.Entities, 4033 new Entity_Entry' 4034 (Name => Find_Symbol 4035 (Parser, 4036 Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 4037 & Parser.Buffer (Name_Id.First .. Name_Id.Last)), 4038 Value => Find_Symbol 4039 (Parser, 4040 Parser.Buffer (System_Start.First .. System_End.Last)), 4041 Public => Find_Symbol 4042 (Parser, 4043 Parser.Buffer (Public_Start.First .. Public_End.Last)), 4044 Unparsed => False, 4045 External_Declaration => (Parser.Inputs /= null 4046 and then Parser.Inputs.External) 4047 or else Parser.In_External_Entity, 4048 External => True, 4049 Already_Read => False)); 4050 4051 External_Entity_Decl 4052 (Parser, 4053 Name => Parser.Buffer (Is_Parameter.First .. Is_Parameter.Last) 4054 & Parser.Buffer (Name_Id.First .. Name_Id.Last), 4055 Public_Id => Parser.Buffer 4056 (Public_Start.First .. Public_End.Last), 4057 System_Id => Parser.Buffer 4058 (System_Start.First .. System_End.Last)); 4059 end if; 4060 4061 if Is_Parameter /= Null_Token then 4062 Reset_Buffer (Parser, Is_Parameter); 4063 else 4064 Reset_Buffer (Parser, Name_Id); 4065 end if; 4066 Set_State (Parser, DTD_State); 4067 end Parse_Entity_Def; 4068 4069 ----------------------- 4070 -- Parse_Element_Def -- 4071 ----------------------- 4072 4073 procedure Parse_Element_Def (Id : in out Token) is 4074 Name_Id : Token; 4075 M : Element_Model_Ptr; 4076 M2 : Content_Model; 4077 NS_Id : Token; 4078 begin 4079 Set_State (Parser, Element_Def_State); 4080 4081 Next_NS_Token_Skip_Spaces (Input, Parser, NS_Id, Name_Id); 4082 4083 if Name_Id.Typ /= Name then 4084 Fatal_Error (Parser, Error_Is_Name); 4085 end if; 4086 4087 Next_Token_Skip_Spaces (Input, Parser, Id, Must_Have => True); 4088 4089 case Id.Typ is 4090 when Empty => M := new Element_Model (Empty); 4091 when Any => M := new Element_Model (Anything); 4092 when Open_Paren => 4093 Parse_Element_Model 4094 (Input, Parser, M, Attlist => False, Open_Was_Read => True); 4095 when others => 4096 Fatal_Error (Parser, "Invalid content model: expecting" 4097 & " '(', 'EMPTY' or 'ANY'", Id); 4098 end case; 4099 Next_Token_Skip_Spaces (Input, Parser, Id); 4100 4101 if Id.Typ /= End_Of_Tag then 4102 Free (M); 4103 Fatal_Error (Parser, "Expecting end of ELEMENT definition"); 4104 end if; 4105 4106 M2 := Create_Model (M); 4107 Element_Decl 4108 (Parser, Parser.Buffer (Name_Id.First .. Name_Id.Last), M2); 4109 Unref (M2); 4110 4111 if NS_Id /= Null_Token then 4112 Reset_Buffer (Parser, NS_Id); 4113 else 4114 Reset_Buffer (Parser, Name_Id); 4115 end if; 4116 4117 Set_State (Parser, DTD_State); 4118 end Parse_Element_Def; 4119 4120 ------------------------ 4121 -- Parse_Notation_Def -- 4122 ------------------------ 4123 4124 procedure Parse_Notation_Def (Id : in out Token) is 4125 Public_Start, Public_End : Token := Null_Token; 4126 System_Start, System_End : Token := Null_Token; 4127 Name_Id : Token; 4128 Sym : Symbol; 4129 begin 4130 Set_State (Parser, Element_Def_State); 4131 Next_Token_Skip_Spaces (Input, Parser, Name_Id); 4132 4133 Check_Valid_Name_Or_NCname (Parser, Name_Id); 4134 4135 if Name_Id.Typ /= Name then 4136 Fatal_Error (Parser, Error_Is_Name); 4137 end if; 4138 4139 Next_Token_Skip_Spaces (Input, Parser, Id); 4140 4141 if Id.Typ = Public or else Id.Typ = System then 4142 Get_External 4143 (Id, System_Start, System_End, Public_Start, Public_End, True); 4144 if Id.Typ = Space then 4145 Next_Token (Input, Parser, Id); 4146 end if; 4147 else 4148 Fatal_Error (Parser, Error_Invalid_Notation_Decl); 4149 end if; 4150 4151 if Id.Typ /= End_Of_Tag then 4152 Fatal_Error (Parser, "Expecting end of NOTATION definition"); 4153 end if; 4154 4155 if Contains_URI_Fragment 4156 (Parser.Buffer (System_Start.First .. System_End.Last)) 4157 then 4158 Error (Parser, Error_System_URI); 4159 end if; 4160 4161 if Parser.Hooks.Notation_Decl /= null then 4162 Parser.Hooks.Notation_Decl 4163 (Parser'Access, 4164 Name => Parser.Buffer (Name_Id.First .. Name_Id.Last), 4165 Public_Id => 4166 Parser.Buffer (Public_Start.First .. Public_End.Last), 4167 System_Id => 4168 Parser.Buffer (System_Start.First .. System_End.Last)); 4169 end if; 4170 4171 Notation_Decl 4172 (Parser, 4173 Name => Parser.Buffer (Name_Id.First .. Name_Id.Last), 4174 Public_Id => Parser.Buffer (Public_Start.First .. Public_End.Last), 4175 System_Id => 4176 Parser.Buffer (System_Start.First .. System_End.Last)); 4177 4178 if Parser.Feature_Validation then 4179 Sym := Find_Symbol (Parser, Name_Id); 4180 Remove (Parser.Notations, Sym); 4181 Set (Parser.Notations, 4182 (Name => Sym, 4183 Declaration_Seen => True)); 4184 end if; 4185 4186 Set_State (Parser, DTD_State); 4187 Reset_Buffer (Parser, Name_Id); 4188 end Parse_Notation_Def; 4189 4190 ----------------------- 4191 -- Parse_Attlist_Def -- 4192 ----------------------- 4193 4194 procedure Parse_Attlist_Def (Id : in out Token) is 4195 M : Element_Model_Ptr; 4196 M2 : Content_Model; 4197 Default_Start, Default_End : Token; 4198 Ename_Id, Ename_NS_Id, Name_Id, NS_Id, Type_Id : Token; 4199 Default_Id : Token; 4200 Attr : Attributes_Table.Element_Ptr; 4201 Default_Decl : Default_Declaration; 4202 Att_Type : Attribute_Type; 4203 Ename, SName : Symbol; 4204 begin 4205 Set_State (Parser, Element_Def_State); 4206 4207 Next_NS_Token_Skip_Spaces (Input, Parser, Ename_NS_Id, Ename_Id); 4208 4209 if Ename_Id.Typ /= Name then 4210 Fatal_Error (Parser, Error_Is_Name, Ename_Id); 4211 end if; 4212 4213 Ename := Find_Symbol (Parser, Ename_Id); 4214 4215 Attr := Get_Ptr (Parser.Default_Atts, Ename); 4216 if Attr = null then 4217 declare 4218 Attr2 : constant Attributes_Entry := 4219 (Element_Name => Ename, 4220 Attributes => (0, null)); 4221 begin 4222 Set (Parser.Default_Atts, Attr2); 4223 Attr := Get_Ptr (Parser.Default_Atts, Ename); 4224 end; 4225 end if; 4226 4227 if Id.Typ = Space then 4228 Next_Token_Skip_Spaces (Input, Parser, Id); 4229 end if; 4230 4231 loop 4232 -- Temporarily disable In_Attlist, so that the names like "NAME" 4233 -- are parsed as names and not as NMTOKEN. 4234 Set_State (Parser, Attribute_Def_Name_State); 4235 4236 Next_Token_Skip_Spaces (Input, Parser, Id); 4237 exit when Id.Typ = End_Of_Tag or else Id.Typ = End_Of_Input; 4238 4239 Get_Name_NS (Id, NS_Id, Name_Id); 4240 SName := Find_Symbol (Parser, Name_Id); 4241 4242 if Id.Typ /= Space then 4243 Fatal_Error (Parser, Error_Expecting_Space, Id); -- 3.3 4244 end if; 4245 4246 Set_State (Parser, Attribute_Def_State); 4247 Next_Token_Skip_Spaces (Input, Parser, Id); 4248 4249 Type_Id := Id; 4250 Default_Start := Null_Token; 4251 Default_End := Null_Token; 4252 case Type_Id.Typ is 4253 when Id_Type => Att_Type := Sax.Attributes.Id; 4254 when Idref => Att_Type := Sax.Attributes.Idref; 4255 when Idrefs => Att_Type := Sax.Attributes.Idrefs; 4256 when Cdata => Att_Type := Sax.Attributes.Cdata; 4257 when Nmtoken => Att_Type := Sax.Attributes.Nmtoken; 4258 when Nmtokens => Att_Type := Sax.Attributes.Nmtokens; 4259 when Entity => Att_Type := Sax.Attributes.Entity; 4260 when Entities => Att_Type := Sax.Attributes.Entities; 4261 when Notation => 4262 Att_Type := Notation; 4263 Next_Token (Input, Parser, Id); 4264 if Id.Typ /= Space then 4265 Fatal_Error 4266 (Parser, -- 3.3.1 4267 "Space is required between NOTATION keyword" 4268 & " and list of enumerated", Id); 4269 end if; 4270 Parse_Element_Model (Input, Parser, M, True, False); 4271 4272 if Parser.Feature_Validation then 4273 for J in M.List'Range loop 4274 if Get (Parser.Notations, M.List (J).Name) /= 4275 Null_Notation 4276 then 4277 Error 4278 (Parser, Error_Notation_Undeclared 4279 & Get (M.List (J).Name).all, Id); 4280 end if; 4281 end loop; 4282 end if; 4283 4284 when Open_Paren => 4285 Att_Type := Enumeration; 4286 Parse_Element_Model (Input, Parser, M, True, True); 4287 4288 when others => 4289 Fatal_Error (Parser, Error_Attlist_Type); 4290 end case; 4291 4292 declare 4293 QName : constant Byte_Sequence := 4294 Qname_From_Name (Parser, NS_Id, Name_Id); 4295 Default_Val : Symbol; 4296 begin 4297 Next_Token_Skip_Spaces (Input, Parser, Default_Id, True); 4298 if Default_Id.Typ = Implied then 4299 Default_Decl := Sax.Attributes.Implied; 4300 elsif Default_Id.Typ = Required then 4301 Default_Decl := Sax.Attributes.Required; 4302 else 4303 Id := Default_Id; 4304 if Default_Id.Typ = Fixed then 4305 Next_Token_Skip_Spaces (Input, Parser, Id, True); 4306 Default_Decl := Sax.Attributes.Fixed; 4307 else 4308 Default_Decl := Sax.Attributes.Default; 4309 end if; 4310 4311 if Id.Typ = Double_String_Delimiter 4312 or else Id.Typ = Single_String_Delimiter 4313 then 4314 Get_String 4315 (Id, Attlist_Str_Def_State, Default_Start, Default_End, 4316 Normalize => True, Collapse_Spaces => True); 4317 4318 -- Errata 9 on XML 1.0 specs: the default value must be 4319 -- syntactically correct. Validity will only be checked 4320 -- if the attribute is used. 4321 4322 Default_Val := Find_Symbol 4323 (Parser, Default_Start, Default_End); 4324 4325 if Parser.Feature_Validation then 4326 Check_Attribute_Value 4327 (Parser, 4328 Local_Name => SName, 4329 Typ => Att_Type, 4330 Value => Default_Val, 4331 Error_Loc => Default_Start); 4332 end if; 4333 else 4334 Fatal_Error 4335 (Parser, "Invalid default value for attribute"); 4336 end if; 4337 end if; 4338 4339 if Parser.Feature_Validation 4340 and then Att_Type = Sax.Attributes.Id 4341 and then Default_Decl /= Sax.Attributes.Implied 4342 and then Default_Decl /= Sax.Attributes.Required 4343 then 4344 Error 4345 (Parser, 4346 "Default value for an ID attribute must be" 4347 & " IMPLIED or REQUIRED", 4348 Default_Id); 4349 end if; 4350 4351 -- Always report the attribute, even when we know the value 4352 -- won't be used. We can't do it coherently otherwise, in case 4353 -- an attribute is seen in the external subset, and then 4354 -- overriden in the internal subset. 4355 M2 := Create_Model (M); 4356 Attribute_Decl 4357 (Parser, 4358 Ename => Parser.Buffer (Ename_Id.First .. Ename_Id.Last), 4359 Aname => QName, 4360 Typ => Att_Type, 4361 Content => M2, 4362 Value_Default => Default_Decl, 4363 Value => Parser.Buffer 4364 (Default_Start.First .. Default_End.Last)); 4365 Unref (M2); 4366 4367 Append 4368 (List => Attr.Attributes, 4369 If_Unique => True, 4370 Location => Name_Id.Location, 4371 Local_Name => SName, 4372 Prefix => Find_Symbol (Parser, NS_Id), 4373 Value => Default_Val, 4374 Att_Type => Att_Type, 4375 Default_Decl => Default_Decl); 4376 end; 4377 4378 -- M will be freed automatically when the Default_Atts field is 4379 -- freed. However, we need to reset it for the next attribute 4380 -- in the list. 4381 M := null; 4382 4383 if NS_Id /= Null_Token then 4384 Reset_Buffer (Parser, NS_Id); 4385 else 4386 Reset_Buffer (Parser, Name_Id); 4387 end if; 4388 Set_State (Parser, Element_Def_State); 4389 end loop; 4390 4391 if Id.Typ /= End_Of_Tag then 4392 Fatal_Error (Parser, "Expecting end of ATTLIST definition"); 4393 end if; 4394 4395 Set_State (Parser, DTD_State); 4396 4397 if Ename_NS_Id /= Null_Token then 4398 Reset_Buffer (Parser, Ename_NS_Id); 4399 else 4400 Reset_Buffer (Parser, Ename_Id); 4401 end if; 4402 4403 exception 4404 when others => 4405 Free (M); 4406 raise; 4407 end Parse_Attlist_Def; 4408 4409 ----------------- 4410 -- Check_Model -- 4411 ----------------- 4412 4413 procedure Check_Model is 4414 begin 4415 null; 4416 end Check_Model; 4417 4418 ---------------- 4419 -- Get_String -- 4420 ---------------- 4421 4422 function Get_String (Str : Token) return String is 4423 begin 4424 return Parser.Buffer (Str.First .. Str.Last); 4425 end Get_String; 4426 4427 ---------------- 4428 -- Get_String -- 4429 ---------------- 4430 4431 function Get_String (First, Last : Token) return String is 4432 begin 4433 return Parser.Buffer (First.First .. Last.Last); 4434 end Get_String; 4435 4436 -------------------------------- 4437 -- Check_And_Define_Namespace -- 4438 -------------------------------- 4439 4440 procedure Check_And_Define_Namespace 4441 (Prefix, URI : Symbol; Location : Sax.Locators.Location) is 4442 begin 4443 if Prefix = Empty_String then 4444 -- [2] Empty value is legal for the default namespace, and 4445 -- provides unbinding 4446 null; 4447 4448 else 4449 if Prefix = Parser.Xmlns_Sequence then 4450 Fatal_Error -- NS 3 4451 (Parser, "Cannot redefine the xmlns prefix", Location); 4452 4453 elsif URI = Empty_String then 4454 Fatal_Error 4455 (Parser, -- NS 2.2 4456 "Cannot use an empty URI for namespaces", Location); 4457 4458 elsif Prefix = Parser.Xml_Sequence then 4459 if URI /= Parser.Namespaces_URI_Sequence then 4460 Fatal_Error -- NS 3 4461 (Parser, "Cannot redefine the xml prefix", Location); 4462 end if; 4463 4464 elsif URI = Parser.Namespaces_URI_Sequence then 4465 Fatal_Error 4466 (Parser, -- NS 3 4467 "Cannot bind the namespace URI to a prefix other" 4468 & " than xml", Location); 4469 end if; 4470 end if; 4471 4472 if URI /= Empty_String 4473 and then not Is_Valid_IRI 4474 (Get (URI).all, Version => Parser.XML_Version) 4475 then 4476 if Parser.Feature_Allow_Relative_IRI then 4477 Warning 4478 (Parser, 4479 "Invalid absolute IRI (Internationalized Resource" 4480 & " Identifier) for namespace: """ & Get (URI).all & """", 4481 Location); 4482 else 4483 Error 4484 (Parser, 4485 "Invalid absolute IRI (Internationalized Resource" 4486 & " Identifier) for namespace: """ & Get (URI).all & """", 4487 Location); 4488 -- NS 2 4489 end if; 4490 end if; 4491 4492 Add_Namespace (Parser, Parser.Current_Node, Prefix, URI); 4493 end Check_And_Define_Namespace; 4494 4495 ---------------------------- 4496 -- Add_Default_Attributes -- 4497 ---------------------------- 4498 4499 procedure Add_Default_Attributes 4500 (DTD_Attr : Sax_Attribute_Array_Access) 4501 is 4502 Found : Boolean; 4503 Is_Xmlns : Boolean; 4504 begin 4505 -- Add all the default attributes to the element. 4506 -- We shouldn't add an attribute if it was overriden by the user 4507 4508 if DTD_Attr /= null then 4509 for J in DTD_Attr'Range loop 4510 -- We must compare Qnames, since namespaces haven't been 4511 -- resolved in the default attributes. 4512 if DTD_Attr (J).Default_Decl = Default 4513 or else DTD_Attr (J).Default_Decl = Fixed 4514 then 4515 Found := False; 4516 4517 for A in 1 .. Parser.Attributes.Count loop 4518 if Parser.Attributes.List (A).Local_Name = 4519 DTD_Attr (J).Local_Name 4520 and then Parser.Attributes.List (A).Prefix = 4521 DTD_Attr (J).Prefix 4522 then 4523 Found := True; 4524 exit; 4525 end if; 4526 end loop; 4527 4528 if not Found then 4529 Is_Xmlns := DTD_Attr (J).Prefix = Parser.Xmlns_Sequence; 4530 4531 if Parser.Feature_Namespace_Prefixes 4532 or else not Is_Xmlns 4533 then 4534 Append 4535 (List => Parser.Attributes, 4536 If_Unique => True, 4537 Location => No_Location, 4538 Local_Name => DTD_Attr (J).Local_Name, 4539 Prefix => DTD_Attr (J).Prefix, 4540 Value => DTD_Attr (J).Value, 4541 Att_Type => DTD_Attr (J).Att_Type, 4542 Default_Decl => DTD_Attr (J).Default_Decl); 4543 end if; 4544 4545 -- Is this a namespace declaration ? 4546 if Is_Xmlns then 4547 -- Following warning is because for parser that don't 4548 -- read external DTDs, the behavior would be different 4549 -- for the same document. 4550 Warning 4551 (Parser, 4552 "namespace-declaring attribute inserted via " 4553 & "DTD defaulting mechanisms are not good style"); 4554 Add_Namespace 4555 (Parser, Parser.Current_Node, 4556 Prefix => DTD_Attr (J).Local_Name, 4557 URI => DTD_Attr (J).Value); 4558 end if; 4559 end if; 4560 end if; 4561 end loop; 4562 end if; 4563 end Add_Default_Attributes; 4564 4565 ---------------------------------- 4566 -- Resolve_Attribute_Namespaces -- 4567 ---------------------------------- 4568 4569 procedure Resolve_Attribute_Namespaces is 4570 NS : XML_NS; 4571 begin 4572 if Parser.Feature_Namespace then 4573 for J in 1 .. Parser.Attributes.Count loop 4574 Find_NS (Parser, Parser.Attributes.List (J).Prefix, NS, 4575 Include_Default_NS => False); 4576 if NS = No_XML_NS then 4577 Fatal_Error 4578 (Parser, Error_Prefix_Not_Declared 4579 & Get (Parser.Attributes.List (J).Prefix).all); 4580 end if; 4581 4582 for A in 1 .. J - 1 loop 4583 if Parser.Attributes.List (A).URI = Get_URI (NS) 4584 and then Parser.Attributes.List (A).Local_Name = 4585 Parser.Attributes.List (J).Local_Name 4586 then 4587 Fatal_Error -- 3.1 4588 (Parser, "Attributes may appear only once: " 4589 & To_QName 4590 (Get_URI (NS), 4591 Parser.Attributes.List (J).Local_Name), 4592 Parser.Attributes.List (J).Location); 4593 end if; 4594 end loop; 4595 4596 Parser.Attributes.List (J).URI := Get_URI (NS); 4597 end loop; 4598 end if; 4599 end Resolve_Attribute_Namespaces; 4600 4601 ---------------------- 4602 -- Parse_Attributes -- 4603 ---------------------- 4604 4605 procedure Parse_Attributes 4606 (Elem_NS_Id, Elem_Name_Id : Token; Id : in out Token) 4607 is 4608 Elem : constant Symbol := Find_Symbol 4609 (Parser, Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id)); 4610 Attr : constant Sax_Attribute_List := Get 4611 (Parser.Default_Atts, Elem).Attributes; 4612 -- The attributes as defined in the DTD 4613 4614 Attr_NS_Id : Token; 4615 Attr_Name_Id : Token; 4616 Value_Start : Token; 4617 Value_End : Token; 4618 Add_Attr : Boolean; 4619 A : Integer; 4620 Attr_Name, Attr_Prefix, Attr_Value : Symbol; 4621 Attr_Type : Attribute_Type; 4622 4623 function Find_Declaration return Integer; 4624 -- Return the position of the declaration for Attr_Prefix:Attr_Name 4625 -- in Attr, or -1 if no declaration exists 4626 4627 procedure Check_Required_Attributes; 4628 -- Check whether all required attributes have been defined 4629 4630 ---------------------- 4631 -- Find_Declaration -- 4632 ---------------------- 4633 4634 function Find_Declaration return Integer is 4635 begin 4636 if Attr.List /= null then 4637 -- First test: same prefix and local name. We will test later 4638 -- for a same URI 4639 4640 for A in Attr.List'First .. Attr.Count loop 4641 if Attr.List (A).Local_Name = Attr_Name 4642 and then Attr.List (A).Prefix = Attr_Prefix 4643 then 4644 return A; 4645 end if; 4646 end loop; 4647 end if; 4648 return -1; 4649 end Find_Declaration; 4650 4651 ------------------------------- 4652 -- Check_Required_Attributes -- 4653 ------------------------------- 4654 4655 procedure Check_Required_Attributes is 4656 Found : Boolean; 4657 begin 4658 if Parser.Feature_Validation and then Attr.List /= null then 4659 for A in Attr.List'First .. Attr.Count loop 4660 if Attr.List (A).Default_Decl = Required then 4661 Found := False; 4662 4663 for T in 1 .. Parser.Attributes.Count loop 4664 if Parser.Attributes.List (T).Local_Name = 4665 Attr.List (A).Local_Name 4666 and then Parser.Attributes.List (T).Prefix = 4667 Attr.List (A).Prefix 4668 then 4669 Found := True; 4670 exit; 4671 end if; 4672 end loop; 4673 4674 if not Found then 4675 Error 4676 (Parser, "[VC 3.3.2] Required attribute '" 4677 & To_QName (Attr.List (A).Prefix, 4678 Attr.List (A).Local_Name) 4679 & "' must be defined"); 4680 end if; 4681 end if; 4682 end loop; 4683 end if; 4684 end Check_Required_Attributes; 4685 4686 begin 4687 Parser.Attributes.Count := 0; 4688 4689 while Id.Typ /= End_Of_Tag 4690 and then Id.Typ /= End_Of_Input 4691 and then Id.Typ /= End_Of_Start_Tag 4692 loop 4693 Get_Name_NS (Id, Attr_NS_Id, Attr_Name_Id); 4694 if Id.Typ = Space then 4695 Next_Token (Input, Parser, Id); 4696 end if; 4697 4698 if Id.Typ /= Equal then 4699 Fatal_Error -- 3.1 4700 (Parser, "Attributes must have an explicit value", Id); 4701 end if; 4702 4703 Attr_Name := Find_Symbol (Parser, Attr_Name_Id); 4704 Attr_Prefix := Find_Symbol (Parser, Attr_NS_Id); 4705 4706 A := Find_Declaration; 4707 4708 Next_Token_Skip_Spaces (Input, Parser, Id); 4709 if Id.Typ /= Double_String_Delimiter 4710 and then Id.Typ /= Single_String_Delimiter 4711 then 4712 Fatal_Error -- 3.1 4713 (Parser, "Attribute values must be quoted", Id); 4714 end if; 4715 4716 -- 3.3.3: If the attribute's type is not CDATA, we must 4717 -- normalize it, ie collapse sequence of spaces. 4718 -- ??? What if the information comes from an XML Schema instead 4719 -- of a DTD 4720 -- ??? That should be done only after we have processed the 4721 -- namespaces, otherwise we do not know what attribute we are 4722 -- dealing with 4723 -- In XML Schema 1.1 Part 1, Section 3.1.4, it is indicated that 4724 -- we should always normalize attribute values according to the 4725 -- whitespace property of their type. As a result, we do not 4726 -- normalize here by default if the attribute was registered, and 4727 -- it will be done by the schema parser if we are using one 4728 -- (see Hook_Start_Element). 4729 4730 Get_String 4731 (Id, Attr_Value_State, Value_Start, Value_End, 4732 Normalize => True, 4733 Collapse_Spaces => A /= -1 4734 and then Attr.List (A).Att_Type /= Cdata); 4735 4736 Attr_Value := Find_Symbol (Parser, Value_Start, Value_End); 4737 Add_Attr := True; 4738 4739 -- Is this a namespace declaration ? 4740 4741 if Parser.Feature_Namespace 4742 and then Attr_Prefix = Parser.Xmlns_Sequence 4743 then 4744 Check_And_Define_Namespace 4745 (Prefix => Attr_Name, 4746 URI => Attr_Value, 4747 Location => Attr_Name_Id.Location); 4748 Add_Attr := Parser.Feature_Namespace_Prefixes; 4749 4750 -- Is this the declaration of the default namespace (xmlns="uri") 4751 4752 elsif Parser.Feature_Namespace 4753 and then Attr_NS_Id = Null_Token 4754 and then Attr_Name = Parser.Xmlns_Sequence 4755 then 4756 if Get (Attr_Value).all = Xmlns_URI_Sequence 4757 or else Get (Attr_Value).all = Namespaces_URI_Sequence 4758 then 4759 Fatal_Error 4760 (Parser, 4761 "The xml namespace cannot be declared as the default" 4762 & " namespace"); 4763 end if; 4764 4765 -- We might have a FIXED declaration for this attribute in the 4766 -- DTD, as per the XML Conformance testsuite 4767 if Parser.Feature_Validation 4768 and then A /= -1 4769 then 4770 if Attr.List (A).Default_Decl = Fixed 4771 and then Attr.List (A).Value /= Attr_Value 4772 then 4773 Error 4774 (Parser, 4775 "[VC 3.3.2] xmlns attribute doesn't match FIXED value", 4776 Value_Start); 4777 end if; 4778 end if; 4779 4780 Check_And_Define_Namespace 4781 (Prefix => Empty_String, 4782 URI => Attr_Value, 4783 Location => Attr_Name_Id.Location); 4784 Add_Attr := Parser.Feature_Namespace_Prefixes; 4785 4786 else 4787 -- All attributes must be defined (including xml:lang, that 4788 -- requires additional testing afterwards) 4789 if Parser.Feature_Validation then 4790 if Attr.List = null then 4791 Error 4792 (Parser, "[VC] No attribute allowed for element " 4793 & Get (Parser.Current_Node.Name).all, 4794 Attr_Name_Id); 4795 elsif A = -1 then 4796 Error 4797 (Parser, "[VC] Attribute not declared in DTD: " 4798 & To_QName (Attr_Prefix, Attr_Name), 4799 Attr_Name_Id); 4800 end if; 4801 end if; 4802 4803 if Get_String (Attr_NS_Id) = Xml_Sequence then 4804 if Get_String (Attr_Name_Id) = Lang_Sequence then 4805 Test_Valid_Lang 4806 (Parser, Get_String (Value_Start, Value_End)); 4807 4808 elsif Get_String (Attr_Name_Id) = Space_Word_Sequence then 4809 Test_Valid_Space 4810 (Parser, Get_String (Value_Start, Value_End)); 4811 end if; 4812 end if; 4813 end if; 4814 4815 -- Register the attribute in the temporary list, until we can 4816 -- properly resolve namespaces 4817 4818 if Add_Attr then 4819 if Debug_Internal then 4820 Put_Line 4821 ("Register attribute: " 4822 & Qname_From_Name (Parser, Attr_NS_Id, Attr_Name_Id) 4823 & " value=" & Get_String (Value_Start, Value_End)); 4824 end if; 4825 4826 if A /= -1 then 4827 if Attr.List (A).Default_Decl = Fixed 4828 and then Attr.List (A).Value /= Attr_Value 4829 then 4830 Error 4831 (Parser, "[VC 3.3.2] Fixed attribute '" 4832 & To_QName (Attr_Prefix, Attr_Name) 4833 & "' must have the defined value", 4834 Attr_Name_Id.Location); 4835 end if; 4836 4837 Attr_Type := Attr.List (A).Att_Type; 4838 else 4839 Attr_Type := Cdata; 4840 end if; 4841 4842 Append 4843 (List => Parser.Attributes, 4844 If_Unique => False, 4845 Location => Attr_Name_Id.Location, 4846 Local_Name => Attr_Name, 4847 Prefix => Attr_Prefix, 4848 Att_Type => Attr_Type, 4849 Value => Attr_Value); 4850 end if; 4851 4852 if Attr_NS_Id /= Null_Token then 4853 Reset_Buffer (Parser, Attr_NS_Id); 4854 else 4855 Reset_Buffer (Parser, Attr_Name_Id); 4856 end if; 4857 4858 Next_Token (Input, Parser, Id); 4859 if Id.Typ = Space then 4860 Next_Token (Input, Parser, Id); 4861 elsif Id.Typ /= End_Of_Tag and then Id.Typ /= End_Of_Start_Tag then 4862 Fatal_Error (Parser, Error_Expecting_Space, Id); 4863 end if; 4864 end loop; 4865 4866 Check_Required_Attributes; 4867 4868 Add_Default_Attributes (Attr.List); 4869 4870 -- Check attribute values. We must do that after adding the default 4871 -- attributes, so that they are properly checked as well. It would be 4872 -- nice to be able to check them only once, but that can't be done 4873 -- when they are declared (since they might be referencing entities 4874 -- declared after them in the DTD) 4875 4876 if Parser.Feature_Validation then 4877 for Att in 1 .. Parser.Attributes.Count loop 4878 Check_Attribute_Value 4879 (Parser, 4880 Local_Name => Parser.Attributes.List (Att).Local_Name, 4881 Typ => Parser.Attributes.List (Att).Att_Type, 4882 Value => Parser.Attributes.List (Att).Value, 4883 Error_Loc => Elem_Name_Id); 4884 end loop; 4885 end if; 4886 end Parse_Attributes; 4887 4888 --------------------- 4889 -- Parse_Start_Tag -- 4890 --------------------- 4891 4892 procedure Parse_Start_Tag is 4893 Open_Id : constant Token := Id; 4894 Elem_Name_Id, Elem_NS_Id : Token; 4895 NS : XML_NS; 4896 4897 begin 4898 Set_State (Parser, Tag_State); 4899 4900 Parser.Current_Node := new Element' 4901 (NS => No_XML_NS, 4902 Name => No_Symbol, 4903 Namespaces => No_XML_NS, 4904 Start => Id.Location, 4905 Start_Tag_End => Id.Location, 4906 Parent => Parser.Current_Node); 4907 4908 Next_Token (Input, Parser, Id); 4909 Get_Name_NS (Id, Elem_NS_Id, Elem_Name_Id); 4910 4911 Parser.Current_Node.Name := Find_Symbol (Parser, Elem_Name_Id); 4912 4913 if Parser.Current_Node.Parent = null then 4914 Parser.Num_Toplevel_Elements := Parser.Num_Toplevel_Elements + 1; 4915 if Parser.Num_Toplevel_Elements > 1 then 4916 Fatal_Error -- 2.1 4917 (Parser, "Too many children for top-level node," 4918 & " when adding <" 4919 & Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id) 4920 & ">", Open_Id); 4921 end if; 4922 4923 if Parser.Feature_Validation then 4924 if Parser.DTD_Name = No_Symbol then 4925 Error -- VC 2.8 4926 (Parser, "No DTD defined for this document", Id); 4927 4928 elsif Parser.DTD_Name /= Parser.Current_Node.Name then 4929 Error 4930 (Parser, "[VC 2.8] Name of root element doesn't match name" 4931 & " of DTD ('" 4932 & Get (Parser.DTD_Name).all & "')", Id); 4933 end if; 4934 end if; 4935 4936 elsif Parser.Feature_Validation then 4937 Check_Model; 4938 end if; 4939 4940 if Elem_NS_Id /= Null_Token 4941 and then Get_String (Elem_NS_Id) = Xmlns_Sequence 4942 then 4943 Fatal_Error (Parser, "Elements must not have the prefix xmlns"); 4944 end if; 4945 4946 -- Call the hook before checking the attributes. This might mean we 4947 -- are passing incorrect attributes (or missing ones), but the hook 4948 -- is used for validation (otherwise standard users should use 4949 -- Start_Element itself). 4950 -- We want the count of elements in the NS to not include the current 4951 -- context. 4952 4953 if Debug_Internal then 4954 Put_Line 4955 ("Start_Element " 4956 & Qname_From_Name (Parser, Elem_NS_Id, Elem_Name_Id)); 4957 end if; 4958 4959 -- We need to process the attributes first, because they might define 4960 -- the namespace for the element 4961 4962 if Id.Typ = Space then 4963 Next_Token (Input, Parser, Id); 4964 Parse_Attributes (Elem_NS_Id, Elem_Name_Id, Id); 4965 4966 elsif Id.Typ /= End_Of_Tag 4967 and then Id.Typ /= End_Of_Start_Tag 4968 then 4969 Fatal_Error (Parser, Error_Expecting_Space, Id); 4970 4971 else 4972 -- We still need to check the attributes, in case we have none but 4973 -- some where required 4974 Parse_Attributes (Elem_NS_Id, Elem_Name_Id, Id); 4975 end if; 4976 4977 Resolve_Attribute_Namespaces; 4978 4979 -- And report the elements to the callbacks 4980 4981 Set_State (Parser, Default_State); 4982 Find_NS (Parser, Elem_NS_Id, NS); 4983 4984 Parser.Current_Node.NS := NS; 4985 4986 if Parser.Hooks.Start_Element /= null then 4987 Parser.Hooks.Start_Element 4988 (Parser'Unchecked_Access, Parser.Current_Node, 4989 Parser.Attributes'Access); 4990 end if; 4991 4992 -- This does not take into account the use of the namespace by the 4993 -- attributes. 4994 -- ??? That would be costly to again do a Find_NS for each of the 4995 -- attributes. ??? We don't do a Find_NS anymore, so that would be 4996 -- doable in fact. 4997 Increment_Count (NS); 4998 4999 Parser.Current_Node.Start_Tag_End := Get_Location (Parser.Locator); 5000 5001 pragma Warnings (Off, "overlaps with actual"); 5002 Start_Element 5003 (Parser, 5004 NS => NS, 5005 Local_Name => Parser.Current_Node.Name, 5006 Atts => Parser.Attributes); 5007 pragma Warnings (On, "overlaps with actual"); 5008 5009 if Id.Typ = End_Of_Start_Tag then 5010 End_Element; 5011 end if; 5012 5013 if Elem_NS_Id /= Null_Token then 5014 Reset_Buffer (Parser, Elem_NS_Id); 5015 else 5016 Reset_Buffer (Parser, Elem_Name_Id); 5017 end if; 5018 5019 if Id.Typ = End_Of_Input then 5020 Fatal_Error (Parser, "Unexpected end of stream"); 5021 end if; 5022 end Parse_Start_Tag; 5023 5024 ---------------------------- 5025 -- Parse_Doctype_Contents -- 5026 ---------------------------- 5027 5028 procedure Parse_Doctype_Contents is 5029 Start_Id : Symbol; 5030 5031 Num_Include : Natural := 0; 5032 -- Number of <![INCLUDE[ sections at the top of the external 5033 -- subset. 5034 5035 Num_Ignore : Natural := 0; 5036 -- Number of <![IGNORE[ and <![INCLUDE[ sections, starting at the 5037 -- first ignore section. 5038 begin 5039 loop 5040 Next_Token_Skip_Spaces (Input, Parser, Id); 5041 Start_Id := Id.Location.System_Id; 5042 5043 if Id.Typ = Ignore then 5044 Num_Ignore := Num_Ignore + 1; 5045 5046 elsif Id.Typ = Include or else Id.Typ = Start_Conditional then 5047 if Num_Ignore > 0 then 5048 Num_Ignore := Num_Ignore + 1; 5049 else 5050 Num_Include := Num_Include + 1; 5051 end if; 5052 5053 elsif Id.Typ = End_Conditional then 5054 if Num_Include + Num_Ignore = 0 then 5055 Fatal_Error (Parser, Error_Unexpected_Chars3, Id); 5056 elsif Num_Ignore > 0 then 5057 Num_Ignore := Num_Ignore - 1; 5058 else 5059 Num_Include := Num_Include - 1; 5060 end if; 5061 5062 elsif Id.Typ = End_Of_Input then 5063 exit; 5064 5065 elsif Num_Ignore = 0 then 5066 case Id.Typ is 5067 when End_Of_Tag | Internal_DTD_End => 5068 exit; 5069 when Entity_Def => Parse_Entity_Def (Id); 5070 when Element_Def => Parse_Element_Def (Id); 5071 when Notation => Parse_Notation_Def (Id); 5072 when Attlist_Def => Parse_Attlist_Def (Id); 5073 when Text | Name => 5074 if Id.First < Id.Last then 5075 Fatal_Error 5076 (Parser, "Unexpected character in the DTD"); 5077 else 5078 Reset_Buffer (Parser, Id); 5079 end if; 5080 when Comment => 5081 Comment (Parser, Parser.Buffer (Id.First .. Id.Last)); 5082 Reset_Buffer (Parser, Id); 5083 when Start_Of_PI => 5084 Parse_PI (Id); 5085 when others => 5086 Fatal_Error -- 2.8 5087 (Parser, "Element not allowed in the DTD", Id); 5088 end case; 5089 5090 else 5091 Reset_Buffer (Parser, Id); 5092 end if; 5093 5094 -- XML 1.0 Errata 14 or XML 1.1 section 4.3.2: nesting of entities 5095 -- doesn't apply for well-formedness in the DTD 5096 if Parser.Feature_Validation then 5097 if Start_Id /= Id.Location.System_Id then 5098 Error (Parser, Error_Entity_Self_Contained, Id); 5099 end if; 5100 end if; 5101 end loop; 5102 5103 if Num_Ignore + Num_Include /= 0 then 5104 Fatal_Error -- 3.4 5105 (Parser, "Conditional section must be properly terminated", 5106 Id); 5107 end if; 5108 end Parse_Doctype_Contents; 5109 5110 ------------------- 5111 -- Parse_Doctype -- 5112 ------------------- 5113 5114 procedure Parse_Doctype is 5115 Public_Start, Public_End : Token := Null_Token; 5116 System_Start, System_End : Token := Null_Token; 5117 Name_Id : Token; 5118 NS_Id : Token; 5119 begin 5120 Set_State (Parser, DTD_State); 5121 5122 Next_NS_Token_Skip_Spaces (Input, Parser, NS_Id, Name_Id); 5123 5124 if Name_Id.Typ /= Name then 5125 Fatal_Error (Parser, "Expecting name after <!DOCTYPE"); 5126 end if; 5127 5128 Next_Token_Skip_Spaces (Input, Parser, Id); 5129 5130 Get_External (Id, System_Start, System_End, Public_Start, Public_End); 5131 if Id.Typ = Space then 5132 Next_Token (Input, Parser, Id); 5133 end if; 5134 Start_DTD 5135 (Parser, 5136 Name => Parser.Buffer (Name_Id.First .. Name_Id.Last), 5137 Public_Id => Parser.Buffer (Public_Start.First .. Public_End.Last), 5138 System_Id => 5139 Parser.Buffer (System_Start.First .. System_End.Last)); 5140 5141 if Parser.Feature_Validation then 5142 Parser.DTD_Name := Find_Symbol (Parser, Name_Id); 5143 end if; 5144 5145 if Id.Typ = Internal_DTD_Start then 5146 Parse_Doctype_Contents; 5147 if Id.Typ /= Internal_DTD_End then 5148 Fatal_Error -- 2.8 5149 (Parser, "Expecting end of internal subset ']>'", Id); 5150 end if; 5151 elsif Id.Typ /= End_Of_Tag then 5152 Fatal_Error (Parser, "Expecting end of DTD"); 5153 end if; 5154 5155 -- Read the external subset if required. This needs to be read 5156 -- after the internal subset only, so that the latter gets 5157 -- priority (XML specifications 2.8) 5158 if System_End.Last >= System_Start.First then 5159 declare 5160 Loc : constant Sax.Locators.Location := 5161 Get_Location (Parser.Locator); 5162 System : constant Symbol := 5163 Find_Symbol 5164 (Parser, 5165 Parser.Buffer (System_Start.First .. System_End.Last)); 5166 URI : constant Symbol := 5167 Resolve_URI (Parser, System_Id (Parser), System); 5168 In_External : constant Boolean := Parser.In_External_Entity; 5169 Input_F : File_Input; 5170 Saved_Last_Read : constant Unicode_Char := Parser.Last_Read; 5171 begin 5172 Open (Get (URI).all, Input_F); 5173 5174 -- Protect against the case where the last character read was 5175 -- a LineFeed. 5176 Parser.Last_Read := Unicode_Char'Val (16#00#); 5177 Parser.Last_Read_Is_Valid := False; 5178 5179 Set_Line_Number (Parser.Locator, 1); 5180 Set_Column_Number (Parser.Locator, Prolog_Size (Input_F)); 5181 Set_System_Id (Parser.Locator, URI); 5182 Set_Public_Id (Parser.Locator, System); 5183 5184 if NS_Id /= Null_Token then 5185 Reset_Buffer (Parser, NS_Id); 5186 else 5187 Reset_Buffer (Parser, Name_Id); 5188 end if; 5189 5190 Parser.In_External_Entity := True; 5191 5192 Syntactic_Parse (Parser, Input_F); 5193 Close (Input_F); 5194 Parser.In_External_Entity := In_External; 5195 5196 Set_Location (Parser.Locator, Loc); 5197 Parser.Last_Read := Saved_Last_Read; 5198 Parser.Last_Read_Is_Valid := True; 5199 exception 5200 when Name_Error => 5201 Close (Input_F); 5202 Error 5203 (Parser, 5204 "External subset not found: " 5205 & Parser.Buffer (System_Start.First .. System_End.Last), 5206 Id); 5207 5208 if NS_Id /= Null_Token then 5209 Reset_Buffer (Parser, NS_Id); 5210 else 5211 Reset_Buffer (Parser, Name_Id); 5212 end if; 5213 5214 when others => 5215 Close (Input_F); 5216 raise; 5217 end; 5218 5219 else 5220 if NS_Id /= Null_Token then 5221 Reset_Buffer (Parser, NS_Id); 5222 else 5223 Reset_Buffer (Parser, Name_Id); 5224 end if; 5225 end if; 5226 5227 -- Check that all declarations are fully declared 5228 if Parser.Feature_Validation then 5229 declare 5230 Iter : Notations_Table.Iterator := First (Parser.Notations); 5231 begin 5232 while Iter /= Notations_Table.No_Iterator loop 5233 if not Current (Iter).Declaration_Seen then 5234 Error (Parser, Error_Notation_Undeclared 5235 & Get (Current (Iter).Name).all); 5236 end if; 5237 Next (Parser.Notations, Iter); 5238 end loop; 5239 end; 5240 end if; 5241 5242 Parser.In_External_Entity := False; 5243 End_DTD (Parser); 5244 Set_State (Parser, Default_State); 5245 end Parse_Doctype; 5246 5247 ----------------- 5248 -- End_Element -- 5249 ----------------- 5250 5251 procedure End_Element is 5252 begin 5253 if Parser.Hooks.End_Element /= null then 5254 Parser.Hooks.End_Element 5255 (Parser'Unchecked_Access, Parser.Current_Node); 5256 end if; 5257 5258 End_Element 5259 (Parser, NS => Parser.Current_Node.NS, 5260 Local_Name => Parser.Current_Node.Name); 5261 5262 -- Tag must end in the same entity 5263 if Parser.Feature_Validation 5264 and then 5265 Id.Location.System_Id /= Parser.Current_Node.Start.System_Id 5266 then 5267 Error (Parser, Error_Entity_Self_Contained, Id); 5268 end if; 5269 5270 Close_Namespaces (Parser, Parser.Current_Node.Namespaces); 5271 5272 -- Move back to the parent node (after freeing the current node) 5273 Free (Parser.Current_Node); 5274 end End_Element; 5275 5276 ------------------- 5277 -- Parse_End_Tag -- 5278 ------------------- 5279 5280 procedure Parse_End_Tag is 5281 Open_Id : constant Token := Id; 5282 NS_Id, Name_Id : Token := Null_Token; 5283 begin 5284 Set_State (Parser, Tag_State); 5285 5286 Next_Token (Input, Parser, Id); 5287 Get_Name_NS (Id, NS_Id, Name_Id); 5288 if Id.Typ = Space then 5289 Next_Token (Input, Parser, Id); 5290 end if; 5291 5292 if Id.Typ /= End_Of_Tag then 5293 Fatal_Error (Parser, "Tags must end with a '>' symbol", Id); 5294 -- 3.1 5295 end if; 5296 5297 if Parser.Current_Node = null then 5298 Fatal_Error -- 3 5299 (Parser, "No start tag found for this end tag", Id); 5300 end if; 5301 5302 -- Tag must end in the same entity 5303 if Parser.Feature_Validation 5304 and then Id.Location.System_Id /= 5305 Parser.Current_Node.Start.System_Id 5306 then 5307 Error (Parser, Error_Entity_Self_Contained, Id); 5308 end if; 5309 5310 if Parser.Current_Node = null then 5311 Fatal_Error 5312 (Parser, -- WF element type match 5313 "Unexpected closing tag", Open_Id); 5314 5315 elsif Parser.Buffer (NS_Id.First .. NS_Id.Last) /= 5316 Get (Get_Prefix (Parser.Current_Node.NS)).all 5317 or else Parser.Buffer (Name_Id.First .. Name_Id.Last) /= 5318 Get (Parser.Current_Node.Name).all 5319 then 5320 -- Well-Formedness Constraint: Element Type Match 5321 if Get_Prefix (Parser.Current_Node.NS) /= Empty_String then 5322 Fatal_Error 5323 (Parser, -- WF element type match 5324 "Name differ for closing tag (expecting " 5325 & Get (Get_Prefix (Parser.Current_Node.NS)).all 5326 & ':' & Get (Parser.Current_Node.Name).all 5327 & ", opened line" 5328 & Integer'Image (Parser.Current_Node.Start.Line) 5329 & ')', 5330 Open_Id); 5331 else 5332 Fatal_Error 5333 (Parser, -- WF element type match 5334 "Name differ for closing tag (" 5335 & "expecting " & Get (Parser.Current_Node.Name).all 5336 & ", opened line" 5337 & Integer'Image (Parser.Current_Node.Start.Line) 5338 & ')', 5339 Open_Id); 5340 end if; 5341 end if; 5342 5343 End_Element; 5344 5345 Set_State (Parser, Default_State); 5346 if NS_Id /= Null_Token then 5347 Reset_Buffer (Parser, NS_Id); 5348 else 5349 Reset_Buffer (Parser, Name_Id); 5350 end if; 5351 end Parse_End_Tag; 5352 5353 ------------------------- 5354 -- Check_Version_Value -- 5355 ------------------------- 5356 5357 procedure Check_Version_Value (Id : in out Token) is 5358 C : Unicode_Char; 5359 J : Natural; 5360 Value_Start, Value_End : Token; 5361 Tmp_Version : XML_Versions; 5362 begin 5363 Next_Token_Skip_Spaces (Input, Parser, Id); 5364 if Id.Typ /= Equal then 5365 Fatal_Error (Parser, "Expecting '=' sign", Id); 5366 end if; 5367 5368 Next_Token_Skip_Spaces (Input, Parser, Id); 5369 if Id.Typ /= Double_String_Delimiter 5370 and then Id.Typ /= Single_String_Delimiter 5371 then 5372 Fatal_Error (Parser, "Expecting version value", Id); 5373 end if; 5374 Get_String (Id, Attr_Value_State, Value_Start, Value_End); 5375 5376 J := Value_Start.First; 5377 while J <= Value_End.Last loop 5378 Encoding.Read (Parser.Buffer.all, J, C); 5379 if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z) 5380 and then 5381 not (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z) 5382 and then not (C in Digit_Zero .. Digit_Nine) 5383 and then C /= Low_Line 5384 and then C /= Full_Stop 5385 and then C /= Unicode.Names.Basic_Latin.Colon 5386 and then C /= Hyphen_Minus 5387 then 5388 Fatal_Error -- 2.8 5389 (Parser, "Illegal version number in <?xml?> processing" 5390 & " instruction", Value_Start); 5391 end if; 5392 end loop; 5393 5394 if Parser.Buffer (Value_Start.First .. Value_End.Last) = "1.1" then 5395 Tmp_Version := XML_1_1; 5396 5397 elsif Parser.Buffer (Value_Start.First .. Value_End.Last) = "1.0" then 5398 Tmp_Version := XML_1_0; 5399 5400 else 5401 case Parser.XML_Version is 5402 when XML_1_0_Third_Edition 5403 | XML_1_0_Fourth_Edition => 5404 Error 5405 (Parser, "Unsupported version of XML: " 5406 & Parser.Buffer (Value_Start.First .. Value_End.Last)); 5407 5408 when XML_1_0_Fifth_Edition 5409 | XML_1_0 5410 | XML_1_1 => 5411 null; 5412 end case; 5413 end if; 5414 5415 if Parser.In_External_Entity 5416 and then 5417 ((Tmp_Version = XML_1_1 5418 and then Parser.XML_Version /= XML_1_1) 5419 or else 5420 (Tmp_Version /= XML_1_1 5421 and then Parser.XML_Version = XML_1_1)) 5422 then 5423 Fatal_Error 5424 (Parser, 5425 "External entity doesn't have the same" 5426 & " XML version as document"); 5427 end if; 5428 5429 -- Override the version in the parser, but only if the one set 5430 -- doesn't match yet. In particular, this allows users to set their 5431 -- preferred edition of XML 1.0 5432 5433 if Tmp_Version = XML_1_1 5434 and then Parser.XML_Version /= XML_1_1 5435 then 5436 Parser.XML_Version := XML_1_1; 5437 elsif Tmp_Version = XML_1_0 5438 and then Parser.XML_Version = XML_1_1 5439 then 5440 Parser.XML_Version := XML_1_0; 5441 end if; 5442 5443 Next_Token (Input, Parser, Id); 5444 if Id.Typ = Space then 5445 Next_Token (Input, Parser, Id); 5446 elsif Id.Typ /= End_Of_PI then 5447 Fatal_Error (Parser, "values must be separated by spaces", Id); 5448 end if; 5449 end Check_Version_Value; 5450 5451 -------------------------- 5452 -- Check_Encoding_Value -- 5453 -------------------------- 5454 5455 procedure Check_Encoding_Value (Id : in out Token) is 5456 Inp : Input_Source_Access := Input'Unchecked_Access; 5457 C : Unicode_Char; 5458 J : Natural; 5459 Value_Start, Value_End : Token; 5460 Tmp : Positive; 5461 begin 5462 -- If we are parsing an external entity, everything applies to it. 5463 -- See test xmltest/valid/ext-sa/008.xml 5464 if Parser.Inputs /= null then 5465 Inp := Parser.Inputs.Input; 5466 end if; 5467 5468 Next_Token_Skip_Spaces (Inp.all, Parser, Id); 5469 if Id.Typ /= Equal then 5470 Fatal_Error (Parser, "Expecting '=' sign"); 5471 end if; 5472 5473 Next_Token_Skip_Spaces (Inp.all, Parser, Id); 5474 if Id.Typ /= Double_String_Delimiter 5475 and then Id.Typ /= Single_String_Delimiter 5476 then 5477 Fatal_Error (Parser, "Expecting encoding value"); 5478 end if; 5479 Get_String (Id, Attr_Value_State, Value_Start, Value_End); 5480 5481 if Value_End.Last < Value_Start.First then 5482 Fatal_Error -- 4.3.3 5483 (Parser, "Empty value for encoding not allowed"); 5484 else 5485 Tmp := Value_Start.First; 5486 Encoding.Read (Parser.Buffer.all, Tmp, C); 5487 if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z) 5488 and then not 5489 (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z) 5490 then 5491 Fatal_Error -- 4.3.3 5492 (Parser, "Illegal character '" 5493 & Debug_Encode (C) & "' in encoding value", Value_Start); 5494 end if; 5495 5496 J := Value_Start.First + Encoding.Width (C); 5497 while J <= Value_End.Last loop 5498 Encoding.Read (Parser.Buffer.all, J, C); 5499 if not (C in Latin_Small_Letter_A .. Latin_Small_Letter_Z) 5500 and then not 5501 (C in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z) 5502 and then not (C in Digit_Zero .. Digit_Nine) 5503 and then C /= Full_Stop 5504 and then C /= Low_Line 5505 and then C /= Hyphen_Minus 5506 then 5507 Fatal_Error -- 4.3.3 5508 (Parser, "Illegal character '" 5509 & Debug_Encode (C) & "' in encoding value", 5510 Value_Start); 5511 end if; 5512 end loop; 5513 end if; 5514 5515 -- Check we indeed have a following space 5516 5517 Next_Token (Inp.all, Parser, Id); 5518 if Id.Typ = Space then 5519 Next_Token (Inp.all, Parser, Id); 5520 elsif Id.Typ /= End_Of_PI then 5521 Fatal_Error (Parser, "values must be separated by spaces", Id); 5522 end if; 5523 5524 -- Change the encoding for the streams, if needed 5525 Set_Stream_Encoding 5526 (Inp.all, Parser.Buffer (Value_Start.First .. Value_End.Last)); 5527 end Check_Encoding_Value; 5528 5529 ---------------------------- 5530 -- Check_Standalone_Value -- 5531 ---------------------------- 5532 5533 procedure Check_Standalone_Value (Id : in out Token) is 5534 Value_Start, Value_End : Token; 5535 begin 5536 Next_Token_Skip_Spaces (Input, Parser, Id); 5537 if Id.Typ /= Equal then 5538 Fatal_Error (Parser, "Expecting '=' sign"); 5539 end if; 5540 5541 Next_Token_Skip_Spaces (Input, Parser, Id); 5542 if Id.Typ /= Double_String_Delimiter 5543 and then Id.Typ /= Single_String_Delimiter 5544 then 5545 Fatal_Error 5546 (Parser, "Parameter to 'standalone' must be quoted", Id); 5547 end if; 5548 Get_String (Id, Attr_Value_State, Value_Start, Value_End); 5549 5550 if Parser.Buffer (Value_Start.First .. Value_End.Last) /= Yes_Sequence 5551 and then Parser.Buffer (Value_Start.First .. Value_End.Last) /= 5552 No_Sequence 5553 then 5554 Fatal_Error 5555 (Parser, -- 2.9 [32] 5556 "Invalid value for standalone parameter in <?xml?>", 5557 Value_Start); 5558 end if; 5559 5560 Parser.Standalone_Document := 5561 Parser.Buffer (Value_Start.First .. Value_End.Last) = 5562 Yes_Sequence; 5563 5564 Next_Token (Input, Parser, Id); 5565 if Id.Typ = Space then 5566 Next_Token (Input, Parser, Id); 5567 elsif Id.Typ /= End_Of_PI then 5568 Fatal_Error (Parser, "values must be separated by spaces", Id); 5569 end if; 5570 end Check_Standalone_Value; 5571 5572 -------------- 5573 -- Parse_PI -- 5574 -------------- 5575 5576 procedure Parse_PI (Id : in out Token) is 5577 State : constant Parser_State := Get_State (Parser); 5578 Open_Id : constant Token := Id; 5579 Name_Id, Data_Start : Token; 5580 Data_End : Token := Null_Token; 5581 begin 5582 Set_State (Parser, PI_State); 5583 5584 Next_Token (Input, Parser, Name_Id); 5585 if Name_Id.Typ /= Name then 5586 Fatal_Error 5587 (Parser, -- 2.6 5588 "Processing Instruction must specify a target name", 5589 Name_Id); 5590 end if; 5591 5592 Check_Valid_Name_Or_NCname (Parser, Name_Id); 5593 5594 Next_Token (Input, Parser, Id); 5595 if Id.Typ /= Space and then Id.Typ /= End_Of_PI then 5596 Fatal_Error (Parser, "Must have space between target and data"); 5597 elsif Id.Typ = Space then 5598 Next_Token (Input, Parser, Id); 5599 end if; 5600 5601 -- Special handling for <?xml?> 5602 if Parser.Buffer (Name_Id.First .. Name_Id.Last) = Xml_Sequence then 5603 5604 if Open_Id.Location.Line /= 1 5605 or else 5606 (Parser.Inputs = null 5607 and then Open_Id.Location.Column /= 1 + Prolog_Size (Input)) 5608 or else 5609 (Parser.Inputs /= null 5610 and then Open_Id.Location.Column /= 5611 1 + Prolog_Size (Parser.Inputs.Input.all)) 5612 or else (Parser.Inputs /= null 5613 and then not Parser.Inputs.External) 5614 then 5615 Fatal_Error 5616 (Parser, -- 2.8 5617 "<?xml?> instruction must be first in document", 5618 Open_Id); 5619 end if; 5620 5621 -- ??? No true for text declaratinos 4.3.1 (external parsed 5622 -- entities) 5623 Set_State (Parser, Tag_State); 5624 5625 if Parser.Buffer (Id.First .. Id.Last) = Version_Sequence then 5626 Check_Version_Value (Id); 5627 elsif not Parser.In_External_Entity then 5628 Fatal_Error 5629 (Parser, "'version' must be the first argument to <?xml?>", 5630 Id); 5631 end if; 5632 5633 if Id.Typ = Name 5634 and then Parser.Buffer (Id.First .. Id.Last) = Encoding_Sequence 5635 then 5636 Check_Encoding_Value (Id); 5637 elsif Parser.In_External_Entity then 5638 Fatal_Error 5639 (Parser, "'encoding' must be specified for <?xml?> in" 5640 & " external entities", Id); 5641 end if; 5642 5643 if not Parser.In_External_Entity 5644 and then Id.Typ = Name 5645 and then Parser.Buffer (Id.First .. Id.Last) = 5646 Standalone_Sequence 5647 then 5648 Check_Standalone_Value (Id); 5649 end if; 5650 5651 if Id.Typ /= End_Of_PI then 5652 if Parser.In_External_Entity then 5653 Fatal_Error 5654 (Parser, 5655 "Text declarations <?xml?> in external entity cannot" 5656 & " specify parameters other than 'version' and" 5657 & " 'encoding'", Id); 5658 else 5659 Fatal_Error 5660 (Parser, 5661 "<?xml..?> arguments can only be 'version', 'encoding' or" 5662 & " 'standalone', in that order", Id); 5663 end if; 5664 end if; 5665 5666 else 5667 -- (2.6)[17]: Name can not be 'xml' (case insensitive) 5668 declare 5669 C : Unicode_Char; 5670 J : Natural := Name_Id.First; 5671 begin 5672 Encoding.Read (Parser.Buffer.all, J, C); 5673 5674 if C = Latin_Small_Letter_X 5675 or else C = Latin_Capital_Letter_X 5676 then 5677 Encoding.Read (Parser.Buffer.all, J, C); 5678 5679 if C = Latin_Capital_Letter_M 5680 or else C = Latin_Small_Letter_M 5681 then 5682 Encoding.Read (Parser.Buffer.all, J, C); 5683 5684 if (C = Latin_Capital_Letter_L 5685 or else C = Latin_Small_Letter_L) 5686 and then J = Name_Id.Last + 1 5687 then 5688 Fatal_Error 5689 (Parser, -- 2.6 5690 "'" 5691 & Parser.Buffer (Name_Id.First .. Name_Id.Last) 5692 & "' is not a valid processing instruction target", 5693 Name_Id); 5694 end if; 5695 end if; 5696 end if; 5697 end; 5698 5699 Data_Start := Id; 5700 5701 while Id.Typ /= End_Of_PI and then Id.Typ /= End_Of_Input loop 5702 Data_End := Id; 5703 5704 if Id.Typ = Double_String_Delimiter then 5705 Put_In_Buffer (Parser, """"); 5706 Data_End.Last := Data_End.Last + 1; 5707 elsif Id.Typ = Single_String_Delimiter then 5708 Put_In_Buffer (Parser, "'"); 5709 Data_End.Last := Data_End.Last + 1; 5710 end if; 5711 5712 Next_Token (Input, Parser, Id); 5713 end loop; 5714 5715 if Id.Typ = End_Of_Input then 5716 Fatal_Error -- 2.6 5717 (Parser, "Processing instruction must end with '?>'", 5718 Open_Id); 5719 end if; 5720 5721 Processing_Instruction 5722 (Parser, 5723 Target => Parser.Buffer (Name_Id.First .. Name_Id.Last), 5724 Data => Parser.Buffer (Data_Start.First .. Data_End.Last)); 5725 end if; 5726 5727 Set_State (Parser, State); 5728 Reset_Buffer (Parser, Name_Id); 5729 end Parse_PI; 5730 5731 begin 5732 -- Initialize the parser with the first character of the stream. 5733 if Eof (Input) then 5734 return; 5735 end if; 5736 Next_Char (Input, Parser); 5737 5738 if Parser.State.In_DTD then 5739 Parse_Doctype_Contents; 5740 end if; 5741 5742 loop 5743 -- Unless in string, buffer should be empty at this point. Strings 5744 -- are special-cased just in case we are currently substituting 5745 -- entities while in a string. 5746 pragma Assert (Parser.State.Ignore_Special 5747 or else Parser.Buffer_Length = 0); 5748 5749 Next_Token (Input, Parser, Id, 5750 Coalesce_Space => Parser.Current_Node /= null); 5751 exit when Id.Typ = End_Of_Input; 5752 5753 case Id.Typ is 5754 when Start_Of_PI => 5755 Parse_PI (Id); 5756 5757 when Cdata_Section => 5758 if Parser.Current_Node = null then 5759 Fatal_Error -- 2.1 5760 (Parser, "Non-white space found at top level", Id); 5761 end if; 5762 Start_Cdata (Parser); 5763 5764 if Parser.Hooks.Characters /= null then 5765 Parser.Hooks.Characters 5766 (Parser'Unchecked_Access, 5767 Parser.Buffer (Id.First .. Id.Last)); 5768 end if; 5769 5770 Characters (Parser, Parser.Buffer (Id.First .. Id.Last)); 5771 End_Cdata (Parser); 5772 Reset_Buffer (Parser, Id); 5773 5774 when Text | Name => 5775 if Parser.Current_Node = null then 5776 Fatal_Error -- 2.1 5777 (Parser, "Non-white space found at top level", Id); 5778 end if; 5779 5780 if Parser.Hooks.Characters /= null then 5781 Parser.Hooks.Characters 5782 (Parser'Unchecked_Access, 5783 Parser.Buffer (Id.First .. Id.Last)); 5784 end if; 5785 5786 Characters (Parser, Parser.Buffer (Id.First .. Id.Last)); 5787 Reset_Buffer (Parser, Id); 5788 5789 when Sax.Readers.Space => 5790 -- If "xml:space" attribute is preserve 5791 -- then same as Text 5792 5793 if Parser.Hooks.Whitespace /= null then 5794 Parser.Hooks.Whitespace 5795 (Parser'Unchecked_Access, 5796 Parser.Buffer (Id.First .. Id.Last)); 5797 end if; 5798 5799 Ignorable_Whitespace 5800 (Parser, Parser.Buffer (Id.First .. Id.Last)); 5801 Reset_Buffer (Parser, Id); 5802 5803 when Comment => 5804 Comment (Parser, Parser.Buffer (Id.First .. Id.Last)); 5805 Reset_Buffer (Parser, Id); 5806 5807 when Start_Of_Tag => 5808 Parse_Start_Tag; 5809 5810 when Start_Of_End_Tag => 5811 Parse_End_Tag; 5812 5813 when Doctype_Start => 5814 Parse_Doctype; 5815 5816 when others => 5817 Fatal_Error (Parser, "Currently ignored: " 5818 & Token_Type'Image (Id.Typ)); 5819 end case; 5820 end loop; 5821 end Syntactic_Parse; 5822 5823 ---------- 5824 -- Free -- 5825 ---------- 5826 5827 procedure Free (Parser : in out Sax_Reader'Class) is 5828 Tmp, Tmp2 : Element_Access; 5829 begin 5830 Close_Inputs (Parser, Parser.Inputs); 5831 Close_Inputs (Parser, Parser.Close_Inputs); 5832 5833 Free (Parser.Default_Namespaces); 5834 Free (Parser.Buffer); 5835 Parser.Buffer_Length := 0; 5836 5837 Parser.Attributes.Count := 0; 5838 Unchecked_Free (Parser.Attributes.List); 5839 5840 -- Free the nodes, in case there are still some open 5841 Tmp := Parser.Current_Node; 5842 while Tmp /= null loop 5843 Tmp2 := Tmp.Parent; 5844 Free (Tmp); 5845 Tmp := Tmp2; 5846 end loop; 5847 5848 -- Free the content model for the default attributes 5849 -- is done automatically when the attributes are reset 5850 5851 if Parser.Hooks.Data /= null then 5852 Free (Parser.Hooks.Data.all); 5853 Unchecked_Free (Parser.Hooks.Data); 5854 end if; 5855 5856 -- Free the internal tables 5857 Reset (Parser.Entities); 5858 Reset (Parser.Default_Atts); 5859 Reset (Parser.Notations); 5860 5861 Free (Parser.Locator); 5862 end Free; 5863 5864 --------------- 5865 -- Set_Hooks -- 5866 --------------- 5867 5868 procedure Set_Hooks 5869 (Handler : in out Sax_Reader; 5870 Data : Hook_Data_Access := null; 5871 Start_Element : Start_Element_Hook := null; 5872 End_Element : End_Element_Hook := null; 5873 Characters : Characters_Hook := null; 5874 Whitespace : Whitespace_Hook := null; 5875 Doc_Locator : Set_Doc_Locator_Hook := null; 5876 Notation_Decl : Notation_Decl_Hook := null) is 5877 begin 5878 if Handler.Hooks.Data /= null then 5879 Free (Handler.Hooks.Data.all); 5880 Unchecked_Free (Handler.Hooks.Data); 5881 end if; 5882 5883 Handler.Hooks := 5884 (Data => Data, 5885 Start_Element => Start_Element, 5886 End_Element => End_Element, 5887 Characters => Characters, 5888 Whitespace => Whitespace, 5889 Doc_Locator => Doc_Locator, 5890 Notation_Decl => Notation_Decl); 5891 end Set_Hooks; 5892 5893 ------------------------ 5894 -- Initialize_Symbols -- 5895 ------------------------ 5896 5897 procedure Initialize_Symbols (Parser : in out Sax_Reader) is 5898 begin 5899 if Parser.Lt_Sequence = No_Symbol then 5900 if Get (Parser.Symbols) = null then 5901 if Debug_Internal then 5902 Put_Line ("Initialize_Symbols: creating new table"); 5903 end if; 5904 Parser.Symbols := Sax.Utils.Allocate; 5905 end if; 5906 5907 Parser.Lt_Sequence := Find_Symbol (Parser, Lt_Sequence); 5908 Parser.Gt_Sequence := Find_Symbol (Parser, Gt_Sequence); 5909 Parser.Amp_Sequence := Find_Symbol (Parser, Amp_Sequence); 5910 Parser.Apos_Sequence := Find_Symbol (Parser, Apos_Sequence); 5911 Parser.Quot_Sequence := Find_Symbol (Parser, Quot_Sequence); 5912 Parser.Xmlns_Sequence := Find_Symbol (Parser, Xmlns_Sequence); 5913 Parser.Xml_Sequence := Find_Symbol (Parser, Xml_Sequence); 5914 Parser.Symbol_Percent := Find_Symbol (Parser, "%"); 5915 Parser.Symbol_Ampersand := Find_Symbol (Parser, "&"); 5916 Parser.Namespaces_URI_Sequence := 5917 Find_Symbol (Parser, Namespaces_URI_Sequence); 5918 end if; 5919 end Initialize_Symbols; 5920 5921 ---------------------- 5922 -- Close_Namespaces -- 5923 ---------------------- 5924 5925 procedure Close_Namespaces 5926 (Parser : in out Sax_Reader'Class; List : XML_NS) 5927 is 5928 NS : XML_NS := List; 5929 begin 5930 while NS /= No_XML_NS loop 5931 if Get_Prefix (NS) /= Empty_String 5932 and then Get_Prefix (NS) /= Parser.Xmlns_Sequence 5933 then 5934 End_Prefix_Mapping (Parser, Get_Prefix (NS)); 5935 end if; 5936 NS := Next_In_List (NS); 5937 end loop; 5938 end Close_Namespaces; 5939 5940 ----------- 5941 -- Parse -- 5942 ----------- 5943 5944 procedure Parse 5945 (Parser : in out Sax_Reader; 5946 Input : in out Input_Sources.Input_Source'Class) is 5947 begin 5948 Initialize_Symbols (Parser); 5949 5950 Parser.Locator := Sax.Locators.Create; 5951 Parser.Public_Id := Find_Symbol (Parser, Get_Public_Id (Input)); 5952 Set_Public_Id (Parser.Locator, Parser.Public_Id); 5953 Parser.System_Id := Find_Symbol (Parser, Get_System_Id (Input)); 5954 Set_System_Id (Parser.Locator, Parser.System_Id); 5955 Set_Column_Number (Parser.Locator, Prolog_Size (Input)); 5956 Set_Line_Number (Parser.Locator, 1); 5957 Parser.Lookup_Char := Unicode.Unicode_Char'Last; 5958 Parser.Current_Node := null; 5959 Parser.Num_Toplevel_Elements := 0; 5960 Parser.Previous_Char_Was_CR := False; 5961 Parser.Ignore_State_Special := False; 5962 Parser.In_External_Entity := False; 5963 Parser.Last_Read_Is_Valid := False; 5964 Parser.Buffer := new Byte_Sequence (1 .. Initial_Buffer_Length); 5965 Set_State (Parser, Default_State); 5966 5967 pragma Warnings (Off, "overlaps with actual"); 5968 Add_Namespace_No_Event 5969 (Parser, 5970 Prefix => Parser.Xml_Sequence, 5971 URI => Find_Symbol 5972 (Parser, 5973 Encodings.From_Utf32 5974 (Basic_8bit.To_Utf32 ("http://www.w3.org/XML/1998/namespace")))); 5975 Add_Namespace_No_Event 5976 (Parser, Parser.Xmlns_Sequence, Parser.Xmlns_Sequence); 5977 Add_Namespace_No_Event (Parser, Empty_String, Empty_String); 5978 5979 if Parser.Hooks.Doc_Locator /= null then 5980 Parser.Hooks.Doc_Locator (Parser, Parser.Locator); 5981 end if; 5982 5983 Set_Document_Locator (Sax_Reader'Class (Parser), Parser.Locator); 5984 5985 Start_Document (Sax_Reader'Class (Parser)); 5986 Syntactic_Parse (Sax_Reader'Class (Parser), Input); 5987 Close_Namespaces (Parser, Parser.Default_Namespaces); 5988 pragma Warnings (On, "overlaps with actual"); 5989 5990 -- All the nodes must have been closed at the end of the document 5991 if Parser.Current_Node /= null then 5992 Fatal_Error -- 2.1 5993 (Parser, "Node <" & Get (Parser.Current_Node.Name).all 5994 & "> is not closed"); 5995 end if; 5996 5997 if Parser.Num_Toplevel_Elements = 0 then 5998 Fatal_Error (Parser, "No root element specified"); -- 2.1 5999 end if; 6000 6001 End_Document (Sax_Reader'Class (Parser)); 6002 6003 Free (Parser); 6004 6005 exception 6006 when others => 6007 Free (Parser); 6008 raise; 6009 end Parse; 6010 6011 ---------- 6012 -- Hash -- 6013 ---------- 6014 6015 function Hash (Str : String) return Unsigned_32 is 6016 Result : Unsigned_32 := Str'Length; 6017 begin 6018 for J in Str'Range loop 6019 Result := Rotate_Left (Result, 1) + 6020 Unsigned_32 (Character'Pos (Str (J))); 6021 end loop; 6022 6023 return Result; 6024 end Hash; 6025 6026 ------------- 6027 -- Get_Key -- 6028 ------------- 6029 6030 function Get_Key (Entity : Entity_Entry_Access) return Symbol is 6031 begin 6032 return Entity.Name; 6033 end Get_Key; 6034 6035 ---------- 6036 -- Free -- 6037 ---------- 6038 6039 procedure Free (Att : in out Attributes_Entry) is 6040 begin 6041 Unchecked_Free (Att.Attributes.List); 6042 Att.Attributes.Count := 0; 6043 end Free; 6044 6045 ------------- 6046 -- Get_Key -- 6047 ------------- 6048 6049 function Get_Key (Att : Attributes_Entry) return Symbol is 6050 begin 6051 return Att.Element_Name; 6052 end Get_Key; 6053 6054 ---------- 6055 -- Free -- 6056 ---------- 6057 6058 procedure Free (Notation : in out Notation_Entry) is 6059 pragma Unreferenced (Notation); 6060 begin 6061 null; 6062 end Free; 6063 6064 ------------- 6065 -- Get_Key -- 6066 ------------- 6067 6068 function Get_Key (Notation : Notation_Entry) return Symbol is 6069 begin 6070 return Notation.Name; 6071 end Get_Key; 6072 6073 ----------------- 6074 -- Get_Feature -- 6075 ----------------- 6076 6077 function Get_Feature (Parser : Sax_Reader; Name : String) return Boolean is 6078 begin 6079 if Name = Namespace_Feature then 6080 return Parser.Feature_Namespace; 6081 6082 elsif Name = Namespace_Prefixes_Feature then 6083 return Parser.Feature_Namespace_Prefixes; 6084 6085 elsif Name = External_General_Entities_Feature then 6086 return Parser.Feature_External_General_Entities; 6087 6088 elsif Name = External_Parameter_Entities_Feature then 6089 return Parser.Feature_External_Parameter_Entities; 6090 6091 elsif Name = Validation_Feature then 6092 return Parser.Feature_Validation; 6093 6094 elsif Name = Parameter_Entities_Feature then 6095 return False; -- ??? Unsupported for now 6096 6097 elsif Name = Test_Valid_Chars_Feature then 6098 return Parser.Feature_Test_Valid_Chars; 6099 6100 elsif Name = Allow_Relative_IRI_Feature then 6101 return Parser.Feature_Allow_Relative_IRI; 6102 6103 elsif Name = Schema_Validation_Feature then 6104 return Parser.Feature_Schema_Validation; 6105 end if; 6106 6107 return False; 6108 end Get_Feature; 6109 6110 ----------------- 6111 -- Set_Feature -- 6112 ----------------- 6113 6114 procedure Set_Feature 6115 (Parser : in out Sax_Reader; Name : String; Value : Boolean) is 6116 begin 6117 if Name = Namespace_Feature then 6118 Parser.Feature_Namespace := Value; 6119 6120 elsif Name = Namespace_Prefixes_Feature then 6121 Parser.Feature_Namespace_Prefixes := Value; 6122 6123 elsif Name = External_General_Entities_Feature then 6124 Parser.Feature_External_General_Entities := Value; 6125 6126 elsif Name = External_Parameter_Entities_Feature then 6127 Parser.Feature_External_Parameter_Entities := Value; 6128 6129 elsif Name = Validation_Feature then 6130 Parser.Feature_Validation := Value; 6131 6132 elsif Name = Test_Valid_Chars_Feature then 6133 Parser.Feature_Test_Valid_Chars := Value; 6134 6135 elsif Name = Schema_Validation_Feature then 6136 Parser.Feature_Schema_Validation := Value; 6137 6138 elsif Name = Allow_Relative_IRI_Feature then 6139 Parser.Feature_Allow_Relative_IRI := Value; 6140 end if; 6141 end Set_Feature; 6142 6143 ----------------- 6144 -- Fatal_Error -- 6145 ----------------- 6146 6147 procedure Fatal_Error 6148 (Handler : in out Sax_Reader; Except : Sax_Parse_Exception'Class) 6149 is 6150 pragma Warnings (Off, Handler); 6151 begin 6152 Raise_Exception 6153 (XML_Fatal_Error'Identity, 6154 Get_Message (Except)); 6155 end Fatal_Error; 6156 6157 -------------------------- 6158 -- Start_Prefix_Mapping -- 6159 -------------------------- 6160 6161 procedure Start_Prefix_Mapping 6162 (Handler : in out Reader; 6163 Prefix : Sax.Symbols.Symbol; 6164 URI : Sax.Symbols.Symbol) 6165 is 6166 begin 6167 Start_Prefix_Mapping 6168 (Reader'Class (Handler), Get (Prefix).all, Get (URI).all); 6169 end Start_Prefix_Mapping; 6170 6171 ------------------------ 6172 -- End_Prefix_Mapping -- 6173 ------------------------ 6174 6175 procedure End_Prefix_Mapping (Handler : in out Reader; Prefix : Symbol) is 6176 begin 6177 End_Prefix_Mapping 6178 (Reader'Class (Handler), Get (Prefix).all); 6179 end End_Prefix_Mapping; 6180 6181 ------------------- 6182 -- Start_Element -- 6183 ------------------- 6184 6185 procedure Start_Element 6186 (Handler : in out Reader; 6187 NS : Sax.Utils.XML_NS; 6188 Local_Name : Sax.Symbols.Symbol; 6189 Atts : Sax_Attribute_List) 6190 is 6191 Attributes : Sax.Attributes.Attributes := Create_Attribute_List (Atts); 6192 begin 6193 Start_Element 6194 (Reader'Class (Handler), 6195 Namespace_URI => Get (Get_URI (NS)).all, 6196 Local_Name => Get (Local_Name).all, 6197 Qname => Qname_From_Name (Get_Prefix (NS), Local_Name), 6198 Atts => Attributes); 6199 Clear (Attributes); 6200 6201 exception 6202 when others => 6203 Clear (Attributes); 6204 raise; 6205 end Start_Element; 6206 6207 ----------------- 6208 -- End_Element -- 6209 ----------------- 6210 6211 procedure End_Element 6212 (Handler : in out Reader; 6213 NS : Sax.Utils.XML_NS; 6214 Local_Name : Sax.Symbols.Symbol) is 6215 begin 6216 End_Element 6217 (Reader'Class (Handler), 6218 Namespace_URI => Get (Get_URI (NS)).all, 6219 Local_Name => Get (Local_Name).all, 6220 Qname => Qname_From_Name (Get_Prefix (NS), Local_Name)); 6221 end End_Element; 6222 6223 -------------------- 6224 -- Skipped_Entity -- 6225 -------------------- 6226 6227 procedure Skipped_Entity 6228 (Handler : in out Reader; 6229 Name : Sax.Symbols.Symbol) is 6230 begin 6231 Skipped_Entity (Reader'Class (Handler), Get (Name).all); 6232 end Skipped_Entity; 6233 6234 ------------------ 6235 -- Start_Entity -- 6236 ------------------ 6237 6238 procedure Start_Entity 6239 (Handler : in out Reader; 6240 Name : Sax.Symbols.Symbol) is 6241 begin 6242 Start_Entity (Reader'Class (Handler), Get (Name).all); 6243 end Start_Entity; 6244 6245 ---------------- 6246 -- End_Entity -- 6247 ---------------- 6248 6249 procedure End_Entity 6250 (Handler : in out Reader; 6251 Name : Sax.Symbols.Symbol) is 6252 begin 6253 End_Entity (Reader'Class (Handler), Get (Name).all); 6254 end End_Entity; 6255 6256 -------------------- 6257 -- Resolve_Entity -- 6258 -------------------- 6259 6260 function Resolve_Entity 6261 (Handler : Sax_Reader; 6262 Public_Id : Unicode.CES.Byte_Sequence; 6263 System_Id : Unicode.CES.Byte_Sequence) 6264 return Input_Sources.Input_Source_Access 6265 is 6266 pragma Warnings (Off, Handler); 6267 pragma Warnings (Off, Public_Id); 6268 pragma Warnings (Off, System_Id); 6269 begin 6270 return null; 6271 end Resolve_Entity; 6272 6273 -------------------- 6274 -- Get_Hooks_Data -- 6275 -------------------- 6276 6277 function Get_Hooks_Data (Handler : Sax_Reader) return Hook_Data_Access is 6278 begin 6279 return Handler.Hooks.Data; 6280 end Get_Hooks_Data; 6281 6282 ------------------------------------ 6283 -- Use_Basename_In_Error_Messages -- 6284 ------------------------------------ 6285 6286 procedure Use_Basename_In_Error_Messages 6287 (Parser : in out Sax_Reader; 6288 Use_Basename : Boolean := True) 6289 is 6290 begin 6291 Parser.Basename_In_Messages := Use_Basename; 6292 end Use_Basename_In_Error_Messages; 6293 6294 ------------------------------------ 6295 -- Use_Basename_In_Error_Messages -- 6296 ------------------------------------ 6297 6298 function Use_Basename_In_Error_Messages 6299 (Parser : Sax_Reader) return Boolean is 6300 begin 6301 return Parser.Basename_In_Messages; 6302 end Use_Basename_In_Error_Messages; 6303 6304 ------------ 6305 -- Get_NS -- 6306 ------------ 6307 6308 function Get_NS (Elem : Element_Access) return XML_NS is 6309 begin 6310 return Elem.NS; 6311 end Get_NS; 6312 6313 -------------------- 6314 -- Get_Local_Name -- 6315 -------------------- 6316 6317 function Get_Local_Name (Elem : Element_Access) return Symbol is 6318 begin 6319 return Elem.Name; 6320 end Get_Local_Name; 6321 6322 -------------- 6323 -- To_QName -- 6324 -------------- 6325 6326 function To_QName 6327 (Namespace_URI, Local_Name : Sax.Symbols.Symbol) 6328 return Unicode.CES.Byte_Sequence is 6329 begin 6330 if Namespace_URI = Empty_String then 6331 return Get (Local_Name).all; 6332 else 6333 return '{' & Get (Namespace_URI).all & '}' & Get (Local_Name).all; 6334 end if; 6335 end To_QName; 6336 6337 -------------- 6338 -- To_QName -- 6339 -------------- 6340 6341 function To_QName 6342 (Elem : Element_Access) return Unicode.CES.Byte_Sequence is 6343 begin 6344 return To_QName (Get_URI (Elem.NS), Elem.Name); 6345 end To_QName; 6346 6347 ---------------------- 6348 -- Set_Symbol_Table -- 6349 ---------------------- 6350 6351 procedure Set_Symbol_Table 6352 (Parser : in out Sax_Reader; 6353 Symbols : Symbol_Table) is 6354 begin 6355 Parser.Lt_Sequence := No_Symbol; 6356 Parser.Symbols := Symbols; 6357 end Set_Symbol_Table; 6358 6359 ---------------------- 6360 -- Get_Symbol_Table -- 6361 ---------------------- 6362 6363 function Get_Symbol_Table (Parser : Sax_Reader'Class) return Symbol_Table is 6364 begin 6365 return Parser.Symbols; 6366 end Get_Symbol_Table; 6367 6368 --------------- 6369 -- Get_Index -- 6370 --------------- 6371 6372 function Get_Index 6373 (List : Sax_Attribute_List; 6374 URI : Sax.Symbols.Symbol; 6375 Local_Name : Sax.Symbols.Symbol) return Integer is 6376 begin 6377 for A in 1 .. List.Count loop 6378 if List.List (A).URI = URI 6379 and then List.List (A).Local_Name = Local_Name 6380 then 6381 return A; 6382 end if; 6383 end loop; 6384 return -1; 6385 end Get_Index; 6386 6387 --------------- 6388 -- Get_Index -- 6389 --------------- 6390 6391 function Get_Index 6392 (Handler : Sax_Reader'Class; 6393 List : Sax_Attribute_List; 6394 URI : Unicode.CES.Byte_Sequence; 6395 Local_Name : Unicode.CES.Byte_Sequence) return Integer is 6396 begin 6397 return Get_Index 6398 (List, 6399 URI => Find_Symbol (Handler, URI), 6400 Local_Name => Find_Symbol (Handler, Local_Name)); 6401 end Get_Index; 6402 6403 --------------- 6404 -- Get_Value -- 6405 --------------- 6406 6407 function Get_Value 6408 (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is 6409 begin 6410 if Index < 0 then 6411 return No_Symbol; 6412 else 6413 return List.List (Index).Value; 6414 end if; 6415 end Get_Value; 6416 6417 --------------- 6418 -- Set_Value -- 6419 --------------- 6420 6421 procedure Set_Value 6422 (List : Sax_Attribute_List; 6423 Index : Integer; 6424 Val : Sax.Symbols.Symbol) is 6425 begin 6426 List.List (Index).Value := Val; 6427 end Set_Value; 6428 6429 ------------------ 6430 -- Get_Location -- 6431 ------------------ 6432 6433 function Get_Location 6434 (List : Sax_Attribute_List; Index : Integer) return Sax.Locators.Location 6435 is 6436 begin 6437 if Index < 0 then 6438 return No_Location; 6439 else 6440 return List.List (Index).Location; 6441 end if; 6442 end Get_Location; 6443 6444 ------------------------ 6445 -- Start_Tag_Location -- 6446 ------------------------ 6447 6448 function Start_Tag_Location 6449 (Elem : Element_Access) return Sax.Locators.Location is 6450 begin 6451 return Elem.Start; 6452 end Start_Tag_Location; 6453 6454 ---------------------------- 6455 -- Start_Tag_End_Location -- 6456 ---------------------------- 6457 6458 function Start_Tag_End_Location 6459 (Elem : Element_Access) return Sax.Locators.Location is 6460 begin 6461 return Elem.Start_Tag_End; 6462 end Start_Tag_End_Location; 6463 6464 ------------------------------ 6465 -- Get_Non_Normalized_Value -- 6466 ------------------------------ 6467 6468 function Get_Non_Normalized_Value 6469 (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is 6470 begin 6471 return List.List (Index).Non_Normalized_Value; 6472 end Get_Non_Normalized_Value; 6473 6474 -------------------------- 6475 -- Get_Value_As_Boolean -- 6476 -------------------------- 6477 6478 function Get_Value_As_Boolean 6479 (List : Sax_Attribute_List; Index : Integer; Default : Boolean := False) 6480 return Boolean 6481 is 6482 Val : Symbol; 6483 begin 6484 if Index < 0 then 6485 return Default; 6486 else 6487 Val := Get_Value (List, Index); 6488 return Get (Val).all = "true" or else Get (Val).all = "1"; 6489 end if; 6490 end Get_Value_As_Boolean; 6491 6492 -------------------------- 6493 -- Set_Normalized_Value -- 6494 -------------------------- 6495 6496 procedure Set_Normalized_Value 6497 (List : Sax_Attribute_List; Index : Integer; Value : Sax.Symbols.Symbol) 6498 is 6499 begin 6500 List.List (Index).Value := Value; 6501 end Set_Normalized_Value; 6502 6503 -------------- 6504 -- Get_Type -- 6505 -------------- 6506 6507 function Get_Type 6508 (List : Sax_Attribute_List; Index : Integer) 6509 return Sax.Attributes.Attribute_Type is 6510 begin 6511 return List.List (Index).Att_Type; 6512 end Get_Type; 6513 6514 -------------- 6515 -- Set_Type -- 6516 -------------- 6517 6518 procedure Set_Type 6519 (List : Sax_Attribute_List; Index : Integer; 6520 Typ : Sax.Attributes.Attribute_Type) is 6521 begin 6522 List.List (Index).Att_Type := Typ; 6523 end Set_Type; 6524 6525 ---------------- 6526 -- Get_Length -- 6527 ---------------- 6528 6529 function Get_Length (List : Sax_Attribute_List) return Natural is 6530 begin 6531 return List.Count; 6532 end Get_Length; 6533 6534 ---------------- 6535 -- Get_Prefix -- 6536 ---------------- 6537 6538 function Get_Prefix 6539 (List : Sax_Attribute_List; Index : Integer) return Sax.Symbols.Symbol is 6540 begin 6541 return List.List (Index).Prefix; 6542 end Get_Prefix; 6543 6544 -------------- 6545 -- Get_Name -- 6546 -------------- 6547 6548 function Get_Name 6549 (List : Sax_Attribute_List; Index : Integer) return Qualified_Name is 6550 begin 6551 return (NS => List.List (Index).URI, 6552 Local => List.List (Index).Local_Name); 6553 end Get_Name; 6554 6555 --------------- 6556 -- Get_Qname -- 6557 --------------- 6558 6559 function Get_Qname 6560 (List : Sax_Attribute_List; Index : Integer) 6561 return Unicode.CES.Byte_Sequence 6562 is 6563 begin 6564 return Qname_From_Name (List.List (Index).Prefix, 6565 List.List (Index).Local_Name); 6566 end Get_Qname; 6567 6568 ---------------------- 6569 -- Current_Location -- 6570 ---------------------- 6571 6572 function Current_Location 6573 (Handler : Sax_Reader) return Sax.Locators.Location is 6574 begin 6575 return Get_Location (Handler.Locator); 6576 end Current_Location; 6577 6578 --------------------- 6579 -- Set_XML_Version -- 6580 --------------------- 6581 6582 procedure Set_XML_Version 6583 (Parser : in out Sax_Reader; XML : XML_Versions := XML_1_0_Fifth_Edition) 6584 is 6585 begin 6586 if XML = XML_1_0 then 6587 Parser.XML_Version := XML_1_0_Fifth_Edition; 6588 else 6589 Parser.XML_Version := XML; 6590 end if; 6591 end Set_XML_Version; 6592 6593 --------------------- 6594 -- Get_XML_Version -- 6595 --------------------- 6596 6597 function Get_XML_Version (Parser : Sax_Reader) return XML_Versions is 6598 begin 6599 return Parser.XML_Version; 6600 end Get_XML_Version; 6601 6602end Sax.Readers; 6603