1------------------------------------------------------------------------------ 2-- -- 3-- Matreshka Project -- 4-- -- 5-- XML Processor -- 6-- -- 7-- Runtime Library Component -- 8-- -- 9------------------------------------------------------------------------------ 10-- -- 11-- Copyright © 2010-2014, Vadim Godunko <vgodunko@gmail.com> -- 12-- All rights reserved. -- 13-- -- 14-- Redistribution and use in source and binary forms, with or without -- 15-- modification, are permitted provided that the following conditions -- 16-- are met: -- 17-- -- 18-- * Redistributions of source code must retain the above copyright -- 19-- notice, this list of conditions and the following disclaimer. -- 20-- -- 21-- * Redistributions in binary form must reproduce the above copyright -- 22-- notice, this list of conditions and the following disclaimer in the -- 23-- documentation and/or other materials provided with the distribution. -- 24-- -- 25-- * Neither the name of the Vadim Godunko, IE nor the names of its -- 26-- contributors may be used to endorse or promote products derived from -- 27-- this software without specific prior written permission. -- 28-- -- 29-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- 30-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- 31-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -- 32-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -- 33-- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -- 34-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- 35-- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- 36-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- 37-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- 38-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- 39-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- 40-- -- 41------------------------------------------------------------------------------ 42-- $Revision: 4777 $ $Date: 2014-03-29 11:52:24 +0400 (Sat, 29 Mar 2014) $ 43------------------------------------------------------------------------------ 44with Matreshka.Internals.Strings.Configuration; 45with Matreshka.Internals.Strings.Operations; 46with Matreshka.Internals.Unicode.Characters.Latin; 47with XML.SAX.Simple_Readers.Callbacks; 48with XML.SAX.Simple_Readers.Scanner.Tables; 49 50package body XML.SAX.Simple_Readers.Scanner.Actions is 51 52 use Matreshka.Internals.Strings.Configuration; 53 use Matreshka.Internals.Unicode; 54 use Matreshka.Internals.Unicode.Characters.Latin; 55 use Matreshka.Internals.Utf16; 56 use Matreshka.Internals.XML; 57 use Matreshka.Internals.XML.Entity_Tables; 58 use Matreshka.Internals.XML.Symbol_Tables; 59 60 procedure Resolve_Symbol 61 (Self : in out Simple_Reader'Class; 62 Trim_Left : Natural; 63 Trim_Right : Natural; 64 Trim_Whitespace : Boolean; 65 Can_Be_Qname : Boolean; 66 Not_Qname : Boolean; 67 Error : out Boolean; 68 Symbol : out Matreshka.Internals.XML.Symbol_Identifier); 69 -- Converts name to symbol. Trim_Left, Trim_Right, Trim_Whitespace can be 70 -- used to trim several characters from head of tail of matched substring, 71 -- and to trim leading whitespaces. Not_Qname specify that resolved name 72 -- is not a qualified name at all (it is enumeration element of attribute 73 -- of non-NOTATION type). Can_Be_Qname specify that resolved name is 74 -- qualified name when namespace processing is enabled. Subprogram sets 75 -- Error when error is detected and Symbol when symbol is resolved. 76 77 procedure Character_Reference_To_Code_Point 78 (Self : in out Simple_Reader'Class; 79 Hex : Boolean; 80 Code : out Code_Point; 81 Valid : out Boolean); 82 -- Converts scanned character reference to code point. Reports errors to 83 -- application is any and sets Valid to False. Otherwise sets Code to 84 -- referenced code point and sets Valid to True. 85 86 --------------------------------------- 87 -- Character_Reference_To_Code_Point -- 88 --------------------------------------- 89 90 procedure Character_Reference_To_Code_Point 91 (Self : in out Simple_Reader'Class; 92 Hex : Boolean; 93 Code : out Code_Point; 94 Valid : out Boolean) 95 is 96 Zero_Fixup : constant := Digit_Zero; 97 Upper_Fixup : constant := Latin_Capital_Letter_A - 16#0A#; 98 Lower_Fixup : constant := Latin_Small_Letter_A - 16#0A#; 99 100 FP : Utf16_String_Index := Self.Scanner_State.YY_Base_Position; 101 LP : Utf16_String_Index := Self.Scanner_State.YY_Current_Position; 102 Aux : Code_Unit_32 := 0; 103 D : Code_Point; 104 105 begin 106 -- NOTE: Sequences of leading and trailing character always fixed: 107 -- "&#" for decimal representation and "&#x" for hexadecimal 108 -- representation for the leading sequence of characters and ";" for 109 -- trailing; thus we can just add/subtract required number of code point 110 -- positions instead of doing more expensive iteration with analysis of 111 -- UTF-16 code units. 112 -- 113 -- Actual value has limited character set ([0-9] or [0-9a-fA-F]), all 114 -- of characters is on BMP also, thus expensive decoding can be omitted 115 -- also. 116 117 if Hex then 118 -- Trim three leading characters and trailing character. 119 120 FP := FP + 3; 121 LP := LP - 1; 122 123 while FP < LP loop 124 D := Code_Point (Self.Scanner_State.Data.Value (FP)); 125 FP := FP + 1; 126 127 if D in Latin_Capital_Letter_A .. Latin_Capital_Letter_F then 128 Aux := (Aux * 16) + D - Upper_Fixup; 129 130 elsif D in Latin_Small_Letter_A .. Latin_Small_Letter_F then 131 Aux := (Aux * 16) + D - Lower_Fixup; 132 133 else 134 Aux := (Aux * 16) + D - Zero_Fixup; 135 end if; 136 137 -- Check whether collected code is inside range of Unicode code 138 -- points. Then it is outside reset to maximum value and exit 139 -- the loop. Error will be reported later in this subprogram. 140 141 if Aux not in Code_Point then 142 Aux := Code_Unit_32'Last; 143 144 exit; 145 end if; 146 end loop; 147 148 else 149 -- Trim two leading characters and trailing character. 150 151 FP := FP + 2; 152 LP := LP - 1; 153 154 while FP < LP loop 155 D := Code_Point (Self.Scanner_State.Data.Value (FP)); 156 FP := FP + 1; 157 158 Aux := (Aux * 10) + D - Zero_Fixup; 159 160 -- Check whether collected code is inside range of Unicode code 161 -- points. Then it is outside reset to maximum value and exit 162 -- the loop. Error will be reported later in this subprogram. 163 164 if Aux not in Code_Point then 165 Aux := Code_Unit_32'Last; 166 167 exit; 168 end if; 169 end loop; 170 end if; 171 172 -- [XML1.0/1.1 4.1 WFC: Legal Character] 173 -- 174 -- "Characters referred to using character references MUST match the 175 -- production for Char." 176 -- 177 -- Check whether character reference is resolved into valid character. 178 179 case Self.Version is 180 when XML_1_0 => 181 -- [XML1.0 2.2 Characters] 182 -- 183 -- [2] Char ::= 184 -- #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] 185 -- | [#x10000-#x10FFFF] 186 187 Valid := 188 Aux = 16#0009# 189 or Aux = 16#000A# 190 or Aux = 16#000D# 191 or Aux in 16#0020# .. 16#D7FF# 192 or Aux in 16#E000# .. 16#FFFD# 193 or Aux in 16#1_0000# .. 16#10_FFFF#; 194 195 when XML_1_1 => 196 -- [XML1.1 2.2 Characters] 197 -- 198 -- [2] Char ::= 199 -- [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] 200 201 Valid := 202 Aux in 16#0001# .. 16#D7FF# 203 or Aux in 16#E000# .. 16#FFFD# 204 or Aux in 16#1_0000# .. 16#10_FFFF#; 205 end case; 206 207 if not Valid then 208 Callbacks.Call_Fatal_Error 209 (Self, 210 League.Strings.To_Universal_String 211 ("[XML 4.1 WFC: Legal Character] character reference refers to" 212 & " invalid character")); 213 Self.Error_Reported := True; 214 215 else 216 Code := Aux; 217 end if; 218 end Character_Reference_To_Code_Point; 219 220 ---------------------------------------- 221 -- On_Asterisk_In_Content_Declaration -- 222 ---------------------------------------- 223 224 function On_Asterisk_In_Content_Declaration 225 (Self : in out Simple_Reader'Class) return Token is 226 begin 227 if Self.Whitespace_Matched then 228 Callbacks.Call_Fatal_Error 229 (Self, 230 League.Strings.To_Universal_String 231 ("[XML [47], [48], [51]] illegal whitespace before asterisk")); 232 233 return Error; 234 235 else 236 return Token_Asterisk; 237 end if; 238 end On_Asterisk_In_Content_Declaration; 239 240 ----------------------------------------------------- 241 -- On_Attribute_Name_In_Attribute_List_Declaration -- 242 ----------------------------------------------------- 243 244 function On_Attribute_Name_In_Attribute_List_Declaration 245 (Self : in out Simple_Reader'Class) return Token 246 is 247 Qname_Error : Boolean; 248 249 begin 250 -- [53] AttDef ::= S Name S AttType S DefaultDecl 251 -- 252 -- Checks whitespace before the attribute name is present. 253 254 if not Self.Whitespace_Matched then 255 Callbacks.Call_Fatal_Error 256 (Self, 257 League.Strings.To_Universal_String 258 ("[XML [53] AttDef]" 259 & " no whitespace before attribute name")); 260 261 return Error; 262 end if; 263 264 Self.Whitespace_Matched := False; 265 266 Resolve_Symbol 267 (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 268 269 if Qname_Error then 270 return Error; 271 272 else 273 Enter_Start_Condition (Self, Tables.ATTLIST_TYPE); 274 275 return Token_Name; 276 end if; 277 end On_Attribute_Name_In_Attribute_List_Declaration; 278 279 ----------------------- 280 -- On_Attribute_Type -- 281 ----------------------- 282 283 function On_Attribute_Type 284 (Self : in out Simple_Reader'Class; 285 Type_Token : Token) return Token is 286 begin 287 -- Checks ithat whitespace before attribute type keyword is detected 288 -- and report error when check fail. 289 290 if not Self.Whitespace_Matched then 291 -- XXX This is recoverable error. 292 293 Callbacks.Call_Fatal_Error 294 (Self, 295 League.Strings.To_Universal_String 296 ("whitespace required before attribute type")); 297 298 return Error; 299 end if; 300 301 Self.Whitespace_Matched := False; 302 303 return Type_Token; 304 end On_Attribute_Type; 305 306 --------------------------------------- 307 -- On_Attribute_Value_Character_Data -- 308 --------------------------------------- 309 310 procedure On_Attribute_Value_Character_Data 311 (Self : in out Simple_Reader'Class) 312 is 313 Next : Utf16_String_Index := Self.Scanner_State.YY_Base_Position; 314 Code : Code_Point; 315 316 begin 317 -- Allocates buffer of necessary size to avoid memory reallocation. It 318 -- can be larger when needed if attribute value normalization is 319 -- activated, but usually not too large. 320 321 Matreshka.Internals.Strings.Mutate 322 (Self.Character_Data, 323 Self.Character_Data.Unused 324 + Self.Scanner_State.YY_Current_Position 325 - Self.Scanner_State.YY_Base_Position 326 + 1); 327 328 -- Two mostly equivalent paths are separated, because they are on the 329 -- performance critical path. 330 331 if Self.Normalize_Value then 332 -- Normalization is required for attribute's value. 333 334 while Next /= Self.Scanner_State.YY_Current_Position loop 335 Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code); 336 337 -- It can be reasonable to implement this step of normalization 338 -- on SIMD. 339 340 if Code = Character_Tabulation 341 or Code = Line_Feed 342 or Code = Carriage_Return 343 then 344 Code := Space; 345 end if; 346 347 if Code = Space then 348 if not Self.Space_Before then 349 Unchecked_Store 350 (Self.Character_Data.Value, 351 Self.Character_Data.Unused, 352 Code); 353 Self.Character_Data.Length := Self.Character_Data.Length + 1; 354 Self.Space_Before := True; 355 end if; 356 357 else 358 Unchecked_Store 359 (Self.Character_Data.Value, 360 Self.Character_Data.Unused, 361 Code); 362 Self.Character_Data.Length := Self.Character_Data.Length + 1; 363 Self.Space_Before := False; 364 end if; 365 end loop; 366 367 else 368 -- XXX Can be optimized by adding special operation Append_Slice. 369 370 while Next /= Self.Scanner_State.YY_Current_Position loop 371 Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code); 372 373 -- It can be reasonable to implement this step of normalization 374 -- on SIMD. 375 376 if Code = Character_Tabulation 377 or Code = Line_Feed 378 or Code = Carriage_Return 379 then 380 Code := Space; 381 end if; 382 383 Unchecked_Store 384 (Self.Character_Data.Value, 385 Self.Character_Data.Unused, 386 Code); 387 Self.Character_Data.Length := Self.Character_Data.Length + 1; 388 end loop; 389 end if; 390 end On_Attribute_Value_Character_Data; 391 392 ---------------------------------------- 393 -- On_Attribute_Value_Close_Delimiter -- 394 ---------------------------------------- 395 396 function On_Attribute_Value_Close_Delimiter 397 (Self : in out Simple_Reader'Class) return Boolean 398 is 399 -- NOTE: Attribute value delimiter can be ' or " and both are 400 -- represented as single UTF-16 code unit, thus expensive UTF-16 401 -- decoding can be avoided. 402 403 Delimiter : constant Matreshka.Internals.Unicode.Code_Point 404 := Code_Point 405 (Self.Scanner_State.Data.Value 406 (Self.Scanner_State.YY_Base_Position)); 407 408 begin 409 if Self.Scanner_State.Delimiter /= Delimiter then 410 Matreshka.Internals.Strings.Operations.Unterminated_Append 411 (Self.Character_Data, Delimiter); 412 413 return False; 414 415 else 416 if Self.Normalize_Value and then Self.Space_Before then 417 -- One space character is at the end of the prepared string, it 418 -- must be removed from the result. 419 420 Self.Character_Data.Length := Self.Character_Data.Length - 1; 421 Self.Character_Data.Unused := Self.Character_Data.Unused - 1; 422 end if; 423 424 String_Handler.Fill_Null_Terminator (Self.Character_Data); 425 Matreshka.Internals.Strings.Reference (Self.Character_Data); 426 Set_String_Internal 427 (Item => Self.YYLVal, 428 String => Self.Character_Data, 429 Is_Whitespace => False); 430 Reset_Whitespace_Matched (Self); 431 Pop_Start_Condition (Self); 432 433 return True; 434 end if; 435 end On_Attribute_Value_Close_Delimiter; 436 437 ------------------------------------------- 438 -- On_Attribute_Value_In_XML_Declaration -- 439 ------------------------------------------- 440 441 function On_Attribute_Value_In_XML_Declaration 442 (Self : in out Simple_Reader'Class) return Token is 443 begin 444 Set_String_Internal 445 (Item => Self.YYLVal, 446 String => YY_Text (Self, 1, 1), 447 Is_Whitespace => False); 448 Reset_Whitespace_Matched (Self); 449 450 return Token_String_Segment; 451 end On_Attribute_Value_In_XML_Declaration; 452 453 --------------------------------------- 454 -- On_Attribute_Value_Open_Delimiter -- 455 --------------------------------------- 456 457 function On_Attribute_Value_Open_Delimiter 458 (Self : in out Simple_Reader'Class; 459 State : Interfaces.Unsigned_32) return Boolean is 460 begin 461 if not Self.Whitespace_Matched then 462 -- XXX This is recoverable error. 463 464 Callbacks.Call_Fatal_Error 465 (Self, 466 League.Strings.To_Universal_String 467 ("whitespace required before default value")); 468 469 return False; 470 end if; 471 472 On_Attribute_Value_Open_Delimiter (Self, State); 473 474 return True; 475 end On_Attribute_Value_Open_Delimiter; 476 477 --------------------------------------- 478 -- On_Attribute_Value_Open_Delimiter -- 479 --------------------------------------- 480 481 procedure On_Attribute_Value_Open_Delimiter 482 (Self : in out Simple_Reader'Class; 483 State : Interfaces.Unsigned_32) is 484 begin 485 -- NOTE: Attribute value delimiter can be ' or " and both are 486 -- represented as single UTF-16 code unit, thus expensive UTF-16 487 -- decoding can be avoided. 488 489 Self.Scanner_State.Delimiter := 490 Code_Point 491 (Self.Scanner_State.Data.Value (Self.Scanner_State.YY_Base_Position)); 492 Matreshka.Internals.Strings.Operations.Reset (Self.Character_Data); 493 494 case Self.Version is 495 when XML_1_0 => 496 Push_And_Enter_Start_Condition 497 (Self, State, Tables.ATTRIBUTE_VALUE_10); 498 499 when XML_1_1 => 500 Push_And_Enter_Start_Condition 501 (Self, State, Tables.ATTRIBUTE_VALUE_11); 502 end case; 503 end On_Attribute_Value_Open_Delimiter; 504 505 -------------- 506 -- On_CDATA -- 507 -------------- 508 509 function On_CDATA (Self : in out Simple_Reader'Class) return Token is 510 begin 511 -- Segment of CDATA section (production [20]) optionally terminated by 512 -- end of CDATA section mark (production [21]). 513 514 if Self.Scanner_State.YY_Current_Position 515 - Self.Scanner_State.YY_Base_Position >= 3 516 and then (Code_Point 517 (Self.Scanner_State.Data.Value 518 (Self.Scanner_State.YY_Current_Position - 1)) 519 = Greater_Than_Sign 520 and Code_Point 521 (Self.Scanner_State.Data.Value 522 (Self.Scanner_State.YY_Current_Position - 2)) 523 = Right_Square_Bracket 524 and Code_Point 525 (Self.Scanner_State.Data.Value 526 (Self.Scanner_State.YY_Current_Position - 3)) 527 = Right_Square_Bracket) 528 then 529 -- Character data ends with ']]>', move backward before end of CDATA 530 -- section literal. End of CDATA section literal will be processed 531 -- on next cycle. 532 533 YY_Move_Backward (Self); 534 YY_Move_Backward (Self); 535 YY_Move_Backward (Self); 536 end if; 537 538 Matreshka.Internals.Strings.Operations.Copy_Slice 539 (Self.Character_Data, 540 Self.Scanner_State.Data, 541 Self.Scanner_State.YY_Base_Position, 542 Self.Scanner_State.YY_Current_Position 543 - Self.Scanner_State.YY_Base_Position, 544 Self.Scanner_State.YY_Current_Index 545 - Self.Scanner_State.YY_Base_Index); 546 547 Matreshka.Internals.Strings.Reference (Self.Character_Data); 548 Set_String_Internal 549 (Item => Self.YYLVal, 550 String => Self.Character_Data, 551 Is_Whitespace => False); 552 553 return Token_String_Segment; 554 end On_CDATA; 555 556 ----------------------- 557 -- On_Character_Data -- 558 ----------------------- 559 560 function On_Character_Data 561 (Self : in out Simple_Reader'Class) return Token 562 is 563 C : constant Code_Point 564 := Code_Point 565 (Self.Scanner_State.Data.Value 566 (Self.Scanner_State.YY_Current_Position - 1)); 567 568 begin 569 if Self.Element_Names.Is_Empty then 570 -- Document content not entered. 571 572 Callbacks.Call_Fatal_Error 573 (Self, 574 League.Strings.To_Universal_String 575 ("Text may not appear after the root element")); 576 Self.Error_Reported := True; 577 578 return Error; 579 end if; 580 581 if C = Less_Than_Sign or C = Ampersand then 582 -- Matched string end with '<' or '&' which is start character of 583 -- tag or reference accordingly. 584 585 YY_Move_Backward (Self); 586 587 elsif C = Greater_Than_Sign 588 and then Self.Scanner_State.YY_Current_Position 589 - Self.Scanner_State.YY_Base_Position >= 3 590 and then (Code_Point 591 (Self.Scanner_State.Data.Value 592 (Self.Scanner_State.YY_Current_Position - 2)) 593 = Right_Square_Bracket 594 and Code_Point 595 (Self.Scanner_State.Data.Value 596 (Self.Scanner_State.YY_Current_Position - 3)) 597 = Right_Square_Bracket) 598 then 599 -- Matched string ends with literal ']]>' which is invalid in 600 -- character data. 601 602 if Self.Scanner_State.YY_Current_Position 603 - Self.Scanner_State.YY_Base_Position = 3 604 then 605 -- Exactly ']]>' found. 606 607 Callbacks.Call_Fatal_Error 608 (Self, 609 League.Strings.To_Universal_String 610 ("[[14] CharData]" 611 & " Text may not contain a literal ']]>' sequence")); 612 Self.Error_Reported := True; 613 614 return Error; 615 616 else 617 -- String ends with ']]>', move backward to report character data 618 -- in this cycle and report error in next cycle. 619 620 YY_Move_Backward (Self); 621 YY_Move_Backward (Self); 622 YY_Move_Backward (Self); 623 end if; 624 end if; 625 626 Matreshka.Internals.Strings.Operations.Copy_Slice 627 (Self.Character_Data, 628 Self.Scanner_State.Data, 629 Self.Scanner_State.YY_Base_Position, 630 Self.Scanner_State.YY_Current_Position 631 - Self.Scanner_State.YY_Base_Position, 632 Self.Scanner_State.YY_Current_Index 633 - Self.Scanner_State.YY_Base_Index); 634 635 Matreshka.Internals.Strings.Reference (Self.Character_Data); 636 Set_String_Internal 637 (Item => Self.YYLVal, 638 String => Self.Character_Data, 639 Is_Whitespace => False); 640 641 return Token_String_Segment; 642 end On_Character_Data; 643 644 ---------------------------- 645 -- On_Character_Reference -- 646 ---------------------------- 647 648 function On_Character_Reference 649 (Self : in out Simple_Reader'Class; 650 Hex : Boolean) return Token 651 is 652 Code : Code_Point; 653 Valid : Boolean; 654 655 begin 656 Character_Reference_To_Code_Point (Self, Hex, Code, Valid); 657 658 if not Valid then 659 return Error; 660 end if; 661 662 -- XXX Whitespace must be detected and reported in token. 663 664 if not Matreshka.Internals.Strings.Can_Be_Reused 665 (Self.Character_Buffer, 2) 666 then 667 -- Preallocated buffer can't be reused for some reason (most 668 -- probably because application made copy of the previous character 669 -- reference), so new buffer need to be preallocated. Requested 670 -- size of the buffer is maximum number of UTF-16 code unit to 671 -- store one Unicode code point. 672 673 Matreshka.Internals.Strings.Dereference (Self.Character_Buffer); 674 Self.Character_Buffer := Matreshka.Internals.Strings.Allocate (2); 675 end if; 676 677 Self.Character_Buffer.Unused := 0; 678 Self.Character_Buffer.Length := 1; 679 Matreshka.Internals.Utf16.Unchecked_Store 680 (Self.Character_Buffer.Value, 681 Self.Character_Buffer.Unused, 682 Code); 683 Matreshka.Internals.Strings.Reference (Self.Character_Buffer); 684 Set_String_Internal 685 (Item => Self.YYLVal, 686 String => Self.Character_Buffer, 687 Is_Whitespace => False); 688 689 return Token_String_Segment; 690 end On_Character_Reference; 691 692 ----------------------------------------------- 693 -- On_Character_Reference_In_Attribute_Value -- 694 ----------------------------------------------- 695 696 function On_Character_Reference_In_Attribute_Value 697 (Self : in out Simple_Reader'Class; 698 Hex : Boolean) return Boolean 699 is 700 Code : Code_Point; 701 Valid : Boolean; 702 703 begin 704 Character_Reference_To_Code_Point (Self, Hex, Code, Valid); 705 706 if not Valid then 707 return False; 708 end if; 709 710 if Self.Normalize_Value then 711 if Code = Space then 712 if not Self.Space_Before then 713 Matreshka.Internals.Strings.Operations.Unterminated_Append 714 (Self.Character_Data, Code); 715 Self.Space_Before := True; 716 end if; 717 718 else 719 Matreshka.Internals.Strings.Operations.Unterminated_Append 720 (Self.Character_Data, Code); 721 Self.Space_Before := False; 722 end if; 723 724 else 725 Matreshka.Internals.Strings.Operations.Unterminated_Append 726 (Self.Character_Data, Code); 727 end if; 728 729 return True; 730 end On_Character_Reference_In_Attribute_Value; 731 732 ----------------------- 733 -- On_Close_Of_CDATA -- 734 ----------------------- 735 736 function On_Close_Of_CDATA 737 (Self : in out Simple_Reader'Class) return Token is 738 begin 739 Pop_Start_Condition (Self); 740 741 return Token_CData_Close; 742 end On_Close_Of_CDATA; 743 744 ------------------------------------- 745 -- On_Close_Of_Conditional_Section -- 746 ------------------------------------- 747 748 function On_Close_Of_Conditional_Section 749 (Self : in out Simple_Reader'Class) return Token is 750 begin 751 if Self.Conditional_Depth = 0 then 752 Callbacks.Call_Fatal_Error 753 (Self, 754 League.Strings.To_Universal_String 755 ("']]>' doesn't close conditional section")); 756 757 return Error; 758 end if; 759 760 Self.Conditional_Depth := Self.Conditional_Depth - 1; 761 762 if Self.Ignore_Depth /= 0 then 763 Self.Ignore_Depth := Self.Ignore_Depth - 1; 764 end if; 765 766 if Self.Ignore_Depth /= 0 then 767 case Self.Version is 768 when XML_1_0 => 769 Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_10); 770 771 when XML_1_1 => 772 Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_11); 773 end case; 774 775 else 776 case Self.Version is 777 when XML_1_0 => 778 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10); 779 780 when XML_1_1 => 781 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11); 782 end case; 783 end if; 784 785 return Token_Conditional_Close; 786 end On_Close_Of_Conditional_Section; 787 788 ----------------------------- 789 -- On_Close_Of_Declaration -- 790 ----------------------------- 791 792 function On_Close_Of_Declaration 793 (Self : in out Simple_Reader'Class) return Token is 794 begin 795 case Self.Version is 796 when XML_1_0 => 797 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10); 798 799 when XML_1_1 => 800 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11); 801 end case; 802 803 return Token_Close; 804 end On_Close_Of_Declaration; 805 806 ------------------------------------------- 807 -- On_Close_Of_Document_Type_Declaration -- 808 ------------------------------------------- 809 810 function On_Close_Of_Document_Type_Declaration 811 (Self : in out Simple_Reader'Class) return Boolean 812 is 813 Success : Boolean; 814 pragma Unreferenced (Success); 815 816 begin 817 if Self.External_Subset_Entity /= No_Entity 818 and Self.Validation.Load_DTD 819 and not Self.External_Subset_Done 820 then 821 -- External subset is declared, need to be loaded and not processed, 822 -- push it into the scanner stack to process before reporting of 823 -- close of document type declaration. 824 825 Self.External_Subset_Done := True; 826 YY_Move_Backward (Self); 827 828 Success := 829 Scanner.Push_Entity 830 (Self => Self, 831 Entity => Self.External_Subset_Entity, 832 In_Document_Type => True, 833 In_Literal => False); 834 835 -- XXX Error processing is not implemented. 836 837 return False; 838 839 else 840 case Self.Version is 841 when XML_1_0 => 842 Enter_Start_Condition (Self, Tables.DOCUMENT_10); 843 844 when XML_1_1 => 845 Enter_Start_Condition (Self, Tables.DOCUMENT_11); 846 end case; 847 848 return True; 849 end if; 850 end On_Close_Of_Document_Type_Declaration; 851 852 ----------------------------------- 853 -- On_Close_Of_Empty_Element_Tag -- 854 ----------------------------------- 855 856 function On_Close_Of_Empty_Element_Tag 857 (Self : in out Simple_Reader'Class) return Token is 858 begin 859 case Self.Version is 860 when XML_1_0 => 861 Enter_Start_Condition (Self, Tables.DOCUMENT_10); 862 863 when XML_1_1 => 864 if Is_Internal_General_Entity 865 (Self.Entities, Self.Scanner_State.Entity) 866 then 867 -- Character references are resolved when replacement text of 868 -- internal general entity is constructed. In XML 1.1 character 869 -- references can refer to restricted characters which is not 870 -- valid in text, but valid in replacement text. 871 872 Enter_Start_Condition (Self, Tables.DOCUMENT_U11); 873 874 else 875 Enter_Start_Condition (Self, Tables.DOCUMENT_11); 876 end if; 877 end case; 878 879 return Token_Empty_Close; 880 end On_Close_Of_Empty_Element_Tag; 881 882 ---------------------------------------- 883 -- On_Close_Of_Processing_Instruction -- 884 ---------------------------------------- 885 886 function On_Close_Of_Processing_Instruction 887 (Self : in out Simple_Reader'Class; 888 Is_Empty : Boolean) return Token is 889 begin 890 if Is_Empty then 891 Set_String_Internal 892 (Item => Self.YYLVal, 893 String => Matreshka.Internals.Strings.Shared_Empty'Access, 894 Is_Whitespace => False); 895 896 else 897 if not Self.Whitespace_Matched then 898 raise Program_Error 899 with "no whitespace before processing instruction data"; 900 -- XXX This is recoverable error. 901 end if; 902 903 Set_String_Internal 904 (Item => Self.YYLVal, 905 String => YY_Text (Self, 0, 2), 906 Is_Whitespace => False); 907 end if; 908 909 Pop_Start_Condition (Self); 910 911 return Token_Pi_Close; 912 end On_Close_Of_Processing_Instruction; 913 914 --------------------- 915 -- On_Close_Of_Tag -- 916 --------------------- 917 918 function On_Close_Of_Tag 919 (Self : in out Simple_Reader'Class) return Token is 920 begin 921 case Self.Version is 922 when XML_1_0 => 923 Enter_Start_Condition (Self, Tables.DOCUMENT_10); 924 925 when XML_1_1 => 926 if Is_Internal_General_Entity 927 (Self.Entities, Self.Scanner_State.Entity) 928 then 929 -- Character references are resolved when replacement text of 930 -- internal general entity is constructed. In XML 1.1 character 931 -- references can refer to restricted characters which is not 932 -- valid in text, but valid in replacement text. 933 934 Enter_Start_Condition (Self, Tables.DOCUMENT_U11); 935 936 else 937 Enter_Start_Condition (Self, Tables.DOCUMENT_11); 938 end if; 939 end case; 940 941 return Token_Close; 942 end On_Close_Of_Tag; 943 944 ----------------------------------------- 945 -- On_Close_Of_XML_Or_Text_Declaration -- 946 ----------------------------------------- 947 948 function On_Close_Of_XML_Or_Text_Declaration 949 (Self : in out Simple_Reader'Class) return Token is 950 begin 951 Set_String_Internal 952 (Item => Self.YYLVal, 953 String => Matreshka.Internals.Strings.Shared_Empty'Access, 954 Is_Whitespace => False); 955 956 if Self.Scanner_State.Entity /= No_Entity then 957 -- End of text declaration of the external entity is reached, 958 -- save current position to start from it next time entity is 959 -- referenced. 960 961 Set_First_Position 962 (Self.Entities, 963 Self.Scanner_State.Entity, 964 Self.Scanner_State.YY_Current_Position); 965 end if; 966 967 Pop_Start_Condition (Self); 968 969 return Token_Pi_Close; 970 end On_Close_Of_XML_Or_Text_Declaration; 971 972 ------------------------------------------------- 973 -- On_Close_Parenthesis_In_Content_Declaration -- 974 ------------------------------------------------- 975 976 function On_Close_Parenthesis_In_Content_Declaration 977 (Self : in out Simple_Reader'Class) return Token is 978 begin 979 -- Whitespace can't be present between close parenthesis and 980 -- multiplicity indicator if any, so reset whitespace matching flag. 981 982 Self.Whitespace_Matched := False; 983 984 return Token_Close_Parenthesis; 985 end On_Close_Parenthesis_In_Content_Declaration; 986 987 ------------------------------------------------ 988 -- On_Close_Parenthesis_In_Notation_Attribute -- 989 ------------------------------------------------ 990 991 function On_Close_Parenthesis_In_Notation_Attribute 992 (Self : in out Simple_Reader'Class) return Token is 993 begin 994 -- Resets whitespace matching flag. 995 996 Self.Whitespace_Matched := False; 997 998 return Token_Close_Parenthesis; 999 end On_Close_Parenthesis_In_Notation_Attribute; 1000 1001 -------------------------------------- 1002 -- On_Conditional_Section_Directive -- 1003 -------------------------------------- 1004 1005 procedure On_Conditional_Section_Directive 1006 (Self : in out Simple_Reader'Class; 1007 Include : Boolean) is 1008 begin 1009 -- XXX Syntax check must be added! 1010 1011 Self.Conditional_Directive := True; 1012 Self.Conditional_Depth := Self.Conditional_Depth + 1; 1013 1014 if Self.Ignore_Depth /= 0 or not Include then 1015 Self.Ignore_Depth := Self.Ignore_Depth + 1; 1016 end if; 1017 end On_Conditional_Section_Directive; 1018 1019 ---------------------------------------------- 1020 -- On_Content_Of_Ignore_Conditional_Section -- 1021 ---------------------------------------------- 1022 1023 procedure On_Content_Of_Ignore_Conditional_Section 1024 (Self : in out Simple_Reader'Class) is 1025 begin 1026 YY_Move_Backward (Self); 1027 YY_Move_Backward (Self); 1028 YY_Move_Backward (Self); 1029 end On_Content_Of_Ignore_Conditional_Section; 1030 1031 ---------------------------- 1032 -- On_Default_Declaration -- 1033 ---------------------------- 1034 1035 function On_Default_Declaration 1036 (Self : in out Simple_Reader'Class; 1037 State : Interfaces.Unsigned_32; 1038 Default_Token : Token) return Token is 1039 begin 1040 -- Checks ithat whitespace before attribute type keyword is detected 1041 -- and report error when check fail. 1042 1043 if not Self.Whitespace_Matched then 1044 -- XXX This is recoverable error. 1045 1046 Callbacks.Call_Fatal_Error 1047 (Self, 1048 League.Strings.To_Universal_String 1049 ("whitespace required before default declaration")); 1050 1051 return Error; 1052 end if; 1053 1054 Self.Whitespace_Matched := False; 1055 Enter_Start_Condition (Self, State); 1056 1057 return Default_Token; 1058 end On_Default_Declaration; 1059 1060 --------------------------------------------------- 1061 -- On_Element_Name_In_Attribute_List_Declaration -- 1062 --------------------------------------------------- 1063 1064 function On_Element_Name_In_Attribute_List_Declaration 1065 (Self : in out Simple_Reader'Class) return Token 1066 is 1067 Qname_Error : Boolean; 1068 1069 begin 1070 -- [XML [52]] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 1071 -- 1072 -- Checks whitespace before the element name is present. 1073 1074 if not Self.Whitespace_Matched then 1075 Callbacks.Call_Fatal_Error 1076 (Self, 1077 League.Strings.To_Universal_String 1078 ("[XML [52] AttlistDecl]" 1079 & " no whitespace before element's name")); 1080 1081 return Error; 1082 end if; 1083 1084 Self.Whitespace_Matched := False; 1085 1086 Resolve_Symbol 1087 (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 1088 1089 if Qname_Error then 1090 return Error; 1091 1092 else 1093 Enter_Start_Condition (Self, Tables.ATTLIST_DECL); 1094 1095 return Token_Name; 1096 end if; 1097 end On_Element_Name_In_Attribute_List_Declaration; 1098 1099 ------------------------- 1100 -- On_Encoding_Keyword -- 1101 ------------------------- 1102 1103 function On_Encoding_Keyword 1104 (Self : in out Simple_Reader'Class) return Token is 1105 begin 1106 if not Self.Whitespace_Matched then 1107 Callbacks.Call_Fatal_Error 1108 (Self, 1109 League.Strings.To_Universal_String 1110 ("no whitespace before 'encoding'")); 1111 Self.Error_Reported := True; 1112 -- XXX This is recoverable error. 1113 1114 return Error; 1115 1116 else 1117 return Token_Encoding; 1118 end if; 1119 end On_Encoding_Keyword; 1120 1121 ------------------------------------- 1122 -- On_Entity_Value_Close_Delimiter -- 1123 ------------------------------------- 1124 1125 function On_Entity_Value_Close_Delimiter 1126 (Self : in out Simple_Reader'Class) return Token 1127 is 1128 -- NOTE: Entity value delimiter can be ' or " and both are 1129 -- represented as single UTF-16 code unit, thus expensive UTF-16 1130 -- decoding can be avoided. 1131 1132 Delimiter : constant Matreshka.Internals.Unicode.Code_Point 1133 := Code_Point 1134 (Self.Scanner_State.Data.Value 1135 (Self.Scanner_State.YY_Base_Position)); 1136 1137 begin 1138 if Self.Scanner_State.In_Literal 1139 or else Self.Scanner_State.Delimiter /= Delimiter 1140 then 1141 Set_String_Internal 1142 (Item => Self.YYLVal, 1143 String => YY_Text (Self), 1144 Is_Whitespace => False); 1145 1146 return Token_String_Segment; 1147 1148 else 1149 Enter_Start_Condition (Self, Tables.ENTITY_DEF); 1150 1151 return Token_Value_Close; 1152 end if; 1153 end On_Entity_Value_Close_Delimiter; 1154 1155 ------------------------------------ 1156 -- On_Entity_Value_Open_Delimiter -- 1157 ------------------------------------ 1158 1159 function On_Entity_Value_Open_Delimiter 1160 (Self : in out Simple_Reader'Class) return Token is 1161 begin 1162 -- NOTE: Entity value delimiter can be ' or " and both are 1163 -- represented as single UTF-16 code unit, thus expensive UTF-16 1164 -- decoding can be avoided. 1165 1166 Self.Scanner_State.Delimiter := 1167 Code_Point 1168 (Self.Scanner_State.Data.Value (Self.Scanner_State.YY_Base_Position)); 1169 1170 if not Self.Whitespace_Matched then 1171 Callbacks.Call_Fatal_Error 1172 (Self, 1173 League.Strings.To_Universal_String 1174 ("[[71] GEDecl, [72] PEDecl]" 1175 & " no whitespace before entity value")); 1176 1177 return Error; 1178 end if; 1179 1180 Self.Whitespace_Matched := False; 1181 1182 case Self.Version is 1183 when XML_1_0 => 1184 Enter_Start_Condition (Self, Tables.ENTITY_VALUE_10); 1185 1186 when XML_1_1 => 1187 Enter_Start_Condition (Self, Tables.ENTITY_VALUE_11); 1188 end case; 1189 1190 return Token_Value_Open; 1191 end On_Entity_Value_Open_Delimiter; 1192 1193 ---------------------------------------------------- 1194 -- On_General_Entity_Reference_In_Attribute_Value -- 1195 ---------------------------------------------------- 1196 1197 function On_General_Entity_Reference_In_Attribute_Value 1198 (Self : in out Simple_Reader'Class) return Boolean 1199 is 1200 Qualified_Name : Symbol_Identifier; 1201 Qname_Error : Boolean; 1202 Entity : Entity_Identifier; 1203 State : Scanner_State_Information; 1204 1205 begin 1206 Resolve_Symbol 1207 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 1208 1209 if Qname_Error then 1210 return False; 1211 end if; 1212 1213 Entity := General_Entity (Self.Symbols, Qualified_Name); 1214 1215 -- [XML1.1 4.1 WFC: Entity Declared] 1216 -- 1217 -- "In a document without any DTD, a document with only an internal 1218 -- DTD subset which contains no parameter entity references, or a 1219 -- document with "standalone='yes'", for an entity reference that 1220 -- does not occur within the external subset or a parameter entity, 1221 -- the Name given in the entity reference MUST match that in an 1222 -- entity declaration that does not occur within the external subset 1223 -- or a parameter entity, except that well-formed documents need not 1224 -- declare any of the following entities: amp, lt, gt, apos, quot. 1225 -- The declaration of a general entity MUST precede any reference 1226 -- to it which appears in a default value in an attribute-list 1227 -- declaration. 1228 -- 1229 -- Note that non-validating processors are not obligated to to read 1230 -- and process entity declarations occurring in parameter entities 1231 -- or in the external subset; for such documents, the rule that an 1232 -- entity must be declared is a well-formedness constraint only if 1233 -- standalone='yes'." 1234 -- 1235 -- Check whether entity is declared. 1236 -- 1237 -- XXX This is probably too strong check, need to be arranged with 1238 -- standalone documents and validation. 1239 1240 if Entity = No_Entity then 1241 Callbacks.Call_Fatal_Error 1242 (Self, 1243 League.Strings.To_Universal_String 1244 ("[XML1.1 4.1 WFC: Entity Declared]" 1245 & " general entity must be declared")); 1246 1247 return False; 1248 1249 elsif Enclosing_Entity (Self.Entities, Entity) = No_Entity then 1250 -- All predefined entities doesn't have enclosing entity. 1251 1252 null; 1253 1254 elsif Self.Is_Standalone 1255 and not Is_Parameter_Entity (Self.Entities, Self.Scanner_State.Entity) 1256 and not Is_External_Subset (Self.Entities, Self.Scanner_State.Entity) 1257 and not Is_Document_Entity 1258 (Self.Entities, Enclosing_Entity (Self.Entities, Entity)) 1259 then 1260 Callbacks.Call_Fatal_Error 1261 (Self, 1262 League.Strings.To_Universal_String 1263 ("[XML1.1 4.1 WFC: Entity Declared]" 1264 & " general entity must not be declared externally")); 1265 1266 return False; 1267 end if; 1268 1269 -- [XML1.1 4.1 WFC: Parsed Entity] 1270 -- 1271 -- "An entity reference MUST NOT contain the name of an unparsed 1272 -- entity. Unparsed entities may be referred to only in attribute 1273 -- values declared to be of type ENTITY or ENTITIES." 1274 -- 1275 -- Check whether referenced entity is not unparsed external general 1276 -- entity. XXX Attribute's value type must be checked also. 1277 1278 if Is_External_Unparsed_General_Entity (Self.Entities, Entity) then 1279 Callbacks.Call_Fatal_Error 1280 (Self, 1281 League.Strings.To_Universal_String 1282 ("[XML1.1 4.1 WFC: Parsed Entity]" 1283 & " an entity reference must not contain the name of an" 1284 & " unparsed entity")); 1285 1286 return False; 1287 end if; 1288 1289 -- [XML1.1 3.1 WFC: No External Entity References] 1290 -- 1291 -- "Attribute values MUST NOT contain direct or indirect entity 1292 -- references to external entities." 1293 -- 1294 -- Check whether referenced entity is not parsed external general 1295 -- entity. 1296 1297 if Is_External_Parsed_General_Entity (Self.Entities, Entity) then 1298 Callbacks.Call_Fatal_Error 1299 (Self, 1300 League.Strings.To_Universal_String 1301 ("[XML1.1 3.1 WFC: No External Entity References]" 1302 & " attribute value must not contain entity reference to" 1303 & " external entity")); 1304 1305 return False; 1306 end if; 1307 1308 -- [XML1.1 4.1 WFC: No Recursion] 1309 -- 1310 -- "A parsed entity MUST NOT contain a recursive reference to itself, 1311 -- either directly or indirectly." 1312 -- 1313 -- Check whether there is no replacement text of the same entity in the 1314 -- scanner stack. 1315 1316 if Self.Scanner_State.Entity = Entity then 1317 Callbacks.Call_Fatal_Error 1318 (Self, 1319 League.Strings.To_Universal_String 1320 ("[XML1.1 4.1 WFC: No Recursion]" 1321 & " parsed entity must not containt a direct recursive" 1322 & " reference to itself")); 1323 1324 return False; 1325 end if; 1326 1327 for J in 1 .. Integer (Self.Scanner_Stack.Length) loop 1328 State := Self.Scanner_Stack.Element (J); 1329 1330 if State.Entity = Entity then 1331 Callbacks.Call_Fatal_Error 1332 (Self, 1333 League.Strings.To_Universal_String 1334 ("[XML1.1 4.1 WFC: No Recursion]" 1335 & " parsed entity must not containt a indirect recursive" 1336 & " reference to itself")); 1337 1338 return False; 1339 end if; 1340 end loop; 1341 1342 return 1343 Push_Entity 1344 (Self => Self, 1345 Entity => Entity, 1346 In_Document_Type => False, 1347 In_Literal => True); 1348 end On_General_Entity_Reference_In_Attribute_Value; 1349 1350 ----------------------------------------------------- 1351 -- On_General_Entity_Reference_In_Document_Content -- 1352 ----------------------------------------------------- 1353 1354 function On_General_Entity_Reference_In_Document_Content 1355 (Self : in out Simple_Reader'Class) return Token 1356 is 1357 Qualified_Name : Symbol_Identifier; 1358 Qname_Error : Boolean; 1359 Entity : Entity_Identifier; 1360 State : Scanner_State_Information; 1361 Deep : Natural; 1362 1363 begin 1364 Resolve_Symbol 1365 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 1366 1367 if Qname_Error then 1368 return Error; 1369 end if; 1370 1371 -- [1] document ::= 1372 -- ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) 1373 -- 1374 -- [39] element ::= EmptyElemTag | STag content ETag 1375 -- 1376 -- [43] content ::= 1377 -- CharData? 1378 -- ((element | Reference | CDSect | PI | Comment) CharData?)* 1379 -- 1380 -- Check that entity is referenced inside element content. 1381 1382 if Self.Element_Names.Is_Empty then 1383 Callbacks.Call_Fatal_Error 1384 (Self, 1385 League.Strings.To_Universal_String 1386 ("entity reference must be in content of element")); 1387 1388 return Error; 1389 end if; 1390 1391 Entity := General_Entity (Self.Symbols, Qualified_Name); 1392 1393 -- [XML1.1 4.1 WFC: Entity Declared] 1394 -- 1395 -- "In a document without any DTD, a document with only an internal 1396 -- DTD subset which contains no parameter entity references, or a 1397 -- document with "standalone='yes'", for an entity reference that 1398 -- does not occur within the external subset or a parameter entity, 1399 -- the Name given in the entity reference MUST match that in an 1400 -- entity declaration that does not occur within the external subset 1401 -- or a parameter entity, except that well-formed documents need not 1402 -- declare any of the following entities: amp, lt, gt, apos, quot. 1403 -- The declaration of a general entity MUST precede any reference 1404 -- to it which appears in a default value in an attribute-list 1405 -- declaration. 1406 -- 1407 -- Note that non-validating processors are not obligated to to read 1408 -- and process entity declarations occurring in parameter entities 1409 -- or in the external subset; for such documents, the rule that an 1410 -- entity must be declared is a well-formedness constraint only if 1411 -- standalone='yes'." 1412 -- 1413 -- Check whether entity is declared. 1414 -- 1415 -- XXX This is probably too strong check, need to be arranged with 1416 -- standalone documents and validation. 1417 1418 if Entity = No_Entity then 1419 Callbacks.Call_Fatal_Error 1420 (Self, 1421 League.Strings.To_Universal_String 1422 ("[XML1.1 4.1 WFC: Entity Declared]" 1423 & " general entity must be declared")); 1424 1425 return Error; 1426 1427 elsif Enclosing_Entity (Self.Entities, Entity) = No_Entity then 1428 -- All predefined entities doesn't have enclosing entity. 1429 1430 null; 1431 1432 elsif Self.Is_Standalone 1433 and not Is_Parameter_Entity (Self.Entities, Self.Scanner_State.Entity) 1434 and not Is_External_Subset (Self.Entities, Self.Scanner_State.Entity) 1435 and not Is_Document_Entity 1436 (Self.Entities, Enclosing_Entity (Self.Entities, Entity)) 1437 then 1438 Callbacks.Call_Fatal_Error 1439 (Self, 1440 League.Strings.To_Universal_String 1441 ("[XML1.1 4.1 WFC: Entity Declared]" 1442 & " general entity must not be declared externally")); 1443 1444 return Error; 1445 end if; 1446 1447 -- [XML1.1 4.1 WFC: Parsed Entity] 1448 -- 1449 -- "An entity reference MUST NOT contain the name of an unparsed 1450 -- entity. Unparsed entities may be referred to only in attribute 1451 -- values declared to be of type ENTITY or ENTITIES." 1452 -- 1453 -- Check whether referenced entity is not unparsed external general 1454 -- entity. 1455 1456 if Is_External_Unparsed_General_Entity (Self.Entities, Entity) then 1457 Callbacks.Call_Fatal_Error 1458 (Self, 1459 League.Strings.To_Universal_String 1460 ("[XML1.1 4.1 WFC: Parsed Entity]" 1461 & " an entity reference must not contain the name of an" 1462 & " unparsed entity")); 1463 1464 return Error; 1465 end if; 1466 1467 -- [XML1.1 4.1 WFC: No Recursion] 1468 -- 1469 -- "A parsed entity MUST NOT contain a recursive reference to itself, 1470 -- either directly or indirectly." 1471 -- 1472 -- Check whether there is no replacement text of the same entity in the 1473 -- scanner stack. 1474 1475 if Self.Scanner_State.Entity = Entity then 1476 Callbacks.Call_Fatal_Error 1477 (Self, 1478 League.Strings.To_Universal_String 1479 ("[XML1.1 4.1 WFC: No Recursion]" 1480 & " parsed entity must not containt a direct recursive" 1481 & " reference to itself")); 1482 1483 return Error; 1484 end if; 1485 1486 for J in 1 .. Integer (Self.Scanner_Stack.Length) loop 1487 State := Self.Scanner_Stack.Element (J); 1488 1489 if State.Entity = Entity then 1490 Callbacks.Call_Fatal_Error 1491 (Self, 1492 League.Strings.To_Universal_String 1493 ("[XML1.1 4.1 WFC: No Recursion]" 1494 & " parsed entity must not containt a indirect recursive" 1495 & " reference to itself")); 1496 1497 return Error; 1498 end if; 1499 end loop; 1500 1501 Deep := Integer (Self.Scanner_Stack.Length); 1502 1503 if not Push_Entity 1504 (Self => Self, 1505 Entity => Entity, 1506 In_Document_Type => False, 1507 In_Literal => False) 1508 then 1509 return Error; 1510 1511 elsif Deep = Integer (Self.Scanner_Stack.Length) then 1512 -- Entity doesn't pushed in stack because its replacement text 1513 -- is empty. 1514 1515 return End_Of_Input; 1516 1517 else 1518 Self.Scanner_State.Start_Issued := True; 1519 1520 return Token_Entity_Start; 1521 end if; 1522 end On_General_Entity_Reference_In_Document_Content; 1523 1524 ------------------------------------------------- 1525 -- On_General_Entity_Reference_In_Entity_Value -- 1526 ------------------------------------------------- 1527 1528 function On_General_Entity_Reference_In_Entity_Value 1529 (Self : in out Simple_Reader'Class) return Token 1530 is 1531 Qualified_Name : Symbol_Identifier; 1532 Qname_Error : Boolean; 1533 1534 begin 1535 Resolve_Symbol 1536 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 1537 1538 if Qname_Error then 1539 return Error; 1540 1541 else 1542 Set_String_Internal 1543 (Item => Self.YYLVal, 1544 String => YY_Text (Self), 1545 Is_Whitespace => False); 1546 1547 return Token_String_Segment; 1548 end if; 1549 end On_General_Entity_Reference_In_Entity_Value; 1550 1551 ------------------------------------------ 1552 -- On_Less_Than_Sign_In_Attribute_Value -- 1553 ------------------------------------------ 1554 1555 function On_Less_Than_Sign_In_Attribute_Value 1556 (Self : in out Simple_Reader'Class) return Token is 1557 begin 1558 -- [3.1 WFC: No < in Attribute Values] 1559 -- 1560 -- "The replacement text of any entity referred to directly or 1561 -- indirectly in an attribute value MUST NOT contain a <." 1562 1563 Callbacks.Call_Fatal_Error 1564 (Self, 1565 League.Strings.To_Universal_String 1566 ("[3.1 WFC: No < in Attribute Values]" 1567 & " '<' can't be used in attribute value")); 1568 Self.Error_Reported := True; 1569 1570 return Error; 1571 end On_Less_Than_Sign_In_Attribute_Value; 1572 1573 ---------------------------------------------------- 1574 -- On_Name_In_Attribute_List_Declaration_Notation -- 1575 ---------------------------------------------------- 1576 1577 function On_Name_In_Attribute_List_Declaration_Notation 1578 (Self : in out Simple_Reader'Class) return Token 1579 is 1580 Qname_Error : Boolean; 1581 1582 begin 1583 -- [XMLNS 7] 1584 -- 1585 -- "It follows that in a namespace-well-formed document: 1586 -- 1587 -- - All element and attribute names contain either zero or one colon; 1588 -- 1589 -- - No entity names, processing instruction targets, or notation 1590 -- names contain any colons." 1591 -- 1592 -- This code is used to handle names in both NOTATION and enumeration 1593 -- attribute declarations, thus it must distinguish colon handling. 1594 1595 Resolve_Symbol 1596 (Self, 1597 0, 1598 0, 1599 False, 1600 True, 1601 not Self.Notation_Attribute, 1602 Qname_Error, 1603 Self.YYLVal.Symbol); 1604 1605 if Qname_Error then 1606 return Error; 1607 1608 else 1609 return Token_Name; 1610 end if; 1611 end On_Name_In_Attribute_List_Declaration_Notation; 1612 1613 ------------------------------------ 1614 -- On_Name_In_Element_Declaration -- 1615 ------------------------------------ 1616 1617 function On_Name_In_Element_Declaration 1618 (Self : in out Simple_Reader'Class) return Token 1619 is 1620 Qname_Error : Boolean; 1621 1622 begin 1623 -- Production [45] requires whitespace after name and before content 1624 -- specification, so whitespace indicator is reset here. 1625 1626 Self.Whitespace_Matched := False; 1627 1628 Resolve_Symbol 1629 (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 1630 1631 if Qname_Error then 1632 return Error; 1633 1634 else 1635 Enter_Start_Condition (Self, Tables.ELEMENT_DECL); 1636 1637 return Token_Name; 1638 end if; 1639 end On_Name_In_Element_Declaration; 1640 1641 --------------------------------------------- 1642 -- On_Name_In_Element_Declaration_Children -- 1643 --------------------------------------------- 1644 1645 function On_Name_In_Element_Declaration_Children 1646 (Self : in out Simple_Reader'Class) return Token 1647 is 1648 Qname_Error : Boolean; 1649 1650 begin 1651 -- Production [48] checks that no whitespace separates Name from 1652 -- following multiplicity indicator, so whitespace indicator must be 1653 -- reset here. 1654 1655 Self.Whitespace_Matched := False; 1656 1657 Resolve_Symbol 1658 (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 1659 1660 if Qname_Error then 1661 return Error; 1662 1663 else 1664 return Token_Name; 1665 end if; 1666 end On_Name_In_Element_Declaration_Children; 1667 1668 ---------------------------------- 1669 -- On_Name_In_Element_Start_Tag -- 1670 ---------------------------------- 1671 1672 function On_Name_In_Element_Start_Tag 1673 (Self : in out Simple_Reader'Class) return Token 1674 is 1675 Qname_Error : Boolean; 1676 1677 begin 1678 if not Self.Whitespace_Matched then 1679 Callbacks.Call_Fatal_Error 1680 (Self, 1681 League.Strings.To_Universal_String 1682 ("whitespace is missing before attribute name")); 1683 -- XXX It is recoverable error. 1684 1685 return Error; 1686 end if; 1687 1688 Resolve_Symbol 1689 (Self, 0, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 1690 1691 if Qname_Error then 1692 return Error; 1693 1694 else 1695 return Token_Name; 1696 end if; 1697 end On_Name_In_Element_Start_Tag; 1698 1699 ----------------------------------- 1700 -- On_Name_In_Entity_Declaration -- 1701 ----------------------------------- 1702 1703 function On_Name_In_Entity_Declaration 1704 (Self : in out Simple_Reader'Class) return Token 1705 is 1706 Qname_Error : Boolean; 1707 1708 begin 1709 -- [XML1.1 4.2] 1710 -- 1711 -- [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 1712 -- [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 1713 -- 1714 -- Check whether whitespace is present before the name. 1715 1716 if not Self.Whitespace_Matched then 1717 Callbacks.Call_Fatal_Error 1718 (Self, 1719 League.Strings.To_Universal_String 1720 ("[XML1.1 4.2 productions [71], [72]]" 1721 & " whitespace must be present before the name")); 1722 -- XXX This is recoverable error. 1723 1724 return Error; 1725 end if; 1726 1727 Resolve_Symbol 1728 (Self, 0, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol); 1729 1730 if Qname_Error then 1731 return Error; 1732 1733 else 1734 Self.Whitespace_Matched := False; 1735 Enter_Start_Condition (Self, Tables.ENTITY_DEF); 1736 1737 return Token_Name; 1738 end if; 1739 end On_Name_In_Entity_Declaration; 1740 1741 -------------------------------------------- 1742 -- On_Name_In_Entity_Declaration_Notation -- 1743 -------------------------------------------- 1744 1745 function On_Name_In_Entity_Declaration_Notation 1746 (Self : in out Simple_Reader'Class) return Token 1747 is 1748 Qname_Error : Boolean; 1749 1750 begin 1751 -- [XML1.1 4.2.2] 1752 -- 1753 -- [76] NDataDecl ::= S 'NDATA' S Name 1754 -- 1755 -- Check whether whitespace is present before the name. 1756 1757 if not Self.Whitespace_Matched then 1758 Callbacks.Call_Fatal_Error 1759 (Self, 1760 League.Strings.To_Universal_String 1761 ("[XML1.1 4.2 production [76]]" 1762 & " whitespace must be present before the name of notation")); 1763 -- XXX This is recoverable error. 1764 1765 return Error; 1766 end if; 1767 1768 Resolve_Symbol 1769 (Self, 0, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol); 1770 1771 if Qname_Error then 1772 return Error; 1773 1774 else 1775 Enter_Start_Condition (Self, Tables.ENTITY_DEF); 1776 1777 return Token_Name; 1778 end if; 1779 end On_Name_In_Entity_Declaration_Notation; 1780 1781 -------------- 1782 -- On_NDATA -- 1783 -------------- 1784 1785 function On_NDATA (Self : in out Simple_Reader'Class) return Token is 1786 begin 1787 if not Self.Whitespace_Matched then 1788 -- XXX This is recoverable error. 1789 1790 Callbacks.Call_Fatal_Error 1791 (Self, 1792 League.Strings.To_Universal_String 1793 ("whitespace required before NDATA")); 1794 Self.Error_Reported := True; 1795 1796 return Error; 1797 1798 else 1799 Self.Whitespace_Matched := False; 1800 Enter_Start_Condition (Self, Tables.ENTITY_NDATA); 1801 1802 return Token_Ndata; 1803 end if; 1804 end On_NDATA; 1805 1806 --------------------------- 1807 -- On_No_XML_Declaration -- 1808 --------------------------- 1809 1810 procedure On_No_XML_Declaration (Self : in out Simple_Reader'Class) is 1811 begin 1812 -- Move scanner's position back to the start of the document or external 1813 -- parsed entity. Entity's XML version and encoding are set up 1814 -- automatically. 1815 1816 YY_Move_Backward (Self); 1817 Pop_Start_Condition (Self); 1818 end On_No_XML_Declaration; 1819 1820 ------------------------------------------- 1821 -- On_Open_Of_Attribute_List_Declaration -- 1822 ------------------------------------------- 1823 1824 function On_Open_Of_Attribute_List_Declaration 1825 (Self : in out Simple_Reader'Class) return Token is 1826 begin 1827 Enter_Start_Condition (Self, Tables.ATTLIST_NAME); 1828 Self.Whitespace_Matched := False; 1829 1830 return Token_Attlist_Decl_Open; 1831 end On_Open_Of_Attribute_List_Declaration; 1832 1833 ---------------------- 1834 -- On_Open_Of_CDATA -- 1835 ---------------------- 1836 1837 function On_Open_Of_CDATA 1838 (Self : in out Simple_Reader'Class) return Token 1839 is 1840 Condition : Interfaces.Unsigned_32; 1841 1842 begin 1843 case Start_Condition (Self) is 1844 when Tables.DOCUMENT_10 => 1845 Condition := Tables.CDATA_10; 1846 1847 when Tables.DOCUMENT_11 => 1848 Condition := Tables.CDATA_11; 1849 1850 when Tables.DOCUMENT_U11 => 1851 Condition := Tables.CDATA_U11; 1852 1853 when others => 1854 raise Program_Error; 1855 end case; 1856 1857 Push_Current_And_Enter_Start_Condition (Self, Condition); 1858 1859 return Token_CData_Open; 1860 end On_Open_Of_CDATA; 1861 1862 ------------------------------------ 1863 -- On_Open_Of_Conditional_Section -- 1864 ------------------------------------ 1865 1866 function On_Open_Of_Conditional_Section 1867 (Self : in out Simple_Reader'Class) return Token is 1868 begin 1869 -- [XML [28b], [31]] Conditional section can be present only in external 1870 -- subset of DTD. 1871 1872 if Is_Document_Entity (Self.Entities, Self.Scanner_State.Entity) then 1873 Callbacks.Call_Fatal_Error 1874 (Self, 1875 League.Strings.To_Universal_String 1876 ("[XML [28b], [31]]" 1877 & " conditional sections may only appear in the external" 1878 & " DTD subset")); 1879 1880 return Error; 1881 end if; 1882 1883 if Self.Ignore_Depth = 0 then 1884 Enter_Start_Condition (Self, Tables.CONDITIONAL_DIRECTIVE); 1885 Self.Conditional_Directive := False; 1886 1887 else 1888 Self.Conditional_Depth := Self.Conditional_Depth + 1; 1889 Self.Ignore_Depth := Self.Ignore_Depth + 1; 1890 Self.Conditional_Directive := True; 1891 end if; 1892 1893 return Token_Conditional_Open; 1894 end On_Open_Of_Conditional_Section; 1895 1896 -------------------------------------------- 1897 -- On_Open_Of_Conditional_Section_Content -- 1898 -------------------------------------------- 1899 1900 function On_Open_Of_Conditional_Section_Content 1901 (Self : in out Simple_Reader'Class) return Boolean is 1902 begin 1903 -- XXX Syntax rules must be checked! 1904 1905 if not Self.Conditional_Directive then 1906 Callbacks.Call_Fatal_Error 1907 (Self, 1908 League.Strings.To_Universal_String 1909 ("conditional directive is missing")); 1910 1911 return False; 1912 end if; 1913 1914 if Self.Ignore_Depth /= 0 then 1915 case Self.Version is 1916 when XML_1_0 => 1917 Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_10); 1918 1919 when XML_1_1 => 1920 Enter_Start_Condition (Self, Tables.CONDITIONAL_IGNORE_11); 1921 end case; 1922 1923 else 1924 case Self.Version is 1925 when XML_1_0 => 1926 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10); 1927 1928 when XML_1_1 => 1929 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11); 1930 end case; 1931 end if; 1932 1933 return True; 1934 end On_Open_Of_Conditional_Section_Content; 1935 1936 ------------------------------------------ 1937 -- On_Open_Of_Document_Type_Declaration -- 1938 ------------------------------------------ 1939 1940 function On_Open_Of_Document_Type_Declaration 1941 (Self : in out Simple_Reader'Class) return Token 1942 is 1943 Qname_Error : Boolean; 1944 1945 begin 1946 Resolve_Symbol 1947 (Self, 10, 0, True, True, False, Qname_Error, Self.YYLVal.Symbol); 1948 1949 if Qname_Error then 1950 return Error; 1951 1952 else 1953 Enter_Start_Condition (Self, Tables.DOCTYPE_EXTINT); 1954 1955 return Token_Doctype_Decl_Open; 1956 end if; 1957 end On_Open_Of_Document_Type_Declaration; 1958 1959 ------------------------------------ 1960 -- On_Open_Of_Element_Declaration -- 1961 ------------------------------------ 1962 1963 function On_Open_Of_Element_Declaration 1964 (Self : in out Simple_Reader'Class) return Token is 1965 begin 1966 Enter_Start_Condition (Self, Tables.ELEMENT_NAME); 1967 1968 return Token_Element_Decl_Open; 1969 end On_Open_Of_Element_Declaration; 1970 1971 ------------------------ 1972 -- On_Open_Of_End_Tag -- 1973 ------------------------ 1974 1975 function On_Open_Of_End_Tag 1976 (Self : in out Simple_Reader'Class) return Token 1977 is 1978 Qname_Error : Boolean; 1979 1980 begin 1981 Resolve_Symbol 1982 (Self, 2, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 1983 1984 if Qname_Error then 1985 return Error; 1986 1987 else 1988 Enter_Start_Condition (Self, Tables.ELEMENT_START); 1989 1990 return Token_End_Open; 1991 end if; 1992 end On_Open_Of_End_Tag; 1993 1994 -------------------------------- 1995 -- On_Open_Of_Internal_Subset -- 1996 -------------------------------- 1997 1998 function On_Open_Of_Internal_Subset 1999 (Self : in out Simple_Reader'Class) return Token is 2000 begin 2001 case Self.Version is 2002 when XML_1_0 => 2003 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_10); 2004 2005 when XML_1_1 => 2006 Enter_Start_Condition (Self, Tables.DOCTYPE_INTSUBSET_11); 2007 end case; 2008 2009 return Token_Internal_Subset_Open; 2010 end On_Open_Of_Internal_Subset; 2011 2012 ------------------------------------- 2013 -- On_Open_Of_Notation_Declaration -- 2014 ------------------------------------- 2015 2016 function On_Open_Of_Notation_Declaration 2017 (Self : in out Simple_Reader'Class) return Token 2018 is 2019 Qname_Error : Boolean; 2020 2021 begin 2022 Resolve_Symbol 2023 (Self, 10, 0, True, False, False, Qname_Error, Self.YYLVal.Symbol); 2024 2025 if Qname_Error then 2026 return Error; 2027 2028 else 2029 Push_Current_And_Enter_Start_Condition (Self, Tables.NOTATION_DECL); 2030 2031 return Token_Notation_Decl_Open; 2032 end if; 2033 end On_Open_Of_Notation_Declaration; 2034 2035 --------------------------------------- 2036 -- On_Open_Of_Processing_Instruction -- 2037 --------------------------------------- 2038 2039 function On_Open_Of_Processing_Instruction 2040 (Self : in out Simple_Reader'Class) return Token 2041 is 2042 Qname_Error : Boolean; 2043 2044 begin 2045 Resolve_Symbol 2046 (Self, 2, 0, False, False, False, Qname_Error, Self.YYLVal.Symbol); 2047 2048 if Qname_Error then 2049 return Error; 2050 2051 else 2052 Push_And_Enter_Start_Condition 2053 (Self, Start_Condition (Self), Tables.PI); 2054 Reset_Whitespace_Matched (Self); 2055 2056 return Token_Pi_Open; 2057 end if; 2058 end On_Open_Of_Processing_Instruction; 2059 2060 -------------------------- 2061 -- On_Open_Of_Start_Tag -- 2062 -------------------------- 2063 2064 function On_Open_Of_Start_Tag 2065 (Self : in out Simple_Reader'Class) return Token 2066 is 2067 Qname_Error : Boolean; 2068 2069 begin 2070 Resolve_Symbol 2071 (Self, 1, 0, False, True, False, Qname_Error, Self.YYLVal.Symbol); 2072 2073 if Qname_Error then 2074 return Error; 2075 2076 else 2077 Enter_Start_Condition (Self, Tables.ELEMENT_START); 2078 2079 return Token_Element_Open; 2080 end if; 2081 end On_Open_Of_Start_Tag; 2082 2083 ---------------------------------------- 2084 -- On_Open_Of_XML_Or_Text_Declaration -- 2085 ---------------------------------------- 2086 2087 function On_Open_Of_XML_Or_Text_Declaration 2088 (Self : in out Simple_Reader'Class) return Token is 2089 begin 2090 Self.Whitespace_Matched := False; 2091 2092 Push_And_Enter_Start_Condition 2093 (Self, Start_Condition (Self), Tables.XML_DECL); 2094 2095 return Token_Xml_Decl_Open; 2096 end On_Open_Of_XML_Or_Text_Declaration; 2097 2098 ------------------------------------------------ 2099 -- On_Open_Parenthesis_In_Content_Declaration -- 2100 ------------------------------------------------ 2101 2102 function On_Open_Parenthesis_In_Content_Declaration 2103 (Self : in out Simple_Reader'Class) return Token 2104 is 2105 use type Interfaces.Unsigned_32; 2106 2107 begin 2108 if Start_Condition (Self) = Tables.ELEMENT_DECL then 2109 -- Check whitespace from rule [45] elementdecl. This subprogram 2110 -- changes scanner's start condition, so handing of nested 2111 -- declarations skip check below. 2112 2113 if not Self.Whitespace_Matched then 2114 Callbacks.Call_Fatal_Error 2115 (Self, 2116 League.Strings.To_Universal_String 2117 ("[XML [45]] no whitespace after name")); 2118 2119 return Error; 2120 end if; 2121 2122 Enter_Start_Condition (Self, Tables.ELEMENT_CHILDREN); 2123 end if; 2124 2125 return Token_Open_Parenthesis; 2126 end On_Open_Parenthesis_In_Content_Declaration; 2127 2128 ----------------------------------------------- 2129 -- On_Open_Parenthesis_In_Notation_Attribute -- 2130 ----------------------------------------------- 2131 2132 function On_Open_Parenthesis_In_Notation_Attribute 2133 (Self : in out Simple_Reader'Class) return Token is 2134 begin 2135 -- Checks ithat whitespace before open parenthesis is detected 2136 -- and report error when check fail. 2137 2138 if not Self.Whitespace_Matched then 2139 -- XXX This is recoverable error. 2140 2141 Callbacks.Call_Fatal_Error 2142 (Self, 2143 League.Strings.To_Universal_String 2144 ("whitespace required before open parenthesis")); 2145 2146 return Error; 2147 end if; 2148 2149 return Token_Open_Parenthesis; 2150 end On_Open_Parenthesis_In_Notation_Attribute; 2151 2152 ----------------------------------------------------------- 2153 -- On_Parameter_Entity_Reference_In_Document_Declaration -- 2154 ----------------------------------------------------------- 2155 2156 function On_Parameter_Entity_Reference_In_Document_Declaration 2157 (Self : in out Simple_Reader'Class) return Token 2158 is 2159 Qualified_Name : Symbol_Identifier; 2160 Qname_Error : Boolean; 2161 Entity : Entity_Identifier; 2162 Deep : Natural; 2163 2164 begin 2165 Resolve_Symbol 2166 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 2167 2168 if Qname_Error then 2169 return Error; 2170 2171 end if; 2172 2173 Entity := Parameter_Entity (Self.Symbols, Qualified_Name); 2174 2175 if Entity = No_Entity then 2176 Callbacks.Call_Fatal_Error 2177 (Self, 2178 League.Strings.To_Universal_String 2179 ("parameter entity must be declared")); 2180 2181 return Error; 2182 end if; 2183 2184 Deep := Integer (Self.Scanner_Stack.Length); 2185 2186 if not Push_Entity 2187 (Self => Self, 2188 Entity => Entity, 2189 In_Document_Type => False, 2190 In_Literal => False) 2191 then 2192 return Error; 2193 2194 elsif Deep = Integer (Self.Scanner_Stack.Length) then 2195 -- Entity doesn't pushed in stack because its replacement text 2196 -- is empty. 2197 2198 return End_Of_Input; 2199 2200 else 2201 Self.Scanner_State.Start_Issued := True; 2202 2203 return Token_Entity_Start; 2204 end if; 2205 end On_Parameter_Entity_Reference_In_Document_Declaration; 2206 2207 --------------------------------------------------- 2208 -- On_Parameter_Entity_Reference_In_Entity_Value -- 2209 --------------------------------------------------- 2210 2211 function On_Parameter_Entity_Reference_In_Entity_Value 2212 (Self : in out Simple_Reader'Class) return Boolean 2213 is 2214 Qualified_Name : Symbol_Identifier; 2215 Qname_Error : Boolean; 2216 Entity : Entity_Identifier; 2217 2218 begin 2219 Resolve_Symbol 2220 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 2221 2222 if Qname_Error then 2223 return False; 2224 2225 else 2226 Entity := Parameter_Entity (Self.Symbols, Qualified_Name); 2227 2228 -- XML WFC: PEs in Internal Subset 2229 -- 2230 -- "In the internal DTD subset, parameter-entity references MUST NOT 2231 -- occur within markup declarations; they may occur where markup 2232 -- declarations can occur. (This does not apply to references that 2233 -- occur in external parameter entities or to the external subset.)" 2234 -- 2235 -- Check whether parameter entity reference doesn't occure in the 2236 -- entity value of the entity declared in internal subset. 2237 2238 if Is_Document_Entity (Self.Entities, Self.Scanner_State.Entity) then 2239 Callbacks.Call_Fatal_Error 2240 (Self, 2241 League.Strings.To_Universal_String 2242 ("[XML 2.8 WFC: PEs in Internal Subset]" 2243 & " parameter-entity reference in internal subset must not" 2244 & " occur within markup declaration")); 2245 2246 return False; 2247 end if; 2248 2249 -- XML VC: Entity Declared 2250 -- 2251 -- "In a document with an external subset or parameter entity 2252 -- references with "standalone='no'", the Name given in the entity 2253 -- reference MUST match that in an entity declaration. For 2254 -- interoperability, valid documents SHOULD declare the entities amp, 2255 -- lt, gt, apos, quot, in the form specified in 4.6 Predefined 2256 -- Entities. The declaration of a parameter entity MUST precede any 2257 -- reference to it. Similarly, the declaration of a general entity 2258 -- MUST precede any attribute-list declaration containing a default 2259 -- value with a direct or indirect reference to that general entity." 2260 -- 2261 -- XXX Parameter entity must not be declared at the point of 2262 -- reference, except in some conditions in validating mode; so, 2263 -- check below must be improved, as well as behavior in 2264 -- non-validating mode must be checked. 2265 2266 if Entity = No_Entity then 2267 Callbacks.Call_Fatal_Error 2268 (Self, 2269 League.Strings.To_Universal_String 2270 ("parameter entity must be declared")); 2271 2272 return False; 2273 end if; 2274 2275 return 2276 Push_Entity 2277 (Self => Self, 2278 Entity => Entity, 2279 In_Document_Type => False, 2280 In_Literal => True); 2281 end if; 2282 end On_Parameter_Entity_Reference_In_Entity_Value; 2283 2284 --------------------------------------------------------- 2285 -- On_Parameter_Entity_Reference_In_Markup_Declaration -- 2286 --------------------------------------------------------- 2287 2288 function On_Parameter_Entity_Reference_In_Markup_Declaration 2289 (Self : in out Simple_Reader'Class) return Boolean 2290 is 2291 Qualified_Name : Symbol_Identifier; 2292 Qname_Error : Boolean; 2293 Entity : Entity_Identifier; 2294 2295 begin 2296 Resolve_Symbol 2297 (Self, 1, 1, False, False, False, Qname_Error, Qualified_Name); 2298 2299 if Qname_Error then 2300 return False; 2301 2302 else 2303 -- [XML 2.8] WFC: PEs in Internal Subset 2304 -- 2305 -- "In the internal DTD subset, parameter-entity references MUST NOT 2306 -- occur within markup declarations; they may occur where markup 2307 -- declarations can occur. (This does not apply to references that 2308 -- occur in external parameter entities or to the external subset.)" 2309 -- 2310 -- Check whether external subset is processed. 2311 2312 if not Self.External_Subset_Done then 2313 Callbacks.Call_Fatal_Error 2314 (Self, 2315 League.Strings.To_Universal_String 2316 ("[XML 2.8 WFC: PEs in Internal Subset]" 2317 & " parameter-entity reference in internal subset must not" 2318 & " occur within markup declaration")); 2319 2320 return False; 2321 end if; 2322 2323 Entity := Parameter_Entity (Self.Symbols, Qualified_Name); 2324 2325 if Entity = No_Entity then 2326 Callbacks.Call_Fatal_Error 2327 (Self, 2328 League.Strings.To_Universal_String 2329 ("parameter entity must be declared")); 2330 2331 return False; 2332 end if; 2333 2334 return 2335 Push_Entity 2336 (Self => Self, 2337 Entity => Entity, 2338 In_Document_Type => False, 2339 In_Literal => True); 2340 end if; 2341 end On_Parameter_Entity_Reference_In_Markup_Declaration; 2342 2343 --------------------- 2344 -- On_Percent_Sign -- 2345 --------------------- 2346 2347 function On_Percent_Sign 2348 (Self : in out Simple_Reader'Class) return Token is 2349 begin 2350 if not Self.Whitespace_Matched then 2351 Callbacks.Call_Fatal_Error 2352 (Self, 2353 League.Strings.To_Universal_String 2354 ("no whitespace before percent")); 2355 Self.Error_Reported := True; 2356 -- XXX This is recoverable error. 2357 2358 return Error; 2359 2360 else 2361 Self.Whitespace_Matched := False; 2362 2363 return Token_Percent; 2364 end if; 2365 end On_Percent_Sign; 2366 2367 ------------------------------------ 2368 -- On_Plus_In_Content_Declaration -- 2369 ------------------------------------ 2370 2371 function On_Plus_In_Content_Declaration 2372 (Self : in out Simple_Reader'Class) return Token is 2373 begin 2374 if Self.Whitespace_Matched then 2375 Callbacks.Call_Fatal_Error 2376 (Self, 2377 League.Strings.To_Universal_String 2378 ("[XML [47], [48]] illegal whitespace before plus")); 2379 2380 return Error; 2381 2382 else 2383 return Token_Plus; 2384 end if; 2385 end On_Plus_In_Content_Declaration; 2386 2387 ----------------------- 2388 -- On_Public_Literal -- 2389 ----------------------- 2390 2391 function On_Public_Literal 2392 (Self : in out Simple_Reader'Class) return Token 2393 is 2394 Next : Utf16_String_Index 2395 := Self.Scanner_State.YY_Base_Position + 1; 2396 -- Skip literal open delimiter. 2397 Code : Code_Point; 2398 Space_Before : Boolean := True; 2399 2400 begin 2401 if not Self.Whitespace_Matched then 2402 Callbacks.Call_Fatal_Error 2403 (Self, 2404 League.Strings.To_Universal_String 2405 ("[[75] ExternalID, [83] PublicID]" 2406 & " whitespace is required before pubid literal")); 2407 2408 return Error; 2409 end if; 2410 2411 Self.Whitespace_Matched := False; 2412 Enter_Start_Condition (Self, Tables.EXTERNAL_ID_SYS); 2413 2414 -- [XML 4.2.2] External Entities 2415 -- 2416 -- "[Definition: In addition to a system identifier, an external 2417 -- identifier may include a public identifier.] An XML processor 2418 -- attempting to retrieve the entity's content may use any combination 2419 -- of the public and system identifiers as well as additional 2420 -- information outside the scope of this specification to try to 2421 -- generate an alternative URI reference. If the processor is unable to 2422 -- do so, it MUST use the URI reference specified in the system literal. 2423 -- Before a match is attempted, all strings of white space in the public 2424 -- identifier MUST be normalized to single space characters (#x20), and 2425 -- leading and trailing white space MUST be removed." 2426 -- 2427 -- Normalize public identifier. 2428 2429 Matreshka.Internals.Strings.Operations.Reset (Self.Character_Data); 2430 2431 while Next /= Self.Scanner_State.YY_Current_Position - 1 loop 2432 -- Exclude literal close delimiter. 2433 2434 Unchecked_Next (Self.Scanner_State.Data.Value, Next, Code); 2435 2436 -- It can be reasonable to implement this step of normalization on 2437 -- SIMD. 2438 2439 if Code = Character_Tabulation 2440 or Code = Line_Feed 2441 or Code = Carriage_Return 2442 then 2443 Code := Space; 2444 end if; 2445 2446 if Code = Space then 2447 if not Space_Before then 2448 Matreshka.Internals.Strings.Operations.Unterminated_Append 2449 (Self.Character_Data, Code); 2450 Space_Before := True; 2451 end if; 2452 2453 else 2454 Matreshka.Internals.Strings.Operations.Unterminated_Append 2455 (Self.Character_Data, Code); 2456 Space_Before := False; 2457 end if; 2458 end loop; 2459 2460 if Space_Before and Self.Character_Data.Unused /= 0 then 2461 -- Remove traling space. 2462 2463 Self.Character_Data.Length := Self.Character_Data.Length - 1; 2464 Self.Character_Data.Unused := Self.Character_Data.Unused - 1; 2465 end if; 2466 2467 String_Handler.Fill_Null_Terminator (Self.Character_Data); 2468 Matreshka.Internals.Strings.Reference (Self.Character_Data); 2469 Set_String_Internal 2470 (Item => Self.YYLVal, 2471 String => Self.Character_Data, 2472 Is_Whitespace => False); 2473 2474 return Token_Public_Literal; 2475 end On_Public_Literal; 2476 2477 --------------------------------------------- 2478 -- On_Question_Mark_In_Content_Declaration -- 2479 --------------------------------------------- 2480 2481 function On_Question_Mark_In_Content_Declaration 2482 (Self : in out Simple_Reader'Class) return Token is 2483 begin 2484 if Self.Whitespace_Matched then 2485 Callbacks.Call_Fatal_Error 2486 (Self, 2487 League.Strings.To_Universal_String 2488 ("[XML [47], [48]] illegal whitespace before question mark")); 2489 2490 return Error; 2491 2492 else 2493 return Token_Question; 2494 end if; 2495 end On_Question_Mark_In_Content_Declaration; 2496 2497 --------------------------- 2498 -- On_Standalone_Keyword -- 2499 --------------------------- 2500 2501 function On_Standalone_Keyword 2502 (Self : in out Simple_Reader'Class) return Token is 2503 begin 2504 if not Self.Whitespace_Matched then 2505 Callbacks.Call_Fatal_Error 2506 (Self, 2507 League.Strings.To_Universal_String 2508 ("no whitespace before 'standalone'")); 2509 Self.Error_Reported := True; 2510 -- XXX This is recoverable error. 2511 2512 return Error; 2513 2514 else 2515 return Token_Standalone; 2516 end if; 2517 end On_Standalone_Keyword; 2518 2519 ---------------------------------------- 2520 -- On_System_Keyword_In_Document_Type -- 2521 ---------------------------------------- 2522 2523 function On_System_Keyword_In_Document_Type 2524 (Self : in out Simple_Reader'Class) return Token is 2525 begin 2526 Reset_Whitespace_Matched (Self); 2527 Push_And_Enter_Start_Condition 2528 (Self, Tables.DOCTYPE_INT, Tables.EXTERNAL_ID_SYS); 2529 2530 return Token_System; 2531 end On_System_Keyword_In_Document_Type; 2532 2533 --------------------------------------------- 2534 -- On_System_Keyword_In_Entity_Or_Notation -- 2535 --------------------------------------------- 2536 2537 function On_System_Keyword_In_Entity_Or_Notation 2538 (Self : in out Simple_Reader'Class) return Token is 2539 begin 2540 Reset_Whitespace_Matched (Self); 2541 Push_Current_And_Enter_Start_Condition (Self, Tables.EXTERNAL_ID_SYS); 2542 2543 return Token_System; 2544 end On_System_Keyword_In_Entity_Or_Notation; 2545 2546 ----------------------- 2547 -- On_System_Literal -- 2548 ----------------------- 2549 2550 function On_System_Literal 2551 (Self : in out Simple_Reader'Class) return Token is 2552 begin 2553 if not Self.Whitespace_Matched then 2554 Callbacks.Call_Fatal_Error 2555 (Self, 2556 League.Strings.To_Universal_String 2557 ("[[75] ExternalID]" 2558 & " whitespace is required before system literal")); 2559 2560 return Error; 2561 end if; 2562 2563 Self.Whitespace_Matched := False; 2564 Pop_Start_Condition (Self); 2565 Set_String_Internal 2566 (Item => Self.YYLVal, 2567 String => YY_Text (Self, 1, 1), 2568 Is_Whitespace => False); 2569 2570 return Token_System_Literal; 2571 end On_System_Literal; 2572 2573 ----------------------------- 2574 -- On_Unexpected_Character -- 2575 ----------------------------- 2576 2577 function On_Unexpected_Character 2578 (Self : in out Simple_Reader'Class) return Token is 2579 begin 2580 Callbacks.Call_Fatal_Error 2581 (Self, 2582 League.Strings.To_Universal_String ("unexpected character")); 2583 2584 return Error; 2585 end On_Unexpected_Character; 2586 2587 ------------------------ 2588 -- On_Version_Keyword -- 2589 ------------------------ 2590 2591 function On_Version_Keyword 2592 (Self : in out Simple_Reader'Class) return Token is 2593 begin 2594 if not Self.Whitespace_Matched then 2595 Callbacks.Call_Fatal_Error 2596 (Self, 2597 League.Strings.To_Universal_String 2598 ("no whitespace before 'version'")); 2599 Self.Error_Reported := True; 2600 -- XXX This is recoverable error. 2601 2602 return Error; 2603 2604 else 2605 return Token_Version; 2606 end if; 2607 end On_Version_Keyword; 2608 2609 ------------------------------- 2610 -- On_Whitespace_In_Document -- 2611 ------------------------------- 2612 2613 function On_Whitespace_In_Document 2614 (Self : in out Simple_Reader'Class) return Boolean 2615 is 2616 C : constant Code_Point 2617 := Code_Point 2618 (Self.Scanner_State.Data.Value 2619 (Self.Scanner_State.YY_Current_Position - 1)); 2620 2621 begin 2622 if C = Less_Than_Sign or C = Ampersand then 2623 -- Move back when trailing context is available. 2624 2625 YY_Move_Backward (Self); 2626 end if; 2627 2628 if Self.Element_Names.Is_Empty then 2629 -- Document content not entered. 2630 2631 return False; 2632 2633 else 2634 Matreshka.Internals.Strings.Operations.Copy_Slice 2635 (Self.Character_Data, 2636 Self.Scanner_State.Data, 2637 Self.Scanner_State.YY_Base_Position, 2638 Self.Scanner_State.YY_Current_Position 2639 - Self.Scanner_State.YY_Base_Position, 2640 Self.Scanner_State.YY_Current_Index 2641 - Self.Scanner_State.YY_Base_Index); 2642 2643 Matreshka.Internals.Strings.Reference (Self.Character_Data); 2644 Set_String_Internal 2645 (Item => Self.YYLVal, 2646 String => Self.Character_Data, 2647 Is_Whitespace => True); 2648 2649 return True; 2650 end if; 2651 end On_Whitespace_In_Document; 2652 2653 --------------------------------------------- 2654 -- On_Whitespace_In_Processing_Instruction -- 2655 --------------------------------------------- 2656 2657 procedure On_Whitespace_In_Processing_Instruction 2658 (Self : in out Simple_Reader'Class) is 2659 begin 2660 -- Whitespace between processing instruction's target and data are 2661 -- required, so set flag which indicates their presence. 2662 2663 Self.Whitespace_Matched := True; 2664 2665 case Self.Version is 2666 when XML_1_0 => 2667 Enter_Start_Condition (Self, Tables.PI_DATA_10); 2668 2669 when XML_1_1 => 2670 Enter_Start_Condition (Self, Tables.PI_DATA_11); 2671 end case; 2672 end On_Whitespace_In_Processing_Instruction; 2673 2674 -------------------- 2675 -- Resolve_Symbol -- 2676 -------------------- 2677 2678 procedure Resolve_Symbol 2679 (Self : in out Simple_Reader'Class; 2680 Trim_Left : Natural; 2681 Trim_Right : Natural; 2682 Trim_Whitespace : Boolean; 2683 Can_Be_Qname : Boolean; 2684 Not_Qname : Boolean; 2685 Error : out Boolean; 2686 Symbol : out Matreshka.Internals.XML.Symbol_Identifier) 2687 is 2688 -- Trailing and leading character as well as whitespace characters 2689 -- belongs to BMP and don't require expensive UTF-16 decoding. 2690 2691 FP : Utf16_String_Index 2692 := Self.Scanner_State.YY_Base_Position 2693 + Utf16_String_Index (Trim_Left); 2694 FI : Positive 2695 := Self.Scanner_State.YY_Base_Index + Trim_Left; 2696 LP : constant Utf16_String_Index 2697 := Self.Scanner_State.YY_Current_Position 2698 - Utf16_String_Index (Trim_Right); 2699 LI : constant Positive 2700 := Self.Scanner_State.YY_Current_Index - Trim_Right; 2701 C : Code_Point; 2702 E : Matreshka.Internals.XML.Symbol_Tables.Qualified_Name_Errors; 2703 2704 begin 2705 if Trim_Whitespace then 2706 loop 2707 C := Code_Point (Self.Scanner_State.Data.Value (FP)); 2708 2709 exit when 2710 C /= Space 2711 and then C /= Character_Tabulation 2712 and then C /= Carriage_Return 2713 and then C /= Line_Feed; 2714 2715 FP := FP + 1; 2716 FI := FI + 1; 2717 end loop; 2718 end if; 2719 2720 Matreshka.Internals.XML.Symbol_Tables.Insert 2721 (Self.Symbols, 2722 Self.Scanner_State.Data, 2723 FP, 2724 LP - FP, 2725 LI - FI, 2726 Self.Namespaces.Enabled, 2727 E, 2728 Symbol); 2729 2730 Error := False; 2731 2732 if Self.Namespaces.Enabled and not Not_Qname then 2733 case E is 2734 when Valid => 2735 if not Can_Be_Qname 2736 and Local_Name (Self.Symbols, Symbol) /= Symbol 2737 then 2738 Error := True; 2739 Symbol := No_Symbol; 2740 Callbacks.Call_Fatal_Error 2741 (Self, 2742 League.Strings.To_Universal_String 2743 ("[NSXML1.1] qualified name must not be used here")); 2744 end if; 2745 2746 when Colon_At_Start => 2747 Error := True; 2748 Symbol := No_Symbol; 2749 Callbacks.Call_Fatal_Error 2750 (Self, 2751 League.Strings.To_Universal_String 2752 ("[NSXML1.1]" 2753 & " qualified name must not start with colon character")); 2754 2755 when Colon_At_End => 2756 Error := True; 2757 Symbol := No_Symbol; 2758 Callbacks.Call_Fatal_Error 2759 (Self, 2760 League.Strings.To_Universal_String 2761 ("[NSXML1.1]" 2762 & " qualified name must not end with colon character")); 2763 2764 when Multiple_Colons => 2765 Error := True; 2766 Symbol := No_Symbol; 2767 Callbacks.Call_Fatal_Error 2768 (Self, 2769 League.Strings.To_Universal_String 2770 ("[NSXML1.1]" 2771 & " qualified name must not contain more than one colon" 2772 & " character")); 2773 2774 when First_Character_Is_Not_NS_Name_Start_Char => 2775 Error := True; 2776 Symbol := No_Symbol; 2777 Callbacks.Call_Fatal_Error 2778 (Self, 2779 League.Strings.To_Universal_String 2780 ("[NSXML1.1] first character of local name is invalid")); 2781 end case; 2782 end if; 2783 end Resolve_Symbol; 2784 2785end XML.SAX.Simple_Readers.Scanner.Actions; 2786