1 ------------------------------------------------------------------------------ 2 -- -- 3 -- Matreshka Project -- 4 -- -- 5 -- XML Processor -- 6 -- -- 7 -- Runtime Library Component -- 8 -- -- 9 ------------------------------------------------------------------------------ 10 -- -- 11 -- Copyright © 2010-2012, Vadim Godunko <vgodunko@gmail.com> -- 12 -- All rights reserved. -- 13 -- -- 14 -- Redistribution and use in source and binary forms, with or without -- 15 -- modification, are permitted provided that the following conditions -- 16 -- are met: -- 17 -- -- 18 -- * Redistributions of source code must retain the above copyright -- 19 -- notice, this list of conditions and the following disclaimer. -- 20 -- -- 21 -- * Redistributions in binary form must reproduce the above copyright -- 22 -- notice, this list of conditions and the following disclaimer in the -- 23 -- documentation and/or other materials provided with the distribution. -- 24 -- -- 25 -- * Neither the name of the Vadim Godunko, IE nor the names of its -- 26 -- contributors may be used to endorse or promote products derived from -- 27 -- this software without specific prior written permission. -- 28 -- -- 29 -- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -- 30 -- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -- 31 -- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -- 32 -- A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -- 33 -- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -- 34 -- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -- 35 -- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -- 36 -- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -- 37 -- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -- 38 -- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -- 39 -- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -- 40 -- -- 41 ------------------------------------------------------------------------------ 42 -- This unit uses some ideas of regular expression transformation from: -- 43 -- -- 44 -- Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", -- 45 -- Technical Report TR 1998-17, School of Computing Science, Simon Fraser -- 46 -- University, November, 1998. -- 47 -- Copyright (c) 1998, Robert D. Cameron. -- 48 -- The following code may be freely used and distributed provided that -- 49 -- this copyright and citation notice remains intact and that modifications -- 50 -- or additions are clearly identified. 
--
--                                                                          --
------------------------------------------------------------------------------
--  $Revision: 2996 $ $Date: 2012-05-16 14:41:32 +0400 (Wed, 16 May 2012) $
------------------------------------------------------------------------------
--
--  [3] S ::= (#x20 | #x9 | #xD | #xA)+
--
WHITE_SPACE [\u0020\u0009\u000D\u000A]+

--
--  [5] Name ::= NameStartChar (NameChar)*
--
--  The first bracket expression is the NameStartChar set, the second one is
--  the NameChar set (the same set is used by NMTOKEN below).
--
NAME [:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u10000-\uEFFFF][-.:0-9A-Z_a-z\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u036F\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u10000-\uEFFFF]*

--
--  [7] Nmtoken ::= (NameChar)+
--
NMTOKEN [-.:0-9A-Z_a-z\u00B7\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u036F\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u10000-\uEFFFF]+

--
--  [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
--
--  PUBID_CHAR_NO_APOSTROPHE is the same set without the apostrophe; it is
--  used for public id literals which are delimited by apostrophes.
--
PUBID_CHAR [-'()+,./:=?;!*#@$_%a-zA-Z0-9\u0020\u000D\u000A]
PUBID_CHAR_NO_APOSTROPHE [-()+,./:=?;!*#@$_%a-zA-Z0-9\u0020\u000D\u000A]

XMLDECL_ATTRIBUTE_CHAR [A-Za-z0-9._-]
--  Synthetic set of characters. XMLDECL_ATTRIBUTE_CHAR is a union of the
--  characters allowed by [26] VersionNum, [81] EncName, [32] SDDecl.

--
--  Special subranges of Char.
--
--  NOTE: Negative sets with a smaller number of characters improve aflex
--  performance and memory requirements significantly.
--
--  XML 1.0:
--
--  [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
--
--  Negative form: [^\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
--
--  XML 1.1:
--
--  [2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
--  [2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]
--
--  Negative form: [^\uD800-\uDFFF\uFFFE\uFFFF]
--  Note, RestrictedChar can't appear in the document's text except as a
--  character reference, so for the scanner:
--
--  Char ::= #x9 | #xA | #xD | [#x20-#x7E] | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
--
--  Negative form: [^\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--
--  Naming used by the definitions below:
--
--   - suffix 10 uses the XML 1.0 negative form;
--   - suffix 11 uses the XML 1.1 scanner negative form (RestrictedChar
--     excluded);
--   - suffix U11 uses the plain XML 1.1 negative form (RestrictedChar is
--     not excluded);
--   - ICHARxx is the positive set of the characters excluded by CHARxx.
--
CHAR10 [^\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
ICHAR10 [\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11 [^\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
ICHAR11 [\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
ICHARU11 [\uD800-\uDFFF\uFFFE\uFFFF]
--  [^"%&'], used for segments of entity value.
CHAR10_NO_QUOTATION_PERCENT_AMPERSAND_APOSTROPHE [^\u0022\u0025\u0026\u0027\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_QUOTATION_PERCENT_AMPERSAND_APOSTROPHE [^\u0022\u0025\u0026\u0027\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--  [^"&'<], used for segments of attribute value.
CHAR10_NO_QUOTATION_AMPERSAND_APOSTROPHE_LESS [^\u0022\u0026\u0027\u003C\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_QUOTATION_AMPERSAND_APOSTROPHE_LESS [^\u0022\u0026\u0027\u003C\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--  (Char - '-') for comment, rule [15].
CHAR10_NO_HYPHEN [^\u002D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_HYPHEN [^\u002D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--  [^<&]] for CharData, rule [14].
CHAR10_NO_AMPERSAND_LESS_RIGHT_SQUARE [^\u0026\u003C\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_AMPERSAND_LESS_RIGHT_SQUARE [^\u0026\u003C\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
CHARU11_NO_AMPERSAND_LESS_RIGHT_SQUARE [^\u0026\u003C\u005D\uD800-\uDFFF\uFFFE\uFFFF]
--  [^<>&]], used after two or more ']' in CharData so that "]]>" is never
--  consumed as ordinary character data.
CHAR10_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE [^\u0026\u003C\u003E\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE [^\u0026\u003C\u003E\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
CHARU11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE [^\u0026\u003C\u003E\u005D\uD800-\uDFFF\uFFFE\uFFFF]
--  [^]], for CData, rule [20].
CHAR10_NO_RIGHT_SQUARE [^\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_RIGHT_SQUARE [^\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
CHARU11_NO_RIGHT_SQUARE [^\u005D\uD800-\uDFFF\uFFFE\uFFFF]
--  [^>]], used after two or more ']' in CData.
CHAR10_NO_GREATER_RIGHT_SQUARE [^\u003E\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_GREATER_RIGHT_SQUARE [^\u003E\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
CHARU11_NO_GREATER_RIGHT_SQUARE [^\u003E\u005D\uD800-\uDFFF\uFFFE\uFFFF]
--  [^?], for PI, rule [16].
CHAR10_NO_QUESTION [^\u003F\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_QUESTION [^\u003F\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--  [^>?]
CHAR10_NO_GREATER_QUESTION [^\u003E\u003F\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_GREATER_QUESTION [^\u003E\u003F\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]
--  [^<]], used to skip content of ignored conditional sections.
CHAR10_NO_LESS_RIGHT_SQUARE [^\u003C\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\uD800-\uDFFF\uFFFE\uFFFF]
CHAR11_NO_LESS_RIGHT_SQUARE [^\u003C\u005D\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\uFFFE\uFFFF]

--  Exclusive start conditions of the scanner. Conditions with _10, _11 and
--  _U11 suffixes are used by rules built from the CHAR10*, CHAR11* and
--  CHARU11* character sets respectively.
%x XML_DECL
%x DOCUMENT_10
%x DOCUMENT_11
%x DOCUMENT_U11
%x CDATA_10
%x CDATA_11
%x CDATA_U11
%x PI
%x PI_DATA_10
%x PI_DATA_11
%x DOCTYPE_EXTINT
%x DOCTYPE_INT
%x DOCTYPE_INTSUBSET_10
%x DOCTYPE_INTSUBSET_11
%x ELEMENT_NAME
%x ELEMENT_DECL
%x ELEMENT_CHILDREN
%x ATTLIST_DECL
%x ATTLIST_NAME
%x ATTLIST_TYPE
%x NOTATION_DECL
%x ENTITY_DECL
%x ENTITY_DEF
%x ENTITY_NDATA
%x ENTITY_VALUE_10
%x ENTITY_VALUE_11
%x CONDITIONAL_DIRECTIVE
%x CONDITIONAL_IGNORE_10
%x CONDITIONAL_IGNORE_11
%x ELEMENT_START
%x ATTRIBUTE_VALUE_10
%x ATTRIBUTE_VALUE_11
--  Factored out processing for ExternalID, rule [75]. When the PUBLIC or
--  SYSTEM word is recognized, the scanner must be switched to the
--  EXTERNAL_ID_PUB or EXTERNAL_ID_SYS state and must set the state to which
--  the scanner returns after recognition of the id.
%x EXTERNAL_ID_SYS
%x EXTERNAL_ID_PUB

%%

"<?xml" {
   --  Open of XML declaration or text declaration, rules [23], [77].
   --
   --  This rule and the next one carry no start condition. Because every
   --  condition declared above is exclusive, they are active only in the
   --  initial start condition.

   return Actions.On_Open_Of_XML_Or_Text_Declaration (Self);
}

.|\n {
   --  Any character except literal "<?xml" means there is no XML declaration
   --  in this document/external parsed entity.

   Actions.On_No_XML_Declaration (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11,DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>"<?"{NAME} {
   --  Open of processing instruction, rule [16]. Rule [17] is implemented
   --  implicitly by ordering of open of XMLDecl and open of PI.

   return Actions.On_Open_Of_Processing_Instruction (Self);
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>"<!DOCTYPE"{WHITE_SPACE}{NAME} {
   --  Open tag of document type declaration and name of root element,
   --  rule [28].

   return Actions.On_Open_Of_Document_Type_Declaration (Self);
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>"<"{NAME} {
   --  Open of start tag, rule [40], or empty element, rule [44].

   return Actions.On_Open_Of_Start_Tag (Self);
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>"</"{NAME} {
   --  Open of end tag, rule [42].

   return Actions.On_Open_Of_End_Tag (Self);
}

--  Rule [14]
--
--    [^<&]* - ([^<&]* ']]>' [^<&]*)
--
--  rewritten as:
--
--    [^<&\]]* ((']' [^<&\]]+) | (']'{2,} [^<>&\]]+))* ']'*
--
--  with optional trailing context
--
--    "]]>" | '&' | '<'
--
--  The optional trailing context is needed to find the longest possible
--  match and to detect the illegal literal ']]>' in character data.
--
--  The whitespace rule is separated to prevent whitespace from being
--  reported before the start of the root element and after the close of
--  the root element. Inside document content this rule covers sequences of
--  ignorable whitespace.

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>{WHITE_SPACE}("<"|"&")? {
   --  Segment of whitespaces.

   --  A token is returned only when the action reports True; otherwise the
   --  whitespace is dropped and scanning continues.

   if Actions.On_Whitespace_In_Document (Self) then
      return Token_String_Segment;
   end if;
}

<DOCUMENT_10>({CHAR10_NO_AMPERSAND_LESS_RIGHT_SQUARE}+("]"{CHAR10_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHAR10_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)*"]"*("]]>"|"<"|"&")?)|(("]"{CHAR10_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHAR10_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)+"]"*("]]>"|"<"|"&")?)|("]"+("]]>"|"<"|"&")?)|("]]>") {
   --  Segment of character data, rule [14].
   --
   --  The pattern has four alternatives: data starting with a character
   --  other than ']'; data starting with ']'; a run of ']' only; and the
   --  literal "]]>" alone. The optional "]]>", "<" or "&" at the end of the
   --  first three alternatives is part of the matched text.
   --  NOTE(review): presumably On_Character_Data gives the trailing context
   --  back to the scanner - confirm against Actions.

   return Actions.On_Character_Data (Self);
}

<DOCUMENT_11>({CHAR11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+("]"{CHAR11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHAR11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)*"]"*("]]>"|"<"|"&")?)|(("]"{CHAR11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHAR11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)+"]"*("]]>"|"<"|"&")?)|("]"+("]]>"|"<"|"&")?)|("]]>") {
   --  Segment of character data, rule [14]. Same shape as the DOCUMENT_10
   --  rule, built from the CHAR11 character sets.

   return Actions.On_Character_Data (Self);
}

<DOCUMENT_U11>({CHARU11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+("]"{CHARU11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHARU11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)*"]"*("]]>"|"<"|"&")?)|(("]"{CHARU11_NO_AMPERSAND_LESS_RIGHT_SQUARE}+|"]"{2,}{CHARU11_NO_AMPERSAND_LESS_GREATER_RIGHT_SQUARE}+)+"]"*("]]>"|"<"|"&")?)|("]"+("]]>"|"<"|"&")?)|("]]>") {
   --  Segment of character data, rule [14]. Same shape as the DOCUMENT_10
   --  rule, built from the CHARU11 character sets.

   return Actions.On_Character_Data (Self);
}

<DOCUMENT_10>"<![CDATA[" {
   --  Start of CDATA section, production [19].

   return Actions.On_Open_Of_CDATA (Self);
}

<DOCUMENT_11>"<![CDATA[" {
   --  Start of CDATA section, production [19].

   return Actions.On_Open_Of_CDATA (Self);
}

<DOCUMENT_U11>"<![CDATA[" {
   --  Start of CDATA section, production [19].

   return Actions.On_Open_Of_CDATA (Self);
}

<CDATA_10>({CHAR10_NO_RIGHT_SQUARE}+("]"{CHAR10_NO_RIGHT_SQUARE}+|"]"{2,}{CHAR10_NO_GREATER_RIGHT_SQUARE}+)*"]"*("]]>")?)|(("]"{CHAR10_NO_RIGHT_SQUARE}+|"]"{2,}{CHAR10_NO_GREATER_RIGHT_SQUARE}+)+"]"*("]]>")?)|("]"+("]]>")?) {
   --  Text data of CDATA section, production [20].
   --
   --  The optional "]]>" at the end of each alternative is part of the
   --  matched text.
   --  NOTE(review): presumably On_CDATA gives the close delimiter back to
   --  the scanner so that the "]]>" rule below can report it - confirm
   --  against Actions.

   return Actions.On_CDATA (Self);
}

<CDATA_11>({CHAR11_NO_RIGHT_SQUARE}+("]"{CHAR11_NO_RIGHT_SQUARE}+|"]"{2,}{CHAR11_NO_GREATER_RIGHT_SQUARE}+)*"]"*("]]>")?)|(("]"{CHAR11_NO_RIGHT_SQUARE}+|"]"{2,}{CHAR11_NO_GREATER_RIGHT_SQUARE}+)+"]"*("]]>")?)|("]"+("]]>")?) {
   --  Text data of CDATA section, production [20]. Same shape as the
   --  CDATA_10 rule, built from the CHAR11 character sets.

   return Actions.On_CDATA (Self);
}

<CDATA_U11>({CHARU11_NO_RIGHT_SQUARE}+("]"{CHARU11_NO_RIGHT_SQUARE}+|"]"{2,}{CHARU11_NO_GREATER_RIGHT_SQUARE}+)*"]"*("]]>")?)|(("]"{CHARU11_NO_RIGHT_SQUARE}+|"]"{2,}{CHARU11_NO_GREATER_RIGHT_SQUARE}+)+"]"*("]]>")?)|("]"+("]]>")?) {
   --  Text data of CDATA section, production [20]. Same shape as the
   --  CDATA_10 rule, built from the CHARU11 character sets.

   return Actions.On_CDATA (Self);
}

<CDATA_10>"]]>" {
   --  End of CDATA section, production [21].

   return Actions.On_Close_Of_CDATA (Self);
}

<CDATA_11>"]]>" {
   --  End of CDATA section, production [21].

   return Actions.On_Close_Of_CDATA (Self);
}

<CDATA_U11>"]]>" {
   --  End of CDATA section, production [21].

   return Actions.On_Close_Of_CDATA (Self);
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11>"&"{NAME}";" {
   --  General entity reference rule [68] in document content.

   declare
      Aux : constant Token
        := Actions.On_General_Entity_Reference_In_Document_Content (Self);

   begin
      --  By convention, End_Of_Input means that the replacement text of the
      --  referenced entity is empty and it is not pushed into the scanner
      --  stack.

      --  In that case no token is returned and scanning continues with the
      --  text following the reference.

      if Aux /= End_Of_Input then
         return Aux;
      end if;
   end;
}

<XML_DECL>version {
   --  [24] VersionInfo

   return Actions.On_Version_Keyword (Self);
}

<XML_DECL>encoding {
   --  [80] EncodingDecl

   return Actions.On_Encoding_Keyword (Self);
}

<XML_DECL>standalone {
   --  [32] SDDecl

   return Actions.On_Standalone_Keyword (Self);
}

<XML_DECL>'{XMLDECL_ATTRIBUTE_CHAR}*'|\"{XMLDECL_ATTRIBUTE_CHAR}*\" {
   --  Synthetic rule. XMLDECL_ATTRIBUTE_CHAR is a union of characters allowed
   --  by [26] VersionNum, [81] EncName, [32] SDDecl. Precise check is
   --  done while parsing.

   return Actions.On_Attribute_Value_In_XML_Declaration (Self);
}

<XML_DECL>"?>" {
   --  Close of XML declaration (production [23]) or text declaration
   --  (production [77]).

   return Actions.On_Close_Of_XML_Or_Text_Declaration (Self);
}

<PI,PI_DATA_10,PI_DATA_11>"?>" {
   --  Close of processing instruction (rule [16]) when only the close
   --  delimiter is matched.
   --  NOTE(review): the second argument is presumably an "empty data" flag;
   --  the data-carrying rules below pass False - confirm against Actions.

   return Actions.On_Close_Of_Processing_Instruction (Self, True);
}

<PI>{WHITE_SPACE} {
   --  Whitespace that follows the processing instruction's name, rule [16].
   --  No token is returned for it.

   Actions.On_Whitespace_In_Processing_Instruction (Self);
}

<PI_DATA_10>{CHAR10_NO_QUESTION}*"?"+({CHAR10_NO_GREATER_QUESTION}{CHAR10_NO_QUESTION}*"?"+)*">" {
   --  Segment of data and close delimiter of the processing instruction, rule
   --  [16]. The match always ends at the first occurrence of "?>".

   return Actions.On_Close_Of_Processing_Instruction (Self, False);
}

<PI_DATA_11>{CHAR11_NO_QUESTION}*"?"+({CHAR11_NO_GREATER_QUESTION}{CHAR11_NO_QUESTION}*"?"+)*">" {
   --  Segment of data and close delimiter of the processing instruction, rule
   --  [16]. The match always ends at the first occurrence of "?>".

   return Actions.On_Close_Of_Processing_Instruction (Self, False);
}

<DOCTYPE_EXTINT>"SYSTEM" {
   --  Keyword SYSTEM, rule [75].

   return Actions.On_System_Keyword_In_Document_Type (Self);
}

<EXTERNAL_ID_SYS>\"[^"]*\"|\'[^']*\' {
   --  System literal, rule [11], used in rule [75].

   return Actions.On_System_Literal (Self);
}

<EXTERNAL_ID_SYS>">" {
   --  Productions [82], [83] allow absence of the system literal in a
   --  notation declaration.
   --
   --  The first pop leaves EXTERNAL_ID_SYS. When the restored condition is
   --  NOTATION_DECL the declaration itself is closed by this ">", so that
   --  condition is popped too.

   Pop_Start_Condition (Self);

   if Start_Condition (Self) = NOTATION_DECL then
      Pop_Start_Condition (Self);
   end if;

   return Token_Close;
}

<DOCTYPE_EXTINT>"PUBLIC" {
   --  Keyword PUBLIC, rule [75].
   --  NOTE(review): presumably DOCTYPE_INT is the condition to return to
   --  after the external id has been scanned - confirm against the body of
   --  Push_And_Enter_Start_Condition.

   Reset_Whitespace_Matched (Self);
   Push_And_Enter_Start_Condition (Self, DOCTYPE_INT, EXTERNAL_ID_PUB);

   return Token_Public;
}

<EXTERNAL_ID_PUB>\"{PUBID_CHAR}*\"|\'{PUBID_CHAR_NO_APOSTROPHE}*\' {
   --  Public id literal, rule [12], used in rule [75].

   return Actions.On_Public_Literal (Self);
}

<DOCTYPE_EXTINT,DOCTYPE_INT>"[" {
   --  Open of internal subset declaration, rule [28].

   return Actions.On_Open_Of_Internal_Subset (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"]" {
   --  Close of internal subset declaration, rule [28].

   Enter_Start_Condition (Self, DOCTYPE_INT);

   return Token_Internal_Subset_Close;
}

<DOCUMENT_10,DOCTYPE_INTSUBSET_10>"<!--"({CHAR10_NO_HYPHEN}|-{CHAR10_NO_HYPHEN})*"-->" {
   --  Text of comment, rule [15].
   --
   --  NOTE(review): the arguments of YY_Text_Internal presumably trim the
   --  4 characters of "<!--" on the left and the 3 characters of "-->" on
   --  the right - confirm against its declaration.

   Set_String_Internal
    (Item          => YYLVal,
     String        => YY_Text_Internal (4, 3),
     Is_Whitespace => False);

   return Token_Comment;
}

<DOCUMENT_11,DOCUMENT_U11,DOCTYPE_INTSUBSET_11>"<!--"({CHAR11_NO_HYPHEN}|-{CHAR11_NO_HYPHEN})*"-->" {
   --  Text of comment, rule [15].

   Set_String_Internal
    (Item          => YYLVal,
     String        => YY_Text_Internal (4, 3),
     Is_Whitespace => False);

   return Token_Comment;
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"%"{NAME}";" {
   --  Parameter entity reference rule [69] in document type declaration.

   return Actions.On_Parameter_Entity_Reference_In_Document_Declaration (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"<!ENTITY" {
   --  Open of entity declaration, rules [71], [72].

   Enter_Start_Condition (Self, ENTITY_DECL);
   Reset_Whitespace_Matched (Self);

   return Token_Entity_Decl_Open;
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"<!ELEMENT" {
   --  Open of element declaration, rule [45]. Only the keyword is matched
   --  here; the element's name is matched by the {NAME} rule of the
   --  ELEMENT_NAME start condition.

   return Actions.On_Open_Of_Element_Declaration (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"<!ATTLIST" {
   --  Open of attribute list declaration, rule [52].

   return Actions.On_Open_Of_Attribute_List_Declaration (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11>"<!NOTATION"{WHITE_SPACE}{NAME} {
   --  Open of notation declaration and name of the notation, production
   --  [82].

   return Actions.On_Open_Of_Notation_Declaration (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11,CONDITIONAL_IGNORE_10,CONDITIONAL_IGNORE_11>"<![" {
   --  Start of conditional section. It is matched inside ignored sections
   --  too.

   return Actions.On_Open_Of_Conditional_Section (Self);
}

<DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11,CONDITIONAL_IGNORE_10,CONDITIONAL_IGNORE_11>"]]>" {
   --  Close of conditional section.

   return Actions.On_Close_Of_Conditional_Section (Self);
}

<NOTATION_DECL>">" {
   --  Close of notation declaration, production [82].

   Pop_Start_Condition (Self);

   return Token_Close;
}

<ENTITY_DECL>{NAME} {
   --  Name in entity declaration, rules [71], [72].

   return Actions.On_Name_In_Entity_Declaration (Self);
}

<ENTITY_DECL>"%" {
   --  Percent mark in parameter entity declaration, rule [72].

   return Actions.On_Percent_Sign (Self);
}

<ENTITY_DEF>["'] {
   --  Open delimiter of entity value, rule [9].

   return Actions.On_Entity_Value_Open_Delimiter (Self);
}

<ENTITY_DEF,NOTATION_DECL>"SYSTEM" {
   --  Entity value as ExternalID, rule [75], used by rules [73], [74].
   --  Notation as ExternalID (productions [75], [82]).

   return Actions.On_System_Keyword_In_Entity_Or_Notation (Self);
}

<ENTITY_DEF,NOTATION_DECL>"PUBLIC" {
   --  Entity value as ExternalID, rule [75], used by rules [73], [74].
   --  Notation as ExternalID or Public_ID (productions [75], [82], [83]).

   Reset_Whitespace_Matched (Self);
   Push_Current_And_Enter_Start_Condition (Self, EXTERNAL_ID_PUB);

   return Token_Public;
}

<ENTITY_DEF>"NDATA" {
   --  NDATA keyword, rule [76].

   return Actions.On_NDATA (Self);
}

<ENTITY_NDATA>{NAME} {
   --  Name of NDATA, rule [76].

   return Actions.On_Name_In_Entity_Declaration_Notation (Self);
}

<ENTITY_VALUE_10>{CHAR10_NO_QUOTATION_PERCENT_AMPERSAND_APOSTROPHE}+ {
   --  Segment of entity value text, rule [9]. It stops before any
   --  delimiter, '%' or '&'.

   Set_String_Internal
    (Item          => YYLVal,
     String        => YY_Text_Internal,
     Is_Whitespace => False);

   return Token_String_Segment;
}

<ENTITY_VALUE_11>{CHAR11_NO_QUOTATION_PERCENT_AMPERSAND_APOSTROPHE}+ {
   --  Segment of entity value text, rule [9]. It stops before any
   --  delimiter, '%' or '&'.

   Set_String_Internal
    (Item          => YYLVal,
     String        => YY_Text_Internal,
     Is_Whitespace => False);

   return Token_String_Segment;
}

<ENTITY_VALUE_10,ENTITY_VALUE_11>["'] {
   --  Close of entity value, rule [9].
   --  NOTE(review): both delimiter characters are matched here; presumably
   --  the action checks the character against the open delimiter - confirm
   --  against Actions.

   return Actions.On_Entity_Value_Close_Delimiter (Self);
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11,ENTITY_VALUE_10,ENTITY_VALUE_11>"&#"[0-9]+";" {
   --  Decimal form of character reference rule [66] in entity value rule [9];
   --  or content of element, rule [43].

   return Actions.On_Character_Reference (Self, False);
}

<ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11>"&#"[0-9]+";" {
   --  Decimal form of character reference rule [66] in attribute value,
   --  rule [10]. No token is returned on success; scanning of the attribute
   --  value continues.

   if not Actions.On_Character_Reference_In_Attribute_Value
           (Self, False)
   then
      return Error;
   end if;
}

<DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11,ENTITY_VALUE_10,ENTITY_VALUE_11>"&#x"[0-9a-fA-F]+";" {
   --  Hexadecimal form of character reference rule [66] in entity value rule
   --  [9] or content of element, rule [43].

   return Actions.On_Character_Reference (Self, True);
}

<ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11>"&#x"[0-9a-fA-F]+";" {
   --  Hexadecimal form of character reference rule [66] in attribute value,
   --  rule [10]. No token is returned on success; scanning of the attribute
   --  value continues.

   if not Actions.On_Character_Reference_In_Attribute_Value
           (Self, True)
   then
      return Error;
   end if;
}

<ENTITY_VALUE_10,ENTITY_VALUE_11>"&"{NAME}";" {
   --  General entity reference rule [68] in entity value rule [9].

   return Actions.On_General_Entity_Reference_In_Entity_Value (Self);
}

<ENTITY_VALUE_10,ENTITY_VALUE_11>"%"{NAME}";" {
   --  Parameter entity reference rule [69] in entity value rule [9].
   --
   --  Processing of a parameter entity uses a separate scanner state, thus
   --  after processing the current state is restored automatically. This
   --  allows the code to be reused for three modes: parsing of an entity
   --  value delimited by quotation marks; parsing of an entity value
   --  delimited by apostrophes; and parsing of parameter entity replacement
   --  text when it is referenced in either of the two forms of entity value.

   if not Actions.On_Parameter_Entity_Reference_In_Entity_Value (Self) then
      return Error;
   end if;
}

<ELEMENT_NAME>{NAME} {
   --  Name of the element in element declaration.

   return Actions.On_Name_In_Element_Declaration (Self);
}

<ELEMENT_DECL>EMPTY {
   --  EMPTY keyword, rule [46].

   return Token_Empty;
}

<ELEMENT_DECL>ANY {
   --  ANY keyword, rule [46].

   return Token_Any;
}

<ELEMENT_DECL,ELEMENT_CHILDREN>"(" {
   --  Open parenthesis, rules [49], [50], [51].

   return Actions.On_Open_Parenthesis_In_Content_Declaration (Self);
}

<ELEMENT_CHILDREN>")" {
   --  Close parenthesis, rules [49], [50], [51].

   return Actions.On_Close_Parenthesis_In_Content_Declaration (Self);
}

<ELEMENT_CHILDREN>"?" {
   --  Question mark in rules [47], [48].

   return Actions.On_Question_Mark_In_Content_Declaration (Self);
}

<ELEMENT_CHILDREN>"*" {
   --  Asterisk in rules [47], [48], [51].

   return Actions.On_Asterisk_In_Content_Declaration (Self);
}

<ELEMENT_CHILDREN>"+" {
   --  Plus sign in rules [47], [48].

   return Actions.On_Plus_In_Content_Declaration (Self);
}

<ELEMENT_CHILDREN>"|" {
   --  Vertical bar in rule [49].

   return Token_Vertical_Bar;
}

<ELEMENT_CHILDREN>"," {
   --  Comma in rule [50].

   return Token_Comma;
}

<ELEMENT_CHILDREN>"#PCDATA" {
   --  #PCDATA in rule [51].

   return Token_Pcdata;
}

<ELEMENT_CHILDREN>{NAME} {
   --  Name in element's children declaration, rules [48], [51].

   return Actions.On_Name_In_Element_Declaration_Children (Self);
}

<ELEMENT_DECL,ELEMENT_CHILDREN,ATTLIST_DECL,ATTLIST_TYPE,ENTITY_DEF>">" {
   --  Close token of entity declaration, rules [71], [72].
   --  Close of element declaration, rule [45].
   --  Close of attribute list declaration, rule [52].
   --
   --  One rule serves all three kinds of declaration; the action is shared.

   return Actions.On_Close_Of_Declaration (Self);
}

<ATTLIST_NAME>{NAME} {
   --  Element's name in attribute list declaration, rule [52].

   return Actions.On_Element_Name_In_Attribute_List_Declaration (Self);
}

<ATTLIST_DECL>{NAME} {
   --  Name of the attribute, rule [53].

   return Actions.On_Attribute_Name_In_Attribute_List_Declaration (Self);
}

--  Keywords of attribute types. They are placed before the generic {NAME}
--  rule of ATTLIST_TYPE, so a keyword wins over {NAME} when both match text
--  of the same length.

<ATTLIST_TYPE>"CDATA" {
   --  CDATA keyword, rule [55].

   return Actions.On_Attribute_Type (Self, Token_Cdata);
}

<ATTLIST_TYPE>"ID" {
   --  ID keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Id);
}

<ATTLIST_TYPE>"IDREF" {
   --  IDREF keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Idref);
}

<ATTLIST_TYPE>"IDREFS" {
   --  IDREFS keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Idrefs);
}

<ATTLIST_TYPE>"ENTITY" {
   --  ENTITY keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Entity);
}

<ATTLIST_TYPE>"ENTITIES" {
   --  ENTITIES keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Entities);
}

<ATTLIST_TYPE>"NMTOKEN" {
   --  NMTOKEN keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Nmtoken);
}

<ATTLIST_TYPE>"NMTOKENS" {
   --  NMTOKENS keyword, rule [56].

   return Actions.On_Attribute_Type (Self, Token_Nmtokens);
}

<ATTLIST_TYPE>"NOTATION" {
   --  NOTATION keyword, rule [58].

   return Actions.On_Attribute_Type (Self, Token_Notation);
}

<ATTLIST_TYPE>"#REQUIRED" {
   --  #REQUIRED keyword, rule [60].

   return Actions.On_Default_Declaration (Self, ATTLIST_DECL, Token_Required);
}

<ATTLIST_TYPE>"#IMPLIED" {
   --  #IMPLIED keyword, rule [60].

   return Actions.On_Default_Declaration (Self, ATTLIST_DECL, Token_Implied);
}

<ATTLIST_TYPE>"#FIXED" {
   --  #FIXED keyword, rule [60].

   --  NOTE(review): unlike #REQUIRED and #IMPLIED, ATTLIST_TYPE is passed
   --  here; presumably the scanner stays in ATTLIST_TYPE because #FIXED is
   --  followed by the default value - confirm against Actions.

   return Actions.On_Default_Declaration (Self, ATTLIST_TYPE, Token_Fixed);
}

<ATTLIST_TYPE>"(" {
   --  Open parenthesis, rules [58], [59].

   return Actions.On_Open_Parenthesis_In_Notation_Attribute (Self);
}

<ATTLIST_TYPE>")" {
   --  Close parenthesis, rules [58], [59].

   return Actions.On_Close_Parenthesis_In_Notation_Attribute (Self);
}

<ATTLIST_TYPE>"|" {
   --  Vertical bar, rules [58], [59].

   return Token_Vertical_Bar;
}

<ATTLIST_TYPE>{NAME} {
   --  Name in the rule [58].
   --
   --  This rule must stay before the {NMTOKEN} rule: every Name is also an
   --  Nmtoken, so both rules match text of the same length and the earlier
   --  rule is selected.

   return Actions.On_Name_In_Attribute_List_Declaration_Notation (Self);
}

<ATTLIST_TYPE>{NMTOKEN} {
   --  Nmtoken in the rule [59]. Reached only for tokens which are not
   --  Names; it is reported as Token_Name.

   Set_String_Internal
    (Item          => YYLVal,
     String        => YY_Text_Internal,
     Is_Whitespace => False);
   --  XXX Need to add flag to mark Nmtoken.

   return Token_Name;
}

<ATTLIST_TYPE>['"] {
   --  Open delimiter of attribute value, rule [10]. No token is returned on
   --  success.

   if not Actions.On_Attribute_Value_Open_Delimiter (Self, ATTLIST_DECL) then
      return Error;
   end if;
}

<ATTLIST_DECL,ATTLIST_NAME,ATTLIST_TYPE,ELEMENT_NAME,ELEMENT_DECL,ELEMENT_CHILDREN,CONDITIONAL_DIRECTIVE>"%"{NAME}";" {
   --  Parameter entity reference rule [69] in attribute declaration.
   --  Parameter entity reference in element's children declaration, [51].
   --  Parameter entity reference in place of the directive of a conditional
   --  section.

   if not Actions.On_Parameter_Entity_Reference_In_Markup_Declaration (Self) then
      return Error;
   end if;
}

<DOCTYPE_EXTINT,DOCTYPE_INT,DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11,CONDITIONAL_DIRECTIVE>{WHITE_SPACE} {
   --  All white spaces from rules [28] are ignored.
   --  Whitespace before name in rule [76] is ignored.

   null;
}

<CONDITIONAL_DIRECTIVE>"IGNORE" {
   --  IGNORE directive of the conditional section.

   Actions.On_Conditional_Section_Directive (Self, False);
}

<CONDITIONAL_DIRECTIVE>"INCLUDE" {
   --  INCLUDE directive of the conditional section.

   Actions.On_Conditional_Section_Directive (Self, True);
}

<CONDITIONAL_DIRECTIVE>"[" {
   --  Start of content of conditional section.

   if not Actions.On_Open_Of_Conditional_Section_Content (Self) then
      return Error;
   end if;
}

<CONDITIONAL_IGNORE_10>{CHAR10_NO_LESS_RIGHT_SQUARE}+|"<"|"<!"|"]" {
   --  Content of ignore conditional section. It ends with "]]>" or "<![".
   --
   --  "<", "<!" and "]" are matched one piece at a time, so the longer
   --  "<![" and "]]>" rules win whenever those delimiters are present.

   null;
}

<CONDITIONAL_IGNORE_11>{CHAR11_NO_LESS_RIGHT_SQUARE}+|"<"|"<!"|"]" {
   --  Content of ignore conditional section. It ends with "]]>" or "<![".
   --
   --  "<", "<!" and "]" are matched one piece at a time, so the longer
   --  "<![" and "]]>" rules win whenever those delimiters are present.

   null;
}

<XML_DECL,EXTERNAL_ID_PUB,EXTERNAL_ID_SYS,ENTITY_DECL,ENTITY_DEF,ENTITY_NDATA,NOTATION_DECL,ELEMENT_START,ATTLIST_NAME,ATTLIST_DECL,ATTLIST_TYPE,ELEMENT_NAME,ELEMENT_DECL,ELEMENT_CHILDREN>{WHITE_SPACE} {
   --  White spaces in entity declaration are not optional, rules [71], [72],
   --  [75], [76].
   --
   --  White spaces in start tag, rule [40], are ignored, but white space
   --  between an attribute value and the name of the next attribute must be
   --  present.
   --
   --  All white spaces from rules [23], [24], [25], [32], [80], [82] are
   --  ignored, but white space between an attribute value and the name of
   --  the next attribute must be present.
   --
   --  Production [45] requires whitespace after the name and before
   --  content specification.
   --
   --  Productions [47], [48] don't allow spaces before multiplicity
   --  indicator.
   --
   --  No token is returned; the fact that whitespace was seen is only
   --  recorded in the scanner.

   Set_Whitespace_Matched (Self);
}

<ELEMENT_START>{NAME} {
   --  Name of the attribute, rule [41].

   return Actions.On_Name_In_Element_Start_Tag (Self);
}

<XML_DECL,ELEMENT_START>"=" {
   --  Equal sign as attribute's name value delimiter, rule [25] in rules [41],
   --  [24], [32], [80].

   return Token_Equal;
}

<ELEMENT_START>"/>" {
   --  Close of empty element tag, rule [44].
950 951 return Actions.On_Close_Of_Empty_Element_Tag (Self); 952 } 953 954 <DOCTYPE_EXTINT,DOCTYPE_INT>">" { 955 -- Close tag of document type declaration, rule [28]. 956 957 if Actions.On_Close_Of_Document_Type_Declaration (Self) then 958 return Token_Close; 959 end if; 960 } 961 962 <ELEMENT_START>">" { 963 -- Close of tag, rule [40]. 964 -- Close tag of document type declaration, rule [28]. 965 966 return Actions.On_Close_Of_Tag (Self); 967 } 968 969 <ELEMENT_START>['"] { 970 -- Open delimiter of attribute value, rule [10]. 971 972 Actions.On_Attribute_Value_Open_Delimiter (Self, ELEMENT_START); 973 } 974 975 <ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11>['"] { 976 -- Close delimiter of attribute value, rule [10]. 977 978 if Actions.On_Attribute_Value_Close_Delimiter (Self) then 979 return Token_String_Segment; 980 end if; 981 } 982 983 <ATTRIBUTE_VALUE_10>{CHAR10_NO_QUOTATION_AMPERSAND_APOSTROPHE_LESS}+ { 984 -- Value of attribute, rule [10]. 985 986 Actions.On_Attribute_Value_Character_Data (Self); 987 } 988 989 <ATTRIBUTE_VALUE_11>{CHAR11_NO_QUOTATION_AMPERSAND_APOSTROPHE_LESS}+ { 990 -- Value of attribute, rule [10]. 991 992 Actions.On_Attribute_Value_Character_Data (Self); 993 } 994 995 <ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11>"<" { 996 -- Less-than sign can't be used in the attribute value. 997 998 return Actions.On_Less_Than_Sign_In_Attribute_Value (Self); 999 } 1000 1001 <ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11>"&"{NAME}";" { 1002 -- General entity reference rule [68] in attribute value, rule [10]. 
1003 1004 if not Actions.On_General_Entity_Reference_In_Attribute_Value (Self) then 1005 return Error; 1006 end if; 1007 } 1008 1009 <XML_DECL,DOCUMENT_10,DOCUMENT_11,DOCUMENT_U11,CDATA_10,CDATA_11,CDATA_U11,PI,PI_DATA_10,PI_DATA_11,DOCTYPE_EXTINT,DOCTYPE_INT,DOCTYPE_INTSUBSET_10,DOCTYPE_INTSUBSET_11,ELEMENT_NAME,ELEMENT_DECL,ELEMENT_CHILDREN,ATTLIST_DECL,ATTLIST_NAME,ATTLIST_TYPE,NOTATION_DECL,ENTITY_DECL,ENTITY_DEF,ENTITY_NDATA,ENTITY_VALUE_10,ENTITY_VALUE_11,CONDITIONAL_DIRECTIVE,CONDITIONAL_IGNORE_10,CONDITIONAL_IGNORE_11,ELEMENT_START,ATTRIBUTE_VALUE_10,ATTRIBUTE_VALUE_11,EXTERNAL_ID_SYS,EXTERNAL_ID_PUB>.|\n { 1010 -- Unexpected character. 1011 1012 return Actions.On_Unexpected_Character (Self); 1013 } 1014 1015 %% 1016 with Xml_Parser_Tokens; 1017 ## 1018 function YYLex return Xml_Parser_Tokens.Token; 1019 ## 1020 with Ada.Characters.Wide_Wide_Latin_1; 1021 with Ada.Wide_Wide_Text_IO; 1022 with League.Strings; 1023 with Matreshka.Internals.XML.Symbol_Tables; 1024 with Matreshka.Internals.Strings; 1025 ## 1026 use Xml_Parser_Tokens; 1027 1028 procedure Enter_Start_Condition 1029 (Self : access Integer; State : Integer) is separate; 1030 1031 procedure Push_Current_And_Enter_Start_Condition 1032 (Self : access Integer; Enter : Integer) is separate; 1033 1034 procedure Push_And_Enter_Start_Condition 1035 (Self : access Integer; Push : Integer; Set : Integer) is separate; 1036 1037 procedure Push_Start_Condition 1038 (Self : access Integer; State : Integer) is separate; 1039 1040 procedure Pop_Start_Condition (Self : access Integer) is separate; 1041 1042 function Start_Condition (Self : access Integer) return Integer is separate; 1043 1044 procedure Set_Whitespace_Matched (Self : access Integer) is separate; 1045 1046 function Get_Whitespace_Matched 1047 (Self : access Integer) return Boolean is separate; 1048 1049 procedure Reset_Whitespace_Matched (Self : access Integer) is separate; 1050 1051 function YY_Text_Internal 1052 (Trim_Left : Natural := 0; 1053 
Trim_Right : Natural := 0; 1054 Trim_Whitespace : Boolean := False) 1055 return Matreshka.Internals.Strings.Shared_String_Access is separate; 1056 1057 Self : access Integer; 1058 YYLVal : YYSType; 1059 Put_Line : access procedure (Item : League.Strings.Universal_String); 1060 1061 procedure Process_General_Entity_Reference_In_Attribute_Value 1062 (Self : access Integer; 1063 Symbol : Matreshka.Internals.XML.Symbol_Identifier) 1064 is separate; 1065 1066 procedure Set_String_Internal 1067 (Item : in out YYSType; 1068 String : Matreshka.Internals.Strings.Shared_String_Access; 1069 Is_Whitespace : Boolean) is separate; 1070 1071 package Actions is 1072 1073 procedure On_No_XML_Declaration (Self : access Integer); 1074 1075 procedure On_Whitespace_In_Processing_Instruction (Self : access Integer); 1076 1077 function On_Unexpected_Character (Self : access Integer) return Token; 1078 1079 function On_Less_Than_Sign_In_Attribute_Value 1080 (Self : access Integer) return Token; 1081 1082 function On_Character_Data (Self : access Integer) return Token; 1083 1084 function On_Whitespace_In_Document 1085 (Self : access Integer) return Boolean; 1086 1087 function On_Open_Of_Internal_Subset (Self : access Integer) return Token; 1088 1089 function On_Close_Of_Declaration (Self : access Integer) return Token; 1090 1091 function On_Open_Of_End_Tag (Self : access Integer) return Token; 1092 1093 function On_Open_Of_Start_Tag (Self : access Integer) return Token; 1094 1095 function On_Close_Of_Tag (Self : access Integer) return Token; 1096 1097 function On_Close_Of_Empty_Element_Tag (Self : access Integer) return Token; 1098 1099 function On_Open_Of_XML_Or_Text_Declaration 1100 (Self : access Integer) return Token; 1101 1102 function On_Open_Of_Processing_Instruction 1103 (Self : access Integer) return Token; 1104 1105 function On_Close_Of_Processing_Instruction 1106 (Self : access Integer; 1107 Is_Empty : Boolean) return Token; 1108 1109 function On_Close_Of_XML_Or_Text_Declaration 
1110 (Self : access Integer) return Token; 1111 1112 function On_Open_Of_Document_Type_Declaration 1113 (Self : access Integer) return Token; 1114 1115 function On_General_Entity_Reference_In_Attribute_Value 1116 (Self : access Integer) return Boolean; 1117 1118 function On_General_Entity_Reference_In_Document_Content 1119 (Self : access Integer) return Token; 1120 1121 function On_General_Entity_Reference_In_Entity_Value 1122 (Self : access Integer) return Token; 1123 1124 function On_Parameter_Entity_Reference_In_Entity_Value 1125 (Self : access Integer) return Boolean; 1126 1127 function On_Parameter_Entity_Reference_In_Markup_Declaration 1128 (Self : access Integer) return Boolean; 1129 1130 function On_Parameter_Entity_Reference_In_Document_Declaration 1131 (Self : access Integer) return Token; 1132 1133 function On_Open_Of_Element_Declaration 1134 (Self : access Integer) return Token; 1135 1136 function On_Open_Of_Attribute_List_Declaration 1137 (Self : access Integer) return Token; 1138 1139 function On_Element_Name_In_Attribute_List_Declaration 1140 (Self : access Integer) return Token; 1141 1142 function On_Open_Of_Notation_Declaration 1143 (Self : access Integer) return Token; 1144 1145 function On_Attribute_Name_In_Attribute_List_Declaration 1146 (Self : access Integer) return Token; 1147 1148 function On_Name_In_Element_Declaration 1149 (Self : access Integer) return Token; 1150 1151 function On_Name_In_Element_Declaration_Children 1152 (Self : access Integer) return Token; 1153 1154 function On_Name_In_Element_Start_Tag 1155 (Self : access Integer) return Token; 1156 1157 function On_Name_In_Entity_Declaration 1158 (Self : access Integer) return Token; 1159 1160 function On_Name_In_Entity_Declaration_Notation 1161 (Self : access Integer) return Token; 1162 1163 function On_Name_In_Attribute_List_Declaration_Notation 1164 (Self : access Integer) return Token; 1165 1166 function On_Public_Literal (Self : access Integer) return Token; 1167 1168 function 
On_System_Literal (Self : access Integer) return Token; 1169 1170 function On_Entity_Value_Close_Delimiter 1171 (Self : access Integer) return Token; 1172 1173 function On_Entity_Value_Open_Delimiter 1174 (Self : access Integer) return Token; 1175 1176 function On_Character_Reference 1177 (Self : access Integer; 1178 Hax : Boolean) return Token; 1179 1180 function On_Character_Reference_In_Attribute_Value 1181 (Self : access Integer; 1182 Hex : Boolean) return Boolean; 1183 1184 function On_Attribute_Type 1185 (Self : access Integer; 1186 Type_Token : Token) return Token; 1187 1188 function On_Default_Declaration 1189 (Self : access Integer; 1190 State : Integer; 1191 Type_Token : Token) return Token; 1192 1193 procedure On_Attribute_Value_Character_Data (Self : access Integer); 1194 1195 function On_Attribute_Value_Open_Delimiter 1196 (Self : access Integer; 1197 State : Integer) return Boolean; 1198 1199 procedure On_Attribute_Value_Open_Delimiter 1200 (Self : access Integer; 1201 State : Integer); 1202 1203 function On_Attribute_Value_Close_Delimiter 1204 (Self : access Integer) return Boolean; 1205 1206 function On_Close_Of_Document_Type_Declaration 1207 (Self : access Integer) return Boolean; 1208 1209 function On_Open_Of_Conditional_Section 1210 (Self : access Integer) return Token; 1211 1212 procedure On_Conditional_Section_Directive 1213 (Self : access Integer; 1214 Include : Boolean); 1215 1216 function On_Open_Of_Conditional_Section_Content 1217 (Self : access Integer) return Boolean; 1218 1219 function On_Close_Of_Conditional_Section 1220 (Self : access Integer) return Token; 1221 1222 procedure On_Content_Of_Ignore_Conditional_Section 1223 (Self : access Integer); 1224 1225 function On_NDATA (Self : access Integer) return Token; 1226 1227 function On_Version_Keyword (Self : access Integer) return Token; 1228 1229 function On_Encoding_Keyword (Self : access Integer) return Token; 1230 1231 function On_Standalone_Keyword (Self : access Integer) return 
Token; 1232 1233 function On_Percent_Sign (Self : access Integer) return Token; 1234 1235 function On_CDATA (Self : access Integer) return Token; 1236 1237 function On_Open_Parenthesis_In_Content_Declaration 1238 (Self : access Integer) return Token; 1239 1240 function On_Close_Parenthesis_In_Content_Declaration 1241 (Self : access Integer) return Token; 1242 1243 function On_Question_Mark_In_Content_Declaration 1244 (Self : access Integer) return Token; 1245 1246 function On_Plus_In_Content_Declaration 1247 (Self : access Integer) return Token; 1248 1249 function On_Asterisk_In_Content_Declaration 1250 (Self : access Integer) return Token; 1251 1252 function On_Open_Parenthesis_In_Notation_Attribute 1253 (Self : access Integer) return Token; 1254 1255 function On_Close_Parenthesis_In_Notation_Attribute 1256 (Self : access Integer) return Token; 1257 1258 function On_Open_Of_CDATA 1259 (Self : access Integer) return Token; 1260 1261 function On_Close_Of_CDATA 1262 (Self : access Integer) return Token; 1263 1264 function On_Attribute_Value_In_XML_Declaration 1265 (Self : access Integer) return Token; 1266 1267 function On_System_Keyword_In_Document_Type 1268 (Self : access Integer) return Token; 1269 1270 function On_System_Keyword_In_Entity_Or_Notation 1271 (Self : access Integer) return Token; 1272 1273 end Actions; 1274 1275 package body Actions is separate; 1276