1%%-*-erlang-*- 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2008-2017. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%%---------------------------------------------------------------------- 20%% Start of common source 21%%---------------------------------------------------------------------- 22%-compile(export_all). 23 24%%---------------------------------------------------------------------- 25%% Include files 26%%---------------------------------------------------------------------- 27-include("xmerl_sax_parser.hrl"). 28 29%%---------------------------------------------------------------------- 30%% External exports 31%%---------------------------------------------------------------------- 32-export([parse/2, 33 parse_dtd/2, 34 is_name_char/1, 35 is_name_start/1]). 36 37%%---------------------------------------------------------------------- 38%% Internal exports 39%%---------------------------------------------------------------------- 40-export([ 41 cf/3, 42 cf/4, 43 cf/5 44 ]). 45 46%%---------------------------------------------------------------------- 47%% Records 48%%---------------------------------------------------------------------- 49 50%%---------------------------------------------------------------------- 51%% Macros 52%%---------------------------------------------------------------------- 53-define(HTTP_DEF_PORT, 80). 

%%======================================================================
%% External functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: parse(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = the value returned by handle_end_document/1;
%%                    {ok, EventState, Rest} when parsing succeeded
%%           EventState = term()
%% Description: Parsing XML from input stream. The startDocument event
%%              is sent, the document is parsed with a fresh reference
%%              table, and the outcome (normal result, thrown term or
%%              any other exception) is passed to handle_end_document/1.
%%----------------------------------------------------------------------
parse(Xml, State) ->
    try
        StartedState = event_callback(startDocument, State),
        ParserState = StartedState#xmerl_sax_parser_state{ref_table = maps:new()},
        handle_end_document(parse_document(Xml, ParserState))
    catch
        throw:Thrown ->
            %% The thrown term is passed on unchanged.
            handle_end_document(Thrown);
        _:Failure ->
            %% Only the state supplied by the caller is in scope here.
            handle_end_document({other, Failure, State})
    end.

%%----------------------------------------------------------------------
%% Function: parse_dtd(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = the value returned by handle_end_document/1;
%%                    {ok, EventState, Rest} when parsing succeeded
%%           EventState = term()
%% Description: Parsing XML DTD from input stream. Works like parse/2
%%              but the input is parsed with parse_external_entity_1/2.
%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
    try
        StartedState = event_callback(startDocument, State),
        ParserState = StartedState#xmerl_sax_parser_state{ref_table = maps:new()},
        handle_end_document(parse_external_entity_1(Xml, ParserState))
    catch
        throw:Thrown ->
            %% The thrown term is passed on unchanged.
            handle_end_document(Thrown);
        _:Failure ->
            %% Only the state supplied by the caller is in scope here.
            handle_end_document({other, Failure, State})
    end.



%%======================================================================
%% Internal functions
%%======================================================================

%%----------------------------------------------------------------------
%% Function: handle_end_document(ParserResult) -> Result
%% Input:    ParserResult = {ok, Rest, State} |
%%                          {endDocument, Rest, State} |
%%                          {fatal_error, {State, Reason}} |
%%                          {event_receiver_error, State, {Tag, Reason}} |
%%                          {Rest, State} |
%%                          {other, Error, State}
%% Output:   Result = {ok, EventState, Rest} |
%%                    {fatal_error, term()} |
%%                    the value of format_error/3
%%           EventState = term()
%% Description: Ends the parsing and formats output. The endDocument
%%              event is always sent before the result is built.
%%----------------------------------------------------------------------
handle_end_document({ok, Rest, State}) ->
    %% ok case from parse
    send_end_document(
      State,
      fun(EndState) ->
              case check_if_rest_ok(EndState#xmerl_sax_parser_state.input_type, Rest) of
                  true ->
                      {ok, EndState#xmerl_sax_parser_state.event_state, Rest};
                  false ->
                      format_error(fatal_error, EndState, "Input found after legal document")
              end
      end);
handle_end_document({endDocument, Rest, State}) ->
    %% ok case from parse and parse_dtd
    send_end_document(
      State,
      fun(EndState) -> {ok, EndState#xmerl_sax_parser_state.event_state, Rest} end);
handle_end_document({fatal_error, {State, Reason}}) ->
    send_end_document(
      State,
      fun(EndState) -> format_error(fatal_error, EndState, Reason) end);
handle_end_document({event_receiver_error, State, {Tag, Reason}}) ->
    send_end_document(
      State,
      fun(EndState) -> format_error(Tag, EndState, Reason) end);
handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) ->
    %% ok case from parse_dtd
    send_end_document(
      State,
      fun(EndState) -> {ok, EndState#xmerl_sax_parser_state.event_state, Rest} end);
handle_end_document({other, Error, State}) ->
    send_end_document(State, fun(_EndState) -> {fatal_error, Error} end).

%%----------------------------------------------------------------------
%% Function: send_end_document(State, Finish) -> Result
%% Input:    State = #xmerl_sax_parser_state{}
%%           Finish = fun((State) -> Result)
%% Output:   Result = term()
%% Description: Sends the endDocument event and applies Finish to the
%%              resulting state. The variables in the catch patterns are
%%              deliberately not shared with any caller: a pattern that
%%              reuses an already bound Tag or Reason only matches when
%%              the callback fails with exactly that value.
%%----------------------------------------------------------------------
send_end_document(State, Finish) ->
    try
        Finish(event_callback(endDocument, State))
    catch
        throw:{event_receiver_error, ErrState, {ErrTag, ErrReason}} ->
            format_error(ErrTag, ErrState, ErrReason);
        _:Other ->
            {fatal_error, Other}
    end.

%%----------------------------------------------------------------------
%% Function: parse_document(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, Rest, State}
%% Description: Parsing an XML document
%%    [1] document ::= prolog element Misc*
%%----------------------------------------------------------------------
parse_document(Rest, State) when is_record(State, xmerl_sax_parser_state) ->
    {Rest1, State1} = parse_byte_order_mark(Rest, State),
    %% Eod = true: an empty input ends the trailing Misc part.
    {Rest2, State2} = parse_misc(Rest1, State1, true),
    {ok, Rest2, State2}.

?PARSE_BYTE_ORDER_MARK(Bytes, State).

%%----------------------------------------------------------------------
%% Function: parse_xml_decl(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parsing the xml directive in the prolog.
%%    [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%    [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S?
%%                     '?>'
%%----------------------------------------------------------------------
parse_xml_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_xml_decl/2);
%% A proper prefix of "<?xml" cannot be classified yet; ask for more input.
parse_xml_decl(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?x") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xm") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xml") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING_REST("<?xml", AfterTarget), State) ->
    parse_xml_decl_rest(AfterTarget, State);
?PARSE_XML_DECL(Bytes, State).

%% Decides what follows the "<?xml" prefix. Whitespace means a real XML
%% declaration, so the version info is parsed and the prolog continues
%% after it. Any other character means the prefix was not a declaration
%% (presumably a processing instruction whose target starts with "xml");
%% "<?xml" is put back in front of the input and the prolog parser takes
%% over.
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, Rest), State) when ?is_whitespace(C) ->
    {_XmlAttributes, AfterDecl, DeclState} = parse_version_info(Rest, State, []),
    parse_prolog(AfterDecl, DeclState);
parse_xml_decl_rest(?STRING_UNBOUND_REST(_C, _Rest) = Bytes, State) ->
    parse_prolog(?STRING_REST("<?xml", Bytes), State);
parse_xml_decl_rest(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined).



%%----------------------------------------------------------------------
%% Function: parse_prolog(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parsing XML prolog
%%    [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%----------------------------------------------------------------------
parse_prolog(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog/2);
parse_prolog(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_prolog/2);
parse_prolog(?STRING_REST("<?", Rest), State) ->
    %% Both result shapes of parse_pi/2 continue with the prolog.
    {AfterPi, PiState} =
        case parse_pi(Rest, State) of
            {endDocument, Unparsed, S} -> {Unparsed, S};
            {_, _} = Parsed -> Parsed
        end,
    parse_prolog(AfterPi, PiState);
parse_prolog(?STRING_REST("<!", Rest), State) ->
    parse_prolog_1(Rest, State);
parse_prolog(?STRING_REST("<", Rest), State) ->
    %% Anything else starting with "<" is the start tag of the root element.
    parse_stag(Rest, State);
parse_prolog(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_prolog(AfterWs, WsState);
parse_prolog(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog/2],
                             "expecting < or whitespace").

%% Continues after "<!" in the prolog: either a DOCTYPE declaration or a
%% comment. Proper prefixes of "DOCTYPE" and "--" request more input.
parse_prolog_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("D") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DO") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOC") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCT") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTY") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTYP") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", Rest), State) ->
    {AfterDoctype, DoctypeState} = parse_doctype(Rest, State),
    parse_prolog(AfterDoctype, event_callback(endDTD, DoctypeState));
parse_prolog_1(?STRING("-") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("--", Rest), State) ->
    {AfterComment, CommentState} = parse_comment(Rest, State, []),
    parse_prolog(AfterComment, CommentState);
parse_prolog_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog_1/2],
                             "expecting comment or DOCTYPE").



%%----------------------------------------------------------------------
%% Function: parse_version_info(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parsing the version number in the XML directive.
%%    [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
%%----------------------------------------------------------------------
parse_version_info(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_version_info/3);
parse_version_info(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_version_info(AfterWs, WsState, Acc);
parse_version_info(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    %% The name is only parsed when C can start a name; 'false' is the
    %% case where it cannot.
    case is_name_start(C) andalso parse_name(Rest, State, [C]) of
        {"version", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Version, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_xml_decl_rest(AfterValue, ValueState, [{"version", Version} | Acc]);
        false ->
            ?fatal_error(State, "expecting attribute version");
        {_OtherName, _, NameState} ->
            ?fatal_error(NameState, "expecting attribute version")
    end;
parse_version_info(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_version_info/3],
                             undefined).



%%----------------------------------------------------------------------
%% Function: parse_xml_decl_rest(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Checks if there is more to parse in the XML directive.
%%              "?>" ends the directive and returns the collected
%%              attributes in document order; whitespace leads on to the
%%              optional encoding attribute.
%%----------------------------------------------------------------------
parse_xml_decl_rest(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING("?") = Partial, State, Acc) ->
    %% Might be the first half of "?>"; more input is needed.
    cf(Partial, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING_REST("?>", AfterDecl), State, Acc) ->
    {lists:reverse(Acc), AfterDecl, State};
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_xml_decl_encoding(AfterWs, WsState, Acc);
parse_xml_decl_rest(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_rest/3],
                             "expecting encoding, standalone, whitespace or ?>").


%%----------------------------------------------------------------------
%% Function: parse_xml_decl_encoding(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parse the encoding attribute in the XML directive.
%%    [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
%%    [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
%%----------------------------------------------------------------------
parse_xml_decl_encoding(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding/3);
parse_xml_decl_encoding(?STRING_REST("e", Rest), State, Acc) ->
    case parse_name(Rest, State, [$e]) of
        {"encoding", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Encoding, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_xml_decl_encoding_1(AfterValue, ValueState,
                                      [{"encoding", Encoding} | Acc]);
        {OtherName, _, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ OtherName ++
                             " not allowed in xml declaration")
    end;
parse_xml_decl_encoding(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    %% Not an encoding attribute; the standalone attribute is tried next.
    parse_xml_decl_standalone(Bytes, State, Acc);
parse_xml_decl_encoding(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_encoding/3],
                             undefined).


%% Continues after the encoding attribute: whitespace may be followed by
%% the standalone attribute, anything else goes back to
%% parse_xml_decl_rest/3.
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_xml_decl_standalone(AfterWs, WsState, Acc);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    parse_xml_decl_rest(Bytes, State, Acc);
parse_xml_decl_encoding_1(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_encoding_1/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_xml_decl_standalone(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parse the standalone attribute in the XML directive.
%%    [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
%%                    ('"' ('yes' | 'no') '"'))
%%----------------------------------------------------------------------
parse_xml_decl_standalone(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_standalone/3);
parse_xml_decl_standalone(?STRING_REST("s", Rest), State, Acc) ->
    case parse_name(Rest, State, [$s]) of
        {"standalone", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Standalone, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            %% Only "yes" and "no" are accepted; the matching atom is
            %% what gets stored in the parser state. ?fatal_error is
            %% expected not to return.
            Flag = case Standalone of
                       "yes" -> yes;
                       "no" -> no;
                       _ ->
                           ?fatal_error(ValueState, "Wrong value of attribute standalone in xml declaration, must be yes or no")
                   end,
            {_WS, AfterWs, WsState} = whitespace(AfterValue, ValueState, []),
            parse_xml_decl_rest(AfterWs,
                                WsState#xmerl_sax_parser_state{standalone = Flag},
                                [{"standalone", Standalone} | Acc]);
        {OtherName, _, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ OtherName ++
                             " not allowed in xml declaration")
    end;
parse_xml_decl_standalone(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    %% No standalone attribute; parse_xml_decl_rest/3 handles what follows.
    parse_xml_decl_rest(Bytes, State, Acc);
parse_xml_decl_standalone(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_standalone/3],
                             undefined).



%%----------------------------------------------------------------------
%% Function: parse_pi(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parse processing instructions.
%%    [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))?
%%                '?>'
%%    [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
%%----------------------------------------------------------------------
parse_pi(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi/2);
parse_pi(?STRING_UNBOUND_REST(C, Rest) = Bytes, State) ->
    case is_name_start(C) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {PiTarget, AfterTarget, TargetState} = parse_name(Rest, State, [C]),
            case string:to_lower(PiTarget) of
                "xml" ->
                    %% A new "<?xml" is only accepted for stream input
                    %% with no open tags. The input and state from
                    %% before the target was read are handed back.
                    InputType = State#xmerl_sax_parser_state.input_type,
                    EndTags = State#xmerl_sax_parser_state.end_tags,
                    case check_if_new_doc_allowed(InputType, EndTags) of
                        true ->
                            {endDocument, Bytes, State};
                        false ->
                            ?fatal_error(TargetState, "<?xml ...?> not first in document")
                    end;
                _ ->
                    {PiData, AfterData, DataState} = parse_pi_1(AfterTarget, TargetState),
                    {AfterData,
                     event_callback({processingInstruction, PiTarget, PiData}, DataState)}
            end
    end;
parse_pi(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).

%% True only for stream input with an empty list of open end tags.
check_if_new_doc_allowed(InputType, EndTags) ->
    InputType =:= stream andalso EndTags =:= [].

%% Stream input may be followed by anything; file input must be
%% exhausted (empty list or empty binary); other input types are
%% rejected.
check_if_rest_ok(stream, _) ->
    true;
check_if_rest_ok(file, Rest) ->
    Rest =:= [] orelse Rest =:= <<>>;
check_if_rest_ok(_, _) ->
    false.


%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parse processing instructions.
%%    Note: the value actually returned is {PiData, Rest, State}, where
%%    PiData is the string between the PI target and the closing "?>"
%%    ([] when there is none).
%%----------------------------------------------------------------------
parse_pi_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi_1/2);
parse_pi_1(?STRING_UNBOUND_REST(C,_) = Rest, State) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} =
        whitespace(Rest, State, []),
    parse_pi_data(Rest1, State1, []);
parse_pi_1(?STRING("?") = Bytes, State) ->
    %% A lone "?" may be the first half of "?>" split over two input
    %% chunks; ask for more input instead of failing.
    cf(Bytes, State, fun parse_pi_1/2);
parse_pi_1(?STRING_REST("?>", Rest), State) ->
    {[], Rest, State};
parse_pi_1(Bytes, State) ->
    %% The continuation must re-enter this function: the PI target has
    %% already been consumed and the caller expects a three-tuple.
    unicode_incomplete_check([Bytes, State, fun parse_pi_1/2],
                             "expecting whitespace or '?>'").


%%----------------------------------------------------------------------
%% Function: parse_name(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Name, Rest, State}
%%           Name = string()
%% Description: Parse a name. Next character is put in the accumulator
%%              if it's a valid name character.
%%    [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
parse_name(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_name/3);
parse_name(?STRING_UNBOUND_REST(C, Rest) = Bytes, State, Acc) ->
    case is_name_char(C) of
        true ->
            parse_name(Rest, State, [C|Acc]);
        false ->
            %% The accumulator is built in reverse; the first character
            %% that is not a name character stays in the input.
            {lists:reverse(Acc), Bytes, State}
    end;
parse_name(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_name/3], undefined).


%%----------------------------------------------------------------------
%% Function: parse_ns_name(Rest, State, Prefix, Name) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Prefix = string()
%%           Name = string()
%% Output:   Result = {{Prefix, Name}, Rest, State}
%%           Name = string()
%% Description: Parse a namespace name. Next character is put in the
%%              accumulator if it's a valid name character.
%%              The difference between this function and parse_name/3 is
%%              that a colon is interpreted as a separator between the
%%              namespace prefix and the name.
%%----------------------------------------------------------------------
parse_ns_name(?STRING_EMPTY, State, Prefix, Name) ->
    cf(?STRING_EMPTY, State, Prefix, Name, fun parse_ns_name/4);
parse_ns_name(?STRING_UNBOUND_REST($:, Rest), State, [], Name) ->
    %% First colon while no prefix is set: what has been read so far
    %% becomes the prefix and the local name starts over.
    parse_ns_name(Rest, State, lists:reverse(Name), []);
parse_ns_name(?STRING_UNBOUND_REST(C, Rest) = Bytes, State, Prefix, Name) ->
    case is_name_char(C) of
        false ->
            {{Prefix, lists:reverse(Name)}, Bytes, State};
        true ->
            parse_ns_name(Rest, State, Prefix, [C | Name])
    end;
parse_ns_name(Bytes, State, Prefix, Name) ->
    unicode_incomplete_check([Bytes, State, Prefix, Name, fun parse_ns_name/4],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_pi_data(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {PiData, Rest, State}
%%           PiData = string()
%% Description: Parse the data part of the processing instruction.
%%              If next character is valid it's put in the accumulator.
%%----------------------------------------------------------------------
parse_pi_data(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
%% A trailing "?" or "\r" may be half of "?>" or "\r\n"; more input is
%% needed before it can be classified.
parse_pi_data(?STRING("?") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING_REST("?>", AfterPi), State, Acc) ->
    {lists:reverse(Acc), AfterPi, State};
%% Every kind of line break counts one line and is stored as ?lf.
parse_pi_data(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_pi_data(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C) ->
    parse_pi_data(Rest, State, [C | Acc]);
parse_pi_data(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_pi_data/3],
                             "not an character").


%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Start the parsing of a CDATA block.
%%    [18] CDSect ::= CDStart CData CDEnd
%%    [19] CDStart ::= '<![CDATA['
%%    [20] CData ::= (Char* - (Char* ']]>' Char*))
%%    [21] CDEnd ::= ']]>'
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_cdata/2);
%% Proper prefixes of "[CDATA[" request more input.
parse_cdata(?STRING("[") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[C") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CD") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDAT") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDATA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING_REST("[CDATA[", Content), State) ->
    parse_cdata(Content, event_callback(startCDATA, State), []);
parse_cdata(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_cdata/2],
                             "expecting comment or CDATA").


%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Parse a CDATA block.
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
%% A trailing "\r", "]" or "]]" may be the start of "\r\n" or "]]>";
%% more input is needed.
parse_cdata(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING_REST("]]>", AfterCdata), State, Acc) ->
    %% The collected text is reported first, then the end of the
    %% section, and parsing continues with element content.
    CharsState = event_callback({characters, lists:reverse(Acc)}, State),
    parse_content(AfterCdata, event_callback(endCDATA, CharsState), [], true);
%% Every kind of line break counts one line and is stored as ?lf.
parse_cdata(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_cdata(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_cdata(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=Line} = State, Acc) ->
    parse_cdata(Rest, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C) ->
    parse_cdata(Rest, State, [C | Acc]);
parse_cdata(?STRING_UNBOUND_REST(BadChar, _), State, _) ->
    ?fatal_error(State, "CDATA contains bad character value: " ++ [BadChar]);
parse_cdata(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_cdata/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_comment(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Parse a comment.
%%    [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
%%    The comment text is reported with a {comment, String} event when
%%    the closing "-->" is found; "--" anywhere else is a fatal error.
%%----------------------------------------------------------------------
parse_comment(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
%% A trailing "\r", "-" or "--" may be the start of "\r\n" or "-->";
%% more input is needed.
parse_comment(?STRING("\r") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("-") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("--") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
    State1 = event_callback({comment, lists:reverse(Acc)}, State),
    {Rest, State1};
parse_comment(?STRING_REST("--", _), State, _) ->
    ?fatal_error(State, "comment contains '--'");
%% Every kind of line break counts one line and is stored as ?lf.
parse_comment(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C) ->
    parse_comment(Rest, State, [C|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, _), State, _) ->
    %% C is a character code, so it is wrapped in a list; appending the
    %% bare integer would build an improper list as the error reason.
    ?fatal_error(State, "Bad character in comment: " ++ [C]);
parse_comment(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_comment/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_misc(Rest, State, Eod) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Eod = true | false
%% Output:   Result = {Rest, State}
%% Description: Parse a misc clause, could be a comment, a processing
%%              instruction or whitespace. If the input stream is empty
%%              (Eod parameter true) then we return current state and quit.
%%    [27] Misc ::= Comment | PI | S
%%----------------------------------------------------------------------
parse_misc(?STRING_EMPTY, State, true) ->
    {?STRING_EMPTY, State};
parse_misc(?STRING_EMPTY, State, Eod) ->
    cf(?STRING_EMPTY, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<?", Rest), State, Eod) ->
    case parse_pi(Rest, State) of
        {endDocument, _Unparsed, PiState} ->
            %% The "<?" that was matched away is put back in front of
            %% the input before it is returned.
            Prefix = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(Prefix, Rest), PiState};
        {AfterPi, PiState} ->
            parse_misc(AfterPi, PiState, Eod)
    end;
parse_misc(?STRING("<!") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<!-") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<!--", Rest), State, Eod) ->
    {AfterComment, CommentState} = parse_comment(Rest, State, []),
    parse_misc(AfterComment, CommentState, Eod);
parse_misc(?STRING_UNBOUND_REST(C, _) = Bytes, State, Eod) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_misc(AfterWs, WsState, Eod);
parse_misc(Rest, State, _Eod) ->
    %% Anything that is not a Misc item is handed back untouched; no
    %% error is raised here.
    {Rest, State}.

%%----------------------------------------------------------------------
%% Function: parse_stag(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parses the name of a start tag and then hands over to
%%              parse_attributes/3 with empty attribute and namespace
%%              lists.
%%              [40] STag ::= '<' Name (S Attribute)* S? '>'
%%----------------------------------------------------------------------
parse_stag(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_stag/2);
parse_stag(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {TagName, AfterName, NameState} = parse_ns_name(Tail, State, [], [First]),
            parse_attributes(AfterName, NameState, {TagName, [], []})
    end;
parse_stag(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_stag/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_attributes(Rest, State, CurrentTag) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           CurrentTag = {Name, AttList, NewNsList}
%%           Name = string()
%%           AttList = [{Name, Value}]
%%           NewNsList = [{Name, Value}]
%% Output:   Result = {Rest, State}
%% Description: Parsing the attribute list in the start tag. The current
%%              tag tuple contains the tag name, a list of attributes
%%              (exclusive NS attributes) and a list of new NS attributes.
%%              [41] Attribute ::= Name Eq AttValue
%%----------------------------------------------------------------------
parse_attributes(?STRING_EMPTY, State, CurrentTag) ->
    cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING("/") = Pending, State, CurrentTag) ->
    %% A lone "/" may be the first half of "/>".
    cf(Pending, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING_REST("/>", Tail), State, {Tag, AttList, NewNsList}) ->
    %% Empty element: emit the start and end events back to back.
    AllNs = NewNsList ++ State#xmerl_sax_parser_state.ns,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, AllNs),
    MappedState = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    StartedState = event_callback({startElement, Uri, LocalName, QName, Attributes},
                                  MappedState),
    EndedState = event_callback({endElement, Uri, LocalName, QName}, StartedState),
    UnmappedState = send_end_prefix_mapping_event(NewNsList, EndedState),
    parse_content(Tail, UnmappedState, [], true);
parse_attributes(?STRING_REST(">", Tail),
                 #xmerl_sax_parser_state{end_tags = OpenTags, ns = OldNsList} = State,
                 {Tag, AttList, NewNsList}) ->
    %% Open element: remember it on the end-tag stack together with the
    %% namespace list to restore when it is closed.
    AllNs = NewNsList ++ OldNsList,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, AllNs),
    MappedState = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    StartedState = event_callback({startElement, Uri, LocalName, QName, Attributes},
                                  MappedState),
    Pushed = [{Tag, Uri, LocalName, QName, OldNsList, NewNsList} | OpenTags],
    parse_content(Tail,
                  StartedState#xmerl_sax_parser_state{end_tags = Pushed, ns = AllNs},
                  [], true);
parse_attributes(?STRING_UNBOUND_REST(Ch, _) = Input, State, CurrentTag)
  when ?is_whitespace(Ch) ->
    {_Skipped, Remaining, WsState} = whitespace(Input, State, []),
    parse_attributes(Remaining, WsState, CurrentTag);
parse_attributes(?STRING_UNBOUND_REST(Ch, Tail), State, {Tag, AttList, NsList}) ->
    case is_name_start(Ch) of
        false ->
            ?fatal_error(State, "Invalid start character in attribute name: " ++ [Ch]);
        true ->
            {AttrName, AfterName, NameState} = parse_ns_name(Tail, State, [], [Ch]),
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Value, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            %% Namespace declarations go to the namespace list; anything
            %% else goes to the attribute list, which must stay free of
            %% duplicate names.
            NextTag =
                case AttrName of
                    {"xmlns", NsPrefix} ->
                        {Tag, AttList, [{NsPrefix, Value} | NsList]};
                    {"", "xmlns"} ->
                        {Tag, AttList, [{"", Value} | NsList]};
                    {_Prefix, _LocalName} ->
                        case lists:keymember(AttrName, 1, AttList) of
                            false ->
                                {Tag, [{AttrName, Value} | AttList], NsList};
                            true ->
                                ElementName =
                                    case Tag of
                                        {"", Local} -> Local;
                                        {TagPrefix, Local} -> TagPrefix ++ ":" ++ Local
                                    end,
                                ?fatal_error(State,
                                             "Attribute exist more than once in element: "
                                             ++ ElementName)
                        end
                end,
            parse_attributes(AfterValue, ValueState, NextTag)
    end;
parse_attributes(Input, State, CurrentTag) ->
    unicode_incomplete_check([Input, State, CurrentTag, fun parse_attributes/3],
                             "expecting name, whitespace, /> or >").



%%----------------------------------------------------------------------
%% Function: fix_ns({Prefix, Name}, Attributes, Ns) -> Result
%% Input:    Prefix = string()
%%           Name = string()
%%           Attributes = [{Name, Value}]
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%% Output:   Result = {Uri, Name, QualifiedName, Attributes}
%%           QualifiedName = {Prefix, Name}
%% Description: Resolves the namespace URI of the start tag (the empty
%%              string when the prefix has no mapping in Ns) and of each
%%              of its attributes.
%%----------------------------------------------------------------------
% fix_ns({"", Name}, Attributes, Ns) ->
%     Attributes2 = fix_attributes_ns(Attributes, Ns, []),
%     {"", Name, Name, Attributes2};
fix_ns({Prefix, Name}, Attributes, Ns) ->
    Uri =
        case lists:keyfind(Prefix, 1, Ns) of
            {Prefix, Mapped} -> Mapped;
            false -> ""
        end,
    {Uri, Name, {Prefix, Name}, fix_attributes_ns(Attributes, Ns, [])}.

%%----------------------------------------------------------------------
%% Function: fix_attributes_ns(Attributes, Ns, Acc) -> Result
%% Input:    Attributes = [{{Prefix, Name}, Value}]
%%           Prefix = string()
%%           Name = string()
%%           Value = string()
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%% Output:   Result = [{Uri, Prefix, Name, Value}]
%% Description: Resolves the namespace URI of every attribute. Attributes
%%              without a prefix get the empty URI without consulting Ns,
%%              and so do prefixes missing from Ns. Results are prepended
%%              to Acc, so they come out in the reverse of input order.
%%----------------------------------------------------------------------
fix_attributes_ns(Attributes, Ns, Acc) ->
    Resolve =
        fun({{"", Name}, AttrValue}, Out) ->
                [{"", "", Name, AttrValue} | Out];
           ({{Prefix, Name}, AttrValue}, Out) ->
                Uri =
                    case lists:keyfind(Prefix, 1, Ns) of
                        {Prefix, Mapped} -> Mapped;
                        false -> ""
                    end,
                [{Uri, Prefix, Name, AttrValue} | Out]
        end,
    lists:foldl(Resolve, Acc, Attributes).


%%----------------------------------------------------------------------
%% Function: send_start_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Sends one startPrefixMapping event per entry of Ns, in
%%              list order, threading the state through each callback.
%%----------------------------------------------------------------------
send_start_prefix_mapping_event(Ns, State) ->
    lists:foldl(fun({Prefix, Uri}, StateIn) ->
                        event_callback({startPrefixMapping, Prefix, Uri}, StateIn)
                end,
                State,
                Ns).


%%----------------------------------------------------------------------
%% Function: send_end_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Sends one endPrefixMapping event per entry of Ns, in list
%%              order, threading the state through each callback.
%%----------------------------------------------------------------------
send_end_prefix_mapping_event(Ns, State) ->
    lists:foldl(fun({Prefix, _Uri}, StateIn) ->
                        event_callback({endPrefixMapping, Prefix}, StateIn)
                end,
                State,
                Ns).


%%----------------------------------------------------------------------
%% Function: parse_eq(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Skips optional leading whitespace and consumes one '='.
%%              Whitespace after the '=' is left in Rest.
%%              [25] Eq ::= S? '=' S?
%%----------------------------------------------------------------------
parse_eq(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_eq/2);
parse_eq(?STRING_REST("=", Tail), State) ->
    {Tail, State};
parse_eq(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_Skipped, Remaining, WsState} = whitespace(Input, State, []),
    parse_eq(Remaining, WsState);
parse_eq(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_eq/2],
                             "expecting = or whitespace").


%%----------------------------------------------------------------------
%% Function: parse_att_value(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Value, Rest, State}
%%           Value = string()
%% Description: Skips leading whitespace, reads the opening quote and
%%              lets parse_att_value/4 read the value up to the same
%%              quote character.
%%              [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
%%                              | "'" ([^<&'] | Reference)* "'"
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_value/2);
parse_att_value(?STRING_UNBOUND_REST(Quote, Tail), State) when Quote == $'; Quote == $" ->
    parse_att_value(Tail, State, Quote, []);
parse_att_value(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_Skipped, Remaining, WsState} = whitespace(Input, State, []),
    parse_att_value(Remaining, WsState);
parse_att_value(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_value/2],
                             "\', \" or whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_att_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an attribute value up to the closing quote Stop.
%%              Line breaks and tabs are stored as a single space.
%%              Acc is kept in reverse order. Stop = undefined is used
%%              for the replacement text of an entity: the value then
%%              ends at end of input and is returned still reversed, so
%%              the caller can prepend it to its own accumulator.
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY, State, undefined, Acc) ->
    {Acc, [], State}; %% stop clause when parsing references
parse_att_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING("\r") = Bytes, State, Stop, Acc) ->
    %% A trailing "\r" may be the first half of "\r\n".
    cf(Bytes, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\t", Rest), State, Stop, Acc) ->
    %% A tab is stored as a space like the line breaks above, but it
    %% does not start a new line, so line_no must stay unchanged.
    parse_att_value(Rest, State, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue} ->
            parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
        {internal_general, true, _, Value} ->
            %% Replacement text is a delimiter: insert it literally.
            parse_att_value(Rest1, State1, Stop, Value ++ Acc);
        {internal_general, false, _, Value} ->
            %% Parse the replacement text itself; the result comes back
            %% reversed and can be prepended directly to Acc.
            {ParsedValue, [], State2} = parse_att_value(?TO_INPUT_FORMAT(Value), State1, undefined, []),
            parse_att_value(Rest1, State2, Stop, ParsedValue ++ Acc);
        {external_general, Name, _} ->
            ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the unresolved reference verbatim ("&Name;"),
                    %% written backwards because Acc is reversed.
                    parse_att_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
            end;
        {unparsed, Name, _} ->
            ?fatal_error(State1, "Unparsed entity reference in attribute value: " ++ Name)
    end;
parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    %% Closing quote: the first character equals Stop.
    {lists:reverse(Acc), Rest, State};
parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) ->
    ?fatal_error(State, "< not allowed in attribute value");
parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
    if
        ?is_char(C) ->
            parse_att_value(Rest, State, Stop, [C|Acc]);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in attribute value: ~p", [C])))
    end;
parse_att_value(Bytes, State, Stop, Acc) ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_etag(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the end tag. The name is compared with the tag on
%%              top of the end_tags stack; a mismatch is a fatal error
%%              only when the match_end_tags field of the state is true.
%%              With an empty end_tags stack the input is returned
%%              unconsumed.
%%              [42] ETag ::= '</' Name S? '>'
%%----------------------------------------------------------------------
parse_etag(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_etag/2);
parse_etag(?STRING_UNBOUND_REST(C, Rest),
           #xmerl_sax_parser_state{end_tags=[{ETag, _Uri, _LocalName, _QName, _OldNsList, _NewNsList}
                                             |_RestOfETags]} = State) ->
    case is_name_start(C) of
        true ->
            {Tag, Rest1, State1} = parse_ns_name(Rest, State, [], [C]),
            case Tag == ETag of
                true ->
                    {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
                    parse_etag_1(Rest2, State2, Tag);
                false ->
                    case State1#xmerl_sax_parser_state.match_end_tags of
                        true ->
                            {P,TN} = Tag,
                            ?fatal_error(State1, "EndTag: " ++ P ++ ":" ++ TN ++
                                             ", does not match StartTag");
                        false ->
                            {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
                            parse_etag_1(Rest2, State2, Tag)
                    end
            end;
        false ->
            ?fatal_error(State, "Name expected")
    end;
parse_etag(?STRING_UNBOUND_REST(_C, _) = Rest, #xmerl_sax_parser_state{end_tags=[]}= State) ->
    {Rest, State};
parse_etag(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_etag/2],
                             undefined).

%% parse_etag_1(Rest, State, Tag) -> {Rest, State}
%% Consumes the '>' that closes an end tag, sends the endElement and
%% endPrefixMapping events for the element on top of the end_tags stack
%% and restores the namespace list saved when the element was opened.
%% If check_if_new_doc_allowed/2 returns true the parse is ended by
%% throwing {endDocument, Rest, State}; otherwise the element is popped
%% and content parsing continues.
parse_etag_1(?STRING_REST(">", Rest),
             #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
                                               |RestOfETags],
                                     input_type=InputType} = State, _Tag) ->
    State1 = event_callback({endElement, Uri, LocalName, QName}, State),
    State2 = send_end_prefix_mapping_event(NewNsList, State1),
    case check_if_new_doc_allowed(InputType, RestOfETags) of
        true ->
            throw({endDocument, Rest, State2#xmerl_sax_parser_state{ns = OldNsList}});
        false ->
            parse_content(Rest,
                          State2#xmerl_sax_parser_state{end_tags=RestOfETags,
                                                        ns = OldNsList},
                          [], true)
    end;
parse_etag_1(?STRING_UNBOUND_REST(_C, _), State, Tag) ->
    {P,TN} = Tag,
    ?fatal_error(State, "Bad EndTag: " ++ P ++ ":" ++ TN);
parse_etag_1(Bytes, State, Tag) ->
    unicode_incomplete_check([Bytes, State, Tag, fun parse_etag_1/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_content(Rest, State, Acc, IgnorableWS) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%%             IgnorableWS = true | false
%% Result    : {Rest, State}
%% Description: Parsing the content part of tags. Acc holds the character
%%              data read since the last event, in reverse order.
%%              IgnorableWS stays true as long as only whitespace has
%%              been collected and selects the event type used when Acc
%%              is flushed (see send_character_event/4).
%% [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
    case check_if_document_complete(State, "No more bytes") of
        true ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            {?STRING_EMPTY, State1};
        false ->
            %% Ask for more input. A fatal error raised while doing so is
            %% accepted as a normal end of input if the document is
            %% already complete.
            case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
                {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
                    {Rest, State1};
                {fatal_error, {State1, Msg}} ->
                    case check_if_document_complete(State1, Msg) of
                        true ->
                            State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
                            {?STRING_EMPTY, State2};
                        false ->
                            ?fatal_error(State1, Msg)
                    end;
                Other ->
                    throw(Other)
            end
    end;
%% Input ending on a possible prefix of a longer token ("\r\n", "</",
%% "<?", "<!--", ...): fetch more input first. Acc must be passed along
%% in every one of these clauses, otherwise character data read before
%% the chunk boundary is lost.
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    parse_etag(Rest, State1);
parse_content(?STRING("<!") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<!-") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    {Rest1, State2} = parse_comment(Rest, State1, []),
    parse_content(Rest1, State2, [], true);
parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    case parse_pi(Rest, State1) of
        {Rest1, State2} ->
            parse_content(Rest1, State2, [], true);
        {endDocument, _Rest1, State2} ->
            %% Give the "<?" back together with the unparsed input.
            IValue = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(IValue, Rest), State2}
    end;
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            {Rest, State}; %% Send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_cdata(Rest1, State1)
    end;
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            {Rest, State}; %% Send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_stag(Rest1, State1)
    end;
%% Line breaks are stored as a single line feed and bump the line count.
parse_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?space |Acc], IgnorableWS);
parse_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?tab |Acc], IgnorableWS);
parse_content(?STRING_REST("]]>", _Rest), State, _Acc, _IgnorableWS) ->
    ?fatal_error(State, "\"]]>\" is not allowed in content");
parse_content(?STRING_UNBOUND_REST(_C, _) = Rest,
              #xmerl_sax_parser_state{end_tags = []} = State,
              _Acc, _IgnorableWS) ->
    %% No element is open: leave everything else to the caller.
    {Rest, State};
parse_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue} ->
            parse_content(Rest1, State1, [CharValue | Acc], false);
        {internal_general, true, _, Value} ->
            parse_content(Rest1, State1, Value ++ Acc, false);
        {internal_general, false, _, Value} ->
            %% Push the replacement text back in front of the input so
            %% that it is parsed as content.
            IValue = ?TO_INPUT_FORMAT(Value),
            parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
        {external_general, _, {PubId, SysId}} ->
            State2 = parse_external_entity(State1, PubId, SysId),
            parse_content(Rest1, State2, Acc, false);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the unresolved reference verbatim ("&Name;"),
                    %% written backwards because Acc is reversed.
                    parse_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
            end;
        {unparsed, Name, _} ->
            ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
    end;
parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
    if
        ?is_char(C) ->
            parse_content(Rest, State, [C|Acc], false);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in content: ~p", [C])))
    end;
parse_content(Bytes, State, Acc, IgnorableWS) ->
    unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
                             undefined).


%%----------------------------------------------------------------------
%% Function: check_if_document_complete(State, ErrorMsg) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             ErrorMsg = string()
%% Result    : boolean()
%% Description: Returns true when no element is open (end_tags is empty)
%%              and ErrorMsg is one of the two messages that mean that
%%              no more input is available; false otherwise.
%%----------------------------------------------------------------------
check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
                           "No more bytes") ->
    true;
check_if_document_complete(#xmerl_sax_parser_state{end_tags = []},
                           "Continuation function undefined") ->
    true;
check_if_document_complete(_, _) ->
    false.

%%----------------------------------------------------------------------
%% Function: send_character_event(Length, IgnorableWS, String, State) -> Result
%% Parameters: Length = integer()
%%             IgnorableWS = true | false
%%             String = string()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Reports collected character data. Nothing is sent when
%%              Length is 0. Otherwise an ignorableWhitespace event is
%%              sent when IgnorableWS is true and a characters event
%%              when it is false.
%%----------------------------------------------------------------------
send_character_event(0, _IgnorableWS, _String, State) ->
    State;
send_character_event(_Length, IgnorableWS, String, State) ->
    Event =
        case IgnorableWS of
            false -> {characters, String};
            true -> {ignorableWhitespace, String}
        end,
    event_callback(Event, State).


%%----------------------------------------------------------------------
%% Function: whitespace(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Whitespace, Rest, State}
%% Description: Parse whitespaces.
%%              [3] S ::= (#x20 | #x9 | #xD | #xA)+
%%----------------------------------------------------------------------
%% Acc collects the whitespace read so far in reverse order; line breaks
%% are stored as a single line feed and bump line_no.
%%
%% In the first two clauses more input is requested through cf/4. If
%% cf/4 comes back with exactly the same input and an unchanged State,
%% the whitespace collected so far is returned; any other result of
%% cf/4 is passed on as is.
whitespace(?STRING_EMPTY, State, Acc) ->
    case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of
        {?STRING_EMPTY, State} ->
            {lists:reverse(Acc), ?STRING_EMPTY, State};
        Ret ->
            Ret
    end;
whitespace(?STRING("\r") = Bytes, State, Acc) ->
    %% A trailing "\r" may be the first half of "\r\n".
    case cf(Bytes, State, Acc, fun whitespace/3) of
        {?STRING("\r") = Bytes, State} ->
            {lists:reverse(Acc), Bytes, State};
        Ret ->
            Ret
    end;
whitespace(?STRING_REST("\n", Rest), State, Acc) ->
    N = State#xmerl_sax_parser_state.line_no,
    whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
whitespace(?STRING_REST("\r\n", Rest), State, Acc) ->
    N = State#xmerl_sax_parser_state.line_no,
    whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
whitespace(?STRING_REST("\r", Rest), State, Acc) ->
    N = State#xmerl_sax_parser_state.line_no,
    whitespace(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf |Acc]);
whitespace(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_whitespace(C) ->
    whitespace(Rest, State, [C|Acc]);
%% The final clause comes from the ?WHITESPACE macro, which is defined
%% outside this part of the file.
?WHITESPACE(Bytes, State, Acc).

%%----------------------------------------------------------------------
%% Function: parse_reference(Rest, State, HaveToExist) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Value, Rest, State}
%% Description: Parse entity references.
%%              [66] CharRef ::= '&#' [0-9]+ ';'
%%                             | '&#x' [0-9a-fA-F]+ ';'
%%              [67] Reference ::= EntityRef | CharRef
%%              [68] EntityRef ::= '&' Name ';'
%%----------------------------------------------------------------------
%% Called with the input that follows the '&'. Value is one of
%%   {character, IsDelimiter, CodePoint} for a character reference, or
%%   whatever parse_reference_1/4 returns for a named entity.
%% HaveToExist is only passed on to the entity lookup.
parse_reference(?STRING_EMPTY, State, HaveToExist) ->
    cf(?STRING_EMPTY, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING("#") = Bytes, State, HaveToExist) ->
    %% A lone "#" may be the first half of "#x".
    cf(Bytes, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING_REST("#x", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_hex(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #x" ++ RefString) %%WFC: Legal Character
    end;
parse_reference(?STRING_REST("#", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_digit(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #" ++ RefString)%%WFC: Legal Character
    end;
parse_reference(?STRING_UNBOUND_REST(C, Rest), State, HaveToExist) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_reference_1(Rest1, State1, HaveToExist, Name);
        false ->
            ?fatal_error(State, "name expected")
    end;
parse_reference(Bytes, State, HaveToExist) ->
    %% 'undefined' (previously misspelled) is the value the other parse
    %% functions in this file pass when they have no specific message.
    unicode_incomplete_check([Bytes, State, HaveToExist, fun parse_reference/3],
                             undefined).


%% parse_reference_1(Rest, State, HaveToExist, Name) -> {Value, Rest, State}
%% Expects the ';' that ends an entity reference and looks the name up.
%% An internal general entity is returned as
%% {internal_general, IsDelimiter, Name, RefValue}; any other lookup
%% result is returned unchanged.
parse_reference_1(?STRING_REST(";", Tail), State, HaveToExist, Name) ->
    Found = look_up_reference(Name, HaveToExist, State),
    Value =
        case Found of
            {internal_general, Name, RefValue} ->
                {internal_general, is_delimiter(RefValue), Name, RefValue};
            _ ->
                Found
        end,
    {Value, Tail, State};
parse_reference_1(Input, State, HaveToExist, Name) ->
    unicode_incomplete_check([Input, State, HaveToExist, Name, fun parse_reference_1/4],
                             "Missing semicolon after reference: " ++ Name).



%%----------------------------------------------------------------------
%% Function: is_delimiter(Character) -> Result
%% Parameters: Character = integer() | string()
%% Result    : boolean()
%% Description: True for the five markup delimiters & < > ' and ",
%%              given either as a code point or as a one-character
%%              string; false for everything else.
%%----------------------------------------------------------------------
is_delimiter(Term) ->
    lists:member(Term, [$&, $<, $>, $', $",
                        "&", "<", ">", "'", "\""]).

%%----------------------------------------------------------------------
%% Function: parse_pe_reference(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Result, Rest, State}
%% Description: Parse a parameter entity reference.
%%              [69] PEReference ::= '%' Name ';'
%%----------------------------------------------------------------------
%% Called with the input that follows the '%'.
parse_pe_reference(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pe_reference/2);
parse_pe_reference(?STRING_UNBOUND_REST(C, Rest), State) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_pe_reference_1(Rest1, State1, Name);
        false ->
            ?fatal_error(State, "Name expected")
    end;
parse_pe_reference(Bytes, State) ->
    %% 'undefined' (previously misspelled) is the value the other parse
    %% functions in this file pass when they have no specific message.
    unicode_incomplete_check([Bytes, State, fun parse_pe_reference/2],
                             undefined).


%% parse_pe_reference_1(Rest, State, Name) -> {Result, Rest, State}
%% Expects the ';' that ends the reference. Parameter entities are looked
%% up under the key "%" ++ Name, with HaveToExist set to true.
parse_pe_reference_1(?STRING_REST(";", Rest), State, Name) ->
    Name1 = "%" ++ Name,
    Result = look_up_reference(Name1, true, State),
    {Result, Rest, State};
parse_pe_reference_1(Bytes, State, Name) ->
    unicode_incomplete_check([Bytes, State, Name, fun parse_pe_reference_1/3],
                             "missing ; after reference " ++ Name).


%%----------------------------------------------------------------------
%% Function: insert_reference(Name, Value, State) -> Result
%% Parameters: Name = string()
%%             Value = term()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Stores Value under Name in the ref_table map of the
%%              state. If Name is already present the state is returned
%%              unchanged, so the first value stored for a name wins.
%%              NOTE(review): look_up_reference/3 only recognises
%%              entries of the form {Type, Value}.
%%----------------------------------------------------------------------
insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Map} = State) ->
    case maps:find(Name, Map) of
        error ->
            State#xmerl_sax_parser_state{ref_table = maps:put(Name, Value, Map)};
        _ ->
            State
    end.


%%----------------------------------------------------------------------
%% Function: look_up_reference(Reference, HaveToExist, State) -> Result
%% Parameters: Reference = string()
%%             HaveToExist = true | false
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Reference, Value} | {not_found, Reference}
%% Description: Resolves an entity name. The five predefined entities are
%%              answered directly; other names are looked up in the
%%              ref_table of the state. A missing entity is a fatal error
%%              only when HaveToExist is true and the standalone field
%%              of the state is yes.
%%----------------------------------------------------------------------
look_up_reference("amp", _HaveToExist, _State) ->
    {internal_general, "amp", "&"};
look_up_reference("lt", _HaveToExist, _State) ->
    {internal_general, "lt", "<"};
look_up_reference("gt", _HaveToExist, _State) ->
    {internal_general, "gt", ">"};
look_up_reference("apos", _HaveToExist, _State) ->
    {internal_general, "apos", "'"};
look_up_reference("quot", _HaveToExist, _State) ->
    {internal_general, "quot", "\""};
look_up_reference(Name, HaveToExist, State) ->
    Declared = maps:find(Name, State#xmerl_sax_parser_state.ref_table),
    case {Declared, HaveToExist} of
        {{ok, {Type, Value}}, _} ->
            {Type, Name, Value};
        {_, false} ->
            {not_found, Name};
        {_, true} ->
            case State#xmerl_sax_parser_state.standalone of
                yes ->
                    ?fatal_error(State, "Entity not declared: " ++ Name); %%WFC: Entity Declared
                no ->
                    {not_found, Name} %%VC: Entity Declared
            end
    end.


%%----------------------------------------------------------------------
%% Function: parse_hex(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()
%% Description: Parse a hex reference.
%%----------------------------------------------------------------------
%% Reads hex digits up to ';'. Acc holds the digits read so far in
%% reverse order; Reference is the digit string as written and Value its
%% integer value.
parse_hex(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_hex/3);
parse_hex(?STRING_REST(";", _Rest), State, []) ->
    %% "&#x;" - at least one digit is required. Without this clause the
    %% empty string reaches list_to_integer/2 and raises badarg instead
    %% of a fatal parse error.
    ?fatal_error(State, "Missing hex digits in character reference");
parse_hex(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {erlang:list_to_integer(RefString, 16), RefString, Rest, State};
parse_hex(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_hex_digit(C) ->
    parse_hex(Rest, State, [C |Acc]);
parse_hex(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_hex/3],
                             "Bad hex value in reference: ").


%%----------------------------------------------------------------------
%% Function: parse_digit(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()
%% Description: Parse a decimal reference. Reads decimal digits up to
%%              ';'. Acc holds the digits read so far in reverse order.
%%----------------------------------------------------------------------
parse_digit(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_digit/3);
parse_digit(?STRING_REST(";", _Rest), State, []) ->
    %% "&#;" - at least one digit is required. Without this clause the
    %% empty string reaches list_to_integer/1 and raises badarg instead
    %% of a fatal parse error.
    ?fatal_error(State, "Missing digits in character reference");
parse_digit(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {list_to_integer(RefString), RefString, Rest, State};
parse_digit(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    case is_digit(C) of
        true ->
            parse_digit(Rest, State, [C |Acc]);
        false ->
            ?fatal_error(State, "Character in reference not a digit: " ++ [C])
    end;
parse_digit(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_digit/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_system_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()
%% Result    : {Literal, Rest, State}
%%             Literal = string()
%% Description: Collects characters up to, but not including, the
%%              closing quote character Stop.
%% [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
%%----------------------------------------------------------------------
parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_system_litteral/4);
parse_system_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Stop, Acc)
  when Char =:= Stop ->
    %% Closing quote found: the literal is complete.
    {lists:reverse(Acc), Tail, State};
parse_system_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Stop, Acc) ->
    parse_system_litteral(Tail, State, Stop, [Char | Acc]);
parse_system_litteral(Input, State, Stop, Acc) ->
    unicode_incomplete_check([Input, State, Stop, Acc, fun parse_system_litteral/4],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_pubid_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()
%% Result    : {PubId, Rest, State}
%%             PubId = string()
%% Description: Parse a public id literal.
%% [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
%%----------------------------------------------------------------------
parse_pubid_litteral(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_pubid_litteral/4);
parse_pubid_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Stop, Acc)
  when Char =:= Stop ->
    %% Closing quote found: the literal is complete.
    {lists:reverse(Acc), Tail, State};
parse_pubid_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Stop, Acc) ->
    case is_pubid_char(Char) of
        false ->
            ?fatal_error(State, "Character not allowed in pubid litteral: " ++ [Char]);
        true ->
            parse_pubid_litteral(Tail, State, Stop, [Char | Acc])
    end;
parse_pubid_litteral(Input, State, Stop, Acc) ->
    unicode_incomplete_check([Input, State, Stop, Acc, fun parse_pubid_litteral/4],
                             undefined).

%%======================================================================
%% DTD Parsing
%%======================================================================

%%----------------------------------------------------------------------
%% Function  : parse_doctype(Rest, State, Level, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Level = integer()
%%             Acc = string()
%% Result    : {string(), Rest, State}
%% Description: This function is just searching the end of the doctype
%%              declaration and doesn't parse it. It's used when the
%%              parse_dtd option is set to skip.
%%----------------------------------------------------------------------
%% Just returns doctype as string
%% parse_doctype(?STRING_EMPTY, State, Level, Acc) ->
%%     cf(?STRING_EMPTY, State, Level, Acc, fun parse_doctype/4);
%% parse_doctype(?STRING("\r"), State, Level, Acc) ->
%%     cf(?STRING("\r"), State, Level, Acc, fun parse_doctype/4);
%% parse_doctype(?STRING_REST(">", Rest), State, 0, Acc) ->
%%     {Acc, Rest, State};
%% parse_doctype(?STRING_REST(">", Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level-1, Acc);
%% parse_doctype(?STRING_REST("<", Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level+1, [$<|Acc]);
%% parse_doctype(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_UNBOUND_REST(C, Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level, [C|Acc]);
%% parse_doctype(Bytes, State, Level, Acc) ->
%%     unicode_incomplete_check([Bytes, State, Level, Acc, fun parse_doctype/4],
%%                              undefined).


%%----------------------------------------------------------------------
%% Function  : parse_doctype(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: This function starts the parsing of the DTD
%%              and sends the appropriate events.
%% [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
%%                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
%%----------------------------------------------------------------------
parse_doctype(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype/2);
parse_doctype(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, Remaining, StateAfterWS} = whitespace(Input, State, []),
    parse_doctype(Remaining, StateAfterWS);
parse_doctype(?STRING_UNBOUND_REST(Char, Tail), State) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "expecting name or whitespace");
        true ->
            {DocName, Remaining, StateAfterName} = parse_name(Tail, State, [Char]),
            %% No external id has been seen yet, hence Definition = false.
            parse_doctype_1(Remaining, StateAfterName, DocName, false)
    end;
parse_doctype(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_doctype_1(Rest, State, Name, Definition) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%%             Definition = true | false
%% Result    : {Rest, State}
%% Description: Continues parsing the DOCTYPE directive after its name.
%%              Definition is true once an external id has been handled,
%%              i.e. when the startDTD event has already been sent.
%%----------------------------------------------------------------------
parse_doctype_1(?STRING_EMPTY, State, Name, Definition) ->
    cf(?STRING_EMPTY, State, Name, Definition, fun parse_doctype_1/4);
parse_doctype_1(?STRING_REST(">", Tail), State, _Name, _Definition) ->
    {Tail, State};
parse_doctype_1(?STRING_REST("[", Tail), State, Name, Definition) ->
    %% Internal subset: send startDTD here unless the external id clause
    %% has sent it already.
    StartedState =
        case Definition of
            true ->
                State;
            false ->
                event_callback({startDTD, Name, "", ""}, State)
        end,
    {AfterSubset, SubsetState} = parse_doctype_decl(Tail, StartedState),
    {_Blanks, AfterWS, WSState} = whitespace(AfterSubset, SubsetState, []),
    parse_doctype_2(AfterWS, WSState);
parse_doctype_1(?STRING_UNBOUND_REST(Char, _) = Input, State, Name, Definition)
  when ?is_whitespace(Char) ->
    {_Blanks, AfterWS, WSState} = whitespace(Input, State, []),
    parse_doctype_1(AfterWS, WSState, Name, Definition);
parse_doctype_1(?STRING_UNBOUND_REST(Char, _) = Input, State, Name, _Definition)
  when Char == $S; Char == $P ->
    %% Possible start of SYSTEM or PUBLIC.
    {PubId, SysId, AfterId, IdState} = parse_external_id(Input, State, false),
    AnnouncedState = event_callback({startDTD, Name, PubId, SysId}, IdState),
    DtdState =
        case AnnouncedState#xmerl_sax_parser_state.skip_external_dtd of
            true ->
                AnnouncedState;
            false ->
                parse_external_entity(
                  AnnouncedState#xmerl_sax_parser_state{file_type=dtd}, PubId, SysId)
        end,
    parse_doctype_1(AfterId, DtdState, Name, true);
parse_doctype_1(Input, State, Name, Definition) ->
    unicode_incomplete_check([Input, State, Name, Definition, fun parse_doctype_1/4],
                             "expecting >, external id or declaration part").


%% Expects the ">" that closes the DOCTYPE directive after the internal
%% subset.
parse_doctype_2(?STRING_REST(">", Tail), State) ->
    {Tail, State};
parse_doctype_2(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype_2/2],
                             "expecting >").


%%----------------------------------------------------------------------
%% Function  : parse_external_entity(State, PubId, SysId) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             PubId = string()
%%             SysId = string()
%% Result    : {Rest, State}
%% Description: Starts the parsing of an external entity by calling the resolver and
%%              then sends the input to the parsing function.
%%----------------------------------------------------------------------
%% The public id is not handled.
%%
%% Returns the state to continue the enclosing document with: the
%% caller's position and continuation settings are kept, while the event
%% state and the reference table are taken over from the entity.
parse_external_entity(State, _PubId, SysId) ->

    ExtRef = check_uri(SysId, State#xmerl_sax_parser_state.current_location),

    SaveState = event_callback({startEntity, SysId}, State),

    %% The entity is parsed from a fresh input position. The state is
    %% derived from SaveState (not from State) so that whatever the
    %% event handler returned for startEntity is carried into the entity
    %% instead of being overwritten by the entity's event state below.
    State1 = SaveState#xmerl_sax_parser_state{line_no=1,
                                              continuation_state=undefined,
                                              continuation_fun=fun xmerl_sax_parser:default_continuation_cb/1,
                                              end_tags = []},

    {EventState, RefTable} = handle_external_entity(ExtRef, State1),

    NewState = event_callback({endEntity, SysId},
                              SaveState#xmerl_sax_parser_state{event_state=EventState,
                                                               ref_table=RefTable}),
    NewState#xmerl_sax_parser_state{file_type=normal}.



%%----------------------------------------------------------------------
%% Function  : handle_external_entity(ExtRef, State) -> Result
%% Parameters: ExtRef = {file, string()} | {http, string()}
%%             State = #xmerl_sax_parser_state{}
%% Result    : {EventState, RefTable}
%% Description: Opens the external entity, parses it and returns the
%%              event state and the reference table of the resulting
%%              parser state.
%%----------------------------------------------------------------------
handle_external_entity({file, FileToOpen}, State) ->

    case file:open(FileToOpen, [raw, read, binary]) of
        {error, Reason} ->
            ?fatal_error(State, "Couldn't open external entity "++ FileToOpen ++ " : "
                         ++ file:format_error(Reason));
        {ok, FD} ->
            %% Parsing may leave through an exception (fatal errors are
            %% thrown), so the descriptor is closed in an 'after'
            %% section; previously it was only closed on success.
            try
                {?STRING_EMPTY, EntityState} =
                    parse_external_entity_1(<<>>,
                                            State#xmerl_sax_parser_state{continuation_state=FD,
                                                                         current_location=filename:dirname(FileToOpen),
                                                                         entity=filename:basename(FileToOpen),
                                                                         input_type=file}),
                {EntityState#xmerl_sax_parser_state.event_state,
                 EntityState#xmerl_sax_parser_state.ref_table}
            after
                ok = file:close(FD)
            end
    end;
handle_external_entity({http, Url}, State) ->

    try
        {Host, Port, Key} = http(Url),
        TmpFile = http_get_file(Host, Port, Key),
        case file:open(TmpFile, [raw, read, binary]) of
            {error, Reason} ->
                ?fatal_error(State, "Couldn't open temporary file " ++ TmpFile ++ " : "
                             ++ file:format_error(Reason));
            {ok, FD} ->
                %% Close the descriptor and remove the downloaded file
                %% even when parsing fails.
                try
                    {?STRING_EMPTY, EntityState} =
                        parse_external_entity_byte_order_mark(<<>>,
                                                State#xmerl_sax_parser_state{continuation_state=FD,
                                                                             current_location=filename:dirname(Url),
                                                                             entity=filename:basename(Url),
                                                                             input_type=file}),
                    {EntityState#xmerl_sax_parser_state.event_state,
                     EntityState#xmerl_sax_parser_state.ref_table}
                after
                    ok = file:close(FD),
                    ok = file:delete(TmpFile)
                end
        end
    catch
        throw:{error, Error} ->
            ?fatal_error(State, Error)
    end;
handle_external_entity({Tag, _Url}, State) ->
    ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).

?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State).

%%----------------------------------------------------------------------
%% Function  : parse_external_entity_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the external entity. An optional leading
%%              "<?xml ...?>" declaration is consumed first; what
%%              follows is parsed as content or as DTD declarations
%%              depending on the file_type field of State.
%%----------------------------------------------------------------------
parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=FileType} = State) ->
    %% Running out of input is a normal end for dtd and entity files.
    case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
        {Remaining, NewState} when is_record(NewState, xmerl_sax_parser_state) ->
            {Remaining, NewState};
        {fatal_error, {NewState, "No more bytes"}} when FileType == dtd; FileType == entity ->
            {?STRING_EMPTY, NewState};
        Other ->
            throw(Other)
    end;
%% The input may end inside "<?xml"; pass the partial keyword to cf/3
%% together with this function as the one to resume with.
parse_external_entity_1(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?") = Partial, State) ->
    cf(Partial, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?x") = Partial, State) ->
    cf(Partial, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xm") = Partial, State) ->
    cf(Partial, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xml") = Partial, State) ->
    cf(Partial, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING_REST("<?xml", Tail) = Input,
                        #xmerl_sax_parser_state{file_type=FileType} = State) ->
    {Body, BodyState} =
        case is_next_char_whitespace(Tail, State) of
            true ->
                %% The XML declaration should not be reported as a
                %% processing instruction, so its attributes are dropped.
                {_XmlAttributes, AfterDecl, DeclState} = parse_version_info(Tail, State, []),
                {AfterDecl, DeclState};
            false ->
                %% Something else that merely starts with "<?xml".
                {Input, State}
        end,
    case FileType of
        dtd ->
            case catch parse_doctype_decl(Body, BodyState) of
                {Remaining, NewState} when is_record(NewState, xmerl_sax_parser_state) ->
                    {Remaining, NewState};
                {fatal_error, {NewState, "No more bytes"}} ->
                    {?STRING_EMPTY, NewState};
                Other ->
                    throw(Other)
            end;
        _ -> % FileType is normal or entity
            parse_content(Body, BodyState, [], true)
    end;
parse_external_entity_1(?STRING_UNBOUND_REST(_Char, _) = Input,
                        #xmerl_sax_parser_state{file_type=FileType} = State) ->
    case FileType of
        normal ->
            parse_content(Input, State, [], true);
        _ when FileType =:= dtd; FileType =:= entity ->
            parse_doctype_decl(Input, State)
    end;
parse_external_entity_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_external_entity_1/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : is_next_char_whitespace(Bytes, State) -> Result
%% Parameters: Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : true | false
%% Description: Tells whether the first character of Bytes is whitespace.
%%----------------------------------------------------------------------
is_next_char_whitespace(?STRING_UNBOUND_REST(Char, _), _State) when ?is_whitespace(Char) ->
    true;
is_next_char_whitespace(?STRING_UNBOUND_REST(_Char, _), _State) ->
    false;
is_next_char_whitespace(Input, State) ->
    unicode_incomplete_check([Input, State, fun is_next_char_whitespace/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : parse_external_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse an external id. OptionalSystemId tells whether the
%%              system literal after a public id may be left out. For a
%%              SYSTEM id the returned PubId is "".
%% [75] ExternalID ::= 'SYSTEM' S SystemLiteral
%%                     | 'PUBLIC' S PubidLiteral S SystemLiteral
%%----------------------------------------------------------------------
parse_external_id(?STRING_EMPTY, State, OptionalSystemId) ->
    cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_external_id/3);

%% Input ending inside the SYSTEM keyword.
parse_external_id(?STRING("S") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SY") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYS") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYST") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYSTE") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("SYSTEM", Tail), State, _OptionalSystemId) ->
    %% After SYSTEM the literal is always required.
    {SysId, Remaining, NewState} = parse_system_id(Tail, State, false),
    {"", SysId, Remaining, NewState};

%% Input ending inside the PUBLIC keyword.
parse_external_id(?STRING("P") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PU") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUB") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUBL") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUBLI") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("PUBLIC", Tail), State, OptionalSystemId) ->
    parse_public_id(Tail, State, OptionalSystemId);

parse_external_id(Input, State, OptionalSystemId) ->
    unicode_incomplete_check([Input, State, OptionalSystemId, fun parse_external_id/3],
                             "expecting SYSTEM or PUBLIC").


%%----------------------------------------------------------------------
%% Function  : parse_system_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {SysId, Rest, State}
%%             SysId = string()
%% Description: Parse a system id. With OptionalSystemId = true a
%%              missing system id yields "" and the input is returned
%%              untouched.
%%----------------------------------------------------------------------
parse_system_id(?STRING_UNBOUND_REST(Char, _) = Input, State, OptionalSystemId)
  when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    check_system_litteral(Remaining, NewState, OptionalSystemId);
parse_system_id(?STRING_UNBOUND_REST(_Char, _) = Input, State, true) ->
    {"", Input, State};
parse_system_id(Input, State, OptionalSystemId) ->
    unicode_incomplete_check([Input, State, OptionalSystemId, fun parse_system_id/3],
                             "whitespace expected").

%% Reads the system literal that starts at a quote character. When the
%% system id is optional and no quote follows, "" is returned and the
%% input is left untouched.
check_system_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, _OptionalSystemId)
  when Quote == $'; Quote == $" ->
    parse_system_litteral(Tail, State, Quote, []);
check_system_litteral(?STRING_UNBOUND_REST(_Char, _) = Input, State, true) ->
    {"", Input, State};
check_system_litteral(Input, State, OptionalSystemId) ->
    unicode_incomplete_check([Input, State, OptionalSystemId, fun check_system_litteral/3],
                             "\" or \' expected").


%%----------------------------------------------------------------------
%% Function  : parse_public_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a public id, which has to be preceded by
%%              whitespace. OptionalSystemId tells whether the system id
%%              following it may be left out.
%%----------------------------------------------------------------------
parse_public_id(?STRING_UNBOUND_REST(Char, _) = Input, State, OptionalSystemId)
  when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    check_public_litteral(Remaining, NewState, OptionalSystemId);
parse_public_id(Input, State, OptionalSystemId) ->
    unicode_incomplete_check([Input, State, OptionalSystemId, fun parse_public_id/3],
                             "whitespace expected").


%% Reads the quoted public id literal and then the system id after it.
check_public_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, OptionalSystemId)
  when Quote == $'; Quote == $" ->
    {PubId, AfterPubId, PubIdState} = parse_pubid_litteral(Tail, State, Quote, []),
    {SysId, AfterSysId, SysIdState} = parse_system_id(AfterPubId, PubIdState, OptionalSystemId),
    {PubId, SysId, AfterSysId, SysIdState};
check_public_litteral(Input, State, OptionalSystemId) ->
    unicode_incomplete_check([Input, State, OptionalSystemId, fun check_public_litteral/3],
                             "\" or \' expected").


%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the declaration part of the DOCTYPE. Returns when
%%              the closing "]" has been consumed.
%% [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
%%                     | NotationDecl | PI | Comment
%%----------------------------------------------------------------------
parse_doctype_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING("<") = Partial, State) ->
    %% A lone "<" cannot be told apart from "<?" and "<!" yet.
    cf(Partial, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING_REST("<?", Tail), State) ->
    case parse_pi(Tail, State) of
        {Remaining, NewState} ->
            parse_doctype_decl(Remaining, NewState);
        {endDocument, _Remaining, NewState} ->
            %% Hand back the input with the consumed "<?" put in front again.
            Prefix = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(Prefix, Tail), NewState}
    end;
parse_doctype_decl(?STRING_REST("%", Tail), State) ->
    {Ref, Remaining, RefState} = parse_pe_reference(Tail, State),
    case Ref of
        {internal_parameter, _, RefValue} ->
            %% The replacement text is put in front of the remaining
            %% input, padded with one space on each side.
            Replacement = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
            parse_doctype_decl(?APPEND_STRING(Replacement, Remaining), RefState);
        {external_parameter, _, {PubId, SysId}} ->
            EntityState =
                parse_external_entity(RefState#xmerl_sax_parser_state{file_type = entity},
                                      PubId, SysId),
            parse_doctype_decl(Remaining, EntityState);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                true ->
                    parse_doctype_decl(Remaining, RefState);
                false ->
                    %% WFC: Entity Declared
                    ?fatal_error(RefState, "Entity not declared: " ++ Name)
            end
    end;
parse_doctype_decl(?STRING_REST("<!", Tail), State) ->
    parse_doctype_decl_1(Tail, State);
parse_doctype_decl(?STRING_REST("]", Tail), State) ->
    {Tail, State};
parse_doctype_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    parse_doctype_decl(Remaining, NewState);
parse_doctype_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype_decl/2],
                             "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").


%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Dispatches on the keyword that follows "<!" in the
%%              DOCTYPE. For every keyword there is one clause per
%%              proper prefix, used when the input ends inside the
%%              keyword.
%%----------------------------------------------------------------------
parse_doctype_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl_1/2);

%% ELEMENT
parse_doctype_decl_1(?STRING("E") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("EL") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELE") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEM") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEME") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEMEN") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ELEMENT", Tail), State) ->
    {Remaining, NewState} = parse_element_decl(Tail, State),
    parse_doctype_decl(Remaining, NewState);

%% ATTLIST
parse_doctype_decl_1(?STRING("A") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("AT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTL") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTLI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTLIS") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ATTLIST", Tail), State) ->
    {Remaining, NewState} = parse_att_list_decl(Tail, State),
    parse_doctype_decl(Remaining, NewState);

%% ENTITY -- the single "E" prefix is covered by the ELEMENT clauses.
parse_doctype_decl_1(?STRING("EN") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENTI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENTIT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ENTITY", Tail), State) ->
    {Remaining, NewState} = parse_entity_decl(Tail, State),
    parse_doctype_decl(Remaining, NewState);

%% NOTATION
parse_doctype_decl_1(?STRING("N") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NO") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTA") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTAT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTATI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTATIO") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("NOTATION", Tail), State) ->
    {Remaining, NewState} = parse_notation_decl(Tail, State),
    parse_doctype_decl(Remaining, NewState);

%% Comment
parse_doctype_decl_1(?STRING("-") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("--", Tail), State) ->
    {Remaining, NewState} = parse_comment(Tail, State, []),
    parse_doctype_decl(Remaining, NewState);

parse_doctype_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype_decl_1/2],
                             "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").


%%----------------------------------------------------------------------
%% Function  : parse_element_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse an element declaration; called with the input that
%%              follows the ELEMENT keyword, which has to start with
%%              whitespace.
%% [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
%%----------------------------------------------------------------------
parse_element_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_decl/2);
parse_element_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    parse_element_decl_1(Remaining, NewState);
parse_element_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl/2],
                             "whitespace expected").

%% Reads the element name and its content model and reports them with
%% an elementDecl event.
parse_element_decl_1(?STRING_UNBOUND_REST(Char, Tail), State) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, NameState} = parse_name(Tail, State, [Char]),
            {Model, AfterModel, ModelState} = parse_element_content(AfterName, NameState),
            EventState = event_callback({elementDecl, Name, Model}, ModelState),
            {AfterModel, EventState}
    end;
parse_element_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl_1/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_element_content(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Parse the content specification of an element
%%              declaration; whitespace is required in front of it.
%% [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
%%----------------------------------------------------------------------
parse_element_content(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_content/2);
parse_element_content(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    parse_element_content_1(Remaining, NewState, []);
parse_element_content(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_content/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_element_content_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Parse contents of an element declaration.
%%----------------------------------------------------------------------
parse_element_content_1(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
parse_element_content_1(?STRING_REST(">", Tail), State, Acc) ->
    %% Acc is in reverse order, so stripping whitespace from its head
    %% removes the whitespace in front of the closing ">".
    Trimmed = delete_leading_whitespace(Acc),
    {lists:reverse(Trimmed), Tail, State};
parse_element_content_1(?STRING_UNBOUND_REST(Char, Tail), State, Acc) ->
    parse_element_content_1(Tail, State, [Char | Acc]);
parse_element_content_1(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_element_content_1/3],
                             undefined).

%% Drops whitespace characters from the head of the list.
delete_leading_whitespace([Char | Tail]) when ?is_whitespace(Char) ->
    delete_leading_whitespace(Tail);
delete_leading_whitespace(Trimmed) ->
    Trimmed.

%%----------------------------------------------------------------------
%% Function  : parse_att_list_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse an attribute list declaration; called with the
%%              input that follows the ATTLIST keyword, which has to
%%              start with whitespace.
%% [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
%%----------------------------------------------------------------------
parse_att_list_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_list_decl/2);
parse_att_list_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    parse_att_list_decl_1(Remaining, NewState);
parse_att_list_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl/2],
                             "whitespace expected").


%% Reads the name of the element the attribute list belongs to and goes
%% on with its attribute definitions.
parse_att_list_decl_1(?STRING_UNBOUND_REST(Char, Tail), State) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {ElementName, Remaining, NewState} = parse_name(Tail, State, [Char]),
            parse_att_defs(Remaining, NewState, ElementName)
    end;
parse_att_list_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl_1/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_att_defs(Rest, State, ElementName) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             ElementName = string()
%% Result    : {Rest, State}
%% Description: Parse the attribute definitions of one ATTLIST
%%              declaration up to the closing ">". An attributeDecl
%%              event is sent for every definition.
%% [53] AttDef ::= S Name S AttType S DefaultDecl
%%----------------------------------------------------------------------
parse_att_defs(?STRING_EMPTY, State, ElementName) ->
    cf(?STRING_EMPTY, State, ElementName, fun parse_att_defs/3);
parse_att_defs(?STRING_REST(">", Tail), State, _ElementName) ->
    {Tail, State};
parse_att_defs(?STRING_UNBOUND_REST(Char, _) = Input, State, ElementName)
  when ?is_whitespace(Char) ->
    {_Blanks, Remaining, NewState} = whitespace(Input, State, []),
    parse_att_defs(Remaining, NewState, ElementName);
parse_att_defs(?STRING_UNBOUND_REST(Char, Tail), State, ElementName) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "whitespace or name expected");
        true ->
            {AttrName, AfterName, NameState} = parse_name(Tail, State, [Char]),
            {Type, AfterType, TypeState} = parse_att_type(AfterName, NameState),
            {Mode, Value, AfterDefault, DefaultState} =
                parse_default_decl(AfterType, TypeState),
            Event = {attributeDecl, ElementName, AttrName, Type, Mode, Value},
            EventState = event_callback(Event, DefaultState),
            parse_att_defs(AfterDefault, EventState, ElementName)
    end;
parse_att_defs(Input, State, ElementName) ->
    unicode_incomplete_check([Input, State, ElementName, fun parse_att_defs/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_att_type(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Parse an attribute type, which has to be preceded by
%%              whitespace. For an enumeration or a notation type the
%%              text read by parse_until_right_paren/3 is appended to
%%              the returned type.
%% [54] AttType ::= StringType | TokenizedType | EnumeratedType
%% [55] StringType ::= 'CDATA'
%% [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
%%                        | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
%% [57] EnumeratedType ::= NotationType | Enumeration
%% [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
%% [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
%%----------------------------------------------------------------------
parse_att_type(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_type/2);
parse_att_type(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blanks, AfterWS, WSState} = whitespace(Input, State, []),
    case parse_att_type_1(AfterWS, WSState, []) of
        {Type, AfterType, TypeState} when Type == "("; Type == "NOTATION" ->
            {Enumeration, AfterParen, ParenState} =
                parse_until_right_paren(AfterType, TypeState, []),
            {Type ++ Enumeration, AfterParen, ParenState};
        {Type, AfterType, TypeState} ->
            case check_att_type(Type) of
                false ->
                    ?fatal_error(TypeState, "wrong attribute type");
                true ->
                    {Type, AfterType, TypeState}
            end
    end;
parse_att_type(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_type/2],
                             "whitespace expected").
%%----------------------------------------------------------------------
%% Function  : parse_att_type_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Read the attribute type token. The token ends at the
%%             first whitespace character, which is left in Rest. A '('
%%             met before any other character is returned on its own
%%             as "(".
%%----------------------------------------------------------------------
parse_att_type_1(?STRING_EMPTY, State, Token) ->
    cf(?STRING_EMPTY, State, Token, fun parse_att_type_1/3);
parse_att_type_1(?STRING_UNBOUND_REST(Ch, _) = Bytes, State, Token) when ?is_whitespace(Ch) ->
    {lists:reverse(Token), Bytes, State};
parse_att_type_1(?STRING_REST("(", Tail), State, []) ->
    {"(", Tail, State};
parse_att_type_1(?STRING_UNBOUND_REST(Ch, Tail), State, Token) ->
    parse_att_type_1(Tail, State, [Ch | Token]);
parse_att_type_1(Bytes, State, Token) ->
    unicode_incomplete_check([Bytes, State, Token, fun parse_att_type_1/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : check_att_type(Type) -> Result
%% Parameters: Type = string()
%% Result    : true | false
%% Description: Check if Type is one of the string or tokenized
%%             attribute type keywords.
%%----------------------------------------------------------------------
check_att_type(Type) ->
    lists:member(Type, ["CDATA",
                        "ID", "IDREF", "IDREFS",
                        "ENTITY", "ENTITIES",
                        "NMTOKEN", "NMTOKENS"]).
%%----------------------------------------------------------------------
%% Function  : parse_until_right_paren(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Parse an enumerated type until ')'. The returned string
%%             includes the closing ')'.
%%----------------------------------------------------------------------
parse_until_right_paren(?STRING_EMPTY, State, Seen) ->
    cf(?STRING_EMPTY, State, Seen, fun parse_until_right_paren/3);
parse_until_right_paren(?STRING_REST(")", Tail), State, Seen) ->
    {lists:reverse([$) | Seen]), Tail, State};
parse_until_right_paren(?STRING_UNBOUND_REST(Ch, Tail), State, Seen) ->
    parse_until_right_paren(Tail, State, [Ch | Seen]);
parse_until_right_paren(Bytes, State, Seen) ->
    unicode_incomplete_check([Bytes, State, Seen, fun parse_until_right_paren/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_default_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : whatever parse_default_decl_1/2 returns for the input
%%             that follows the whitespace
%% Description: Parse a default declaration. Leading whitespace is
%%             required and skipped before the declaration is read.
%%             [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
%%----------------------------------------------------------------------
parse_default_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl/2);
parse_default_decl(?STRING_UNBOUND_REST(Ch, _) = Bytes, State) when ?is_whitespace(Ch) ->
    {_WS, AfterWs, State1} = whitespace(Bytes, State, []),
    parse_default_decl_1(AfterWs, State1);
parse_default_decl(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_default_decl/2],
                             "whitespace expected").
%%----------------------------------------------------------------------
%% Function  : parse_default_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Mode, Value, Rest, State}
%%             Mode = "#REQUIRED" | "#IMPLIED" | "#FIXED" | ""
%%             Value = undefined for #REQUIRED and #IMPLIED, otherwise
%%                     the value returned by parse_att_value
%% Description: Parse a default declaration.
%%             When the available input is only a proper prefix of one
%%             of the keywords, more input is requested before a
%%             decision is taken.
%%----------------------------------------------------------------------
parse_default_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
parse_default_decl_1(?STRING_REST("#", _Rest) = Bytes, State) ->
    case Bytes of
        %% A lone "#" is also an incomplete keyword; without this clause
        %% a chunk boundary right after '#' ended in the fatal error below.
        ?STRING("#") ->
            cf(Bytes, State, fun parse_default_decl_1/2);

        ?STRING("#R") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#RE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQ") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQU") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUIR") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUIRE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#REQUIRED", Rest1) ->
            {"#REQUIRED", undefined, Rest1, State};

        ?STRING("#I") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IM") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMP") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPL") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPLI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPLIE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#IMPLIED", Rest1) ->
            {"#IMPLIED", undefined, Rest1, State};

        ?STRING("#F") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FIX") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FIXE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#FIXED", Rest1) ->
            parse_fixed(Rest1, State);
        _ ->
            ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected after #")
    end;
parse_default_decl_1(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
    %% A quoted default value without a keyword; the mode is reported as "".
    {DefaultValue, Rest1, State1} = parse_att_value(Rest, State, C, []),
    {"", DefaultValue, Rest1, State1};
parse_default_decl_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_default_decl_1/2],
                             "bad default declaration").


%%----------------------------------------------------------------------
%% Function  : parse_fixed(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {"#FIXED", Value, Rest, State}
%% Description: Parse the attribute value that follows '#FIXED'.
%%             Whitespace is required between the keyword and the value.
%%----------------------------------------------------------------------
parse_fixed(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {DefaultValue, Rest, State1} = parse_att_value(Bytes, State), % parse_att_value removes leading WS
    {"#FIXED", DefaultValue, Rest, State1};
parse_fixed(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_fixed/2],
                             "whitespace expected").

%%----------------------------------------------------------------------
%% Function  : parse_entity_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse an entity declaration. Leading whitespace is
%%             required and skipped before the declaration is read.
%%             [70] EntityDecl ::= GEDecl | PEDecl
%%             [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
%%             [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
%%----------------------------------------------------------------------
parse_entity_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl/2);
parse_entity_decl(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    parse_entity_decl_1(Rest, State1);
parse_entity_decl(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_entity_decl/2],
                             "whitespace expected").
%%----------------------------------------------------------------------
%% Function  : parse_entity_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse an entity declaration. A leading '%' selects a
%%             parameter entity declaration; a name start character
%%             selects a general entity declaration. In both cases
%%             whitespace must follow.
%%----------------------------------------------------------------------
parse_entity_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl_1/2);
parse_entity_decl_1(?STRING_REST("%", Tail), State) ->
    case is_next_char_whitespace(Tail, State) of
        false ->
            ?fatal_error(State, "whitespace expected");
        true ->
            {_WS, AfterWs, State1} = whitespace(Tail, State, []),
            parse_pe_name(AfterWs, State1)
    end;
parse_entity_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name or % expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, State1) of
                false ->
                    ?fatal_error(State1, "whitespace expected");
                true ->
                    {_WS, AfterWs, State2} = whitespace(AfterName, State1, []),
                    parse_entity_def(AfterWs, State2, Name)
            end
    end;
parse_entity_decl_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_entity_decl_1/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_pe_name(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Read the name of a parameter entity declaration, require
%%             whitespace after it and continue with the definition.
%%----------------------------------------------------------------------
parse_pe_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, State1) of
                false ->
                    ?fatal_error(State1, "whitespace expected");
                true ->
                    {_WS, AfterWs, State2} = whitespace(AfterName, State1, []),
                    parse_pe_def(AfterWs, State2, Name)
            end
    end;
parse_pe_name(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_pe_name/2],
                             undefined).
%%----------------------------------------------------------------------
%% Function  : parse_entity_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : {Rest, State}
%% Description: Parse an entity definition. The entity is stored with
%%             insert_reference/3 and reported with an
%%             internalEntityDecl, externalEntityDecl or
%%             unparsedEntityDecl event.
%%             [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
%%----------------------------------------------------------------------
parse_entity_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
parse_entity_def(?STRING_UNBOUND_REST(Quote, Tail), State, Name) when Quote == $'; Quote == $" ->
    %% Quoted literal: an internal general entity.
    {Value, AfterValue, State1} = parse_entity_value(Tail, State, Quote, []),
    State2 = insert_reference(Name, {internal_general, Value}, State1),
    State3 = event_callback({internalEntityDecl, Name, Value}, State2),
    {_WS, AfterWs, State4} = whitespace(AfterValue, State3, []),
    parse_def_end(AfterWs, State4);
parse_entity_def(?STRING_UNBOUND_REST(Ch, _) = Bytes, State, Name) when Ch == $S; Ch == $P ->
    %% Input starts with 'S' or 'P': read an external id, optionally
    %% followed by an NDATA declaration.
    {PubId, SysId, AfterId, State1} = parse_external_id(Bytes, State, false),
    {Ndata, AfterNdata, State2} = parse_ndata(AfterId, State1),
    {Reference, Event} =
        case Ndata of
            undefined ->
                {{external_general, {PubId, SysId}},
                 {externalEntityDecl, Name, PubId, SysId}};
            _ ->
                {{unparsed, {PubId, SysId, Ndata}},
                 {unparsedEntityDecl, Name, PubId, SysId, Ndata}}
        end,
    State3 = insert_reference(Name, Reference, State2),
    State4 = event_callback(Event, State3),
    {AfterNdata, State4};
parse_entity_def(Bytes, State, Name) ->
    unicode_incomplete_check([Bytes, State, Name, fun parse_entity_def/3],
                             "\", \', SYSTEM or PUBLIC expected").
%%----------------------------------------------------------------------
%% Function  : parse_def_end(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Consume the '>' that closes a declaration.
%%----------------------------------------------------------------------
parse_def_end(?STRING_REST(">", Tail), State) ->
    {Tail, State};
parse_def_end(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_def_end/2],
                             "> expected").



%%----------------------------------------------------------------------
%% Function  : parse_ndata(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Ndata, Rest, State}
%%             Ndata = undefined when '>' comes directly, otherwise
%%                     whatever parse_ndata_decl/2 returns
%% Description: Parse an NDATA declaration.
%%             [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata/2);
parse_ndata(?STRING_REST(">", Tail), State) ->
    %% The declaration ends here; no NDATA part.
    {undefined, Tail, State};
parse_ndata(?STRING_UNBOUND_REST(Ch, _) = Bytes, State) when ?is_whitespace(Ch) ->
    {_WS, AfterWs, State1} = whitespace(Bytes, State, []),
    parse_ndata_decl(AfterWs, State1);
parse_ndata(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_ndata/2],
                             "Space before NDATA or > expected").
%%----------------------------------------------------------------------
%% Function  : parse_entity_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an entity value up to the Stop delimiter.
%%             Line breaks and tabs are stored as one space each,
%%             character references are replaced by their character and
%%             general entity references are kept as "&Name;" text.
%%             Parameter entity references are a fatal error when
%%             file_type is normal; otherwise internal ones are pushed
%%             back onto the input, surrounded by spaces.
%%----------------------------------------------------------------------
parse_entity_value(?STRING_EMPTY, State, undefined, Acc) ->
    %% Stop clause when parsing references; the accumulator is handed
    %% back as it is, without being reversed.
    {Acc, [], State};
parse_entity_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING("\r"), State, Stop, Acc) ->
    %% A lone "\r" at the end of the input: get more input first,
    %% presumably so that a following "\n" is seen together with it.
    cf(?STRING("\r"), State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
                       State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
                       State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
                       State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\t", Rest), State, Stop, Acc) ->
    %% A tab is not a line break, so line_no must stay untouched.
    parse_entity_value(Rest, State, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, false),
    case Ref of
        {character, _, CharValue} ->
            parse_entity_value(Rest1, State1, Stop, [CharValue | Acc]);
        {internal_general, _, Name, _} ->
            parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
        {external_general, Name, _} ->
            parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
        {not_found, Name} ->
            parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
        {unparsed, Name, _} ->
            ?fatal_error(State1, "Unparsed entity reference in entity value: " ++ Name)
    end;
parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Type} = State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
    case Type of
        normal -> %WFC: PEs in Internal Subset
            %% Ref may also be the two-element {not_found, Name}, so the
            %% name is picked out by a helper that accepts both shapes.
            %% NOTE(review): the message below reads "may not occur not
            %% within"; the second "not" looks like a typo - confirm
            %% before changing the text.
            Name = pe_reference_name(Ref),
            ?fatal_error(State1, "A parameter reference may not occur not within "
                         "markup declarations in the internal DTD subset: " ++ Name);
        _ ->
            case Ref of
                {internal_parameter, _, RefValue} ->
                    IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
                    parse_entity_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
                {external_parameter, _, {_PubId, _SysId}} ->
                    ?fatal_error(State1, "Parameter references in entity value not supported yet.");
                {not_found, Name} ->
                    case State#xmerl_sax_parser_state.skip_external_dtd of
                        false ->
                            ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                        true ->
                            %% NOTE(review): the reference is written back
                            %% with '&' although it was read after '%' -
                            %% confirm this is intended.
                            parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
                    end

            end
    end;
parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    %% Closing delimiter reached.
    {lists:reverse(Acc), Rest, State};
parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
    if
        ?is_char(C) ->
            parse_entity_value(Rest, State, Stop, [C|Acc]);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity value: ~p", [C])))
    end;
parse_entity_value(Bytes, State, Stop, Acc) ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4],
                             undefined).

%% Returns the name carried by a parameter entity reference result, for
%% both the three-element results and {not_found, Name}.
pe_reference_name({not_found, Name}) ->
    Name;
pe_reference_name({_Kind, Name, _Value}) ->
    Name.

%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%%             Name = string() | undefined
%% Description: Parse an NDATA declaration. A '>' instead of 'NDATA'
%%             ends the declaration with Name = undefined. Input that
%%             is only a proper prefix of "NDATA" asks for more input.
%%             [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST(">", Rest), State) ->
    {undefined, Rest, State};
parse_ndata_decl(?STRING("N") = Bytes, State) ->
    cf(Bytes, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("ND") = Bytes, State) ->
    cf(Bytes, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("NDA") = Bytes, State) ->
    cf(Bytes, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("NDAT") = Bytes, State) ->
    cf(Bytes, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST("NDATA", Rest), State) ->
    parse_ndata_decl_1(Rest, State);
parse_ndata_decl(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_ndata_decl/2],
                             "NDATA or > expected").


%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%% Description: Require whitespace after 'NDATA' and read the notation
%%             name.
%%----------------------------------------------------------------------
parse_ndata_decl_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    parse_ndecl_name(Rest, State1);
parse_ndata_decl_1(Bytes, State) ->
    %% Resume in this function: "NDATA" has already been consumed, so
    %% resuming in parse_ndata_decl/2 would demand the keyword again.
    unicode_incomplete_check([Bytes, State, fun parse_ndata_decl_1/2],
                             "whitespace expected").
%%----------------------------------------------------------------------
%% Function  : parse_ndecl_name(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%%             Name = string()
%% Description: Read the notation name of an NDATA declaration, skip
%%             whitespace after it and consume the closing '>'.
%%----------------------------------------------------------------------
parse_ndecl_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            {_WS, AfterWs, State2} = whitespace(AfterName, State1, []),
            {AfterEnd, State3} = parse_def_end(AfterWs, State2),
            {Name, AfterEnd, State3}
    end;
parse_ndecl_name(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_ndecl_name/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : parse_pe_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : {Rest, State}
%% Description: Parse a parameter entity definition. The entity is
%%             stored and reported under the name "%" ++ Name.
%%             [74] PEDef ::= EntityValue | ExternalID
%%----------------------------------------------------------------------
parse_pe_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_pe_def/3);
parse_pe_def(?STRING_UNBOUND_REST(Quote, Tail), State, Name) when Quote == $'; Quote == $" ->
    {Value, AfterValue, State1} = parse_entity_value(Tail, State, Quote, []),
    PeName = "%" ++ Name,
    State2 = insert_reference(PeName, {internal_parameter, Value}, State1),
    State3 = event_callback({internalEntityDecl, PeName, Value}, State2),
    {_WS, AfterWs, State4} = whitespace(AfterValue, State3, []),
    parse_def_end(AfterWs, State4);
parse_pe_def(?STRING_UNBOUND_REST(Ch, _) = Bytes, State, Name) when Ch == $S; Ch == $P ->
    {PubId, SysId, AfterId, State1} = parse_external_id(Bytes, State, false),
    PeName = "%" ++ Name,
    State2 = insert_reference(PeName, {external_parameter, {PubId, SysId}}, State1),
    State3 = event_callback({externalEntityDecl, PeName, PubId, SysId}, State2),
    {_WS, AfterWs, State4} = whitespace(AfterId, State3, []),
    parse_def_end(AfterWs, State4);
parse_pe_def(Bytes, State, Name) ->
    unicode_incomplete_check([Bytes, State, Name, fun parse_pe_def/3],
                             "\", \', SYSTEM or PUBLIC expected").


%%----------------------------------------------------------------------
%% Function  : parse_notation_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse a NOTATION declaration. Leading whitespace is
%%             required and skipped before the name is read.
%%             [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
%%----------------------------------------------------------------------
parse_notation_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_decl/2);
parse_notation_decl(?STRING_UNBOUND_REST(Ch, _) = Bytes, State) when ?is_whitespace(Ch) ->
    {_WS, AfterWs, State1} = whitespace(Bytes, State, []),
    parse_notation_decl_1(AfterWs, State1);
parse_notation_decl(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_notation_decl/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_notation_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Read the notation name and its identity, then send a
%%             notationDecl event.
%%----------------------------------------------------------------------
parse_notation_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            {PubId, SysId, AfterId, State2} = parse_notation_id(AfterName, State1),
            State3 = event_callback({notationDecl, Name, PubId, SysId}, State2),
            {AfterId, State3}
    end;
parse_notation_decl_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_notation_decl_1/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : parse_notation_id(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity. The public id case is a
%%             special variant of external id where just the public
%%             part is allowed. This is allowed if the third parameter
%%             in parse_external_id/3 is true.
%%             [83] PublicID ::= 'PUBLIC' S PubidLiteral
%%----------------------------------------------------------------------
parse_notation_id(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id/2);
parse_notation_id(?STRING_UNBOUND_REST(Ch, _) = Bytes, State) when ?is_whitespace(Ch) ->
    {_WS, AfterWs, State1} = whitespace(Bytes, State, []),
    parse_notation_id_1(AfterWs, State1);
parse_notation_id(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_notation_id/2],
                             "whitespace expected").

%%----------------------------------------------------------------------
%% Function  : parse_notation_id_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity, skip whitespace after it and
%%             consume the closing '>'.
%%----------------------------------------------------------------------
parse_notation_id_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id_1/2);
parse_notation_id_1(?STRING_UNBOUND_REST(Ch, _) = Bytes, State) when Ch == $S; Ch == $P ->
    {PubId, SysId, AfterId, State1} = parse_external_id(Bytes, State, true),
    {_WS, AfterWs, State2} = whitespace(AfterId, State1, []),
    {AfterEnd, State3} = parse_def_end(AfterWs, State2),
    {PubId, SysId, AfterEnd, State3};
parse_notation_id_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_notation_id_1/2],
                             "external id or public id expected").
%%======================================================================
%% Character checks and definitions
%%======================================================================

%%----------------------------------------------------------------------
%% Definitions of the first 256 characters
%% 0 - not classified,
%% 1 - base_char or ideographic,
%% 2 - combining_char or digit or extender,
%% 3 - $. or $- or $_ or $:
%%
%% The tuple is read with element(C, ?SMALL), so entry N describes the
%% character with code N; code 0 has no entry.
%%----------------------------------------------------------------------
-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
                0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).


%%----------------------------------------------------------------------
%% Function  : is_name_start(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a valid start of a name.
%%             [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
is_name_start($_) ->
    true;
is_name_start($:) ->
    true;
is_name_start(C) ->
    is_letter(C).


%%----------------------------------------------------------------------
%% Function  : is_name_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a valid name character.
%%             [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
%%                            | CombiningChar | Extender
%%----------------------------------------------------------------------
is_name_char(C) ->
    %% Characters covered by ?SMALL are answered from the table; when the
    %% lookup fails (C outside the tuple) the full checks are used.
    try element(C, ?SMALL) > 0
    catch _:_ ->
            is_letter(C)
                orelse is_digit(C)
                orelse is_combining_char(C)
                orelse is_extender(C)
    end.


%%----------------------------------------------------------------------
%% Function  : is_pubid_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a public identity character.
%%             [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
%%                              | [-'()+,./:=?;!*#@$_%]
%%----------------------------------------------------------------------
is_pubid_char(?space) ->
    true;
is_pubid_char(?cr) ->
    true;
is_pubid_char(?lf) ->
    true;
is_pubid_char($!) ->
    true;
is_pubid_char($:) ->
    true;
is_pubid_char($;) ->
    true;
is_pubid_char($=) ->
    true;
is_pubid_char($?) ->
    %% '?' is listed in production [13] but had no clause of its own.
    true;
is_pubid_char($@) ->
    true;
is_pubid_char($_) ->
    true;
is_pubid_char(C) when $# =< C, C =< $% ->
    %% '#', '$' and '%'
    true;
is_pubid_char(C) when $' =< C, C =< $/ ->
    %% "'", '(', ')', '*', '+', ',', '-', '.' and '/'
    true;
is_pubid_char(C) ->
    %% NOTE(review): is_letter/1 also accepts letters outside a-z/A-Z,
    %% which is wider than [a-zA-Z0-9] in production [13] - confirm
    %% whether that is intended before tightening it.
    is_letter(C) orelse is_digit(C).


%%----------------------------------------------------------------------
%% Function  : is_letter(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a letter.
%%             [84] Letter ::= BaseChar | Ideographic
%%----------------------------------------------------------------------
is_letter(C) ->
    %% Table lookup first; the range checks are only used when C has no
    %% entry in ?SMALL.
    try element(C, ?SMALL) =:= 1
    catch _:_ ->
            is_base_char(C) orelse is_ideographic(C)
    end.


%%----------------------------------------------------------------------
%% Function  : is_base_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a basic character, i.e. if it
%%              lies in one of the code point ranges listed for
%%              [85] BaseChar
%%              Every clause below is one range (or one single code
%%              point) of that production; anything not listed falls
%%              through to the final clause and yields false.
%%----------------------------------------------------------------------
is_base_char(C) when C >= 16#0041, C =< 16#005A -> true; %% ASCII Latin
is_base_char(C) when C >= 16#0061, C =< 16#007A -> true;
is_base_char(C) when C >= 16#00C0, C =< 16#00D6 -> true; %% ISO Latin
is_base_char(C) when C >= 16#00D8, C =< 16#00F6 -> true;
is_base_char(C) when C >= 16#00F8, C =< 16#00FF -> true;
is_base_char(C) when C >= 16#0100, C =< 16#0131 -> true; %% Accented Latin
is_base_char(C) when C >= 16#0134, C =< 16#013E -> true;
is_base_char(C) when C >= 16#0141, C =< 16#0148 -> true;
is_base_char(C) when C >= 16#014A, C =< 16#017E -> true;
is_base_char(C) when C >= 16#0180, C =< 16#01C3 -> true;
is_base_char(C) when C >= 16#01CD, C =< 16#01F0 -> true;
is_base_char(C) when C >= 16#01F4, C =< 16#01F5 -> true;
is_base_char(C) when C >= 16#01FA, C =< 16#0217 -> true;
is_base_char(C) when C >= 16#0250, C =< 16#02A8 -> true; %% IPA
is_base_char(C) when C >= 16#02BB, C =< 16#02C1 -> true; %% Spacing Modifiers
is_base_char(16#0386) -> true;                           %% Greek
is_base_char(C) when C >= 16#0388, C =< 16#038A -> true;
is_base_char(16#038C) -> true;
is_base_char(C) when C >= 16#038E, C =< 16#03A1 -> true;
is_base_char(C) when C >= 16#03A3, C =< 16#03CE -> true;
is_base_char(C) when C >= 16#03D0, C =< 16#03D6 -> true;
is_base_char(16#03DA) -> true;
is_base_char(16#03DC) -> true;
is_base_char(16#03DE) -> true;
is_base_char(16#03E0) -> true;
is_base_char(C) when C >= 16#03E2, C =< 16#03F3 -> true;
is_base_char(C) when C >= 16#0401, C =< 16#040C -> true; %% Cyrillic
is_base_char(C) when C >= 16#040E, C =< 16#044F -> true;
is_base_char(C) when C >= 16#0451, C =< 16#045C -> true;
is_base_char(C) when C >= 16#045E, C =< 16#0481 -> true;
is_base_char(C) when C >= 16#0490, C =< 16#04C4 -> true;
is_base_char(C) when C >= 16#04C7, C =< 16#04C8 -> true;
is_base_char(C) when C >= 16#04CB, C =< 16#04CC -> true;
is_base_char(C) when C >= 16#04D0, C =< 16#04EB -> true;
is_base_char(C) when C >= 16#04EE, C =< 16#04F5 -> true;
is_base_char(C) when C >= 16#04F8, C =< 16#04F9 -> true;
is_base_char(C) when C >= 16#0531, C =< 16#0556 -> true; %% Armenian
is_base_char(16#0559) -> true;
is_base_char(C) when C >= 16#0561, C =< 16#0586 -> true;
is_base_char(C) when C >= 16#05D0, C =< 16#05EA -> true; %% Hebrew
is_base_char(C) when C >= 16#05F0, C =< 16#05F2 -> true;
is_base_char(C) when C >= 16#0621, C =< 16#063A -> true; %% Arabic
is_base_char(C) when C >= 16#0641, C =< 16#064A -> true;
is_base_char(C) when C >= 16#0671, C =< 16#06B7 -> true;
is_base_char(C) when C >= 16#06BA, C =< 16#06BE -> true;
is_base_char(C) when C >= 16#06C0, C =< 16#06CE -> true;
is_base_char(C) when C >= 16#06D0, C =< 16#06D3 -> true;
is_base_char(16#06D5) -> true;
is_base_char(C) when C >= 16#06E5, C =< 16#06E6 -> true;
is_base_char(C) when C >= 16#0905, C =< 16#0939 -> true; %% Devanagari
is_base_char(16#093D) -> true;
is_base_char(C) when C >= 16#0958, C =< 16#0961 -> true;
is_base_char(C) when C >= 16#0985, C =< 16#098C -> true; %% Bengali
is_base_char(C) when C >= 16#098F, C =< 16#0990 -> true;
is_base_char(C) when C >= 16#0993, C =< 16#09A8 -> true;
is_base_char(C) when C >= 16#09AA, C =< 16#09B0 -> true;
is_base_char(16#09B2) -> true;
is_base_char(C) when C >= 16#09B6, C =< 16#09B9 -> true;
is_base_char(C) when C >= 16#09DC, C =< 16#09DD -> true;
is_base_char(C) when C >= 16#09DF, C =< 16#09E1 -> true;
is_base_char(C) when C >= 16#09F0, C =< 16#09F1 -> true;
is_base_char(C) when C >= 16#0A05, C =< 16#0A0A -> true; %% Gurmukhi
is_base_char(C) when C >= 16#0A0F, C =< 16#0A10 -> true;
is_base_char(C) when C >= 16#0A13, C =< 16#0A28 -> true;
is_base_char(C) when C >= 16#0A2A, C =< 16#0A30 -> true;
is_base_char(C) when C >= 16#0A32, C =< 16#0A33 -> true;
is_base_char(C) when C >= 16#0A35, C =< 16#0A36 -> true;
is_base_char(C) when C >= 16#0A38, C =< 16#0A39 -> true;
is_base_char(C) when C >= 16#0A59, C =< 16#0A5C -> true;
is_base_char(16#0A5E) -> true;
is_base_char(C) when C >= 16#0A72, C =< 16#0A74 -> true;
is_base_char(C) when C >= 16#0A85, C =< 16#0A8B -> true; %% Gujarati
is_base_char(16#0A8D) -> true;
is_base_char(C) when C >= 16#0A8F, C =< 16#0A91 -> true;
is_base_char(C) when C >= 16#0A93, C =< 16#0AA8 -> true;
is_base_char(C) when C >= 16#0AAA, C =< 16#0AB0 -> true;
is_base_char(C) when C >= 16#0AB2, C =< 16#0AB3 -> true;
is_base_char(C) when C >= 16#0AB5, C =< 16#0AB9 -> true;
is_base_char(16#0ABD) -> true;
is_base_char(16#0AE0) -> true;
is_base_char(C) when C >= 16#0B05, C =< 16#0B0C -> true; %% Oriya
is_base_char(C) when C >= 16#0B0F, C =< 16#0B10 -> true;
is_base_char(C) when C >= 16#0B13, C =< 16#0B28 -> true;
is_base_char(C) when C >= 16#0B2A, C =< 16#0B30 -> true;
is_base_char(C) when C >= 16#0B32, C =< 16#0B33 -> true;
is_base_char(C) when C >= 16#0B36, C =< 16#0B39 -> true;
is_base_char(16#0B3D) -> true;
is_base_char(C) when C >= 16#0B5C, C =< 16#0B5D -> true;
is_base_char(C) when C >= 16#0B5F, C =< 16#0B61 -> true;
is_base_char(C) when C >= 16#0B85, C =< 16#0B8A -> true; %% Tamil
is_base_char(C) when C >= 16#0B8E, C =< 16#0B90 -> true;
is_base_char(C) when C >= 16#0B92, C =< 16#0B95 -> true;
is_base_char(C) when C >= 16#0B99, C =< 16#0B9A -> true;
is_base_char(16#0B9C) -> true;
is_base_char(C) when C >= 16#0B9E, C =< 16#0B9F -> true;
is_base_char(C) when C >= 16#0BA3, C =< 16#0BA4 -> true;
is_base_char(C) when C >= 16#0BA8, C =< 16#0BAA -> true;
is_base_char(C) when C >= 16#0BAE, C =< 16#0BB5 -> true;
is_base_char(C) when C >= 16#0BB7, C =< 16#0BB9 -> true;
is_base_char(C) when C >= 16#0C05, C =< 16#0C0C -> true; %% Telugu
is_base_char(C) when C >= 16#0C0E, C =< 16#0C10 -> true;
is_base_char(C) when C >= 16#0C12, C =< 16#0C28 -> true;
is_base_char(C) when C >= 16#0C2A, C =< 16#0C33 -> true;
is_base_char(C) when C >= 16#0C35, C =< 16#0C39 -> true;
is_base_char(C) when C >= 16#0C60, C =< 16#0C61 -> true;
is_base_char(C) when C >= 16#0C85, C =< 16#0C8C -> true; %% Kannada
is_base_char(C) when C >= 16#0C8E, C =< 16#0C90 -> true;
is_base_char(C) when C >= 16#0C92, C =< 16#0CA8 -> true;
is_base_char(C) when C >= 16#0CAA, C =< 16#0CB3 -> true;
is_base_char(C) when C >= 16#0CB5, C =< 16#0CB9 -> true;
is_base_char(16#0CDE) -> true;
is_base_char(C) when C >= 16#0CE0, C =< 16#0CE1 -> true;
is_base_char(C) when C >= 16#0D05, C =< 16#0D0C -> true; %% Malayalam
is_base_char(C) when C >= 16#0D0E, C =< 16#0D10 -> true;
is_base_char(C) when C >= 16#0D12, C =< 16#0D28 -> true;
is_base_char(C) when C >= 16#0D2A, C =< 16#0D39 -> true;
is_base_char(C) when C >= 16#0D60, C =< 16#0D61 -> true;
is_base_char(C) when C >= 16#0E01, C =< 16#0E2E -> true; %% Thai
is_base_char(16#0E30) -> true;
is_base_char(C) when C >= 16#0E32, C =< 16#0E33 -> true;
is_base_char(C) when C >= 16#0E40, C =< 16#0E45 -> true;
is_base_char(C) when C >= 16#0E81, C =< 16#0E82 -> true; %% Lao
is_base_char(16#0E84) -> true;
is_base_char(C) when C >= 16#0E87, C =< 16#0E88 -> true;
is_base_char(16#0E8A) -> true;
is_base_char(16#0E8D) -> true;
is_base_char(C) when C >= 16#0E94, C =< 16#0E97 -> true;
is_base_char(C) when C >= 16#0E99, C =< 16#0E9F -> true;
is_base_char(C) when C >= 16#0EA1, C =< 16#0EA3 -> true;
is_base_char(16#0EA5) -> true;
is_base_char(16#0EA7) -> true;
is_base_char(C) when C >= 16#0EAA, C =< 16#0EAB -> true;
is_base_char(C) when C >= 16#0EAD, C =< 16#0EAE -> true;
is_base_char(16#0EB0) -> true;
is_base_char(C) when C >= 16#0EB2, C =< 16#0EB3 -> true;
is_base_char(16#0EBD) -> true;
is_base_char(C) when C >= 16#0EC0, C =< 16#0EC4 -> true;
is_base_char(C) when C >= 16#0F40, C =< 16#0F47 -> true; %% Tibetan
is_base_char(C) when C >= 16#0F49, C =< 16#0F69 -> true;
is_base_char(C) when C >= 16#10A0, C =< 16#10C5 -> true; %% Georgian
is_base_char(C) when C >= 16#10D0, C =< 16#10F6 -> true;
is_base_char(16#1100) -> true;                           %% Hangul Jamo
is_base_char(C) when C >= 16#1102, C =< 16#1103 -> true;
is_base_char(C) when C >= 16#1105, C =< 16#1107 -> true;
is_base_char(16#1109) -> true;
is_base_char(C) when C >= 16#110B, C =< 16#110C -> true;
is_base_char(C) when C >= 16#110E, C =< 16#1112 -> true;
is_base_char(16#113C) -> true;
is_base_char(16#113E) -> true;
is_base_char(16#1140) -> true;
is_base_char(16#114C) -> true;
is_base_char(16#114E) -> true;
is_base_char(16#1150) -> true;
is_base_char(C) when C >= 16#1154, C =< 16#1155 -> true;
is_base_char(16#1159) -> true;
is_base_char(C) when C >= 16#115F, C =< 16#1161 -> true;
is_base_char(16#1163) -> true;
is_base_char(16#1165) -> true;
is_base_char(16#1167) -> true;
is_base_char(16#1169) -> true;
is_base_char(C) when C >= 16#116D, C =< 16#116E -> true;
is_base_char(C) when C >= 16#1172, C =< 16#1173 -> true;
is_base_char(16#1175) -> true;
is_base_char(16#119E) -> true;
is_base_char(16#11A8) -> true;
is_base_char(16#11AB) -> true;
is_base_char(C) when C >= 16#11AE, C =< 16#11AF -> true;
is_base_char(C) when C >= 16#11B7, C =< 16#11B8 -> true;
is_base_char(16#11BA) -> true;
is_base_char(C) when C >= 16#11BC, C =< 16#11C2 -> true;
is_base_char(16#11EB) -> true;
is_base_char(16#11F0) -> true;
is_base_char(16#11F9) -> true;
is_base_char(C) when C >= 16#1E00, C =< 16#1E9B -> true; %% Latin Extended Additional
is_base_char(C) when C >= 16#1EA0, C =< 16#1EF9 -> true;
is_base_char(C) when C >= 16#1F00, C =< 16#1F15 -> true; %% Greek Extended
is_base_char(C) when C >= 16#1F18, C =< 16#1F1D -> true;
is_base_char(C) when C >= 16#1F20, C =< 16#1F45 -> true;
is_base_char(C) when C >= 16#1F48, C =< 16#1F4D -> true;
is_base_char(C) when C >= 16#1F50, C =< 16#1F57 -> true;
is_base_char(16#1F59) -> true;
is_base_char(16#1F5B) -> true;
is_base_char(16#1F5D) -> true;
is_base_char(C) when C >= 16#1F5F, C =< 16#1F7D -> true;
is_base_char(C) when C >= 16#1F80, C =< 16#1FB4 -> true;
is_base_char(C) when C >= 16#1FB6, C =< 16#1FBC -> true;
is_base_char(16#1FBE) -> true;
is_base_char(C) when C >= 16#1FC2, C =< 16#1FC4 -> true;
is_base_char(C) when C >= 16#1FC6, C =< 16#1FCC -> true;
is_base_char(C) when C >= 16#1FD0, C =< 16#1FD3 -> true;
is_base_char(C) when C >= 16#1FD6, C =< 16#1FDB -> true;
is_base_char(C) when C >= 16#1FE0, C =< 16#1FEC -> true;
is_base_char(C) when C >= 16#1FF2, C =< 16#1FF4 -> true;
is_base_char(C) when C >= 16#1FF6, C =< 16#1FFC -> true;
is_base_char(16#2126) -> true;                           %% Letterlike Symbols
is_base_char(C) when C >= 16#212A, C =< 16#212B -> true;
is_base_char(16#212E) -> true;
is_base_char(C) when C >= 16#2180, C =< 16#2182 -> true; %% Number Forms
is_base_char(C) when C >= 16#3041, C =< 16#3094 -> true; %% Hiragana
is_base_char(C) when C >= 16#30A1, C =< 16#30FA -> true; %% Katakana
is_base_char(C) when C >= 16#3105, C =< 16#312C -> true; %% Bopomofo
is_base_char(C) when C >= 16#ac00, C =< 16#d7a3 -> true; %% Hangul Syllables
is_base_char(_) ->
    false.

%%----------------------------------------------------------------------
%% Function  : is_ideographic(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an ideographic letter.
%%              [86] Ideographic
%%----------------------------------------------------------------------
is_ideographic(C) when C >= 16#4e00, C =< 16#9fa5 -> true; %% Unified CJK Ideographs
is_ideographic(16#3007) -> true;                           %% CJK Symbols and Punctuation
is_ideographic(C) when C >= 16#3021, C =< 16#3029 -> true;
is_ideographic(_) ->
    false.

%%----------------------------------------------------------------------
%% Function  : is_combining_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a combining character.
%%              [87] CombiningChar
%%              Every clause below is one range (or one single code
%%              point); anything not listed yields false.
%%----------------------------------------------------------------------
is_combining_char(C) when C >= 16#0300, C =< 16#0345 -> true; %% Combining Diacritics
is_combining_char(C) when C >= 16#0360, C =< 16#0361 -> true;
is_combining_char(C) when C >= 16#0483, C =< 16#0486 -> true; %% Cyrillic Combining Diacritics
is_combining_char(C) when C >= 16#0591, C =< 16#05a1 -> true; %% Hebrew Combining Diacritics
is_combining_char(C) when C >= 16#05a3, C =< 16#05b9 -> true;
is_combining_char(C) when C >= 16#05bb, C =< 16#05bd -> true;
is_combining_char(16#05bf) -> true;
is_combining_char(C) when C >= 16#05c1, C =< 16#05c2 -> true;
is_combining_char(16#05c4) -> true;
is_combining_char(C) when C >= 16#064b, C =< 16#0652 -> true; %% Arabic Combining Diacritics
is_combining_char(16#0670) -> true;
is_combining_char(C) when C >= 16#06d6, C =< 16#06dc -> true;
is_combining_char(C) when C >= 16#06dd, C =< 16#06df -> true;
is_combining_char(C) when C >= 16#06e0, C =< 16#06e4 -> true;
is_combining_char(C) when C >= 16#06e7, C =< 16#06e8 -> true;
is_combining_char(C) when C >= 16#06ea, C =< 16#06ed -> true;
is_combining_char(C) when C >= 16#0901, C =< 16#0903 -> true; %% Devanagari Combining Diacritics
is_combining_char(16#093c) -> true;
is_combining_char(C) when C >= 16#093e, C =< 16#094c -> true;
is_combining_char(16#094d) -> true;
is_combining_char(C) when C >= 16#0951, C =< 16#0954 -> true;
is_combining_char(C) when C >= 16#0962, C =< 16#0963 -> true;
is_combining_char(C) when C >= 16#0981, C =< 16#0983 -> true; %% Bengali Combining Diacritics
is_combining_char(16#09bc) -> true;
is_combining_char(16#09be) -> true;
is_combining_char(16#09bf) -> true;
is_combining_char(C) when C >= 16#09c0, C =< 16#09c4 -> true;
is_combining_char(C) when C >= 16#09c7, C =< 16#09c8 -> true;
is_combining_char(C) when C >= 16#09cb, C =< 16#09cd -> true;
is_combining_char(16#09d7) -> true;
is_combining_char(C) when C >= 16#09e2, C =< 16#09e3 -> true;
is_combining_char(16#0a02) -> true;                           %% Gurmukhi Combining Diacritics
is_combining_char(16#0a3c) -> true;
is_combining_char(16#0a3e) -> true;
is_combining_char(16#0a3f) -> true;
is_combining_char(C) when C >= 16#0a40, C =< 16#0a42 -> true;
is_combining_char(C) when C >= 16#0a47, C =< 16#0a48 -> true;
is_combining_char(C) when C >= 16#0a4b, C =< 16#0a4d -> true;
is_combining_char(C) when C >= 16#0a70, C =< 16#0a71 -> true;
is_combining_char(C) when C >= 16#0a81, C =< 16#0a83 -> true; %% Gujarati Combining Diacritics
is_combining_char(16#0abc) -> true;
is_combining_char(C) when C >= 16#0abe, C =< 16#0ac5 -> true;
is_combining_char(C) when C >= 16#0ac7, C =< 16#0ac9 -> true;
is_combining_char(C) when C >= 16#0acb, C =< 16#0acd -> true;
is_combining_char(C) when C >= 16#0b01, C =< 16#0b03 -> true; %% Oriya Combining Diacritics
is_combining_char(16#0b3c) -> true;
is_combining_char(C) when C >= 16#0b3e, C =< 16#0b43 -> true;
is_combining_char(C) when C >= 16#0b47, C =< 16#0b48 -> true;
is_combining_char(C) when C >= 16#0b4b, C =< 16#0b4d -> true;
is_combining_char(C) when C >= 16#0b56, C =< 16#0b57 -> true;
is_combining_char(C) when C >= 16#0b82, C =< 16#0b83 -> true; %% Tamil Combining Diacritics
is_combining_char(C) when C >= 16#0bbe, C =< 16#0bc2 -> true;
is_combining_char(C) when C >= 16#0bc6, C =< 16#0bc8 -> true;
is_combining_char(C) when C >= 16#0bca, C =< 16#0bcd -> true;
is_combining_char(16#0bd7) -> true;
is_combining_char(C) when C >= 16#0c01, C =< 16#0c03 -> true; %% Telugu Combining Diacritics
is_combining_char(C) when C >= 16#0c3e, C =< 16#0c44 -> true;
is_combining_char(C) when C >= 16#0c46, C =< 16#0c48 -> true;
is_combining_char(C) when C >= 16#0c4a, C =< 16#0c4d -> true;
is_combining_char(C) when C >= 16#0c55, C =< 16#0c56 -> true;
is_combining_char(C) when C >= 16#0c82, C =< 16#0c83 -> true; %% Kannada Combining Diacritics
is_combining_char(C) when C >= 16#0cbe, C =< 16#0cc4 -> true;
is_combining_char(C) when C >= 16#0cc6, C =< 16#0cc8 -> true;
is_combining_char(C) when C >= 16#0cca, C =< 16#0ccd -> true;
is_combining_char(C) when C >= 16#0cd5, C =< 16#0cd6 -> true;
is_combining_char(C) when C >= 16#0d02, C =< 16#0d03 -> true; %% Malayalam Combining Diacritics
is_combining_char(C) when C >= 16#0d3e, C =< 16#0d43 -> true;
is_combining_char(C) when C >= 16#0d46, C =< 16#0d48 -> true;
is_combining_char(C) when C >= 16#0d4a, C =< 16#0d4d -> true;
is_combining_char(16#0d57) -> true;
is_combining_char(16#0e31) -> true;                           %% Thai Combining Diacritics
is_combining_char(C) when C >= 16#0e34, C =< 16#0e3a -> true;
is_combining_char(C) when C >= 16#0e47, C =< 16#0e4e -> true;
is_combining_char(16#0eb1) -> true;                           %% Lao Combining Diacritics
is_combining_char(C) when C >= 16#0eb4, C =< 16#0eb9 -> true;
is_combining_char(C) when C >= 16#0ebb, C =< 16#0ebc -> true;
is_combining_char(C) when C >= 16#0ec8, C =< 16#0ecd -> true;
is_combining_char(C) when C >= 16#0f18, C =< 16#0f19 -> true; %% Tibetan Combining Diacritics
is_combining_char(16#0f35) -> true;
is_combining_char(16#0f37) -> true;
is_combining_char(16#0f39) -> true;
is_combining_char(16#0f3e) -> true;
is_combining_char(16#0f3f) -> true;
is_combining_char(C) when C >= 16#0f71, C =< 16#0f84 -> true;
is_combining_char(C) when C >= 16#0f86, C =< 16#0f8b -> true;
is_combining_char(C) when C >= 16#0f90, C =< 16#0f95 -> true;
is_combining_char(16#0f97) -> true;
is_combining_char(C) when C >= 16#0f99, C =< 16#0fad -> true;
is_combining_char(C) when C >= 16#0fb1, C =< 16#0fb7 -> true;
is_combining_char(16#0fb9) -> true;
is_combining_char(C) when C >= 16#20d0, C =< 16#20dc -> true; %% Math/Technical Combining Diacritics
is_combining_char(16#20e1) -> true;
is_combining_char(C) when C >= 16#302a, C =< 16#302f -> true; %% Ideographic Diacritics
is_combining_char(16#3099) -> true;                           %% Hiragana/Katakana Combining Diacritics
is_combining_char(16#309a) -> true;
is_combining_char(_) -> false.


%%----------------------------------------------------------------------
%% Function  : is_digit(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a digit.
%%              [88] Digit
%%              All ranges are alternatives (separated by ';') in the
%%              guard of one clause; a value outside every range falls
%%              through to the second clause.
%%----------------------------------------------------------------------
is_digit(C) when
      16#0030 =< C, C =< 16#0039;     %% Basic ASCII digits 0-9
      16#0660 =< C, C =< 16#0669;     %% Arabic Digits 0-9
      16#06F0 =< C, C =< 16#06F9;     %% Eastern Arabic-Indic Digits 0-9
      16#0966 =< C, C =< 16#096F;     %% Devanagari Digits 0-9
      16#09E6 =< C, C =< 16#09EF;     %% Bengali Digits 0-9
      16#0A66 =< C, C =< 16#0A6F;     %% Gurmukhi Digits 0-9
      16#0AE6 =< C, C =< 16#0AEF;     %% Gujarati Digits 0-9
      16#0B66 =< C, C =< 16#0B6F;     %% Oriya Digits 0-9
      16#0BE7 =< C, C =< 16#0BEF;     %% Tamil Digits 1-9 (range starts at 0BE7)
      16#0C66 =< C, C =< 16#0C6F;     %% Telugu Digits 0-9
      16#0CE6 =< C, C =< 16#0CEF;     %% Kannada Digits 0-9
      16#0D66 =< C, C =< 16#0D6F;     %% Malayalam Digits 0-9
      16#0E50 =< C, C =< 16#0E59;     %% Thai Digits 0-9
      16#0ED0 =< C, C =< 16#0ED9;     %% Lao Digits 0-9
      16#0F20 =< C, C =< 16#0F29 ->   %% Tibetan Digits 0-9
    true;
is_digit(_) ->
    false.


%%----------------------------------------------------------------------
%% Function  : is_extender(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an extender character.
%%              [89] Extender
%%              Single code points are compared exactly (=:=), ranges
%%              by their bounds; all are alternatives in one guard.
%%----------------------------------------------------------------------
is_extender(C) when
      C =:= 16#00B7;                  %% Middle Dot
      C =:= 16#02D0;                  %% Triangular Colon and Half Colon
      C =:= 16#02D1;
      C =:= 16#0387;                  %% Greek Ano Teleia
      C =:= 16#0640;                  %% Arabic Tatweel
      C =:= 16#0E46;                  %% Thai Maiyamok
      C =:= 16#0EC6;                  %% Lao Ko La
      C =:= 16#3005;                  %% Ideographic Iteration Mark
      16#3031 =< C, C =< 16#3035;     %% Japanese Kana Repetition Marks
      16#309D =< C, C =< 16#309E;     %% Japanese Hiragana Iteration Marks
      16#30FC =< C, C =< 16#30FE ->   %% Japanese Kana Iteration Marks
    true;
is_extender(_) ->
    false.



%%======================================================================
%% Callback and Continuation function handling
%%======================================================================
%%----------------------------------------------------------------------
%% Function  : event_callback(Event, State) -> Result
%% Parameters: Event = term()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Calls the event fun stored in State with the event, the
%%              tuple {CurrentLocation, Entity, LineNo} and the current
%%              event state, and returns State with the event state
%%              replaced by the fun's return value.
%%              A throw or exit from the fun is re-thrown as
%%              {event_receiver_error, State, Reason}; exceptions of
%%              class error are not intercepted here.
%%----------------------------------------------------------------------
event_callback(Event,
               #xmerl_sax_parser_state{
                  event_fun = EventFun,
                  event_state = OldEventState,
                  line_no = LineNo,
                  entity = Entity,
                  current_location = Location
                 } = State) ->
    try EventFun(Event, {Location, Entity, LineNo}, OldEventState) of
        UpdatedEventState ->
            State#xmerl_sax_parser_state{event_state = UpdatedEventState}
    catch
        throw:Thrown ->
            throw({event_receiver_error, State, Thrown});
        exit:ExitReason ->
            throw({event_receiver_error, State, {'EXIT', ExitReason}})
    end.

%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%% Result    : the value returned by NextCall
%% Description: Calls the continuation fun stored in State to read
%%              another chunk from the input stream, appends the chunk
%%              to Rest and calls NextCall(NewRest, NewState), where
%%              NewState carries the updated continuation state.
%%              A missing continuation fun, a throw/exit from it, or an
%%              empty chunk is reported through the ?fatal_error macro
%%              (defined outside this section).
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest,
   #xmerl_sax_parser_state{continuation_fun = ContFun,
                           continuation_state = ContState} = State,
   NextCall) ->
    %% Only the call to the continuation fun is protected; NextCall is
    %% invoked outside the try so its exceptions pass through untouched.
    case
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:ExitReason ->
                ?fatal_error(State, {'EXIT', ExitReason})
        end
    of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Chunk, NextContState} ->
            NextCall(?APPEND_STRING(Rest, Chunk),
                     State#xmerl_sax_parser_state{continuation_state = NextContState})
    end.

%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             P = term()
%%             NextCall = fun()
%% Result    : the value returned by NextCall
%% Description: Function that uses provided fun to read another chunk from
%%              input stream and calls the fun in NextCall with P as last parameter.
%%----------------------------------------------------------------------
%% Four-argument variant: after fetching and appending the next chunk it
%% calls NextCall(NewRest, NewState, P). Failures of the continuation
%% fun and an empty chunk are reported through the ?fatal_error macro
%% (defined outside this section).
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest,
   #xmerl_sax_parser_state{continuation_fun = ContFun,
                           continuation_state = ContState} = State,
   P, NextCall) ->
    %% Only the call to the continuation fun is protected; NextCall is
    %% invoked outside the try so its exceptions pass through untouched.
    case
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:ExitReason ->
                ?fatal_error(State, {'EXIT', ExitReason})
        end
    of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Chunk, NextContState} ->
            NextCall(?APPEND_STRING(Rest, Chunk),
                     State#xmerl_sax_parser_state{continuation_state = NextContState},
                     P)
    end.


%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P1, P2, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%%             P1 = term()
%%             P2 = term()
%% Result    : the value returned by NextCall
%% Description: Function that uses provided fun to read another chunk from
%%              input stream and calls the fun in NextCall with P1 and
%%              P2 as last parameters.
%%----------------------------------------------------------------------
%% Five-argument variant: after fetching and appending the next chunk it
%% calls NextCall(NewRest, NewState, P1, P2). Failures of the
%% continuation fun and an empty chunk are reported through the
%% ?fatal_error macro (defined outside this section).
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P1, _P2, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest,
   #xmerl_sax_parser_state{continuation_fun = ContFun,
                           continuation_state = ContState} = State,
   P1, P2, NextCall) ->
    %% Only the call to the continuation fun is protected; NextCall is
    %% invoked outside the try so its exceptions pass through untouched.
    case
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:ExitReason ->
                ?fatal_error(State, {'EXIT', ExitReason})
        end
    of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Chunk, NextContState} ->
            NextCall(?APPEND_STRING(Rest, Chunk),
                     State#xmerl_sax_parser_state{continuation_state = NextContState},
                     P1, P2)
    end.



%%----------------------------------------------------------------------
%% Function  : unicode_incomplete_check(Args, ErrString) -> Result
%% Parameters: Args = [Bytes, State | RestOfArgs]
%%             Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             RestOfArgs = remaining arguments for cf/3, cf/4 or cf/5
%%             ErrString = string()
%% Result    : the result of the cf call, or a ?fatal_error report
%% Description: For binary input, decodes Bytes with the encoding from
%%              State. If the binary ends in an incomplete character,
%%              more input is fetched by applying cf to Args. If the
%%              binary cannot be decoded, a "Bad character" fatal error
%%              is reported. Otherwise, and always for list input, a
%%              fatal error with ErrString is reported; there is no
%%              matching clause when ErrString is undefined.
%%----------------------------------------------------------------------
unicode_incomplete_check([Bytes, #xmerl_sax_parser_state{encoding = Encoding} = State | _] = Args,
                         ErrString) when is_binary(Bytes) ->
    Decoded = unicode:characters_to_list(Bytes, Encoding),
    case Decoded of
        {incomplete, _, _} ->
            apply(?MODULE, cf, Args);
        {error, _Encoded, _Rest} ->
            ?fatal_error(State,
                         lists:flatten(io_lib:format("Bad character, not in ~p\n", [Encoding])));
        _ when ErrString =/= undefined ->
            ?fatal_error(State, ErrString)
    end;
unicode_incomplete_check([Bytes, State | _], ErrString)
  when is_list(Bytes), ErrString =/= undefined ->
    ?fatal_error(State, ErrString).


%%----------------------------------------------------------------------
%% Function  : check_uri(Uri, CL) -> Result
%% Parameters: Uri = string()
%%             CL = string()
%% Result    : {atom(), string()}
%% Description: Classifies Uri, using the current location CL to
%%              resolve it when it is not itself an http or file URI.
%%                "http://..."  -> {http, Uri}
%%                "file://Path" -> {file, Path}
%%              Anything else is treated as a path. If CL carries a
%%              scheme ("Scheme://..."), the result is
%%              {Scheme, CL ++ "/" ++ Path}; otherwise the path is
%%              resolved on the local file system, see
%%              resolve_local_path/3.
%%----------------------------------------------------------------------
check_uri("http://" ++ _ = Url, _CL) ->
    {http, Url};
check_uri("file://" ++ Path, _CL) ->
    {file, Path};
check_uri(Path, CL) -> % ordinary filepath other URI's not supported yet
    %% "file://" already removed when current_location set
    case get_uri_tag(CL) of
        false ->
            {file, resolve_local_path(filename:pathtype(Path), Path, CL)};
        Tag ->
            %% The path type is irrelevant when CL is a URI with a scheme
            {Tag, CL ++ "/" ++ Path}
    end.

%% Resolves Path against the local directory CL according to its path type.
resolve_local_path(relative, Path, CL) ->
    filename:join(CL, Path);
resolve_local_path(absolute, Path, _CL) ->
    filename:absname(Path);
resolve_local_path(volumerelative, Path, CL) -> % only windows
    %% Reuse the volume (drive) part of the current location
    [Vol | _] = re:split(CL, ":", [{return,list}]),
    filename:join(Vol ++ ":", Path).

%%----------------------------------------------------------------------
%% Function  : get_uri_tag(Uri) -> Result
%% Parameters: Uri = string()
%% Result    : atom() | false
%% Description: Returns the scheme in front of the first "://" in Uri
%%              as an atom, or false if Uri contains no "://".
%%              http / file is the only supported URI for the moment
%%----------------------------------------------------------------------
get_uri_tag(Uri) ->
    %% {parts,2} splits at the first "://" only, so a later "://" (for
    %% instance inside a query string) cannot produce a third part that
    %% neither clause below would match.
    case re:split(Uri, "://", [{return,list}, {parts,2}]) of
        [Tag, _] ->
            %% NOTE(review): list_to_atom/1 creates atoms from the URI
            %% text; confirm that the current location can never be
            %% attacker controlled, otherwise restrict to known schemes.
            list_to_atom(Tag);
        [_] ->
            false
    end.

%%----------------------------------------------------------------------
%% Function  : http_get_file(Host, Port, Key) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Key = string()
%% Result    : string()
%% Description: Fetches Key from Host:Port with an HTTP/1.0 GET request
%%              and stores the message body in a newly created
%%              temporary file, whose name is returned.
%%              Throws {error, Reason} on failure. Once the temporary
%%              file exists, every such failure (connect, send,
%%              receive) closes and deletes it before the error is
%%              re-thrown.
%%----------------------------------------------------------------------
http_get_file(Host, Port, Key) ->
    ConnectTimeOut = 10000,
    SendTimeout = 10000,
    FilenameTempl = filename:basename(Key),

    {Filename, FD} = create_tempfile(FilenameTempl),
    try
        %% The connection is set up inside the try so that a failed
        %% connect does not leave the temporary file behind.
        Socket = create_connection(Host, Port, ConnectTimeOut),
        Request = "GET " ++ Key ++ " HTTP/1.0\r\n\r\n",
        case gen_tcp:send(Socket, Request) of
            ok ->
                receive_msg(Socket, FD, true, SendTimeout);
            {error, _Reason} ->
                %% Nothing will be read from this socket; release it
                gen_tcp:close(Socket),
                throw({error, lists:flatten(io_lib:format("Couldn't fetch http://~s:~p/~s",
                                                          [Host, Port, Key]))})
        end
    catch
        throw:{error, Error} ->
            ok = file:close(FD),
            ok = file:delete(Filename),
            throw({error, Error})
    end,
    ok = file:close(FD),
    Filename.

%%----------------------------------------------------------------------
%% Function  : receive_msg(Socket, FD, WaitForHeader, Timeout) -> Result
%% Parameters: Socket = io_device()
%%             FD = io_device()
%%             WaitForHeader = boolean()
%%             Timeout = integer()
%% Result    : ok
%% Description: Receives {tcp, Socket, Data} messages until the socket
%%              is closed and writes the data to FD. If WaitForHeader
%%              is not false, everything up to and including the first
%%              empty line ("\r\n\r\n") is skipped first; the header
%%              may be spread over any number of packets.
%%              Throws {error, Reason} on a socket error or when no
%%              message arrives within Timeout milliseconds.
%%----------------------------------------------------------------------
receive_msg(Socket, FD, WaitForHeader, Timeout) when WaitForHeader == false ->
    receive
        {tcp_closed, Socket} ->
            ok;
        {tcp, Socket, Response} ->
            ok = file:write(FD, Response),
            receive_msg(Socket, FD, false, Timeout);
        {tcp_error, Socket, _Reason} ->
            gen_tcp:close(Socket),
            throw({error, "http connection failed"})
    after Timeout ->
            gen_tcp:close(Socket),
            throw({error, "http connection timedout"})
    end;
receive_msg(Socket, FD, _WaitForHeader, Timeout) ->
    receive_header(Socket, FD, <<>>, Timeout).

%% Skips the HTTP header. Pending holds the end of the data seen so far,
%% so that a header terminator split between two packets is still found.
receive_header(Socket, FD, Pending, Timeout) ->
    receive
        {tcp_closed, Socket} ->
            ok;
        {tcp, Socket, Response} ->
            Data = <<Pending/binary, Response/binary>>,
            case binary:match(Data, <<"\r\n\r\n">>) of
                nomatch ->
                    receive_header(Socket, FD, header_tail(Data), Timeout);
                _Found ->
                    ok = file:write(FD, remove_header(Data)),
                    receive_msg(Socket, FD, false, Timeout)
            end;
        {tcp_error, Socket, _Reason} ->
            gen_tcp:close(Socket),
            throw({error, "http connection failed"})
    after Timeout ->
            gen_tcp:close(Socket),
            throw({error, "http connection timedout"})
    end.

%% Keeps at most the last three bytes: the longest proper prefix of
%% "\r\n\r\n" that can still be completed by the next packet.
header_tail(Data) when byte_size(Data) > 3 ->
    Skip = byte_size(Data) - 3,
    <<_:Skip/binary, Tail/binary>> = Data,
    Tail;
header_tail(Data) ->
    Data.


%% Returns what follows the first "\r\n\r\n" in the binary. There is no
%% clause for a binary without that sequence, so callers must make sure
%% it is present.
remove_header(<<"\r\n\r\n", MsgBody/binary>>) ->
    MsgBody;
remove_header(<<_C, Rest/binary>>) ->
    remove_header(Rest).

%%----------------------------------------------------------------------
%% Function  : create_connection(Host, Port, Timeout) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Timeout = integer()
%% Result    : io_device()
%% Description: Opens a TCP connection in binary mode to Host:Port and
%%              returns the socket. Throws {error, Reason} with a
%%              descriptive string if the connection cannot be made
%%              within Timeout milliseconds.
%%----------------------------------------------------------------------
create_connection(Host, Port, Timeout) ->
    case gen_tcp:connect(Host, Port,[{packet,0}, binary, {reuseaddr,true}], Timeout) of
        {ok,Socket} ->
            Socket;
        {error, Reason} ->
            throw({error, lists:flatten(io_lib:format("Can't connect to ~s:~p ~p\n",
                                                      [Host, Port, Reason]))})
    end.

%%----------------------------------------------------------------------
%% Function  : http(Url) -> Result
%% Parameters: Url = string()
%% Result    : {Host, PortInt, Key}
%% Description: Splits an "http://Host[:Port]/Path" URL into host, port
%%              and key (the path including its leading "/"). The
%%              default port ?HTTP_DEF_PORT is used when none is given.
%%              Throws {error, Reason} for a malformed URL.
%%----------------------------------------------------------------------
http("http://" ++ Address) ->
    %% Cut off the path first: a ':' may legally occur in the path or
    %% query and must not be mistaken for the port separator.
    {HostPort, Key} = split_to_slash(Address, []),
    case string:tokens(HostPort, ":") of
        [Host, Port] ->
            %% At this stage we know that the address contains a port number.
            try list_to_integer(Port) of
                PortInt ->
                    {Host, PortInt, Key}
            catch
                error:badarg ->
                    throw({error, "Malformed key; port not an integer, should be http://Host:Port/path or http://Host/path"})
            end;
        [HostPort] ->
            %% Use default port
            {HostPort, ?HTTP_DEF_PORT, Key};
        _What ->
            throw({error, "Malformed key; should be http://Host:Port/path or http://Host/path"})
    end.

%%----------------------------------------------------------------------
%% Function  : split_to_slash(String, Acc) -> Result
%% Parameters: String = string()
%%             Acc = string()
%% Result    : {string(), string()}
%% Description: Splits String at its first "/". Returns the part before
%%              the slash and the rest, which still starts with the
%%              slash. Acc holds the characters consumed so far in
%%              reverse order; callers pass []. Throws {error, Reason}
%%              if String contains no "/".
%%----------------------------------------------------------------------
split_to_slash([], _Acc) ->
    throw({error, "No Key given Host:Port/Key"});
split_to_slash([$/|Rest], Acc) ->
    {lists:reverse(Acc), [$/|Rest]};
split_to_slash([H|T], Acc) ->
    split_to_slash(T, [H|Acc]).


%%----------------------------------------------------------------------
%% Function  : create_tempfile(Template) -> Result
%% Parameters: Template = string()
%% Result    : {string(), io_device()}
%% Description: Creates a new file named <OsPid><Template>.<N> in the
%%              temporary directory ("/tmp" on unix, the value of TMP
%%              or TEMP on win32) and returns its name together with
%%              the descriptor it was opened with. Throws
%%              {error, Reason} if the temporary directory cannot be
%%              determined or the file cannot be created.
%%----------------------------------------------------------------------
create_tempfile(Template) ->
    TmpDir =
        case os:type() of
            {unix, _} ->
                case file:read_file_info("/tmp") of
                    {ok, _} ->
                        "/tmp";
                    {error,enoent} ->
                        throw({error, "/tmp doesn't exist"})
                end;
            {win32, _} ->
                case os:getenv("TMP") of
                    false ->
                        case os:getenv("TEMP") of
                            false ->
                                throw({error, "Variabel TMP or TEMP doesn't exist"});
                            P2 ->
                                P2
                        end;
                    P1 ->
                        P1
                end
        end,
    TmpNameBase = filename:join([TmpDir, os:getpid() ++ Template ++ "."]),
    create_tempfile_1(TmpNameBase, 1).

%% Tries TmpNameBase ++ "1", ++ "2", ... until a file can be created.
%% The file is opened in exclusive mode, so an existing file is never
%% truncated or reused. Only "already exists" leads to a retry with the
%% next number; any other error is persistent and is thrown instead of
%% being retried forever.
create_tempfile_1(TmpNameBase, N) ->
    FileName = TmpNameBase ++ integer_to_list(N),
    case file:open(FileName, [write, binary, exclusive]) of
        {ok, FD} ->
            {FileName, FD};
        {error, eexist} ->
            create_tempfile_1(TmpNameBase, N+1);
        {error, Reason} ->
            throw({error, lists:flatten(io_lib:format("Couldn't create temporary file ~ts: ~p",
                                                      [FileName, Reason]))})
    end.


%%----------------------------------------------------------------------
%% Function  : filter_endtag_stack(EndTagStack) -> Result
%% Parameters: EndTagStack = [{term(), string(), string(),
%%                             term(), nslist(), nslist()}]
%% Result    : [string()]
%% Description: Returns a stack with just local names, i.e. the third
%%              element of every tuple, in the original order.
%%----------------------------------------------------------------------
filter_endtag_stack(EndTagStack) ->
    filter_endtag_stack(EndTagStack,[]).

filter_endtag_stack([], Acc) ->
    lists:reverse(Acc);
filter_endtag_stack([{_,_,N,_,_,_}| Ts], Acc) ->
    filter_endtag_stack(Ts, [N |Acc]).


%%----------------------------------------------------------------------
%% Function  : format_error(Tag, State, Reason) -> Result
%% Parameters: Tag = atom(),
%%             State = xmerl_sax_parser_state()
%%             Reason = string()
%% Result    : {atom(), {string(), string(), integer()}, string(), [string()], event_state()}
%% Description: Builds the error tuple returned to the user: the tag,
%%              the position {CurrentLocation, Entity, LineNo}, the
%%              reason, the local names of the currently open tags and
%%              the event state, all taken from State.
%%----------------------------------------------------------------------
format_error(Tag, State, Reason) ->
    Location = State#xmerl_sax_parser_state.current_location,
    Entity = State#xmerl_sax_parser_state.entity,
    LineNo = State#xmerl_sax_parser_state.line_no,
    OpenTags = filter_endtag_stack(State#xmerl_sax_parser_state.end_tags),
    EventState = State#xmerl_sax_parser_state.event_state,
    {Tag, {Location, Entity, LineNo}, Reason, OpenTags, EventState}.
