%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

-module(xmerl_validate).

-export([validate/2]).


-include("xmerl.hrl").		% record def, macros
-include("xmerl_internal.hrl").


%% +type validate(xmerl_scanner(),xmlElement())->
%%              xmlElement() | {error,tuple()}.
%%
%% Entry point: validates the root element against the rules stored in
%% the scanner state.
%%  - Root name differs from the doctype name and the DTD was not
%%    supplied as an option: {error,{mismatched_root_element,Name,DTName}}.
%%  - Second argument is not an #xmlElement{}: {error,{no_xml_element,XML}}.
%%  - Otherwise the result of do_validation/4. The call runs under an
%%    old-style `catch`, so an exit/throw/error raised during validation
%%    is returned as a term ({'EXIT',Reason} or the thrown value) rather
%%    than propagated.
validate(#xmerl_scanner{doctype_name = DoctypeName, doctype_DTD = DTDOrigin},
         #xmlElement{name = RootName})
  when DoctypeName =/= RootName, DTDOrigin =/= option_provided ->
    {error, {mismatched_root_element, RootName, DoctypeName}};
validate(#xmerl_scanner{rules = RuleTab} = Scanner,
         #xmlElement{name = RootName} = Root) ->
    %% read_rules/2 is evaluated inside the catch on purpose: a failing
    %% table lookup is reported as a value, exactly like a failing
    %% validation.
    catch do_validation(read_rules(RuleTab, RootName), Root, RuleTab, Scanner);
validate(_, NotAnElement) ->
    {error, {no_xml_element, NotAnElement}}.



%% +type do_validation(undefined | xmlElement(),xmlElement(),
%%                     rules(),xmerl_scanner())->
%%      xmlElement() | {error,tuple()} | {error,tuple(),term()}.
%% Validates a single element against its element rule.
%%   El_Rule - the rule returned by read_rules/2 (`undefined` when the
%%             element has no declaration).
%%   XML     - the #xmlElement{} to validate.
%% Returns the element with validated attributes and content, or
%% {error,Reason} / {error,Reason,N}.
do_validation(undefined, #xmlElement{name = Name}, _Rules, _S) ->
    {error, {unknown_element, Name}};
do_validation(El_Rule, XML, Rules, S) ->
    %% The record field accesses are kept inside the `catch`, so a rule
    %% or node that is not an #xmlElement{} surfaces as {error,_} instead
    %% of crashing the caller.
    case catch valid_attributes(El_Rule#xmlElement.attributes,
                                XML#xmlElement.attributes, S) of
        {'EXIT', Reason} ->
            {error, Reason};
        {error, Reason} ->
            {error, Reason};
        ValidAttrs ->
            validate_element_content(El_Rule, XML, ValidAttrs, Rules, S)
    end.

%% Second half of do_validation/4: checks the children of XML against the
%% content model of El_Rule and, on success, rebuilds the element with
%% the validated attributes and content.
validate_element_content(El_Rule, XML, ValidAttrs, Rules, S) ->
    ContentRule = El_Rule#xmlElement.content,
    WSActionMode = ws_action_mode(El_Rule#xmlElement.elementdef,
                                  ContentRule, S),
    Content = XML#xmlElement.content,
    check_direct_ws_SDD(Content, WSActionMode),
    case valid_contents(ContentRule, Content, Rules, S, WSActionMode) of
        {error, _Reason} = Err2 ->
            Err2;
        {error, _Reason, _N} = Err3 ->
            Err3;
        ValidContent ->
            XML#xmlElement{attributes = ValidAttrs, content = ValidContent}
    end.

%% In `always_preserve` mode, exits with
%% {error,{illegal_whitespace_standalone_doc,XML}} when the first or the
%% last child is an #xmlText{} node. Any other mode is accepted.
check_direct_ws_SDD(XML, always_preserve) ->
    reject_text_at_head(XML, XML),
    reject_text_at_head(lists:reverse(XML), XML);
check_direct_ws_SDD(_, _) ->
    ok.

%% Exits if the head of Nodes is a text node; Content is only used for
%% the error report.
reject_text_at_head([#xmlText{} | _], Content) ->
    exit({error, {illegal_whitespace_standalone_doc, Content}});
reject_text_at_head(_, _) ->
    ok.

%% Chooses the whitespace mode for an element: `always_preserve` only for
%% an externally declared element with element content in a standalone
%% document, `preserve` otherwise.
ws_action_mode({external, _}, Content, #xmerl_scanner{standalone = yes}) ->
    case element_content(Content) of
        children ->
            always_preserve;
        _ ->
            preserve
    end;
ws_action_mode(_, _, _) ->
    preserve.

%% Classifies a content model as `mixed` (it starts with '#PCDATA') or
%% `children`.
%% The '#PCDATA' clauses must come before the catch-all: previously a
%% generic "any atom" clause was listed first, which made the bare
%% '#PCDATA' clause unreachable and classified it as `children`.
element_content('#PCDATA') ->
    mixed;
element_content(['#PCDATA' | _T]) ->
    mixed;
element_content({choice, L}) when is_list(L) ->
    element_content(L);
element_content({seq, L}) when is_list(L) ->
    element_content(L);
element_content({'*', Rest}) ->
    element_content(Rest);
element_content(_) ->
    children.

%% +type read_rules(DTD::atom(),Element_Name::atom())->
%%      undefined | xmlElement().
%% Looks up the element definition for Name in the rules ETS table T.
%% The pseudo element `pcdata` is returned unchanged; an undeclared
%% element yields `undefined`.
read_rules(_, pcdata) ->
    pcdata;
read_rules(T, Name) ->
    case ets:lookup(T, {elem_def, Name}) of
        [] ->
            undefined;
        [{_K, V}] ->
            V
    end.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% Attributes Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% +deftype attribute_rule() = {Attr_Name::atom(),attribute_type(),
%%                              Default,DefaultDecl,Env}.

%% +type valid_attributes([attribute_rule()],[xmlAttribute()],
%%                        xmerl_scanner())->
%%      [xmlAttribute()] | exit({error,{attribute_unknown,Name}}).
%%
%% Validates the attributes of one element against its declared rules:
%%  - at most one attribute of type 'ID' may be declared,
%%  - NMTOKEN(S)/IDREFS values are checked,
%%  - every attribute present must be declared,
%%  - every rule is applied through valid_attribute/7, which also adds
%%    default and fixed values.
%% Returns the flat list of resulting attributes; violations exit.
valid_attributes(All_Attr, [#xmlAttribute{} | _T] = Attr, S) ->
    single_ID_definition(All_Attr),
    vc_Name_Token_IDREFS(All_Attr, Attr),
    lists:foreach(fun(#xmlAttribute{name = Name}) ->
                          case is_attribute_exist(Name, All_Attr) of
                              true ->
                                  ok;
                              false ->
                                  exit({error, {attribute_unknown, Name}})
                          end
                  end,
                  Attr),
    apply_attribute_rules(All_Attr, Attr, S);
valid_attributes([], [], _) ->
    [];
valid_attributes(All_Attr, [], S) ->
    single_ID_definition(All_Attr),
    apply_attribute_rules(All_Attr, [], S).

%% Applies every attribute rule, in declaration order, to the attributes
%% present. Built in one linear pass instead of repeatedly appending to
%% an accumulator.
apply_attribute_rules(All_Attr, Attr, S) ->
    lists:flatten([apply_attribute_rule(Rule, Attr, S) || Rule <- All_Attr]).

%% A rule that is not a 5-tuple fails here with function_clause.
apply_attribute_rule({Name, DataType, IF, DefDecl, Env}, Attr, S) ->
    valid_attribute(Name, DataType, IF, DefDecl, Attr, Env, S).

%%%% [60] DefaultDecl::=
%%%%       '#REQUIRED' | '#IMPLIED'
%%%%       | (('#FIXED' S)? AttValue)
%% +deftype attribute_priority = '#REQUIRED'|'#FIXED'|'#IMPLIED'.

%% +type valid_attribute(Name::atom(),DataType::attribute_value(),
%%                       IF::attribute_priority(),[xmlAttribute()])->
%%      [xmlAttribute()] | exit().
%% Applies one attribute rule to the attributes present on an element.
%%   Name               - attribute name of the rule
%%   DataType           - declared attribute type
%%   IF                 - declared default/fixed value, when there is one
%%   DefaultDecl        - '#REQUIRED' | '#IMPLIED' | '#FIXED' | other
%%   List_of_Attributes - attributes found on the element
%%   Env, S             - declaration environment and scanner state
%% Returns the (possibly defaulted) #xmlAttribute{}, or [] for an absent
%% '#IMPLIED' attribute. Violations exit with {error,Reason}.
valid_attribute(Name, DataType, IF, DefaultDecl, List_of_Attributes, Env, S) ->
    Standalone = S#xmerl_scanner.standalone,
    Found = search_attr(Name, List_of_Attributes),
    check_SDD_validity(Standalone, Env, Found, IF),
    resolve_attribute(DefaultDecl, IF, Found, Name, DataType, S).

%% Decision table for valid_attribute/7; clause order is significant.
resolve_attribute('#REQUIRED', _, no_attribute, Name, _DataType, _S) ->
    exit({error, {Name, is_required}});
resolve_attribute('#IMPLIED', _, no_attribute, _Name, _DataType, _S) ->
    %% absent and no default value
    [];
resolve_attribute('#FIXED', Fixed, #xmlAttribute{value = Fixed} = Attr,
                  _Name, _DataType, _S) ->
    Attr;
resolve_attribute('#FIXED', Fixed, no_attribute, Name, _DataType, _S) ->
    %% the declared FIXED value becomes the default
    #xmlAttribute{name = Name, value = Fixed};
resolve_attribute('#FIXED', Fixed, Actual, _Name, _DataType, _S) ->
    exit({error, {fixed_default_value_missmatch, Fixed, Actual}});
resolve_attribute(_, Default, no_attribute, Name, _DataType, _S)
  when is_list(Default) ->
    #xmlAttribute{name = Name, value = Default};
resolve_attribute(_, Default, #xmlAttribute{} = Attr, _Name, DataType, S) ->
    %% check both the attribute value and the declared default
    test_attribute_value(DataType, Attr, Default, S);
resolve_attribute(DefDecl, Else, XML, _Name, _DataType, _S) ->
    exit({error, {unknow_attribute_type, DefDecl, Else, XML}}).

%% For every rule of type NMTOKEN, NMTOKENS or IDREFS whose attribute is
%% present in Attrs, checks the attribute value with
%% valid_nmtoken_value/2 or valid_IDREFS/2. Other rules are skipped.
%% Returns ok; violations exit inside the value checkers.
vc_Name_Token_IDREFS(AttrRules, Attrs) ->
    lists:foreach(fun(Rule) -> vc_token_rule(Rule, Attrs) end, AttrRules).

vc_token_rule({Name, Type, _, _, _}, Attrs)
  when Type == 'NMTOKEN'; Type == 'NMTOKENS' ->
    case lists:keyfind(Name, #xmlAttribute.name, Attrs) of
        false ->
            ok;
        Attr ->
            valid_nmtoken_value(Attr#xmlAttribute.value, Type)
    end;
vc_token_rule({Name, Type, _, _, _}, Attrs) when Type == 'IDREFS' ->
    case lists:keyfind(Name, #xmlAttribute.name, Attrs) of
        false ->
            ok;
        Attr ->
            valid_IDREFS(Attr#xmlAttribute.value, Type)
    end;
vc_token_rule(_Rule, _Attrs) ->
    ok.

%% Checks the characters of an NMTOKEN / NMTOKENS attribute value.
%%   Nmtok - the attribute value (string)
%%   Type  - 'NMTOKEN' | 'NMTOKENS'
%% Returns ok. Exits with {error,{at_least_one_Nmtoken_required}} for an
%% empty NMTOKENS value and with {error,{invalid_character_in_Nmtoken,C}}
%% for the first character that is not allowed.
%%
%% Whitespace is a separator and therefore legal only for 'NMTOKENS'.
%% The previous guard compared the *value* with the atom 'NMTOKENS' and,
%% because ?whitespace/1 expands to `;`-separated alternatives, accepted
%% space/CR/LF for every type while never accepting TAB.
valid_nmtoken_value([], 'NMTOKENS') ->
    exit({error, {at_least_one_Nmtoken_required}});
valid_nmtoken_value(Nmtok, Type) ->
    lists:foreach(fun(Ch) -> validate_nmtoken_char(Ch, Type) end, Nmtok).

%% The type is matched in the head so that the whole whitespace guard
%% applies only to 'NMTOKENS'.
validate_nmtoken_char(Ch, 'NMTOKENS') when ?whitespace(Ch) ->
    ok;
validate_nmtoken_char(Ch, _Type) ->
    case xmerl_lib:is_namechar(Ch) of
        false ->
            exit({error, {invalid_character_in_Nmtoken, Ch}});
        _ ->
            ok
    end.

%% An IDREFS value must not be empty; the names themselves are not
%% checked here.
valid_IDREFS([], 'IDREFS') ->
    exit({error, {at_least_one_IDREF_Name_required}});
valid_IDREFS(_Str, 'IDREFS') ->
    ok.

%% Exits with {error,{just_one_ID_definition_allowed,A1,A2}} when the
%% rule list declares more than one attribute of type 'ID'; otherwise ok.
single_ID_definition([{_, 'ID', _, _, _} = Att1 | Rest]) ->
    case lists:keyfind('ID', 2, Rest) of
        false ->
            ok;
        Att2 ->
            exit({error, {just_one_ID_definition_allowed, Att1, Att2}})
    end;
single_ID_definition([_H | T]) ->
    single_ID_definition(T);
single_ID_definition([]) ->
    ok.

%% Standalone document declaration checks for one attribute.
%% With standalone=yes and an externally declared attribute, exits when
%% the attribute value was normalized, or when the attribute is absent
%% although the fourth argument (its declared default) is not `no_value`.
%% Every other combination is ok.
check_SDD_validity(yes, {external, _},
                   #xmlAttribute{name = Name, normalized = true}, _) ->
    exit({error, {externally_defed_attribute_normalized_in_standalone_doc, Name}});
check_SDD_validity(yes, {external, _}, no_attribute, V) when V /= no_value ->
    exit({error, {externally_defed_attribute_with_default_value_missing_in_standalone_doc}});
check_SDD_validity(_, _, _, _) ->
    ok.

%% Returns the first #xmlAttribute{} called Name, or `no_attribute`.
%% Scanning stops at the first list element that is not an
%% #xmlAttribute{}.
search_attr(Name, [#xmlAttribute{name = Name} = H | _T]) ->
    H;
search_attr(Name, [#xmlAttribute{} | T]) ->
    search_attr(Name, T);
search_attr(_Name, _T) ->
    no_attribute.

%% True when the rule list contains a rule for attribute Name.
is_attribute_exist(Name, [{Name, _, _, _, _} | _T]) ->
    true;
is_attribute_exist(Name, [{_Attr, _, _, _, _} | T]) ->
    is_attribute_exist(Name, T);
is_attribute_exist(_Name, []) ->
    false.

%%%%[54] AttType::= StringType | TokenizedType | EnumeratedType
%%%%[55] StringType::= 'CDATA'
%%%%[56] TokenizedType::= 'ID'|'IDREF'| 'IDREFS'|'ENTITY'| 'ENTITIES'
%%%%            | 'NMTOKEN'| 'NMTOKENS'
%%%%[57] EnumeratedType::= NotationType | Enumeration
%%%%[58] NotationType::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
%%%%[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'

%% +deftype attribute_type()-> 'CDATA' | 'ID'|'IDREF'| 'IDREFS'|'ENTITY'|
%%                             'ENTITIES'| 'NMTOKEN'| 'NMTOKENS' |
%%                             {enumeration,[List_of_value::atom()]} |
%%                             {notation,[List_of_value::atom()]}.

%% +type test_attribute_value(attribute_type(),xmlAttribute(),
%%                            Default,xmerl_scanner())->
%%      xmlAttribute() | exit.
%%%% Tests the validity constraints on an attribute value (and, where
%%%% applicable, on the declared default value). Returns the attribute
%%%% unchanged when valid; violations exit with {error,Reason}.
test_attribute_value('CDATA', #xmlAttribute{} = Attr, _, _) ->
    Attr;
test_attribute_value('NMTOKEN', #xmlAttribute{name = Name, value = V} = Attr,
                     Default, _S) ->
    CheckChar =
        fun(Ch) ->
                case xmerl_lib:is_namechar(Ch) of
                    true ->
                        ok;
                    false ->
                        exit({error, {invalid_value_nmtoken, Name, V}})
                end
        end,
    lists:foreach(CheckChar, V),
    check_default_chars(CheckChar, Default),
    Attr;
test_attribute_value('NMTOKENS', #xmlAttribute{name = Name, value = V} = Attr,
                     Default, _S) ->
    CheckChar =
        fun(Ch) ->
                case xmerl_lib:is_namechar(Ch) of
                    true ->
                        ok;
                    false when ?whitespace(Ch) ->
                        %% whitespace separates the tokens
                        ok;
                    false ->
                        exit({error, {invalid_value_nmtokens, Name, V}})
                end
        end,
    lists:foreach(CheckChar, V),
    check_default_chars(CheckChar, Default),
    Attr;
test_attribute_value(Ent, #xmlAttribute{value = V} = Attr, _Default,
                     S = #xmerl_scanner{rules_read_fun = Read})
  when Ent == 'ENTITY'; Ent == 'ENTITIES' ->
    %% The default value is already checked.
    %% All names are scanned before the first lookup is made.
    Names = entity_names(V),
    lists:foreach(
      fun(EntityName) ->
              case Read(entity, EntityName, S) of
                  {_, external, {_, {ndata, _}}} ->
                      ok;
                  _ ->
                      exit({error, {vc_Entity_Name, EntityName, V}})
              end
      end,
      Names),
    Attr;
test_attribute_value({Type, L}, #xmlAttribute{value = Value} = Attr, Default, _S)
  when Type == enumeration; Type == notation ->
    %% An atom default means there is no default string to check.
    ValidDefault = is_atom(Default) orelse is_declared_token(Default, L),
    %% Duplicate tokens are only rejected for enumerations.
    NoDuplicates = Type =:= notation orelse has_unique_tokens(L),
    case {is_declared_token(Value, L), ValidDefault, NoDuplicates} of
        {true, true, true} ->
            Attr;
        {false, _, _} ->
            exit({error, {attribute_value_unknow, Value, {list, L}}});
        {_, false, _} ->
            exit({error, {attribute_default_value_unknow, Default, {list, L}}});
        {_, _, false} ->
            exit({error, {duplicate_tokens_not_allowed, {list, L}}})
    end;
test_attribute_value(_Rule, Attr, _, _) ->
    Attr.

%% Runs CheckChar over the declared default when it is a string.
check_default_chars(CheckChar, Default) when is_list(Default) ->
    lists:foreach(CheckChar, Default);
check_default_chars(_CheckChar, _Default) ->
    ok.

%% Splits an ENTITY/ENTITIES value into its names using scan_name/2.
entity_names([]) ->
    [];
entity_names(Str) ->
    {Name, Rest} = scan_name(Str, []),
    [Name | entity_names(Rest)].

%% True when the string Str names one of the atoms in Tokens.
%% list_to_existing_atom/1 is used instead of list_to_atom/1: the value
%% comes from the document being validated, and creating a new atom for
%% every unknown value would let hostile input fill the atom table. A
%% token that is a member of Tokens already exists as an atom, so a
%% failed conversion simply means "not a member".
is_declared_token(Str, Tokens) ->
    try list_to_existing_atom(Str) of
        Atom ->
            lists:member(Atom, Tokens)
    catch
        error:badarg ->
            false
    end.

%% True when no element occurs twice in the list.
has_unique_tokens([]) ->
    true;
has_unique_tokens([H | T]) ->
    case lists:member(H, T) of
        true ->
            false;
        _ ->
            has_unique_tokens(T)
    end.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%% Contents Validation   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%Element-content Models
%%%%[47] children::= (choice | seq) ('?' | '*' | '+')?
%%%%[48] cp::= (Name | choice | seq) ('?' | '*' | '+')?
%%%%[49] choice::= '(' S? cp ( S? '|' S? cp )+ S? ')'
%%%%[50] seq::= '(' S? cp ( S? ',' S? cp )* S? ')'
%%%%[51] Mixed::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
%%%%            | '(' S? '#PCDATA' S? ')'


%% +type valid_contents(rule(),[xmlElement()],rules(),
%%                      xmerl_scanner(),ws_action_mode())->
%%      [xmlElement()] | {error,term()} | {error,term(),term()}.
%% Validates the children XMLS of an element against its content model
%% Rule. Returns the flattened list of validated children, or
%% {error,Reason} / {error,Reason,N} from parse/5. Exits when an element
%% or text node is left over after the rule has been applied; left-over
%% comments are ignored.
valid_contents(Rule, XMLS, Rules, S, WSActionMode) ->
    case parse(Rule, XMLS, Rules, WSActionMode, S) of
        {error, Reason} ->
            {error, Reason};
        {error, Reason, N} ->
            {error, Reason, N};
        {XML_N, Rest} -> % Rest may consist of #xmlComment{} records only
            case lists:dropwhile(fun(#xmlComment{}) -> true;
                                    (_) -> false
                                 end, Rest) of
                [] ->
                    lists:flatten(XML_N);
                [#xmlElement{name = Name} | _T] ->
                    exit({error, {element, Name, isnt_comprise_in_the_rule, Rule}});
                [#xmlText{} = Txt | _T] ->
                    exit({error, {element, text, Txt, isnt_comprise_in_the_rule, Rule}})
            end
    end.

%% Matches one content-model rule against the head of the node list.
%% Returns {MatchedTree, RemainingNodes}, {error,Reason} or
%% {error,Reason,{{next,Nodes},{act,Tree}}}.
%% Clause order is significant: the first matching clause wins.
parse({'*', SubRule}, XMLS, Rules, WSaction, S) ->
    star(SubRule, XMLS, Rules, WSaction, [], S);
parse({'+', SubRule}, XMLS, Rules, WSaction, S) ->
    plus(SubRule, XMLS, Rules, WSaction, S);
parse({choice, CHOICE}, XMLS, Rules, WSaction, S) ->
    choice(CHOICE, XMLS, Rules, WSaction, S);
parse(empty, [], _Rules, _WSaction, _S) ->
    {[], []};
parse({'?', SubRule}, XMLS, Rules, _WSaction, S) ->
    question(SubRule, XMLS, Rules, S);
parse({seq, List}, XMLS, Rules, WSaction, S) ->
    seq(List, XMLS, Rules, WSaction, S);
parse(El_Name, [#xmlElement{name = El_Name} = XML | T], Rules, _WSaction, S)
  when is_atom(El_Name) ->
    %% The expected element is next: validate it recursively. A failure
    %% in the child aborts the whole validation through exit/1.
    case do_validation(read_rules(Rules, El_Name), XML, Rules, S) of
        {error, R} ->
            exit(R);
        {error, R, _N} ->
            exit(R);
        XML_ ->
            {[XML_], T}
    end;
parse(any, Cont, Rules, _WSaction, S) ->
    %% NOTE(review): only a thrown {error,_} pair is recognised as an
    %% error here; any other caught term is returned as the matched
    %% content - confirm that this is intended.
    case catch parse_any(Cont, Rules, S) of
        Err = {error, _} -> Err;
        ValidContents -> {ValidContents, []}
    end;
parse(El_Name, [#xmlElement{name = Name} | _T] = XMLS, _Rules, _WSa, _S)
  when is_atom(El_Name) ->
    {error,
     {element_seq_not_conform, {wait, El_Name}, {is, Name}},
     {{next, XMLS}, {act, []}}};
parse(El_Name, [#xmlComment{} | T], Rules, WSa, S) ->
    %% comments are skipped
    parse(El_Name, T, Rules, WSa, S);
parse(_El_Name, [#xmlPI{} = H | T], _Rules, _WSa, _S) ->
    {[H], T};
parse('#PCDATA', XMLS, _Rules, _WSa, _S) ->
    %%% PCDATA is 0, 1 or more #xmlText{}.
    parse_pcdata(XMLS);
parse(El_Name, [#xmlText{} | _T] = XMLS, _Rules, _WSa, _S) ->
    {error,
     {text_in_place_of, El_Name},
     {{next, XMLS}, {act, []}}};
parse([], _, _, _, _) ->
    {error, no_rule};
parse(Rule, [], _, _, _) ->
    {error, {no_xml_element, Rule}}.

%% Content model ANY: validates every child element against its own
%% declaration and passes every other node through unchanged.
%% Called with a list it returns the list of validated nodes and throws
%% the first result that is not a one-element list; called with a single
%% node it returns [Node] or the error tuple from do_validation/4.
parse_any([], _Rules, _S) ->
    [];
parse_any([H | T], Rules, S) ->
    case parse_any(H, Rules, S) of
        [Cont] ->
            [Cont | parse_any(T, Rules, S)];
        Err ->
            throw(Err)
    end;
parse_any(#xmlElement{} = XML, Rules, S) ->
    case do_validation(read_rules(Rules, el_name(XML)), XML, Rules, S) of
        {error, R} ->
            {error, R};
        {error, R, N} ->
            {error, R, N};
        XML_ ->
            [XML_]
    end;
parse_any(El, _Rules, _S) ->
    [El].



%% Tries the alternatives of a choice, in order, against the node list.
%% Whitespace-only text around the match is stripped from the input and
%% kept in the result unless the mode is always_preserve.
choice([CH | CHS], [_XML | _T] = XMLS, Rules, WSaction, S) ->
    WSMode = ws_action(WSaction, remove),
    {WS, XMLS1} = whitespace_action(XMLS, WSMode),
    case parse(CH, XMLS1, Rules, WSMode, S) of
        {error, _R} ->
            choice(CHS, XMLS, Rules, WSaction, S);
        {error, _R, _N} ->
            choice(CHS, XMLS, Rules, WSaction, S);
        {[], Unconsumed} when Unconsumed =:= XMLS1 ->
            %% The alternative matched without consuming anything
            %% (maybe a sequence of * or ? elements that did not match).
            case CHS of
                [] -> % the choice succeeded without matching XMLS1
                    {[], XMLS1};
                _ ->  % there are more alternatives to try
                    choice(CHS, XMLS1, Rules, WSaction, S)
            end;
        {Tree, XMLS2} ->
            {WS2, XMLS3} = whitespace_action(XMLS2, WSMode),
            %% NOTE(review): trailing whitespace (WS2) is placed before
            %% the match and leading whitespace (WS) after it, unlike
            %% plus/5 and star/6 - kept as is, confirm whether intended.
            {WS2 ++ [Tree] ++ WS, XMLS3}
    end;
choice([], XMLS, _, WSaction, _S) ->
    %% No alternative matched: acceptable only if nothing but removable
    %% whitespace is left.
    case whitespace_action(XMLS, ws_action(WSaction, remove)) of
        Res = {_, []} ->
            Res;
        _ ->
            {error, element_unauthorize_in_choice, {{next, XMLS}, {act, []}}}
    end;
choice(_, [], _, _, _S) ->
    {[], []}.

%% One or more occurrences of Rule: one mandatory match followed by
%% star/6 for the rest.
plus(Rule, XMLS, Rules, WSaction, S) ->
    {WS, XMLS1} = whitespace_action(XMLS, WSaction),
    case parse(Rule, XMLS1, Rules, WSaction, S) of
        {error, Reason, _XML} ->
            {error, Reason};
        {error, X} ->
            {error, X};
        {Tree, XMLS2} ->
            case star(Rule, XMLS2, Rules, WSaction, [], S) of
                {[], _} ->
                    {WS ++ [Tree], XMLS2};
                {Tree_1, XMLS3} ->
                    {WS ++ [Tree] ++ Tree_1, XMLS3}
            end
    end.

%% Zero or more occurrences of Rule. Tree accumulates what has been
%% matched so far; matching stops at the first node that does not fit.
%% The empty input is detected by pattern instead of a length/1 guard,
%% which walked the whole remaining list on every iteration.
star(_Rule, [], _Rules, _WSa, Tree, _S) ->
    {[Tree], []};
star(Rule, XMLS, Rules, WSaction, Tree, S) ->
    {WS, XMLS1} = whitespace_action(XMLS, WSaction),
    case parse(Rule, XMLS1, Rules, WSaction, S) of
        {error, _E, {{next, N}, {act, A}}} ->
            {WS ++ Tree ++ A, N};
        {error, _E} ->
            case whitespace_action(XMLS, ws_action(WSaction, remove)) of
                {[], _} ->
                    {WS ++ [Tree], XMLS};
                {WS2, XMLS2} ->
                    {WS2 ++ [Tree], XMLS2}
            end;
        {Tree1, XMLS2} ->
            star(Rule, XMLS2, Rules, WSaction, Tree ++ WS ++ [Tree1], S)
    end.

%% Zero or one occurrence of Rule: a failed match consumes nothing.
question(_Rule, [], _Rules, _S) ->
    {[], []};
question(Rule, Toks, Rules, S) ->
    case parse(Rule, Toks, Rules, preserve, S) of
        {error, _E, _Next} ->
            {[], Toks};
        {error, _E} ->
            {[], Toks};
        {T, Toks1} ->
            {T, Toks1}
    end.

%% Matches a sequence of rules against the node list.
%% Returns {Tree,Rest}, {error,Reason} or {error,Reason,N}; the result of
%% seq2/6 is passed through unchanged.
seq(H, Toks, Rules, WSaction, S) ->
    seq2(H, Toks, Rules, [], WSaction, S).

%% Worker for seq/5; Tree accumulates the nodes matched so far.
seq2([], [], _, Tree, _WSa, _S) ->
    {Tree, []};
seq2([], [#xmlText{}] = XML, _, Tree, _WSa, _S) ->
    %% The rules are used up and a single text node is left.
    case whitespace_action(XML, remove) of
        {[], _} ->
            {error, sequence_finish, {{next, XML}, {act, Tree}}};
        {WS, Rest} ->
            {WS ++ Tree, Rest}
    end;
seq2([], Rest, _, Tree, _WSa, _S) ->
    {WS, Rest2} = whitespace_action(Rest, remove),
    {WS ++ Tree, Rest2};
seq2([H | T], Toks, Rules, Tree, WSaction, S) ->
    {WS, Toks1} = whitespace_action(Toks, ws_action(WSaction, remove)),
    case parse(H, Toks1, Rules, remove, S) of %% H may match only parts of Toks
        {error, Reason, _XML} ->
            {error, Reason};
        {error, E} ->
            {error, E};
        {[], Toks2} ->
            seq2(T, Toks2, Rules, Tree, WSaction, S);
        {Tree1, Toks2} when is_list(Tree1) ->
            seq2(T, Toks2, Rules, Tree ++ WS ++ Tree1, WSaction, S);
        {Tree1, Toks2} ->
            seq2(T, Toks2, Rules, Tree ++ WS ++ [Tree1], WSaction, S)
    end.

%% Name of an element node.
el_name(#xmlElement{name = Name}) ->
    Name.

%% Consumes the leading run of #xmlText{} nodes, dropping any
%% #xmlComment{} nodes in between. Returns {Texts, Rest} with the text
%% nodes in document order; Rest starts at the first node that is
%% neither text nor comment.
parse_pcdata(XML) ->
    collect_pcdata(XML, []).

%% Accumulates in reverse and reverses once at the end, instead of
%% appending to the accumulator for every text node.
collect_pcdata([#xmlText{} = H | T], RevAcc) ->
    collect_pcdata(T, [H | RevAcc]);
collect_pcdata([#xmlComment{} | T], RevAcc) ->
    collect_pcdata(T, RevAcc);
collect_pcdata(Rest, RevAcc) ->
    {lists:reverse(RevAcc), Rest}.

%% True when the string consists of whitespace characters only
%% (the empty string included).
whitespace([]) ->
    true;
whitespace([H | T]) when ?whitespace(H) ->
    whitespace(T);
whitespace(_) ->
    false.

%% In `remove` mode splits leading whitespace-only text nodes off the
%% node list; in any other mode nothing is split off.
%% Returns {WhitespaceNodes, Rest}.
whitespace_action(XML, remove) ->
    whitespace_remove(XML, []);
whitespace_action(XML, _) ->
    {[], XML}.

%% Moves leading text nodes (type=text) whose value is whitespace only
%% from the node list into the accumulator.
%% Returns {Removed, Rest}, Removed in document order.
whitespace_remove([#xmlText{value = Value, type = text} = Text | Tail] = Nodes,
                  Removed) ->
    case whitespace(Value) of
        true ->
            whitespace_remove(Tail, [Text | Removed]);
        _ ->
            {lists:reverse(Removed), Nodes}
    end;
whitespace_remove(Nodes, Removed) ->
    {lists:reverse(Removed), Nodes}.

%% Picks the effective whitespace mode: `always_preserve` overrides the
%% requested mode, anything else yields the requested mode.
ws_action(always_preserve = Mode, _Requested) ->
    Mode;
ws_action(_Mode, Requested) ->
    Requested.

%% Scans one name, delimited by a space character, off the front of a
%% string. Returns {NameAtom, RestOfString}; an atom argument is
%% returned as is (without the tuple).
%% NOTE(review): the name is converted with list_to_atom/1 - confirm the
%% input is bounded before this is used on untrusted documents.
scan_name(Atom, _Acc) when is_atom(Atom) ->
    Atom;
scan_name([$\s | Rest], RevName) ->
    {list_to_atom(lists:reverse(RevName)), Rest};
scan_name([Ch | Rest], RevName) ->
    scan_name(Rest, [Ch | RevName]);
scan_name([], RevName) ->
    {list_to_atom(lists:reverse(RevName)), []}.