1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2003-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20
21-module(xmerl_validate).
22
23-export([validate/2]).
24
25
26-include("xmerl.hrl").		% record def, macros
27-include("xmerl_internal.hrl").
28
29
%% +type validate(xmerl_scanner(),xmlElement())->
%%              xmlElement() | {error,tuple()}.
%%
%% Entry point of the validator. When the root element name differs from
%% the doctype name recorded in the scanner state, and the DTD was not
%% supplied as an option, a mismatched_root_element error is returned.
%% Otherwise the element is validated against the rule stored for its name.
%% The validation call is wrapped in `catch', so exits and throws raised
%% further down are returned to the caller as values.
%% Anything that is not a scanner state / element pair yields
%% {error,{no_xml_element,XML}}.
validate(#xmerl_scanner{} = Scanner, #xmlElement{name = RootName} = Root) ->
    #xmerl_scanner{doctype_name = DocTypeName,
                   doctype_DTD = DTDSource,
                   rules = RuleTab} = Scanner,
    case DocTypeName =/= RootName andalso DTDSource =/= option_provided of
        true ->
            {error, {mismatched_root_element, RootName, DocTypeName}};
        false ->
            catch do_validation(read_rules(RuleTab, RootName),
                                Root, RuleTab, Scanner)
    end;
validate(_, XML) ->
    {error, {no_xml_element, XML}}.
41
42
43
%% +type do_validation(rule() | undefined,xmlElement(),rules(),xmerl_scanner())->
%%              xmlElement() | {error,term()} | {error,term(),term()}.
%%
%% Validates one element against its rule: first the attributes, then the
%% content. On success the element is returned with its attribute list and
%% content replaced by the validated versions.
%% An `undefined' rule means no declaration was found for the element.
do_validation(undefined, #xmlElement{name = Name}, _Rules, _S) ->
    {error, {unknown_element, Name}};
do_validation(El_Rule, XML, Rules, S) ->
    %% The record accesses stay inside the `catch' so that a non-record
    %% rule is reported as an {error,_} value rather than crashing here.
    case catch valid_attributes(El_Rule#xmlElement.attributes,
                                XML#xmlElement.attributes, S) of
        {'EXIT', Reason} ->
            {error, Reason};
        {error, Reason} ->
            {error, Reason};
        ValidAttrs ->
            do_content_validation(El_Rule, XML, ValidAttrs, Rules, S)
    end.

%% Content phase of do_validation/4: picks the whitespace mode for the
%% element, runs the standalone-document whitespace check and validates
%% the children against the content model of the rule.
do_content_validation(#xmlElement{content = RuleContent, elementdef = ElemDef},
                      #xmlElement{content = Children} = XML,
                      ValidAttrs, Rules, S) ->
    WSActionMode = ws_action_mode(ElemDef, RuleContent, S),
    check_direct_ws_SDD(Children, WSActionMode),
    case valid_contents(RuleContent, Children, Rules, S, WSActionMode) of
        {error, _} = Err ->
            Err;
        {error, _, _} = Err ->
            Err;
        ValidChildren ->
            XML#xmlElement{attributes = ValidAttrs, content = ValidChildren}
    end.
72
%% Standalone-document check on the direct children of an element.
%% Only active in `always_preserve' mode: exits with
%% {error,{illegal_whitespace_standalone_doc,Content}} when the first or
%% the last node of Content is an #xmlText{}. Returns ok otherwise, and
%% for every other mode.
check_direct_ws_SDD(Content, always_preserve) ->
    ok = reject_text_at_head(Content, Content),
    reject_text_at_head(lists:reverse(Content), Content);
check_direct_ws_SDD(_, _) ->
    ok.

%% Exits when the head of Nodes is a text node; Content is the original
%% (unreversed) child list reported in the exit reason.
reject_text_at_head([#xmlText{} | _], Content) ->
    exit({error, {illegal_whitespace_standalone_doc, Content}});
reject_text_at_head(_Nodes, _Content) ->
    ok.
85
%% Selects the whitespace handling mode for an element.
%% `always_preserve' is chosen only when the element definition is tagged
%% {external,_}, the scanner state says standalone=yes and the content
%% model is classified as `children' by element_content/1. Every other
%% combination gives `preserve'.
ws_action_mode(ElementDef, Content, Scanner) ->
    case {ElementDef, Scanner} of
        {{external, _}, #xmerl_scanner{standalone = yes}} ->
            case element_content(Content) of
                children -> always_preserve;
                _ -> preserve
            end;
        _ ->
            preserve
    end.
95
%% Classifies a content model as `mixed' (it starts with '#PCDATA') or
%% `children' (anything else).
%%
%% The bare '#PCDATA' clause must come first: '#PCDATA' is itself an atom,
%% so a preceding generic atom clause would classify it as `children' and
%% the `mixed' clause could never be reached.
%% {choice,L}, {seq,L} and {'*',Rest} are unwrapped and classified by
%% their inner model; every other term, including `any', `empty' and plain
%% element names, is `children'.
element_content('#PCDATA') ->
    mixed;
element_content(['#PCDATA' | _T]) ->
    mixed;
element_content({choice, L}) when is_list(L) ->
    element_content(L);
element_content({seq, L}) when is_list(L) ->
    element_content(L);
element_content({'*', Rest}) ->
    element_content(Rest);
element_content(_) ->
    children.
109
%% +type read_rules(Rules::ets_table(),Element_Name::atom())->
%%              undefined | pcdata | xmlElement().
%%
%% Fetches the rule stored under the key {elem_def,Name} in the ETS table
%% Rules. Returns `undefined' when the table has no such entry. The name
%% `pcdata' is answered directly, without consulting the table.
read_rules(_, pcdata) ->
    pcdata;
read_rules(RuleTab, Name) ->
    Key = {elem_def, Name},
    case ets:lookup(RuleTab, Key) of
        [{_Key, Rule}] ->
            Rule;
        [] ->
            undefined
    end.
121
122%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
123%%%%%%%%%%%%% Attributes Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
124%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% +deftype attribute_rule() = {Attr_Name::atom(),attribute_type(),
%%                              Default::term(),attribute_priority(),
%%                              Env::term()}.
127
%% +type valid_attributes([attribute_rule()],[xmlAttribute()],xmerl_scanner())->
%%              [xmlAttribute()] | exit().
%%
%% Validates the attributes of one element against its attribute rules.
%% Checks, in order: at most one ID-typed rule, the lexical form of
%% NMTOKEN(S)/IDREFS values, that every attribute present is declared, and
%% finally each rule via valid_attribute/7. The result is the flat list of
%% attributes produced by the rules, so it may include defaulted ones.
%% Failures are signalled with exit({error,Reason}).
valid_attributes(All_Attr, [#xmlAttribute{} | _T] = Attr, S) ->
    single_ID_definition(All_Attr),
    vc_Name_Token_IDREFS(All_Attr, Attr),
    lists:foreach(fun(#xmlAttribute{name = Name}) ->
                          is_attribute_exist(Name, All_Attr)
                              orelse exit({error, {attribute_unknown, Name}})
                  end,
                  Attr),
    apply_attribute_rules(All_Attr, Attr, S);
valid_attributes([], [], _) ->
    [];
valid_attributes(All_Attr, [], S) ->
    single_ID_definition(All_Attr),
    apply_attribute_rules(All_Attr, [], S).

%% Runs valid_attribute/7 for every rule, in rule order, and flattens the
%% outcome (a rule yields either [] or one #xmlAttribute{}).
apply_attribute_rules(All_Attr, Given, S) ->
    Collected =
        lists:foldl(fun({Name, DataType, IF, DefDecl, Env}, Acc) ->
                            [valid_attribute(Name, DataType, IF, DefDecl,
                                             Given, Env, S) | Acc]
                    end, [], All_Attr),
    lists:flatten(lists:reverse(Collected)).
158
159%%%%  [60]      DefaultDecl::=
160%%%%                              '#REQUIRED' | '#IMPLIED'
161%%%%                            | (('#FIXED' S)? AttValue)
162%% +deftype attribute_priority = '#REQUIRED'|'#FIXED'|'#IMPLIED'.
163
%% +type valid_attribute(Name::atom(),DataType::attribute_type(),
%%                       IF::term(),DefaultDecl::attribute_priority(),
%%                       [xmlAttribute()],Env::term(),xmerl_scanner())->
%%         [] | xmlAttribute() | exit().
%%
%% Applies one attribute rule to the attributes actually present.
%% IF carries the declared default/fixed value, DefaultDecl the keyword
%% ('#REQUIRED', '#IMPLIED', '#FIXED', ...). Returns [] when an optional
%% attribute is absent, otherwise the present or defaulted attribute.
%% Violations are signalled with exit({error,Reason}).
valid_attribute(Name, DataType, IF, DefaultDecl, List_of_Attributes, Env, S) ->
    Standalone = S#xmerl_scanner.standalone,
    Found = search_attr(Name, List_of_Attributes),
    check_SDD_validity(Standalone, Env, Found, IF),
    resolve_attribute(Found, DefaultDecl, IF, Name, DataType, S).

%% Clause order matters: it mirrors the precedence of the declaration
%% keywords over the generic default/present cases.
resolve_attribute(no_attribute, '#REQUIRED', _Default, Name, _Type, _S) ->
    exit({error, {Name, is_required}});
resolve_attribute(no_attribute, '#IMPLIED', _Default, _Name, _Type, _S) ->
    [];                                 % and no default value
resolve_attribute(#xmlAttribute{value = Default} = Present, '#FIXED', Default,
                  _Name, _Type, _S) ->
    Present;
resolve_attribute(no_attribute, '#FIXED', Default, Name, _Type, _S) ->
    %% The FIXED declared value becomes the default.
    #xmlAttribute{name = Name, value = Default};
resolve_attribute(Other, '#FIXED', Default, _Name, _Type, _S) ->
    exit({error, {fixed_default_value_missmatch, Default, Other}});
resolve_attribute(no_attribute, _Decl, Default, Name, _Type, _S)
  when is_list(Default) ->
    #xmlAttribute{name = Name, value = Default};
resolve_attribute(#xmlAttribute{} = Present, _Decl, Default, _Name, Type, S) ->
    %% Test the data value, and the default value.
    test_attribute_value(Type, Present, Default, S);
resolve_attribute(Other, Decl, Default, _Name, _Type, _S) ->
    exit({error, {unknow_attribute_type, Decl, Default, Other}}).
190
%% Lexical check of NMTOKEN, NMTOKENS and IDREFS attributes.
%% For every rule of one of these types whose attribute is present in
%% Attrs, the value is handed to valid_nmtoken_value/2 or valid_IDREFS/2,
%% which exit on violation. Rules of other types, and rules whose
%% attribute is absent, are skipped. Returns ok.
vc_Name_Token_IDREFS(AttrRules, Attrs) ->
    lists:foreach(fun(Rule) -> vc_tokenized_rule(Rule, Attrs) end, AttrRules).

vc_tokenized_rule({Name, Type, _, _, _}, Attrs)
  when Type == 'NMTOKEN'; Type == 'NMTOKENS'; Type == 'IDREFS' ->
    case lists:keyfind(Name, #xmlAttribute.name, Attrs) of
        false ->
            ok;
        Found ->
            vc_tokenized_value(Type, Found#xmlAttribute.value)
    end;
vc_tokenized_rule(_OtherRule, _Attrs) ->
    ok.

vc_tokenized_value('IDREFS', Value) ->
    valid_IDREFS(Value, 'IDREFS');
vc_tokenized_value(Type, Value) ->
    valid_nmtoken_value(Value, Type).
210
%% Checks the lexical form of an NMTOKEN / NMTOKENS attribute value.
%%   Nmtok - the attribute value, a list of characters
%%   Type  - 'NMTOKEN' | 'NMTOKENS'
%% Returns ok, or exits with {error,Reason}:
%%   - an empty NMTOKENS value is rejected (at least one token required);
%%   - every character must satisfy xmerl_lib:is_namechar/1, except that
%%     whitespace is accepted as token separator when Type is 'NMTOKENS'.
%% The whitespace exception is keyed on the declared type; comparing the
%% value itself with 'NMTOKENS' can never be the intended test.
valid_nmtoken_value([], 'NMTOKENS') ->
    exit({error, {at_least_one_Nmtoken_required}});
valid_nmtoken_value(Nmtok, Type) ->
    AllowWhitespace = (Type =:= 'NMTOKENS'),
    lists:foreach(fun(Char) ->
                          valid_nmtoken_char(Char, AllowWhitespace)
                  end,
                  Nmtok).

%% Accepts one character of a name token; whitespace only when allowed.
valid_nmtoken_char(Char, true) when ?whitespace(Char) ->
    ok;
valid_nmtoken_char(Char, _AllowWhitespace) ->
    case xmerl_lib:is_namechar(Char) of
        false ->
            exit({error, {invalid_character_in_Nmtoken, Char}});
        _ ->
            ok
    end.
241
%% An IDREFS value must not be empty; any non-empty value is accepted.
%% Exits with {error,{at_least_one_IDREF_Name_required}} for [].
valid_IDREFS(Value, 'IDREFS') ->
    case Value of
        [] ->
            exit({error, {at_least_one_IDREF_Name_required}});
        _ ->
            ok
    end.
246
%% Ensures that at most one attribute rule of an element has type 'ID'.
%% Exits with {error,{just_one_ID_definition_allowed,First,Second}} when
%% a second rule with 'ID' in its type position follows the first one.
single_ID_definition(AttrRules) ->
    NotIdRule = fun({_, 'ID', _, _, _}) -> false;
                   (_) -> true
                end,
    case lists:dropwhile(NotIdRule, AttrRules) of
        [] ->
            ok;
        [FirstId | Others] ->
            case lists:keyfind('ID', 2, Others) of
                false ->
                    ok;
                SecondId ->
                    exit({error, {just_one_ID_definition_allowed,
                                  FirstId, SecondId}})
            end
    end.
257
%% Standalone-document-declaration check for one attribute.
%% Only relevant when Standalone is `yes' and the declaration environment
%% is tagged {external,_}. In that case it exits if the attribute is
%% present and marked normalized, or if it is absent although the rule
%% carries a default (anything other than `no_value').
%% Returns ok in every other situation.
check_SDD_validity(Standalone, Env, Attr, Default) ->
    case {Standalone, Env, Attr} of
        {yes, {external, _}, #xmlAttribute{name = Name, normalized = true}} ->
            exit({error, {externally_defed_attribute_normalized_in_standalone_doc, Name}});
        {yes, {external, _}, no_attribute} when Default /= no_value ->
            exit({error, {externally_defed_attribute_with_default_value_missing_in_standalone_doc}});
        _ ->
            ok
    end.
264
%% Returns the first #xmlAttribute{} called Name, or `no_attribute'.
%% The scan stops at the end of the list and also at the first element
%% that is not an #xmlAttribute{} record.
search_attr(Name, Attrs) ->
    case Attrs of
        [#xmlAttribute{name = Name} = Found | _] ->
            Found;
        [#xmlAttribute{} | Remaining] ->
            search_attr(Name, Remaining);
        _ ->
            no_attribute
    end.
271
%% true when some attribute rule (a 5-tuple whose first element is the
%% attribute name) is declared for Name, false otherwise.
is_attribute_exist(Name, AttrRules) ->
    lists:any(fun({RuleName, _, _, _, _}) -> RuleName =:= Name end,
              AttrRules).
278
279%%%%[54] AttType::=     StringType | TokenizedType | EnumeratedType
280%%%%[55] StringType::=  'CDATA'
281%%%%[56] TokenizedType::= 'ID'|'IDREF'| 'IDREFS'|'ENTITY'| 'ENTITIES'
282%%%%                     | 'NMTOKEN'| 'NMTOKENS'
283%%%%[57] EnumeratedType::= NotationType | Enumeration
284%%%%[58] NotationType::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
285%%%%[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
286
%% +deftype attribute_type() = 'CDATA' | 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
%%                             'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' |
%%                             {enumeration,[List_of_value::atom()]} |
%%                             {notation,[List_of_value::atom()]}.
290
%% +type test_attribute_value(attribute_type(),xmlAttribute(),
%%                            Default::term(),xmerl_scanner())->
%%             xmlAttribute() | exit().
%%
%% Tests the validity constraint of an attribute value for its declared
%% type. Returns the attribute unchanged when it is acceptable and exits
%% with {error,Reason} otherwise.
%%   'CDATA'              - always accepted.
%%   'NMTOKEN'            - value, and a list-valued default, consist of
%%                          name characters only.
%%   'NMTOKENS'           - as 'NMTOKEN', whitespace also allowed.
%%   'ENTITY'/'ENTITIES'  - every space separated name must be known to
%%                          the scanner's rules_read_fun as an external
%%                          entity with an ndata part.
%%   {enumeration,L} /
%%   {notation,L}         - value (and a non-atom default) must be one of
%%                          the tokens in L; enumeration tokens must be
%%                          unique.
%%   any other type       - accepted without a check.
test_attribute_value('CDATA', #xmlAttribute{} = Attr, _, _) ->
    Attr;
test_attribute_value('NMTOKEN', #xmlAttribute{name = Name, value = V} = Attr,
                     Default, _S) ->
    CheckChar =
        fun(X) ->
                case xmerl_lib:is_namechar(X) of
                    true ->
                        ok;
                    false ->
                        exit({error, {invalid_value_nmtoken, Name, V}})
                end
        end,
    lists:foreach(CheckChar, V),
    check_default_chars(CheckChar, Default),
    Attr;
test_attribute_value('NMTOKENS', #xmlAttribute{name = Name, value = V} = Attr,
                     Default, _S) ->
    CheckChar =
        fun(X) ->
                case xmerl_lib:is_namechar(X) of
                    true ->
                        ok;
                    false when ?whitespace(X) ->
                        ok;
                    false ->
                        exit({error, {invalid_value_nmtokens, Name, V}})
                end
        end,
    lists:foreach(CheckChar, V),
    check_default_chars(CheckChar, Default),
    Attr;
test_attribute_value(Ent, #xmlAttribute{value = V} = Attr, _Default,
                     S = #xmerl_scanner{rules_read_fun = Read})
  when Ent == 'ENTITY'; Ent == 'ENTITIES' ->
    %% The default value is already checked
    CheckEntity =
        fun(EntityName) ->
                case Read(entity, EntityName, S) of
                    {_, external, {_, {ndata, _}}} ->
                        ok;
                    _ ->
                        exit({error, {vc_Entity_Name, EntityName, V}})
                end
        end,
    lists:foreach(CheckEntity, entity_names(V, [])),
    Attr;
test_attribute_value({Type, L}, #xmlAttribute{value = Value} = Attr, Default, _S)
  when Type == enumeration; Type == notation ->
    ValidDefault = is_atom(Default) orelse is_declared_token(Default, L),
    NoDuplicates = Type =:= notation orelse tokens_unique(L),
    case {is_declared_token(Value, L), ValidDefault, NoDuplicates} of
        {true, true, true} ->
            Attr;
        {false, _, _} ->
            exit({error, {attribute_value_unknow, Value, {list, L}}});
        {_, false, _} ->
            exit({error, {attribute_default_value_unknow, Default, {list, L}}});
        {_, _, false} ->
            exit({error, {duplicate_tokens_not_allowed, {list, L}}})
    end;
test_attribute_value(_Rule, Attr, _, _) ->
    Attr.

%% Runs the character check on a default value, but only when the default
%% is a list; other defaults carry no text to check.
check_default_chars(CheckChar, Default) when is_list(Default) ->
    lists:foreach(CheckChar, Default);
check_default_chars(_CheckChar, _Default) ->
    ok.

%% Splits an ENTITY/ENTITIES value into its names using scan_name/2.
entity_names([], Acc) ->
    lists:reverse(Acc);
entity_names(Str, Acc) ->
    {EntityName, Rest} = scan_name(Str, []),
    entity_names(Rest, [EntityName | Acc]).

%% true when the text Text names one of the atoms in Tokens.
%% Document text is never turned into a new atom: atoms are not garbage
%% collected, so list_to_atom/1 on attribute values would let a document
%% grow the atom table without bound. Every declared token already exists
%% as an atom, hence text that is not an existing atom cannot be a member.
is_declared_token(Text, Tokens) ->
    try list_to_existing_atom(Text) of
        Token ->
            lists:member(Token, Tokens)
    catch
        error:badarg ->
            false;
        error:system_limit ->
            false
    end.

%% true when no token occurs twice in the list.
tokens_unique([]) ->
    true;
tokens_unique([Token | Rest]) ->
    case lists:member(Token, Rest) of
        true ->
            false;
        _ ->
            tokens_unique(Rest)
    end.
387
388
389%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
390%%%%%%%%%%%%% Contents Validation %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
392%%%%Element-content Models
393%%%%[47] children::= (choice | seq) ('?' | '*' | '+')?
394%%%%[48] cp::=       (Name | choice | seq) ('?' | '*' | '+')?
395%%%%[49] choice::=  '(' S? cp ( S? '|' S? cp )+ S? ')'
396%%%%[50] seq::=     '(' S? cp ( S? ',' S? cp )* S? ')'
397%%%%[51] Mixed::=   '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
398%%%%              | '(' S? '#PCDATA' S? ')'
399
400
%% +type valid_contents(rule(),[content()],rules(),xmerl_scanner(),ws_mode())->
%%              [content()] | {error,term()} | {error,term(),term()}.
%%
%% Validates the children XMLS of an element against the content model
%% Rule. Parser errors are passed through. After a successful parse the
%% unconsumed nodes may only be comments; a left-over element or text
%% node makes the function exit. On success the flattened list of
%% validated children is returned.
valid_contents(Rule, XMLS, Rules, S, WSActionMode) ->
    case parse(Rule, XMLS, Rules, WSActionMode, S) of
        {error, _} = Err ->
            Err;
        {error, _, _} = Err ->
            Err;
        {Validated, Leftover} ->
            %% The left-over list may consist of xmlComment{} records
            IsComment = fun(#xmlComment{}) -> true;
                           (_) -> false
                        end,
            case lists:dropwhile(IsComment, Leftover) of
                [] ->
                    lists:flatten(Validated);
                [#xmlElement{name = Name} | _] ->
                    exit({error, {element, Name, isnt_comprise_in_the_rule, Rule}});
                [#xmlText{} = Txt | _] ->
                    exit({error, {element, text, Txt, isnt_comprise_in_the_rule, Rule}})
            end
    end.
419
%% Matches one content-model rule against the head of a content list.
%% Returns {Tree,Rest} on success, where Tree holds the validated nodes
%% and Rest the unconsumed content, or {error,Reason} /
%% {error,Reason,{{next,Content},{act,Tree}}} when the rule does not fit.
%% A child element that matches the rule by name but fails its own
%% validation makes the function exit with the reason.
%%
%% The clauses from the element-name clause onwards overlap; their order
%% is significant.
parse({'?', SubRule}, XMLS, Rules, _WSaction, S) ->
    question(SubRule, XMLS, Rules, S);
parse({'*', SubRule}, XMLS, Rules, WSaction, S) ->
    star(SubRule, XMLS, Rules, WSaction, [], S);
parse({'+', SubRule}, XMLS, Rules, WSaction, S) ->
    plus(SubRule, XMLS, Rules, WSaction, S);
parse({choice, Alternatives}, XMLS, Rules, WSaction, S) ->
    choice(Alternatives, XMLS, Rules, WSaction, S);
parse({seq, Members}, XMLS, Rules, WSaction, S) ->
    seq(Members, XMLS, Rules, WSaction, S);
parse(empty, [], _Rules, _WSaction, _S) ->
    {[], []};
parse(El_Name, [#xmlElement{name = El_Name} = Element | Rest], Rules,
      _WSaction, S)
  when is_atom(El_Name) ->
    %% The expected element: validate it recursively.
    accept_validated(do_validation(read_rules(Rules, El_Name),
                                   Element, Rules, S),
                     Rest);
parse(any, Content, Rules, _WSaction, S) ->
    case catch parse_any(Content, Rules, S) of
        {error, _} = Err ->
            Err;
        ValidContents ->
            {ValidContents, []}
    end;
parse(El_Name, [#xmlElement{name = Found} | _] = XMLS, _Rules, _WSa, _S)
  when is_atom(El_Name) ->
    %% An element, but not the one the rule is waiting for.
    {error,
     {element_seq_not_conform, {wait, El_Name}, {is, Found}},
     {{next, XMLS}, {act, []}}};
parse(El_Name, [#xmlComment{} | Rest], Rules, WSa, S) ->
    %% Comments are skipped.
    parse(El_Name, Rest, Rules, WSa, S);
parse(_El_Name, [#xmlPI{} = PI | Rest], _Rules, _WSa, _S) ->
    {[PI], Rest};
parse('#PCDATA', XMLS, _Rules, _WSa, _S) ->
    %% PCDATA is 0, 1 or more #xmlText{}.
    parse_pcdata(XMLS);
parse(El_Name, [#xmlText{} | _] = XMLS, _Rules, _WSa, _S) ->
    {error,
     {text_in_place_of, El_Name},
     {{next, XMLS}, {act, []}}};
parse([], _, _, _, _) ->
    {error, no_rule};
parse(Rule, [], _, _, _) ->
    {error, {no_xml_element, Rule}}.

%% Turns the outcome of validating a matched child element into a parse
%% result; a validation error terminates with exit(Reason).
accept_validated({error, Reason}, _Rest) ->
    exit(Reason);
accept_validated({error, Reason, _Next}, _Rest) ->
    exit(Reason);
accept_validated(Valid, Rest) ->
    {[Valid], Rest}.
476
%% Validation for the ANY content model.
%% Given a list, every node is processed in order and the list of results
%% is returned; the first node that does not produce a single validated
%% item makes the function throw that result.
%% Given an element, it is validated against its own rule: [Valid] on
%% success, the error tuple otherwise. Any other node is accepted as is.
parse_any([], _Rules, _S) ->
    [];
parse_any([Node | Siblings], Rules, S) ->
    case parse_any(Node, Rules, S) of
        [Valid] ->
            [Valid | parse_any(Siblings, Rules, S)];
        Failure ->
            throw(Failure)
    end;
parse_any(#xmlElement{name = Name} = Element, Rules, S) ->
    case do_validation(read_rules(Rules, Name), Element, Rules, S) of
        {error, _} = Err ->
            Err;
        {error, _, _} = Err ->
            Err;
        Valid ->
            [Valid]
    end;
parse_any(Other, _Rules, _S) ->
    [Other].
496
497
498
%% Tries the alternatives of a choice, in order, against the content.
%% An alternative that fails, or that succeeds without producing a tree
%% and without consuming anything while other alternatives remain, is
%% passed over. The first alternative that matches yields its tree
%% together with the whitespace found around it.
%% When the alternatives are exhausted the result is an error unless only
%% removable whitespace is left. An empty content list gives {[],[]}.
choice([Alt | Alts], [_ | _] = XMLS, Rules, WSaction, S) ->
    WSMode = ws_action(WSaction, remove),
    {LeadWS, Stripped} = whitespace_action(XMLS, WSMode),
    case parse(Alt, Stripped, Rules, WSMode, S) of
        {error, _} ->
            choice(Alts, XMLS, Rules, WSaction, S);
        {error, _, _} ->
            choice(Alts, XMLS, Rules, WSaction, S);
        {[], Stripped} when Alts =:= [] ->
            %% The choice has succeeded, but without matching anything.
            {[], Stripped};
        {[], Stripped} ->
            %% Maybe a sequence with * or ? elements that didn't match;
            %% there are more alternatives to try with.
            choice(Alts, Stripped, Rules, WSaction, S);
        {Tree, Rest} ->
            {TrailWS, Rest2} = whitespace_action(Rest, WSMode),
            {TrailWS ++ [Tree] ++ LeadWS, Rest2}
    end;
choice([], XMLS, _, WSaction, _S) ->
    case whitespace_action(XMLS, ws_action(WSaction, remove)) of
        {_, []} = OnlyWhitespace ->
            OnlyWhitespace;
        _ ->
            {error, element_unauthorize_in_choice, {{next, XMLS}, {act, []}}}
    end;
choice(_, [], _, _, _S) ->
    {[], []}.
530
%% One-or-more repetition of Rule.
%% The first occurrence is mandatory: if it fails, the error is returned
%% (a three-element error is reduced to {error,Reason}). Further
%% occurrences are collected with star/6.
plus(Rule, XMLS, Rules, WSaction, S) ->
    {LeadWS, Content} = whitespace_action(XMLS, WSaction),
    case parse(Rule, Content, Rules, WSaction, S) of
        {error, Reason, _Next} ->
            {error, Reason};
        {error, _} = Err ->
            Err;
        {First, AfterFirst} ->
            case star(Rule, AfterFirst, Rules, WSaction, [], S) of
                {[], _} ->
                    {LeadWS ++ [First], AfterFirst};
                {More, AfterAll} ->
                    {LeadWS ++ [First] ++ More, AfterAll}
            end
    end.
547
%% Zero-or-more repetition of Rule.
%%   Tree - the matches accumulated so far (callers start with []).
%% Returns {Tree,Rest}; a sub-rule that does not match ends the
%% repetition and is not an error.
%%
%% The empty-content case is detected by pattern matching rather than by
%% a length/1 guard, which would walk the whole list on every iteration.
%%
%% A sub-rule may succeed without consuming any input (for instance an
%% optional or starred sub-rule facing a node it does not accept). The
%% next iteration would then see exactly the same content and produce
%% exactly the same result, so the repetition is ended at that point
%% instead of recursing forever.
star(_Rule, [], _Rules, _WSa, Tree, _S) ->
    {[Tree], []};
star(Rule, XMLS, Rules, WSaction, Tree, S) ->
    {WS, XMLS1} = whitespace_action(XMLS, WSaction),
    case parse(Rule, XMLS1, Rules, WSaction, S) of
        {error, _E, {{next, N}, {act, A}}} ->
            {WS ++ Tree ++ A, N};
        {error, _E} ->
            case whitespace_action(XMLS, ws_action(WSaction, remove)) of
                {[], _} ->
                    {WS ++ [Tree], XMLS};
                {WS2, XMLS2} ->
                    {WS2 ++ [Tree], XMLS2}
            end;
        {Tree1, XMLS1} when XMLS1 =/= [] ->
            %% Matched, but nothing was consumed: stop here.
            {star_append(Tree ++ WS, Tree1), XMLS1};
        {Tree1, XMLS2} ->
            star(Rule, XMLS2, Rules, WSaction, Tree ++ WS ++ [Tree1], S)
    end.

%% Appends the result of a sub-rule to the accumulated tree; a list
%% result is spliced in, so an empty match adds nothing.
star_append(Acc, Tree1) when is_list(Tree1) ->
    Acc ++ Tree1;
star_append(Acc, Tree1) ->
    Acc ++ [Tree1].
568
%% Zero-or-one occurrence of Rule.
%% A rule that does not match is not an error: the content is returned
%% untouched with an empty tree. The sub-rule is parsed in `preserve'
%% whitespace mode.
question(_Rule, [], _Rules, _S) ->
    {[], []};
question(Rule, Toks, Rules, S) ->
    case parse(Rule, Toks, Rules, preserve, S) of
        {error, _Reason, _Next} ->
            {[], Toks};
        {error, _Reason} ->
            {[], Toks};
        {_Tree, _Rest} = Matched ->
            Matched
    end.
581
%% Matches a sequence of rules against the content, starting seq2/6 with
%% an empty tree. Its result - {Tree,Rest}, {error,Reason} or
%% {error,Reason,Next} - is returned as is.
seq(H, Toks, Rules, WSaction, S) ->
    case seq2(H, Toks, Rules, [], WSaction, S) of
        {error, _} = Err ->
            Err;
        {error, _, _} = Err ->
            Err;
        {_Tree, _Rest} = Matched ->
            Matched
    end.
591
%% Worker for seq/5: consumes the rules of a sequence one by one,
%% accumulating the matched nodes in Tree.
%%
%% When the rules are exhausted, leading whitespace text of the remaining
%% content is moved into the tree. If the remaining content is exactly one
%% text node and no whitespace could be removed from it, the sequence
%% ends with a sequence_finish error.
%% While rules remain, each one is parsed in `remove' mode; an error ends
%% the sequence (a three-element error is reduced to {error,Reason}) and
%% an empty match moves on to the next rule without adding anything.
seq2([], Remaining, _Rules, Tree, _WSa, _S) ->
    case {whitespace_action(Remaining, remove), Remaining} of
        {{[], _}, [#xmlText{}]} ->
            {error, sequence_finish, {{next, Remaining}, {act, Tree}}};
        {{WS, Rest}, _} ->
            {WS ++ Tree, Rest}
    end;
seq2([Rule | MoreRules], Toks, Rules, Tree, WSaction, S) ->
    {LeadWS, Content} = whitespace_action(Toks, ws_action(WSaction, remove)),
    %% Rule may match only a part of Content
    case parse(Rule, Content, Rules, remove, S) of
        {error, Reason, _Next} ->
            {error, Reason};
        {error, _} = Err ->
            Err;
        {[], Rest} ->
            seq2(MoreRules, Rest, Rules, Tree, WSaction, S);
        {Matched, Rest} when is_list(Matched) ->
            seq2(MoreRules, Rest, Rules, Tree ++ LeadWS ++ Matched,
                 WSaction, S);
        {Matched, Rest} ->
            seq2(MoreRules, Rest, Rules, Tree ++ LeadWS ++ [Matched],
                 WSaction, S)
    end.
621
%% Name of an element record.
el_name(#xmlElement{} = Element) ->
    Element#xmlElement.name.
624
%% Consumes the leading run of text nodes of a content list.
%% Comments inside the run are dropped. Returns {Texts,Rest}: the text
%% nodes in document order and the content starting at the first node
%% that is neither text nor comment.
parse_pcdata(Content) ->
    parse_pcdata(Content, []).

%% As parse_pcdata/1, with Acc holding text nodes already collected (in
%% document order). The nodes are gathered by consing onto a reversed
%% accumulator and reversed once at the end, instead of appending to the
%% end of the list for every text node.
parse_pcdata(Content, Acc) ->
    collect_pcdata(Content, lists:reverse(Acc)).

collect_pcdata([#xmlText{} = Text | Rest], RevAcc) ->
    collect_pcdata(Rest, [Text | RevAcc]);
collect_pcdata([#xmlComment{} | Rest], RevAcc) ->
    collect_pcdata(Rest, RevAcc);
collect_pcdata(Rest, RevAcc) ->
    {lists:reverse(RevAcc), Rest}.
638
%% true when the argument is a list made up of whitespace characters only
%% (the empty list included); false for anything else.
whitespace(Chars) ->
    case Chars of
        [] ->
            true;
        [Char | Rest] when ?whitespace(Char) ->
            whitespace(Rest);
        _ ->
            false
    end.
645
%% Applies a whitespace mode to a content list.
%% In `remove' mode the leading whitespace-only text nodes are split off:
%% {RemovedNodes,Rest}. Any other mode leaves the content alone and
%% returns {[],XML}.
whitespace_action(XML, Mode) ->
    case Mode of
        remove ->
            whitespace_remove(XML, []);
        _ ->
            {[], XML}
    end.
650
%% Splits the leading whitespace-only text nodes off a content list.
%% Acc holds the nodes removed so far, most recent first. Returns
%% {Removed,Rest} with Removed in document order.
whitespace_remove([Node | Rest] = Content, Acc) ->
    case is_whitespace_text(Node) of
        true ->
            whitespace_remove(Rest, [Node | Acc]);
        false ->
            {lists:reverse(Acc), Content}
    end;
whitespace_remove(Content, Acc) ->
    {lists:reverse(Acc), Content}.

%% true for a text node of type `text' whose value is all whitespace.
is_whitespace_text(#xmlText{value = Value, type = text}) ->
    whitespace(Value);
is_whitespace_text(_) ->
    false.
660
%% Combines the mode in force with a requested mode: `always_preserve'
%% cannot be overridden, any other current mode gives way to the request.
ws_action(Current, Requested) ->
    case Current of
        always_preserve ->
            always_preserve;
        _ ->
            Requested
    end.
665
%% Reads one space-terminated name from the front of a string.
%%   Acc - characters of the name read so far, in reverse order.
%% Returns {Name,Rest}, with Name as an atom and Rest the text after the
%% separating space ([] at the end of the input).
%% A value that already is an atom is returned as the single name with an
%% empty rest, so that every clause yields the {Name,Rest} pair callers
%% match on.
%% NOTE(review): list_to_atom/1 creates an atom for every distinct name
%% found in the document; it is kept because the names are looked up as
%% atoms by the caller.
scan_name(N, _) when is_atom(N) ->
    {N, []};
scan_name([$\s | T], Acc) ->
    {list_to_atom(lists:reverse(Acc)), T};
scan_name([H | T], Acc) ->
    scan_name(T, [H | Acc]);
scan_name("", Acc) ->
    {list_to_atom(lists:reverse(Acc)), []}.
674