1%%-*-erlang-*-
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2008-2017. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%----------------------------------------------------------------------
20%% Start of common source
21%%----------------------------------------------------------------------
22%-compile(export_all).
23
24%%----------------------------------------------------------------------
25%% Include files
26%%----------------------------------------------------------------------
27-include("xmerl_sax_parser.hrl").
28
29%%----------------------------------------------------------------------
30%% External exports
31%%----------------------------------------------------------------------
32-export([parse/2,
33	 parse_dtd/2,
34	 is_name_char/1,
35	 is_name_start/1]).
36
37%%----------------------------------------------------------------------
38%% Internal exports
39%%----------------------------------------------------------------------
40-export([
41	 cf/3,
42	 cf/4,
43	 cf/5
44        ]).
45
46%%----------------------------------------------------------------------
47%% Records
48%%----------------------------------------------------------------------
49
50%%----------------------------------------------------------------------
51%% Macros
52%%----------------------------------------------------------------------
53-define(HTTP_DEF_PORT, 80).
54
55%%======================================================================
56%% External functions
57%%======================================================================
58%%----------------------------------------------------------------------
59%% Function: parse(Xml, State) -> Result
60%% Input:    Xml = string() | binary()
61%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, EventState, Rest} | {fatal_error, term()} |
%%                    the term returned by format_error/3
%%           EventState = term()
64%% Description: Parsing XML from input stream.
65%%----------------------------------------------------------------------
parse(Xml, State) ->
    %% The endDocument handling is deliberately inside the try as well, so
    %% that every outcome is routed through handle_end_document/1.
    try
        Started = event_callback(startDocument, State),
        Prepared = Started#xmerl_sax_parser_state{ref_table = #{}},
        handle_end_document(parse_document(Xml, Prepared))
    catch
        throw:Thrown ->
            %% Thrown terms are handed over unchanged.
            handle_end_document(Thrown);
        _:Failure ->
            %% The state produced by startDocument is not in scope here, so
            %% the state supplied by the caller is used.
            handle_end_document({other, Failure, State})
    end.
79
80    % case catch parse_document(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
81    %     {ok, Rest, State2} ->
82    %         State3 =  event_callback(endDocument, State2),
83    %         case check_if_rest_ok(State3#xmerl_sax_parser_state.input_type, Rest) of
84    %             true ->
85    %                 {ok, State3#xmerl_sax_parser_state.event_state, Rest};
86    %             false ->
87    %                 format_error(fatal_error, State3, "Input found after legal document")
88    %         end;
89    %     {fatal_error, {State2, Reason}} ->
90    %         State3 =  event_callback(endDocument, State2),
91    %         format_error(fatal_error, State3, Reason);
92    %     {event_receiver_error, State2, {Tag, Reason}} ->
93    %         State3 =  event_callback(endDocument, State2),
94    %         format_error(Tag, State3, Reason);
95    %     {endDocument, Rest, State2} ->
96    %         State3 =  event_callback(endDocument, State2),
97    %         {ok, State3#xmerl_sax_parser_state.event_state, Rest};
98    %     Other ->
99    %         _State2 = event_callback(endDocument, State1),
100    %         {fatal_error, Other}
101    % end.
102
103%%----------------------------------------------------------------------
104%% Function: parse_dtd(Xml, State) -> Result
105%% Input:    Xml = string() | binary()
106%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, EventState, Rest} | {fatal_error, term()} |
%%                    the term returned by format_error/3
%%           EventState = term()
109%% Description: Parsing XML DTD from input stream.
110%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
    %% Same shape as parse/2, but the input is parsed with
    %% parse_external_entity_1/2 instead of parse_document/2.
    try
        Started = event_callback(startDocument, State),
        Prepared = Started#xmerl_sax_parser_state{ref_table = #{}},
        handle_end_document(parse_external_entity_1(Xml, Prepared))
    catch
        throw:Thrown ->
            %% Thrown terms are handed over unchanged.
            handle_end_document(Thrown);
        _:Failure ->
            %% Only the caller supplied state is available at this point.
            handle_end_document({other, Failure, State})
    end.
124
125
126    % case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
127    %     {fatal_error, {State2, Reason}} ->
128    %         State3 =  event_callback(endDocument, State2),
129    %         format_error(fatal_error, State3, Reason);
130    %     {event_receiver_error, State2, {Tag, Reason}} ->
131    %         State3 =  event_callback(endDocument, State2),
132    %         format_error(Tag, State3, Reason);
133    %     {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
134    %         State3 =  event_callback(endDocument, State2),
135    %         {ok, State3#xmerl_sax_parser_state.event_state, Rest};
136    %     {endDocument, Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
137    %         State3 =  event_callback(endDocument, State2),
138    %         {ok, State3#xmerl_sax_parser_state.event_state, Rest};
139    %     Other ->
140    %         _State2 = event_callback(endDocument, State1),
141    %         {fatal_error, Other}
142    % end.
143
144
145%%======================================================================
146%% Internal functions
147%%======================================================================
148
149%%----------------------------------------------------------------------
150%% Function: handle_end_document(ParserResult) -> Result
151%% Input:    ParseResult = term()
%% Output:   Result = {ok, EventState, Rest} | {fatal_error, term()} |
%%                    the term returned by format_error/3
%%           EventState = term()
154%% Description: Ends the parsing and formats output
155%%----------------------------------------------------------------------
%% Each clause sends the endDocument event and then builds the final result.
%% If sending the event throws {event_receiver_error, _, _}, that error is
%% formatted and returned instead; any other exception is returned as
%% {fatal_error, Exception}.
%%
%% The variables in the catch patterns are deliberately fresh (Cb*): reusing
%% names bound in the clause head, such as Tag or Reason, would turn the
%% pattern into an equality test against the original error.
handle_end_document({ok, Rest, State}) ->
    %% ok case from parse
    try
        State1 = event_callback(endDocument, State),
        case check_if_rest_ok(State1#xmerl_sax_parser_state.input_type, Rest) of
            true ->
                {ok, State1#xmerl_sax_parser_state.event_state, Rest};
            false ->
                format_error(fatal_error, State1, "Input found after legal document")
        end
    catch
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({endDocument, Rest, State}) ->
    %% ok case from parse and parse_dtd
    try
        State1 = event_callback(endDocument, State),
        {ok, State1#xmerl_sax_parser_state.event_state, Rest}
    catch
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({fatal_error, {State, Reason}}) ->
    try
        State1 = event_callback(endDocument, State),
        format_error(fatal_error, State1, Reason)
    catch
        %% CbReason must not be called Reason: the callback error normally
        %% differs from the parse error that brought us here.
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({event_receiver_error, State, {Tag, Reason}}) ->
    try
        State1 = event_callback(endDocument, State),
        format_error(Tag, State1, Reason)
    catch
        %% Fresh names again, so a second, different receiver error is
        %% still formatted.
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({Rest, State}) when is_record(State, xmerl_sax_parser_state) ->
    %% ok case from parse_dtd
    try
        State1 = event_callback(endDocument, State),
        {ok, State1#xmerl_sax_parser_state.event_state, Rest}
    catch
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end;
handle_end_document({other, Error, State}) ->
    %% Unexpected exception: the event is still sent, but the state it
    %% returns is not needed for the result.
    try
        _State1 = event_callback(endDocument, State),
        {fatal_error, Error}
    catch
        throw:{event_receiver_error, CbState, {CbTag, CbReason}} ->
            format_error(CbTag, CbState, CbReason);
        _:Other ->
            {fatal_error, Other}
    end.
224
225%%----------------------------------------------------------------------
226%% Function: parse_document(Rest, State) -> Result
227%% Input:    Rest = string() | binary()
228%%           State = #xmerl_sax_parser_state{}
229%% Output:   Result = {ok, Rest, State}
230%% Description: Parsing an XML document
231%%              [1] document ::= prolog element Misc*
232%%----------------------------------------------------------------------
parse_document(Rest, #xmerl_sax_parser_state{} = State) ->
    %% A possible byte order mark is handled first; whatever is left is then
    %% passed to parse_misc/3.
    {AfterBom, BomState} = parse_byte_order_mark(Rest, State),
    {Remaining, FinalState} = parse_misc(AfterBom, BomState, true),
    {ok, Remaining, FinalState}.

%% NOTE(review): presumably expands to the definition of
%% parse_byte_order_mark/2 for the encoding of the including module; confirm
%% against the macro definition.
?PARSE_BYTE_ORDER_MARK(Bytes, State).
239
240%%----------------------------------------------------------------------
241%% Function: parse_xml_decl(Rest, State) -> Result
242%% Input:    Rest = string() | binary()
243%%           State = #xmerl_sax_parser_state{}
244%% Output:   Result = {Rest, State}
245%% Description: Parsing the xml directive in the prolog.
246%%             [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
247%%             [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
248%%----------------------------------------------------------------------
%% Input that is empty, or that is exactly a prefix of "<?xml", is handed to
%% cf/3 together with this function as the point to resume from.
parse_xml_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?x") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xm") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xml") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
%% "<?xml" followed by at least one more unit of input.
parse_xml_decl(?STRING_REST("<?xml", AfterTag), State) ->
    parse_xml_decl_rest(AfterTag, State);
%% The remaining clauses are supplied by the including module.
?PARSE_XML_DECL(Bytes, State).
264
%% Whitespace after "<?xml" means this is an XML declaration: its attributes
%% are parsed (and dropped here) before continuing with the prolog.
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, AfterWs), State) when ?is_whitespace(C) ->
    {_XmlAttributes, AfterDecl, DeclState} = parse_version_info(AfterWs, State, []),
    parse_prolog(AfterDecl, DeclState);
%% Anything else: put the consumed "<?xml" back in front of the input and
%% let parse_prolog/2 deal with it.
parse_xml_decl_rest(?STRING_UNBOUND_REST(_C, _) = Bytes, State) ->
    parse_prolog(?STRING_REST("<?xml", Bytes), State);
parse_xml_decl_rest(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_xml_decl_rest/2], undefined).
275
276
277
278%%----------------------------------------------------------------------
279%% Function: parse_prolog(Rest, State) -> Result
280%% Input:    Rest = string() | binary()
281%%           State = #xmerl_sax_parser_state{}
282%% Output:   Result = {Rest, State}
283%% Description: Parsing XML prolog
284%%             [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
285%%----------------------------------------------------------------------
parse_prolog(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog/2);
%% A lone "<" cannot be classified yet.
parse_prolog(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_prolog/2);
%% Processing instruction: both result shapes of parse_pi/2 continue in the
%% prolog with the returned input and state.
parse_prolog(?STRING_REST("<?", AfterOpen), State) ->
    case parse_pi(AfterOpen, State) of
        {endDocument, Remaining, PiState} ->
            parse_prolog(Remaining, PiState);
        {Remaining, PiState} ->
            parse_prolog(Remaining, PiState)
    end;
%% "<!" starts either a comment or a DOCTYPE declaration.
parse_prolog(?STRING_REST("<!", AfterOpen), State) ->
    parse_prolog_1(AfterOpen, State);
%% Any other "<" is the start tag of an element.
parse_prolog(?STRING_REST("<", AfterOpen), State) ->
    parse_stag(AfterOpen, State);
parse_prolog(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_prolog(AfterWs, WsState);
parse_prolog(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog/2],
                             "expecting < or whitespace").
307
%% Called with the input that follows "<!" in the prolog.
parse_prolog_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_prolog_1/2);
%% Input that is exactly a prefix of "DOCTYPE" is passed to cf/3.
parse_prolog_1(?STRING("D") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DO") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOC") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCT") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTY") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTYP") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", AfterKeyword), State) ->
    %% The endDTD event is sent once the whole declaration has been parsed.
    {AfterDtd, DtdState} = parse_doctype(AfterKeyword, State),
    parse_prolog(AfterDtd, event_callback(endDTD, DtdState));
%% A single "-" may be the first half of the comment opener "--".
parse_prolog_1(?STRING("-") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("--", AfterOpen), State) ->
    {AfterComment, CommentState} = parse_comment(AfterOpen, State, []),
    parse_prolog(AfterComment, CommentState);
parse_prolog_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_prolog_1/2],
                             "expecting comment or DOCTYPE").
334
335
336
337%%----------------------------------------------------------------------
338%% Function: parse_version_info(Rest, State, Acc) -> Result
339%% Input:    Rest = string() | binary()
340%%           State = #xmerl_sax_parser_state{}
341%%           Acc = [{Name, Value}]
342%%           Name = string()
343%%           Value = string()
344%% Output:   Result = {[{Name, Value}], Rest, State}
345%% Description: Parsing the version number in the XML directive.
346%%              [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
347%%----------------------------------------------------------------------
parse_version_info(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_version_info/3);
%% Leading whitespace is skipped.
parse_version_info(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_version_info(AfterWs, WsState, Acc);
%% The first attribute has to be named "version"; its value is stored in the
%% accumulator before the rest of the declaration is parsed.
parse_version_info(?STRING_UNBOUND_REST(C, Tail), State, Acc) ->
    case is_name_start(C) of
        false ->
            ?fatal_error(State, "expecting attribute version");
        true ->
            case parse_name(Tail, State, [C]) of
                {"version", AfterName, NameState} ->
                    {AfterEq, EqState} = parse_eq(AfterName, NameState),
                    {Version, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
                    parse_xml_decl_rest(AfterValue, ValueState, [{"version", Version} | Acc]);
                {_, _, NameState} ->
                    ?fatal_error(NameState, "expecting attribute version")
            end
    end;
parse_version_info(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_version_info/3],
                             undefined).
370
371
372
373%%----------------------------------------------------------------------
374%% Function: parse_xml_decl_rest(Rest, State, Acc) -> Result
375%% Input:    Rest = string() | binary()
376%%           State = #xmerl_sax_parser_state{}
377%%           Acc = [{Name, Value}]
378%%           Name = string()
379%%           Value = string()
380%% Output:   Result = {[{Name, Value}], Rest, State}
381%% Description: Checks if there is more to parse in the XML directive.
382%%----------------------------------------------------------------------
parse_xml_decl_rest(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_rest/3);
%% A lone "?" may be the first half of the closing "?>".
parse_xml_decl_rest(?STRING("?") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_xml_decl_rest/3);
%% End of the declaration: the attributes were accumulated in reverse order.
parse_xml_decl_rest(?STRING_REST("?>", Remaining), State, Acc) ->
    {lists:reverse(Acc), Remaining, State};
%% After whitespace the encoding attribute is tried next.
parse_xml_decl_rest(?STRING_UNBOUND_REST(C, _) = Bytes, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Bytes, State, []),
    parse_xml_decl_encoding(AfterWs, WsState, Acc);
parse_xml_decl_rest(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_rest/3],
                             "expecting encoding, standalone, whitespace or ?>").
395
396
397%%----------------------------------------------------------------------
398%% Function: parse_xml_decl_encoding(Rest, State, Acc) -> Result
399%% Input:    Rest = string() | binary()
400%%           State = #xmerl_sax_parser_state{}
401%%           Acc = [{Name, Value}]
402%%           Name = string()
403%%           Value = string()
404%% Output:   Result = {[{Name, Value}], Rest, State}
405%% Description: Parse the encoding attribute in the XML directive.
406%%              [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
407%               [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
408%%----------------------------------------------------------------------
parse_xml_decl_encoding(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_encoding/3);
%% A name starting with "e" must be exactly "encoding".
parse_xml_decl_encoding(?STRING_REST("e", Tail), State, Acc) ->
    case parse_name(Tail, State, [$e]) of
        {"encoding", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Encoding, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_xml_decl_encoding_1(AfterValue, ValueState, [{"encoding", Encoding} | Acc]);
        {Name, _AfterName, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ Name ++
                             " not allowed in xml declaration")
    end;
%% No encoding attribute here: try the standalone attribute instead.
parse_xml_decl_encoding(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    parse_xml_decl_standalone(Bytes, State, Acc);
parse_xml_decl_encoding(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_encoding/3],
                             undefined).
426
427
%% Called after the encoding attribute value. With whitespace next, a
%% standalone attribute may follow; without it the declaration has to end.
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(C, _) = Input, State, Acc) when ?is_whitespace(C) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_xml_decl_standalone(AfterWs, WsState, Acc);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(_C, _) = Input, State, Acc) ->
    parse_xml_decl_rest(Input, State, Acc);
parse_xml_decl_encoding_1(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_encoding_1/3],
                             undefined).
436
437
438%%----------------------------------------------------------------------
439%% Function: parse_xml_decl_standalone(Rest, State, Acc) -> Result
440%% Input:    Rest = string() | binary()
441%%           State = #xmerl_sax_parser_state{}
442%%           Acc = [{Name, Value}]
443%%           Name = string()
444%%           Value = string()
445%% Output:   Result = {[{Name, Value}], Rest, State}
446%% Description: Parse the standalone attribute in the XML directive.
447%%              [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
448%%                              ('"' ('yes' | 'no') '"'))
449%%----------------------------------------------------------------------
parse_xml_decl_standalone(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_standalone/3);
%% A name starting with "s" must be exactly "standalone".
parse_xml_decl_standalone(?STRING_REST("s", Tail), State, Acc) ->
    case parse_name(Tail, State, [$s]) of
        {"standalone", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Standalone, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            if
                Standalone =:= "yes"; Standalone =:= "no" ->
                    ok;
                true ->
                    ?fatal_error(ValueState, "Wrong value of attribute standalone in xml declaration, must be yes or no")
            end,
            {_WS, AfterWs, WsState} = whitespace(AfterValue, ValueState, []),
            %% NOTE(review): list_to_atom/1 is only safe because the check
            %% above is expected to abort for anything but "yes"/"no";
            %% confirm that ?fatal_error never returns.
            NewState = WsState#xmerl_sax_parser_state{standalone = list_to_atom(Standalone)},
            parse_xml_decl_rest(AfterWs, NewState, [{"standalone", Standalone} | Acc]);
        {Name, _AfterName, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ Name ++
                             " not allowed in xml declaration")
    end;
%% No standalone attribute: the declaration has to end here.
parse_xml_decl_standalone(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    parse_xml_decl_rest(Bytes, State, Acc);
parse_xml_decl_standalone(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_xml_decl_standalone/3],
                             undefined).
475
476
477
478%%----------------------------------------------------------------------
479%% Function: parse_pi(Rest, State) -> Result
480%% Input:    Rest = string() | binary()
481%%           State = #xmerl_sax_parser_state{}
482%% Output:   Result = {Rest, State}
483%% Description: Parse processing instructions.
484%%              [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
485%%              [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
486%%----------------------------------------------------------------------
%% Called with the input that follows "<?".
parse_pi(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi/2);
parse_pi(?STRING_UNBOUND_REST(C, Tail) = Bytes, State) ->
    case is_name_start(C) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {PiTarget, AfterTarget, TargetState} = parse_name(Tail, State, [C]),
            case string:to_lower(PiTarget) of
                "xml" ->
                    %% The target "xml" (in any letter case) is only accepted
                    %% when check_if_new_doc_allowed/2 says so. The input and
                    %% state from before the target are then returned.
                    InputType = State#xmerl_sax_parser_state.input_type,
                    EndTags = State#xmerl_sax_parser_state.end_tags,
                    case check_if_new_doc_allowed(InputType, EndTags) of
                        true ->
                            {endDocument, Bytes, State};
                        false ->
                            ?fatal_error(TargetState, "<?xml  ...?> not first in document")
                    end;
                _ ->
                    %% Ordinary instruction: parse the data and report it.
                    {PiData, AfterData, DataState} = parse_pi_1(AfterTarget, TargetState),
                    ReportedState = event_callback({processingInstruction, PiTarget, PiData},
                                                   DataState),
                    {AfterData, ReportedState}
            end
    end;
parse_pi(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_pi/2], undefined).
513
%% True only when the first argument is the atom stream and the second is
%% the empty list; false for every other combination.
check_if_new_doc_allowed(InputType, EndTags) ->
    InputType =:= stream andalso EndTags =:= [].
518
%% Decides whether input left over after the document is acceptable:
%% always for stream, only an empty list or empty binary for file, and
%% never for any other input type.
check_if_rest_ok(stream, _Rest) ->
    true;
check_if_rest_ok(file, Rest) ->
    Rest =:= [] orelse Rest =:= <<>>;
check_if_rest_ok(_InputType, _Rest) ->
    false.
527
528
529%%----------------------------------------------------------------------
530%% Function: parse_pi_1(Rest, State) -> Result
531%% Input:    Rest = string() | binary()
532%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {PiData, Rest, State}
%%           PiData = string()
534%% Description: Parse processing instructions.
535%%----------------------------------------------------------------------
%% Called with the input that follows the target name of a processing
%% instruction. Returns {PiData, Rest, State}; PiData is [] when the
%% instruction ends directly after the target.
parse_pi_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi_1/2);
parse_pi_1(?STRING_UNBOUND_REST(C,_) = Rest, State) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} =
        whitespace(Rest, State, []),
    parse_pi_data(Rest1, State1, []);
%% A lone "?" may be the first half of "?>" when the input is delivered in
%% pieces, so more input is requested instead of failing.
parse_pi_1(?STRING("?") = Bytes, State) ->
    cf(Bytes, State, fun parse_pi_1/2);
parse_pi_1(?STRING_REST("?>", Rest), State) ->
    {[], Rest, State};
parse_pi_1(Bytes, State) ->
    %% The resume point has to be this function: resuming in parse_pi/2 would
    %% treat the remaining input as a new target name.
    unicode_incomplete_check([Bytes, State, fun parse_pi_1/2],
                             "expecting whitespace or '?>'").
547
548
549%%----------------------------------------------------------------------
550%% Function: parse_name(Rest, State, Acc) -> Result
551%% Input:    Rest = string() | binary()
552%%           State = #xmerl_sax_parser_state{}
553%%           Acc = string()
554%% Output:   Result = {Name, Rest, State}
555%%           Name = string()
556%% Description: Parse a name. Next character is put in the accumulator
557%%              if it's a valid name character.
558%%              [5] Name ::= (Letter | '_' | ':') (NameChar)*
559%%----------------------------------------------------------------------
%% Acc holds the characters of the name read so far, newest first.
parse_name(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_name/3);
parse_name(?STRING_UNBOUND_REST(Char, Tail) = Input, State, Acc) ->
    case is_name_char(Char) of
        false ->
            %% Char is not part of the name, so it stays in the returned input.
            {lists:reverse(Acc), Input, State};
        true ->
            parse_name(Tail, State, [Char | Acc])
    end;
parse_name(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_name/3], undefined).
571
572
573%%----------------------------------------------------------------------
574%% Function: parse_ns_name(Rest, State, Prefix, Name) -> Result
575%% Input:    Rest = string() | binary()
576%%           State = #xmerl_sax_parser_state{}
577%%           Prefix = string()
578%%           Name = string()
579%% Output:   Result = {{Prefix, Name}, Rest, State}
580%%           Name = string()
581%% Description: Parse a namespace name. Next character is put in the
582%%              accumulator if it's a valid name character.
583%%              The difference between this function and parse_name/3 is
584%%              that a colon is interpreted as a separator between the
585%%              namespace prefix and the name.
586%%----------------------------------------------------------------------
%% Name holds the characters read so far, newest first.
parse_ns_name(?STRING_EMPTY, State, Prefix, Name) ->
    cf(?STRING_EMPTY, State, Prefix, Name, fun parse_ns_name/4);
%% The first colon, seen while the prefix is still empty, turns what has been
%% read so far into the prefix and starts a fresh name.
parse_ns_name(?STRING_UNBOUND_REST($:, Tail), State, [], Name) ->
    parse_ns_name(Tail, State, lists:reverse(Name), []);
parse_ns_name(?STRING_UNBOUND_REST(Char, Tail) = Input, State, Prefix, Name) ->
    case is_name_char(Char) of
        false ->
            %% Char ends the name and stays in the returned input.
            {{Prefix, lists:reverse(Name)}, Input, State};
        true ->
            parse_ns_name(Tail, State, Prefix, [Char | Name])
    end;
parse_ns_name(Input, State, Prefix, Name) ->
    unicode_incomplete_check([Input, State, Prefix, Name, fun parse_ns_name/4],
                             undefined).
601
602
603%%----------------------------------------------------------------------
604%% Function: parse_pi_data(Rest, State, Acc) -> Result
605%% Input:    Rest = string() | binary()
606%%           State = #xmerl_sax_parser_state{}
607%%           Acc = string()
608%% Output:   Result = {PiData, Rest, State}
609%%           PiData = string()
610%% Description: Parse the data part of the processing instruction.
611%%              If next character is valid it's put in the accumulator.
612%%----------------------------------------------------------------------
parse_pi_data(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
%% A lone "?" or "\r" cannot be judged without the character after it.
parse_pi_data(?STRING("?") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_pi_data/3);
%% End of the instruction: the data was accumulated in reverse order.
parse_pi_data(?STRING_REST("?>", Remaining), State, Acc) ->
    {lists:reverse(Acc), Remaining, State};
%% "\n", "\r\n" and "\r" each count as one line and are stored as ?lf.
parse_pi_data(?STRING_REST("\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_pi_data(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_pi_data(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_pi_data(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_pi_data(?STRING_UNBOUND_REST(Char, Tail), State, Acc) when ?is_char(Char) ->
    parse_pi_data(Tail, State, [Char | Acc]);
parse_pi_data(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_pi_data/3],
                             "not an character").
632
633
634%%----------------------------------------------------------------------
635%% Function: parse_cdata(Rest, State) -> Result
636%% Input:    Rest = string() | binary()
637%%           State = #xmerl_sax_parser_state{}
638%% Output:   Result = {Rest, State}
639%% Description: Start the parsing of a CDATA block.
640%%              [18] CDSect ::= CDStart CData CDEnd
641%%              [19] CDStart ::= '<![CDATA['
642%%              [20] CData ::= (Char* - (Char* ']]>' Char*))
643%%              [21] CDEnd ::= ']]>'
644%%----------------------------------------------------------------------
%% Recognises the "[CDATA[" opener. Input that is exactly a prefix of it is
%% passed to cf/3 with this function as the point to resume from.
parse_cdata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_cdata/2);
parse_cdata(?STRING("[") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[C") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CD") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDAT") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDATA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING_REST("[CDATA[", Content), State) ->
    %% startCDATA is reported before any of the content is read.
    StartedState = event_callback(startCDATA, State),
    parse_cdata(Content, StartedState, []);
parse_cdata(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_cdata/2],
                             "expecting comment or CDATA").
665
666
667%%----------------------------------------------------------------------
668%% Function: parse_cdata(Rest, State, Acc) -> Result
669%% Input:    Rest = string() | binary()
670%%           State = #xmerl_sax_parser_state{}
671%%           Acc = string()
672%% Output:   Result = {Rest, State}
673%% Description: Parse a CDATA block.
674%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
%% A lone "\r", "]" or "]]" cannot be judged without what follows.
parse_cdata(?STRING("\r") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]]") = Partial, State, Acc) ->
    cf(Partial, State, Acc, fun parse_cdata/3);
%% End of the section: report the text, then endCDATA, and go on with the
%% element content.
parse_cdata(?STRING_REST("]]>", Remaining), State, Acc) ->
    TextState = event_callback({characters, lists:reverse(Acc)}, State),
    EndState = event_callback(endCDATA, TextState),
    parse_content(Remaining, EndState, [], true);
%% "\n", "\r\n" and "\r" each count as one line and are stored as ?lf.
parse_cdata(?STRING_REST("\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_cdata(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_REST("\r\n", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_cdata(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_REST("\r", Tail), #xmerl_sax_parser_state{line_no = Line} = State, Acc) ->
    parse_cdata(Tail, State#xmerl_sax_parser_state{line_no = Line + 1}, [?lf | Acc]);
parse_cdata(?STRING_UNBOUND_REST(Char, Tail), State, Acc) when ?is_char(Char) ->
    parse_cdata(Tail, State, [Char | Acc]);
%% A decodable character that fails the ?is_char test.
parse_cdata(?STRING_UNBOUND_REST(Char, _), State, _) ->
    ?fatal_error(State, "CDATA contains bad character value: " ++ [Char]);
parse_cdata(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_cdata/3],
                             undefined).
700
701
%%----------------------------------------------------------------------
%% Function: parse_comment(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()   (comment text collected so far, reversed)
%% Output:   Result = {Rest, State}
%% Description: Parse the body of a comment. The callers in this file
%%              consume the leading "<!--" before calling. When "-->"
%%              is found a {comment, Text} event is sent and the
%%              remaining input is returned. A "--" that is not followed
%%              by ">" and any character rejected by ?is_char are fatal
%%              errors. Line breaks (LF, CRLF, lone CR) are stored as
%%              ?lf and increment the line counter.
%%              [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
%%----------------------------------------------------------------------
parse_comment(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
parse_comment(?STRING("\r") = Bytes, State, Acc) ->
    %% Could be the first half of CRLF; more input is needed to decide.
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("-") = Bytes, State, Acc) ->
    %% Could be the start of "-->" or an illegal "--".
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("--") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
    State1 = event_callback({comment, lists:reverse(Acc)}, State),
    {Rest, State1};
parse_comment(?STRING_REST("--",  _), State, _) ->
    ?fatal_error(State, "comment contains '--'");
parse_comment(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    if
        ?is_char(C) ->
            parse_comment(Rest, State, [C|Acc]);
        true ->
            %% C is an integer code point. Appending it directly with ++
            %% would build an improper list, so format it instead (same
            %% style as the bad-character errors for attribute values
            %% and content).
            ?fatal_error(State,
                         lists:flatten(io_lib:format("Bad character in comment: ~p", [C])))
    end;
parse_comment(Bytes, State, Acc)   ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_comment/3],
                             undefined).
740
741
%%----------------------------------------------------------------------
%% Function: parse_misc(Rest, State, Eod) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Eod = true | false
%% Output:   Result = {Rest, State}
%% Description: Consume any number of comments, processing instructions
%%              and whitespace runs. With empty input and Eod set to
%%              true the current state is returned immediately; with
%%              Eod false more input is requested through cf/4.
%%              Anything that is not Misc is handed back untouched
%%              together with the state.
%%              [27] Misc ::= Comment | PI |  S
%%----------------------------------------------------------------------
parse_misc(?STRING_EMPTY = Empty, State, true) ->
    {Empty, State};
parse_misc(?STRING_EMPTY = Empty, State, Eod) ->
    cf(Empty, State, Eod, fun parse_misc/3);
%% Truncated prefixes of "<?" / "<!--": more input is needed before the
%% construct can be identified.
parse_misc(?STRING("<") = Pending, State, Eod) ->
    cf(Pending, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<!") = Pending, State, Eod) ->
    cf(Pending, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<!-") = Pending, State, Eod) ->
    cf(Pending, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<?", Tail), State, Eod) ->
    case parse_pi(Tail, State) of
        {endDocument, _Unused, NewState} ->
            %% parse_pi/2 signalled the end of the document: put the
            %% "<?" back in front of the original tail and return.
            Prefix = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(Prefix, Tail), NewState};
        {Remaining, NewState} ->
            parse_misc(Remaining, NewState, Eod)
    end;
parse_misc(?STRING_REST("<!--", Tail), State, Eod) ->
    {Remaining, NewState} = parse_comment(Tail, State, []),
    parse_misc(Remaining, NewState, Eod);
parse_misc(?STRING_UNBOUND_REST(Char, _) = Bytes, State, Eod) when ?is_whitespace(Char) ->
    {_Skipped, Remaining, NewState} = whitespace(Bytes, State, []),
    parse_misc(Remaining, NewState, Eod);
parse_misc(Bytes, State, _Eod) ->
    %% Not a Misc construct. The input is returned as is; no
    %% unicode_incomplete_check/2 is performed at this point.
    {Bytes, State}.
781
%%----------------------------------------------------------------------
%% Function: parse_stag(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parse a start tag. The first character must satisfy
%%              is_name_start/1; the (possibly prefixed) tag name is
%%              then read with parse_ns_name/4 and the rest of the tag
%%              is handled by parse_attributes/3, starting with empty
%%              attribute and namespace lists.
%%              [40] STag ::= '<' Name (S Attribute)* S? '>'
%%----------------------------------------------------------------------
parse_stag(?STRING_EMPTY = Empty, State) ->
    cf(Empty, State, fun parse_stag/2);
parse_stag(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {TagName, AfterName, NameState} =
                parse_ns_name(Tail, State, [], [First]),
            parse_attributes(AfterName, NameState, {TagName, [], []})
    end;
parse_stag(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_stag/2],
                             undefined).
804
%%----------------------------------------------------------------------
%% Function: parse_attributes(Rest, State, CurrentTag) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           CurrentTag = {Name, AttList, NewNsList}
%%           Name = {Prefix, LocalName}
%%           AttList = [{Name, Value}]
%%           NewNsList = [{Prefix, Uri}]
%% Output:   Result = {Rest, State}
%% Description: Parse the attribute list of a start tag. Ordinary
%%              attributes are prepended to AttList; "xmlns:P" and
%%              "xmlns" attributes are prepended to NewNsList instead.
%%              On "/>" the start and end events for an empty element
%%              are sent; on ">" the start event is sent and the tag is
%%              pushed on the end_tags stack. Both cases continue with
%%              parse_content/4.
%%              [41] Attribute ::= Name Eq AttValue
%%----------------------------------------------------------------------
parse_attributes(?STRING_EMPTY = Empty, State, CurrentTag) ->
    cf(Empty, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING("/") = Pending, State, CurrentTag) ->
    %% Might be the first half of "/>".
    cf(Pending, State, CurrentTag, fun parse_attributes/3);
parse_attributes(?STRING_REST("/>", Tail), State, {Tag, AttList, NewNsList}) ->
    AllNs = NewNsList ++ State#xmerl_sax_parser_state.ns,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, AllNs),
    Mapped = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    Started = event_callback({startElement, Uri, LocalName, QName, Attributes}, Mapped),
    Ended = event_callback({endElement, Uri, LocalName, QName}, Started),
    Unmapped = send_end_prefix_mapping_event(NewNsList, Ended),
    parse_content(Tail, Unmapped, [], true);
parse_attributes(?STRING_REST(">", Tail),
                 #xmerl_sax_parser_state{end_tags=ETags, ns = OldNsList} = State,
                 {Tag, AttList, NewNsList}) ->
    AllNs = NewNsList ++ OldNsList,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, AllNs),
    Mapped = send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    Started = event_callback({startElement, Uri, LocalName, QName, Attributes}, Mapped),
    %% Remember what is needed to close this element again: the events
    %% to send and the namespace list to restore.
    OpenTag = {Tag, Uri, LocalName, QName, OldNsList, NewNsList},
    parse_content(Tail,
                  Started#xmerl_sax_parser_state{end_tags=[OpenTag |ETags],
                                                 ns = AllNs},
                  [], true);
parse_attributes(?STRING_UNBOUND_REST(Char, _) = Bytes, State, CurrentTag) when ?is_whitespace(Char) ->
    {_Skipped, Tail, NewState} = whitespace(Bytes, State, []),
    parse_attributes(Tail, NewState, CurrentTag);
parse_attributes(?STRING_UNBOUND_REST(First, Tail), State, {Tag, AttList, NsList}) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State,  "Invalid start character in attribute name: " ++ [First]);
        true ->
            {AttrName, AfterName, State1} =
                parse_ns_name(Tail, State, [], [First]),
            {AfterEq, State2} = parse_eq(AfterName, State1),
            {AttValue, AfterValue, State3} = parse_att_value(AfterEq, State2),
            NextTag =
                case AttrName of
                    {"xmlns", NsName} ->
                        {Tag, AttList, [{NsName, AttValue} |NsList]};
                    {"", "xmlns"} ->
                        {Tag, AttList, [{"", AttValue} |NsList]};
                    {_Prefix, _LocalName} ->
                        case lists:keyfind(AttrName, 1, AttList) of
                            false ->
                                {Tag, [{AttrName, AttValue}|AttList], NsList};
                            _ ->
                                %% Duplicate attribute: report it with the
                                %% element name written as "Prefix:Name".
                                ElName =
                                    case Tag of
                                        {"", N} -> N;
                                        {Ns, N} -> Ns ++ ":" ++ N
                                    end,
                                ?fatal_error(State,  "Attribute exist more than once in element: " ++ ElName)
                        end
                end,
            parse_attributes(AfterValue, State3, NextTag)
    end;
parse_attributes(Bytes, State, CurrentTag) ->
    unicode_incomplete_check([Bytes, State, CurrentTag, fun parse_attributes/3],
                             "expecting name, whitespace, /> or >").
875
876
877
%%----------------------------------------------------------------------
%% Function: fix_ns({Prefix, Name}, Attributes, Ns) -> Result
%% Input:    Prefix = string()
%%           Name = string()
%%           Attributes = [{{Prefix, Name}, Value}]
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%% Output:   Result = {Uri, Name, QualifiedName, Attributes}
%%           QualifiedName = {Prefix, Name}
%% Description: Resolve the namespace of a start tag and of its
%%              attributes. The tag's prefix is looked up in Ns; an
%%              unknown prefix yields the empty Uri. The attributes are
%%              resolved by fix_attributes_ns/3.
%%----------------------------------------------------------------------
fix_ns({Prefix, Name}, Attributes, Ns) ->
    Uri =
        case lists:keyfind(Prefix, 1, Ns) of
            {Prefix, Found} ->
                Found;
            false ->
                ""
        end,
    {Uri, Name, {Prefix, Name}, fix_attributes_ns(Attributes, Ns, [])}.
903
%%----------------------------------------------------------------------
%% Function: fix_attributes_ns(Attributes, Ns, Acc) -> Result
%% Input:    Attributes = [{{Prefix, Name}, Value}]
%%           Prefix = string()
%%           Name = string()
%%           Value = string()
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%%           Acc = [{Uri, Prefix, Name, Value}]
%% Output:   Result = [{Uri, Prefix, Name, Value}]
%% Description: Resolve the namespace Uri of every attribute. An
%%              attribute without prefix never gets a Uri; a prefix
%%              missing from Ns resolves to the empty Uri. Each result
%%              is pushed onto Acc, so the output is in the reverse
%%              order of Attributes.
%%----------------------------------------------------------------------
fix_attributes_ns(Attributes, Ns, Acc) ->
    Resolve =
        fun({{"", Name}, AttrValue}, Out) ->
                [{"", "", Name, AttrValue} |Out];
           ({{Prefix, Name}, AttrValue}, Out) ->
                Uri =
                    case lists:keyfind(Prefix, 1, Ns) of
                        {Prefix, Found} ->
                            Found;
                        false ->
                            ""
                    end,
                [{Uri, Prefix, Name, AttrValue} |Out]
        end,
    lists:foldl(Resolve, Acc, Attributes).
928
929
%%----------------------------------------------------------------------
%% Function: send_start_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Send one startPrefixMapping event per entry in Ns, in
%%              list order, threading the state returned by each
%%              callback into the next one.
%%----------------------------------------------------------------------
send_start_prefix_mapping_event(Ns, State) ->
    lists:foldl(fun({Prefix, Uri}, CurrentState) ->
                        event_callback({startPrefixMapping, Prefix, Uri}, CurrentState)
                end, State, Ns).
944
945
%%----------------------------------------------------------------------
%% Function: send_end_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Send one endPrefixMapping event per entry in Ns, in
%%              list order, threading the state returned by each
%%              callback into the next one. Only the prefix is part of
%%              the event.
%%----------------------------------------------------------------------
send_end_prefix_mapping_event(Ns, State) ->
    lists:foldl(fun({Prefix, _Uri}, CurrentState) ->
                        event_callback({endPrefixMapping, Prefix}, CurrentState)
                end, State, Ns).
960
961
%%----------------------------------------------------------------------
%% Function: parse_eq(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Skip optional whitespace and consume one '='. The
%%              function returns directly after the '='; whitespace
%%              following it is not consumed here.
%%              [25] Eq ::= S? '=' S?
%%----------------------------------------------------------------------
parse_eq(?STRING_EMPTY = Empty, State) ->
    cf(Empty, State, fun parse_eq/2);
parse_eq(?STRING_REST("=", Tail), State) ->
    {Tail, State};
parse_eq(?STRING_UNBOUND_REST(Char, _) = Bytes, State) when ?is_whitespace(Char) ->
    {_Skipped, Tail, NewState} = whitespace(Bytes, State, []),
    parse_eq(Tail, NewState);
parse_eq(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_eq/2],
                             "expecting = or whitespace").
981
982
%%----------------------------------------------------------------------
%% Function: parse_att_value(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Value, Rest, State}
%%           Value = string()
%% Description: Skip leading whitespace, read the opening quote of an
%%              attribute value and let parse_att_value/4 collect the
%%              value up to the matching closing quote.
%%              [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
%%              	       |  "'" ([^<&'] | Reference)* "'"
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY = Empty, State) ->
    cf(Empty, State, fun parse_att_value/2);
parse_att_value(?STRING_UNBOUND_REST($', Tail), State) ->
    parse_att_value(Tail, State, $', []);
parse_att_value(?STRING_UNBOUND_REST($", Tail), State) ->
    parse_att_value(Tail, State, $", []);
parse_att_value(?STRING_UNBOUND_REST(Char, _) = Bytes, State) when ?is_whitespace(Char) ->
    {_Skipped, Tail, NewState} = whitespace(Bytes, State, []),
    parse_att_value(Tail, NewState);
parse_att_value(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_att_value/2],
                             "\', \" or whitespace expected").
1003
1004
%%----------------------------------------------------------------------
%% Function  : parse_att_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()   (value collected so far, reversed)
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an attribute value up to the closing quote Stop.
%%              Line breaks and tabs are replaced by a space, entity
%%              and character references are expanded, and '<' is
%%              rejected. Only real line breaks (LF, CRLF, lone CR)
%%              advance the line counter.
%%              Stop = undefined is used internally when the
%%              replacement text of an internal entity is parsed: the
%%              parse then ends at the end of that text and the value
%%              is returned still reversed, ready to be prepended to
%%              the caller's accumulator.
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY, State, undefined, Acc) ->
    {Acc, [], State}; %% stop clause when parsing references
parse_att_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING("\r") = Bytes, State, Stop, Acc) ->
    %% Could be the first half of CRLF; more input is needed to decide.
    cf(Bytes, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc)  ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\t", Rest), State, Stop, Acc)  ->
    %% A tab is normalised to a space like the line breaks above, but
    %% it does not start a new line, so line_no is left untouched.
    parse_att_value(Rest, State, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc)  ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue}  ->
            parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
        {internal_general, true, _, Value} ->
            parse_att_value(Rest1, State1, Stop, Value ++ Acc);
        {internal_general, false, _, Value} ->
            %% Parse the replacement text on its own (Stop = undefined);
            %% the result comes back reversed and is put in front of Acc.
            {ParsedValue, [], State2} = parse_att_value(?TO_INPUT_FORMAT(Value), State1, undefined, []),
            parse_att_value(Rest1, State2, Stop, ParsedValue ++ Acc);
        {external_general, Name, _} ->
            ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the unresolved reference as literal text
                    %% "&Name;" (written reversed since Acc is reversed).
                    parse_att_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
            end;
        {unparsed, Name, _}  ->
            ?fatal_error(State1, "Unparsed entity reference in  attribute value: " ++ Name)
    end;
parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    %% Closing quote found.
    {lists:reverse(Acc), Rest, State};
parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc)   ->
    ?fatal_error(State,  "< not allowed in attribute value");
parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc)   ->
    if
        ?is_char(C) ->
            parse_att_value(Rest, State, Stop, [C|Acc]);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in attribute value: ~p", [C])))
    end;
parse_att_value(Bytes, State, Stop, Acc)   ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
                             undefined).
1069
1070
%%----------------------------------------------------------------------
%% Function  : parse_etag(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the name of an end tag and compare it with the
%%              tag on top of the end_tags stack. A mismatch is a fatal
%%              error only when the match_end_tags field of the state
%%              is true. The remainder ("S? '>'") is handled by
%%              parse_etag_1/3. With an empty end_tags stack the input
%%              is returned unchanged.
%%              [42] ETag ::= '</' Name S? '>'
%%----------------------------------------------------------------------
parse_etag(?STRING_EMPTY = Empty, State) ->
    cf(Empty, State, fun parse_etag/2);
parse_etag(?STRING_UNBOUND_REST(First, Tail),
           #xmerl_sax_parser_state{end_tags=[{Expected, _Uri, _LocalName, _QName, _OldNsList, _NewNsList}
                                             |_OuterTags]} = State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "Name expected");
        true ->
            {Tag, AfterName, NameState} = parse_ns_name(Tail, State, [], [First]),
            Mismatch = Tag /= Expected,
            %% The match_end_tags flag is only consulted when the names differ.
            case Mismatch andalso NameState#xmerl_sax_parser_state.match_end_tags of
                true ->
                    {Prefix, Local} = Tag,
                    ?fatal_error(NameState, "EndTag: " ++ Prefix ++ ":" ++ Local ++
                                     ", does not match StartTag");
                false ->
                    {_Skipped, AfterWS, WSState} = whitespace(AfterName, NameState, []),
                    parse_etag_1(AfterWS, WSState, Tag)
            end
    end;
parse_etag(?STRING_UNBOUND_REST(_Char, _) = Bytes, #xmerl_sax_parser_state{end_tags=[]}= State) ->
    {Bytes, State};
parse_etag(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_etag/2],
                             undefined).
1110
%% parse_etag_1(Rest, State, Tag) -> {Rest, State}
%% Finish an end tag once its name and trailing whitespace have been
%% read. On '>' the endElement and endPrefixMapping events for the
%% element on top of the end_tags stack are sent and the namespace list
%% saved at the start tag is restored. If check_if_new_doc_allowed/2
%% returns true an endDocument tuple is thrown; otherwise the element
%% is popped and content parsing continues. Any other character is a
%% fatal error.
parse_etag_1(?STRING_REST(">", Tail),
             #xmerl_sax_parser_state{end_tags=[{_ETag, Uri, LocalName, QName, OldNsList, NewNsList}
                                               |OuterTags],
                                     input_type=InputType} = State, _Tag) ->
    Ended = event_callback({endElement, Uri, LocalName, QName}, State),
    Unmapped = send_end_prefix_mapping_event(NewNsList, Ended),
    case check_if_new_doc_allowed(InputType, OuterTags) of
        false ->
            parse_content(Tail,
                          Unmapped#xmerl_sax_parser_state{end_tags=OuterTags,
                                                          ns = OldNsList},
                          [], true);
        true ->
            throw({endDocument, Tail, Unmapped#xmerl_sax_parser_state{ns = OldNsList}})
    end;
parse_etag_1(?STRING_UNBOUND_REST(_Char, _), State, Tag) ->
    {Prefix, Local} = Tag,
    ?fatal_error(State, "Bad EndTag: " ++ Prefix ++ ":" ++ Local);
parse_etag_1(Bytes, State, Tag) ->
    unicode_incomplete_check([Bytes, State, Tag, fun parse_etag_1/3],
                             undefined).
1132
%%----------------------------------------------------------------------
%% Function: parse_content(Rest, State, Acc, IgnorableWS) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()   (character data collected so far, reversed)
%%             IgnorableWS = true | false
%% Result    : {Rest, State}
%% Description: Parsing the content part of tags. Character data is
%%              collected in Acc and flushed with send_character_event/4
%%              whenever markup starts. IgnorableWS stays true as long as
%%              only whitespace has been collected; the first other
%%              character or reference switches it to false.
%%              When the end_tags stack is empty (no open element) the
%%              remaining input is returned to the caller.
%%              [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%----------------------------------------------------------------------
parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
    case check_if_document_complete(State, "No more bytes") of
        true ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            {?STRING_EMPTY, State1};
        false ->
            %% Ask for more input. A fatal error thrown while doing so is
            %% caught here so that it can be ignored when the document
            %% turns out to be complete; anything else is passed on.
            case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
                {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
                    {Rest, State1};
                {fatal_error, {State1, Msg}} ->
                    case check_if_document_complete(State1, Msg) of
                        true ->
                            State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
                            {?STRING_EMPTY, State2};
                        false ->
                            ?fatal_error(State1, Msg)
                    end;
                Other ->
                    throw(Other)
            end
    end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    parse_etag(Rest, State1);
parse_content(?STRING("<!") = Bytes, State, Acc, IgnorableWS) ->
    %% The buffer ends inside "<!--" / "<![CDATA[". Acc must be carried
    %% over, just as for a lone "<" above; otherwise the character data
    %% collected before the markup would never be reported.
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<!-") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    {Rest1, State2} = parse_comment(Rest, State1, []),
    parse_content(Rest1, State2, [], true);
parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    case parse_pi(Rest, State1) of
        {Rest1, State2} ->
            parse_content(Rest1, State2, [], true);
        {endDocument, _Rest1, State2} ->
            %% Hand the "<?" back together with the original tail.
            IValue = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(IValue, Rest), State2}
    end;
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            {Rest, State}; %% Send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_cdata(Rest1, State1)
    end;
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            {Rest, State}; %% Send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_stag(Rest1, State1)
    end;
parse_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?space |Acc], IgnorableWS);
parse_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?tab |Acc], IgnorableWS);
parse_content(?STRING_REST("]]>", _Rest), State, _Acc, _IgnorableWS) ->
    ?fatal_error(State, "\"]]>\" is not allowed in content");
parse_content(?STRING_UNBOUND_REST(_C, _) = Rest,
              #xmerl_sax_parser_state{end_tags = []} = State,
              _Acc, _IgnorableWS) ->
    %% No element is open: character data is not consumed here.
    {Rest, State};
parse_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue}  ->
            parse_content(Rest1, State1, [CharValue | Acc], false);
        {internal_general, true, _, Value} ->
            parse_content(Rest1, State1, Value ++ Acc, false);
        {internal_general, false, _, Value} ->
            %% Push the replacement text back in front of the input so
            %% that it is parsed as content.
            IValue = ?TO_INPUT_FORMAT(Value),
            parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
        {external_general, _, {PubId, SysId}} ->
            State2 = parse_external_entity(State1, PubId, SysId),
            parse_content(Rest1, State2, Acc, false);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the unresolved reference as literal text
                    %% "&Name;" (written reversed since Acc is reversed).
                    parse_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
            end;
        {unparsed, Name, _}  ->
            ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
    end;
parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
    if
        ?is_char(C) ->
            parse_content(Rest, State, [C|Acc], false);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in content: ~p", [C])))
    end;
parse_content(Bytes, State, Acc, IgnorableWS)   ->
    unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
                             undefined).
1256
1257
%%----------------------------------------------------------------------
%% Function: check_if_document_complete(State, ErrorMsg) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             ErrorMsg = string()
%% Result    : boolean()
%% Description: Decide whether running out of input is acceptable.
%%              Returns true only when no element is open (the end_tags
%%              stack is empty) and ErrorMsg is one of the two "out of
%%              input" messages; false in every other case.
%%----------------------------------------------------------------------
check_if_document_complete(#xmerl_sax_parser_state{end_tags = []}, ErrorMsg)
  when ErrorMsg =:= "No more bytes";
       ErrorMsg =:= "Continuation function undefined" ->
    true;
check_if_document_complete(_State, _ErrorMsg) ->
    false.
1273
%%----------------------------------------------------------------------
%% Function: send_character_event(Length, IgnorableWS, String, State) -> Result
%% Parameters: Length = integer()
%%             IgnorableWS = true | false
%%             String = string()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Report collected character data. Nothing is sent when
%%              Length is 0. Otherwise String is sent as an
%%              ignorableWhitespace event when IgnorableWS is true and
%%              as a characters event when it is false.
%%----------------------------------------------------------------------
send_character_event(0, _IgnorableWS, _String, State) ->
    State;
send_character_event(_Length, IgnorableWS, String, State) when is_boolean(IgnorableWS) ->
    EventTag =
        if
            IgnorableWS -> ignorableWhitespace;
            true -> characters
        end,
    event_callback({EventTag, String}, State).
1290
1291
%%----------------------------------------------------------------------
%% Function: whitespace(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()   (whitespace collected so far, reversed)
%% Result    : {Whitespace, Rest, State}
%%             Whitespace = string()
%% Description: Parse whitespaces. Line breaks (LF, CRLF, lone CR) are
%%              stored as ?lf and increment the line counter. When the
%%              input runs out and cf/4 hands back the very same bytes
%%              and state, the whitespace collected so far is returned;
%%              any other result from cf/4 is returned as is.
%%              [3] S ::= (#x20 | #x9 | #xD | #xA)+
%%----------------------------------------------------------------------
whitespace(?STRING_EMPTY = Empty, State, Acc) ->
    case cf(Empty, State, Acc, fun whitespace/3) of
        {Empty, State} ->
            {lists:reverse(Acc), Empty, State};
        Other ->
            Other
    end;
whitespace(?STRING("\r") = Pending, State, Acc) ->
    case cf(Pending, State, Acc, fun whitespace/3) of
        {Pending, State} ->
            {lists:reverse(Acc), Pending, State};
        Other ->
            Other
    end;
whitespace(?STRING_REST("\n", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no=NextLine}, [?lf |Acc]);
whitespace(?STRING_REST("\r\n", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no=NextLine}, [?lf |Acc]);
whitespace(?STRING_REST("\r", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no=NextLine}, [?lf |Acc]);
whitespace(?STRING_UNBOUND_REST(Char, Tail), State, Acc) when ?is_whitespace(Char) ->
    whitespace(Tail, State, [Char|Acc]);
%% The final clause is supplied by the ?WHITESPACE macro, which is
%% defined outside this section.
?WHITESPACE(Bytes, State, Acc).
1327
%%----------------------------------------------------------------------
%% Function: parse_reference(Rest, State, HaveToExist) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             HaveToExist = passed on unchanged to look_up_reference/3
%%                           (via parse_reference_1/4)
%% Result    : {Value, Rest, State}
%% Description: Parse entity references; the leading '&' has already
%%              been consumed by the caller. Character references give
%%              {character, IsDelimiter, CharValue}; a character value
%%              rejected by ?is_char is a fatal error. Entity references
%%              are resolved by parse_reference_1/4.
%%              [66] CharRef ::= '&#' [0-9]+ ';'
%%              	       | '&#x' [0-9a-fA-F]+ ';'
%%              [67] Reference ::= EntityRef | CharRef
%%              [68] EntityRef ::= '&' Name ';'
%%----------------------------------------------------------------------
parse_reference(?STRING_EMPTY, State, HaveToExist) ->
    cf(?STRING_EMPTY, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING("#") = Bytes, State, HaveToExist) ->
    %% Cannot yet tell "#x..." (hexadecimal) from "#..." (decimal).
    cf(Bytes, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING_REST("#x", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_hex(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #x" ++ RefString) %%WFC: Legal Character
    end;
parse_reference(?STRING_REST("#", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_digit(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #" ++ RefString)%%WFC: Legal Character
    end;
parse_reference(?STRING_UNBOUND_REST(C, Rest), State, HaveToExist) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_reference_1(Rest1, State1, HaveToExist, Name);
        false ->
            ?fatal_error(State, "name expected")
    end;
parse_reference(Bytes, State, HaveToExist) ->
    %% The second argument is the atom undefined, as at the other call
    %% sites in this section that supply no specific message.
    unicode_incomplete_check([Bytes, State, HaveToExist, fun parse_reference/3],
                             undefined).
1372
1373
%% Completes an entity reference once its Name has been read: requires the
%% terminating ';' and resolves Name through look_up_reference/3.
%% Internal general entities get an extra is_delimiter/1 flag computed from
%% their replacement text; every other lookup result is passed on unchanged.
parse_reference_1(?STRING_REST(";", Tail), State, HaveToExist, Name) ->
    Resolved =
        case look_up_reference(Name, HaveToExist, State) of
            {internal_general, Name, RefValue} ->
                {internal_general, is_delimiter(RefValue), Name, RefValue};
            Other ->
                Other
        end,
    {Resolved, Tail, State};
parse_reference_1(Bytes, State, HaveToExist, Name) ->
    ErrString = "Missing semicolon after reference: " ++ Name,
    unicode_incomplete_check([Bytes, State, HaveToExist, Name, fun parse_reference_1/4],
                             ErrString).
1385
1386
1387
%%----------------------------------------------------------------------
%% Function: is_delimiter(Character) -> Result
%% Parameters: Character = char() | string()
%% Result    : true | false
%% Description: Returns true for the characters & < > ' and ", given
%%              either as a character code or as a one-character
%%              string. Any other term gives false.
%%----------------------------------------------------------------------
is_delimiter(C) when C =:= $&; C =:= $<; C =:= $>; C =:= $\'; C =:= $\" ->
    true;
is_delimiter([C]) when C =:= $&; C =:= $<; C =:= $>; C =:= $\'; C =:= $\" ->
    true;
is_delimiter(_) ->
    false.
1415
%%----------------------------------------------------------------------
%% Function: parse_pe_reference(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Result, Rest, State}
%%             Result = the value returned by look_up_reference/3 for
%%                      the name prefixed with "%"
%% Description: Parse a parameter entity reference; the input starts at
%%              the first character of the name.
%%              [69] PEReference ::= '%' Name ';'
%%----------------------------------------------------------------------
parse_pe_reference(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pe_reference/2);
parse_pe_reference(?STRING_UNBOUND_REST(C, Rest), State) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_pe_reference_1(Rest1, State1, Name);
        false ->
            ?fatal_error(State, "Name expected")
    end;
parse_pe_reference(Bytes, State) ->
    %% No specific error text here; the atom used to be misspelled
    %% ("underfined"), unlike the other parse functions in this file.
    unicode_incomplete_check([Bytes, State, fun parse_pe_reference/2],
                             undefined).
1438
1439
%% Completes a parameter entity reference: requires the terminating ';' and
%% looks the entity up under the key "%" ++ Name, with HaveToExist = true.
parse_pe_reference_1(?STRING_REST(";", Tail), State, Name) ->
    {look_up_reference("%" ++ Name, true, State), Tail, State};
parse_pe_reference_1(Bytes, State, Name) ->
    ErrString = "missing ; after reference " ++ Name,
    unicode_incomplete_check([Bytes, State, Name, fun parse_pe_reference_1/3],
                             ErrString).
1447
1448
%%----------------------------------------------------------------------
%% Function: insert_reference(Name, Value, State) -> Result
%% Parameters: Name = string()
%%             Value = term()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Stores Value under Name in the reference table of State.
%%              An entry that already exists is kept as it is, so the
%%              first inserted value for a name wins.
%%----------------------------------------------------------------------
insert_reference(Name, Value, #xmerl_sax_parser_state{ref_table = Table} = State) ->
    case maps:is_key(Name, Table) of
        true ->
            State;
        false ->
            State#xmerl_sax_parser_state{ref_table = Table#{Name => Value}}
    end.
1465
1466
%%----------------------------------------------------------------------
%% Function: look_up_reference(Reference, HaveToExist, State) -> Result
%% Parameters: Reference = string()
%%             HaveToExist = true | false
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Reference, Value} | {not_found, Reference}
%% Description: Resolves an entity name. The five predefined entities
%%              are answered directly; other names are searched in the
%%              reference table of State. A missing entity is a fatal
%%              error only when HaveToExist is true and the document is
%%              standalone; otherwise {not_found, Reference} is returned.
%%----------------------------------------------------------------------
look_up_reference("amp", _HaveToExist, _State) ->
    {internal_general, "amp", "&"};
look_up_reference("lt", _HaveToExist, _State) ->
    {internal_general, "lt", "<"};
look_up_reference("gt", _HaveToExist, _State) ->
    {internal_general, "gt", ">"};
look_up_reference("apos", _HaveToExist, _State) ->
    {internal_general, "apos", "'"};
look_up_reference("quot", _HaveToExist, _State) ->
    {internal_general, "quot", "\""};
look_up_reference(Name, HaveToExist, State) ->
    case maps:find(Name, State#xmerl_sax_parser_state.ref_table) of
        {ok, {Type, Value}} ->
            {Type, Name, Value};
        _ ->
            case HaveToExist of
                false ->
                    {not_found, Name};
                true ->
                    case State#xmerl_sax_parser_state.standalone of
                        no ->
                            {not_found, Name};  %%VC: Entity Declared
                        yes ->
                            ?fatal_error(State, "Entity not declared: " ++ Name) %%WFC: Entity Declared
                    end
            end
    end.
1500
1501
%%----------------------------------------------------------------------
%% Function: parse_hex(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()
%% Description: Parse the hexadecimal digits of a character reference up
%%              to and including the terminating ';'. Acc holds the
%%              digits read so far, in reverse order. A reference with
%%              no digit at all is reported as a fatal error.
%%----------------------------------------------------------------------
parse_hex(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_hex/3);
parse_hex(?STRING_REST(";", _Rest), State, []) ->
    %% "&#x;": list_to_integer/2 raises badarg on an empty string, so
    %% report the malformed reference as a parse error instead.
    ?fatal_error(State, "Missing hex value in reference");
parse_hex(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {erlang:list_to_integer(RefString, 16), RefString, Rest, State};
parse_hex(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_hex_digit(C) ->
    parse_hex(Rest, State, [C | Acc]);
parse_hex(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_hex/3],
                             "Bad hex value in reference: ").
1522
1523
%%----------------------------------------------------------------------
%% Function: parse_digit(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()
%% Description: Parse the decimal digits of a character reference up to
%%              and including the terminating ';'. Acc holds the digits
%%              read so far, in reverse order. A reference with no digit
%%              at all is reported as a fatal error.
%%----------------------------------------------------------------------
parse_digit(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_digit/3);
parse_digit(?STRING_REST(";", _Rest), State, []) ->
    %% "&#;": list_to_integer/1 raises badarg on an empty string, so
    %% report the malformed reference as a parse error instead.
    ?fatal_error(State, "Missing digits in reference");
parse_digit(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {list_to_integer(RefString), RefString, Rest, State};
parse_digit(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    case is_digit(C) of
        true ->
            parse_digit(Rest, State, [C | Acc]);
        false ->
            ?fatal_error(State, "Character in reference not a digit: " ++ [C])
    end;
parse_digit(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_digit/3],
                             undefined).
1549
%%----------------------------------------------------------------------
%% Function: parse_system_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()
%% Result    : {Literal, Rest, State}
%%             Literal = string()
%% Description: Collect the characters of a system literal up to the
%%              closing quote Stop, which is consumed but not returned.
%%              Acc holds the characters read so far in reverse order.
%%              [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
%%----------------------------------------------------------------------
parse_system_litteral(?STRING_EMPTY, State, Quote, Chars) ->
    cf(?STRING_EMPTY, State, Quote, Chars, fun parse_system_litteral/4);
parse_system_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, Quote, Chars) ->
    Literal = lists:reverse(Chars),
    {Literal, Tail, State};
parse_system_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Quote, Chars) ->
    parse_system_litteral(Tail, State, Quote, [Char | Chars]);
parse_system_litteral(Bytes, State, Quote, Chars) ->
    unicode_incomplete_check([Bytes, State, Quote, Chars, fun parse_system_litteral/4],
                             undefined).
1571
%%----------------------------------------------------------------------
%% Function: parse_pubid_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()
%% Result    : {Literal, Rest, State}
%%             Literal = string()
%% Description: Collect the characters of a public id literal up to the
%%              closing quote Stop, which is consumed but not returned.
%%              Every character must satisfy is_pubid_char/1, otherwise
%%              a fatal error is raised.
%%              [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
%%----------------------------------------------------------------------
parse_pubid_litteral(?STRING_EMPTY, State, Quote, Chars) ->
    cf(?STRING_EMPTY, State, Quote, Chars, fun parse_pubid_litteral/4);
parse_pubid_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, Quote, Chars) ->
    Literal = lists:reverse(Chars),
    {Literal, Tail, State};
parse_pubid_litteral(?STRING_UNBOUND_REST(Char, Tail), State, Quote, Chars) ->
    case is_pubid_char(Char) of
        false ->
            ?fatal_error(State, "Character not allowed in pubid litteral: " ++ [Char]);
        true ->
            parse_pubid_litteral(Tail, State, Quote, [Char | Chars])
    end;
parse_pubid_litteral(Bytes, State, Quote, Chars) ->
    unicode_incomplete_check([Bytes, State, Quote, Chars, fun parse_pubid_litteral/4],
                             undefined).
1598
1599%%======================================================================
1600%% DTD Parsing
1601%%======================================================================
1602
1603%%----------------------------------------------------------------------
1604%% Function  : parse_doctype(Rest, State, Level, Acc) -> Result
1605%% Parameters: Rest = string() | binary()
1606%%             State = #xmerl_sax_parser_state{}
1607%%             Level = integer()
1608%%             Acc = string()
1609%% Result    : {string(), Rest, State}
1610%% Description: This function is just searching the end of the doctype
1611%%              declaration and doesn't parse it. It's used when the
1612%%              parse_dtd option is set to skip.
1613%%----------------------------------------------------------------------
1614%% Just returns doctype as string
1615%% parse_doctype(?STRING_EMPTY, State, Level, Acc) ->
1616%%     cf(?STRING_EMPTY, State, Level, Acc, fun parse_doctype/4);
1617%% parse_doctype(?STRING("\r"), State, Level, Acc) ->
1618%%     cf(?STRING("\r"), State, Level, Acc, fun parse_doctype/4);
1619%% parse_doctype(?STRING_REST(">", Rest), State, 0, Acc) ->
1620%%     {Acc, Rest, State};
1621%% parse_doctype(?STRING_REST(">", Rest), State, Level, Acc) ->
1622%%     parse_doctype(Rest, State, Level-1, Acc);
1623%% parse_doctype(?STRING_REST("<", Rest), State, Level, Acc) ->
1624%%     parse_doctype(Rest, State, Level+1, [$<|Acc]);
1625%% parse_doctype(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
1626%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
1627%% parse_doctype(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
1628%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
1629%% parse_doctype(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
1630%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
1631%% parse_doctype(?STRING_UNBOUND_REST(C, Rest), State, Level, Acc) ->
1632%%     parse_doctype(Rest, State, Level, [C|Acc]);
1633%% parse_doctype(Bytes, State, Level, Acc) ->
1634%%     unicode_incomplete_check([Bytes, State, Level, Acc, fun parse_doctype/4],
1635%% 			     undefined).
1636
1637
%%----------------------------------------------------------------------
%% Function  : parse_doctype(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Starts the parsing of the DTD: skips leading whitespace,
%%              reads the document type name and hands over to
%%              parse_doctype_1/4, which sends the appropriate events.
%%              [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
%%                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
%%----------------------------------------------------------------------
parse_doctype(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype/2);
parse_doctype(?STRING_UNBOUND_REST(Char, _) = Bytes, State) when ?is_whitespace(Char) ->
    {_Skipped, Tail, NextState} = whitespace(Bytes, State, []),
    parse_doctype(Tail, NextState);
parse_doctype(?STRING_UNBOUND_REST(Char, Tail), State) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "expecting name or whitespace");
        true ->
            {Name, AfterName, NextState} = parse_name(Tail, State, [Char]),
            parse_doctype_1(AfterName, NextState, Name, false)
    end;
parse_doctype(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_doctype/2],
                             undefined).
1664
1665
%%----------------------------------------------------------------------
%% Function  : parse_doctype_1(Rest, State, Name, Definition) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%%             Definition = true | false
%% Result    : {Rest, State}
%% Description: Gets the DTD name as a parameter and continues parsing
%%              the DOCTYPE directive. Definition is true once an
%%              external id has been read, i.e. once the startDTD event
%%              has already been sent.
%%----------------------------------------------------------------------
parse_doctype_1(?STRING_EMPTY, State, Name, Definition) ->
    cf(?STRING_EMPTY, State, Name, Definition, fun parse_doctype_1/4);
parse_doctype_1(?STRING_REST(">", Tail), State, _, _) ->
    {Tail, State};
parse_doctype_1(?STRING_REST("[", Subset), State, Name, Definition) ->
    %% Internal subset: send startDTD here unless the external id clause
    %% already did.
    DtdState =
        case Definition of
            true ->
                State;
            false ->
                event_callback({startDTD, Name, "", ""}, State)
        end,
    {AfterSubset, State1} = parse_doctype_decl(Subset, DtdState),
    {_WS, AfterWS, State2} = whitespace(AfterSubset, State1, []),
    parse_doctype_2(AfterWS, State2);
parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name, Definition) when ?is_whitespace(C) ->
    {_WS, Tail, State1} = whitespace(Bytes, State, []),
    parse_doctype_1(Tail, State1, Name, Definition);
parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name, _Definition) when C == $S; C == $P ->
    %% 'S' or 'P' can only start SYSTEM or PUBLIC at this point.
    {PubId, SysId, Tail, State1} = parse_external_id(Bytes, State, false),
    State2 = event_callback({startDTD, Name, PubId, SysId}, State1),
    State3 =
        case State2#xmerl_sax_parser_state.skip_external_dtd of
            true ->
                State2;
            false ->
                parse_external_entity(State2#xmerl_sax_parser_state{file_type=dtd}, PubId, SysId)
        end,
    parse_doctype_1(Tail, State3, Name, true);
parse_doctype_1(Bytes, State, Name, Definition) ->
    unicode_incomplete_check([Bytes, State, Name, Definition, fun parse_doctype_1/4],
                             "expecting >, external id or declaration part").
1708
1709
%% Expects the '>' that closes the DOCTYPE declaration after the internal
%% subset and returns the input that follows it.
parse_doctype_2(?STRING_REST(">", Tail), State) ->
    {Tail, State};
parse_doctype_2(Bytes, State) ->
    Retry = fun parse_doctype_2/2,
    unicode_incomplete_check([Bytes, State, Retry], "expecting >").
1715
1716
%%----------------------------------------------------------------------
%% Function  : parse_external_entity(State, PubId, SysId) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             PubId = string()
%%             SysId = string()
%% Result    : #xmerl_sax_parser_state{}
%% Description: Starts the parsing of an external entity by resolving
%%              SysId against the current location and sending the input
%%              to the parsing function. startEntity and endEntity events
%%              are sent around the entity. The returned state is the
%%              caller's state with the event state and reference table
%%              produced while parsing the entity, and file_type reset
%%              to normal.
%%----------------------------------------------------------------------
%% The public id is not handled
parse_external_entity(State, _PubId, SysId) ->

    ExtRef = check_uri(SysId, State#xmerl_sax_parser_state.current_location),

    SaveState = event_callback({startEntity, SysId}, State),

    %% Parse the entity from the state returned by the startEntity callback.
    %% Starting from the pre-callback State would discard whatever the
    %% callback stored, because the event state produced by the entity
    %% replaces the one kept in SaveState below.
    State1 = SaveState#xmerl_sax_parser_state{line_no=1,
                                              continuation_state=undefined,
                                              continuation_fun=fun xmerl_sax_parser:default_continuation_cb/1,
                                              end_tags = []},

    {EventState, RefTable} = handle_external_entity(ExtRef, State1),

    %% Everything but the event state and the reference table is restored
    %% from the state saved before the entity was entered.
    NewState = event_callback({endEntity, SysId},
                              SaveState#xmerl_sax_parser_state{event_state=EventState,
                                                               ref_table=RefTable}),
    NewState#xmerl_sax_parser_state{file_type=normal}.
1745
1746
1747
%%----------------------------------------------------------------------
%% Function  : handle_external_entity(ExtRef, State) -> Result
%% Parameters: ExtRef = {file, string()} | {http, string()}
%%             State = #xmerl_sax_parser_state{}
%% Result    : {EventState, RefTable}
%% Description: Opens the external entity (for http it is first fetched
%%              into a temporary file), parses it and returns the event
%%              state and the reference table of the resulting parser
%%              state. The file descriptor is closed, and the temporary
%%              file removed, even when parsing fails. Any other URI
%%              type is a fatal error.
%%----------------------------------------------------------------------
handle_external_entity({file, FileToOpen}, State) ->

    case file:open(FileToOpen, [raw, read, binary]) of
        {error, Reason} ->
            ?fatal_error(State, "Couldn't open external entity "++ FileToOpen ++ " : "
                         ++ file:format_error(Reason));
        {ok, FD} ->
            EntityState =
                State#xmerl_sax_parser_state{continuation_state=FD,
                                             current_location=filename:dirname(FileToOpen),
                                             entity=filename:basename(FileToOpen),
                                             input_type=file},
            parse_opened_entity(FD, fun parse_external_entity_1/2, EntityState)
    end;
handle_external_entity({http, Url}, State) ->

    try
        {Host, Port, Key} = http(Url),
        TmpFile = http_get_file(Host, Port, Key),
        try
            case file:open(TmpFile, [raw, read, binary]) of
                {error, Reason} ->
                    ?fatal_error(State, "Couldn't open temporary file " ++ TmpFile ++ " : "
                                 ++ file:format_error(Reason));
                {ok, FD} ->
                    EntityState =
                        State#xmerl_sax_parser_state{continuation_state=FD,
                                                     current_location=filename:dirname(Url),
                                                     entity=filename:basename(Url),
                                                     input_type=file},
                    parse_opened_entity(FD, fun parse_external_entity_byte_order_mark/2,
                                        EntityState)
            end
        after
            %% Best-effort cleanup: never leave the download behind, and do
            %% not let a failing delete hide a parse error.
            _ = file:delete(TmpFile)
        end
    catch
        throw:{error, Error} ->
            ?fatal_error(State, Error)
    end;
handle_external_entity({Tag, _Url}, State) ->
    ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).

%% Runs ParseFun on the freshly opened entity and returns the event state and
%% reference table it produced. FD is closed whether parsing succeeds or throws.
parse_opened_entity(FD, ParseFun, State) ->
    try
        {?STRING_EMPTY, EntityState} = ParseFun(<<>>, State),
        {EntityState#xmerl_sax_parser_state.event_state,
         EntityState#xmerl_sax_parser_state.ref_table}
    after
        _ = file:close(FD)
    end.
1801
%% Macro expansion; the macro itself is defined outside this section.
%% Presumably it generates parse_external_entity_byte_order_mark/2, the
%% function handle_external_entity/2 calls for entities fetched over http
%% (TODO confirm against the macro definition).
?PARSE_EXTERNAL_ENTITY_BYTE_ORDER_MARK(Bytes, State).
1803
%%----------------------------------------------------------------------
%% Function  : parse_external_entity_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the external entity. An optional leading XML/text
%%              declaration ("<?xml" followed by whitespace) is consumed
%%              without being reported, then the input is dispatched on
%%              the file_type of State: parse_doctype_decl/2 or
%%              parse_content/4.
%%----------------------------------------------------------------------
parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) ->
    %% Ask for more input. Running out of bytes in a dtd or entity file is
    %% treated as a regular end of the entity; anything else that was caught
    %% is re-thrown with throw/1.
    case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
	{Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
	    {Rest, State1};
	{fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity ->
	    {?STRING_EMPTY, State1};
	Other ->
	    throw(Other)
    end;
%% The input so far is a proper prefix of "<?xml": more bytes are needed
%% before it can be told apart from other markup.
parse_external_entity_1(?STRING("<") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?x") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xm") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xml") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING_REST("<?xml", Rest) = Bytes,
			#xmerl_sax_parser_state{file_type=Type} = State) ->
    %% Only "<?xml" followed by whitespace is handled as a declaration here;
    %% otherwise the untouched input (Bytes) is passed on to the parser below.
    {Rest1, State1} =
	case is_next_char_whitespace(Rest, State) of
	    false ->
		{Bytes, State};
	    true ->
		{_XmlAttributes, R, S} = parse_version_info(Rest, State, []),
		%S1 =  event_callback({processingInstruction, "xml", XmlAttributes}, S),% The XML decl. should not be reported as a PI
		{R, S}
	end,
    case Type of
	dtd ->
	    %% As in the first clause: running out of bytes ends the DTD
	    %% normally, any other caught value is re-thrown.
	    case catch parse_doctype_decl(Rest1, State1)  of
		{Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
		    {Rest2, State2};
		{fatal_error, {State2, "No more bytes"}} ->
		    {?STRING_EMPTY, State2};
		Other ->
		    throw(Other)
	    end;

	_ -> % Type is normal or entity
	    parse_content(Rest1, State1, [], true)
    end;
parse_external_entity_1(?STRING_UNBOUND_REST(_C, _) = Bytes,
			#xmerl_sax_parser_state{file_type=Type} = State) ->
    %% No declaration at the start: dispatch directly on the file type.
    %% NOTE(review): entity goes to parse_doctype_decl/2 here but to
    %% parse_content/4 in the "<?xml" clause above - confirm this is intended.
    case Type of
	normal ->
	    parse_content(Bytes, State, [], true);
	dtd ->
	    parse_doctype_decl(Bytes, State);
	entity ->
	    parse_doctype_decl(Bytes, State)    end;
parse_external_entity_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_external_entity_1/2],
			     undefined).
1867
%%----------------------------------------------------------------------
%% Function  : is_next_char_whitespace(Bytes, State) -> Result
%% Parameters: Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : true | false
%% Description: Tells whether the first character of Bytes is
%%              whitespace. Nothing is consumed. Input from which no
%%              complete character can be read is handed to
%%              unicode_incomplete_check/2.
%%----------------------------------------------------------------------
is_next_char_whitespace(?STRING_UNBOUND_REST(Char, _), _State) when ?is_whitespace(Char) ->
    true;
is_next_char_whitespace(?STRING_UNBOUND_REST(_Char, _), _State) ->
    false;
is_next_char_whitespace(Bytes, State) ->
    Retry = fun is_next_char_whitespace/2,
    unicode_incomplete_check([Bytes, State, Retry], undefined).
1882
%%----------------------------------------------------------------------
%% Function  : parse_external_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse an external id. OptionalSystemId tells whether the
%%              system literal after a public id may be left out; after
%%              SYSTEM it is always required. PubId is "" for a SYSTEM
%%              id.
%%              [75] ExternalID ::= 'SYSTEM' S SystemLiteral
%%                                | 'PUBLIC' S PubidLiteral S SystemLiteral
%%----------------------------------------------------------------------
parse_external_id(?STRING_EMPTY, State, OptionalSystemId) ->
    cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_external_id/3);
%% Proper prefixes of "SYSTEM": wait for the rest of the keyword.
parse_external_id(?STRING("S") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SY") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYS") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYST") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("SYSTE") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("SYSTEM", Tail), State, _) ->
    {SysId, AfterId, NextState} = parse_system_id(Tail, State, false),
    {"", SysId, AfterId, NextState};
%% Proper prefixes of "PUBLIC": wait for the rest of the keyword.
parse_external_id(?STRING("P") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PU") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUB") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUBL") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING("PUBLI") = Partial, State, OptionalSystemId) ->
    cf(Partial, State, OptionalSystemId, fun parse_external_id/3);
parse_external_id(?STRING_REST("PUBLIC", Tail), State, OptionalSystemId) ->
    parse_public_id(Tail, State, OptionalSystemId);
parse_external_id(Bytes, State, OptionalSystemId) ->
    unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_external_id/3],
                             "expecting SYSTEM or PUBLIC").
1927
1928
%%----------------------------------------------------------------------
%% Function  : parse_system_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {SysId, Rest, State}
%%             SysId = string()
%% Description: Parse a system id: whitespace followed by a quoted
%%              system literal. When OptionalSystemId is true and no
%%              whitespace follows, "" is returned and nothing is
%%              consumed.
%%----------------------------------------------------------------------
parse_system_id(?STRING_EMPTY, State, OptionalSystemId) ->
    %% The input may end right after the SYSTEM keyword or the public id
    %% literal; fetch more through the continuation, as
    %% parse_external_id/3 does, before deciding anything.
    cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_system_id/3);
parse_system_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    check_system_litteral(Rest, State1, OptionalSystemId);
parse_system_id(?STRING_UNBOUND_REST(_C, _) = Bytes, State, true) ->
    {"", Bytes, State};
parse_system_id(Bytes, State, OptionalSystemId) ->
    unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_system_id/3],
                             "whitespace expected").
1947
%% Looks for the quote that opens a system literal. With a quote the literal
%% is parsed; without one "" is returned (input untouched) when the system id
%% is optional, and otherwise the input goes to unicode_incomplete_check/2.
check_system_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, _OptionalSystemId)
  when Quote == $\'; Quote == $\" ->
    parse_system_litteral(Tail, State, Quote, []);
check_system_litteral(?STRING_UNBOUND_REST(_Char, _) = Bytes, State, true) ->
    {"", Bytes, State};
check_system_litteral(Bytes, State, OptionalSystemId) ->
    Retry = fun check_system_litteral/3,
    unicode_incomplete_check([Bytes, State, OptionalSystemId, Retry],
                             "\" or \' expected").
1955
1956
%%----------------------------------------------------------------------
%% Function  : parse_public_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a public id: whitespace, a quoted public id
%%              literal and then the system id, which may be left out
%%              when OptionalSystemId is true.
%%----------------------------------------------------------------------
parse_public_id(?STRING_EMPTY, State, OptionalSystemId) ->
    %% The input may end right after the PUBLIC keyword; fetch more through
    %% the continuation, as parse_external_id/3 does.
    cf(?STRING_EMPTY, State, OptionalSystemId, fun parse_public_id/3);
parse_public_id(?STRING_UNBOUND_REST(C, _) = Bytes, State, OptionalSystemId) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    check_public_litteral(Rest, State1, OptionalSystemId);
parse_public_id(Bytes, State, OptionalSystemId) ->
    unicode_incomplete_check([Bytes, State, OptionalSystemId, fun parse_public_id/3],
                             "whitespace expected").
1974
1975
%% Expects the quote that opens a public id literal, parses the literal and
%% then the (possibly optional) system id that follows it.
check_public_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, OptionalSystemId)
  when Quote == $\'; Quote == $\" ->
    {PubId, AfterPubId, State1} = parse_pubid_litteral(Tail, State, Quote, []),
    {SysId, AfterSysId, State2} = parse_system_id(AfterPubId, State1, OptionalSystemId),
    {PubId, SysId, AfterSysId, State2};
check_public_litteral(Bytes, State, OptionalSystemId) ->
    Retry = fun check_public_litteral/3,
    unicode_incomplete_check([Bytes, State, OptionalSystemId, Retry],
                             "\" or \' expected").
1983
1984
%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the declaration part of the DOCTYPE: processing
%%              instructions, parameter entity references, markup
%%              declarations and whitespace, until the closing ']'.
%%              [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
%%                                | NotationDecl | PI | Comment
%%----------------------------------------------------------------------
parse_doctype_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING("<"), State) ->
    %% Could become "<?" or "<!": more input is needed.
    cf(?STRING("<"), State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING_REST("<?", AfterOpen), State) ->
    case parse_pi(AfterOpen, State) of
        {endDocument, _Unused, EndState} ->
            %% Hand the processing instruction back unparsed, with its "<?"
            %% prefix restored.
            Prefix = ?TO_INPUT_FORMAT("<?"),
            {?APPEND_STRING(Prefix, AfterOpen), EndState};
        {Remaining, NextState} ->
            parse_doctype_decl(Remaining, NextState)
    end;
parse_doctype_decl(?STRING_REST("%", AfterPercent), State) ->
    {Ref, Rest1, State1} = parse_pe_reference(AfterPercent, State),
    case Ref of
        {internal_parameter, _, RefValue} ->
            %% The replacement text, padded with a space on each side, is
            %% pushed back onto the input and parsed as declarations.
            Padded = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
            parse_doctype_decl(?APPEND_STRING(Padded, Rest1), State1);
        {external_parameter, _, {PubId, SysId}} ->
            EntityState = State1#xmerl_sax_parser_state{file_type = entity},
            parse_doctype_decl(Rest1, parse_external_entity(EntityState, PubId, SysId));
        {not_found, Name} ->
            %% An unknown parameter entity is tolerated only when external
            %% DTDs are skipped.
            case State#xmerl_sax_parser_state.skip_external_dtd of
                true ->
                    parse_doctype_decl(Rest1, State1);
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name) %%WFC: Entity Declared
            end
    end;
parse_doctype_decl(?STRING_REST("<!", AfterBang), State) ->
    parse_doctype_decl_1(AfterBang, State);
parse_doctype_decl(?STRING_REST("]", Tail), State) ->
    {Tail, State};
parse_doctype_decl(?STRING_UNBOUND_REST(Char, _) = Bytes, State) when ?is_whitespace(Char) ->
    {_Skipped, Tail, NextState} = whitespace(Bytes, State, []),
    parse_doctype_decl(Tail, NextState);
parse_doctype_decl(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_doctype_decl/2],
                             "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").
2033
2034
%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Dispatches on what follows "<!" inside the DOCTYPE:
%%              ELEMENT, ATTLIST, ENTITY, NOTATION or the "--" that opens
%%              a comment. The matching parser is run and parsing then
%%              goes on in parse_doctype_decl/2.
%%----------------------------------------------------------------------
parse_doctype_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ELEMENT", AfterKeyword), State) ->
    {Remaining, State1} = parse_element_decl(AfterKeyword, State),
    parse_doctype_decl(Remaining, State1);
parse_doctype_decl_1(?STRING_REST("ATTLIST", AfterKeyword), State) ->
    {Remaining, State1} = parse_att_list_decl(AfterKeyword, State),
    parse_doctype_decl(Remaining, State1);
parse_doctype_decl_1(?STRING_REST("ENTITY", AfterKeyword), State) ->
    {Remaining, State1} = parse_entity_decl(AfterKeyword, State),
    parse_doctype_decl(Remaining, State1);
parse_doctype_decl_1(?STRING_REST("NOTATION", AfterKeyword), State) ->
    {Remaining, State1} = parse_notation_decl(AfterKeyword, State),
    parse_doctype_decl(Remaining, State1);
parse_doctype_decl_1(?STRING_REST("--", AfterDashes), State) ->
    {Remaining, State1} = parse_comment(AfterDashes, State, []),
    parse_doctype_decl(Remaining, State1);
parse_doctype_decl_1(Bytes, State) ->
    %% A buffer that is exactly a proper prefix of one of the keywords
    %% above (or a single "-") is not an error yet: it is given to cf/3
    %% with this function as the one to resume with.
    KeywordPrefixes =
        [?STRING("E"), ?STRING("EL"), ?STRING("ELE"), ?STRING("ELEM"),
         ?STRING("ELEME"), ?STRING("ELEMEN"),
         ?STRING("A"), ?STRING("AT"), ?STRING("ATT"), ?STRING("ATTL"),
         ?STRING("ATTLI"), ?STRING("ATTLIS"),
         ?STRING("EN"), ?STRING("ENT"), ?STRING("ENTI"), ?STRING("ENTIT"),
         ?STRING("N"), ?STRING("NO"), ?STRING("NOT"), ?STRING("NOTA"),
         ?STRING("NOTAT"), ?STRING("NOTATI"), ?STRING("NOTATIO"),
         ?STRING("-")],
    case lists:member(Bytes, KeywordPrefixes) of
        true ->
            cf(Bytes, State, fun parse_doctype_decl_1/2);
        false ->
            unicode_incomplete_check([Bytes, State, fun parse_doctype_decl_1/2],
                                     "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment")
    end.
2116
2117
%%----------------------------------------------------------------------
%% Function  : parse_element_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Entry point for an element declaration, called with the
%%              input that follows the ELEMENT keyword. At least one
%%              whitespace character is required before the name.
%%              [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
%%----------------------------------------------------------------------
parse_element_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_decl/2);
parse_element_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_element_decl_1(AfterWS, State1);
parse_element_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl/2],
                             "whitespace expected").
2134
%% Reads the element name and its content model, then reports them with an
%% {elementDecl, Name, Model} event. Returns {Rest, State}.
parse_element_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            {Model, AfterModel, State2} = parse_element_content(AfterName, State1),
            {AfterModel, event_callback({elementDecl, Name, Model}, State2)}
    end;
parse_element_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl_1/2],
                             undefined).
2148
2149
%%----------------------------------------------------------------------
%% Function  : parse_element_content(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Requires whitespace after the element name and then
%%              collects the content specification with
%%              parse_element_content_1/3.
%%              [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
%%----------------------------------------------------------------------
parse_element_content(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_content/2);
parse_element_content(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_element_content_1(AfterWS, State1, []);
parse_element_content(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_content/2],
                             "whitespace expected").
2166
2167
%%----------------------------------------------------------------------
%% Function  : parse_element_content_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Collects every character up to the closing ">" of the
%%              element declaration. Acc holds the characters in reverse
%%              order, so the whitespace removed from its head is the
%%              whitespace that directly preceded ">".
%%----------------------------------------------------------------------
parse_element_content_1(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_element_content_1/3);
parse_element_content_1(?STRING_REST(">", AfterDecl), State, Acc) ->
    Trimmed = delete_leading_whitespace(Acc),
    {lists:reverse(Trimmed), AfterDecl, State};
parse_element_content_1(?STRING_UNBOUND_REST(Char, Tail), State, Acc) ->
    parse_element_content_1(Tail, State, [Char | Acc]);
parse_element_content_1(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_element_content_1/3],
                             undefined).
2186
%% Drops whitespace characters from the head of Acc and returns what is
%% left; anything that does not start with whitespace is returned as is.
delete_leading_whitespace(Acc) ->
    case Acc of
        [Char | Tail] when ?is_whitespace(Char) ->
            delete_leading_whitespace(Tail);
        _ ->
            Acc
    end.
2191
%%----------------------------------------------------------------------
%% Function  : parse_att_list_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Entry point for an attribute list declaration, called with
%%              the input that follows the ATTLIST keyword. Whitespace is
%%              required before the element name.
%%              [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
%%----------------------------------------------------------------------
parse_att_list_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_list_decl/2);
parse_att_list_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_att_list_decl_1(AfterWS, State1);
parse_att_list_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl/2],
                             "whitespace expected").
2208
2209
%% Reads the element name of an ATTLIST declaration and passes it on to
%% parse_att_defs/3, which handles the attribute definitions.
parse_att_list_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {ElementName, AfterName, State1} = parse_name(Tail, State, [First]),
            parse_att_defs(AfterName, State1, ElementName)
    end;
parse_att_list_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl_1/2],
                             undefined).
2221
2222
%%----------------------------------------------------------------------
%% Function  : parse_att_defs(Rest, State, ElementName) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             ElementName = string()
%% Result    : {Rest, State}
%% Description: Parses attribute definitions until the closing ">" of the
%%              ATTLIST declaration. Each definition is reported with an
%%              {attributeDecl, ElementName, AttrName, Type, Mode, Value}
%%              event.
%%              [53] AttDef ::= S Name S AttType S DefaultDecl
%%----------------------------------------------------------------------
parse_att_defs(?STRING_EMPTY, State, ElementName) ->
    cf(?STRING_EMPTY, State, ElementName, fun parse_att_defs/3);
parse_att_defs(?STRING_REST(">", AfterDecl), State, _ElementName) ->
    {AfterDecl, State};
parse_att_defs(?STRING_UNBOUND_REST(Char, _) = Input, State, ElementName) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_att_defs(AfterWS, State1, ElementName);
parse_att_defs(?STRING_UNBOUND_REST(First, Tail), State, ElementName) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "whitespace or name expected");
        true ->
            {AttrName, AfterName, State1} = parse_name(Tail, State, [First]),
            {Type, AfterType, State2} = parse_att_type(AfterName, State1),
            {Mode, Value, AfterDefault, State3} = parse_default_decl(AfterType, State2),
            Declaration = {attributeDecl, ElementName, AttrName, Type, Mode, Value},
            State4 = event_callback(Declaration, State3),
            parse_att_defs(AfterDefault, State4, ElementName)
    end;
parse_att_defs(Input, State, ElementName) ->
    unicode_incomplete_check([Input, State, ElementName, fun parse_att_defs/3],
                             undefined).
2253
2254
%%----------------------------------------------------------------------
%% Function  : parse_att_type(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Requires whitespace, then reads the attribute type token.
%%              A token of "(" or "NOTATION" is extended with everything
%%              up to and including the next ")". Any other token has to
%%              be accepted by check_att_type/1, otherwise the fatal error
%%              "wrong attribute type" is raised.
%%              [54] AttType ::= StringType | TokenizedType | EnumeratedType
%%              [55] StringType  ::= 'CDATA'
%%              [56] TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
%%                                    | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
%%              [57] EnumeratedType ::= NotationType | Enumeration
%%              [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
%%              [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
%%----------------------------------------------------------------------
parse_att_type(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_type/2);
parse_att_type(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    case parse_att_type_1(AfterWS, State1, []) of
        {Prefix, AfterPrefix, State2} when Prefix == "("; Prefix == "NOTATION" ->
            {Enumeration, AfterParen, State3} = parse_until_right_paren(AfterPrefix, State2, []),
            {Prefix ++ Enumeration, AfterParen, State3};
        {Token, AfterToken, State2} ->
            case check_att_type(Token) of
                false ->
                    ?fatal_error(State2, "wrong attribute type");
                true ->
                    {Token, AfterToken, State2}
            end
    end;
parse_att_type(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_type/2],
                             "whitespace expected").
2289
2290
%%----------------------------------------------------------------------
%% Function  : parse_att_type_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Collects the attribute type token. The token ends at the
%%              first whitespace character, which is left in Rest. A "("
%%              seen before any other character is returned at once as the
%%              token "(".
%%----------------------------------------------------------------------
parse_att_type_1(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_att_type_1/3);
parse_att_type_1(?STRING_UNBOUND_REST(Char, _) = Input, State, Acc) when ?is_whitespace(Char) ->
    Token = lists:reverse(Acc),
    {Token, Input, State};
parse_att_type_1(?STRING_REST("(", AfterParen), State, []) ->
    {"(", AfterParen, State};
parse_att_type_1(?STRING_UNBOUND_REST(Char, Tail), State, Acc) ->
    parse_att_type_1(Tail, State, [Char | Acc]);
parse_att_type_1(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_att_type_1/3],
                             undefined).
2311
%%----------------------------------------------------------------------
%% Function  : check_att_type(Type) -> Result
%% Parameters: Type = string()
%% Result    : true | false
%% Description: Returns true when Type is exactly one of the string and
%%              tokenized attribute type keywords listed below, and false
%%              for anything else.
%%----------------------------------------------------------------------
check_att_type(Type) ->
    lists:member(Type, ["CDATA",
                        "ID", "IDREF", "IDREFS",
                        "ENTITY", "ENTITIES",
                        "NMTOKEN", "NMTOKENS"]).
2336
2337
%%----------------------------------------------------------------------
%% Function  : parse_until_right_paren(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Collects the characters of an enumerated type up to and
%%              including the first ")".
%%----------------------------------------------------------------------
parse_until_right_paren(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_until_right_paren/3);
parse_until_right_paren(?STRING_REST(")", AfterParen), State, Acc) ->
    %% The closing parenthesis is part of the returned text.
    {lists:reverse([$) | Acc]), AfterParen, State};
parse_until_right_paren(?STRING_UNBOUND_REST(Char, Tail), State, Acc) ->
    parse_until_right_paren(Tail, State, [Char | Acc]);
parse_until_right_paren(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_until_right_paren/3],
                             undefined).
2356
2357
%%----------------------------------------------------------------------
%% Function  : parse_default_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : The result of parse_default_decl_1/2, i.e.
%%             {Mode, Value, Rest, State}
%% Description: Requires whitespace before the default declaration and
%%              then delegates to parse_default_decl_1/2.
%%              [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
%%----------------------------------------------------------------------
parse_default_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl/2);
parse_default_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_default_decl_1(AfterWS, State1);
parse_default_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_default_decl/2],
                             "whitespace expected").
2375
2376
%%----------------------------------------------------------------------
%% Function  : parse_default_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Mode, Value, Rest, State}
%%             Mode = "#REQUIRED" | "#IMPLIED" | "#FIXED" | ""
%%             Value = undefined | the parsed default attribute value
%% Description: Parse a default declaration.
%%              A buffer that holds only a proper prefix of one of the
%%              "#" keywords (including a bare "#") is handed to cf/3 so
%%              that this function is resumed; any other text after "#"
%%              is a fatal error.
%%----------------------------------------------------------------------
parse_default_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
parse_default_decl_1(?STRING_REST("#", _Rest) = Bytes, State) ->
    case Bytes of
	%% A bare "#" is a prefix of all three keywords, so it has to be
	%% treated like the longer prefixes below and not as an error.
	?STRING("#") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);

	?STRING("#R") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#RE") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#REQ") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#REQU") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#REQUI") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#REQUIR") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#REQUIRE") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING_REST("#REQUIRED", Rest1) ->
	    {"#REQUIRED", undefined, Rest1, State};

	?STRING("#I") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#IM") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#IMP") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#IMPL") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#IMPLI") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#IMPLIE") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING_REST("#IMPLIED", Rest1)  ->
	    {"#IMPLIED", undefined, Rest1, State};

	?STRING("#F") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#FI") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#FIX") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING("#FIXE") ->
	    cf(Bytes, State, fun parse_default_decl_1/2);
	?STRING_REST("#FIXED", Rest1)  ->
	    %% The fixed value itself is read by parse_fixed/2.
	    parse_fixed(Rest1, State);
	_  ->
	    ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected after #")
    end;
%% No keyword: a quoted default value. The mode is reported as "".
parse_default_decl_1(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
    {DefaultValue, Rest1, State1} = parse_att_value(Rest, State, C, []),
    {"", DefaultValue, Rest1, State1};
parse_default_decl_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_default_decl_1/2],
			     "bad default declaration").
2440
2441
%%----------------------------------------------------------------------
%% Function  : parse_fixed(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {"#FIXED", DefaultValue, Rest, State}
%% Description: Parses what follows the #FIXED keyword: whitespace and
%%              then the attribute value.
%%----------------------------------------------------------------------
%% The buffer may end right after "#FIXED". Like the sibling parsers, hand
%% the empty buffer to cf/3 instead of reporting missing whitespace.
parse_fixed(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_fixed/2);
parse_fixed(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {DefaultValue, Rest, State1} = parse_att_value(Bytes, State), % parse_att_value removes leading WS
    {"#FIXED", DefaultValue, Rest, State1};
parse_fixed(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_fixed/2],
			     "whitespace expected").
2448
%%----------------------------------------------------------------------
%% Function  : parse_entity_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Entry point for an entity declaration, called with the
%%              input that follows the ENTITY keyword. Whitespace is
%%              required before the name or the "%".
%%              [70] EntityDecl ::= GEDecl | PEDecl
%%              [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
%%              [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
%%----------------------------------------------------------------------
parse_entity_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl/2);
parse_entity_decl(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_entity_decl_1(AfterWS, State1);
parse_entity_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_entity_decl/2],
                             "whitespace expected").
2467
2468
%%----------------------------------------------------------------------
%% Function  : parse_entity_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Tells the two kinds of entity declaration apart. A "%"
%%              followed by whitespace leads to parse_pe_name/2 (parameter
%%              entity). A name followed by whitespace leads to
%%              parse_entity_def/3 (general entity).
%%----------------------------------------------------------------------
parse_entity_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl_1/2);
parse_entity_decl_1(?STRING_REST("%", AfterPercent), State) ->
    case is_next_char_whitespace(AfterPercent, State) of
        false ->
            ?fatal_error(State, "whitespace expected");
        true ->
            {_Skipped, AfterWS, State1} = whitespace(AfterPercent, State, []),
            parse_pe_name(AfterWS, State1)
    end;
parse_entity_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name or % expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, State1) of
                false ->
                    ?fatal_error(State1, "whitespace expected");
                true ->
                    {_Skipped, AfterWS, State2} = whitespace(AfterName, State1, []),
                    parse_entity_def(AfterWS, State2, Name)
            end
    end;
parse_entity_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_entity_decl_1/2],
                             undefined).
2503
2504
2505
2506
%% Reads the name of a parameter entity, requires whitespace after it and
%% continues with parse_pe_def/3.
parse_pe_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, State1} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, State1) of
                false ->
                    ?fatal_error(State1, "whitespace expected");
                true ->
                    {_Skipped, AfterWS, State2} = whitespace(AfterName, State1, []),
                    parse_pe_def(AfterWS, State2, Name)
            end
    end;
parse_pe_name(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_pe_name/2],
                             undefined).
2524
2525
2526
%%----------------------------------------------------------------------
%% Function  : parse_entity_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : {Rest, State}
%% Description: Parses the definition of the general entity Name. A quoted
%%              value is stored as internal_general and reported with
%%              internalEntityDecl. An external id is stored and reported
%%              as external_general/externalEntityDecl, or, when an NDATA
%%              name is present, as unparsed/unparsedEntityDecl.
%%              [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
%%----------------------------------------------------------------------
parse_entity_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
parse_entity_def(?STRING_UNBOUND_REST(Quote, AfterQuote), State, Name) when Quote == $'; Quote == $" ->
    {Value, AfterValue, State1} = parse_entity_value(AfterQuote, State, Quote, []),
    Registered = insert_reference(Name, {internal_general, Value}, State1),
    Notified = event_callback({internalEntityDecl, Name, Value}, Registered),
    {_Skipped, AfterWS, State2} = whitespace(AfterValue, Notified, []),
    parse_def_end(AfterWS, State2);
parse_entity_def(?STRING_UNBOUND_REST(First, _) = Input, State, Name) when First == $S; First == $P ->
    {PubId, SysId, AfterId, State1} = parse_external_id(Input, State, false),
    {Ndata, AfterNdata, State2} = parse_ndata(AfterId, State1),
    %% Choose the reference entry and the event according to whether an
    %% NDATA name was found.
    {Entry, Event} =
        case Ndata of
            undefined ->
                {{external_general, {PubId, SysId}},
                 {externalEntityDecl, Name, PubId, SysId}};
            _ ->
                {{unparsed, {PubId, SysId, Ndata}},
                 {unparsedEntityDecl, Name, PubId, SysId, Ndata}}
        end,
    State3 = insert_reference(Name, Entry, State2),
    {AfterNdata, event_callback(Event, State3)};
parse_entity_def(Input, State, Name) ->
    unicode_incomplete_check([Input, State, Name, fun parse_entity_def/3],
                             "\", \', SYSTEM or PUBLIC expected").
2562
2563
%% Expects the ">" that closes a declaration and returns what follows it
%% together with the unchanged state.
parse_def_end(?STRING_REST(">", AfterDecl), State) ->
    {AfterDecl, State};
parse_def_end(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_def_end/2],
                             "> expected").
2569
2570
2571
%%----------------------------------------------------------------------
%% Function  : parse_ndata(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Ndata, Rest, State}
%%             Ndata = undefined | the result of parse_ndata_decl/2
%% Description: Looks for an optional NDATA declaration after an external
%%              id. A ">" right away means there is none (Ndata is
%%              undefined); whitespace leads on to parse_ndata_decl/2.
%%              [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata/2);
parse_ndata(?STRING_REST(">", AfterDecl), State) ->
    {undefined, AfterDecl, State};
parse_ndata(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Skipped, AfterWS, State1} = whitespace(Input, State, []),
    parse_ndata_decl(AfterWS, State1);
parse_ndata(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_ndata/2],
                             "Space before NDATA or > expected").
2590
%%----------------------------------------------------------------------
%% Function  : parse_entity_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an entity value up to the quote character Stop.
%%              Line breaks and tabs are stored as a single space; only
%%              line breaks advance line_no. Character references are
%%              resolved, general entity references are kept in "&Name;"
%%              form, and parameter entity references are handled
%%              according to the file type in State.
%%              With Stop = undefined the end of the input ends the value
%%              and Acc is returned as accumulated (not reversed).
%%----------------------------------------------------------------------
parse_entity_value(?STRING_EMPTY, State, undefined, Acc) ->
    {Acc, [], State}; %% stop clause when parsing references
parse_entity_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_entity_value/4);
%% A lone "\r" could be the first half of "\r\n"; it is handed to cf/5
%% before deciding.
parse_entity_value(?STRING("\r"), State, Stop, Acc) ->
    cf(?STRING("\r"), State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc)  ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
%% A tab becomes a space like the line breaks above, but it does not end
%% a line, so line_no is left alone.
parse_entity_value(?STRING_REST("\t", Rest), State, Stop, Acc)  ->
    parse_entity_value(Rest, State, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("&", Rest), State, Stop, Acc)  ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, false),
    case Ref of
	{character, _, CharValue}  ->
	    parse_entity_value(Rest1, State1, Stop, [CharValue | Acc]);
	%% Entity references are not expanded here; they are written back
	%% (reversed, since Acc is reversed) as "&Name;".
	{internal_general, _, Name, _} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{external_general, Name, _} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{not_found, Name} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{unparsed, Name, _} ->
	    ?fatal_error(State1, "Unparsed entity reference in entity value: " ++ Name)
    end;
parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Type} = State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
    case Type of
	normal -> %WFC: PEs in Internal Subset
	    %% The reference is an error here whether or not it resolved, so
	    %% the name must be taken from the two-element not_found tuple
	    %% as well as from the three-element results.
	    PEName = case Ref of
			 {not_found, NotFoundName} -> NotFoundName;
			 {_, FoundName, _} -> FoundName
		     end,
	    ?fatal_error(State1, "A parameter reference may not occur not within "
			 "markup declarations in the internal DTD subset: " ++ PEName);
	_ ->
	    case Ref of
		{internal_parameter, _, RefValue} ->
		    IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
		    parse_entity_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
		{external_parameter, _, {_PubId, _SysId}} ->
		    ?fatal_error(State1, "Parameter references in entity value not supported yet.");
		{not_found, Name} ->
		    case State#xmerl_sax_parser_state.skip_external_dtd of
			false ->
			    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
			true ->
			    %% Keep the unresolved reference as a parameter
			    %% entity reference ("%Name;"), not as "&Name;".
			    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "%" ++ Acc)
		    end

	    end
    end;
%% The closing quote ends the value.
parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    {lists:reverse(Acc), Rest, State};
parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc)   ->
    if
	?is_char(C) ->
	    parse_entity_value(Rest, State, Stop, [C|Acc]);
	true ->
	     ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity value: ~p", [C])))
    end;
parse_entity_value(Bytes, State, Stop, Acc)   ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4],
			     undefined).
2669
%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%%             Name = string() | undefined
%% Description: Called after the whitespace that follows an external id.
%%              A ">" here means there is no NDATA part (Name is
%%              undefined); the NDATA keyword leads on to
%%              parse_ndata_decl_1/2.
%%              [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST(">", AfterDecl), State) ->
    {undefined, AfterDecl, State};
parse_ndata_decl(?STRING_REST("NDATA", AfterKeyword), State) ->
    parse_ndata_decl_1(AfterKeyword, State);
parse_ndata_decl(Input, State) ->
    %% A buffer that is exactly a proper prefix of "NDATA" is not an error
    %% yet; it is given to cf/3 with this function as the one to resume.
    KeywordPrefixes = [?STRING("N"), ?STRING("ND"), ?STRING("NDA"), ?STRING("NDAT")],
    case lists:member(Input, KeywordPrefixes) of
        true ->
            cf(Input, State, fun parse_ndata_decl/2);
        false ->
            unicode_incomplete_check([Input, State, fun parse_ndata_decl/2],
                                     "NDATA or > expected")
    end.
2696
2697
%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%%             Name = string()
%% Description: Parse what follows the keyword 'NDATA' in an NDATA
%%              declaration: mandatory whitespace, then the name
%%              (handled by parse_ndecl_name/2).
%%----------------------------------------------------------------------
parse_ndata_decl_1(?STRING_EMPTY, State) ->
    %% Nothing is buffered after the keyword. Hand over to cf/3, as
    %% parse_notation_decl/2 does for empty input, rather than
    %% falling through to the "whitespace expected" clause.
    cf(?STRING_EMPTY, State, fun parse_ndata_decl_1/2);
parse_ndata_decl_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    parse_ndecl_name(Rest, State1);
parse_ndata_decl_1(Bytes, State) ->
    %% The function passed along must be this one: the keyword
    %% "NDATA" was already consumed by parse_ndata_decl/2, which
    %% only accepts input that starts with '>' or that keyword.
    unicode_incomplete_check([Bytes, State, fun parse_ndata_decl_1/2],
                             "whitespace expected").
2704
2705
%% Parse the name of an NDATA declaration. The first character must be
%% a valid name start; the name is then read with parse_name/3,
%% whitespace after it is skipped and parse_def_end/2 is applied to the
%% remainder. Returns {Name, Rest, State}.
parse_ndecl_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, NameState} = parse_name(Tail, State, [First]),
            {_Skipped, AfterWs, WsState} = whitespace(AfterName, NameState, []),
            {Remaining, EndState} = parse_def_end(AfterWs, WsState),
            {Name, Remaining, EndState}
    end;
parse_ndecl_name(Other, State) ->
    unicode_incomplete_check([Other, State, fun parse_ndecl_name/2],
                             undefined).
2719
%%----------------------------------------------------------------------
%% Function  : parse_pe_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : {Rest, State}
%% Description: Parse a parameter entity definition.
%%              [74] PEDef ::= EntityValue | ExternalID
%%              The entity is stored and reported under the name
%%              "%" ++ Name. A quoted value gives an internalEntityDecl
%%              event, an external id gives an externalEntityDecl event.
%%----------------------------------------------------------------------
parse_pe_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_pe_def/3);
parse_pe_def(?STRING_UNBOUND_REST(Quote, Tail), State, Name) when Quote == $'; Quote == $" ->
    %% EntityValue: the opening quote is also the expected terminator.
    {Value, AfterValue, ValueState} = parse_entity_value(Tail, State, Quote, []),
    PeName = [$% | Name],
    RefState = insert_reference(PeName, {internal_parameter, Value}, ValueState),
    CbState = event_callback({internalEntityDecl, PeName, Value}, RefState),
    {_Skipped, AfterWs, WsState} = whitespace(AfterValue, CbState, []),
    parse_def_end(AfterWs, WsState);
parse_pe_def(?STRING_UNBOUND_REST(First, _) = Input, State, Name) when First == $S; First == $P  ->
    %% ExternalID: starts with 'S' (SYSTEM) or 'P' (PUBLIC).
    {PubId, SysId, AfterId, IdState} = parse_external_id(Input, State, false),
    PeName = [$% | Name],
    RefState = insert_reference(PeName, {external_parameter, {PubId, SysId}}, IdState),
    CbState = event_callback({externalEntityDecl, PeName, PubId, SysId}, RefState),
    {_Skipped, AfterWs, WsState} = whitespace(AfterId, CbState, []),
    parse_def_end(AfterWs, WsState);
parse_pe_def(Other, State, Name) ->
    unicode_incomplete_check([Other, State, Name, fun parse_pe_def/3],
                             "\", \', SYSTEM or PUBLIC expected").
2750
2751
%%----------------------------------------------------------------------
%% Function  : parse_notation_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse a NOTATION declaration.
%%              [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
%%              This function requires and skips the whitespace before
%%              the name; the rest is handled by parse_notation_decl_1/2.
%%----------------------------------------------------------------------
parse_notation_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_decl/2);
parse_notation_decl(?STRING_UNBOUND_REST(First, _) = Input, State) when ?is_whitespace(First) ->
    {_Skipped, AfterWs, WsState} = whitespace(Input, State, []),
    parse_notation_decl_1(AfterWs, WsState);
parse_notation_decl(Other, State) ->
    unicode_incomplete_check([Other, State, fun parse_notation_decl/2],
                             "whitespace expected").
2768
2769
%% Parse the name and the identity of a NOTATION declaration and send a
%% notationDecl event with the name, public id and system id.
%% Returns {Rest, State}.
parse_notation_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, AfterName, NameState} = parse_name(Tail, State, [First]),
            {PubId, SysId, Remaining, IdState} = parse_notation_id(AfterName, NameState),
            CbState = event_callback({notationDecl, Name, PubId, SysId}, IdState),
            {Remaining, CbState}
    end;
parse_notation_decl_1(Other, State) ->
    unicode_incomplete_check([Other, State, fun parse_notation_decl_1/2],
                             undefined).
2783
%%----------------------------------------------------------------------
%% Function  : parse_notation_id(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity. Whitespace is required in
%%              front of the identity; it is skipped here and the rest
%%              is handled by parse_notation_id_1/2. A '>' directly
%%              after the notation name is not accepted.
%%              The public id case is a special variant of external id
%%              where just the public part is allowed. This is allowed
%%              if the third parameter in parse_external_id/3 is true.
%%              [83] PublicID ::= 'PUBLIC' S PubidLiteral
%%----------------------------------------------------------------------
parse_notation_id(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id/2);
parse_notation_id(?STRING_UNBOUND_REST(First, _) = Input, State) when ?is_whitespace(First) ->
    {_Skipped, AfterWs, WsState} = whitespace(Input, State, []),
    parse_notation_id_1(AfterWs, WsState);
parse_notation_id(Other, State) ->
    unicode_incomplete_check([Other, State, fun parse_notation_id/2],
                             "whitespace expected").
2807
%%----------------------------------------------------------------------
%% Function  : parse_notation_id_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity after the leading whitespace.
%%              The identity must start with 'S' or 'P' and is read by
%%              parse_external_id/3 with the third argument set to true.
%%              Whitespace after it is skipped and parse_def_end/2 is
%%              applied to the remainder.
%%----------------------------------------------------------------------
parse_notation_id_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id_1/2);
parse_notation_id_1(?STRING_UNBOUND_REST(First, _) = Input, State) when First == $S; First == $P ->
    {PubId, SysId, AfterId, IdState} = parse_external_id(Input, State, true),
    {_Skipped, AfterWs, WsState} = whitespace(AfterId, IdState, []),
    {Remaining, EndState} = parse_def_end(AfterWs, WsState),
    {PubId, SysId, Remaining, EndState};
parse_notation_id_1(Other, State) ->
    unicode_incomplete_check([Other, State, fun parse_notation_id_1/2],
                             "external id or public id expected").
2829
2830
2831%%======================================================================
2832%% Character checks and definitions
2833%%======================================================================
2834
%%----------------------------------------------------------------------
%% Classification of the code points 1..255 (16#01..16#FF).
%% The tuple holds one entry per code point and is indexed directly
%% with the code point: element(C, ?SMALL). There is no entry for code
%% point 0 or for code points above 255, so element/2 fails for those.
%% 0 - not classified,
%% 1 - base_char or ideographic,
%% 2 - combining_char or digit or extender,
%% 3 - $. or $- or $_ or $:
%%----------------------------------------------------------------------
-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
                0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).
2851
2852
%%----------------------------------------------------------------------
%% Function  : is_name_start(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a valid start of a name, i.e. an
%%              underscore, a colon or a letter according to is_letter/1.
%%              [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
is_name_start(C) ->
    C =:= $_ orelse C =:= $: orelse is_letter(C).
2866
2867
%%----------------------------------------------------------------------
%% Function  : is_name_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a valid name character.
%%              [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
%%                               | CombiningChar | Extender
%%----------------------------------------------------------------------
%% Code points covered by the ?SMALL table: every class other than 0
%% (letter, digit/combining/extender, or one of . - _ :) is a name
%% character. The range is checked in the guard instead of catching the
%% badarg that element/2 raises for an index outside the tuple.
is_name_char(C) when is_integer(C), C >= 1, C =< tuple_size(?SMALL) ->
    element(C, ?SMALL) > 0;
%% Everything else is classified with the range predicates.
is_name_char(C) ->
    is_letter(C) orelse is_digit(C) orelse
        is_combining_char(C) orelse is_extender(C).
2894
2895
%%----------------------------------------------------------------------
%% Function  : is_pubid_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a public identity character.
%%              [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
%%                                 | [-'()+,./:=?;!*#@$_%]
%%              Letters and digits are limited to the ASCII ranges of
%%              the production; is_letter/1 and is_digit/1 are not used
%%              here since they also accept characters outside them.
%%----------------------------------------------------------------------
is_pubid_char(?space) ->
    true;
is_pubid_char(?cr) ->
    true;
is_pubid_char(?lf) ->
    true;
is_pubid_char(C) when C >= $a, C =< $z ->
    true;
is_pubid_char(C) when C >= $A, C =< $Z ->
    true;
is_pubid_char(C) when C >= $0, C =< $9 ->
    true;
is_pubid_char(C) when $# =< C, C =< $% ->    %% # $ %
    true;
is_pubid_char(C) when $' =< C, C =< $/ ->    %% ' ( ) * + , - . /
    true;
is_pubid_char($!) ->
    true;
is_pubid_char($:) ->
    true;
is_pubid_char($;) ->
    true;
is_pubid_char($=) ->
    true;
is_pubid_char($?) ->                         %% listed in [13]
    true;
is_pubid_char($@) ->
    true;
is_pubid_char($_) ->
    true;
is_pubid_char(_) ->
    false.
2933
2934
%%----------------------------------------------------------------------
%% Function  : is_letter(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a letter.
%%              [84] Letter ::= BaseChar | Ideographic
%%----------------------------------------------------------------------
%% Code points covered by the ?SMALL table: class 1 marks a letter.
%% The range is checked in the guard instead of catching the badarg
%% that element/2 raises for an index outside the tuple.
is_letter(C) when is_integer(C), C >= 1, C =< tuple_size(?SMALL) ->
    element(C, ?SMALL) =:= 1;
%% Everything else is classified with the range predicates.
is_letter(C) ->
    is_base_char(C) orelse is_ideographic(C).
2952
2953
%%----------------------------------------------------------------------
%% Function  : is_base_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a basic character.
%%              [85] BaseChar
%%              The ranges are listed in ascending order, grouped by
%%              script; each group is one guard sequence.
%%----------------------------------------------------------------------
is_base_char(C) ->
    if
        %% ASCII Latin
        C >= 16#0041, C =< 16#005A;
        C >= 16#0061, C =< 16#007A -> true;
        %% ISO Latin
        C >= 16#00C0, C =< 16#00D6;
        C >= 16#00D8, C =< 16#00F6;
        C >= 16#00F8, C =< 16#00FF -> true;
        %% Accented Latin
        C >= 16#0100, C =< 16#0131;
        C >= 16#0134, C =< 16#013E;
        C >= 16#0141, C =< 16#0148;
        C >= 16#014A, C =< 16#017E;
        C >= 16#0180, C =< 16#01C3;
        C >= 16#01CD, C =< 16#01F0;
        C >= 16#01F4, C =< 16#01F5;
        C >= 16#01FA, C =< 16#0217 -> true;
        %% IPA
        C >= 16#0250, C =< 16#02A8 -> true;
        %% Spacing Modifiers
        C >= 16#02BB, C =< 16#02C1 -> true;
        %% Greek
        C =:= 16#0386;
        C >= 16#0388, C =< 16#038A;
        C =:= 16#038C;
        C >= 16#038E, C =< 16#03A1;
        C >= 16#03A3, C =< 16#03CE;
        C >= 16#03D0, C =< 16#03D6;
        C =:= 16#03DA;
        C =:= 16#03DC;
        C =:= 16#03DE;
        C =:= 16#03E0;
        C >= 16#03E2, C =< 16#03F3 -> true;
        %% Cyrillic
        C >= 16#0401, C =< 16#040C;
        C >= 16#040E, C =< 16#044F;
        C >= 16#0451, C =< 16#045C;
        C >= 16#045E, C =< 16#0481;
        C >= 16#0490, C =< 16#04C4;
        C >= 16#04C7, C =< 16#04C8;
        C >= 16#04CB, C =< 16#04CC;
        C >= 16#04D0, C =< 16#04EB;
        C >= 16#04EE, C =< 16#04F5;
        C >= 16#04F8, C =< 16#04F9 -> true;
        %% Armenian
        C >= 16#0531, C =< 16#0556;
        C =:= 16#0559;
        C >= 16#0561, C =< 16#0586 -> true;
        %% Hebrew
        C >= 16#05D0, C =< 16#05EA;
        C >= 16#05F0, C =< 16#05F2 -> true;
        %% Arabic
        C >= 16#0621, C =< 16#063A;
        C >= 16#0641, C =< 16#064A;
        C >= 16#0671, C =< 16#06B7;
        C >= 16#06BA, C =< 16#06BE;
        C >= 16#06C0, C =< 16#06CE;
        C >= 16#06D0, C =< 16#06D3;
        C =:= 16#06D5;
        C >= 16#06E5, C =< 16#06E6 -> true;
        %% Devanagari
        C >= 16#0905, C =< 16#0939;
        C =:= 16#093D;
        C >= 16#0958, C =< 16#0961 -> true;
        %% Bengali
        C >= 16#0985, C =< 16#098C;
        C >= 16#098F, C =< 16#0990;
        C >= 16#0993, C =< 16#09A8;
        C >= 16#09AA, C =< 16#09B0;
        C =:= 16#09B2;
        C >= 16#09B6, C =< 16#09B9;
        C >= 16#09DC, C =< 16#09DD;
        C >= 16#09DF, C =< 16#09E1;
        C >= 16#09F0, C =< 16#09F1 -> true;
        %% Gurmukhi
        C >= 16#0A05, C =< 16#0A0A;
        C >= 16#0A0F, C =< 16#0A10;
        C >= 16#0A13, C =< 16#0A28;
        C >= 16#0A2A, C =< 16#0A30;
        C >= 16#0A32, C =< 16#0A33;
        C >= 16#0A35, C =< 16#0A36;
        C >= 16#0A38, C =< 16#0A39;
        C >= 16#0A59, C =< 16#0A5C;
        C =:= 16#0A5E;
        C >= 16#0A72, C =< 16#0A74 -> true;
        %% Gujarati
        C >= 16#0A85, C =< 16#0A8B;
        C =:= 16#0A8D;
        C >= 16#0A8F, C =< 16#0A91;
        C >= 16#0A93, C =< 16#0AA8;
        C >= 16#0AAA, C =< 16#0AB0;
        C >= 16#0AB2, C =< 16#0AB3;
        C >= 16#0AB5, C =< 16#0AB9;
        C =:= 16#0ABD;
        C =:= 16#0AE0 -> true;
        %% Oriya
        C >= 16#0B05, C =< 16#0B0C;
        C >= 16#0B0F, C =< 16#0B10;
        C >= 16#0B13, C =< 16#0B28;
        C >= 16#0B2A, C =< 16#0B30;
        C >= 16#0B32, C =< 16#0B33;
        C >= 16#0B36, C =< 16#0B39;
        C =:= 16#0B3D;
        C >= 16#0B5C, C =< 16#0B5D;
        C >= 16#0B5F, C =< 16#0B61 -> true;
        %% Tamil
        C >= 16#0B85, C =< 16#0B8A;
        C >= 16#0B8E, C =< 16#0B90;
        C >= 16#0B92, C =< 16#0B95;
        C >= 16#0B99, C =< 16#0B9A;
        C =:= 16#0B9C;
        C >= 16#0B9E, C =< 16#0B9F;
        C >= 16#0BA3, C =< 16#0BA4;
        C >= 16#0BA8, C =< 16#0BAA;
        C >= 16#0BAE, C =< 16#0BB5;
        C >= 16#0BB7, C =< 16#0BB9 -> true;
        %% Telugu
        C >= 16#0C05, C =< 16#0C0C;
        C >= 16#0C0E, C =< 16#0C10;
        C >= 16#0C12, C =< 16#0C28;
        C >= 16#0C2A, C =< 16#0C33;
        C >= 16#0C35, C =< 16#0C39;
        C >= 16#0C60, C =< 16#0C61 -> true;
        %% Kannada
        C >= 16#0C85, C =< 16#0C8C;
        C >= 16#0C8E, C =< 16#0C90;
        C >= 16#0C92, C =< 16#0CA8;
        C >= 16#0CAA, C =< 16#0CB3;
        C >= 16#0CB5, C =< 16#0CB9;
        C =:= 16#0CDE;
        C >= 16#0CE0, C =< 16#0CE1 -> true;
        %% Malayalam
        C >= 16#0D05, C =< 16#0D0C;
        C >= 16#0D0E, C =< 16#0D10;
        C >= 16#0D12, C =< 16#0D28;
        C >= 16#0D2A, C =< 16#0D39;
        C >= 16#0D60, C =< 16#0D61 -> true;
        %% Thai
        C >= 16#0E01, C =< 16#0E2E;
        C =:= 16#0E30;
        C >= 16#0E32, C =< 16#0E33;
        C >= 16#0E40, C =< 16#0E45 -> true;
        %% Lao
        C >= 16#0E81, C =< 16#0E82;
        C =:= 16#0E84;
        C >= 16#0E87, C =< 16#0E88;
        C =:= 16#0E8A;
        C =:= 16#0E8D;
        C >= 16#0E94, C =< 16#0E97;
        C >= 16#0E99, C =< 16#0E9F;
        C >= 16#0EA1, C =< 16#0EA3;
        C =:= 16#0EA5;
        C =:= 16#0EA7;
        C >= 16#0EAA, C =< 16#0EAB;
        C >= 16#0EAD, C =< 16#0EAE;
        C =:= 16#0EB0;
        C >= 16#0EB2, C =< 16#0EB3;
        C =:= 16#0EBD;
        C >= 16#0EC0, C =< 16#0EC4 -> true;
        %% Tibetan
        C >= 16#0F40, C =< 16#0F47;
        C >= 16#0F49, C =< 16#0F69 -> true;
        %% Georgian
        C >= 16#10A0, C =< 16#10C5;
        C >= 16#10D0, C =< 16#10F6 -> true;
        %% Hangul Jamo
        C =:= 16#1100;
        C >= 16#1102, C =< 16#1103;
        C >= 16#1105, C =< 16#1107;
        C =:= 16#1109;
        C >= 16#110B, C =< 16#110C;
        C >= 16#110E, C =< 16#1112;
        C =:= 16#113C;
        C =:= 16#113E;
        C =:= 16#1140;
        C =:= 16#114C;
        C =:= 16#114E;
        C =:= 16#1150;
        C >= 16#1154, C =< 16#1155;
        C =:= 16#1159;
        C >= 16#115F, C =< 16#1161;
        C =:= 16#1163;
        C =:= 16#1165;
        C =:= 16#1167;
        C =:= 16#1169;
        C >= 16#116D, C =< 16#116E;
        C >= 16#1172, C =< 16#1173;
        C =:= 16#1175;
        C =:= 16#119E;
        C =:= 16#11A8;
        C =:= 16#11AB;
        C >= 16#11AE, C =< 16#11AF;
        C >= 16#11B7, C =< 16#11B8;
        C =:= 16#11BA;
        C >= 16#11BC, C =< 16#11C2;
        C =:= 16#11EB;
        C =:= 16#11F0;
        C =:= 16#11F9 -> true;
        %% Latin Extended Additional
        C >= 16#1E00, C =< 16#1E9B;
        C >= 16#1EA0, C =< 16#1EF9 -> true;
        %% Greek Extended
        C >= 16#1F00, C =< 16#1F15;
        C >= 16#1F18, C =< 16#1F1D;
        C >= 16#1F20, C =< 16#1F45;
        C >= 16#1F48, C =< 16#1F4D;
        C >= 16#1F50, C =< 16#1F57;
        C =:= 16#1F59;
        C =:= 16#1F5B;
        C =:= 16#1F5D;
        C >= 16#1F5F, C =< 16#1F7D;
        C >= 16#1F80, C =< 16#1FB4;
        C >= 16#1FB6, C =< 16#1FBC;
        C =:= 16#1FBE;
        C >= 16#1FC2, C =< 16#1FC4;
        C >= 16#1FC6, C =< 16#1FCC;
        C >= 16#1FD0, C =< 16#1FD3;
        C >= 16#1FD6, C =< 16#1FDB;
        C >= 16#1FE0, C =< 16#1FEC;
        C >= 16#1FF2, C =< 16#1FF4;
        C >= 16#1FF6, C =< 16#1FFC -> true;
        %% Letterlike Symbols
        C =:= 16#2126;
        C >= 16#212A, C =< 16#212B;
        C =:= 16#212E -> true;
        %% Number Forms
        C >= 16#2180, C =< 16#2182 -> true;
        %% Hiragana
        C >= 16#3041, C =< 16#3094 -> true;
        %% Katakana
        C >= 16#30A1, C =< 16#30FA -> true;
        %% Bopomofo
        C >= 16#3105, C =< 16#312C -> true;
        %% Hangul Syllables
        C >= 16#AC00, C =< 16#D7A3 -> true;
        true -> false
    end.
3165
%%----------------------------------------------------------------------
%% Function  : is_ideographic(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an ideographic letter.
%%              [86] Ideographic
%%----------------------------------------------------------------------
is_ideographic(C) ->
    %% Unified CJK Ideographs
    (C >= 16#4E00 andalso C =< 16#9FA5)
        %% CJK Symbols and Punctuation
        orelse C =:= 16#3007
        orelse (C >= 16#3021 andalso C =< 16#3029).
3178
%%----------------------------------------------------------------------
%% Function  : is_combining_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a combining character.
%%              [87] CombiningChar
%%              All ranges form one guard sequence, listed in ascending
%%              order and grouped by script.
%%----------------------------------------------------------------------
is_combining_char(C) when
      %% Combining Diacritics
      C >= 16#0300, C =< 16#0345;
      C >= 16#0360, C =< 16#0361;
      %% Cyrillic Combining Diacritics
      C >= 16#0483, C =< 16#0486;
      %% Hebrew Combining Diacritics
      C >= 16#0591, C =< 16#05A1;
      C >= 16#05A3, C =< 16#05B9;
      C >= 16#05BB, C =< 16#05BD;
      C =:= 16#05BF;
      C >= 16#05C1, C =< 16#05C2;
      C =:= 16#05C4;
      %% Arabic Combining Diacritics
      C >= 16#064B, C =< 16#0652;
      C =:= 16#0670;
      C >= 16#06D6, C =< 16#06DC;
      C >= 16#06DD, C =< 16#06DF;
      C >= 16#06E0, C =< 16#06E4;
      C >= 16#06E7, C =< 16#06E8;
      C >= 16#06EA, C =< 16#06ED;
      %% Devanagari Combining Diacritics
      C >= 16#0901, C =< 16#0903;
      C =:= 16#093C;
      C >= 16#093E, C =< 16#094C;
      C =:= 16#094D;
      C >= 16#0951, C =< 16#0954;
      C >= 16#0962, C =< 16#0963;
      %% Bengali Combining Diacritics
      C >= 16#0981, C =< 16#0983;
      C =:= 16#09BC;
      C =:= 16#09BE;
      C =:= 16#09BF;
      C >= 16#09C0, C =< 16#09C4;
      C >= 16#09C7, C =< 16#09C8;
      C >= 16#09CB, C =< 16#09CD;
      C =:= 16#09D7;
      C >= 16#09E2, C =< 16#09E3;
      %% Gurmukhi Combining Diacritics
      C =:= 16#0A02;
      C =:= 16#0A3C;
      C =:= 16#0A3E;
      C =:= 16#0A3F;
      C >= 16#0A40, C =< 16#0A42;
      C >= 16#0A47, C =< 16#0A48;
      C >= 16#0A4B, C =< 16#0A4D;
      C >= 16#0A70, C =< 16#0A71;
      %% Gujarati Combining Diacritics
      C >= 16#0A81, C =< 16#0A83;
      C =:= 16#0ABC;
      C >= 16#0ABE, C =< 16#0AC5;
      C >= 16#0AC7, C =< 16#0AC9;
      C >= 16#0ACB, C =< 16#0ACD;
      %% Oriya Combining Diacritics
      C >= 16#0B01, C =< 16#0B03;
      C =:= 16#0B3C;
      C >= 16#0B3E, C =< 16#0B43;
      C >= 16#0B47, C =< 16#0B48;
      C >= 16#0B4B, C =< 16#0B4D;
      C >= 16#0B56, C =< 16#0B57;
      %% Tamil Combining Diacritics
      C >= 16#0B82, C =< 16#0B83;
      C >= 16#0BBE, C =< 16#0BC2;
      C >= 16#0BC6, C =< 16#0BC8;
      C >= 16#0BCA, C =< 16#0BCD;
      C =:= 16#0BD7;
      %% Telugu Combining Diacritics
      C >= 16#0C01, C =< 16#0C03;
      C >= 16#0C3E, C =< 16#0C44;
      C >= 16#0C46, C =< 16#0C48;
      C >= 16#0C4A, C =< 16#0C4D;
      C >= 16#0C55, C =< 16#0C56;
      %% Kannada Combining Diacritics
      C >= 16#0C82, C =< 16#0C83;
      C >= 16#0CBE, C =< 16#0CC4;
      C >= 16#0CC6, C =< 16#0CC8;
      C >= 16#0CCA, C =< 16#0CCD;
      C >= 16#0CD5, C =< 16#0CD6;
      %% Malayalam Combining Diacritics
      C >= 16#0D02, C =< 16#0D03;
      C >= 16#0D3E, C =< 16#0D43;
      C >= 16#0D46, C =< 16#0D48;
      C >= 16#0D4A, C =< 16#0D4D;
      C =:= 16#0D57;
      %% Thai Combining Diacritics
      C =:= 16#0E31;
      C >= 16#0E34, C =< 16#0E3A;
      C >= 16#0E47, C =< 16#0E4E;
      %% Lao Combining Diacritics
      C =:= 16#0EB1;
      C >= 16#0EB4, C =< 16#0EB9;
      C >= 16#0EBB, C =< 16#0EBC;
      C >= 16#0EC8, C =< 16#0ECD;
      %% Tibetan Combining Diacritics
      C >= 16#0F18, C =< 16#0F19;
      C =:= 16#0F35;
      C =:= 16#0F37;
      C =:= 16#0F39;
      C =:= 16#0F3E;
      C =:= 16#0F3F;
      C >= 16#0F71, C =< 16#0F84;
      C >= 16#0F86, C =< 16#0F8B;
      C >= 16#0F90, C =< 16#0F95;
      C =:= 16#0F97;
      C >= 16#0F99, C =< 16#0FAD;
      C >= 16#0FB1, C =< 16#0FB7;
      C =:= 16#0FB9;
      %% Math/Technical Combining Diacritics
      C >= 16#20D0, C =< 16#20DC;
      C =:= 16#20E1;
      %% Ideographic Diacritics
      C >= 16#302A, C =< 16#302F;
      %% Hiragana/Katakana Combining Diacritics
      C =:= 16#3099;
      C =:= 16#309A ->
    true;
is_combining_char(_) ->
    false.
3282
3283
%%----------------------------------------------------------------------
%% Function  : is_digit(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a digit.
%%              [88] Digit
%%----------------------------------------------------------------------
is_digit(C) ->
    if
        C >= 16#0030, C =< 16#0039 -> true;    %% Basic ASCII
        C >= 16#0660, C =< 16#0669 -> true;    %% Arabic
        C >= 16#06F0, C =< 16#06F9 -> true;    %% Eastern Arabic-Indic
        C >= 16#0966, C =< 16#096F -> true;    %% Devanagari
        C >= 16#09E6, C =< 16#09EF -> true;    %% Bengali
        C >= 16#0A66, C =< 16#0A6F -> true;    %% Gurmukhi
        C >= 16#0AE6, C =< 16#0AEF -> true;    %% Gujarati
        C >= 16#0B66, C =< 16#0B6F -> true;    %% Oriya
        C >= 16#0BE7, C =< 16#0BEF -> true;    %% Tamil (range starts at 16#0BE7)
        C >= 16#0C66, C =< 16#0C6F -> true;    %% Telugu
        C >= 16#0CE6, C =< 16#0CEF -> true;    %% Kannada
        C >= 16#0D66, C =< 16#0D6F -> true;    %% Malayalam
        C >= 16#0E50, C =< 16#0E59 -> true;    %% Thai
        C >= 16#0ED0, C =< 16#0ED9 -> true;    %% Lao
        C >= 16#0F20, C =< 16#0F29 -> true;    %% Tibetan
        true -> false
    end.
3307
3308
%%----------------------------------------------------------------------
%% Function  : is_extender(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an extender character.
%%              [89] Extender
%%----------------------------------------------------------------------
is_extender(C) ->
    case C of
        16#00B7 -> true;                           %% Middle Dot
        16#02D0 -> true;                           %% Triangular Colon
        16#02D1 -> true;                           %% Half Triangular Colon
        16#0387 -> true;                           %% Greek Ano Teleia
        16#0640 -> true;                           %% Arabic Tatweel
        16#0E46 -> true;                           %% Thai Maiyamok
        16#0EC6 -> true;                           %% Lao Ko La
        16#3005 -> true;                           %% Ideographic Iteration Mark
        _ when C >= 16#3031, C =< 16#3035 -> true; %% Japanese Kana Repetition Marks
        _ when C >= 16#309D, C =< 16#309E -> true; %% Japanese Hiragana Iteration Marks
        _ when C >= 16#30FC, C =< 16#30FE -> true; %% Japanese Kana Iteration Marks
        _ -> false
    end.
3328
3329
3330
3331%%======================================================================
3332%% Callback and Continuation function handling
3333%%======================================================================
%%----------------------------------------------------------------------
%% Function  : event_callback(Event, State) -> Result
%% Parameters: Event = term()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Hands Event to the event fun stored in State, together
%%              with {CurrentLocation, Entity, LineNo} and the current
%%              event state, and returns State with the event state
%%              replaced by whatever the fun returned.
%%              A throw or exit raised by the fun is rethrown as
%%              {event_receiver_error, State, Term}; exits are wrapped
%%              as {'EXIT', Reason}. Exceptions of class error are not
%%              caught here.
%%----------------------------------------------------------------------
event_callback(Event, #xmerl_sax_parser_state{} = State) ->
    #xmerl_sax_parser_state{event_fun = CbFun,
                            event_state = EventState,
                            current_location = Location,
                            entity = Entity,
                            line_no = LineNo} = State,
    try CbFun(Event, {Location, Entity, LineNo}, EventState) of
        NewEventState ->
            State#xmerl_sax_parser_state{event_state = NewEventState}
    catch
        throw:ErrorTerm ->
            throw({event_receiver_error, State, ErrorTerm});
        exit:Reason ->
            throw({event_receiver_error, State, {'EXIT', Reason}})
    end.
3358
%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%% Result    : whatever NextCall returns
%% Description: Asks the continuation fun stored in State for another
%%              chunk of input, appends it to Rest and resumes parsing
%%              by calling NextCall(NewInput, NewState).
%%              Raises a fatal error when no continuation fun is set,
%%              when the fun throws or exits, or when it delivers an
%%              empty chunk.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{} = State, NextCall) ->
    #xmerl_sax_parser_state{continuation_fun = ContFun,
                            continuation_state = ContState} = State,
    %% Only the user supplied fun is protected; NextCall runs outside
    %% the try so its exceptions pass through untouched.
    Chunk =
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Chunk of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Bytes, NextContState} ->
            Input = ?APPEND_STRING(Rest, Bytes),
            NextState = State#xmerl_sax_parser_state{continuation_state = NextContState},
            NextCall(Input, NextState)
    end.
3388
%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             P = term()
%%             NextCall = fun()
%% Result    : whatever NextCall returns
%% Description: Asks the continuation fun stored in State for another
%%              chunk of input, appends it to Rest and resumes parsing
%%              by calling NextCall(NewInput, NewState, P).
%%              Raises a fatal error when no continuation fun is set,
%%              when the fun throws or exits, or when it delivers an
%%              empty chunk.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{} = State, P, NextCall) ->
    #xmerl_sax_parser_state{continuation_fun = ContFun,
                            continuation_state = ContState} = State,
    %% Only the user supplied fun is protected; NextCall runs outside
    %% the try so its exceptions pass through untouched.
    Chunk =
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Chunk of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Bytes, NextContState} ->
            Input = ?APPEND_STRING(Rest, Bytes),
            NextState = State#xmerl_sax_parser_state{continuation_state = NextContState},
            NextCall(Input, NextState, P)
    end.
3420
3421
%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P1, P2, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             P1 = term()
%%             P2 = term()
%%             NextCall = fun()
%% Result    : whatever NextCall returns
%% Description: Asks the continuation fun stored in State for another
%%              chunk of input, appends it to Rest and resumes parsing
%%              by calling NextCall(NewInput, NewState, P1, P2).
%%              Raises a fatal error when no continuation fun is set,
%%              when the fun throws or exits, or when it delivers an
%%              empty chunk.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P1, _P2, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{} = State, P1, P2, NextCall) ->
    #xmerl_sax_parser_state{continuation_fun = ContFun,
                            continuation_state = ContState} = State,
    %% Only the user supplied fun is protected; NextCall runs outside
    %% the try so its exceptions pass through untouched.
    Chunk =
        try
            ContFun(ContState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Chunk of
        {?STRING_EMPTY, _} ->
            ?fatal_error(State, "No more bytes");
        {Bytes, NextContState} ->
            Input = ?APPEND_STRING(Rest, Bytes),
            NextState = State#xmerl_sax_parser_state{continuation_state = NextContState},
            NextCall(Input, NextState, P1, P2)
    end.
3455
3456
3457
%%----------------------------------------------------------------------
%% Function  : unicode_incomplete_check(Args, ErrString) -> Result
%% Parameters: Args = [Bytes, State | RestOfArgs]
%%             Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             RestOfArgs = [term()]
%%             ErrString = string() | undefined
%% Result    : result of cf applied to Args, or a fatal error
%% Description: For binary input, tries to decode Bytes using the
%%              encoding recorded in State:
%%               - an incomplete trailing sequence means more input is
%%                 needed, so Args is passed as the argument list to
%%                 cf in this module;
%%               - an undecodable sequence is a fatal error;
%%               - otherwise ErrString is reported as a fatal error.
%%              For list input ErrString is reported directly.
%%              No branch handles ErrString = undefined when the bytes
%%              decode cleanly or when Bytes is a list.
%%----------------------------------------------------------------------
unicode_incomplete_check([Bytes, #xmerl_sax_parser_state{encoding = Enc} = State | _] = Args,
                         ErrString)
  when is_binary(Bytes) ->
    Decoded = unicode:characters_to_list(Bytes, Enc),
    case Decoded of
        {incomplete, _, _} ->
            %% Args has the shape of a cf argument list, so the arity
            %% of the call follows the length of Args.
            apply(?MODULE, cf, Args);
        {error, _Encoded, _Rest} ->
            Msg = lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])),
            ?fatal_error(State, Msg);
        _ when ErrString =/= undefined ->
            ?fatal_error(State, ErrString)
    end;
unicode_incomplete_check([Bytes, State | _], ErrString)
  when is_list(Bytes), ErrString =/= undefined ->
    ?fatal_error(State, ErrString).
3479
3480
%%----------------------------------------------------------------------
%% Function  : check_uri(Uri, CL) -> Result
%% Parameters: Uri = string()
%%             CL = string()
%% Result    : {atom(), string()}
%% Description: Classifies Uri and resolves it against the current
%%              location CL.
%%               - "http://..." is returned unchanged as {http, Uri}.
%%               - "file://..." is returned as {file, Path} with the
%%                 prefix stripped.
%%               - Anything else is treated as a file path. If CL has
%%                 a scheme (see get_uri_tag/1) the result is
%%                 {Scheme, CL ++ "/" ++ Path}; otherwise the path is
%%                 resolved as a local file according to its
%%                 filename:pathtype/1.
%%----------------------------------------------------------------------
check_uri("http://" ++ _ = Url, _CL) ->
    {http, Url};
check_uri("file://" ++ Path, _CL) ->
    {file, Path};
check_uri(Path, CL) -> % ordinary filepath other URI's not supported yet
    %% "file://" already removed when current_location set
    Scheme = get_uri_tag(CL),
    PathType = filename:pathtype(Path),
    case {Scheme, PathType} of
        {false, relative} ->
            {file, filename:join(CL, Path)};
        {false, absolute} ->
            {file, filename:absname(Path)};
        {false, volumerelative} -> % only windows
            %% Reuse the volume of the current location.
            [Vol | _] = re:split(CL, ":", [{return,list}]),
            {file, filename:join(Vol ++ ":", Path)};
        {_, _} ->
            %% CL carries a scheme: the path is appended to it
            %% whatever its path type is.
            {Scheme, CL ++ "/" ++ Path}
    end.
3519
%%----------------------------------------------------------------------
%% Function  : get_uri_tag(Uri) -> Result
%% Parameters: Uri = string()
%% Result    : atom() | false
%% Description: Returns the scheme in front of the first "://" in Uri
%%              as an atom, or false when Uri contains no "://".
%%              http / file is the only supported URI for the moment
%%----------------------------------------------------------------------
get_uri_tag(Uri) ->
    %% {parts, 2} splits at the first "://" only, so a later "://"
    %% (for instance inside a query string) stays in the remainder
    %% instead of producing a third element that no clause matches.
    case re:split(Uri, "://", [{return,list}, {parts,2}]) of
	[Tag, _] ->
	    %% NOTE(review): list_to_atom/1 creates a new atom for every
	    %% distinct scheme. If Uri can come from untrusted input this
	    %% may grow the atom table - consider mapping a fixed set of
	    %% schemes instead.
	    list_to_atom(Tag);
	[_] ->
	    false
    end.
3533
%%----------------------------------------------------------------------
%% Function  : http_get_file(Host, Port, Key) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Key = string()
%% Result    : string()
%% Description: Fetches Key from Host:Port with an HTTP/1.0 GET request
%%              and stores the response body in a newly created
%%              temporary file. Returns the name of that file.
%%              On any {error, Reason} throw raised while connecting,
%%              sending or receiving, the temporary file is closed and
%%              deleted before the throw is passed on to the caller.
%%----------------------------------------------------------------------
http_get_file(Host, Port, Key) ->
    ConnectTimeOut = 10000,
    SendTimeout = 10000,
    FilenameTempl = filename:basename(Key),

    {Filename, FD} = create_tempfile(FilenameTempl),
    %% Everything after the temp file exists runs inside the try, so a
    %% failed connect no longer leaves the file open and on disk.
    try
	Socket = create_connection(Host, Port, ConnectTimeOut),
	Request = "GET " ++ Key ++ " HTTP/1.0\r\n\r\n",
	case gen_tcp:send(Socket, Request) of
	    ok ->
		receive_msg(Socket, FD, true, SendTimeout);
	    {error, _Reason} ->
		%% Release the socket; the file is cleaned up below.
		gen_tcp:close(Socket),
		throw({error, lists:flatten(io_lib:format("Couldn't fetch http://~s:~p/~s",
							  [Host, Port, Key]))})
	end
    catch
	throw:{error, Error} ->
	    ok = file:close(FD),
	    ok = file:delete(Filename),
	    throw({error, Error})
    end,
    ok = file:close(FD),
    Filename.
3569
%%----------------------------------------------------------------------
%% Function  : receive_msg(Socket, FD, WaitForHeader, Timeout) -> Result
%% Parameters: Socket = io_device()
%%             FD = io_device()
%%             WaitForHeader = boolean() | {true, binary()}
%%             Timeout = integer()
%% Result    : ok
%% Description: Collects the messages delivered by Socket and writes
%%              the HTTP message body to FD until the peer closes the
%%              connection.
%%              While WaitForHeader is not false the incoming data is
%%              still part of the header and is dropped up to and
%%              including the first "\r\n\r\n". The header may span
%%              several packets: {true, Tail} carries the last bytes of
%%              the data seen so far, so that a terminator split over
%%              two packets is still found.
%%              Closes the socket and throws {error, Reason} on a tcp
%%              error or when no message arrives within Timeout ms.
%%----------------------------------------------------------------------
receive_msg(Socket, FD, WaitForHeader, Timeout) ->
    receive
	{tcp_closed, Socket} ->
	    ok;
	{tcp, Socket, Response} when WaitForHeader == false  ->
	    ok = file:write(FD, Response),
	    receive_msg(Socket, FD, WaitForHeader, Timeout);
	{tcp, Socket, Response} ->
	    %% Still inside the header: prepend what was kept from the
	    %% previous packet (nothing for a plain 'true').
	    Carry = case WaitForHeader of
			{true, Tail} when is_binary(Tail) -> Tail;
			_ -> <<>>
		    end,
	    Data = <<Carry/binary, Response/binary>>,
	    case binary:match(Data, <<"\r\n\r\n">>) of
		nomatch ->
		    %% Header not finished yet. At most 3 bytes of the
		    %% terminator can sit at the end of Data, so that is
		    %% all that has to be remembered.
		    Keep = min(byte_size(Data), 3),
		    NewTail = binary:part(Data, byte_size(Data), -Keep),
		    receive_msg(Socket, FD, {true, NewTail}, Timeout);
		{_Pos, _Len} ->
		    MsgBody = remove_header(Data),
		    ok = file:write(FD, MsgBody),
		    receive_msg(Socket, FD, false, Timeout)
	    end;
	{tcp_error, Socket, _Reason} ->
	    gen_tcp:close(Socket),
	    throw({error, "http connection failed"})
    after Timeout ->
	    gen_tcp:close(Socket),
	    throw({error, "http connection timedout"})
    end.
3597
3598
%% Drops everything up to and including the first CR LF CR LF sequence
%% of the given binary and returns what follows it. There is no clause
%% for a binary that runs out before the sequence is found.
remove_header(<<$\r, $\n, $\r, $\n, Body/binary>>) ->
    Body;
remove_header(<<_Skipped, Tail/binary>>) ->
    remove_header(Tail).
3603
%%----------------------------------------------------------------------
%% Function  : create_connection(Host, Port, Timeout) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Timeout = integer()
%% Result    : io_device()
%% Description: Opens a TCP connection to Host:Port that delivers
%%              received data as binaries, waiting at most Timeout
%%              for the connect to complete. Returns the socket or
%%              throws {error, Message} when the connect fails.
%%----------------------------------------------------------------------
create_connection(Host, Port, Timeout) ->
    Options = [{packet,0}, binary, {reuseaddr,true}],
    case gen_tcp:connect(Host, Port, Options, Timeout) of
        {error, Reason} ->
            Msg = io_lib:format("Can't connect to ~s:~p ~p\n",
                                [Host, Port, Reason]),
            throw({error, lists:flatten(Msg)});
        {ok, Socket} ->
            Socket
    end.
3620
%%----------------------------------------------------------------------
%% Function  : http(Url) -> Result
%% Parameters: Url = string()
%% Result    : {Host, PortInt, Key}
%% Description: Splits an "http://Host[:Port]/Path" URL into the host,
%%              the port (?HTTP_DEF_PORT when none is given) and the
%%              key, which is the path including its leading "/".
%%              Throws {error, Message} when the URL has no path, when
%%              the port is not an integer or when the host part is
%%              otherwise malformed.
%%----------------------------------------------------------------------
http("http://" ++ Address) ->
    %% Cut off the path before looking for the port separator, so that
    %% a ":" inside the path is not mistaken for one.
    {Authority, Key} = split_to_slash(Address, []),
    case string:tokens(Authority, ":") of
	[Host, Port] ->
	    try list_to_integer(Port) of
		PortInt ->
		    {Host, PortInt, Key}
	    catch
		error:badarg ->
		    throw({error, "Malformed key; port not an integer, should be http://Host:Port/path or http://Host/path"})
	    end;
	[Authority] ->
	    %% No ":" at all (the token equals the whole host part):
	    %% use default port
	    {Authority, ?HTTP_DEF_PORT, Key};
	_What ->
	    throw({error, "Malformed key; should be http://Host:Port/path or http://Host/path"})
    end.
3645
%%----------------------------------------------------------------------
%% Function  : split_to_slash(String, Acc) -> Result
%% Parameters: String = string()
%%             Acc = string()
%% Result    : {string(), string()}
%% Description: Splits String at its first "/". The first element of
%%              the result is the reversed Acc followed by the
%%              characters in front of the slash, the second element
%%              is the remainder starting with the slash.
%%              Throws {error, Message} when String has no "/".
%%----------------------------------------------------------------------
split_to_slash(String, Acc) ->
    NotSlash = fun(Char) -> Char =/= $/ end,
    case lists:splitwith(NotSlash, String) of
        {_, []} ->
            throw({error, "No Key given Host:Port/Key"});
        {Prefix, Key} ->
            {lists:reverse(Acc, Prefix), Key}
    end.
3659
3660
%%----------------------------------------------------------------------
%% Function  : create_tempfile(Template) -> Result
%% Parameters: Template = string()
%% Result    : {string(), io_device()}
%% Description: Creates and opens a new temporary file for binary
%%              writing. The file is placed in "/tmp" on unix and in
%%              the directory named by TMP (or else TEMP) on windows;
%%              its name is built from the OS pid, Template and a
%%              running number. Returns the file name and the opened
%%              device.
%%              Throws {error, Message} when the temporary directory
%%              cannot be determined or the file cannot be created.
%%----------------------------------------------------------------------
create_tempfile(Template) ->
    TmpDir =
	case os:type() of
	    {unix, _} ->
		case file:read_file_info("/tmp") of
		    {ok, _} ->
			"/tmp";
		    {error,enoent} ->
			throw({error, "/tmp doesn't exist"})
		end;
	    {win32, _} ->
		case os:getenv("TMP") of
		    false ->
			case os:getenv("TEMP") of
			    false ->
				throw({error, "Variabel TMP or TEMP doesn't exist"});
			    P2 ->
				P2
			end;
		    P1 ->
			P1
		end
	end,
    TmpNameBase = filename:join([TmpDir, os:getpid() ++ Template ++ "."]),
    create_tempfile_1(TmpNameBase, 1).

%% Tries TmpNameBase ++ "1", ++ "2", ... until a name is found that does
%% not exist yet. 'exclusive' makes the open fail with eexist instead of
%% truncating an existing file, so only that error leads to a retry.
%% Any other error is reported at once, since trying further names
%% would never succeed and the loop would not terminate.
create_tempfile_1(TmpNameBase, N) ->
    FileName = TmpNameBase ++ integer_to_list(N),
    case file:open(FileName, [write, binary, exclusive])  of
	{ok, FD} ->
	    {FileName, FD};
	{error, eexist} ->
	    create_tempfile_1(TmpNameBase, N+1);
	{error, Reason} ->
	    throw({error, lists:flatten(io_lib:format("Couldn't create temporary file ~s: ~p",
						      [FileName, Reason]))})
    end.
3701
3702
%%----------------------------------------------------------------------
%% Function  : filter_endtag_stack(EndTagStack) -> Result
%% Parameters: EndTagStack = [{term(), string(), string(),
%%                             term(), nslist(), nslist()}]
%% Result    : [string()]
%% Description: Reduces every 6-tuple on the end tag stack to its
%%              third element (the local name), keeping the order of
%%              the stack.
%%----------------------------------------------------------------------
filter_endtag_stack(EndTagStack) ->
    filter_endtag_stack(EndTagStack, []).

%% Acc holds already collected names in reverse order; they end up in
%% front of the names taken from EndTagStack.
filter_endtag_stack(EndTagStack, Acc) ->
    LocalName = fun({_, _, Name, _, _, _}) -> Name end,
    lists:reverse(Acc, lists:map(LocalName, EndTagStack)).
3717
3718
%%----------------------------------------------------------------------
%% Function  : format_error(Tag, State, Reason) -> Result
%% Parameters: Tag = atom(),
%%             State = xmerl_sax_parser_state()
%%             Reason = string()
%% Result    : {atom(), {string(), string(), integer()}, string(), [string()], event_state()}
%% Description: Builds the error tuple handed back to the caller:
%%              Tag, the position {CurrentLocation, Entity, LineNo}
%%              taken from State, Reason, the local names found on the
%%              end tag stack and the current event state.
%%----------------------------------------------------------------------
format_error(Tag, State, Reason) ->
    Location = State#xmerl_sax_parser_state.current_location,
    Entity = State#xmerl_sax_parser_state.entity,
    LineNo = State#xmerl_sax_parser_state.line_no,
    OpenTags = filter_endtag_stack(State#xmerl_sax_parser_state.end_tags),
    EventState = State#xmerl_sax_parser_state.event_state,
    {Tag, {Location, Entity, LineNo}, Reason, OpenTags, EventState}.
3737
3738