1/*  $Id$
2
3    Part of SWI-Prolog
4
5    Author:        Jan Wielemaker & Richard O'Keefe
6    E-mail:        wielemaker@science.uva.nl
7    WWW:           http://www.swi-prolog.org
8    Copyright (C): 1985-2004, University of Amsterdam
9
10    This program is free software; you can redistribute it and/or
11    modify it under the terms of the GNU General Public License
12    as published by the Free Software Foundation; either version 2
13    of the License, or (at your option) any later version.
14
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, write to the Free Software
22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23
24    As a special exception, if you link this library with other files,
25    compiled with a Free Software compiler, to produce an executable, this
26    library does not by itself cause the resulting executable to be covered
27    by the GNU General Public License. This exception does not however
28    invalidate any other reasons why the executable file might be covered by
29    the GNU General Public License.
30*/
31
32:- module(sgml_write,
33	  [ html_write/2,       	%          +Data, +Options
34	    html_write/3,		% +Stream, +Data, +Options
35	    sgml_write/2,		%          +Data, +Options
36	    sgml_write/3,		% +Stream, +Data, +Options
37	    xml_write/2,		%          +Data, +Options
38	    xml_write/3			% +Stream, +Data, +Options
39	  ]).
40:- use_module(library(lists)).
41:- use_module(library(sgml)).
42:- use_module(library(debug)).
43:- use_module(library(assoc)).
44:- use_module(library(option)).
45:- use_module(library(error)).
46
47/** <module> XML/SGML writer module
48
49This library provides the inverse functionality   of  the sgml.pl parser
50library, writing XML, SGML and HTML documents from the parsed output. It
51is intended to allow rewriting in a  different dialect or encoding or to
52perform document transformation in Prolog on the parsed representation.
53
54The current implementation is  particularly   keen  on getting character
55encoding and the use of character  entities   right.  Some work has been
56done providing layout, but space handling in   XML  and SGML make this a
57very hazardous area.
58
59The Prolog-based low-level character and  escape   handling  is the real
60bottleneck in this library and will probably be   moved  to C in a later
61stage.
62
63@see	library(http/html_write) provides a high-level library for
64	emitting HTML and XHTML.
65*/
66
67%%	xml_write(+Data, +Options) is det.
68%%	sgml_write(+Data, +Options) is det.
69%%	html_write(+Data, +Options) is det.
70%%	xml_write(+Stream, +Data, +Options) is det.
71%%	sgml_write(+Stream, +Data, +Options) is det.
72%%	html_write(+Stream, +Data, +Options) is det.
73%
74%	Write a term as created by the SGML/XML parser to a stream in
75%	SGML or XML format.  Options:
76%
77%		* dtd(DTD)
78%		The DTD.  This is needed for SGML documents that contain
79%		elements with content model EMPTY.  Characters which may
80%		not be written directly in the Stream's encoding will be
81%		written using character data entities from the DTD if at
82%		all possible, otherwise as numeric character references.
83%		Note that the DTD will NOT be written out at all; as yet
84%		there is no way to write out an internal subset,  though
85%		it would not be hard to add one.
86%
87%		* doctype(DocType)
88%		Document type for the SGML document type declaration.
89%		If omitted it is taken from the root element.  There is
90%		never any point in having this be disagree with the
91%		root element.  A <!DOCTYPE> declaration will be written
92%		if and only if at least one of doctype(_), public(_), or
93%		system(_) is provided in Options.
94%
95%		* public(PubId)
96%		The public identifier to be written in the <!DOCTYPE> line.
97%
98%		* system(SysId)
99%		The system identifier to be written in the <!DOCTYPE> line.
100%
101%		* header(Bool)
102%		If Bool is 'false', do not emit the <xml ...> header
103%		line.  (xml_write/3 only)
104%
105%		* nsmap(Map:list(Id=URI))
106%		When emitting embedded XML, assume these namespaces
107%		are already defined from the environment.  (xml_write/3
108%		only).
109%
110%		* indent(Indent)
111%		Indentation of the document (for embedding)
112%
113%		* layout(Bool)
114%		Emit/do not emit layout characters to make output
115%		readable.
116%
117%		* net(Bool)
118%		Use/do not use Null End Tags.
119%		For XML, this applies only to empty elements, so you get
120%
121%		==
122%		    <foo/>	(default, net(true))
123%		    <foo></foo>	(net(false))
124%		==
125%
126%		For SGML, this applies to empty elements, so you get
127%
128%		==
129%		    <foo>	(if foo is declared to be EMPTY in the DTD)
130%		    <foo></foo>	(default, net(false))
131%		    <foo//	(net(true))
132%		==
133%
134%		and also to elements with character content not containing /
135%
136%		==
137%		    <b>xxx</b>	(default, net(false))
138%		    <b/xxx/	(net(true)).
139%		==
140%
141%	Note that if the stream is UTF-8, the system will write special
142%	characters as UTF-8 sequences, while if it is ISO Latin-1 it
143%	will use (character) entities if there is a DTD that provides
144%	them, otherwise it will use numeric character references.
145
146xml_write(Data, Options) :-
147	current_output(Stream),
148	xml_write(Stream, Data, Options).
149
150xml_write(Stream0, Data, Options) :-
151	fix_user_stream(Stream0, Stream),
152	(   stream_property(Stream, encoding(text))
153	->  set_stream(Stream, encoding(utf8)),
154	    call_cleanup(xml_write(Stream, Data, Options),
155			 set_stream(Stream, encoding(text)))
156	;   new_state(xml, State),
157	    init_state(Options, State),
158	    get_state(State, nsmap, NSMap),
159	    add_missing_namespaces(Data, NSMap, Data1),
160	    emit_xml_encoding(Stream, Options),
161	    emit_doctype(Options, Data, Stream),
162	    write_initial_indent(State, Stream),
163	    emit(Data1, Stream, State)
164	).
165
166
167sgml_write(Data, Options) :-
168	current_output(Stream),
169	sgml_write(Stream, Data, Options).
170
171sgml_write(Stream0, Data, Options) :-
172	fix_user_stream(Stream0, Stream),
173	(   stream_property(Stream, encoding(text))
174	->  set_stream(Stream, encoding(utf8)),
175	    call_cleanup(sgml_write(Stream, Data, Options),
176			 set_stream(Stream, encoding(text)))
177	;   new_state(sgml, State),
178	    init_state(Options, State),
179	    write_initial_indent(State, Stream),
180	    emit_doctype(Options, Data, Stream),
181	    emit(Data, Stream, State)
182	).
183
184
185html_write(Data, Options) :-
186	current_output(Stream),
187	html_write(Stream, Data, Options).
188
189html_write(Stream, Data, Options) :-
190	sgml_write(Stream, Data,
191		   [ dtd(html)
192		   | Options
193		   ]).
194
195fix_user_stream(user, user_output) :- !.
196fix_user_stream(Stream, Stream).
197
198
199init_state([], _).
200init_state([H|T], State) :-
201	update_state(H, State),
202	init_state(T, State).
203
204update_state(dtd(DTD), State) :- !,
205	(   atom(DTD)
206	->  dtd(DTD, DTDObj)
207	;   DTDObj = DTD
208	),
209	set_state(State, dtd, DTDObj),
210	dtd_character_entities(DTDObj, EntityMap),
211	set_state(State, entity_map, EntityMap).
212update_state(nsmap(Map), State) :- !,
213	set_state(State, nsmap, Map).
214update_state(indent(Indent), State) :- !,
215	must_be(integer, Indent),
216	set_state(State, indent, Indent).
217update_state(layout(Bool), State) :- !,
218	must_be(boolean, Bool),
219	set_state(State, layout, Bool).
220update_state(doctype(_), _) :- !.
221update_state(public(_),  _) :- !.
222update_state(system(_),  _) :- !.
223update_state(net(Bool), State) :- !,
224	must_be(boolean, Bool),
225	set_state(State, net, Bool).
226update_state(header(Bool), _) :- !,
227	must_be(boolean, Bool).
228update_state(Option, _) :-
229	domain_error(xml_write_option, Option).
230
231%	emit_xml_encoding(+Stream, +Options)
232%
233%	Emit the XML fileheader with   encoding information. Setting the
234%	right encoding on the output stream  must be done before calling
235%	xml_write/3.
236
237emit_xml_encoding(Out, Options) :-
238	option(header(Hdr), Options, true),
239	Hdr == true, !,
240	stream_property(Out, encoding(Encoding)),
241	(   (   Encoding == utf8
242	    ;	Encoding == wchar_t
243	    )
244	->  format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
245	;   Encoding == iso_latin_1
246	->  format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
247	;   domain_error(xml_encoding, Encoding)
248	).
249emit_xml_encoding(_, _).
250
251
252%%	emit_doctype(+Options, +Data, +Stream)
253%
254%	Emit the document-type declaration.
255%	There is a problem with the first clause if we are emitting SGML:
256%	the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
257%	attribute; so the only time this is useful is when it is illegal!
258
259emit_doctype(_Options, Data, Out) :-
260	(   memberchk(element(html,Att,_), Data)
261	;   Data = element(html,Att,_)
262	),
263	memberchk(version=Version, Att),
264	!,
265	format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]).
266emit_doctype(Options, Data, Out) :-
267	(   memberchk(public(PubId), Options) -> true
268	;   PubId = (-)
269	),
270	(   memberchk(system(SysId), Options) -> true
271	;   SysId = (-)
272	),
273	\+ (PubId == (-),
274	    SysId == (-),
275	    \+ memberchk(doctype(_), Options)
276	),
277	(   Data  =   element(DocType,_,_)
278	;   memberchk(element(DocType,_,_), Data)
279	;   memberchk(doctype(DocType), Options)
280	),
281	!,
282	write_doctype(Out, DocType, PubId, SysId).
283emit_doctype(_, _, _).
284
285write_doctype(Out, DocType, -, -) :- !,
286	format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]).
287write_doctype(Out, DocType, -, SysId) :- !,
288	format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]).
289write_doctype(Out, DocType, PubId, -) :- !,
290	format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]).
291write_doctype(Out, DocType, PubId, SysId) :-
292	format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
293
294
295%%	emit(+Element, +Out, +State, +Options)
296%
297%	Emit a single element
298
299emit([], _, _) :- !.
300emit([H|T], Out, State) :- !,
301	emit(H, Out, State),
302	emit(T, Out, State).
303emit(CDATA, Out, State) :-
304	atom(CDATA), !,
305	sgml_write_content(Out, CDATA, State).
306emit(Element, Out, State) :-
307	\+ \+ emit_element(Element, Out, State).
308
309emit_element(pi(PI), Out, State) :-
310	get_state(State, entity_map, EntityMap),
311	write(Out, <?),
312	write_quoted(Out, PI, "", EntityMap),
313	(   get_state(State, dialect, xml) ->
314	    write(Out, ?>)
315	;   write(Out, >)
316	).
317emit_element(element(Name, Attributes, Content), Out, State) :-
318	att_length(Attributes, State, Alen),
319	(   Alen > 60,
320	    get_state(State, layout, true)
321	->  Sep = nl,
322	    AttIndent = 4
323	;   Sep = sp,
324	    AttIndent = 0
325	),
326	(   get_state(State, dialect, xml)
327	->  update_nsmap(Attributes, State)
328	;   true
329	),
330	put_char(Out, '<'),
331	emit_name(Name, Out, State),
332	(   AttIndent > 0
333	->  \+ \+ ( inc_indent(State, AttIndent),
334	            attributes(Attributes, Sep, Out, State)
335		  )
336	;   attributes(Attributes, Sep, Out, State)
337	),
338	content(Content, Out, Name, State).
339
340attributes([], _, _, _).
341attributes([H|T], Sep, Out, State) :-
342	(   Sep == nl
343	->  write_indent(State, Out)
344	;   put_char(Out, ' ')
345	),
346	attribute(H, Out, State),
347	attributes(T, Sep, Out, State).
348
349attribute(Name=Value, Out, State) :-
350	emit_name(Name, Out, State),
351	put_char(Out, =),
352	sgml_write_attribute(Out, Value, State).
353
354att_length(Atts, State, Len) :-
355	att_length(Atts, State, 0, Len).
356
357att_length([], _, Len, Len).
358att_length([A0|T], State, Len0, Len) :-
359	alen(A0, State, AL),
360	Len1 is Len0 + 1 + AL,
361	att_length(T, State, Len1, Len).
362
363alen(URI:Name=Value, State, Len) :- !,
364	atom_length(Value, AL),
365	vlen(Name, NL),
366	get_state(State, nsmap, Nsmap),
367	(   memberchk(NS=URI, Nsmap)
368	->  atom_length(NS, NsL)
369	;   atom_length(URI, NsL)
370	),
371	Len is AL+NL+NsL+3.
372alen(Name=Value, _, Len) :-
373	atom_length(Name, NL),
374	vlen(Value, AL),
375	Len is AL+NL+3.
376
377vlen(Value, Len) :-
378	is_list(Value), !,
379	vlen_list(Value, 0, Len).
380vlen(Value, Len) :-
381	atom_length(Value, Len).
382
383vlen_list([], L, L).
384vlen_list([H|T], L0, L) :-
385	atom_length(H, HL),
386	(   L0 == 0
387	->  L1 is L0 + HL
388	;   L1 is L0 + HL + 1
389	),
390	vlen_list(T, L1, L).
391
392
393emit_name(Name, Out, _) :-
394	atom(Name), !,
395	write(Out, Name).
396emit_name(URI:Name, Out, State) :-
397	get_state(State, nsmap, NSMap),
398	memberchk(NS=URI, NSMap), !,
399	(   NS == []
400	->  write(Out, Name)
401	;   format(Out, '~w:~w', [NS, Name])
402	).
403emit_name(Term, Out, _) :-
404	write(Out, Term).
405
406%%	update_nsmap(+Attributes, !State)
407%
408%	Modify the nsmap of State to reflect modifications due to xmlns
409%	arguments.
410
411update_nsmap(Attributes, State) :-
412	get_state(State, nsmap, Map0),
413	update_nsmap(Attributes, Map0, Map),
414	set_state(State, nsmap, Map).
415
416update_nsmap([], Map, Map).
417update_nsmap([xmlns:NS=URI|T], Map0, Map) :- !,
418	set_nsmap(NS, URI, Map0, Map1),
419	update_nsmap(T, Map1, Map).
420update_nsmap([xmlns=URI|T], Map0, Map) :- !,
421	set_nsmap([], URI, Map0, Map1),
422	update_nsmap(T, Map1, Map).
423update_nsmap([_|T], Map0, Map) :- !,
424	update_nsmap(T, Map0, Map).
425
426set_nsmap(NS, URI, Map0, Map) :-
427	select(NS=_, Map0, Map1), !,
428	Map = [NS=URI|Map1].
429set_nsmap(NS, URI, Map, [NS=URI|Map]).
430
431
432%%	content(+Content, +Out, +Element, +State, +Options)
433%
434%	Emit the content part of a structure  as well as the termination
435%	for the content. For empty content   we have three versions: XML
436%	style '/>', SGML declared EMPTY element (nothing) or normal SGML
437%	element (we must close with the same element name).
438
439content([], Out, Element, State) :- !,	% empty element
440    (   get_state(State, net, true)
441    ->  (   get_state(State, dialect, xml) ->
442            write(Out, />)
443	;   empty_element(State, Element) ->
444	    write(Out, >)
445	;   write(Out, //)
446	)
447    ;/* get_state(State, net, false) */
448	write(Out, >),
449	(   get_state(State, dialect, sgml),
450	    empty_element(State, Element)
451	->  true
452	;   emit_close(Element, Out, State)
453	)
454    ).
455content([Atom], Out, Element, State) :-
456	atom(Atom), !,
457	(   get_state(State, dialect, sgml),
458	    get_state(State, net, true),
459	    \+ sub_atom(Atom, _, _, _, /),
460	    atom_length(Atom, Len),
461	    Len < 20
462	->  write(Out, /),
463	    sgml_write_content(Out, Atom, State),
464	    write(Out, /)
465	;/* XML or not NET */
466	    write(Out, >),
467	    sgml_write_content(Out, Atom, State),
468	    emit_close(Element, Out, State)
469	).
470content(Content, Out, Element, State) :-
471	get_state(State, layout, true),
472	/* If xml:space='preserve' is present, */
473	/* we MUST NOT tamper with white space at all. */
474	\+ (Element = element(_,Atts,_),
475	    memberchk('xml:space'=preserve, Atts)
476	),
477	element_content(Content, Elements),
478	!,
479	format(Out, >, []),
480	\+ \+ (
481	    inc_indent(State),
482	    write_element_content(Elements, Out, State)
483	),
484	write_indent(State, Out),
485	emit_close(Element, Out, State).
486content(Content, Out, Element, State) :-
487	format(Out, >, []),
488	write_mixed_content(Content, Out, Element, State),
489	emit_close(Element, Out, State).
490
491
492emit_close(Element, Out, State) :-
493	write(Out, '</'),
494	emit_name(Element, Out, State),
495	write(Out, '>').
496
497
498write_mixed_content([], _, _, _).
499write_mixed_content([H|T], Out, Element, State) :-
500	write_mixed_content_element(H, Out, State),
501	write_mixed_content(T, Out, Element, State).
502
503write_mixed_content_element(H, Out, State) :-
504	(   atom(H)
505	->  sgml_write_content(Out, H, State)
506	;   functor(H, element, 3)
507	->  emit(H, Out, State)
508	;   functor(H, pi, 1)
509	->  emit(H, Out, State)
510	;   H = sdata(Data)		% cannot be written without entity!
511	->  print_message(warning, sgml_write(sdata_as_cdata(Data))),
512	    sgml_write_content(Out, Data, State)
513	;   assertion(fail)
514	).
515
516
517element_content([], []).
518element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !,
519	element_content(T0, T).
520element_content([Blank|T0], T) :-
521	atom(Blank),
522	atom_codes(Blank, Codes),
523	all_blanks(Codes),
524	element_content(T0, T).
525
526all_blanks([]).
527all_blanks([H|T]) :-
528	code_type(H, space),
529	all_blanks(T).
530
531write_element_content([], _, _).
532write_element_content([H|T], Out, State) :-
533	write_indent(State, Out),
534	emit(H, Out, State),
535	write_element_content(T, Out, State).
536
537
538		 /*******************************
539		 *	     NAMESPACES		*
540		 *******************************/
541
542%%	add_missing_namespaces(+DOM0, +NsMap, -DOM)
543%
544%	Add xmlns:NS=URI definitions to the toplevel element(s) to
545%	deal with missing namespaces.
546
547add_missing_namespaces([], _, []) :- !.
548add_missing_namespaces([H0|T0], Def, [H|T]) :- !,
549	add_missing_namespaces(H0, Def, H),
550	add_missing_namespaces(T0, Def, T).
551add_missing_namespaces(Elem0, Def, Elem) :-
552	Elem0 = element(Name, Atts0, Content), !,
553	missing_namespaces(Elem0, Def, Missing),
554	(   Missing == []
555	->  Elem = Elem0
556	;   add_missing_ns(Missing, Atts0, Atts),
557	    Elem = element(Name, Atts, Content)
558	).
559add_missing_namespaces(DOM, _, DOM).	% CDATA, etc.
560
561add_missing_ns([], Atts, Atts).
562add_missing_ns([H|T], Atts0, Atts) :-
563	generate_ns(H, NS),
564	add_missing_ns(T, [xmlns:NS=H|Atts0], Atts).
565
566%%	generate_ns(+URI, -NS) is det.
567%
568%	Generate a namespace (NS) identifier for URI.
569
570generate_ns(URI, NS) :-
571	default_ns(URI, NS), !.
572generate_ns(_, NS) :-
573	gensym(xns, NS).
574
575:- multifile
576	rdf_db:ns/2.
577
578default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
579default_ns('http://www.w3.org/1999/xhtml', xhtml).
580default_ns(URI, NS) :-
581	rdf_db:ns(NS, URI).
582
583%%	missing_namespaces(+DOM, +NSMap, -Missing)
584%
585%	Return a list of URIs appearing in DOM that are not covered
586%	by xmlns definitions.
587
588missing_namespaces(DOM, Defined, Missing) :-
589	missing_namespaces(DOM, Defined, [], Missing).
590
591missing_namespaces([], _, L, L) :- !.
592missing_namespaces([H|T], Def, L0, L) :- !,
593	missing_namespaces(H, Def, L0, L1),
594	missing_namespaces(T, Def, L1, L).
595missing_namespaces(element(Name, Atts, Content), Def, L0, L) :- !,
596	update_nsmap(Atts, Def, Def1),
597	missing_ns(Name, Def1, L0, L1),
598	missing_att_ns(Atts, Def1, L1, L2),
599	missing_namespaces(Content, Def1, L2, L).
600missing_namespaces(_, _, L, L).
601
602missing_att_ns([], _, M, M).
603missing_att_ns([Name=_|T], Def, M0, M) :-
604	missing_ns(Name, Def, M0, M1),
605	missing_att_ns(T, Def, M1, M).
606
607missing_ns(URI:_, Def, M0, M) :- !,
608	(   (   memberchk(_=URI, Def)
609	    ;	memberchk(URI, M0)
610	    ;	URI = xml		% predefined ones
611	    ;	URI = xmlns
612	    )
613	->  M = M0
614	;   M = [URI|M0]
615	).
616missing_ns(_, _, M, M).
617
618		 /*******************************
619		 *	   QUOTED WRITE		*
620		 *******************************/
621
622sgml_write_attribute(Out, Values, State) :-
623	is_list(Values), !,
624	get_state(State, entity_map, EntityMap),
625	put_char(Out, '"'),
626	write_quoted_list(Values, Out, """<&>", EntityMap),
627	put_char(Out, '"').
628sgml_write_attribute(Out, Value, State) :-
629	get_state(State, entity_map, EntityMap),
630	put_char(Out, '"'),
631	write_quoted(Out, Value, """<&>", EntityMap),
632	put_char(Out, '"').
633
634write_quoted_list([], _, _, _).
635write_quoted_list([H|T], Out, Escape, EntityMap) :-
636	write_quoted(Out, H, Escape, EntityMap),
637	(   T == []
638	->  true
639	;   put_char(Out, ' '),
640	    write_quoted_list(T, Out, Escape, EntityMap)
641	).
642
643
644sgml_write_content(Out, Value, State) :-
645	get_state(State, entity_map, EntityMap),
646	write_quoted(Out, Value, "<&>", EntityMap).
647
648
649write_quoted(Out, Atom, Escape, EntityMap) :-
650	atom_codes(Atom, Codes),
651	writeq(Codes, Out, Escape, EntityMap).
652
653
654writeq([], _, _, _).
655writeq([H|T], Out, Escape, EntityMap) :-
656	(   memberchk(H, Escape)
657	->  write_entity(H, Out, EntityMap)
658	;   H >= 256
659	->  (   stream_property(Out, encoding(Enc)),
660		unicode_encoding(Enc)
661	    ->	put_code(Out, H)
662	    ;	write_entity(H, Out, EntityMap)
663	    )
664	;   put_code(Out, H)
665	),
666	writeq(T, Out, Escape, EntityMap).
667
668unicode_encoding(utf8).
669unicode_encoding(wchar_t).
670unicode_encoding(unicode_le).
671unicode_encoding(unicode_be).
672
673write_entity(Code, Out, EntityMap) :-
674	(   get_assoc(Code, EntityMap, EntityName)
675	->  format(Out, '&~w;', [EntityName])
676	;   format(Out, '&#~w;', [Code])
677	).
678
679
680		 /*******************************
681		 *	    INDENTATION		*
682		 *******************************/
683
684write_initial_indent(State, Out) :-
685	(   get_state(State, indent, Indent),
686	    Indent > 0
687	->  emit_indent(Indent, Out)
688	;   true
689	).
690
691write_indent(State, _) :-
692	get_state(State, layout, false), !.
693write_indent(State, Out) :-
694	get_state(State, indent, Indent),
695	emit_indent(Indent, Out).
696
697emit_indent(Indent, Out) :-
698	Tabs is Indent // 8,
699	Spaces is Indent mod 8,
700	format(Out, '~N', []),
701	write_n(Tabs, '\t', Out),
702	write_n(Spaces, ' ', Out).
703
704write_n(N, Char, Out) :-
705	(   N > 0
706	->  put_char(Out, Char),
707	    N2 is N - 1,
708	    write_n(N2, Char, Out)
709	;   true
710	).
711
712inc_indent(State) :-
713	inc_indent(State, 2).
714
715inc_indent(State, Inc) :-
716	state(indent, Arg),
717	arg(Arg, State, I0),
718	I is I0 + Inc,
719	setarg(Arg, State, I).
720
721
722		 /*******************************
723		 *	   DTD HANDLING		*
724		 *******************************/
725
726%%	empty_element(+State, +Element)
727%
728%	True if Element is declared  with   EMPTY  content in the (SGML)
729%	DTD.
730
731empty_element(State, Element) :-
732	get_state(State, dtd, DTD),
733	DTD \== (-),
734	dtd_property(DTD, element(Element, _, empty)).
735
736%%	dtd_character_entities(+DTD, -Map)
737%
738%	Return an assoc mapping character entities   to their name. Note
739%	that the entity representation is a bit dubious. Entities should
740%	allow for a wide-character version and avoid the &#..; trick.
741
742dtd_character_entities(DTD, Map) :-
743	empty_assoc(Empty),
744	dtd_property(DTD, entities(Entities)),
745	fill_entity_map(Entities, DTD, Empty, Map).
746
747fill_entity_map([], _, Map, Map).
748fill_entity_map([H|T], DTD, Map0, Map) :-
749	(   dtd_property(DTD, entity(H, CharEntity)),
750	    atom(CharEntity),
751	    (	sub_atom(CharEntity, 0, _, _, '&#'),
752		sub_atom(CharEntity, _, _, 0, ';')
753	    ->  sub_atom(CharEntity, 2, _, 1, Name),
754		atom_number(Name, Code)
755	    ;	atom_length(CharEntity, 1),
756		char_code(CharEntity, Code)
757	    )
758	->  put_assoc(Code, Map0, H, Map1),
759	    fill_entity_map(T, DTD, Map1, Map)
760	;   fill_entity_map(T, DTD, Map0, Map)
761	).
762
763
764
765		 /*******************************
766		 *	      FIELDS		*
767		 *******************************/
768
769state(indent,     1).			% current indentation
770state(layout,	  2).			% use layout (true/false)
771state(dtd,        3).			% DTD for entity names
772state(entity_map, 4).			% compiled entity-map
773state(dialect,	  5).			% xml/sgml
774state(nsmap,	  6).			% defined namespaces
775state(net,	  7).			% Should null end-tags be used?
776
777new_state(Dialect,
778    state(
779	0,		% indent
780	true,		% layout
781	-,		% DTD
782	EntityMap,	% entity_map
783	Dialect,	% dialect
784	[],		% NS=Full map
785	Net		% Null End-Tags?
786    )) :-
787	(   Dialect == sgml
788	->  Net = false,
789	    empty_assoc(EntityMap)
790	;   Net = true,
791	    xml_entities(EntityMap)
792	).
793
794get_state(State, Field, Value) :-
795	state(Field, Arg),
796	arg(Arg, State, Value).
797
798set_state(State, Field, Value) :-
799	state(Field, Arg),
800	setarg(Arg, State, Value).
801
802xml_entities(Map) :-
803	list_to_assoc([ 60 - lt,
804			61 - amp,
805			62 - gt,
806			39 - apos,
807			34 - quot
808		      ], Map).
809
810
811		 /*******************************
812		 *	      MESSAGES		*
813		 *******************************/
814
815:- multifile
816	prolog:message/3.
817
818prolog:message(sgml_write(sdata_as_cdata(Data))) -->
819	[ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ].
820