1/* $Id$ 2 3 Part of SWI-Prolog 4 5 Author: Jan Wielemaker & Richard O'Keefe 6 E-mail: wielemaker@science.uva.nl 7 WWW: http://www.swi-prolog.org 8 Copyright (C): 1985-2004, University of Amsterdam 9 10 This program is free software; you can redistribute it and/or 11 modify it under the terms of the GNU General Public License 12 as published by the Free Software Foundation; either version 2 13 of the License, or (at your option) any later version. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU Lesser General Public 21 License along with this library; if not, write to the Free Software 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 24 As a special exception, if you link this library with other files, 25 compiled with a Free Software compiler, to produce an executable, this 26 library does not by itself cause the resulting executable to be covered 27 by the GNU General Public License. This exception does not however 28 invalidate any other reasons why the executable file might be covered by 29 the GNU General Public License. 30*/ 31 32:- module(sgml_write, 33 [ html_write/2, % +Data, +Options 34 html_write/3, % +Stream, +Data, +Options 35 sgml_write/2, % +Data, +Options 36 sgml_write/3, % +Stream, +Data, +Options 37 xml_write/2, % +Data, +Options 38 xml_write/3 % +Stream, +Data, +Options 39 ]). 40:- use_module(library(lists)). 41:- use_module(library(sgml)). 42:- use_module(library(debug)). 43:- use_module(library(assoc)). 44:- use_module(library(option)). 45:- use_module(library(error)). 46 47/** <module> XML/SGML writer module 48 49This library provides the inverse functionality of the sgml.pl parser 50library, writing XML, SGML and HTML documents from the parsed output. It 51is intended to allow rewriting in a different dialect or encoding or to 52perform document transformation in Prolog on the parsed representation. 53 54The current implementation is particularly keen on getting character 55encoding and the use of character entities right. Some work has been 56done providing layout, but space handling in XML and SGML make this a 57very hazardous area. 58 59The Prolog-based low-level character and escape handling is the real 60bottleneck in this library and will probably be moved to C in a later 61stage. 62 63@see library(http/html_write) provides a high-level library for 64 emitting HTML and XHTML. 65*/ 66 67%% xml_write(+Data, +Options) is det. 68%% sgml_write(+Data, +Options) is det. 69%% html_write(+Data, +Options) is det. 70%% xml_write(+Stream, +Data, +Options) is det. 71%% sgml_write(+Stream, +Data, +Options) is det. 72%% html_write(+Stream, +Data, +Options) is det. 73% 74% Write a term as created by the SGML/XML parser to a stream in 75% SGML or XML format. Options: 76% 77% * dtd(DTD) 78% The DTD. This is needed for SGML documents that contain 79% elements with content model EMPTY. Characters which may 80% not be written directly in the Stream's encoding will be 81% written using character data entities from the DTD if at 82% all possible, otherwise as numeric character references. 83% Note that the DTD will NOT be written out at all; as yet 84% there is no way to write out an internal subset, though 85% it would not be hard to add one. 86% 87% * doctype(DocType) 88% Document type for the SGML document type declaration. 89% If omitted it is taken from the root element. There is 90% never any point in having this be disagree with the 91% root element. A <!DOCTYPE> declaration will be written 92% if and only if at least one of doctype(_), public(_), or 93% system(_) is provided in Options. 94% 95% * public(PubId) 96% The public identifier to be written in the <!DOCTYPE> line. 97% 98% * system(SysId) 99% The system identifier to be written in the <!DOCTYPE> line. 100% 101% * header(Bool) 102% If Bool is 'false', do not emit the <xml ...> header 103% line. (xml_write/3 only) 104% 105% * nsmap(Map:list(Id=URI)) 106% When emitting embedded XML, assume these namespaces 107% are already defined from the environment. (xml_write/3 108% only). 109% 110% * indent(Indent) 111% Indentation of the document (for embedding) 112% 113% * layout(Bool) 114% Emit/do not emit layout characters to make output 115% readable. 116% 117% * net(Bool) 118% Use/do not use Null End Tags. 119% For XML, this applies only to empty elements, so you get 120% 121% == 122% <foo/> (default, net(true)) 123% <foo></foo> (net(false)) 124% == 125% 126% For SGML, this applies to empty elements, so you get 127% 128% == 129% <foo> (if foo is declared to be EMPTY in the DTD) 130% <foo></foo> (default, net(false)) 131% <foo// (net(true)) 132% == 133% 134% and also to elements with character content not containing / 135% 136% == 137% <b>xxx</b> (default, net(false)) 138% <b/xxx/ (net(true)). 139% == 140% 141% Note that if the stream is UTF-8, the system will write special 142% characters as UTF-8 sequences, while if it is ISO Latin-1 it 143% will use (character) entities if there is a DTD that provides 144% them, otherwise it will use numeric character references. 145 146xml_write(Data, Options) :- 147 current_output(Stream), 148 xml_write(Stream, Data, Options). 149 150xml_write(Stream0, Data, Options) :- 151 fix_user_stream(Stream0, Stream), 152 ( stream_property(Stream, encoding(text)) 153 -> set_stream(Stream, encoding(utf8)), 154 call_cleanup(xml_write(Stream, Data, Options), 155 set_stream(Stream, encoding(text))) 156 ; new_state(xml, State), 157 init_state(Options, State), 158 get_state(State, nsmap, NSMap), 159 add_missing_namespaces(Data, NSMap, Data1), 160 emit_xml_encoding(Stream, Options), 161 emit_doctype(Options, Data, Stream), 162 write_initial_indent(State, Stream), 163 emit(Data1, Stream, State) 164 ). 165 166 167sgml_write(Data, Options) :- 168 current_output(Stream), 169 sgml_write(Stream, Data, Options). 170 171sgml_write(Stream0, Data, Options) :- 172 fix_user_stream(Stream0, Stream), 173 ( stream_property(Stream, encoding(text)) 174 -> set_stream(Stream, encoding(utf8)), 175 call_cleanup(sgml_write(Stream, Data, Options), 176 set_stream(Stream, encoding(text))) 177 ; new_state(sgml, State), 178 init_state(Options, State), 179 write_initial_indent(State, Stream), 180 emit_doctype(Options, Data, Stream), 181 emit(Data, Stream, State) 182 ). 183 184 185html_write(Data, Options) :- 186 current_output(Stream), 187 html_write(Stream, Data, Options). 188 189html_write(Stream, Data, Options) :- 190 sgml_write(Stream, Data, 191 [ dtd(html) 192 | Options 193 ]). 194 195fix_user_stream(user, user_output) :- !. 196fix_user_stream(Stream, Stream). 197 198 199init_state([], _). 200init_state([H|T], State) :- 201 update_state(H, State), 202 init_state(T, State). 203 204update_state(dtd(DTD), State) :- !, 205 ( atom(DTD) 206 -> dtd(DTD, DTDObj) 207 ; DTDObj = DTD 208 ), 209 set_state(State, dtd, DTDObj), 210 dtd_character_entities(DTDObj, EntityMap), 211 set_state(State, entity_map, EntityMap). 212update_state(nsmap(Map), State) :- !, 213 set_state(State, nsmap, Map). 214update_state(indent(Indent), State) :- !, 215 must_be(integer, Indent), 216 set_state(State, indent, Indent). 217update_state(layout(Bool), State) :- !, 218 must_be(boolean, Bool), 219 set_state(State, layout, Bool). 220update_state(doctype(_), _) :- !. 221update_state(public(_), _) :- !. 222update_state(system(_), _) :- !. 223update_state(net(Bool), State) :- !, 224 must_be(boolean, Bool), 225 set_state(State, net, Bool). 226update_state(header(Bool), _) :- !, 227 must_be(boolean, Bool). 228update_state(Option, _) :- 229 domain_error(xml_write_option, Option). 230 231% emit_xml_encoding(+Stream, +Options) 232% 233% Emit the XML fileheader with encoding information. Setting the 234% right encoding on the output stream must be done before calling 235% xml_write/3. 236 237emit_xml_encoding(Out, Options) :- 238 option(header(Hdr), Options, true), 239 Hdr == true, !, 240 stream_property(Out, encoding(Encoding)), 241 ( ( Encoding == utf8 242 ; Encoding == wchar_t 243 ) 244 -> format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', []) 245 ; Encoding == iso_latin_1 246 -> format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', []) 247 ; domain_error(xml_encoding, Encoding) 248 ). 249emit_xml_encoding(_, _). 250 251 252%% emit_doctype(+Options, +Data, +Stream) 253% 254% Emit the document-type declaration. 255% There is a problem with the first clause if we are emitting SGML: 256% the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version' 257% attribute; so the only time this is useful is when it is illegal! 258 259emit_doctype(_Options, Data, Out) :- 260 ( memberchk(element(html,Att,_), Data) 261 ; Data = element(html,Att,_) 262 ), 263 memberchk(version=Version, Att), 264 !, 265 format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]). 266emit_doctype(Options, Data, Out) :- 267 ( memberchk(public(PubId), Options) -> true 268 ; PubId = (-) 269 ), 270 ( memberchk(system(SysId), Options) -> true 271 ; SysId = (-) 272 ), 273 \+ (PubId == (-), 274 SysId == (-), 275 \+ memberchk(doctype(_), Options) 276 ), 277 ( Data = element(DocType,_,_) 278 ; memberchk(element(DocType,_,_), Data) 279 ; memberchk(doctype(DocType), Options) 280 ), 281 !, 282 write_doctype(Out, DocType, PubId, SysId). 283emit_doctype(_, _, _). 284 285write_doctype(Out, DocType, -, -) :- !, 286 format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]). 287write_doctype(Out, DocType, -, SysId) :- !, 288 format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]). 289write_doctype(Out, DocType, PubId, -) :- !, 290 format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]). 291write_doctype(Out, DocType, PubId, SysId) :- 292 format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]). 293 294 295%% emit(+Element, +Out, +State, +Options) 296% 297% Emit a single element 298 299emit([], _, _) :- !. 300emit([H|T], Out, State) :- !, 301 emit(H, Out, State), 302 emit(T, Out, State). 303emit(CDATA, Out, State) :- 304 atom(CDATA), !, 305 sgml_write_content(Out, CDATA, State). 306emit(Element, Out, State) :- 307 \+ \+ emit_element(Element, Out, State). 308 309emit_element(pi(PI), Out, State) :- 310 get_state(State, entity_map, EntityMap), 311 write(Out, <?), 312 write_quoted(Out, PI, "", EntityMap), 313 ( get_state(State, dialect, xml) -> 314 write(Out, ?>) 315 ; write(Out, >) 316 ). 317emit_element(element(Name, Attributes, Content), Out, State) :- 318 att_length(Attributes, State, Alen), 319 ( Alen > 60, 320 get_state(State, layout, true) 321 -> Sep = nl, 322 AttIndent = 4 323 ; Sep = sp, 324 AttIndent = 0 325 ), 326 ( get_state(State, dialect, xml) 327 -> update_nsmap(Attributes, State) 328 ; true 329 ), 330 put_char(Out, '<'), 331 emit_name(Name, Out, State), 332 ( AttIndent > 0 333 -> \+ \+ ( inc_indent(State, AttIndent), 334 attributes(Attributes, Sep, Out, State) 335 ) 336 ; attributes(Attributes, Sep, Out, State) 337 ), 338 content(Content, Out, Name, State). 339 340attributes([], _, _, _). 341attributes([H|T], Sep, Out, State) :- 342 ( Sep == nl 343 -> write_indent(State, Out) 344 ; put_char(Out, ' ') 345 ), 346 attribute(H, Out, State), 347 attributes(T, Sep, Out, State). 348 349attribute(Name=Value, Out, State) :- 350 emit_name(Name, Out, State), 351 put_char(Out, =), 352 sgml_write_attribute(Out, Value, State). 353 354att_length(Atts, State, Len) :- 355 att_length(Atts, State, 0, Len). 356 357att_length([], _, Len, Len). 358att_length([A0|T], State, Len0, Len) :- 359 alen(A0, State, AL), 360 Len1 is Len0 + 1 + AL, 361 att_length(T, State, Len1, Len). 362 363alen(URI:Name=Value, State, Len) :- !, 364 atom_length(Value, AL), 365 vlen(Name, NL), 366 get_state(State, nsmap, Nsmap), 367 ( memberchk(NS=URI, Nsmap) 368 -> atom_length(NS, NsL) 369 ; atom_length(URI, NsL) 370 ), 371 Len is AL+NL+NsL+3. 372alen(Name=Value, _, Len) :- 373 atom_length(Name, NL), 374 vlen(Value, AL), 375 Len is AL+NL+3. 376 377vlen(Value, Len) :- 378 is_list(Value), !, 379 vlen_list(Value, 0, Len). 380vlen(Value, Len) :- 381 atom_length(Value, Len). 382 383vlen_list([], L, L). 384vlen_list([H|T], L0, L) :- 385 atom_length(H, HL), 386 ( L0 == 0 387 -> L1 is L0 + HL 388 ; L1 is L0 + HL + 1 389 ), 390 vlen_list(T, L1, L). 391 392 393emit_name(Name, Out, _) :- 394 atom(Name), !, 395 write(Out, Name). 396emit_name(URI:Name, Out, State) :- 397 get_state(State, nsmap, NSMap), 398 memberchk(NS=URI, NSMap), !, 399 ( NS == [] 400 -> write(Out, Name) 401 ; format(Out, '~w:~w', [NS, Name]) 402 ). 403emit_name(Term, Out, _) :- 404 write(Out, Term). 405 406%% update_nsmap(+Attributes, !State) 407% 408% Modify the nsmap of State to reflect modifications due to xmlns 409% arguments. 410 411update_nsmap(Attributes, State) :- 412 get_state(State, nsmap, Map0), 413 update_nsmap(Attributes, Map0, Map), 414 set_state(State, nsmap, Map). 415 416update_nsmap([], Map, Map). 417update_nsmap([xmlns:NS=URI|T], Map0, Map) :- !, 418 set_nsmap(NS, URI, Map0, Map1), 419 update_nsmap(T, Map1, Map). 420update_nsmap([xmlns=URI|T], Map0, Map) :- !, 421 set_nsmap([], URI, Map0, Map1), 422 update_nsmap(T, Map1, Map). 423update_nsmap([_|T], Map0, Map) :- !, 424 update_nsmap(T, Map0, Map). 425 426set_nsmap(NS, URI, Map0, Map) :- 427 select(NS=_, Map0, Map1), !, 428 Map = [NS=URI|Map1]. 429set_nsmap(NS, URI, Map, [NS=URI|Map]). 430 431 432%% content(+Content, +Out, +Element, +State, +Options) 433% 434% Emit the content part of a structure as well as the termination 435% for the content. For empty content we have three versions: XML 436% style '/>', SGML declared EMPTY element (nothing) or normal SGML 437% element (we must close with the same element name). 438 439content([], Out, Element, State) :- !, % empty element 440 ( get_state(State, net, true) 441 -> ( get_state(State, dialect, xml) -> 442 write(Out, />) 443 ; empty_element(State, Element) -> 444 write(Out, >) 445 ; write(Out, //) 446 ) 447 ;/* get_state(State, net, false) */ 448 write(Out, >), 449 ( get_state(State, dialect, sgml), 450 empty_element(State, Element) 451 -> true 452 ; emit_close(Element, Out, State) 453 ) 454 ). 455content([Atom], Out, Element, State) :- 456 atom(Atom), !, 457 ( get_state(State, dialect, sgml), 458 get_state(State, net, true), 459 \+ sub_atom(Atom, _, _, _, /), 460 atom_length(Atom, Len), 461 Len < 20 462 -> write(Out, /), 463 sgml_write_content(Out, Atom, State), 464 write(Out, /) 465 ;/* XML or not NET */ 466 write(Out, >), 467 sgml_write_content(Out, Atom, State), 468 emit_close(Element, Out, State) 469 ). 470content(Content, Out, Element, State) :- 471 get_state(State, layout, true), 472 /* If xml:space='preserve' is present, */ 473 /* we MUST NOT tamper with white space at all. */ 474 \+ (Element = element(_,Atts,_), 475 memberchk('xml:space'=preserve, Atts) 476 ), 477 element_content(Content, Elements), 478 !, 479 format(Out, >, []), 480 \+ \+ ( 481 inc_indent(State), 482 write_element_content(Elements, Out, State) 483 ), 484 write_indent(State, Out), 485 emit_close(Element, Out, State). 486content(Content, Out, Element, State) :- 487 format(Out, >, []), 488 write_mixed_content(Content, Out, Element, State), 489 emit_close(Element, Out, State). 490 491 492emit_close(Element, Out, State) :- 493 write(Out, '</'), 494 emit_name(Element, Out, State), 495 write(Out, '>'). 496 497 498write_mixed_content([], _, _, _). 499write_mixed_content([H|T], Out, Element, State) :- 500 write_mixed_content_element(H, Out, State), 501 write_mixed_content(T, Out, Element, State). 502 503write_mixed_content_element(H, Out, State) :- 504 ( atom(H) 505 -> sgml_write_content(Out, H, State) 506 ; functor(H, element, 3) 507 -> emit(H, Out, State) 508 ; functor(H, pi, 1) 509 -> emit(H, Out, State) 510 ; H = sdata(Data) % cannot be written without entity! 511 -> print_message(warning, sgml_write(sdata_as_cdata(Data))), 512 sgml_write_content(Out, Data, State) 513 ; assertion(fail) 514 ). 515 516 517element_content([], []). 518element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !, 519 element_content(T0, T). 520element_content([Blank|T0], T) :- 521 atom(Blank), 522 atom_codes(Blank, Codes), 523 all_blanks(Codes), 524 element_content(T0, T). 525 526all_blanks([]). 527all_blanks([H|T]) :- 528 code_type(H, space), 529 all_blanks(T). 530 531write_element_content([], _, _). 532write_element_content([H|T], Out, State) :- 533 write_indent(State, Out), 534 emit(H, Out, State), 535 write_element_content(T, Out, State). 536 537 538 /******************************* 539 * NAMESPACES * 540 *******************************/ 541 542%% add_missing_namespaces(+DOM0, +NsMap, -DOM) 543% 544% Add xmlns:NS=URI definitions to the toplevel element(s) to 545% deal with missing namespaces. 546 547add_missing_namespaces([], _, []) :- !. 548add_missing_namespaces([H0|T0], Def, [H|T]) :- !, 549 add_missing_namespaces(H0, Def, H), 550 add_missing_namespaces(T0, Def, T). 551add_missing_namespaces(Elem0, Def, Elem) :- 552 Elem0 = element(Name, Atts0, Content), !, 553 missing_namespaces(Elem0, Def, Missing), 554 ( Missing == [] 555 -> Elem = Elem0 556 ; add_missing_ns(Missing, Atts0, Atts), 557 Elem = element(Name, Atts, Content) 558 ). 559add_missing_namespaces(DOM, _, DOM). % CDATA, etc. 560 561add_missing_ns([], Atts, Atts). 562add_missing_ns([H|T], Atts0, Atts) :- 563 generate_ns(H, NS), 564 add_missing_ns(T, [xmlns:NS=H|Atts0], Atts). 565 566%% generate_ns(+URI, -NS) is det. 567% 568% Generate a namespace (NS) identifier for URI. 569 570generate_ns(URI, NS) :- 571 default_ns(URI, NS), !. 572generate_ns(_, NS) :- 573 gensym(xns, NS). 574 575:- multifile 576 rdf_db:ns/2. 577 578default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi). 579default_ns('http://www.w3.org/1999/xhtml', xhtml). 580default_ns(URI, NS) :- 581 rdf_db:ns(NS, URI). 582 583%% missing_namespaces(+DOM, +NSMap, -Missing) 584% 585% Return a list of URIs appearing in DOM that are not covered 586% by xmlns definitions. 587 588missing_namespaces(DOM, Defined, Missing) :- 589 missing_namespaces(DOM, Defined, [], Missing). 590 591missing_namespaces([], _, L, L) :- !. 592missing_namespaces([H|T], Def, L0, L) :- !, 593 missing_namespaces(H, Def, L0, L1), 594 missing_namespaces(T, Def, L1, L). 595missing_namespaces(element(Name, Atts, Content), Def, L0, L) :- !, 596 update_nsmap(Atts, Def, Def1), 597 missing_ns(Name, Def1, L0, L1), 598 missing_att_ns(Atts, Def1, L1, L2), 599 missing_namespaces(Content, Def1, L2, L). 600missing_namespaces(_, _, L, L). 601 602missing_att_ns([], _, M, M). 603missing_att_ns([Name=_|T], Def, M0, M) :- 604 missing_ns(Name, Def, M0, M1), 605 missing_att_ns(T, Def, M1, M). 606 607missing_ns(URI:_, Def, M0, M) :- !, 608 ( ( memberchk(_=URI, Def) 609 ; memberchk(URI, M0) 610 ; URI = xml % predefined ones 611 ; URI = xmlns 612 ) 613 -> M = M0 614 ; M = [URI|M0] 615 ). 616missing_ns(_, _, M, M). 617 618 /******************************* 619 * QUOTED WRITE * 620 *******************************/ 621 622sgml_write_attribute(Out, Values, State) :- 623 is_list(Values), !, 624 get_state(State, entity_map, EntityMap), 625 put_char(Out, '"'), 626 write_quoted_list(Values, Out, """<&>", EntityMap), 627 put_char(Out, '"'). 628sgml_write_attribute(Out, Value, State) :- 629 get_state(State, entity_map, EntityMap), 630 put_char(Out, '"'), 631 write_quoted(Out, Value, """<&>", EntityMap), 632 put_char(Out, '"'). 633 634write_quoted_list([], _, _, _). 635write_quoted_list([H|T], Out, Escape, EntityMap) :- 636 write_quoted(Out, H, Escape, EntityMap), 637 ( T == [] 638 -> true 639 ; put_char(Out, ' '), 640 write_quoted_list(T, Out, Escape, EntityMap) 641 ). 642 643 644sgml_write_content(Out, Value, State) :- 645 get_state(State, entity_map, EntityMap), 646 write_quoted(Out, Value, "<&>", EntityMap). 647 648 649write_quoted(Out, Atom, Escape, EntityMap) :- 650 atom_codes(Atom, Codes), 651 writeq(Codes, Out, Escape, EntityMap). 652 653 654writeq([], _, _, _). 655writeq([H|T], Out, Escape, EntityMap) :- 656 ( memberchk(H, Escape) 657 -> write_entity(H, Out, EntityMap) 658 ; H >= 256 659 -> ( stream_property(Out, encoding(Enc)), 660 unicode_encoding(Enc) 661 -> put_code(Out, H) 662 ; write_entity(H, Out, EntityMap) 663 ) 664 ; put_code(Out, H) 665 ), 666 writeq(T, Out, Escape, EntityMap). 667 668unicode_encoding(utf8). 669unicode_encoding(wchar_t). 670unicode_encoding(unicode_le). 671unicode_encoding(unicode_be). 672 673write_entity(Code, Out, EntityMap) :- 674 ( get_assoc(Code, EntityMap, EntityName) 675 -> format(Out, '&~w;', [EntityName]) 676 ; format(Out, '&#~w;', [Code]) 677 ). 678 679 680 /******************************* 681 * INDENTATION * 682 *******************************/ 683 684write_initial_indent(State, Out) :- 685 ( get_state(State, indent, Indent), 686 Indent > 0 687 -> emit_indent(Indent, Out) 688 ; true 689 ). 690 691write_indent(State, _) :- 692 get_state(State, layout, false), !. 693write_indent(State, Out) :- 694 get_state(State, indent, Indent), 695 emit_indent(Indent, Out). 696 697emit_indent(Indent, Out) :- 698 Tabs is Indent // 8, 699 Spaces is Indent mod 8, 700 format(Out, '~N', []), 701 write_n(Tabs, '\t', Out), 702 write_n(Spaces, ' ', Out). 703 704write_n(N, Char, Out) :- 705 ( N > 0 706 -> put_char(Out, Char), 707 N2 is N - 1, 708 write_n(N2, Char, Out) 709 ; true 710 ). 711 712inc_indent(State) :- 713 inc_indent(State, 2). 714 715inc_indent(State, Inc) :- 716 state(indent, Arg), 717 arg(Arg, State, I0), 718 I is I0 + Inc, 719 setarg(Arg, State, I). 720 721 722 /******************************* 723 * DTD HANDLING * 724 *******************************/ 725 726%% empty_element(+State, +Element) 727% 728% True if Element is declared with EMPTY content in the (SGML) 729% DTD. 730 731empty_element(State, Element) :- 732 get_state(State, dtd, DTD), 733 DTD \== (-), 734 dtd_property(DTD, element(Element, _, empty)). 735 736%% dtd_character_entities(+DTD, -Map) 737% 738% Return an assoc mapping character entities to their name. Note 739% that the entity representation is a bit dubious. Entities should 740% allow for a wide-character version and avoid the &#..; trick. 741 742dtd_character_entities(DTD, Map) :- 743 empty_assoc(Empty), 744 dtd_property(DTD, entities(Entities)), 745 fill_entity_map(Entities, DTD, Empty, Map). 746 747fill_entity_map([], _, Map, Map). 748fill_entity_map([H|T], DTD, Map0, Map) :- 749 ( dtd_property(DTD, entity(H, CharEntity)), 750 atom(CharEntity), 751 ( sub_atom(CharEntity, 0, _, _, '&#'), 752 sub_atom(CharEntity, _, _, 0, ';') 753 -> sub_atom(CharEntity, 2, _, 1, Name), 754 atom_number(Name, Code) 755 ; atom_length(CharEntity, 1), 756 char_code(CharEntity, Code) 757 ) 758 -> put_assoc(Code, Map0, H, Map1), 759 fill_entity_map(T, DTD, Map1, Map) 760 ; fill_entity_map(T, DTD, Map0, Map) 761 ). 762 763 764 765 /******************************* 766 * FIELDS * 767 *******************************/ 768 769state(indent, 1). % current indentation 770state(layout, 2). % use layout (true/false) 771state(dtd, 3). % DTD for entity names 772state(entity_map, 4). % compiled entity-map 773state(dialect, 5). % xml/sgml 774state(nsmap, 6). % defined namespaces 775state(net, 7). % Should null end-tags be used? 776 777new_state(Dialect, 778 state( 779 0, % indent 780 true, % layout 781 -, % DTD 782 EntityMap, % entity_map 783 Dialect, % dialect 784 [], % NS=Full map 785 Net % Null End-Tags? 786 )) :- 787 ( Dialect == sgml 788 -> Net = false, 789 empty_assoc(EntityMap) 790 ; Net = true, 791 xml_entities(EntityMap) 792 ). 793 794get_state(State, Field, Value) :- 795 state(Field, Arg), 796 arg(Arg, State, Value). 797 798set_state(State, Field, Value) :- 799 state(Field, Arg), 800 setarg(Arg, State, Value). 801 802xml_entities(Map) :- 803 list_to_assoc([ 60 - lt, 804 61 - amp, 805 62 - gt, 806 39 - apos, 807 34 - quot 808 ], Map). 809 810 811 /******************************* 812 * MESSAGES * 813 *******************************/ 814 815:- multifile 816 prolog:message/3. 817 818prolog:message(sgml_write(sdata_as_cdata(Data))) --> 819 [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ]. 820