1/*  $Id$
2
3    Part of SWI-Prolog SGML/XML parser
4
5    Author:  Jan Wielemaker
6    E-mail:  jan@swi.psy.uva.nl
7    WWW:     http://www.swi.psy.uva.nl/projects/SWI-Prolog/
8    Copying: LGPL-2.  See the file COPYING or http://www.gnu.org
9
10    Copyright (C) 1990-2000 SWI, University of Amsterdam. All rights reserved.
11*/
12
13:- prolog_load_context(directory, CWD),
14   working_directory(_, CWD).
15
16:- asserta(file_search_path(foreign, '..')).
17:- asserta(file_search_path(library, '..')).
18
19:- use_module(library(sgml)).
20:- use_module(library(sgml_write)).
21
22test :-					% default test
23	fp('.').
24
25test(File) :-
26	file_name_extension(_, xml, File), !,
27	load_xml_file(File, Term),
28	xml_write(user_output, Term, []).
29test(File) :-
30	file_name_extension(_, sgml, File), !,
31	load_sgml_file(File, Term),
32	sgml_write(user_output, Term, []).
33test(File) :-
34	file_name_extension(_, html, File), !,
35	load_html_file(File, Term),
36	html_write(user_output, Term, []).
37
38test(File, Into, Encoding) :-
39	file_name_extension(_, xml, File), !,
40	load_xml_file(File, Term),
41	open(Into, write, Out, [encoding(Encoding)]),
42	xml_write(Out, Term, []),
43	close(Out).
44
45fp(Dir) :-
46	atom_concat(Dir, '/*', Pattern),
47	expand_file_name(Pattern, Files),
48	(   member(File, Files),
49	    file_name_extension(_, Ext, File),
50	    ml_file(Ext),
51	    file_base_name(File, Base),
52	    \+ blocked(Base),
53	    format(user_error, '~w ... ', [Base]),
54	    (	\+ utf8(Base)
55	    ->  format(user_error, ' (ISO Latin-1) ... ', []),
56		fixed_point(File, iso_latin_1)
57	    ;	true
58	    ),
59	    format(user_error, ' (UTF-8) ... ', []),
60	    fixed_point(File, utf8),
61	    format(user_error, ' done~n', []),
62	    fail
63	;   true
64	).
65
66ml_file(xml).
67ml_file(sgml).
68ml_file(html).
69
70%%	blocked(+File)
71%
72%	List of test-files that are blocked.  These are either negative
73%	tests or tests involving SDATA.
74
75blocked('bat.sgml').
76blocked('i.sgml').
77blocked('sdata.sgml').
78blocked('cent-nul.xml').
79blocked('defent.sgml').
80blocked('comment.xml').
81blocked('badxmlent.xml').
82
83
84%%	utf8(+File)
85%
86%	File requires UTF-8.  These are files that have UTF-8 characters
87%	in element or attribute names.
88
89utf8('utf8-ru.xml').
90
91
92%%	fixed_point(+File, +Encoding)
93%
94%	Perform write/read round-trip and  validate   the  data  has not
95%	changed.
96
97fixed_point(File, Encoding) :-
98	file_name_extension(_, xml, File), !,
99	fp(File, Encoding, load_xml_file, xml_write).
100fixed_point(File, Encoding) :-
101	file_name_extension(_, sgml, File), !,
102	fp(File, Encoding, load_sgml_file, sgml_write).
103fixed_point(File, Encoding) :-
104	file_name_extension(_, html, File), !,
105	fp(File, Encoding, load_html_file, html_write).
106
107fp(File, Encoding, Load, Write) :-
108	put_char(user_error, r),
109	call(Load, File, Term),
110	tmp_file(xml, TmpFile),
111	open(TmpFile, write, TmpOut, [encoding(Encoding)]),
112	put_char(user_error, w),
113	call(Write, TmpOut, Term, []),
114	close(TmpOut),
115%	cat(TmpFile, Encoding),
116	put_char(user_error, r),
117	call(Load, TmpFile, Term2),
118	delete_file(TmpFile),
119	(   eq(Term, Term2)
120	->  true
121	;   format(user_error, 'First file:~n', []),
122	    %pp(Term),
123	    save_in_file(f1, Term),
124	    format(user_error, 'Second file:~n', []),
125	    %pp(Term2),
126	    save_in_file(f2, Term2),
127	    fail
128	).
129
130save_in_file(File, Term) :-
131	open(File, write, Out, [encoding(iso_latin_1)]),
132	current_output(C0),
133	set_output(Out),
134	pp(Term),
135	set_output(C0),
136	close(Out).
137
138
139cat(File, Encoding) :-
140	open(File, read, In, [encoding(Encoding)]),
141	copy_stream_data(In, current_output),
142	close(In).
143
144%	eq(M1, M2)
145%
146%	Test two terms for equivalence.  The following mismatches are
147%	allowed:
148%
149%		* Order of attributes
150%		* Layout in `element-only' content
151
152eq(X, X) :- !.
153eq([], []) :- !.
154eq([B|T], L) :-				% delete blanks
155	blank_atom(B), !,
156	eq(T, L).
157eq(L, [B|T]) :-
158	blank_atom(B), !,
159	eq(T, L).
160eq([H1|T1], [H2|T2]) :- !,
161	eq(H1, H2),
162	eq(T1, T2).
163eq(element(Name, A1, C1), element(Name, A2, C2)) :-
164	att_eq(A1, A2),
165	ceq(C1, C2).
166eq(A1, A2) :-
167	atom(A1),
168	atom(A2), !,
169	normalise_blanks(A1, B1),
170	normalise_blanks(A2, B2),
171	(   B1 == B2
172	->  true
173	;   format(user_error,
174		   'ERROR: CDATA differs:~n\
175		   \t~p~n\
176		   \t~p~n',
177		   [B1, B2])
178	).
179eq(X, Y) :-
180	format(user_error,
181	       'ERROR: Content differs:~n\
182	       \t~p~n\
183	       \t~p~n',
184	       [X, Y]).
185
186att_eq(A1, A2) :-			% ordering is unimportant
187	sort(A1, S),
188	sort(A2, S), !.
189att_eq(A1, A2) :-
190	format(user_error,
191	       'ERROR: Attribute lists differ:~n\
192	       \t~p~n\
193	       \t~p~n',
194	       [A1, A2]).
195
196ceq(C1, C2) :-
197	element_content(C1, E1),
198	element_content(C2, E2), !,
199	eq(E1, E2).
200ceq(C1, C2) :-
201	eq(C1, C2).
202
203element_content([], []).
204element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- !,
205	element_content(T0, T).
206element_content([Blank|T0], T) :-
207	blank_atom(Blank),
208	element_content(T0, T).
209
210blank_atom(Atom) :-
211	atom(Atom),
212	atom_codes(Atom, Codes),
213	all_blanks(Codes).
214
215all_blanks([]).
216all_blanks([H|T]) :-
217	code_type(H, space),
218	all_blanks(T).
219
220normalise_blanks(Atom, Normalised) :-
221	atom_codes(Atom, Codes),
222	eat_blanks(Codes, Codes1),
223	normalise_blanks2(Codes1, N),
224	atom_codes(Normalised, N).
225
226normalise_blanks2([], []).
227normalise_blanks2([H|T0], T) :-
228	code_type(H, space), !,
229	eat_blanks(T0, T1),
230	(   T1 == []
231	->  T = []
232	;   T = [32|T2],
233	    normalise_blanks2(T1, T2)
234	).
235normalise_blanks2([H|T0], [H|T]) :-
236	normalise_blanks2(T0, T).
237
238eat_blanks([H|T0], T) :-
239	code_type(H, space), !,
240	eat_blanks(T0, T).
241eat_blanks(L, L).
242