1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  2000-2020, University of Amsterdam
7                               VU University Amsterdam
8     All rights reserved.
9 
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions
12     are met:
13 
14     1. Redistributions of source code must retain the above copyright
15        notice, this list of conditions and the following disclaimer.
16 
17     2. Redistributions in binary form must reproduce the above copyright
18        notice, this list of conditions and the following disclaimer in
19        the documentation and/or other materials provided with the
20        distribution.
21 
22     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33     POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
37 
38 #define DTD_MINOR_ERRORS 1		/* get detailed errors */
39 
40 #include <SWI-Stream.h>
41 #include <SWI-Prolog.h>
42 
43 #include <stdio.h>
44 #include "dtd.h"
45 #include "catalog.h"
46 #include "model.h"
47 #include "util.h"
48 #include <errno.h>
49 #include "error.h"
50 #include <stdlib.h>
51 #include <assert.h>
52 #include <string.h>
53 #include <wctype.h>
54 
55 #define streq(s1, s2) (strcmp(s1, s2) == 0)
56 
57 #define MAX_ERRORS	50
58 #define MAX_WARNINGS	50
59 
60 #define ENDSNUL ((size_t)-1)
61 
62 		 /*******************************
63 		 *     PARSER CONTEXT DATA	*
64 		 *******************************/
65 
66 #define PD_MAGIC	0x36472ba1	/* just a number */
67 
68 typedef enum
69 { SA_FILE = 0,				/* Stop at end-of-file */
70   SA_INPUT,				/* Do not complete input */
71   SA_ELEMENT,				/* Stop after first element */
72   SA_CONTENT,				/* Stop after close */
73   SA_DECL				/* Stop after declaration */
74 } stopat;
75 
76 typedef enum
77 { EM_QUIET = 0,				/* Suppress messages */
78   EM_PRINT,				/* Print message */
79   EM_STYLE				/* include style-messages */
80 } errormode;
81 
82 typedef struct _env
83 { term_t	tail;
84   struct _env *parent;
85 } env;
86 
87 
88 typedef struct _parser_data
89 { int	      magic;			/* PD_MAGIC */
90   dtd_parser *parser;			/* parser itself */
91 
92   int	      warnings;			/* #warnings seen */
93   int	      errors;			/* #errors seen */
94   int	      max_errors;		/* error limit */
95   int	      max_warnings;		/* warning limit */
96   errormode   error_mode;		/* how to handle errors */
97   int	      positions;		/* report file-positions */
98   term_t      exception;		/* pending exception from callback */
99 
100   predicate_t on_begin;			/* begin element */
101   predicate_t on_end;			/* end element */
102   predicate_t on_cdata;			/* cdata */
103   predicate_t on_entity;		/* entity */
104   predicate_t on_pi;			/* processing instruction */
105   predicate_t on_xmlns;			/* xmlns */
106   predicate_t on_urlns;			/* url --> namespace */
107   predicate_t on_error;			/* errors */
108   predicate_t on_decl;			/* declarations */
109 
110   stopat      stopat;			/* Where to stop */
111   int	      stopped;			/* Environment is complete */
112 
113   IOSTREAM*   source;			/* Where we are reading from */
114 
115   term_t      list;			/* output term (if any) */
116   term_t      tail;			/* tail of the list */
117   env	     *stack;			/* environment stack */
118   int	      free_on_close;		/* sgml_free parser on close */
119 } parser_data;
120 
121 
122 		 /*******************************
123 		 *	      CONSTANTS		*
124 		 *******************************/
125 
126 static functor_t FUNCTOR_and2;
127 static functor_t FUNCTOR_attribute_value1;
128 static functor_t FUNCTOR_bar2;
129 static functor_t FUNCTOR_comma2;
130 static functor_t FUNCTOR_default1;
131 static functor_t FUNCTOR_dialect1;
132 static functor_t FUNCTOR_keep_prefix1;
133 static functor_t FUNCTOR_document1;
134 static functor_t FUNCTOR_dtd1;
135 static functor_t FUNCTOR_dtd2;
136 static functor_t FUNCTOR_element3;
137 static functor_t FUNCTOR_entity1;
138 static functor_t FUNCTOR_equal2;
139 static functor_t FUNCTOR_file1;
140 static functor_t FUNCTOR_file4;
141 static functor_t FUNCTOR_dstream_position4;
142 static functor_t FUNCTOR_fixed1;
143 static functor_t FUNCTOR_line1;
144 static functor_t FUNCTOR_linepos1;
145 static functor_t FUNCTOR_list1;
146 static functor_t FUNCTOR_max_errors1;
147 static functor_t FUNCTOR_syntax_error1;
148 static functor_t FUNCTOR_error2;
149 static functor_t FUNCTOR_nameof1;
150 static functor_t FUNCTOR_notation1;
151 static functor_t FUNCTOR_omit2;
152 static functor_t FUNCTOR_opt1;
153 static functor_t FUNCTOR_plus1;
154 static functor_t FUNCTOR_rep1;
155 static functor_t FUNCTOR_sgml_parser1;
156 static functor_t FUNCTOR_parse1;
157 static functor_t FUNCTOR_source1;
158 static functor_t FUNCTOR_content_length1;
159 static functor_t FUNCTOR_call2;
160 static functor_t FUNCTOR_cdata1;
161 static functor_t FUNCTOR_charpos1;
162 static functor_t FUNCTOR_charpos2;
163 static functor_t FUNCTOR_ns2;		/* :/2 */
164 static functor_t FUNCTOR_prefix2;	/* ns/2. This is a bit confusing but ns2 was already taken */
165 static functor_t FUNCTOR_space1;
166 static functor_t FUNCTOR_pi1;
167 static functor_t FUNCTOR_sdata1;
168 static functor_t FUNCTOR_ndata1;
169 static functor_t FUNCTOR_number1;
170 static functor_t FUNCTOR_syntax_errors1;
171 static functor_t FUNCTOR_xml_no_ns1;
172 static functor_t FUNCTOR_minus2;
173 static functor_t FUNCTOR_positions1;
174 static functor_t FUNCTOR_position1;
175 static functor_t FUNCTOR_event_class1;
176 static functor_t FUNCTOR_doctype1;
177 static functor_t FUNCTOR_allowed1;
178 static functor_t FUNCTOR_context1;
179 static functor_t FUNCTOR_defaults1;
180 static functor_t FUNCTOR_shorttag1;
181 static functor_t FUNCTOR_case_sensitive_attributes1;
182 static functor_t FUNCTOR_case_preserving_attributes1;
183 static functor_t FUNCTOR_system_entities1;
184 static functor_t FUNCTOR_max_memory1;
185 static functor_t FUNCTOR_qualify_attributes1;
186 static functor_t FUNCTOR_encoding1;
187 static functor_t FUNCTOR_xmlns1;
188 static functor_t FUNCTOR_xmlns2;
189 
190 static atom_t ATOM_true;
191 static atom_t ATOM_false;
192 static atom_t ATOM_cdata;
193 static atom_t ATOM_rcdata;
194 static atom_t ATOM_pcdata;
195 static atom_t ATOM_empty;
196 static atom_t ATOM_any;
197 static atom_t ATOM_position;
198 static atom_t ATOM_atom;
199 static atom_t ATOM_string;
200 
201 #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
202 
203 static void
initConstants()204 initConstants()
205 {
206   FUNCTOR_sgml_parser1	 = mkfunctor("sgml_parser", 1);
207   FUNCTOR_equal2	 = mkfunctor("=", 2);
208   FUNCTOR_dtd1		 = mkfunctor("dtd", 1);
209   FUNCTOR_element3	 = mkfunctor("element", 3);
210   FUNCTOR_entity1	 = mkfunctor("entity", 1);
211   FUNCTOR_document1	 = mkfunctor("document", 1);
212   FUNCTOR_dtd2		 = mkfunctor("dtd", 2);
213   FUNCTOR_omit2		 = mkfunctor("omit", 2);
214   FUNCTOR_and2		 = mkfunctor("&", 2);
215   FUNCTOR_comma2	 = mkfunctor(",", 2);
216   FUNCTOR_bar2		 = mkfunctor("|", 2);
217   FUNCTOR_opt1		 = mkfunctor("?", 1);
218   FUNCTOR_rep1		 = mkfunctor("*", 1);
219   FUNCTOR_plus1		 = mkfunctor("+", 1);
220   FUNCTOR_default1	 = mkfunctor("default", 1);
221   FUNCTOR_fixed1	 = mkfunctor("fixed", 1);
222   FUNCTOR_list1		 = mkfunctor("list", 1);
223   FUNCTOR_nameof1	 = mkfunctor("nameof", 1);
224   FUNCTOR_notation1	 = mkfunctor("notation", 1);
225   FUNCTOR_file1		 = mkfunctor("file", 1);
226   FUNCTOR_file4		 = mkfunctor("file", 4);
227   FUNCTOR_line1		 = mkfunctor("line", 1);
228   FUNCTOR_linepos1	 = mkfunctor("linepos", 1);
229   FUNCTOR_dialect1	 = mkfunctor("dialect", 1);
230   FUNCTOR_keep_prefix1	 = mkfunctor("keep_prefix", 1);
231   FUNCTOR_max_errors1	 = mkfunctor("max_errors", 1);
232   FUNCTOR_parse1	 = mkfunctor("parse", 1);
233   FUNCTOR_source1	 = mkfunctor("source", 1);
234   FUNCTOR_content_length1= mkfunctor("content_length", 1);
235   FUNCTOR_call2		 = mkfunctor("call", 2);
236   FUNCTOR_cdata1	 = mkfunctor("cdata", 1);
237   FUNCTOR_attribute_value1 = mkfunctor("attribute_value", 1);
238   FUNCTOR_charpos1	 = mkfunctor("charpos", 1);
239   FUNCTOR_charpos2	 = mkfunctor("charpos", 2);
240   FUNCTOR_ns2		 = mkfunctor(":", 2);
241   FUNCTOR_prefix2	 = mkfunctor("ns", 2);
242   FUNCTOR_space1	 = mkfunctor("space", 1);
243   FUNCTOR_pi1		 = mkfunctor("pi", 1);
244   FUNCTOR_sdata1	 = mkfunctor("sdata", 1);
245   FUNCTOR_ndata1	 = mkfunctor("ndata", 1);
246   FUNCTOR_number1	 = mkfunctor("number", 1);
247   FUNCTOR_syntax_errors1 = mkfunctor("syntax_errors", 1);
248   FUNCTOR_syntax_error1  = mkfunctor("syntax_error", 1);
249   FUNCTOR_error2         = mkfunctor("error", 2);
250   FUNCTOR_xml_no_ns1     = mkfunctor("xml_no_ns", 1);
251   FUNCTOR_minus2	 = mkfunctor("-", 2);
252   FUNCTOR_position1	 = mkfunctor("position", 1);
253   FUNCTOR_positions1	 = mkfunctor("positions", 1);
254   FUNCTOR_event_class1	 = mkfunctor("event_class", 1);
255   FUNCTOR_doctype1       = mkfunctor("doctype", 1);
256   FUNCTOR_allowed1       = mkfunctor("allowed", 1);
257   FUNCTOR_context1       = mkfunctor("context", 1);
258   FUNCTOR_defaults1	 = mkfunctor("defaults", 1);
259   FUNCTOR_shorttag1	 = mkfunctor("shorttag", 1);
260   FUNCTOR_case_sensitive_attributes1 = mkfunctor("case_sensitive_attributes", 1);
261   FUNCTOR_case_preserving_attributes1 = mkfunctor("case_preserving_attributes", 1);
262   FUNCTOR_system_entities1 = mkfunctor("system_entities", 1);
263   FUNCTOR_max_memory1	 = mkfunctor("max_memory", 1);
264   FUNCTOR_qualify_attributes1 = mkfunctor("qualify_attributes", 1);
265   FUNCTOR_encoding1	 = mkfunctor("encoding", 1);
266   FUNCTOR_xmlns1	 = mkfunctor("xmlns", 1);
267   FUNCTOR_xmlns2	 = mkfunctor("xmlns", 2);
268   FUNCTOR_dstream_position4 = PL_new_functor(PL_new_atom("$stream_position"), 4);
269 
270   ATOM_true = PL_new_atom("true");
271   ATOM_false = PL_new_atom("false");
272   ATOM_cdata = PL_new_atom("cdata");
273   ATOM_rcdata = PL_new_atom("rcdata");
274   ATOM_pcdata = PL_new_atom("#pcdata");
275   ATOM_empty = PL_new_atom("empty");
276   ATOM_any = PL_new_atom("any");
277   ATOM_atom = PL_new_atom("atom");
278   ATOM_string = PL_new_atom("string");
279   ATOM_position = PL_new_atom("#position");
280 }
281 
282 static int on_data(dtd_parser *p, data_type type, int len, const wchar_t *data);
283 
284 
285 		 /*******************************
286 		 *	       ACCESS		*
287 		 *******************************/
288 
289 static int
unify_parser(term_t parser,dtd_parser * p)290 unify_parser(term_t parser, dtd_parser *p)
291 { return PL_unify_term(parser, PL_FUNCTOR, FUNCTOR_sgml_parser1,
292 		         PL_POINTER, p);
293 }
294 
295 
296 static int
get_parser(term_t parser,dtd_parser ** p)297 get_parser(term_t parser, dtd_parser **p)
298 { if ( PL_is_functor(parser, FUNCTOR_sgml_parser1) )
299   { term_t a = PL_new_term_ref();
300     void *ptr;
301 
302     _PL_get_arg(1, parser, a);
303     if ( PL_get_pointer(a, &ptr) )
304     { dtd_parser *tmp = ptr;
305 
306       if ( tmp->magic == SGML_PARSER_MAGIC )
307       { *p = tmp;
308 
309         return TRUE;
310       }
311       return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
312     }
313   }
314 
315   return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
316 }
317 
318 
319 static int
unify_dtd(term_t t,dtd * dtd)320 unify_dtd(term_t t, dtd *dtd)
321 { if ( dtd->doctype )
322     return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
323 			 PL_POINTER, dtd,
324 			 PL_NWCHARS, (size_t)-1, dtd->doctype);
325   else
326     return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
327 			 PL_POINTER, dtd,
328 			 PL_VARIABLE);
329 }
330 
331 
332 static int
get_dtd(term_t t,dtd ** dtdp)333 get_dtd(term_t t, dtd **dtdp)
334 { if ( PL_is_functor(t, FUNCTOR_dtd2) )
335   { term_t a = PL_new_term_ref();
336     void *ptr;
337 
338     _PL_get_arg(1, t, a);
339     if ( PL_get_pointer(a, &ptr) )
340     { dtd *tmp = ptr;
341 
342       if ( tmp->magic == SGML_DTD_MAGIC )
343       { *dtdp = tmp;
344 
345         return TRUE;
346       }
347       return sgml2pl_error(ERR_EXISTENCE, "dtd", t);
348     }
349   }
350 
351   return sgml2pl_error(ERR_TYPE, "dtd", t);
352 }
353 
354 
355 		 /*******************************
356 		 *	      NEW/FREE		*
357 		 *******************************/
358 
359 static foreign_t
pl_new_sgml_parser(term_t ref,term_t options)360 pl_new_sgml_parser(term_t ref, term_t options)
361 { term_t head = PL_new_term_ref();
362   term_t tail = PL_copy_term_ref(options);
363   term_t tmp  = PL_new_term_ref();
364 
365   dtd *dtd = NULL;
366   dtd_parser *p;
367 
368   while ( PL_get_list(tail, head, tail) )
369   { if ( PL_is_functor(head, FUNCTOR_dtd1) )
370     { _PL_get_arg(1, head, tmp);
371 
372       if ( PL_is_variable(tmp) )	/* dtd(X) */
373       { dtd = new_dtd(NULL);		/* no known doctype */
374 	dtd->references++;
375 	unify_dtd(tmp, dtd);
376       } else if ( !get_dtd(tmp, &dtd) )
377 	return FALSE;
378     }
379   }
380   if ( !PL_get_nil(tail) )
381     return sgml2pl_error(ERR_TYPE, "list", tail);
382 
383   p = new_dtd_parser(dtd);
384 
385   return unify_parser(ref, p);
386 }
387 
388 
389 static foreign_t
pl_free_sgml_parser(term_t parser)390 pl_free_sgml_parser(term_t parser)
391 { dtd_parser *p;
392 
393   if ( get_parser(parser, &p) )
394   { free_dtd_parser(p);
395     return TRUE;
396   }
397 
398   return FALSE;
399 }
400 
401 
402 static foreign_t
pl_new_dtd(term_t doctype,term_t ref)403 pl_new_dtd(term_t doctype, term_t ref)
404 { ichar *dt;
405   dtd *dtd;
406 
407   if ( !PL_get_wchars(doctype, NULL, &dt, CVT_ATOM|CVT_EXCEPTION) )
408     return FALSE;
409 
410   if ( !(dtd=new_dtd(dt)) )
411     return FALSE;
412 
413   dtd->references++;
414 
415   return unify_dtd(ref, dtd);
416 }
417 
418 
419 static foreign_t
pl_free_dtd(term_t t)420 pl_free_dtd(term_t t)
421 { dtd *dtd;
422 
423   if ( get_dtd(t, &dtd) )
424   { free_dtd(dtd);
425     return TRUE;
426   }
427 
428   return FALSE;
429 }
430 
431 		 /*******************************
432 		 *	   DATA EXCHANGE	*
433 		 *******************************/
434 
435 static int
put_atom_wchars(term_t t,wchar_t const * s)436 put_atom_wchars(term_t t, wchar_t const *s)
437 { PL_put_variable(t);
438   return PL_unify_wchars(t, PL_ATOM, ENDSNUL, s);
439 }
440 
441 
442 		 /*******************************
443 		 *	    PROPERTIES		*
444 		 *******************************/
445 
446 static foreign_t
pl_set_sgml_parser(term_t parser,term_t option)447 pl_set_sgml_parser(term_t parser, term_t option)
448 { dtd_parser *p;
449 
450   if ( !get_parser(parser, &p) )
451     return FALSE;
452 
453   if ( PL_is_functor(option, FUNCTOR_file1) )
454   { term_t a = PL_new_term_ref();
455     wchar_t *file;
456     dtd_symbol *fs;
457 
458     _PL_get_arg(1, option, a);
459     if ( !PL_get_wchars(a, NULL, &file, CVT_ATOM|CVT_EXCEPTION) )
460       return FALSE;
461     fs = dtd_add_symbol(p->dtd, file);	/* symbol will be freed */
462     set_file_dtd_parser(p, IN_FILE, fs->name);
463   } else if ( PL_is_functor(option, FUNCTOR_line1) )
464   { term_t a = PL_new_term_ref();
465 
466     _PL_get_arg(1, option, a);
467     if ( !PL_get_integer_ex(a, &p->location.line) )
468       return FALSE;
469   } else if ( PL_is_functor(option, FUNCTOR_linepos1) )
470   { term_t a = PL_new_term_ref();
471 
472     _PL_get_arg(1, option, a);
473     if ( !PL_get_integer_ex(a, &p->location.linepos) )
474       return FALSE;
475   } else if ( PL_is_functor(option, FUNCTOR_charpos1) )
476   { term_t a = PL_new_term_ref();
477 
478     _PL_get_arg(1, option, a);
479     if ( !PL_get_long_ex(a, &p->location.charpos) )
480       return FALSE;
481   } else if ( PL_is_functor(option, FUNCTOR_position1) )
482   { term_t a = PL_new_term_ref();
483 
484     _PL_get_arg(1, option, a);
485     if ( PL_is_functor(a, FUNCTOR_dstream_position4) )
486     { term_t arg = PL_new_term_ref();
487 
488       if ( !PL_get_arg(1,a,arg) || !PL_get_long_ex(arg, &p->location.charpos) ||
489 	   !PL_get_arg(2,a,arg) || !PL_get_integer_ex(arg,  &p->location.line) ||
490 	   !PL_get_arg(3,a,arg) || !PL_get_integer_ex(arg,  &p->location.linepos))
491 	return FALSE;
492     } else
493       return PL_type_error("stream_position", a);
494   } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
495   { term_t a = PL_new_term_ref();
496     char *s;
497 
498     _PL_get_arg(1, option, a);
499     if ( !PL_get_atom_chars(a, &s) )
500       return sgml2pl_error(ERR_TYPE, "atom", a);
501 
502     if ( streq(s, "xml") )
503       set_dialect_dtd(p->dtd, p, DL_XML);
504     else if ( streq(s, "xmlns") )
505       set_dialect_dtd(p->dtd, p, DL_XMLNS);
506     else if ( streq(s, "sgml") )
507       set_dialect_dtd(p->dtd, p, DL_SGML);
508     else if ( streq(s, "html") || streq(s, "html4") )
509       set_dialect_dtd(p->dtd, p, DL_HTML);
510     else if ( streq(s, "html5") )
511       set_dialect_dtd(p->dtd, p, DL_HTML5);
512     else if ( streq(s, "xhtml") )
513       set_dialect_dtd(p->dtd, p, DL_XHTML);
514     else if ( streq(s, "xhtml5") )
515       set_dialect_dtd(p->dtd, p, DL_XHTML5);
516     else
517       return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
518   } else if ( PL_is_functor(option, FUNCTOR_space1) )
519   { term_t a = PL_new_term_ref();
520     char *s;
521 
522     _PL_get_arg(1, option, a);
523     if ( !PL_get_atom_chars(a, &s) )
524       return sgml2pl_error(ERR_TYPE, "atom", a);
525 
526     if ( streq(s, "preserve") )
527       p->dtd->space_mode = SP_PRESERVE;
528     else if ( streq(s, "default") )
529       p->dtd->space_mode = SP_DEFAULT;
530     else if ( streq(s, "remove") )
531       p->dtd->space_mode = SP_REMOVE;
532     else if ( streq(s, "sgml") )
533       p->dtd->space_mode = SP_SGML;
534     else if ( streq(s, "strict") )
535       p->dtd->space_mode = SP_STRICT;
536 
537     else
538       return sgml2pl_error(ERR_DOMAIN, "space", a);
539   } else if ( PL_is_functor(option, FUNCTOR_defaults1) )
540   { term_t a = PL_new_term_ref();
541     int val;
542 
543     _PL_get_arg(1, option, a);
544     if ( !PL_get_bool(a, &val) )
545       return sgml2pl_error(ERR_TYPE, "boolean", a);
546 
547     if ( val )
548       p->flags &= ~SGML_PARSER_NODEFS;
549     else
550       p->flags |= SGML_PARSER_NODEFS;
551   } else if ( PL_is_functor(option, FUNCTOR_qualify_attributes1) )
552   { term_t a = PL_new_term_ref();
553     int val;
554 
555     _PL_get_arg(1, option, a);
556     if ( !PL_get_bool(a, &val) )
557       return sgml2pl_error(ERR_TYPE, "boolean", a);
558 
559     if ( val )
560       p->flags |= SGML_PARSER_QUALIFY_ATTS;
561     else
562       p->flags &= ~SGML_PARSER_QUALIFY_ATTS;
563   } else if ( PL_is_functor(option, FUNCTOR_shorttag1) )
564   { term_t a = PL_new_term_ref();
565     int val;
566 
567     _PL_get_arg(1, option, a);
568     if ( !PL_get_bool(a, &val) )
569       return sgml2pl_error(ERR_TYPE, "boolean", a);
570 
571     set_option_dtd(p->dtd, OPT_SHORTTAG, val);
572   } else if ( PL_is_functor(option, FUNCTOR_case_sensitive_attributes1) )
573   { term_t a = PL_new_term_ref();
574     int val;
575 
576     _PL_get_arg(1, option, a);
577     if ( !PL_get_bool(a, &val) )
578       return sgml2pl_error(ERR_TYPE, "boolean", a);
579 
580     set_option_dtd(p->dtd, OPT_CASE_SENSITIVE_ATTRIBUTES, val);
581   } else if ( PL_is_functor(option, FUNCTOR_case_preserving_attributes1) )
582   { term_t a = PL_new_term_ref();
583     int val;
584 
585     _PL_get_arg(1, option, a);
586     if ( !PL_get_bool(a, &val) )
587       return sgml2pl_error(ERR_TYPE, "boolean", a);
588 
589     set_option_dtd(p->dtd, OPT_CASE_PRESERVING_ATTRIBUTES, val);
590   } else if ( PL_is_functor(option, FUNCTOR_system_entities1) )
591   { term_t a = PL_new_term_ref();
592     int val;
593 
594     _PL_get_arg(1, option, a);
595     if ( !PL_get_bool(a, &val) )
596       return sgml2pl_error(ERR_TYPE, "boolean", a);
597 
598     set_option_dtd(p->dtd, OPT_SYSTEM_ENTITIES, val);
599   } else if ( PL_is_functor(option, FUNCTOR_max_memory1) )
600   { term_t a = PL_new_term_ref();
601     int val;
602 
603     _PL_get_arg(1, option, a);
604     if ( !PL_get_integer(a, &val) )
605       return sgml2pl_error(ERR_TYPE, "integer", a);
606 
607     p->max_memory = val;
608     if ( p->buffer )
609       p->buffer->limit = val;
610     if ( p->cdata )
611       p->cdata->limit = val;
612   } else if ( PL_is_functor(option, FUNCTOR_number1) )
613   { term_t a = PL_new_term_ref();
614     char *s;
615 
616     _PL_get_arg(1, option, a);
617     if ( !PL_get_atom_chars(a, &s) )
618       return sgml2pl_error(ERR_TYPE, "atom", a);
619 
620     if ( streq(s, "token") )
621       p->dtd->number_mode = NU_TOKEN;
622     else if ( streq(s, "integer") )
623       p->dtd->number_mode = NU_INTEGER;
624     else
625       return sgml2pl_error(ERR_DOMAIN, "number", a);
626   } else if ( PL_is_functor(option, FUNCTOR_encoding1) )
627   { term_t a = PL_new_term_ref();
628     char *val;
629 
630     _PL_get_arg(1, option, a);
631     if ( !PL_get_atom_chars(a, &val) )
632       return sgml2pl_error(ERR_TYPE, "atom", a);
633     if ( !xml_set_encoding(p, val) )
634       return sgml2pl_error(ERR_DOMAIN, "encoding", a);
635   } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
636   { term_t a = PL_new_term_ref();
637     ichar *s;
638 
639     _PL_get_arg(1, option, a);
640     if ( PL_is_variable(a) )
641     { p->enforce_outer_element = NULL;
642     } else
643     { if ( !PL_get_wchars(a, NULL, &s, CVT_ATOM) )
644 	return sgml2pl_error(ERR_TYPE, "atom_or_variable", a);
645 
646       p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
647     }
648   } else if ( PL_is_functor(option, FUNCTOR_xmlns1) )
649   { term_t a = PL_new_term_ref();
650     ichar ns[1] = {0};
651     ichar *uri;
652 
653     _PL_get_arg(1, option, a);
654     if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) )
655       return FALSE;
656 
657     xmlns_push(p, ns, uri);
658   } else if ( PL_is_functor(option, FUNCTOR_xmlns2) )
659   { term_t a = PL_new_term_ref();
660     ichar *ns, *uri;
661 
662     _PL_get_arg(1, option, a);
663     if ( !PL_get_wchars(a, NULL, &ns, CVT_ATOM|CVT_EXCEPTION) )
664       return FALSE;
665     _PL_get_arg(2, option, a);
666     if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) )
667       return FALSE;
668 
669     xmlns_push(p, ns, uri);
670   } else if ( PL_is_functor(option, FUNCTOR_keep_prefix1) )
671   { term_t a = PL_new_term_ref();
672     int val;
673 
674     _PL_get_arg(1, option, a);
675     if ( !PL_get_bool(a, &val) )
676       return sgml2pl_error(ERR_TYPE, "boolean", a);
677     set_option_dtd(p->dtd, OPT_KEEP_PREFIX, val);
678   } else
679     return sgml2pl_error(ERR_DOMAIN, "sgml_parser_option", option);
680 
681   return TRUE;
682 }
683 
684 
685 static dtd_srcloc *
file_location(dtd_parser * p,dtd_srcloc * l)686 file_location(dtd_parser *p, dtd_srcloc *l)
687 { while(l->parent && l->type != IN_FILE)
688     l = l->parent;
689 
690   return l;
691 }
692 
693 
694 static foreign_t
pl_get_sgml_parser(term_t parser,term_t option)695 pl_get_sgml_parser(term_t parser, term_t option)
696 { dtd_parser *p;
697 
698   if ( !get_parser(parser, &p) )
699     return FALSE;
700 
701   if ( PL_is_functor(option, FUNCTOR_charpos1) )
702   { term_t a = PL_new_term_ref();
703 
704     _PL_get_arg(1, option, a);
705     return PL_unify_integer(a, file_location(p, &p->startloc)->charpos);
706   } else if ( PL_is_functor(option, FUNCTOR_line1) )
707   { term_t a = PL_new_term_ref();
708 
709     _PL_get_arg(1, option, a);
710     return PL_unify_integer(a, file_location(p, &p->startloc)->line);
711   } else if ( PL_is_functor(option, FUNCTOR_charpos2) )
712   { term_t a = PL_new_term_ref();
713 
714     if ( PL_get_arg(1, option, a) &&
715 	 PL_unify_integer(a, file_location(p, &p->startloc)->charpos) &&
716 	 PL_get_arg(2, option, a) &&
717 	 PL_unify_integer(a, file_location(p, &p->location)->charpos) )
718       return TRUE;
719     else
720       return FALSE;
721   } else if ( PL_is_functor(option, FUNCTOR_file1) )
722   { dtd_srcloc *l = file_location(p, &p->location);
723 
724     if ( l->type == IN_FILE && l->name.file )
725     { term_t a = PL_new_term_ref();
726 
727       _PL_get_arg(1, option, a);
728       return PL_unify_wchars(a, PL_ATOM, ENDSNUL, l->name.file);
729     }
730   } else if ( PL_is_functor(option, FUNCTOR_source1) )
731   { parser_data *pd = p->closure;
732 
733     if ( pd && pd->magic == PD_MAGIC && pd->source )
734     { term_t a = PL_new_term_ref();
735 
736       _PL_get_arg(1, option, a);
737       return PL_unify_stream(a, pd->source);
738     }
739   } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
740   { term_t a = PL_new_term_ref();
741 
742     _PL_get_arg(1, option, a);
743     switch(p->dtd->dialect)
744     { case DL_SGML:
745 	return PL_unify_atom_chars(a, "sgml");
746       case DL_HTML:
747 	return PL_unify_atom_chars(a, "html");
748       case DL_HTML5:
749 	return PL_unify_atom_chars(a, "html5");
750       case DL_XHTML:
751 	return PL_unify_atom_chars(a, "xhtml");
752       case DL_XHTML5:
753 	return PL_unify_atom_chars(a, "xhtml5");
754       case DL_XML:
755 	return PL_unify_atom_chars(a, "xml");
756       case DL_XMLNS:
757 	return PL_unify_atom_chars(a, "xmlns");
758     }
759   } else if ( PL_is_functor(option, FUNCTOR_event_class1) )
760   { term_t a = PL_new_term_ref();
761 
762     _PL_get_arg(1, option, a);
763     switch(p->event_class)
764     { case EV_EXPLICIT:
765 	return PL_unify_atom_chars(a, "explicit");
766       case EV_OMITTED:
767 	return PL_unify_atom_chars(a, "omitted");
768       case EV_SHORTTAG:
769 	return PL_unify_atom_chars(a, "shorttag");
770       case EV_SHORTREF:
771 	return PL_unify_atom_chars(a, "shortref");
772     }
773   } else if ( PL_is_functor(option, FUNCTOR_dtd1) )
774   { term_t a = PL_new_term_ref();
775 
776     _PL_get_arg(1, option, a);
777 
778     return unify_dtd(a, p->dtd);
779   } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
780   { term_t a = PL_new_term_ref();
781 
782     _PL_get_arg(1, option, a);
783     if ( p->enforce_outer_element )
784       return PL_unify_wchars(a, PL_ATOM, ENDSNUL,
785 			     p->enforce_outer_element->name);
786     else
787       return TRUE;			/* leave variable */
788   } else if ( PL_is_functor(option, FUNCTOR_allowed1) )
789   { term_t tail, head, tmp;
790     sgml_environment *env = p->environments;
791 
792     if ( !(tail = PL_new_term_ref()) ||
793 	 !(head = PL_new_term_ref()) ||
794 	 !(tmp = PL_new_term_ref()) )
795       return FALSE;
796 
797     _PL_get_arg(1, option, tail);
798 
799     if ( env )
800     { for( ; env; env = env->parent)
801       { dtd_element *buf[256];		/* MAX_VISITED! */
802 	int n = sizeof(buf)/sizeof(dtd_element *); /* not yet used! */
803 	int i;
804 
805 	state_allows_for(env->state, buf, &n);
806 
807 	for(i=0; i<n; i++)
808 	{ int rc;
809 
810 	  if ( buf[i] == CDATA_ELEMENT )
811 	    rc = PL_put_atom_chars(tmp, "#pcdata");
812 	  else
813 	    rc = put_atom_wchars(tmp, buf[i]->name->name);
814 
815 	  if ( !rc ||
816 	       !PL_unify_list(tail, head, tail) ||
817 	       !PL_unify(head, tmp) )
818 	    return FALSE;
819 	}
820 
821 	if ( !env->element->structure ||
822 	     !env->element->structure->omit_close )
823 	  break;
824       }
825     } else if ( p->enforce_outer_element )
826     { put_atom_wchars(tmp, p->enforce_outer_element->name);
827 
828       if ( !PL_unify_list(tail, head, tail) ||
829 	   !PL_unify(head, tmp) )
830 	return FALSE;
831     }
832 
833     return PL_unify_nil(tail);
834   } else if ( PL_is_functor(option, FUNCTOR_context1) )
835   { term_t tail = PL_new_term_ref();
836     term_t head = PL_new_term_ref();
837     term_t tmp = PL_new_term_ref();
838     sgml_environment *env = p->environments;
839 
840     _PL_get_arg(1, option, tail);
841 
842     for( ; env; env = env->parent)
843     { put_atom_wchars(tmp, env->element->name->name);
844 
845       if ( !PL_unify_list(tail, head, tail) ||
846 	   !PL_unify(head, tmp) )
847 	return FALSE;
848     }
849 
850     return PL_unify_nil(tail);
851   } else
852     return sgml2pl_error(ERR_DOMAIN, "parser_option", option);
853 
854   return FALSE;
855 }
856 
857 
858 static int
call_prolog(parser_data * pd,predicate_t pred,term_t av)859 call_prolog(parser_data *pd, predicate_t pred, term_t av)
860 { qid_t qid = PL_open_query(NULL, PL_Q_PASS_EXCEPTION, pred, av);
861   int rc = PL_next_solution(qid);
862 
863   PL_close_query(qid);
864 
865   if ( rc )
866   { pd->exception = FALSE;
867   } else
868   { if ( (pd->exception = PL_exception(0)) )
869       pd->stopped = TRUE;
870   }
871 
872   return rc;
873 }
874 
875 
876 static void
end_frame(fid_t fid,term_t ex)877 end_frame(fid_t fid, term_t ex)
878 { if ( ex )
879     PL_close_foreign_frame(fid);
880   else
881     PL_discard_foreign_frame(fid);
882 }
883 
884 
885 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
886 put_url(dtd_parser *p, term_t t, const ichar *url)
887     Store the url-part of a name-space qualifier in term.  We call
888     xml:xmlns(-Canonical, +Full) trying to resolve the specified
889     namespace to an internal canonical namespace.
890 
891     We do a little caching as there will generally be only a very
892     small pool of urls in use.  We assume the url-pointers we get
893     life for the time of the parser.  It might be possible that
894     multiple url pointers point to the same url, but this only clobbers
895     the cache a little.
896 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
897 
898 #define URL_CACHE 4			/* # entries cached */
899 
900 typedef struct
901 { const ichar *url;			/* URL pointer */
902   atom_t canonical;
903 } url_cache;
904 
905 static url_cache cache[URL_CACHE];
906 
907 static void
reset_url_cache()908 reset_url_cache()
909 { int i;
910   url_cache *c = cache;
911 
912   for(i=0; i<URL_CACHE; i++)
913   { c[i].url = NULL;
914     if ( c[i].canonical )
915       PL_unregister_atom(c[i].canonical);
916     c[i].canonical = 0;
917   }
918 }
919 
920 
921 WUNUSED static int
put_url(dtd_parser * p,term_t t,const ichar * url)922 put_url(dtd_parser *p, term_t t, const ichar *url)
923 { parser_data *pd = p->closure;
924   fid_t fid;
925   int i;
926 
927   if ( !pd->on_urlns )
928     return put_atom_wchars(t, url);
929 
930   for(i=0; i<URL_CACHE; i++)
931   { if ( cache[i].url == url )		/* cache hit */
932     { if ( cache[i].canonical )		/* and a canonical value */
933 	return PL_put_atom(t, cache[i].canonical);
934       else
935 	return put_atom_wchars(t, url);
936     }
937   }
938 					/* shift the cache */
939   i = URL_CACHE-1;
940   if ( cache[i].canonical )
941     PL_unregister_atom(cache[i].canonical);
942   for(i=URL_CACHE-1; i>0; i--)
943     cache[i] = cache[i-1];
944   cache[0].url = url;
945   cache[0].canonical = 0;
946 
947   if ( (fid = PL_open_foreign_frame()) )
948   { int rc;
949     term_t av = PL_new_term_refs(3);
950     atom_t a;
951 
952     rc = (put_atom_wchars(av+0, url) &&
953 	  unify_parser(av+2, p));
954 
955     if ( rc &&
956 	 PL_call_predicate(NULL, PL_Q_NORMAL, pd->on_urlns, av) &&
957 	 PL_get_atom(av+1, &a) )
958     { PL_register_atom(a);
959       cache[0].canonical = a;
960       PL_put_atom(t, a);
961     } else if ( rc )
962     { rc = put_atom_wchars(t, url);
963     }
964     PL_discard_foreign_frame(fid);
965 
966     return rc;
967   }
968 
969   return FALSE;
970 }
971 
972 
973 WUNUSED static int
put_attribute_name(dtd_parser * p,term_t t,dtd_symbol * nm)974 put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm)
975 { const ichar *url, *local, *prefix;
976   if ( p->dtd->dialect == DL_XMLNS )
977   { xmlns_resolve_attribute(p, nm, &local, &url, &prefix);
978     if ( url )
979     { term_t av;
980       if ( p->dtd->keep_prefix)
981       { /* creates ns(prefix,url):local */
982         PL_put_variable(t);
983         return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_ns2,
984                              PL_FUNCTOR, FUNCTOR_prefix2,
985                              PL_NWCHARS, ENDSNUL, prefix ? prefix : L"",
986                              PL_NWCHARS, ENDSNUL, url,
987                              PL_NWCHARS, ENDSNUL, local);
988       } else
989       { return ( (av=PL_new_term_refs(2)) &&
990                  put_url(p, av+0, url) &&
991                  put_atom_wchars(av+1, local) &&
992                  PL_cons_functor_v(t, FUNCTOR_ns2, av) );
993       }
994     } else
995       return put_atom_wchars(t, local);
996   } else
997     return put_atom_wchars(t, nm->name);
998 }
999 
1000 
1001 WUNUSED static int
put_element_name(dtd_parser * p,term_t t,dtd_element * e)1002 put_element_name(dtd_parser *p, term_t t, dtd_element *e)
1003 { const ichar *url, *local, *prefix;
1004 
1005   if ( p->dtd->dialect == DL_XMLNS )
1006   { assert(p->environments->element == e);
1007     xmlns_resolve_element(p, &local, &url, &prefix);
1008 
1009     if ( url )
1010     { term_t av;
1011       if ( p->dtd->keep_prefix )
1012       { /* creates ns(prefix,url):local */
1013 	return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_ns2,
1014 			          PL_FUNCTOR, FUNCTOR_prefix2,
1015 			            PL_NWCHARS, ENDSNUL, prefix ? prefix : L"",
1016 				    PL_NWCHARS, ENDSNUL, url,
1017 				  PL_NWCHARS, ENDSNUL, local);
1018       } else
1019       { return ( (av=PL_new_term_refs(2)) &&
1020                  put_url(p, av+0, url) &&
1021                  put_atom_wchars(av+1, local) &&
1022                  PL_cons_functor_v(t, FUNCTOR_ns2, av) );
1023       }
1024     } else
1025       return put_atom_wchars(t, local);
1026   } else
1027     return put_atom_wchars(t, e->name->name);
1028 }
1029 
1030 
1031 static ichar *
istrblank(const ichar * s)1032 istrblank(const ichar *s)
1033 { for( ; *s; s++ )
1034   { if ( iswspace(*s) )
1035       return (ichar *)s;
1036   }
1037 
1038   return NULL;
1039 }
1040 
1041 
1042 WUNUSED static int
unify_listval(dtd_parser * p,term_t t,attrtype type,size_t len,const ichar * text)1043 unify_listval(dtd_parser *p,
1044 	      term_t t, attrtype type, size_t len, const ichar *text)
1045 { if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
1046   { wchar_t *e;
1047 
1048 #if SIZEOF_LONG == 4 && defined(HAVE_WCSTOLL)
1049     int64_t v = wcstoll(text, &e, 10);
1050     if ( (size_t)(e-text) == len && errno != ERANGE )
1051       return PL_unify_int64(t, v);
1052 #else
1053     long v = wcstol(text, &e, 10);
1054 
1055     if ( (size_t)(e-text) == len && errno != ERANGE )
1056       return PL_unify_integer(t, v);
1057 #endif
1058 					/* TBD: Error!? */
1059   }
1060 
1061   return PL_unify_wchars(t, PL_ATOM, len, text);
1062 }
1063 
1064 
1065 WUNUSED static int
put_att_text(dtd_parser * p,term_t t,sgml_attribute * a)1066 put_att_text(dtd_parser *p, term_t t, sgml_attribute *a)
1067 { if ( a->value.textW )
1068   { PL_put_variable(t);
1069     return PL_unify_wchars(t, p->att_rep, a->value.number, a->value.textW);
1070   } else
1071     return FALSE;
1072 }
1073 
1074 
1075 WUNUSED static int
put_attribute_value(dtd_parser * p,term_t t,sgml_attribute * a)1076 put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a)
1077 { switch(a->definition->type)
1078   { case AT_CDATA:
1079       return put_att_text(p, t, a);
1080     case AT_NUMBER:
1081     { if ( !put_att_text(p, t, a) )
1082 	return PL_put_integer(t, a->value.number);
1083       return TRUE;
1084     }
1085     default:				/* multi-valued attribute */
1086     { if ( a->definition->islist && a->value.textW )
1087       { term_t tail, head;
1088 	const ichar *val = a->value.textW;
1089 	const ichar *e;
1090 
1091 	PL_put_variable(t);
1092 	if ( !(head = PL_new_term_ref()) ||
1093 	     !(tail = PL_copy_term_ref(t)) )
1094 	  return FALSE;
1095 
1096 	for(e=istrblank(val); e; val = e+1, e=istrblank(val))
1097 	{ if ( e == val )
1098 	    continue;			/* skip spaces */
1099 	  if ( !PL_unify_list(tail, head, tail) ||
1100 	       !unify_listval(p, head, a->definition->type, e-val, val) )
1101 	    return FALSE;
1102 	}
1103 
1104 	return ( PL_unify_list(tail, head, tail) &&
1105 		 unify_listval(p, head, a->definition->type,
1106 			       istrlen(val), val) &&
1107 		 PL_unify_nil(tail) );
1108       } else
1109 	return put_att_text(p, t, a);
1110     }
1111   }
1112 }
1113 
1114 
1115 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1116 Produce a tag-location in the format
1117 
1118 	start_location=file:char-char
1119 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1120 
1121 WUNUSED static int
put_tag_position(dtd_parser * p,term_t pos)1122 put_tag_position(dtd_parser *p, term_t pos)
1123 { dtd_srcloc *l = &p->startloc;
1124 
1125   if ( l->type == IN_FILE && l->name.file )
1126   { PL_put_variable(pos);
1127     return PL_unify_term(pos,
1128 			 PL_FUNCTOR, FUNCTOR_ns2,
1129 			   PL_NWCHARS, wcslen(l->name.file), l->name.file,
1130 			   PL_FUNCTOR, FUNCTOR_minus2,
1131 			 PL_LONG, l->charpos,
1132 			 PL_LONG, p->location.charpos);
1133   }
1134 
1135   return FALSE;
1136 }
1137 
1138 
1139 
1140 WUNUSED static int
unify_attribute_list(dtd_parser * p,term_t alist,int argc,sgml_attribute * argv)1141 unify_attribute_list(dtd_parser *p, term_t alist,
1142 		     int argc, sgml_attribute *argv)
1143 { int i;
1144   term_t tail = PL_copy_term_ref(alist);
1145   term_t h    = PL_new_term_ref();
1146   term_t a    = PL_new_term_refs(2);
1147   parser_data *pd = p->closure;
1148 
1149   for(i=0; i<argc; i++)
1150   { if ( !put_attribute_name(p, a+0, argv[i].definition->name) ||
1151 	 !put_attribute_value(p, a+1, &argv[i]) ||
1152 	 !PL_cons_functor_v(a, FUNCTOR_equal2, a) ||
1153 	 !PL_unify_list(tail, h, tail) ||
1154 	 !PL_unify(h, a) )
1155       return FALSE;
1156   }
1157 
1158   if ( pd->positions && put_tag_position(p, a+1) )
1159   { PL_put_atom(a, ATOM_position);
1160 
1161     if ( !PL_cons_functor_v(a, FUNCTOR_equal2, a) ||
1162 	 !PL_unify_list(tail, h, tail) ||
1163 	 !PL_unify(h, a) )
1164       return FALSE;
1165   }
1166 
1167   if ( PL_unify_nil(tail) )
1168   { PL_reset_term_refs(tail);
1169 
1170     return TRUE;
1171   }
1172 
1173   return FALSE;
1174 }
1175 
1176 
1177 
1178 static int
on_begin_(dtd_parser * p,dtd_element * e,int argc,sgml_attribute * argv)1179 on_begin_(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
1180 { parser_data *pd = p->closure;
1181 
1182   if ( pd->stopped )
1183     return TRUE;
1184 
1185   if ( pd->tail )
1186   { term_t content = PL_new_term_ref();	/* element content */
1187     term_t alist   = PL_new_term_ref();	/* attribute list */
1188     term_t et	   = PL_new_term_ref();	/* element structure */
1189     term_t h       = PL_new_term_ref();
1190 
1191     if ( !h ||
1192 	 !put_element_name(p, h, e) ||
1193 	 !unify_attribute_list(p, alist, argc, argv) ||
1194 	 !PL_unify_term(et,
1195 			PL_FUNCTOR, FUNCTOR_element3,
1196 			  PL_TERM, h,
1197 			  PL_TERM, alist,
1198 			  PL_TERM, content) )
1199     { pd->exception = PL_exception(0);
1200       return FALSE;
1201     }
1202 
1203     if ( PL_unify_list(pd->tail, h, pd->tail) &&
1204 	 PL_unify(h, et) )
1205     { env *env = sgml_calloc(1, sizeof(*env));
1206 
1207       env->tail   = pd->tail;
1208       env->parent = pd->stack;
1209       pd->stack   = env;
1210 
1211       pd->tail = content;
1212       PL_reset_term_refs(alist);
1213 
1214       return TRUE;
1215     }
1216 
1217     pd->exception = PL_exception(0);
1218     return FALSE;
1219   }
1220 
1221   if ( pd->on_begin )
1222   { fid_t fid;
1223 
1224     if ( (fid = PL_open_foreign_frame()) )
1225     { int rc;
1226       term_t av = PL_new_term_refs(3);
1227 
1228       rc = ( put_element_name(p, av+0, e) &&
1229 	     unify_attribute_list(p, av+1, argc, argv) &&
1230 	     unify_parser(av+2, p) &&
1231 	     call_prolog(pd, pd->on_begin, av)
1232 	   );
1233 
1234       PL_discard_foreign_frame(fid);
1235       if ( rc )
1236 	return TRUE;
1237     }
1238 
1239     pd->exception = PL_exception(0);
1240     return FALSE;
1241   }
1242 
1243   return TRUE;
1244 }
1245 
1246 
1247 static int
on_end(dtd_parser * p,dtd_element * e)1248 on_end(dtd_parser *p, dtd_element *e)
1249 { parser_data *pd = p->closure;
1250 
1251   if ( pd->stopped )
1252     return TRUE;
1253 
1254   if ( pd->on_end )
1255   { fid_t fid;
1256 
1257     if ( (fid = PL_open_foreign_frame()) )
1258     { int rc;
1259       term_t av = PL_new_term_refs(2);
1260 
1261       PL_STRINGS_MARK();
1262       rc = ( put_element_name(p, av+0, e) &&
1263 	     unify_parser(av+1, p) &&
1264 	     call_prolog(pd, pd->on_end, av)
1265 	   );
1266       PL_STRINGS_RELEASE();
1267 
1268       PL_discard_foreign_frame(fid);
1269       if ( rc )
1270 	goto ok;
1271     }
1272 
1273     if ( (pd->exception = PL_exception(0)) )
1274       return FALSE;
1275   }
1276 
1277 ok:
1278   if ( pd->tail && !pd->stopped )
1279   { if ( !PL_unify_nil(pd->tail) )
1280       return FALSE;
1281     PL_reset_term_refs(pd->tail);	/* ? */
1282 
1283     if ( pd->stack )
1284     { env *parent = pd->stack->parent;
1285 
1286       pd->tail = pd->stack->tail;
1287       sgml_free(pd->stack);
1288       pd->stack = parent;
1289     } else
1290     { if ( pd->stopat == SA_CONTENT )
1291 	pd->stopped = TRUE;
1292     }
1293   }
1294 
1295   if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
1296     pd->stopped = TRUE;
1297 
1298   return TRUE;
1299 }
1300 
1301 
1302 static int
on_entity_(dtd_parser * p,dtd_entity * e,int chr)1303 on_entity_(dtd_parser *p, dtd_entity *e, int chr)
1304 { parser_data *pd = p->closure;
1305 
1306   if ( pd->stopped )
1307     return TRUE;
1308 
1309   if ( pd->on_entity )
1310   { fid_t fid;
1311 
1312     if ( (fid=PL_open_foreign_frame()) )
1313     { int rc;
1314       term_t av = PL_new_term_refs(2);
1315 
1316       if ( e )
1317 	rc = put_atom_wchars(av+0, e->name->name);
1318       else
1319 	rc = PL_put_integer(av+0, chr);
1320 
1321       if ( rc )
1322 	rc = ( unify_parser(av+1, p) &&
1323 	       call_prolog(pd, pd->on_end, av)
1324 	     );
1325 
1326       PL_discard_foreign_frame(fid);
1327       if ( rc )
1328 	return TRUE;
1329     }
1330 
1331     pd->exception = PL_exception(0);
1332     return FALSE;
1333   }
1334 
1335   if ( pd->tail )
1336   { int rc;
1337     term_t h = PL_new_term_ref();
1338 
1339     if ( !h ||
1340 	 !PL_unify_list(pd->tail, h, pd->tail) )
1341     { pd->exception = PL_exception(0);
1342       return FALSE;
1343     }
1344 
1345     if ( e )
1346       rc = PL_unify_term(h,
1347 			 PL_FUNCTOR, FUNCTOR_entity1,
1348 			   PL_CHARS, e->name->name);
1349     else
1350       rc = PL_unify_term(h,
1351 			 PL_FUNCTOR, FUNCTOR_entity1,
1352 			   PL_INT, chr);
1353 
1354     PL_reset_term_refs(h);
1355     if ( !rc )
1356       pd->exception = PL_exception(0);
1357 
1358     return rc;
1359   }
1360 
1361   return TRUE;
1362 }
1363 
1364 
1365 static int
on_data_(dtd_parser * p,data_type type,int len,const wchar_t * data)1366 on_data_(dtd_parser *p, data_type type, int len, const wchar_t *data)
1367 { parser_data *pd = p->closure;
1368 
1369   if ( pd->on_cdata )
1370   { fid_t fid;
1371 
1372     if ( (fid=PL_open_foreign_frame()) )
1373     { int rc;
1374       term_t av = PL_new_term_refs(2);
1375 
1376       rc = ( PL_unify_wchars(av+0, PL_ATOM, len, data) &&
1377 	     unify_parser(av+1, p) &&
1378 	     call_prolog(pd, pd->on_cdata, av) );
1379 
1380       PL_discard_foreign_frame(fid);
1381       if ( rc )
1382 	return TRUE;
1383     }
1384 
1385     pd->exception = PL_exception(0);
1386     return FALSE;
1387   }
1388 
1389   if ( pd->tail && !pd->stopped )
1390   { term_t h = PL_new_term_ref();
1391 
1392     if ( PL_unify_list(pd->tail, h, pd->tail) )
1393     { int rval = TRUE;
1394       term_t a;
1395 
1396       switch(type)
1397       { case EC_CDATA:
1398 	  a = h;
1399 	  break;
1400 	case EC_SDATA:
1401 	{ term_t d = PL_new_term_ref();
1402 
1403 	  a = d;
1404 	  rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_sdata1, PL_TERM, d);
1405 	  break;
1406 	}
1407 	case EC_NDATA:
1408 	{ term_t d = PL_new_term_ref();
1409 
1410 	  a = d;
1411 	  rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_ndata1, PL_TERM, d);
1412 	  break;
1413 	}
1414 	default:
1415 	  rval = FALSE;
1416 	  assert(0);
1417       }
1418 
1419       if ( rval )
1420 	rval = PL_unify_wchars(a, p->cdata_rep, len, data);
1421 
1422       if ( rval )
1423       { PL_reset_term_refs(h);
1424 	return TRUE;
1425       } else
1426       { pd->exception = PL_exception(0);
1427       }
1428     }
1429   }
1430 
1431   return FALSE;
1432 }
1433 
1434 
1435 static int
on_cdata(dtd_parser * p,data_type type,int len,const wchar_t * data)1436 on_cdata(dtd_parser *p, data_type type, int len, const wchar_t *data)
1437 { return on_data(p, type, len, data);
1438 }
1439 
1440 
1441 static int
can_end_omitted(dtd_parser * p)1442 can_end_omitted(dtd_parser *p)
1443 { sgml_environment *env;
1444 
1445   for(env=p->environments; env; env = env->parent)
1446   { dtd_element *e = env->element;
1447 
1448     if ( !(e->structure && e->structure->omit_close) )
1449       return FALSE;
1450   }
1451 
1452   return TRUE;
1453 }
1454 
1455 
1456 static int
on_error_(dtd_parser * p,dtd_error * error)1457 on_error_(dtd_parser *p, dtd_error *error)
1458 { parser_data *pd = p->closure;
1459   const char *severity;
1460 
1461   if ( pd->stopped )
1462     return TRUE;
1463 
1464   if ( pd->stopat == SA_ELEMENT &&
1465        (error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) &&
1466        can_end_omitted(p) )
1467   { end_document_dtd_parser(p);
1468     sgml_cplocation(&p->location, &p->startloc);
1469     pd->stopped = TRUE;
1470     return TRUE;
1471   }
1472 
1473   switch(error->severity)
1474   { case ERS_STYLE:
1475       if ( pd->error_mode != EM_STYLE )
1476 	return TRUE;
1477       severity = "informational";
1478       break;
1479     case ERS_WARNING:
1480       pd->warnings++;
1481       severity = "warning";
1482       break;
1483     case ERS_ERROR:
1484     default:				/* make compiler happy */
1485       pd->errors++;
1486       severity = "error";
1487       break;
1488   }
1489 
1490   if ( pd->on_error )			/* msg, parser */
1491   { fid_t fid;
1492 
1493     if ( (fid=PL_open_foreign_frame()) )
1494     { int rc;
1495       term_t av = PL_new_term_refs(3);
1496 
1497       rc = ( PL_put_atom_chars(av+0, severity) &&
1498 	     PL_unify_wchars(av+1, PL_ATOM,
1499 			     wcslen(error->plain_message),
1500 			     error->plain_message) &&
1501 	     unify_parser(av+2, p) &&
1502 	     call_prolog(pd, pd->on_error, av)
1503 	   );
1504       PL_discard_foreign_frame(fid);
1505       if ( rc )
1506 	return TRUE;
1507     }
1508     pd->exception = PL_exception(0);
1509     return FALSE;
1510   } else if ( pd->error_mode != EM_QUIET )
1511   { fid_t fid;
1512 
1513     if ( (fid=PL_open_foreign_frame()) )
1514     { int rc = TRUE;
1515       dtd_srcloc *l = file_location(p, &p->startloc);
1516 
1517       if ( pd->max_errors == 0 )
1518       { term_t ex = PL_new_term_ref();
1519 	term_t pos = PL_new_term_ref();
1520 
1521 	if ( l->name.file )
1522 	  rc = PL_unify_term(pos,
1523 			     PL_FUNCTOR, FUNCTOR_file4,
1524 			       PL_NWCHARS, (size_t)-1, l->name.file,
1525 			       PL_INT,   l->line,
1526 			       PL_INT,   l->linepos,
1527 			       PL_INT64, (int64_t)l->charpos);
1528 	if ( rc )
1529 	  rc = PL_unify_term(ex,
1530 			     PL_FUNCTOR, FUNCTOR_error2,
1531 			       PL_FUNCTOR, FUNCTOR_syntax_error1,
1532 			         PL_NWCHARS, (size_t)-1, error->plain_message,
1533 			       PL_TERM, pos);
1534 	if ( rc )
1535 	  rc = PL_raise_exception(ex);
1536       } else
1537       { predicate_t pred = PL_predicate("print_message", 2, "user");
1538 	term_t av = PL_new_term_refs(2);
1539 	term_t src = PL_new_term_ref();
1540 	term_t parser = PL_new_term_ref();
1541 
1542 	rc = ( unify_parser(parser, p) &&
1543 	       PL_put_atom_chars(av+0, severity) );
1544 
1545 	if ( rc )
1546 	{ if ( l->name.file )
1547 	  { if ( l->type == IN_FILE )
1548 	      rc = put_atom_wchars(src, l->name.file);
1549 	    else
1550 	      rc = put_atom_wchars(src, l->name.entity);
1551 	  } else
1552 	  { PL_put_nil(src);
1553 	  }
1554 	}
1555 
1556 	if ( rc )
1557 	  rc = PL_unify_term(av+1,
1558 			     PL_FUNCTOR_CHARS, "sgml", 4,
1559 			       PL_TERM, parser,
1560 			       PL_TERM, src,
1561 			       PL_INT, l->line,
1562 			       PL_NWCHARS, wcslen(error->plain_message),
1563 					   error->plain_message);
1564 
1565 	if ( rc )
1566 	  rc = PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av);
1567 
1568 	PL_discard_foreign_frame(fid);
1569       }
1570 
1571       if ( rc )
1572 	return TRUE;
1573     }
1574 
1575     pd->exception = PL_exception(0);
1576     return FALSE;
1577   }
1578 
1579   return TRUE;
1580 }
1581 
1582 
1583 static int
on_xmlns_(dtd_parser * p,dtd_symbol * ns,dtd_symbol * url)1584 on_xmlns_(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
1585 { parser_data *pd = p->closure;
1586 
1587   if ( pd->stopped )
1588     return TRUE;
1589 
1590   if ( pd->on_xmlns )
1591   { fid_t fid;
1592     term_t av;
1593 
1594     if ( (fid = PL_open_foreign_frame()) &&
1595 	 (av = PL_new_term_refs(3)) )
1596     { int rc;
1597 
1598       if ( ns )
1599       { rc = put_atom_wchars(av+0, ns->name);
1600       } else
1601       { PL_put_nil(av+0);
1602 	rc = TRUE;
1603       }
1604 
1605       if ( rc )
1606       { rc = ( put_atom_wchars(av+1, url->name) &&
1607 	       unify_parser(av+2, p) &&
1608 	       call_prolog(pd, pd->on_xmlns, av)
1609 	     );
1610       }
1611       end_frame(fid, pd->exception);
1612       PL_discard_foreign_frame(fid);
1613       if ( rc )
1614 	return TRUE;
1615     }
1616 
1617     pd->exception = PL_exception(0);
1618     return FALSE;
1619   }
1620 
1621   return TRUE;
1622 }
1623 
1624 
1625 static int
on_pi(dtd_parser * p,const ichar * pi)1626 on_pi(dtd_parser *p, const ichar *pi)
1627 { parser_data *pd = p->closure;
1628 
1629   if ( pd->stopped )
1630     return TRUE;
1631 
1632   if ( pd->on_pi )
1633   { fid_t fid;
1634 
1635     if ( (fid=PL_open_foreign_frame()) )
1636     { int rc;
1637       term_t av = PL_new_term_refs(2);
1638 
1639       rc = ( put_atom_wchars(av+0, pi) &&
1640 	     unify_parser(av+1, p) &&
1641 	     call_prolog(pd, pd->on_pi, av)
1642 	   );
1643 
1644       PL_discard_foreign_frame(fid);
1645       if ( rc )
1646 	return TRUE;
1647     }
1648 
1649     pd->exception = PL_exception(0);
1650     return FALSE;
1651   }
1652 
1653   if ( pd->tail )
1654   { term_t h;
1655 
1656     if ( !(h = PL_new_term_ref()) ||
1657 	 !PL_unify_list(pd->tail, h, pd->tail) )
1658     { pd->exception = PL_exception(0);
1659       return FALSE;
1660     }
1661 
1662     if ( !PL_unify_term(h,
1663 			PL_FUNCTOR, FUNCTOR_pi1,
1664 			  PL_NWCHARS, wcslen(pi), pi) )
1665     { pd->exception = PL_exception(0);
1666       return FALSE;
1667     }
1668 
1669     PL_reset_term_refs(h);
1670   }
1671 
1672   return TRUE;
1673 }
1674 
1675 
1676 static int
on_decl(dtd_parser * p,const ichar * decl)1677 on_decl(dtd_parser *p, const ichar *decl)
1678 { parser_data *pd = p->closure;
1679 
1680   if ( pd->stopped )
1681     return TRUE;
1682 
1683   if ( pd->on_decl )
1684   { fid_t fid;
1685     term_t av;
1686 
1687     if ( (fid = PL_open_foreign_frame()) &&
1688 	 (av = PL_new_term_refs(2)) )
1689     { int rc;
1690 
1691       rc = ( put_atom_wchars(av+0, decl) &&
1692 	     unify_parser(av+1, p) &&
1693 	     call_prolog(pd, pd->on_decl, av)
1694 	   );
1695       end_frame(fid, pd->exception);
1696       PL_discard_foreign_frame(fid);
1697       if ( rc )
1698 	return TRUE;
1699     }
1700 
1701     pd->exception = PL_exception(0);
1702     return FALSE;
1703   }
1704 
1705   if ( pd->stopat == SA_DECL )
1706     pd->stopped = TRUE;
1707 
1708   return TRUE;
1709 }
1710 
1711 
1712 static int
on_begin(dtd_parser * p,dtd_element * e,int argc,sgml_attribute * argv)1713 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
1714 { int rc;
1715 
1716   PL_STRINGS_MARK();
1717   rc = on_begin_(p, e, argc, argv);
1718   PL_STRINGS_RELEASE();
1719 
1720   return rc;
1721 }
1722 
1723 static int
on_data(dtd_parser * p,data_type type,int len,const wchar_t * data)1724 on_data(dtd_parser *p, data_type type, int len, const wchar_t *data)
1725 { int rc;
1726 
1727   PL_STRINGS_MARK();
1728   rc = on_data_(p, type, len, data);
1729   PL_STRINGS_RELEASE();
1730 
1731   return rc;
1732 }
1733 
1734 static int
on_entity(dtd_parser * p,dtd_entity * e,int chr)1735 on_entity(dtd_parser *p, dtd_entity *e, int chr)
1736 { int rc;
1737 
1738   PL_STRINGS_MARK();
1739   rc = on_entity_(p, e, chr);
1740   PL_STRINGS_RELEASE();
1741 
1742   return rc;
1743 }
1744 
1745 static int
on_error(dtd_parser * p,dtd_error * error)1746 on_error(dtd_parser *p, dtd_error *error)
1747 { int rc;
1748 
1749   PL_STRINGS_MARK();
1750   rc = on_error_(p, error);
1751   PL_STRINGS_RELEASE();
1752 
1753   return rc;
1754 }
1755 
1756 
1757 static int
on_xmlns(dtd_parser * p,dtd_symbol * ns,dtd_symbol * url)1758 on_xmlns(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
1759 { int rc;
1760 
1761   PL_STRINGS_MARK();
1762   rc = on_xmlns_(p, ns, url);
1763   PL_STRINGS_RELEASE();
1764 
1765   return rc;
1766 }
1767 
1768 static int
write_parser(void * h,char * buf,int len)1769 write_parser(void *h, char *buf, int len)
1770 { parser_data *pd = h;
1771   unsigned char *s = (unsigned char *)buf;
1772   unsigned char *e = s+len;
1773 
1774   if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC )
1775   { errno = EINVAL;
1776     return -1;
1777   }
1778 
1779   if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped )
1780   { errno = EIO;
1781     return -1;
1782   }
1783 
1784   for(; s<e; s++)
1785   { putchar_dtd_parser(pd->parser, *s);
1786     if ( pd->exception )
1787       break;
1788   }
1789 
1790   return len;
1791 }
1792 
1793 
1794 static int
close_parser(void * h)1795 close_parser(void *h)
1796 { parser_data *pd = h;
1797   dtd_parser *p;
1798 
1799   if ( !(p=pd->parser) || p->magic != SGML_PARSER_MAGIC )
1800   { errno = EINVAL;
1801     return -1;
1802   }
1803 
1804   if ( pd->tail )
1805   { if ( !PL_unify_nil(pd->tail) )
1806       return -1;			/* resource error */
1807   }
1808 
1809   if ( p->dmode == DM_DTD )
1810     p->dtd->implicit = FALSE;		/* assume we loaded a DTD */
1811 
1812   if ( pd->free_on_close )
1813     free_dtd_parser(p);
1814   else
1815     p->closure = NULL;
1816 
1817   sgml_free(pd);
1818 
1819   return 0;
1820 }
1821 
1822 
1823 static IOFUNCTIONS sgml_stream_functions =
1824 { (Sread_function)  NULL,
1825   (Swrite_function) write_parser,
1826   (Sseek_function)  NULL,
1827   (Sclose_function) close_parser,
1828 		    NULL
1829 };
1830 
1831 
1832 static parser_data *
new_parser_data(dtd_parser * p)1833 new_parser_data(dtd_parser *p)
1834 { parser_data *pd;
1835 
1836   pd = sgml_calloc(1, sizeof(*pd));
1837   pd->magic = PD_MAGIC;
1838   pd->parser = p;
1839   pd->max_errors = MAX_ERRORS;
1840   pd->max_warnings = MAX_WARNINGS;
1841   pd->error_mode = EM_PRINT;
1842   pd->exception = FALSE;
1843   p->closure = pd;
1844 
1845   return pd;
1846 }
1847 
1848 
1849 static foreign_t
pl_open_dtd(term_t ref,term_t options,term_t stream)1850 pl_open_dtd(term_t ref, term_t options, term_t stream)
1851 { dtd *dtd;
1852   dtd_parser *p;
1853   parser_data *pd;
1854   IOSTREAM *s;
1855   term_t tail = PL_copy_term_ref(options);
1856   term_t option = PL_new_term_ref();
1857 
1858   if ( !get_dtd(ref, &dtd) )
1859     return FALSE;
1860   p = new_dtd_parser(dtd);
1861   p->dmode = DM_DTD;
1862   pd = new_parser_data(p);
1863   pd->free_on_close = TRUE;
1864 
1865   while( PL_get_list(tail, option, tail) )
1866   { if ( PL_is_functor(option, FUNCTOR_dialect1) )
1867     { term_t a = PL_new_term_ref();
1868       char *s;
1869 
1870       _PL_get_arg(1, option, a);
1871       if ( !PL_get_atom_chars(a, &s) )
1872 	return sgml2pl_error(ERR_TYPE, "atom", a);
1873 
1874       if ( streq(s, "xml") )
1875 	set_dialect_dtd(dtd, p, DL_XML);
1876       else if ( streq(s, "xmlns") )
1877 	set_dialect_dtd(dtd, p, DL_XMLNS);
1878       else if ( streq(s, "sgml") )
1879 	set_dialect_dtd(dtd, p, DL_SGML);
1880       else
1881 	return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
1882     }
1883   }
1884   if ( !PL_get_nil(tail) )
1885     return sgml2pl_error(ERR_TYPE, "list", options);
1886 
1887   s = Snew(pd, SIO_OUTPUT|SIO_FBUF, &sgml_stream_functions);
1888 
1889   if ( !PL_open_stream(stream, s) )
1890     return FALSE;
1891 
1892   return TRUE;
1893 }
1894 
1895 
1896 static int
set_callback_predicates(parser_data * pd,term_t option)1897 set_callback_predicates(parser_data *pd, term_t option)
1898 { term_t a = PL_new_term_ref();
1899   char *fname;
1900   atom_t pname;
1901   predicate_t *pp = NULL;		/* keep compiler happy */
1902   int arity;
1903   module_t m = NULL;
1904 
1905   _PL_get_arg(2, option, a);
1906   if ( !PL_strip_module(a, &m, a) )
1907     return FALSE;
1908   if ( !PL_get_atom(a, &pname) )
1909     return sgml2pl_error(ERR_TYPE, "atom", a);
1910   _PL_get_arg(1, option, a);
1911   if ( !PL_get_atom_chars(a, &fname) )
1912     return sgml2pl_error(ERR_TYPE, "atom", a);
1913 
1914   if ( streq(fname, "begin") )
1915   { pp = &pd->on_begin;			/* tag, attributes, parser */
1916     arity = 3;
1917   } else if ( streq(fname, "end") )
1918   { pp = &pd->on_end;			/* tag, parser */
1919     arity = 2;
1920   } else if ( streq(fname, "cdata") )
1921   { pp = &pd->on_cdata;			/* cdata, parser */
1922     arity = 2;
1923   } else if ( streq(fname, "entity") )
1924   { pp = &pd->on_entity;		/* name, parser */
1925     arity = 2;
1926   } else if ( streq(fname, "pi") )
1927   { pp = &pd->on_pi;			/* pi, parser */
1928     arity = 2;
1929   } else if ( streq(fname, "xmlns") )
1930   { pp = &pd->on_xmlns;			/* ns, url, parser */
1931     arity = 3;
1932   } else if ( streq(fname, "urlns") )
1933   { pp = &pd->on_urlns;			/* url, ns, parser */
1934     arity = 3;
1935   } else if ( streq(fname, "error") )
1936   { pp = &pd->on_error;			/* severity, message, parser */
1937     arity = 3;
1938   } else if ( streq(fname, "decl") )
1939   { pp = &pd->on_decl;			/* decl, parser */
1940     arity = 2;
1941   } else
1942     return sgml2pl_error(ERR_DOMAIN, "sgml_callback", a);
1943 
1944   *pp = PL_pred(PL_new_functor(pname, arity), m);
1945   return TRUE;
1946 }
1947 
1948 
1949 static foreign_t
pl_sgml_parse(term_t parser,term_t options)1950 pl_sgml_parse(term_t parser, term_t options)
1951 { dtd_parser *p;
1952   parser_data *pd;
1953   parser_data *oldpd;
1954   term_t head = PL_new_term_ref();
1955   term_t tail = PL_copy_term_ref(options);
1956   IOSTREAM *in = NULL;
1957   IOSTREAM *release = NULL;
1958   int recursive;
1959   int has_content_length = FALSE;
1960   int64_t content_length = 0;		/* content_length(Len) */
1961   int count = 0;
1962   int rc = TRUE;
1963 
1964   if ( !get_parser(parser, &p) )
1965     return FALSE;
1966 
1967   if ( p->closure )			/* recursive call */
1968   { recursive = TRUE;
1969 
1970     oldpd = p->closure;
1971     if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
1972       return sgml2pl_error(ERR_MISC, "sgml",
1973 			   "Parser associated with illegal data");
1974 
1975     pd = sgml_calloc(1, sizeof(*pd));
1976     *pd = *oldpd;
1977     p->closure = pd;
1978 
1979     in = pd->source;
1980   } else
1981   { recursive = FALSE;
1982     oldpd = NULL;			/* keep compiler happy */
1983 
1984     set_mode_dtd_parser(p, DM_DATA);
1985 
1986     p->on_begin_element = on_begin;
1987     p->on_end_element   = on_end;
1988     p->on_entity	= on_entity;
1989     p->on_pi		= on_pi;
1990     p->on_data          = on_cdata;
1991     p->on_error	        = on_error;
1992     p->on_xmlns		= on_xmlns;
1993     p->on_decl		= on_decl;
1994     p->cdata_rep        = PL_ATOM;
1995     p->att_rep          = PL_ATOM;
1996     pd = new_parser_data(p);
1997   }
1998 
1999   while ( PL_get_list(tail, head, tail) )
2000   { if ( PL_is_functor(head, FUNCTOR_document1) )
2001     { pd->list  = PL_new_term_ref();
2002       _PL_get_arg(1, head, pd->list);
2003       pd->tail  = PL_copy_term_ref(pd->list);
2004       pd->stack = NULL;
2005     } else if ( PL_is_functor(head, FUNCTOR_content_length1) )
2006     { term_t a = PL_new_term_ref();
2007 
2008       _PL_get_arg(1, head, a);
2009       if ( !PL_get_int64(a, &content_length) )
2010 	return sgml2pl_error(ERR_TYPE, "integer", a);
2011       has_content_length = TRUE;
2012     } else if ( PL_is_functor(head, FUNCTOR_call2) )
2013     { if ( !set_callback_predicates(pd, head) )
2014 	return FALSE;
2015     } else if ( PL_is_functor(head, FUNCTOR_xml_no_ns1) )
2016     { term_t a = PL_new_term_ref();
2017       char *s;
2018 
2019       _PL_get_arg(1, head, a);
2020       if ( !PL_get_atom_chars(a, &s) )
2021 	return sgml2pl_error(ERR_TYPE, "atom", a);
2022       if ( streq(s, "error") )
2023 	p->xml_no_ns = NONS_ERROR;
2024       else if ( streq(s, "quiet") )
2025 	p->xml_no_ns = NONS_QUIET;
2026       else
2027 	return sgml2pl_error(ERR_DOMAIN, "xml_no_ns", a);
2028     } else if ( PL_is_functor(head, FUNCTOR_parse1) )
2029     { term_t a = PL_new_term_ref();
2030       char *s;
2031 
2032       _PL_get_arg(1, head, a);
2033       if ( !PL_get_atom_chars(a, &s) )
2034 	return sgml2pl_error(ERR_TYPE, "atom", a);
2035       if ( streq(s, "element") )
2036 	pd->stopat = SA_ELEMENT;
2037       else if ( streq(s, "content") )
2038 	pd->stopat = SA_CONTENT;
2039       else if ( streq(s, "file") )
2040 	pd->stopat = SA_FILE;
2041       else if ( streq(s, "input") )
2042 	pd->stopat = SA_INPUT;
2043       else if ( streq(s, "declaration") )
2044 	pd->stopat = SA_DECL;
2045       else
2046 	return sgml2pl_error(ERR_DOMAIN, "parse", a);
2047     } else if ( PL_is_functor(head, FUNCTOR_max_errors1) )
2048     { term_t a = PL_new_term_ref();
2049 
2050       _PL_get_arg(1, head, a);
2051       if ( !PL_get_integer(a, &pd->max_errors) )
2052 	return sgml2pl_error(ERR_TYPE, "integer", a);
2053     } else if ( PL_is_functor(head, FUNCTOR_syntax_errors1) )
2054     { term_t a = PL_new_term_ref();
2055       char *s;
2056 
2057       _PL_get_arg(1, head, a);
2058       if ( !PL_get_atom_chars(a, &s) )
2059 	return sgml2pl_error(ERR_TYPE, "atom", a);
2060 
2061       if ( streq(s, "quiet") )
2062 	pd->error_mode = EM_QUIET;
2063       else if ( streq(s, "print") )
2064 	pd->error_mode = EM_PRINT;
2065       else if ( streq(s, "style") )
2066 	pd->error_mode = EM_STYLE;
2067       else
2068 	return sgml2pl_error(ERR_DOMAIN, "syntax_error", a);
2069     } else if ( PL_is_functor(head, FUNCTOR_positions1) )
2070     { term_t a = PL_new_term_ref();
2071       char *s;
2072 
2073       _PL_get_arg(1, head, a);
2074       if ( !PL_get_atom_chars(a, &s) )
2075 	return sgml2pl_error(ERR_TYPE, "atom", a);
2076 
2077       if ( streq(s, "true") )
2078 	pd->positions = TRUE;
2079       else if ( streq(s, "false") )
2080 	pd->positions = FALSE;
2081       else
2082 	return sgml2pl_error(ERR_DOMAIN, "positions", a);
2083     } else if ( PL_is_functor(head, FUNCTOR_cdata1) )
2084     { term_t arg = PL_new_term_ref();
2085       atom_t a;
2086       _PL_get_arg(1, head, arg);
2087       if (!PL_get_atom_ex(arg, &a))
2088 	return FALSE;
2089       if (a == ATOM_atom)
2090 	p->cdata_rep = PL_ATOM;
2091       else if (a == ATOM_string)
2092 	p->cdata_rep = PL_STRING;
2093       else
2094 	return sgml2pl_error(ERR_DOMAIN, "representation", a);
2095     } else if ( PL_is_functor(head, FUNCTOR_attribute_value1) )
2096     { term_t arg = PL_new_term_ref();
2097       atom_t a;
2098       _PL_get_arg(1, head, arg);
2099       if (!PL_get_atom_ex(arg, &a))
2100 	return FALSE;
2101       if (a == ATOM_atom)
2102 	p->att_rep = PL_ATOM;
2103       else if (a == ATOM_string)
2104 	p->att_rep = PL_STRING;
2105       else
2106 	return sgml2pl_error(ERR_DOMAIN, "representation", a);
2107     } else if ( PL_is_functor(head, FUNCTOR_source1) )
2108     { term_t a = PL_new_term_ref();
2109 
2110       _PL_get_arg(1, head, a);
2111       if ( !PL_get_stream(a, &in, SIO_INPUT) )
2112 	return FALSE;
2113       release = in;
2114     }/* else ignored option */
2115   }
2116   if ( !PL_get_nil_ex(tail) )
2117   { if ( release )
2118       PL_release_stream(release);
2119     return FALSE;
2120   }
2121 
2122 					/* Parsing input from a stream */
2123 #define CHECKERROR \
2124     { if ( pd->exception ) \
2125       { rc = FALSE; \
2126 	goto out; \
2127       } \
2128       if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
2129       { rc = sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors); \
2130 	goto out; \
2131       } \
2132     }
2133 
2134   if ( pd->stopat == SA_CONTENT && p->empty_element )
2135     goto out;
2136 
2137   if ( in )
2138   { int eof = FALSE;
2139 
2140     if ( in->encoding == ENC_OCTET )
2141       p->encoded = TRUE;		/* parser must decode */
2142     else
2143       p->encoded = FALSE;		/* already decoded */
2144 
2145     pd->stopped = FALSE;
2146 
2147     if ( !recursive )
2148     { pd->source = in;
2149       begin_document_dtd_parser(p);
2150     }
2151 
2152     while(!eof)
2153     { int c, ateof;
2154 
2155       if ( (++count % 8192) == 0 && PL_handle_signals() < 0 )
2156       { rc = FALSE;
2157 	goto out;
2158       }
2159 
2160       if ( has_content_length )
2161       { if ( content_length <= 0 )
2162 	  c = EOF;
2163 	else
2164 	  c = Sgetcode(in);
2165 	ateof = (--content_length <= 0);
2166       } else
2167       { c = Sgetcode(in);
2168 	ateof = Sfeof(in);
2169       }
2170 
2171       if ( ateof )
2172       { eof = TRUE;
2173 	if ( c == LF )			/* file ends in LF */
2174 	  c = CR;
2175 	else if ( c != CR )		/* file ends in normal char */
2176 	{ if ( has_content_length && in->position )
2177 	  { int64_t bc0 = in->position->byteno;
2178 	    putchar_dtd_parser(p, c);
2179 	    content_length -= in->position->byteno-bc0;
2180 	  } else
2181 	  { putchar_dtd_parser(p, c);
2182 	  }
2183 	  CHECKERROR;
2184 	  if ( pd->stopped )
2185 	    goto stopped;
2186 	  c = CR;
2187 	}
2188       } else if ( Sferror(in) )
2189       { rc = FALSE;
2190 	goto out;
2191       }
2192 
2193       if ( has_content_length && in->position )
2194       { int64_t bc0 = in->position->byteno;
2195 	putchar_dtd_parser(p, c);
2196 	content_length -= in->position->byteno-bc0;
2197       } else
2198       { putchar_dtd_parser(p, c);
2199       }
2200       CHECKERROR;
2201       if ( pd->stopped )
2202       { stopped:
2203 	pd->stopped = FALSE;
2204 	if ( pd->stopat != SA_CONTENT )
2205 	  reset_document_dtd_parser(p);	/* ensure a clean start */
2206 	goto out;
2207       }
2208     }
2209 
2210     if ( !recursive && pd->stopat != SA_INPUT )
2211       end_document_dtd_parser(p);
2212     CHECKERROR;
2213 
2214   out:
2215     if ( release )
2216       rc = PL_release_stream(release) && rc;
2217 
2218     reset_url_cache();
2219     if ( pd->tail && rc )
2220       rc = PL_unify_nil(pd->tail);
2221 
2222     if ( recursive )
2223     { p->closure = oldpd;
2224     } else
2225     { p->closure = NULL;
2226     }
2227 
2228     pd->magic = 0;			/* invalidate */
2229     sgml_free(pd);
2230 
2231     return rc;
2232   }
2233 
2234   reset_url_cache();
2235 
2236   return TRUE;
2237 }
2238 
2239 
2240 		 /*******************************
2241 		 *	  DTD PROPERTIES	*
2242 		 *******************************/
2243 
2244 static int put_model(term_t t, dtd_model *m) WUNUSED;
2245 
2246 /* doctype(DocType) */
2247 
2248 static int
dtd_prop_doctype(dtd * dtd,term_t prop)2249 dtd_prop_doctype(dtd *dtd, term_t prop)
2250 { if ( dtd->doctype )
2251     return PL_unify_wchars(prop, PL_ATOM, ENDSNUL, dtd->doctype);
2252   return FALSE;
2253 }
2254 
2255 
2256 /* elements(ListOfElements) */
2257 
2258 WUNUSED static int
make_model_list(term_t t,dtd_model * m,functor_t f)2259 make_model_list(term_t t, dtd_model *m, functor_t f)
2260 { if ( !m->next )
2261   { return put_model(t, m);
2262   } else
2263   { term_t av;
2264 
2265     if ( (av=PL_new_term_refs(2)) &&
2266 	 put_model(av+0, m) &&
2267 	 make_model_list(av+1, m->next, f) &&
2268 	 PL_cons_functor_v(t, f, av) )
2269     { PL_reset_term_refs(av);
2270       return TRUE;
2271     }
2272 
2273     return FALSE;
2274   }
2275 }
2276 
2277 
2278 WUNUSED static int
put_model(term_t t,dtd_model * m)2279 put_model(term_t t, dtd_model *m)
2280 { int rc = TRUE;
2281   functor_t f;
2282 
2283   switch(m->type)
2284   { case MT_PCDATA:
2285       rc = PL_put_atom(t, ATOM_pcdata);
2286       goto card;
2287     case MT_ELEMENT:
2288       rc = put_atom_wchars(t, m->content.element->name->name);
2289       goto card;
2290     case MT_AND:
2291       f = FUNCTOR_and2;
2292       break;
2293     case MT_SEQ:
2294       f = FUNCTOR_comma2;
2295       break;
2296     case MT_OR:
2297       f = FUNCTOR_bar2;
2298       break;
2299     case MT_UNDEF:
2300     default:
2301       assert(0);
2302       f = 0;
2303       break;
2304   }
2305 
2306   if ( rc )
2307   { if ( !m->content.group )
2308       rc = PL_put_atom(t, ATOM_empty);
2309     else
2310       rc = make_model_list(t, m->content.group, f);
2311   }
2312 
2313 card:
2314   if ( !rc )
2315     return FALSE;
2316 
2317   switch(m->cardinality)
2318   { case MC_ONE:
2319       break;
2320     case MC_OPT:
2321       rc = PL_cons_functor_v(t, FUNCTOR_opt1, t);
2322       break;
2323     case MC_REP:
2324       rc = PL_cons_functor_v(t, FUNCTOR_rep1, t);
2325       break;
2326     case MC_PLUS:
2327       rc = PL_cons_functor_v(t, FUNCTOR_plus1, t);
2328       break;
2329   }
2330 
2331   return rc;
2332 }
2333 
2334 
2335 WUNUSED static int
put_content(term_t t,dtd_edef * def)2336 put_content(term_t t, dtd_edef *def)
2337 { switch(def->type)
2338   { case C_EMPTY:
2339       return PL_put_atom(t, ATOM_empty);
2340     case C_CDATA:
2341       return PL_put_atom(t, ATOM_cdata);
2342     case C_RCDATA:
2343       return PL_put_atom(t, ATOM_rcdata);
2344     case C_ANY:
2345       return PL_put_atom(t, ATOM_any);
2346     default:
2347       if ( def->content )
2348 	return put_model(t, def->content);
2349 
2350       return TRUE;
2351   }
2352 }
2353 
2354 
2355 static int
dtd_prop_elements(dtd * dtd,term_t prop)2356 dtd_prop_elements(dtd *dtd, term_t prop)
2357 { term_t tail = PL_copy_term_ref(prop);
2358   term_t head = PL_new_term_ref();
2359   term_t et   = PL_new_term_ref();
2360   dtd_element *e;
2361 
2362   for( e=dtd->elements; e; e=e->next )
2363   { put_atom_wchars(et, e->name->name);
2364     if ( !PL_unify_list(tail, head, tail) ||
2365 	 !PL_unify(head, et) )
2366       return FALSE;
2367   }
2368 
2369   return PL_unify_nil(tail);
2370 }
2371 
2372 
2373 static int
get_element(dtd * dtd,term_t name,dtd_element ** elem)2374 get_element(dtd *dtd, term_t name, dtd_element **elem)
2375 { ichar *s;
2376   dtd_element *e;
2377   dtd_symbol *id;
2378 
2379   if ( !PL_get_wchars(name, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
2380     return FALSE;
2381 
2382   if ( !(id=dtd_find_symbol(dtd, s)) ||
2383        !(e=id->element) )
2384     return FALSE;
2385 
2386   *elem = e;
2387   return TRUE;
2388 }
2389 
2390 
2391 
2392 
2393 static int
dtd_prop_element(dtd * dtd,term_t name,term_t omit,term_t content)2394 dtd_prop_element(dtd *dtd, term_t name, term_t omit, term_t content)
2395 { dtd_element *e;
2396   dtd_edef *def;
2397   term_t model = PL_new_term_ref();
2398 
2399   if ( !get_element(dtd, name, &e) || !(def=e->structure) )
2400     return FALSE;
2401 
2402   if ( !PL_unify_term(omit, PL_FUNCTOR, FUNCTOR_omit2,
2403 		        PL_ATOM, def->omit_open ?  ATOM_true : ATOM_false,
2404 		        PL_ATOM, def->omit_close ? ATOM_true : ATOM_false) )
2405     return FALSE;
2406 
2407   return ( put_content(model, def) &&
2408 	   PL_unify(content, model) );
2409 }
2410 
2411 
2412 static int
dtd_prop_attributes(dtd * dtd,term_t ename,term_t atts)2413 dtd_prop_attributes(dtd *dtd, term_t ename, term_t atts)
2414 { dtd_element *e;
2415   term_t tail = PL_copy_term_ref(atts);
2416   term_t head = PL_new_term_ref();
2417   term_t elem = PL_new_term_ref();
2418   dtd_attr_list *al;
2419 
2420   if ( !get_element(dtd, ename, &e) )
2421     return FALSE;
2422 
2423   for(al=e->attributes; al; al=al->next)
2424   { put_atom_wchars(elem, al->attribute->name->name);
2425 
2426     if ( !PL_unify_list(tail, head, tail) ||
2427 	 !PL_unify(head, elem) )
2428       return FALSE;
2429   }
2430 
2431   return PL_unify_nil(tail);
2432 }
2433 
2434 
2435 typedef struct _plattrdef
2436 { attrtype	type;			/* AT_* */
2437   const char *	name;			/* name */
2438   int	       islist;			/* list-type */
2439   atom_t	atom;			/* name as atom */
2440 } plattrdef;
2441 
2442 static plattrdef plattrs[] =
2443 {
2444   { AT_CDATA,	 "cdata",    FALSE },
2445   { AT_ENTITY,	 "entity",   FALSE },
2446   { AT_ENTITIES, "entity",   TRUE },
2447   { AT_ID,	 "id",	     FALSE },
2448   { AT_IDREF,	 "idref",    FALSE },
2449   { AT_IDREFS,	 "idref",    TRUE },
2450   { AT_NAME,	 "name",     FALSE },
2451   { AT_NAMES,	 "name",     TRUE },
2452 /*{ AT_NAMEOF,	 "nameof",   FALSE },*/
2453   { AT_NMTOKEN,	 "nmtoken",  FALSE },
2454   { AT_NMTOKENS, "nmtoken",  TRUE },
2455   { AT_NUMBER,	 "number",   FALSE },
2456   { AT_NUMBERS,	 "number",   TRUE },
2457   { AT_NUTOKEN,	 "nutoken",  FALSE },
2458   { AT_NUTOKENS, "nutoken",  TRUE },
2459   { AT_NOTATION, "notation", FALSE },
2460 
2461   { AT_CDATA,    NULL,       FALSE }
2462 };
2463 
2464 
2465 static int
unify_attribute_type(term_t type,dtd_attr * a)2466 unify_attribute_type(term_t type, dtd_attr *a)
2467 { plattrdef *ad = plattrs;
2468 
2469   for( ; ad->name; ad++ )
2470   { if ( ad->type == a->type )
2471     { if ( !ad->atom )
2472 	ad->atom = PL_new_atom(ad->name);
2473 
2474       if ( ad->islist )
2475 	return PL_unify_term(type, PL_FUNCTOR, FUNCTOR_list1,
2476 			     PL_ATOM, ad->atom);
2477       else
2478 	return PL_unify_atom(type, ad->atom);
2479     }
2480   }
2481 
2482   if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
2483   { dtd_name_list *nl;
2484     term_t tail, head, elem;
2485 
2486     if ( !(tail = PL_new_term_ref()) ||
2487 	 !(head = PL_new_term_ref()) ||
2488 	 !(elem = PL_new_term_ref()) ||
2489 	 !PL_unify_functor(type,
2490 			   a->type == AT_NAMEOF ?
2491 			     FUNCTOR_nameof1 :
2492 			     FUNCTOR_notation1) )
2493       return FALSE;
2494 
2495     _PL_get_arg(1, type, tail);
2496 
2497     for(nl = a->typeex.nameof; nl; nl = nl->next)
2498     { if ( !put_atom_wchars(elem, nl->value->name) ||
2499 	   !PL_unify_list(tail, head, tail) ||
2500 	   !PL_unify(head, elem) )
2501 	return FALSE;
2502     }
2503     return PL_unify_nil(tail);
2504   }
2505 
2506   assert(0);
2507   return FALSE;
2508 }
2509 
2510 
2511 
2512 static int
unify_attribute_default(term_t defval,dtd_attr * a)2513 unify_attribute_default(term_t defval, dtd_attr *a)
2514 { int v;
2515 
2516   switch(a->def)
2517   { case AT_REQUIRED:
2518       return PL_unify_atom_chars(defval, "required");
2519     case AT_CURRENT:
2520       return PL_unify_atom_chars(defval, "current");
2521     case AT_CONREF:
2522       return PL_unify_atom_chars(defval, "conref");
2523     case AT_IMPLIED:
2524       return PL_unify_atom_chars(defval, "implied");
2525     case AT_DEFAULT:
2526       v = PL_unify_functor(defval, FUNCTOR_default1);
2527       goto common;
2528     case AT_FIXED:
2529       v = PL_unify_functor(defval, FUNCTOR_fixed1);
2530     common:
2531       if ( v )
2532       { term_t tmp;
2533 
2534 	if ( !(tmp=PL_new_term_ref()) )
2535 	  return FALSE;
2536 
2537 	_PL_get_arg(1, defval, tmp);
2538 	switch( a->type )
2539 	{ case AT_CDATA:
2540 	    return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.cdata);
2541 	  case AT_NAME:
2542 	  case AT_NMTOKEN:
2543 	  case AT_NAMEOF:
2544 	  case AT_NOTATION:
2545 	    return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.name->name);
2546 	  case AT_NUMBER:
2547 	    return PL_unify_integer(tmp, a->att_def.number);
2548 	  default:
2549 	    assert(0);
2550 	}
2551       } else
2552 	return FALSE;
2553     default:
2554       assert(0);
2555       return FALSE;
2556   }
2557 }
2558 
2559 
2560 static int
dtd_prop_attribute(dtd * dtd,term_t ename,term_t aname,term_t type,term_t def_value)2561 dtd_prop_attribute(dtd *dtd, term_t ename, term_t aname,
2562 		   term_t type, term_t def_value)
2563 { dtd_element *e;
2564   ichar *achars;
2565   dtd_symbol *asym;
2566   dtd_attr_list *al;
2567 
2568 
2569   if ( !get_element(dtd, ename, &e) )
2570     return FALSE;
2571   if ( !PL_get_wchars(aname, NULL, &achars, CVT_ATOM|CVT_EXCEPTION) )
2572     return FALSE;
2573   if ( !(asym=dtd_find_symbol(dtd, achars)) )
2574     return FALSE;
2575 
2576   for(al=e->attributes; al; al=al->next)
2577   { if ( al->attribute->name == asym )
2578     { if ( unify_attribute_type(type, al->attribute) &&
2579 	   unify_attribute_default(def_value, al->attribute) )
2580 	return TRUE;
2581 
2582       return FALSE;
2583     }
2584   }
2585 
2586   return FALSE;
2587 }
2588 
2589 
2590 static int
dtd_prop_entities(dtd * dtd,term_t list)2591 dtd_prop_entities(dtd *dtd, term_t list)
2592 { term_t tail = PL_copy_term_ref(list);
2593   term_t head = PL_new_term_ref();
2594   term_t et   = PL_new_term_ref();
2595   dtd_entity *e;
2596 
2597   for( e=dtd->entities; e; e=e->next )
2598   { put_atom_wchars(et, e->name->name);
2599     if ( !PL_unify_list(tail, head, tail) ||
2600 	 !PL_unify(head, et) )
2601       return FALSE;
2602   }
2603 
2604   return PL_unify_nil(tail);
2605 }
2606 
2607 
2608 static int
dtd_prop_entity(dtd * dtd,term_t ename,term_t value)2609 dtd_prop_entity(dtd *dtd, term_t ename, term_t value)
2610 { ichar *s;
2611   dtd_entity *e;
2612   dtd_symbol *id;
2613 
2614   if ( !PL_get_wchars(ename, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
2615     return FALSE;
2616 
2617   if ( !(id=dtd_find_symbol(dtd, s)) ||
2618        !(e=id->entity)  )
2619     return FALSE;
2620 
2621   switch(e->type)
2622   { case ET_SYSTEM:
2623       return PL_unify_term(value, PL_FUNCTOR_CHARS, "system", 1,
2624 			   PL_CHARS, e->exturl);
2625     case ET_PUBLIC:
2626       if ( e->exturl )
2627 	return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
2628 			     PL_CHARS, e->extid,
2629 			     PL_CHARS, e->exturl);
2630       else
2631 	return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
2632 			     PL_CHARS, e->extid,
2633 			     PL_VARIABLE);
2634 
2635     case ET_LITERAL:
2636     default:
2637       if ( e->value )
2638       { const char *wrap;
2639 
2640 	switch(e->content)
2641 	{ case EC_SGML:     wrap = "sgml"; break;
2642 	  case EC_STARTTAG: wrap = "start_tag"; break;
2643 	  case EC_ENDTAG:   wrap = "end_tag"; break;
2644 	  case EC_CDATA:    wrap = NULL; break;
2645 	  case EC_SDATA:    wrap = "sdata"; break;
2646 	  case EC_NDATA:    wrap = "ndata"; break;
2647 	  case EC_PI:       wrap = "pi"; break;
2648 	  default:	    wrap = NULL; assert(0);
2649 	}
2650 
2651 	if ( wrap )
2652 	  return PL_unify_term(value, PL_FUNCTOR_CHARS, wrap, 1,
2653 			       PL_CHARS, e->value);
2654 	else
2655 	  return PL_unify_wchars(value, PL_ATOM, wcslen(e->value), e->value);
2656       }
2657   }
2658 
2659   assert(0);
2660   return FALSE;
2661 }
2662 
2663 
2664 static int
dtd_prop_notations(dtd * dtd,term_t list)2665 dtd_prop_notations(dtd *dtd, term_t list)
2666 { dtd_notation *n;
2667   term_t tail = PL_copy_term_ref(list);
2668   term_t head = PL_new_term_ref();
2669 
2670   for(n=dtd->notations; n; n=n->next)
2671   { if ( PL_unify_list(tail, head, tail) &&
2672 	 PL_unify_wchars(head, PL_ATOM, wcslen(n->name->name), n->name->name) )
2673       continue;
2674 
2675     return FALSE;
2676   }
2677 
2678   return PL_unify_nil(tail);
2679 }
2680 
2681 
2682 static int
dtd_prop_notation(dtd * dtd,term_t nname,term_t desc)2683 dtd_prop_notation(dtd *dtd, term_t nname, term_t desc)
2684 { char *s;
2685   dtd_symbol *id;
2686 
2687   if ( !PL_get_atom_chars(nname, &s) )
2688     return sgml2pl_error(ERR_TYPE, "atom", nname);
2689 
2690   if ( (id=dtd_find_symbol(dtd, (ichar *)s)) )
2691   { dtd_notation *n;
2692 
2693     for(n=dtd->notations; n; n=n->next)
2694     { if ( n->name == id )
2695       { term_t tail = PL_copy_term_ref(desc);
2696 	term_t head = PL_new_term_ref();
2697 
2698 	if ( n->system )
2699 	{ if ( !PL_unify_list(tail, head, tail) ||
2700 	       !PL_unify_term(head,
2701 			      PL_FUNCTOR_CHARS, "system", 1,
2702 			        PL_CHARS, n->system) )
2703 	    return FALSE;
2704 	}
2705 	if ( n->public )
2706 	{ if ( !PL_unify_list(tail, head, tail) ||
2707 	       !PL_unify_term(head,
2708 			      PL_FUNCTOR_CHARS, "public", 1,
2709 			        PL_CHARS, n->public) )
2710 	    return FALSE;
2711 	}
2712 
2713 	return PL_unify_nil(tail);
2714       }
2715     }
2716   }
2717 
2718   return FALSE;
2719 }
2720 
2721 
2722 
2723 typedef struct _prop
2724 { int (*func)();
2725   const char *name;
2726   int arity;
2727   functor_t functor;
2728 } prop;
2729 
2730 
2731 static prop dtd_props[] =
2732 { { dtd_prop_doctype,    "doctype",    1 },
2733   { dtd_prop_elements,	 "elements",   1 },
2734   { dtd_prop_element,	 "element",    3 },
2735   { dtd_prop_attributes, "attributes", 2, },
2736   { dtd_prop_attribute,	 "attribute",  4, },
2737   { dtd_prop_entities,	 "entities",   1, },
2738   { dtd_prop_entity,	 "entity",     2, },
2739   { dtd_prop_notations,	 "notations",  1, },
2740   { dtd_prop_notation,	 "notation",   2, },
2741   { NULL }
2742 };
2743 
2744 
2745 static void
initprops()2746 initprops()
2747 { static int done = FALSE;
2748 
2749   if ( !done )
2750   { prop *p;
2751 
2752     done = TRUE;
2753     for(p=dtd_props; p->func; p++)
2754       p->functor = PL_new_functor(PL_new_atom(p->name), p->arity);
2755   }
2756 }
2757 
2758 
2759 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2760 dtd_property(DTD, doctype(DocType))
2761 dtd_property(DTD, elements(ListOfNames))
2762 dtd_property(DTD, element(Name, Omit, Model))
2763 dtd_property(DTD, attributes(ElementName, ListOfAttributes)),
2764 dtd_property(DTD, attribute(ElementName, AttributeName, Type, Default))
2765 dtd_property(DTD, entities(ListOfEntityNames))
2766 dtd_property(DTD, entity(Name, Type))
2767 dtd_property(DTD, notations(ListOfNotationNames)
2768 dtd_property(DTD, notation(Name, File))
2769 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2770 
2771 static foreign_t
pl_dtd_property(term_t ref,term_t property)2772 pl_dtd_property(term_t ref, term_t property)
2773 { dtd *dtd;
2774   const prop *p;
2775 
2776   initprops();
2777 
2778   if ( !get_dtd(ref, &dtd) )
2779     return FALSE;
2780 
2781   for(p=dtd_props; p->func; p++)
2782   { if ( PL_is_functor(property, p->functor) )
2783     { term_t a = PL_new_term_refs(p->arity);
2784       int i;
2785 
2786       for(i=0; i<p->arity; i++)
2787 	_PL_get_arg(i+1, property, a+i);
2788 
2789       switch(p->arity)
2790       { case 1:
2791 	  return (*p->func)(dtd, a+0);
2792 	case 2:
2793 	  return (*p->func)(dtd, a+0, a+1);
2794 	case 3:
2795 	  return (*p->func)(dtd, a+0, a+1, a+2);
2796 	case 4:
2797 	  return (*p->func)(dtd, a+0, a+1, a+2, a+3);
2798 	default:
2799 	  assert(0);
2800 	  return FALSE;
2801       }
2802     }
2803   }
2804 
2805   return sgml2pl_error(ERR_DOMAIN, "dtd_property", property);
2806 }
2807 
2808 		 /*******************************
2809 		 *	     CATALOG		*
2810 		 *******************************/
2811 
2812 static foreign_t
pl_sgml_register_catalog_file(term_t file,term_t where)2813 pl_sgml_register_catalog_file(term_t file, term_t where)
2814 { wchar_t *fn;
2815   char *w;
2816   catalog_location loc;
2817 
2818   if ( !PL_get_wchars(file, NULL, &fn, CVT_ATOM|CVT_EXCEPTION) )
2819     return FALSE;
2820   if ( !PL_get_atom_chars(where, &w) )
2821     return sgml2pl_error(ERR_TYPE, "atom", where);
2822 
2823   if ( streq(w, "start") )
2824     loc = CTL_START;
2825   else if ( streq(w, "end") )
2826     loc = CTL_END;
2827   else
2828     return sgml2pl_error(ERR_DOMAIN, "location", where);
2829 
2830   return register_catalog_file(fn, loc);
2831 }
2832 
2833 
2834 		 /*******************************
2835 		 *	      INSTALL		*
2836 		 *******************************/
2837 
2838 extern install_t install_xml_quote(void);
2839 extern install_t install_xsd(void);
2840 #ifdef O_STATISTICS
2841 extern void sgml_statistics(void);
2842 #endif
2843 
2844 install_t
install(void)2845 install(void)
2846 { initConstants();
2847 
2848   init_ring();
2849 
2850   PL_register_foreign("new_dtd",	  2, pl_new_dtd,	  0);
2851   PL_register_foreign("free_dtd",	  1, pl_free_dtd,	  0);
2852   PL_register_foreign("new_sgml_parser",  2, pl_new_sgml_parser,  0);
2853   PL_register_foreign("free_sgml_parser", 1, pl_free_sgml_parser, 0);
2854   PL_register_foreign("set_sgml_parser",  2, pl_set_sgml_parser,  0);
2855   PL_register_foreign("get_sgml_parser",  2, pl_get_sgml_parser,  0);
2856   PL_register_foreign("open_dtd",         3, pl_open_dtd,	  0);
2857   PL_register_foreign("sgml_parse",       2, pl_sgml_parse,
2858 		      PL_FA_TRANSPARENT);
2859   PL_register_foreign("_sgml_register_catalog_file", 2,
2860 		      pl_sgml_register_catalog_file, 0);
2861 
2862   PL_register_foreign("$dtd_property",	  2, pl_dtd_property, 0);
2863 
2864   install_xml_quote();
2865   install_xsd();
2866 #ifdef O_STATISTICS
2867   atexit(sgml_statistics);
2868 #endif
2869 }
2870 
2871 
2872 install_t
uninstall(void)2873 uninstall(void)
2874 { stop_ring();
2875 }
2876