1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 2000-2020, University of Amsterdam
7 VU University Amsterdam
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in
19 the documentation and/or other materials provided with the
20 distribution.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
37
38 #define DTD_MINOR_ERRORS 1 /* get detailed errors */
39
40 #include <SWI-Stream.h>
41 #include <SWI-Prolog.h>
42
43 #include <stdio.h>
44 #include "dtd.h"
45 #include "catalog.h"
46 #include "model.h"
47 #include "util.h"
48 #include <errno.h>
49 #include "error.h"
50 #include <stdlib.h>
51 #include <assert.h>
52 #include <string.h>
53 #include <wctype.h>
54
55 #define streq(s1, s2) (strcmp(s1, s2) == 0)
56
57 #define MAX_ERRORS 50
58 #define MAX_WARNINGS 50
59
60 #define ENDSNUL ((size_t)-1)
61
62 /*******************************
63 * PARSER CONTEXT DATA *
64 *******************************/
65
66 #define PD_MAGIC 0x36472ba1 /* just a number */
67
68 typedef enum
69 { SA_FILE = 0, /* Stop at end-of-file */
70 SA_INPUT, /* Do not complete input */
71 SA_ELEMENT, /* Stop after first element */
72 SA_CONTENT, /* Stop after close */
73 SA_DECL /* Stop after declaration */
74 } stopat;
75
76 typedef enum
77 { EM_QUIET = 0, /* Suppress messages */
78 EM_PRINT, /* Print message */
79 EM_STYLE /* include style-messages */
80 } errormode;
81
82 typedef struct _env
83 { term_t tail;
84 struct _env *parent;
85 } env;
86
87
88 typedef struct _parser_data
89 { int magic; /* PD_MAGIC */
90 dtd_parser *parser; /* parser itself */
91
92 int warnings; /* #warnings seen */
93 int errors; /* #errors seen */
94 int max_errors; /* error limit */
95 int max_warnings; /* warning limit */
96 errormode error_mode; /* how to handle errors */
97 int positions; /* report file-positions */
98 term_t exception; /* pending exception from callback */
99
100 predicate_t on_begin; /* begin element */
101 predicate_t on_end; /* end element */
102 predicate_t on_cdata; /* cdata */
103 predicate_t on_entity; /* entity */
104 predicate_t on_pi; /* processing instruction */
105 predicate_t on_xmlns; /* xmlns */
106 predicate_t on_urlns; /* url --> namespace */
107 predicate_t on_error; /* errors */
108 predicate_t on_decl; /* declarations */
109
110 stopat stopat; /* Where to stop */
111 int stopped; /* Environment is complete */
112
113 IOSTREAM* source; /* Where we are reading from */
114
115 term_t list; /* output term (if any) */
116 term_t tail; /* tail of the list */
117 env *stack; /* environment stack */
118 int free_on_close; /* sgml_free parser on close */
119 } parser_data;
120
121
122 /*******************************
123 * CONSTANTS *
124 *******************************/
125
126 static functor_t FUNCTOR_and2;
127 static functor_t FUNCTOR_attribute_value1;
128 static functor_t FUNCTOR_bar2;
129 static functor_t FUNCTOR_comma2;
130 static functor_t FUNCTOR_default1;
131 static functor_t FUNCTOR_dialect1;
132 static functor_t FUNCTOR_keep_prefix1;
133 static functor_t FUNCTOR_document1;
134 static functor_t FUNCTOR_dtd1;
135 static functor_t FUNCTOR_dtd2;
136 static functor_t FUNCTOR_element3;
137 static functor_t FUNCTOR_entity1;
138 static functor_t FUNCTOR_equal2;
139 static functor_t FUNCTOR_file1;
140 static functor_t FUNCTOR_file4;
141 static functor_t FUNCTOR_dstream_position4;
142 static functor_t FUNCTOR_fixed1;
143 static functor_t FUNCTOR_line1;
144 static functor_t FUNCTOR_linepos1;
145 static functor_t FUNCTOR_list1;
146 static functor_t FUNCTOR_max_errors1;
147 static functor_t FUNCTOR_syntax_error1;
148 static functor_t FUNCTOR_error2;
149 static functor_t FUNCTOR_nameof1;
150 static functor_t FUNCTOR_notation1;
151 static functor_t FUNCTOR_omit2;
152 static functor_t FUNCTOR_opt1;
153 static functor_t FUNCTOR_plus1;
154 static functor_t FUNCTOR_rep1;
155 static functor_t FUNCTOR_sgml_parser1;
156 static functor_t FUNCTOR_parse1;
157 static functor_t FUNCTOR_source1;
158 static functor_t FUNCTOR_content_length1;
159 static functor_t FUNCTOR_call2;
160 static functor_t FUNCTOR_cdata1;
161 static functor_t FUNCTOR_charpos1;
162 static functor_t FUNCTOR_charpos2;
163 static functor_t FUNCTOR_ns2; /* :/2 */
164 static functor_t FUNCTOR_prefix2; /* ns/2. This is a bit confusing but ns2 was already taken */
165 static functor_t FUNCTOR_space1;
166 static functor_t FUNCTOR_pi1;
167 static functor_t FUNCTOR_sdata1;
168 static functor_t FUNCTOR_ndata1;
169 static functor_t FUNCTOR_number1;
170 static functor_t FUNCTOR_syntax_errors1;
171 static functor_t FUNCTOR_xml_no_ns1;
172 static functor_t FUNCTOR_minus2;
173 static functor_t FUNCTOR_positions1;
174 static functor_t FUNCTOR_position1;
175 static functor_t FUNCTOR_event_class1;
176 static functor_t FUNCTOR_doctype1;
177 static functor_t FUNCTOR_allowed1;
178 static functor_t FUNCTOR_context1;
179 static functor_t FUNCTOR_defaults1;
180 static functor_t FUNCTOR_shorttag1;
181 static functor_t FUNCTOR_case_sensitive_attributes1;
182 static functor_t FUNCTOR_case_preserving_attributes1;
183 static functor_t FUNCTOR_system_entities1;
184 static functor_t FUNCTOR_max_memory1;
185 static functor_t FUNCTOR_qualify_attributes1;
186 static functor_t FUNCTOR_encoding1;
187 static functor_t FUNCTOR_xmlns1;
188 static functor_t FUNCTOR_xmlns2;
189
190 static atom_t ATOM_true;
191 static atom_t ATOM_false;
192 static atom_t ATOM_cdata;
193 static atom_t ATOM_rcdata;
194 static atom_t ATOM_pcdata;
195 static atom_t ATOM_empty;
196 static atom_t ATOM_any;
197 static atom_t ATOM_position;
198 static atom_t ATOM_atom;
199 static atom_t ATOM_string;
200
201 #define mkfunctor(n, a) PL_new_functor(PL_new_atom(n), a)
202
203 static void
initConstants()204 initConstants()
205 {
206 FUNCTOR_sgml_parser1 = mkfunctor("sgml_parser", 1);
207 FUNCTOR_equal2 = mkfunctor("=", 2);
208 FUNCTOR_dtd1 = mkfunctor("dtd", 1);
209 FUNCTOR_element3 = mkfunctor("element", 3);
210 FUNCTOR_entity1 = mkfunctor("entity", 1);
211 FUNCTOR_document1 = mkfunctor("document", 1);
212 FUNCTOR_dtd2 = mkfunctor("dtd", 2);
213 FUNCTOR_omit2 = mkfunctor("omit", 2);
214 FUNCTOR_and2 = mkfunctor("&", 2);
215 FUNCTOR_comma2 = mkfunctor(",", 2);
216 FUNCTOR_bar2 = mkfunctor("|", 2);
217 FUNCTOR_opt1 = mkfunctor("?", 1);
218 FUNCTOR_rep1 = mkfunctor("*", 1);
219 FUNCTOR_plus1 = mkfunctor("+", 1);
220 FUNCTOR_default1 = mkfunctor("default", 1);
221 FUNCTOR_fixed1 = mkfunctor("fixed", 1);
222 FUNCTOR_list1 = mkfunctor("list", 1);
223 FUNCTOR_nameof1 = mkfunctor("nameof", 1);
224 FUNCTOR_notation1 = mkfunctor("notation", 1);
225 FUNCTOR_file1 = mkfunctor("file", 1);
226 FUNCTOR_file4 = mkfunctor("file", 4);
227 FUNCTOR_line1 = mkfunctor("line", 1);
228 FUNCTOR_linepos1 = mkfunctor("linepos", 1);
229 FUNCTOR_dialect1 = mkfunctor("dialect", 1);
230 FUNCTOR_keep_prefix1 = mkfunctor("keep_prefix", 1);
231 FUNCTOR_max_errors1 = mkfunctor("max_errors", 1);
232 FUNCTOR_parse1 = mkfunctor("parse", 1);
233 FUNCTOR_source1 = mkfunctor("source", 1);
234 FUNCTOR_content_length1= mkfunctor("content_length", 1);
235 FUNCTOR_call2 = mkfunctor("call", 2);
236 FUNCTOR_cdata1 = mkfunctor("cdata", 1);
237 FUNCTOR_attribute_value1 = mkfunctor("attribute_value", 1);
238 FUNCTOR_charpos1 = mkfunctor("charpos", 1);
239 FUNCTOR_charpos2 = mkfunctor("charpos", 2);
240 FUNCTOR_ns2 = mkfunctor(":", 2);
241 FUNCTOR_prefix2 = mkfunctor("ns", 2);
242 FUNCTOR_space1 = mkfunctor("space", 1);
243 FUNCTOR_pi1 = mkfunctor("pi", 1);
244 FUNCTOR_sdata1 = mkfunctor("sdata", 1);
245 FUNCTOR_ndata1 = mkfunctor("ndata", 1);
246 FUNCTOR_number1 = mkfunctor("number", 1);
247 FUNCTOR_syntax_errors1 = mkfunctor("syntax_errors", 1);
248 FUNCTOR_syntax_error1 = mkfunctor("syntax_error", 1);
249 FUNCTOR_error2 = mkfunctor("error", 2);
250 FUNCTOR_xml_no_ns1 = mkfunctor("xml_no_ns", 1);
251 FUNCTOR_minus2 = mkfunctor("-", 2);
252 FUNCTOR_position1 = mkfunctor("position", 1);
253 FUNCTOR_positions1 = mkfunctor("positions", 1);
254 FUNCTOR_event_class1 = mkfunctor("event_class", 1);
255 FUNCTOR_doctype1 = mkfunctor("doctype", 1);
256 FUNCTOR_allowed1 = mkfunctor("allowed", 1);
257 FUNCTOR_context1 = mkfunctor("context", 1);
258 FUNCTOR_defaults1 = mkfunctor("defaults", 1);
259 FUNCTOR_shorttag1 = mkfunctor("shorttag", 1);
260 FUNCTOR_case_sensitive_attributes1 = mkfunctor("case_sensitive_attributes", 1);
261 FUNCTOR_case_preserving_attributes1 = mkfunctor("case_preserving_attributes", 1);
262 FUNCTOR_system_entities1 = mkfunctor("system_entities", 1);
263 FUNCTOR_max_memory1 = mkfunctor("max_memory", 1);
264 FUNCTOR_qualify_attributes1 = mkfunctor("qualify_attributes", 1);
265 FUNCTOR_encoding1 = mkfunctor("encoding", 1);
266 FUNCTOR_xmlns1 = mkfunctor("xmlns", 1);
267 FUNCTOR_xmlns2 = mkfunctor("xmlns", 2);
268 FUNCTOR_dstream_position4 = PL_new_functor(PL_new_atom("$stream_position"), 4);
269
270 ATOM_true = PL_new_atom("true");
271 ATOM_false = PL_new_atom("false");
272 ATOM_cdata = PL_new_atom("cdata");
273 ATOM_rcdata = PL_new_atom("rcdata");
274 ATOM_pcdata = PL_new_atom("#pcdata");
275 ATOM_empty = PL_new_atom("empty");
276 ATOM_any = PL_new_atom("any");
277 ATOM_atom = PL_new_atom("atom");
278 ATOM_string = PL_new_atom("string");
279 ATOM_position = PL_new_atom("#position");
280 }
281
282 static int on_data(dtd_parser *p, data_type type, int len, const wchar_t *data);
283
284
285 /*******************************
286 * ACCESS *
287 *******************************/
288
289 static int
unify_parser(term_t parser,dtd_parser * p)290 unify_parser(term_t parser, dtd_parser *p)
291 { return PL_unify_term(parser, PL_FUNCTOR, FUNCTOR_sgml_parser1,
292 PL_POINTER, p);
293 }
294
295
296 static int
get_parser(term_t parser,dtd_parser ** p)297 get_parser(term_t parser, dtd_parser **p)
298 { if ( PL_is_functor(parser, FUNCTOR_sgml_parser1) )
299 { term_t a = PL_new_term_ref();
300 void *ptr;
301
302 _PL_get_arg(1, parser, a);
303 if ( PL_get_pointer(a, &ptr) )
304 { dtd_parser *tmp = ptr;
305
306 if ( tmp->magic == SGML_PARSER_MAGIC )
307 { *p = tmp;
308
309 return TRUE;
310 }
311 return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
312 }
313 }
314
315 return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
316 }
317
318
319 static int
unify_dtd(term_t t,dtd * dtd)320 unify_dtd(term_t t, dtd *dtd)
321 { if ( dtd->doctype )
322 return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
323 PL_POINTER, dtd,
324 PL_NWCHARS, (size_t)-1, dtd->doctype);
325 else
326 return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_dtd2,
327 PL_POINTER, dtd,
328 PL_VARIABLE);
329 }
330
331
332 static int
get_dtd(term_t t,dtd ** dtdp)333 get_dtd(term_t t, dtd **dtdp)
334 { if ( PL_is_functor(t, FUNCTOR_dtd2) )
335 { term_t a = PL_new_term_ref();
336 void *ptr;
337
338 _PL_get_arg(1, t, a);
339 if ( PL_get_pointer(a, &ptr) )
340 { dtd *tmp = ptr;
341
342 if ( tmp->magic == SGML_DTD_MAGIC )
343 { *dtdp = tmp;
344
345 return TRUE;
346 }
347 return sgml2pl_error(ERR_EXISTENCE, "dtd", t);
348 }
349 }
350
351 return sgml2pl_error(ERR_TYPE, "dtd", t);
352 }
353
354
355 /*******************************
356 * NEW/FREE *
357 *******************************/
358
359 static foreign_t
pl_new_sgml_parser(term_t ref,term_t options)360 pl_new_sgml_parser(term_t ref, term_t options)
361 { term_t head = PL_new_term_ref();
362 term_t tail = PL_copy_term_ref(options);
363 term_t tmp = PL_new_term_ref();
364
365 dtd *dtd = NULL;
366 dtd_parser *p;
367
368 while ( PL_get_list(tail, head, tail) )
369 { if ( PL_is_functor(head, FUNCTOR_dtd1) )
370 { _PL_get_arg(1, head, tmp);
371
372 if ( PL_is_variable(tmp) ) /* dtd(X) */
373 { dtd = new_dtd(NULL); /* no known doctype */
374 dtd->references++;
375 unify_dtd(tmp, dtd);
376 } else if ( !get_dtd(tmp, &dtd) )
377 return FALSE;
378 }
379 }
380 if ( !PL_get_nil(tail) )
381 return sgml2pl_error(ERR_TYPE, "list", tail);
382
383 p = new_dtd_parser(dtd);
384
385 return unify_parser(ref, p);
386 }
387
388
389 static foreign_t
pl_free_sgml_parser(term_t parser)390 pl_free_sgml_parser(term_t parser)
391 { dtd_parser *p;
392
393 if ( get_parser(parser, &p) )
394 { free_dtd_parser(p);
395 return TRUE;
396 }
397
398 return FALSE;
399 }
400
401
402 static foreign_t
pl_new_dtd(term_t doctype,term_t ref)403 pl_new_dtd(term_t doctype, term_t ref)
404 { ichar *dt;
405 dtd *dtd;
406
407 if ( !PL_get_wchars(doctype, NULL, &dt, CVT_ATOM|CVT_EXCEPTION) )
408 return FALSE;
409
410 if ( !(dtd=new_dtd(dt)) )
411 return FALSE;
412
413 dtd->references++;
414
415 return unify_dtd(ref, dtd);
416 }
417
418
419 static foreign_t
pl_free_dtd(term_t t)420 pl_free_dtd(term_t t)
421 { dtd *dtd;
422
423 if ( get_dtd(t, &dtd) )
424 { free_dtd(dtd);
425 return TRUE;
426 }
427
428 return FALSE;
429 }
430
431 /*******************************
432 * DATA EXCHANGE *
433 *******************************/
434
435 static int
put_atom_wchars(term_t t,wchar_t const * s)436 put_atom_wchars(term_t t, wchar_t const *s)
437 { PL_put_variable(t);
438 return PL_unify_wchars(t, PL_ATOM, ENDSNUL, s);
439 }
440
441
442 /*******************************
443 * PROPERTIES *
444 *******************************/
445
446 static foreign_t
pl_set_sgml_parser(term_t parser,term_t option)447 pl_set_sgml_parser(term_t parser, term_t option)
448 { dtd_parser *p;
449
450 if ( !get_parser(parser, &p) )
451 return FALSE;
452
453 if ( PL_is_functor(option, FUNCTOR_file1) )
454 { term_t a = PL_new_term_ref();
455 wchar_t *file;
456 dtd_symbol *fs;
457
458 _PL_get_arg(1, option, a);
459 if ( !PL_get_wchars(a, NULL, &file, CVT_ATOM|CVT_EXCEPTION) )
460 return FALSE;
461 fs = dtd_add_symbol(p->dtd, file); /* symbol will be freed */
462 set_file_dtd_parser(p, IN_FILE, fs->name);
463 } else if ( PL_is_functor(option, FUNCTOR_line1) )
464 { term_t a = PL_new_term_ref();
465
466 _PL_get_arg(1, option, a);
467 if ( !PL_get_integer_ex(a, &p->location.line) )
468 return FALSE;
469 } else if ( PL_is_functor(option, FUNCTOR_linepos1) )
470 { term_t a = PL_new_term_ref();
471
472 _PL_get_arg(1, option, a);
473 if ( !PL_get_integer_ex(a, &p->location.linepos) )
474 return FALSE;
475 } else if ( PL_is_functor(option, FUNCTOR_charpos1) )
476 { term_t a = PL_new_term_ref();
477
478 _PL_get_arg(1, option, a);
479 if ( !PL_get_long_ex(a, &p->location.charpos) )
480 return FALSE;
481 } else if ( PL_is_functor(option, FUNCTOR_position1) )
482 { term_t a = PL_new_term_ref();
483
484 _PL_get_arg(1, option, a);
485 if ( PL_is_functor(a, FUNCTOR_dstream_position4) )
486 { term_t arg = PL_new_term_ref();
487
488 if ( !PL_get_arg(1,a,arg) || !PL_get_long_ex(arg, &p->location.charpos) ||
489 !PL_get_arg(2,a,arg) || !PL_get_integer_ex(arg, &p->location.line) ||
490 !PL_get_arg(3,a,arg) || !PL_get_integer_ex(arg, &p->location.linepos))
491 return FALSE;
492 } else
493 return PL_type_error("stream_position", a);
494 } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
495 { term_t a = PL_new_term_ref();
496 char *s;
497
498 _PL_get_arg(1, option, a);
499 if ( !PL_get_atom_chars(a, &s) )
500 return sgml2pl_error(ERR_TYPE, "atom", a);
501
502 if ( streq(s, "xml") )
503 set_dialect_dtd(p->dtd, p, DL_XML);
504 else if ( streq(s, "xmlns") )
505 set_dialect_dtd(p->dtd, p, DL_XMLNS);
506 else if ( streq(s, "sgml") )
507 set_dialect_dtd(p->dtd, p, DL_SGML);
508 else if ( streq(s, "html") || streq(s, "html4") )
509 set_dialect_dtd(p->dtd, p, DL_HTML);
510 else if ( streq(s, "html5") )
511 set_dialect_dtd(p->dtd, p, DL_HTML5);
512 else if ( streq(s, "xhtml") )
513 set_dialect_dtd(p->dtd, p, DL_XHTML);
514 else if ( streq(s, "xhtml5") )
515 set_dialect_dtd(p->dtd, p, DL_XHTML5);
516 else
517 return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
518 } else if ( PL_is_functor(option, FUNCTOR_space1) )
519 { term_t a = PL_new_term_ref();
520 char *s;
521
522 _PL_get_arg(1, option, a);
523 if ( !PL_get_atom_chars(a, &s) )
524 return sgml2pl_error(ERR_TYPE, "atom", a);
525
526 if ( streq(s, "preserve") )
527 p->dtd->space_mode = SP_PRESERVE;
528 else if ( streq(s, "default") )
529 p->dtd->space_mode = SP_DEFAULT;
530 else if ( streq(s, "remove") )
531 p->dtd->space_mode = SP_REMOVE;
532 else if ( streq(s, "sgml") )
533 p->dtd->space_mode = SP_SGML;
534 else if ( streq(s, "strict") )
535 p->dtd->space_mode = SP_STRICT;
536
537 else
538 return sgml2pl_error(ERR_DOMAIN, "space", a);
539 } else if ( PL_is_functor(option, FUNCTOR_defaults1) )
540 { term_t a = PL_new_term_ref();
541 int val;
542
543 _PL_get_arg(1, option, a);
544 if ( !PL_get_bool(a, &val) )
545 return sgml2pl_error(ERR_TYPE, "boolean", a);
546
547 if ( val )
548 p->flags &= ~SGML_PARSER_NODEFS;
549 else
550 p->flags |= SGML_PARSER_NODEFS;
551 } else if ( PL_is_functor(option, FUNCTOR_qualify_attributes1) )
552 { term_t a = PL_new_term_ref();
553 int val;
554
555 _PL_get_arg(1, option, a);
556 if ( !PL_get_bool(a, &val) )
557 return sgml2pl_error(ERR_TYPE, "boolean", a);
558
559 if ( val )
560 p->flags |= SGML_PARSER_QUALIFY_ATTS;
561 else
562 p->flags &= ~SGML_PARSER_QUALIFY_ATTS;
563 } else if ( PL_is_functor(option, FUNCTOR_shorttag1) )
564 { term_t a = PL_new_term_ref();
565 int val;
566
567 _PL_get_arg(1, option, a);
568 if ( !PL_get_bool(a, &val) )
569 return sgml2pl_error(ERR_TYPE, "boolean", a);
570
571 set_option_dtd(p->dtd, OPT_SHORTTAG, val);
572 } else if ( PL_is_functor(option, FUNCTOR_case_sensitive_attributes1) )
573 { term_t a = PL_new_term_ref();
574 int val;
575
576 _PL_get_arg(1, option, a);
577 if ( !PL_get_bool(a, &val) )
578 return sgml2pl_error(ERR_TYPE, "boolean", a);
579
580 set_option_dtd(p->dtd, OPT_CASE_SENSITIVE_ATTRIBUTES, val);
581 } else if ( PL_is_functor(option, FUNCTOR_case_preserving_attributes1) )
582 { term_t a = PL_new_term_ref();
583 int val;
584
585 _PL_get_arg(1, option, a);
586 if ( !PL_get_bool(a, &val) )
587 return sgml2pl_error(ERR_TYPE, "boolean", a);
588
589 set_option_dtd(p->dtd, OPT_CASE_PRESERVING_ATTRIBUTES, val);
590 } else if ( PL_is_functor(option, FUNCTOR_system_entities1) )
591 { term_t a = PL_new_term_ref();
592 int val;
593
594 _PL_get_arg(1, option, a);
595 if ( !PL_get_bool(a, &val) )
596 return sgml2pl_error(ERR_TYPE, "boolean", a);
597
598 set_option_dtd(p->dtd, OPT_SYSTEM_ENTITIES, val);
599 } else if ( PL_is_functor(option, FUNCTOR_max_memory1) )
600 { term_t a = PL_new_term_ref();
601 int val;
602
603 _PL_get_arg(1, option, a);
604 if ( !PL_get_integer(a, &val) )
605 return sgml2pl_error(ERR_TYPE, "integer", a);
606
607 p->max_memory = val;
608 if ( p->buffer )
609 p->buffer->limit = val;
610 if ( p->cdata )
611 p->cdata->limit = val;
612 } else if ( PL_is_functor(option, FUNCTOR_number1) )
613 { term_t a = PL_new_term_ref();
614 char *s;
615
616 _PL_get_arg(1, option, a);
617 if ( !PL_get_atom_chars(a, &s) )
618 return sgml2pl_error(ERR_TYPE, "atom", a);
619
620 if ( streq(s, "token") )
621 p->dtd->number_mode = NU_TOKEN;
622 else if ( streq(s, "integer") )
623 p->dtd->number_mode = NU_INTEGER;
624 else
625 return sgml2pl_error(ERR_DOMAIN, "number", a);
626 } else if ( PL_is_functor(option, FUNCTOR_encoding1) )
627 { term_t a = PL_new_term_ref();
628 char *val;
629
630 _PL_get_arg(1, option, a);
631 if ( !PL_get_atom_chars(a, &val) )
632 return sgml2pl_error(ERR_TYPE, "atom", a);
633 if ( !xml_set_encoding(p, val) )
634 return sgml2pl_error(ERR_DOMAIN, "encoding", a);
635 } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
636 { term_t a = PL_new_term_ref();
637 ichar *s;
638
639 _PL_get_arg(1, option, a);
640 if ( PL_is_variable(a) )
641 { p->enforce_outer_element = NULL;
642 } else
643 { if ( !PL_get_wchars(a, NULL, &s, CVT_ATOM) )
644 return sgml2pl_error(ERR_TYPE, "atom_or_variable", a);
645
646 p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
647 }
648 } else if ( PL_is_functor(option, FUNCTOR_xmlns1) )
649 { term_t a = PL_new_term_ref();
650 ichar ns[1] = {0};
651 ichar *uri;
652
653 _PL_get_arg(1, option, a);
654 if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) )
655 return FALSE;
656
657 xmlns_push(p, ns, uri);
658 } else if ( PL_is_functor(option, FUNCTOR_xmlns2) )
659 { term_t a = PL_new_term_ref();
660 ichar *ns, *uri;
661
662 _PL_get_arg(1, option, a);
663 if ( !PL_get_wchars(a, NULL, &ns, CVT_ATOM|CVT_EXCEPTION) )
664 return FALSE;
665 _PL_get_arg(2, option, a);
666 if ( !PL_get_wchars(a, NULL, &uri, CVT_ATOM|CVT_EXCEPTION) )
667 return FALSE;
668
669 xmlns_push(p, ns, uri);
670 } else if ( PL_is_functor(option, FUNCTOR_keep_prefix1) )
671 { term_t a = PL_new_term_ref();
672 int val;
673
674 _PL_get_arg(1, option, a);
675 if ( !PL_get_bool(a, &val) )
676 return sgml2pl_error(ERR_TYPE, "boolean", a);
677 set_option_dtd(p->dtd, OPT_KEEP_PREFIX, val);
678 } else
679 return sgml2pl_error(ERR_DOMAIN, "sgml_parser_option", option);
680
681 return TRUE;
682 }
683
684
685 static dtd_srcloc *
file_location(dtd_parser * p,dtd_srcloc * l)686 file_location(dtd_parser *p, dtd_srcloc *l)
687 { while(l->parent && l->type != IN_FILE)
688 l = l->parent;
689
690 return l;
691 }
692
693
694 static foreign_t
pl_get_sgml_parser(term_t parser,term_t option)695 pl_get_sgml_parser(term_t parser, term_t option)
696 { dtd_parser *p;
697
698 if ( !get_parser(parser, &p) )
699 return FALSE;
700
701 if ( PL_is_functor(option, FUNCTOR_charpos1) )
702 { term_t a = PL_new_term_ref();
703
704 _PL_get_arg(1, option, a);
705 return PL_unify_integer(a, file_location(p, &p->startloc)->charpos);
706 } else if ( PL_is_functor(option, FUNCTOR_line1) )
707 { term_t a = PL_new_term_ref();
708
709 _PL_get_arg(1, option, a);
710 return PL_unify_integer(a, file_location(p, &p->startloc)->line);
711 } else if ( PL_is_functor(option, FUNCTOR_charpos2) )
712 { term_t a = PL_new_term_ref();
713
714 if ( PL_get_arg(1, option, a) &&
715 PL_unify_integer(a, file_location(p, &p->startloc)->charpos) &&
716 PL_get_arg(2, option, a) &&
717 PL_unify_integer(a, file_location(p, &p->location)->charpos) )
718 return TRUE;
719 else
720 return FALSE;
721 } else if ( PL_is_functor(option, FUNCTOR_file1) )
722 { dtd_srcloc *l = file_location(p, &p->location);
723
724 if ( l->type == IN_FILE && l->name.file )
725 { term_t a = PL_new_term_ref();
726
727 _PL_get_arg(1, option, a);
728 return PL_unify_wchars(a, PL_ATOM, ENDSNUL, l->name.file);
729 }
730 } else if ( PL_is_functor(option, FUNCTOR_source1) )
731 { parser_data *pd = p->closure;
732
733 if ( pd && pd->magic == PD_MAGIC && pd->source )
734 { term_t a = PL_new_term_ref();
735
736 _PL_get_arg(1, option, a);
737 return PL_unify_stream(a, pd->source);
738 }
739 } else if ( PL_is_functor(option, FUNCTOR_dialect1) )
740 { term_t a = PL_new_term_ref();
741
742 _PL_get_arg(1, option, a);
743 switch(p->dtd->dialect)
744 { case DL_SGML:
745 return PL_unify_atom_chars(a, "sgml");
746 case DL_HTML:
747 return PL_unify_atom_chars(a, "html");
748 case DL_HTML5:
749 return PL_unify_atom_chars(a, "html5");
750 case DL_XHTML:
751 return PL_unify_atom_chars(a, "xhtml");
752 case DL_XHTML5:
753 return PL_unify_atom_chars(a, "xhtml5");
754 case DL_XML:
755 return PL_unify_atom_chars(a, "xml");
756 case DL_XMLNS:
757 return PL_unify_atom_chars(a, "xmlns");
758 }
759 } else if ( PL_is_functor(option, FUNCTOR_event_class1) )
760 { term_t a = PL_new_term_ref();
761
762 _PL_get_arg(1, option, a);
763 switch(p->event_class)
764 { case EV_EXPLICIT:
765 return PL_unify_atom_chars(a, "explicit");
766 case EV_OMITTED:
767 return PL_unify_atom_chars(a, "omitted");
768 case EV_SHORTTAG:
769 return PL_unify_atom_chars(a, "shorttag");
770 case EV_SHORTREF:
771 return PL_unify_atom_chars(a, "shortref");
772 }
773 } else if ( PL_is_functor(option, FUNCTOR_dtd1) )
774 { term_t a = PL_new_term_ref();
775
776 _PL_get_arg(1, option, a);
777
778 return unify_dtd(a, p->dtd);
779 } else if ( PL_is_functor(option, FUNCTOR_doctype1) )
780 { term_t a = PL_new_term_ref();
781
782 _PL_get_arg(1, option, a);
783 if ( p->enforce_outer_element )
784 return PL_unify_wchars(a, PL_ATOM, ENDSNUL,
785 p->enforce_outer_element->name);
786 else
787 return TRUE; /* leave variable */
788 } else if ( PL_is_functor(option, FUNCTOR_allowed1) )
789 { term_t tail, head, tmp;
790 sgml_environment *env = p->environments;
791
792 if ( !(tail = PL_new_term_ref()) ||
793 !(head = PL_new_term_ref()) ||
794 !(tmp = PL_new_term_ref()) )
795 return FALSE;
796
797 _PL_get_arg(1, option, tail);
798
799 if ( env )
800 { for( ; env; env = env->parent)
801 { dtd_element *buf[256]; /* MAX_VISITED! */
802 int n = sizeof(buf)/sizeof(dtd_element *); /* not yet used! */
803 int i;
804
805 state_allows_for(env->state, buf, &n);
806
807 for(i=0; i<n; i++)
808 { int rc;
809
810 if ( buf[i] == CDATA_ELEMENT )
811 rc = PL_put_atom_chars(tmp, "#pcdata");
812 else
813 rc = put_atom_wchars(tmp, buf[i]->name->name);
814
815 if ( !rc ||
816 !PL_unify_list(tail, head, tail) ||
817 !PL_unify(head, tmp) )
818 return FALSE;
819 }
820
821 if ( !env->element->structure ||
822 !env->element->structure->omit_close )
823 break;
824 }
825 } else if ( p->enforce_outer_element )
826 { put_atom_wchars(tmp, p->enforce_outer_element->name);
827
828 if ( !PL_unify_list(tail, head, tail) ||
829 !PL_unify(head, tmp) )
830 return FALSE;
831 }
832
833 return PL_unify_nil(tail);
834 } else if ( PL_is_functor(option, FUNCTOR_context1) )
835 { term_t tail = PL_new_term_ref();
836 term_t head = PL_new_term_ref();
837 term_t tmp = PL_new_term_ref();
838 sgml_environment *env = p->environments;
839
840 _PL_get_arg(1, option, tail);
841
842 for( ; env; env = env->parent)
843 { put_atom_wchars(tmp, env->element->name->name);
844
845 if ( !PL_unify_list(tail, head, tail) ||
846 !PL_unify(head, tmp) )
847 return FALSE;
848 }
849
850 return PL_unify_nil(tail);
851 } else
852 return sgml2pl_error(ERR_DOMAIN, "parser_option", option);
853
854 return FALSE;
855 }
856
857
858 static int
call_prolog(parser_data * pd,predicate_t pred,term_t av)859 call_prolog(parser_data *pd, predicate_t pred, term_t av)
860 { qid_t qid = PL_open_query(NULL, PL_Q_PASS_EXCEPTION, pred, av);
861 int rc = PL_next_solution(qid);
862
863 PL_close_query(qid);
864
865 if ( rc )
866 { pd->exception = FALSE;
867 } else
868 { if ( (pd->exception = PL_exception(0)) )
869 pd->stopped = TRUE;
870 }
871
872 return rc;
873 }
874
875
876 static void
end_frame(fid_t fid,term_t ex)877 end_frame(fid_t fid, term_t ex)
878 { if ( ex )
879 PL_close_foreign_frame(fid);
880 else
881 PL_discard_foreign_frame(fid);
882 }
883
884
885 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
886 put_url(dtd_parser *p, term_t t, const ichar *url)
887 Store the url-part of a name-space qualifier in term. We call
888 xml:xmlns(-Canonical, +Full) trying to resolve the specified
889 namespace to an internal canonical namespace.
890
891 We do a little caching as there will generally be only a very
892 small pool of urls in use. We assume the url-pointers we get
893 life for the time of the parser. It might be possible that
894 multiple url pointers point to the same url, but this only clobbers
895 the cache a little.
896 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
897
898 #define URL_CACHE 4 /* # entries cached */
899
900 typedef struct
901 { const ichar *url; /* URL pointer */
902 atom_t canonical;
903 } url_cache;
904
905 static url_cache cache[URL_CACHE];
906
907 static void
reset_url_cache()908 reset_url_cache()
909 { int i;
910 url_cache *c = cache;
911
912 for(i=0; i<URL_CACHE; i++)
913 { c[i].url = NULL;
914 if ( c[i].canonical )
915 PL_unregister_atom(c[i].canonical);
916 c[i].canonical = 0;
917 }
918 }
919
920
921 WUNUSED static int
put_url(dtd_parser * p,term_t t,const ichar * url)922 put_url(dtd_parser *p, term_t t, const ichar *url)
923 { parser_data *pd = p->closure;
924 fid_t fid;
925 int i;
926
927 if ( !pd->on_urlns )
928 return put_atom_wchars(t, url);
929
930 for(i=0; i<URL_CACHE; i++)
931 { if ( cache[i].url == url ) /* cache hit */
932 { if ( cache[i].canonical ) /* and a canonical value */
933 return PL_put_atom(t, cache[i].canonical);
934 else
935 return put_atom_wchars(t, url);
936 }
937 }
938 /* shift the cache */
939 i = URL_CACHE-1;
940 if ( cache[i].canonical )
941 PL_unregister_atom(cache[i].canonical);
942 for(i=URL_CACHE-1; i>0; i--)
943 cache[i] = cache[i-1];
944 cache[0].url = url;
945 cache[0].canonical = 0;
946
947 if ( (fid = PL_open_foreign_frame()) )
948 { int rc;
949 term_t av = PL_new_term_refs(3);
950 atom_t a;
951
952 rc = (put_atom_wchars(av+0, url) &&
953 unify_parser(av+2, p));
954
955 if ( rc &&
956 PL_call_predicate(NULL, PL_Q_NORMAL, pd->on_urlns, av) &&
957 PL_get_atom(av+1, &a) )
958 { PL_register_atom(a);
959 cache[0].canonical = a;
960 PL_put_atom(t, a);
961 } else if ( rc )
962 { rc = put_atom_wchars(t, url);
963 }
964 PL_discard_foreign_frame(fid);
965
966 return rc;
967 }
968
969 return FALSE;
970 }
971
972
973 WUNUSED static int
put_attribute_name(dtd_parser * p,term_t t,dtd_symbol * nm)974 put_attribute_name(dtd_parser *p, term_t t, dtd_symbol *nm)
975 { const ichar *url, *local, *prefix;
976 if ( p->dtd->dialect == DL_XMLNS )
977 { xmlns_resolve_attribute(p, nm, &local, &url, &prefix);
978 if ( url )
979 { term_t av;
980 if ( p->dtd->keep_prefix)
981 { /* creates ns(prefix,url):local */
982 PL_put_variable(t);
983 return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_ns2,
984 PL_FUNCTOR, FUNCTOR_prefix2,
985 PL_NWCHARS, ENDSNUL, prefix ? prefix : L"",
986 PL_NWCHARS, ENDSNUL, url,
987 PL_NWCHARS, ENDSNUL, local);
988 } else
989 { return ( (av=PL_new_term_refs(2)) &&
990 put_url(p, av+0, url) &&
991 put_atom_wchars(av+1, local) &&
992 PL_cons_functor_v(t, FUNCTOR_ns2, av) );
993 }
994 } else
995 return put_atom_wchars(t, local);
996 } else
997 return put_atom_wchars(t, nm->name);
998 }
999
1000
1001 WUNUSED static int
put_element_name(dtd_parser * p,term_t t,dtd_element * e)1002 put_element_name(dtd_parser *p, term_t t, dtd_element *e)
1003 { const ichar *url, *local, *prefix;
1004
1005 if ( p->dtd->dialect == DL_XMLNS )
1006 { assert(p->environments->element == e);
1007 xmlns_resolve_element(p, &local, &url, &prefix);
1008
1009 if ( url )
1010 { term_t av;
1011 if ( p->dtd->keep_prefix )
1012 { /* creates ns(prefix,url):local */
1013 return PL_unify_term(t, PL_FUNCTOR, FUNCTOR_ns2,
1014 PL_FUNCTOR, FUNCTOR_prefix2,
1015 PL_NWCHARS, ENDSNUL, prefix ? prefix : L"",
1016 PL_NWCHARS, ENDSNUL, url,
1017 PL_NWCHARS, ENDSNUL, local);
1018 } else
1019 { return ( (av=PL_new_term_refs(2)) &&
1020 put_url(p, av+0, url) &&
1021 put_atom_wchars(av+1, local) &&
1022 PL_cons_functor_v(t, FUNCTOR_ns2, av) );
1023 }
1024 } else
1025 return put_atom_wchars(t, local);
1026 } else
1027 return put_atom_wchars(t, e->name->name);
1028 }
1029
1030
1031 static ichar *
istrblank(const ichar * s)1032 istrblank(const ichar *s)
1033 { for( ; *s; s++ )
1034 { if ( iswspace(*s) )
1035 return (ichar *)s;
1036 }
1037
1038 return NULL;
1039 }
1040
1041
1042 WUNUSED static int
unify_listval(dtd_parser * p,term_t t,attrtype type,size_t len,const ichar * text)1043 unify_listval(dtd_parser *p,
1044 term_t t, attrtype type, size_t len, const ichar *text)
1045 { if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
1046 { wchar_t *e;
1047
1048 #if SIZEOF_LONG == 4 && defined(HAVE_WCSTOLL)
1049 int64_t v = wcstoll(text, &e, 10);
1050 if ( (size_t)(e-text) == len && errno != ERANGE )
1051 return PL_unify_int64(t, v);
1052 #else
1053 long v = wcstol(text, &e, 10);
1054
1055 if ( (size_t)(e-text) == len && errno != ERANGE )
1056 return PL_unify_integer(t, v);
1057 #endif
1058 /* TBD: Error!? */
1059 }
1060
1061 return PL_unify_wchars(t, PL_ATOM, len, text);
1062 }
1063
1064
1065 WUNUSED static int
put_att_text(dtd_parser * p,term_t t,sgml_attribute * a)1066 put_att_text(dtd_parser *p, term_t t, sgml_attribute *a)
1067 { if ( a->value.textW )
1068 { PL_put_variable(t);
1069 return PL_unify_wchars(t, p->att_rep, a->value.number, a->value.textW);
1070 } else
1071 return FALSE;
1072 }
1073
1074
1075 WUNUSED static int
put_attribute_value(dtd_parser * p,term_t t,sgml_attribute * a)1076 put_attribute_value(dtd_parser *p, term_t t, sgml_attribute *a)
1077 { switch(a->definition->type)
1078 { case AT_CDATA:
1079 return put_att_text(p, t, a);
1080 case AT_NUMBER:
1081 { if ( !put_att_text(p, t, a) )
1082 return PL_put_integer(t, a->value.number);
1083 return TRUE;
1084 }
1085 default: /* multi-valued attribute */
1086 { if ( a->definition->islist && a->value.textW )
1087 { term_t tail, head;
1088 const ichar *val = a->value.textW;
1089 const ichar *e;
1090
1091 PL_put_variable(t);
1092 if ( !(head = PL_new_term_ref()) ||
1093 !(tail = PL_copy_term_ref(t)) )
1094 return FALSE;
1095
1096 for(e=istrblank(val); e; val = e+1, e=istrblank(val))
1097 { if ( e == val )
1098 continue; /* skip spaces */
1099 if ( !PL_unify_list(tail, head, tail) ||
1100 !unify_listval(p, head, a->definition->type, e-val, val) )
1101 return FALSE;
1102 }
1103
1104 return ( PL_unify_list(tail, head, tail) &&
1105 unify_listval(p, head, a->definition->type,
1106 istrlen(val), val) &&
1107 PL_unify_nil(tail) );
1108 } else
1109 return put_att_text(p, t, a);
1110 }
1111 }
1112 }
1113
1114
1115 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1116 Produce a tag-location in the format
1117
1118 start_location=file:char-char
1119 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1120
1121 WUNUSED static int
put_tag_position(dtd_parser * p,term_t pos)1122 put_tag_position(dtd_parser *p, term_t pos)
1123 { dtd_srcloc *l = &p->startloc;
1124
1125 if ( l->type == IN_FILE && l->name.file )
1126 { PL_put_variable(pos);
1127 return PL_unify_term(pos,
1128 PL_FUNCTOR, FUNCTOR_ns2,
1129 PL_NWCHARS, wcslen(l->name.file), l->name.file,
1130 PL_FUNCTOR, FUNCTOR_minus2,
1131 PL_LONG, l->charpos,
1132 PL_LONG, p->location.charpos);
1133 }
1134
1135 return FALSE;
1136 }
1137
1138
1139
1140 WUNUSED static int
unify_attribute_list(dtd_parser * p,term_t alist,int argc,sgml_attribute * argv)1141 unify_attribute_list(dtd_parser *p, term_t alist,
1142 int argc, sgml_attribute *argv)
1143 { int i;
1144 term_t tail = PL_copy_term_ref(alist);
1145 term_t h = PL_new_term_ref();
1146 term_t a = PL_new_term_refs(2);
1147 parser_data *pd = p->closure;
1148
1149 for(i=0; i<argc; i++)
1150 { if ( !put_attribute_name(p, a+0, argv[i].definition->name) ||
1151 !put_attribute_value(p, a+1, &argv[i]) ||
1152 !PL_cons_functor_v(a, FUNCTOR_equal2, a) ||
1153 !PL_unify_list(tail, h, tail) ||
1154 !PL_unify(h, a) )
1155 return FALSE;
1156 }
1157
1158 if ( pd->positions && put_tag_position(p, a+1) )
1159 { PL_put_atom(a, ATOM_position);
1160
1161 if ( !PL_cons_functor_v(a, FUNCTOR_equal2, a) ||
1162 !PL_unify_list(tail, h, tail) ||
1163 !PL_unify(h, a) )
1164 return FALSE;
1165 }
1166
1167 if ( PL_unify_nil(tail) )
1168 { PL_reset_term_refs(tail);
1169
1170 return TRUE;
1171 }
1172
1173 return FALSE;
1174 }
1175
1176
1177
1178 static int
on_begin_(dtd_parser * p,dtd_element * e,int argc,sgml_attribute * argv)1179 on_begin_(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
1180 { parser_data *pd = p->closure;
1181
1182 if ( pd->stopped )
1183 return TRUE;
1184
1185 if ( pd->tail )
1186 { term_t content = PL_new_term_ref(); /* element content */
1187 term_t alist = PL_new_term_ref(); /* attribute list */
1188 term_t et = PL_new_term_ref(); /* element structure */
1189 term_t h = PL_new_term_ref();
1190
1191 if ( !h ||
1192 !put_element_name(p, h, e) ||
1193 !unify_attribute_list(p, alist, argc, argv) ||
1194 !PL_unify_term(et,
1195 PL_FUNCTOR, FUNCTOR_element3,
1196 PL_TERM, h,
1197 PL_TERM, alist,
1198 PL_TERM, content) )
1199 { pd->exception = PL_exception(0);
1200 return FALSE;
1201 }
1202
1203 if ( PL_unify_list(pd->tail, h, pd->tail) &&
1204 PL_unify(h, et) )
1205 { env *env = sgml_calloc(1, sizeof(*env));
1206
1207 env->tail = pd->tail;
1208 env->parent = pd->stack;
1209 pd->stack = env;
1210
1211 pd->tail = content;
1212 PL_reset_term_refs(alist);
1213
1214 return TRUE;
1215 }
1216
1217 pd->exception = PL_exception(0);
1218 return FALSE;
1219 }
1220
1221 if ( pd->on_begin )
1222 { fid_t fid;
1223
1224 if ( (fid = PL_open_foreign_frame()) )
1225 { int rc;
1226 term_t av = PL_new_term_refs(3);
1227
1228 rc = ( put_element_name(p, av+0, e) &&
1229 unify_attribute_list(p, av+1, argc, argv) &&
1230 unify_parser(av+2, p) &&
1231 call_prolog(pd, pd->on_begin, av)
1232 );
1233
1234 PL_discard_foreign_frame(fid);
1235 if ( rc )
1236 return TRUE;
1237 }
1238
1239 pd->exception = PL_exception(0);
1240 return FALSE;
1241 }
1242
1243 return TRUE;
1244 }
1245
1246
1247 static int
on_end(dtd_parser * p,dtd_element * e)1248 on_end(dtd_parser *p, dtd_element *e)
1249 { parser_data *pd = p->closure;
1250
1251 if ( pd->stopped )
1252 return TRUE;
1253
1254 if ( pd->on_end )
1255 { fid_t fid;
1256
1257 if ( (fid = PL_open_foreign_frame()) )
1258 { int rc;
1259 term_t av = PL_new_term_refs(2);
1260
1261 PL_STRINGS_MARK();
1262 rc = ( put_element_name(p, av+0, e) &&
1263 unify_parser(av+1, p) &&
1264 call_prolog(pd, pd->on_end, av)
1265 );
1266 PL_STRINGS_RELEASE();
1267
1268 PL_discard_foreign_frame(fid);
1269 if ( rc )
1270 goto ok;
1271 }
1272
1273 if ( (pd->exception = PL_exception(0)) )
1274 return FALSE;
1275 }
1276
1277 ok:
1278 if ( pd->tail && !pd->stopped )
1279 { if ( !PL_unify_nil(pd->tail) )
1280 return FALSE;
1281 PL_reset_term_refs(pd->tail); /* ? */
1282
1283 if ( pd->stack )
1284 { env *parent = pd->stack->parent;
1285
1286 pd->tail = pd->stack->tail;
1287 sgml_free(pd->stack);
1288 pd->stack = parent;
1289 } else
1290 { if ( pd->stopat == SA_CONTENT )
1291 pd->stopped = TRUE;
1292 }
1293 }
1294
1295 if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
1296 pd->stopped = TRUE;
1297
1298 return TRUE;
1299 }
1300
1301
1302 static int
on_entity_(dtd_parser * p,dtd_entity * e,int chr)1303 on_entity_(dtd_parser *p, dtd_entity *e, int chr)
1304 { parser_data *pd = p->closure;
1305
1306 if ( pd->stopped )
1307 return TRUE;
1308
1309 if ( pd->on_entity )
1310 { fid_t fid;
1311
1312 if ( (fid=PL_open_foreign_frame()) )
1313 { int rc;
1314 term_t av = PL_new_term_refs(2);
1315
1316 if ( e )
1317 rc = put_atom_wchars(av+0, e->name->name);
1318 else
1319 rc = PL_put_integer(av+0, chr);
1320
1321 if ( rc )
1322 rc = ( unify_parser(av+1, p) &&
1323 call_prolog(pd, pd->on_end, av)
1324 );
1325
1326 PL_discard_foreign_frame(fid);
1327 if ( rc )
1328 return TRUE;
1329 }
1330
1331 pd->exception = PL_exception(0);
1332 return FALSE;
1333 }
1334
1335 if ( pd->tail )
1336 { int rc;
1337 term_t h = PL_new_term_ref();
1338
1339 if ( !h ||
1340 !PL_unify_list(pd->tail, h, pd->tail) )
1341 { pd->exception = PL_exception(0);
1342 return FALSE;
1343 }
1344
1345 if ( e )
1346 rc = PL_unify_term(h,
1347 PL_FUNCTOR, FUNCTOR_entity1,
1348 PL_CHARS, e->name->name);
1349 else
1350 rc = PL_unify_term(h,
1351 PL_FUNCTOR, FUNCTOR_entity1,
1352 PL_INT, chr);
1353
1354 PL_reset_term_refs(h);
1355 if ( !rc )
1356 pd->exception = PL_exception(0);
1357
1358 return rc;
1359 }
1360
1361 return TRUE;
1362 }
1363
1364
1365 static int
on_data_(dtd_parser * p,data_type type,int len,const wchar_t * data)1366 on_data_(dtd_parser *p, data_type type, int len, const wchar_t *data)
1367 { parser_data *pd = p->closure;
1368
1369 if ( pd->on_cdata )
1370 { fid_t fid;
1371
1372 if ( (fid=PL_open_foreign_frame()) )
1373 { int rc;
1374 term_t av = PL_new_term_refs(2);
1375
1376 rc = ( PL_unify_wchars(av+0, PL_ATOM, len, data) &&
1377 unify_parser(av+1, p) &&
1378 call_prolog(pd, pd->on_cdata, av) );
1379
1380 PL_discard_foreign_frame(fid);
1381 if ( rc )
1382 return TRUE;
1383 }
1384
1385 pd->exception = PL_exception(0);
1386 return FALSE;
1387 }
1388
1389 if ( pd->tail && !pd->stopped )
1390 { term_t h = PL_new_term_ref();
1391
1392 if ( PL_unify_list(pd->tail, h, pd->tail) )
1393 { int rval = TRUE;
1394 term_t a;
1395
1396 switch(type)
1397 { case EC_CDATA:
1398 a = h;
1399 break;
1400 case EC_SDATA:
1401 { term_t d = PL_new_term_ref();
1402
1403 a = d;
1404 rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_sdata1, PL_TERM, d);
1405 break;
1406 }
1407 case EC_NDATA:
1408 { term_t d = PL_new_term_ref();
1409
1410 a = d;
1411 rval = PL_unify_term(h, PL_FUNCTOR, FUNCTOR_ndata1, PL_TERM, d);
1412 break;
1413 }
1414 default:
1415 rval = FALSE;
1416 assert(0);
1417 }
1418
1419 if ( rval )
1420 rval = PL_unify_wchars(a, p->cdata_rep, len, data);
1421
1422 if ( rval )
1423 { PL_reset_term_refs(h);
1424 return TRUE;
1425 } else
1426 { pd->exception = PL_exception(0);
1427 }
1428 }
1429 }
1430
1431 return FALSE;
1432 }
1433
1434
1435 static int
on_cdata(dtd_parser * p,data_type type,int len,const wchar_t * data)1436 on_cdata(dtd_parser *p, data_type type, int len, const wchar_t *data)
1437 { return on_data(p, type, len, data);
1438 }
1439
1440
1441 static int
can_end_omitted(dtd_parser * p)1442 can_end_omitted(dtd_parser *p)
1443 { sgml_environment *env;
1444
1445 for(env=p->environments; env; env = env->parent)
1446 { dtd_element *e = env->element;
1447
1448 if ( !(e->structure && e->structure->omit_close) )
1449 return FALSE;
1450 }
1451
1452 return TRUE;
1453 }
1454
1455
1456 static int
on_error_(dtd_parser * p,dtd_error * error)1457 on_error_(dtd_parser *p, dtd_error *error)
1458 { parser_data *pd = p->closure;
1459 const char *severity;
1460
1461 if ( pd->stopped )
1462 return TRUE;
1463
1464 if ( pd->stopat == SA_ELEMENT &&
1465 (error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) &&
1466 can_end_omitted(p) )
1467 { end_document_dtd_parser(p);
1468 sgml_cplocation(&p->location, &p->startloc);
1469 pd->stopped = TRUE;
1470 return TRUE;
1471 }
1472
1473 switch(error->severity)
1474 { case ERS_STYLE:
1475 if ( pd->error_mode != EM_STYLE )
1476 return TRUE;
1477 severity = "informational";
1478 break;
1479 case ERS_WARNING:
1480 pd->warnings++;
1481 severity = "warning";
1482 break;
1483 case ERS_ERROR:
1484 default: /* make compiler happy */
1485 pd->errors++;
1486 severity = "error";
1487 break;
1488 }
1489
1490 if ( pd->on_error ) /* msg, parser */
1491 { fid_t fid;
1492
1493 if ( (fid=PL_open_foreign_frame()) )
1494 { int rc;
1495 term_t av = PL_new_term_refs(3);
1496
1497 rc = ( PL_put_atom_chars(av+0, severity) &&
1498 PL_unify_wchars(av+1, PL_ATOM,
1499 wcslen(error->plain_message),
1500 error->plain_message) &&
1501 unify_parser(av+2, p) &&
1502 call_prolog(pd, pd->on_error, av)
1503 );
1504 PL_discard_foreign_frame(fid);
1505 if ( rc )
1506 return TRUE;
1507 }
1508 pd->exception = PL_exception(0);
1509 return FALSE;
1510 } else if ( pd->error_mode != EM_QUIET )
1511 { fid_t fid;
1512
1513 if ( (fid=PL_open_foreign_frame()) )
1514 { int rc = TRUE;
1515 dtd_srcloc *l = file_location(p, &p->startloc);
1516
1517 if ( pd->max_errors == 0 )
1518 { term_t ex = PL_new_term_ref();
1519 term_t pos = PL_new_term_ref();
1520
1521 if ( l->name.file )
1522 rc = PL_unify_term(pos,
1523 PL_FUNCTOR, FUNCTOR_file4,
1524 PL_NWCHARS, (size_t)-1, l->name.file,
1525 PL_INT, l->line,
1526 PL_INT, l->linepos,
1527 PL_INT64, (int64_t)l->charpos);
1528 if ( rc )
1529 rc = PL_unify_term(ex,
1530 PL_FUNCTOR, FUNCTOR_error2,
1531 PL_FUNCTOR, FUNCTOR_syntax_error1,
1532 PL_NWCHARS, (size_t)-1, error->plain_message,
1533 PL_TERM, pos);
1534 if ( rc )
1535 rc = PL_raise_exception(ex);
1536 } else
1537 { predicate_t pred = PL_predicate("print_message", 2, "user");
1538 term_t av = PL_new_term_refs(2);
1539 term_t src = PL_new_term_ref();
1540 term_t parser = PL_new_term_ref();
1541
1542 rc = ( unify_parser(parser, p) &&
1543 PL_put_atom_chars(av+0, severity) );
1544
1545 if ( rc )
1546 { if ( l->name.file )
1547 { if ( l->type == IN_FILE )
1548 rc = put_atom_wchars(src, l->name.file);
1549 else
1550 rc = put_atom_wchars(src, l->name.entity);
1551 } else
1552 { PL_put_nil(src);
1553 }
1554 }
1555
1556 if ( rc )
1557 rc = PL_unify_term(av+1,
1558 PL_FUNCTOR_CHARS, "sgml", 4,
1559 PL_TERM, parser,
1560 PL_TERM, src,
1561 PL_INT, l->line,
1562 PL_NWCHARS, wcslen(error->plain_message),
1563 error->plain_message);
1564
1565 if ( rc )
1566 rc = PL_call_predicate(NULL, PL_Q_NODEBUG, pred, av);
1567
1568 PL_discard_foreign_frame(fid);
1569 }
1570
1571 if ( rc )
1572 return TRUE;
1573 }
1574
1575 pd->exception = PL_exception(0);
1576 return FALSE;
1577 }
1578
1579 return TRUE;
1580 }
1581
1582
1583 static int
on_xmlns_(dtd_parser * p,dtd_symbol * ns,dtd_symbol * url)1584 on_xmlns_(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
1585 { parser_data *pd = p->closure;
1586
1587 if ( pd->stopped )
1588 return TRUE;
1589
1590 if ( pd->on_xmlns )
1591 { fid_t fid;
1592 term_t av;
1593
1594 if ( (fid = PL_open_foreign_frame()) &&
1595 (av = PL_new_term_refs(3)) )
1596 { int rc;
1597
1598 if ( ns )
1599 { rc = put_atom_wchars(av+0, ns->name);
1600 } else
1601 { PL_put_nil(av+0);
1602 rc = TRUE;
1603 }
1604
1605 if ( rc )
1606 { rc = ( put_atom_wchars(av+1, url->name) &&
1607 unify_parser(av+2, p) &&
1608 call_prolog(pd, pd->on_xmlns, av)
1609 );
1610 }
1611 end_frame(fid, pd->exception);
1612 PL_discard_foreign_frame(fid);
1613 if ( rc )
1614 return TRUE;
1615 }
1616
1617 pd->exception = PL_exception(0);
1618 return FALSE;
1619 }
1620
1621 return TRUE;
1622 }
1623
1624
1625 static int
on_pi(dtd_parser * p,const ichar * pi)1626 on_pi(dtd_parser *p, const ichar *pi)
1627 { parser_data *pd = p->closure;
1628
1629 if ( pd->stopped )
1630 return TRUE;
1631
1632 if ( pd->on_pi )
1633 { fid_t fid;
1634
1635 if ( (fid=PL_open_foreign_frame()) )
1636 { int rc;
1637 term_t av = PL_new_term_refs(2);
1638
1639 rc = ( put_atom_wchars(av+0, pi) &&
1640 unify_parser(av+1, p) &&
1641 call_prolog(pd, pd->on_pi, av)
1642 );
1643
1644 PL_discard_foreign_frame(fid);
1645 if ( rc )
1646 return TRUE;
1647 }
1648
1649 pd->exception = PL_exception(0);
1650 return FALSE;
1651 }
1652
1653 if ( pd->tail )
1654 { term_t h;
1655
1656 if ( !(h = PL_new_term_ref()) ||
1657 !PL_unify_list(pd->tail, h, pd->tail) )
1658 { pd->exception = PL_exception(0);
1659 return FALSE;
1660 }
1661
1662 if ( !PL_unify_term(h,
1663 PL_FUNCTOR, FUNCTOR_pi1,
1664 PL_NWCHARS, wcslen(pi), pi) )
1665 { pd->exception = PL_exception(0);
1666 return FALSE;
1667 }
1668
1669 PL_reset_term_refs(h);
1670 }
1671
1672 return TRUE;
1673 }
1674
1675
1676 static int
on_decl(dtd_parser * p,const ichar * decl)1677 on_decl(dtd_parser *p, const ichar *decl)
1678 { parser_data *pd = p->closure;
1679
1680 if ( pd->stopped )
1681 return TRUE;
1682
1683 if ( pd->on_decl )
1684 { fid_t fid;
1685 term_t av;
1686
1687 if ( (fid = PL_open_foreign_frame()) &&
1688 (av = PL_new_term_refs(2)) )
1689 { int rc;
1690
1691 rc = ( put_atom_wchars(av+0, decl) &&
1692 unify_parser(av+1, p) &&
1693 call_prolog(pd, pd->on_decl, av)
1694 );
1695 end_frame(fid, pd->exception);
1696 PL_discard_foreign_frame(fid);
1697 if ( rc )
1698 return TRUE;
1699 }
1700
1701 pd->exception = PL_exception(0);
1702 return FALSE;
1703 }
1704
1705 if ( pd->stopat == SA_DECL )
1706 pd->stopped = TRUE;
1707
1708 return TRUE;
1709 }
1710
1711
1712 static int
on_begin(dtd_parser * p,dtd_element * e,int argc,sgml_attribute * argv)1713 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
1714 { int rc;
1715
1716 PL_STRINGS_MARK();
1717 rc = on_begin_(p, e, argc, argv);
1718 PL_STRINGS_RELEASE();
1719
1720 return rc;
1721 }
1722
1723 static int
on_data(dtd_parser * p,data_type type,int len,const wchar_t * data)1724 on_data(dtd_parser *p, data_type type, int len, const wchar_t *data)
1725 { int rc;
1726
1727 PL_STRINGS_MARK();
1728 rc = on_data_(p, type, len, data);
1729 PL_STRINGS_RELEASE();
1730
1731 return rc;
1732 }
1733
1734 static int
on_entity(dtd_parser * p,dtd_entity * e,int chr)1735 on_entity(dtd_parser *p, dtd_entity *e, int chr)
1736 { int rc;
1737
1738 PL_STRINGS_MARK();
1739 rc = on_entity_(p, e, chr);
1740 PL_STRINGS_RELEASE();
1741
1742 return rc;
1743 }
1744
1745 static int
on_error(dtd_parser * p,dtd_error * error)1746 on_error(dtd_parser *p, dtd_error *error)
1747 { int rc;
1748
1749 PL_STRINGS_MARK();
1750 rc = on_error_(p, error);
1751 PL_STRINGS_RELEASE();
1752
1753 return rc;
1754 }
1755
1756
1757 static int
on_xmlns(dtd_parser * p,dtd_symbol * ns,dtd_symbol * url)1758 on_xmlns(dtd_parser *p, dtd_symbol *ns, dtd_symbol *url)
1759 { int rc;
1760
1761 PL_STRINGS_MARK();
1762 rc = on_xmlns_(p, ns, url);
1763 PL_STRINGS_RELEASE();
1764
1765 return rc;
1766 }
1767
1768 static int
write_parser(void * h,char * buf,int len)1769 write_parser(void *h, char *buf, int len)
1770 { parser_data *pd = h;
1771 unsigned char *s = (unsigned char *)buf;
1772 unsigned char *e = s+len;
1773
1774 if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC )
1775 { errno = EINVAL;
1776 return -1;
1777 }
1778
1779 if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped )
1780 { errno = EIO;
1781 return -1;
1782 }
1783
1784 for(; s<e; s++)
1785 { putchar_dtd_parser(pd->parser, *s);
1786 if ( pd->exception )
1787 break;
1788 }
1789
1790 return len;
1791 }
1792
1793
1794 static int
close_parser(void * h)1795 close_parser(void *h)
1796 { parser_data *pd = h;
1797 dtd_parser *p;
1798
1799 if ( !(p=pd->parser) || p->magic != SGML_PARSER_MAGIC )
1800 { errno = EINVAL;
1801 return -1;
1802 }
1803
1804 if ( pd->tail )
1805 { if ( !PL_unify_nil(pd->tail) )
1806 return -1; /* resource error */
1807 }
1808
1809 if ( p->dmode == DM_DTD )
1810 p->dtd->implicit = FALSE; /* assume we loaded a DTD */
1811
1812 if ( pd->free_on_close )
1813 free_dtd_parser(p);
1814 else
1815 p->closure = NULL;
1816
1817 sgml_free(pd);
1818
1819 return 0;
1820 }
1821
1822
1823 static IOFUNCTIONS sgml_stream_functions =
1824 { (Sread_function) NULL,
1825 (Swrite_function) write_parser,
1826 (Sseek_function) NULL,
1827 (Sclose_function) close_parser,
1828 NULL
1829 };
1830
1831
1832 static parser_data *
new_parser_data(dtd_parser * p)1833 new_parser_data(dtd_parser *p)
1834 { parser_data *pd;
1835
1836 pd = sgml_calloc(1, sizeof(*pd));
1837 pd->magic = PD_MAGIC;
1838 pd->parser = p;
1839 pd->max_errors = MAX_ERRORS;
1840 pd->max_warnings = MAX_WARNINGS;
1841 pd->error_mode = EM_PRINT;
1842 pd->exception = FALSE;
1843 p->closure = pd;
1844
1845 return pd;
1846 }
1847
1848
1849 static foreign_t
pl_open_dtd(term_t ref,term_t options,term_t stream)1850 pl_open_dtd(term_t ref, term_t options, term_t stream)
1851 { dtd *dtd;
1852 dtd_parser *p;
1853 parser_data *pd;
1854 IOSTREAM *s;
1855 term_t tail = PL_copy_term_ref(options);
1856 term_t option = PL_new_term_ref();
1857
1858 if ( !get_dtd(ref, &dtd) )
1859 return FALSE;
1860 p = new_dtd_parser(dtd);
1861 p->dmode = DM_DTD;
1862 pd = new_parser_data(p);
1863 pd->free_on_close = TRUE;
1864
1865 while( PL_get_list(tail, option, tail) )
1866 { if ( PL_is_functor(option, FUNCTOR_dialect1) )
1867 { term_t a = PL_new_term_ref();
1868 char *s;
1869
1870 _PL_get_arg(1, option, a);
1871 if ( !PL_get_atom_chars(a, &s) )
1872 return sgml2pl_error(ERR_TYPE, "atom", a);
1873
1874 if ( streq(s, "xml") )
1875 set_dialect_dtd(dtd, p, DL_XML);
1876 else if ( streq(s, "xmlns") )
1877 set_dialect_dtd(dtd, p, DL_XMLNS);
1878 else if ( streq(s, "sgml") )
1879 set_dialect_dtd(dtd, p, DL_SGML);
1880 else
1881 return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", a);
1882 }
1883 }
1884 if ( !PL_get_nil(tail) )
1885 return sgml2pl_error(ERR_TYPE, "list", options);
1886
1887 s = Snew(pd, SIO_OUTPUT|SIO_FBUF, &sgml_stream_functions);
1888
1889 if ( !PL_open_stream(stream, s) )
1890 return FALSE;
1891
1892 return TRUE;
1893 }
1894
1895
1896 static int
set_callback_predicates(parser_data * pd,term_t option)1897 set_callback_predicates(parser_data *pd, term_t option)
1898 { term_t a = PL_new_term_ref();
1899 char *fname;
1900 atom_t pname;
1901 predicate_t *pp = NULL; /* keep compiler happy */
1902 int arity;
1903 module_t m = NULL;
1904
1905 _PL_get_arg(2, option, a);
1906 if ( !PL_strip_module(a, &m, a) )
1907 return FALSE;
1908 if ( !PL_get_atom(a, &pname) )
1909 return sgml2pl_error(ERR_TYPE, "atom", a);
1910 _PL_get_arg(1, option, a);
1911 if ( !PL_get_atom_chars(a, &fname) )
1912 return sgml2pl_error(ERR_TYPE, "atom", a);
1913
1914 if ( streq(fname, "begin") )
1915 { pp = &pd->on_begin; /* tag, attributes, parser */
1916 arity = 3;
1917 } else if ( streq(fname, "end") )
1918 { pp = &pd->on_end; /* tag, parser */
1919 arity = 2;
1920 } else if ( streq(fname, "cdata") )
1921 { pp = &pd->on_cdata; /* cdata, parser */
1922 arity = 2;
1923 } else if ( streq(fname, "entity") )
1924 { pp = &pd->on_entity; /* name, parser */
1925 arity = 2;
1926 } else if ( streq(fname, "pi") )
1927 { pp = &pd->on_pi; /* pi, parser */
1928 arity = 2;
1929 } else if ( streq(fname, "xmlns") )
1930 { pp = &pd->on_xmlns; /* ns, url, parser */
1931 arity = 3;
1932 } else if ( streq(fname, "urlns") )
1933 { pp = &pd->on_urlns; /* url, ns, parser */
1934 arity = 3;
1935 } else if ( streq(fname, "error") )
1936 { pp = &pd->on_error; /* severity, message, parser */
1937 arity = 3;
1938 } else if ( streq(fname, "decl") )
1939 { pp = &pd->on_decl; /* decl, parser */
1940 arity = 2;
1941 } else
1942 return sgml2pl_error(ERR_DOMAIN, "sgml_callback", a);
1943
1944 *pp = PL_pred(PL_new_functor(pname, arity), m);
1945 return TRUE;
1946 }
1947
1948
1949 static foreign_t
pl_sgml_parse(term_t parser,term_t options)1950 pl_sgml_parse(term_t parser, term_t options)
1951 { dtd_parser *p;
1952 parser_data *pd;
1953 parser_data *oldpd;
1954 term_t head = PL_new_term_ref();
1955 term_t tail = PL_copy_term_ref(options);
1956 IOSTREAM *in = NULL;
1957 IOSTREAM *release = NULL;
1958 int recursive;
1959 int has_content_length = FALSE;
1960 int64_t content_length = 0; /* content_length(Len) */
1961 int count = 0;
1962 int rc = TRUE;
1963
1964 if ( !get_parser(parser, &p) )
1965 return FALSE;
1966
1967 if ( p->closure ) /* recursive call */
1968 { recursive = TRUE;
1969
1970 oldpd = p->closure;
1971 if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
1972 return sgml2pl_error(ERR_MISC, "sgml",
1973 "Parser associated with illegal data");
1974
1975 pd = sgml_calloc(1, sizeof(*pd));
1976 *pd = *oldpd;
1977 p->closure = pd;
1978
1979 in = pd->source;
1980 } else
1981 { recursive = FALSE;
1982 oldpd = NULL; /* keep compiler happy */
1983
1984 set_mode_dtd_parser(p, DM_DATA);
1985
1986 p->on_begin_element = on_begin;
1987 p->on_end_element = on_end;
1988 p->on_entity = on_entity;
1989 p->on_pi = on_pi;
1990 p->on_data = on_cdata;
1991 p->on_error = on_error;
1992 p->on_xmlns = on_xmlns;
1993 p->on_decl = on_decl;
1994 p->cdata_rep = PL_ATOM;
1995 p->att_rep = PL_ATOM;
1996 pd = new_parser_data(p);
1997 }
1998
1999 while ( PL_get_list(tail, head, tail) )
2000 { if ( PL_is_functor(head, FUNCTOR_document1) )
2001 { pd->list = PL_new_term_ref();
2002 _PL_get_arg(1, head, pd->list);
2003 pd->tail = PL_copy_term_ref(pd->list);
2004 pd->stack = NULL;
2005 } else if ( PL_is_functor(head, FUNCTOR_content_length1) )
2006 { term_t a = PL_new_term_ref();
2007
2008 _PL_get_arg(1, head, a);
2009 if ( !PL_get_int64(a, &content_length) )
2010 return sgml2pl_error(ERR_TYPE, "integer", a);
2011 has_content_length = TRUE;
2012 } else if ( PL_is_functor(head, FUNCTOR_call2) )
2013 { if ( !set_callback_predicates(pd, head) )
2014 return FALSE;
2015 } else if ( PL_is_functor(head, FUNCTOR_xml_no_ns1) )
2016 { term_t a = PL_new_term_ref();
2017 char *s;
2018
2019 _PL_get_arg(1, head, a);
2020 if ( !PL_get_atom_chars(a, &s) )
2021 return sgml2pl_error(ERR_TYPE, "atom", a);
2022 if ( streq(s, "error") )
2023 p->xml_no_ns = NONS_ERROR;
2024 else if ( streq(s, "quiet") )
2025 p->xml_no_ns = NONS_QUIET;
2026 else
2027 return sgml2pl_error(ERR_DOMAIN, "xml_no_ns", a);
2028 } else if ( PL_is_functor(head, FUNCTOR_parse1) )
2029 { term_t a = PL_new_term_ref();
2030 char *s;
2031
2032 _PL_get_arg(1, head, a);
2033 if ( !PL_get_atom_chars(a, &s) )
2034 return sgml2pl_error(ERR_TYPE, "atom", a);
2035 if ( streq(s, "element") )
2036 pd->stopat = SA_ELEMENT;
2037 else if ( streq(s, "content") )
2038 pd->stopat = SA_CONTENT;
2039 else if ( streq(s, "file") )
2040 pd->stopat = SA_FILE;
2041 else if ( streq(s, "input") )
2042 pd->stopat = SA_INPUT;
2043 else if ( streq(s, "declaration") )
2044 pd->stopat = SA_DECL;
2045 else
2046 return sgml2pl_error(ERR_DOMAIN, "parse", a);
2047 } else if ( PL_is_functor(head, FUNCTOR_max_errors1) )
2048 { term_t a = PL_new_term_ref();
2049
2050 _PL_get_arg(1, head, a);
2051 if ( !PL_get_integer(a, &pd->max_errors) )
2052 return sgml2pl_error(ERR_TYPE, "integer", a);
2053 } else if ( PL_is_functor(head, FUNCTOR_syntax_errors1) )
2054 { term_t a = PL_new_term_ref();
2055 char *s;
2056
2057 _PL_get_arg(1, head, a);
2058 if ( !PL_get_atom_chars(a, &s) )
2059 return sgml2pl_error(ERR_TYPE, "atom", a);
2060
2061 if ( streq(s, "quiet") )
2062 pd->error_mode = EM_QUIET;
2063 else if ( streq(s, "print") )
2064 pd->error_mode = EM_PRINT;
2065 else if ( streq(s, "style") )
2066 pd->error_mode = EM_STYLE;
2067 else
2068 return sgml2pl_error(ERR_DOMAIN, "syntax_error", a);
2069 } else if ( PL_is_functor(head, FUNCTOR_positions1) )
2070 { term_t a = PL_new_term_ref();
2071 char *s;
2072
2073 _PL_get_arg(1, head, a);
2074 if ( !PL_get_atom_chars(a, &s) )
2075 return sgml2pl_error(ERR_TYPE, "atom", a);
2076
2077 if ( streq(s, "true") )
2078 pd->positions = TRUE;
2079 else if ( streq(s, "false") )
2080 pd->positions = FALSE;
2081 else
2082 return sgml2pl_error(ERR_DOMAIN, "positions", a);
2083 } else if ( PL_is_functor(head, FUNCTOR_cdata1) )
2084 { term_t arg = PL_new_term_ref();
2085 atom_t a;
2086 _PL_get_arg(1, head, arg);
2087 if (!PL_get_atom_ex(arg, &a))
2088 return FALSE;
2089 if (a == ATOM_atom)
2090 p->cdata_rep = PL_ATOM;
2091 else if (a == ATOM_string)
2092 p->cdata_rep = PL_STRING;
2093 else
2094 return sgml2pl_error(ERR_DOMAIN, "representation", a);
2095 } else if ( PL_is_functor(head, FUNCTOR_attribute_value1) )
2096 { term_t arg = PL_new_term_ref();
2097 atom_t a;
2098 _PL_get_arg(1, head, arg);
2099 if (!PL_get_atom_ex(arg, &a))
2100 return FALSE;
2101 if (a == ATOM_atom)
2102 p->att_rep = PL_ATOM;
2103 else if (a == ATOM_string)
2104 p->att_rep = PL_STRING;
2105 else
2106 return sgml2pl_error(ERR_DOMAIN, "representation", a);
2107 } else if ( PL_is_functor(head, FUNCTOR_source1) )
2108 { term_t a = PL_new_term_ref();
2109
2110 _PL_get_arg(1, head, a);
2111 if ( !PL_get_stream(a, &in, SIO_INPUT) )
2112 return FALSE;
2113 release = in;
2114 }/* else ignored option */
2115 }
2116 if ( !PL_get_nil_ex(tail) )
2117 { if ( release )
2118 PL_release_stream(release);
2119 return FALSE;
2120 }
2121
2122 /* Parsing input from a stream */
2123 #define CHECKERROR \
2124 { if ( pd->exception ) \
2125 { rc = FALSE; \
2126 goto out; \
2127 } \
2128 if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
2129 { rc = sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors); \
2130 goto out; \
2131 } \
2132 }
2133
2134 if ( pd->stopat == SA_CONTENT && p->empty_element )
2135 goto out;
2136
2137 if ( in )
2138 { int eof = FALSE;
2139
2140 if ( in->encoding == ENC_OCTET )
2141 p->encoded = TRUE; /* parser must decode */
2142 else
2143 p->encoded = FALSE; /* already decoded */
2144
2145 pd->stopped = FALSE;
2146
2147 if ( !recursive )
2148 { pd->source = in;
2149 begin_document_dtd_parser(p);
2150 }
2151
2152 while(!eof)
2153 { int c, ateof;
2154
2155 if ( (++count % 8192) == 0 && PL_handle_signals() < 0 )
2156 { rc = FALSE;
2157 goto out;
2158 }
2159
2160 if ( has_content_length )
2161 { if ( content_length <= 0 )
2162 c = EOF;
2163 else
2164 c = Sgetcode(in);
2165 ateof = (--content_length <= 0);
2166 } else
2167 { c = Sgetcode(in);
2168 ateof = Sfeof(in);
2169 }
2170
2171 if ( ateof )
2172 { eof = TRUE;
2173 if ( c == LF ) /* file ends in LF */
2174 c = CR;
2175 else if ( c != CR ) /* file ends in normal char */
2176 { if ( has_content_length && in->position )
2177 { int64_t bc0 = in->position->byteno;
2178 putchar_dtd_parser(p, c);
2179 content_length -= in->position->byteno-bc0;
2180 } else
2181 { putchar_dtd_parser(p, c);
2182 }
2183 CHECKERROR;
2184 if ( pd->stopped )
2185 goto stopped;
2186 c = CR;
2187 }
2188 } else if ( Sferror(in) )
2189 { rc = FALSE;
2190 goto out;
2191 }
2192
2193 if ( has_content_length && in->position )
2194 { int64_t bc0 = in->position->byteno;
2195 putchar_dtd_parser(p, c);
2196 content_length -= in->position->byteno-bc0;
2197 } else
2198 { putchar_dtd_parser(p, c);
2199 }
2200 CHECKERROR;
2201 if ( pd->stopped )
2202 { stopped:
2203 pd->stopped = FALSE;
2204 if ( pd->stopat != SA_CONTENT )
2205 reset_document_dtd_parser(p); /* ensure a clean start */
2206 goto out;
2207 }
2208 }
2209
2210 if ( !recursive && pd->stopat != SA_INPUT )
2211 end_document_dtd_parser(p);
2212 CHECKERROR;
2213
2214 out:
2215 if ( release )
2216 rc = PL_release_stream(release) && rc;
2217
2218 reset_url_cache();
2219 if ( pd->tail && rc )
2220 rc = PL_unify_nil(pd->tail);
2221
2222 if ( recursive )
2223 { p->closure = oldpd;
2224 } else
2225 { p->closure = NULL;
2226 }
2227
2228 pd->magic = 0; /* invalidate */
2229 sgml_free(pd);
2230
2231 return rc;
2232 }
2233
2234 reset_url_cache();
2235
2236 return TRUE;
2237 }
2238
2239
2240 /*******************************
2241 * DTD PROPERTIES *
2242 *******************************/
2243
2244 static int put_model(term_t t, dtd_model *m) WUNUSED;
2245
2246 /* doctype(DocType) */
2247
2248 static int
dtd_prop_doctype(dtd * dtd,term_t prop)2249 dtd_prop_doctype(dtd *dtd, term_t prop)
2250 { if ( dtd->doctype )
2251 return PL_unify_wchars(prop, PL_ATOM, ENDSNUL, dtd->doctype);
2252 return FALSE;
2253 }
2254
2255
2256 /* elements(ListOfElements) */
2257
2258 WUNUSED static int
make_model_list(term_t t,dtd_model * m,functor_t f)2259 make_model_list(term_t t, dtd_model *m, functor_t f)
2260 { if ( !m->next )
2261 { return put_model(t, m);
2262 } else
2263 { term_t av;
2264
2265 if ( (av=PL_new_term_refs(2)) &&
2266 put_model(av+0, m) &&
2267 make_model_list(av+1, m->next, f) &&
2268 PL_cons_functor_v(t, f, av) )
2269 { PL_reset_term_refs(av);
2270 return TRUE;
2271 }
2272
2273 return FALSE;
2274 }
2275 }
2276
2277
2278 WUNUSED static int
put_model(term_t t,dtd_model * m)2279 put_model(term_t t, dtd_model *m)
2280 { int rc = TRUE;
2281 functor_t f;
2282
2283 switch(m->type)
2284 { case MT_PCDATA:
2285 rc = PL_put_atom(t, ATOM_pcdata);
2286 goto card;
2287 case MT_ELEMENT:
2288 rc = put_atom_wchars(t, m->content.element->name->name);
2289 goto card;
2290 case MT_AND:
2291 f = FUNCTOR_and2;
2292 break;
2293 case MT_SEQ:
2294 f = FUNCTOR_comma2;
2295 break;
2296 case MT_OR:
2297 f = FUNCTOR_bar2;
2298 break;
2299 case MT_UNDEF:
2300 default:
2301 assert(0);
2302 f = 0;
2303 break;
2304 }
2305
2306 if ( rc )
2307 { if ( !m->content.group )
2308 rc = PL_put_atom(t, ATOM_empty);
2309 else
2310 rc = make_model_list(t, m->content.group, f);
2311 }
2312
2313 card:
2314 if ( !rc )
2315 return FALSE;
2316
2317 switch(m->cardinality)
2318 { case MC_ONE:
2319 break;
2320 case MC_OPT:
2321 rc = PL_cons_functor_v(t, FUNCTOR_opt1, t);
2322 break;
2323 case MC_REP:
2324 rc = PL_cons_functor_v(t, FUNCTOR_rep1, t);
2325 break;
2326 case MC_PLUS:
2327 rc = PL_cons_functor_v(t, FUNCTOR_plus1, t);
2328 break;
2329 }
2330
2331 return rc;
2332 }
2333
2334
2335 WUNUSED static int
put_content(term_t t,dtd_edef * def)2336 put_content(term_t t, dtd_edef *def)
2337 { switch(def->type)
2338 { case C_EMPTY:
2339 return PL_put_atom(t, ATOM_empty);
2340 case C_CDATA:
2341 return PL_put_atom(t, ATOM_cdata);
2342 case C_RCDATA:
2343 return PL_put_atom(t, ATOM_rcdata);
2344 case C_ANY:
2345 return PL_put_atom(t, ATOM_any);
2346 default:
2347 if ( def->content )
2348 return put_model(t, def->content);
2349
2350 return TRUE;
2351 }
2352 }
2353
2354
2355 static int
dtd_prop_elements(dtd * dtd,term_t prop)2356 dtd_prop_elements(dtd *dtd, term_t prop)
2357 { term_t tail = PL_copy_term_ref(prop);
2358 term_t head = PL_new_term_ref();
2359 term_t et = PL_new_term_ref();
2360 dtd_element *e;
2361
2362 for( e=dtd->elements; e; e=e->next )
2363 { put_atom_wchars(et, e->name->name);
2364 if ( !PL_unify_list(tail, head, tail) ||
2365 !PL_unify(head, et) )
2366 return FALSE;
2367 }
2368
2369 return PL_unify_nil(tail);
2370 }
2371
2372
2373 static int
get_element(dtd * dtd,term_t name,dtd_element ** elem)2374 get_element(dtd *dtd, term_t name, dtd_element **elem)
2375 { ichar *s;
2376 dtd_element *e;
2377 dtd_symbol *id;
2378
2379 if ( !PL_get_wchars(name, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
2380 return FALSE;
2381
2382 if ( !(id=dtd_find_symbol(dtd, s)) ||
2383 !(e=id->element) )
2384 return FALSE;
2385
2386 *elem = e;
2387 return TRUE;
2388 }
2389
2390
2391
2392
2393 static int
dtd_prop_element(dtd * dtd,term_t name,term_t omit,term_t content)2394 dtd_prop_element(dtd *dtd, term_t name, term_t omit, term_t content)
2395 { dtd_element *e;
2396 dtd_edef *def;
2397 term_t model = PL_new_term_ref();
2398
2399 if ( !get_element(dtd, name, &e) || !(def=e->structure) )
2400 return FALSE;
2401
2402 if ( !PL_unify_term(omit, PL_FUNCTOR, FUNCTOR_omit2,
2403 PL_ATOM, def->omit_open ? ATOM_true : ATOM_false,
2404 PL_ATOM, def->omit_close ? ATOM_true : ATOM_false) )
2405 return FALSE;
2406
2407 return ( put_content(model, def) &&
2408 PL_unify(content, model) );
2409 }
2410
2411
2412 static int
dtd_prop_attributes(dtd * dtd,term_t ename,term_t atts)2413 dtd_prop_attributes(dtd *dtd, term_t ename, term_t atts)
2414 { dtd_element *e;
2415 term_t tail = PL_copy_term_ref(atts);
2416 term_t head = PL_new_term_ref();
2417 term_t elem = PL_new_term_ref();
2418 dtd_attr_list *al;
2419
2420 if ( !get_element(dtd, ename, &e) )
2421 return FALSE;
2422
2423 for(al=e->attributes; al; al=al->next)
2424 { put_atom_wchars(elem, al->attribute->name->name);
2425
2426 if ( !PL_unify_list(tail, head, tail) ||
2427 !PL_unify(head, elem) )
2428 return FALSE;
2429 }
2430
2431 return PL_unify_nil(tail);
2432 }
2433
2434
2435 typedef struct _plattrdef
2436 { attrtype type; /* AT_* */
2437 const char * name; /* name */
2438 int islist; /* list-type */
2439 atom_t atom; /* name as atom */
2440 } plattrdef;
2441
2442 static plattrdef plattrs[] =
2443 {
2444 { AT_CDATA, "cdata", FALSE },
2445 { AT_ENTITY, "entity", FALSE },
2446 { AT_ENTITIES, "entity", TRUE },
2447 { AT_ID, "id", FALSE },
2448 { AT_IDREF, "idref", FALSE },
2449 { AT_IDREFS, "idref", TRUE },
2450 { AT_NAME, "name", FALSE },
2451 { AT_NAMES, "name", TRUE },
2452 /*{ AT_NAMEOF, "nameof", FALSE },*/
2453 { AT_NMTOKEN, "nmtoken", FALSE },
2454 { AT_NMTOKENS, "nmtoken", TRUE },
2455 { AT_NUMBER, "number", FALSE },
2456 { AT_NUMBERS, "number", TRUE },
2457 { AT_NUTOKEN, "nutoken", FALSE },
2458 { AT_NUTOKENS, "nutoken", TRUE },
2459 { AT_NOTATION, "notation", FALSE },
2460
2461 { AT_CDATA, NULL, FALSE }
2462 };
2463
2464
2465 static int
unify_attribute_type(term_t type,dtd_attr * a)2466 unify_attribute_type(term_t type, dtd_attr *a)
2467 { plattrdef *ad = plattrs;
2468
2469 for( ; ad->name; ad++ )
2470 { if ( ad->type == a->type )
2471 { if ( !ad->atom )
2472 ad->atom = PL_new_atom(ad->name);
2473
2474 if ( ad->islist )
2475 return PL_unify_term(type, PL_FUNCTOR, FUNCTOR_list1,
2476 PL_ATOM, ad->atom);
2477 else
2478 return PL_unify_atom(type, ad->atom);
2479 }
2480 }
2481
2482 if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
2483 { dtd_name_list *nl;
2484 term_t tail, head, elem;
2485
2486 if ( !(tail = PL_new_term_ref()) ||
2487 !(head = PL_new_term_ref()) ||
2488 !(elem = PL_new_term_ref()) ||
2489 !PL_unify_functor(type,
2490 a->type == AT_NAMEOF ?
2491 FUNCTOR_nameof1 :
2492 FUNCTOR_notation1) )
2493 return FALSE;
2494
2495 _PL_get_arg(1, type, tail);
2496
2497 for(nl = a->typeex.nameof; nl; nl = nl->next)
2498 { if ( !put_atom_wchars(elem, nl->value->name) ||
2499 !PL_unify_list(tail, head, tail) ||
2500 !PL_unify(head, elem) )
2501 return FALSE;
2502 }
2503 return PL_unify_nil(tail);
2504 }
2505
2506 assert(0);
2507 return FALSE;
2508 }
2509
2510
2511
2512 static int
unify_attribute_default(term_t defval,dtd_attr * a)2513 unify_attribute_default(term_t defval, dtd_attr *a)
2514 { int v;
2515
2516 switch(a->def)
2517 { case AT_REQUIRED:
2518 return PL_unify_atom_chars(defval, "required");
2519 case AT_CURRENT:
2520 return PL_unify_atom_chars(defval, "current");
2521 case AT_CONREF:
2522 return PL_unify_atom_chars(defval, "conref");
2523 case AT_IMPLIED:
2524 return PL_unify_atom_chars(defval, "implied");
2525 case AT_DEFAULT:
2526 v = PL_unify_functor(defval, FUNCTOR_default1);
2527 goto common;
2528 case AT_FIXED:
2529 v = PL_unify_functor(defval, FUNCTOR_fixed1);
2530 common:
2531 if ( v )
2532 { term_t tmp;
2533
2534 if ( !(tmp=PL_new_term_ref()) )
2535 return FALSE;
2536
2537 _PL_get_arg(1, defval, tmp);
2538 switch( a->type )
2539 { case AT_CDATA:
2540 return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.cdata);
2541 case AT_NAME:
2542 case AT_NMTOKEN:
2543 case AT_NAMEOF:
2544 case AT_NOTATION:
2545 return PL_unify_wchars(tmp, PL_ATOM, ENDSNUL, a->att_def.name->name);
2546 case AT_NUMBER:
2547 return PL_unify_integer(tmp, a->att_def.number);
2548 default:
2549 assert(0);
2550 }
2551 } else
2552 return FALSE;
2553 default:
2554 assert(0);
2555 return FALSE;
2556 }
2557 }
2558
2559
2560 static int
dtd_prop_attribute(dtd * dtd,term_t ename,term_t aname,term_t type,term_t def_value)2561 dtd_prop_attribute(dtd *dtd, term_t ename, term_t aname,
2562 term_t type, term_t def_value)
2563 { dtd_element *e;
2564 ichar *achars;
2565 dtd_symbol *asym;
2566 dtd_attr_list *al;
2567
2568
2569 if ( !get_element(dtd, ename, &e) )
2570 return FALSE;
2571 if ( !PL_get_wchars(aname, NULL, &achars, CVT_ATOM|CVT_EXCEPTION) )
2572 return FALSE;
2573 if ( !(asym=dtd_find_symbol(dtd, achars)) )
2574 return FALSE;
2575
2576 for(al=e->attributes; al; al=al->next)
2577 { if ( al->attribute->name == asym )
2578 { if ( unify_attribute_type(type, al->attribute) &&
2579 unify_attribute_default(def_value, al->attribute) )
2580 return TRUE;
2581
2582 return FALSE;
2583 }
2584 }
2585
2586 return FALSE;
2587 }
2588
2589
2590 static int
dtd_prop_entities(dtd * dtd,term_t list)2591 dtd_prop_entities(dtd *dtd, term_t list)
2592 { term_t tail = PL_copy_term_ref(list);
2593 term_t head = PL_new_term_ref();
2594 term_t et = PL_new_term_ref();
2595 dtd_entity *e;
2596
2597 for( e=dtd->entities; e; e=e->next )
2598 { put_atom_wchars(et, e->name->name);
2599 if ( !PL_unify_list(tail, head, tail) ||
2600 !PL_unify(head, et) )
2601 return FALSE;
2602 }
2603
2604 return PL_unify_nil(tail);
2605 }
2606
2607
2608 static int
dtd_prop_entity(dtd * dtd,term_t ename,term_t value)2609 dtd_prop_entity(dtd *dtd, term_t ename, term_t value)
2610 { ichar *s;
2611 dtd_entity *e;
2612 dtd_symbol *id;
2613
2614 if ( !PL_get_wchars(ename, NULL, &s, CVT_ATOM|CVT_EXCEPTION) )
2615 return FALSE;
2616
2617 if ( !(id=dtd_find_symbol(dtd, s)) ||
2618 !(e=id->entity) )
2619 return FALSE;
2620
2621 switch(e->type)
2622 { case ET_SYSTEM:
2623 return PL_unify_term(value, PL_FUNCTOR_CHARS, "system", 1,
2624 PL_CHARS, e->exturl);
2625 case ET_PUBLIC:
2626 if ( e->exturl )
2627 return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
2628 PL_CHARS, e->extid,
2629 PL_CHARS, e->exturl);
2630 else
2631 return PL_unify_term(value, PL_FUNCTOR_CHARS, "public", 2,
2632 PL_CHARS, e->extid,
2633 PL_VARIABLE);
2634
2635 case ET_LITERAL:
2636 default:
2637 if ( e->value )
2638 { const char *wrap;
2639
2640 switch(e->content)
2641 { case EC_SGML: wrap = "sgml"; break;
2642 case EC_STARTTAG: wrap = "start_tag"; break;
2643 case EC_ENDTAG: wrap = "end_tag"; break;
2644 case EC_CDATA: wrap = NULL; break;
2645 case EC_SDATA: wrap = "sdata"; break;
2646 case EC_NDATA: wrap = "ndata"; break;
2647 case EC_PI: wrap = "pi"; break;
2648 default: wrap = NULL; assert(0);
2649 }
2650
2651 if ( wrap )
2652 return PL_unify_term(value, PL_FUNCTOR_CHARS, wrap, 1,
2653 PL_CHARS, e->value);
2654 else
2655 return PL_unify_wchars(value, PL_ATOM, wcslen(e->value), e->value);
2656 }
2657 }
2658
2659 assert(0);
2660 return FALSE;
2661 }
2662
2663
2664 static int
dtd_prop_notations(dtd * dtd,term_t list)2665 dtd_prop_notations(dtd *dtd, term_t list)
2666 { dtd_notation *n;
2667 term_t tail = PL_copy_term_ref(list);
2668 term_t head = PL_new_term_ref();
2669
2670 for(n=dtd->notations; n; n=n->next)
2671 { if ( PL_unify_list(tail, head, tail) &&
2672 PL_unify_wchars(head, PL_ATOM, wcslen(n->name->name), n->name->name) )
2673 continue;
2674
2675 return FALSE;
2676 }
2677
2678 return PL_unify_nil(tail);
2679 }
2680
2681
2682 static int
dtd_prop_notation(dtd * dtd,term_t nname,term_t desc)2683 dtd_prop_notation(dtd *dtd, term_t nname, term_t desc)
2684 { char *s;
2685 dtd_symbol *id;
2686
2687 if ( !PL_get_atom_chars(nname, &s) )
2688 return sgml2pl_error(ERR_TYPE, "atom", nname);
2689
2690 if ( (id=dtd_find_symbol(dtd, (ichar *)s)) )
2691 { dtd_notation *n;
2692
2693 for(n=dtd->notations; n; n=n->next)
2694 { if ( n->name == id )
2695 { term_t tail = PL_copy_term_ref(desc);
2696 term_t head = PL_new_term_ref();
2697
2698 if ( n->system )
2699 { if ( !PL_unify_list(tail, head, tail) ||
2700 !PL_unify_term(head,
2701 PL_FUNCTOR_CHARS, "system", 1,
2702 PL_CHARS, n->system) )
2703 return FALSE;
2704 }
2705 if ( n->public )
2706 { if ( !PL_unify_list(tail, head, tail) ||
2707 !PL_unify_term(head,
2708 PL_FUNCTOR_CHARS, "public", 1,
2709 PL_CHARS, n->public) )
2710 return FALSE;
2711 }
2712
2713 return PL_unify_nil(tail);
2714 }
2715 }
2716 }
2717
2718 return FALSE;
2719 }
2720
2721
2722
2723 typedef struct _prop
2724 { int (*func)();
2725 const char *name;
2726 int arity;
2727 functor_t functor;
2728 } prop;
2729
2730
2731 static prop dtd_props[] =
2732 { { dtd_prop_doctype, "doctype", 1 },
2733 { dtd_prop_elements, "elements", 1 },
2734 { dtd_prop_element, "element", 3 },
2735 { dtd_prop_attributes, "attributes", 2, },
2736 { dtd_prop_attribute, "attribute", 4, },
2737 { dtd_prop_entities, "entities", 1, },
2738 { dtd_prop_entity, "entity", 2, },
2739 { dtd_prop_notations, "notations", 1, },
2740 { dtd_prop_notation, "notation", 2, },
2741 { NULL }
2742 };
2743
2744
2745 static void
initprops()2746 initprops()
2747 { static int done = FALSE;
2748
2749 if ( !done )
2750 { prop *p;
2751
2752 done = TRUE;
2753 for(p=dtd_props; p->func; p++)
2754 p->functor = PL_new_functor(PL_new_atom(p->name), p->arity);
2755 }
2756 }
2757
2758
2759 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2760 dtd_property(DTD, doctype(DocType))
2761 dtd_property(DTD, elements(ListOfNames))
2762 dtd_property(DTD, element(Name, Omit, Model))
2763 dtd_property(DTD, attributes(ElementName, ListOfAttributes)),
2764 dtd_property(DTD, attribute(ElementName, AttributeName, Type, Default))
2765 dtd_property(DTD, entities(ListOfEntityNames))
2766 dtd_property(DTD, entity(Name, Type))
2767 dtd_property(DTD, notations(ListOfNotationNames)
2768 dtd_property(DTD, notation(Name, File))
2769 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2770
2771 static foreign_t
pl_dtd_property(term_t ref,term_t property)2772 pl_dtd_property(term_t ref, term_t property)
2773 { dtd *dtd;
2774 const prop *p;
2775
2776 initprops();
2777
2778 if ( !get_dtd(ref, &dtd) )
2779 return FALSE;
2780
2781 for(p=dtd_props; p->func; p++)
2782 { if ( PL_is_functor(property, p->functor) )
2783 { term_t a = PL_new_term_refs(p->arity);
2784 int i;
2785
2786 for(i=0; i<p->arity; i++)
2787 _PL_get_arg(i+1, property, a+i);
2788
2789 switch(p->arity)
2790 { case 1:
2791 return (*p->func)(dtd, a+0);
2792 case 2:
2793 return (*p->func)(dtd, a+0, a+1);
2794 case 3:
2795 return (*p->func)(dtd, a+0, a+1, a+2);
2796 case 4:
2797 return (*p->func)(dtd, a+0, a+1, a+2, a+3);
2798 default:
2799 assert(0);
2800 return FALSE;
2801 }
2802 }
2803 }
2804
2805 return sgml2pl_error(ERR_DOMAIN, "dtd_property", property);
2806 }
2807
2808 /*******************************
2809 * CATALOG *
2810 *******************************/
2811
2812 static foreign_t
pl_sgml_register_catalog_file(term_t file,term_t where)2813 pl_sgml_register_catalog_file(term_t file, term_t where)
2814 { wchar_t *fn;
2815 char *w;
2816 catalog_location loc;
2817
2818 if ( !PL_get_wchars(file, NULL, &fn, CVT_ATOM|CVT_EXCEPTION) )
2819 return FALSE;
2820 if ( !PL_get_atom_chars(where, &w) )
2821 return sgml2pl_error(ERR_TYPE, "atom", where);
2822
2823 if ( streq(w, "start") )
2824 loc = CTL_START;
2825 else if ( streq(w, "end") )
2826 loc = CTL_END;
2827 else
2828 return sgml2pl_error(ERR_DOMAIN, "location", where);
2829
2830 return register_catalog_file(fn, loc);
2831 }
2832
2833
2834 /*******************************
2835 * INSTALL *
2836 *******************************/
2837
2838 extern install_t install_xml_quote(void);
2839 extern install_t install_xsd(void);
2840 #ifdef O_STATISTICS
2841 extern void sgml_statistics(void);
2842 #endif
2843
2844 install_t
install(void)2845 install(void)
2846 { initConstants();
2847
2848 init_ring();
2849
2850 PL_register_foreign("new_dtd", 2, pl_new_dtd, 0);
2851 PL_register_foreign("free_dtd", 1, pl_free_dtd, 0);
2852 PL_register_foreign("new_sgml_parser", 2, pl_new_sgml_parser, 0);
2853 PL_register_foreign("free_sgml_parser", 1, pl_free_sgml_parser, 0);
2854 PL_register_foreign("set_sgml_parser", 2, pl_set_sgml_parser, 0);
2855 PL_register_foreign("get_sgml_parser", 2, pl_get_sgml_parser, 0);
2856 PL_register_foreign("open_dtd", 3, pl_open_dtd, 0);
2857 PL_register_foreign("sgml_parse", 2, pl_sgml_parse,
2858 PL_FA_TRANSPARENT);
2859 PL_register_foreign("_sgml_register_catalog_file", 2,
2860 pl_sgml_register_catalog_file, 0);
2861
2862 PL_register_foreign("$dtd_property", 2, pl_dtd_property, 0);
2863
2864 install_xml_quote();
2865 install_xsd();
2866 #ifdef O_STATISTICS
2867 atexit(sgml_statistics);
2868 #endif
2869 }
2870
2871
2872 install_t
uninstall(void)2873 uninstall(void)
2874 { stop_ring();
2875 }
2876