1 /*
2 * Copyright © 1997-2016 World Wide Web Consortium
3 * See http://www.w3.org/Consortium/Legal/copyright-software
4 *
5 * Author: Bert Bos <bert@w3.org>
6 * Created: 1997
7 **/
8 #include "config.h"
9 #include <assert.h>
10 #include <stdlib.h>
11 #if STDC_HEADERS
12 # include <string.h>
13 #else
14 # ifndef HAVE_STRCHR
15 # define strchr index
16 # define strrchr rindex
17 # endif
18 # ifndef HAVE_STRDUP
19 # include "strdup.e"
20 # endif
21 #endif
22 #include <ctype.h>
23 #include <stdio.h>
24 #include <stdbool.h>
25 #include "export.h"
26 #include "heap.e"
27 #include "types.e"
28 #include "dtd.e"
29 #include "errexit.e"
30 #include "scan.e"
31
32 EXPORT typedef enum {
33 Element, Text, Comment, Declaration, Procins, Root
34 } Nodetype;
35
36 EXPORT typedef struct _node {
37 Nodetype tp;
38 string name;
39 pairlist attribs;
40 string text;
41 string url;
42 struct _node *parent;
43 struct _node *sister;
44 struct _node *children;
45 } Node, *Tree;
46
47
48
49 /* create -- create an empty tree */
create(void)50 EXPORT Tree create(void)
51 {
52 Tree t = malloc(sizeof(*t));
53 assert(t != NULL);
54 t->tp = Root;
55 t->name = "";
56 t->sister = t->children = NULL;
57 return t;
58 }
59
60 /* tree_delete -- recursively free the memory occupied by a tree */
tree_delete(Tree t)61 EXPORT void tree_delete(Tree t)
62 {
63 if (t != NULL) {
64 switch (t->tp) {
65 case Element:
66 dispose(t->name);
67 pairlist_delete(t->attribs);
68 tree_delete(t->sister);
69 tree_delete(t->children);
70 break;
71 case Text:
72 dispose(t->text);
73 assert(t->children == NULL);
74 tree_delete(t->sister);
75 break;
76 case Comment:
77 dispose(t->text);
78 assert(t->children == NULL);
79 tree_delete(t->sister);
80 break;
81 case Declaration:
82 dispose(t->name);
83 dispose(t->text);
84 dispose(t->url);
85 assert(t->children == NULL);
86 tree_delete(t->sister);
87 break;
88 case Procins:
89 dispose(t->text);
90 assert(t->children == NULL);
91 tree_delete(t->sister);
92 break;
93 case Root:
94 assert(t->sister == NULL);
95 tree_delete(t->children);
96 break;
97 default:
98 assert(!"Cannot happen");
99 }
100 dispose(t);
101 }
102 }
103
104 /* get_root -- return root of tree */
get_root(Tree t)105 EXPORT Tree get_root(Tree t)
106 {
107 while (t->tp != Root) t = t->parent;
108 return t;
109 }
110
111
112 /* get_attrib -- return a ptr to the value of a named attibute, or false */
get_attrib(const Node * e,const conststring attname)113 EXPORT conststring get_attrib(const Node *e, const conststring attname)
114 {
115 assert(e->tp == Element);
116 return pairlist_get(e->attribs, attname);
117 }
118
119
120 /* set_attrib -- set an attribute to a value */
set_attrib(Node * e,string name,conststring value)121 EXPORT void set_attrib(Node *e, string name, conststring value)
122 {
123 assert(e->tp == Element);
124 pairlist_set(&e->attribs, name, value);
125 }
126
127 /* delete_attrib -- remove attribute from element, false if not found */
delete_attrib(Node * e,const conststring name)128 EXPORT bool delete_attrib(Node *e, const conststring name)
129 {
130 assert(e->tp == Element);
131 return pairlist_unset(&e->attribs, name);
132 }
133
134 /* get_by_id -- recursively get the element node with the id in subtree n */
get_by_id(Node * n,const conststring id)135 static Tree get_by_id(Node *n, const conststring id)
136 {
137 conststring val;
138 Tree h, p;
139
140 assert(n->tp == Element);
141 if ((val = get_attrib(n, "id")) && eq(val, id)) return n;
142 for (h = n->children; h; h = h->sister)
143 if (h->tp == Element && (p = get_by_id(h, id))) return p;
144 return NULL;
145 }
146
147 /* get_elt_by_id -- get the element node with the ID attribute id */
get_elt_by_id(Node * n,const conststring id)148 EXPORT Tree get_elt_by_id(Node *n, const conststring id)
149 {
150 /* To do: should this use a hash table? */
151 Tree h, p;
152
153 h = get_root(n);
154 for (h = h->children; h; h = h->sister)
155 if (h->tp == Element && (p = get_by_id(h, id))) return p;
156 return NULL;
157 }
158
159 /* wrap_contents -- wrap contents of a node in an element, return new elt */
wrap_contents(Node * n,const string elem,pairlist attr)160 EXPORT Tree wrap_contents(Node *n, const string elem, pairlist attr)
161 {
162 Node *h, *k;
163
164 new(h);
165 h->tp = Element;
166 h->name = newstring(elem);
167 h->attribs = attr;
168 h->sister = NULL;
169 h->parent = n;
170 h->children = n->children;
171 n->children = h;
172 for (k = h->children; k; k = k->sister) k->parent = h;
173 return h;
174 }
175
176 /* wrap_elt -- wrap an element in a new element, return the new element */
wrap_elt(Node * n,const conststring elem,pairlist attr)177 EXPORT Tree wrap_elt(Node *n, const conststring elem, pairlist attr)
178 {
179 Node *h, *k;
180
181 new(h);
182 h->tp = Element;
183 h->name = newstring(elem);
184 h->attribs = attr;
185 h->sister = n->sister;
186 h->parent = n->parent;
187 h->children = n;
188 n->sister = NULL;
189 n->parent = h;
190 if (h->parent->children == n) {
191 h->parent->children = h;
192 } else {
193 k = h->parent->children;
194 while (k->sister != n) {assert(k->sister->sister); k = k->sister;}
195 k->sister = h;
196 }
197 return h;
198 }
199
200 /* rename_elt -- change the name of an element to elem */
rename_elt(Node * n,const string elem)201 EXPORT void rename_elt(Node *n, const string elem)
202 {
203 assert(n->tp == Element);
204 n->name = newstring(elem);
205 }
206
207 /* push -- add a child node to the tree */
push(Tree t,Node * n)208 static Tree push(Tree t, Node *n)
209 {
210 if (t->children == NULL) {
211 t->children = n;
212 } else {
213 Tree h = t->children;
214 while (h->sister != NULL) h = h->sister;
215 h->sister = n;
216 }
217 n->parent = t;
218 return n;
219 }
220
221 /* pop -- go up one level */
pop(Tree t)222 static Tree pop(Tree t)
223 {
224 assert(t != NULL);
225 assert(t->tp != Root);
226 return t->parent;
227 }
228
229 /* append -- add at end of children */
append(Tree t,Node * n)230 static Tree append(Tree t, Node *n)
231 {
232 assert(t != NULL);
233 if (t->children == NULL) {
234 t->children = n;
235 } else {
236 Tree h = t->children;
237 while (h->sister != NULL) h = h->sister;
238 h->sister = n;
239 }
240 n->parent = t;
241 return t;
242 }
243
244 /* lookup -- lookup info about an element case-insensitively */
lookup(const string e)245 static const ElementType *lookup(const string e)
246 {
247 char h[MAXNAMELEN+2];
248 strncpy(h, e, sizeof(h) - 1);
249 h[sizeof(h)-1] = '\0';
250 down(h);
251 return lookup_element(h, strlen(h));
252 }
253
254 /* is_known -- true if the element is an HTML 4 element */
is_known(const string e)255 EXPORT bool is_known(const string e)
256 {
257 return lookup(e) != NULL;
258 }
259
260 /* is_pre -- true if the element has preformatted content */
is_pre(const string e)261 EXPORT bool is_pre(const string e)
262 {
263 const ElementType *info = lookup(e);
264 return info && info->pre;
265 }
266
267 /* need_stag -- true if the element's start tag is required */
need_stag(const string e)268 EXPORT bool need_stag(const string e)
269 {
270 const ElementType *info = lookup(e);
271 return !info || info->stag;
272 }
273
274 /* need_etag -- true if the element's end tag is required */
need_etag(const string e)275 EXPORT bool need_etag(const string e)
276 {
277 const ElementType *info = lookup(e);
278 return !info || info->etag;
279 }
280
281 /* is_empty -- true if element is empty */
is_empty(const string e)282 EXPORT bool is_empty(const string e)
283 {
284 const ElementType *info = lookup(e);
285 return info && info->empty;
286 }
287
288 /* has_parent -- true if c accepts p as a parent */
has_parent(const string c,const string p)289 EXPORT bool has_parent(const string c, const string p)
290 {
291 const ElementType *info = lookup_element(c, strlen(c));
292 int i;
293 if (!info) return false;
294 for (i = 0; info->parents[i]; i++)
295 if (eq(info->parents[i], p)) return true;
296 return false;
297 }
298
299 /* preferred_parent -- return first possible parent of e */
preferred_parent(const string e)300 static string preferred_parent(const string e)
301 {
302 const ElementType *info = lookup_element(e, strlen(e));
303 assert(info != NULL); /* element is known */
304 assert(info->parents[0] != NULL); /* element is not root */
305 return info->parents[0];
306 }
307
308 /* is_root -- true if e has no possible parents */
is_root(const string e)309 static bool is_root(const string e)
310 {
311 const ElementType *info = lookup_element(e, strlen(e));
312 assert(info != NULL); /* element is known */
313 return info->parents[0] == NULL;
314 }
315
316 /* is_mixed -- true if e accepts text content */
is_mixed(const string e)317 EXPORT bool is_mixed(const string e)
318 {
319 const ElementType *info = lookup(e);
320 return !info || info->mixed;
321 }
322
323 /* break_before -- true if element looks better with a newline before it */
break_before(const string e)324 EXPORT bool break_before(const string e)
325 {
326 const ElementType *info = lookup(e);
327 return info && info->break_before;
328 }
329
330 /* break_after -- true if element looks better with a newline after it */
break_after(const string e)331 EXPORT bool break_after(const string e)
332 {
333 const ElementType *info = lookup(e);
334 return info && info->break_after;
335 }
336
337 /* is_cdata_elt -- true if element has character data content */
is_cdata_elt(const string e)338 EXPORT bool is_cdata_elt(const string e)
339 {
340 const ElementType *info = lookup(e);
341 return info && info->cdata;
342 }
343
344 /* build_path -- try to add omittable start tags to make elem acceptable */
build_path(Tree * t,string elem)345 static bool build_path(Tree *t, string elem)
346 {
347 const ElementType *info;
348 Node *n;
349 int i;
350
351 assert(is_known(elem));
352 assert(is_known((*t)->name));
353
354 /* Check if we are done */
355 if (has_parent(elem, (*t)->name)) return true;
356
357 /* Try recursively if any possible parent can be a child of t */
358 info = lookup(elem);
359 for (i = 0; info->parents[i]; i++) {
360 if (!need_stag(info->parents[i]) && build_path(t, info->parents[i])) {
361 /* Success, so add this parent and return true */
362 n = malloc(sizeof(*n));
363 assert(n != NULL);
364 n->tp = Element;
365 n->name = newstring(info->parents[i]);
366 assert(islower(n->name[0]));
367 n->attribs = NULL;
368 n->sister = n->children = NULL;
369 *t = push(*t, n);
370 return true;
371 }
372 }
373 return false;
374 }
375
376 /* tree_push -- add an element to the tree, without checking the DTD */
tree_push(Tree t,string elem,pairlist attr)377 EXPORT Tree tree_push(Tree t, string elem, pairlist attr)
378 {
379 Node *n;
380 new(n);
381 n->tp = Element;
382 n->name = newstring(elem);
383 n->attribs = attr;
384 n->sister = n->children = NULL;
385 return push(t, n);
386 }
387
388 /* html_push -- add an element to the tree, open or close missing elements */
html_push(Tree t,string elem,pairlist attr)389 EXPORT Tree html_push(Tree t, string elem, pairlist attr)
390 {
391 pairlist a;
392 Node *h, *n = malloc(sizeof(*n));
393 assert(n != NULL);
394 n->tp = Element;
395 n->name = down(newstring(elem));
396 for (a = attr; a; a = a->next) down(a->name);
397 n->attribs = attr;
398 n->sister = n->children = NULL;
399
400 /* Unknown elements are just pushed where they are */
401 if (!is_known(n->name)) return push(t, n);
402
403 if (is_root(n->name)) {
404 while (t->tp != Root) t = pop(t); /* Make sure root is at root */
405 } else if (is_known(t->name) && build_path(&t, n->name)) {
406 ; /* Added missing start tags */
407 } else {
408 /* Check if there is a possible parent further up the tree */
409 for (h=t; h->tp!=Root && is_known(h->name) && !has_parent(n->name,h->name);
410 h = h->parent) ;
411 /* Close omitted end tags */
412 if (h->tp != Root) while (t != h) t = pop(t);
413 /* If no valid parent, fabricate one */
414 if (t->tp == Root || (is_known(t->name) && !has_parent(n->name, t->name)))
415 t = html_push(t, preferred_parent(n->name), NULL);
416 }
417 t = push(t, n);
418
419 if (is_empty(n->name)) t = pop(t);
420 if (is_cdata_elt(n->name)) set_cdata_element(n->name); /* Change scanner */
421 return t;
422 }
423
424 /* tree_pop -- close an open element, without checking the DTD */
tree_pop(Tree t,string elem)425 EXPORT Tree tree_pop(Tree t, string elem)
426 {
427 assert(t != NULL);
428 if (t->tp == Root) errexit("End tag </%s> without matching start tag\n", elem);
429 assert(t->tp == Element);
430 if (*elem == '\0') return pop(t); /* Empty end tag </> */
431 if (eq(t->name, elem)) return pop(t);
432 errexit("End tag </%s> doesn't match start tag <%s>\n", elem, t->name);
433 return NULL; /* Keep lint happy */
434 }
435
436 /* html_pop -- close an open element */
html_pop(Tree t,string elem)437 EXPORT Tree html_pop(Tree t, string elem)
438 {
439 Tree h = t;
440 assert(t != NULL);
441 down(elem);
442 if (*elem == '\0') { /* </> */
443 if (t->tp != Root) t = pop(t);
444 } else { /* </name> */
445 for (h = t; h->tp != Root && !eq(h->name, elem); h = h->parent) ;
446 if (h->tp != Root) { /* Found open element */
447 while (t != h) t = pop(t);
448 t = pop(t);
449 }
450 }
451 return t;
452 }
453
454 /* append_comment -- add a comment to the tree */
append_comment(Tree t,string comment)455 EXPORT Tree append_comment(Tree t, string comment)
456 {
457 Node *n = malloc(sizeof(*n));
458 assert(n != NULL);
459 n->tp = Comment;
460 n->text = comment;
461 n->sister = n->children = NULL;
462 return append(t, n);
463 }
464
465 /* append_declaration -- add a declaration to the tree */
append_declaration(Tree t,string gi,string fpi,string url)466 EXPORT Tree append_declaration(Tree t, string gi,
467 string fpi, string url)
468 {
469 Node *n = malloc(sizeof(*n));
470 assert(n != NULL);
471 n->tp = Declaration;
472 n->name = down(gi);
473 n->text = fpi;
474 n->url = url;
475 n->sister = n->children = NULL;
476 return append(t, n);
477 }
478
479 /* append_procins -- append a processing instruction */
append_procins(Tree t,string procins)480 EXPORT Tree append_procins(Tree t, string procins)
481 {
482 Node *n = malloc(sizeof(*n));
483 assert(n != NULL);
484 n->tp = Procins;
485 n->text = procins;
486 n->sister = n->children = NULL;
487 return append(t, n);
488 }
489
490 /* tree_append_text -- append a text chunk, without checking the DTD */
tree_append_text(Tree t,string text)491 EXPORT Tree tree_append_text(Tree t, string text)
492 {
493 Node *n;
494
495 assert(text);
496 new(n);
497 n->tp = Text;
498 n->text = text;
499 n->sister = n->children = NULL;
500 return append(t, n);
501 }
502
503 /* append_text -- append a text chunk to the document tree */
append_text(Tree t,string text)504 EXPORT Tree append_text(Tree t, string text)
505 {
506 Node *n;
507 string new_parent;
508
509 if (only_space(text) && (t->tp == Root || !is_mixed(t->name))) {
510 /* Drop text, since it is non-significant whitespace */
511 return t;
512 }
513 if (t->tp == Root || !is_mixed(t->name)) {
514 /* Need heuristics to make a valid tree */
515 new_parent = preferred_parent("%data");
516 /* Close omitted end tags until text or preferred parent fits */
517 while (t->tp != Root && !is_mixed(t->name) && !need_etag(t->name)
518 && !has_parent(new_parent, t->name))
519 t = pop(t);
520 /* Fabricate a parent if needed */
521 if (t->tp == Root || !is_mixed(t->name))
522 t = html_push(t, new_parent, NULL);
523 }
524 n = malloc(sizeof(*n));
525 assert(n != NULL);
526 n->tp = Text;
527 n->text = text;
528 assert(n->text != NULL);
529 n->sister = n->children = NULL;
530 return append(t, n);
531 }
532
dump2(Tree n,FILE * f)533 static void dump2(Tree n, FILE *f)
534 {
535 pairlist h;
536 Tree l;
537
538 switch (n->tp) {
539 case Text: fprintf(f, "%s", n->text); break;
540 case Comment: fprintf(f, "<!--%s-->", n->text); break;
541 case Declaration:
542 fprintf(f, "<!DOCTYPE %s", n->name);
543 if (n->text) fprintf(f, " PUBLIC \"%s\">", n->text);
544 if (n->url) fprintf(f, " %s\"%s\">", n->text ? "" : "SYSTEM ", n->url);
545 fprintf(f, ">");
546 break;
547 case Procins: fprintf(f, "<?%s>", n->text); break;
548 case Element:
549 fprintf(f, "<%s", n->name);
550 for (h = n->attribs; h != NULL; h = h->next) {
551 fprintf(f, " %s", h->name);
552 if (h->value != NULL) fprintf(f, "=\"%s\"", h->value);
553 }
554 if (is_empty(n->name)) {
555 assert(n->children == NULL);
556 fprintf(f, " />");
557 } else {
558 fprintf(f, ">");
559 for (l = n->children; l != NULL; l = l->sister) dump2(l, f);
560 fprintf(f, "</%s>", n->name);
561 }
562 break;
563 default:
564 assert(!"Cannot happen");
565 }
566 }
567
568 /* dumptree -- write out the tree below t (t's children, not t itself)*/
dumptree(Tree t,FILE * f)569 EXPORT void dumptree(Tree t, FILE *f)
570 {
571 Tree h;
572
573 for (h = t->children; h != NULL; h = h->sister) dump2(h, f);
574 }
575