1 
2 #ifndef EL_DOM_NODE_H
3 #define EL_DOM_NODE_H
4 
5 #include "dom/string.h"
6 #include "util/hash.h"
7 
8 struct dom_node_list;
9 
/* Identifies the kind of a DOM node. The named types are numbered
 * consecutively from 1 (element) to 12 (notation).
 * NOTE(review): the numbering looks like the W3C DOM nodeType codes, so the
 * values should be kept stable -- confirm before reordering. */
enum dom_node_type {
	/* for internal purpose only */
	DOM_NODE_UNKNOWN = 0,

	DOM_NODE_ELEMENT,			/*  1 */
	DOM_NODE_ATTRIBUTE,			/*  2 */
	DOM_NODE_TEXT,				/*  3 */
	DOM_NODE_CDATA_SECTION,			/*  4 */
	DOM_NODE_ENTITY_REFERENCE,		/*  5 */
	DOM_NODE_ENTITY,			/*  6 */
	DOM_NODE_PROCESSING_INSTRUCTION,	/*  7 */
	DOM_NODE_COMMENT,			/*  8 */
	DOM_NODE_DOCUMENT,			/*  9 */
	DOM_NODE_DOCUMENT_TYPE,			/* 10 */
	DOM_NODE_DOCUMENT_FRAGMENT,		/* 11 */
	DOM_NODE_NOTATION,			/* 12 */

	/* One past the last node type (13). */
	DOM_NODES
};
28 
/* The following are the node specific data structures. Each of them may
 * contain no more than 3 pointers or something equivalent in size. */
31 
/* Pairs a node with the attribute that carries its id. Items of this type are
 * what the @element_ids hash of {struct dom_document_node} points to. */
struct dom_node_id_item {
	/* The attribute node containing the id value */
	struct dom_node *id_attribute;

	/* The node with the @id attribute */
	struct dom_node *node;
};
39 
/* Data specific to document nodes. */
struct dom_document_node {
	/* The document URI is stored in the string / length members. */
	/* An id to node hash for fast lookup. */
	struct hash *element_ids; /* -> {struct dom_node_id_item} */

	/* Any meta data the root node carries such as document type nodes,
	 * entity and notation map nodes and maybe some internal CSS stylesheet
	 * node. */
	struct dom_node_list *meta_nodes;

	/* The child nodes. May be NULL. Ordered like they were inserted. */
	struct dom_node_list *children;
};
53 
/* A public / system identifier pair. It describes the external subset of a
 * document type and is also the node data of notation and entity nodes. */
struct dom_id {
	struct dom_string public_id;
	struct dom_string system_id;
};
58 
/* Subset information of a document type. */
struct dom_doctype_subset_info {
	/* The string of the internal subset */
	struct dom_string internal;

	/* The ids for the external subset */
	struct dom_id external;
};
63 
/* Data specific to document type nodes. */
struct dom_document_type_node {
	/* These are really maps and should be sorted alphabetically. */
	struct dom_node_list *entities;
	struct dom_node_list *notations;

	/* The string/length members of dom_node hold the name of the document
	 * type "<!DOCTYPE {name} ...>". This holds the ids for the external
	 * subset and the string of the internal subset.
	 *
	 * The pointee is {struct dom_doctype_subset_info}; a misspelled tag
	 * here would silently declare a distinct, never completed type. */
	struct dom_doctype_subset_info *subset;
};
74 
/* Element nodes are indexed nodes stored in node lists of either
 * other child nodes or the root node. */
struct dom_element_node {
	/* The child nodes. May be NULL. Ordered like they were inserted. */
	struct dom_node_list *children;

	/* Only element nodes can have attributes and element nodes can only be
	 * child nodes so the map is put here.
	 *
	 * The @map may be NULL if there are none. The @map nodes are sorted
	 * alphabetically according to the attribute's name so it has fast
	 * lookup. */
	struct dom_node_list *map;

	/* For <xsl:stylesheet ...> elements this holds the offset of
	 * 'stylesheet' */
	uint16_t namespace_offset;

	/* Special implementation dependent type specifier, for example
	 * containing an enum value representing the element, to reduce string
	 * comparing and only do one fast find mapping. */
	uint16_t type;
};
98 
/* Attribute nodes are named nodes stored in a node map of an element node. */
struct dom_attribute_node {
	/* The string that holds the attribute value. The @string / @length
	 * members of {struct dom_node} hold the name that identifies the node
	 * in the map. */
	struct dom_string value;

	/* For xml:lang="en" attributes this holds the offset of 'lang' */
	uint16_t namespace_offset;

	/* Special implementation dependent type specifier. For HTML it (will)
	 * contain an enum value representing the attribute HTML_CLASS, HTML_ID etc.
	 * to reduce string comparing and only do one fast find mapping. */
	uint16_t type;

	/* Was the attribute specified in the DTD as a default attribute or was
	 * it added from the document source. */
	unsigned int specified:1;

	/* Was the node->string allocated */
	unsigned int allocated:1;

	/* Has the node->string been converted to internal charset. */
	unsigned int converted:1;

	/* Is the attribute a unique identifier, meaning the owner (element)
	 * should be added to the document node's @element_ids hash. */
	unsigned int id:1;

	/* The attribute value references some other resource */
	unsigned int reference:1;

	/* The attribute value is delimited by quotes */
	unsigned int quoted:1;
};
134 
/* Data specific to text nodes. */
struct dom_text_node {
	/* The number of newlines the text string contains */
	unsigned int newlines;

	/* We will need to add text nodes even if they contain only whitespace.
	 * In order to quickly identify such nodes this member is used. */
	unsigned int only_space:1;

	/* Was the node->string allocated */
	unsigned int allocated:1;

	/* Has the node->string been converted to internal charset. */
	unsigned int converted:1;
};
149 
/* Target types of processing instructions; stored in the @type member of
 * {struct dom_proc_instruction_node}. */
enum dom_proc_instruction_type {
	/* NOTE(review): presumably the generic type used when the target is
	 * not one of the known ones below -- confirm. */
	DOM_PROC_INSTRUCTION		= 0,

	/* Keep this group sorted */

	/* DocBook toolchain instruction */
	DOM_PROC_INSTRUCTION_DBHTML	= 1,
	/* Internal instruction hook */
	DOM_PROC_INSTRUCTION_ELINKS	= 2,
	/* XML instructions */
	DOM_PROC_INSTRUCTION_XML	= 3,

	/* One past the last type (4). */
	DOM_PROC_INSTRUCTION_TYPES
};
160 
/* Data specific to processing instruction nodes. */
struct dom_proc_instruction_node {
	/* The target of the processing instruction (xml for '<?xml ...  ?>')
	 * is in the @string / @length members. */
	/* This holds the value to be processed */
	struct dom_string instruction;

	/* For fast checking of the target type */
	uint16_t type; /* enum dom_proc_instruction_type */

	/* For some processing instructions like xml the instructions contain
	 * attributes and those attributes can be collected in this @map. */
	struct dom_node_list *map;
};
174 
/* The type specific part of a node. It is embedded in {struct dom_node} as
 * the @data member and holds various info depending on the type of the
 * node. */
union dom_node_data {
	struct dom_document_node	 document;
	struct dom_document_type_node	 document_type;
	struct dom_element_node		 element;
	struct dom_attribute_node	 attribute;
	struct dom_text_node		 text;
	struct dom_id			 notation;
	/* For entities string/length hold the notation name */
	struct dom_id			 entity;
	struct dom_proc_instruction_node proc_instruction;

	/* Node types without a union member yet
	 *
	 * DOM_NODE_CDATA_SECTION,
	 * DOM_NODE_COMMENT,
	 * DOM_NODE_DOCUMENT_FRAGMENT,
	 * DOM_NODE_ENTITY_REFERENCE,
	 */
};
194 
/* This structure is size critical so keep ordering to make it easier to pack
 * and avoid unneeded members. */
struct dom_node {
	/* The type of the node */
	uint16_t type; /* -> enum dom_node_type */

	/* Can contain either stuff like element name or for attributes the
	 * attribute name. */
	struct dom_string string;

	/* The parent of this node.
	 * NOTE(review): presumably NULL for nodes created without a parent,
	 * e.g. via init_dom_node() -- confirm. */
	struct dom_node *parent;

	/* Various info depending on the type of the node. */
	union dom_node_data data;
};
210 
/* A node list can be used for storing indexed nodes. @size is the number of
 * slots in @entries that the iteration macros below walk over.
 * NOTE(review): @entries is declared with a single element but indexed up to
 * @size - 1, so the list is presumably over-allocated by the code that
 * creates it -- confirm against add_to_dom_node_list(). */
struct dom_node_list {
	size_t size;
	struct dom_node *entries[1];
};

/* Iterates over @list from the first to the last entry. For every non-NULL
 * entry, @node is set to the entry and @i to its index before the loop body
 * runs; NULL entries are skipped. @list must not be NULL. */
#define foreach_dom_node(list, node, i)			\
	for ((i) = 0; (i) < (list)->size; (i)++)	\
		if (((node) = (list)->entries[(i)]))

/* Like foreach_dom_node() but iterates from the last entry down to the first
 * one (index 0 included).
 *
 * The index starts at @size and is decremented as part of the loop test, so
 * the entry at index 0 is visited and an empty list needs no @size - 1
 * computation, which would wrap around for the unsigned @size. This works
 * for both signed and unsigned @i. */
#define foreachback_dom_node(list, node, i)		\
	for ((i) = (list)->size; (i)-- > 0; )		\
		if (((node) = (list)->entries[(i)]))

/* Evaluates to non-zero if @list is not NULL and @member is a valid index
 * into its entries. */
#define is_dom_node_list_member(list, member)		\
	((list) && 0 <= (member) && (member) < (list)->size)
227 
/* Adds @node to the list pointed to by @list_ptr at the given @position. If
 * @position is -1 the node is added at the end.
 * NOTE(review): the return value is presumably the resulting list, or NULL on
 * failure -- confirm against the implementation. */
struct dom_node_list *
add_to_dom_node_list(struct dom_node_list **list_ptr,
		     struct dom_node *node, int position);

/* NOTE(review): presumably releases @list; whether the nodes it holds are
 * released too is not visible from this header -- confirm. */
void done_dom_node_list(struct dom_node_list *list);
235 
/* Returns the position or index where the @node has been inserted into the
 * 'default' list of the @parent node. (Default means that get_dom_node_list()
 * is used to acquire the list to search in.) Returns -1 if the node is not
 * found. */
int get_dom_node_list_index(struct dom_node *parent, struct dom_node *node);

/* Returns the position or index where the @node should be inserted into the
 * node @list in order for the list to stay alphabetically sorted. Assumes
 * that @list is already sorted properly. */
int get_dom_node_map_index(struct dom_node_list *list, struct dom_node *node);
245 
/* Looks up the @node_map for a node matching the requested type and name.
 * The @subtype may be 0, indicating an unknown subtype, in which case only
 * the name is tested. Otherwise it holds either the element or the attribute
 * private subtype. */
struct dom_node *
get_dom_node_map_entry(struct dom_node_list *node_map,
		       enum dom_node_type type, uint16_t subtype,
		       struct dom_string *name);
254 
/* Backend of the init_dom_node() and add_dom_node() macros below, which pass
 * the call site as @file and @line.
 * NOTE(review): presumably creates a node of the given @type named by
 * @string, attaches it to @parent when that is not NULL, and returns NULL on
 * failure -- confirm against the implementation. */
struct dom_node *
init_dom_node_(unsigned char *file, int line,
		struct dom_node *parent, enum dom_node_type type,
		struct dom_string *string);

/* Calls init_dom_node_() with a NULL parent. */
#define init_dom_node(type, string) init_dom_node_(__FILE__, __LINE__, NULL, type, string)

/* Calls init_dom_node_() with the given @parent. */
#define add_dom_node(parent, type, string) init_dom_node_(__FILE__, __LINE__, parent, type, string)

/* Shorthand for add_dom_node() with the DOM_NODE_ELEMENT type. */
#define add_dom_element(parent, string) \
	add_dom_node(parent, DOM_NODE_ELEMENT, string)
264 
265 static inline struct dom_node *
add_dom_attribute(struct dom_node * parent,struct dom_string * name,struct dom_string * value)266 add_dom_attribute(struct dom_node *parent, struct dom_string *name,
267 		  struct dom_string *value)
268 {
269 	struct dom_node *node = add_dom_node(parent, DOM_NODE_ATTRIBUTE, name);
270 
271 	if (node && value) {
272 		copy_dom_string(&node->data.attribute.value, value);
273 	}
274 
275 	return node;
276 }
277 
278 static inline struct dom_node *
add_dom_proc_instruction(struct dom_node * parent,struct dom_string * string,struct dom_string * instruction)279 add_dom_proc_instruction(struct dom_node *parent, struct dom_string *string,
280 			 struct dom_string *instruction)
281 {
282 	struct dom_node *node = add_dom_node(parent, DOM_NODE_PROCESSING_INSTRUCTION, string);
283 
284 	if (node && instruction) {
285 		copy_dom_string(&node->data.proc_instruction.instruction, instruction);
286 	}
287 
288 	return node;
289 }
290 
/* Removes the node and all its children and free()s the node itself. */
void done_dom_node(struct dom_node *node);

/* Compares two nodes, returning non-zero if they differ.
 * NOTE(review): the name suggests a case-insensitive comparison -- confirm
 * against the implementation. */
int dom_node_casecmp(struct dom_node *node1, struct dom_node *node2);

/* Returns the name of the node in an allocated string. */
struct dom_string *get_dom_node_name(struct dom_node *node);

/* Returns the value of the node or NULL if no value is defined for the node
 * type. */
struct dom_string *get_dom_node_value(struct dom_node *node);

/* Returns the name used for identifying the node type. */
struct dom_string *get_dom_node_type_name(enum dom_node_type type);
306 
307 /* Based on the type of the parent and the node return a proper list
308  * or NULL. This is useful when adding a node to a parent node. */
309 static inline struct dom_node_list **
get_dom_node_list(struct dom_node * parent,struct dom_node * node)310 get_dom_node_list(struct dom_node *parent, struct dom_node *node)
311 {
312 	switch (parent->type) {
313 	case DOM_NODE_DOCUMENT:
314 		return &parent->data.document.children;
315 
316 	case DOM_NODE_ELEMENT:
317 		switch (node->type) {
318 		case DOM_NODE_ATTRIBUTE:
319 			return &parent->data.element.map;
320 
321 		default:
322 			return &parent->data.element.children;
323 		}
324 
325 	case DOM_NODE_DOCUMENT_TYPE:
326 		switch (node->type) {
327 		case DOM_NODE_ENTITY:
328 			return &parent->data.document_type.entities;
329 
330 		case DOM_NODE_NOTATION:
331 			return &parent->data.document_type.notations;
332 
333 		default:
334 			return NULL;
335 		}
336 
337 	case DOM_NODE_PROCESSING_INSTRUCTION:
338 		switch (node->type) {
339 		case DOM_NODE_ATTRIBUTE:
340 			return &parent->data.proc_instruction.map;
341 
342 		default:
343 			return NULL;
344 		}
345 
346 	default:
347 		return NULL;
348 	}
349 }
350 
351 #endif
352