1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4 
5 #define NEED_newRV_noinc
6 #define NEED_sv_2pv_flags
7 #include "ppport.h"
8 #include "utils.h"
9 
10 //#define DOM_GC_TRACE(msg, ...) fprintf(stderr, "[GC] " msg "\n", ##__VA_ARGS__);
11 #define DOM_GC_TRACE(...)
12 
// HACK: sv_derived_from_pvn() is faster than sv_derived_from(), so route calls through it on perls that provide it
14 #if PERL_BCDVERSION > 0x5015004
15 	#undef sv_derived_from
16 	#define sv_derived_from(sv, name) sv_derived_from_pvn(sv, name, sizeof(name) - 1, 0)
17 #else
18 	#define sv_derived_from_pvn(sv, name, len) sv_derived_from(sv, name)
19 #endif
20 
// HACK: support older perls (< 5.6) by stubbing out the SvUTF8* macros
22 #if PERL_BCDVERSION < 0x5006000
23 	#define SvUTF8(x) (0)
24 	#define SvUTF8_on(x)
25 	#define SvUTF8_off(x)
26 #endif
27 
/*
 * sub_croak(cv, msg, ...) - croak() with the name of the sub behind `cv` prepended.
 *
 * `msg` must be a string literal (it is concatenated with the prefix format),
 * the remaining arguments are its printf-style parameters.
 *
 * The message is prefixed with "Package::name(): " (or just "name(): " when the
 * GV has no stash name). If `cv` has no GV at all the plain message is used,
 * so the error is never silently dropped.
 *
 * NOTE: the trailing ';' after `while (0)` is kept on purpose for compatibility
 * with existing call sites; do not use this macro as the unbraced body of an
 * `if` that is followed by `else`.
 */
#define sub_croak(cv, msg, ...) do { \
	const GV *const sub_croak_gv_ = CvGV(cv); \
	if (sub_croak_gv_) { \
		const char *sub_croak_name_ = GvNAME(sub_croak_gv_); \
		const HV *sub_croak_stash_ = GvSTASH(sub_croak_gv_); \
		const char *sub_croak_pkg_ = sub_croak_stash_ ? HvNAME(sub_croak_stash_) : NULL; \
		croak("%s%s%s(): " msg, sub_croak_pkg_ ? sub_croak_pkg_ : sub_croak_name_, sub_croak_pkg_ ? "::" : "", sub_croak_pkg_ ? sub_croak_name_ : "", ##__VA_ARGS__); \
	} else { \
		croak(msg, ##__VA_ARGS__); \
	} \
} while (0);
37 
38 typedef html5_dom_parser_t *			HTML5__DOM;
39 typedef myhtml_collection_t *			HTML5__DOM__Collection;
40 typedef myhtml_tree_node_t *			HTML5__DOM__Node;
41 typedef myhtml_tree_node_t *			HTML5__DOM__Element;
42 typedef myhtml_tree_node_t *			HTML5__DOM__Text;
43 typedef myhtml_tree_node_t *			HTML5__DOM__Comment;
44 typedef myhtml_tree_node_t *			HTML5__DOM__Document;
45 typedef myhtml_tree_node_t *			HTML5__DOM__Fragment;
46 typedef myhtml_tree_node_t *			HTML5__DOM__DocType;
47 typedef html5_dom_tree_t *				HTML5__DOM__Tree;
48 typedef html5_css_parser_t *			HTML5__DOM__CSS;
49 typedef html5_css_selector_t *			HTML5__DOM__CSS__Selector;
50 typedef html5_css_selector_entry_t *	HTML5__DOM__CSS__Selector__Entry;
51 typedef html5_dom_async_result *		HTML5__DOM__AsyncResult;
52 
sv_serialization_callback(const char * data,size_t len,void * ctx)53 static mystatus_t sv_serialization_callback(const char *data, size_t len, void *ctx) {
54 	sv_catpvn((SV *) ctx, data, len);
55 	return MyCORE_STATUS_OK;
56 }
57 
pack_pointer(const char * clazz,void * ptr)58 static inline SV *pack_pointer(const char *clazz, void *ptr) {
59 	SV *sv = newSV(0);
60 	sv_setref_pv(sv, clazz, ptr);
61 	return sv;
62 }
63 
/*
 * Appends to `sv` the text of every text node found below `node`,
 * walking the children in order and descending into element children.
 */
static void html5_dom_recursive_node_text(myhtml_tree_node_t *node, SV *sv) {
	myhtml_tree_node_t *child;

	for (child = myhtml_node_child(node); child; child = myhtml_node_next(child)) {
		if (child->tag_id != MyHTML_TAG__TEXT) {
			// only elements are descended into, other node kinds are skipped
			if (node_is_element(child))
				html5_dom_recursive_node_text(child, sv);
			continue;
		}

		size_t chunk_len = 0;
		const char *chunk = myhtml_node_text(child, &chunk_len);
		if (chunk_len != 0)
			sv_catpvn(sv, chunk, chunk_len);
	}
}
78 
/*
 * Returns a reference to the HTML5::DOM::Tree object bound to `tree`.
 *
 * If the tree already has a wrapper stored in tree->context, a new reference
 * to the existing object is returned. Otherwise a wrapper is allocated, stored
 * in tree->context, filled from the arguments (taking one reference on
 * `parent`) and packed into a new blessed reference.
 */
static SV *create_tree_object(myhtml_tree_t *tree, SV *parent, html5_dom_parser_t *parser, bool used, bool utf8) {
	html5_dom_tree_t *wrapper = (html5_dom_tree_t *) tree->context;

	if (wrapper != NULL)
		return newRV(wrapper->sv);

	wrapper = (html5_dom_tree_t *) safemalloc(sizeof(html5_dom_tree_t));
	tree->context = wrapper;

	wrapper->tree = tree;
	wrapper->parent = parent;
	wrapper->parser = parser;
	wrapper->fragment_tag_id = MyHTML_TAG__UNDEF;
	wrapper->used = used;
	wrapper->utf8 = utf8;

	// the wrapper keeps its own reference to the parent
	SvREFCNT_inc(parent);

	SV *tree_ref = pack_pointer("HTML5::DOM::Tree", wrapper);
	wrapper->sv = SvRV(tree_ref);

	DOM_GC_TRACE("DOM::Tree::NEW (refcnt=%d)", SvREFCNT(tree_ref));

	return tree_ref;
}
104 
/*
 * Maps a node to the name of the Perl class it should be blessed into.
 *
 * Text, comment and doctype nodes get their dedicated classes; a node whose
 * tag id equals the tree's fragment tag id is a Fragment; every other tagged
 * node is an Element. Untagged nodes are either the Document or a plain Node.
 *
 * The tree context is checked for NULL before use, mirroring
 * newSVpv_utf8_auto(), so a tree without a wrapper simply has no fragments.
 */
static inline const char *get_node_class(myhtml_tree_node_t *node) {
	const html5_dom_tree_t *context = (const html5_dom_tree_t *) node->tree->context;

	if (node->tag_id == MyHTML_TAG__UNDEF) {
		// Modest myhtml bug - document node has tag_id == MyHTML_TAG__UNDEF
		if (node_is_document(node))
			return "HTML5::DOM::Document";
		return "HTML5::DOM::Node";
	}

	if (node->tag_id == MyHTML_TAG__TEXT)
		return "HTML5::DOM::Text";
	if (node->tag_id == MyHTML_TAG__COMMENT)
		return "HTML5::DOM::Comment";
	if (node->tag_id == MyHTML_TAG__DOCTYPE)
		return "HTML5::DOM::DocType";

	// a zero fragment_tag_id means "no fragment tag registered"
	if (context && context->fragment_tag_id && node->tag_id == context->fragment_tag_id)
		return "HTML5::DOM::Fragment";

	return "HTML5::DOM::Element";
}
127 
newSVpv_utf8_auto(myhtml_tree_t * tree,const char * value,STRLEN length)128 static inline SV *newSVpv_utf8_auto(myhtml_tree_t *tree, const char *value, STRLEN length) {
129 	html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
130 	if (!context || !context->utf8) {
131 		return newSVpv(value, length);
132 	} else {
133 		SV *sv = newSVpv(value, length);
134 		SvUTF8_on(sv);
135 		return sv;
136 	}
137 }
138 
newSVpv_utf8_auto_css(html5_css_selector_t * selector,const char * value,STRLEN length)139 static inline SV *newSVpv_utf8_auto_css(html5_css_selector_t *selector, const char *value, STRLEN length) {
140 	if (!selector || !selector->utf8) {
141 		return newSVpv(value, length);
142 	} else {
143 		SV *sv = newSVpv(value, length);
144 		SvUTF8_on(sv);
145 		return sv;
146 	}
147 }
148 
tree_to_sv(myhtml_tree_t * tree)149 static SV *tree_to_sv(myhtml_tree_t *tree) {
150 	html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
151 	return newRV(context->sv);
152 }
153 
myhtml_to_sv(myhtml_tree_t * tree)154 static SV *myhtml_to_sv(myhtml_tree_t *tree) {
155 	html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
156 	return newRV(context->parent);
157 }
158 
node_to_sv(myhtml_tree_node_t * node)159 static SV *node_to_sv(myhtml_tree_node_t *node) {
160 	if (!node)
161 		return &PL_sv_undef;
162 
163 	SV *sv = (SV *) myhtml_node_get_data(node);
164 	if (!sv) {
165 		SV *node_ref = pack_pointer(get_node_class(node), (void *) node);
166 		sv = SvRV(node_ref);
167 		myhtml_node_set_data(node, (void *) sv);
168 
169 		DOM_GC_TRACE("DOM::Node::NEW (new refcnt=%d)", SvREFCNT(sv));
170 
171 		html5_dom_tree_t *tree = (html5_dom_tree_t *) node->tree->context;
172 		SvREFCNT_inc(tree->sv);
173 
174 		return node_ref;
175 	} else {
176 		SV *node_ref = newRV(sv);
177 		DOM_GC_TRACE("DOM::Node::NEW (reuse refcnt=%d)", SvREFCNT(sv));
178 		return node_ref;
179 	}
180 }
181 
collection_to_blessed_array(myhtml_collection_t * collection)182 static SV *collection_to_blessed_array(myhtml_collection_t *collection) {
183 	AV *arr = newAV();
184 	if (collection) {
185 		for (int i = 0; i < collection->length; ++i)
186 			av_push(arr, node_to_sv(collection->list[i]));
187 	}
188 	return sv_bless(newRV_noinc((SV *) arr), gv_stashpv("HTML5::DOM::Collection", 0));
189 }
190 
/*
 * Resolves an object with an overloaded stringification operator to its
 * string value.
 *
 * If `sv` is a reference to a blessed object whose class provides the `("")`
 * overload method, that method is called and a mortal copy of its result is
 * returned. In every other case `sv` itself is returned unchanged.
 *
 * The caller never owns the returned SV: it is either the argument or a
 * mortal, so it must be copied if it has to outlive the current statement.
 */
static SV *sv_stringify(SV *sv) {
	if (!SvROK(sv))
		return sv;

	SV *referent = SvRV(sv);
	if (!SvOBJECT(referent))
		return sv;

	HV *stash = SvSTASH(referent);
	GV *to_string = gv_fetchmethod_autoload(stash, "\x28\x22\x22", 0);	// method name is `(""`
	if (!to_string)
		return sv;

	{
		dSP;
		ENTER; SAVETMPS; PUSHMARK(SP);
		XPUSHs(sv_bless(sv_2mortal(newRV_inc(referent)), stash));
		PUTBACK;
		call_sv((SV *) GvCV(to_string), G_SCALAR);
		SPAGAIN;

		// The returned value may be a temporary that FREETMPS releases, so
		// copy it while it is still alive instead of returning it directly.
		SV *copy = newSVsv(POPs);

		PUTBACK;
		FREETMPS; LEAVE;

		// Mortalized outside the ENTER/LEAVE pair: it survives until the
		// caller's own temporaries are freed.
		return sv_2mortal(copy);
	}
}
217 
/*
 * Runs a CSS selector search starting at `scope`.
 *
 * `query` is either a string selector (compiled here and destroyed before
 * returning), an HTML5::DOM::CSS::Selector object or an
 * HTML5::DOM::CSS::Selector::Entry object; any other reference croaks.
 * `combinator` (may be NULL) optionally replaces the default descendant
 * combinator. With `one` set a single node (or undef) is returned, otherwise
 * a blessed HTML5::DOM::Collection array.
 */
static SV *html5_node_find(CV *cv, html5_dom_parser_t *parser, myhtml_tree_node_t *scope, SV *query, SV *combinator, bool one) {
	mystatus_t status;
	mycss_selectors_list_t *compiled = NULL;	// only set for string queries, destroyed below
	mycss_selectors_entries_list_t *entries = NULL;
	size_t entries_count = 0;
	modest_finder_selector_combinator_f match_func = modest_finder_node_combinator_descendant;
	SV *result = &PL_sv_undef;

	// Custom combinator as args
	if (combinator) {
		// NOTE(review): this stringifies `query`, not `combinator` - looks like
		// a copy/paste slip, kept as is because callers may depend on it; confirm.
		query = sv_stringify(query);

		STRLEN combo_len;
		const char *combo = SvPV_const(combinator, combo_len);

		if (combo_len > 0)
			match_func = html5_find_selector_func(combo, combo_len);
	}

	if (!SvROK(query)) {
		// String selector, compile it
		query = sv_stringify(query);

		STRLEN query_len;
		const char *query_str = SvPV_const(query, query_len);

		status = html5_dom_init_css(parser);
		if (status) {
			sub_croak(cv, "mycss_init failed: %d (%s)", status, modest_strerror(status));
		}

		compiled = html5_parse_selector(parser->mycss_entry, query_str, query_len, &status);
		if (!compiled) {
			sub_croak(cv, "bad selector: %s", query_str);
		}

		entries = compiled->entries_list;
		entries_count = compiled->entries_list_length;
	} else if (sv_derived_from(query, "HTML5::DOM::CSS::Selector")) {
		// Precompiled selectors
		html5_css_selector_t *precompiled = INT2PTR(html5_css_selector_t *, SvIV((SV*)SvRV(query)));
		entries = precompiled->list->entries_list;
		entries_count = precompiled->list->entries_list_length;
	} else if (sv_derived_from(query, "HTML5::DOM::CSS::Selector::Entry")) {
		// One precompiled selector
		html5_css_selector_entry_t *precompiled_entry = INT2PTR(html5_css_selector_entry_t *, SvIV((SV*)SvRV(query)));
		entries = precompiled_entry->list;
		entries_count = 1;
	} else {
		sub_croak(cv, "%s: %s is not of type %s or %s", "HTML5::DOM::Tree::find", "query", "HTML5::DOM::CSS::Selector", "HTML5::DOM::CSS::Selector::Entry");
	}

	if (one) {
		// search one element
		myhtml_tree_node_t *found = (myhtml_tree_node_t *) html5_node_finder(parser, match_func, scope, entries, entries_count, &status, 1);
		result = node_to_sv(found);
	} else {
		// search multiple elements
		myhtml_collection_t *matches = (myhtml_collection_t *) html5_node_finder(parser, match_func, scope, entries, entries_count, &status, 0);
		result = collection_to_blessed_array(matches);
		if (matches)
			myhtml_collection_destroy(matches);
	}

	// destroy the selector compiled from a string query
	if (compiled)
		mycss_selectors_list_destroy(mycss_entry_selectors(parser->mycss_entry), compiled, true);

	return result;
}
285 
/*
 * Non-selector lookups below `self`, dispatched on the XS alias index `ix`:
 *   0, 1 - by tag name       (returns a collection)
 *   2, 3 - by class          (returns a collection)
 *   4, 5 - by id             (returns the first match or undef)
 *   6, 7 - by attribute      (returns a collection)
 *
 * For attribute lookups `val` (may be NULL) is the value to compare with and
 * the first character of `cmp` (may be NULL, default '=') selects the
 * comparison: '=', '~', '^', '$', '*' or '|'; anything else croaks.
 * A NULL `self` yields an empty collection; an unknown `ix` yields undef.
 */
static SV *html5_node_simple_find(CV *cv, myhtml_tree_node_t *self, SV *key, SV *val, SV *cmp, bool icase, int ix) {
	if (self == NULL)
		return collection_to_blessed_array(NULL);

	SV *result = &PL_sv_undef;
	key = sv_stringify(key);

	STRLEN key_len;
	const char *key_str = SvPV_const(key, key_len);

	myhtml_tree_t *tree = self->tree;
	myhtml_collection_t *found = NULL;

	if (ix == 0 || ix == 1) {
		// tag name
		found = myhtml_get_nodes_by_name_in_scope(tree, NULL, self, key_str, key_len, NULL);
		result = collection_to_blessed_array(found);
	} else if (ix == 2 || ix == 3) {
		// class
		found = myhtml_get_nodes_by_attribute_value_whitespace_separated(tree, NULL, self, false, "class", 5, key_str, key_len, NULL);
		result = collection_to_blessed_array(found);
	} else if (ix == 4 || ix == 5) {
		// id (first)
		found = myhtml_get_nodes_by_attribute_value(tree, NULL, self, false, "id", 2, key_str, key_len, NULL);
		if (found && found->length)
			result = node_to_sv(found->list[0]);
	} else if (ix == 6 || ix == 7) {
		// attribute
		if (!val) {
			// [key]
			found = myhtml_get_nodes_by_attribute_key(tree, NULL, self, key_str, key_len, NULL);
		} else {
			STRLEN val_len;
			const char *val_str = SvPV_const(val, val_len);

			char cmp_type = '=';
			if (cmp) {
				cmp = sv_stringify(cmp);
				STRLEN cmp_len;
				const char *cmp_str = SvPV_const(cmp, cmp_len);

				if (cmp_len)
					cmp_type = cmp_str[0];
			}

			switch (cmp_type) {
				case '=':	// [key=val]
					found = myhtml_get_nodes_by_attribute_value(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				case '~':	// [key~=val]
					found = myhtml_get_nodes_by_attribute_value_whitespace_separated(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				case '^':	// [key^=val]
					found = myhtml_get_nodes_by_attribute_value_begin(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				case '$':	// [key$=val]
					found = myhtml_get_nodes_by_attribute_value_end(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				case '*':	// [key*=val]
					found = myhtml_get_nodes_by_attribute_value_contain(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				case '|':	// [key|=val]
					found = myhtml_get_nodes_by_attribute_value_hyphen_separated(tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
				break;
				default:
					sub_croak(cv, "unknown cmp type: %c", cmp_type);
				break;
			}
		}
		result = collection_to_blessed_array(found);
	}

	if (found)
		myhtml_collection_destroy(found);

	return result;
}
360 
/*
 * Reads `key` (of `length` bytes) from `hv` as an integer.
 * Returns `def` when `hv` is NULL or the key is absent.
 */
static long hv_get_int_value(HV *hv, const char *key, int length, long def) {
	if (hv == NULL)
		return def;

	SV **slot = hv_fetch(hv, key, length, 0);
	if (slot == NULL || *slot == NULL)
		return def;

	return SvIV(*slot);
}
369 
/*
 * Reads `key` (of `length` bytes) from `hv` as an encoding.
 *
 * The value may be a numeric encoding id or an encoding name; the names
 * "auto" and "default" (case-insensitive) map to MyENCODING_AUTO and
 * MyENCODING_DEFAULT. Unknown ids and names yield MyENCODING_NOT_DETERMINED.
 * Returns `def` when `hv` is NULL, the key is absent or the value is an
 * empty string.
 */
static myencoding_t hv_get_encoding_value(HV *hv, const char *key, int length, myencoding_t def) {
	if (!hv)
		return def;

	SV **slot = hv_fetch(hv, key, length, 0);
	if (!slot || !*slot)
		return def;

	SV *encoding = sv_stringify(*slot);

	STRLEN enc_length;
	const char *enc_str = SvPV_const(encoding, enc_length);

	if (enc_length == 0)
		return def;

	myencoding_t enc_id;

	// <ctype.h> functions require an unsigned char value; a plain char would
	// be undefined behavior for bytes >= 0x80
	if (isdigit((unsigned char) enc_str[0])) {
		// May be encoding id
		enc_id = SvIV(encoding);
		if (enc_id == MyENCODING_AUTO || enc_id == MyENCODING_DEFAULT || enc_id == MyENCODING_NOT_DETERMINED)
			return enc_id;
		if (!myencoding_name_by_id(enc_id, NULL))
			return MyENCODING_NOT_DETERMINED;
		return enc_id;
	}

	// May be encoding name
	if (myencoding_by_name(enc_str, enc_length, &enc_id))
		return enc_id;

	if (enc_length == 4 && strcasecmp(enc_str, "auto") == 0)
		return MyENCODING_AUTO;
	if (enc_length == 7 && strcasecmp(enc_str, "default") == 0)
		return MyENCODING_DEFAULT;

	return MyENCODING_NOT_DETERMINED;
}
402 
/*
 * Reads `key` (of `length` bytes) from `hv` as the tri-state utf8 option:
 *   0 - off  (numeric value equal to zero)
 *   1 - on   (non-zero numeric value, or any other non-empty string)
 *   2 - auto (the string "auto", case-insensitive)
 * Returns `def` when `hv` is NULL, the key is absent or the value is an
 * empty string.
 */
static int hv_get_utf8_value(HV *hv, const char *key, int length, int def) {
	if (!hv)
		return def;

	SV **slot = hv_fetch(hv, key, length, 0);
	if (!slot || !*slot)
		return def;

	SV *encoding = sv_stringify(*slot);

	STRLEN enc_length;
	const char *enc_str = SvPV_const(encoding, enc_length);

	if (enc_length == 0)
		return def;

	// <ctype.h> functions require an unsigned char value
	if (isdigit((unsigned char) enc_str[0]))
		return SvIV(encoding) != 0;

	// Compare the length of the *value*; the previous code tested the key
	// length here, which only worked because the key "utf8" is 4 bytes long.
	if (enc_length == 4 && strcasecmp(enc_str, "auto") == 0)
		return 2;

	// any other non-empty string enables utf8
	return 1;
}
424 
// Expands an option-name literal into the (key, length) argument pair taken by the hv_get_*_value() helpers.
#define OPT_KEY(literal) (literal), (int) (sizeof(literal) - 1)

/*
 * Fills `opts` from the Perl hash `options` (may be NULL).
 *
 * Every option missing from the hash falls back to the value in `extend`
 * when it is given, or to the built-in default otherwise. Boolean options
 * are considered enabled when their integer value is greater than zero.
 * No validation happens here.
 */
static void html5_dom_parse_options(html5_dom_options_t *opts, html5_dom_options_t *extend, HV *options) {
	opts->threads = hv_get_int_value(options, OPT_KEY("threads"),
		extend ? extend->threads : 0);
	opts->ignore_whitespace = hv_get_int_value(options, OPT_KEY("ignore_whitespace"),
		extend ? extend->ignore_whitespace : 0) > 0;
	opts->ignore_doctype = hv_get_int_value(options, OPT_KEY("ignore_doctype"),
		extend ? extend->ignore_doctype : 0) > 0;
	opts->scripts = hv_get_int_value(options, OPT_KEY("scripts"),
		extend ? extend->scripts : 0) > 0;
	opts->encoding = hv_get_encoding_value(options, OPT_KEY("encoding"),
		extend ? extend->encoding : MyENCODING_AUTO);
	opts->default_encoding = hv_get_encoding_value(options, OPT_KEY("default_encoding"),
		extend ? extend->default_encoding : MyENCODING_DEFAULT);
	opts->encoding_use_meta = hv_get_int_value(options, OPT_KEY("encoding_use_meta"),
		extend ? extend->encoding_use_meta : 1) > 0;
	opts->encoding_use_bom = hv_get_int_value(options, OPT_KEY("encoding_use_bom"),
		extend ? extend->encoding_use_bom : 1) > 0;
	opts->encoding_prescan_limit = hv_get_int_value(options, OPT_KEY("encoding_prescan_limit"),
		extend ? extend->encoding_prescan_limit : 1024);
	opts->utf8 = hv_get_utf8_value(options, OPT_KEY("utf8"),
		extend ? extend->utf8 : 2);

	// the threads option is forced off in builds without thread support
	#ifdef MyCORE_BUILD_WITHOUT_THREADS
		opts->threads = 0;
	#endif
}

#undef OPT_KEY
441 
/*
 * Validates parsed options and croaks (in the name of `cv`) on the first
 * invalid one: undetermined encoding, undetermined or "auto" default
 * encoding, negative thread count, negative prescan limit.
 */
static void html5_dom_check_options(CV *cv, html5_dom_options_t *opts) {
	const bool bad_encoding = (opts->encoding == MyENCODING_NOT_DETERMINED);
	const bool bad_default_encoding =
		(opts->default_encoding == MyENCODING_AUTO) ||
		(opts->default_encoding == MyENCODING_NOT_DETERMINED);

	if (bad_encoding) {
		sub_croak(cv, "invalid encoding value");
	}

	if (bad_default_encoding) {
		sub_croak(cv, "invalid default_encoding value");
	}

	if (opts->threads < 0) {
		sub_croak(cv, "invalid threads count");
	}

	if (opts->encoding_prescan_limit < 0) {
		sub_croak(cv, "invalid encoding_prescan_limit value");
	}
}
452 
// Serialization of parsed CSS selectors into an AST (nested Perl arrays and hashes)
454 static void html5_dom_css_serialize_entry(html5_css_selector_t *self, mycss_selectors_list_t *selector, mycss_selectors_entry_t *entry, AV *result);
455 
/*
 * Serializes a chain of selector lists (linked through ->next) into `result`:
 * every entries list of every selector list becomes one array reference
 * holding the serialized entries of that chain.
 */
static void html5_dom_css_serialize_selector(html5_css_selector_t *self, mycss_selectors_list_t *selector, AV *result) {
	for (; selector; selector = selector->next) {
		size_t index;
		for (index = 0; index < selector->entries_list_length; ++index) {
			AV *chain = newAV();
			html5_dom_css_serialize_entry(self, selector, selector->entries_list[index].entry, chain);
			av_push(result, newRV_noinc((SV *) chain));
		}
	}
}
467 
html5_dom_css_serialize_entry(html5_css_selector_t * self,mycss_selectors_list_t * selector,mycss_selectors_entry_t * entry,AV * result)468 static void html5_dom_css_serialize_entry(html5_css_selector_t *self, mycss_selectors_list_t *selector, mycss_selectors_entry_t *entry, AV *result) {
469 	// combinators names
470 	static const struct {
471 		const char name[16];
472 		size_t len;
473 	} combinators[] = {
474 		{"", 0},
475 		{"descendant", 10},	// >>
476 		{"child", 5},		// >
477 		{"sibling", 7},		// +
478 		{"adjacent", 8},	// ~
479 		{"column", 6}		// ||
480 	};
481 
482 	// attribute eq names
483 	static const struct {
484 		const char name[16];
485 		size_t len;
486 	} attr_match_names[] = {
487 		{"equal", 5},		// =
488 		{"include", 7},		// ~=
489 		{"dash", 4},		// |=
490 		{"prefix", 6},		// ^=
491 		{"suffix", 6},		// $=
492 		{"substring", 9}	// *=
493 	};
494 
495 	while (entry) {
496 		if (entry->combinator != MyCSS_SELECTORS_COMBINATOR_UNDEF) {
497 			HV *data = newHV();
498 			hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "combinator", 10), 0);
499 			hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, combinators[entry->combinator].name, combinators[entry->combinator].len), 0);
500 			av_push(result, newRV_noinc((SV *) data));
501 		}
502 
503 		HV *data = newHV();
504 
505 		if ((selector->flags) & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)
506 			hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "invalid", 7)), newSViv(1), 0);
507 
508 		switch (entry->type) {
509 			case MyCSS_SELECTORS_TYPE_ID:
510 			case MyCSS_SELECTORS_TYPE_CLASS:
511 			case MyCSS_SELECTORS_TYPE_ELEMENT:
512 			case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS:
513 			case MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT:
514 			{
515 				switch (entry->type) {
516 					case MyCSS_SELECTORS_TYPE_ELEMENT:
517 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "tag", 3), 0);
518 					break;
519 					case MyCSS_SELECTORS_TYPE_ID:
520 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "id", 2), 0);
521 					break;
522 					case MyCSS_SELECTORS_TYPE_CLASS:
523 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "class", 5), 0);
524 					break;
525 					case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS:
526 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "pseudo-class", 12), 0);
527 					break;
528 					case MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT:
529 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "pseudo-element", 14), 0);
530 					break;
531 				}
532 
533 				if (entry->key)
534 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, entry->key->length ? entry->key->data : "", entry->key->length), 0);
535 			}
536 			break;
537 			case MyCSS_SELECTORS_TYPE_ATTRIBUTE:
538 			{
539 				hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "attribute", 9), 0);
540 
541 				/* key */
542 				if (entry->key)
543 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, entry->key->length ? entry->key->data : "", entry->key->length), 0);
544 
545 				/* value */
546 				if (mycss_selector_value_attribute(entry->value)->value) {
547 					mycore_string_t *str_value = mycss_selector_value_attribute(entry->value)->value;
548 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, str_value->length ? str_value->data : "", str_value->length), 0);
549 				} else {
550 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, "", 0), 0);
551 				}
552 
553 				/* match */
554 				int match = mycss_selector_value_attribute(entry->value)->match;
555 				hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "match", 5)), newSVpv_utf8_auto_css(self, attr_match_names[match].name, attr_match_names[match].len), 0);
556 
557 				/* modificator */
558 				if (mycss_selector_value_attribute(entry->value)->mod & MyCSS_SELECTORS_MOD_I) {
559 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "ignoreCase", 10)), newSViv(1), 0);
560 				} else {
561 					hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "ignoreCase", 10)), newSViv(0), 0);
562 				}
563 			}
564 			break;
565 			case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS_FUNCTION:
566 			{
567 				hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "function", 8), 0);
568 
569 				switch (entry->sub_type) {
570 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CONTAINS:
571 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_HAS:
572 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NOT:
573 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_MATCHES:
574 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CURRENT:
575 					{
576 						switch (entry->sub_type) {
577 							case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CONTAINS:
578 								hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "contains", 8), 0);
579 							break;
580 							case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_HAS:
581 								hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "has", 3), 0);
582 							break;
583 							case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NOT:
584 								hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "not", 3), 0);
585 							break;
586 							case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_MATCHES:
587 								hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "matches", 7), 0);
588 							break;
589 							case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CURRENT:
590 								hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "current", 7), 0);
591 							break;
592 						}
593 
594 						AV *value = newAV();
595 						html5_dom_css_serialize_selector(self, entry->value, value);
596 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) value), 0);
597 					}
598 					break;
599 
600 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
601 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
602 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_COLUMN:
603 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_COLUMN:
604 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
605 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
606 					{
607 						mycss_an_plus_b_entry_t *a_plus_b = mycss_selector_value_an_plus_b(entry->value);
608 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "nth-child", 9), 0);
609 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "a", 1)), newSViv(a_plus_b->a), 0);
610 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "b", 1)), newSViv(a_plus_b->b), 0);
611 
612 						if (a_plus_b->of) {
613 							AV *of = newAV();
614 							html5_dom_css_serialize_selector(self, a_plus_b->of, of);
615 							hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "of", 2)), newRV_noinc((SV *) of), 0);
616 						}
617 					}
618 					break;
619 
620 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_DIR:
621 					{
622 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "dir", 3), 0);
623 						if (entry->value) {
624 							mycore_string_t *str_fname = mycss_selector_value_string(entry->value);
625 							hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, str_fname->length ? str_fname->data : "", str_fname->length), 0);
626 						} else {
627 							hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, "", 0), 0);
628 						}
629 					}
630 					break;
631 
632 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_DROP:
633 					{
634 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "drop", 4), 0);
635 						mycss_selectors_function_drop_type_t drop_val = mycss_selector_value_drop(entry->value);
636 
637 						AV *langs = newAV();
638 						if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_ACTIVE)
639 							av_push(langs, newSVpv_utf8_auto_css(self, "active", 6));
640 						if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_VALID)
641 							av_push(langs, newSVpv_utf8_auto_css(self, "valid", 5));
642 						if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_INVALID)
643 							av_push(langs, newSVpv_utf8_auto_css(self, "invalid", 7));
644 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) langs), 0);
645 					}
646 					break;
647 
648 					case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_LANG:
649 					{
650 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "lang", 4), 0);
651 						AV *langs = newAV();
652 						if (entry->value) {
653 							mycss_selectors_value_lang_t *lang = mycss_selector_value_lang(entry->value);
654 							while (lang) {
655 								av_push(langs, newSVpv_utf8_auto_css(self, lang->str.length ? lang->str.data : "", lang->str.length));
656 								lang = lang->next;
657 							}
658 						}
659 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) langs), 0);
660 					}
661 					break;
662 
663 					default:
664 						hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "unknown", 7), 0);
665 					break;
666 				}
667 			}
668 			break;
669 
670 			default:
671 				hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "unknown", 7), 0);
672 			break;
673 		}
674 
675 		av_push(result, newRV_noinc((SV *) data));
676 
677 		entry = entry->next;
678 	}
679 }
680 
html5_dom_async_parse_init(CV * cv,html5_dom_parser_t * self,SV * html,HV * options,int ev_fd)681 static html5_dom_async_result *html5_dom_async_parse_init(CV *cv, html5_dom_parser_t *self, SV *html, HV *options, int ev_fd) {
682 	html5_dom_async_result *result = (html5_dom_async_result *) safemalloc(sizeof(html5_dom_async_result));
683 	memset(result, 0, sizeof(html5_dom_async_result));
684 
685 	result->fd = ev_fd;
686 
687 	// extends options
688 	html5_dom_parse_options(&result->opts, &self->opts, options);
689 	html5_dom_check_options(cv, &result->opts);
690 
691 	// Auto detect UTF8 flag
692 	if (result->opts.utf8 == 2)
693 		result->opts.utf8 = SvUTF8(html) ? 1 : 0;
694 
695 	mystatus_t status;
696 
697 	STRLEN html_len;
698 	const char *html_str = SvPV_const(html, html_len);
699 
700 	// copy html source
701 	result->html = safemalloc(html_len);
702 	result->length = html_len;
703 	memcpy(result->html, html_str, html_len);
704 
705 	#ifndef MyCORE_BUILD_WITHOUT_THREADS
706 		// create parsing thread
707 		result->thread = mythread_create();
708 		status = mythread_init(result->thread, MyTHREAD_TYPE_STREAM, 1, 0);
709 
710 		if (status) {
711 			mythread_destroy(result->thread, NULL, NULL, true);
712 			safefree(result->html);
713 			safefree(result);
714 			sub_croak(cv, "mythread_init failed: %d (%s)", status, modest_strerror(status));
715 			return NULL;
716 		}
717 
718 		result->thread->context = result;
719 
720 		status = myhread_entry_create(result->thread, html5_dom_mythread_function, html5_dom_async_parse_worker, MyTHREAD_OPT_STOP);
721 		mythread_option_set(result->thread, MyTHREAD_OPT_QUIT);
722 
723 		if (status) {
724 			mythread_destroy(result->thread, NULL, NULL, true);
725 			safefree(result->html);
726 			safefree(result);
727 			sub_croak(cv, "myhread_entry_create failed: %d (%s)", status, modest_strerror(status));
728 			return NULL;
729 		}
730 
731 		// start parsing thread
732 		status = mythread_resume(result->thread, MyTHREAD_OPT_UNDEF);
733 
734 		if (status) {
735 			mythread_destroy(result->thread, NULL, NULL, true);
736 			safefree(result->html);
737 			safefree(result);
738 			sub_croak(cv, "mythread_resume failed: %d (%s)", status, modest_strerror(status));
739 			return NULL;
740 		}
741 	#else
742 		// sync fallback
743 		html5_dom_async_parse(result);
744 	#endif
745 
746 	return result;
747 }
748 
html5_dom_async_parse_done(CV * cv,html5_dom_async_result * result,bool wait)749 static SV *html5_dom_async_parse_done(CV *cv, html5_dom_async_result *result, bool wait) {
750 	if (!wait && !result->done)
751 		return NULL;
752 
753 	#ifndef MyCORE_BUILD_WITHOUT_THREADS
754 		if (result->thread)
755 			result->thread = mythread_destroy(result->thread, NULL, NULL, true);
756 	#endif
757 
758 	if (result->html) {
759 		result->html = NULL;
760 		safefree(result->html);
761 	}
762 
763 	if (result->status) {
764 		sub_croak(cv, "parse failed: %d (%s)", result->status, modest_strerror(result->status));
765 		return NULL;
766 	}
767 
768 	if (result->tree) {
769 		DOM_GC_TRACE("DOM::new");
770 		SV *myhtml_sv = pack_pointer("HTML5::DOM", result->parser);
771 		result->tree_sv = (void *) create_tree_object(result->tree, SvRV(myhtml_sv), result->parser, false, result->opts.utf8);
772 		result->tree = NULL;
773 		SvREFCNT_dec(myhtml_sv);
774 	}
775 
776 	return result->tree_sv ? SvREFCNT_inc((SV *) result->tree_sv) : &PL_sv_undef;
777 }
778 
779 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM
780 
781 #################################################################
782 # HTML5::DOM (Parser)
783 #################################################################
784 HTML5::DOM
785 new(SV *CLASS, HV *options = NULL)
786 CODE:
787 	DOM_GC_TRACE("DOM::new");
788 	mystatus_t status;
789 
790 	html5_dom_options_t opts = {0};
791 	html5_dom_parse_options(&opts, NULL, options);
792 	html5_dom_check_options(cv, &opts);
793 
794 	html5_dom_parser_t *self = html5_dom_parser_new(&opts);
795 
796 	self->myhtml = myhtml_create();
797 
798 	if (self->opts.threads <= 1) {
799 		status = myhtml_init(self->myhtml, MyHTML_OPTIONS_PARSE_MODE_SINGLE, 1, 0);
800 	} else {
801 		status = myhtml_init(self->myhtml, MyHTML_OPTIONS_DEFAULT, self->opts.threads, 0);
802 	}
803 
804 	if (status) {
805 		self = html5_dom_parser_free(self);
806 		sub_croak(cv, "myhtml_init failed: %d (%s)", status, modest_strerror(status));
807 	}
808 
809 	RETVAL = self;
810 OUTPUT:
811 	RETVAL
812 
813 # Init html chunk parser
814 SV *
815 parseChunkStart(HTML5::DOM self, HV *options = NULL)
816 CODE:
817 	mystatus_t status;
818 
819 	html5_dom_parse_options(&self->chunk_opts, &self->opts, options);
820 	html5_dom_check_options(cv, &self->chunk_opts);
821 
822 	if (self->tree) {
823 		if (self->tree->context) {
824 			html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree;
825 			tree_context->used = false;
826 		} else {
827 			myhtml_tree_destroy(self->tree);
828 		}
829 
830 		self->tree = NULL;
831 	}
832 
833 	self->tree = myhtml_tree_create();
834 	status = myhtml_tree_init(self->tree, self->myhtml);
835 	if (status) {
836 		myhtml_tree_destroy(self->tree);
837 		sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
838 	}
839 
840 	self->chunks = 0;
841 	myhtml_encoding_set(self->tree, self->chunk_opts.encoding == MyENCODING_AUTO ? self->chunk_opts.default_encoding : self->chunk_opts.encoding);
842 
843 	RETVAL = SvREFCNT_inc(ST(0));
844 OUTPUT:
845 	RETVAL
846 
847 # Parse html chunk
848 SV *
849 parseChunk(HTML5::DOM self, SV *html, HV *options = NULL)
850 CODE:
851 	mystatus_t status;
852 
853 	html = sv_stringify(html);
854 
855 	if (!self->tree) {
856 		self->tree = myhtml_tree_create();
857 		status = myhtml_tree_init(self->tree, self->myhtml);
858 		if (status) {
859 			myhtml_tree_destroy(self->tree);
860 			sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
861 		}
862 		memcpy(&self->opts, &self->chunk_opts, sizeof(html5_dom_options_t));
863 		myhtml_encoding_set(self->tree, self->chunk_opts.encoding == MyENCODING_AUTO ? self->chunk_opts.default_encoding : self->chunk_opts.encoding);
864 		self->chunks = 0;
865 	}
866 
867 	STRLEN html_length;
868 	const char *html_str = SvPV_const(html, html_length);
869 
870 	// Try detect encoding only in first chunk
871 	if (!self->chunks) {
872 		myhtml_encoding_set(self->tree, html5_dom_auto_encoding(&self->chunk_opts, &html_str, &html_length));
873 
874 		// Auto detect UTF8 flag
875 		if (self->chunk_opts.utf8 == 2)
876 			self->chunk_opts.utf8 = SvUTF8(html) ? 1 : 0;
877 
878 		html5_dom_apply_tree_options(self->tree, &self->chunk_opts);
879 	}
880 
881 	++self->chunks;
882 
883 	status = myhtml_parse_chunk(self->tree, html_str, html_length);
884 	if (status) {
885 		if (!self->tree->context)
886 			myhtml_tree_destroy(self->tree);
887 		sub_croak(cv, "myhtml_parse_chunk failed: %d (%s)", status, modest_strerror(status));
888 	}
889 
890 	RETVAL = SvREFCNT_inc(ST(0));
891 OUTPUT:
892 	RETVAL
893 
894 # Get current Tree from current chunked parsing session
895 SV *
896 parseChunkTree(HTML5::DOM self)
897 CODE:
898 	mystatus_t status;
899 
900 	if (!self->tree)
901 		sub_croak(cv, "call parseChunkStart or parseChunk first");
902 
903 	RETVAL = create_tree_object(self->tree, SvRV(ST(0)), self, true, self->chunk_opts.utf8);
904 OUTPUT:
905 	RETVAL
906 
907 # End of parse chunks (return Tree)
908 SV *
909 parseChunkEnd(HTML5::DOM self)
910 CODE:
911 	mystatus_t status;
912 
913 	if (!self->tree)
914 		sub_croak(cv, "call parseChunkStart or parseChunk first");
915 
916 	status = myhtml_parse_chunk_end(self->tree);
917 	if (status) {
918 		if (!self->tree->context)
919 			myhtml_tree_destroy(self->tree);
920 		sub_croak(cv, "myhtml_parse_chunk failed:%d (%s)", status, modest_strerror(status));
921 	}
922 
923 	if (self->tree) {
924 		html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree;
925 		tree_context->used = false;
926 	}
927 
928 	RETVAL = create_tree_object(self->tree, SvRV(ST(0)), self, false, self->chunk_opts.utf8);
929 	self->tree = NULL;
930 OUTPUT:
931 	RETVAL
932 
933 # Parse full html
934 SV *
935 parse(HTML5::DOM self, SV *html, HV *options = NULL)
936 CODE:
937 	mystatus_t status;
938 	html5_dom_options_t opts = {0};
939 
940 	html5_dom_parse_options(&opts, &self->opts, options);
941 	html5_dom_check_options(cv, &opts);
942 
943 	html = sv_stringify(html);
944 
945 	myhtml_tree_t *tree = myhtml_tree_create();
946 	status = myhtml_tree_init(tree, self->myhtml);
947 	if (status) {
948 		myhtml_tree_destroy(tree);
949 		sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
950 	}
951 
952 	STRLEN html_length;
953 	const char *html_str = SvPV_const(html, html_length);
954 
955 	myencoding_t encoding = html5_dom_auto_encoding(&opts, &html_str, &html_length);
956 
957 	// Auto detect UTF8 flag
958 	if (opts.utf8 == 2)
959 		opts.utf8 = SvUTF8(html) ? 1 : 0;
960 
961 	html5_dom_apply_tree_options(tree, &opts);
962 
963 	status = myhtml_parse(tree, encoding, html_str, html_length);
964 	if (status) {
965 		myhtml_tree_destroy(tree);
966 		sub_croak(cv, "myhtml_parse failed: %d (%s)", status, modest_strerror(status));
967 	}
968 
969 	RETVAL = create_tree_object(tree, SvRV(ST(0)), self, false, opts.utf8);
970 OUTPUT:
971 	RETVAL
972 
973 # Parse full html (in background)
974 HTML5::DOM::AsyncResult
975 _parseAsync(HTML5::DOM self, SV *html, HV *options = NULL, int ev_fd = -1)
976 CODE:
977 	DOM_GC_TRACE("DOM::AsyncResult::new");
978 	html = sv_stringify(html);
979 	RETVAL = html5_dom_async_parse_init(cv, self, html, options, ev_fd);
980 OUTPUT:
981 	RETVAL
982 
983 void
984 DESTROY(HTML5::DOM self)
985 CODE:
986 	DOM_GC_TRACE("DOM::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
987 	html5_dom_parser_free(self);
988 
989 
990 
991 #################################################################
992 # HTML5::DOM::AsyncResult
993 #################################################################
994 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::AsyncResult
995 
996 # Wait for parsing done and return HTML5::DOM::Tree
997 SV *
998 wait(HTML5::DOM::AsyncResult self)
999 CODE:
1000 	RETVAL = html5_dom_async_parse_done(cv, self, true);
1001 OUTPUT:
1002 	RETVAL
1003 
1004 # True if parsing done
1005 int
1006 parsed(HTML5::DOM::AsyncResult self)
1007 CODE:
1008 	RETVAL = self->done ? 1 : 0;
1009 OUTPUT:
1010 	RETVAL
1011 
1012 # Return HTML5::DOM::Tree if parsing done
1013 SV *
1014 tree(HTML5::DOM::AsyncResult self)
1015 CODE:
1016 	RETVAL = html5_dom_async_parse_done(cv, self, false);
1017 OUTPUT:
1018 	RETVAL
1019 
1020 void
1021 DESTROY(HTML5::DOM::AsyncResult self)
1022 CODE:
1023 	DOM_GC_TRACE("DOM::AsyncResult::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
1024 	if (self->thread)
1025 		self->thread = mythread_destroy(self->thread, NULL, NULL, true);
1026 
1027 	if (self->tree) {
1028 		self->tree = myhtml_tree_destroy(self->tree);
1029 
1030 		if (self->parser)
1031 			self->parser = html5_dom_parser_free(self->parser);
1032 	}
1033 
1034 	if (self->tree_sv)
1035 		SvREFCNT_dec((SV *) self->tree_sv);
1036 
1037 	if (self->html)
1038 		safefree(self->html);
1039 
1040 	safefree(self);
1041 
1042 #################################################################
1043 # HTML5::DOM::Tree
1044 #################################################################
1045 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::Tree
1046 
1047 SV *
1048 body(HTML5::DOM::Tree self)
1049 CODE:
1050 	RETVAL = node_to_sv(myhtml_tree_get_node_body(self->tree));
1051 OUTPUT:
1052 	RETVAL
1053 
1054 SV *
1055 createElement(HTML5::DOM::Tree self, SV *tag, SV *ns_name = NULL)
1056 CODE:
1057 	// Get namespace id by name
1058 	myhtml_namespace_t ns = MyHTML_NAMESPACE_HTML;
1059 	if (ns_name) {
1060 		ns_name = sv_stringify(ns_name);
1061 		STRLEN ns_name_len;
1062 		const char *ns_name_str = SvPV_const(ns_name, ns_name_len);
1063 		if (!myhtml_namespace_id_by_name(ns_name_str, ns_name_len, &ns))
1064 			sub_croak(cv, "unknown namespace: %s", ns_name_str);
1065 	}
1066 
1067 	// Get tag id by name
1068 	tag = sv_stringify(tag);
1069 	STRLEN tag_len;
1070 	const char *tag_str = SvPV_const(tag, tag_len);
1071 	myhtml_tag_id_t tag_id = html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, true);
1072 
1073 	// create new node
1074 	myhtml_tree_node_t *node = myhtml_node_create(self->tree, tag_id, ns);
1075 
1076 	// if void - mark self-closed
1077 	if (myhtml_node_is_void_element(node)) {
1078 		if (!node->token) {
1079 			node->token = myhtml_token_node_create(node->tree->token, self->tree->mcasync_rules_token_id);
1080 			if (!node->token) {
1081 				myhtml_tree_node_delete(node);
1082 				sub_croak(cv, "myhtml_token_node_create failed");
1083 			}
1084 		}
1085 		node->token->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF | MyHTML_TOKEN_TYPE_DONE;
1086 	}
1087 
1088 	RETVAL = node_to_sv(node);
1089 OUTPUT:
1090 	RETVAL
1091 
1092 SV *
1093 createComment(HTML5::DOM::Tree self, SV *text)
1094 CODE:
1095 	text = sv_stringify(text);
1096 	STRLEN text_len;
1097 	const char *text_str = SvPV_const(text, text_len);
1098 	myhtml_tree_node_t *node = myhtml_node_create(self->tree, MyHTML_TAG__COMMENT, MyHTML_NAMESPACE_HTML);
1099 	myhtml_node_text_set(node, text_str, text_len, MyENCODING_DEFAULT);
1100 	RETVAL = node_to_sv(node);
1101 OUTPUT:
1102 	RETVAL
1103 
1104 SV *
1105 createTextNode(HTML5::DOM::Tree self, SV *text)
1106 CODE:
1107 	text = sv_stringify(text);
1108 	STRLEN text_len;
1109 	const char *text_str = SvPV_const(text, text_len);
1110 	myhtml_tree_node_t *node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, MyHTML_NAMESPACE_HTML);
1111 	myhtml_node_text_set(node, text_str, text_len, MyENCODING_DEFAULT);
1112 	RETVAL = node_to_sv(node);
1113 OUTPUT:
1114 	RETVAL
1115 
1116 # Parse fragment
1117 SV *parseFragment(HTML5::DOM::Tree self, SV *text, SV *tag = NULL, SV *ns = NULL, HV *options = NULL)
1118 CODE:
1119 	text = sv_stringify(text);
1120 	STRLEN text_len;
1121 	const char *text_str = SvPV_const(text, text_len);
1122 
1123 	mystatus_t status;
1124 	myhtml_namespace_t ns_id = MyHTML_NAMESPACE_HTML;
1125 	myhtml_tag_id_t tag_id = MyHTML_TAG_DIV;
1126 
1127 	if (ns) {
1128 		ns = sv_stringify(ns);
1129 		STRLEN ns_len;
1130 		const char *ns_str = SvPV_const(ns, ns_len);
1131 
1132 		if (!myhtml_namespace_id_by_name(ns_str, ns_len, &ns_id))
1133 			sub_croak(cv, "unknown namespace: %s", ns_str);
1134 	}
1135 
1136 	if (tag) {
1137 		tag = sv_stringify(tag);
1138 		STRLEN tag_len;
1139 		const char *tag_str = SvPV_const(tag, tag_len);
1140 		tag_id = html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, true);
1141 	}
1142 
1143 	html5_dom_options_t opts = {0};
1144 	html5_dom_parse_options(&opts, &self->parser->opts, options);
1145 	html5_dom_check_options(cv, &opts);
1146 
1147 	myhtml_tree_node_t *node = html5_dom_parse_fragment(&opts, self->tree, tag_id, ns_id, text_str, text_len, NULL, &status);
1148 	if (status)
1149 		sub_croak(cv, "myhtml_parse_fragment failed: %d (%s)", status, modest_strerror(status));
1150 
1151 	RETVAL = node_to_sv(node);
1152 OUTPUT:
1153 	RETVAL
1154 
1155 SV *
1156 head(HTML5::DOM::Tree self)
1157 CODE:
1158 	RETVAL = node_to_sv(myhtml_tree_get_node_head(self->tree));
1159 OUTPUT:
1160 	RETVAL
1161 
1162 SV *
1163 root(HTML5::DOM::Tree self)
1164 CODE:
1165 	RETVAL = node_to_sv(myhtml_tree_get_node_html(self->tree));
1166 OUTPUT:
1167 	RETVAL
1168 
1169 SV *
1170 document(HTML5::DOM::Tree self)
1171 CODE:
1172 	RETVAL = node_to_sv(myhtml_tree_get_document(self->tree));
1173 OUTPUT:
1174 	RETVAL
1175 
1176 SV *
1177 find(HTML5::DOM::Tree self, SV *query, SV *combinator = NULL)
1178 ALIAS:
1179 	at					= 1
1180 	querySelector		= 2
1181 	querySelectorAll	= 3
1182 CODE:
1183 	myhtml_tree_node_t *scope = myhtml_tree_get_document(self->tree);
1184 	if (scope) {
1185 		RETVAL = html5_node_find(cv, self->parser, scope, query, combinator, ix == 1 || ix == 2);
1186 	} else {
1187 		RETVAL = &PL_sv_undef;
1188 	}
1189 OUTPUT:
1190 	RETVAL
1191 
1192 # Wait for parsing done (when async mode) - removed
1193 SV *
1194 wait(HTML5::DOM::Tree self)
1195 CODE:
1196 	RETVAL = SvREFCNT_inc(ST(0));
1197 OUTPUT:
1198 	RETVAL
1199 
1200 # True if parsing done (when async mode) - removed
1201 int
1202 parsed(HTML5::DOM::Tree self)
1203 CODE:
1204 	RETVAL = 1;
1205 OUTPUT:
1206 	RETVAL
1207 
1208 # utf8(flag)				- enable or disable utf8 mode
1209 # utf8()					- get status of utf8 mode (0 - disabled, 1 - enabled)
1210 SV *
1211 utf8(HTML5::DOM::Tree self, SV *value = NULL)
1212 CODE:
1213 	if (!value) {
1214 		RETVAL = newSViv(self->utf8 ? 1 : 0);
1215 	} else {
1216 		value = sv_stringify(value);
1217 
1218 		STRLEN enc_length;
1219 		const char *enc_str = SvPV_const(value, enc_length);
1220 
1221 		if (enc_length > 0) {
1222 			if (isdigit(enc_str[0])) {
1223 				self->utf8 = SvIV(value) != 0;
1224 			} else {
1225 				self->utf8 = 1;
1226 			}
1227 		}
1228 
1229 		self->utf8 = 0;
1230 
1231 		RETVAL = SvREFCNT_inc(ST(0));
1232 	}
1233 OUTPUT:
1234 	RETVAL
1235 
1236 # findTag(val), getElementsByTagName(val)									- get nodes by tag name
1237 # findClass(val), getElementsByClassName(val)								- get nodes by class name
1238 # findId(val), getElementById(val)											- get node by id
1239 # findAttr(key), getElementByAttribute(key)									- get nodes by attribute key
1240 # findAttr(key, val, case, cmp), getElementByAttribute(key, val, case, cmp)	- get nodes by attribute value
1241 SV *
1242 findTag(HTML5::DOM::Tree self, SV *key, SV *val = NULL, bool icase = false, SV *cmp = NULL)
1243 ALIAS:
1244 	getElementsByTagName	= 1
1245 	findClass				= 2
1246 	getElementsByClassName	= 3
1247 	findId					= 4
1248 	getElementById			= 5
1249 	findAttr				= 6
1250 	getElementByAttribute	= 7
1251 CODE:
1252 	RETVAL = html5_node_simple_find(cv, myhtml_tree_get_document(self->tree), key, val, cmp, icase, ix);
1253 OUTPUT:
1254 	RETVAL
1255 
1256 # Get compat node
1257 SV *
1258 compatMode(HTML5::DOM::Tree self)
1259 CODE:
1260 	if (self->tree->compat_mode == MyHTML_TREE_COMPAT_MODE_QUIRKS) {
1261 		// if the document is in quirks mode.
1262 		RETVAL = newSVpv_utf8_auto(self->tree, "BackCompat", 10);
1263 	} else {
1264 		// if the document is in no-quirks (also known as "standards") mode or limited-quirks (also known as "almost standards") mode.
1265 		RETVAL = newSVpv_utf8_auto(self->tree, "CSS1Compat", 10);
1266 	}
1267 OUTPUT:
1268 	RETVAL
1269 
1270 # Get current tree encoding name
1271 SV *
1272 encoding(HTML5::DOM::Tree self)
1273 CODE:
1274 	size_t length = 0;
1275 	const char *name = myencoding_name_by_id(self->tree->encoding, &length);
1276 	RETVAL = newSVpv_utf8_auto(self->tree, name ? name : "", length);
1277 OUTPUT:
1278 	RETVAL
1279 
1280 # Get current tree encoding id
1281 SV *
1282 encodingId(HTML5::DOM::Tree self)
1283 CODE:
1284 	RETVAL = newSViv(self->tree->encoding);
1285 OUTPUT:
1286 	RETVAL
1287 
1288 # Tag id by tag name
1289 SV *
1290 tag2id(HTML5::DOM::Tree self, SV *tag)
1291 CODE:
1292 	tag = sv_stringify(tag);
1293 	STRLEN tag_len;
1294 	const char *tag_str = SvPV_const(tag, tag_len);
1295 	RETVAL = newSViv(html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, false));
1296 OUTPUT:
1297 	RETVAL
1298 
1299 # Tag name by tag id
1300 SV *
1301 id2tag(HTML5::DOM::Tree self, int tag_id)
1302 CODE:
1303 	RETVAL = &PL_sv_undef;
1304 	const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(self->tree->tags, tag_id);
1305 	if (tag_ctx)
1306 		RETVAL = newSVpv_utf8_auto(self->tree, tag_ctx->name ? tag_ctx->name : "", tag_ctx->name_length);
1307 OUTPUT:
1308 	RETVAL
1309 
1310 # Namespace id by namepsace name
1311 SV *
1312 namespace2id(HTML5::DOM::Tree self, SV *ns)
1313 CODE:
1314 	ns = sv_stringify(ns);
1315 	STRLEN ns_len;
1316 	const char *ns_str = SvPV_const(ns, ns_len);
1317 
1318 	myhtml_namespace_t ns_id;
1319 	if (!myhtml_namespace_id_by_name(ns_str, ns_len, &ns_id))
1320 		ns_id = MyHTML_NAMESPACE_UNDEF;
1321 
1322 	RETVAL = newSViv(ns_id);
1323 OUTPUT:
1324 	RETVAL
1325 
1326 # Namespace name by namepsace id
1327 SV *
1328 id2namespace(HTML5::DOM::Tree self, int ns_id)
1329 CODE:
1330 	size_t ns_len = 0;
1331 	const char *ns_name = myhtml_namespace_name_by_id(ns_id, &ns_len);
1332 	RETVAL = ns_name ? newSVpv_utf8_auto(self->tree, ns_name, ns_len) : &PL_sv_undef;
1333 OUTPUT:
1334 	RETVAL
1335 
1336 # Return tree parent parser
1337 SV *
1338 parser(HTML5::DOM::Tree self)
1339 CODE:
1340 	RETVAL = myhtml_to_sv(self->tree);
1341 OUTPUT:
1342 	RETVAL
1343 
1344 # Some bad idea to get "uniq id"
1345 SV *
1346 hash(HTML5::DOM::Node self)
1347 CODE:
1348 	RETVAL = newSViv(PTR2IV(self));
1349 OUTPUT:
1350 	RETVAL
1351 
1352 # Compare tree reference
1353 bool
1354 isSameTree(HTML5::DOM::Tree self, SV *other_tree)
1355 CODE:
1356 	RETVAL = false;
1357 	if (sv_derived_from(other_tree, "HTML5::DOM::Tree")) {
1358 		html5_dom_tree_t *tree = INT2PTR(html5_dom_tree_t *, SvIV((SV*)SvRV(other_tree)));
1359 		if (tree == self)
1360 			RETVAL = true;
1361 	}
1362 OUTPUT:
1363 	RETVAL
1364 
1365 void
1366 DESTROY(HTML5::DOM::Tree self)
1367 CODE:
1368 	DOM_GC_TRACE("DOM::Tree::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
1369 	void *context = self->tree->context;
1370 	if (self->used) {
1371 		self->tree->context = NULL;
1372 	} else {
1373 		myhtml_tree_destroy(self->tree);
1374 	}
1375 	SvREFCNT_dec(self->parent);
1376 	safefree(context);
1377 
1378 
1379 #################################################################
1380 # HTML5::DOM::Node
1381 #################################################################
1382 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::Node
1383 # Tag id
1384 SV *
1385 tagId(HTML5::DOM::Node self, SV *new_tag_id = NULL)
1386 CODE:
1387 	if (new_tag_id) {
1388 		const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(self->tree->tags, SvIV(new_tag_id));
1389 		if (tag_ctx) {
1390 			self->tag_id = SvIV(new_tag_id);
1391 		} else {
1392 			sub_croak(cv, "unknown tag id %ld", SvIV(new_tag_id));
1393 		}
1394 
1395 		RETVAL = SvREFCNT_inc(ST(0));
1396 	} else {
1397 		RETVAL = newSViv(self->tag_id);
1398 	}
1399 OUTPUT:
1400 	RETVAL
1401 
1402 # Namespace id
1403 SV *
1404 namespaceId(HTML5::DOM::Node self, SV *new_ns_id = NULL)
1405 CODE:
1406 	if (new_ns_id) {
1407 		if (!myhtml_namespace_name_by_id(SvIV(new_ns_id), NULL)) {
1408 			sub_croak(cv, "unknown namespace id %ld", SvIV(new_ns_id));
1409 		} else {
1410 			myhtml_node_namespace_set(self, SvIV(new_ns_id));
1411 		}
1412 		RETVAL = SvREFCNT_inc(ST(0));
1413 	} else {
1414 		RETVAL = newSViv(myhtml_node_namespace(self));
1415 	}
1416 OUTPUT:
1417 	RETVAL
1418 
1419 # Tag name
1420 SV *
1421 tag(HTML5::DOM::Node self, SV *new_tag_name = NULL)
1422 ALIAS:
1423 	nodeName	= 1
1424 	tagName		= 2
1425 CODE:
1426 	myhtml_tree_t *tree = self->tree;
1427 
1428 	// Set new tag name
1429 	if (new_tag_name) {
1430 		new_tag_name = sv_stringify(new_tag_name);
1431 		STRLEN new_tag_name_len;
1432 		const char *new_tag_name_str = SvPV_const(new_tag_name, new_tag_name_len);
1433 
1434 		if (!new_tag_name_len)
1435 			sub_croak(cv, "empty tag name not allowed.");
1436 
1437 		myhtml_tag_id_t tag_id = html5_dom_tag_id_by_name(self->tree, new_tag_name_str, new_tag_name_len, true);
1438 		self->tag_id = tag_id;
1439 
1440 		RETVAL = SvREFCNT_inc(ST(0));
1441 	}
1442 	// Get tag name
1443 	else {
1444 		RETVAL = &PL_sv_undef;
1445 
1446 		if (tree && tree->tags) {
1447 			const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, self->tag_id);
1448 			if (tag_ctx) {
1449 				RETVAL = newSVpv_utf8_auto(self->tree, tag_ctx->name, tag_ctx->name_length);
1450 				if (ix == 1 || ix == 2) {
1451 					STRLEN value_len;
1452 					char *value = SvPV(RETVAL, value_len);
1453 					for (size_t i = 0; i < value_len; ++i)
1454 						value[i] = toupper(value[i]);
1455 				}
1456 			}
1457 		}
1458 	}
1459 OUTPUT:
1460 	RETVAL
1461 
1462 # Namespace name
1463 SV *
1464 namespace(HTML5::DOM::Node self, SV *new_ns = NULL)
1465 CODE:
1466 	myhtml_tree_t *tree = self->tree;
1467 
1468 	// Set new tag namespace
1469 	if (new_ns) {
1470 		new_ns = sv_stringify(new_ns);
1471 		STRLEN new_ns_len;
1472 		const char *new_ns_str = SvPV_const(new_ns, new_ns_len);
1473 
1474 		myhtml_namespace_t ns;
1475 		if (!myhtml_namespace_id_by_name(new_ns_str, new_ns_len, &ns))
1476 			sub_croak(cv, "unknown namespace: %s", new_ns_str);
1477 		myhtml_node_namespace_set(self, ns);
1478 
1479 		RETVAL = SvREFCNT_inc(ST(0));
1480 	}
1481 	// Get namespace name
1482 	else {
1483 		size_t ns_name_len;
1484 		const char *ns_name = myhtml_namespace_name_by_id(myhtml_node_namespace(self), &ns_name_len);
1485 		RETVAL = newSVpv_utf8_auto(self->tree, ns_name ? ns_name : "", ns_name_len);
1486 	}
1487 OUTPUT:
1488 	RETVAL
1489 
1490 # Return node parent tree
1491 SV *
1492 tree(HTML5::DOM::Node self)
1493 CODE:
1494 	RETVAL = tree_to_sv(self->tree);
1495 OUTPUT:
1496 	RETVAL
1497 
1498 # Non-recursive html serialization (example: <div id="some_id">)
1499 SV *
1500 nodeHtml(HTML5::DOM::Node self)
1501 CODE:
1502 	RETVAL = newSVpv_utf8_auto(self->tree, "", 0);
1503 	myhtml_serialization_node_callback(self, sv_serialization_callback, RETVAL);
1504 OUTPUT:
1505 	RETVAL
1506 
1507 # Return node type
1508 int
1509 nodeType(HTML5::DOM::Node self)
1510 CODE:
1511 	html5_dom_tree_t *context = (html5_dom_tree_t *) self->tree->context;
1512 	RETVAL = 0;
1513 	if (self->tag_id != MyHTML_TAG__UNDEF) {
1514 		if (self->tag_id == MyHTML_TAG__TEXT) {
1515 			RETVAL = TEXT_NODE;
1516 		} else if (self->tag_id == MyHTML_TAG__COMMENT) {
1517 			RETVAL = COMMENT_NODE;
1518 		} else if (self->tag_id == MyHTML_TAG__DOCTYPE) {
1519 			RETVAL = DOCUMENT_TYPE_NODE;
1520 		} else if (context->fragment_tag_id && self->tag_id == context->fragment_tag_id) {
1521 			RETVAL = DOCUMENT_FRAGMENT_NODE;
1522 		} else {
1523 			RETVAL = ELEMENT_NODE;
1524 		}
1525 	} else {
1526 		// Modest myhtml bug - document node has tag_id == MyHTML_TAG__UNDEF
1527 		if (node_is_document(self))
1528 			RETVAL = DOCUMENT_NODE;
1529 	}
1530 OUTPUT:
1531 	RETVAL
1532 
# Node::html()			- Serialize text/comment node to html
# Node::html(text)		- Same as Node::nodeValue(text)
# Element::html(text)	- Remove all children nodes and add parsed fragment, return self
#
# Aliases: innerHTML (ix == 1), outerHTML (ix == 2).
# Getter: returns a new string; for innerHTML, fragments and nodes with an undefined tag id
#         only the children are serialized, otherwise the node itself is serialized.
# Setter: returns self; outerHTML croaks when the node has no parent.
SV *
html(HTML5::DOM::Node self, SV *text = NULL)
ALIAS:
	innerHTML	= 1
	outerHTML	= 2
CODE:
	if (text) {
		if (ix == 2 && !myhtml_node_parent(self)) // outerHTML
			sub_croak(cv, "This element has no parent node.");

		text = sv_stringify(text);
		STRLEN text_len;
		const char *text_str = SvPV_const(text, text_len);

		if (node_is_element(self) || node_is_document(self)) { // parse fragment and replace all node childrens with it
			// parse fragment
			mystatus_t status;
			html5_fragment_parts_t parts = {0};
			// outerHTML parses in the context of the parent, innerHTML in the context of the node itself
			myhtml_tree_node_t *context_node = ix == 2 ? myhtml_node_parent(self) : self;
			myhtml_tag_id_t context_tag_id = context_node->tag_id;

			// hack for document node
			if (node_is_document(context_node))
				context_tag_id = MyHTML_TAG_HTML;

			// start from the options of the parser that owns this tree (no per-call overrides)
			html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree->context;
			html5_dom_options_t opts = {0};
			html5_dom_parse_options(&opts, &tree_context->parser->opts, NULL);

			// force set encoding to UTF-8
			opts.encoding			= MyENCODING_DEFAULT;
			opts.default_encoding	= MyENCODING_DEFAULT;

			myhtml_tree_node_t *fragment = html5_dom_parse_fragment(&opts, self->tree, context_tag_id, myhtml_node_namespace(context_node), text_str, text_len, &parts, &status);
			if (status)
				sub_croak(cv, "myhtml_parse_fragment failed: %d (%s)", status, modest_strerror(status));

			// remove all child nodes
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				// fetch the sibling before the node is unlinked and deleted
				myhtml_tree_node_t *next = myhtml_node_next(node);
				myhtml_tree_node_remove(node);
				html5_tree_node_delete_recursive(node);
				node = next;
			}

			// cleanup references in tree
			// (the old html/head/body nodes were just deleted; point the tree at the ones found in the new fragment)
			if (node_is_root(self)) {
				self->tree->node_body = parts.node_body;
				self->tree->node_head = parts.node_head;
			} else if (node_is_document(self)) {
				self->tree->node_html = parts.node_html;
				self->tree->node_body = parts.node_body;
				self->tree->node_head = parts.node_head;
			}

			if (fragment != self->tree->node_html) {
				// add fragment
				node = myhtml_node_child(fragment);
				while (node) {
					// fetch the sibling before the node is moved
					myhtml_tree_node_t *next = myhtml_node_next(node);
					myhtml_tree_node_remove(node);
					if (ix == 2) { // outerHTML
						myhtml_tree_node_insert_before(self, node);
					} else { // innerHTML
						myhtml_tree_node_add_child(self, node);
					}
					node = next;
				}

				// remove self if outerHTML
				// (self is only unlinked from the tree here, not deleted)
				if (ix == 2)
					myhtml_tree_node_remove(self);

				// free fragment
				html5_tree_node_delete_recursive(fragment);
			} else {
				// fragment now is html node, why not?
				// (the fragment root itself became the tree's html node, so it is attached as a whole)
				fragment->tag_id = MyHTML_TAG_HTML;
				myhtml_tree_node_remove(fragment);
				myhtml_tree_node_add_child(self, fragment);
			}
		} else { // same as nodeValue, for user friendly API
			myhtml_node_text_set(self, text_str, text_len, MyENCODING_DEFAULT);
		}
		RETVAL = SvREFCNT_inc(ST(0));
	} else {
		// getter: serialize into a fresh, initially empty scalar
		RETVAL = newSVpv_utf8_auto(self->tree, "", 0);
		if (self->tag_id == MyHTML_TAG__UNDEF || ix == 1 || html5_dom_is_fragment(self)) { // innerHTML
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				myhtml_serialization_tree_callback(node, sv_serialization_callback, RETVAL);
				node = myhtml_node_next(node);
			}
		} else { // outerHTML
			myhtml_serialization_tree_callback(self, sv_serialization_callback, RETVAL);
		}
	}
OUTPUT:
	RETVAL
1636 
# Node::text()			- Serialize tree to text
# Node::text(text)		- Set node value, return self
# Element::text(text)	- Remove all children nodes and add text node, return self
#
# Aliases: nodeValue (1), innerText (2), textContent (3), data (4), outerText (5).
# Non-element nodes: innerText/textContent/outerText return undef when read and croak when written.
# Element nodes:     nodeValue/data return undef when read and croak when written.
SV *
text(HTML5::DOM::Node self, SV *text = NULL)
ALIAS:
	nodeValue		= 1
	innerText		= 2
	textContent		= 3
	data			= 4
	outerText		= 5
CODE:
	// method names indexed by alias number (ix), used in error messages
	static const char names[][16] = {
		"text", "nodeValue", "innerText", "textContent", "data", "outerText"
	};

	myhtml_tree_t *tree = self->tree;
	if (!node_is_element(self)) {
		if (ix == 2 || ix == 3 || ix == 5) {
			if (text) {
				sub_croak(cv, "%s unsupported in %s", names[ix], get_node_class(self));
			} else {
				RETVAL = &PL_sv_undef;
			}
		} else if (text) { // set node value
			text = sv_stringify(text);
			STRLEN text_len;
			const char *text_str = SvPV_const(text, text_len);

			myhtml_node_text_set(self, text_str, text_len, MyENCODING_DEFAULT);
			RETVAL = SvREFCNT_inc(ST(0));
		} else { // get node value
			size_t text_len = 0;
			// note: this local shadows the SV *text parameter (which is NULL on this path)
			const char *text = myhtml_node_text(self, &text_len);
			RETVAL = newSVpv_utf8_auto(self->tree, text ? text : "", text_len);
		}
	} else {
		if (ix == 1 || ix == 4) {
			if (text) {
				sub_croak(cv, "%s unsupported in %s", names[ix], get_node_class(self));
			} else {
				RETVAL = &PL_sv_undef;
			}
		} else if (text) { // remove all childrens and add text node
			text = sv_stringify(text);
			STRLEN text_len;
			const char *text_str = SvPV_const(text, text_len);

			// remove all children nodes
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				// fetch the sibling before the node is unlinked and deleted
				myhtml_tree_node_t *next = myhtml_node_next(node);
				myhtml_tree_node_remove(node);
				html5_tree_node_delete_recursive(node);
				node = next;
			}

			// cleanup references in tree
			// (the nodes they pointed to were just deleted)
			if (node_is_root(self)) {
				self->tree->node_body = NULL;
				self->tree->node_head = NULL;
			} else if (node_is_document(self)) {
				self->tree->node_html = NULL;
				self->tree->node_body = NULL;
				self->tree->node_head = NULL;
			}

			// innerText, outerText
			// The text is split on line breaks: each line becomes a text node and
			// each line break ("\n", "\r" or "\r\n") becomes a <br> node.
			if (ix == 2 || ix == 5) {
				// start of the line currently being collected
				size_t last_pos = 0;
				for (size_t i = 0; i < text_len; ++i) {
					bool is_end = (i >= text_len - 1);
					bool is_new_line = (text_str[i] == '\n' || text_str[i] == '\r');
					if (is_end || is_new_line) {
						// last character is ordinary text: step past it so it is included in the final line
						if (is_end && !is_new_line)
							++i;

						// insert new text node
						// (skipped for empty lines)
						if (i - last_pos) {
							myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, myhtml_node_namespace(self));
							myhtml_node_text_set(text_node, &text_str[last_pos], i - last_pos, MyENCODING_DEFAULT);
							if (ix == 5) { // outerText
								myhtml_tree_node_insert_before(self, text_node);
							} else { // innerText
								myhtml_tree_node_add_child(self, text_node);
							}
						}

						// insert new br
						if (is_new_line) {
							myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG_BR, myhtml_node_namespace(self));
							// create a token on demand and flag the br as self-closed
							if (!text_node->token) {
								text_node->token = myhtml_token_node_create(self->tree->token, self->tree->mcasync_rules_token_id);
								if (!text_node->token) {
									myhtml_tree_node_delete(text_node);
									sub_croak(cv, "myhtml_token_node_create failed");
								}
								text_node->token->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF | MyHTML_TOKEN_TYPE_DONE;
							}

							if (ix == 5) { // outerText
								myhtml_tree_node_insert_before(self, text_node);
							} else { // innerText
								myhtml_tree_node_add_child(self, text_node);
							}
						}

						if (!is_end) {
							// treat "\r\n" as a single line break (i + 1 is in range because this is not the last character)
							if (text_str[i] == '\r' && text_str[i + 1] == '\n')
								++i;
							last_pos = i + 1;
						}
					}
				}
			}
			// text, textContent
			else {
				myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, myhtml_node_namespace(self));
				myhtml_node_text_set(text_node, text_str, text_len, MyENCODING_DEFAULT);
				myhtml_tree_node_add_child(self, text_node);
			}

			RETVAL = SvREFCNT_inc(ST(0));

			if (ix == 5) {
				// remove self, if outerText
				// (self is only unlinked from the tree here, not deleted)
				myhtml_tree_node_remove(self);
			}
		} else { // recursive serialize node to text
			// innerText, outerText
			if (ix == 2 || ix == 5) {
				html5_dom_inner_text_state_t state = {0};
				state.last_br = true;
				state.new_line = true;

				// temporary string buffer that collects the rendered text
				mycore_string_init(self->tree->mchar, self->tree->mchar_node_id, &state.value, 1);

				myhtml_tree_node_t *next = myhtml_node_child(self);
				while (next) {
					html5_dom_recursive_node_inner_text(next, &state);
					next = myhtml_node_next(next);
				}
				// strip trailing spaces from the collected text
				html5_dom_rtrim_mystring(&state.value, ' ');

				RETVAL = newSVpv_utf8_auto(self->tree, state.value.length ? state.value.data : "", state.value.length);
				mycore_string_destroy(&state.value, 0);
			}
			// text, textContent
			else {
				RETVAL = newSVpv_utf8_auto(self->tree, "", 0);
				html5_dom_recursive_node_text(self, RETVAL);
			}
		}
	}
OUTPUT:
	RETVAL
1793 
# Former async-mode helper (waiting for node parsing) - functionality removed.
# Does nothing and hands back the invocant.
SV *
wait(HTML5::DOM::Node self, bool deep = false)
CODE:
	// self and deep are accepted but not used by the body
	SV *invocant = ST(0);
	RETVAL = SvREFCNT_inc(invocant);
OUTPUT:
	RETVAL
1801 
# Former async-mode status check (parsing done?) - functionality removed.
# Always answers 1.
int
parsed(HTML5::DOM::Node self, bool deep = false)
CODE:
	// self and deep are accepted but not used by the body
	const int always_parsed = 1;
	RETVAL = always_parsed;
OUTPUT:
	RETVAL
1809 
# Nearest following sibling accepted by node_is_element()
SV *
next(HTML5::DOM::Node self)
ALIAS:
	nextElementSibling	= 1
CODE:
	myhtml_tree_node_t *sibling;

	// walk forward until an element is found or the sibling list ends
	for (sibling = myhtml_node_next(self); sibling; sibling = myhtml_node_next(sibling)) {
		if (node_is_element(sibling))
			break;
	}

	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1822 
# Immediately following sibling node, whatever its type
SV *
nextNode(HTML5::DOM::Node self)
ALIAS:
	nextSibling	= 1
CODE:
	myhtml_tree_node_t *sibling = myhtml_node_next(self);
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1832 
# Nearest preceding sibling accepted by node_is_element()
SV *
prev(HTML5::DOM::Node self)
ALIAS:
	previousElementSibling	= 1
CODE:
	myhtml_tree_node_t *sibling;

	// walk backwards until an element is found or the sibling list ends
	for (sibling = myhtml_node_prev(self); sibling; sibling = myhtml_node_prev(sibling)) {
		if (node_is_element(sibling))
			break;
	}

	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1845 
# Immediately preceding sibling node, whatever its type
SV *
prevNode(HTML5::DOM::Node self)
ALIAS:
	previousSibling	= 1
CODE:
	myhtml_tree_node_t *sibling = myhtml_node_prev(self);
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1855 
# parent(), parentNode(), parentElement()	- parent of this node
# isConnected()								- 1 when this node has a parent, 0 otherwise
SV *
parent(HTML5::DOM::Node self)
ALIAS:
	isConnected		= 1
	parentNode		= 2
	parentElement	= 3
CODE:
	if (ix == 1) {
		// isConnected: only report whether a parent exists
		RETVAL = newSViv(myhtml_node_parent(self) ? 1 : 0);
	} else {
		RETVAL = node_to_sv(myhtml_node_parent(self));
	}
OUTPUT:
	RETVAL
1867 
# Document node of the tree this node belongs to
SV *
document(HTML5::DOM::Node self)
ALIAS:
	ownerDocument	= 1
CODE:
	myhtml_tree_node_t *document_node = myhtml_tree_get_document(self->tree);
	RETVAL = node_to_sv(document_node);
OUTPUT:
	RETVAL
1877 
# remove()				- detach this node from its tree
# removeChild(node)		- detach node, which must be a direct child of this node
# Both forms return whatever myhtml_tree_node_remove() hands back for the detached node.
SV *
remove(HTML5::DOM::Node self, HTML5::DOM::Node node = NULL)
ALIAS:
	removeChild	= 1
CODE:
	// plain remove() acts on the invocant; any extra argument is ignored
	myhtml_tree_node_t *target = self;

	if (ix == 1) {
		// removeChild: the argument is mandatory and must hang directly under self
		if (!node)
			sub_croak(cv, "%s is not of type %s", "node", "HTML5::DOM::Node");
		if (node->parent != self)
			sub_croak(cv, "The node to be removed is not a child of this node.");
		target = node;
	}

	RETVAL = node_to_sv(myhtml_tree_node_remove(target));
OUTPUT:
	RETVAL
1895 
# before(node)						- insert node right before this node (returns this node)
# insertBefore(node, reference)		- insert node right before reference, which must be a child of this node (returns the inserted node)
# A node owned by another tree is detached there and the copy made by html5_dom_recursive_clone_node() is inserted instead.
# For a fragment, its children are moved one by one rather than the fragment itself.
SV *
before(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
	insertBefore	= 1
CODE:
	const bool dom_style = (ix == 1);
	myhtml_tree_node_t *new_node = a;
	myhtml_tree_node_t *reference_node = dom_style ? b : self;

	if (dom_style) {
		if (!reference_node)
			sub_croak(cv, "%s is not of type %s", "reference_node", "HTML5::DOM::Node");
		if (reference_node->parent != self)
			sub_croak(cv, "The node before which the new node is to be inserted is not a child of this node.");
	}

	// a sibling can only be linked in when the reference has a parent
	if (!myhtml_node_parent(reference_node))
		sub_croak(cv, "can't insert before detached node");

	// foreign node: detach it from its own tree and work with a copy owned by our tree
	if (reference_node->tree != new_node->tree) {
		myhtml_tree_node_remove(new_node);
		new_node = html5_dom_recursive_clone_node(reference_node->tree, new_node, NULL);
		if (!new_node)
			sub_croak(cv, "node copying internal error");
	}

	if (html5_dom_is_fragment(new_node)) {
		myhtml_tree_node_t *moved, *upcoming;

		// the next sibling is remembered before each child is unlinked
		for (moved = myhtml_node_child(new_node); moved; moved = upcoming) {
			upcoming = myhtml_node_next(moved);
			myhtml_tree_node_remove(moved);
			myhtml_tree_node_insert_before(reference_node, moved);
		}
	} else {
		myhtml_tree_node_remove(new_node);
		myhtml_tree_node_insert_before(reference_node, new_node);
	}

	if (dom_style) {
		RETVAL = node_to_sv(new_node);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
1947 
# after(node)						- insert node right after this node (returns this node)
# insertAfter(node, reference)		- insert node right after reference, which must be a child of this node (returns the inserted node)
# A node owned by another tree is detached there and the copy made by html5_dom_recursive_clone_node() is inserted instead.
# For a fragment, its children are moved one by one rather than the fragment itself.
SV *
after(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
	insertAfter	= 1
CODE:
	myhtml_tree_node_t *reference_node, *new_node;

	if (ix == 1) {
		// insertAfter(new, reference): reference is mandatory and must be our child
		new_node = a;
		reference_node = b;

		if (!reference_node)
			sub_croak(cv, "%s is not of type %s", "reference_node", "HTML5::DOM::Node");
		if (reference_node->parent != self)
			sub_croak(cv, "The node after which the new node is to be inserted is not a child of this node.");
	} else {
		// after(new): the invocant itself is the reference
		new_node = a;
		reference_node = self;
	}

	// a sibling can only be linked in when the reference has a parent
	if (!myhtml_node_parent(reference_node))
		sub_croak(cv, "can't insert after detached node");

	// foreign node: detach it from its own tree and work with a copy owned by our tree
	if (reference_node->tree != new_node->tree) {
		myhtml_tree_node_remove(new_node);
		new_node = html5_dom_recursive_clone_node(reference_node->tree, new_node, NULL);
		if (!new_node)
			sub_croak(cv, "node copying internal error");
	}

	if (html5_dom_is_fragment(new_node)) {
		// every child is placed directly after the reference, so going from the
		// last child to the first keeps the children in their original order
		myhtml_tree_node_t *fragment_child = myhtml_node_last_child(new_node);
		while (fragment_child) {
			myhtml_tree_node_t *next = myhtml_node_prev(fragment_child);
			myhtml_tree_node_remove(fragment_child);
			myhtml_tree_node_insert_after(reference_node, fragment_child);
			fragment_child = next;
		}
	} else {
		myhtml_tree_node_remove(new_node);
		myhtml_tree_node_insert_after(reference_node, new_node);
	}

	if (ix == 1) {
		RETVAL = node_to_sv(new_node);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
1999 
# append(child)			- add child as the last child of this node (returns this node)
# appendChild(child)	- same, but returns the appended node
# A child owned by another tree is detached there and the copy made by html5_dom_recursive_clone_node() is appended instead.
# For a fragment, its children are moved one by one rather than the fragment itself.
SV *
append(HTML5::DOM::Node self, HTML5::DOM::Node child)
ALIAS:
	appendChild	= 1
CODE:
	if (!node_is_element(self))
		sub_croak(cv, "can't append children to non-element node");

	// foreign node: detach it from its own tree and work with a copy owned by our tree
	if (self->tree != child->tree) {
		myhtml_tree_node_remove(child);
		child = html5_dom_recursive_clone_node(self->tree, child, NULL);
		if (!child)
			sub_croak(cv, "node copying internal error");
	}

	if (html5_dom_is_fragment(child)) {
		myhtml_tree_node_t *moved, *upcoming;

		// the next sibling is remembered before each child is unlinked
		for (moved = myhtml_node_child(child); moved; moved = upcoming) {
			upcoming = myhtml_node_next(moved);
			myhtml_tree_node_remove(moved);
			myhtml_tree_node_add_child(self, moved);
		}
	} else {
		myhtml_tree_node_remove(child);
		myhtml_tree_node_add_child(self, child);
	}

	if (ix == 1) {
		RETVAL = node_to_sv(child);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
2036 
# prepend(child)		- add child as the first child of this node (returns this node)
# prependChild(child)	- same, but returns the prepended node
# A child owned by another tree is detached there and the copy made by html5_dom_recursive_clone_node() is inserted instead.
# For a fragment, its children are moved one by one rather than the fragment itself.
SV *
prepend(HTML5::DOM::Node self, HTML5::DOM::Node child)
ALIAS:
	prependChild	= 1
CODE:
	if (!node_is_element(self))
		sub_croak(cv, "can't prepend children to non-element node");

	// foreign node: detach it from its own tree and work with a copy owned by our tree
	if (self->tree != child->tree) {
		myhtml_tree_node_remove(child);
		child = html5_dom_recursive_clone_node(self->tree, child, NULL);
		if (!child)
			sub_croak(cv, "node copying internal error");
	}

	// current first child, captured once; new nodes go in front of it,
	// or are simply added when there are no children yet
	myhtml_tree_node_t *anchor = myhtml_node_child(self);

	if (html5_dom_is_fragment(child)) {
		myhtml_tree_node_t *moved, *upcoming;

		// the next sibling is remembered before each child is unlinked
		for (moved = myhtml_node_child(child); moved; moved = upcoming) {
			upcoming = myhtml_node_next(moved);
			myhtml_tree_node_remove(moved);
			if (anchor) {
				myhtml_tree_node_insert_before(anchor, moved);
			} else {
				myhtml_tree_node_add_child(self, moved);
			}
		}
	} else {
		myhtml_tree_node_remove(child);
		if (anchor) {
			myhtml_tree_node_insert_before(anchor, child);
		} else {
			myhtml_tree_node_add_child(self, child);
		}
	}

	if (ix == 1) {
		RETVAL = node_to_sv(child);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
2082 
# replace(node)						- put node in the place of this node and detach this node (returns this node)
# replaceChild(node, old_child)		- put node in the place of old_child, which must be a child of this node (returns old_child)
# A node owned by another tree is detached there and the copy made by html5_dom_recursive_clone_node() is inserted instead.
# For a fragment, its children are moved one by one rather than the fragment itself.
SV *
replace(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
	replaceChild	= 1
CODE:
	const bool dom_style = (ix == 1);
	myhtml_tree_node_t *new_node = a;
	myhtml_tree_node_t *old_node = dom_style ? b : self;

	if (dom_style) {
		if (!old_node)
			sub_croak(cv, "%s is not of type %s", "old_node", "HTML5::DOM::Node");
		if (old_node->parent != self)
			sub_croak(cv, "The node to be replaced is not a child of this node.");
	}

	// foreign node: detach it from its own tree and work with a copy owned by our tree
	if (old_node->tree != new_node->tree) {
		myhtml_tree_node_remove(new_node);
		new_node = html5_dom_recursive_clone_node(old_node->tree, new_node, NULL);
		if (!new_node)
			sub_croak(cv, "node copying internal error");
	}

	// the replacement is first linked in front of the old node...
	if (html5_dom_is_fragment(new_node)) {
		myhtml_tree_node_t *moved, *upcoming;

		// the next sibling is remembered before each child is unlinked
		for (moved = myhtml_node_child(new_node); moved; moved = upcoming) {
			upcoming = myhtml_node_next(moved);
			myhtml_tree_node_remove(moved);
			myhtml_tree_node_insert_before(old_node, moved);
		}
	} else {
		myhtml_tree_node_remove(new_node);
		myhtml_tree_node_insert_before(old_node, new_node);
	}

	// ...and only then the old node is taken out
	myhtml_tree_node_remove(old_node);

	if (dom_style) {
		RETVAL = node_to_sv(old_node);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
2129 
# clone(deep = false, new_tree = undef)
# Copy this node into new_tree when given, otherwise into its own tree.
# deep selects html5_dom_recursive_clone_node(); otherwise html5_dom_copy_foreign_node() is used.
SV *
clone(HTML5::DOM::Node self, bool deep = false, HTML5::DOM::Tree new_tree = NULL)
ALIAS:
	cloneNode	= 1
CODE:
	// destination tree defaults to the node's own tree
	myhtml_tree_t *target_tree = self->tree;
	if (new_tree)
		target_tree = new_tree->tree;

	RETVAL = deep
		? node_to_sv(html5_dom_recursive_clone_node(target_tree, self, NULL))
		: node_to_sv(html5_dom_copy_foreign_node(target_tree, self));
OUTPUT:
	RETVAL
2144 
# True when myhtml_node_is_void_element() reports this node as a void element
bool
void(HTML5::DOM::Node self)
CODE:
	const bool is_void = myhtml_node_is_void_element(self);
	RETVAL = is_void;
OUTPUT:
	RETVAL
2152 
# True when myhtml_node_is_close_self() reports this node as self-closed
bool
selfClosed(HTML5::DOM::Node self)
CODE:
	const bool is_self_closed = myhtml_node_is_close_self(self);
	RETVAL = is_self_closed;
OUTPUT:
	RETVAL
2160 
# Position of the node in the parsed input, taken from its token.
# Returns a hash reference with raw_begin, raw_length, element_begin and element_length;
# every value is 0 when the node has no token.
SV *
position(HTML5::DOM::Node self)
CODE:
	HV *info = newHV();
	const bool has_token = self->token != NULL;

	hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "raw_begin", 9)),
		newSViv(has_token ? self->token->raw_begin : 0), 0);
	hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "raw_length", 10)),
		newSViv(has_token ? self->token->raw_length : 0), 0);
	hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "element_begin", 13)),
		newSViv(has_token ? self->token->element_begin : 0), 0);
	hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "element_length", 14)),
		newSViv(has_token ? self->token->element_length : 0), 0);

	RETVAL = newRV_noinc((SV *) info);
OUTPUT:
	RETVAL
2173 
# Some bad idea to get "uniq id": the address of the underlying node as an integer
SV *
hash(HTML5::DOM::Node self)
CODE:
	const IV node_address = PTR2IV(self);
	RETVAL = newSViv(node_address);
OUTPUT:
	RETVAL
2181 
# Compare node reference: true when other_node wraps the very same underlying node.
# Anything that is not a blessed HTML5::DOM::Node (or subclass) object yields false.
bool
isSameNode(HTML5::DOM::Node self, SV *other_node)
CODE:
	RETVAL = false;
	// sv_derived_from() also answers true for a plain class-name string such as
	// "HTML5::DOM::Node"; SvRV() on such a non-reference is invalid, so a blessed
	// reference is required before the pointer is unpacked
	if (sv_isobject(other_node) && sv_derived_from(other_node, "HTML5::DOM::Node")) {
		myhtml_tree_node_t *node = INT2PTR(myhtml_tree_node_t *, SvIV((SV*)SvRV(other_node)));
		if (node == self)
			RETVAL = true;
	}
OUTPUT:
	RETVAL
2194 
# Destructor of the Perl-side node wrapper.
# Clears the node's data slot, deletes the node (recursively) when it is detached and is
# not the tree's document node, and drops one reference on the owning tree's SV.
void
DESTROY(HTML5::DOM::Node self)
CODE:
	// the node's data slot is read back as an SV (presumably the wrapper stored by node_to_sv - confirm)
	SV *sv = (SV *) myhtml_node_get_data(self);

	DOM_GC_TRACE("DOM::Node::DESTROY (refcnt=%d)", sv ? SvREFCNT(sv) : -666);

	// nothing to do when the data slot is already empty
	if (sv) {
		// tree context is fetched first: self may be deleted below, but tree->sv is still needed afterwards
		html5_dom_tree_t *tree = (html5_dom_tree_t *) self->tree->context;
		myhtml_node_set_data(self, NULL);
		// detached node, can be deleted
		if (!myhtml_node_parent(self) && self != myhtml_tree_get_document(self->tree)) {
			// clear the tree's own pointer to this node (at most one of them) so it does not dangle
			if (self == self->tree->node_html) {
				self->tree->node_html = NULL;
			} else if (self == self->tree->node_body) {
				self->tree->node_body = NULL;
			} else if (self == self->tree->node_head) {
				self->tree->node_head = NULL;
			} else if (self == self->tree->node_form) {
				self->tree->node_form = NULL;
			} else if (self == self->tree->fragment) {
				self->tree->fragment = NULL;
			} else if (self == self->tree->document) {
				self->tree->document = NULL;
			}
			DOM_GC_TRACE("=> DOM::Node::FREE");
			html5_tree_node_delete_recursive(self);
		}
		// release one reference on the tree's SV (presumably the one taken when this wrapper was created - confirm)
		SvREFCNT_dec(tree->sv);
	}
2225 
2226 #################################################################
2227 # HTML5::DOM::Element (extends Node)
2228 #################################################################
2229 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::Element
# Find by css query, delegated to html5_node_find().
# at() and querySelector() set the helper's last flag; find() and querySelectorAll() leave it clear.
SV *
find(HTML5::DOM::Element self, SV *query, SV *combinator = NULL)
ALIAS:
	at					= 1
	querySelector		= 2
	querySelectorAll	= 3
CODE:
	// the tree context carries the parser handed to the search helper
	html5_dom_tree_t *context = (html5_dom_tree_t *) self->tree->context;
	const bool single_result_alias = (ix == 1 || ix == 2);

	RETVAL = html5_node_find(cv, context->parser, self, query, combinator, single_result_alias);
OUTPUT:
	RETVAL
2242 
# findTag(val), getElementsByTagName(val)									- get nodes by tag name
# findClass(val), getElementsByClassName(val)								- get nodes by class name
# findId(val), getElementById(val)											- get node by id
# findAttr(key), getElementByAttribute(key)									- get nodes by attribute key
# findAttr(key, val, case, cmp), getElementByAttribute(key, val, case, cmp)	- get nodes by attribute value
# All aliases share one implementation; the alias index tells html5_node_simple_find() which lookup to run.
SV *
findTag(HTML5::DOM::Element self, SV *key, SV *val = NULL, bool icase = false, SV *cmp = NULL)
ALIAS:
	getElementsByTagName	= 1
	findClass				= 2
	getElementsByClassName	= 3
	findId					= 4
	getElementById			= 5
	findAttr				= 6
	getElementByAttribute	= 7
CODE:
	// note the helper's argument order: cmp comes before icase
	SV *found = html5_node_simple_find(cv, self, key, val, cmp, icase, ix);
	RETVAL = found;
OUTPUT:
	RETVAL
2262 
# First child accepted by node_is_element()
SV *
first(HTML5::DOM::Element self)
ALIAS:
	firstElementChild	= 1
CODE:
	myhtml_tree_node_t *candidate;

	// scan the children front to back until an element shows up
	for (candidate = myhtml_node_child(self); candidate; candidate = myhtml_node_next(candidate)) {
		if (node_is_element(candidate))
			break;
	}

	RETVAL = node_to_sv(candidate);
OUTPUT:
	RETVAL
2275 
# First child node, whatever its type
SV *
firstNode(HTML5::DOM::Element self)
ALIAS:
	firstChild	= 1
CODE:
	myhtml_tree_node_t *first_child = myhtml_node_child(self);
	RETVAL = node_to_sv(first_child);
OUTPUT:
	RETVAL
2285 
# Last child accepted by node_is_element()
SV *
last(HTML5::DOM::Element self)
ALIAS:
	lastElementChild	= 1
CODE:
	myhtml_tree_node_t *candidate;

	// scan the children back to front until an element shows up
	for (candidate = myhtml_node_last_child(self); candidate; candidate = myhtml_node_prev(candidate)) {
		if (node_is_element(candidate))
			break;
	}

	RETVAL = node_to_sv(candidate);
OUTPUT:
	RETVAL
2298 
# Last child node, whatever its type
SV *
lastNode(HTML5::DOM::Element self)
ALIAS:
	lastChild	= 1
CODE:
	myhtml_tree_node_t *last_child = myhtml_node_last_child(self);
	RETVAL = node_to_sv(last_child);
OUTPUT:
	RETVAL
2308 
# Return all attributes as a reference to an array of hashes,
# each with the keys "name", "value" and "namespace", in attribute order.
SV *
attrArray(HTML5::DOM::Element self)
CODE:
	AV *list = newAV();
	myhtml_tree_attr_t *cur;

	for (cur = myhtml_node_attribute_first(self); cur; cur = myhtml_attribute_next(cur)) {
		size_t name_len = 0;
		const char *name = myhtml_attribute_key(cur, &name_len);

		size_t value_len = 0;
		const char *value = myhtml_attribute_value(cur, &value_len);

		size_t namespace_len = 0;
		const char *namespace_name = myhtml_namespace_name_by_id(myhtml_attribute_namespace(cur), &namespace_len);

		// a NULL string from the library is stored as an empty string
		HV *item = newHV();
		hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "name", 4)),
			newSVpv_utf8_auto(self->tree, name ? name : "", name_len), 0);
		hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "value", 5)),
			newSVpv_utf8_auto(self->tree, value ? value : "", value_len), 0);
		hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "namespace", 9)),
			newSVpv_utf8_auto(self->tree, namespace_name ? namespace_name : "", namespace_len), 0);

		av_push(list, newRV_noinc((SV *) item));
	}

	RETVAL = newRV_noinc((SV *) list);
OUTPUT:
	RETVAL
2340 
# attr()					- return all attributes in a hash
# attr("key")				- return value of attribute "key" (undef if it does not exist)
# attr("key", "value")		- set value for attribute "key" (return this)
# attr("key", undef)		- remove attribute "key" (return this)
# attr({"key" => "value"})	- bulk set/remove attributes from a hash (return this)
# setAttribute(key, value)	- like attr(key, value), but croaks when key or value is missing
# getAttribute(key)			- like attr(key), but croaks when key is missing; key is always treated as a name
SV *
attr(HTML5::DOM::Element self, SV *key = NULL, SV *value = NULL)
ALIAS:
	setAttribute	= 1
	getAttribute	= 2
CODE:
	RETVAL = &PL_sv_undef;

	if (ix == 1) { // setAttribute
		if (!key)
			sub_croak(cv, "attribute key required for setAttribute");

		if (!value)
			sub_croak(cv, "attribute value required for setAttribute");
	} else if (ix == 2) { // getAttribute
		if (!key)
			sub_croak(cv, "attribute key required for getAttribute");

		// stringify up front and drop any value so only the lookup path below can run
		key = sv_stringify(key);
		value = NULL;
	}

	if (key && value) { // Set value by key or delete by key
		key = sv_stringify(key);
		value = sv_stringify(value);

		STRLEN key_len = 0;
		const char *key_str = SvPV_const(key, key_len);

		// an empty key is silently ignored
		if (key_len) {
			// if value is undef - only remove attribute
			if (SvTYPE(value) != SVt_NULL) {
				STRLEN val_len = 0;
				const char *val_str = SvPV_const(value, val_len);
				html5_dom_replace_attr_value(self, key_str, key_len, val_str, val_len, MyENCODING_DEFAULT);
			} else {
				myhtml_attribute_remove_by_key(self, key_str, key_len);
			}
		}

		// return self
		RETVAL = SvREFCNT_inc(ST(0));
	} else if (key && !value) {
		// Bulk attr set
		if (SvROK(key) && SvTYPE(SvRV(key)) == SVt_PVHV) {
			HE *entry;
			HV *hash = (HV *) SvRV(key);

			// reset the hash iterator: without this a hash that the caller left
			// half-iterated would only have its remaining entries applied
			(void) hv_iterinit(hash);

			while ((entry = hv_iternext(hash)) != NULL) {
				SV *entry_value = hv_iterval(hash, entry);
				I32 key_len;
				const char *key_name = hv_iterkey(entry, &key_len);
				if (entry_value && key_len) {
					entry_value = sv_stringify(entry_value);

					// if value is undef - only remove attribute
					if (SvTYPE(entry_value) != SVt_NULL) {
						STRLEN val_len = 0;
						const char *val_str = SvPV_const(entry_value, val_len);
						html5_dom_replace_attr_value(self, key_name, key_len, val_str, val_len, MyENCODING_DEFAULT);
					} else {
						myhtml_attribute_remove_by_key(self, key_name, key_len);
					}
				}
			}

			RETVAL = SvREFCNT_inc(ST(0));
		}
		// Get attribute by key
		else {
			key = sv_stringify(key);

			STRLEN key_len = 0;
			const char *key_str = SvPV_const(key, key_len);

			// empty key or unknown attribute: RETVAL stays undef
			if (key_len) {
				myhtml_tree_attr_t *attr = myhtml_attribute_by_key(self, key_str, key_len);
				if (attr) {
					size_t attr_val_len = 0;
					const char *attr_val = myhtml_attribute_value(attr, &attr_val_len);
					RETVAL = newSVpv_utf8_auto(self->tree, attr_val ? attr_val : "", attr_val_len);
				}
			}
		}
	} else { // Return all attributes in hash
		HV *hash = newHV();

		myhtml_tree_attr_t *attr = myhtml_node_attribute_first(self);
		while (attr) {
			size_t attr_key_len = 0;
			const char *attr_key = myhtml_attribute_key(attr, &attr_key_len);

			size_t attr_val_len = 0;
			const char *attr_val = myhtml_attribute_value(attr, &attr_val_len);

			// a NULL key or value from the library is stored as an empty string
			hv_store_ent(hash, sv_2mortal(newSVpv_utf8_auto(self->tree, attr_key ? attr_key : "", attr_key_len)), newSVpv_utf8_auto(self->tree, attr_val ? attr_val : "", attr_val_len), 0);

			attr = myhtml_attribute_next(attr);
		}

		RETVAL = newRV_noinc((SV *) hash);
	}
OUTPUT:
	RETVAL
2449 
# Remove attribute by key (return this).
# A missing or empty key removes nothing.
SV *
removeAttr(HTML5::DOM::Element self, SV *key = NULL)
ALIAS:
	removeAttribute	= 1
CODE:
	// key defaults to NULL when the argument is omitted; it must not reach
	// SvPV_const(), which reads through the pointer
	if (key) {
		key = sv_stringify(key);

		STRLEN key_len = 0;
		const char *key_str = SvPV_const(key, key_len);

		if (key_len)
			myhtml_attribute_remove_by_key(self, key_str, key_len);
	}

	RETVAL = SvREFCNT_inc(ST(0));
OUTPUT:
	RETVAL
2467 
# Return an HTML5::DOM::Collection holding the children accepted by node_is_element()
SV *
children(HTML5::DOM::Element self)
CODE:
	AV *items = newAV();
	myhtml_tree_node_t *cur;

	for (cur = myhtml_node_child(self); cur; cur = myhtml_node_next(cur)) {
		if (!node_is_element(cur))
			continue;
		av_push(items, node_to_sv(cur));
	}

	// the collection is a blessed reference to the plain array
	SV *collection = newRV_noinc((SV *) items);
	RETVAL = sv_bless(collection, gv_stashpv("HTML5::DOM::Collection", 0));
OUTPUT:
	RETVAL
2484 
# Return an HTML5::DOM::Collection holding every child node, whatever its type
SV *
childrenNode(HTML5::DOM::Element self)
ALIAS:
	childNodes	= 1
CODE:
	AV *items = newAV();
	myhtml_tree_node_t *cur;

	for (cur = myhtml_node_child(self); cur; cur = myhtml_node_next(cur))
		av_push(items, node_to_sv(cur));

	// the collection is a blessed reference to the plain array
	SV *collection = newRV_noinc((SV *) items);
	RETVAL = sv_bless(collection, gv_stashpv("HTML5::DOM::Collection", 0));
OUTPUT:
	RETVAL
2502 
# Return the default display property for the tag as a CSS keyword string,
# or undef when html5_dom_get_ua_display_prop() yields a value not listed below.
SV *
getDefaultBoxType(HTML5::DOM::Element self)
CODE:
	const char *display = NULL;

	switch (html5_dom_get_ua_display_prop(self)) {
		case TAG_UA_STYLE_NONE:					display = "none";					break;
		case TAG_UA_STYLE_INLINE:				display = "inline";					break;
		case TAG_UA_STYLE_BLOCK:				display = "block";					break;
		case TAG_UA_STYLE_INLINE_BLOCK:			display = "inline-block";			break;
		case TAG_UA_STYLE_LIST_ITEM:			display = "list-item";				break;
		case TAG_UA_STYLE_TABLE:				display = "table";					break;
		case TAG_UA_STYLE_TABLE_CAPTION:		display = "table-caption";			break;
		case TAG_UA_STYLE_TABLE_CELL:			display = "table-cell";				break;
		case TAG_UA_STYLE_TABLE_COLUMN:			display = "table-column";			break;
		case TAG_UA_STYLE_TABLE_COLUMN_GROUP:	display = "table-column-group";		break;
		case TAG_UA_STYLE_TABLE_HEADER_GROUP:	display = "table-header-group";		break;
		case TAG_UA_STYLE_TABLE_FOOTER_GROUP:	display = "table-footer-group";		break;
		case TAG_UA_STYLE_TABLE_ROW:			display = "table-row";				break;
		case TAG_UA_STYLE_TABLE_ROW_GROUP:		display = "table-row-group";		break;
		case TAG_UA_STYLE_RUBY:					display = "ruby";					break;
		case TAG_UA_STYLE_RUBY_BASE:			display = "ruby-base";				break;
		case TAG_UA_STYLE_RUBY_TEXT:			display = "ruby-text";				break;
		case TAG_UA_STYLE_RUBY_TEXT_CONTAINER:	display = "ruby-text-container";	break;
		default:
			// any other value leaves display unset, which maps to undef below
			break;
	}

	if (display) {
		RETVAL = newSVpv_utf8_auto(self->tree, display, strlen(display));
	} else {
		RETVAL = &PL_sv_undef;
	}
OUTPUT:
	RETVAL
2568 
2569 #################################################################
2570 # HTML5::DOM::DocType (extends Node)
2571 #################################################################
2572 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::DocType
# name(), publicId(), systemId()				- read one part of the doctype (returns a string)
# name(val), publicId(val), systemId(val)		- replace that part, keeping the other two (returns this)
# The parts are read from the token's attribute list by position: the key of the first attribute
# is the root name, the value of the second is the restriction type, and the following values are
# the public and system ids (when the type is SYSTEM, the third attribute is the system id).
SV *name(HTML5::DOM::DocType self, SV *value = NULL)
ALIAS:
	publicId		= 1
	systemId		= 2
CODE:
	// compared by address further down, so these must stay single shared pointers
	static const char *TYPE_SYSTEM = "SYSTEM";
	static const char *TYPE_PUBLIC = "PUBLIC";

	// positional walk over the token attributes; each step tolerates a shorter list
	myhtml_tree_attr_t *root_name = self->token ? self->token->attr_first : NULL;
	myhtml_tree_attr_t *restrict_type = root_name ? root_name->next : NULL;
	myhtml_tree_attr_t *public_id = restrict_type ? restrict_type->next : NULL;
	myhtml_tree_attr_t *system_id = public_id ? public_id->next : NULL;

	// SYSTEM doctypes carry no public id: the third attribute is the system id
	if (restrict_type && restrict_type->value.length == 6) {
		if (mycore_strcasecmp(restrict_type->value.data, "SYSTEM") == 0) {
			system_id = public_id;
			public_id = NULL;
		}
	}

	if (value) {
		value = sv_stringify(value);

		// remember the current attribute range; it is deleted after the new list has been added
		myhtml_tree_attr_t *attr_first = self->token ? self->token->attr_first : NULL;
		myhtml_tree_attr_t *attr_last = self->token ? self->token->attr_last : NULL;

		STRLEN val_len = 0;
		const char *val_str = SvPV_const(value, val_len);

		// root element name
		if (ix == 0) {
			myhtml_attribute_add(self, val_str, val_len, "", 0, MyENCODING_DEFAULT);
		} else {
			// not changing the name: re-add the existing one (empty when there was none)
			myhtml_attribute_add(self, root_name && root_name->key.length ? root_name->key.data : "", root_name ? root_name->key.length : 0, "", 0, MyENCODING_DEFAULT);
		}

		const char *restrict_type_str = NULL;

		// SYSTEM when a non-empty system id will be present...
		if ((ix == 2 && val_len) || (system_id && system_id->value.length))
			restrict_type_str = TYPE_SYSTEM;

		// ...but PUBLIC takes precedence when a non-empty public id will be present
		if ((ix == 1 && val_len) || (public_id && public_id->value.length))
			restrict_type_str = TYPE_PUBLIC;

		// with neither id present only the root name attribute is written
		if (restrict_type_str) {
			// SYSTEM or PUBLIC
			myhtml_attribute_add(self, "", 0, restrict_type_str, 6, MyENCODING_DEFAULT);

			if (restrict_type_str == TYPE_PUBLIC) {
				// publicId
				if (ix == 1) {
					myhtml_attribute_add(self, "", 0, val_str, val_len, MyENCODING_DEFAULT);
				} else {
					myhtml_attribute_add(self, "", 0, public_id && public_id->value.length ? public_id->value.data : "", public_id ? public_id->value.length : 0, MyENCODING_DEFAULT);
				}
			}

			// systemId
			if (ix == 2) {
				myhtml_attribute_add(self, "", 0, val_str, val_len, MyENCODING_DEFAULT);
			} else {
				myhtml_attribute_add(self, "", 0, system_id && system_id->value.length ? system_id->value.data : "", system_id ? system_id->value.length : 0, MyENCODING_DEFAULT);
			}
		}

		// remove old
		while (attr_last && attr_first) {
			// read the successor before the attribute is deleted
			myhtml_tree_attr_t *next = attr_first->next;
			myhtml_attribute_delete(self->tree, self, attr_first);

			// stop at the remembered last attribute so the newly added ones survive
			// (assumes myhtml_attribute_add appends after it - confirm)
			if (attr_first == attr_last)
				break;

			attr_first = next;
		}

		RETVAL = SvREFCNT_inc(ST(0));
	} else {
		RETVAL = &PL_sv_undef;

		// getters: a missing part is returned as an empty string
		switch (ix) {
			case 0: /* name */
				RETVAL = newSVpv_utf8_auto(self->tree, root_name && root_name->key.length ? root_name->key.data : "", root_name ? root_name->key.length : 0);
			break;

			case 1: /* publicId */
				RETVAL = newSVpv_utf8_auto(self->tree, public_id && public_id->value.length ? public_id->value.data : "", public_id ? public_id->value.length : 0);
			break;

			case 2: /* systemId */
				RETVAL = newSVpv_utf8_auto(self->tree, system_id && system_id->value.length ? system_id->value.data : "", system_id ? system_id->value.length : 0);
			break;
		}
	}
OUTPUT:
	RETVAL
2669 
2670 #################################################################
2671 # HTML5::DOM::CSS (Parser)
2672 #################################################################
2673 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::CSS
# Create a CSS parser: a mycss object plus one mycss entry, with options parsed from the given hash.
# Croaks when mycss_init() or mycss_entry_init() reports a non-zero status.
HTML5::DOM::CSS
new(SV *CLASS, HV *options = NULL)
CODE:
	DOM_GC_TRACE("DOM::CSS::new");

	// core mycss object
	mycss_t *css = mycss_create();
	mystatus_t rc = mycss_init(css);
	if (rc) {
		mycss_destroy(css, 1);
		sub_croak(cv, "mycss_init failed: %d (%s)", rc, modest_strerror(rc));
	}

	// entry bound to that object
	mycss_entry_t *css_entry = mycss_entry_create();
	rc = mycss_entry_init(css, css_entry);
	if (rc) {
		mycss_destroy(css, 1);
		mycss_entry_destroy(css_entry, 1);
		sub_croak(cv, "mycss_entry_init failed: %d (%s)", rc, modest_strerror(rc));
	}

	// wrapper struct handed to Perl; released again in DESTROY
	html5_css_parser_t *parser = (html5_css_parser_t *) safemalloc(sizeof(html5_css_parser_t));
	parser->encoding = MyENCODING_UTF_8;
	parser->entry = css_entry;
	parser->mycss = css;

	// no parent options here: NULL is passed where parseSelector passes the parser's own options
	html5_dom_parse_options(&parser->opts, NULL, options);

	RETVAL = parser;
OUTPUT:
	RETVAL
2705 
# Parse css selector text and wrap the result in an HTML5::DOM::CSS::Selector object.
# Per-call options are parsed with the parser's own options passed as the second argument.
SV *
parseSelector(HTML5::DOM::CSS self, SV *query, HV *options = NULL)
CODE:
	html5_dom_options_t call_opts;
	html5_dom_parse_options(&call_opts, &self->opts, options);

	query = sv_stringify(query);

	STRLEN text_len;
	const char *text = SvPV_const(query, text_len);

	// the status is collected but not inspected; the list is stored as returned
	mystatus_t status;
	mycss_selectors_list_t *parsed = mycss_selectors_parse(mycss_entry_selectors(self->entry), MyENCODING_UTF_8, text, text_len, &status);

	DOM_GC_TRACE("DOM::CSS::Selector::NEW");
	html5_css_selector_t *selector = (html5_css_selector_t *) safemalloc(sizeof(html5_css_selector_t));
	selector->parser = self;
	selector->list = parsed;
	selector->parent = SvRV(ST(0));

	// utf8 option value 2 takes the flag from the query string itself; any other value is used as a boolean
	selector->utf8 = (call_opts.utf8 == 2) ? (SvUTF8(query) ? 1 : 0) : (call_opts.utf8 != 0);

	// the selector holds a reference on the parser object it points into
	SvREFCNT_inc(selector->parent);
	RETVAL = pack_pointer("HTML5::DOM::CSS::Selector", selector);
OUTPUT:
	RETVAL
2738 
# Destructor of the CSS parser: destroys the entry, then the mycss object, then frees the wrapper struct
void
DESTROY(HTML5::DOM::CSS self)
CODE:
	DOM_GC_TRACE("DOM::CSS::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

	// the entry goes first, the mycss object it was initialised with second
	mycss_entry_destroy(self->entry, 1);
	mycss_destroy(self->mycss, 1);

	// allocated with safemalloc() in new()
	safefree(self);
2746 
2747 
2748 #################################################################
2749 # HTML5::DOM::CSS::Selector
2750 #################################################################
2751 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::CSS::Selector
2752 
# Serialize selector to text; a selector without a list yields an empty string
SV *
text(HTML5::DOM::CSS::Selector self)
CODE:
	SV *buffer = newSVpv_utf8_auto_css(self, "", 0);

	if (self->list) {
		// the serializer reports its output through sv_serialization_callback, with buffer as context
		mycss_selectors_serialization_list(mycss_entry_selectors(self->parser->entry), self->list, sv_serialization_callback, buffer);
	}

	RETVAL = buffer;
OUTPUT:
	RETVAL
2762 
# True when the selector has a list and that list is not flagged MyCSS_SELECTORS_FLAGS_SELECTOR_BAD
bool
valid(HTML5::DOM::CSS::Selector self)
CODE:
	RETVAL = false;
	if (self->list && !(self->list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD))
		RETVAL = true;
OUTPUT:
	RETVAL
2770 
# Return the selector AST as an array reference (an empty array when no list was parsed)
SV *
ast(HTML5::DOM::CSS::Selector self)
CODE:
	AV *nodes = newAV();

	if (self->list) {
		html5_dom_css_serialize_selector(self, self->list, nodes);
	}

	// The new reference takes over the only count on the array
	RETVAL = newRV_noinc((SV *) nodes);
OUTPUT:
	RETVAL
2781 
# Number of entries in the parsed selector list (0 when no list was parsed)
int
length(HTML5::DOM::CSS::Selector self)
CODE:
	RETVAL = 0;
	if (self->list)
		RETVAL = self->list->entries_list_length;
OUTPUT:
	RETVAL
2789 
# Return the selector entry at the given index as an HTML5::DOM::CSS::Selector::Entry object,
# or undef when there is no parsed list or the index is out of range.
# The entry object holds a counted reference to this selector.
SV *
entry(HTML5::DOM::CSS::Selector self, int index)
CODE:
	RETVAL = &PL_sv_undef;

	if (self->list && index >= 0 && index < self->list->entries_list_length) {
		DOM_GC_TRACE("DOM::CSS::Selector::Entry::NEW");
		html5_css_selector_entry_t *wrapper = (html5_css_selector_entry_t *) safemalloc(sizeof(html5_css_selector_entry_t));
		wrapper->selector = self;
		wrapper->list = &self->list->entries_list[index];

		// Pin the selector object while the entry is alive
		wrapper->parent = SvRV(ST(0));
		SvREFCNT_inc(wrapper->parent);

		RETVAL = pack_pointer("HTML5::DOM::CSS::Selector::Entry", wrapper);
	}
OUTPUT:
	RETVAL
2807 
# utf8(flag)				- enable or disable utf8 mode
# utf8()					- get status of utf8 mode (0 - disabled, 1 - enabled)
# The getter form returns 0 or 1; the setter form returns the selector object itself.
# Setter rules: empty string disables; a value starting with a digit is taken numerically
# (non-zero enables); any other non-empty value enables.
SV *
utf8(HTML5::DOM::CSS::Selector self, SV *value = NULL)
CODE:
	if (!value) {
		RETVAL = newSViv(self->utf8 ? 1 : 0);
	} else {
		value = sv_stringify(value);

		STRLEN flag_len;
		const char *flag_str = SvPV_const(value, flag_len);

		// Reset first so that an empty value disables utf8 mode; the reset must
		// come before the checks below, otherwise it would discard their result.
		self->utf8 = 0;

		if (flag_len > 0) {
			// Cast keeps isdigit() well-defined for bytes >= 0x80
			if (isdigit((unsigned char) flag_str[0])) {
				self->utf8 = SvIV(value) != 0;
			} else {
				self->utf8 = 1;
			}
		}

		// Hand back the invocant with an extra count for the return value
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
2835 
# Destructor: destroys the parsed selector list (if any), drops the reference
# held on the parent SV and frees the wrapper struct.
void
DESTROY(HTML5::DOM::CSS::Selector self)
CODE:
	DOM_GC_TRACE("DOM::CSS::Selector::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

	if (self->list) {
		mycss_selectors_t *selectors = mycss_entry_selectors(self->parser->entry);
		mycss_selectors_list_destroy(selectors, self->list, true);
	}

	// Matches the SvREFCNT_inc done when the selector was created
	SvREFCNT_dec(self->parent);
	safefree(self);
2844 
2845 
2846 #################################################################
2847 # HTML5::DOM::CSS::Selector::Entry
2848 #################################################################
2849 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::CSS::Selector::Entry
2850 
# Serialize this entry's selector chain to text
SV *
text(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	html5_css_selector_t *owner = self->selector;
	SV *out = newSVpv_utf8_auto_css(owner, "", 0);

	// sv_serialization_callback receives the output SV as its context pointer
	mycss_selectors_serialization_chain(mycss_entry_selectors(owner->parser->entry), self->list->entry, sv_serialization_callback, out);

	RETVAL = out;
OUTPUT:
	RETVAL
2859 
# Return the AST of this entry as an array reference
SV *
ast(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	html5_css_selector_t *owner = self->selector;
	AV *nodes = newAV();

	html5_dom_css_serialize_entry(owner, owner->list, self->list->entry, nodes);

	// The new reference takes over the only count on the array
	RETVAL = newRV_noinc((SV *) nodes);
OUTPUT:
	RETVAL
2869 
# Return the key of the first pseudo-element in this entry's chain, or undef when there is none
SV *
pseudoElement(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	mycss_selectors_entry_t *cur;

	RETVAL = &PL_sv_undef;

	// Walk the chain and stop at the first pseudo-element
	for (cur = self->list->entry; cur; cur = cur->next) {
		if (cur->type != MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT)
			continue;

		RETVAL = newSVpv_utf8_auto_css(self->selector, cur->key->length ? cur->key->data : "", cur->key->length);
		break;
	}
OUTPUT:
	RETVAL
2885 
# True when the owning selector list is not flagged MyCSS_SELECTORS_FLAGS_SELECTOR_BAD
bool
valid(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	const int is_bad = (self->selector->list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD) != 0;
	RETVAL = !is_bad;
OUTPUT:
	RETVAL
2893 
# Return the entry's specificity as a hash reference with keys a, b and c
SV *
specificity(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	static const char *const names[3] = { "a", "b", "c" };
	const IV parts[3] = {
		self->list->specificity.a,
		self->list->specificity.b,
		self->list->specificity.c
	};
	HV *result = newHV();
	int i;

	for (i = 0; i < 3; i++) {
		// Key SVs are mortal; the hash keeps the value SVs
		SV *key = sv_2mortal(newSVpv_utf8_auto_css(self->selector, names[i], 1));
		hv_store_ent(result, key, newSViv(parts[i]), 0);
	}

	RETVAL = newRV_noinc((SV *) result);
OUTPUT:
	RETVAL
2905 
# Return the entry's specificity as an array reference [a, b, c]
SV *
specificityArray(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	const IV parts[3] = {
		self->list->specificity.a,
		self->list->specificity.b,
		self->list->specificity.c
	};
	AV *result = newAV();
	int i;

	for (i = 0; i < 3; i++)
		av_push(result, newSViv(parts[i]));

	RETVAL = newRV_noinc((SV *) result);
OUTPUT:
	RETVAL
2917 
# Destructor: drops the reference held on the parent selector SV and frees the wrapper struct
void
DESTROY(HTML5::DOM::CSS::Selector::Entry self)
CODE:
	DOM_GC_TRACE("DOM::CSS::Selector::Entry::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

	// Matches the SvREFCNT_inc done when the entry was created
	SV *owner_sv = (SV *) self->parent;
	SvREFCNT_dec(owner_sv);

	safefree(self);
2924 
2925 #################################################################
2926 # HTML5::DOM::Encoding
2927 #################################################################
2928 MODULE = HTML5::DOM  PACKAGE = HTML5::DOM::Encoding
2929 
# Map an encoding id to its name; returns undef when myencoding_name_by_id yields no name
SV *
id2name(int id)
CODE:
	size_t name_len = 0;
	const char *enc_name = myencoding_name_by_id(id, &name_len);

	if (enc_name)
		RETVAL = newSVpv(enc_name, name_len);
	else
		RETVAL = &PL_sv_undef;
OUTPUT:
	RETVAL
2938 
# Map an encoding name to its id; returns undef when the name is not recognised
SV *
name2id(SV *text)
CODE:
	text = sv_stringify(text);

	STRLEN name_len;
	const char *name = SvPV_const(text, name_len);

	// Stays at NOT_DETERMINED unless the lookup overwrites it
	myencoding_t found = MyENCODING_NOT_DETERMINED;
	myencoding_by_name(name, name_len, &found);

	if (found != MyENCODING_NOT_DETERMINED)
		RETVAL = newSViv(found);
	else
		RETVAL = &PL_sv_undef;
OUTPUT:
	RETVAL
2952 
# Detect the encoding of text and return its id (MyENCODING_NOT_DETERMINED when detection fails).
# The alias used to call this function selects the detection routine (see the switch below).
# When max_len is positive and shorter than the text, only the first max_len bytes are examined.
int
detect(SV *text, long max_len = 0)
ALIAS:
	detectByPrescanStream	= 1
	detectCyrillic			= 2
	detectUkrainian			= 21
	detectRussian			= 22
	detectUnicode			= 3
	detectBom				= 4
	detectByCharset			= 5
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = max_len;

	// Initialised so that an unexpected alias index can never return an indeterminate value
	myencoding_t encoding = MyENCODING_NOT_DETERMINED;

	switch (ix) {
		case 0:
			if (!myencoding_detect(text_str, text_len, &encoding))
				encoding = MyENCODING_NOT_DETERMINED;
		break;
		case 1:
			encoding = myencoding_prescan_stream_to_determine_encoding(text_str, text_len);
		break;
		// All three cyrillic aliases share the same detector
		case 2:
		case 21:
		case 22:
			if (!myencoding_detect_russian(text_str, text_len, &encoding))
				encoding = MyENCODING_NOT_DETERMINED;
		break;
		case 3:
			if (!myencoding_detect_unicode(text_str, text_len, &encoding))
				encoding = MyENCODING_NOT_DETERMINED;
		break;
		case 4:
			if (!myencoding_detect_bom(text_str, text_len, &encoding))
				encoding = MyENCODING_NOT_DETERMINED;
		break;
		case 5:
			if (!myencoding_extracting_character_encoding_from_charset(text_str, text_len, &encoding))
				encoding = MyENCODING_NOT_DETERMINED;
		break;
		default:
			// Unknown alias index: report "not determined"
		break;
	}

	RETVAL = encoding;
OUTPUT:
	RETVAL
3005 
# Run myencoding_detect_and_cut_bom on text and return a two-element list:
# (encoding id, text as adjusted by that call - presumably with the BOM removed).
# The encoding id is MyENCODING_NOT_DETERMINED when the call reports failure.
# When max_len is positive and shorter than the text, only the first max_len bytes are used.
# The returned string carries the UTF-8 flag when the input string does.
void
detectBomAndCut(SV *text, long max_len = 0)
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = max_len;

	myencoding_t encoding;

	if (!myencoding_detect_and_cut_bom(text_str, text_len, &encoding, &text_str, &text_len))
		encoding = MyENCODING_NOT_DETERMINED;

	SV *encoding_sv = sv_2mortal(newSViv(encoding));

	// newSVpvn honours a zero length; newSVpv would fall back to strlen() and
	// return bytes beyond the requested window when nothing is left after the cut.
	SV *text_sv = sv_2mortal(newSVpvn(text_str, text_len));

	// The UTF-8 flag belongs on the returned string, not on the numeric encoding id
	if (SvUTF8(text))
		SvUTF8_on(text_sv);

	// Only one argument may have been passed, so make room for two return values
	EXTEND(SP, 2);
	ST(0) = encoding_sv;
	ST(1) = text_sv;

	XSRETURN(2);
3032 
# Run html5_dom_auto_encoding on text and return a two-element list:
# (encoding id, text as adjusted by that call).
# Options from the optional hash are parsed first; encoding, default_encoding and
# encoding_prescan_limit are then overridden for this call.
# When max_len is positive and shorter than the text, only the first max_len bytes are used.
# The returned string carries the UTF-8 flag when the input string does.
void
detectAuto(SV *text, long max_len = 0, HV *options = NULL)
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = max_len;

	html5_dom_options_t opts = {0};
	html5_dom_parse_options(&opts, NULL, options);

	// Force auto-detection over the whole (possibly truncated) input
	opts.encoding				= MyENCODING_AUTO;
	opts.default_encoding		= MyENCODING_NOT_DETERMINED;
	opts.encoding_prescan_limit	= text_len;

	myencoding_t encoding = html5_dom_auto_encoding(&opts, &text_str, &text_len);

	SV *encoding_sv = sv_2mortal(newSViv(encoding));

	// newSVpvn honours a zero length; newSVpv would fall back to strlen() and
	// return bytes beyond the requested window when the remaining text is empty.
	SV *text_sv = sv_2mortal(newSVpvn(text_str, text_len));

	// The UTF-8 flag belongs on the returned string, not on the numeric encoding id
	if (SvUTF8(text))
		SvUTF8_on(text_sv);

	// Only one argument may have been passed, so make room for two return values
	EXTEND(SP, 2);
	ST(0) = encoding_sv;
	ST(1) = text_sv;

	XSRETURN(2);
3063